Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include <math.h>
133 :
134 : #include "access/genam.h"
135 : #include "access/heapam.h"
136 : #include "access/htup_details.h"
137 : #include "access/multixact.h"
138 : #include "access/tidstore.h"
139 : #include "access/transam.h"
140 : #include "access/visibilitymap.h"
141 : #include "access/xloginsert.h"
142 : #include "catalog/storage.h"
143 : #include "commands/progress.h"
144 : #include "commands/vacuum.h"
145 : #include "common/int.h"
146 : #include "common/pg_prng.h"
147 : #include "executor/instrument.h"
148 : #include "miscadmin.h"
149 : #include "pgstat.h"
150 : #include "portability/instr_time.h"
151 : #include "postmaster/autovacuum.h"
152 : #include "storage/bufmgr.h"
153 : #include "storage/freespace.h"
154 : #include "storage/lmgr.h"
155 : #include "storage/read_stream.h"
156 : #include "utils/lsyscache.h"
157 : #include "utils/pg_rusage.h"
158 : #include "utils/timestamp.h"
159 :
160 :
161 : /*
162 : * Space/time tradeoff parameters: do these need to be user-tunable?
163 : *
164 : * To consider truncating the relation, we want there to be at least
165 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
166 : * is less) potentially-freeable pages.
167 : */
168 : #define REL_TRUNCATE_MINIMUM 1000
169 : #define REL_TRUNCATE_FRACTION 16
170 :
171 : /*
172 : * Timing parameters for truncate locking heuristics.
173 : *
174 : * These were not exposed as user tunable GUC values because it didn't seem
175 : * that the potential for improvement was great enough to merit the cost of
176 : * supporting them.
177 : */
178 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
179 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
181 :
182 : /*
183 : * Threshold that controls whether we bypass index vacuuming and heap
184 : * vacuuming as an optimization
185 : */
186 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
187 :
188 : /*
189 : * Perform a failsafe check each time we scan another 4GB of pages.
190 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
191 : */
192 : #define FAILSAFE_EVERY_PAGES \
193 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
194 :
195 : /*
196 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
197 : * (it won't be exact because we only vacuum FSM after processing a heap page
198 : * that has some removable tuples). When there are indexes, this is ignored,
199 : * and we vacuum FSM after each index/heap cleaning pass.
200 : */
201 : #define VACUUM_FSM_EVERY_PAGES \
202 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
203 :
204 : /*
205 : * Before we consider skipping a page that's marked as clean in
206 : * visibility map, we must've seen at least this many clean pages.
207 : */
208 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
209 :
210 : /*
211 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
212 : * Needs to be a power of 2.
213 : */
214 : #define PREFETCH_SIZE ((BlockNumber) 32)
215 :
216 : /*
217 : * Macro to check if we are in a parallel vacuum. If true, we are in the
218 : * parallel mode and the DSM segment is initialized.
219 : */
220 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
221 :
222 : /* Phases of vacuum during which we report error context. */
223 : typedef enum
224 : {
225 : VACUUM_ERRCB_PHASE_UNKNOWN,
226 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
227 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
228 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
229 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
230 : VACUUM_ERRCB_PHASE_TRUNCATE,
231 : } VacErrPhase;
232 :
233 : /*
234 : * An eager scan of a page that is set all-frozen in the VM is considered
235 : * "successful". To spread out freezing overhead across multiple normal
236 : * vacuums, we limit the number of successful eager page freezes. The maximum
237 : * number of eager page freezes is calculated as a ratio of the all-visible
238 : * but not all-frozen pages at the beginning of the vacuum.
239 : */
240 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
241 :
242 : /*
243 : * On the assumption that different regions of the table tend to have
244 : * similarly aged data, once vacuum fails to freeze
245 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
246 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
247 : * to another region of the table with potentially older data.
248 : */
249 : #define EAGER_SCAN_REGION_SIZE 4096
250 :
251 : /*
252 : * heap_vac_scan_next_block() sets these flags to communicate information
253 : * about the block it read to the caller.
254 : */
255 : #define VAC_BLK_WAS_EAGER_SCANNED (1 << 0)
256 : #define VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM (1 << 1)
257 :
258 : typedef struct LVRelState
259 : {
260 : /* Target heap relation and its indexes */
261 : Relation rel;
262 : Relation *indrels;
263 : int nindexes;
264 :
265 : /* Buffer access strategy and parallel vacuum state */
266 : BufferAccessStrategy bstrategy;
267 : ParallelVacuumState *pvs;
268 :
269 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
270 : bool aggressive;
271 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
272 : bool skipwithvm;
273 : /* Consider index vacuuming bypass optimization? */
274 : bool consider_bypass_optimization;
275 :
276 : /* Doing index vacuuming, index cleanup, rel truncation? */
277 : bool do_index_vacuuming;
278 : bool do_index_cleanup;
279 : bool do_rel_truncate;
280 :
281 : /* VACUUM operation's cutoffs for freezing and pruning */
282 : struct VacuumCutoffs cutoffs;
283 : GlobalVisState *vistest;
284 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
285 : TransactionId NewRelfrozenXid;
286 : MultiXactId NewRelminMxid;
287 : bool skippedallvis;
288 :
289 : /* Error reporting state */
290 : char *dbname;
291 : char *relnamespace;
292 : char *relname;
293 : char *indname; /* Current index name */
294 : BlockNumber blkno; /* used only for heap operations */
295 : OffsetNumber offnum; /* used only for heap operations */
296 : VacErrPhase phase;
297 : bool verbose; /* VACUUM VERBOSE? */
298 :
299 : /*
300 : * dead_items stores TIDs whose index tuples are deleted by index
301 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
302 : * that has been processed by lazy_scan_prune. Also needed by
303 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
304 : * LP_UNUSED during second heap pass.
305 : *
306 : * Both dead_items and dead_items_info are allocated in shared memory in
307 : * parallel vacuum cases.
308 : */
309 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
310 : VacDeadItemsInfo *dead_items_info;
311 :
312 : BlockNumber rel_pages; /* total number of pages */
313 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
314 :
315 : /*
316 : * Count of all-visible blocks eagerly scanned (for logging only). This
317 : * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
318 : */
319 : BlockNumber eager_scanned_pages;
320 :
321 : BlockNumber removed_pages; /* # pages removed by relation truncation */
322 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
323 :
324 : /* # pages newly set all-visible in the VM */
325 : BlockNumber vm_new_visible_pages;
326 :
327 : /*
328 : * # pages newly set all-visible and all-frozen in the VM. This is a
329 : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
330 : * all pages set all-visible, but vm_new_visible_frozen_pages includes
331 : * only those which were also set all-frozen.
332 : */
333 : BlockNumber vm_new_visible_frozen_pages;
334 :
335 : /* # all-visible pages newly set all-frozen in the VM */
336 : BlockNumber vm_new_frozen_pages;
337 :
338 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
339 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
340 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
341 :
342 : /* Statistics output by us, for table */
343 : double new_rel_tuples; /* new estimated total # of tuples */
344 : double new_live_tuples; /* new estimated total # of live tuples */
345 : /* Statistics output by index AMs */
346 : IndexBulkDeleteResult **indstats;
347 :
348 : /* Instrumentation counters */
349 : int num_index_scans;
350 : int num_dead_items_resets;
351 : Size total_dead_items_bytes;
352 : /* Counters that follow are only for scanned_pages */
353 : int64 tuples_deleted; /* # deleted from table */
354 : int64 tuples_frozen; /* # newly frozen */
355 : int64 lpdead_items; /* # deleted from indexes */
356 : int64 live_tuples; /* # live tuples remaining */
357 : int64 recently_dead_tuples; /* # dead, but not yet removable */
358 : int64 missed_dead_tuples; /* # removable, but not removed */
359 :
360 : /* State maintained by heap_vac_scan_next_block() */
361 : BlockNumber current_block; /* last block returned */
362 : BlockNumber next_unskippable_block; /* next unskippable block */
363 : bool next_unskippable_allvis; /* its visibility status */
364 : bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
365 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
366 :
367 : /* State related to managing eager scanning of all-visible pages */
368 :
369 : /*
370 : * A normal vacuum that has failed to freeze too many eagerly scanned
371 : * blocks in a region suspends eager scanning.
372 : * next_eager_scan_region_start is the block number of the first block
373 : * eligible for resumed eager scanning.
374 : *
375 : * When eager scanning is permanently disabled, either initially
376 : * (including for aggressive vacuum) or due to hitting the success cap,
377 : * this is set to InvalidBlockNumber.
378 : */
379 : BlockNumber next_eager_scan_region_start;
380 :
381 : /*
382 : * The remaining number of blocks a normal vacuum will consider eager
383 : * scanning when it is successful. When eager scanning is enabled, this is
384 : * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
385 : * all-visible but not all-frozen pages. For each eager freeze success,
386 : * this is decremented. Once it hits 0, eager scanning is permanently
387 : * disabled. It is initialized to 0 if eager scanning starts out disabled
388 : * (including for aggressive vacuum).
389 : */
390 : BlockNumber eager_scan_remaining_successes;
391 :
392 : /*
393 : * The maximum number of blocks which may be eagerly scanned and not
394 : * frozen before eager scanning is temporarily suspended. This is
395 : * configurable both globally, via the
396 : * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
397 : * storage parameter of the same name. It is calculated as
398 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
399 : * It is 0 when eager scanning is disabled.
400 : */
401 : BlockNumber eager_scan_max_fails_per_region;
402 :
403 : /*
404 : * The number of eagerly scanned blocks vacuum failed to freeze (due to
405 : * age) in the current eager scan region. Vacuum resets it to
406 : * eager_scan_max_fails_per_region each time it enters a new region of the
407 : * relation. If eager_scan_remaining_fails hits 0, eager scanning is
408 : * suspended until the next region. It is also 0 if eager scanning has
409 : * been permanently disabled.
410 : */
411 : BlockNumber eager_scan_remaining_fails;
412 : } LVRelState;
413 :
414 :
415 : /* Struct for saving and restoring vacuum error information. */
416 : typedef struct LVSavedErrInfo
417 : {
418 : BlockNumber blkno;
419 : OffsetNumber offnum;
420 : VacErrPhase phase;
421 : } LVSavedErrInfo;
422 :
423 :
424 : /* non-export function prototypes */
425 : static void lazy_scan_heap(LVRelState *vacrel);
426 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
427 : const VacuumParams params);
428 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
429 : void *callback_private_data,
430 : void *per_buffer_data);
431 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
432 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
433 : BlockNumber blkno, Page page,
434 : bool sharelock, Buffer vmbuffer);
435 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
436 : BlockNumber blkno, Page page,
437 : Buffer vmbuffer, bool all_visible_according_to_vm,
438 : bool *has_lpdead_items, bool *vm_page_frozen);
439 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
440 : BlockNumber blkno, Page page,
441 : bool *has_lpdead_items);
442 : static void lazy_vacuum(LVRelState *vacrel);
443 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
444 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
445 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
446 : Buffer buffer, OffsetNumber *deadoffsets,
447 : int num_offsets, Buffer vmbuffer);
448 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
449 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
450 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
451 : IndexBulkDeleteResult *istat,
452 : double reltuples,
453 : LVRelState *vacrel);
454 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
455 : IndexBulkDeleteResult *istat,
456 : double reltuples,
457 : bool estimated_count,
458 : LVRelState *vacrel);
459 : static bool should_attempt_truncation(LVRelState *vacrel);
460 : static void lazy_truncate_heap(LVRelState *vacrel);
461 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
462 : bool *lock_waiter_detected);
463 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
464 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
465 : int num_offsets);
466 : static void dead_items_reset(LVRelState *vacrel);
467 : static void dead_items_cleanup(LVRelState *vacrel);
468 :
469 : #ifdef USE_ASSERT_CHECKING
470 : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
471 : TransactionId OldestXmin,
472 : bool *all_frozen,
473 : TransactionId *visibility_cutoff_xid,
474 : OffsetNumber *logging_offnum);
475 : #endif
476 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
477 : TransactionId OldestXmin,
478 : OffsetNumber *deadoffsets,
479 : int ndeadoffsets,
480 : bool *all_frozen,
481 : TransactionId *visibility_cutoff_xid,
482 : OffsetNumber *logging_offnum);
483 : static void update_relstats_all_indexes(LVRelState *vacrel);
484 : static void vacuum_error_callback(void *arg);
485 : static void update_vacuum_error_info(LVRelState *vacrel,
486 : LVSavedErrInfo *saved_vacrel,
487 : int phase, BlockNumber blkno,
488 : OffsetNumber offnum);
489 : static void restore_vacuum_error_info(LVRelState *vacrel,
490 : const LVSavedErrInfo *saved_vacrel);
491 :
492 :
493 :
494 : /*
495 : * Helper to set up the eager scanning state for vacuuming a single relation.
496 : * Initializes the eager scan management related members of the LVRelState.
497 : *
498 : * Caller provides whether or not an aggressive vacuum is required due to
499 : * vacuum options or for relfrozenxid/relminmxid advancement.
500 : */
501 : static void
502 148618 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
503 : {
504 : uint32 randseed;
505 : BlockNumber allvisible;
506 : BlockNumber allfrozen;
507 : float first_region_ratio;
508 148618 : bool oldest_unfrozen_before_cutoff = false;
509 :
510 : /*
511 : * Initialize eager scan management fields to their disabled values.
512 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
513 : * of tables without sufficiently old tuples disable eager scanning.
514 : */
515 148618 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
516 148618 : vacrel->eager_scan_max_fails_per_region = 0;
517 148618 : vacrel->eager_scan_remaining_fails = 0;
518 148618 : vacrel->eager_scan_remaining_successes = 0;
519 :
520 : /* If eager scanning is explicitly disabled, just return. */
521 148618 : if (params.max_eager_freeze_failure_rate == 0)
522 148618 : return;
523 :
524 : /*
525 : * The caller will have determined whether or not an aggressive vacuum is
526 : * required by either the vacuum parameters or the relative age of the
527 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
528 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
529 : * so scans of all-visible pages are not considered eager.
530 : */
531 148618 : if (vacrel->aggressive)
532 137546 : return;
533 :
534 : /*
535 : * Aggressively vacuuming a small relation shouldn't take long, so it
536 : * isn't worth amortizing. We use two times the region size as the size
537 : * cutoff because the eager scan start block is a random spot somewhere in
538 : * the first region, making the second region the first to be eager
539 : * scanned normally.
540 : */
541 11072 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
542 11072 : return;
543 :
544 : /*
545 : * We only want to enable eager scanning if we are likely to be able to
546 : * freeze some of the pages in the relation.
547 : *
548 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
549 : * are technically freezable, but we won't freeze them unless the criteria
550 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
551 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
552 : *
553 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
554 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
555 : * enable eager scanning.
556 : */
557 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
558 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
559 : vacrel->cutoffs.FreezeLimit))
560 0 : oldest_unfrozen_before_cutoff = true;
561 :
562 0 : if (!oldest_unfrozen_before_cutoff &&
563 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
564 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
565 : vacrel->cutoffs.MultiXactCutoff))
566 0 : oldest_unfrozen_before_cutoff = true;
567 :
568 0 : if (!oldest_unfrozen_before_cutoff)
569 0 : return;
570 :
571 : /* We have met the criteria to eagerly scan some pages. */
572 :
573 : /*
574 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
575 : * all-visible but not all-frozen blocks in the relation.
576 : */
577 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
578 :
579 0 : vacrel->eager_scan_remaining_successes =
580 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
581 0 : (allvisible - allfrozen));
582 :
583 : /* If every all-visible page is frozen, eager scanning is disabled. */
584 0 : if (vacrel->eager_scan_remaining_successes == 0)
585 0 : return;
586 :
587 : /*
588 : * Now calculate the bounds of the first eager scan region. Its end block
589 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
590 : * blocks. This affects the bounds of all subsequent regions and avoids
591 : * eager scanning and failing to freeze the same blocks each vacuum of the
592 : * relation.
593 : */
594 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
595 :
596 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
597 :
598 : Assert(params.max_eager_freeze_failure_rate > 0 &&
599 : params.max_eager_freeze_failure_rate <= 1);
600 :
601 0 : vacrel->eager_scan_max_fails_per_region =
602 0 : params.max_eager_freeze_failure_rate *
603 : EAGER_SCAN_REGION_SIZE;
604 :
605 : /*
606 : * The first region will be smaller than subsequent regions. As such,
607 : * adjust the eager freeze failures tolerated for this region.
608 : */
609 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
610 : EAGER_SCAN_REGION_SIZE;
611 :
612 0 : vacrel->eager_scan_remaining_fails =
613 0 : vacrel->eager_scan_max_fails_per_region *
614 : first_region_ratio;
615 : }
616 :
617 : /*
618 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
619 : *
620 : * This routine sets things up for and then calls lazy_scan_heap, where
621 : * almost all work actually takes place. Finalizes everything after call
622 : * returns by managing relation truncation and updating rel's pg_class
623 : * entry. (Also updates pg_class entries for any indexes that need it.)
624 : *
625 : * At entry, we have already established a transaction and opened
626 : * and locked the relation.
627 : */
628 : void
629 148618 : heap_vacuum_rel(Relation rel, const VacuumParams params,
630 : BufferAccessStrategy bstrategy)
631 : {
632 : LVRelState *vacrel;
633 : bool verbose,
634 : instrument,
635 : skipwithvm,
636 : frozenxid_updated,
637 : minmulti_updated;
638 : BlockNumber orig_rel_pages,
639 : new_rel_pages,
640 : new_rel_allvisible,
641 : new_rel_allfrozen;
642 : PGRUsage ru0;
643 148618 : TimestampTz starttime = 0;
644 148618 : PgStat_Counter startreadtime = 0,
645 148618 : startwritetime = 0;
646 148618 : WalUsage startwalusage = pgWalUsage;
647 148618 : BufferUsage startbufferusage = pgBufferUsage;
648 : ErrorContextCallback errcallback;
649 148618 : char **indnames = NULL;
650 148618 : Size dead_items_max_bytes = 0;
651 :
652 148618 : verbose = (params.options & VACOPT_VERBOSE) != 0;
653 269728 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
654 121110 : params.log_vacuum_min_duration >= 0));
655 148618 : if (instrument)
656 : {
657 121134 : pg_rusage_init(&ru0);
658 121134 : if (track_io_timing)
659 : {
660 0 : startreadtime = pgStatBlockReadTime;
661 0 : startwritetime = pgStatBlockWriteTime;
662 : }
663 : }
664 :
665 : /* Used for instrumentation and stats report */
666 148618 : starttime = GetCurrentTimestamp();
667 :
668 148618 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
669 : RelationGetRelid(rel));
670 148618 : if (AmAutoVacuumWorkerProcess())
671 121110 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
672 121110 : params.is_wraparound
673 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
674 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
675 : else
676 27508 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
677 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
678 :
679 : /*
680 : * Setup error traceback support for ereport() first. The idea is to set
681 : * up an error context callback to display additional information on any
682 : * error during a vacuum. During different phases of vacuum, we update
683 : * the state so that the error context callback always display current
684 : * information.
685 : *
686 : * Copy the names of heap rel into local memory for error reporting
687 : * purposes, too. It isn't always safe to assume that we can get the name
688 : * of each rel. It's convenient for code in lazy_scan_heap to always use
689 : * these temp copies.
690 : */
691 148618 : vacrel = palloc0_object(LVRelState);
692 148618 : vacrel->dbname = get_database_name(MyDatabaseId);
693 148618 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
694 148618 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
695 148618 : vacrel->indname = NULL;
696 148618 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
697 148618 : vacrel->verbose = verbose;
698 148618 : errcallback.callback = vacuum_error_callback;
699 148618 : errcallback.arg = vacrel;
700 148618 : errcallback.previous = error_context_stack;
701 148618 : error_context_stack = &errcallback;
702 :
703 : /* Set up high level stuff about rel and its indexes */
704 148618 : vacrel->rel = rel;
705 148618 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
706 : &vacrel->indrels);
707 148618 : vacrel->bstrategy = bstrategy;
708 148618 : if (instrument && vacrel->nindexes > 0)
709 : {
710 : /* Copy index names used by instrumentation (not error reporting) */
711 115936 : indnames = palloc_array(char *, vacrel->nindexes);
712 298908 : for (int i = 0; i < vacrel->nindexes; i++)
713 182972 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
714 : }
715 :
716 : /*
717 : * The index_cleanup param either disables index vacuuming and cleanup or
718 : * forces it to go ahead when we would otherwise apply the index bypass
719 : * optimization. The default is 'auto', which leaves the final decision
720 : * up to lazy_vacuum().
721 : *
722 : * The truncate param allows user to avoid attempting relation truncation,
723 : * though it can't force truncation to happen.
724 : */
725 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
726 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
727 : params.truncate != VACOPTVALUE_AUTO);
728 :
729 : /*
730 : * While VacuumFailSafeActive is reset to false before calling this, we
731 : * still need to reset it here due to recursive calls.
732 : */
733 148618 : VacuumFailsafeActive = false;
734 148618 : vacrel->consider_bypass_optimization = true;
735 148618 : vacrel->do_index_vacuuming = true;
736 148618 : vacrel->do_index_cleanup = true;
737 148618 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
738 148618 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
739 : {
740 : /* Force disable index vacuuming up-front */
741 260 : vacrel->do_index_vacuuming = false;
742 260 : vacrel->do_index_cleanup = false;
743 : }
744 148358 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
745 : {
746 : /* Force index vacuuming. Note that failsafe can still bypass. */
747 30 : vacrel->consider_bypass_optimization = false;
748 : }
749 : else
750 : {
751 : /* Default/auto, make all decisions dynamically */
752 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
753 : }
754 :
755 : /* Initialize page counters explicitly (be tidy) */
756 148618 : vacrel->scanned_pages = 0;
757 148618 : vacrel->eager_scanned_pages = 0;
758 148618 : vacrel->removed_pages = 0;
759 148618 : vacrel->new_frozen_tuple_pages = 0;
760 148618 : vacrel->lpdead_item_pages = 0;
761 148618 : vacrel->missed_dead_pages = 0;
762 148618 : vacrel->nonempty_pages = 0;
763 : /* dead_items_alloc allocates vacrel->dead_items later on */
764 :
765 : /* Allocate/initialize output statistics state */
766 148618 : vacrel->new_rel_tuples = 0;
767 148618 : vacrel->new_live_tuples = 0;
768 148618 : vacrel->indstats = (IndexBulkDeleteResult **)
769 148618 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
770 :
771 : /* Initialize remaining counters (be tidy) */
772 148618 : vacrel->num_index_scans = 0;
773 148618 : vacrel->num_dead_items_resets = 0;
774 148618 : vacrel->total_dead_items_bytes = 0;
775 148618 : vacrel->tuples_deleted = 0;
776 148618 : vacrel->tuples_frozen = 0;
777 148618 : vacrel->lpdead_items = 0;
778 148618 : vacrel->live_tuples = 0;
779 148618 : vacrel->recently_dead_tuples = 0;
780 148618 : vacrel->missed_dead_tuples = 0;
781 :
782 148618 : vacrel->vm_new_visible_pages = 0;
783 148618 : vacrel->vm_new_visible_frozen_pages = 0;
784 148618 : vacrel->vm_new_frozen_pages = 0;
785 :
786 : /*
787 : * Get cutoffs that determine which deleted tuples are considered DEAD,
788 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
789 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
790 : * happen in this order to ensure that the OldestXmin cutoff field works
791 : * as an upper bound on the XIDs stored in the pages we'll actually scan
792 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
793 : *
794 : * Next acquire vistest, a related cutoff that's used in pruning. We use
795 : * vistest in combination with OldestXmin to ensure that
796 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
797 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
798 : * whether a tuple should be frozen or removed. (In the future we might
799 : * want to teach lazy_scan_prune to recompute vistest from time to time,
800 : * to increase the number of dead tuples it can prune away.)
801 : */
802 148618 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
803 148618 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
804 148618 : vacrel->vistest = GlobalVisTestFor(rel);
805 :
806 : /* Initialize state used to track oldest extant XID/MXID */
807 148618 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
808 148618 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
809 :
810 : /*
811 : * Initialize state related to tracking all-visible page skipping. This is
812 : * very important to determine whether or not it is safe to advance the
813 : * relfrozenxid/relminmxid.
814 : */
815 148618 : vacrel->skippedallvis = false;
816 148618 : skipwithvm = true;
817 148618 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
818 : {
819 : /*
820 : * Force aggressive mode, and disable skipping blocks using the
821 : * visibility map (even those set all-frozen)
822 : */
823 344 : vacrel->aggressive = true;
824 344 : skipwithvm = false;
825 : }
826 :
827 148618 : vacrel->skipwithvm = skipwithvm;
828 :
829 : /*
830 : * Set up eager scan tracking state. This must happen after determining
831 : * whether or not the vacuum must be aggressive, because only normal
832 : * vacuums use the eager scan algorithm.
833 : */
834 148618 : heap_vacuum_eager_scan_setup(vacrel, params);
835 :
836 : /* Report the vacuum mode: 'normal' or 'aggressive' */
837 148618 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
838 148618 : vacrel->aggressive
839 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
840 : : PROGRESS_VACUUM_MODE_NORMAL);
841 :
842 148618 : if (verbose)
843 : {
844 24 : if (vacrel->aggressive)
845 2 : ereport(INFO,
846 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
847 : vacrel->dbname, vacrel->relnamespace,
848 : vacrel->relname)));
849 : else
850 22 : ereport(INFO,
851 : (errmsg("vacuuming \"%s.%s.%s\"",
852 : vacrel->dbname, vacrel->relnamespace,
853 : vacrel->relname)));
854 : }
855 :
856 : /*
857 : * Allocate dead_items memory using dead_items_alloc. This handles
858 : * parallel VACUUM initialization as part of allocating shared memory
859 : * space used for dead_items. (But do a failsafe precheck first, to
860 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
861 : * is already dangerously old.)
862 : */
863 148618 : lazy_check_wraparound_failsafe(vacrel);
864 148618 : dead_items_alloc(vacrel, params.nworkers);
865 :
866 : /*
867 : * Call lazy_scan_heap to perform all required heap pruning, index
868 : * vacuuming, and heap vacuuming (plus related processing)
869 : */
870 148618 : lazy_scan_heap(vacrel);
871 :
872 : /*
873 : * Save dead items max_bytes and update the memory usage statistics before
874 : * cleanup, they are freed in parallel vacuum cases during
875 : * dead_items_cleanup().
876 : */
877 148618 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
878 148618 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
879 :
880 : /*
881 : * Free resources managed by dead_items_alloc. This ends parallel mode in
882 : * passing when necessary.
883 : */
884 148618 : dead_items_cleanup(vacrel);
885 : Assert(!IsInParallelMode());
886 :
887 : /*
888 : * Update pg_class entries for each of rel's indexes where appropriate.
889 : *
890 : * Unlike the later update to rel's pg_class entry, this is not critical.
891 : * Maintains relpages/reltuples statistics used by the planner only.
892 : */
893 148618 : if (vacrel->do_index_cleanup)
894 112280 : update_relstats_all_indexes(vacrel);
895 :
896 : /* Done with rel's indexes */
897 148618 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
898 :
899 : /* Optionally truncate rel */
900 148618 : if (should_attempt_truncation(vacrel))
901 294 : lazy_truncate_heap(vacrel);
902 :
903 : /* Pop the error context stack */
904 148618 : error_context_stack = errcallback.previous;
905 :
906 : /* Report that we are now doing final cleanup */
907 148618 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
908 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
909 :
910 : /*
911 : * Prepare to update rel's pg_class entry.
912 : *
913 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
914 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
915 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
916 : */
917 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
918 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
919 : vacrel->cutoffs.relfrozenxid,
920 : vacrel->NewRelfrozenXid));
921 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
922 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
923 : vacrel->cutoffs.relminmxid,
924 : vacrel->NewRelminMxid));
925 148618 : if (vacrel->skippedallvis)
926 : {
927 : /*
928 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
929 : * chose to skip an all-visible page range. The state that tracks new
930 : * values will have missed unfrozen XIDs from the pages we skipped.
931 : */
932 : Assert(!vacrel->aggressive);
933 58 : vacrel->NewRelfrozenXid = InvalidTransactionId;
934 58 : vacrel->NewRelminMxid = InvalidMultiXactId;
935 : }
936 :
937 : /*
938 : * For safety, clamp relallvisible to be not more than what we're setting
939 : * pg_class.relpages to
940 : */
941 148618 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
942 148618 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
943 148618 : if (new_rel_allvisible > new_rel_pages)
944 0 : new_rel_allvisible = new_rel_pages;
945 :
946 : /*
947 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
948 : * all-frozen blocks to the count of all-visible blocks. This matches the
949 : * clamping of relallvisible above.
950 : */
951 148618 : if (new_rel_allfrozen > new_rel_allvisible)
952 0 : new_rel_allfrozen = new_rel_allvisible;
953 :
954 : /*
955 : * Now actually update rel's pg_class entry.
956 : *
957 : * In principle new_live_tuples could be -1 indicating that we (still)
958 : * don't know the tuple count. In practice that can't happen, since we
959 : * scan every page that isn't skipped using the visibility map.
960 : */
961 148618 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
962 : new_rel_allvisible, new_rel_allfrozen,
963 148618 : vacrel->nindexes > 0,
964 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
965 : &frozenxid_updated, &minmulti_updated, false);
966 :
967 : /*
968 : * Report results to the cumulative stats system, too.
969 : *
970 : * Deliberately avoid telling the stats system about LP_DEAD items that
971 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
972 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
973 : * It seems like a good idea to err on the side of not vacuuming again too
974 : * soon in cases where the failsafe prevented significant amounts of heap
975 : * vacuuming.
976 : */
977 148616 : pgstat_report_vacuum(rel,
978 59610 : Max(vacrel->new_live_tuples, 0),
979 148616 : vacrel->recently_dead_tuples +
980 148616 : vacrel->missed_dead_tuples,
981 : starttime);
982 148616 : pgstat_progress_end_command();
983 :
984 148616 : if (instrument)
985 : {
986 121134 : TimestampTz endtime = GetCurrentTimestamp();
987 :
988 121246 : if (verbose || params.log_vacuum_min_duration == 0 ||
989 112 : TimestampDifferenceExceeds(starttime, endtime,
990 112 : params.log_vacuum_min_duration))
991 : {
992 : long secs_dur;
993 : int usecs_dur;
994 : WalUsage walusage;
995 : BufferUsage bufferusage;
996 : StringInfoData buf;
997 : char *msgfmt;
998 : int32 diff;
999 121022 : double read_rate = 0,
1000 121022 : write_rate = 0;
1001 : int64 total_blks_hit;
1002 : int64 total_blks_read;
1003 : int64 total_blks_dirtied;
1004 :
1005 121022 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1006 121022 : memset(&walusage, 0, sizeof(WalUsage));
1007 121022 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1008 121022 : memset(&bufferusage, 0, sizeof(BufferUsage));
1009 121022 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1010 :
1011 121022 : total_blks_hit = bufferusage.shared_blks_hit +
1012 121022 : bufferusage.local_blks_hit;
1013 121022 : total_blks_read = bufferusage.shared_blks_read +
1014 121022 : bufferusage.local_blks_read;
1015 121022 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1016 121022 : bufferusage.local_blks_dirtied;
1017 :
1018 121022 : initStringInfo(&buf);
1019 121022 : if (verbose)
1020 : {
1021 : /*
1022 : * Aggressiveness already reported earlier, in dedicated
1023 : * VACUUM VERBOSE ereport
1024 : */
1025 : Assert(!params.is_wraparound);
1026 24 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1027 : }
1028 120998 : else if (params.is_wraparound)
1029 : {
1030 : /*
1031 : * While it's possible for a VACUUM to be both is_wraparound
1032 : * and !aggressive, that's just a corner-case -- is_wraparound
1033 : * implies aggressive. Produce distinct output for the corner
1034 : * case all the same, just in case.
1035 : */
1036 120960 : if (vacrel->aggressive)
1037 120950 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1038 : else
1039 10 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1040 : }
1041 : else
1042 : {
1043 38 : if (vacrel->aggressive)
1044 30 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1045 : else
1046 8 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1047 : }
1048 121022 : appendStringInfo(&buf, msgfmt,
1049 : vacrel->dbname,
1050 : vacrel->relnamespace,
1051 : vacrel->relname,
1052 : vacrel->num_index_scans);
1053 169240 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1054 : vacrel->removed_pages,
1055 : new_rel_pages,
1056 : vacrel->scanned_pages,
1057 : orig_rel_pages == 0 ? 100.0 :
1058 48218 : 100.0 * vacrel->scanned_pages /
1059 : orig_rel_pages,
1060 : vacrel->eager_scanned_pages);
1061 121022 : appendStringInfo(&buf,
1062 121022 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1063 : vacrel->tuples_deleted,
1064 121022 : (int64) vacrel->new_rel_tuples,
1065 : vacrel->recently_dead_tuples);
1066 121022 : if (vacrel->missed_dead_tuples > 0)
1067 0 : appendStringInfo(&buf,
1068 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1069 : vacrel->missed_dead_tuples,
1070 : vacrel->missed_dead_pages);
1071 121022 : diff = (int32) (ReadNextTransactionId() -
1072 121022 : vacrel->cutoffs.OldestXmin);
1073 121022 : appendStringInfo(&buf,
1074 121022 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1075 : vacrel->cutoffs.OldestXmin, diff);
1076 121022 : if (frozenxid_updated)
1077 : {
1078 38664 : diff = (int32) (vacrel->NewRelfrozenXid -
1079 38664 : vacrel->cutoffs.relfrozenxid);
1080 38664 : appendStringInfo(&buf,
1081 38664 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1082 : vacrel->NewRelfrozenXid, diff);
1083 : }
1084 121022 : if (minmulti_updated)
1085 : {
1086 8 : diff = (int32) (vacrel->NewRelminMxid -
1087 8 : vacrel->cutoffs.relminmxid);
1088 8 : appendStringInfo(&buf,
1089 8 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1090 : vacrel->NewRelminMxid, diff);
1091 : }
1092 169240 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1093 : vacrel->new_frozen_tuple_pages,
1094 : orig_rel_pages == 0 ? 100.0 :
1095 48218 : 100.0 * vacrel->new_frozen_tuple_pages /
1096 : orig_rel_pages,
1097 : vacrel->tuples_frozen);
1098 :
1099 121022 : appendStringInfo(&buf,
1100 121022 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1101 : vacrel->vm_new_visible_pages,
1102 121022 : vacrel->vm_new_visible_frozen_pages +
1103 121022 : vacrel->vm_new_frozen_pages,
1104 : vacrel->vm_new_frozen_pages);
1105 121022 : if (vacrel->do_index_vacuuming)
1106 : {
1107 85162 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1108 85130 : appendStringInfoString(&buf, _("index scan not needed: "));
1109 : else
1110 32 : appendStringInfoString(&buf, _("index scan needed: "));
1111 :
1112 85162 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1113 : }
1114 : else
1115 : {
1116 35860 : if (!VacuumFailsafeActive)
1117 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1118 : else
1119 35860 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1120 :
1121 35860 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1122 : }
1123 169240 : appendStringInfo(&buf, msgfmt,
1124 : vacrel->lpdead_item_pages,
1125 : orig_rel_pages == 0 ? 100.0 :
1126 48218 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1127 : vacrel->lpdead_items);
1128 303780 : for (int i = 0; i < vacrel->nindexes; i++)
1129 : {
1130 182758 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1131 :
1132 182758 : if (!istat)
1133 182704 : continue;
1134 :
1135 54 : appendStringInfo(&buf,
1136 54 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1137 54 : indnames[i],
1138 : istat->num_pages,
1139 : istat->pages_newly_deleted,
1140 : istat->pages_deleted,
1141 : istat->pages_free);
1142 : }
1143 121022 : if (track_cost_delay_timing)
1144 : {
1145 : /*
1146 : * We bypass the changecount mechanism because this value is
1147 : * only updated by the calling process. We also rely on the
1148 : * above call to pgstat_progress_end_command() to not clear
1149 : * the st_progress_param array.
1150 : */
1151 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1152 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1153 : }
1154 121022 : if (track_io_timing)
1155 : {
1156 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1157 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1158 :
1159 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1160 : read_ms, write_ms);
1161 : }
1162 121022 : if (secs_dur > 0 || usecs_dur > 0)
1163 : {
1164 121022 : read_rate = (double) BLCKSZ * total_blks_read /
1165 121022 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1166 121022 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1167 121022 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1168 : }
1169 121022 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1170 : read_rate, write_rate);
1171 121022 : appendStringInfo(&buf,
1172 121022 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1173 : total_blks_hit,
1174 : total_blks_read,
1175 : total_blks_dirtied);
1176 121022 : appendStringInfo(&buf,
1177 121022 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1178 : walusage.wal_records,
1179 : walusage.wal_fpi,
1180 : walusage.wal_bytes,
1181 : walusage.wal_fpi_bytes,
1182 : walusage.wal_buffers_full);
1183 :
1184 : /*
1185 : * Report the dead items memory usage.
1186 : *
1187 : * The num_dead_items_resets counter increases when we reset the
1188 : * collected dead items, so the counter is non-zero if at least
1189 : * one dead items are collected, even if index vacuuming is
1190 : * disabled.
1191 : */
1192 121022 : appendStringInfo(&buf,
1193 121022 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1194 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1195 121022 : vacrel->num_dead_items_resets),
1196 121022 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1197 : vacrel->num_dead_items_resets,
1198 121022 : (double) dead_items_max_bytes / (1024 * 1024));
1199 121022 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1200 :
1201 121022 : ereport(verbose ? INFO : LOG,
1202 : (errmsg_internal("%s", buf.data)));
1203 121022 : pfree(buf.data);
1204 : }
1205 : }
1206 :
1207 : /* Cleanup index statistics and index names */
1208 371086 : for (int i = 0; i < vacrel->nindexes; i++)
1209 : {
1210 222470 : if (vacrel->indstats[i])
1211 2584 : pfree(vacrel->indstats[i]);
1212 :
1213 222470 : if (instrument)
1214 182972 : pfree(indnames[i]);
1215 : }
1216 148616 : }
1217 :
1218 : /*
1219 : * lazy_scan_heap() -- workhorse function for VACUUM
1220 : *
1221 : * This routine prunes each page in the heap, and considers the need to
1222 : * freeze remaining tuples with storage (not including pages that can be
1223 : * skipped using the visibility map). Also performs related maintenance
1224 : * of the FSM and visibility map. These steps all take place during an
1225 : * initial pass over the target heap relation.
1226 : *
1227 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1228 : * consists of deleting index tuples that point to LP_DEAD items left in
1229 : * heap pages following pruning. Earlier initial pass over the heap will
1230 : * have collected the TIDs whose index tuples need to be removed.
1231 : *
1232 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1233 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1234 : * as LP_UNUSED. This has to happen in a second, final pass over the
1235 : * heap, to preserve a basic invariant that all index AMs rely on: no
1236 : * extant index tuple can ever be allowed to contain a TID that points to
1237 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1238 : * recycling of line pointers to avoid index scans that get confused
1239 : * about which TID points to which tuple immediately after recycling.
1240 : * (Actually, this isn't a concern when target heap relation happens to
1241 : * have no indexes, which allows us to safely apply the one-pass strategy
1242 : * as an optimization).
1243 : *
1244 : * In practice we often have enough space to fit all TIDs, and so won't
1245 : * need to call lazy_vacuum more than once, after our initial pass over
1246 : * the heap has totally finished. Otherwise things are slightly more
1247 : * complicated: our "initial pass" over the heap applies only to those
1248 : * pages that were pruned before we needed to call lazy_vacuum, and our
1249 : * "final pass" over the heap only vacuums these same heap pages.
1250 : * However, we process indexes in full every time lazy_vacuum is called,
1251 : * which makes index processing very inefficient when memory is in short
1252 : * supply.
1253 : */
1254 : static void
1255 148618 : lazy_scan_heap(LVRelState *vacrel)
1256 : {
1257 : ReadStream *stream;
1258 148618 : BlockNumber rel_pages = vacrel->rel_pages,
1259 148618 : blkno = 0,
1260 148618 : next_fsm_block_to_vacuum = 0;
1261 148618 : BlockNumber orig_eager_scan_success_limit =
1262 : vacrel->eager_scan_remaining_successes; /* for logging */
1263 148618 : Buffer vmbuffer = InvalidBuffer;
1264 148618 : const int initprog_index[] = {
1265 : PROGRESS_VACUUM_PHASE,
1266 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1267 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1268 : };
1269 : int64 initprog_val[3];
1270 :
1271 : /* Report that we're scanning the heap, advertising total # of blocks */
1272 148618 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1273 148618 : initprog_val[1] = rel_pages;
1274 148618 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1275 148618 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1276 :
1277 : /* Initialize for the first heap_vac_scan_next_block() call */
1278 148618 : vacrel->current_block = InvalidBlockNumber;
1279 148618 : vacrel->next_unskippable_block = InvalidBlockNumber;
1280 148618 : vacrel->next_unskippable_allvis = false;
1281 148618 : vacrel->next_unskippable_eager_scanned = false;
1282 148618 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1283 :
1284 : /*
1285 : * Set up the read stream for vacuum's first pass through the heap.
1286 : *
1287 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1288 : * explicit work in heap_vac_scan_next_block.
1289 : */
1290 148618 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1291 : vacrel->bstrategy,
1292 : vacrel->rel,
1293 : MAIN_FORKNUM,
1294 : heap_vac_scan_next_block,
1295 : vacrel,
1296 : sizeof(uint8));
1297 :
1298 : while (true)
1299 571500 : {
1300 : Buffer buf;
1301 : Page page;
1302 720118 : uint8 blk_info = 0;
1303 720118 : int ndeleted = 0;
1304 : bool has_lpdead_items;
1305 720118 : void *per_buffer_data = NULL;
1306 720118 : bool vm_page_frozen = false;
1307 720118 : bool got_cleanup_lock = false;
1308 :
1309 720118 : vacuum_delay_point(false);
1310 :
1311 : /*
1312 : * Regularly check if wraparound failsafe should trigger.
1313 : *
1314 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1315 : * relfrozenxid might start to look dangerously old before we reach
1316 : * that point. This check also provides failsafe coverage for the
1317 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1318 : * param set to 'off'.
1319 : */
1320 720118 : if (vacrel->scanned_pages > 0 &&
1321 571500 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1322 0 : lazy_check_wraparound_failsafe(vacrel);
1323 :
1324 : /*
1325 : * Consider if we definitely have enough space to process TIDs on page
1326 : * already. If we are close to overrunning the available space for
1327 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1328 : * this page. However, let's force at least one page-worth of tuples
1329 : * to be stored as to ensure we do at least some work when the memory
1330 : * configured is so low that we run out before storing anything.
1331 : */
1332 720118 : if (vacrel->dead_items_info->num_items > 0 &&
1333 46390 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1334 : {
1335 : /*
1336 : * Before beginning index vacuuming, we release any pin we may
1337 : * hold on the visibility map page. This isn't necessary for
1338 : * correctness, but we do it anyway to avoid holding the pin
1339 : * across a lengthy, unrelated operation.
1340 : */
1341 4 : if (BufferIsValid(vmbuffer))
1342 : {
1343 4 : ReleaseBuffer(vmbuffer);
1344 4 : vmbuffer = InvalidBuffer;
1345 : }
1346 :
1347 : /* Perform a round of index and heap vacuuming */
1348 4 : vacrel->consider_bypass_optimization = false;
1349 4 : lazy_vacuum(vacrel);
1350 :
1351 : /*
1352 : * Vacuum the Free Space Map to make newly-freed space visible on
1353 : * upper-level FSM pages. Note that blkno is the previously
1354 : * processed block.
1355 : */
1356 4 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1357 : blkno + 1);
1358 4 : next_fsm_block_to_vacuum = blkno;
1359 :
1360 : /* Report that we are once again scanning the heap */
1361 4 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1362 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1363 : }
1364 :
1365 720118 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1366 :
1367 : /* The relation is exhausted. */
1368 720118 : if (!BufferIsValid(buf))
1369 148618 : break;
1370 :
1371 571500 : blk_info = *((uint8 *) per_buffer_data);
1372 571500 : CheckBufferIsPinnedOnce(buf);
1373 571500 : page = BufferGetPage(buf);
1374 571500 : blkno = BufferGetBlockNumber(buf);
1375 :
1376 571500 : vacrel->scanned_pages++;
1377 571500 : if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
1378 0 : vacrel->eager_scanned_pages++;
1379 :
1380 : /* Report as block scanned, update error traceback information */
1381 571500 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1382 571500 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1383 : blkno, InvalidOffsetNumber);
1384 :
1385 : /*
1386 : * Pin the visibility map page in case we need to mark the page
1387 : * all-visible. In most cases this will be very cheap, because we'll
1388 : * already have the correct page pinned anyway.
1389 : */
1390 571500 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1391 :
1392 : /*
1393 : * We need a buffer cleanup lock to prune HOT chains and defragment
1394 : * the page in lazy_scan_prune. But when it's not possible to acquire
1395 : * a cleanup lock right away, we may be able to settle for reduced
1396 : * processing using lazy_scan_noprune.
1397 : */
1398 571500 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1399 :
1400 571500 : if (!got_cleanup_lock)
1401 72 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1402 :
1403 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1404 571500 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1405 571500 : vmbuffer))
1406 : {
1407 : /* Processed as new/empty page (lock and pin released) */
1408 1224 : continue;
1409 : }
1410 :
1411 : /*
1412 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1413 : * items in the dead_items area for later vacuuming, count live and
1414 : * recently dead tuples for vacuum logging, and determine if this
1415 : * block could later be truncated. If we encounter any xid/mxids that
1416 : * require advancing the relfrozenxid/relminxid, we'll have to wait
1417 : * for a cleanup lock and call lazy_scan_prune().
1418 : */
1419 570276 : if (!got_cleanup_lock &&
1420 72 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1421 : {
1422 : /*
1423 : * lazy_scan_noprune could not do all required processing. Wait
1424 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1425 : */
1426 : Assert(vacrel->aggressive);
1427 16 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1428 16 : LockBufferForCleanup(buf);
1429 16 : got_cleanup_lock = true;
1430 : }
1431 :
1432 : /*
1433 : * If we have a cleanup lock, we must now prune, freeze, and count
1434 : * tuples. We may have acquired the cleanup lock originally, or we may
1435 : * have gone back and acquired it after lazy_scan_noprune() returned
1436 : * false. Either way, the page hasn't been processed yet.
1437 : *
1438 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1439 : * recently_dead_tuples and live tuples for vacuum logging, determine
1440 : * if the block can later be truncated, and accumulate the details of
1441 : * remaining LP_DEAD line pointers on the page into dead_items. These
1442 : * dead items include those pruned by lazy_scan_prune() as well as
1443 : * line pointers previously marked LP_DEAD.
1444 : */
1445 570276 : if (got_cleanup_lock)
1446 570220 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1447 : vmbuffer,
1448 570220 : blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
1449 : &has_lpdead_items, &vm_page_frozen);
1450 :
1451 : /*
1452 : * Count an eagerly scanned page as a failure or a success.
1453 : *
1454 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1455 : * cleanup lock, we won't have frozen the page. However, we only count
1456 : * pages that were too new to require freezing as eager freeze
1457 : * failures.
1458 : *
1459 : * We could gather more information from lazy_scan_noprune() about
1460 : * whether or not there were tuples with XIDs or MXIDs older than the
1461 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1462 : * exclude pages skipped due to cleanup lock contention from eager
1463 : * freeze algorithm caps.
1464 : */
1465 570276 : if (got_cleanup_lock &&
1466 570220 : (blk_info & VAC_BLK_WAS_EAGER_SCANNED))
1467 : {
1468 : /* Aggressive vacuums do not eager scan. */
1469 : Assert(!vacrel->aggressive);
1470 :
1471 0 : if (vm_page_frozen)
1472 : {
1473 0 : if (vacrel->eager_scan_remaining_successes > 0)
1474 0 : vacrel->eager_scan_remaining_successes--;
1475 :
1476 0 : if (vacrel->eager_scan_remaining_successes == 0)
1477 : {
1478 : /*
1479 : * Report only once that we disabled eager scanning. We
1480 : * may eagerly read ahead blocks in excess of the success
1481 : * or failure caps before attempting to freeze them, so we
1482 : * could reach here even after disabling additional eager
1483 : * scanning.
1484 : */
1485 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1486 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1487 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1488 : orig_eager_scan_success_limit,
1489 : vacrel->dbname, vacrel->relnamespace,
1490 : vacrel->relname)));
1491 :
1492 : /*
1493 : * If we hit our success cap, permanently disable eager
1494 : * scanning by setting the other eager scan management
1495 : * fields to their disabled values.
1496 : */
1497 0 : vacrel->eager_scan_remaining_fails = 0;
1498 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1499 0 : vacrel->eager_scan_max_fails_per_region = 0;
1500 : }
1501 : }
1502 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1503 0 : vacrel->eager_scan_remaining_fails--;
1504 : }
1505 :
1506 : /*
1507 : * Now drop the buffer lock and, potentially, update the FSM.
1508 : *
1509 : * Our goal is to update the freespace map the last time we touch the
1510 : * page. If we'll process a block in the second pass, we may free up
1511 : * additional space on the page, so it is better to update the FSM
1512 : * after the second pass. If the relation has no indexes, or if index
1513 : * vacuuming is disabled, there will be no second heap pass; if this
1514 : * particular page has no dead items, the second heap pass will not
1515 : * touch this page. So, in those cases, update the FSM now.
1516 : *
1517 : * Note: In corner cases, it's possible to miss updating the FSM
1518 : * entirely. If index vacuuming is currently enabled, we'll skip the
1519 : * FSM update now. But if failsafe mode is later activated, or there
1520 : * are so few dead tuples that index vacuuming is bypassed, there will
1521 : * also be no opportunity to update the FSM later, because we'll never
1522 : * revisit this page. Since updating the FSM is desirable but not
1523 : * absolutely required, that's OK.
1524 : */
1525 570276 : if (vacrel->nindexes == 0
1526 545978 : || !vacrel->do_index_vacuuming
1527 408204 : || !has_lpdead_items)
1528 546412 : {
1529 546412 : Size freespace = PageGetHeapFreeSpace(page);
1530 :
1531 546412 : UnlockReleaseBuffer(buf);
1532 546412 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1533 :
1534 : /*
1535 : * Periodically perform FSM vacuuming to make newly-freed space
1536 : * visible on upper FSM pages. This is done after vacuuming if the
1537 : * table has indexes. There will only be newly-freed space if we
1538 : * held the cleanup lock and lazy_scan_prune() was called.
1539 : */
1540 546412 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1541 904 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1542 : {
1543 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1544 : blkno);
1545 0 : next_fsm_block_to_vacuum = blkno;
1546 : }
1547 : }
1548 : else
1549 23864 : UnlockReleaseBuffer(buf);
1550 : }
1551 :
1552 148618 : vacrel->blkno = InvalidBlockNumber;
1553 148618 : if (BufferIsValid(vmbuffer))
1554 59772 : ReleaseBuffer(vmbuffer);
1555 :
1556 : /*
1557 : * Report that everything is now scanned. We never skip scanning the last
1558 : * block in the relation, so we can pass rel_pages here.
1559 : */
1560 148618 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1561 : rel_pages);
1562 :
1563 : /* now we can compute the new value for pg_class.reltuples */
1564 297236 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1565 : vacrel->scanned_pages,
1566 148618 : vacrel->live_tuples);
1567 :
1568 : /*
1569 : * Also compute the total number of surviving heap entries. In the
1570 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1571 : */
1572 148618 : vacrel->new_rel_tuples =
1573 148618 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1574 148618 : vacrel->missed_dead_tuples;
1575 :
1576 148618 : read_stream_end(stream);
1577 :
1578 : /*
1579 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1580 : * related heap vacuuming
1581 : */
1582 148618 : if (vacrel->dead_items_info->num_items > 0)
1583 1232 : lazy_vacuum(vacrel);
1584 :
1585 : /*
1586 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1587 : * not there were indexes, and whether or not we bypassed index vacuuming.
1588 : * We can pass rel_pages here because we never skip scanning the last
1589 : * block of the relation.
1590 : */
1591 148618 : if (rel_pages > next_fsm_block_to_vacuum)
1592 59772 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1593 :
1594 : /* report all blocks vacuumed */
1595 148618 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1596 :
1597 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1598 148618 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1599 106512 : lazy_cleanup_all_indexes(vacrel);
1600 148618 : }
1601 :
1602 : /*
1603 : * heap_vac_scan_next_block() -- read stream callback to get the next block
1604 : * for vacuum to process
1605 : *
1606 : * Every time lazy_scan_heap() needs a new block to process during its first
1607 : * phase, it invokes read_stream_next_buffer() with a stream set up to call
1608 : * heap_vac_scan_next_block() to get the next block.
1609 : *
1610 : * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
1611 : * various thresholds to skip blocks which do not need to be processed and
1612 : * returns the next block to process or InvalidBlockNumber if there are no
1613 : * remaining blocks.
1614 : *
1615 : * The visibility status of the next block to process and whether or not it
1616 : * was eager scanned is set in the per_buffer_data.
1617 : *
1618 : * callback_private_data contains a reference to the LVRelState, passed to the
1619 : * read stream API during stream setup. The LVRelState is an in/out parameter
1620 : * here (locally named `vacrel`). Vacuum options and information about the
1621 : * relation are read from it. vacrel->skippedallvis is set if we skip a block
1622 : * that's all-visible but not all-frozen (to ensure that we don't update
1623 : * relfrozenxid in that case). vacrel also holds information about the next
1624 : * unskippable block -- as bookkeeping for this function.
1625 : */
1626 : static BlockNumber
1627 720118 : heap_vac_scan_next_block(ReadStream *stream,
1628 : void *callback_private_data,
1629 : void *per_buffer_data)
1630 : {
1631 : BlockNumber next_block;
1632 720118 : LVRelState *vacrel = callback_private_data;
1633 720118 : uint8 blk_info = 0;
1634 :
1635 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1636 720118 : next_block = vacrel->current_block + 1;
1637 :
1638 : /* Have we reached the end of the relation? */
1639 720118 : if (next_block >= vacrel->rel_pages)
1640 : {
1641 148618 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1642 : {
1643 56818 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1644 56818 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1645 : }
1646 148618 : return InvalidBlockNumber;
1647 : }
1648 :
1649 : /*
1650 : * We must be in one of the three following states:
1651 : */
1652 571500 : if (next_block > vacrel->next_unskippable_block ||
1653 233100 : vacrel->next_unskippable_block == InvalidBlockNumber)
1654 : {
1655 : /*
1656 : * 1. We have just processed an unskippable block (or we're at the
1657 : * beginning of the scan). Find the next unskippable block using the
1658 : * visibility map.
1659 : */
1660 : bool skipsallvis;
1661 :
1662 398172 : find_next_unskippable_block(vacrel, &skipsallvis);
1663 :
1664 : /*
1665 : * We now know the next block that we must process. It can be the
1666 : * next block after the one we just processed, or something further
1667 : * ahead. If it's further ahead, we can jump to it, but we choose to
1668 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1669 : * pages. Since we're reading sequentially, the OS should be doing
1670 : * readahead for us, so there's no gain in skipping a page now and
1671 : * then. Skipping such a range might even discourage sequential
1672 : * detection.
1673 : *
1674 : * This test also enables more frequent relfrozenxid advancement
1675 : * during non-aggressive VACUUMs. If the range has any all-visible
1676 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1677 : * real downside.
1678 : */
1679 398172 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1680 : {
1681 5138 : next_block = vacrel->next_unskippable_block;
1682 5138 : if (skipsallvis)
1683 60 : vacrel->skippedallvis = true;
1684 : }
1685 : }
1686 :
1687 : /* Now we must be in one of the two remaining states: */
1688 571500 : if (next_block < vacrel->next_unskippable_block)
1689 : {
1690 : /*
1691 : * 2. We are processing a range of blocks that we could have skipped
1692 : * but chose not to. We know that they are all-visible in the VM,
1693 : * otherwise they would've been unskippable.
1694 : */
1695 173328 : vacrel->current_block = next_block;
1696 173328 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1697 173328 : *((uint8 *) per_buffer_data) = blk_info;
1698 173328 : return vacrel->current_block;
1699 : }
1700 : else
1701 : {
1702 : /*
1703 : * 3. We reached the next unskippable block. Process it. On next
1704 : * iteration, we will be back in state 1.
1705 : */
1706 : Assert(next_block == vacrel->next_unskippable_block);
1707 :
1708 398172 : vacrel->current_block = next_block;
1709 398172 : if (vacrel->next_unskippable_allvis)
1710 52754 : blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
1711 398172 : if (vacrel->next_unskippable_eager_scanned)
1712 0 : blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
1713 398172 : *((uint8 *) per_buffer_data) = blk_info;
1714 398172 : return vacrel->current_block;
1715 : }
1716 : }
1717 :
1718 : /*
1719 : * Find the next unskippable block in a vacuum scan using the visibility map.
1720 : * The next unskippable block and its visibility information is updated in
1721 : * vacrel.
1722 : *
1723 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1724 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1725 : * was concurrently cleared, though. All that matters is that caller scan all
1726 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1727 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1728 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1729 : * to skip such a range is actually made, making everything safe.)
1730 : */
1731 : static void
1732 398172 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1733 : {
1734 398172 : BlockNumber rel_pages = vacrel->rel_pages;
1735 398172 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1736 398172 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1737 398172 : bool next_unskippable_eager_scanned = false;
1738 : bool next_unskippable_allvis;
1739 :
1740 398172 : *skipsallvis = false;
1741 :
1742 539902 : for (;; next_unskippable_block++)
1743 539902 : {
1744 938074 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1745 : next_unskippable_block,
1746 : &next_unskippable_vmbuffer);
1747 :
1748 938074 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1749 :
1750 : /*
1751 : * At the start of each eager scan region, normal vacuums with eager
1752 : * scanning enabled reset the failure counter, allowing vacuum to
1753 : * resume eager scanning if it had been suspended in the previous
1754 : * region.
1755 : */
1756 938074 : if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
1757 : {
1758 0 : vacrel->eager_scan_remaining_fails =
1759 0 : vacrel->eager_scan_max_fails_per_region;
1760 0 : vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
1761 : }
1762 :
1763 : /*
1764 : * A block is unskippable if it is not all visible according to the
1765 : * visibility map.
1766 : */
1767 938074 : if (!next_unskippable_allvis)
1768 : {
1769 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1770 345418 : break;
1771 : }
1772 :
1773 : /*
1774 : * Caller must scan the last page to determine whether it has tuples
1775 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1776 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1777 : * lock on rel to attempt a truncation that fails anyway, just because
1778 : * there are tuples on the last page (it is likely that there will be
1779 : * tuples on other nearby pages as well, but those can be skipped).
1780 : *
1781 : * Implement this by always treating the last block as unsafe to skip.
1782 : */
1783 592656 : if (next_unskippable_block == rel_pages - 1)
1784 51944 : break;
1785 :
1786 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1787 540712 : if (!vacrel->skipwithvm)
1788 810 : break;
1789 :
1790 : /*
1791 : * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
1792 : * already frozen by now), so this page can be skipped.
1793 : */
1794 539902 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
1795 534036 : continue;
1796 :
1797 : /*
1798 : * Aggressive vacuums cannot skip any all-visible pages that are not
1799 : * also all-frozen.
1800 : */
1801 5866 : if (vacrel->aggressive)
1802 0 : break;
1803 :
1804 : /*
1805 : * Normal vacuums with eager scanning enabled only skip all-visible
1806 : * but not all-frozen pages if they have hit the failure limit for the
1807 : * current eager scan region.
1808 : */
1809 5866 : if (vacrel->eager_scan_remaining_fails > 0)
1810 : {
1811 0 : next_unskippable_eager_scanned = true;
1812 0 : break;
1813 : }
1814 :
1815 : /*
1816 : * All-visible blocks are safe to skip in a normal vacuum. But
1817 : * remember that the final range contains such a block for later.
1818 : */
1819 5866 : *skipsallvis = true;
1820 : }
1821 :
1822 : /* write the local variables back to vacrel */
1823 398172 : vacrel->next_unskippable_block = next_unskippable_block;
1824 398172 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1825 398172 : vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
1826 398172 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1827 398172 : }
1828 :
1829 : /*
1830 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1831 : *
1832 : * Must call here to handle both new and empty pages before calling
1833 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1834 : * with new or empty pages.
1835 : *
1836 : * It's necessary to consider new pages as a special case, since the rules for
1837 : * maintaining the visibility map and FSM with empty pages are a little
1838 : * different (though new pages can be truncated away during rel truncation).
1839 : *
1840 : * Empty pages are not really a special case -- they're just heap pages that
1841 : * have no allocated tuples (including even LP_UNUSED items). You might
1842 : * wonder why we need to handle them here all the same. It's only necessary
1843 : * because of a corner-case involving a hard crash during heap relation
1844 : * extension. If we ever make relation-extension crash safe, then it should
1845 : * no longer be necessary to deal with empty pages here (or new pages, for
1846 : * that matter).
1847 : *
1848 : * Caller must hold at least a shared lock. We might need to escalate the
1849 : * lock in that case, so the type of lock caller holds needs to be specified
1850 : * using 'sharelock' argument.
1851 : *
1852 : * Returns false in common case where caller should go on to call
1853 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1854 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1855 : * behalf.
1856 : *
1857 : * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
1858 : * is passed here because neither empty nor new pages can be eagerly frozen.
1859 : * New pages are never frozen. Empty pages are always set frozen in the VM at
1860 : * the same time that they are set all-visible, and we don't eagerly scan
1861 : * frozen pages.
1862 : */
1863 : static bool
1864 571500 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1865 : Page page, bool sharelock, Buffer vmbuffer)
1866 : {
1867 : Size freespace;
1868 :
1869 571500 : if (PageIsNew(page))
1870 : {
1871 : /*
1872 : * All-zeroes pages can be left over if either a backend extends the
1873 : * relation by a single page, but crashes before the newly initialized
1874 : * page has been written out, or when bulk-extending the relation
1875 : * (which creates a number of empty pages at the tail end of the
1876 : * relation), and then enters them into the FSM.
1877 : *
1878 : * Note we do not enter the page into the visibilitymap. That has the
1879 : * downside that we repeatedly visit this page in subsequent vacuums,
1880 : * but otherwise we'll never discover the space on a promoted standby.
1881 : * The harm of repeated checking ought to normally not be too bad. The
1882 : * space usually should be used at some point, otherwise there
1883 : * wouldn't be any regular vacuums.
1884 : *
1885 : * Make sure these pages are in the FSM, to ensure they can be reused.
1886 : * Do that by testing if there's any space recorded for the page. If
1887 : * not, enter it. We do so after releasing the lock on the heap page,
1888 : * the FSM is approximate, after all.
1889 : */
1890 1170 : UnlockReleaseBuffer(buf);
1891 :
1892 1170 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1893 : {
1894 858 : freespace = BLCKSZ - SizeOfPageHeaderData;
1895 :
1896 858 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1897 : }
1898 :
1899 1170 : return true;
1900 : }
1901 :
1902 570330 : if (PageIsEmpty(page))
1903 : {
1904 : /*
1905 : * It seems likely that caller will always be able to get a cleanup
1906 : * lock on an empty page. But don't take any chances -- escalate to
1907 : * an exclusive lock (still don't need a cleanup lock, though).
1908 : */
1909 54 : if (sharelock)
1910 : {
1911 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1912 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1913 :
1914 0 : if (!PageIsEmpty(page))
1915 : {
1916 : /* page isn't new or empty -- keep lock and pin for now */
1917 0 : return false;
1918 : }
1919 : }
1920 : else
1921 : {
1922 : /* Already have a full cleanup lock (which is more than enough) */
1923 : }
1924 :
1925 : /*
1926 : * Unlike new pages, empty pages are always set all-visible and
1927 : * all-frozen.
1928 : */
1929 54 : if (!PageIsAllVisible(page))
1930 : {
1931 0 : START_CRIT_SECTION();
1932 :
1933 : /* mark buffer dirty before writing a WAL record */
1934 0 : MarkBufferDirty(buf);
1935 :
1936 : /*
1937 : * It's possible that another backend has extended the heap,
1938 : * initialized the page, and then failed to WAL-log the page due
1939 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1940 : * might try to replay our record setting the page all-visible and
1941 : * find that the page isn't initialized, which will cause a PANIC.
1942 : * To prevent that, check whether the page has been previously
1943 : * WAL-logged, and if not, do that now.
1944 : */
1945 0 : if (RelationNeedsWAL(vacrel->rel) &&
1946 0 : !XLogRecPtrIsValid(PageGetLSN(page)))
1947 0 : log_newpage_buffer(buf, true);
1948 :
1949 0 : PageSetAllVisible(page);
1950 0 : visibilitymap_set(vacrel->rel, blkno, buf,
1951 : InvalidXLogRecPtr,
1952 : vmbuffer, InvalidTransactionId,
1953 : VISIBILITYMAP_ALL_VISIBLE |
1954 : VISIBILITYMAP_ALL_FROZEN);
1955 0 : END_CRIT_SECTION();
1956 :
1957 : /* Count the newly all-frozen pages for logging */
1958 0 : vacrel->vm_new_visible_pages++;
1959 0 : vacrel->vm_new_visible_frozen_pages++;
1960 : }
1961 :
1962 54 : freespace = PageGetHeapFreeSpace(page);
1963 54 : UnlockReleaseBuffer(buf);
1964 54 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1965 54 : return true;
1966 : }
1967 :
1968 : /* page isn't new or empty -- keep lock and pin */
1969 570276 : return false;
1970 : }
1971 :
1972 : /* qsort comparator for sorting OffsetNumbers */
1973 : static int
1974 5022190 : cmpOffsetNumbers(const void *a, const void *b)
1975 : {
1976 5022190 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1977 : }
1978 :
1979 : /*
1980 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1981 : *
1982 : * Caller must hold pin and buffer cleanup lock on the buffer.
1983 : *
1984 : * vmbuffer is the buffer containing the VM block with visibility information
1985 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1986 : * visibility status of the heap block looked up earlier by the caller. We
1987 : * won't rely entirely on this status, as it may be out of date.
1988 : *
1989 : * *has_lpdead_items is set to true or false depending on whether, upon return
1990 : * from this function, any LP_DEAD items are still present on the page.
1991 : *
1992 : * *vm_page_frozen is set to true if the page is newly set all-frozen in the
1993 : * VM. The caller currently only uses this for determining whether an eagerly
1994 : * scanned page was successfully set all-frozen.
1995 : *
1996 : * Returns the number of tuples deleted from the page during HOT pruning.
1997 : */
1998 : static int
1999 570220 : lazy_scan_prune(LVRelState *vacrel,
2000 : Buffer buf,
2001 : BlockNumber blkno,
2002 : Page page,
2003 : Buffer vmbuffer,
2004 : bool all_visible_according_to_vm,
2005 : bool *has_lpdead_items,
2006 : bool *vm_page_frozen)
2007 : {
2008 570220 : Relation rel = vacrel->rel;
2009 : PruneFreezeResult presult;
2010 570220 : PruneFreezeParams params = {
2011 : .relation = rel,
2012 : .buffer = buf,
2013 : .reason = PRUNE_VACUUM_SCAN,
2014 : .options = HEAP_PAGE_PRUNE_FREEZE,
2015 570220 : .vistest = vacrel->vistest,
2016 570220 : .cutoffs = &vacrel->cutoffs,
2017 : };
2018 :
2019 : Assert(BufferGetBlockNumber(buf) == blkno);
2020 :
2021 : /*
2022 : * Prune all HOT-update chains and potentially freeze tuples on this page.
2023 : *
2024 : * If the relation has no indexes, we can immediately mark would-be dead
2025 : * items LP_UNUSED.
2026 : *
2027 : * The number of tuples removed from the page is returned in
2028 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
2029 : * presult.lpdead_items's final value can be thought of as the number of
2030 : * tuples that were deleted from indexes.
2031 : *
2032 : * We will update the VM after collecting LP_DEAD items and freezing
2033 : * tuples. Pruning will have determined whether or not the page is
2034 : * all-visible.
2035 : */
2036 570220 : if (vacrel->nindexes == 0)
2037 24298 : params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
2038 :
2039 570220 : heap_page_prune_and_freeze(¶ms,
2040 : &presult,
2041 : &vacrel->offnum,
2042 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
2043 :
2044 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
2045 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
2046 :
2047 570220 : if (presult.nfrozen > 0)
2048 : {
2049 : /*
2050 : * We don't increment the new_frozen_tuple_pages instrumentation
2051 : * counter when nfrozen == 0, since it only counts pages with newly
2052 : * frozen tuples (don't confuse that with pages newly set all-frozen
2053 : * in VM).
2054 : */
2055 45124 : vacrel->new_frozen_tuple_pages++;
2056 : }
2057 :
2058 : /*
2059 : * VACUUM will call heap_page_is_all_visible() during the second pass over
2060 : * the heap to determine all_visible and all_frozen for the page -- this
2061 : * is a specialized version of the logic from this function. Now that
2062 : * we've finished pruning and freezing, make sure that we're in total
2063 : * agreement with heap_page_is_all_visible() using an assertion.
2064 : */
2065 : #ifdef USE_ASSERT_CHECKING
2066 : if (presult.all_visible)
2067 : {
2068 : TransactionId debug_cutoff;
2069 : bool debug_all_frozen;
2070 :
2071 : Assert(presult.lpdead_items == 0);
2072 :
2073 : Assert(heap_page_is_all_visible(vacrel->rel, buf,
2074 : vacrel->cutoffs.OldestXmin, &debug_all_frozen,
2075 : &debug_cutoff, &vacrel->offnum));
2076 :
2077 : Assert(presult.all_frozen == debug_all_frozen);
2078 :
2079 : Assert(!TransactionIdIsValid(debug_cutoff) ||
2080 : debug_cutoff == presult.vm_conflict_horizon);
2081 : }
2082 : #endif
2083 :
2084 : /*
2085 : * Now save details of the LP_DEAD items from the page in vacrel
2086 : */
2087 570220 : if (presult.lpdead_items > 0)
2088 : {
2089 28462 : vacrel->lpdead_item_pages++;
2090 :
2091 : /*
2092 : * deadoffsets are collected incrementally in
2093 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
2094 : * with an indeterminate order, but dead_items_add requires them to be
2095 : * sorted.
2096 : */
2097 28462 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
2098 : cmpOffsetNumbers);
2099 :
2100 28462 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
2101 : }
2102 :
2103 : /* Finally, add page-local counts to whole-VACUUM counts */
2104 570220 : vacrel->tuples_deleted += presult.ndeleted;
2105 570220 : vacrel->tuples_frozen += presult.nfrozen;
2106 570220 : vacrel->lpdead_items += presult.lpdead_items;
2107 570220 : vacrel->live_tuples += presult.live_tuples;
2108 570220 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
2109 :
2110 : /* Can't truncate this page */
2111 570220 : if (presult.hastup)
2112 555938 : vacrel->nonempty_pages = blkno + 1;
2113 :
2114 : /* Did we find LP_DEAD items? */
2115 570220 : *has_lpdead_items = (presult.lpdead_items > 0);
2116 :
2117 : Assert(!presult.all_visible || !(*has_lpdead_items));
2118 : Assert(!presult.all_frozen || presult.all_visible);
2119 :
2120 : /*
2121 : * Handle setting visibility map bit based on information from the VM (as
2122 : * of last heap_vac_scan_next_block() call), and from all_visible and
2123 : * all_frozen variables
2124 : */
2125 570220 : if (!all_visible_according_to_vm && presult.all_visible)
2126 71582 : {
2127 : uint8 old_vmbits;
2128 71582 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2129 :
2130 71582 : if (presult.all_frozen)
2131 : {
2132 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2133 53520 : flags |= VISIBILITYMAP_ALL_FROZEN;
2134 : }
2135 :
2136 : /*
2137 : * It should never be the case that the visibility map page is set
2138 : * while the page-level bit is clear, but the reverse is allowed (if
2139 : * checksums are not enabled). Regardless, set both bits so that we
2140 : * get back in sync.
2141 : *
2142 : * NB: If the heap page is all-visible but the VM bit is not set, we
2143 : * don't need to dirty the heap page. However, if checksums are
2144 : * enabled, we do need to make sure that the heap page is dirtied
2145 : * before passing it to visibilitymap_set(), because it may be logged.
2146 : * Given that this situation should only happen in rare cases after a
2147 : * crash, it is not worth optimizing.
2148 : */
2149 71582 : PageSetAllVisible(page);
2150 71582 : MarkBufferDirty(buf);
2151 71582 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2152 : InvalidXLogRecPtr,
2153 : vmbuffer, presult.vm_conflict_horizon,
2154 : flags);
2155 :
2156 : /*
2157 : * If the page wasn't already set all-visible and/or all-frozen in the
2158 : * VM, count it as newly set for logging.
2159 : */
2160 71582 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2161 : {
2162 71582 : vacrel->vm_new_visible_pages++;
2163 71582 : if (presult.all_frozen)
2164 : {
2165 53520 : vacrel->vm_new_visible_frozen_pages++;
2166 53520 : *vm_page_frozen = true;
2167 : }
2168 : }
2169 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2170 0 : presult.all_frozen)
2171 : {
2172 0 : vacrel->vm_new_frozen_pages++;
2173 0 : *vm_page_frozen = true;
2174 : }
2175 : }
2176 :
2177 : /*
2178 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
2179 : * page-level bit is clear. However, it's possible that the bit got
2180 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
2181 : * with buffer lock before concluding that the VM is corrupt.
2182 : */
2183 498638 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
2184 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
2185 : {
2186 0 : ereport(WARNING,
2187 : (errcode(ERRCODE_DATA_CORRUPTED),
2188 : errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
2189 : vacrel->relname, blkno)));
2190 :
2191 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2192 : VISIBILITYMAP_VALID_BITS);
2193 : }
2194 :
2195 : /*
2196 : * It's possible for the value returned by
2197 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
2198 : * wrong for us to see tuples that appear to not be visible to everyone
2199 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
2200 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
2201 : * conservative and sometimes returns a value that's unnecessarily small,
2202 : * so if we see that contradiction it just means that the tuples that we
2203 : * think are not visible to everyone yet actually are, and the
2204 : * PD_ALL_VISIBLE flag is correct.
2205 : *
2206 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
2207 : * however.
2208 : */
2209 498638 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
2210 : {
2211 0 : ereport(WARNING,
2212 : (errcode(ERRCODE_DATA_CORRUPTED),
2213 : errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
2214 : vacrel->relname, blkno)));
2215 :
2216 0 : PageClearAllVisible(page);
2217 0 : MarkBufferDirty(buf);
2218 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
2219 : VISIBILITYMAP_VALID_BITS);
2220 : }
2221 :
2222 : /*
2223 : * If the all-visible page is all-frozen but not marked as such yet, mark
2224 : * it as all-frozen.
2225 : */
2226 498638 : else if (all_visible_according_to_vm && presult.all_frozen &&
2227 225036 : !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
2228 : {
2229 : uint8 old_vmbits;
2230 :
2231 : /*
2232 : * Avoid relying on all_visible_according_to_vm as a proxy for the
2233 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
2234 : * stale -- even when all_visible is set
2235 : */
2236 46 : if (!PageIsAllVisible(page))
2237 : {
2238 0 : PageSetAllVisible(page);
2239 0 : MarkBufferDirty(buf);
2240 : }
2241 :
2242 : /*
2243 : * Set the page all-frozen (and all-visible) in the VM.
2244 : *
2245 : * We can pass InvalidTransactionId as our cutoff_xid, since a
2246 : * snapshotConflictHorizon sufficient to make everything safe for REDO
2247 : * was logged when the page's tuples were frozen.
2248 : */
2249 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
2250 46 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
2251 : InvalidXLogRecPtr,
2252 : vmbuffer, InvalidTransactionId,
2253 : VISIBILITYMAP_ALL_VISIBLE |
2254 : VISIBILITYMAP_ALL_FROZEN);
2255 :
2256 : /*
2257 : * The page was likely already set all-visible in the VM. However,
2258 : * there is a small chance that it was modified sometime between
2259 : * setting all_visible_according_to_vm and checking the visibility
2260 : * during pruning. Check the return value of old_vmbits anyway to
2261 : * ensure the visibility map counters used for logging are accurate.
2262 : */
2263 46 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2264 : {
2265 0 : vacrel->vm_new_visible_pages++;
2266 0 : vacrel->vm_new_visible_frozen_pages++;
2267 0 : *vm_page_frozen = true;
2268 : }
2269 :
2270 : /*
2271 : * We already checked that the page was not set all-frozen in the VM
2272 : * above, so we don't need to test the value of old_vmbits.
2273 : */
2274 : else
2275 : {
2276 46 : vacrel->vm_new_frozen_pages++;
2277 46 : *vm_page_frozen = true;
2278 : }
2279 : }
2280 :
2281 570220 : return presult.ndeleted;
2282 : }
2283 :
2284 : /*
2285 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
2286 : *
2287 : * Caller need only hold a pin and share lock on the buffer, unlike
2288 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
2289 : * performed here, it's quite possible that an earlier opportunistic pruning
2290 : * operation left LP_DEAD items behind. We'll at least collect any such items
2291 : * in dead_items for removal from indexes.
2292 : *
2293 : * For aggressive VACUUM callers, we may return false to indicate that a full
2294 : * cleanup lock is required for processing by lazy_scan_prune. This is only
2295 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
2296 : * one or more tuples on the page. We always return true for non-aggressive
2297 : * callers.
2298 : *
2299 : * If this function returns true, *has_lpdead_items gets set to true or false
2300 : * depending on whether, upon return from this function, any LP_DEAD items are
2301 : * present on the page. If this function returns false, *has_lpdead_items
2302 : * is not updated.
2303 : */
2304 : static bool
2305 72 : lazy_scan_noprune(LVRelState *vacrel,
2306 : Buffer buf,
2307 : BlockNumber blkno,
2308 : Page page,
2309 : bool *has_lpdead_items)
2310 : {
2311 : OffsetNumber offnum,
2312 : maxoff;
2313 : int lpdead_items,
2314 : live_tuples,
2315 : recently_dead_tuples,
2316 : missed_dead_tuples;
2317 : bool hastup;
2318 : HeapTupleHeader tupleheader;
2319 72 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
2320 72 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
2321 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
2322 :
2323 : Assert(BufferGetBlockNumber(buf) == blkno);
2324 :
2325 72 : hastup = false; /* for now */
2326 :
2327 72 : lpdead_items = 0;
2328 72 : live_tuples = 0;
2329 72 : recently_dead_tuples = 0;
2330 72 : missed_dead_tuples = 0;
2331 :
2332 72 : maxoff = PageGetMaxOffsetNumber(page);
2333 72 : for (offnum = FirstOffsetNumber;
2334 2644 : offnum <= maxoff;
2335 2572 : offnum = OffsetNumberNext(offnum))
2336 : {
2337 : ItemId itemid;
2338 : HeapTupleData tuple;
2339 :
2340 2588 : vacrel->offnum = offnum;
2341 2588 : itemid = PageGetItemId(page, offnum);
2342 :
2343 2588 : if (!ItemIdIsUsed(itemid))
2344 200 : continue;
2345 :
2346 2460 : if (ItemIdIsRedirected(itemid))
2347 : {
2348 72 : hastup = true;
2349 72 : continue;
2350 : }
2351 :
2352 2388 : if (ItemIdIsDead(itemid))
2353 : {
2354 : /*
2355 : * Deliberately don't set hastup=true here. See same point in
2356 : * lazy_scan_prune for an explanation.
2357 : */
2358 0 : deadoffsets[lpdead_items++] = offnum;
2359 0 : continue;
2360 : }
2361 :
2362 2388 : hastup = true; /* page prevents rel truncation */
2363 2388 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
2364 2388 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
2365 : &NoFreezePageRelfrozenXid,
2366 : &NoFreezePageRelminMxid))
2367 : {
2368 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
2369 144 : if (vacrel->aggressive)
2370 : {
2371 : /*
2372 : * Aggressive VACUUMs must always be able to advance rel's
2373 : * relfrozenxid to a value >= FreezeLimit (and be able to
2374 : * advance rel's relminmxid to a value >= MultiXactCutoff).
2375 : * The ongoing aggressive VACUUM won't be able to do that
2376 : * unless it can freeze an XID (or MXID) from this tuple now.
2377 : *
2378 : * The only safe option is to have caller perform processing
2379 : * of this page using lazy_scan_prune. Caller might have to
2380 : * wait a while for a cleanup lock, but it can't be helped.
2381 : */
2382 16 : vacrel->offnum = InvalidOffsetNumber;
2383 16 : return false;
2384 : }
2385 :
2386 : /*
2387 : * Non-aggressive VACUUMs are under no obligation to advance
2388 : * relfrozenxid (even by one XID). We can be much laxer here.
2389 : *
2390 : * Currently we always just accept an older final relfrozenxid
2391 : * and/or relminmxid value. We never make caller wait or work a
2392 : * little harder, even when it likely makes sense to do so.
2393 : */
2394 : }
2395 :
2396 2372 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
2397 2372 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2398 2372 : tuple.t_len = ItemIdGetLength(itemid);
2399 2372 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2400 :
2401 2372 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2402 : buf))
2403 : {
2404 2364 : case HEAPTUPLE_DELETE_IN_PROGRESS:
2405 : case HEAPTUPLE_LIVE:
2406 :
2407 : /*
2408 : * Count both cases as live, just like lazy_scan_prune
2409 : */
2410 2364 : live_tuples++;
2411 :
2412 2364 : break;
2413 4 : case HEAPTUPLE_DEAD:
2414 :
2415 : /*
2416 : * There is some useful work for pruning to do, that won't be
2417 : * done due to failure to get a cleanup lock.
2418 : */
2419 4 : missed_dead_tuples++;
2420 4 : break;
2421 4 : case HEAPTUPLE_RECENTLY_DEAD:
2422 :
2423 : /*
2424 : * Count in recently_dead_tuples, just like lazy_scan_prune
2425 : */
2426 4 : recently_dead_tuples++;
2427 4 : break;
2428 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
2429 :
2430 : /*
2431 : * Do not count these rows as live, just like lazy_scan_prune
2432 : */
2433 0 : break;
2434 0 : default:
2435 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2436 : break;
2437 : }
2438 : }
2439 :
2440 56 : vacrel->offnum = InvalidOffsetNumber;
2441 :
2442 : /*
2443 : * By here we know for sure that caller can put off freezing and pruning
2444 : * this particular page until the next VACUUM. Remember its details now.
2445 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
2446 : */
2447 56 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
2448 56 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
2449 :
2450 : /* Save any LP_DEAD items found on the page in dead_items */
2451 56 : if (vacrel->nindexes == 0)
2452 : {
2453 : /* Using one-pass strategy (since table has no indexes) */
2454 0 : if (lpdead_items > 0)
2455 : {
2456 : /*
2457 : * Perfunctory handling for the corner case where a single pass
2458 : * strategy VACUUM cannot get a cleanup lock, and it turns out
2459 : * that there is one or more LP_DEAD items: just count the LP_DEAD
2460 : * items as missed_dead_tuples instead. (This is a bit dishonest,
2461 : * but it beats having to maintain specialized heap vacuuming code
2462 : * forever, for vanishingly little benefit.)
2463 : */
2464 0 : hastup = true;
2465 0 : missed_dead_tuples += lpdead_items;
2466 : }
2467 : }
2468 56 : else if (lpdead_items > 0)
2469 : {
2470 : /*
2471 : * Page has LP_DEAD items, and so any references/TIDs that remain in
2472 : * indexes will be deleted during index vacuuming (and then marked
2473 : * LP_UNUSED in the heap)
2474 : */
2475 0 : vacrel->lpdead_item_pages++;
2476 :
2477 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
2478 :
2479 0 : vacrel->lpdead_items += lpdead_items;
2480 : }
2481 :
2482 : /*
2483 : * Finally, add relevant page-local counts to whole-VACUUM counts
2484 : */
2485 56 : vacrel->live_tuples += live_tuples;
2486 56 : vacrel->recently_dead_tuples += recently_dead_tuples;
2487 56 : vacrel->missed_dead_tuples += missed_dead_tuples;
2488 56 : if (missed_dead_tuples > 0)
2489 4 : vacrel->missed_dead_pages++;
2490 :
2491 : /* Can't truncate this page */
2492 56 : if (hastup)
2493 56 : vacrel->nonempty_pages = blkno + 1;
2494 :
2495 : /* Did we find LP_DEAD items? */
2496 56 : *has_lpdead_items = (lpdead_items > 0);
2497 :
2498 : /* Caller won't need to call lazy_scan_prune with same page */
2499 56 : return true;
2500 : }
2501 :
2502 : /*
2503 : * Main entry point for index vacuuming and heap vacuuming.
2504 : *
2505 : * Removes items collected in dead_items from table's indexes, then marks the
2506 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
2507 : * for full details.
2508 : *
2509 : * Also empties dead_items, freeing up space for later TIDs.
2510 : *
2511 : * We may choose to bypass index vacuuming at this point, though only when the
2512 : * ongoing VACUUM operation will definitely only have one index scan/round of
2513 : * index vacuuming.
2514 : */
2515 : static void
2516 1236 : lazy_vacuum(LVRelState *vacrel)
2517 : {
2518 : bool bypass;
2519 :
2520 : /* Should not end up here with no indexes */
2521 : Assert(vacrel->nindexes > 0);
2522 : Assert(vacrel->lpdead_item_pages > 0);
2523 :
2524 1236 : if (!vacrel->do_index_vacuuming)
2525 : {
2526 : Assert(!vacrel->do_index_cleanup);
2527 18 : dead_items_reset(vacrel);
2528 18 : return;
2529 : }
2530 :
2531 : /*
2532 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2533 : *
2534 : * We currently only do this in cases where the number of LP_DEAD items
2535 : * for the entire VACUUM operation is close to zero. This avoids sharp
2536 : * discontinuities in the duration and overhead of successive VACUUM
2537 : * operations that run against the same table with a fixed workload.
2538 : * Ideally, successive VACUUM operations will behave as if there are
2539 : * exactly zero LP_DEAD items in cases where there are close to zero.
2540 : *
2541 : * This is likely to be helpful with a table that is continually affected
2542 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2543 : * have small aberrations that lead to just a few heap pages retaining
2544 : * only one or two LP_DEAD items. This is pretty common; even when the
2545 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2546 : * impossible to predict whether HOT will be applied in 100% of cases.
2547 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2548 : * HOT through careful tuning.
2549 : */
2550 1218 : bypass = false;
2551 1218 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2552 : {
2553 : BlockNumber threshold;
2554 :
2555 : Assert(vacrel->num_index_scans == 0);
2556 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2557 : Assert(vacrel->do_index_vacuuming);
2558 : Assert(vacrel->do_index_cleanup);
2559 :
2560 : /*
2561 : * This crossover point at which we'll start to do index vacuuming is
2562 : * expressed as a percentage of the total number of heap pages in the
2563 : * table that are known to have at least one LP_DEAD item. This is
2564 : * much more important than the total number of LP_DEAD items, since
2565 : * it's a proxy for the number of heap pages whose visibility map bits
2566 : * cannot be set on account of bypassing index and heap vacuuming.
2567 : *
2568 : * We apply one further precautionary test: the space currently used
2569 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2570 : * not exceed 32MB. This limits the risk that we will bypass index
2571 : * vacuuming again and again until eventually there is a VACUUM whose
2572 : * dead_items space is not CPU cache resident.
2573 : *
2574 : * We don't take any special steps to remember the LP_DEAD items (such
2575 : * as counting them in our final update to the stats system) when the
2576 : * optimization is applied. Though the accounting used in analyze.c's
2577 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2578 : * rows in its own stats report, that's okay. The discrepancy should
2579 : * be negligible. If this optimization is ever expanded to cover more
2580 : * cases then this may need to be reconsidered.
2581 : */
2582 1198 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2583 1204 : bypass = (vacrel->lpdead_item_pages < threshold &&
2584 6 : TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
2585 : }
2586 :
2587 1218 : if (bypass)
2588 : {
2589 : /*
2590 : * There are almost zero TIDs. Behave as if there were precisely
2591 : * zero: bypass index vacuuming, but do index cleanup.
2592 : *
2593 : * We expect that the ongoing VACUUM operation will finish very
2594 : * quickly, so there is no point in considering speeding up as a
2595 : * failsafe against wraparound failure. (Index cleanup is expected to
2596 : * finish very quickly in cases where there were no ambulkdelete()
2597 : * calls.)
2598 : */
2599 6 : vacrel->do_index_vacuuming = false;
2600 : }
2601 1212 : else if (lazy_vacuum_all_indexes(vacrel))
2602 : {
2603 : /*
2604 : * We successfully completed a round of index vacuuming. Do related
2605 : * heap vacuuming now.
2606 : */
2607 1212 : lazy_vacuum_heap_rel(vacrel);
2608 : }
2609 : else
2610 : {
2611 : /*
2612 : * Failsafe case.
2613 : *
2614 : * We attempted index vacuuming, but didn't finish a full round/full
2615 : * index scan. This happens when relfrozenxid or relminmxid is too
2616 : * far in the past.
2617 : *
2618 : * From this point on the VACUUM operation will do no further index
2619 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2620 : * back here again.
2621 : */
2622 : Assert(VacuumFailsafeActive);
2623 : }
2624 :
2625 : /*
2626 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2627 : * vacuum)
2628 : */
2629 1218 : dead_items_reset(vacrel);
2630 : }
2631 :
2632 : /*
2633 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2634 : *
2635 : * Returns true in the common case when all indexes were successfully
2636 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2637 : * VACUUM operation is at risk of taking too long to finish, leading to
2638 : * wraparound failure.
2639 : */
2640 : static bool
2641 1212 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2642 : {
2643 1212 : bool allindexes = true;
2644 1212 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2645 1212 : const int progress_start_index[] = {
2646 : PROGRESS_VACUUM_PHASE,
2647 : PROGRESS_VACUUM_INDEXES_TOTAL
2648 : };
2649 1212 : const int progress_end_index[] = {
2650 : PROGRESS_VACUUM_INDEXES_TOTAL,
2651 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2652 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2653 : };
2654 : int64 progress_start_val[2];
2655 : int64 progress_end_val[3];
2656 :
2657 : Assert(vacrel->nindexes > 0);
2658 : Assert(vacrel->do_index_vacuuming);
2659 : Assert(vacrel->do_index_cleanup);
2660 :
2661 : /* Precheck for XID wraparound emergencies */
2662 1212 : if (lazy_check_wraparound_failsafe(vacrel))
2663 : {
2664 : /* Wraparound emergency -- don't even start an index scan */
2665 0 : return false;
2666 : }
2667 :
2668 : /*
2669 : * Report that we are now vacuuming indexes and the number of indexes to
2670 : * vacuum.
2671 : */
2672 1212 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2673 1212 : progress_start_val[1] = vacrel->nindexes;
2674 1212 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2675 :
2676 1212 : if (!ParallelVacuumIsActive(vacrel))
2677 : {
2678 3488 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2679 : {
2680 2288 : Relation indrel = vacrel->indrels[idx];
2681 2288 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2682 :
2683 2288 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2684 : old_live_tuples,
2685 : vacrel);
2686 :
2687 : /* Report the number of indexes vacuumed */
2688 2288 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2689 2288 : idx + 1);
2690 :
2691 2288 : if (lazy_check_wraparound_failsafe(vacrel))
2692 : {
2693 : /* Wraparound emergency -- end current index scan */
2694 0 : allindexes = false;
2695 0 : break;
2696 : }
2697 : }
2698 : }
2699 : else
2700 : {
2701 : /* Outsource everything to parallel variant */
2702 12 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2703 : vacrel->num_index_scans);
2704 :
2705 : /*
2706 : * Do a postcheck to consider applying wraparound failsafe now. Note
2707 : * that parallel VACUUM only gets the precheck and this postcheck.
2708 : */
2709 12 : if (lazy_check_wraparound_failsafe(vacrel))
2710 0 : allindexes = false;
2711 : }
2712 :
2713 : /*
2714 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2715 : * each call here (except calls where we choose to do the failsafe). This
2716 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2717 : * of the failsafe triggering, which prevents the next call from taking
2718 : * place).
2719 : */
2720 : Assert(vacrel->num_index_scans > 0 ||
2721 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2722 : Assert(allindexes || VacuumFailsafeActive);
2723 :
2724 : /*
2725 : * Increase and report the number of index scans. Also, we reset
2726 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2727 : *
2728 : * We deliberately include the case where we started a round of bulk
2729 : * deletes that we weren't able to finish due to the failsafe triggering.
2730 : */
2731 1212 : vacrel->num_index_scans++;
2732 1212 : progress_end_val[0] = 0;
2733 1212 : progress_end_val[1] = 0;
2734 1212 : progress_end_val[2] = vacrel->num_index_scans;
2735 1212 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2736 :
2737 1212 : return allindexes;
2738 : }
2739 :
2740 : /*
2741 : * Read stream callback for vacuum's third phase (second pass over the heap).
2742 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2743 : * if there are no further blocks to vacuum.
2744 : *
2745 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2746 : */
2747 : static BlockNumber
2748 25070 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2749 : void *callback_private_data,
2750 : void *per_buffer_data)
2751 : {
2752 25070 : TidStoreIter *iter = callback_private_data;
2753 : TidStoreIterResult *iter_result;
2754 :
2755 25070 : iter_result = TidStoreIterateNext(iter);
2756 25070 : if (iter_result == NULL)
2757 1212 : return InvalidBlockNumber;
2758 :
2759 : /*
2760 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2761 : * It is safe to copy the result, according to TidStoreIterateNext().
2762 : */
2763 23858 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2764 :
2765 23858 : return iter_result->blkno;
2766 : }
2767 :
2768 : /*
2769 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2770 : *
2771 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2772 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2773 : *
2774 : * We may also be able to truncate the line pointer array of the heap pages we
2775 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2776 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2777 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2778 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2779 : * page's line pointer array).
2780 : *
2781 : * Note: the reason for doing this as a second pass is we cannot remove the
2782 : * tuples until we've removed their index entries, and we want to process
2783 : * index entry removal in batches as large as possible.
2784 : */
2785 : static void
2786 1212 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2787 : {
2788 : ReadStream *stream;
2789 1212 : BlockNumber vacuumed_pages = 0;
2790 1212 : Buffer vmbuffer = InvalidBuffer;
2791 : LVSavedErrInfo saved_err_info;
2792 : TidStoreIter *iter;
2793 :
2794 : Assert(vacrel->do_index_vacuuming);
2795 : Assert(vacrel->do_index_cleanup);
2796 : Assert(vacrel->num_index_scans > 0);
2797 :
2798 : /* Report that we are now vacuuming the heap */
2799 1212 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2800 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2801 :
2802 : /* Update error traceback information */
2803 1212 : update_vacuum_error_info(vacrel, &saved_err_info,
2804 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2805 : InvalidBlockNumber, InvalidOffsetNumber);
2806 :
2807 1212 : iter = TidStoreBeginIterate(vacrel->dead_items);
2808 :
2809 : /*
2810 : * Set up the read stream for vacuum's second pass through the heap.
2811 : *
2812 : * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
2813 : * not need to wait for IO and does not perform locking. Once we support
2814 : * parallelism it should still be fine, as presumably the holder of locks
2815 : * would never be blocked by IO while holding the lock.
2816 : */
2817 1212 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
2818 : READ_STREAM_USE_BATCHING,
2819 : vacrel->bstrategy,
2820 : vacrel->rel,
2821 : MAIN_FORKNUM,
2822 : vacuum_reap_lp_read_stream_next,
2823 : iter,
2824 : sizeof(TidStoreIterResult));
2825 :
2826 : while (true)
2827 23858 : {
2828 : BlockNumber blkno;
2829 : Buffer buf;
2830 : Page page;
2831 : TidStoreIterResult *iter_result;
2832 : Size freespace;
2833 : OffsetNumber offsets[MaxOffsetNumber];
2834 : int num_offsets;
2835 :
2836 25070 : vacuum_delay_point(false);
2837 :
2838 25070 : buf = read_stream_next_buffer(stream, (void **) &iter_result);
2839 :
2840 : /* The relation is exhausted */
2841 25070 : if (!BufferIsValid(buf))
2842 1212 : break;
2843 :
2844 23858 : vacrel->blkno = blkno = BufferGetBlockNumber(buf);
2845 :
2846 : Assert(iter_result);
2847 23858 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2848 : Assert(num_offsets <= lengthof(offsets));
2849 :
2850 : /*
2851 : * Pin the visibility map page in case we need to mark the page
2852 : * all-visible. In most cases this will be very cheap, because we'll
2853 : * already have the correct page pinned anyway.
2854 : */
2855 23858 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2856 :
2857 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2858 23858 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2859 23858 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2860 : num_offsets, vmbuffer);
2861 :
2862 : /* Now that we've vacuumed the page, record its available space */
2863 23858 : page = BufferGetPage(buf);
2864 23858 : freespace = PageGetHeapFreeSpace(page);
2865 :
2866 23858 : UnlockReleaseBuffer(buf);
2867 23858 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2868 23858 : vacuumed_pages++;
2869 : }
2870 :
2871 1212 : read_stream_end(stream);
2872 1212 : TidStoreEndIterate(iter);
2873 :
2874 1212 : vacrel->blkno = InvalidBlockNumber;
2875 1212 : if (BufferIsValid(vmbuffer))
2876 1212 : ReleaseBuffer(vmbuffer);
2877 :
2878 : /*
2879 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2880 : * the second heap pass. No more, no less.
2881 : */
2882 : Assert(vacrel->num_index_scans > 1 ||
2883 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2884 : vacuumed_pages == vacrel->lpdead_item_pages));
2885 :
2886 1212 : ereport(DEBUG2,
2887 : (errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
2888 : vacrel->relname, vacrel->dead_items_info->num_items,
2889 : vacuumed_pages)));
2890 :
2891 : /* Revert to the previous phase information for error traceback */
2892 1212 : restore_vacuum_error_info(vacrel, &saved_err_info);
2893 1212 : }
2894 :
2895 : /*
2896 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2897 : * vacrel->dead_items store.
2898 : *
2899 : * Caller must have an exclusive buffer lock on the buffer (though a full
2900 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2901 : * a pin on blkno's visibility map page.
2902 : */
2903 : static void
2904 23858 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2905 : OffsetNumber *deadoffsets, int num_offsets,
2906 : Buffer vmbuffer)
2907 : {
2908 23858 : Page page = BufferGetPage(buffer);
2909 : OffsetNumber unused[MaxHeapTuplesPerPage];
2910 23858 : int nunused = 0;
2911 : TransactionId visibility_cutoff_xid;
2912 23858 : TransactionId conflict_xid = InvalidTransactionId;
2913 : bool all_frozen;
2914 : LVSavedErrInfo saved_err_info;
2915 23858 : uint8 vmflags = 0;
2916 :
2917 : Assert(vacrel->do_index_vacuuming);
2918 :
2919 23858 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2920 :
2921 : /* Update error traceback information */
2922 23858 : update_vacuum_error_info(vacrel, &saved_err_info,
2923 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2924 : InvalidOffsetNumber);
2925 :
2926 : /*
2927 : * Before marking dead items unused, check whether the page will become
2928 : * all-visible once that change is applied. This lets us reap the tuples
2929 : * and mark the page all-visible within the same critical section,
2930 : * enabling both changes to be emitted in a single WAL record. Since the
2931 : * visibility checks may perform I/O and allocate memory, they must be
2932 : * done outside the critical section.
2933 : */
2934 23858 : if (heap_page_would_be_all_visible(vacrel->rel, buffer,
2935 : vacrel->cutoffs.OldestXmin,
2936 : deadoffsets, num_offsets,
2937 : &all_frozen, &visibility_cutoff_xid,
2938 : &vacrel->offnum))
2939 : {
2940 23802 : vmflags |= VISIBILITYMAP_ALL_VISIBLE;
2941 23802 : if (all_frozen)
2942 : {
2943 19222 : vmflags |= VISIBILITYMAP_ALL_FROZEN;
2944 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2945 : }
2946 :
2947 : /*
2948 : * Take the lock on the vmbuffer before entering a critical section.
2949 : * The heap page lock must also be held while updating the VM to
2950 : * ensure consistency.
2951 : */
2952 23802 : LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
2953 : }
2954 :
2955 23858 : START_CRIT_SECTION();
2956 :
2957 1524024 : for (int i = 0; i < num_offsets; i++)
2958 : {
2959 : ItemId itemid;
2960 1500166 : OffsetNumber toff = deadoffsets[i];
2961 :
2962 1500166 : itemid = PageGetItemId(page, toff);
2963 :
2964 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2965 1500166 : ItemIdSetUnused(itemid);
2966 1500166 : unused[nunused++] = toff;
2967 : }
2968 :
2969 : Assert(nunused > 0);
2970 :
2971 : /* Attempt to truncate line pointer array now */
2972 23858 : PageTruncateLinePointerArray(page);
2973 :
2974 23858 : if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
2975 : {
2976 : /*
2977 : * The page is guaranteed to have had dead line pointers, so we always
2978 : * set PD_ALL_VISIBLE.
2979 : */
2980 23802 : PageSetAllVisible(page);
2981 23802 : visibilitymap_set_vmbits(blkno,
2982 : vmbuffer, vmflags,
2983 23802 : vacrel->rel->rd_locator);
2984 23802 : conflict_xid = visibility_cutoff_xid;
2985 : }
2986 :
2987 : /*
2988 : * Mark buffer dirty before we write WAL.
2989 : */
2990 23858 : MarkBufferDirty(buffer);
2991 :
2992 : /* XLOG stuff */
2993 23858 : if (RelationNeedsWAL(vacrel->rel))
2994 : {
2995 22158 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2996 : vmflags != 0 ? vmbuffer : InvalidBuffer,
2997 : vmflags,
2998 : conflict_xid,
2999 : false, /* no cleanup lock required */
3000 : PRUNE_VACUUM_CLEANUP,
3001 : NULL, 0, /* frozen */
3002 : NULL, 0, /* redirected */
3003 : NULL, 0, /* dead */
3004 : unused, nunused);
3005 : }
3006 :
3007 23858 : END_CRIT_SECTION();
3008 :
3009 23858 : if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
3010 : {
3011 : /* Count the newly set VM page for logging */
3012 23802 : LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
3013 23802 : vacrel->vm_new_visible_pages++;
3014 23802 : if (all_frozen)
3015 19222 : vacrel->vm_new_visible_frozen_pages++;
3016 : }
3017 :
3018 : /* Revert to the previous phase information for error traceback */
3019 23858 : restore_vacuum_error_info(vacrel, &saved_err_info);
3020 23858 : }
3021 :
3022 : /*
3023 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
3024 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
3025 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
3026 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
3027 : *
3028 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
3029 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
3030 : * that it started out with.
3031 : *
3032 : * Returns true when failsafe has been triggered.
3033 : */
3034 : static bool
3035 152130 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
3036 : {
3037 : /* Don't warn more than once per VACUUM */
3038 152130 : if (VacuumFailsafeActive)
3039 0 : return true;
3040 :
3041 152130 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
3042 : {
3043 36078 : const int progress_index[] = {
3044 : PROGRESS_VACUUM_INDEXES_TOTAL,
3045 : PROGRESS_VACUUM_INDEXES_PROCESSED,
3046 : PROGRESS_VACUUM_MODE
3047 : };
3048 36078 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
3049 :
3050 36078 : VacuumFailsafeActive = true;
3051 :
3052 : /*
3053 : * Abandon use of a buffer access strategy to allow use of all of
3054 : * shared buffers. We assume the caller who allocated the memory for
3055 : * the BufferAccessStrategy will free it.
3056 : */
3057 36078 : vacrel->bstrategy = NULL;
3058 :
3059 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
3060 36078 : vacrel->do_index_vacuuming = false;
3061 36078 : vacrel->do_index_cleanup = false;
3062 36078 : vacrel->do_rel_truncate = false;
3063 :
3064 : /* Reset the progress counters and set the failsafe mode */
3065 36078 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
3066 :
3067 36078 : ereport(WARNING,
3068 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3069 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3070 : vacrel->num_index_scans),
3071 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3072 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3073 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3074 :
3075 : /* Stop applying cost limits from this point on */
3076 36078 : VacuumCostActive = false;
3077 36078 : VacuumCostBalance = 0;
3078 :
3079 36078 : return true;
3080 : }
3081 :
3082 116052 : return false;
3083 : }
3084 :
3085 : /*
3086 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3087 : */
3088 : static void
3089 106512 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3090 : {
3091 106512 : double reltuples = vacrel->new_rel_tuples;
3092 106512 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3093 106512 : const int progress_start_index[] = {
3094 : PROGRESS_VACUUM_PHASE,
3095 : PROGRESS_VACUUM_INDEXES_TOTAL
3096 : };
3097 106512 : const int progress_end_index[] = {
3098 : PROGRESS_VACUUM_INDEXES_TOTAL,
3099 : PROGRESS_VACUUM_INDEXES_PROCESSED
3100 : };
3101 : int64 progress_start_val[2];
3102 106512 : int64 progress_end_val[2] = {0, 0};
3103 :
3104 : Assert(vacrel->do_index_cleanup);
3105 : Assert(vacrel->nindexes > 0);
3106 :
3107 : /*
3108 : * Report that we are now cleaning up indexes and the number of indexes to
3109 : * cleanup.
3110 : */
3111 106512 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3112 106512 : progress_start_val[1] = vacrel->nindexes;
3113 106512 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3114 :
3115 106512 : if (!ParallelVacuumIsActive(vacrel))
3116 : {
3117 274298 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3118 : {
3119 167820 : Relation indrel = vacrel->indrels[idx];
3120 167820 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3121 :
3122 335640 : vacrel->indstats[idx] =
3123 167820 : lazy_cleanup_one_index(indrel, istat, reltuples,
3124 : estimated_count, vacrel);
3125 :
3126 : /* Report the number of indexes cleaned up */
3127 167820 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3128 167820 : idx + 1);
3129 : }
3130 : }
3131 : else
3132 : {
3133 : /* Outsource everything to parallel variant */
3134 34 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3135 : vacrel->num_index_scans,
3136 : estimated_count);
3137 : }
3138 :
3139 : /* Reset the progress counters */
3140 106512 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3141 106512 : }
3142 :
3143 : /*
3144 : * lazy_vacuum_one_index() -- vacuum index relation.
3145 : *
3146 : * Delete all the index tuples containing a TID collected in
3147 : * vacrel->dead_items. Also update running statistics. Exact
3148 : * details depend on index AM's ambulkdelete routine.
3149 : *
3150 : * reltuples is the number of heap tuples to be passed to the
3151 : * bulkdelete callback. It's always assumed to be estimated.
3152 : * See indexam.sgml for more info.
3153 : *
3154 : * Returns bulk delete stats derived from input stats
3155 : */
3156 : static IndexBulkDeleteResult *
3157 2288 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3158 : double reltuples, LVRelState *vacrel)
3159 : {
3160 : IndexVacuumInfo ivinfo;
3161 : LVSavedErrInfo saved_err_info;
3162 :
3163 2288 : ivinfo.index = indrel;
3164 2288 : ivinfo.heaprel = vacrel->rel;
3165 2288 : ivinfo.analyze_only = false;
3166 2288 : ivinfo.report_progress = false;
3167 2288 : ivinfo.estimated_count = true;
3168 2288 : ivinfo.message_level = DEBUG2;
3169 2288 : ivinfo.num_heap_tuples = reltuples;
3170 2288 : ivinfo.strategy = vacrel->bstrategy;
3171 :
3172 : /*
3173 : * Update error traceback information.
3174 : *
3175 : * The index name is saved during this phase and restored immediately
3176 : * after this phase. See vacuum_error_callback.
3177 : */
3178 : Assert(vacrel->indname == NULL);
3179 2288 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3180 2288 : update_vacuum_error_info(vacrel, &saved_err_info,
3181 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3182 : InvalidBlockNumber, InvalidOffsetNumber);
3183 :
3184 : /* Do bulk deletion */
3185 2288 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3186 : vacrel->dead_items_info);
3187 :
3188 : /* Revert to the previous phase information for error traceback */
3189 2288 : restore_vacuum_error_info(vacrel, &saved_err_info);
3190 2288 : pfree(vacrel->indname);
3191 2288 : vacrel->indname = NULL;
3192 :
3193 2288 : return istat;
3194 : }
3195 :
3196 : /*
3197 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3198 : *
3199 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3200 : * of heap tuples and estimated_count is true if reltuples is an
3201 : * estimated value. See indexam.sgml for more info.
3202 : *
3203 : * Returns bulk delete stats derived from input stats
3204 : */
3205 : static IndexBulkDeleteResult *
3206 167820 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3207 : double reltuples, bool estimated_count,
3208 : LVRelState *vacrel)
3209 : {
3210 : IndexVacuumInfo ivinfo;
3211 : LVSavedErrInfo saved_err_info;
3212 :
3213 167820 : ivinfo.index = indrel;
3214 167820 : ivinfo.heaprel = vacrel->rel;
3215 167820 : ivinfo.analyze_only = false;
3216 167820 : ivinfo.report_progress = false;
3217 167820 : ivinfo.estimated_count = estimated_count;
3218 167820 : ivinfo.message_level = DEBUG2;
3219 :
3220 167820 : ivinfo.num_heap_tuples = reltuples;
3221 167820 : ivinfo.strategy = vacrel->bstrategy;
3222 :
3223 : /*
3224 : * Update error traceback information.
3225 : *
3226 : * The index name is saved during this phase and restored immediately
3227 : * after this phase. See vacuum_error_callback.
3228 : */
3229 : Assert(vacrel->indname == NULL);
3230 167820 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3231 167820 : update_vacuum_error_info(vacrel, &saved_err_info,
3232 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3233 : InvalidBlockNumber, InvalidOffsetNumber);
3234 :
3235 167820 : istat = vac_cleanup_one_index(&ivinfo, istat);
3236 :
3237 : /* Revert to the previous phase information for error traceback */
3238 167820 : restore_vacuum_error_info(vacrel, &saved_err_info);
3239 167820 : pfree(vacrel->indname);
3240 167820 : vacrel->indname = NULL;
3241 :
3242 167820 : return istat;
3243 : }
3244 :
3245 : /*
3246 : * should_attempt_truncation - should we attempt to truncate the heap?
3247 : *
3248 : * Don't even think about it unless we have a shot at releasing a goodly
3249 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3250 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3251 : * be particularly disruptive.
3252 : *
3253 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3254 : * system might be refusing to allocate new XIDs at this point. The system
3255 : * definitely won't return to normal unless and until VACUUM actually advances
3256 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3257 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3258 : * truncate the table under these circumstances, an XID exhaustion error might
3259 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3260 : * There is very little chance of truncation working out when the failsafe is
3261 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3262 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3263 : * we're called.
3264 : */
3265 : static bool
3266 148618 : should_attempt_truncation(LVRelState *vacrel)
3267 : {
3268 : BlockNumber possibly_freeable;
3269 :
3270 148618 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3271 36368 : return false;
3272 :
3273 112250 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
3274 112250 : if (possibly_freeable > 0 &&
3275 312 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3276 312 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3277 294 : return true;
3278 :
3279 111956 : return false;
3280 : }
3281 :
3282 : /*
3283 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3284 : */
3285 : static void
3286 294 : lazy_truncate_heap(LVRelState *vacrel)
3287 : {
3288 294 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3289 : BlockNumber new_rel_pages;
3290 : bool lock_waiter_detected;
3291 : int lock_retry;
3292 :
3293 : /* Report that we are now truncating */
3294 294 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3295 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3296 :
3297 : /* Update error traceback information one last time */
3298 294 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3299 : vacrel->nonempty_pages, InvalidOffsetNumber);
3300 :
3301 : /*
3302 : * Loop until no more truncating can be done.
3303 : */
3304 : do
3305 : {
3306 : /*
3307 : * We need full exclusive lock on the relation in order to do
3308 : * truncation. If we can't get it, give up rather than waiting --- we
3309 : * don't want to block other backends, and we don't want to deadlock
3310 : * (which is quite possible considering we already hold a lower-grade
3311 : * lock).
3312 : */
3313 294 : lock_waiter_detected = false;
3314 294 : lock_retry = 0;
3315 : while (true)
3316 : {
3317 694 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3318 290 : break;
3319 :
3320 : /*
3321 : * Check for interrupts while trying to (re-)acquire the exclusive
3322 : * lock.
3323 : */
3324 404 : CHECK_FOR_INTERRUPTS();
3325 :
3326 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3327 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3328 : {
3329 : /*
3330 : * We failed to establish the lock in the specified number of
3331 : * retries. This means we give up truncating.
3332 : */
3333 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
3334 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3335 : vacrel->relname)));
3336 6 : return;
3337 : }
3338 :
3339 400 : (void) WaitLatch(MyLatch,
3340 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3341 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3342 : WAIT_EVENT_VACUUM_TRUNCATE);
3343 400 : ResetLatch(MyLatch);
3344 : }
3345 :
3346 : /*
3347 : * Now that we have exclusive lock, look to see if the rel has grown
3348 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3349 : * the newly added pages presumably contain non-deletable tuples.
3350 : */
3351 290 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3352 290 : if (new_rel_pages != orig_rel_pages)
3353 : {
3354 : /*
3355 : * Note: we intentionally don't update vacrel->rel_pages with the
3356 : * new rel size here. If we did, it would amount to assuming that
3357 : * the new pages are empty, which is unlikely. Leaving the numbers
3358 : * alone amounts to assuming that the new pages have the same
3359 : * tuple density as existing ones, which is less unlikely.
3360 : */
3361 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3362 0 : return;
3363 : }
3364 :
3365 : /*
3366 : * Scan backwards from the end to verify that the end pages actually
3367 : * contain no tuples. This is *necessary*, not optional, because
3368 : * other backends could have added tuples to these pages whilst we
3369 : * were vacuuming.
3370 : */
3371 290 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3372 290 : vacrel->blkno = new_rel_pages;
3373 :
3374 290 : if (new_rel_pages >= orig_rel_pages)
3375 : {
3376 : /* can't do anything after all */
3377 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3378 2 : return;
3379 : }
3380 :
3381 : /*
3382 : * Okay to truncate.
3383 : */
3384 288 : RelationTruncate(vacrel->rel, new_rel_pages);
3385 :
3386 : /*
3387 : * We can release the exclusive lock as soon as we have truncated.
3388 : * Other backends can't safely access the relation until they have
3389 : * processed the smgr invalidation that smgrtruncate sent out ... but
3390 : * that should happen as part of standard invalidation processing once
3391 : * they acquire lock on the relation.
3392 : */
3393 288 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3394 :
3395 : /*
3396 : * Update statistics. Here, it *is* correct to adjust rel_pages
3397 : * without also touching reltuples, since the tuple count wasn't
3398 : * changed by the truncation.
3399 : */
3400 288 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3401 288 : vacrel->rel_pages = new_rel_pages;
3402 :
3403 288 : ereport(vacrel->verbose ? INFO : DEBUG2,
3404 : (errmsg("table \"%s\": truncated %u to %u pages",
3405 : vacrel->relname,
3406 : orig_rel_pages, new_rel_pages)));
3407 288 : orig_rel_pages = new_rel_pages;
3408 288 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3409 : }
3410 :
3411 : /*
3412 : * Rescan end pages to verify that they are (still) empty of tuples.
3413 : *
3414 : * Returns number of nondeletable pages (last nonempty page + 1).
3415 : */
3416 : static BlockNumber
3417 290 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3418 : {
3419 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3420 : "prefetch size must be power of 2");
3421 :
3422 : BlockNumber blkno;
3423 : BlockNumber prefetchedUntil;
3424 : instr_time starttime;
3425 :
3426 : /* Initialize the starttime if we check for conflicting lock requests */
3427 290 : INSTR_TIME_SET_CURRENT(starttime);
3428 :
3429 : /*
3430 : * Start checking blocks at what we believe relation end to be and move
3431 : * backwards. (Strange coding of loop control is needed because blkno is
3432 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3433 : * in forward direction, so that OS-level readahead can kick in.
3434 : */
3435 290 : blkno = vacrel->rel_pages;
3436 290 : prefetchedUntil = InvalidBlockNumber;
3437 4262 : while (blkno > vacrel->nonempty_pages)
3438 : {
3439 : Buffer buf;
3440 : Page page;
3441 : OffsetNumber offnum,
3442 : maxoff;
3443 : bool hastup;
3444 :
3445 : /*
3446 : * Check if another process requests a lock on our relation. We are
3447 : * holding an AccessExclusiveLock here, so they will be waiting. We
3448 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3449 : * only check if that interval has elapsed once every 32 blocks to
3450 : * keep the number of system calls and actual shared lock table
3451 : * lookups to a minimum.
3452 : */
3453 3980 : if ((blkno % 32) == 0)
3454 : {
3455 : instr_time currenttime;
3456 : instr_time elapsed;
3457 :
3458 122 : INSTR_TIME_SET_CURRENT(currenttime);
3459 122 : elapsed = currenttime;
3460 122 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3461 122 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3462 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3463 : {
3464 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3465 : {
3466 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3467 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3468 : vacrel->relname)));
3469 :
3470 0 : *lock_waiter_detected = true;
3471 0 : return blkno;
3472 : }
3473 0 : starttime = currenttime;
3474 : }
3475 : }
3476 :
3477 : /*
3478 : * We don't insert a vacuum delay point here, because we have an
3479 : * exclusive lock on the table which we want to hold for as short a
3480 : * time as possible. We still need to check for interrupts however.
3481 : */
3482 3980 : CHECK_FOR_INTERRUPTS();
3483 :
3484 3980 : blkno--;
3485 :
3486 : /* If we haven't prefetched this lot yet, do so now. */
3487 3980 : if (prefetchedUntil > blkno)
3488 : {
3489 : BlockNumber prefetchStart;
3490 : BlockNumber pblkno;
3491 :
3492 376 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3493 5824 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3494 : {
3495 5448 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3496 5448 : CHECK_FOR_INTERRUPTS();
3497 : }
3498 376 : prefetchedUntil = prefetchStart;
3499 : }
3500 :
3501 3980 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3502 : vacrel->bstrategy);
3503 :
3504 : /* In this phase we only need shared access to the buffer */
3505 3980 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3506 :
3507 3980 : page = BufferGetPage(buf);
3508 :
3509 3980 : if (PageIsNew(page) || PageIsEmpty(page))
3510 : {
3511 1648 : UnlockReleaseBuffer(buf);
3512 1648 : continue;
3513 : }
3514 :
3515 2332 : hastup = false;
3516 2332 : maxoff = PageGetMaxOffsetNumber(page);
3517 2332 : for (offnum = FirstOffsetNumber;
3518 5262 : offnum <= maxoff;
3519 2930 : offnum = OffsetNumberNext(offnum))
3520 : {
3521 : ItemId itemid;
3522 :
3523 2938 : itemid = PageGetItemId(page, offnum);
3524 :
3525 : /*
3526 : * Note: any non-unused item should be taken as a reason to keep
3527 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3528 : * we must not have cleaned out its index entries.
3529 : */
3530 2938 : if (ItemIdIsUsed(itemid))
3531 : {
3532 8 : hastup = true;
3533 8 : break; /* can stop scanning */
3534 : }
3535 : } /* scan along page */
3536 :
3537 2332 : UnlockReleaseBuffer(buf);
3538 :
3539 : /* Done scanning if we found a tuple here */
3540 2332 : if (hastup)
3541 8 : return blkno + 1;
3542 : }
3543 :
3544 : /*
3545 : * If we fall out of the loop, all the previously-thought-to-be-empty
3546 : * pages still are; we need not bother to look at the last known-nonempty
3547 : * page.
3548 : */
3549 282 : return vacrel->nonempty_pages;
3550 : }
3551 :
3552 : /*
3553 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3554 : * shared memory). Sets both in vacrel for caller.
3555 : *
3556 : * Also handles parallel initialization as part of allocating dead_items in
3557 : * DSM when required.
3558 : */
3559 : static void
3560 148618 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3561 : {
3562 : VacDeadItemsInfo *dead_items_info;
3563 418346 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3564 121110 : autovacuum_work_mem != -1 ?
3565 269728 : autovacuum_work_mem : maintenance_work_mem;
3566 :
3567 : /*
3568 : * Initialize state for a parallel vacuum. As of now, only one worker can
3569 : * be used for an index, so we invoke parallelism only if there are at
3570 : * least two indexes on a table.
3571 : */
3572 148618 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3573 : {
3574 : /*
3575 : * Since parallel workers cannot access data in temporary tables, we
3576 : * can't perform parallel vacuum on them.
3577 : */
3578 10970 : if (RelationUsesLocalBuffers(vacrel->rel))
3579 : {
3580 : /*
3581 : * Give warning only if the user explicitly tries to perform a
3582 : * parallel vacuum on the temporary table.
3583 : */
3584 6 : if (nworkers > 0)
3585 6 : ereport(WARNING,
3586 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3587 : vacrel->relname)));
3588 : }
3589 : else
3590 10964 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3591 : vacrel->nindexes, nworkers,
3592 : vac_work_mem,
3593 10964 : vacrel->verbose ? INFO : DEBUG2,
3594 : vacrel->bstrategy);
3595 :
3596 : /*
3597 : * If parallel mode started, dead_items and dead_items_info spaces are
3598 : * allocated in DSM.
3599 : */
3600 10970 : if (ParallelVacuumIsActive(vacrel))
3601 : {
3602 34 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3603 : &vacrel->dead_items_info);
3604 34 : return;
3605 : }
3606 : }
3607 :
3608 : /*
3609 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3610 : * locally.
3611 : */
3612 :
3613 148584 : dead_items_info = palloc_object(VacDeadItemsInfo);
3614 148584 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3615 148584 : dead_items_info->num_items = 0;
3616 148584 : vacrel->dead_items_info = dead_items_info;
3617 :
3618 148584 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3619 : }
3620 :
3621 : /*
3622 : * Add the given block number and offset numbers to dead_items.
3623 : */
3624 : static void
3625 28462 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3626 : int num_offsets)
3627 : {
3628 28462 : const int prog_index[2] = {
3629 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3630 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3631 : };
3632 : int64 prog_val[2];
3633 :
3634 28462 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3635 28462 : vacrel->dead_items_info->num_items += num_offsets;
3636 :
3637 : /* update the progress information */
3638 28462 : prog_val[0] = vacrel->dead_items_info->num_items;
3639 28462 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3640 28462 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3641 28462 : }
3642 :
3643 : /*
3644 : * Forget all collected dead items.
3645 : */
3646 : static void
3647 1236 : dead_items_reset(LVRelState *vacrel)
3648 : {
3649 : /* Update statistics for dead items */
3650 1236 : vacrel->num_dead_items_resets++;
3651 1236 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3652 :
3653 1236 : if (ParallelVacuumIsActive(vacrel))
3654 : {
3655 12 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3656 12 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3657 : &vacrel->dead_items_info);
3658 12 : return;
3659 : }
3660 :
3661 : /* Recreate the tidstore with the same max_bytes limitation */
3662 1224 : TidStoreDestroy(vacrel->dead_items);
3663 1224 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3664 :
3665 : /* Reset the counter */
3666 1224 : vacrel->dead_items_info->num_items = 0;
3667 : }
3668 :
3669 : /*
3670 : * Perform cleanup for resources allocated in dead_items_alloc
3671 : */
3672 : static void
3673 148618 : dead_items_cleanup(LVRelState *vacrel)
3674 : {
3675 148618 : if (!ParallelVacuumIsActive(vacrel))
3676 : {
3677 : /* Don't bother with pfree here */
3678 148584 : return;
3679 : }
3680 :
3681 : /* End parallel mode */
3682 34 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3683 34 : vacrel->pvs = NULL;
3684 : }
3685 :
3686 : #ifdef USE_ASSERT_CHECKING
3687 :
3688 : /*
3689 : * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3690 : * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3691 : * reason not to use it outside of asserts.
3692 : */
3693 : static bool
3694 : heap_page_is_all_visible(Relation rel, Buffer buf,
3695 : TransactionId OldestXmin,
3696 : bool *all_frozen,
3697 : TransactionId *visibility_cutoff_xid,
3698 : OffsetNumber *logging_offnum)
3699 : {
3700 :
3701 : return heap_page_would_be_all_visible(rel, buf,
3702 : OldestXmin,
3703 : NULL, 0,
3704 : all_frozen,
3705 : visibility_cutoff_xid,
3706 : logging_offnum);
3707 : }
3708 : #endif
3709 :
3710 : /*
3711 : * Check whether the heap page in buf is all-visible except for the dead
3712 : * tuples referenced in the deadoffsets array.
3713 : *
3714 : * Vacuum uses this to check if a page would become all-visible after reaping
3715 : * known dead tuples. This function does not remove the dead items.
3716 : *
3717 : * This cannot be called in a critical section, as the visibility checks may
3718 : * perform IO and allocate memory.
3719 : *
3720 : * Returns true if the page is all-visible other than the provided
3721 : * deadoffsets and false otherwise.
3722 : *
3723 : * OldestXmin is used to determine visibility.
3724 : *
3725 : * Output parameters:
3726 : *
3727 : * - *all_frozen: true if every tuple on the page is frozen
3728 : * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3729 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3730 : * used by vacuum's error callback system.
3731 : *
3732 : * Callers looking to verify that the page is already all-visible can call
3733 : * heap_page_is_all_visible().
3734 : *
3735 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3736 : * If you modify this function, ensure consistency with that code. An
3737 : * assertion cross-checks that both remain in agreement. Do not introduce new
3738 : * side-effects.
3739 : */
3740 : static bool
3741 23858 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3742 : TransactionId OldestXmin,
3743 : OffsetNumber *deadoffsets,
3744 : int ndeadoffsets,
3745 : bool *all_frozen,
3746 : TransactionId *visibility_cutoff_xid,
3747 : OffsetNumber *logging_offnum)
3748 : {
3749 23858 : Page page = BufferGetPage(buf);
3750 23858 : BlockNumber blockno = BufferGetBlockNumber(buf);
3751 : OffsetNumber offnum,
3752 : maxoff;
3753 23858 : bool all_visible = true;
3754 23858 : int matched_dead_count = 0;
3755 :
3756 23858 : *visibility_cutoff_xid = InvalidTransactionId;
3757 23858 : *all_frozen = true;
3758 :
3759 : Assert(ndeadoffsets == 0 || deadoffsets);
3760 :
3761 : #ifdef USE_ASSERT_CHECKING
3762 : /* Confirm input deadoffsets[] is strictly sorted */
3763 : if (ndeadoffsets > 1)
3764 : {
3765 : for (int i = 1; i < ndeadoffsets; i++)
3766 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3767 : }
3768 : #endif
3769 :
3770 23858 : maxoff = PageGetMaxOffsetNumber(page);
3771 23858 : for (offnum = FirstOffsetNumber;
3772 2424540 : offnum <= maxoff && all_visible;
3773 2400682 : offnum = OffsetNumberNext(offnum))
3774 : {
3775 : ItemId itemid;
3776 : HeapTupleData tuple;
3777 :
3778 : /*
3779 : * Set the offset number so that we can display it along with any
3780 : * error that occurred while processing this tuple.
3781 : */
3782 2400682 : *logging_offnum = offnum;
3783 2400682 : itemid = PageGetItemId(page, offnum);
3784 :
3785 : /* Unused or redirect line pointers are of no interest */
3786 2400682 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3787 1546094 : continue;
3788 :
3789 2351414 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3790 :
3791 : /*
3792 : * Dead line pointers can have index pointers pointing to them. So
3793 : * they can't be treated as visible
3794 : */
3795 2351414 : if (ItemIdIsDead(itemid))
3796 : {
3797 1496826 : if (!deadoffsets ||
3798 1496826 : matched_dead_count >= ndeadoffsets ||
3799 1496826 : deadoffsets[matched_dead_count] != offnum)
3800 : {
3801 0 : *all_frozen = all_visible = false;
3802 0 : break;
3803 : }
3804 1496826 : matched_dead_count++;
3805 1496826 : continue;
3806 : }
3807 :
3808 : Assert(ItemIdIsNormal(itemid));
3809 :
3810 854588 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3811 854588 : tuple.t_len = ItemIdGetLength(itemid);
3812 854588 : tuple.t_tableOid = RelationGetRelid(rel);
3813 :
3814 : /* Visibility checks may do IO or allocate memory */
3815 : Assert(CritSectionCount == 0);
3816 854588 : switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
3817 : {
3818 854574 : case HEAPTUPLE_LIVE:
3819 : {
3820 : TransactionId xmin;
3821 :
3822 : /* Check comments in lazy_scan_prune. */
3823 854574 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3824 : {
3825 0 : all_visible = false;
3826 0 : *all_frozen = false;
3827 0 : break;
3828 : }
3829 :
3830 : /*
3831 : * The inserter definitely committed. But is it old enough
3832 : * that everyone sees it as committed?
3833 : */
3834 854574 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3835 854574 : if (!TransactionIdPrecedes(xmin, OldestXmin))
3836 : {
3837 42 : all_visible = false;
3838 42 : *all_frozen = false;
3839 42 : break;
3840 : }
3841 :
3842 : /* Track newest xmin on page. */
3843 854532 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3844 : TransactionIdIsNormal(xmin))
3845 14508 : *visibility_cutoff_xid = xmin;
3846 :
3847 : /* Check whether this tuple is already frozen or not */
3848 1099734 : if (all_visible && *all_frozen &&
3849 245202 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3850 4590 : *all_frozen = false;
3851 : }
3852 854532 : break;
3853 :
3854 14 : case HEAPTUPLE_DEAD:
3855 : case HEAPTUPLE_RECENTLY_DEAD:
3856 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3857 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3858 : {
3859 14 : all_visible = false;
3860 14 : *all_frozen = false;
3861 14 : break;
3862 : }
3863 0 : default:
3864 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3865 : break;
3866 : }
3867 : } /* scan along page */
3868 :
3869 : /* Clear the offset information once we have processed the given page. */
3870 23858 : *logging_offnum = InvalidOffsetNumber;
3871 :
3872 23858 : return all_visible;
3873 : }
3874 :
3875 : /*
3876 : * Update index statistics in pg_class if the statistics are accurate.
3877 : */
3878 : static void
3879 112280 : update_relstats_all_indexes(LVRelState *vacrel)
3880 : {
3881 112280 : Relation *indrels = vacrel->indrels;
3882 112280 : int nindexes = vacrel->nindexes;
3883 112280 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3884 :
3885 : Assert(vacrel->do_index_cleanup);
3886 :
3887 280222 : for (int idx = 0; idx < nindexes; idx++)
3888 : {
3889 167942 : Relation indrel = indrels[idx];
3890 167942 : IndexBulkDeleteResult *istat = indstats[idx];
3891 :
3892 167942 : if (istat == NULL || istat->estimated_count)
3893 165376 : continue;
3894 :
3895 : /* Update index statistics */
3896 2566 : vac_update_relstats(indrel,
3897 : istat->num_pages,
3898 : istat->num_index_tuples,
3899 : 0, 0,
3900 : false,
3901 : InvalidTransactionId,
3902 : InvalidMultiXactId,
3903 : NULL, NULL, false);
3904 : }
3905 112280 : }
3906 :
3907 : /*
3908 : * Error context callback for errors occurring during vacuum. The error
3909 : * context messages for index phases should match the messages set in parallel
3910 : * vacuum. If you change this function for those phases, change
3911 : * parallel_vacuum_error_callback() as well.
3912 : */
3913 : static void
3914 119068 : vacuum_error_callback(void *arg)
3915 : {
3916 119068 : LVRelState *errinfo = arg;
3917 :
3918 119068 : switch (errinfo->phase)
3919 : {
3920 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3921 0 : if (BlockNumberIsValid(errinfo->blkno))
3922 : {
3923 0 : if (OffsetNumberIsValid(errinfo->offnum))
3924 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3925 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3926 : else
3927 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3928 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3929 : }
3930 : else
3931 0 : errcontext("while scanning relation \"%s.%s\"",
3932 : errinfo->relnamespace, errinfo->relname);
3933 0 : break;
3934 :
3935 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3936 0 : if (BlockNumberIsValid(errinfo->blkno))
3937 : {
3938 0 : if (OffsetNumberIsValid(errinfo->offnum))
3939 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3940 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3941 : else
3942 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3943 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3944 : }
3945 : else
3946 0 : errcontext("while vacuuming relation \"%s.%s\"",
3947 : errinfo->relnamespace, errinfo->relname);
3948 0 : break;
3949 :
3950 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3951 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3952 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3953 0 : break;
3954 :
3955 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3956 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3957 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3958 0 : break;
3959 :
3960 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3961 6 : if (BlockNumberIsValid(errinfo->blkno))
3962 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3963 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3964 6 : break;
3965 :
3966 119062 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3967 : default:
3968 119062 : return; /* do nothing; the errinfo may not be
3969 : * initialized */
3970 : }
3971 : }
3972 :
3973 : /*
3974 : * Updates the information required for vacuum error callback. This also saves
3975 : * the current information which can be later restored via restore_vacuum_error_info.
3976 : */
3977 : static void
3978 766972 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3979 : int phase, BlockNumber blkno, OffsetNumber offnum)
3980 : {
3981 766972 : if (saved_vacrel)
3982 : {
3983 195178 : saved_vacrel->offnum = vacrel->offnum;
3984 195178 : saved_vacrel->blkno = vacrel->blkno;
3985 195178 : saved_vacrel->phase = vacrel->phase;
3986 : }
3987 :
3988 766972 : vacrel->blkno = blkno;
3989 766972 : vacrel->offnum = offnum;
3990 766972 : vacrel->phase = phase;
3991 766972 : }
3992 :
3993 : /*
3994 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3995 : */
3996 : static void
3997 195178 : restore_vacuum_error_info(LVRelState *vacrel,
3998 : const LVSavedErrInfo *saved_vacrel)
3999 : {
4000 195178 : vacrel->blkno = saved_vacrel->blkno;
4001 195178 : vacrel->offnum = saved_vacrel->offnum;
4002 195178 : vacrel->phase = saved_vacrel->phase;
4003 195178 : }
|