Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
21 : * to phases II and III, cleaning up the dead tuples referenced in the current
22 : * TID store. This empties the TID store, allowing vacuum to resume phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
45 : * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
46 : * wraparound. Normal vacuums may scan otherwise skippable pages for one of
47 : * two reasons:
48 : *
49 : * When page skipping is not disabled, a normal vacuum may scan pages that are
50 : * marked all-visible (and even all-frozen) in the visibility map if the range
51 : * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
52 : * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
53 : *
54 : * A normal vacuum may also scan skippable pages in an effort to freeze them
55 : * and decrease the backlog of all-visible but not all-frozen pages that have
56 : * to be processed by the next aggressive vacuum. These are referred to as
57 : * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
58 : * count as eagerly scanned pages.
59 : *
60 : * Eagerly scanned pages that are set all-frozen in the VM are successful
61 : * eager freezes and those not set all-frozen in the VM are failed eager
62 : * freezes.
63 : *
64 : * Because we want to amortize the overhead of freezing pages over multiple
65 : * vacuums, normal vacuums cap the number of successful eager freezes to
66 : * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
67 : * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
68 : * may be unfrozen before the next aggressive vacuum, capping the number of
69 : * successful eager freezes also caps the downside of eager freezing:
70 : * potentially wasted work.
71 : *
72 : * Once the success cap has been hit, eager scanning is disabled for the
73 : * remainder of the vacuum of the relation.
74 : *
75 : * Success is capped globally because we don't want to limit our successes if
76 : * old data happens to be concentrated in a particular part of the table. This
77 : * is especially likely to happen for append-mostly workloads where the oldest
78 : * data is at the beginning of the unfrozen portion of the relation.
79 : *
80 : * On the assumption that different regions of the table are likely to contain
81 : * similarly aged data, normal vacuums use a localized eager freeze failure
82 : * cap. The failure count is reset for each region of the table -- comprised
83 : * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
84 : * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
85 : * before suspending eager scanning until the end of the region.
86 : * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
87 : * table.
88 : *
89 : * Aggressive vacuums must examine every unfrozen tuple and thus are not
90 : * subject to any of the limits imposed by the eager scanning algorithm.
91 : *
92 : * Once vacuum has decided to scan a given block, it must read the block and
93 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
94 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
95 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
96 : * and their associated index entries (though it is free to reap any existing
97 : * dead items on the page).
98 : *
99 : * After pruning and freezing, pages that are newly all-visible and all-frozen
100 : * are marked as such in the visibility map.
101 : *
102 : * Dead TID Storage:
103 : *
104 : * The major space usage for vacuuming is storage for the dead tuple IDs that
105 : * are to be removed from indexes. We want to ensure we can vacuum even the
106 : * very largest relations with finite memory space usage. To do that, we set
107 : * upper bounds on the memory that can be used for keeping track of dead TIDs
108 : * at once.
109 : *
110 : * We are willing to use at most maintenance_work_mem (or perhaps
111 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
112 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
113 : * the pages that we've pruned). This frees up the memory space dedicated to
114 : * store dead TIDs.
115 : *
116 : * In practice VACUUM will often complete its initial pass over the target
117 : * heap relation without ever running out of space to store TIDs. This means
118 : * that there only needs to be one call to lazy_vacuum, after the initial pass
119 : * completes.
120 : *
121 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
122 : * Portions Copyright (c) 1994, Regents of the University of California
123 : *
124 : *
125 : * IDENTIFICATION
126 : * src/backend/access/heap/vacuumlazy.c
127 : *
128 : *-------------------------------------------------------------------------
129 : */
130 : #include "postgres.h"
131 :
132 : #include "access/genam.h"
133 : #include "access/heapam.h"
134 : #include "access/htup_details.h"
135 : #include "access/multixact.h"
136 : #include "access/tidstore.h"
137 : #include "access/transam.h"
138 : #include "access/visibilitymap.h"
139 : #include "access/xloginsert.h"
140 : #include "catalog/storage.h"
141 : #include "commands/progress.h"
142 : #include "commands/vacuum.h"
143 : #include "common/int.h"
144 : #include "common/pg_prng.h"
145 : #include "executor/instrument.h"
146 : #include "miscadmin.h"
147 : #include "pgstat.h"
148 : #include "portability/instr_time.h"
149 : #include "postmaster/autovacuum.h"
150 : #include "storage/bufmgr.h"
151 : #include "storage/freespace.h"
152 : #include "storage/latch.h"
153 : #include "storage/lmgr.h"
154 : #include "storage/read_stream.h"
155 : #include "utils/lsyscache.h"
156 : #include "utils/pg_rusage.h"
157 : #include "utils/timestamp.h"
158 : #include "utils/wait_event.h"
159 :
160 :
161 : /*
162 : * Space/time tradeoff parameters: do these need to be user-tunable?
163 : *
164 : * To consider truncating the relation, we want there to be at least
165 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
166 : * is less) potentially-freeable pages.
167 : */
168 : #define REL_TRUNCATE_MINIMUM 1000
169 : #define REL_TRUNCATE_FRACTION 16
170 :
171 : /*
172 : * Timing parameters for truncate locking heuristics.
173 : *
174 : * These were not exposed as user tunable GUC values because it didn't seem
175 : * that the potential for improvement was great enough to merit the cost of
176 : * supporting them.
177 : */
178 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
179 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
180 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
181 :
182 : /*
183 : * Threshold that controls whether we bypass index vacuuming and heap
184 : * vacuuming as an optimization
185 : */
186 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
187 :
188 : /*
189 : * Perform a failsafe check each time we scan another 4GB of pages.
190 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
191 : */
192 : #define FAILSAFE_EVERY_PAGES \
193 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
194 :
195 : /*
196 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
197 : * (it won't be exact because we only vacuum FSM after processing a heap page
198 : * that has some removable tuples). When there are indexes, this is ignored,
199 : * and we vacuum FSM after each index/heap cleaning pass.
200 : */
201 : #define VACUUM_FSM_EVERY_PAGES \
202 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
203 :
204 : /*
205 : * Before we consider skipping a page that's marked as clean in
206 : * visibility map, we must've seen at least this many clean pages.
207 : */
208 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
209 :
210 : /*
211 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
212 : * Needs to be a power of 2.
213 : */
214 : #define PREFETCH_SIZE ((BlockNumber) 32)
215 :
216 : /*
217 : * Macro to check if we are in a parallel vacuum. If true, we are in the
218 : * parallel mode and the DSM segment is initialized.
219 : */
220 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
221 :
/*
 * Phases of vacuum during which we report error context.  Stored in
 * LVRelState->phase and read by vacuum_error_callback() to decorate any
 * error raised while that phase is running.
 */
typedef enum
{
	VACUUM_ERRCB_PHASE_UNKNOWN,		/* initial state; no phase info to report */
	VACUUM_ERRCB_PHASE_SCAN_HEAP,	/* phase I: scanning/pruning heap pages */
	VACUUM_ERRCB_PHASE_VACUUM_INDEX,	/* phase II: vacuuming an index */
	VACUUM_ERRCB_PHASE_VACUUM_HEAP, /* phase III: reaping dead items in heap */
	VACUUM_ERRCB_PHASE_INDEX_CLEANUP,	/* post-scan index cleanup pass */
	VACUUM_ERRCB_PHASE_TRUNCATE,	/* truncating empty pages off rel end */
} VacErrPhase;
232 :
233 : /*
234 : * An eager scan of a page that is set all-frozen in the VM is considered
235 : * "successful". To spread out freezing overhead across multiple normal
236 : * vacuums, we limit the number of successful eager page freezes. The maximum
237 : * number of eager page freezes is calculated as a ratio of the all-visible
238 : * but not all-frozen pages at the beginning of the vacuum.
239 : */
240 : #define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
241 :
242 : /*
243 : * On the assumption that different regions of the table tend to have
244 : * similarly aged data, once vacuum fails to freeze
245 : * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
246 : * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
247 : * to another region of the table with potentially older data.
248 : */
249 : #define EAGER_SCAN_REGION_SIZE 4096
250 :
/*
 * Working state for the vacuum of a single heap relation.  One instance is
 * allocated per heap_vacuum_rel() call and threaded through all vacuum
 * phases; it doubles as the argument to the error-context callback.
 */
typedef struct LVRelState
{
	/* Target heap relation and its indexes */
	Relation	rel;
	Relation   *indrels;
	int			nindexes;

	/* Buffer access strategy and parallel vacuum state */
	BufferAccessStrategy bstrategy;
	ParallelVacuumState *pvs;

	/* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
	bool		aggressive;
	/* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
	bool		skipwithvm;
	/* Consider index vacuuming bypass optimization? */
	bool		consider_bypass_optimization;

	/* Doing index vacuuming, index cleanup, rel truncation? */
	bool		do_index_vacuuming;
	bool		do_index_cleanup;
	bool		do_rel_truncate;

	/* VACUUM operation's cutoffs for freezing and pruning */
	struct VacuumCutoffs cutoffs;
	GlobalVisState *vistest;
	/* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
	TransactionId NewRelfrozenXid;
	MultiXactId NewRelminMxid;
	bool		skippedallvis;

	/* Error reporting state (copies; see comment in heap_vacuum_rel) */
	char	   *dbname;
	char	   *relnamespace;
	char	   *relname;
	char	   *indname;		/* Current index name */
	BlockNumber blkno;			/* used only for heap operations */
	OffsetNumber offnum;		/* used only for heap operations */
	VacErrPhase phase;			/* current phase, for error context */
	bool		verbose;		/* VACUUM VERBOSE? */

	/*
	 * dead_items stores TIDs whose index tuples are deleted by index
	 * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
	 * that has been processed by lazy_scan_prune. Also needed by
	 * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
	 * LP_UNUSED during second heap pass.
	 *
	 * Both dead_items and dead_items_info are allocated in shared memory in
	 * parallel vacuum cases.
	 */
	TidStore   *dead_items;		/* TIDs whose index tuples we'll delete */
	VacDeadItemsInfo *dead_items_info;

	BlockNumber rel_pages;		/* total number of pages */
	BlockNumber scanned_pages;	/* # pages examined (not skipped via VM) */

	/*
	 * Count of all-visible blocks eagerly scanned (for logging only). This
	 * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
	 */
	BlockNumber eager_scanned_pages;

	BlockNumber removed_pages;	/* # pages removed by relation truncation */
	BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */

	/* # pages newly set all-visible in the VM */
	BlockNumber new_all_visible_pages;

	/*
	 * # pages newly set all-visible and all-frozen in the VM. This is a
	 * subset of new_all_visible_pages. That is, new_all_visible_pages
	 * includes all pages set all-visible, but
	 * new_all_visible_all_frozen_pages includes only those which were also
	 * set all-frozen.
	 */
	BlockNumber new_all_visible_all_frozen_pages;

	/* # all-visible pages newly set all-frozen in the VM */
	BlockNumber new_all_frozen_pages;

	BlockNumber lpdead_item_pages;	/* # pages with LP_DEAD items */
	BlockNumber missed_dead_pages;	/* # pages with missed dead tuples */
	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */

	/* Statistics output by us, for table */
	double		new_rel_tuples; /* new estimated total # of tuples */
	double		new_live_tuples;	/* new estimated total # of live tuples */
	/* Statistics output by index AMs */
	IndexBulkDeleteResult **indstats;

	/* Instrumentation counters */
	int			num_index_scans;
	int			num_dead_items_resets;
	Size		total_dead_items_bytes;
	/* Counters that follow are only for scanned_pages */
	int64		tuples_deleted; /* # deleted from table */
	int64		tuples_frozen;	/* # newly frozen */
	int64		lpdead_items;	/* # deleted from indexes */
	int64		live_tuples;	/* # live tuples remaining */
	int64		recently_dead_tuples;	/* # dead, but not yet removable */
	int64		missed_dead_tuples; /* # removable, but not removed */

	/* State maintained by heap_vac_scan_next_block() */
	BlockNumber current_block;	/* last block returned */
	BlockNumber next_unskippable_block; /* next unskippable block */
	bool		next_unskippable_eager_scanned; /* if it was eagerly scanned */
	Buffer		next_unskippable_vmbuffer;	/* buffer containing its VM bit */

	/* State related to managing eager scanning of all-visible pages */

	/*
	 * A normal vacuum that has failed to freeze too many eagerly scanned
	 * blocks in a region suspends eager scanning.
	 * next_eager_scan_region_start is the block number of the first block
	 * eligible for resumed eager scanning.
	 *
	 * When eager scanning is permanently disabled, either initially
	 * (including for aggressive vacuum) or due to hitting the success cap,
	 * this is set to InvalidBlockNumber.
	 */
	BlockNumber next_eager_scan_region_start;

	/*
	 * The remaining number of blocks a normal vacuum will consider eager
	 * scanning when it is successful. When eager scanning is enabled, this is
	 * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
	 * all-visible but not all-frozen pages. For each eager freeze success,
	 * this is decremented. Once it hits 0, eager scanning is permanently
	 * disabled. It is initialized to 0 if eager scanning starts out disabled
	 * (including for aggressive vacuum).
	 */
	BlockNumber eager_scan_remaining_successes;

	/*
	 * The maximum number of blocks which may be eagerly scanned and not
	 * frozen before eager scanning is temporarily suspended. This is
	 * configurable both globally, via the
	 * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
	 * storage parameter of the same name. It is calculated as
	 * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
	 * It is 0 when eager scanning is disabled.
	 */
	BlockNumber eager_scan_max_fails_per_region;

	/*
	 * The number of eagerly scanned blocks vacuum failed to freeze (due to
	 * age) in the current eager scan region. Vacuum resets it to
	 * eager_scan_max_fails_per_region each time it enters a new region of the
	 * relation. If eager_scan_remaining_fails hits 0, eager scanning is
	 * suspended until the next region. It is also 0 if eager scanning has
	 * been permanently disabled.
	 */
	BlockNumber eager_scan_remaining_fails;
} LVRelState;
406 :
407 :
/*
 * Struct for saving and restoring vacuum error information.  Used by
 * update_vacuum_error_info() / restore_vacuum_error_info() to stack the
 * error-context state around nested operations.
 */
typedef struct LVSavedErrInfo
{
	BlockNumber blkno;			/* saved LVRelState->blkno */
	OffsetNumber offnum;		/* saved LVRelState->offnum */
	VacErrPhase phase;			/* saved LVRelState->phase */
} LVSavedErrInfo;
415 :
416 :
417 : /* non-export function prototypes */
418 : static void lazy_scan_heap(LVRelState *vacrel);
419 : static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
420 : const VacuumParams params);
421 : static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
422 : void *callback_private_data,
423 : void *per_buffer_data);
424 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
425 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
426 : BlockNumber blkno, Page page,
427 : bool sharelock, Buffer vmbuffer);
428 : static void identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
429 : BlockNumber heap_blk, Page heap_page,
430 : int nlpdead_items,
431 : Buffer vmbuffer,
432 : uint8 *vmbits);
433 : static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
434 : BlockNumber blkno, Page page,
435 : Buffer vmbuffer,
436 : bool *has_lpdead_items, bool *vm_page_frozen);
437 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
438 : BlockNumber blkno, Page page,
439 : bool *has_lpdead_items);
440 : static void lazy_vacuum(LVRelState *vacrel);
441 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
442 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
443 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
444 : Buffer buffer, OffsetNumber *deadoffsets,
445 : int num_offsets, Buffer vmbuffer);
446 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
447 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
448 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
449 : IndexBulkDeleteResult *istat,
450 : double reltuples,
451 : LVRelState *vacrel);
452 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
453 : IndexBulkDeleteResult *istat,
454 : double reltuples,
455 : bool estimated_count,
456 : LVRelState *vacrel);
457 : static bool should_attempt_truncation(LVRelState *vacrel);
458 : static void lazy_truncate_heap(LVRelState *vacrel);
459 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
460 : bool *lock_waiter_detected);
461 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
462 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
463 : int num_offsets);
464 : static void dead_items_reset(LVRelState *vacrel);
465 : static void dead_items_cleanup(LVRelState *vacrel);
466 :
467 : #ifdef USE_ASSERT_CHECKING
468 : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
469 : TransactionId OldestXmin,
470 : bool *all_frozen,
471 : TransactionId *visibility_cutoff_xid,
472 : OffsetNumber *logging_offnum);
473 : #endif
474 : static bool heap_page_would_be_all_visible(Relation rel, Buffer buf,
475 : TransactionId OldestXmin,
476 : OffsetNumber *deadoffsets,
477 : int ndeadoffsets,
478 : bool *all_frozen,
479 : TransactionId *visibility_cutoff_xid,
480 : OffsetNumber *logging_offnum);
481 : static void update_relstats_all_indexes(LVRelState *vacrel);
482 : static void vacuum_error_callback(void *arg);
483 : static void update_vacuum_error_info(LVRelState *vacrel,
484 : LVSavedErrInfo *saved_vacrel,
485 : int phase, BlockNumber blkno,
486 : OffsetNumber offnum);
487 : static void restore_vacuum_error_info(LVRelState *vacrel,
488 : const LVSavedErrInfo *saved_vacrel);
489 :
490 :
491 :
492 : /*
493 : * Helper to set up the eager scanning state for vacuuming a single relation.
494 : * Initializes the eager scan management related members of the LVRelState.
495 : *
496 : * Caller provides whether or not an aggressive vacuum is required due to
497 : * vacuum options or for relfrozenxid/relminmxid advancement.
498 : */
499 : static void
500 126175 : heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
501 : {
502 : uint32 randseed;
503 : BlockNumber allvisible;
504 : BlockNumber allfrozen;
505 : float first_region_ratio;
506 126175 : bool oldest_unfrozen_before_cutoff = false;
507 :
508 : /*
509 : * Initialize eager scan management fields to their disabled values.
510 : * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
511 : * of tables without sufficiently old tuples disable eager scanning.
512 : */
513 126175 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
514 126175 : vacrel->eager_scan_max_fails_per_region = 0;
515 126175 : vacrel->eager_scan_remaining_fails = 0;
516 126175 : vacrel->eager_scan_remaining_successes = 0;
517 :
518 : /* If eager scanning is explicitly disabled, just return. */
519 126175 : if (params.max_eager_freeze_failure_rate == 0)
520 126175 : return;
521 :
522 : /*
523 : * The caller will have determined whether or not an aggressive vacuum is
524 : * required by either the vacuum parameters or the relative age of the
525 : * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
526 : * all-visible page to safely advance the relfrozenxid and/or relminmxid,
527 : * so scans of all-visible pages are not considered eager.
528 : */
529 126175 : if (vacrel->aggressive)
530 120605 : return;
531 :
532 : /*
533 : * Aggressively vacuuming a small relation shouldn't take long, so it
534 : * isn't worth amortizing. We use two times the region size as the size
535 : * cutoff because the eager scan start block is a random spot somewhere in
536 : * the first region, making the second region the first to be eager
537 : * scanned normally.
538 : */
539 5570 : if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
540 5570 : return;
541 :
542 : /*
543 : * We only want to enable eager scanning if we are likely to be able to
544 : * freeze some of the pages in the relation.
545 : *
546 : * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
547 : * are technically freezable, but we won't freeze them unless the criteria
548 : * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
549 : * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
550 : *
551 : * So, as a heuristic, we wait until the FreezeLimit has advanced past the
552 : * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
553 : * enable eager scanning.
554 : */
555 0 : if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
556 0 : TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
557 : vacrel->cutoffs.FreezeLimit))
558 0 : oldest_unfrozen_before_cutoff = true;
559 :
560 0 : if (!oldest_unfrozen_before_cutoff &&
561 0 : MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
562 0 : MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
563 : vacrel->cutoffs.MultiXactCutoff))
564 0 : oldest_unfrozen_before_cutoff = true;
565 :
566 0 : if (!oldest_unfrozen_before_cutoff)
567 0 : return;
568 :
569 : /* We have met the criteria to eagerly scan some pages. */
570 :
571 : /*
572 : * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
573 : * all-visible but not all-frozen blocks in the relation.
574 : */
575 0 : visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
576 :
577 0 : vacrel->eager_scan_remaining_successes =
578 0 : (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
579 0 : (allvisible - allfrozen));
580 :
581 : /* If every all-visible page is frozen, eager scanning is disabled. */
582 0 : if (vacrel->eager_scan_remaining_successes == 0)
583 0 : return;
584 :
585 : /*
586 : * Now calculate the bounds of the first eager scan region. Its end block
587 : * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
588 : * blocks. This affects the bounds of all subsequent regions and avoids
589 : * eager scanning and failing to freeze the same blocks each vacuum of the
590 : * relation.
591 : */
592 0 : randseed = pg_prng_uint32(&pg_global_prng_state);
593 :
594 0 : vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
595 :
596 : Assert(params.max_eager_freeze_failure_rate > 0 &&
597 : params.max_eager_freeze_failure_rate <= 1);
598 :
599 0 : vacrel->eager_scan_max_fails_per_region =
600 0 : params.max_eager_freeze_failure_rate *
601 : EAGER_SCAN_REGION_SIZE;
602 :
603 : /*
604 : * The first region will be smaller than subsequent regions. As such,
605 : * adjust the eager freeze failures tolerated for this region.
606 : */
607 0 : first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
608 : EAGER_SCAN_REGION_SIZE;
609 :
610 0 : vacrel->eager_scan_remaining_fails =
611 0 : vacrel->eager_scan_max_fails_per_region *
612 : first_region_ratio;
613 : }
614 :
615 : /*
616 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
617 : *
618 : * This routine sets things up for and then calls lazy_scan_heap, where
619 : * almost all work actually takes place. Finalizes everything after call
620 : * returns by managing relation truncation and updating rel's pg_class
621 : * entry. (Also updates pg_class entries for any indexes that need it.)
622 : *
623 : * At entry, we have already established a transaction and opened
624 : * and locked the relation.
625 : */
626 : void
627 126175 : heap_vacuum_rel(Relation rel, const VacuumParams params,
628 : BufferAccessStrategy bstrategy)
629 : {
630 : LVRelState *vacrel;
631 : bool verbose,
632 : instrument,
633 : skipwithvm,
634 : frozenxid_updated,
635 : minmulti_updated;
636 : BlockNumber orig_rel_pages,
637 : new_rel_pages,
638 : new_rel_allvisible,
639 : new_rel_allfrozen;
640 : PGRUsage ru0;
641 126175 : TimestampTz starttime = 0;
642 126175 : PgStat_Counter startreadtime = 0,
643 126175 : startwritetime = 0;
644 126175 : WalUsage startwalusage = pgWalUsage;
645 126175 : BufferUsage startbufferusage = pgBufferUsage;
646 : ErrorContextCallback errcallback;
647 126175 : char **indnames = NULL;
648 126175 : Size dead_items_max_bytes = 0;
649 :
650 126175 : verbose = (params.options & VACOPT_VERBOSE) != 0;
651 238492 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
652 112317 : params.log_vacuum_min_duration >= 0));
653 126175 : if (instrument)
654 : {
655 112330 : pg_rusage_init(&ru0);
656 112330 : if (track_io_timing)
657 : {
658 0 : startreadtime = pgStatBlockReadTime;
659 0 : startwritetime = pgStatBlockWriteTime;
660 : }
661 : }
662 :
663 : /* Used for instrumentation and stats report */
664 126175 : starttime = GetCurrentTimestamp();
665 :
666 126175 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
667 : RelationGetRelid(rel));
668 126175 : if (AmAutoVacuumWorkerProcess())
669 112317 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
670 112317 : params.is_wraparound
671 : ? PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
672 : : PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM);
673 : else
674 13858 : pgstat_progress_update_param(PROGRESS_VACUUM_STARTED_BY,
675 : PROGRESS_VACUUM_STARTED_BY_MANUAL);
676 :
677 : /*
678 : * Setup error traceback support for ereport() first. The idea is to set
679 : * up an error context callback to display additional information on any
680 : * error during a vacuum. During different phases of vacuum, we update
681 : * the state so that the error context callback always display current
682 : * information.
683 : *
684 : * Copy the names of heap rel into local memory for error reporting
685 : * purposes, too. It isn't always safe to assume that we can get the name
686 : * of each rel. It's convenient for code in lazy_scan_heap to always use
687 : * these temp copies.
688 : */
689 126175 : vacrel = palloc0_object(LVRelState);
690 126175 : vacrel->dbname = get_database_name(MyDatabaseId);
691 126175 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
692 126175 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
693 126175 : vacrel->indname = NULL;
694 126175 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
695 126175 : vacrel->verbose = verbose;
696 126175 : errcallback.callback = vacuum_error_callback;
697 126175 : errcallback.arg = vacrel;
698 126175 : errcallback.previous = error_context_stack;
699 126175 : error_context_stack = &errcallback;
700 :
701 : /* Set up high level stuff about rel and its indexes */
702 126175 : vacrel->rel = rel;
703 126175 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
704 : &vacrel->indrels);
705 126175 : vacrel->bstrategy = bstrategy;
706 126175 : if (instrument && vacrel->nindexes > 0)
707 : {
708 : /* Copy index names used by instrumentation (not error reporting) */
709 107423 : indnames = palloc_array(char *, vacrel->nindexes);
710 276516 : for (int i = 0; i < vacrel->nindexes; i++)
711 169093 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
712 : }
713 :
714 : /*
715 : * The index_cleanup param either disables index vacuuming and cleanup or
716 : * forces it to go ahead when we would otherwise apply the index bypass
717 : * optimization. The default is 'auto', which leaves the final decision
718 : * up to lazy_vacuum().
719 : *
720 : * The truncate param allows user to avoid attempting relation truncation,
721 : * though it can't force truncation to happen.
722 : */
723 : Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
724 : Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
725 : params.truncate != VACOPTVALUE_AUTO);
726 :
727 : /*
728 : * While VacuumFailSafeActive is reset to false before calling this, we
729 : * still need to reset it here due to recursive calls.
730 : */
731 126175 : VacuumFailsafeActive = false;
732 126175 : vacrel->consider_bypass_optimization = true;
733 126175 : vacrel->do_index_vacuuming = true;
734 126175 : vacrel->do_index_cleanup = true;
735 126175 : vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
736 126175 : if (params.index_cleanup == VACOPTVALUE_DISABLED)
737 : {
738 : /* Force disable index vacuuming up-front */
739 130 : vacrel->do_index_vacuuming = false;
740 130 : vacrel->do_index_cleanup = false;
741 : }
742 126045 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
743 : {
744 : /* Force index vacuuming. Note that failsafe can still bypass. */
745 15 : vacrel->consider_bypass_optimization = false;
746 : }
747 : else
748 : {
749 : /* Default/auto, make all decisions dynamically */
750 : Assert(params.index_cleanup == VACOPTVALUE_AUTO);
751 : }
752 :
753 : /* Initialize page counters explicitly (be tidy) */
754 126175 : vacrel->scanned_pages = 0;
755 126175 : vacrel->eager_scanned_pages = 0;
756 126175 : vacrel->removed_pages = 0;
757 126175 : vacrel->new_frozen_tuple_pages = 0;
758 126175 : vacrel->lpdead_item_pages = 0;
759 126175 : vacrel->missed_dead_pages = 0;
760 126175 : vacrel->nonempty_pages = 0;
761 : /* dead_items_alloc allocates vacrel->dead_items later on */
762 :
763 : /* Allocate/initialize output statistics state */
764 126175 : vacrel->new_rel_tuples = 0;
765 126175 : vacrel->new_live_tuples = 0;
766 126175 : vacrel->indstats = (IndexBulkDeleteResult **)
767 126175 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
768 :
769 : /* Initialize remaining counters (be tidy) */
770 126175 : vacrel->num_index_scans = 0;
771 126175 : vacrel->num_dead_items_resets = 0;
772 126175 : vacrel->total_dead_items_bytes = 0;
773 126175 : vacrel->tuples_deleted = 0;
774 126175 : vacrel->tuples_frozen = 0;
775 126175 : vacrel->lpdead_items = 0;
776 126175 : vacrel->live_tuples = 0;
777 126175 : vacrel->recently_dead_tuples = 0;
778 126175 : vacrel->missed_dead_tuples = 0;
779 :
780 126175 : vacrel->new_all_visible_pages = 0;
781 126175 : vacrel->new_all_visible_all_frozen_pages = 0;
782 126175 : vacrel->new_all_frozen_pages = 0;
783 :
784 : /*
785 : * Get cutoffs that determine which deleted tuples are considered DEAD,
786 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
787 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
788 : * happen in this order to ensure that the OldestXmin cutoff field works
789 : * as an upper bound on the XIDs stored in the pages we'll actually scan
790 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
791 : *
792 : * Next acquire vistest, a related cutoff that's used in pruning. We use
793 : * vistest in combination with OldestXmin to ensure that
794 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
795 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
796 : * whether a tuple should be frozen or removed. (In the future we might
797 : * want to teach lazy_scan_prune to recompute vistest from time to time,
798 : * to increase the number of dead tuples it can prune away.)
799 : */
800 126175 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
801 126175 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
802 126175 : vacrel->vistest = GlobalVisTestFor(rel);
803 :
804 : /* Initialize state used to track oldest extant XID/MXID */
805 126175 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
806 126175 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
807 :
808 : /*
809 : * Initialize state related to tracking all-visible page skipping. This is
810 : * very important to determine whether or not it is safe to advance the
811 : * relfrozenxid/relminmxid.
812 : */
813 126175 : vacrel->skippedallvis = false;
814 126175 : skipwithvm = true;
815 126175 : if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
816 : {
817 : /*
818 : * Force aggressive mode, and disable skipping blocks using the
819 : * visibility map (even those set all-frozen)
820 : */
821 172 : vacrel->aggressive = true;
822 172 : skipwithvm = false;
823 : }
824 :
825 126175 : vacrel->skipwithvm = skipwithvm;
826 :
827 : /*
828 : * Set up eager scan tracking state. This must happen after determining
829 : * whether or not the vacuum must be aggressive, because only normal
830 : * vacuums use the eager scan algorithm.
831 : */
832 126175 : heap_vacuum_eager_scan_setup(vacrel, params);
833 :
834 : /* Report the vacuum mode: 'normal' or 'aggressive' */
835 126175 : pgstat_progress_update_param(PROGRESS_VACUUM_MODE,
836 126175 : vacrel->aggressive
837 : ? PROGRESS_VACUUM_MODE_AGGRESSIVE
838 : : PROGRESS_VACUUM_MODE_NORMAL);
839 :
840 126175 : if (verbose)
841 : {
842 13 : if (vacrel->aggressive)
843 1 : ereport(INFO,
844 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
845 : vacrel->dbname, vacrel->relnamespace,
846 : vacrel->relname)));
847 : else
848 12 : ereport(INFO,
849 : (errmsg("vacuuming \"%s.%s.%s\"",
850 : vacrel->dbname, vacrel->relnamespace,
851 : vacrel->relname)));
852 : }
853 :
854 : /*
855 : * Allocate dead_items memory using dead_items_alloc. This handles
856 : * parallel VACUUM initialization as part of allocating shared memory
857 : * space used for dead_items. (But do a failsafe precheck first, to
858 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
859 : * is already dangerously old.)
860 : */
861 126175 : lazy_check_wraparound_failsafe(vacrel);
862 126175 : dead_items_alloc(vacrel, params.nworkers);
863 :
864 : /*
865 : * Call lazy_scan_heap to perform all required heap pruning, index
866 : * vacuuming, and heap vacuuming (plus related processing)
867 : */
868 126175 : lazy_scan_heap(vacrel);
869 :
870 : /*
871 : * Save dead items max_bytes and update the memory usage statistics before
872 : * cleanup, they are freed in parallel vacuum cases during
873 : * dead_items_cleanup().
874 : */
875 126175 : dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
876 126175 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
877 :
878 : /*
879 : * Free resources managed by dead_items_alloc. This ends parallel mode in
880 : * passing when necessary.
881 : */
882 126175 : dead_items_cleanup(vacrel);
883 : Assert(!IsInParallelMode());
884 :
885 : /*
886 : * Update pg_class entries for each of rel's indexes where appropriate.
887 : *
888 : * Unlike the later update to rel's pg_class entry, this is not critical.
889 : * Maintains relpages/reltuples statistics used by the planner only.
890 : */
891 126175 : if (vacrel->do_index_cleanup)
892 104451 : update_relstats_all_indexes(vacrel);
893 :
894 : /* Done with rel's indexes */
895 126175 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
896 :
897 : /* Optionally truncate rel */
898 126175 : if (should_attempt_truncation(vacrel))
899 145 : lazy_truncate_heap(vacrel);
900 :
901 : /* Pop the error context stack */
902 126175 : error_context_stack = errcallback.previous;
903 :
904 : /* Report that we are now doing final cleanup */
905 126175 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
906 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
907 :
908 : /*
909 : * Prepare to update rel's pg_class entry.
910 : *
911 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
912 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
913 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
914 : */
915 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
916 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
917 : vacrel->cutoffs.relfrozenxid,
918 : vacrel->NewRelfrozenXid));
919 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
920 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
921 : vacrel->cutoffs.relminmxid,
922 : vacrel->NewRelminMxid));
923 126175 : if (vacrel->skippedallvis)
924 : {
925 : /*
926 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
927 : * chose to skip an all-visible page range. The state that tracks new
928 : * values will have missed unfrozen XIDs from the pages we skipped.
929 : */
930 : Assert(!vacrel->aggressive);
931 30 : vacrel->NewRelfrozenXid = InvalidTransactionId;
932 30 : vacrel->NewRelminMxid = InvalidMultiXactId;
933 : }
934 :
935 : /*
936 : * For safety, clamp relallvisible to be not more than what we're setting
937 : * pg_class.relpages to
938 : */
939 126175 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
940 126175 : visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
941 126175 : if (new_rel_allvisible > new_rel_pages)
942 0 : new_rel_allvisible = new_rel_pages;
943 :
944 : /*
945 : * An all-frozen block _must_ be all-visible. As such, clamp the count of
946 : * all-frozen blocks to the count of all-visible blocks. This matches the
947 : * clamping of relallvisible above.
948 : */
949 126175 : if (new_rel_allfrozen > new_rel_allvisible)
950 0 : new_rel_allfrozen = new_rel_allvisible;
951 :
952 : /*
953 : * Now actually update rel's pg_class entry.
954 : *
955 : * In principle new_live_tuples could be -1 indicating that we (still)
956 : * don't know the tuple count. In practice that can't happen, since we
957 : * scan every page that isn't skipped using the visibility map.
958 : */
959 126175 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
960 : new_rel_allvisible, new_rel_allfrozen,
961 126175 : vacrel->nindexes > 0,
962 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
963 : &frozenxid_updated, &minmulti_updated, false);
964 :
965 : /*
966 : * Report results to the cumulative stats system, too.
967 : *
968 : * Deliberately avoid telling the stats system about LP_DEAD items that
969 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
970 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
971 : * It seems like a good idea to err on the side of not vacuuming again too
972 : * soon in cases where the failsafe prevented significant amounts of heap
973 : * vacuuming.
974 : */
975 126175 : pgstat_report_vacuum(rel,
976 50456 : Max(vacrel->new_live_tuples, 0),
977 126175 : vacrel->recently_dead_tuples +
978 126175 : vacrel->missed_dead_tuples,
979 : starttime);
980 126175 : pgstat_progress_end_command();
981 :
982 126175 : if (instrument)
983 : {
984 112330 : TimestampTz endtime = GetCurrentTimestamp();
985 :
986 112417 : if (verbose || params.log_vacuum_min_duration == 0 ||
987 87 : TimestampDifferenceExceeds(starttime, endtime,
988 87 : params.log_vacuum_min_duration))
989 : {
990 : long secs_dur;
991 : int usecs_dur;
992 : WalUsage walusage;
993 : BufferUsage bufferusage;
994 : StringInfoData buf;
995 : char *msgfmt;
996 : int32 diff;
997 112243 : double read_rate = 0,
998 112243 : write_rate = 0;
999 : int64 total_blks_hit;
1000 : int64 total_blks_read;
1001 : int64 total_blks_dirtied;
1002 :
1003 112243 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
1004 112243 : memset(&walusage, 0, sizeof(WalUsage));
1005 112243 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
1006 112243 : memset(&bufferusage, 0, sizeof(BufferUsage));
1007 112243 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
1008 :
1009 112243 : total_blks_hit = bufferusage.shared_blks_hit +
1010 112243 : bufferusage.local_blks_hit;
1011 112243 : total_blks_read = bufferusage.shared_blks_read +
1012 112243 : bufferusage.local_blks_read;
1013 112243 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
1014 112243 : bufferusage.local_blks_dirtied;
1015 :
1016 112243 : initStringInfo(&buf);
1017 112243 : if (verbose)
1018 : {
1019 : /*
1020 : * Aggressiveness already reported earlier, in dedicated
1021 : * VACUUM VERBOSE ereport
1022 : */
1023 : Assert(!params.is_wraparound);
1024 13 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1025 : }
1026 112230 : else if (params.is_wraparound)
1027 : {
1028 : /*
1029 : * While it's possible for a VACUUM to be both is_wraparound
1030 : * and !aggressive, that's just a corner-case -- is_wraparound
1031 : * implies aggressive. Produce distinct output for the corner
1032 : * case all the same, just in case.
1033 : */
1034 112207 : if (vacrel->aggressive)
1035 112191 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1036 : else
1037 16 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1038 : }
1039 : else
1040 : {
1041 23 : if (vacrel->aggressive)
1042 19 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1043 : else
1044 4 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1045 : }
1046 112243 : appendStringInfo(&buf, msgfmt,
1047 : vacrel->dbname,
1048 : vacrel->relnamespace,
1049 : vacrel->relname,
1050 : vacrel->num_index_scans);
1051 156927 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1052 : vacrel->removed_pages,
1053 : new_rel_pages,
1054 : vacrel->scanned_pages,
1055 : orig_rel_pages == 0 ? 100.0 :
1056 44684 : 100.0 * vacrel->scanned_pages /
1057 : orig_rel_pages,
1058 : vacrel->eager_scanned_pages);
1059 112243 : appendStringInfo(&buf,
1060 112243 : _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1061 : vacrel->tuples_deleted,
1062 112243 : (int64) vacrel->new_rel_tuples,
1063 : vacrel->recently_dead_tuples);
1064 112243 : if (vacrel->missed_dead_tuples > 0)
1065 0 : appendStringInfo(&buf,
1066 0 : _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1067 : vacrel->missed_dead_tuples,
1068 : vacrel->missed_dead_pages);
1069 112243 : diff = (int32) (ReadNextTransactionId() -
1070 112243 : vacrel->cutoffs.OldestXmin);
1071 112243 : appendStringInfo(&buf,
1072 112243 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1073 : vacrel->cutoffs.OldestXmin, diff);
1074 112243 : if (frozenxid_updated)
1075 : {
1076 17757 : diff = (int32) (vacrel->NewRelfrozenXid -
1077 17757 : vacrel->cutoffs.relfrozenxid);
1078 17757 : appendStringInfo(&buf,
1079 17757 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1080 : vacrel->NewRelfrozenXid, diff);
1081 : }
1082 112243 : if (minmulti_updated)
1083 : {
1084 8 : diff = (int32) (vacrel->NewRelminMxid -
1085 8 : vacrel->cutoffs.relminmxid);
1086 8 : appendStringInfo(&buf,
1087 8 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1088 : vacrel->NewRelminMxid, diff);
1089 : }
1090 156927 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1091 : vacrel->new_frozen_tuple_pages,
1092 : orig_rel_pages == 0 ? 100.0 :
1093 44684 : 100.0 * vacrel->new_frozen_tuple_pages /
1094 : orig_rel_pages,
1095 : vacrel->tuples_frozen);
1096 :
1097 112243 : appendStringInfo(&buf,
1098 112243 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1099 : vacrel->new_all_visible_pages,
1100 112243 : vacrel->new_all_visible_all_frozen_pages +
1101 112243 : vacrel->new_all_frozen_pages,
1102 : vacrel->new_all_frozen_pages);
1103 112243 : if (vacrel->do_index_vacuuming)
1104 : {
1105 90758 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1106 90744 : appendStringInfoString(&buf, _("index scan not needed: "));
1107 : else
1108 14 : appendStringInfoString(&buf, _("index scan needed: "));
1109 :
1110 90758 : msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1111 : }
1112 : else
1113 : {
1114 21485 : if (!VacuumFailsafeActive)
1115 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
1116 : else
1117 21485 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1118 :
1119 21485 : msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1120 : }
1121 156927 : appendStringInfo(&buf, msgfmt,
1122 : vacrel->lpdead_item_pages,
1123 : orig_rel_pages == 0 ? 100.0 :
1124 44684 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1125 : vacrel->lpdead_items);
1126 281184 : for (int i = 0; i < vacrel->nindexes; i++)
1127 : {
1128 168941 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
1129 :
1130 168941 : if (!istat)
1131 168918 : continue;
1132 :
1133 23 : appendStringInfo(&buf,
1134 23 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1135 23 : indnames[i],
1136 : istat->num_pages,
1137 : istat->pages_newly_deleted,
1138 : istat->pages_deleted,
1139 : istat->pages_free);
1140 : }
1141 112243 : if (track_cost_delay_timing)
1142 : {
1143 : /*
1144 : * We bypass the changecount mechanism because this value is
1145 : * only updated by the calling process. We also rely on the
1146 : * above call to pgstat_progress_end_command() to not clear
1147 : * the st_progress_param array.
1148 : */
1149 0 : appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1150 0 : (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1151 : }
1152 112243 : if (track_io_timing)
1153 : {
1154 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1155 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1156 :
1157 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1158 : read_ms, write_ms);
1159 : }
1160 112243 : if (secs_dur > 0 || usecs_dur > 0)
1161 : {
1162 112243 : read_rate = (double) BLCKSZ * total_blks_read /
1163 112243 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 112243 : write_rate = (double) BLCKSZ * total_blks_dirtied /
1165 112243 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1166 : }
1167 112243 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1168 : read_rate, write_rate);
1169 112243 : appendStringInfo(&buf,
1170 112243 : _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1171 : total_blks_hit,
1172 : total_blks_read,
1173 : total_blks_dirtied);
1174 112243 : appendStringInfo(&buf,
1175 112243 : _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1176 : walusage.wal_records,
1177 : walusage.wal_fpi,
1178 : walusage.wal_bytes,
1179 : walusage.wal_fpi_bytes,
1180 : walusage.wal_buffers_full);
1181 :
1182 : /*
1183 : * Report the dead items memory usage.
1184 : *
1185 : * The num_dead_items_resets counter increases when we reset the
1186 : * collected dead items, so the counter is non-zero if at least
1187 : * one dead items are collected, even if index vacuuming is
1188 : * disabled.
1189 : */
1190 112243 : appendStringInfo(&buf,
1191 112243 : ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1192 : "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1193 112243 : vacrel->num_dead_items_resets),
1194 112243 : (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1195 : vacrel->num_dead_items_resets,
1196 112243 : (double) dead_items_max_bytes / (1024 * 1024));
1197 112243 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1198 :
1199 112243 : ereport(verbose ? INFO : LOG,
1200 : (errmsg_internal("%s", buf.data)));
1201 112243 : pfree(buf.data);
1202 : }
1203 : }
1204 :
1205 : /* Cleanup index statistics and index names */
1206 315179 : for (int i = 0; i < vacrel->nindexes; i++)
1207 : {
1208 189004 : if (vacrel->indstats[i])
1209 1331 : pfree(vacrel->indstats[i]);
1210 :
1211 189004 : if (instrument)
1212 169093 : pfree(indnames[i]);
1213 : }
1214 126175 : }
1215 :
1216 : /*
1217 : * lazy_scan_heap() -- workhorse function for VACUUM
1218 : *
1219 : * This routine prunes each page in the heap, and considers the need to
1220 : * freeze remaining tuples with storage (not including pages that can be
1221 : * skipped using the visibility map). Also performs related maintenance
1222 : * of the FSM and visibility map. These steps all take place during an
1223 : * initial pass over the target heap relation.
1224 : *
1225 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
1226 : * consists of deleting index tuples that point to LP_DEAD items left in
1227 : * heap pages following pruning. Earlier initial pass over the heap will
1228 : * have collected the TIDs whose index tuples need to be removed.
1229 : *
1230 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
1231 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
1232 : * as LP_UNUSED. This has to happen in a second, final pass over the
1233 : * heap, to preserve a basic invariant that all index AMs rely on: no
1234 : * extant index tuple can ever be allowed to contain a TID that points to
1235 : * an LP_UNUSED line pointer in the heap. We must disallow premature
1236 : * recycling of line pointers to avoid index scans that get confused
1237 : * about which TID points to which tuple immediately after recycling.
1238 : * (Actually, this isn't a concern when target heap relation happens to
1239 : * have no indexes, which allows us to safely apply the one-pass strategy
1240 : * as an optimization).
1241 : *
1242 : * In practice we often have enough space to fit all TIDs, and so won't
1243 : * need to call lazy_vacuum more than once, after our initial pass over
1244 : * the heap has totally finished. Otherwise things are slightly more
1245 : * complicated: our "initial pass" over the heap applies only to those
1246 : * pages that were pruned before we needed to call lazy_vacuum, and our
1247 : * "final pass" over the heap only vacuums these same heap pages.
1248 : * However, we process indexes in full every time lazy_vacuum is called,
1249 : * which makes index processing very inefficient when memory is in short
1250 : * supply.
1251 : */
1252 : static void
1253 126175 : lazy_scan_heap(LVRelState *vacrel)
1254 : {
1255 : ReadStream *stream;
1256 126175 : BlockNumber rel_pages = vacrel->rel_pages,
1257 126175 : blkno = 0,
1258 126175 : next_fsm_block_to_vacuum = 0;
1259 126175 : BlockNumber orig_eager_scan_success_limit =
1260 : vacrel->eager_scan_remaining_successes; /* for logging */
1261 126175 : Buffer vmbuffer = InvalidBuffer;
1262 126175 : const int initprog_index[] = {
1263 : PROGRESS_VACUUM_PHASE,
1264 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
1265 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
1266 : };
1267 : int64 initprog_val[3];
1268 :
1269 : /* Report that we're scanning the heap, advertising total # of blocks */
1270 126175 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
1271 126175 : initprog_val[1] = rel_pages;
1272 126175 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
1273 126175 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
1274 :
1275 : /* Initialize for the first heap_vac_scan_next_block() call */
1276 126175 : vacrel->current_block = InvalidBlockNumber;
1277 126175 : vacrel->next_unskippable_block = InvalidBlockNumber;
1278 126175 : vacrel->next_unskippable_eager_scanned = false;
1279 126175 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1280 :
1281 : /*
1282 : * Set up the read stream for vacuum's first pass through the heap.
1283 : *
1284 : * This could be made safe for READ_STREAM_USE_BATCHING, but only with
1285 : * explicit work in heap_vac_scan_next_block.
1286 : */
1287 126175 : stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
1288 : vacrel->bstrategy,
1289 : vacrel->rel,
1290 : MAIN_FORKNUM,
1291 : heap_vac_scan_next_block,
1292 : vacrel,
1293 : sizeof(bool));
1294 :
1295 : while (true)
1296 540744 : {
1297 : Buffer buf;
1298 : Page page;
1299 666919 : bool was_eager_scanned = false;
1300 666919 : int ndeleted = 0;
1301 : bool has_lpdead_items;
1302 666919 : void *per_buffer_data = NULL;
1303 666919 : bool vm_page_frozen = false;
1304 666919 : bool got_cleanup_lock = false;
1305 :
1306 666919 : vacuum_delay_point(false);
1307 :
1308 : /*
1309 : * Regularly check if wraparound failsafe should trigger.
1310 : *
1311 : * There is a similar check inside lazy_vacuum_all_indexes(), but
1312 : * relfrozenxid might start to look dangerously old before we reach
1313 : * that point. This check also provides failsafe coverage for the
1314 : * one-pass strategy, and the two-pass strategy with the index_cleanup
1315 : * param set to 'off'.
1316 : */
1317 666919 : if (vacrel->scanned_pages > 0 &&
1318 540744 : vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
1319 0 : lazy_check_wraparound_failsafe(vacrel);
1320 :
1321 : /*
1322 : * Consider if we definitely have enough space to process TIDs on page
1323 : * already. If we are close to overrunning the available space for
1324 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
1325 : * this page. However, let's force at least one page-worth of tuples
1326 : * to be stored as to ensure we do at least some work when the memory
1327 : * configured is so low that we run out before storing anything.
1328 : */
1329 666919 : if (vacrel->dead_items_info->num_items > 0 &&
1330 24141 : TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
1331 : {
1332 : /*
1333 : * Before beginning index vacuuming, we release any pin we may
1334 : * hold on the visibility map page. This isn't necessary for
1335 : * correctness, but we do it anyway to avoid holding the pin
1336 : * across a lengthy, unrelated operation.
1337 : */
1338 2 : if (BufferIsValid(vmbuffer))
1339 : {
1340 2 : ReleaseBuffer(vmbuffer);
1341 2 : vmbuffer = InvalidBuffer;
1342 : }
1343 :
1344 : /* Perform a round of index and heap vacuuming */
1345 2 : vacrel->consider_bypass_optimization = false;
1346 2 : lazy_vacuum(vacrel);
1347 :
1348 : /*
1349 : * Vacuum the Free Space Map to make newly-freed space visible on
1350 : * upper-level FSM pages. Note that blkno is the previously
1351 : * processed block.
1352 : */
1353 2 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1354 : blkno + 1);
1355 2 : next_fsm_block_to_vacuum = blkno;
1356 :
1357 : /* Report that we are once again scanning the heap */
1358 2 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
1359 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
1360 : }
1361 :
1362 666919 : buf = read_stream_next_buffer(stream, &per_buffer_data);
1363 :
1364 : /* The relation is exhausted. */
1365 666919 : if (!BufferIsValid(buf))
1366 126175 : break;
1367 :
1368 540744 : was_eager_scanned = *((bool *) per_buffer_data);
1369 540744 : CheckBufferIsPinnedOnce(buf);
1370 540744 : page = BufferGetPage(buf);
1371 540744 : blkno = BufferGetBlockNumber(buf);
1372 :
1373 540744 : vacrel->scanned_pages++;
1374 540744 : if (was_eager_scanned)
1375 0 : vacrel->eager_scanned_pages++;
1376 :
1377 : /* Report as block scanned, update error traceback information */
1378 540744 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1379 540744 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
1380 : blkno, InvalidOffsetNumber);
1381 :
1382 : /*
1383 : * Pin the visibility map page in case we need to mark the page
1384 : * all-visible. In most cases this will be very cheap, because we'll
1385 : * already have the correct page pinned anyway.
1386 : */
1387 540744 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1388 :
1389 : /*
1390 : * We need a buffer cleanup lock to prune HOT chains and defragment
1391 : * the page in lazy_scan_prune. But when it's not possible to acquire
1392 : * a cleanup lock right away, we may be able to settle for reduced
1393 : * processing using lazy_scan_noprune.
1394 : */
1395 540744 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1396 :
1397 540744 : if (!got_cleanup_lock)
1398 130 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1399 :
1400 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1401 540744 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1402 540744 : vmbuffer))
1403 : {
1404 : /* Processed as new/empty page (lock and pin released) */
1405 612 : continue;
1406 : }
1407 :
1408 : /*
1409 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1410 : * items in the dead_items area for later vacuuming, count live and
1411 : * recently dead tuples for vacuum logging, and determine if this
1412 : * block could later be truncated. If we encounter any xid/mxids that
1413 : * require advancing the relfrozenxid/relminxid, we'll have to wait
1414 : * for a cleanup lock and call lazy_scan_prune().
1415 : */
1416 540132 : if (!got_cleanup_lock &&
1417 130 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1418 : {
1419 : /*
1420 : * lazy_scan_noprune could not do all required processing. Wait
1421 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1422 : */
1423 : Assert(vacrel->aggressive);
1424 62 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1425 62 : LockBufferForCleanup(buf);
1426 62 : got_cleanup_lock = true;
1427 : }
1428 :
1429 : /*
1430 : * If we have a cleanup lock, we must now prune, freeze, and count
1431 : * tuples. We may have acquired the cleanup lock originally, or we may
1432 : * have gone back and acquired it after lazy_scan_noprune() returned
1433 : * false. Either way, the page hasn't been processed yet.
1434 : *
1435 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1436 : * recently_dead_tuples and live tuples for vacuum logging, determine
1437 : * if the block can later be truncated, and accumulate the details of
1438 : * remaining LP_DEAD line pointers on the page into dead_items. These
1439 : * dead items include those pruned by lazy_scan_prune() as well as
1440 : * line pointers previously marked LP_DEAD.
1441 : */
1442 540132 : if (got_cleanup_lock)
1443 540064 : ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
1444 : vmbuffer,
1445 : &has_lpdead_items, &vm_page_frozen);
1446 :
1447 : /*
1448 : * Count an eagerly scanned page as a failure or a success.
1449 : *
1450 : * Only lazy_scan_prune() freezes pages, so if we didn't get the
1451 : * cleanup lock, we won't have frozen the page. However, we only count
1452 : * pages that were too new to require freezing as eager freeze
1453 : * failures.
1454 : *
1455 : * We could gather more information from lazy_scan_noprune() about
1456 : * whether or not there were tuples with XIDs or MXIDs older than the
1457 : * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
1458 : * exclude pages skipped due to cleanup lock contention from eager
1459 : * freeze algorithm caps.
1460 : */
1461 540132 : if (got_cleanup_lock && was_eager_scanned)
1462 : {
1463 : /* Aggressive vacuums do not eager scan. */
1464 : Assert(!vacrel->aggressive);
1465 :
1466 0 : if (vm_page_frozen)
1467 : {
1468 0 : if (vacrel->eager_scan_remaining_successes > 0)
1469 0 : vacrel->eager_scan_remaining_successes--;
1470 :
1471 0 : if (vacrel->eager_scan_remaining_successes == 0)
1472 : {
1473 : /*
1474 : * Report only once that we disabled eager scanning. We
1475 : * may eagerly read ahead blocks in excess of the success
1476 : * or failure caps before attempting to freeze them, so we
1477 : * could reach here even after disabling additional eager
1478 : * scanning.
1479 : */
1480 0 : if (vacrel->eager_scan_max_fails_per_region > 0)
1481 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
1482 : (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
1483 : orig_eager_scan_success_limit,
1484 : vacrel->dbname, vacrel->relnamespace,
1485 : vacrel->relname)));
1486 :
1487 : /*
1488 : * If we hit our success cap, permanently disable eager
1489 : * scanning by setting the other eager scan management
1490 : * fields to their disabled values.
1491 : */
1492 0 : vacrel->eager_scan_remaining_fails = 0;
1493 0 : vacrel->next_eager_scan_region_start = InvalidBlockNumber;
1494 0 : vacrel->eager_scan_max_fails_per_region = 0;
1495 : }
1496 : }
1497 0 : else if (vacrel->eager_scan_remaining_fails > 0)
1498 0 : vacrel->eager_scan_remaining_fails--;
1499 : }
1500 :
1501 : /*
1502 : * Now drop the buffer lock and, potentially, update the FSM.
1503 : *
1504 : * Our goal is to update the freespace map the last time we touch the
1505 : * page. If we'll process a block in the second pass, we may free up
1506 : * additional space on the page, so it is better to update the FSM
1507 : * after the second pass. If the relation has no indexes, or if index
1508 : * vacuuming is disabled, there will be no second heap pass; if this
1509 : * particular page has no dead items, the second heap pass will not
1510 : * touch this page. So, in those cases, update the FSM now.
1511 : *
1512 : * Note: In corner cases, it's possible to miss updating the FSM
1513 : * entirely. If index vacuuming is currently enabled, we'll skip the
1514 : * FSM update now. But if failsafe mode is later activated, or there
1515 : * are so few dead tuples that index vacuuming is bypassed, there will
1516 : * also be no opportunity to update the FSM later, because we'll never
1517 : * revisit this page. Since updating the FSM is desirable but not
1518 : * absolutely required, that's OK.
1519 : */
1520 540132 : if (vacrel->nindexes == 0
1521 522088 : || !vacrel->do_index_vacuuming
1522 444864 : || !has_lpdead_items)
1523 528067 : {
1524 528067 : Size freespace = PageGetHeapFreeSpace(page);
1525 :
1526 528067 : UnlockReleaseBuffer(buf);
1527 528067 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1528 :
1529 : /*
1530 : * Periodically perform FSM vacuuming to make newly-freed space
1531 : * visible on upper FSM pages. This is done after vacuuming if the
1532 : * table has indexes. There will only be newly-freed space if we
1533 : * held the cleanup lock and lazy_scan_prune() was called.
1534 : */
1535 528067 : if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
1536 369 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1537 : {
1538 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1539 : blkno);
1540 0 : next_fsm_block_to_vacuum = blkno;
1541 : }
1542 : }
1543 : else
1544 12065 : UnlockReleaseBuffer(buf);
1545 : }
1546 :
1547 126175 : vacrel->blkno = InvalidBlockNumber;
1548 126175 : if (BufferIsValid(vmbuffer))
1549 50536 : ReleaseBuffer(vmbuffer);
1550 :
1551 : /*
1552 : * Report that everything is now scanned. We never skip scanning the last
1553 : * block in the relation, so we can pass rel_pages here.
1554 : */
1555 126175 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
1556 : rel_pages);
1557 :
1558 : /* now we can compute the new value for pg_class.reltuples */
1559 252350 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1560 : vacrel->scanned_pages,
1561 126175 : vacrel->live_tuples);
1562 :
1563 : /*
1564 : * Also compute the total number of surviving heap entries. In the
1565 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1566 : */
1567 126175 : vacrel->new_rel_tuples =
1568 126175 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1569 126175 : vacrel->missed_dead_tuples;
1570 :
1571 126175 : read_stream_end(stream);
1572 :
1573 : /*
1574 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1575 : * related heap vacuuming
1576 : */
1577 126175 : if (vacrel->dead_items_info->num_items > 0)
1578 639 : lazy_vacuum(vacrel);
1579 :
1580 : /*
1581 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1582 : * not there were indexes, and whether or not we bypassed index vacuuming.
1583 : * We can pass rel_pages here because we never skip scanning the last
1584 : * block of the relation.
1585 : */
1586 126175 : if (rel_pages > next_fsm_block_to_vacuum)
1587 50536 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
1588 :
1589 : /* report all blocks vacuumed */
1590 126175 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
1591 :
1592 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1593 126175 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1594 99420 : lazy_cleanup_all_indexes(vacrel);
1595 126175 : }
1596 :
/*
 *	heap_vac_scan_next_block() -- read stream callback to get the next block
 *								  for vacuum to process
 *
 * Every time lazy_scan_heap() needs a new block to process during its first
 * phase, it invokes read_stream_next_buffer() with a stream set up to call
 * heap_vac_scan_next_block() to get the next block.
 *
 * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
 * various thresholds to skip blocks which do not need to be processed and
 * returns the next block to process or InvalidBlockNumber if there are no
 * remaining blocks.
 *
 * The visibility status of the next block to process and whether or not it
 * was eager scanned is set in the per_buffer_data.
 *
 * callback_private_data contains a reference to the LVRelState, passed to the
 * read stream API during stream setup.  The LVRelState is an in/out parameter
 * here (locally named `vacrel`).  Vacuum options and information about the
 * relation are read from it.  vacrel->skippedallvis is set if we skip a block
 * that's all-visible but not all-frozen (to ensure that we don't update
 * relfrozenxid in that case).  vacrel also holds information about the next
 * unskippable block -- as bookkeeping for this function.
 */
static BlockNumber
heap_vac_scan_next_block(ReadStream *stream,
						 void *callback_private_data,
						 void *per_buffer_data)
{
	BlockNumber next_block;
	LVRelState *vacrel = callback_private_data;

	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
	next_block = vacrel->current_block + 1;

	/* Have we reached the end of the relation? */
	if (next_block >= vacrel->rel_pages)
	{
		/*
		 * The scan is complete.  Drop the pin on the visibility map page
		 * that find_next_unskippable_block() kept for us, if any, before
		 * telling the read stream there are no more blocks.
		 */
		if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
		{
			ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
			vacrel->next_unskippable_vmbuffer = InvalidBuffer;
		}
		return InvalidBlockNumber;
	}

	/*
	 * We must be in one of the three following states:
	 */
	if (next_block > vacrel->next_unskippable_block ||
		vacrel->next_unskippable_block == InvalidBlockNumber)
	{
		/*
		 * 1. We have just processed an unskippable block (or we're at the
		 * beginning of the scan).  Find the next unskippable block using the
		 * visibility map.
		 */
		bool		skipsallvis;

		find_next_unskippable_block(vacrel, &skipsallvis);

		/*
		 * We now know the next block that we must process.  It can be the
		 * next block after the one we just processed, or something further
		 * ahead.  If it's further ahead, we can jump to it, but we choose to
		 * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
		 * pages.  Since we're reading sequentially, the OS should be doing
		 * readahead for us, so there's no gain in skipping a page now and
		 * then.  Skipping such a range might even discourage sequential
		 * detection.
		 *
		 * This test also enables more frequent relfrozenxid advancement
		 * during non-aggressive VACUUMs.  If the range has any all-visible
		 * pages then skipping makes updating relfrozenxid unsafe, which is a
		 * real downside.
		 */
		if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
		{
			next_block = vacrel->next_unskippable_block;
			if (skipsallvis)
				vacrel->skippedallvis = true;
		}
	}

	/* Now we must be in one of the two remaining states: */
	if (next_block < vacrel->next_unskippable_block)
	{
		/*
		 * 2. We are processing a range of blocks that we could have skipped
		 * but chose not to.  We know that they are all-visible in the VM,
		 * otherwise they would've been unskippable.
		 */
		vacrel->current_block = next_block;
		/* Block was not eager scanned */
		*((bool *) per_buffer_data) = false;
		return vacrel->current_block;
	}
	else
	{
		/*
		 * 3. We reached the next unskippable block.  Process it.  On next
		 * iteration, we will be back in state 1.
		 */
		Assert(next_block == vacrel->next_unskippable_block);

		vacrel->current_block = next_block;
		*((bool *) per_buffer_data) = vacrel->next_unskippable_eager_scanned;
		return vacrel->current_block;
	}
}
1707 :
/*
 * Find the next unskippable block in a vacuum scan using the visibility map.
 * The next unskippable block and its visibility information is updated in
 * vacrel.
 *
 * *skipsallvis is an output parameter: it is set to true when the blocks
 * being skipped over include at least one all-visible (but not all-frozen)
 * page, so that the caller can refrain from advancing relfrozenxid if it
 * actually skips that range.
 *
 * Note: our opinion of which blocks can be skipped can go stale immediately.
 * It's okay if caller "misses" a page whose all-visible or all-frozen marking
 * was concurrently cleared, though.  All that matters is that caller scan all
 * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
 * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
 * older XIDs/MXIDs.  The *skippedallvis flag will be set here when the choice
 * to skip such a range is actually made, making everything safe.)
 */
static void
find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
{
	BlockNumber rel_pages = vacrel->rel_pages;
	BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
	Buffer		next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
	bool		next_unskippable_eager_scanned = false;

	*skipsallvis = false;

	/* Advance one block at a time until we hit a block we must scan */
	for (;; next_unskippable_block++)
	{
		uint8		mapbits = visibilitymap_get_status(vacrel->rel,
													   next_unskippable_block,
													   &next_unskippable_vmbuffer);

		/*
		 * At the start of each eager scan region, normal vacuums with eager
		 * scanning enabled reset the failure counter, allowing vacuum to
		 * resume eager scanning if it had been suspended in the previous
		 * region.
		 */
		if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
		{
			vacrel->eager_scan_remaining_fails =
				vacrel->eager_scan_max_fails_per_region;
			vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
		}

		/*
		 * A block is unskippable if it is not all visible according to the
		 * visibility map.
		 */
		if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
		{
			Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
			break;
		}

		/*
		 * Caller must scan the last page to determine whether it has tuples
		 * (caller must have the opportunity to set vacrel->nonempty_pages).
		 * This rule avoids having lazy_truncate_heap() take access-exclusive
		 * lock on rel to attempt a truncation that fails anyway, just because
		 * there are tuples on the last page (it is likely that there will be
		 * tuples on other nearby pages as well, but those can be skipped).
		 *
		 * Implement this by always treating the last block as unsafe to skip.
		 */
		if (next_unskippable_block == rel_pages - 1)
			break;

		/* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
		if (!vacrel->skipwithvm)
			break;

		/*
		 * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
		 * already frozen by now), so this page can be skipped.
		 */
		if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
			continue;

		/*
		 * Aggressive vacuums cannot skip any all-visible pages that are not
		 * also all-frozen.
		 */
		if (vacrel->aggressive)
			break;

		/*
		 * Normal vacuums with eager scanning enabled only skip all-visible
		 * but not all-frozen pages if they have hit the failure limit for the
		 * current eager scan region.
		 */
		if (vacrel->eager_scan_remaining_fails > 0)
		{
			next_unskippable_eager_scanned = true;
			break;
		}

		/*
		 * All-visible blocks are safe to skip in a normal vacuum.  But
		 * remember that the final range contains such a block for later.
		 */
		*skipsallvis = true;
	}

	/* write the local variables back to vacrel */
	vacrel->next_unskippable_block = next_unskippable_block;
	vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
	vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
}
1815 :
/*
 *	lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
 *
 * Must call here to handle both new and empty pages before calling
 * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
 * with new or empty pages.
 *
 * It's necessary to consider new pages as a special case, since the rules for
 * maintaining the visibility map and FSM with empty pages are a little
 * different (though new pages can be truncated away during rel truncation).
 *
 * Empty pages are not really a special case -- they're just heap pages that
 * have no allocated tuples (including even LP_UNUSED items).  You might
 * wonder why we need to handle them here all the same.  It's only necessary
 * because of a corner-case involving a hard crash during heap relation
 * extension.  If we ever make relation-extension crash safe, then it should
 * no longer be necessary to deal with empty pages here (or new pages, for
 * that matter).
 *
 * Caller must hold at least a shared lock.  We might need to escalate the
 * lock in that case, so the type of lock caller holds needs to be specified
 * using 'sharelock' argument.
 *
 * Returns false in common case where caller should go on to call
 * lazy_scan_prune (or lazy_scan_noprune).  Otherwise returns true, indicating
 * that lazy_scan_heap is done processing the page, releasing lock on caller's
 * behalf.
 *
 * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
 * is passed here because neither empty nor new pages can be eagerly frozen.
 * New pages are never frozen.  Empty pages are always set frozen in the VM at
 * the same time that they are set all-visible, and we don't eagerly scan
 * frozen pages.
 */
static bool
lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
					   Page page, bool sharelock, Buffer vmbuffer)
{
	Size		freespace;

	if (PageIsNew(page))
	{
		/*
		 * All-zeroes pages can be left over if either a backend extends the
		 * relation by a single page, but crashes before the newly initialized
		 * page has been written out, or when bulk-extending the relation
		 * (which creates a number of empty pages at the tail end of the
		 * relation), and then enters them into the FSM.
		 *
		 * Note we do not enter the page into the visibilitymap.  That has the
		 * downside that we repeatedly visit this page in subsequent vacuums,
		 * but otherwise we'll never discover the space on a promoted standby.
		 * The harm of repeated checking ought to normally not be too bad.  The
		 * space usually should be used at some point, otherwise there
		 * wouldn't be any regular vacuums.
		 *
		 * Make sure these pages are in the FSM, to ensure they can be reused.
		 * Do that by testing if there's any space recorded for the page.  If
		 * not, enter it.  We do so after releasing the lock on the heap page,
		 * the FSM is approximate, after all.
		 */
		UnlockReleaseBuffer(buf);

		if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
		{
			/* An all-zeroes page offers its full payload area as free space */
			freespace = BLCKSZ - SizeOfPageHeaderData;

			RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
		}

		return true;
	}

	if (PageIsEmpty(page))
	{
		/*
		 * It seems likely that caller will always be able to get a cleanup
		 * lock on an empty page.  But don't take any chances -- escalate to
		 * an exclusive lock (still don't need a cleanup lock, though).
		 */
		if (sharelock)
		{
			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

			/* Recheck: page may have changed while we briefly held no lock */
			if (!PageIsEmpty(page))
			{
				/* page isn't new or empty -- keep lock and pin for now */
				return false;
			}
		}
		else
		{
			/* Already have a full cleanup lock (which is more than enough) */
		}

		/*
		 * Unlike new pages, empty pages are always set all-visible and
		 * all-frozen.
		 */
		if (!PageIsAllVisible(page))
		{
			START_CRIT_SECTION();

			/* mark buffer dirty before writing a WAL record */
			MarkBufferDirty(buf);

			/*
			 * It's possible that another backend has extended the heap,
			 * initialized the page, and then failed to WAL-log the page due
			 * to an ERROR.  Since heap extension is not WAL-logged, recovery
			 * might try to replay our record setting the page all-visible and
			 * find that the page isn't initialized, which will cause a PANIC.
			 * To prevent that, check whether the page has been previously
			 * WAL-logged, and if not, do that now.
			 */
			if (RelationNeedsWAL(vacrel->rel) &&
				!XLogRecPtrIsValid(PageGetLSN(page)))
				log_newpage_buffer(buf, true);

			PageSetAllVisible(page);
			PageClearPrunable(page);
			visibilitymap_set(vacrel->rel, blkno, buf,
							  InvalidXLogRecPtr,
							  vmbuffer, InvalidTransactionId,
							  VISIBILITYMAP_ALL_VISIBLE |
							  VISIBILITYMAP_ALL_FROZEN);
			END_CRIT_SECTION();

			/* Count the newly all-frozen pages for logging */
			vacrel->new_all_visible_pages++;
			vacrel->new_all_visible_all_frozen_pages++;
		}

		freespace = PageGetHeapFreeSpace(page);
		UnlockReleaseBuffer(buf);
		RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
		return true;
	}

	/* page isn't new or empty -- keep lock and pin */
	return false;
}
1959 :
1960 : /* qsort comparator for sorting OffsetNumbers */
1961 : static int
1962 3006622 : cmpOffsetNumbers(const void *a, const void *b)
1963 : {
1964 3006622 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1965 : }
1966 :
/*
 * Helper to correct any corruption detected on a heap page and its
 * corresponding visibility map page after pruning but before setting the
 * visibility map.  It examines the heap page, the associated VM page, and the
 * number of dead items previously identified.
 *
 * This function must be called while holding an exclusive lock on the heap
 * buffer, and the dead items must have been discovered under that same lock.
 *
 * The provided vmbits must reflect the current state of the VM block
 * referenced by vmbuffer.  Although we do not hold a lock on the VM buffer, it
 * is pinned, and the heap buffer is exclusively locked, ensuring that no
 * other backend can update the VM bits corresponding to this heap page.
 *
 * If it clears corruption, it will zero out vmbits.
 */
static void
identify_and_fix_vm_corruption(Relation rel, Buffer heap_buffer,
							   BlockNumber heap_blk, Page heap_page,
							   int nlpdead_items,
							   Buffer vmbuffer,
							   uint8 *vmbits)
{
	/* Caller's vmbits snapshot must still match the VM page's actual state */
	Assert(visibilitymap_get_status(rel, heap_blk, &vmbuffer) == *vmbits);

	Assert(BufferIsLockedByMeInMode(heap_buffer, BUFFER_LOCK_EXCLUSIVE));

	/*
	 * As of PostgreSQL 9.2, the visibility map bit should never be set if the
	 * page-level bit is clear.  However, it's possible that the bit got
	 * cleared after heap_vac_scan_next_block() was called, so we must recheck
	 * with buffer lock before concluding that the VM is corrupt.
	 */
	if (!PageIsAllVisible(heap_page) &&
		((*vmbits & VISIBILITYMAP_VALID_BITS) != 0))
	{
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
						RelationGetRelationName(rel), heap_blk)));

		visibilitymap_clear(rel, heap_blk, vmbuffer,
							VISIBILITYMAP_VALID_BITS);
		*vmbits = 0;
	}

	/*
	 * It's possible for the value returned by
	 * GetOldestNonRemovableTransactionId() to move backwards, so it's not
	 * wrong for us to see tuples that appear to not be visible to everyone
	 * yet, while PD_ALL_VISIBLE is already set.  The real safe xmin value
	 * never moves backwards, but GetOldestNonRemovableTransactionId() is
	 * conservative and sometimes returns a value that's unnecessarily small,
	 * so if we see that contradiction it just means that the tuples that we
	 * think are not visible to everyone yet actually are, and the
	 * PD_ALL_VISIBLE flag is correct.
	 *
	 * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
	 * however.
	 */
	else if (PageIsAllVisible(heap_page) && nlpdead_items > 0)
	{
		ereport(WARNING,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
						RelationGetRelationName(rel), heap_blk)));

		/* Clear both the page-level hint and the VM bits to resynchronize */
		PageClearAllVisible(heap_page);
		MarkBufferDirty(heap_buffer);
		visibilitymap_clear(rel, heap_blk, vmbuffer,
							VISIBILITYMAP_VALID_BITS);
		*vmbits = 0;
	}
}
2041 :
/*
 *	lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
 *
 * Caller must hold pin and buffer cleanup lock on the buffer.
 *
 * vmbuffer is the buffer containing the VM block with visibility information
 * for the heap block, blkno.
 *
 * *has_lpdead_items is set to true or false depending on whether, upon return
 * from this function, any LP_DEAD items are still present on the page.
 *
 * *vm_page_frozen is set to true if the page is newly set all-frozen in the
 * VM.  The caller currently only uses this for determining whether an eagerly
 * scanned page was successfully set all-frozen.
 *
 * Returns the number of tuples deleted from the page during HOT pruning.
 */
static int
lazy_scan_prune(LVRelState *vacrel,
				Buffer buf,
				BlockNumber blkno,
				Page page,
				Buffer vmbuffer,
				bool *has_lpdead_items,
				bool *vm_page_frozen)
{
	Relation	rel = vacrel->rel;
	PruneFreezeResult presult;
	PruneFreezeParams params = {
		.relation = rel,
		.buffer = buf,
		.reason = PRUNE_VACUUM_SCAN,
		.options = HEAP_PAGE_PRUNE_FREEZE,
		.vistest = vacrel->vistest,
		.cutoffs = &vacrel->cutoffs,
	};
	uint8		old_vmbits = 0;
	uint8		new_vmbits = 0;

	Assert(BufferGetBlockNumber(buf) == blkno);

	/*
	 * Prune all HOT-update chains and potentially freeze tuples on this page.
	 *
	 * If the relation has no indexes, we can immediately mark would-be dead
	 * items LP_UNUSED.
	 *
	 * The number of tuples removed from the page is returned in
	 * presult.ndeleted.  It should not be confused with presult.lpdead_items;
	 * presult.lpdead_items's final value can be thought of as the number of
	 * tuples that were deleted from indexes.
	 *
	 * We will update the VM after collecting LP_DEAD items and freezing
	 * tuples.  Pruning will have determined whether or not the page is
	 * all-visible.
	 */
	if (vacrel->nindexes == 0)
		params.options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;

	heap_page_prune_and_freeze(&params,
							   &presult,
							   &vacrel->offnum,
							   &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);

	Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
	Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));

	if (presult.nfrozen > 0)
	{
		/*
		 * We don't increment the new_frozen_tuple_pages instrumentation
		 * counter when nfrozen == 0, since it only counts pages with newly
		 * frozen tuples (don't confuse that with pages newly set all-frozen
		 * in VM).
		 */
		vacrel->new_frozen_tuple_pages++;
	}

	/*
	 * VACUUM will call heap_page_is_all_visible() during the second pass over
	 * the heap to determine all_visible and all_frozen for the page -- this
	 * is a specialized version of the logic from this function.  Now that
	 * we've finished pruning and freezing, make sure that we're in total
	 * agreement with heap_page_is_all_visible() using an assertion.
	 */
#ifdef USE_ASSERT_CHECKING
	if (presult.set_all_visible)
	{
		TransactionId debug_cutoff;
		bool		debug_all_frozen;

		Assert(presult.lpdead_items == 0);

		Assert(heap_page_is_all_visible(vacrel->rel, buf,
										vacrel->cutoffs.OldestXmin, &debug_all_frozen,
										&debug_cutoff, &vacrel->offnum));

		Assert(presult.set_all_frozen == debug_all_frozen);

		Assert(!TransactionIdIsValid(debug_cutoff) ||
			   debug_cutoff == presult.vm_conflict_horizon);
	}
#endif

	/*
	 * Now save details of the LP_DEAD items from the page in vacrel
	 */
	if (presult.lpdead_items > 0)
	{
		vacrel->lpdead_item_pages++;

		/*
		 * deadoffsets are collected incrementally in
		 * heap_page_prune_and_freeze() as each dead line pointer is recorded,
		 * with an indeterminate order, but dead_items_add requires them to be
		 * sorted.
		 */
		qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
			  cmpOffsetNumbers);

		dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
	}

	/* Finally, add page-local counts to whole-VACUUM counts */
	vacrel->tuples_deleted += presult.ndeleted;
	vacrel->tuples_frozen += presult.nfrozen;
	vacrel->lpdead_items += presult.lpdead_items;
	vacrel->live_tuples += presult.live_tuples;
	vacrel->recently_dead_tuples += presult.recently_dead_tuples;

	/* Can't truncate this page */
	if (presult.hastup)
		vacrel->nonempty_pages = blkno + 1;

	/* Did we find LP_DEAD items? */
	*has_lpdead_items = (presult.lpdead_items > 0);

	Assert(!presult.set_all_visible || !(*has_lpdead_items));
	Assert(!presult.set_all_frozen || presult.set_all_visible);

	/* Read the page's current VM bits so we can detect any inconsistency */
	old_vmbits = visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer);

	identify_and_fix_vm_corruption(vacrel->rel, buf, blkno, page,
								   presult.lpdead_items, vmbuffer,
								   &old_vmbits);

	/* If pruning determined the page isn't all-visible, we are done */
	if (!presult.set_all_visible)
		return presult.ndeleted;

	/* Set the visibility map and page visibility hint */
	new_vmbits = VISIBILITYMAP_ALL_VISIBLE;

	if (presult.set_all_frozen)
		new_vmbits |= VISIBILITYMAP_ALL_FROZEN;

	/* Nothing to do */
	if (old_vmbits == new_vmbits)
		return presult.ndeleted;

	/*
	 * It should never be the case that the visibility map page is set while
	 * the page-level bit is clear (and if so, we cleared it above), but the
	 * reverse is allowed (if checksums are not enabled).  Regardless, set both
	 * bits so that we get back in sync.
	 *
	 * The heap buffer must be marked dirty before adding it to the WAL chain
	 * when setting the VM.  We don't worry about unnecessarily dirtying the
	 * heap buffer if PD_ALL_VISIBLE is already set, though.  It is extremely
	 * rare to have a clean heap buffer with PD_ALL_VISIBLE already set and
	 * the VM bits clear, so there is no point in optimizing it.
	 */
	PageSetAllVisible(page);
	PageClearPrunable(page);
	MarkBufferDirty(buf);

	/*
	 * If the page is being set all-frozen, we pass InvalidTransactionId as
	 * the cutoff_xid, since a snapshot conflict horizon sufficient to make
	 * everything safe for REDO was logged when the page's tuples were frozen.
	 */
	Assert(!presult.set_all_frozen ||
		   !TransactionIdIsValid(presult.vm_conflict_horizon));

	visibilitymap_set(vacrel->rel, blkno, buf,
					  InvalidXLogRecPtr,
					  vmbuffer, presult.vm_conflict_horizon,
					  new_vmbits);

	/*
	 * If the page wasn't already set all-visible and/or all-frozen in the VM,
	 * count it as newly set for logging.
	 */
	if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
	{
		vacrel->new_all_visible_pages++;
		if (presult.set_all_frozen)
		{
			vacrel->new_all_visible_all_frozen_pages++;
			*vm_page_frozen = true;
		}
	}
	else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
			 presult.set_all_frozen)
	{
		vacrel->new_all_frozen_pages++;
		*vm_page_frozen = true;
	}

	return presult.ndeleted;
}
2252 :
/*
 * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
 *
 * Caller need only hold a pin and share lock on the buffer, unlike
 * lazy_scan_prune, which requires a full cleanup lock.  While pruning isn't
 * performed here, it's quite possible that an earlier opportunistic pruning
 * operation left LP_DEAD items behind.  We'll at least collect any such items
 * in dead_items for removal from indexes.
 *
 * For aggressive VACUUM callers, we may return false to indicate that a full
 * cleanup lock is required for processing by lazy_scan_prune.  This is only
 * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
 * one or more tuples on the page.  We always return true for non-aggressive
 * callers.
 *
 * If this function returns true, *has_lpdead_items gets set to true or false
 * depending on whether, upon return from this function, any LP_DEAD items are
 * present on the page.  If this function returns false, *has_lpdead_items
 * is not updated.
 */
static bool
lazy_scan_noprune(LVRelState *vacrel,
				  Buffer buf,
				  BlockNumber blkno,
				  Page page,
				  bool *has_lpdead_items)
{
	OffsetNumber offnum,
				maxoff;
	int			lpdead_items,
				live_tuples,
				recently_dead_tuples,
				missed_dead_tuples;
	bool		hastup;
	HeapTupleHeader tupleheader;
	TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
	MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
	/* Offsets of LP_DEAD items found on this page, for dead_items_add() */
	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];

	Assert(BufferGetBlockNumber(buf) == blkno);

	hastup = false;				/* for now */

	/* Per-page counters, folded into the whole-VACUUM totals at the end */
	lpdead_items = 0;
	live_tuples = 0;
	recently_dead_tuples = 0;
	missed_dead_tuples = 0;

	maxoff = PageGetMaxOffsetNumber(page);
	for (offnum = FirstOffsetNumber;
		 offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		ItemId		itemid;
		HeapTupleData tuple;

		/* Track the current offset for error reporting */
		vacrel->offnum = offnum;
		itemid = PageGetItemId(page, offnum);

		if (!ItemIdIsUsed(itemid))
			continue;

		if (ItemIdIsRedirected(itemid))
		{
			hastup = true;
			continue;
		}

		if (ItemIdIsDead(itemid))
		{
			/*
			 * Deliberately don't set hastup=true here.  See same point in
			 * lazy_scan_prune for an explanation.
			 */
			deadoffsets[lpdead_items++] = offnum;
			continue;
		}

		hastup = true;			/* page prevents rel truncation */
		tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
		if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
									 &NoFreezePageRelfrozenXid,
									 &NoFreezePageRelminMxid))
		{
			/* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
			if (vacrel->aggressive)
			{
				/*
				 * Aggressive VACUUMs must always be able to advance rel's
				 * relfrozenxid to a value >= FreezeLimit (and be able to
				 * advance rel's relminmxid to a value >= MultiXactCutoff).
				 * The ongoing aggressive VACUUM won't be able to do that
				 * unless it can freeze an XID (or MXID) from this tuple now.
				 *
				 * The only safe option is to have caller perform processing
				 * of this page using lazy_scan_prune.  Caller might have to
				 * wait a while for a cleanup lock, but it can't be helped.
				 */
				vacrel->offnum = InvalidOffsetNumber;
				return false;
			}

			/*
			 * Non-aggressive VACUUMs are under no obligation to advance
			 * relfrozenxid (even by one XID).  We can be much laxer here.
			 *
			 * Currently we always just accept an older final relfrozenxid
			 * and/or relminmxid value.  We never make caller wait or work a
			 * little harder, even when it likely makes sense to do so.
			 */
		}

		ItemPointerSet(&(tuple.t_self), blkno, offnum);
		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
		tuple.t_len = ItemIdGetLength(itemid);
		tuple.t_tableOid = RelationGetRelid(vacrel->rel);

		switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
										 buf))
		{
			case HEAPTUPLE_DELETE_IN_PROGRESS:
			case HEAPTUPLE_LIVE:

				/*
				 * Count both cases as live, just like lazy_scan_prune
				 */
				live_tuples++;

				break;
			case HEAPTUPLE_DEAD:

				/*
				 * There is some useful work for pruning to do, that won't be
				 * done due to failure to get a cleanup lock.
				 */
				missed_dead_tuples++;
				break;
			case HEAPTUPLE_RECENTLY_DEAD:

				/*
				 * Count in recently_dead_tuples, just like lazy_scan_prune
				 */
				recently_dead_tuples++;
				break;
			case HEAPTUPLE_INSERT_IN_PROGRESS:

				/*
				 * Do not count these rows as live, just like lazy_scan_prune
				 */
				break;
			default:
				elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
				break;
		}
	}

	vacrel->offnum = InvalidOffsetNumber;

	/*
	 * By here we know for sure that caller can put off freezing and pruning
	 * this particular page until the next VACUUM.  Remember its details now.
	 * (lazy_scan_prune expects a clean slate, so we have to do this last.)
	 */
	vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
	vacrel->NewRelminMxid = NoFreezePageRelminMxid;

	/* Save any LP_DEAD items found on the page in dead_items */
	if (vacrel->nindexes == 0)
	{
		/* Using one-pass strategy (since table has no indexes) */
		if (lpdead_items > 0)
		{
			/*
			 * Perfunctory handling for the corner case where a single pass
			 * strategy VACUUM cannot get a cleanup lock, and it turns out
			 * that there is one or more LP_DEAD items: just count the LP_DEAD
			 * items as missed_dead_tuples instead. (This is a bit dishonest,
			 * but it beats having to maintain specialized heap vacuuming code
			 * forever, for vanishingly little benefit.)
			 */
			hastup = true;
			missed_dead_tuples += lpdead_items;
		}
	}
	else if (lpdead_items > 0)
	{
		/*
		 * Page has LP_DEAD items, and so any references/TIDs that remain in
		 * indexes will be deleted during index vacuuming (and then marked
		 * LP_UNUSED in the heap)
		 */
		vacrel->lpdead_item_pages++;

		dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);

		vacrel->lpdead_items += lpdead_items;
	}

	/*
	 * Finally, add relevant page-local counts to whole-VACUUM counts
	 */
	vacrel->live_tuples += live_tuples;
	vacrel->recently_dead_tuples += recently_dead_tuples;
	vacrel->missed_dead_tuples += missed_dead_tuples;
	if (missed_dead_tuples > 0)
		vacrel->missed_dead_pages++;

	/* Can't truncate this page */
	if (hastup)
		vacrel->nonempty_pages = blkno + 1;

	/* Did we find LP_DEAD items? */
	*has_lpdead_items = (lpdead_items > 0);

	/* Caller won't need to call lazy_scan_prune with same page */
	return true;
}
2470 :
/*
 * Main entry point for index vacuuming and heap vacuuming.
 *
 * Removes items collected in dead_items from table's indexes, then marks the
 * same items LP_UNUSED in the heap.  See the comments above lazy_scan_heap
 * for full details.
 *
 * Also empties dead_items, freeing up space for later TIDs.
 *
 * We may choose to bypass index vacuuming at this point, though only when the
 * ongoing VACUUM operation will definitely only have one index scan/round of
 * index vacuuming.
 */
static void
lazy_vacuum(LVRelState *vacrel)
{
	bool		bypass;

	/* Should not end up here with no indexes */
	Assert(vacrel->nindexes > 0);
	Assert(vacrel->lpdead_item_pages > 0);

	if (!vacrel->do_index_vacuuming)
	{
		/* Index vacuuming is disabled -- just forget the dead items */
		Assert(!vacrel->do_index_cleanup);
		dead_items_reset(vacrel);
		return;
	}

	/*
	 * Consider bypassing index vacuuming (and heap vacuuming) entirely.
	 *
	 * We currently only do this in cases where the number of LP_DEAD items
	 * for the entire VACUUM operation is close to zero.  This avoids sharp
	 * discontinuities in the duration and overhead of successive VACUUM
	 * operations that run against the same table with a fixed workload.
	 * Ideally, successive VACUUM operations will behave as if there are
	 * exactly zero LP_DEAD items in cases where there are close to zero.
	 *
	 * This is likely to be helpful with a table that is continually affected
	 * by UPDATEs that can mostly apply the HOT optimization, but occasionally
	 * have small aberrations that lead to just a few heap pages retaining
	 * only one or two LP_DEAD items.  This is pretty common; even when the
	 * DBA goes out of their way to make UPDATEs use HOT, it is practically
	 * impossible to predict whether HOT will be applied in 100% of cases.
	 * It's far easier to ensure that 99%+ of all UPDATEs against a table use
	 * HOT through careful tuning.
	 */
	bypass = false;
	if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
	{
		BlockNumber threshold;

		Assert(vacrel->num_index_scans == 0);
		Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
		Assert(vacrel->do_index_vacuuming);
		Assert(vacrel->do_index_cleanup);

		/*
		 * This crossover point at which we'll start to do index vacuuming is
		 * expressed as a percentage of the total number of heap pages in the
		 * table that are known to have at least one LP_DEAD item.  This is
		 * much more important than the total number of LP_DEAD items, since
		 * it's a proxy for the number of heap pages whose visibility map bits
		 * cannot be set on account of bypassing index and heap vacuuming.
		 *
		 * We apply one further precautionary test: the space currently used
		 * to store the TIDs (TIDs that now all point to LP_DEAD items) must
		 * not exceed 32MB.  This limits the risk that we will bypass index
		 * vacuuming again and again until eventually there is a VACUUM whose
		 * dead_items space is not CPU cache resident.
		 *
		 * We don't take any special steps to remember the LP_DEAD items (such
		 * as counting them in our final update to the stats system) when the
		 * optimization is applied.  Though the accounting used in analyze.c's
		 * acquire_sample_rows() will recognize the same LP_DEAD items as dead
		 * rows in its own stats report, that's okay.  The discrepancy should
		 * be negligible.  If this optimization is ever expanded to cover more
		 * cases then this may need to be reconsidered.
		 */
		threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
		bypass = (vacrel->lpdead_item_pages < threshold &&
				  TidStoreMemoryUsage(vacrel->dead_items) < 32 * 1024 * 1024);
	}

	if (bypass)
	{
		/*
		 * There are almost zero TIDs.  Behave as if there were precisely
		 * zero: bypass index vacuuming, but do index cleanup.
		 *
		 * We expect that the ongoing VACUUM operation will finish very
		 * quickly, so there is no point in considering speeding up as a
		 * failsafe against wraparound failure. (Index cleanup is expected to
		 * finish very quickly in cases where there were no ambulkdelete()
		 * calls.)
		 */
		vacrel->do_index_vacuuming = false;
	}
	else if (lazy_vacuum_all_indexes(vacrel))
	{
		/*
		 * We successfully completed a round of index vacuuming.  Do related
		 * heap vacuuming now.
		 */
		lazy_vacuum_heap_rel(vacrel);
	}
	else
	{
		/*
		 * Failsafe case.
		 *
		 * We attempted index vacuuming, but didn't finish a full round/full
		 * index scan.  This happens when relfrozenxid or relminmxid is too
		 * far in the past.
		 *
		 * From this point on the VACUUM operation will do no further index
		 * vacuuming or heap vacuuming.  This VACUUM operation won't end up
		 * back here again.
		 */
		Assert(VacuumFailsafeActive);
	}

	/*
	 * Forget the LP_DEAD items that we just vacuumed (or just decided to not
	 * vacuum)
	 */
	dead_items_reset(vacrel);
}
2600 :
/*
 * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
 *
 * Returns true in the common case when all indexes were successfully
 * vacuumed.  Returns false in rare cases where we determined that the ongoing
 * VACUUM operation is at risk of taking too long to finish, leading to
 * wraparound failure.
 */
static bool
lazy_vacuum_all_indexes(LVRelState *vacrel)
{
	bool		allindexes = true;
	double		old_live_tuples = vacrel->rel->rd_rel->reltuples;
	/* Progress-reporting parameter numbers set at the start of this phase */
	const int	progress_start_index[] = {
		PROGRESS_VACUUM_PHASE,
		PROGRESS_VACUUM_INDEXES_TOTAL
	};
	/* Progress-reporting parameter numbers reset/updated at the end */
	const int	progress_end_index[] = {
		PROGRESS_VACUUM_INDEXES_TOTAL,
		PROGRESS_VACUUM_INDEXES_PROCESSED,
		PROGRESS_VACUUM_NUM_INDEX_VACUUMS
	};
	int64		progress_start_val[2];
	int64		progress_end_val[3];

	Assert(vacrel->nindexes > 0);
	Assert(vacrel->do_index_vacuuming);
	Assert(vacrel->do_index_cleanup);

	/* Precheck for XID wraparound emergencies */
	if (lazy_check_wraparound_failsafe(vacrel))
	{
		/* Wraparound emergency -- don't even start an index scan */
		return false;
	}

	/*
	 * Report that we are now vacuuming indexes and the number of indexes to
	 * vacuum.
	 */
	progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
	progress_start_val[1] = vacrel->nindexes;
	pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);

	if (!ParallelVacuumIsActive(vacrel))
	{
		/* Serial path: bulk-delete each index in turn */
		for (int idx = 0; idx < vacrel->nindexes; idx++)
		{
			Relation	indrel = vacrel->indrels[idx];
			IndexBulkDeleteResult *istat = vacrel->indstats[idx];

			vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
														  old_live_tuples,
														  vacrel);

			/* Report the number of indexes vacuumed */
			pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
										 idx + 1);

			if (lazy_check_wraparound_failsafe(vacrel))
			{
				/* Wraparound emergency -- end current index scan */
				allindexes = false;
				break;
			}
		}
	}
	else
	{
		/* Outsource everything to parallel variant */
		parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
											vacrel->num_index_scans);

		/*
		 * Do a postcheck to consider applying wraparound failsafe now.  Note
		 * that parallel VACUUM only gets the precheck and this postcheck.
		 */
		if (lazy_check_wraparound_failsafe(vacrel))
			allindexes = false;
	}

	/*
	 * We delete all LP_DEAD items from the first heap pass in all indexes on
	 * each call here (except calls where we choose to do the failsafe). This
	 * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
	 * of the failsafe triggering, which prevents the next call from taking
	 * place).
	 */
	Assert(vacrel->num_index_scans > 0 ||
		   vacrel->dead_items_info->num_items == vacrel->lpdead_items);
	Assert(allindexes || VacuumFailsafeActive);

	/*
	 * Increase and report the number of index scans.  Also, we reset
	 * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
	 *
	 * We deliberately include the case where we started a round of bulk
	 * deletes that we weren't able to finish due to the failsafe triggering.
	 */
	vacrel->num_index_scans++;
	progress_end_val[0] = 0;
	progress_end_val[1] = 0;
	progress_end_val[2] = vacrel->num_index_scans;
	pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);

	return allindexes;
}
2708 :
2709 : /*
2710 : * Read stream callback for vacuum's third phase (second pass over the heap).
2711 : * Gets the next block from the TID store and returns it or InvalidBlockNumber
2712 : * if there are no further blocks to vacuum.
2713 : *
2714 : * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
2715 : */
2716 : static BlockNumber
2717 12691 : vacuum_reap_lp_read_stream_next(ReadStream *stream,
2718 : void *callback_private_data,
2719 : void *per_buffer_data)
2720 : {
2721 12691 : TidStoreIter *iter = callback_private_data;
2722 : TidStoreIterResult *iter_result;
2723 :
2724 12691 : iter_result = TidStoreIterateNext(iter);
2725 12691 : if (iter_result == NULL)
2726 628 : return InvalidBlockNumber;
2727 :
2728 : /*
2729 : * Save the TidStoreIterResult for later, so we can extract the offsets.
2730 : * It is safe to copy the result, according to TidStoreIterateNext().
2731 : */
2732 12063 : memcpy(per_buffer_data, iter_result, sizeof(*iter_result));
2733 :
2734 12063 : return iter_result->blkno;
2735 : }
2736 :
/*
 * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
 *
 * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
 * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
 *
 * We may also be able to truncate the line pointer array of the heap pages we
 * visit.  If there is a contiguous group of LP_UNUSED items at the end of the
 * array, it can be reclaimed as free space.  These LP_UNUSED items usually
 * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
 * each page to LP_UNUSED, and then consider if it's possible to truncate the
 * page's line pointer array).
 *
 * Note: the reason for doing this as a second pass is we cannot remove the
 * tuples until we've removed their index entries, and we want to process
 * index entry removal in batches as large as possible.
 */
static void
lazy_vacuum_heap_rel(LVRelState *vacrel)
{
	ReadStream *stream;
	BlockNumber vacuumed_pages = 0;
	Buffer		vmbuffer = InvalidBuffer;
	LVSavedErrInfo saved_err_info;
	TidStoreIter *iter;

	Assert(vacrel->do_index_vacuuming);
	Assert(vacrel->do_index_cleanup);
	Assert(vacrel->num_index_scans > 0);

	/* Report that we are now vacuuming the heap */
	pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
								 PROGRESS_VACUUM_PHASE_VACUUM_HEAP);

	/* Update error traceback information */
	update_vacuum_error_info(vacrel, &saved_err_info,
							 VACUUM_ERRCB_PHASE_VACUUM_HEAP,
							 InvalidBlockNumber, InvalidOffsetNumber);

	iter = TidStoreBeginIterate(vacrel->dead_items);

	/*
	 * Set up the read stream for vacuum's second pass through the heap.
	 *
	 * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
	 * not need to wait for IO and does not perform locking.  Once we support
	 * parallelism it should still be fine, as presumably the holder of locks
	 * would never be blocked by IO while holding the lock.
	 */
	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
										READ_STREAM_USE_BATCHING,
										vacrel->bstrategy,
										vacrel->rel,
										MAIN_FORKNUM,
										vacuum_reap_lp_read_stream_next,
										iter,
										sizeof(TidStoreIterResult));

	while (true)
	{
		BlockNumber blkno;
		Buffer		buf;
		Page		page;
		TidStoreIterResult *iter_result;
		Size		freespace;
		OffsetNumber offsets[MaxOffsetNumber];
		int			num_offsets;

		/* Also checks for interrupts and applies cost-based vacuum delay */
		vacuum_delay_point(false);

		buf = read_stream_next_buffer(stream, (void **) &iter_result);

		/* The relation is exhausted */
		if (!BufferIsValid(buf))
			break;

		vacrel->blkno = blkno = BufferGetBlockNumber(buf);

		/* The stream callback stashed the iterator result for this block */
		Assert(iter_result);
		num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
		Assert(num_offsets <= lengthof(offsets));

		/*
		 * Pin the visibility map page in case we need to mark the page
		 * all-visible.  In most cases this will be very cheap, because we'll
		 * already have the correct page pinned anyway.
		 */
		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);

		/* We need a non-cleanup exclusive lock to mark dead_items unused */
		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
		lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
							  num_offsets, vmbuffer);

		/* Now that we've vacuumed the page, record its available space */
		page = BufferGetPage(buf);
		freespace = PageGetHeapFreeSpace(page);

		UnlockReleaseBuffer(buf);
		RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
		vacuumed_pages++;
	}

	read_stream_end(stream);
	TidStoreEndIterate(iter);

	vacrel->blkno = InvalidBlockNumber;
	if (BufferIsValid(vmbuffer))
		ReleaseBuffer(vmbuffer);

	/*
	 * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
	 * the second heap pass.  No more, no less.
	 */
	Assert(vacrel->num_index_scans > 1 ||
		   (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
			vacuumed_pages == vacrel->lpdead_item_pages));

	ereport(DEBUG2,
			(errmsg("table \"%s\": removed %" PRId64 " dead item identifiers in %u pages",
					vacrel->relname, vacrel->dead_items_info->num_items,
					vacuumed_pages)));

	/* Revert to the previous phase information for error traceback */
	restore_vacuum_error_info(vacrel, &saved_err_info);
}
2863 :
/*
 * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
 *						  vacrel->dead_items store.
 *
 * Caller must have an exclusive buffer lock on the buffer (though a full
 * cleanup lock is also acceptable).  vmbuffer must be valid and already have
 * a pin on blkno's visibility map page.
 */
static void
lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
					  OffsetNumber *deadoffsets, int num_offsets,
					  Buffer vmbuffer)
{
	Page		page = BufferGetPage(buffer);
	OffsetNumber unused[MaxHeapTuplesPerPage];
	int			nunused = 0;
	TransactionId visibility_cutoff_xid;
	TransactionId conflict_xid = InvalidTransactionId;
	bool		all_frozen;
	LVSavedErrInfo saved_err_info;
	uint8		vmflags = 0;

	Assert(vacrel->do_index_vacuuming);

	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);

	/* Update error traceback information */
	update_vacuum_error_info(vacrel, &saved_err_info,
							 VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
							 InvalidOffsetNumber);

	/*
	 * Before marking dead items unused, check whether the page will become
	 * all-visible once that change is applied.  This lets us reap the tuples
	 * and mark the page all-visible within the same critical section,
	 * enabling both changes to be emitted in a single WAL record.  Since the
	 * visibility checks may perform I/O and allocate memory, they must be
	 * done outside the critical section.
	 */
	if (heap_page_would_be_all_visible(vacrel->rel, buffer,
									   vacrel->cutoffs.OldestXmin,
									   deadoffsets, num_offsets,
									   &all_frozen, &visibility_cutoff_xid,
									   &vacrel->offnum))
	{
		vmflags |= VISIBILITYMAP_ALL_VISIBLE;
		if (all_frozen)
		{
			vmflags |= VISIBILITYMAP_ALL_FROZEN;
			/* An all-frozen page needs no conflict horizon XID */
			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
		}

		/*
		 * Take the lock on the vmbuffer before entering a critical section.
		 * The heap page lock must also be held while updating the VM to
		 * ensure consistency.
		 */
		LockBuffer(vmbuffer, BUFFER_LOCK_EXCLUSIVE);
	}

	START_CRIT_SECTION();

	/* Mark every listed LP_DEAD item LP_UNUSED, remembering each offset */
	for (int i = 0; i < num_offsets; i++)
	{
		ItemId		itemid;
		OffsetNumber toff = deadoffsets[i];

		itemid = PageGetItemId(page, toff);

		Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
		ItemIdSetUnused(itemid);
		unused[nunused++] = toff;
	}

	Assert(nunused > 0);

	/* Attempt to truncate line pointer array now */
	PageTruncateLinePointerArray(page);

	if ((vmflags & VISIBILITYMAP_VALID_BITS) != 0)
	{
		/*
		 * The page is guaranteed to have had dead line pointers, so we always
		 * set PD_ALL_VISIBLE.
		 */
		PageSetAllVisible(page);
		PageClearPrunable(page);
		visibilitymap_set_vmbits(blkno,
								 vmbuffer, vmflags,
								 vacrel->rel->rd_locator);
		conflict_xid = visibility_cutoff_xid;
	}

	/*
	 * Mark buffer dirty before we write WAL.
	 */
	MarkBufferDirty(buffer);

	/* XLOG stuff */
	if (RelationNeedsWAL(vacrel->rel))
	{
		log_heap_prune_and_freeze(vacrel->rel, buffer,
								  vmflags != 0 ? vmbuffer : InvalidBuffer,
								  vmflags,
								  conflict_xid,
								  false,	/* no cleanup lock required */
								  PRUNE_VACUUM_CLEANUP,
								  NULL, 0,	/* frozen */
								  NULL, 0,	/* redirected */
								  NULL, 0,	/* dead */
								  unused, nunused);
	}

	END_CRIT_SECTION();

	if ((vmflags & VISIBILITYMAP_ALL_VISIBLE) != 0)
	{
		/* Count the newly set VM page for logging */
		LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
		vacrel->new_all_visible_pages++;
		if (all_frozen)
			vacrel->new_all_visible_all_frozen_pages++;
	}

	/* Revert to the previous phase information for error traceback */
	restore_vacuum_error_info(vacrel, &saved_err_info);
}
2991 :
2992 : /*
2993 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2994 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2995 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2996 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2997 : *
2998 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2999 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
3000 : * that it started out with.
3001 : *
3002 : * Returns true when failsafe has been triggered.
3003 : */
3004 : static bool
3005 127994 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
3006 : {
3007 : /* Don't warn more than once per VACUUM */
3008 127994 : if (VacuumFailsafeActive)
3009 0 : return true;
3010 :
3011 127994 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
3012 : {
3013 21594 : const int progress_index[] = {
3014 : PROGRESS_VACUUM_INDEXES_TOTAL,
3015 : PROGRESS_VACUUM_INDEXES_PROCESSED,
3016 : PROGRESS_VACUUM_MODE
3017 : };
3018 21594 : int64 progress_val[3] = {0, 0, PROGRESS_VACUUM_MODE_FAILSAFE};
3019 :
3020 21594 : VacuumFailsafeActive = true;
3021 :
3022 : /*
3023 : * Abandon use of a buffer access strategy to allow use of all of
3024 : * shared buffers. We assume the caller who allocated the memory for
3025 : * the BufferAccessStrategy will free it.
3026 : */
3027 21594 : vacrel->bstrategy = NULL;
3028 :
3029 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
3030 21594 : vacrel->do_index_vacuuming = false;
3031 21594 : vacrel->do_index_cleanup = false;
3032 21594 : vacrel->do_rel_truncate = false;
3033 :
3034 : /* Reset the progress counters and set the failsafe mode */
3035 21594 : pgstat_progress_update_multi_param(3, progress_index, progress_val);
3036 :
3037 21594 : ereport(WARNING,
3038 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
3039 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
3040 : vacrel->num_index_scans),
3041 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
3042 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
3043 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
3044 :
3045 : /* Stop applying cost limits from this point on */
3046 21594 : VacuumCostActive = false;
3047 21594 : VacuumCostBalance = 0;
3048 :
3049 21594 : return true;
3050 : }
3051 :
3052 106400 : return false;
3053 : }
3054 :
3055 : /*
3056 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
3057 : */
3058 : static void
3059 99420 : lazy_cleanup_all_indexes(LVRelState *vacrel)
3060 : {
3061 99420 : double reltuples = vacrel->new_rel_tuples;
3062 99420 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
3063 99420 : const int progress_start_index[] = {
3064 : PROGRESS_VACUUM_PHASE,
3065 : PROGRESS_VACUUM_INDEXES_TOTAL
3066 : };
3067 99420 : const int progress_end_index[] = {
3068 : PROGRESS_VACUUM_INDEXES_TOTAL,
3069 : PROGRESS_VACUUM_INDEXES_PROCESSED
3070 : };
3071 : int64 progress_start_val[2];
3072 99420 : int64 progress_end_val[2] = {0, 0};
3073 :
3074 : Assert(vacrel->do_index_cleanup);
3075 : Assert(vacrel->nindexes > 0);
3076 :
3077 : /*
3078 : * Report that we are now cleaning up indexes and the number of indexes to
3079 : * cleanup.
3080 : */
3081 99420 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
3082 99420 : progress_start_val[1] = vacrel->nindexes;
3083 99420 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
3084 :
3085 99420 : if (!ParallelVacuumIsActive(vacrel))
3086 : {
3087 255671 : for (int idx = 0; idx < vacrel->nindexes; idx++)
3088 : {
3089 156268 : Relation indrel = vacrel->indrels[idx];
3090 156268 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
3091 :
3092 312536 : vacrel->indstats[idx] =
3093 156268 : lazy_cleanup_one_index(indrel, istat, reltuples,
3094 : estimated_count, vacrel);
3095 :
3096 : /* Report the number of indexes cleaned up */
3097 156268 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
3098 156268 : idx + 1);
3099 : }
3100 : }
3101 : else
3102 : {
3103 : /* Outsource everything to parallel variant */
3104 17 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
3105 : vacrel->num_index_scans,
3106 : estimated_count);
3107 : }
3108 :
3109 : /* Reset the progress counters */
3110 99420 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
3111 99420 : }
3112 :
3113 : /*
3114 : * lazy_vacuum_one_index() -- vacuum index relation.
3115 : *
3116 : * Delete all the index tuples containing a TID collected in
3117 : * vacrel->dead_items. Also update running statistics. Exact
3118 : * details depend on index AM's ambulkdelete routine.
3119 : *
3120 : * reltuples is the number of heap tuples to be passed to the
3121 : * bulkdelete callback. It's always assumed to be estimated.
3122 : * See indexam.sgml for more info.
3123 : *
3124 : * Returns bulk delete stats derived from input stats
3125 : */
3126 : static IndexBulkDeleteResult *
3127 1185 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3128 : double reltuples, LVRelState *vacrel)
3129 : {
3130 : IndexVacuumInfo ivinfo;
3131 : LVSavedErrInfo saved_err_info;
3132 :
 : /* Set up the IndexVacuumInfo passed down to the index AM */
3133 1185 : ivinfo.index = indrel;
3134 1185 : ivinfo.heaprel = vacrel->rel;
3135 1185 : ivinfo.analyze_only = false;
3136 1185 : ivinfo.report_progress = false;
 : /* reltuples passed by our caller is always an estimate; see header comment */
3137 1185 : ivinfo.estimated_count = true;
3138 1185 : ivinfo.message_level = DEBUG2;
3139 1185 : ivinfo.num_heap_tuples = reltuples;
3140 1185 : ivinfo.strategy = vacrel->bstrategy;
3141 :
3142 : /*
3143 : * Update error traceback information.
3144 : *
3145 : * The index name is saved during this phase and restored immediately
3146 : * after this phase. See vacuum_error_callback.
3147 : */
3148 : Assert(vacrel->indname == NULL);
3149 1185 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3150 1185 : update_vacuum_error_info(vacrel, &saved_err_info,
3151 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
3152 : InvalidBlockNumber, InvalidOffsetNumber);
3153 :
3154 : /* Do bulk deletion */
3155 1185 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
3156 : vacrel->dead_items_info);
3157 :
3158 : /* Revert to the previous phase information for error traceback */
3159 1185 : restore_vacuum_error_info(vacrel, &saved_err_info);
3160 1185 : pfree(vacrel->indname);
3161 1185 : vacrel->indname = NULL;
3162 :
3163 1185 : return istat;
3164 : }
3165 :
3166 : /*
3167 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
3168 : *
3169 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
3170 : * of heap tuples and estimated_count is true if reltuples is an
3171 : * estimated value. See indexam.sgml for more info.
3172 : *
3173 : * Returns bulk delete stats derived from input stats
3174 : */
3175 : static IndexBulkDeleteResult *
3176 156268 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
3177 : double reltuples, bool estimated_count,
3178 : LVRelState *vacrel)
3179 : {
3180 : IndexVacuumInfo ivinfo;
3181 : LVSavedErrInfo saved_err_info;
3182 :
 : /* Set up the IndexVacuumInfo passed down to the index AM */
3183 156268 : ivinfo.index = indrel;
3184 156268 : ivinfo.heaprel = vacrel->rel;
3185 156268 : ivinfo.analyze_only = false;
3186 156268 : ivinfo.report_progress = false;
 : /* Unlike lazy_vacuum_one_index, the caller tells us whether reltuples is an estimate */
3187 156268 : ivinfo.estimated_count = estimated_count;
3188 156268 : ivinfo.message_level = DEBUG2;
3189 :
3190 156268 : ivinfo.num_heap_tuples = reltuples;
3191 156268 : ivinfo.strategy = vacrel->bstrategy;
3192 :
3193 : /*
3194 : * Update error traceback information.
3195 : *
3196 : * The index name is saved during this phase and restored immediately
3197 : * after this phase. See vacuum_error_callback.
3198 : */
3199 : Assert(vacrel->indname == NULL);
3200 156268 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
3201 156268 : update_vacuum_error_info(vacrel, &saved_err_info,
3202 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
3203 : InvalidBlockNumber, InvalidOffsetNumber);
3204 :
 : /* Invoke the index AM's amvacuumcleanup routine */
3205 156268 : istat = vac_cleanup_one_index(&ivinfo, istat);
3206 :
3207 : /* Revert to the previous phase information for error traceback */
3208 156268 : restore_vacuum_error_info(vacrel, &saved_err_info);
3209 156268 : pfree(vacrel->indname);
3210 156268 : vacrel->indname = NULL;
3211 :
3212 156268 : return istat;
3213 : }
3214 :
3215 : /*
3216 : * should_attempt_truncation - should we attempt to truncate the heap?
3217 : *
3218 : * Don't even think about it unless we have a shot at releasing a goodly
3219 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
3220 : * an AccessExclusive lock must be replayed on any hot standby, where it can
3221 : * be particularly disruptive.
3222 : *
3223 : * Also don't attempt it if wraparound failsafe is in effect. The entire
3224 : * system might be refusing to allocate new XIDs at this point. The system
3225 : * definitely won't return to normal unless and until VACUUM actually advances
3226 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
3227 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
3228 : * truncate the table under these circumstances, an XID exhaustion error might
3229 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
3230 : * There is very little chance of truncation working out when the failsafe is
3231 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
3232 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
3233 : * we're called.
3234 : */
3235 : static bool
3236 126175 : should_attempt_truncation(LVRelState *vacrel)
3237 : {
3238 : BlockNumber possibly_freeable;
3239 :
 : /* Bail out if truncation was disabled by option, or the failsafe fired */
3240 126175 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
3241 21739 : return false;
3242 :
 : /* Pages past the last page known to contain tuples are candidates to free */
3243 104436 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
 : /* Worthwhile only above an absolute minimum or a fraction of the relation */
3244 104436 : if (possibly_freeable > 0 &&
3245 157 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
3246 157 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
3247 145 : return true;
3248 :
3249 104291 : return false;
3250 : }
3251 :
3252 : /*
3253 : * lazy_truncate_heap - try to truncate off any empty pages at the end
3254 : */
3255 : static void
3256 145 : lazy_truncate_heap(LVRelState *vacrel)
3257 : {
3258 145 : BlockNumber orig_rel_pages = vacrel->rel_pages;
3259 : BlockNumber new_rel_pages;
3260 : bool lock_waiter_detected;
3261 : int lock_retry;
3262 :
3263 : /* Report that we are now truncating */
3264 145 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
3265 : PROGRESS_VACUUM_PHASE_TRUNCATE);
3266 :
3267 : /* Update error traceback information one last time */
3268 145 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
3269 : vacrel->nonempty_pages, InvalidOffsetNumber);
3270 :
3271 : /*
3272 : * Loop until no more truncating can be done.
3273 : */
3274 : do
3275 : {
3276 : /*
3277 : * We need full exclusive lock on the relation in order to do
3278 : * truncation. If we can't get it, give up rather than waiting --- we
3279 : * don't want to block other backends, and we don't want to deadlock
3280 : * (which is quite possible considering we already hold a lower-grade
3281 : * lock).
3282 : */
3283 145 : lock_waiter_detected = false;
3284 145 : lock_retry = 0;
3285 : while (true)
3286 : {
3287 445 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
3288 142 : break;
3289 :
3290 : /*
3291 : * Check for interrupts while trying to (re-)acquire the exclusive
3292 : * lock.
3293 : */
3294 303 : CHECK_FOR_INTERRUPTS();
3295 :
3296 303 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
3297 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
3298 : {
3299 : /*
3300 : * We failed to establish the lock in the specified number of
3301 : * retries. This means we give up truncating.
3302 : */
3303 3 : ereport(vacrel->verbose ? INFO : DEBUG2,
3304 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
3305 : vacrel->relname)));
3306 4 : return;
3307 : }
3308 :
 : /* Sleep briefly between lock attempts rather than spinning */
3309 300 : (void) WaitLatch(MyLatch,
3310 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3311 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
3312 : WAIT_EVENT_VACUUM_TRUNCATE);
3313 300 : ResetLatch(MyLatch);
3314 : }
3315 :
3316 : /*
3317 : * Now that we have exclusive lock, look to see if the rel has grown
3318 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
3319 : * the newly added pages presumably contain non-deletable tuples.
3320 : */
3321 142 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
3322 142 : if (new_rel_pages != orig_rel_pages)
3323 : {
3324 : /*
3325 : * Note: we intentionally don't update vacrel->rel_pages with the
3326 : * new rel size here. If we did, it would amount to assuming that
3327 : * the new pages are empty, which is unlikely. Leaving the numbers
3328 : * alone amounts to assuming that the new pages have the same
3329 : * tuple density as existing ones, which is less unlikely.
3330 : */
3331 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3332 0 : return;
3333 : }
3334 :
3335 : /*
3336 : * Scan backwards from the end to verify that the end pages actually
3337 : * contain no tuples. This is *necessary*, not optional, because
3338 : * other backends could have added tuples to these pages whilst we
3339 : * were vacuuming.
3340 : */
3341 142 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
3342 142 : vacrel->blkno = new_rel_pages;
3343 :
3344 142 : if (new_rel_pages >= orig_rel_pages)
3345 : {
3346 : /* can't do anything after all */
3347 1 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3348 1 : return;
3349 : }
3350 :
3351 : /*
3352 : * Okay to truncate.
3353 : */
3354 141 : RelationTruncate(vacrel->rel, new_rel_pages);
3355 :
3356 : /*
3357 : * We can release the exclusive lock as soon as we have truncated.
3358 : * Other backends can't safely access the relation until they have
3359 : * processed the smgr invalidation that smgrtruncate sent out ... but
3360 : * that should happen as part of standard invalidation processing once
3361 : * they acquire lock on the relation.
3362 : */
3363 141 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
3364 :
3365 : /*
3366 : * Update statistics. Here, it *is* correct to adjust rel_pages
3367 : * without also touching reltuples, since the tuple count wasn't
3368 : * changed by the truncation.
3369 : */
3370 141 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
3371 141 : vacrel->rel_pages = new_rel_pages;
3372 :
3373 141 : ereport(vacrel->verbose ? INFO : DEBUG2,
3374 : (errmsg("table \"%s\": truncated %u to %u pages",
3375 : vacrel->relname,
3376 : orig_rel_pages, new_rel_pages)));
 : /* Remember the new size for the next iteration's growth check */
3377 141 : orig_rel_pages = new_rel_pages;
 : /* Loop again only if the backward scan stopped early due to a lock waiter */
3378 141 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
3379 : }
3380 :
3381 : /*
3382 : * Rescan end pages to verify that they are (still) empty of tuples.
3383 : *
3384 : * Returns number of nondeletable pages (last nonempty page + 1).
3385 : */
3386 : static BlockNumber
3387 142 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
3388 : {
 : /* PREFETCH_SIZE must be a power of 2 so the mask arithmetic below works */
3389 : StaticAssertDecl((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
3390 : "prefetch size must be power of 2");
3391 :
3392 : BlockNumber blkno;
3393 : BlockNumber prefetchedUntil;
3394 : instr_time starttime;
3395 :
3396 : /* Initialize the starttime if we check for conflicting lock requests */
3397 142 : INSTR_TIME_SET_CURRENT(starttime);
3398 :
3399 : /*
3400 : * Start checking blocks at what we believe relation end to be and move
3401 : * backwards. (Strange coding of loop control is needed because blkno is
3402 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
3403 : * in forward direction, so that OS-level readahead can kick in.
3404 : */
3405 142 : blkno = vacrel->rel_pages;
3406 142 : prefetchedUntil = InvalidBlockNumber;
3407 2138 : while (blkno > vacrel->nonempty_pages)
3408 : {
3409 : Buffer buf;
3410 : Page page;
3411 : OffsetNumber offnum,
3412 : maxoff;
3413 : bool hastup;
3414 :
3415 : /*
3416 : * Check if another process requests a lock on our relation. We are
3417 : * holding an AccessExclusiveLock here, so they will be waiting. We
3418 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
3419 : * only check if that interval has elapsed once every 32 blocks to
3420 : * keep the number of system calls and actual shared lock table
3421 : * lookups to a minimum.
3422 : */
3423 1998 : if ((blkno % 32) == 0)
3424 : {
3425 : instr_time currenttime;
3426 : instr_time elapsed;
3427 :
3428 65 : INSTR_TIME_SET_CURRENT(currenttime);
3429 65 : elapsed = currenttime;
3430 65 : INSTR_TIME_SUBTRACT(elapsed, starttime);
3431 65 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
3432 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
3433 : {
3434 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
3435 : {
3436 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
3437 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
3438 : vacrel->relname)));
3439 :
 : /* Tell caller to release the lock; truncation may resume later */
3440 0 : *lock_waiter_detected = true;
3441 0 : return blkno;
3442 : }
3443 0 : starttime = currenttime;
3444 : }
3445 : }
3446 :
3447 : /*
3448 : * We don't insert a vacuum delay point here, because we have an
3449 : * exclusive lock on the table which we want to hold for as short a
3450 : * time as possible. We still need to check for interrupts however.
3451 : */
3452 1998 : CHECK_FOR_INTERRUPTS();
3453 :
3454 1998 : blkno--;
3455 :
3456 : /* If we haven't prefetched this lot yet, do so now. */
3457 1998 : if (prefetchedUntil > blkno)
3458 : {
3459 : BlockNumber prefetchStart;
3460 : BlockNumber pblkno;
3461 :
 : /* Round down to the start of the current PREFETCH_SIZE-aligned chunk */
3462 188 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
3463 3034 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
3464 : {
3465 2846 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
3466 2846 : CHECK_FOR_INTERRUPTS();
3467 : }
3468 188 : prefetchedUntil = prefetchStart;
3469 : }
3470 :
3471 1998 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
3472 : vacrel->bstrategy);
3473 :
3474 : /* In this phase we only need shared access to the buffer */
3475 1998 : LockBuffer(buf, BUFFER_LOCK_SHARE);
3476 :
3477 1998 : page = BufferGetPage(buf);
3478 :
3479 1998 : if (PageIsNew(page) || PageIsEmpty(page))
3480 : {
3481 743 : UnlockReleaseBuffer(buf);
3482 743 : continue;
3483 : }
3484 :
3485 1255 : hastup = false;
3486 1255 : maxoff = PageGetMaxOffsetNumber(page);
3487 1255 : for (offnum = FirstOffsetNumber;
3488 2508 : offnum <= maxoff;
3489 1253 : offnum = OffsetNumberNext(offnum))
3490 : {
3491 : ItemId itemid;
3492 :
3493 1255 : itemid = PageGetItemId(page, offnum);
3494 :
3495 : /*
3496 : * Note: any non-unused item should be taken as a reason to keep
3497 : * this page. Even an LP_DEAD item makes truncation unsafe, since
3498 : * we must not have cleaned out its index entries.
3499 : */
3500 1255 : if (ItemIdIsUsed(itemid))
3501 : {
3502 2 : hastup = true;
3503 2 : break; /* can stop scanning */
3504 : }
3505 : } /* scan along page */
3506 :
3507 1255 : UnlockReleaseBuffer(buf);
3508 :
3509 : /* Done scanning if we found a tuple here */
3510 1255 : if (hastup)
3511 2 : return blkno + 1;
3512 : }
3513 :
3514 : /*
3515 : * If we fall out of the loop, all the previously-thought-to-be-empty
3516 : * pages still are; we need not bother to look at the last known-nonempty
3517 : * page.
3518 : */
3519 140 : return vacrel->nonempty_pages;
3520 : }
3521 :
3522 : /*
3523 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
3524 : * shared memory). Sets both in vacrel for caller.
3525 : *
3526 : * Also handles parallel initialization as part of allocating dead_items in
3527 : * DSM when required.
3528 : */
3529 : static void
3530 126175 : dead_items_alloc(LVRelState *vacrel, int nworkers)
3531 : {
3532 : VacDeadItemsInfo *dead_items_info;
 : /* Autovacuum workers honor autovacuum_work_mem when set; else maintenance_work_mem */
3533 364667 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
3534 112317 : autovacuum_work_mem != -1 ?
3535 238492 : autovacuum_work_mem : maintenance_work_mem;
3536 :
3537 : /*
3538 : * Initialize state for a parallel vacuum. As of now, only one worker can
3539 : * be used for an index, so we invoke parallelism only if there are at
3540 : * least two indexes on a table.
3541 : */
3542 126175 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
3543 : {
3544 : /*
3545 : * Since parallel workers cannot access data in temporary tables, we
3546 : * can't perform parallel vacuum on them.
3547 : */
3548 5533 : if (RelationUsesLocalBuffers(vacrel->rel))
3549 : {
3550 : /*
3551 : * Give warning only if the user explicitly tries to perform a
3552 : * parallel vacuum on the temporary table.
3553 : */
3554 3 : if (nworkers > 0)
3555 3 : ereport(WARNING,
3556 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3557 : vacrel->relname)));
3558 : }
3559 : else
3560 5530 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3561 : vacrel->nindexes, nworkers,
3562 : vac_work_mem,
3563 5530 : vacrel->verbose ? INFO : DEBUG2,
3564 : vacrel->bstrategy);
3565 :
3566 : /*
3567 : * If parallel mode started, dead_items and dead_items_info spaces are
3568 : * allocated in DSM.
3569 : */
3570 5533 : if (ParallelVacuumIsActive(vacrel))
3571 : {
3572 17 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3573 : &vacrel->dead_items_info);
3574 17 : return;
3575 : }
3576 : }
3577 :
3578 : /*
3579 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3580 : * locally.
3581 : */
3582 :
3583 126158 : dead_items_info = palloc_object(VacDeadItemsInfo);
 : /* vac_work_mem is in kilobytes; convert to bytes for the TID store limit */
3584 126158 : dead_items_info->max_bytes = vac_work_mem * (Size) 1024;
3585 126158 : dead_items_info->num_items = 0;
3586 126158 : vacrel->dead_items_info = dead_items_info;
3587 :
3588 126158 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3589 : }
3590 :
3591 : /*
3592 : * Add the given block number and offset numbers to dead_items.
3593 : */
3594 : static void
3595 14368 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3596 : int num_offsets)
3597 : {
3598 14368 : const int prog_index[2] = {
3599 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3600 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3601 : };
3602 : int64 prog_val[2];
3603 :
 : /* Record this block's dead item offsets in the TID store and bump the count */
3604 14368 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3605 14368 : vacrel->dead_items_info->num_items += num_offsets;
3606 :
3607 : /* update the progress information */
3608 14368 : prog_val[0] = vacrel->dead_items_info->num_items;
3609 14368 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3610 14368 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3611 14368 : }
3612 :
3613 : /*
3614 : * Forget all collected dead items.
3615 : */
3616 : static void
3617 641 : dead_items_reset(LVRelState *vacrel)
3618 : {
3619 : /* Update statistics for dead items */
3620 641 : vacrel->num_dead_items_resets++;
3621 641 : vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
3622 :
 : /* In parallel vacuum, reset the DSM-based store and re-fetch its handles */
3623 641 : if (ParallelVacuumIsActive(vacrel))
3624 : {
3625 6 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3626 6 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3627 : &vacrel->dead_items_info);
3628 6 : return;
3629 : }
3630 :
3631 : /* Recreate the tidstore with the same max_bytes limitation */
3632 635 : TidStoreDestroy(vacrel->dead_items);
3633 635 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3634 :
3635 : /* Reset the counter */
3636 635 : vacrel->dead_items_info->num_items = 0;
3637 : }
3638 :
3639 : /*
3640 : * Perform cleanup for resources allocated in dead_items_alloc
3641 : */
3642 : static void
3643 126175 : dead_items_cleanup(LVRelState *vacrel)
3644 : {
 : /* Serial case: locally-palloc'd structures are freed with the memory context */
3645 126175 : if (!ParallelVacuumIsActive(vacrel))
3646 : {
3647 : /* Don't bother with pfree here */
3648 126158 : return;
3649 : }
3650 :
3651 : /* End parallel mode */
3652 17 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3653 17 : vacrel->pvs = NULL;
3654 : }
3655 :
3656 : #ifdef USE_ASSERT_CHECKING
3657 :
3658 : /*
3659 : * Wrapper for heap_page_would_be_all_visible() which can be used for callers
3660 : * that expect no LP_DEAD on the page. Currently assert-only, but there is no
3661 : * reason not to use it outside of asserts.
3662 : */
3663 : static bool
3664 : heap_page_is_all_visible(Relation rel, Buffer buf,
3665 : TransactionId OldestXmin,
3666 : bool *all_frozen,
3667 : TransactionId *visibility_cutoff_xid,
3668 : OffsetNumber *logging_offnum)
3669 : {
3670 :
 : /* Pass an empty deadoffsets list: any LP_DEAD item makes the check fail */
3671 : return heap_page_would_be_all_visible(rel, buf,
3672 : OldestXmin,
3673 : NULL, 0,
3674 : all_frozen,
3675 : visibility_cutoff_xid,
3676 : logging_offnum);
3677 : }
3678 : #endif
3679 :
3680 : /*
3681 : * Check whether the heap page in buf is all-visible except for the dead
3682 : * tuples referenced in the deadoffsets array.
3683 : *
3684 : * Vacuum uses this to check if a page would become all-visible after reaping
3685 : * known dead tuples. This function does not remove the dead items.
3686 : *
3687 : * This cannot be called in a critical section, as the visibility checks may
3688 : * perform IO and allocate memory.
3689 : *
3690 : * Returns true if the page is all-visible other than the provided
3691 : * deadoffsets and false otherwise.
3692 : *
3693 : * OldestXmin is used to determine visibility.
3694 : *
3695 : * Output parameters:
3696 : *
3697 : * - *all_frozen: true if every tuple on the page is frozen
3698 : * - *visibility_cutoff_xid: newest xmin; valid only if page is all-visible
3699 : * - *logging_offnum: OffsetNumber of current tuple being processed;
3700 : * used by vacuum's error callback system.
3701 : *
3702 : * Callers looking to verify that the page is already all-visible can call
3703 : * heap_page_is_all_visible().
3704 : *
3705 : * This logic is closely related to heap_prune_record_unchanged_lp_normal().
3706 : * If you modify this function, ensure consistency with that code. An
3707 : * assertion cross-checks that both remain in agreement. Do not introduce new
3708 : * side-effects.
3709 : */
3710 : static bool
3711 12063 : heap_page_would_be_all_visible(Relation rel, Buffer buf,
3712 : TransactionId OldestXmin,
3713 : OffsetNumber *deadoffsets,
3714 : int ndeadoffsets,
3715 : bool *all_frozen,
3716 : TransactionId *visibility_cutoff_xid,
3717 : OffsetNumber *logging_offnum)
3718 : {
3719 12063 : Page page = BufferGetPage(buf);
3720 12063 : BlockNumber blockno = BufferGetBlockNumber(buf);
3721 : OffsetNumber offnum,
3722 : maxoff;
3723 12063 : bool all_visible = true;
 : /* Number of deadoffsets[] entries matched so far (merge against sorted list) */
3724 12063 : int matched_dead_count = 0;
3725 :
3726 12063 : *visibility_cutoff_xid = InvalidTransactionId;
3727 12063 : *all_frozen = true;
3728 :
3729 : Assert(ndeadoffsets == 0 || deadoffsets);
3730 :
3731 : #ifdef USE_ASSERT_CHECKING
3732 : /* Confirm input deadoffsets[] is strictly sorted */
3733 : if (ndeadoffsets > 1)
3734 : {
3735 : for (int i = 1; i < ndeadoffsets; i++)
3736 : Assert(deadoffsets[i - 1] < deadoffsets[i]);
3737 : }
3738 : #endif
3739 :
 : /* Stop scanning as soon as any item proves the page is not all-visible */
3740 12063 : maxoff = PageGetMaxOffsetNumber(page);
3741 12063 : for (offnum = FirstOffsetNumber;
3742 1263452 : offnum <= maxoff && all_visible;
3743 1251389 : offnum = OffsetNumberNext(offnum))
3744 : {
3745 : ItemId itemid;
3746 : HeapTupleData tuple;
3747 : TransactionId dead_after;
3748 :
3749 : /*
3750 : * Set the offset number so that we can display it along with any
3751 : * error that occurred while processing this tuple.
3752 : */
3753 1251391 : *logging_offnum = offnum;
3754 1251391 : itemid = PageGetItemId(page, offnum);
3755 :
3756 : /* Unused or redirect line pointers are of no interest */
3757 1251391 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3758 827835 : continue;
3759 :
3760 1215429 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3761 :
3762 : /*
3763 : * Dead line pointers can have index pointers pointing to them. So
3764 : * they can't be treated as visible
3765 : */
3766 1215429 : if (ItemIdIsDead(itemid))
3767 : {
 : /* Fail unless this LP_DEAD item is the next entry in the sorted list */
3768 791875 : if (!deadoffsets ||
3769 791875 : matched_dead_count >= ndeadoffsets ||
3770 791875 : deadoffsets[matched_dead_count] != offnum)
3771 : {
3772 2 : *all_frozen = all_visible = false;
3773 2 : break;
3774 : }
3775 791873 : matched_dead_count++;
3776 791873 : continue;
3777 : }
3778 :
3779 : Assert(ItemIdIsNormal(itemid));
3780 :
3781 423554 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3782 423554 : tuple.t_len = ItemIdGetLength(itemid);
3783 423554 : tuple.t_tableOid = RelationGetRelid(rel);
3784 :
3785 : /* Visibility checks may do IO or allocate memory */
3786 : Assert(CritSectionCount == 0);
3787 423554 : switch (HeapTupleSatisfiesVacuumHorizon(&tuple, buf, &dead_after))
3788 : {
3789 423500 : case HEAPTUPLE_LIVE:
3790 : {
3791 : TransactionId xmin;
3792 :
3793 : /* Check comments in lazy_scan_prune. */
3794 423500 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3795 : {
3796 0 : all_visible = false;
3797 0 : *all_frozen = false;
3798 0 : break;
3799 : }
3800 :
3801 : /*
3802 : * The inserter definitely committed. But is it old enough
3803 : * that everyone sees it as committed?
3804 : */
3805 423500 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3806 423500 : if (!TransactionIdPrecedes(xmin, OldestXmin))
3807 : {
3808 25 : all_visible = false;
3809 25 : *all_frozen = false;
3810 25 : break;
3811 : }
3812 :
3813 : /* Track newest xmin on page. */
3814 423475 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3815 : TransactionIdIsNormal(xmin))
3816 9630 : *visibility_cutoff_xid = xmin;
3817 :
3818 : /* Check whether this tuple is already frozen or not */
3819 531963 : if (all_visible && *all_frozen &&
3820 108488 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3821 2712 : *all_frozen = false;
3822 : }
3823 423475 : break;
3824 :
3825 54 : case HEAPTUPLE_DEAD:
3826 : case HEAPTUPLE_RECENTLY_DEAD:
3827 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3828 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3829 : {
3830 54 : all_visible = false;
3831 54 : *all_frozen = false;
3832 54 : break;
3833 : }
3834 0 : default:
3835 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3836 : break;
3837 : }
3838 : } /* scan along page */
3839 :
3840 : /* Clear the offset information once we have processed the given page. */
3841 12063 : *logging_offnum = InvalidOffsetNumber;
3842 :
3843 12063 : return all_visible;
3844 : }
3845 :
3846 : /*
3847 : * Update index statistics in pg_class if the statistics are accurate.
3848 : */
3849 : static void
3850 104451 : update_relstats_all_indexes(LVRelState *vacrel)
3851 : {
3852 104451 : Relation *indrels = vacrel->indrels;
3853 104451 : int nindexes = vacrel->nindexes;
3854 104451 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3855 :
3856 : Assert(vacrel->do_index_cleanup);
3857 :
3858 260780 : for (int idx = 0; idx < nindexes; idx++)
3859 : {
3860 156329 : Relation indrel = indrels[idx];
3861 156329 : IndexBulkDeleteResult *istat = indstats[idx];
3862 :
 : /* Skip indexes that reported no stats, or only estimated counts */
3863 156329 : if (istat == NULL || istat->estimated_count)
3864 155006 : continue;
3865 :
3866 : /* Update index statistics */
3867 1323 : vac_update_relstats(indrel,
3868 : istat->num_pages,
3869 : istat->num_index_tuples,
3870 : 0, 0,
3871 : false,
3872 : InvalidTransactionId,
3873 : InvalidMultiXactId,
3874 : NULL, NULL, false);
3875 : }
3876 : }
3877 :
3878 : /*
3879 : * Error context callback for errors occurring during vacuum. The error
3880 : * context messages for index phases should match the messages set in parallel
3881 : * vacuum. If you change this function for those phases, change
3882 : * parallel_vacuum_error_callback() as well.
3883 : */
3884 : static void
3885 116401 : vacuum_error_callback(void *arg)
3886 : {
 : /* arg is the LVRelState registered with the error context callback */
3887 116401 : LVRelState *errinfo = arg;
3888 :
3889 116401 : switch (errinfo->phase)
3890 : {
3891 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3892 0 : if (BlockNumberIsValid(errinfo->blkno))
3893 : {
3894 0 : if (OffsetNumberIsValid(errinfo->offnum))
3895 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3896 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3897 : else
3898 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3899 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3900 : }
3901 : else
3902 0 : errcontext("while scanning relation \"%s.%s\"",
3903 : errinfo->relnamespace, errinfo->relname);
3904 0 : break;
3905 :
3906 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3907 0 : if (BlockNumberIsValid(errinfo->blkno))
3908 : {
3909 0 : if (OffsetNumberIsValid(errinfo->offnum))
3910 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3911 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3912 : else
3913 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3914 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3915 : }
3916 : else
3917 0 : errcontext("while vacuuming relation \"%s.%s\"",
3918 : errinfo->relnamespace, errinfo->relname);
3919 0 : break;
3920 :
3921 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3922 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3923 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3924 0 : break;
3925 :
3926 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3927 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3928 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3929 0 : break;
3930 :
3931 4 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3932 4 : if (BlockNumberIsValid(errinfo->blkno))
3933 4 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3934 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3935 4 : break;
3936 :
3937 116397 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3938 : default:
3939 116397 : return; /* do nothing; the errinfo may not be
3940 : * initialized */
3941 : }
3942 : }
3943 :
3944 : /*
3945 : * Updates the information required for vacuum error callback. This also saves
3946 : * the current information which can be later restored via restore_vacuum_error_info.
3947 : */
3948 : static void
3949 711033 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3950 : int phase, BlockNumber blkno, OffsetNumber offnum)
3951 : {
 : /* Save the current state only when the caller intends to restore it later */
3952 711033 : if (saved_vacrel)
3953 : {
3954 170144 : saved_vacrel->offnum = vacrel->offnum;
3955 170144 : saved_vacrel->blkno = vacrel->blkno;
3956 170144 : saved_vacrel->phase = vacrel->phase;
3957 : }
3958 :
3959 711033 : vacrel->blkno = blkno;
3960 711033 : vacrel->offnum = offnum;
3961 711033 : vacrel->phase = phase;
3962 : }
3963 :
3964 : /*
3965 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3966 : */
3967 : static void
3968 170144 : restore_vacuum_error_info(LVRelState *vacrel,
3969 : const LVSavedErrInfo *saved_vacrel)
3970 : {
 : /* Put back the state previously captured by update_vacuum_error_info */
3971 170144 : vacrel->blkno = saved_vacrel->blkno;
3972 170144 : vacrel->offnum = saved_vacrel->offnum;
3973 170144 : vacrel->phase = saved_vacrel->phase;
3974 : }
|