Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * Heap relations are vacuumed in three main phases. In phase I, vacuum scans
7 : * relation pages, pruning and freezing tuples and saving dead tuples' TIDs in
8 : * a TID store. If that TID store fills up or vacuum finishes scanning the
9 : * relation, it progresses to phase II: index vacuuming. Index vacuuming
10 : * deletes the dead index entries referenced in the TID store. In phase III,
11 : * vacuum scans the blocks of the relation referred to by the TIDs in the TID
12 : * store and reaps the corresponding dead items, freeing that space for future
13 : * tuples.
14 : *
15 : * If there are no indexes or index scanning is disabled, phase II may be
16 : * skipped. If phase I identified very few dead index entries or if vacuum's
17 : * failsafe mechanism has triggered (to avoid transaction ID wraparound),
18 : * vacuum may skip phases II and III.
19 : *
20 : * If the TID store fills up in phase I, vacuum suspends phase I, proceeds to
21 : * phases II and II, cleaning up the dead tuples referenced in the current TID
22 : * store. This empties the TID store resumes phase I.
23 : *
24 : * In a way, the phases are more like states in a state machine, but they have
25 : * been referred to colloquially as phases for so long that they are referred
26 : * to as such here.
27 : *
28 : * Manually invoked VACUUMs may scan indexes during phase II in parallel. For
29 : * more information on this, see the comment at the top of vacuumparallel.c.
30 : *
31 : * In between phases, vacuum updates the freespace map (every
32 : * VACUUM_FSM_EVERY_PAGES).
33 : *
34 : * After completing all three phases, vacuum may truncate the relation if it
35 : * has emptied pages at the end. Finally, vacuum updates relation statistics
36 : * in pg_class and the cumulative statistics subsystem.
37 : *
38 : * Relation Scanning:
39 : *
40 : * Vacuum scans the heap relation, starting at the beginning and progressing
41 : * to the end, skipping pages as permitted by their visibility status, vacuum
42 : * options, and various other requirements.
43 : *
44 : * When page skipping is not disabled, a non-aggressive vacuum may scan pages
45 : * that are marked all-visible (and even all-frozen) in the visibility map if
46 : * the range of skippable pages is below SKIP_PAGES_THRESHOLD.
47 : *
48 : * Once vacuum has decided to scan a given block, it must read the block and
49 : * obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
50 : * may choose to skip pruning and freezing if it cannot acquire a cleanup lock
51 : * on the buffer right away. In this case, it may miss cleaning up dead tuples
52 : * and their associated index entries (though it is free to reap any existing
53 : * dead items on the page).
54 : *
55 : * After pruning and freezing, pages that are newly all-visible and all-frozen
56 : * are marked as such in the visibility map.
57 : *
58 : * Dead TID Storage:
59 : *
60 : * The major space usage for vacuuming is storage for the dead tuple IDs that
61 : * are to be removed from indexes. We want to ensure we can vacuum even the
62 : * very largest relations with finite memory space usage. To do that, we set
63 : * upper bounds on the memory that can be used for keeping track of dead TIDs
64 : * at once.
65 : *
66 : * We are willing to use at most maintenance_work_mem (or perhaps
67 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
68 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
69 : * the pages that we've pruned). This frees up the memory space dedicated to
70 : * store dead TIDs.
71 : *
72 : * In practice VACUUM will often complete its initial pass over the target
73 : * heap relation without ever running out of space to store TIDs. This means
74 : * that there only needs to be one call to lazy_vacuum, after the initial pass
75 : * completes.
76 : *
77 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
78 : * Portions Copyright (c) 1994, Regents of the University of California
79 : *
80 : *
81 : * IDENTIFICATION
82 : * src/backend/access/heap/vacuumlazy.c
83 : *
84 : *-------------------------------------------------------------------------
85 : */
86 : #include "postgres.h"
87 :
88 : #include <math.h>
89 :
90 : #include "access/genam.h"
91 : #include "access/heapam.h"
92 : #include "access/htup_details.h"
93 : #include "access/multixact.h"
94 : #include "access/tidstore.h"
95 : #include "access/transam.h"
96 : #include "access/visibilitymap.h"
97 : #include "access/xloginsert.h"
98 : #include "catalog/storage.h"
99 : #include "commands/dbcommands.h"
100 : #include "commands/progress.h"
101 : #include "commands/vacuum.h"
102 : #include "common/int.h"
103 : #include "executor/instrument.h"
104 : #include "miscadmin.h"
105 : #include "pgstat.h"
106 : #include "portability/instr_time.h"
107 : #include "postmaster/autovacuum.h"
108 : #include "storage/bufmgr.h"
109 : #include "storage/freespace.h"
110 : #include "storage/lmgr.h"
111 : #include "utils/lsyscache.h"
112 : #include "utils/pg_rusage.h"
113 : #include "utils/timestamp.h"
114 :
115 :
116 : /*
117 : * Space/time tradeoff parameters: do these need to be user-tunable?
118 : *
119 : * To consider truncating the relation, we want there to be at least
120 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
121 : * is less) potentially-freeable pages.
122 : */
123 : #define REL_TRUNCATE_MINIMUM 1000
124 : #define REL_TRUNCATE_FRACTION 16
125 :
126 : /*
127 : * Timing parameters for truncate locking heuristics.
128 : *
129 : * These were not exposed as user tunable GUC values because it didn't seem
130 : * that the potential for improvement was great enough to merit the cost of
131 : * supporting them.
132 : */
133 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
134 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
135 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
136 :
137 : /*
138 : * Threshold that controls whether we bypass index vacuuming and heap
139 : * vacuuming as an optimization
140 : */
141 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
142 :
143 : /*
144 : * Perform a failsafe check each time we scan another 4GB of pages.
145 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
146 : */
147 : #define FAILSAFE_EVERY_PAGES \
148 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
149 :
150 : /*
151 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
152 : * (it won't be exact because we only vacuum FSM after processing a heap page
153 : * that has some removable tuples). When there are indexes, this is ignored,
154 : * and we vacuum FSM after each index/heap cleaning pass.
155 : */
156 : #define VACUUM_FSM_EVERY_PAGES \
157 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
158 :
159 : /*
160 : * Before we consider skipping a page that's marked as clean in
161 : * visibility map, we must've seen at least this many clean pages.
162 : */
163 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
164 :
165 : /*
166 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
167 : * Needs to be a power of 2.
168 : */
169 : #define PREFETCH_SIZE ((BlockNumber) 32)
170 :
171 : /*
172 : * Macro to check if we are in a parallel vacuum. If true, we are in the
173 : * parallel mode and the DSM segment is initialized.
174 : */
175 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
176 :
177 : /* Phases of vacuum during which we report error context. */
178 : typedef enum
179 : {
180 : VACUUM_ERRCB_PHASE_UNKNOWN,
181 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
182 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
183 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
184 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
185 : VACUUM_ERRCB_PHASE_TRUNCATE,
186 : } VacErrPhase;
187 :
188 : typedef struct LVRelState
189 : {
190 : /* Target heap relation and its indexes */
191 : Relation rel;
192 : Relation *indrels;
193 : int nindexes;
194 :
195 : /* Buffer access strategy and parallel vacuum state */
196 : BufferAccessStrategy bstrategy;
197 : ParallelVacuumState *pvs;
198 :
199 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
200 : bool aggressive;
201 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
202 : bool skipwithvm;
203 : /* Consider index vacuuming bypass optimization? */
204 : bool consider_bypass_optimization;
205 :
206 : /* Doing index vacuuming, index cleanup, rel truncation? */
207 : bool do_index_vacuuming;
208 : bool do_index_cleanup;
209 : bool do_rel_truncate;
210 :
211 : /* VACUUM operation's cutoffs for freezing and pruning */
212 : struct VacuumCutoffs cutoffs;
213 : GlobalVisState *vistest;
214 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
215 : TransactionId NewRelfrozenXid;
216 : MultiXactId NewRelminMxid;
217 : bool skippedallvis;
218 :
219 : /* Error reporting state */
220 : char *dbname;
221 : char *relnamespace;
222 : char *relname;
223 : char *indname; /* Current index name */
224 : BlockNumber blkno; /* used only for heap operations */
225 : OffsetNumber offnum; /* used only for heap operations */
226 : VacErrPhase phase;
227 : bool verbose; /* VACUUM VERBOSE? */
228 :
229 : /*
230 : * dead_items stores TIDs whose index tuples are deleted by index
231 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
232 : * that has been processed by lazy_scan_prune. Also needed by
233 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
234 : * LP_UNUSED during second heap pass.
235 : *
236 : * Both dead_items and dead_items_info are allocated in shared memory in
237 : * parallel vacuum cases.
238 : */
239 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
240 : VacDeadItemsInfo *dead_items_info;
241 :
242 : BlockNumber rel_pages; /* total number of pages */
243 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
244 : BlockNumber removed_pages; /* # pages removed by relation truncation */
245 : BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
246 :
247 : /* # pages newly set all-visible in the VM */
248 : BlockNumber vm_new_visible_pages;
249 :
250 : /*
251 : * # pages newly set all-visible and all-frozen in the VM. This is a
252 : * subset of vm_new_visible_pages. That is, vm_new_visible_pages includes
253 : * all pages set all-visible, but vm_new_visible_frozen_pages includes
254 : * only those which were also set all-frozen.
255 : */
256 : BlockNumber vm_new_visible_frozen_pages;
257 :
258 : /* # all-visible pages newly set all-frozen in the VM */
259 : BlockNumber vm_new_frozen_pages;
260 :
261 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
262 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
263 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
264 :
265 : /* Statistics output by us, for table */
266 : double new_rel_tuples; /* new estimated total # of tuples */
267 : double new_live_tuples; /* new estimated total # of live tuples */
268 : /* Statistics output by index AMs */
269 : IndexBulkDeleteResult **indstats;
270 :
271 : /* Instrumentation counters */
272 : int num_index_scans;
273 : /* Counters that follow are only for scanned_pages */
274 : int64 tuples_deleted; /* # deleted from table */
275 : int64 tuples_frozen; /* # newly frozen */
276 : int64 lpdead_items; /* # deleted from indexes */
277 : int64 live_tuples; /* # live tuples remaining */
278 : int64 recently_dead_tuples; /* # dead, but not yet removable */
279 : int64 missed_dead_tuples; /* # removable, but not removed */
280 :
281 : /* State maintained by heap_vac_scan_next_block() */
282 : BlockNumber current_block; /* last block returned */
283 : BlockNumber next_unskippable_block; /* next unskippable block */
284 : bool next_unskippable_allvis; /* its visibility status */
285 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
286 : } LVRelState;
287 :
288 : /* Struct for saving and restoring vacuum error information. */
289 : typedef struct LVSavedErrInfo
290 : {
291 : BlockNumber blkno;
292 : OffsetNumber offnum;
293 : VacErrPhase phase;
294 : } LVSavedErrInfo;
295 :
296 :
297 : /* non-export function prototypes */
298 : static void lazy_scan_heap(LVRelState *vacrel);
299 : static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
300 : bool *all_visible_according_to_vm);
301 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
302 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
303 : BlockNumber blkno, Page page,
304 : bool sharelock, Buffer vmbuffer);
305 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
306 : BlockNumber blkno, Page page,
307 : Buffer vmbuffer, bool all_visible_according_to_vm,
308 : bool *has_lpdead_items);
309 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
310 : BlockNumber blkno, Page page,
311 : bool *has_lpdead_items);
312 : static void lazy_vacuum(LVRelState *vacrel);
313 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
314 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
315 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
316 : Buffer buffer, OffsetNumber *deadoffsets,
317 : int num_offsets, Buffer vmbuffer);
318 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
319 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
320 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
321 : IndexBulkDeleteResult *istat,
322 : double reltuples,
323 : LVRelState *vacrel);
324 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
325 : IndexBulkDeleteResult *istat,
326 : double reltuples,
327 : bool estimated_count,
328 : LVRelState *vacrel);
329 : static bool should_attempt_truncation(LVRelState *vacrel);
330 : static void lazy_truncate_heap(LVRelState *vacrel);
331 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
332 : bool *lock_waiter_detected);
333 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
334 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
335 : int num_offsets);
336 : static void dead_items_reset(LVRelState *vacrel);
337 : static void dead_items_cleanup(LVRelState *vacrel);
338 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
339 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
340 : static void update_relstats_all_indexes(LVRelState *vacrel);
341 : static void vacuum_error_callback(void *arg);
342 : static void update_vacuum_error_info(LVRelState *vacrel,
343 : LVSavedErrInfo *saved_vacrel,
344 : int phase, BlockNumber blkno,
345 : OffsetNumber offnum);
346 : static void restore_vacuum_error_info(LVRelState *vacrel,
347 : const LVSavedErrInfo *saved_vacrel);
348 :
349 :
350 : /*
351 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
352 : *
353 : * This routine sets things up for and then calls lazy_scan_heap, where
354 : * almost all work actually takes place. Finalizes everything after call
355 : * returns by managing relation truncation and updating rel's pg_class
356 : * entry. (Also updates pg_class entries for any indexes that need it.)
357 : *
358 : * At entry, we have already established a transaction and opened
359 : * and locked the relation.
360 : */
361 : void
362 114420 : heap_vacuum_rel(Relation rel, VacuumParams *params,
363 : BufferAccessStrategy bstrategy)
364 : {
365 : LVRelState *vacrel;
366 : bool verbose,
367 : instrument,
368 : skipwithvm,
369 : frozenxid_updated,
370 : minmulti_updated;
371 : BlockNumber orig_rel_pages,
372 : new_rel_pages,
373 : new_rel_allvisible;
374 : PGRUsage ru0;
375 114420 : TimestampTz starttime = 0;
376 114420 : PgStat_Counter startreadtime = 0,
377 114420 : startwritetime = 0;
378 114420 : WalUsage startwalusage = pgWalUsage;
379 114420 : BufferUsage startbufferusage = pgBufferUsage;
380 : ErrorContextCallback errcallback;
381 114420 : char **indnames = NULL;
382 :
383 114420 : verbose = (params->options & VACOPT_VERBOSE) != 0;
384 208062 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
385 93642 : params->log_min_duration >= 0));
386 114420 : if (instrument)
387 : {
388 93662 : pg_rusage_init(&ru0);
389 93662 : starttime = GetCurrentTimestamp();
390 93662 : if (track_io_timing)
391 : {
392 0 : startreadtime = pgStatBlockReadTime;
393 0 : startwritetime = pgStatBlockWriteTime;
394 : }
395 : }
396 :
397 114420 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
398 : RelationGetRelid(rel));
399 :
400 : /*
401 : * Setup error traceback support for ereport() first. The idea is to set
402 : * up an error context callback to display additional information on any
403 : * error during a vacuum. During different phases of vacuum, we update
404 : * the state so that the error context callback always display current
405 : * information.
406 : *
407 : * Copy the names of heap rel into local memory for error reporting
408 : * purposes, too. It isn't always safe to assume that we can get the name
409 : * of each rel. It's convenient for code in lazy_scan_heap to always use
410 : * these temp copies.
411 : */
412 114420 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
413 114420 : vacrel->dbname = get_database_name(MyDatabaseId);
414 114420 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
415 114420 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
416 114420 : vacrel->indname = NULL;
417 114420 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
418 114420 : vacrel->verbose = verbose;
419 114420 : errcallback.callback = vacuum_error_callback;
420 114420 : errcallback.arg = vacrel;
421 114420 : errcallback.previous = error_context_stack;
422 114420 : error_context_stack = &errcallback;
423 :
424 : /* Set up high level stuff about rel and its indexes */
425 114420 : vacrel->rel = rel;
426 114420 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
427 : &vacrel->indrels);
428 114420 : vacrel->bstrategy = bstrategy;
429 114420 : if (instrument && vacrel->nindexes > 0)
430 : {
431 : /* Copy index names used by instrumentation (not error reporting) */
432 89868 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
433 230576 : for (int i = 0; i < vacrel->nindexes; i++)
434 140708 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
435 : }
436 :
437 : /*
438 : * The index_cleanup param either disables index vacuuming and cleanup or
439 : * forces it to go ahead when we would otherwise apply the index bypass
440 : * optimization. The default is 'auto', which leaves the final decision
441 : * up to lazy_vacuum().
442 : *
443 : * The truncate param allows user to avoid attempting relation truncation,
444 : * though it can't force truncation to happen.
445 : */
446 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
447 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
448 : params->truncate != VACOPTVALUE_AUTO);
449 :
450 : /*
451 : * While VacuumFailSafeActive is reset to false before calling this, we
452 : * still need to reset it here due to recursive calls.
453 : */
454 114420 : VacuumFailsafeActive = false;
455 114420 : vacrel->consider_bypass_optimization = true;
456 114420 : vacrel->do_index_vacuuming = true;
457 114420 : vacrel->do_index_cleanup = true;
458 114420 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
459 114420 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
460 : {
461 : /* Force disable index vacuuming up-front */
462 264 : vacrel->do_index_vacuuming = false;
463 264 : vacrel->do_index_cleanup = false;
464 : }
465 114156 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
466 : {
467 : /* Force index vacuuming. Note that failsafe can still bypass. */
468 32 : vacrel->consider_bypass_optimization = false;
469 : }
470 : else
471 : {
472 : /* Default/auto, make all decisions dynamically */
473 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
474 : }
475 :
476 : /* Initialize page counters explicitly (be tidy) */
477 114420 : vacrel->scanned_pages = 0;
478 114420 : vacrel->removed_pages = 0;
479 114420 : vacrel->new_frozen_tuple_pages = 0;
480 114420 : vacrel->lpdead_item_pages = 0;
481 114420 : vacrel->missed_dead_pages = 0;
482 114420 : vacrel->nonempty_pages = 0;
483 : /* dead_items_alloc allocates vacrel->dead_items later on */
484 :
485 : /* Allocate/initialize output statistics state */
486 114420 : vacrel->new_rel_tuples = 0;
487 114420 : vacrel->new_live_tuples = 0;
488 114420 : vacrel->indstats = (IndexBulkDeleteResult **)
489 114420 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
490 :
491 : /* Initialize remaining counters (be tidy) */
492 114420 : vacrel->num_index_scans = 0;
493 114420 : vacrel->tuples_deleted = 0;
494 114420 : vacrel->tuples_frozen = 0;
495 114420 : vacrel->lpdead_items = 0;
496 114420 : vacrel->live_tuples = 0;
497 114420 : vacrel->recently_dead_tuples = 0;
498 114420 : vacrel->missed_dead_tuples = 0;
499 :
500 114420 : vacrel->vm_new_visible_pages = 0;
501 114420 : vacrel->vm_new_visible_frozen_pages = 0;
502 114420 : vacrel->vm_new_frozen_pages = 0;
503 :
504 : /*
505 : * Get cutoffs that determine which deleted tuples are considered DEAD,
506 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
507 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
508 : * happen in this order to ensure that the OldestXmin cutoff field works
509 : * as an upper bound on the XIDs stored in the pages we'll actually scan
510 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
511 : *
512 : * Next acquire vistest, a related cutoff that's used in pruning. We use
513 : * vistest in combination with OldestXmin to ensure that
514 : * heap_page_prune_and_freeze() always removes any deleted tuple whose
515 : * xmax is < OldestXmin. lazy_scan_prune must never become confused about
516 : * whether a tuple should be frozen or removed. (In the future we might
517 : * want to teach lazy_scan_prune to recompute vistest from time to time,
518 : * to increase the number of dead tuples it can prune away.)
519 : */
520 114420 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
521 114420 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
522 114420 : vacrel->vistest = GlobalVisTestFor(rel);
523 : /* Initialize state used to track oldest extant XID/MXID */
524 114420 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
525 114420 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
526 114420 : vacrel->skippedallvis = false;
527 114420 : skipwithvm = true;
528 114420 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
529 : {
530 : /*
531 : * Force aggressive mode, and disable skipping blocks using the
532 : * visibility map (even those set all-frozen)
533 : */
534 298 : vacrel->aggressive = true;
535 298 : skipwithvm = false;
536 : }
537 :
538 114420 : vacrel->skipwithvm = skipwithvm;
539 :
540 114420 : if (verbose)
541 : {
542 20 : if (vacrel->aggressive)
543 0 : ereport(INFO,
544 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
545 : vacrel->dbname, vacrel->relnamespace,
546 : vacrel->relname)));
547 : else
548 20 : ereport(INFO,
549 : (errmsg("vacuuming \"%s.%s.%s\"",
550 : vacrel->dbname, vacrel->relnamespace,
551 : vacrel->relname)));
552 : }
553 :
554 : /*
555 : * Allocate dead_items memory using dead_items_alloc. This handles
556 : * parallel VACUUM initialization as part of allocating shared memory
557 : * space used for dead_items. (But do a failsafe precheck first, to
558 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
559 : * is already dangerously old.)
560 : */
561 114420 : lazy_check_wraparound_failsafe(vacrel);
562 114420 : dead_items_alloc(vacrel, params->nworkers);
563 :
564 : /*
565 : * Call lazy_scan_heap to perform all required heap pruning, index
566 : * vacuuming, and heap vacuuming (plus related processing)
567 : */
568 114420 : lazy_scan_heap(vacrel);
569 :
570 : /*
571 : * Free resources managed by dead_items_alloc. This ends parallel mode in
572 : * passing when necessary.
573 : */
574 114420 : dead_items_cleanup(vacrel);
575 : Assert(!IsInParallelMode());
576 :
577 : /*
578 : * Update pg_class entries for each of rel's indexes where appropriate.
579 : *
580 : * Unlike the later update to rel's pg_class entry, this is not critical.
581 : * Maintains relpages/reltuples statistics used by the planner only.
582 : */
583 114420 : if (vacrel->do_index_cleanup)
584 94878 : update_relstats_all_indexes(vacrel);
585 :
586 : /* Done with rel's indexes */
587 114420 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
588 :
589 : /* Optionally truncate rel */
590 114420 : if (should_attempt_truncation(vacrel))
591 310 : lazy_truncate_heap(vacrel);
592 :
593 : /* Pop the error context stack */
594 114420 : error_context_stack = errcallback.previous;
595 :
596 : /* Report that we are now doing final cleanup */
597 114420 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
598 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
599 :
600 : /*
601 : * Prepare to update rel's pg_class entry.
602 : *
603 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
604 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
605 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
606 : */
607 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
608 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
609 : vacrel->cutoffs.relfrozenxid,
610 : vacrel->NewRelfrozenXid));
611 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
612 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
613 : vacrel->cutoffs.relminmxid,
614 : vacrel->NewRelminMxid));
615 114420 : if (vacrel->skippedallvis)
616 : {
617 : /*
618 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
619 : * chose to skip an all-visible page range. The state that tracks new
620 : * values will have missed unfrozen XIDs from the pages we skipped.
621 : */
622 : Assert(!vacrel->aggressive);
623 54 : vacrel->NewRelfrozenXid = InvalidTransactionId;
624 54 : vacrel->NewRelminMxid = InvalidMultiXactId;
625 : }
626 :
627 : /*
628 : * For safety, clamp relallvisible to be not more than what we're setting
629 : * pg_class.relpages to
630 : */
631 114420 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
632 114420 : visibilitymap_count(rel, &new_rel_allvisible, NULL);
633 114420 : if (new_rel_allvisible > new_rel_pages)
634 0 : new_rel_allvisible = new_rel_pages;
635 :
636 : /*
637 : * Now actually update rel's pg_class entry.
638 : *
639 : * In principle new_live_tuples could be -1 indicating that we (still)
640 : * don't know the tuple count. In practice that can't happen, since we
641 : * scan every page that isn't skipped using the visibility map.
642 : */
643 114420 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
644 114420 : new_rel_allvisible, vacrel->nindexes > 0,
645 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
646 : &frozenxid_updated, &minmulti_updated, false);
647 :
648 : /*
649 : * Report results to the cumulative stats system, too.
650 : *
651 : * Deliberately avoid telling the stats system about LP_DEAD items that
652 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
653 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
654 : * It seems like a good idea to err on the side of not vacuuming again too
655 : * soon in cases where the failsafe prevented significant amounts of heap
656 : * vacuuming.
657 : */
658 68510 : pgstat_report_vacuum(RelationGetRelid(rel),
659 114420 : rel->rd_rel->relisshared,
660 45910 : Max(vacrel->new_live_tuples, 0),
661 114420 : vacrel->recently_dead_tuples +
662 114420 : vacrel->missed_dead_tuples);
663 114420 : pgstat_progress_end_command();
664 :
665 114420 : if (instrument)
666 : {
667 93662 : TimestampTz endtime = GetCurrentTimestamp();
668 :
669 93694 : if (verbose || params->log_min_duration == 0 ||
670 32 : TimestampDifferenceExceeds(starttime, endtime,
671 : params->log_min_duration))
672 : {
673 : long secs_dur;
674 : int usecs_dur;
675 : WalUsage walusage;
676 : BufferUsage bufferusage;
677 : StringInfoData buf;
678 : char *msgfmt;
679 : int32 diff;
680 93630 : double read_rate = 0,
681 93630 : write_rate = 0;
682 : int64 total_blks_hit;
683 : int64 total_blks_read;
684 : int64 total_blks_dirtied;
685 :
686 93630 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
687 93630 : memset(&walusage, 0, sizeof(WalUsage));
688 93630 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
689 93630 : memset(&bufferusage, 0, sizeof(BufferUsage));
690 93630 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
691 :
692 93630 : total_blks_hit = bufferusage.shared_blks_hit +
693 93630 : bufferusage.local_blks_hit;
694 93630 : total_blks_read = bufferusage.shared_blks_read +
695 93630 : bufferusage.local_blks_read;
696 93630 : total_blks_dirtied = bufferusage.shared_blks_dirtied +
697 93630 : bufferusage.local_blks_dirtied;
698 :
699 93630 : initStringInfo(&buf);
700 93630 : if (verbose)
701 : {
702 : /*
703 : * Aggressiveness already reported earlier, in dedicated
704 : * VACUUM VERBOSE ereport
705 : */
706 : Assert(!params->is_wraparound);
707 20 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
708 : }
709 93610 : else if (params->is_wraparound)
710 : {
711 : /*
712 : * While it's possible for a VACUUM to be both is_wraparound
713 : * and !aggressive, that's just a corner-case -- is_wraparound
714 : * implies aggressive. Produce distinct output for the corner
715 : * case all the same, just in case.
716 : */
717 93490 : if (vacrel->aggressive)
718 93490 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
719 : else
720 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
721 : }
722 : else
723 : {
724 120 : if (vacrel->aggressive)
725 16 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
726 : else
727 104 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
728 : }
729 93630 : appendStringInfo(&buf, msgfmt,
730 : vacrel->dbname,
731 : vacrel->relnamespace,
732 : vacrel->relname,
733 : vacrel->num_index_scans);
734 130924 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
735 : vacrel->removed_pages,
736 : new_rel_pages,
737 : vacrel->scanned_pages,
738 : orig_rel_pages == 0 ? 100.0 :
739 37294 : 100.0 * vacrel->scanned_pages / orig_rel_pages);
740 93630 : appendStringInfo(&buf,
741 93630 : _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
742 93630 : (long long) vacrel->tuples_deleted,
743 93630 : (long long) vacrel->new_rel_tuples,
744 93630 : (long long) vacrel->recently_dead_tuples);
745 93630 : if (vacrel->missed_dead_tuples > 0)
746 0 : appendStringInfo(&buf,
747 0 : _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
748 0 : (long long) vacrel->missed_dead_tuples,
749 : vacrel->missed_dead_pages);
750 93630 : diff = (int32) (ReadNextTransactionId() -
751 93630 : vacrel->cutoffs.OldestXmin);
752 93630 : appendStringInfo(&buf,
753 93630 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
754 : vacrel->cutoffs.OldestXmin, diff);
755 93630 : if (frozenxid_updated)
756 : {
757 37034 : diff = (int32) (vacrel->NewRelfrozenXid -
758 37034 : vacrel->cutoffs.relfrozenxid);
759 37034 : appendStringInfo(&buf,
760 37034 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
761 : vacrel->NewRelfrozenXid, diff);
762 : }
763 93630 : if (minmulti_updated)
764 : {
765 88 : diff = (int32) (vacrel->NewRelminMxid -
766 88 : vacrel->cutoffs.relminmxid);
767 88 : appendStringInfo(&buf,
768 88 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
769 : vacrel->NewRelminMxid, diff);
770 : }
771 93630 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
772 : vacrel->new_frozen_tuple_pages,
773 : orig_rel_pages == 0 ? 100.0 :
774 37294 : 100.0 * vacrel->new_frozen_tuple_pages /
775 : orig_rel_pages,
776 93630 : (long long) vacrel->tuples_frozen);
777 :
778 93630 : appendStringInfo(&buf,
779 93630 : _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
780 : vacrel->vm_new_visible_pages,
781 93630 : vacrel->vm_new_visible_frozen_pages +
782 93630 : vacrel->vm_new_frozen_pages,
783 : vacrel->vm_new_frozen_pages);
784 93630 : if (vacrel->do_index_vacuuming)
785 : {
786 74570 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
787 74478 : appendStringInfoString(&buf, _("index scan not needed: "));
788 : else
789 92 : appendStringInfoString(&buf, _("index scan needed: "));
790 :
791 74570 : msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
792 : }
793 : else
794 : {
795 19060 : if (!VacuumFailsafeActive)
796 2 : appendStringInfoString(&buf, _("index scan bypassed: "));
797 : else
798 19058 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
799 :
800 19060 : msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
801 : }
802 93630 : appendStringInfo(&buf, msgfmt,
803 : vacrel->lpdead_item_pages,
804 : orig_rel_pages == 0 ? 100.0 :
805 37294 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
806 93630 : (long long) vacrel->lpdead_items);
807 234266 : for (int i = 0; i < vacrel->nindexes; i++)
808 : {
809 140636 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
810 :
811 140636 : if (!istat)
812 140440 : continue;
813 :
814 196 : appendStringInfo(&buf,
815 196 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
816 196 : indnames[i],
817 : istat->num_pages,
818 : istat->pages_newly_deleted,
819 : istat->pages_deleted,
820 : istat->pages_free);
821 : }
822 93630 : if (track_io_timing)
823 : {
824 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
825 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
826 :
827 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
828 : read_ms, write_ms);
829 : }
830 93630 : if (secs_dur > 0 || usecs_dur > 0)
831 : {
832 93630 : read_rate = (double) BLCKSZ * total_blks_read /
833 93630 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
834 93630 : write_rate = (double) BLCKSZ * total_blks_dirtied /
835 93630 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
836 : }
837 93630 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
838 : read_rate, write_rate);
839 93630 : appendStringInfo(&buf,
840 93630 : _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
841 : (long long) total_blks_hit,
842 : (long long) total_blks_read,
843 : (long long) total_blks_dirtied);
844 93630 : appendStringInfo(&buf,
845 93630 : _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
846 93630 : (long long) walusage.wal_records,
847 93630 : (long long) walusage.wal_fpi,
848 93630 : (unsigned long long) walusage.wal_bytes);
849 93630 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
850 :
851 93630 : ereport(verbose ? INFO : LOG,
852 : (errmsg_internal("%s", buf.data)));
853 93630 : pfree(buf.data);
854 : }
855 : }
856 :
857 : /* Cleanup index statistics and index names */
858 284470 : for (int i = 0; i < vacrel->nindexes; i++)
859 : {
860 170050 : if (vacrel->indstats[i])
861 2316 : pfree(vacrel->indstats[i]);
862 :
863 170050 : if (instrument)
864 140708 : pfree(indnames[i]);
865 : }
866 114420 : }
867 :
868 : /*
869 : * lazy_scan_heap() -- workhorse function for VACUUM
870 : *
871 : * This routine prunes each page in the heap, and considers the need to
872 : * freeze remaining tuples with storage (not including pages that can be
873 : * skipped using the visibility map). Also performs related maintenance
874 : * of the FSM and visibility map. These steps all take place during an
875 : * initial pass over the target heap relation.
876 : *
877 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
878 : * consists of deleting index tuples that point to LP_DEAD items left in
879 : * heap pages following pruning. Earlier initial pass over the heap will
880 : * have collected the TIDs whose index tuples need to be removed.
881 : *
882 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
883 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
884 : * as LP_UNUSED. This has to happen in a second, final pass over the
885 : * heap, to preserve a basic invariant that all index AMs rely on: no
886 : * extant index tuple can ever be allowed to contain a TID that points to
887 : * an LP_UNUSED line pointer in the heap. We must disallow premature
888 : * recycling of line pointers to avoid index scans that get confused
889 : * about which TID points to which tuple immediately after recycling.
890 : * (Actually, this isn't a concern when target heap relation happens to
891 : * have no indexes, which allows us to safely apply the one-pass strategy
892 : * as an optimization).
893 : *
894 : * In practice we often have enough space to fit all TIDs, and so won't
895 : * need to call lazy_vacuum more than once, after our initial pass over
896 : * the heap has totally finished. Otherwise things are slightly more
897 : * complicated: our "initial pass" over the heap applies only to those
898 : * pages that were pruned before we needed to call lazy_vacuum, and our
899 : * "final pass" over the heap only vacuums these same heap pages.
900 : * However, we process indexes in full every time lazy_vacuum is called,
901 : * which makes index processing very inefficient when memory is in short
902 : * supply.
903 : */
904 : static void
905 114420 : lazy_scan_heap(LVRelState *vacrel)
906 : {
907 114420 : BlockNumber rel_pages = vacrel->rel_pages,
908 : blkno,
909 114420 : next_fsm_block_to_vacuum = 0;
910 : bool all_visible_according_to_vm;
911 :
912 114420 : Buffer vmbuffer = InvalidBuffer;
913 114420 : const int initprog_index[] = {
914 : PROGRESS_VACUUM_PHASE,
915 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
916 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
917 : };
918 : int64 initprog_val[3];
919 :
920 : /* Report that we're scanning the heap, advertising total # of blocks */
921 114420 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
922 114420 : initprog_val[1] = rel_pages;
923 114420 : initprog_val[2] = vacrel->dead_items_info->max_bytes;
924 114420 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
925 :
926 : /* Initialize for the first heap_vac_scan_next_block() call */
927 114420 : vacrel->current_block = InvalidBlockNumber;
928 114420 : vacrel->next_unskippable_block = InvalidBlockNumber;
929 114420 : vacrel->next_unskippable_allvis = false;
930 114420 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
931 :
932 858196 : while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
933 : {
934 : Buffer buf;
935 : Page page;
936 : bool has_lpdead_items;
937 743776 : bool got_cleanup_lock = false;
938 :
939 743776 : vacrel->scanned_pages++;
940 :
941 : /* Report as block scanned, update error traceback information */
942 743776 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
943 743776 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
944 : blkno, InvalidOffsetNumber);
945 :
946 743776 : vacuum_delay_point();
947 :
948 : /*
949 : * Regularly check if wraparound failsafe should trigger.
950 : *
951 : * There is a similar check inside lazy_vacuum_all_indexes(), but
952 : * relfrozenxid might start to look dangerously old before we reach
953 : * that point. This check also provides failsafe coverage for the
954 : * one-pass strategy, and the two-pass strategy with the index_cleanup
955 : * param set to 'off'.
956 : */
957 743776 : if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
958 0 : lazy_check_wraparound_failsafe(vacrel);
959 :
960 : /*
961 : * Consider if we definitely have enough space to process TIDs on page
962 : * already. If we are close to overrunning the available space for
963 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
964 : * this page.
965 : */
966 743776 : if (TidStoreMemoryUsage(vacrel->dead_items) > vacrel->dead_items_info->max_bytes)
967 : {
968 : /*
969 : * Before beginning index vacuuming, we release any pin we may
970 : * hold on the visibility map page. This isn't necessary for
971 : * correctness, but we do it anyway to avoid holding the pin
972 : * across a lengthy, unrelated operation.
973 : */
974 0 : if (BufferIsValid(vmbuffer))
975 : {
976 0 : ReleaseBuffer(vmbuffer);
977 0 : vmbuffer = InvalidBuffer;
978 : }
979 :
980 : /* Perform a round of index and heap vacuuming */
981 0 : vacrel->consider_bypass_optimization = false;
982 0 : lazy_vacuum(vacrel);
983 :
984 : /*
985 : * Vacuum the Free Space Map to make newly-freed space visible on
986 : * upper-level FSM pages. Note we have not yet processed blkno.
987 : */
988 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
989 : blkno);
990 0 : next_fsm_block_to_vacuum = blkno;
991 :
992 : /* Report that we are once again scanning the heap */
993 0 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
994 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
995 : }
996 :
997 : /*
998 : * Pin the visibility map page in case we need to mark the page
999 : * all-visible. In most cases this will be very cheap, because we'll
1000 : * already have the correct page pinned anyway.
1001 : */
1002 743776 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
1003 :
1004 743776 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1005 : vacrel->bstrategy);
1006 743776 : page = BufferGetPage(buf);
1007 :
1008 : /*
1009 : * We need a buffer cleanup lock to prune HOT chains and defragment
1010 : * the page in lazy_scan_prune. But when it's not possible to acquire
1011 : * a cleanup lock right away, we may be able to settle for reduced
1012 : * processing using lazy_scan_noprune.
1013 : */
1014 743776 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
1015 :
1016 743776 : if (!got_cleanup_lock)
1017 12 : LockBuffer(buf, BUFFER_LOCK_SHARE);
1018 :
1019 : /* Check for new or empty pages before lazy_scan_[no]prune call */
1020 743776 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
1021 743776 : vmbuffer))
1022 : {
1023 : /* Processed as new/empty page (lock and pin released) */
1024 2092 : continue;
1025 : }
1026 :
1027 : /*
1028 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
1029 : * items in the dead_items area for later vacuuming, count live and
1030 : * recently dead tuples for vacuum logging, and determine if this
1031 : * block could later be truncated. If we encounter any xid/mxids that
1032 : * require advancing the relfrozenxid/relminxid, we'll have to wait
1033 : * for a cleanup lock and call lazy_scan_prune().
1034 : */
1035 741684 : if (!got_cleanup_lock &&
1036 12 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
1037 : {
1038 : /*
1039 : * lazy_scan_noprune could not do all required processing. Wait
1040 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
1041 : */
1042 : Assert(vacrel->aggressive);
1043 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1044 0 : LockBufferForCleanup(buf);
1045 0 : got_cleanup_lock = true;
1046 : }
1047 :
1048 : /*
1049 : * If we have a cleanup lock, we must now prune, freeze, and count
1050 : * tuples. We may have acquired the cleanup lock originally, or we may
1051 : * have gone back and acquired it after lazy_scan_noprune() returned
1052 : * false. Either way, the page hasn't been processed yet.
1053 : *
1054 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
1055 : * recently_dead_tuples and live tuples for vacuum logging, determine
1056 : * if the block can later be truncated, and accumulate the details of
1057 : * remaining LP_DEAD line pointers on the page into dead_items. These
1058 : * dead items include those pruned by lazy_scan_prune() as well as
1059 : * line pointers previously marked LP_DEAD.
1060 : */
1061 741684 : if (got_cleanup_lock)
1062 741672 : lazy_scan_prune(vacrel, buf, blkno, page,
1063 : vmbuffer, all_visible_according_to_vm,
1064 : &has_lpdead_items);
1065 :
1066 : /*
1067 : * Now drop the buffer lock and, potentially, update the FSM.
1068 : *
1069 : * Our goal is to update the freespace map the last time we touch the
1070 : * page. If we'll process a block in the second pass, we may free up
1071 : * additional space on the page, so it is better to update the FSM
1072 : * after the second pass. If the relation has no indexes, or if index
1073 : * vacuuming is disabled, there will be no second heap pass; if this
1074 : * particular page has no dead items, the second heap pass will not
1075 : * touch this page. So, in those cases, update the FSM now.
1076 : *
1077 : * Note: In corner cases, it's possible to miss updating the FSM
1078 : * entirely. If index vacuuming is currently enabled, we'll skip the
1079 : * FSM update now. But if failsafe mode is later activated, or there
1080 : * are so few dead tuples that index vacuuming is bypassed, there will
1081 : * also be no opportunity to update the FSM later, because we'll never
1082 : * revisit this page. Since updating the FSM is desirable but not
1083 : * absolutely required, that's OK.
1084 : */
1085 741684 : if (vacrel->nindexes == 0
1086 721244 : || !vacrel->do_index_vacuuming
1087 559092 : || !has_lpdead_items)
1088 717012 : {
1089 717012 : Size freespace = PageGetHeapFreeSpace(page);
1090 :
1091 717012 : UnlockReleaseBuffer(buf);
1092 717012 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1093 :
1094 : /*
1095 : * Periodically perform FSM vacuuming to make newly-freed space
1096 : * visible on upper FSM pages. This is done after vacuuming if the
1097 : * table has indexes. There will only be newly-freed space if we
1098 : * held the cleanup lock and lazy_scan_prune() was called.
1099 : */
1100 717012 : if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1101 0 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1102 : {
1103 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1104 : blkno);
1105 0 : next_fsm_block_to_vacuum = blkno;
1106 : }
1107 : }
1108 : else
1109 24672 : UnlockReleaseBuffer(buf);
1110 : }
1111 :
1112 114420 : vacrel->blkno = InvalidBlockNumber;
1113 114420 : if (BufferIsValid(vmbuffer))
1114 46052 : ReleaseBuffer(vmbuffer);
1115 :
1116 : /* report that everything is now scanned */
1117 114420 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1118 :
1119 : /* now we can compute the new value for pg_class.reltuples */
1120 228840 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1121 : vacrel->scanned_pages,
1122 114420 : vacrel->live_tuples);
1123 :
1124 : /*
1125 : * Also compute the total number of surviving heap entries. In the
1126 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1127 : */
1128 114420 : vacrel->new_rel_tuples =
1129 114420 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1130 114420 : vacrel->missed_dead_tuples;
1131 :
1132 : /*
1133 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1134 : * related heap vacuuming
1135 : */
1136 114420 : if (vacrel->dead_items_info->num_items > 0)
1137 1060 : lazy_vacuum(vacrel);
1138 :
1139 : /*
1140 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1141 : * not there were indexes, and whether or not we bypassed index vacuuming.
1142 : */
1143 114420 : if (blkno > next_fsm_block_to_vacuum)
1144 46052 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1145 :
1146 : /* report all blocks vacuumed */
1147 114420 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
1148 :
1149 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1150 114420 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1151 90064 : lazy_cleanup_all_indexes(vacrel);
1152 114420 : }
1153 :
1154 : /*
1155 : * heap_vac_scan_next_block() -- get next block for vacuum to process
1156 : *
1157 : * lazy_scan_heap() calls here every time it needs to get the next block to
1158 : * prune and vacuum. The function uses the visibility map, vacuum options,
1159 : * and various thresholds to skip blocks which do not need to be processed and
1160 : * sets blkno to the next block to process.
1161 : *
1162 : * The block number and visibility status of the next block to process are set
1163 : * in *blkno and *all_visible_according_to_vm. The return value is false if
1164 : * there are no further blocks to process.
1165 : *
1166 : * vacrel is an in/out parameter here. Vacuum options and information about
1167 : * the relation are read. vacrel->skippedallvis is set if we skip a block
1168 : * that's all-visible but not all-frozen, to ensure that we don't update
1169 : * relfrozenxid in that case. vacrel also holds information about the next
1170 : * unskippable block, as bookkeeping for this function.
1171 : */
1172 : static bool
1173 858196 : heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
1174 : bool *all_visible_according_to_vm)
1175 : {
1176 : BlockNumber next_block;
1177 :
1178 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1179 858196 : next_block = vacrel->current_block + 1;
1180 :
1181 : /* Have we reached the end of the relation? */
1182 858196 : if (next_block >= vacrel->rel_pages)
1183 : {
1184 114420 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1185 : {
1186 43688 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1187 43688 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1188 : }
1189 114420 : *blkno = vacrel->rel_pages;
1190 114420 : return false;
1191 : }
1192 :
1193 : /*
1194 : * We must be in one of the three following states:
1195 : */
1196 743776 : if (next_block > vacrel->next_unskippable_block ||
1197 177056 : vacrel->next_unskippable_block == InvalidBlockNumber)
1198 : {
1199 : /*
1200 : * 1. We have just processed an unskippable block (or we're at the
1201 : * beginning of the scan). Find the next unskippable block using the
1202 : * visibility map.
1203 : */
1204 : bool skipsallvis;
1205 :
1206 612772 : find_next_unskippable_block(vacrel, &skipsallvis);
1207 :
1208 : /*
1209 : * We now know the next block that we must process. It can be the
1210 : * next block after the one we just processed, or something further
1211 : * ahead. If it's further ahead, we can jump to it, but we choose to
1212 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1213 : * pages. Since we're reading sequentially, the OS should be doing
1214 : * readahead for us, so there's no gain in skipping a page now and
1215 : * then. Skipping such a range might even discourage sequential
1216 : * detection.
1217 : *
1218 : * This test also enables more frequent relfrozenxid advancement
1219 : * during non-aggressive VACUUMs. If the range has any all-visible
1220 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1221 : * real downside.
1222 : */
1223 612772 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1224 : {
1225 3940 : next_block = vacrel->next_unskippable_block;
1226 3940 : if (skipsallvis)
1227 54 : vacrel->skippedallvis = true;
1228 : }
1229 : }
1230 :
1231 : /* Now we must be in one of the two remaining states: */
1232 743776 : if (next_block < vacrel->next_unskippable_block)
1233 : {
1234 : /*
1235 : * 2. We are processing a range of blocks that we could have skipped
1236 : * but chose not to. We know that they are all-visible in the VM,
1237 : * otherwise they would've been unskippable.
1238 : */
1239 131004 : *blkno = vacrel->current_block = next_block;
1240 131004 : *all_visible_according_to_vm = true;
1241 131004 : return true;
1242 : }
1243 : else
1244 : {
1245 : /*
1246 : * 3. We reached the next unskippable block. Process it. On next
1247 : * iteration, we will be back in state 1.
1248 : */
1249 : Assert(next_block == vacrel->next_unskippable_block);
1250 :
1251 612772 : *blkno = vacrel->current_block = next_block;
1252 612772 : *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
1253 612772 : return true;
1254 : }
1255 : }
1256 :
1257 : /*
1258 : * Find the next unskippable block in a vacuum scan using the visibility map.
1259 : * The next unskippable block and its visibility information is updated in
1260 : * vacrel.
1261 : *
1262 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1263 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1264 : * was concurrently cleared, though. All that matters is that caller scan all
1265 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1266 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1267 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1268 : * to skip such a range is actually made, making everything safe.)
1269 : */
1270 : static void
1271 612772 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1272 : {
1273 612772 : BlockNumber rel_pages = vacrel->rel_pages;
1274 612772 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1275 612772 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1276 : bool next_unskippable_allvis;
1277 :
1278 612772 : *skipsallvis = false;
1279 :
1280 : for (;;)
1281 439924 : {
1282 1052696 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1283 : next_unskippable_block,
1284 : &next_unskippable_vmbuffer);
1285 :
1286 1052696 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1287 :
1288 : /*
1289 : * A block is unskippable if it is not all visible according to the
1290 : * visibility map.
1291 : */
1292 1052696 : if (!next_unskippable_allvis)
1293 : {
1294 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1295 573364 : break;
1296 : }
1297 :
1298 : /*
1299 : * Caller must scan the last page to determine whether it has tuples
1300 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1301 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1302 : * lock on rel to attempt a truncation that fails anyway, just because
1303 : * there are tuples on the last page (it is likely that there will be
1304 : * tuples on other nearby pages as well, but those can be skipped).
1305 : *
1306 : * Implement this by always treating the last block as unsafe to skip.
1307 : */
1308 479332 : if (next_unskippable_block == rel_pages - 1)
1309 38664 : break;
1310 :
1311 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1312 440668 : if (!vacrel->skipwithvm)
1313 742 : break;
1314 :
1315 : /*
1316 : * Aggressive VACUUM caller can't skip pages just because they are
1317 : * all-visible. They may still skip all-frozen pages, which can't
1318 : * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1319 : */
1320 439926 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1321 : {
1322 5104 : if (vacrel->aggressive)
1323 2 : break;
1324 :
1325 : /*
1326 : * All-visible block is safe to skip in non-aggressive case. But
1327 : * remember that the final range contains such a block for later.
1328 : */
1329 5102 : *skipsallvis = true;
1330 : }
1331 :
1332 439924 : next_unskippable_block++;
1333 : }
1334 :
1335 : /* write the local variables back to vacrel */
1336 612772 : vacrel->next_unskippable_block = next_unskippable_block;
1337 612772 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1338 612772 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1339 612772 : }
1340 :
1341 : /*
1342 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1343 : *
1344 : * Must call here to handle both new and empty pages before calling
1345 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1346 : * with new or empty pages.
1347 : *
1348 : * It's necessary to consider new pages as a special case, since the rules for
1349 : * maintaining the visibility map and FSM with empty pages are a little
1350 : * different (though new pages can be truncated away during rel truncation).
1351 : *
1352 : * Empty pages are not really a special case -- they're just heap pages that
1353 : * have no allocated tuples (including even LP_UNUSED items). You might
1354 : * wonder why we need to handle them here all the same. It's only necessary
1355 : * because of a corner-case involving a hard crash during heap relation
1356 : * extension. If we ever make relation-extension crash safe, then it should
1357 : * no longer be necessary to deal with empty pages here (or new pages, for
1358 : * that matter).
1359 : *
1360 : * Caller must hold at least a shared lock. We might need to escalate the
1361 : * lock in that case, so the type of lock caller holds needs to be specified
1362 : * using 'sharelock' argument.
1363 : *
1364 : * Returns false in common case where caller should go on to call
1365 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1366 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1367 : * behalf.
1368 : */
1369 : static bool
1370 743776 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1371 : Page page, bool sharelock, Buffer vmbuffer)
1372 : {
1373 : Size freespace;
1374 :
1375 743776 : if (PageIsNew(page))
1376 : {
1377 : /*
1378 : * All-zeroes pages can be left over if either a backend extends the
1379 : * relation by a single page, but crashes before the newly initialized
1380 : * page has been written out, or when bulk-extending the relation
1381 : * (which creates a number of empty pages at the tail end of the
1382 : * relation), and then enters them into the FSM.
1383 : *
1384 : * Note we do not enter the page into the visibilitymap. That has the
1385 : * downside that we repeatedly visit this page in subsequent vacuums,
1386 : * but otherwise we'll never discover the space on a promoted standby.
1387 : * The harm of repeated checking ought to normally not be too bad. The
1388 : * space usually should be used at some point, otherwise there
1389 : * wouldn't be any regular vacuums.
1390 : *
1391 : * Make sure these pages are in the FSM, to ensure they can be reused.
1392 : * Do that by testing if there's any space recorded for the page. If
1393 : * not, enter it. We do so after releasing the lock on the heap page,
1394 : * the FSM is approximate, after all.
1395 : */
1396 1906 : UnlockReleaseBuffer(buf);
1397 :
1398 1906 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1399 : {
1400 950 : freespace = BLCKSZ - SizeOfPageHeaderData;
1401 :
1402 950 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1403 : }
1404 :
1405 1906 : return true;
1406 : }
1407 :
1408 741870 : if (PageIsEmpty(page))
1409 : {
1410 : /*
1411 : * It seems likely that caller will always be able to get a cleanup
1412 : * lock on an empty page. But don't take any chances -- escalate to
1413 : * an exclusive lock (still don't need a cleanup lock, though).
1414 : */
1415 186 : if (sharelock)
1416 : {
1417 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1418 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1419 :
1420 0 : if (!PageIsEmpty(page))
1421 : {
1422 : /* page isn't new or empty -- keep lock and pin for now */
1423 0 : return false;
1424 : }
1425 : }
1426 : else
1427 : {
1428 : /* Already have a full cleanup lock (which is more than enough) */
1429 : }
1430 :
1431 : /*
1432 : * Unlike new pages, empty pages are always set all-visible and
1433 : * all-frozen.
1434 : */
1435 186 : if (!PageIsAllVisible(page))
1436 : {
1437 : uint8 old_vmbits;
1438 :
1439 0 : START_CRIT_SECTION();
1440 :
1441 : /* mark buffer dirty before writing a WAL record */
1442 0 : MarkBufferDirty(buf);
1443 :
1444 : /*
1445 : * It's possible that another backend has extended the heap,
1446 : * initialized the page, and then failed to WAL-log the page due
1447 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1448 : * might try to replay our record setting the page all-visible and
1449 : * find that the page isn't initialized, which will cause a PANIC.
1450 : * To prevent that, check whether the page has been previously
1451 : * WAL-logged, and if not, do that now.
1452 : */
1453 0 : if (RelationNeedsWAL(vacrel->rel) &&
1454 0 : PageGetLSN(page) == InvalidXLogRecPtr)
1455 0 : log_newpage_buffer(buf, true);
1456 :
1457 0 : PageSetAllVisible(page);
1458 0 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1459 : InvalidXLogRecPtr,
1460 : vmbuffer, InvalidTransactionId,
1461 : VISIBILITYMAP_ALL_VISIBLE |
1462 : VISIBILITYMAP_ALL_FROZEN);
1463 0 : END_CRIT_SECTION();
1464 :
1465 : /*
1466 : * If the page wasn't already set all-visible and/or all-frozen in
1467 : * the VM, count it as newly set for logging.
1468 : */
1469 0 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1470 : {
1471 0 : vacrel->vm_new_visible_pages++;
1472 0 : vacrel->vm_new_visible_frozen_pages++;
1473 : }
1474 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1475 0 : vacrel->vm_new_frozen_pages++;
1476 : }
1477 :
1478 186 : freespace = PageGetHeapFreeSpace(page);
1479 186 : UnlockReleaseBuffer(buf);
1480 186 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1481 186 : return true;
1482 : }
1483 :
1484 : /* page isn't new or empty -- keep lock and pin */
1485 741684 : return false;
1486 : }
1487 :
1488 : /* qsort comparator for sorting OffsetNumbers */
1489 : static int
1490 6061452 : cmpOffsetNumbers(const void *a, const void *b)
1491 : {
1492 6061452 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1493 : }
1494 :
1495 : /*
1496 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1497 : *
1498 : * Caller must hold pin and buffer cleanup lock on the buffer.
1499 : *
1500 : * vmbuffer is the buffer containing the VM block with visibility information
1501 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1502 : * visibility status of the heap block looked up earlier by the caller. We
1503 : * won't rely entirely on this status, as it may be out of date.
1504 : *
1505 : * *has_lpdead_items is set to true or false depending on whether, upon return
1506 : * from this function, any LP_DEAD items are still present on the page.
1507 : */
1508 : static void
1509 741672 : lazy_scan_prune(LVRelState *vacrel,
1510 : Buffer buf,
1511 : BlockNumber blkno,
1512 : Page page,
1513 : Buffer vmbuffer,
1514 : bool all_visible_according_to_vm,
1515 : bool *has_lpdead_items)
1516 : {
1517 741672 : Relation rel = vacrel->rel;
1518 : PruneFreezeResult presult;
1519 741672 : int prune_options = 0;
1520 :
1521 : Assert(BufferGetBlockNumber(buf) == blkno);
1522 :
1523 : /*
1524 : * Prune all HOT-update chains and potentially freeze tuples on this page.
1525 : *
1526 : * If the relation has no indexes, we can immediately mark would-be dead
1527 : * items LP_UNUSED.
1528 : *
1529 : * The number of tuples removed from the page is returned in
1530 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
1531 : * presult.lpdead_items's final value can be thought of as the number of
1532 : * tuples that were deleted from indexes.
1533 : *
1534 : * We will update the VM after collecting LP_DEAD items and freezing
1535 : * tuples. Pruning will have determined whether or not the page is
1536 : * all-visible.
1537 : */
1538 741672 : prune_options = HEAP_PAGE_PRUNE_FREEZE;
1539 741672 : if (vacrel->nindexes == 0)
1540 20440 : prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1541 :
1542 741672 : heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1543 : &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1544 : &vacrel->offnum,
1545 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1546 :
1547 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
1548 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
1549 :
1550 741672 : if (presult.nfrozen > 0)
1551 : {
1552 : /*
1553 : * We don't increment the new_frozen_tuple_pages instrumentation
1554 : * counter when nfrozen == 0, since it only counts pages with newly
1555 : * frozen tuples (don't confuse that with pages newly set all-frozen
1556 : * in VM).
1557 : */
1558 32006 : vacrel->new_frozen_tuple_pages++;
1559 : }
1560 :
1561 : /*
1562 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1563 : * the heap to determine all_visible and all_frozen for the page -- this
1564 : * is a specialized version of the logic from this function. Now that
1565 : * we've finished pruning and freezing, make sure that we're in total
1566 : * agreement with heap_page_is_all_visible() using an assertion.
1567 : */
1568 : #ifdef USE_ASSERT_CHECKING
1569 : /* Note that all_frozen value does not matter when !all_visible */
1570 : if (presult.all_visible)
1571 : {
1572 : TransactionId debug_cutoff;
1573 : bool debug_all_frozen;
1574 :
1575 : Assert(presult.lpdead_items == 0);
1576 :
1577 : if (!heap_page_is_all_visible(vacrel, buf,
1578 : &debug_cutoff, &debug_all_frozen))
1579 : Assert(false);
1580 :
1581 : Assert(presult.all_frozen == debug_all_frozen);
1582 :
1583 : Assert(!TransactionIdIsValid(debug_cutoff) ||
1584 : debug_cutoff == presult.vm_conflict_horizon);
1585 : }
1586 : #endif
1587 :
1588 : /*
1589 : * Now save details of the LP_DEAD items from the page in vacrel
1590 : */
1591 741672 : if (presult.lpdead_items > 0)
1592 : {
1593 29304 : vacrel->lpdead_item_pages++;
1594 :
1595 : /*
1596 : * deadoffsets are collected incrementally in
1597 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
1598 : * with an indeterminate order, but dead_items_add requires them to be
1599 : * sorted.
1600 : */
1601 29304 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
1602 : cmpOffsetNumbers);
1603 :
1604 29304 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
1605 : }
1606 :
1607 : /* Finally, add page-local counts to whole-VACUUM counts */
1608 741672 : vacrel->tuples_deleted += presult.ndeleted;
1609 741672 : vacrel->tuples_frozen += presult.nfrozen;
1610 741672 : vacrel->lpdead_items += presult.lpdead_items;
1611 741672 : vacrel->live_tuples += presult.live_tuples;
1612 741672 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
1613 :
1614 : /* Can't truncate this page */
1615 741672 : if (presult.hastup)
1616 726476 : vacrel->nonempty_pages = blkno + 1;
1617 :
1618 : /* Did we find LP_DEAD items? */
1619 741672 : *has_lpdead_items = (presult.lpdead_items > 0);
1620 :
1621 : Assert(!presult.all_visible || !(*has_lpdead_items));
1622 :
1623 : /*
1624 : * Handle setting visibility map bit based on information from the VM (as
1625 : * of last heap_vac_scan_next_block() call), and from all_visible and
1626 : * all_frozen variables
1627 : */
1628 741672 : if (!all_visible_according_to_vm && presult.all_visible)
1629 58072 : {
1630 : uint8 old_vmbits;
1631 58072 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
1632 :
1633 58072 : if (presult.all_frozen)
1634 : {
1635 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1636 39788 : flags |= VISIBILITYMAP_ALL_FROZEN;
1637 : }
1638 :
1639 : /*
1640 : * It should never be the case that the visibility map page is set
1641 : * while the page-level bit is clear, but the reverse is allowed (if
1642 : * checksums are not enabled). Regardless, set both bits so that we
1643 : * get back in sync.
1644 : *
1645 : * NB: If the heap page is all-visible but the VM bit is not set, we
1646 : * don't need to dirty the heap page. However, if checksums are
1647 : * enabled, we do need to make sure that the heap page is dirtied
1648 : * before passing it to visibilitymap_set(), because it may be logged.
1649 : * Given that this situation should only happen in rare cases after a
1650 : * crash, it is not worth optimizing.
1651 : */
1652 58072 : PageSetAllVisible(page);
1653 58072 : MarkBufferDirty(buf);
1654 58072 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1655 : InvalidXLogRecPtr,
1656 : vmbuffer, presult.vm_conflict_horizon,
1657 : flags);
1658 :
1659 : /*
1660 : * If the page wasn't already set all-visible and/or all-frozen in the
1661 : * VM, count it as newly set for logging.
1662 : */
1663 58072 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1664 : {
1665 58072 : vacrel->vm_new_visible_pages++;
1666 58072 : if (presult.all_frozen)
1667 39788 : vacrel->vm_new_visible_frozen_pages++;
1668 : }
1669 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
1670 0 : presult.all_frozen)
1671 0 : vacrel->vm_new_frozen_pages++;
1672 : }
1673 :
1674 : /*
1675 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1676 : * page-level bit is clear. However, it's possible that the bit got
1677 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
1678 : * with buffer lock before concluding that the VM is corrupt.
1679 : */
1680 683600 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1681 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1682 : {
1683 0 : elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1684 : vacrel->relname, blkno);
1685 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1686 : VISIBILITYMAP_VALID_BITS);
1687 : }
1688 :
1689 : /*
1690 : * It's possible for the value returned by
1691 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1692 : * wrong for us to see tuples that appear to not be visible to everyone
1693 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
1694 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
1695 : * conservative and sometimes returns a value that's unnecessarily small,
1696 : * so if we see that contradiction it just means that the tuples that we
1697 : * think are not visible to everyone yet actually are, and the
1698 : * PD_ALL_VISIBLE flag is correct.
1699 : *
1700 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
1701 : * however.
1702 : */
1703 683600 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
1704 : {
1705 0 : elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1706 : vacrel->relname, blkno);
1707 0 : PageClearAllVisible(page);
1708 0 : MarkBufferDirty(buf);
1709 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1710 : VISIBILITYMAP_VALID_BITS);
1711 : }
1712 :
1713 : /*
1714 : * If the all-visible page is all-frozen but not marked as such yet, mark
1715 : * it as all-frozen. Note that all_frozen is only valid if all_visible is
1716 : * true, so we must check both all_visible and all_frozen.
1717 : */
1718 683600 : else if (all_visible_according_to_vm && presult.all_visible &&
1719 170222 : presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1720 : {
1721 : uint8 old_vmbits;
1722 :
1723 : /*
1724 : * Avoid relying on all_visible_according_to_vm as a proxy for the
1725 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
1726 : * stale -- even when all_visible is set
1727 : */
1728 26 : if (!PageIsAllVisible(page))
1729 : {
1730 0 : PageSetAllVisible(page);
1731 0 : MarkBufferDirty(buf);
1732 : }
1733 :
1734 : /*
1735 : * Set the page all-frozen (and all-visible) in the VM.
1736 : *
1737 : * We can pass InvalidTransactionId as our cutoff_xid, since a
1738 : * snapshotConflictHorizon sufficient to make everything safe for REDO
1739 : * was logged when the page's tuples were frozen.
1740 : */
1741 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1742 26 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
1743 : InvalidXLogRecPtr,
1744 : vmbuffer, InvalidTransactionId,
1745 : VISIBILITYMAP_ALL_VISIBLE |
1746 : VISIBILITYMAP_ALL_FROZEN);
1747 :
1748 : /*
1749 : * The page was likely already set all-visible in the VM. However,
1750 : * there is a small chance that it was modified sometime between
1751 : * setting all_visible_according_to_vm and checking the visibility
1752 : * during pruning. Check the return value of old_vmbits anyway to
1753 : * ensure the visibility map counters used for logging are accurate.
1754 : */
1755 26 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
1756 : {
1757 0 : vacrel->vm_new_visible_pages++;
1758 0 : vacrel->vm_new_visible_frozen_pages++;
1759 : }
1760 :
1761 : /*
1762 : * We already checked that the page was not set all-frozen in the VM
1763 : * above, so we don't need to test the value of old_vmbits.
1764 : */
1765 : else
1766 26 : vacrel->vm_new_frozen_pages++;
1767 : }
1768 741672 : }
1769 :
1770 : /*
1771 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1772 : *
1773 : * Caller need only hold a pin and share lock on the buffer, unlike
1774 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1775 : * performed here, it's quite possible that an earlier opportunistic pruning
1776 : * operation left LP_DEAD items behind. We'll at least collect any such items
1777 : * in dead_items for removal from indexes.
1778 : *
1779 : * For aggressive VACUUM callers, we may return false to indicate that a full
1780 : * cleanup lock is required for processing by lazy_scan_prune. This is only
1781 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1782 : * one or more tuples on the page. We always return true for non-aggressive
1783 : * callers.
1784 : *
1785 : * If this function returns true, *has_lpdead_items gets set to true or false
1786 : * depending on whether, upon return from this function, any LP_DEAD items are
1787 : * present on the page. If this function returns false, *has_lpdead_items
1788 : * is not updated.
1789 : */
1790 : static bool
1791 12 : lazy_scan_noprune(LVRelState *vacrel,
1792 : Buffer buf,
1793 : BlockNumber blkno,
1794 : Page page,
1795 : bool *has_lpdead_items)
1796 : {
1797 : OffsetNumber offnum,
1798 : maxoff;
1799 : int lpdead_items,
1800 : live_tuples,
1801 : recently_dead_tuples,
1802 : missed_dead_tuples;
1803 : bool hastup;
1804 : HeapTupleHeader tupleheader;
1805 12 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1806 12 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1807 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1808 :
1809 : Assert(BufferGetBlockNumber(buf) == blkno);
1810 :
1811 12 : hastup = false; /* for now */
1812 :
1813 12 : lpdead_items = 0;
1814 12 : live_tuples = 0;
1815 12 : recently_dead_tuples = 0;
1816 12 : missed_dead_tuples = 0;
1817 :
1818 12 : maxoff = PageGetMaxOffsetNumber(page);
1819 364 : for (offnum = FirstOffsetNumber;
1820 : offnum <= maxoff;
1821 352 : offnum = OffsetNumberNext(offnum))
1822 : {
1823 : ItemId itemid;
1824 : HeapTupleData tuple;
1825 :
1826 352 : vacrel->offnum = offnum;
1827 352 : itemid = PageGetItemId(page, offnum);
1828 :
1829 352 : if (!ItemIdIsUsed(itemid))
1830 0 : continue;
1831 :
1832 352 : if (ItemIdIsRedirected(itemid))
1833 : {
1834 0 : hastup = true;
1835 0 : continue;
1836 : }
1837 :
1838 352 : if (ItemIdIsDead(itemid))
1839 : {
1840 : /*
1841 : * Deliberately don't set hastup=true here. See same point in
1842 : * lazy_scan_prune for an explanation.
1843 : */
1844 0 : deadoffsets[lpdead_items++] = offnum;
1845 0 : continue;
1846 : }
1847 :
1848 352 : hastup = true; /* page prevents rel truncation */
1849 352 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1850 352 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
1851 : &NoFreezePageRelfrozenXid,
1852 : &NoFreezePageRelminMxid))
1853 : {
1854 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
1855 128 : if (vacrel->aggressive)
1856 : {
1857 : /*
1858 : * Aggressive VACUUMs must always be able to advance rel's
1859 : * relfrozenxid to a value >= FreezeLimit (and be able to
1860 : * advance rel's relminmxid to a value >= MultiXactCutoff).
1861 : * The ongoing aggressive VACUUM won't be able to do that
1862 : * unless it can freeze an XID (or MXID) from this tuple now.
1863 : *
1864 : * The only safe option is to have caller perform processing
1865 : * of this page using lazy_scan_prune. Caller might have to
1866 : * wait a while for a cleanup lock, but it can't be helped.
1867 : */
1868 0 : vacrel->offnum = InvalidOffsetNumber;
1869 0 : return false;
1870 : }
1871 :
1872 : /*
1873 : * Non-aggressive VACUUMs are under no obligation to advance
1874 : * relfrozenxid (even by one XID). We can be much laxer here.
1875 : *
1876 : * Currently we always just accept an older final relfrozenxid
1877 : * and/or relminmxid value. We never make caller wait or work a
1878 : * little harder, even when it likely makes sense to do so.
1879 : */
1880 : }
1881 :
1882 352 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
1883 352 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1884 352 : tuple.t_len = ItemIdGetLength(itemid);
1885 352 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
1886 :
1887 352 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1888 : buf))
1889 : {
1890 346 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1891 : case HEAPTUPLE_LIVE:
1892 :
1893 : /*
1894 : * Count both cases as live, just like lazy_scan_prune
1895 : */
1896 346 : live_tuples++;
1897 :
1898 346 : break;
1899 2 : case HEAPTUPLE_DEAD:
1900 :
1901 : /*
1902 : * There is some useful work for pruning to do, that won't be
1903 : * done due to failure to get a cleanup lock.
1904 : */
1905 2 : missed_dead_tuples++;
1906 2 : break;
1907 4 : case HEAPTUPLE_RECENTLY_DEAD:
1908 :
1909 : /*
1910 : * Count in recently_dead_tuples, just like lazy_scan_prune
1911 : */
1912 4 : recently_dead_tuples++;
1913 4 : break;
1914 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1915 :
1916 : /*
1917 : * Do not count these rows as live, just like lazy_scan_prune
1918 : */
1919 0 : break;
1920 0 : default:
1921 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1922 : break;
1923 : }
1924 : }
1925 :
1926 12 : vacrel->offnum = InvalidOffsetNumber;
1927 :
1928 : /*
1929 : * By here we know for sure that caller can put off freezing and pruning
1930 : * this particular page until the next VACUUM. Remember its details now.
1931 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
1932 : */
1933 12 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
1934 12 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
1935 :
1936 : /* Save any LP_DEAD items found on the page in dead_items */
1937 12 : if (vacrel->nindexes == 0)
1938 : {
1939 : /* Using one-pass strategy (since table has no indexes) */
1940 0 : if (lpdead_items > 0)
1941 : {
1942 : /*
1943 : * Perfunctory handling for the corner case where a single pass
1944 : * strategy VACUUM cannot get a cleanup lock, and it turns out
1945 : * that there is one or more LP_DEAD items: just count the LP_DEAD
1946 : * items as missed_dead_tuples instead. (This is a bit dishonest,
1947 : * but it beats having to maintain specialized heap vacuuming code
1948 : * forever, for vanishingly little benefit.)
1949 : */
1950 0 : hastup = true;
1951 0 : missed_dead_tuples += lpdead_items;
1952 : }
1953 : }
1954 12 : else if (lpdead_items > 0)
1955 : {
1956 : /*
1957 : * Page has LP_DEAD items, and so any references/TIDs that remain in
1958 : * indexes will be deleted during index vacuuming (and then marked
1959 : * LP_UNUSED in the heap)
1960 : */
1961 0 : vacrel->lpdead_item_pages++;
1962 :
1963 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
1964 :
1965 0 : vacrel->lpdead_items += lpdead_items;
1966 : }
1967 :
1968 : /*
1969 : * Finally, add relevant page-local counts to whole-VACUUM counts
1970 : */
1971 12 : vacrel->live_tuples += live_tuples;
1972 12 : vacrel->recently_dead_tuples += recently_dead_tuples;
1973 12 : vacrel->missed_dead_tuples += missed_dead_tuples;
1974 12 : if (missed_dead_tuples > 0)
1975 2 : vacrel->missed_dead_pages++;
1976 :
1977 : /* Can't truncate this page */
1978 12 : if (hastup)
1979 12 : vacrel->nonempty_pages = blkno + 1;
1980 :
1981 : /* Did we find LP_DEAD items? */
1982 12 : *has_lpdead_items = (lpdead_items > 0);
1983 :
1984 : /* Caller won't need to call lazy_scan_prune with same page */
1985 12 : return true;
1986 : }
1987 :
1988 : /*
1989 : * Main entry point for index vacuuming and heap vacuuming.
1990 : *
1991 : * Removes items collected in dead_items from table's indexes, then marks the
1992 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
1993 : * for full details.
1994 : *
1995 : * Also empties dead_items, freeing up space for later TIDs.
1996 : *
1997 : * We may choose to bypass index vacuuming at this point, though only when the
1998 : * ongoing VACUUM operation will definitely only have one index scan/round of
1999 : * index vacuuming.
2000 : */
2001 : static void
2002 1060 : lazy_vacuum(LVRelState *vacrel)
2003 : {
2004 : bool bypass;
2005 :
2006 : /* Should not end up here with no indexes */
2007 : Assert(vacrel->nindexes > 0);
2008 : Assert(vacrel->lpdead_item_pages > 0);
2009 :
2010 1060 : if (!vacrel->do_index_vacuuming)
2011 : {
2012 : Assert(!vacrel->do_index_cleanup);
2013 18 : dead_items_reset(vacrel);
2014 18 : return;
2015 : }
2016 :
2017 : /*
2018 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
2019 : *
2020 : * We currently only do this in cases where the number of LP_DEAD items
2021 : * for the entire VACUUM operation is close to zero. This avoids sharp
2022 : * discontinuities in the duration and overhead of successive VACUUM
2023 : * operations that run against the same table with a fixed workload.
2024 : * Ideally, successive VACUUM operations will behave as if there are
2025 : * exactly zero LP_DEAD items in cases where there are close to zero.
2026 : *
2027 : * This is likely to be helpful with a table that is continually affected
2028 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
2029 : * have small aberrations that lead to just a few heap pages retaining
2030 : * only one or two LP_DEAD items. This is pretty common; even when the
2031 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
2032 : * impossible to predict whether HOT will be applied in 100% of cases.
2033 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
2034 : * HOT through careful tuning.
2035 : */
2036 1042 : bypass = false;
2037 1042 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
2038 : {
2039 : BlockNumber threshold;
2040 :
2041 : Assert(vacrel->num_index_scans == 0);
2042 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
2043 : Assert(vacrel->do_index_vacuuming);
2044 : Assert(vacrel->do_index_cleanup);
2045 :
2046 : /*
2047 : * This crossover point at which we'll start to do index vacuuming is
2048 : * expressed as a percentage of the total number of heap pages in the
2049 : * table that are known to have at least one LP_DEAD item. This is
2050 : * much more important than the total number of LP_DEAD items, since
2051 : * it's a proxy for the number of heap pages whose visibility map bits
2052 : * cannot be set on account of bypassing index and heap vacuuming.
2053 : *
2054 : * We apply one further precautionary test: the space currently used
2055 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
2056 : * not exceed 32MB. This limits the risk that we will bypass index
2057 : * vacuuming again and again until eventually there is a VACUUM whose
2058 : * dead_items space is not CPU cache resident.
2059 : *
2060 : * We don't take any special steps to remember the LP_DEAD items (such
2061 : * as counting them in our final update to the stats system) when the
2062 : * optimization is applied. Though the accounting used in analyze.c's
2063 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
2064 : * rows in its own stats report, that's okay. The discrepancy should
2065 : * be negligible. If this optimization is ever expanded to cover more
2066 : * cases then this may need to be reconsidered.
2067 : */
2068 1022 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
2069 1024 : bypass = (vacrel->lpdead_item_pages < threshold &&
2070 2 : (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
2071 : }
2072 :
2073 1042 : if (bypass)
2074 : {
2075 : /*
2076 : * There are almost zero TIDs. Behave as if there were precisely
2077 : * zero: bypass index vacuuming, but do index cleanup.
2078 : *
2079 : * We expect that the ongoing VACUUM operation will finish very
2080 : * quickly, so there is no point in considering speeding up as a
2081 : * failsafe against wraparound failure. (Index cleanup is expected to
2082 : * finish very quickly in cases where there were no ambulkdelete()
2083 : * calls.)
2084 : */
2085 2 : vacrel->do_index_vacuuming = false;
2086 : }
2087 1040 : else if (lazy_vacuum_all_indexes(vacrel))
2088 : {
2089 : /*
2090 : * We successfully completed a round of index vacuuming. Do related
2091 : * heap vacuuming now.
2092 : */
2093 1040 : lazy_vacuum_heap_rel(vacrel);
2094 : }
2095 : else
2096 : {
2097 : /*
2098 : * Failsafe case.
2099 : *
2100 : * We attempted index vacuuming, but didn't finish a full round/full
2101 : * index scan. This happens when relfrozenxid or relminmxid is too
2102 : * far in the past.
2103 : *
2104 : * From this point on the VACUUM operation will do no further index
2105 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
2106 : * back here again.
2107 : */
2108 : Assert(VacuumFailsafeActive);
2109 : }
2110 :
2111 : /*
2112 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
2113 : * vacuum)
2114 : */
2115 1042 : dead_items_reset(vacrel);
2116 : }
2117 :
2118 : /*
2119 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
2120 : *
2121 : * Returns true in the common case when all indexes were successfully
2122 : * vacuumed. Returns false in rare cases where we determined that the ongoing
2123 : * VACUUM operation is at risk of taking too long to finish, leading to
2124 : * wraparound failure.
2125 : */
2126 : static bool
2127 1040 : lazy_vacuum_all_indexes(LVRelState *vacrel)
2128 : {
2129 1040 : bool allindexes = true;
2130 1040 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
2131 1040 : const int progress_start_index[] = {
2132 : PROGRESS_VACUUM_PHASE,
2133 : PROGRESS_VACUUM_INDEXES_TOTAL
2134 : };
2135 1040 : const int progress_end_index[] = {
2136 : PROGRESS_VACUUM_INDEXES_TOTAL,
2137 : PROGRESS_VACUUM_INDEXES_PROCESSED,
2138 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
2139 : };
2140 : int64 progress_start_val[2];
2141 : int64 progress_end_val[3];
2142 :
2143 : Assert(vacrel->nindexes > 0);
2144 : Assert(vacrel->do_index_vacuuming);
2145 : Assert(vacrel->do_index_cleanup);
2146 :
2147 : /* Precheck for XID wraparound emergencies */
2148 1040 : if (lazy_check_wraparound_failsafe(vacrel))
2149 : {
2150 : /* Wraparound emergency -- don't even start an index scan */
2151 0 : return false;
2152 : }
2153 :
2154 : /*
2155 : * Report that we are now vacuuming indexes and the number of indexes to
2156 : * vacuum.
2157 : */
2158 1040 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2159 1040 : progress_start_val[1] = vacrel->nindexes;
2160 1040 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2161 :
2162 1040 : if (!ParallelVacuumIsActive(vacrel))
2163 : {
2164 3062 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2165 : {
2166 2032 : Relation indrel = vacrel->indrels[idx];
2167 2032 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2168 :
2169 2032 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2170 : old_live_tuples,
2171 : vacrel);
2172 :
2173 : /* Report the number of indexes vacuumed */
2174 2032 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2175 2032 : idx + 1);
2176 :
2177 2032 : if (lazy_check_wraparound_failsafe(vacrel))
2178 : {
2179 : /* Wraparound emergency -- end current index scan */
2180 0 : allindexes = false;
2181 0 : break;
2182 : }
2183 : }
2184 : }
2185 : else
2186 : {
2187 : /* Outsource everything to parallel variant */
2188 10 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2189 : vacrel->num_index_scans);
2190 :
2191 : /*
2192 : * Do a postcheck to consider applying wraparound failsafe now. Note
2193 : * that parallel VACUUM only gets the precheck and this postcheck.
2194 : */
2195 10 : if (lazy_check_wraparound_failsafe(vacrel))
2196 0 : allindexes = false;
2197 : }
2198 :
2199 : /*
2200 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2201 : * each call here (except calls where we choose to do the failsafe). This
2202 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2203 : * of the failsafe triggering, which prevents the next call from taking
2204 : * place).
2205 : */
2206 : Assert(vacrel->num_index_scans > 0 ||
2207 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2208 : Assert(allindexes || VacuumFailsafeActive);
2209 :
2210 : /*
2211 : * Increase and report the number of index scans. Also, we reset
2212 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2213 : *
2214 : * We deliberately include the case where we started a round of bulk
2215 : * deletes that we weren't able to finish due to the failsafe triggering.
2216 : */
2217 1040 : vacrel->num_index_scans++;
2218 1040 : progress_end_val[0] = 0;
2219 1040 : progress_end_val[1] = 0;
2220 1040 : progress_end_val[2] = vacrel->num_index_scans;
2221 1040 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2222 :
2223 1040 : return allindexes;
2224 : }
2225 :
2226 : /*
2227 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2228 : *
2229 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2230 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2231 : *
2232 : * We may also be able to truncate the line pointer array of the heap pages we
2233 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2234 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2235 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2236 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2237 : * page's line pointer array).
2238 : *
2239 : * Note: the reason for doing this as a second pass is we cannot remove the
2240 : * tuples until we've removed their index entries, and we want to process
2241 : * index entry removal in batches as large as possible.
2242 : */
2243 : static void
2244 1040 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2245 : {
2246 1040 : BlockNumber vacuumed_pages = 0;
2247 1040 : Buffer vmbuffer = InvalidBuffer;
2248 : LVSavedErrInfo saved_err_info;
2249 : TidStoreIter *iter;
2250 : TidStoreIterResult *iter_result;
2251 :
2252 : Assert(vacrel->do_index_vacuuming);
2253 : Assert(vacrel->do_index_cleanup);
2254 : Assert(vacrel->num_index_scans > 0);
2255 :
2256 : /* Report that we are now vacuuming the heap */
2257 1040 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2258 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2259 :
2260 : /* Update error traceback information */
2261 1040 : update_vacuum_error_info(vacrel, &saved_err_info,
2262 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2263 : InvalidBlockNumber, InvalidOffsetNumber);
2264 :
2265 1040 : iter = TidStoreBeginIterate(vacrel->dead_items);
2266 25710 : while ((iter_result = TidStoreIterateNext(iter)) != NULL)
2267 : {
2268 : BlockNumber blkno;
2269 : Buffer buf;
2270 : Page page;
2271 : Size freespace;
2272 : OffsetNumber offsets[MaxOffsetNumber];
2273 : int num_offsets;
2274 :
2275 24670 : vacuum_delay_point();
2276 :
2277 24670 : blkno = iter_result->blkno;
2278 24670 : vacrel->blkno = blkno;
2279 :
2280 24670 : num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
2281 : Assert(num_offsets <= lengthof(offsets));
2282 :
2283 : /*
2284 : * Pin the visibility map page in case we need to mark the page
2285 : * all-visible. In most cases this will be very cheap, because we'll
2286 : * already have the correct page pinned anyway.
2287 : */
2288 24670 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2289 :
2290 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2291 24670 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2292 : vacrel->bstrategy);
2293 24670 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2294 24670 : lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
2295 : num_offsets, vmbuffer);
2296 :
2297 : /* Now that we've vacuumed the page, record its available space */
2298 24670 : page = BufferGetPage(buf);
2299 24670 : freespace = PageGetHeapFreeSpace(page);
2300 :
2301 24670 : UnlockReleaseBuffer(buf);
2302 24670 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2303 24670 : vacuumed_pages++;
2304 : }
2305 1040 : TidStoreEndIterate(iter);
2306 :
2307 1040 : vacrel->blkno = InvalidBlockNumber;
2308 1040 : if (BufferIsValid(vmbuffer))
2309 1040 : ReleaseBuffer(vmbuffer);
2310 :
2311 : /*
2312 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2313 : * the second heap pass. No more, no less.
2314 : */
2315 : Assert(vacrel->num_index_scans > 1 ||
2316 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2317 : vacuumed_pages == vacrel->lpdead_item_pages));
2318 :
2319 1040 : ereport(DEBUG2,
2320 : (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2321 : vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2322 : vacuumed_pages)));
2323 :
2324 : /* Revert to the previous phase information for error traceback */
2325 1040 : restore_vacuum_error_info(vacrel, &saved_err_info);
2326 1040 : }
2327 :
2328 : /*
2329 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2330 : * vacrel->dead_items store.
2331 : *
2332 : * Caller must have an exclusive buffer lock on the buffer (though a full
2333 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2334 : * a pin on blkno's visibility map page.
2335 : */
2336 : static void
2337 24670 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2338 : OffsetNumber *deadoffsets, int num_offsets,
2339 : Buffer vmbuffer)
2340 : {
2341 24670 : Page page = BufferGetPage(buffer);
2342 : OffsetNumber unused[MaxHeapTuplesPerPage];
2343 24670 : int nunused = 0;
2344 : TransactionId visibility_cutoff_xid;
2345 : bool all_frozen;
2346 : LVSavedErrInfo saved_err_info;
2347 :
2348 : Assert(vacrel->do_index_vacuuming);
2349 :
2350 24670 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2351 :
2352 : /* Update error traceback information */
2353 24670 : update_vacuum_error_info(vacrel, &saved_err_info,
2354 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2355 : InvalidOffsetNumber);
2356 :
2357 24670 : START_CRIT_SECTION();
2358 :
2359 1613526 : for (int i = 0; i < num_offsets; i++)
2360 : {
2361 : ItemId itemid;
2362 1588856 : OffsetNumber toff = deadoffsets[i];
2363 :
2364 1588856 : itemid = PageGetItemId(page, toff);
2365 :
2366 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2367 1588856 : ItemIdSetUnused(itemid);
2368 1588856 : unused[nunused++] = toff;
2369 : }
2370 :
2371 : Assert(nunused > 0);
2372 :
2373 : /* Attempt to truncate line pointer array now */
2374 24670 : PageTruncateLinePointerArray(page);
2375 :
2376 : /*
2377 : * Mark buffer dirty before we write WAL.
2378 : */
2379 24670 : MarkBufferDirty(buffer);
2380 :
2381 : /* XLOG stuff */
2382 24670 : if (RelationNeedsWAL(vacrel->rel))
2383 : {
2384 22970 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2385 : InvalidTransactionId,
2386 : false, /* no cleanup lock required */
2387 : PRUNE_VACUUM_CLEANUP,
2388 : NULL, 0, /* frozen */
2389 : NULL, 0, /* redirected */
2390 : NULL, 0, /* dead */
2391 : unused, nunused);
2392 : }
2393 :
2394 : /*
2395 : * End critical section, so we safely can do visibility tests (which
2396 : * possibly need to perform IO and allocate memory!). If we crash now the
2397 : * page (including the corresponding vm bit) might not be marked all
2398 : * visible, but that's fine. A later vacuum will fix that.
2399 : */
2400 24670 : END_CRIT_SECTION();
2401 :
2402 : /*
2403 : * Now that we have removed the LP_DEAD items from the page, once again
2404 : * check if the page has become all-visible. The page is already marked
2405 : * dirty, exclusively locked, and, if needed, a full page image has been
2406 : * emitted.
2407 : */
2408 : Assert(!PageIsAllVisible(page));
2409 24670 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2410 : &all_frozen))
2411 : {
2412 : uint8 old_vmbits;
2413 24616 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2414 :
2415 24616 : if (all_frozen)
2416 : {
2417 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2418 18572 : flags |= VISIBILITYMAP_ALL_FROZEN;
2419 : }
2420 :
2421 24616 : PageSetAllVisible(page);
2422 24616 : old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
2423 : InvalidXLogRecPtr,
2424 : vmbuffer, visibility_cutoff_xid,
2425 : flags);
2426 :
2427 : /*
2428 : * If the page wasn't already set all-visible and/or all-frozen in the
2429 : * VM, count it as newly set for logging.
2430 : */
2431 24616 : if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
2432 : {
2433 24616 : vacrel->vm_new_visible_pages++;
2434 24616 : if (all_frozen)
2435 18572 : vacrel->vm_new_visible_frozen_pages++;
2436 : }
2437 :
2438 0 : else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
2439 : all_frozen)
2440 0 : vacrel->vm_new_frozen_pages++;
2441 : }
2442 :
2443 : /* Revert to the previous phase information for error traceback */
2444 24670 : restore_vacuum_error_info(vacrel, &saved_err_info);
2445 24670 : }
2446 :
2447 : /*
2448 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2449 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2450 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2451 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2452 : *
2453 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2454 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2455 : * that it started out with.
2456 : *
2457 : * Returns true when failsafe has been triggered.
2458 : */
2459 : static bool
2460 117502 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2461 : {
2462 : /* Don't warn more than once per VACUUM */
2463 117502 : if (VacuumFailsafeActive)
2464 0 : return true;
2465 :
2466 117502 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2467 : {
2468 19278 : const int progress_index[] = {
2469 : PROGRESS_VACUUM_INDEXES_TOTAL,
2470 : PROGRESS_VACUUM_INDEXES_PROCESSED
2471 : };
2472 19278 : int64 progress_val[2] = {0, 0};
2473 :
2474 19278 : VacuumFailsafeActive = true;
2475 :
2476 : /*
2477 : * Abandon use of a buffer access strategy to allow use of all of
2478 : * shared buffers. We assume the caller who allocated the memory for
2479 : * the BufferAccessStrategy will free it.
2480 : */
2481 19278 : vacrel->bstrategy = NULL;
2482 :
2483 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2484 19278 : vacrel->do_index_vacuuming = false;
2485 19278 : vacrel->do_index_cleanup = false;
2486 19278 : vacrel->do_rel_truncate = false;
2487 :
2488 : /* Reset the progress counters */
2489 19278 : pgstat_progress_update_multi_param(2, progress_index, progress_val);
2490 :
2491 19278 : ereport(WARNING,
2492 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2493 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2494 : vacrel->num_index_scans),
2495 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2496 : errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
2497 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2498 :
2499 : /* Stop applying cost limits from this point on */
2500 19278 : VacuumCostActive = false;
2501 19278 : VacuumCostBalance = 0;
2502 :
2503 19278 : return true;
2504 : }
2505 :
2506 98224 : return false;
2507 : }
2508 :
2509 : /*
2510 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2511 : */
2512 : static void
2513 90064 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2514 : {
2515 90064 : double reltuples = vacrel->new_rel_tuples;
2516 90064 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2517 90064 : const int progress_start_index[] = {
2518 : PROGRESS_VACUUM_PHASE,
2519 : PROGRESS_VACUUM_INDEXES_TOTAL
2520 : };
2521 90064 : const int progress_end_index[] = {
2522 : PROGRESS_VACUUM_INDEXES_TOTAL,
2523 : PROGRESS_VACUUM_INDEXES_PROCESSED
2524 : };
2525 : int64 progress_start_val[2];
2526 90064 : int64 progress_end_val[2] = {0, 0};
2527 :
2528 : Assert(vacrel->do_index_cleanup);
2529 : Assert(vacrel->nindexes > 0);
2530 :
2531 : /*
2532 : * Report that we are now cleaning up indexes and the number of indexes to
2533 : * cleanup.
2534 : */
2535 90064 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2536 90064 : progress_start_val[1] = vacrel->nindexes;
2537 90064 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2538 :
2539 90064 : if (!ParallelVacuumIsActive(vacrel))
2540 : {
2541 230738 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2542 : {
2543 140696 : Relation indrel = vacrel->indrels[idx];
2544 140696 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2545 :
2546 281392 : vacrel->indstats[idx] =
2547 140696 : lazy_cleanup_one_index(indrel, istat, reltuples,
2548 : estimated_count, vacrel);
2549 :
2550 : /* Report the number of indexes cleaned up */
2551 140696 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2552 140696 : idx + 1);
2553 : }
2554 : }
2555 : else
2556 : {
2557 : /* Outsource everything to parallel variant */
2558 22 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2559 : vacrel->num_index_scans,
2560 : estimated_count);
2561 : }
2562 :
2563 : /* Reset the progress counters */
2564 90064 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2565 90064 : }
2566 :
2567 : /*
2568 : * lazy_vacuum_one_index() -- vacuum index relation.
2569 : *
2570 : * Delete all the index tuples containing a TID collected in
2571 : * vacrel->dead_items. Also update running statistics. Exact
2572 : * details depend on index AM's ambulkdelete routine.
2573 : *
2574 : * reltuples is the number of heap tuples to be passed to the
2575 : * bulkdelete callback. It's always assumed to be estimated.
2576 : * See indexam.sgml for more info.
2577 : *
2578 : * Returns bulk delete stats derived from input stats
2579 : */
2580 : static IndexBulkDeleteResult *
2581 2032 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2582 : double reltuples, LVRelState *vacrel)
2583 : {
2584 : IndexVacuumInfo ivinfo;
2585 : LVSavedErrInfo saved_err_info;
2586 :
2587 2032 : ivinfo.index = indrel;
2588 2032 : ivinfo.heaprel = vacrel->rel;
2589 2032 : ivinfo.analyze_only = false;
2590 2032 : ivinfo.report_progress = false;
2591 2032 : ivinfo.estimated_count = true;
2592 2032 : ivinfo.message_level = DEBUG2;
2593 2032 : ivinfo.num_heap_tuples = reltuples;
2594 2032 : ivinfo.strategy = vacrel->bstrategy;
2595 :
2596 : /*
2597 : * Update error traceback information.
2598 : *
2599 : * The index name is saved during this phase and restored immediately
2600 : * after this phase. See vacuum_error_callback.
2601 : */
2602 : Assert(vacrel->indname == NULL);
2603 2032 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2604 2032 : update_vacuum_error_info(vacrel, &saved_err_info,
2605 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2606 : InvalidBlockNumber, InvalidOffsetNumber);
2607 :
2608 : /* Do bulk deletion */
2609 2032 : istat = vac_bulkdel_one_index(&ivinfo, istat, vacrel->dead_items,
2610 : vacrel->dead_items_info);
2611 :
2612 : /* Revert to the previous phase information for error traceback */
2613 2032 : restore_vacuum_error_info(vacrel, &saved_err_info);
2614 2032 : pfree(vacrel->indname);
2615 2032 : vacrel->indname = NULL;
2616 :
2617 2032 : return istat;
2618 : }
2619 :
2620 : /*
2621 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2622 : *
2623 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
2624 : * of heap tuples and estimated_count is true if reltuples is an
2625 : * estimated value. See indexam.sgml for more info.
2626 : *
2627 : * Returns bulk delete stats derived from input stats
2628 : */
2629 : static IndexBulkDeleteResult *
2630 140696 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2631 : double reltuples, bool estimated_count,
2632 : LVRelState *vacrel)
2633 : {
2634 : IndexVacuumInfo ivinfo;
2635 : LVSavedErrInfo saved_err_info;
2636 :
2637 140696 : ivinfo.index = indrel;
2638 140696 : ivinfo.heaprel = vacrel->rel;
2639 140696 : ivinfo.analyze_only = false;
2640 140696 : ivinfo.report_progress = false;
2641 140696 : ivinfo.estimated_count = estimated_count;
2642 140696 : ivinfo.message_level = DEBUG2;
2643 :
2644 140696 : ivinfo.num_heap_tuples = reltuples;
2645 140696 : ivinfo.strategy = vacrel->bstrategy;
2646 :
2647 : /*
2648 : * Update error traceback information.
2649 : *
2650 : * The index name is saved during this phase and restored immediately
2651 : * after this phase. See vacuum_error_callback.
2652 : */
2653 : Assert(vacrel->indname == NULL);
2654 140696 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2655 140696 : update_vacuum_error_info(vacrel, &saved_err_info,
2656 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2657 : InvalidBlockNumber, InvalidOffsetNumber);
2658 :
2659 140696 : istat = vac_cleanup_one_index(&ivinfo, istat);
2660 :
2661 : /* Revert to the previous phase information for error traceback */
2662 140696 : restore_vacuum_error_info(vacrel, &saved_err_info);
2663 140696 : pfree(vacrel->indname);
2664 140696 : vacrel->indname = NULL;
2665 :
2666 140696 : return istat;
2667 : }
2668 :
2669 : /*
2670 : * should_attempt_truncation - should we attempt to truncate the heap?
2671 : *
2672 : * Don't even think about it unless we have a shot at releasing a goodly
2673 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
2674 : * an AccessExclusive lock must be replayed on any hot standby, where it can
2675 : * be particularly disruptive.
2676 : *
2677 : * Also don't attempt it if wraparound failsafe is in effect. The entire
2678 : * system might be refusing to allocate new XIDs at this point. The system
2679 : * definitely won't return to normal unless and until VACUUM actually advances
2680 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2681 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2682 : * truncate the table under these circumstances, an XID exhaustion error might
2683 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2684 : * There is very little chance of truncation working out when the failsafe is
2685 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
2686 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2687 : * we're called.
2688 : */
2689 : static bool
2690 114420 : should_attempt_truncation(LVRelState *vacrel)
2691 : {
2692 : BlockNumber possibly_freeable;
2693 :
2694 114420 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2695 19518 : return false;
2696 :
2697 94902 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2698 94902 : if (possibly_freeable > 0 &&
2699 334 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2700 334 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2701 310 : return true;
2702 :
2703 94592 : return false;
2704 : }
2705 :
2706 : /*
2707 : * lazy_truncate_heap - try to truncate off any empty pages at the end
2708 : */
2709 : static void
2710 310 : lazy_truncate_heap(LVRelState *vacrel)
2711 : {
2712 310 : BlockNumber orig_rel_pages = vacrel->rel_pages;
2713 : BlockNumber new_rel_pages;
2714 : bool lock_waiter_detected;
2715 : int lock_retry;
2716 :
2717 : /* Report that we are now truncating */
2718 310 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2719 : PROGRESS_VACUUM_PHASE_TRUNCATE);
2720 :
2721 : /* Update error traceback information one last time */
2722 310 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2723 : vacrel->nonempty_pages, InvalidOffsetNumber);
2724 :
2725 : /*
2726 : * Loop until no more truncating can be done.
2727 : */
2728 : do
2729 : {
2730 : /*
2731 : * We need full exclusive lock on the relation in order to do
2732 : * truncation. If we can't get it, give up rather than waiting --- we
2733 : * don't want to block other backends, and we don't want to deadlock
2734 : * (which is quite possible considering we already hold a lower-grade
2735 : * lock).
2736 : */
2737 310 : lock_waiter_detected = false;
2738 310 : lock_retry = 0;
2739 : while (true)
2740 : {
2741 710 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
2742 306 : break;
2743 :
2744 : /*
2745 : * Check for interrupts while trying to (re-)acquire the exclusive
2746 : * lock.
2747 : */
2748 404 : CHECK_FOR_INTERRUPTS();
2749 :
2750 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2751 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2752 : {
2753 : /*
2754 : * We failed to establish the lock in the specified number of
2755 : * retries. This means we give up truncating.
2756 : */
2757 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
2758 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2759 : vacrel->relname)));
2760 6 : return;
2761 : }
2762 :
2763 400 : (void) WaitLatch(MyLatch,
2764 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2765 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2766 : WAIT_EVENT_VACUUM_TRUNCATE);
2767 400 : ResetLatch(MyLatch);
2768 : }
2769 :
2770 : /*
2771 : * Now that we have exclusive lock, look to see if the rel has grown
2772 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
2773 : * the newly added pages presumably contain non-deletable tuples.
2774 : */
2775 306 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2776 306 : if (new_rel_pages != orig_rel_pages)
2777 : {
2778 : /*
2779 : * Note: we intentionally don't update vacrel->rel_pages with the
2780 : * new rel size here. If we did, it would amount to assuming that
2781 : * the new pages are empty, which is unlikely. Leaving the numbers
2782 : * alone amounts to assuming that the new pages have the same
2783 : * tuple density as existing ones, which is less unlikely.
2784 : */
2785 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2786 0 : return;
2787 : }
2788 :
2789 : /*
2790 : * Scan backwards from the end to verify that the end pages actually
2791 : * contain no tuples. This is *necessary*, not optional, because
2792 : * other backends could have added tuples to these pages whilst we
2793 : * were vacuuming.
2794 : */
2795 306 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2796 306 : vacrel->blkno = new_rel_pages;
2797 :
2798 306 : if (new_rel_pages >= orig_rel_pages)
2799 : {
2800 : /* can't do anything after all */
2801 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2802 2 : return;
2803 : }
2804 :
2805 : /*
2806 : * Okay to truncate.
2807 : */
2808 304 : RelationTruncate(vacrel->rel, new_rel_pages);
2809 :
2810 : /*
2811 : * We can release the exclusive lock as soon as we have truncated.
2812 : * Other backends can't safely access the relation until they have
2813 : * processed the smgr invalidation that smgrtruncate sent out ... but
2814 : * that should happen as part of standard invalidation processing once
2815 : * they acquire lock on the relation.
2816 : */
2817 304 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2818 :
2819 : /*
2820 : * Update statistics. Here, it *is* correct to adjust rel_pages
2821 : * without also touching reltuples, since the tuple count wasn't
2822 : * changed by the truncation.
2823 : */
2824 304 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2825 304 : vacrel->rel_pages = new_rel_pages;
2826 :
2827 304 : ereport(vacrel->verbose ? INFO : DEBUG2,
2828 : (errmsg("table \"%s\": truncated %u to %u pages",
2829 : vacrel->relname,
2830 : orig_rel_pages, new_rel_pages)));
2831 304 : orig_rel_pages = new_rel_pages;
2832 304 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2833 : }
2834 :
2835 : /*
2836 : * Rescan end pages to verify that they are (still) empty of tuples.
2837 : *
2838 : * Returns number of nondeletable pages (last nonempty page + 1).
2839 : */
2840 : static BlockNumber
2841 306 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2842 : {
2843 : BlockNumber blkno;
2844 : BlockNumber prefetchedUntil;
2845 : instr_time starttime;
2846 :
2847 : /* Initialize the starttime if we check for conflicting lock requests */
2848 306 : INSTR_TIME_SET_CURRENT(starttime);
2849 :
2850 : /*
2851 : * Start checking blocks at what we believe relation end to be and move
2852 : * backwards. (Strange coding of loop control is needed because blkno is
2853 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2854 : * in forward direction, so that OS-level readahead can kick in.
2855 : */
2856 306 : blkno = vacrel->rel_pages;
2857 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
2858 : "prefetch size must be power of 2");
2859 306 : prefetchedUntil = InvalidBlockNumber;
2860 5002 : while (blkno > vacrel->nonempty_pages)
2861 : {
2862 : Buffer buf;
2863 : Page page;
2864 : OffsetNumber offnum,
2865 : maxoff;
2866 : bool hastup;
2867 :
2868 : /*
2869 : * Check if another process requests a lock on our relation. We are
2870 : * holding an AccessExclusiveLock here, so they will be waiting. We
2871 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2872 : * only check if that interval has elapsed once every 32 blocks to
2873 : * keep the number of system calls and actual shared lock table
2874 : * lookups to a minimum.
2875 : */
2876 4700 : if ((blkno % 32) == 0)
2877 : {
2878 : instr_time currenttime;
2879 : instr_time elapsed;
2880 :
2881 156 : INSTR_TIME_SET_CURRENT(currenttime);
2882 156 : elapsed = currenttime;
2883 156 : INSTR_TIME_SUBTRACT(elapsed, starttime);
2884 156 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2885 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
2886 : {
2887 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
2888 : {
2889 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
2890 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
2891 : vacrel->relname)));
2892 :
2893 0 : *lock_waiter_detected = true;
2894 0 : return blkno;
2895 : }
2896 0 : starttime = currenttime;
2897 : }
2898 : }
2899 :
2900 : /*
2901 : * We don't insert a vacuum delay point here, because we have an
2902 : * exclusive lock on the table which we want to hold for as short a
2903 : * time as possible. We still need to check for interrupts however.
2904 : */
2905 4700 : CHECK_FOR_INTERRUPTS();
2906 :
2907 4700 : blkno--;
2908 :
2909 : /* If we haven't prefetched this lot yet, do so now. */
2910 4700 : if (prefetchedUntil > blkno)
2911 : {
2912 : BlockNumber prefetchStart;
2913 : BlockNumber pblkno;
2914 :
2915 416 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2916 6878 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2917 : {
2918 6462 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2919 6462 : CHECK_FOR_INTERRUPTS();
2920 : }
2921 416 : prefetchedUntil = prefetchStart;
2922 : }
2923 :
2924 4700 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2925 : vacrel->bstrategy);
2926 :
2927 : /* In this phase we only need shared access to the buffer */
2928 4700 : LockBuffer(buf, BUFFER_LOCK_SHARE);
2929 :
2930 4700 : page = BufferGetPage(buf);
2931 :
2932 4700 : if (PageIsNew(page) || PageIsEmpty(page))
2933 : {
2934 2380 : UnlockReleaseBuffer(buf);
2935 2380 : continue;
2936 : }
2937 :
2938 2320 : hastup = false;
2939 2320 : maxoff = PageGetMaxOffsetNumber(page);
2940 4636 : for (offnum = FirstOffsetNumber;
2941 : offnum <= maxoff;
2942 2316 : offnum = OffsetNumberNext(offnum))
2943 : {
2944 : ItemId itemid;
2945 :
2946 2320 : itemid = PageGetItemId(page, offnum);
2947 :
2948 : /*
2949 : * Note: any non-unused item should be taken as a reason to keep
2950 : * this page. Even an LP_DEAD item makes truncation unsafe, since
2951 : * we must not have cleaned out its index entries.
2952 : */
2953 2320 : if (ItemIdIsUsed(itemid))
2954 : {
2955 4 : hastup = true;
2956 4 : break; /* can stop scanning */
2957 : }
2958 : } /* scan along page */
2959 :
2960 2320 : UnlockReleaseBuffer(buf);
2961 :
2962 : /* Done scanning if we found a tuple here */
2963 2320 : if (hastup)
2964 4 : return blkno + 1;
2965 : }
2966 :
2967 : /*
2968 : * If we fall out of the loop, all the previously-thought-to-be-empty
2969 : * pages still are; we need not bother to look at the last known-nonempty
2970 : * page.
2971 : */
2972 302 : return vacrel->nonempty_pages;
2973 : }
2974 :
2975 : /*
2976 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
2977 : * shared memory). Sets both in vacrel for caller.
2978 : *
2979 : * Also handles parallel initialization as part of allocating dead_items in
2980 : * DSM when required.
2981 : */
2982 : static void
2983 114420 : dead_items_alloc(LVRelState *vacrel, int nworkers)
2984 : {
2985 : VacDeadItemsInfo *dead_items_info;
2986 322482 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
2987 93642 : autovacuum_work_mem != -1 ?
2988 208062 : autovacuum_work_mem : maintenance_work_mem;
2989 :
2990 : /*
2991 : * Initialize state for a parallel vacuum. As of now, only one worker can
2992 : * be used for an index, so we invoke parallelism only if there are at
2993 : * least two indexes on a table.
2994 : */
2995 114420 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
2996 : {
2997 : /*
2998 : * Since parallel workers cannot access data in temporary tables, we
2999 : * can't perform parallel vacuum on them.
3000 : */
3001 7938 : if (RelationUsesLocalBuffers(vacrel->rel))
3002 : {
3003 : /*
3004 : * Give warning only if the user explicitly tries to perform a
3005 : * parallel vacuum on the temporary table.
3006 : */
3007 6 : if (nworkers > 0)
3008 6 : ereport(WARNING,
3009 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
3010 : vacrel->relname)));
3011 : }
3012 : else
3013 7932 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
3014 : vacrel->nindexes, nworkers,
3015 : vac_work_mem,
3016 7932 : vacrel->verbose ? INFO : DEBUG2,
3017 : vacrel->bstrategy);
3018 :
3019 : /*
3020 : * If parallel mode started, dead_items and dead_items_info spaces are
3021 : * allocated in DSM.
3022 : */
3023 7938 : if (ParallelVacuumIsActive(vacrel))
3024 : {
3025 22 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
3026 : &vacrel->dead_items_info);
3027 22 : return;
3028 : }
3029 : }
3030 :
3031 : /*
3032 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
3033 : * locally.
3034 : */
3035 :
3036 114398 : dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
3037 114398 : dead_items_info->max_bytes = vac_work_mem * 1024L;
3038 114398 : dead_items_info->num_items = 0;
3039 114398 : vacrel->dead_items_info = dead_items_info;
3040 :
3041 114398 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
3042 : }
3043 :
3044 : /*
3045 : * Add the given block number and offset numbers to dead_items.
3046 : */
3047 : static void
3048 29304 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
3049 : int num_offsets)
3050 : {
3051 29304 : const int prog_index[2] = {
3052 : PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
3053 : PROGRESS_VACUUM_DEAD_TUPLE_BYTES
3054 : };
3055 : int64 prog_val[2];
3056 :
3057 29304 : TidStoreSetBlockOffsets(vacrel->dead_items, blkno, offsets, num_offsets);
3058 29304 : vacrel->dead_items_info->num_items += num_offsets;
3059 :
3060 : /* update the progress information */
3061 29304 : prog_val[0] = vacrel->dead_items_info->num_items;
3062 29304 : prog_val[1] = TidStoreMemoryUsage(vacrel->dead_items);
3063 29304 : pgstat_progress_update_multi_param(2, prog_index, prog_val);
3064 29304 : }
3065 :
3066 : /*
3067 : * Forget all collected dead items.
3068 : */
3069 : static void
3070 1060 : dead_items_reset(LVRelState *vacrel)
3071 : {
3072 1060 : if (ParallelVacuumIsActive(vacrel))
3073 : {
3074 10 : parallel_vacuum_reset_dead_items(vacrel->pvs);
3075 10 : return;
3076 : }
3077 :
3078 : /* Recreate the tidstore with the same max_bytes limitation */
3079 1050 : TidStoreDestroy(vacrel->dead_items);
3080 1050 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
3081 :
3082 : /* Reset the counter */
3083 1050 : vacrel->dead_items_info->num_items = 0;
3084 : }
3085 :
3086 : /*
3087 : * Perform cleanup for resources allocated in dead_items_alloc
3088 : */
3089 : static void
3090 114420 : dead_items_cleanup(LVRelState *vacrel)
3091 : {
3092 114420 : if (!ParallelVacuumIsActive(vacrel))
3093 : {
3094 : /* Don't bother with pfree here */
3095 114398 : return;
3096 : }
3097 :
3098 : /* End parallel mode */
3099 22 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
3100 22 : vacrel->pvs = NULL;
3101 : }
3102 :
3103 : /*
3104 : * Check if every tuple in the given page is visible to all current and future
3105 : * transactions. Also return the visibility_cutoff_xid which is the highest
3106 : * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
3107 : * on this page is frozen.
3108 : *
3109 : * This is a stripped down version of lazy_scan_prune(). If you change
3110 : * anything here, make sure that everything stays in sync. Note that an
3111 : * assertion calls us to verify that everybody still agrees. Be sure to avoid
3112 : * introducing new side-effects here.
3113 : */
3114 : static bool
3115 24670 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
3116 : TransactionId *visibility_cutoff_xid,
3117 : bool *all_frozen)
3118 : {
3119 24670 : Page page = BufferGetPage(buf);
3120 24670 : BlockNumber blockno = BufferGetBlockNumber(buf);
3121 : OffsetNumber offnum,
3122 : maxoff;
3123 24670 : bool all_visible = true;
3124 :
3125 24670 : *visibility_cutoff_xid = InvalidTransactionId;
3126 24670 : *all_frozen = true;
3127 :
3128 24670 : maxoff = PageGetMaxOffsetNumber(page);
3129 1197452 : for (offnum = FirstOffsetNumber;
3130 1172836 : offnum <= maxoff && all_visible;
3131 1172782 : offnum = OffsetNumberNext(offnum))
3132 : {
3133 : ItemId itemid;
3134 : HeapTupleData tuple;
3135 :
3136 : /*
3137 : * Set the offset number so that we can display it along with any
3138 : * error that occurred while processing this tuple.
3139 : */
3140 1172784 : vacrel->offnum = offnum;
3141 1172784 : itemid = PageGetItemId(page, offnum);
3142 :
3143 : /* Unused or redirect line pointers are of no interest */
3144 1172784 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
3145 308916 : continue;
3146 :
3147 863868 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
3148 :
3149 : /*
3150 : * Dead line pointers can have index pointers pointing to them. So
3151 : * they can't be treated as visible
3152 : */
3153 863868 : if (ItemIdIsDead(itemid))
3154 : {
3155 2 : all_visible = false;
3156 2 : *all_frozen = false;
3157 2 : break;
3158 : }
3159 :
3160 : Assert(ItemIdIsNormal(itemid));
3161 :
3162 863866 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
3163 863866 : tuple.t_len = ItemIdGetLength(itemid);
3164 863866 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
3165 :
3166 863866 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3167 : buf))
3168 : {
3169 863850 : case HEAPTUPLE_LIVE:
3170 : {
3171 : TransactionId xmin;
3172 :
3173 : /* Check comments in lazy_scan_prune. */
3174 863850 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3175 : {
3176 0 : all_visible = false;
3177 0 : *all_frozen = false;
3178 0 : break;
3179 : }
3180 :
3181 : /*
3182 : * The inserter definitely committed. But is it old enough
3183 : * that everyone sees it as committed?
3184 : */
3185 863850 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3186 863850 : if (!TransactionIdPrecedes(xmin,
3187 : vacrel->cutoffs.OldestXmin))
3188 : {
3189 36 : all_visible = false;
3190 36 : *all_frozen = false;
3191 36 : break;
3192 : }
3193 :
3194 : /* Track newest xmin on page. */
3195 863814 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3196 : TransactionIdIsNormal(xmin))
3197 21868 : *visibility_cutoff_xid = xmin;
3198 :
3199 : /* Check whether this tuple is already frozen or not */
3200 1072186 : if (all_visible && *all_frozen &&
3201 208372 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3202 6052 : *all_frozen = false;
3203 : }
3204 863814 : break;
3205 :
3206 16 : case HEAPTUPLE_DEAD:
3207 : case HEAPTUPLE_RECENTLY_DEAD:
3208 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3209 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3210 : {
3211 16 : all_visible = false;
3212 16 : *all_frozen = false;
3213 16 : break;
3214 : }
3215 0 : default:
3216 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3217 : break;
3218 : }
3219 : } /* scan along page */
3220 :
3221 : /* Clear the offset information once we have processed the given page. */
3222 24670 : vacrel->offnum = InvalidOffsetNumber;
3223 :
3224 24670 : return all_visible;
3225 : }
3226 :
3227 : /*
3228 : * Update index statistics in pg_class if the statistics are accurate.
3229 : */
3230 : static void
3231 94878 : update_relstats_all_indexes(LVRelState *vacrel)
3232 : {
3233 94878 : Relation *indrels = vacrel->indrels;
3234 94878 : int nindexes = vacrel->nindexes;
3235 94878 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3236 :
3237 : Assert(vacrel->do_index_cleanup);
3238 :
3239 235672 : for (int idx = 0; idx < nindexes; idx++)
3240 : {
3241 140794 : Relation indrel = indrels[idx];
3242 140794 : IndexBulkDeleteResult *istat = indstats[idx];
3243 :
3244 140794 : if (istat == NULL || istat->estimated_count)
3245 138490 : continue;
3246 :
3247 : /* Update index statistics */
3248 2304 : vac_update_relstats(indrel,
3249 : istat->num_pages,
3250 : istat->num_index_tuples,
3251 : 0,
3252 : false,
3253 : InvalidTransactionId,
3254 : InvalidMultiXactId,
3255 : NULL, NULL, false);
3256 : }
3257 94878 : }
3258 :
3259 : /*
3260 : * Error context callback for errors occurring during vacuum. The error
3261 : * context messages for index phases should match the messages set in parallel
3262 : * vacuum. If you change this function for those phases, change
3263 : * parallel_vacuum_error_callback() as well.
3264 : */
3265 : static void
3266 76472 : vacuum_error_callback(void *arg)
3267 : {
3268 76472 : LVRelState *errinfo = arg;
3269 :
3270 76472 : switch (errinfo->phase)
3271 : {
3272 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3273 0 : if (BlockNumberIsValid(errinfo->blkno))
3274 : {
3275 0 : if (OffsetNumberIsValid(errinfo->offnum))
3276 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3277 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3278 : else
3279 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3280 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3281 : }
3282 : else
3283 0 : errcontext("while scanning relation \"%s.%s\"",
3284 : errinfo->relnamespace, errinfo->relname);
3285 0 : break;
3286 :
3287 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3288 0 : if (BlockNumberIsValid(errinfo->blkno))
3289 : {
3290 0 : if (OffsetNumberIsValid(errinfo->offnum))
3291 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3292 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3293 : else
3294 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3295 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3296 : }
3297 : else
3298 0 : errcontext("while vacuuming relation \"%s.%s\"",
3299 : errinfo->relnamespace, errinfo->relname);
3300 0 : break;
3301 :
3302 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3303 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3304 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3305 0 : break;
3306 :
3307 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3308 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3309 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3310 0 : break;
3311 :
3312 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3313 6 : if (BlockNumberIsValid(errinfo->blkno))
3314 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3315 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3316 6 : break;
3317 :
3318 76466 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3319 : default:
3320 76466 : return; /* do nothing; the errinfo may not be
3321 : * initialized */
3322 : }
3323 : }
3324 :
3325 : /*
3326 : * Updates the information required for vacuum error callback. This also saves
3327 : * the current information which can be later restored via restore_vacuum_error_info.
3328 : */
3329 : static void
3330 912524 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3331 : int phase, BlockNumber blkno, OffsetNumber offnum)
3332 : {
3333 912524 : if (saved_vacrel)
3334 : {
3335 168438 : saved_vacrel->offnum = vacrel->offnum;
3336 168438 : saved_vacrel->blkno = vacrel->blkno;
3337 168438 : saved_vacrel->phase = vacrel->phase;
3338 : }
3339 :
3340 912524 : vacrel->blkno = blkno;
3341 912524 : vacrel->offnum = offnum;
3342 912524 : vacrel->phase = phase;
3343 912524 : }
3344 :
3345 : /*
3346 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3347 : */
3348 : static void
3349 168438 : restore_vacuum_error_info(LVRelState *vacrel,
3350 : const LVSavedErrInfo *saved_vacrel)
3351 : {
3352 168438 : vacrel->blkno = saved_vacrel->blkno;
3353 168438 : vacrel->offnum = saved_vacrel->offnum;
3354 168438 : vacrel->phase = saved_vacrel->phase;
3355 168438 : }
|