Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * The major space usage for vacuuming is storage for the dead tuple IDs that
7 : * are to be removed from indexes. We want to ensure we can vacuum even the
8 : * very largest relations with finite memory space usage. To do that, we set
9 : * upper bounds on the memory that can be used for keeping track of dead TIDs
10 : * at once.
11 : *
12 : * We are willing to use at most maintenance_work_mem (or perhaps
13 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
14 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
15 : * the pages that we've pruned). This frees up the memory space dedicated to
16 : * storing dead TIDs.
17 : *
18 : * In practice VACUUM will often complete its initial pass over the target
19 : * heap relation without ever running out of space to store TIDs. This means
20 : * that there only needs to be one call to lazy_vacuum, after the initial pass
21 : * completes.
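 : *
 : * As an editorial sketch only (a simplification of lazy_scan_heap and
 : * lazy_vacuum below, not the actual control flow), the bounded-memory
 : * strategy amounts to:
 : *
 : *     while (heap_vac_scan_next_block(vacrel, &blkno, ...))
 : *     {
 : *         ... prune the page, accumulating its LP_DEAD TIDs ...
 : *         if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
 : *             lazy_vacuum(vacrel);    (vacuum indexes, then pruned pages)
 : *     }
 : *     if (dead_items_info->num_items > 0)
 : *         lazy_vacuum(vacrel);        (typically the only call needed)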
22 : *
23 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : *
27 : * IDENTIFICATION
28 : * src/backend/access/heap/vacuumlazy.c
29 : *
30 : *-------------------------------------------------------------------------
31 : */
32 : #include "postgres.h"
33 :
34 : #include <math.h>
35 :
36 : #include "access/genam.h"
37 : #include "access/heapam.h"
38 : #include "access/heapam_xlog.h"
39 : #include "access/htup_details.h"
40 : #include "access/multixact.h"
41 : #include "access/tidstore.h"
42 : #include "access/transam.h"
43 : #include "access/visibilitymap.h"
44 : #include "access/xloginsert.h"
45 : #include "catalog/storage.h"
46 : #include "commands/dbcommands.h"
47 : #include "commands/progress.h"
48 : #include "commands/vacuum.h"
49 : #include "common/int.h"
50 : #include "executor/instrument.h"
51 : #include "miscadmin.h"
52 : #include "pgstat.h"
53 : #include "portability/instr_time.h"
54 : #include "postmaster/autovacuum.h"
55 : #include "storage/bufmgr.h"
56 : #include "storage/freespace.h"
57 : #include "storage/lmgr.h"
58 : #include "utils/lsyscache.h"
59 : #include "utils/memutils.h"
60 : #include "utils/pg_rusage.h"
61 : #include "utils/timestamp.h"
62 :
63 :
64 : /*
65 : * Space/time tradeoff parameters: do these need to be user-tunable?
66 : *
67 : * To consider truncating the relation, we want there to be at least
68 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
69 : * is less) potentially-freeable pages.
70 : */
71 : #define REL_TRUNCATE_MINIMUM 1000
72 : #define REL_TRUNCATE_FRACTION 16
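 :
 : /*
 :  * For example (illustrative arithmetic only): a 64000-page relation has a
 :  * truncation threshold of Min(1000, 64000 / 16) = 1000 potentially-freeable
 :  * pages, while a 4800-page relation has Min(1000, 4800 / 16) = 300.
 :  */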
73 :
74 : /*
75 : * Timing parameters for truncate locking heuristics.
76 : *
77 : * These were not exposed as user tunable GUC values because it didn't seem
78 : * that the potential for improvement was great enough to merit the cost of
79 : * supporting them.
80 : */
81 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
82 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
83 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
84 :
85 : /*
86 : * Threshold that controls whether we bypass index vacuuming and heap
87 : * vacuuming as an optimization
88 : */
89 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
90 :
91 : /*
92 : * Perform a failsafe check each time we scan another 4GB of pages.
93 : * (Note that this is deliberately kept to a power of two, usually 2^19.)
94 : */
95 : #define FAILSAFE_EVERY_PAGES \
96 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
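 :
 : /*
 :  * Hypothetical sanity check (an editorial illustration, not part of the
 :  * original file), assuming the default BLCKSZ of 8192 bytes:
 :  * (4 * 1024^3) / 8192 = 524288 = 2^19 pages between failsafe checks.
 :  *
 :  * StaticAssertDecl(FAILSAFE_EVERY_PAGES == (BlockNumber) 524288,
 :  *                  "expected 2^19 pages with the default 8kB BLCKSZ");
 :  */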
97 :
98 : /*
99 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
100 : * (it won't be exact because we only vacuum FSM after processing a heap page
101 : * that has some removable tuples). When there are indexes, this is ignored,
102 : * and we vacuum FSM after each index/heap cleaning pass.
103 : */
104 : #define VACUUM_FSM_EVERY_PAGES \
105 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
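 :
 : /*
 :  * With the default 8kB BLCKSZ this is 1048576 (2^20) pages, i.e. roughly
 :  * one FSM vacuum per 8GB of heap processed (illustrative arithmetic; the
 :  * value scales inversely with BLCKSZ).
 :  */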
106 :
107 : /*
108 : * Before we consider skipping a page that's marked as clean in
109 : * visibility map, we must've seen at least this many clean pages.
110 : */
111 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
112 :
113 : /*
114 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
115 : * Needs to be a power of 2.
116 : */
117 : #define PREFETCH_SIZE ((BlockNumber) 32)
118 :
119 : /*
120 : * Macro to check if we are in a parallel vacuum. If true, we are in
121 : * parallel mode and the DSM segment is initialized.
122 : */
123 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
124 :
125 : /* Phases of vacuum during which we report error context. */
126 : typedef enum
127 : {
128 : VACUUM_ERRCB_PHASE_UNKNOWN,
129 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
130 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
131 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
132 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
133 : VACUUM_ERRCB_PHASE_TRUNCATE,
134 : } VacErrPhase;
135 :
136 : typedef struct LVRelState
137 : {
138 : /* Target heap relation and its indexes */
139 : Relation rel;
140 : Relation *indrels;
141 : int nindexes;
142 :
143 : /* Buffer access strategy and parallel vacuum state */
144 : BufferAccessStrategy bstrategy;
145 : ParallelVacuumState *pvs;
146 :
147 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
148 : bool aggressive;
149 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
150 : bool skipwithvm;
151 : /* Consider index vacuuming bypass optimization? */
152 : bool consider_bypass_optimization;
153 :
154 : /* Doing index vacuuming, index cleanup, rel truncation? */
155 : bool do_index_vacuuming;
156 : bool do_index_cleanup;
157 : bool do_rel_truncate;
158 :
159 : /* VACUUM operation's cutoffs for freezing and pruning */
160 : struct VacuumCutoffs cutoffs;
161 : GlobalVisState *vistest;
162 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
163 : TransactionId NewRelfrozenXid;
164 : MultiXactId NewRelminMxid;
165 : bool skippedallvis;
166 :
167 : /* Error reporting state */
168 : char *dbname;
169 : char *relnamespace;
170 : char *relname;
171 : char *indname; /* Current index name */
172 : BlockNumber blkno; /* used only for heap operations */
173 : OffsetNumber offnum; /* used only for heap operations */
174 : VacErrPhase phase;
175 : bool verbose; /* VACUUM VERBOSE? */
176 :
177 : /*
178 : * dead_items stores TIDs whose index tuples are deleted by index
179 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
180 : * that has been processed by lazy_scan_prune. Also needed by
181 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
182 : * LP_UNUSED during second heap pass.
183 : *
184 : * Both dead_items and dead_items_info are allocated in shared memory in
185 : * parallel vacuum cases.
186 : */
187 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
188 : VacDeadItemsInfo *dead_items_info;
189 :
190 : BlockNumber rel_pages; /* total number of pages */
191 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192 : BlockNumber removed_pages; /* # pages removed by relation truncation */
193 : BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 :
198 : /* Statistics output by us, for table */
199 : double new_rel_tuples; /* new estimated total # of tuples */
200 : double new_live_tuples; /* new estimated total # of live tuples */
201 : /* Statistics output by index AMs */
202 : IndexBulkDeleteResult **indstats;
203 :
204 : /* Instrumentation counters */
205 : int num_index_scans;
206 : /* Counters that follow are only for scanned_pages */
207 : int64 tuples_deleted; /* # deleted from table */
208 : int64 tuples_frozen; /* # newly frozen */
209 : int64 lpdead_items; /* # deleted from indexes */
210 : int64 live_tuples; /* # live tuples remaining */
211 : int64 recently_dead_tuples; /* # dead, but not yet removable */
212 : int64 missed_dead_tuples; /* # removable, but not removed */
213 :
214 : /* State maintained by heap_vac_scan_next_block() */
215 : BlockNumber current_block; /* last block returned */
216 : BlockNumber next_unskippable_block; /* next unskippable block */
217 : bool next_unskippable_allvis; /* its visibility status */
218 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
219 : } LVRelState;
220 :
221 : /* Struct for saving and restoring vacuum error information. */
222 : typedef struct LVSavedErrInfo
223 : {
224 : BlockNumber blkno;
225 : OffsetNumber offnum;
226 : VacErrPhase phase;
227 : } LVSavedErrInfo;
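 :
 : /*
 :  * Editorial sketch of the typical save/restore pattern (simplified from
 :  * real callers such as lazy_vacuum_one_index below):
 :  *
 :  *     LVSavedErrInfo saved_err_info;
 :  *
 :  *     update_vacuum_error_info(vacrel, &saved_err_info,
 :  *                              VACUUM_ERRCB_PHASE_VACUUM_INDEX,
 :  *                              InvalidBlockNumber, InvalidOffsetNumber);
 :  *     ... phase-specific work ...
 :  *     restore_vacuum_error_info(vacrel, &saved_err_info);
 :  */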
228 :
229 :
230 : /* non-export function prototypes */
231 : static void lazy_scan_heap(LVRelState *vacrel);
232 : static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
233 : bool *all_visible_according_to_vm);
234 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
235 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
236 : BlockNumber blkno, Page page,
237 : bool sharelock, Buffer vmbuffer);
238 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
239 : BlockNumber blkno, Page page,
240 : Buffer vmbuffer, bool all_visible_according_to_vm,
241 : bool *has_lpdead_items);
242 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
243 : BlockNumber blkno, Page page,
244 : bool *has_lpdead_items);
245 : static void lazy_vacuum(LVRelState *vacrel);
246 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
247 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
248 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
249 : Buffer buffer, OffsetNumber *offsets,
250 : int num_offsets, Buffer vmbuffer);
251 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
252 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
253 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
254 : IndexBulkDeleteResult *istat,
255 : double reltuples,
256 : LVRelState *vacrel);
257 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
258 : IndexBulkDeleteResult *istat,
259 : double reltuples,
260 : bool estimated_count,
261 : LVRelState *vacrel);
262 : static bool should_attempt_truncation(LVRelState *vacrel);
263 : static void lazy_truncate_heap(LVRelState *vacrel);
264 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
265 : bool *lock_waiter_detected);
266 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
267 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
268 : int num_offsets);
269 : static void dead_items_reset(LVRelState *vacrel);
270 : static void dead_items_cleanup(LVRelState *vacrel);
271 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
272 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
273 : static void update_relstats_all_indexes(LVRelState *vacrel);
274 : static void vacuum_error_callback(void *arg);
275 : static void update_vacuum_error_info(LVRelState *vacrel,
276 : LVSavedErrInfo *saved_vacrel,
277 : int phase, BlockNumber blkno,
278 : OffsetNumber offnum);
279 : static void restore_vacuum_error_info(LVRelState *vacrel,
280 : const LVSavedErrInfo *saved_vacrel);
281 :
282 :
283 : /*
284 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
285 : *
286 : * This routine sets things up for and then calls lazy_scan_heap, where
287 : * almost all work actually takes place. Finalizes everything after call
288 : * returns by managing relation truncation and updating rel's pg_class
289 : * entry. (Also updates pg_class entries for any indexes that need it.)
290 : *
291 : * At entry, we have already established a transaction and opened
292 : * and locked the relation.
293 : */
294 : void
295 19098 : heap_vacuum_rel(Relation rel, VacuumParams *params,
296 : BufferAccessStrategy bstrategy)
297 : {
298 : LVRelState *vacrel;
299 : bool verbose,
300 : instrument,
301 : skipwithvm,
302 : frozenxid_updated,
303 : minmulti_updated;
304 : BlockNumber orig_rel_pages,
305 : new_rel_pages,
306 : new_rel_allvisible;
307 : PGRUsage ru0;
308 19098 : TimestampTz starttime = 0;
309 19098 : PgStat_Counter startreadtime = 0,
310 19098 : startwritetime = 0;
311 19098 : WalUsage startwalusage = pgWalUsage;
312 19098 : BufferUsage startbufferusage = pgBufferUsage;
313 : ErrorContextCallback errcallback;
314 19098 : char **indnames = NULL;
315 :
316 19098 : verbose = (params->options & VACOPT_VERBOSE) != 0;
317 19146 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
318 48 : params->log_min_duration >= 0));
319 19098 : if (instrument)
320 : {
321 70 : pg_rusage_init(&ru0);
322 70 : starttime = GetCurrentTimestamp();
323 70 : if (track_io_timing)
324 : {
325 0 : startreadtime = pgStatBlockReadTime;
326 0 : startwritetime = pgStatBlockWriteTime;
327 : }
328 : }
329 :
330 19098 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
331 : RelationGetRelid(rel));
332 :
333 : /*
334 : * Set up error traceback support for ereport() first. The idea is to set
335 : * up an error context callback to display additional information on any
336 : * error during a vacuum. During different phases of vacuum, we update
337 : * the state so that the error context callback always displays current
338 : * information.
339 : *
340 : * Copy the names of the heap rel into local memory for error reporting
341 : * purposes, too. It isn't always safe to assume that we can get the name
342 : * of each rel. It's convenient for code in lazy_scan_heap to always use
343 : * these temp copies.
344 : */
345 19098 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
346 19098 : vacrel->dbname = get_database_name(MyDatabaseId);
347 19098 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
348 19098 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
349 19098 : vacrel->indname = NULL;
350 19098 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
351 19098 : vacrel->verbose = verbose;
352 19098 : errcallback.callback = vacuum_error_callback;
353 19098 : errcallback.arg = vacrel;
354 19098 : errcallback.previous = error_context_stack;
355 19098 : error_context_stack = &errcallback;
356 :
357 : /* Set up high level stuff about rel and its indexes */
358 19098 : vacrel->rel = rel;
359 19098 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
360 : &vacrel->indrels);
361 19098 : vacrel->bstrategy = bstrategy;
362 19098 : if (instrument && vacrel->nindexes > 0)
363 : {
364 : /* Copy index names used by instrumentation (not error reporting) */
365 52 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
366 164 : for (int i = 0; i < vacrel->nindexes; i++)
367 112 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
368 : }
369 :
370 : /*
371 : * The index_cleanup param either disables index vacuuming and cleanup or
372 : * forces it to go ahead when we would otherwise apply the index bypass
373 : * optimization. The default is 'auto', which leaves the final decision
374 : * up to lazy_vacuum().
375 : *
376 : * The truncate param allows the user to avoid attempting relation truncation,
377 : * though it can't force truncation to happen.
378 : */
379 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
380 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
381 : params->truncate != VACOPTVALUE_AUTO);
382 :
383 : /*
384 : * While VacuumFailsafeActive is reset to false before calling this, we
385 : * still need to reset it here due to recursive calls.
386 : */
387 19098 : VacuumFailsafeActive = false;
388 19098 : vacrel->consider_bypass_optimization = true;
389 19098 : vacrel->do_index_vacuuming = true;
390 19098 : vacrel->do_index_cleanup = true;
391 19098 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
392 19098 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
393 : {
394 : /* Force disable index vacuuming up-front */
395 264 : vacrel->do_index_vacuuming = false;
396 264 : vacrel->do_index_cleanup = false;
397 : }
398 18834 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
399 : {
400 : /* Force index vacuuming. Note that the failsafe can still bypass it. */
401 32 : vacrel->consider_bypass_optimization = false;
402 : }
403 : else
404 : {
405 : /* Default/auto, make all decisions dynamically */
406 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
407 : }
408 :
409 : /* Initialize page counters explicitly (be tidy) */
410 19098 : vacrel->scanned_pages = 0;
411 19098 : vacrel->removed_pages = 0;
412 19098 : vacrel->frozen_pages = 0;
413 19098 : vacrel->lpdead_item_pages = 0;
414 19098 : vacrel->missed_dead_pages = 0;
415 19098 : vacrel->nonempty_pages = 0;
416 : /* dead_items_alloc allocates vacrel->dead_items later on */
417 :
418 : /* Allocate/initialize output statistics state */
419 19098 : vacrel->new_rel_tuples = 0;
420 19098 : vacrel->new_live_tuples = 0;
421 19098 : vacrel->indstats = (IndexBulkDeleteResult **)
422 19098 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
423 :
424 : /* Initialize remaining counters (be tidy) */
425 19098 : vacrel->num_index_scans = 0;
426 19098 : vacrel->tuples_deleted = 0;
427 19098 : vacrel->tuples_frozen = 0;
428 19098 : vacrel->lpdead_items = 0;
429 19098 : vacrel->live_tuples = 0;
430 19098 : vacrel->recently_dead_tuples = 0;
431 19098 : vacrel->missed_dead_tuples = 0;
432 :
433 : /*
434 : * Get cutoffs that determine which deleted tuples are considered DEAD,
435 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
436 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
437 : * happen in this order to ensure that the OldestXmin cutoff field works
438 : * as an upper bound on the XIDs stored in the pages we'll actually scan
439 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
440 : *
441 : * Next acquire vistest, a related cutoff that's used in pruning. We
442 : * expect vistest will always make heap_page_prune_and_freeze() remove any
443 : * deleted tuple whose xmax is < OldestXmin. lazy_scan_prune must never
444 : * become confused about whether a tuple should be frozen or removed. (In
445 : * the future we might want to teach lazy_scan_prune to recompute vistest
446 : * from time to time, to increase the number of dead tuples it can prune
447 : * away.)
448 : */
449 19098 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
450 19098 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
451 19098 : vacrel->vistest = GlobalVisTestFor(rel);
452 : /* Initialize state used to track oldest extant XID/MXID */
453 19098 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
454 19098 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
455 19098 : vacrel->skippedallvis = false;
456 19098 : skipwithvm = true;
457 19098 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
458 : {
459 : /*
460 : * Force aggressive mode, and disable skipping blocks using the
461 : * visibility map (even those set all-frozen)
462 : */
463 294 : vacrel->aggressive = true;
464 294 : skipwithvm = false;
465 : }
466 :
467 19098 : vacrel->skipwithvm = skipwithvm;
468 :
469 19098 : if (verbose)
470 : {
471 22 : if (vacrel->aggressive)
472 0 : ereport(INFO,
473 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
474 : vacrel->dbname, vacrel->relnamespace,
475 : vacrel->relname)));
476 : else
477 22 : ereport(INFO,
478 : (errmsg("vacuuming \"%s.%s.%s\"",
479 : vacrel->dbname, vacrel->relnamespace,
480 : vacrel->relname)));
481 : }
482 :
483 : /*
484 : * Allocate dead_items memory using dead_items_alloc. This handles
485 : * parallel VACUUM initialization as part of allocating shared memory
486 : * space used for dead_items. (But do a failsafe precheck first, to
487 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
488 : * is already dangerously old.)
489 : */
490 19098 : lazy_check_wraparound_failsafe(vacrel);
491 19098 : dead_items_alloc(vacrel, params->nworkers);
492 :
493 : /*
494 : * Call lazy_scan_heap to perform all required heap pruning, index
495 : * vacuuming, and heap vacuuming (plus related processing)
496 : */
497 19098 : lazy_scan_heap(vacrel);
498 :
499 : /*
500 : * Free resources managed by dead_items_alloc. This ends parallel mode in
501 : * passing when necessary.
502 : */
503 19098 : dead_items_cleanup(vacrel);
504 : Assert(!IsInParallelMode());
505 :
506 : /*
507 : * Update pg_class entries for each of rel's indexes where appropriate.
508 : *
509 : * Unlike the later update to rel's pg_class entry, this is not critical.
510 : * It only maintains relpages/reltuples statistics used by the planner.
511 : */
512 19098 : if (vacrel->do_index_cleanup)
513 18834 : update_relstats_all_indexes(vacrel);
514 :
515 : /* Done with rel's indexes */
516 19098 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
517 :
518 : /* Optionally truncate rel */
519 19098 : if (should_attempt_truncation(vacrel))
520 246 : lazy_truncate_heap(vacrel);
521 :
522 : /* Pop the error context stack */
523 19098 : error_context_stack = errcallback.previous;
524 :
525 : /* Report that we are now doing final cleanup */
526 19098 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
527 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
528 :
529 : /*
530 : * Prepare to update rel's pg_class entry.
531 : *
532 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
533 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
534 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
535 : */
536 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
537 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
538 : vacrel->cutoffs.relfrozenxid,
539 : vacrel->NewRelfrozenXid));
540 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
541 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
542 : vacrel->cutoffs.relminmxid,
543 : vacrel->NewRelminMxid));
544 19098 : if (vacrel->skippedallvis)
545 : {
546 : /*
547 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
548 : * chose to skip an all-visible page range. The state that tracks new
549 : * values will have missed unfrozen XIDs from the pages we skipped.
550 : */
551 : Assert(!vacrel->aggressive);
552 52 : vacrel->NewRelfrozenXid = InvalidTransactionId;
553 52 : vacrel->NewRelminMxid = InvalidMultiXactId;
554 : }
555 :
556 : /*
557 : * For safety, clamp relallvisible to be not more than what we're setting
558 : * pg_class.relpages to
559 : */
560 19098 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
561 19098 : visibilitymap_count(rel, &new_rel_allvisible, NULL);
562 19098 : if (new_rel_allvisible > new_rel_pages)
563 0 : new_rel_allvisible = new_rel_pages;
564 :
565 : /*
566 : * Now actually update rel's pg_class entry.
567 : *
568 : * In principle new_live_tuples could be -1 indicating that we (still)
569 : * don't know the tuple count. In practice that can't happen, since we
570 : * scan every page that isn't skipped using the visibility map.
571 : */
572 19098 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
573 19098 : new_rel_allvisible, vacrel->nindexes > 0,
574 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
575 : &frozenxid_updated, &minmulti_updated, false);
576 :
577 : /*
578 : * Report results to the cumulative stats system, too.
579 : *
580 : * Deliberately avoid telling the stats system about LP_DEAD items that
581 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
582 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
583 : * It seems like a good idea to err on the side of not vacuuming again too
584 : * soon in cases where the failsafe prevented significant amounts of heap
585 : * vacuuming.
586 : */
587 11122 : pgstat_report_vacuum(RelationGetRelid(rel),
588 19098 : rel->rd_rel->relisshared,
589 7976 : Max(vacrel->new_live_tuples, 0),
590 19098 : vacrel->recently_dead_tuples +
591 19098 : vacrel->missed_dead_tuples);
592 19098 : pgstat_progress_end_command();
593 :
594 19098 : if (instrument)
595 : {
596 70 : TimestampTz endtime = GetCurrentTimestamp();
597 :
598 82 : if (verbose || params->log_min_duration == 0 ||
599 12 : TimestampDifferenceExceeds(starttime, endtime,
600 : params->log_min_duration))
601 : {
602 : long secs_dur;
603 : int usecs_dur;
604 : WalUsage walusage;
605 : BufferUsage bufferusage;
606 : StringInfoData buf;
607 : char *msgfmt;
608 : int32 diff;
609 58 : double read_rate = 0,
610 58 : write_rate = 0;
611 :
612 58 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
613 58 : memset(&walusage, 0, sizeof(WalUsage));
614 58 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
615 58 : memset(&bufferusage, 0, sizeof(BufferUsage));
616 58 : BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
617 :
618 58 : initStringInfo(&buf);
619 58 : if (verbose)
620 : {
621 : /*
622 : * Aggressiveness already reported earlier, in dedicated
623 : * VACUUM VERBOSE ereport
624 : */
625 : Assert(!params->is_wraparound);
626 22 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
627 : }
628 36 : else if (params->is_wraparound)
629 : {
630 : /*
631 : * While it's possible for a VACUUM to be both is_wraparound
632 : * and !aggressive, that's just a corner-case -- is_wraparound
633 : * implies aggressive. Produce distinct output for the corner
634 : * case all the same, just in case.
635 : */
636 0 : if (vacrel->aggressive)
637 0 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
638 : else
639 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
640 : }
641 : else
642 : {
643 36 : if (vacrel->aggressive)
644 4 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
645 : else
646 32 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
647 : }
648 58 : appendStringInfo(&buf, msgfmt,
649 : vacrel->dbname,
650 : vacrel->relnamespace,
651 : vacrel->relname,
652 : vacrel->num_index_scans);
653 110 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
654 : vacrel->removed_pages,
655 : new_rel_pages,
656 : vacrel->scanned_pages,
657 : orig_rel_pages == 0 ? 100.0 :
658 52 : 100.0 * vacrel->scanned_pages / orig_rel_pages);
659 58 : appendStringInfo(&buf,
660 58 : _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
661 58 : (long long) vacrel->tuples_deleted,
662 58 : (long long) vacrel->new_rel_tuples,
663 58 : (long long) vacrel->recently_dead_tuples);
664 58 : if (vacrel->missed_dead_tuples > 0)
665 0 : appendStringInfo(&buf,
666 0 : _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
667 0 : (long long) vacrel->missed_dead_tuples,
668 : vacrel->missed_dead_pages);
669 58 : diff = (int32) (ReadNextTransactionId() -
670 58 : vacrel->cutoffs.OldestXmin);
671 58 : appendStringInfo(&buf,
672 58 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
673 : vacrel->cutoffs.OldestXmin, diff);
674 58 : if (frozenxid_updated)
675 : {
676 34 : diff = (int32) (vacrel->NewRelfrozenXid -
677 34 : vacrel->cutoffs.relfrozenxid);
678 34 : appendStringInfo(&buf,
679 34 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
680 : vacrel->NewRelfrozenXid, diff);
681 : }
682 58 : if (minmulti_updated)
683 : {
684 22 : diff = (int32) (vacrel->NewRelminMxid -
685 22 : vacrel->cutoffs.relminmxid);
686 22 : appendStringInfo(&buf,
687 22 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
688 : vacrel->NewRelminMxid, diff);
689 : }
690 58 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
691 : vacrel->frozen_pages,
692 : orig_rel_pages == 0 ? 100.0 :
693 52 : 100.0 * vacrel->frozen_pages / orig_rel_pages,
694 58 : (long long) vacrel->tuples_frozen);
695 58 : if (vacrel->do_index_vacuuming)
696 : {
697 58 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
698 24 : appendStringInfoString(&buf, _("index scan not needed: "));
699 : else
700 34 : appendStringInfoString(&buf, _("index scan needed: "));
701 :
702 58 : msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
703 : }
704 : else
705 : {
706 0 : if (!VacuumFailsafeActive)
707 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
708 : else
709 0 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
710 :
711 0 : msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
712 : }
713 58 : appendStringInfo(&buf, msgfmt,
714 : vacrel->lpdead_item_pages,
715 : orig_rel_pages == 0 ? 100.0 :
716 52 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
717 58 : (long long) vacrel->lpdead_items);
718 142 : for (int i = 0; i < vacrel->nindexes; i++)
719 : {
720 84 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
721 :
722 84 : if (!istat)
723 8 : continue;
724 :
725 76 : appendStringInfo(&buf,
726 76 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
727 76 : indnames[i],
728 : istat->num_pages,
729 : istat->pages_newly_deleted,
730 : istat->pages_deleted,
731 : istat->pages_free);
732 : }
733 58 : if (track_io_timing)
734 : {
735 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
736 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
737 :
738 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
739 : read_ms, write_ms);
740 : }
741 58 : if (secs_dur > 0 || usecs_dur > 0)
742 : {
743 58 : read_rate = (double) BLCKSZ * (bufferusage.shared_blks_read + bufferusage.local_blks_read) /
744 58 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
745 58 : write_rate = (double) BLCKSZ * (bufferusage.shared_blks_dirtied + bufferusage.local_blks_dirtied) /
746 58 : (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
747 : }
748 58 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
749 : read_rate, write_rate);
750 58 : appendStringInfo(&buf,
751 58 : _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
752 58 : (long long) (bufferusage.shared_blks_hit + bufferusage.local_blks_hit),
753 58 : (long long) (bufferusage.shared_blks_read + bufferusage.local_blks_read),
754 58 : (long long) (bufferusage.shared_blks_dirtied + bufferusage.local_blks_dirtied));
755 58 : appendStringInfo(&buf,
756 58 : _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
757 58 : (long long) walusage.wal_records,
758 58 : (long long) walusage.wal_fpi,
759 58 : (unsigned long long) walusage.wal_bytes);
760 58 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
761 :
762 58 : ereport(verbose ? INFO : LOG,
763 : (errmsg_internal("%s", buf.data)));
764 58 : pfree(buf.data);
765 : }
766 : }
767 :
768 : /* Cleanup index statistics and index names */
769 46140 : for (int i = 0; i < vacrel->nindexes; i++)
770 : {
771 27042 : if (vacrel->indstats[i])
772 1928 : pfree(vacrel->indstats[i]);
773 :
774 27042 : if (instrument)
775 112 : pfree(indnames[i]);
776 : }
777 19098 : }
778 :
779 : /*
780 : * lazy_scan_heap() -- workhorse function for VACUUM
781 : *
782 : * This routine prunes each page in the heap, and considers the need to
783 : * freeze remaining tuples with storage (not including pages that can be
784 : * skipped using the visibility map). Also performs related maintenance
785 : * of the FSM and visibility map. These steps all take place during an
786 : * initial pass over the target heap relation.
787 : *
788 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
789 : * consists of deleting index tuples that point to LP_DEAD items left in
790 : * heap pages following pruning. Earlier initial pass over the heap will
791 : * have collected the TIDs whose index tuples need to be removed.
792 : *
793 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
794 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
795 : * as LP_UNUSED. This has to happen in a second, final pass over the
796 : * heap, to preserve a basic invariant that all index AMs rely on: no
797 : * extant index tuple can ever be allowed to contain a TID that points to
798 : * an LP_UNUSED line pointer in the heap. We must disallow premature
799 : * recycling of line pointers to avoid index scans that get confused
800 : * about which TID points to which tuple immediately after recycling.
801 : * (Actually, this isn't a concern when the target heap relation happens to
802 : * have no indexes, which allows us to safely apply the one-pass strategy
803 : * as an optimization).
804 : *
805 : * In practice we often have enough space to fit all TIDs, and so won't
806 : * need to call lazy_vacuum more than once, after our initial pass over
807 : * the heap has totally finished. Otherwise things are slightly more
808 : * complicated: our "initial pass" over the heap applies only to those
809 : * pages that were pruned before we needed to call lazy_vacuum, and our
810 : * "final pass" over the heap only vacuums these same heap pages.
811 : * However, we process indexes in full every time lazy_vacuum is called,
812 : * which makes index processing very inefficient when memory is in short
813 : * supply.
814 : */
815 : static void
816 19098 : lazy_scan_heap(LVRelState *vacrel)
817 : {
818 19098 : BlockNumber rel_pages = vacrel->rel_pages,
819 : blkno,
820 19098 : next_fsm_block_to_vacuum = 0;
821 : bool all_visible_according_to_vm;
822 :
823 19098 : TidStore *dead_items = vacrel->dead_items;
824 19098 : VacDeadItemsInfo *dead_items_info = vacrel->dead_items_info;
825 19098 : Buffer vmbuffer = InvalidBuffer;
826 19098 : const int initprog_index[] = {
827 : PROGRESS_VACUUM_PHASE,
828 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
829 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
830 : };
831 : int64 initprog_val[3];
832 :
833 : /* Report that we're scanning the heap, advertising total # of blocks */
834 19098 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
835 19098 : initprog_val[1] = rel_pages;
836 19098 : initprog_val[2] = dead_items_info->max_bytes;
837 19098 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
838 :
839 : /* Initialize for the first heap_vac_scan_next_block() call */
840 19098 : vacrel->current_block = InvalidBlockNumber;
841 19098 : vacrel->next_unskippable_block = InvalidBlockNumber;
842 19098 : vacrel->next_unskippable_allvis = false;
843 19098 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
844 :
845 110716 : while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
846 : {
847 : Buffer buf;
848 : Page page;
849 : bool has_lpdead_items;
850 91618 : bool got_cleanup_lock = false;
851 :
852 91618 : vacrel->scanned_pages++;
853 :
854 : /* Report as block scanned, update error traceback information */
855 91618 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
856 91618 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
857 : blkno, InvalidOffsetNumber);
858 :
859 91618 : vacuum_delay_point();
860 :
861 : /*
862 : * Regularly check if wraparound failsafe should trigger.
863 : *
864 : * There is a similar check inside lazy_vacuum_all_indexes(), but
865 : * relfrozenxid might start to look dangerously old before we reach
866 : * that point. This check also provides failsafe coverage for the
867 : * one-pass strategy, and the two-pass strategy with the index_cleanup
868 : * param set to 'off'.
869 : */
870 91618 : if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
871 0 : lazy_check_wraparound_failsafe(vacrel);
872 :
873 : /*
874 : * Consider whether we definitely have enough space to process the TIDs
875 : * on this page already. If we are close to overrunning the available
876 : * space for dead_items TIDs, pause and do a cycle of vacuuming before we
877 : * tackle this page.
878 : */
879 91618 : if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
880 : {
881 : /*
882 : * Before beginning index vacuuming, we release any pin we may
883 : * hold on the visibility map page. This isn't necessary for
884 : * correctness, but we do it anyway to avoid holding the pin
885 : * across a lengthy, unrelated operation.
886 : */
887 0 : if (BufferIsValid(vmbuffer))
888 : {
889 0 : ReleaseBuffer(vmbuffer);
890 0 : vmbuffer = InvalidBuffer;
891 : }
892 :
893 : /* Perform a round of index and heap vacuuming */
894 0 : vacrel->consider_bypass_optimization = false;
895 0 : lazy_vacuum(vacrel);
896 :
897 : /*
898 : * Vacuum the Free Space Map to make newly-freed space visible on
899 : * upper-level FSM pages. Note we have not yet processed blkno.
900 : */
901 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
902 : blkno);
903 0 : next_fsm_block_to_vacuum = blkno;
904 :
905 : /* Report that we are once again scanning the heap */
906 0 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
907 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
908 : }
909 :
910 : /*
911 : * Pin the visibility map page in case we need to mark the page
912 : * all-visible. In most cases this will be very cheap, because we'll
913 : * already have the correct page pinned anyway.
914 : */
915 91618 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
916 :
917 91618 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
918 : vacrel->bstrategy);
919 91618 : page = BufferGetPage(buf);
920 :
921 : /*
922 : * We need a buffer cleanup lock to prune HOT chains and defragment
923 : * the page in lazy_scan_prune. But when it's not possible to acquire
924 : * a cleanup lock right away, we may be able to settle for reduced
925 : * processing using lazy_scan_noprune.
926 : */
927 91618 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
928 :
929 91618 : if (!got_cleanup_lock)
930 8 : LockBuffer(buf, BUFFER_LOCK_SHARE);
931 :
932 : /* Check for new or empty pages before lazy_scan_[no]prune call */
933 91618 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
934 91618 : vmbuffer))
935 : {
936 : /* Processed as new/empty page (lock and pin released) */
937 1174 : continue;
938 : }
939 :
940 : /*
941 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
942 : * items in the dead_items area for later vacuuming, count live and
943 : * recently dead tuples for vacuum logging, and determine if this
944 : * block could later be truncated. If we encounter any XIDs/MXIDs that
945 : * require advancing the relfrozenxid/relminmxid, we'll have to wait
946 : * for a cleanup lock and call lazy_scan_prune().
947 : */
948 90444 : if (!got_cleanup_lock &&
949 8 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
950 : {
951 : /*
952 : * lazy_scan_noprune could not do all required processing. Wait
953 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
954 : */
955 : Assert(vacrel->aggressive);
956 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
957 0 : LockBufferForCleanup(buf);
958 0 : got_cleanup_lock = true;
959 : }
960 :
961 : /*
962 : * If we have a cleanup lock, we must now prune, freeze, and count
963 : * tuples. We may have acquired the cleanup lock originally, or we may
964 : * have gone back and acquired it after lazy_scan_noprune() returned
965 : * false. Either way, the page hasn't been processed yet.
966 : *
967 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
968 : * recently_dead_tuples and live tuples for vacuum logging, determine
969 : * if the block can later be truncated, and accumulate the details of
970 : * remaining LP_DEAD line pointers on the page into dead_items. These
971 : * dead items include those pruned by lazy_scan_prune() as well as
972 : * line pointers previously marked LP_DEAD.
973 : */
974 90444 : if (got_cleanup_lock)
975 90436 : lazy_scan_prune(vacrel, buf, blkno, page,
976 : vmbuffer, all_visible_according_to_vm,
977 : &has_lpdead_items);
978 :
979 : /*
980 : * Now drop the buffer lock and, potentially, update the FSM.
981 : *
982 : * Our goal is to update the freespace map the last time we touch the
983 : * page. If we'll process a block in the second pass, we may free up
984 : * additional space on the page, so it is better to update the FSM
985 : * after the second pass. If the relation has no indexes, or if index
986 : * vacuuming is disabled, there will be no second heap pass; if this
987 : * particular page has no dead items, the second heap pass will not
988 : * touch this page. So, in those cases, update the FSM now.
989 : *
990 : * Note: In corner cases, it's possible to miss updating the FSM
991 : * entirely. If index vacuuming is currently enabled, we'll skip the
992 : * FSM update now. But if failsafe mode is later activated, or there
993 : * are so few dead tuples that index vacuuming is bypassed, there will
994 : * also be no opportunity to update the FSM later, because we'll never
995 : * revisit this page. Since updating the FSM is desirable but not
996 : * absolutely required, that's OK.
997 : */
998 90444 : if (vacrel->nindexes == 0
999 80080 : || !vacrel->do_index_vacuuming
1000 79322 : || !has_lpdead_items)
1001 70222 : {
1002 70222 : Size freespace = PageGetHeapFreeSpace(page);
1003 :
1004 70222 : UnlockReleaseBuffer(buf);
1005 70222 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1006 :
1007 : /*
1008 : * Periodically perform FSM vacuuming to make newly-freed space
1009 : * visible on upper FSM pages. This is done after vacuuming if the
1010 : * table has indexes. There will only be newly-freed space if we
1011 : * held the cleanup lock and lazy_scan_prune() was called.
1012 : */
1013 70222 : if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1014 0 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1015 : {
1016 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1017 : blkno);
1018 0 : next_fsm_block_to_vacuum = blkno;
1019 : }
1020 : }
1021 : else
1022 20222 : UnlockReleaseBuffer(buf);
1023 : }
1024 :
1025 19098 : vacrel->blkno = InvalidBlockNumber;
1026 19098 : if (BufferIsValid(vmbuffer))
1027 8094 : ReleaseBuffer(vmbuffer);
1028 :
1029 : /* report that everything is now scanned */
1030 19098 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1031 :
1032 : /* now we can compute the new value for pg_class.reltuples */
1033 38196 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1034 : vacrel->scanned_pages,
1035 19098 : vacrel->live_tuples);
1036 :
1037 : /*
1038 : * Also compute the total number of surviving heap entries. In the
1039 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1040 : */
1041 19098 : vacrel->new_rel_tuples =
1042 19098 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1043 19098 : vacrel->missed_dead_tuples;
1044 :
1045 : /*
1046 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1047 : * related heap vacuuming
1048 : */
1049 19098 : if (dead_items_info->num_items > 0)
1050 870 : lazy_vacuum(vacrel);
1051 :
1052 : /*
1053 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1054 : * not there were indexes, and whether or not we bypassed index vacuuming.
1055 : */
1056 19098 : if (blkno > next_fsm_block_to_vacuum)
1057 8094 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1058 :
1059 : /* report all blocks vacuumed */
1060 19098 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
1061 :
1062 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1063 19098 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1064 17194 : lazy_cleanup_all_indexes(vacrel);
1065 19098 : }
1066 :
1067 : /*
1068 : * heap_vac_scan_next_block() -- get next block for vacuum to process
1069 : *
1070 : * lazy_scan_heap() calls here every time it needs to get the next block to
1071 : * prune and vacuum. The function uses the visibility map, vacuum options,
1072 : * and various thresholds to skip blocks which do not need to be processed and
1073 : * sets blkno to the next block to process.
1074 : *
1075 : * The block number and visibility status of the next block to process are set
1076 : * in *blkno and *all_visible_according_to_vm. The return value is false if
1077 : * there are no further blocks to process.
1078 : *
1079 : * vacrel is an in/out parameter here. Vacuum options and information about
1080 : * the relation are read. vacrel->skippedallvis is set if we skip a block
1081 : * that's all-visible but not all-frozen, to ensure that we don't update
1082 : * relfrozenxid in that case. vacrel also holds information about the next
1083 : * unskippable block, as bookkeeping for this function.
1084 : */
1085 : static bool
1086 110716 : heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
1087 : bool *all_visible_according_to_vm)
1088 : {
1089 : BlockNumber next_block;
1090 :
1091 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1092 110716 : next_block = vacrel->current_block + 1;
1093 :
1094 : /* Have we reached the end of the relation? */
1095 110716 : if (next_block >= vacrel->rel_pages)
1096 : {
1097 19098 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1098 : {
1099 5960 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1100 5960 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1101 : }
1102 19098 : *blkno = vacrel->rel_pages;
1103 19098 : return false;
1104 : }
1105 :
1106 : /*
1107 : * We must be in one of the three following states:
1108 : */
1109 91618 : if (next_block > vacrel->next_unskippable_block ||
1110 18318 : vacrel->next_unskippable_block == InvalidBlockNumber)
1111 : {
1112 : /*
1113 : * 1. We have just processed an unskippable block (or we're at the
1114 : * beginning of the scan). Find the next unskippable block using the
1115 : * visibility map.
1116 : */
1117 : bool skipsallvis;
1118 :
1119 81394 : find_next_unskippable_block(vacrel, &skipsallvis);
1120 :
1121 : /*
1122 : * We now know the next block that we must process. It can be the
1123 : * next block after the one we just processed, or something further
1124 : * ahead. If it's further ahead, we can jump to it, but we choose to
1125 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1126 : * pages. Since we're reading sequentially, the OS should be doing
1127 : * readahead for us, so there's no gain in skipping a page now and
1128 : * then. Skipping such a range might even discourage sequential
1129 : * detection.
1130 : *
1131 : * This test also enables more frequent relfrozenxid advancement
1132 : * during non-aggressive VACUUMs. If the range has any all-visible
1133 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1134 : * real downside.
1135 : */
1136 81394 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1137 : {
1138 328 : next_block = vacrel->next_unskippable_block;
1139 328 : if (skipsallvis)
1140 52 : vacrel->skippedallvis = true;
1141 : }
1142 : }
1143 :
1144 : /* Now we must be in one of the two remaining states: */
1145 91618 : if (next_block < vacrel->next_unskippable_block)
1146 : {
1147 : /*
1148 : * 2. We are processing a range of blocks that we could have skipped
1149 : * but chose not to. We know that they are all-visible in the VM,
1150 : * otherwise they would've been unskippable.
1151 : */
1152 10224 : *blkno = vacrel->current_block = next_block;
1153 10224 : *all_visible_according_to_vm = true;
1154 10224 : return true;
1155 : }
1156 : else
1157 : {
1158 : /*
1159 : * 3. We reached the next unskippable block. Process it. On next
1160 : * iteration, we will be back in state 1.
1161 : */
1162 : Assert(next_block == vacrel->next_unskippable_block);
1163 :
1164 81394 : *blkno = vacrel->current_block = next_block;
1165 81394 : *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
1166 81394 : return true;
1167 : }
1168 : }
1169 :
1170 : /*
1171 : * Find the next unskippable block in a vacuum scan using the visibility map.
1172 : * The next unskippable block and its visibility information are updated in
1173 : * vacrel.
1174 : *
1175 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1176 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1177 : * was concurrently cleared, though. All that matters is that caller scan all
1178 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1179 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1180 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1181 : * to skip such a range is actually made, making everything safe.)
1182 : */
1183 : static void
1184 81394 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1185 : {
1186 81394 : BlockNumber rel_pages = vacrel->rel_pages;
1187 81394 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1188 81394 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1189 : bool next_unskippable_allvis;
1190 :
1191 81394 : *skipsallvis = false;
1192 :
1193 : for (;;)
1194 36352 : {
1195 117746 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1196 : next_unskippable_block,
1197 : &next_unskippable_vmbuffer);
1198 :
1199 117746 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1200 :
1201 : /*
1202 : * A block is unskippable if it is not all visible according to the
1203 : * visibility map.
1204 : */
1205 117746 : if (!next_unskippable_allvis)
1206 : {
1207 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1208 77590 : break;
1209 : }
1210 :
1211 : /*
1212 : * Caller must scan the last page to determine whether it has tuples
1213 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1214 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1215 : * lock on rel to attempt a truncation that fails anyway, just because
1216 : * there are tuples on the last page (it is likely that there will be
1217 : * tuples on other nearby pages as well, but those can be skipped).
1218 : *
1219 : * Implement this by always treating the last block as unsafe to skip.
1220 : */
1221 40156 : if (next_unskippable_block == rel_pages - 1)
1222 3074 : break;
1223 :
1224 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1225 37082 : if (!vacrel->skipwithvm)
1226 728 : break;
1227 :
1228 : /*
1229 : * Aggressive VACUUM caller can't skip pages just because they are
1230 : * all-visible. They may still skip all-frozen pages, which can't
1231 : * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1232 : */
1233 36354 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1234 : {
1235 5976 : if (vacrel->aggressive)
1236 2 : break;
1237 :
1238 : /*
1239 : * All-visible block is safe to skip in non-aggressive case. But
1240 : * remember that the final range contains such a block for later.
1241 : */
1242 5974 : *skipsallvis = true;
1243 : }
1244 :
1245 36352 : next_unskippable_block++;
1246 : }
1247 :
1248 : /* write the local variables back to vacrel */
1249 81394 : vacrel->next_unskippable_block = next_unskippable_block;
1250 81394 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1251 81394 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1252 81394 : }
1253 :
1254 : /*
1255 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1256 : *
1257 : * Must call here to handle both new and empty pages before calling
1258 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1259 : * with new or empty pages.
1260 : *
1261 : * It's necessary to consider new pages as a special case, since the rules for
1262 : * maintaining the visibility map and FSM with empty pages are a little
1263 : * different (though new pages can be truncated away during rel truncation).
1264 : *
1265 : * Empty pages are not really a special case -- they're just heap pages that
1266 : * have no allocated tuples (including even LP_UNUSED items). You might
1267 : * wonder why we need to handle them here all the same. It's only necessary
1268 : * because of a corner-case involving a hard crash during heap relation
1269 : * extension. If we ever make relation-extension crash safe, then it should
1270 : * no longer be necessary to deal with empty pages here (or new pages, for
1271 : * that matter).
1272 : *
1273 : * Caller must hold at least a shared lock. If that is all the caller
1274 : * holds, we might need to escalate the lock, so the type of lock held
1275 : * needs to be specified using the 'sharelock' argument.
1276 : *
1277 : * Returns false in common case where caller should go on to call
1278 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1279 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1280 : * behalf.
1281 : */
1282 : static bool
1283 91618 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1284 : Page page, bool sharelock, Buffer vmbuffer)
1285 : {
1286 : Size freespace;
1287 :
1288 91618 : if (PageIsNew(page))
1289 : {
1290 : /*
1291 : * All-zeroes pages can be left over if either a backend extends the
1292 : * relation by a single page, but crashes before the newly initialized
1293 : * page has been written out, or when bulk-extending the relation
1294 : * (which creates a number of empty pages at the tail end of the
1295 : * relation), and then enters them into the FSM.
1296 : *
1297 : * Note we do not enter the page into the visibilitymap. That has the
1298 : * downside that we repeatedly visit this page in subsequent vacuums,
1299 : * but otherwise we'll never discover the space on a promoted standby.
1300 : * The harm of repeated checking ought to normally not be too bad. The
1301 : * space usually should be used at some point, otherwise there
1302 : * wouldn't be any regular vacuums.
1303 : *
1304 : * Make sure these pages are in the FSM, to ensure they can be reused.
1305 : * Do that by testing if there's any space recorded for the page. If
1306 : * not, enter it. We do so after releasing the lock on the heap page;
1307 : * the FSM is approximate, after all.
1308 : */
1309 1132 : UnlockReleaseBuffer(buf);
1310 :
1311 1132 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1312 : {
1313 820 : freespace = BLCKSZ - SizeOfPageHeaderData;
1314 :
1315 820 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1316 : }
1317 :
1318 1132 : return true;
1319 : }
1320 :
1321 90486 : if (PageIsEmpty(page))
1322 : {
1323 : /*
1324 : * It seems likely that caller will always be able to get a cleanup
1325 : * lock on an empty page. But don't take any chances -- escalate to
1326 : * an exclusive lock (still don't need a cleanup lock, though).
1327 : */
1328 42 : if (sharelock)
1329 : {
1330 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1331 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1332 :
1333 0 : if (!PageIsEmpty(page))
1334 : {
1335 : /* page isn't new or empty -- keep lock and pin for now */
1336 0 : return false;
1337 : }
1338 : }
1339 : else
1340 : {
1341 : /* Already have a full cleanup lock (which is more than enough) */
1342 : }
1343 :
1344 : /*
1345 : * Unlike new pages, empty pages are always set all-visible and
1346 : * all-frozen.
1347 : */
1348 42 : if (!PageIsAllVisible(page))
1349 : {
1350 0 : START_CRIT_SECTION();
1351 :
1352 : /* mark buffer dirty before writing a WAL record */
1353 0 : MarkBufferDirty(buf);
1354 :
1355 : /*
1356 : * It's possible that another backend has extended the heap,
1357 : * initialized the page, and then failed to WAL-log the page due
1358 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1359 : * might try to replay our record setting the page all-visible and
1360 : * find that the page isn't initialized, which will cause a PANIC.
1361 : * To prevent that, check whether the page has been previously
1362 : * WAL-logged, and if not, do that now.
1363 : */
1364 0 : if (RelationNeedsWAL(vacrel->rel) &&
1365 0 : PageGetLSN(page) == InvalidXLogRecPtr)
1366 0 : log_newpage_buffer(buf, true);
1367 :
1368 0 : PageSetAllVisible(page);
1369 0 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1370 : vmbuffer, InvalidTransactionId,
1371 : VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
1372 0 : END_CRIT_SECTION();
1373 : }
1374 :
1375 42 : freespace = PageGetHeapFreeSpace(page);
1376 42 : UnlockReleaseBuffer(buf);
1377 42 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1378 42 : return true;
1379 : }
1380 :
1381 : /* page isn't new or empty -- keep lock and pin */
1382 90444 : return false;
1383 : }
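 :
 : /*
 : * A worked example of the free-space figure recorded just above for
 : * all-zero pages (a sketch; 8192 assumes the default BLCKSZ):
 : *
 : * freespace = BLCKSZ - SizeOfPageHeaderData
 : *           = 8192 - 24
 : *           = 8168 bytes
 : *
 : * i.e. an uninitialized page is reported to the FSM as entirely free,
 : * less only the header that PageInit would write.
 : */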
1384 :
1385 : /* qsort comparator for sorting OffsetNumbers */
1386 : static int
1387 4309300 : cmpOffsetNumbers(const void *a, const void *b)
1388 : {
1389 4309300 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1390 : }
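 :
 : /*
 : * Usage sketch for the comparator (lazy_scan_prune sorts each page's
 : * dead offsets this way before calling dead_items_add):
 : *
 : * OffsetNumber offs[] = {7, 2, 5};
 : *
 : * qsort(offs, lengthof(offs), sizeof(OffsetNumber), cmpOffsetNumbers);
 : * => offs is now {2, 5, 7}
 : *
 : * Delegating to pg_cmp_u16 keeps the comparison centralized. A plain
 : * "a - b" would in fact be safe for uint16 operands (both promote to
 : * int), but the helper spares every caller from re-verifying that.
 : */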
1391 :
1392 : /*
1393 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1394 : *
1395 : * Caller must hold pin and buffer cleanup lock on the buffer.
1396 : *
1397 : * vmbuffer is the buffer containing the VM block with visibility information
1398 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1399 : * visibility status of the heap block looked up earlier by the caller. We
1400 : * won't rely entirely on this status, as it may be out of date.
1401 : *
1402 : * *has_lpdead_items is set to true or false depending on whether, upon return
1403 : * from this function, any LP_DEAD items are still present on the page.
1404 : */
1405 : static void
1406 90436 : lazy_scan_prune(LVRelState *vacrel,
1407 : Buffer buf,
1408 : BlockNumber blkno,
1409 : Page page,
1410 : Buffer vmbuffer,
1411 : bool all_visible_according_to_vm,
1412 : bool *has_lpdead_items)
1413 : {
1414 90436 : Relation rel = vacrel->rel;
1415 : PruneFreezeResult presult;
1416 90436 : int prune_options = 0;
1417 :
1418 : Assert(BufferGetBlockNumber(buf) == blkno);
1419 :
1420 : /*
1421 : * Prune all HOT-update chains and potentially freeze tuples on this page.
1422 : *
1423 : * If the relation has no indexes, we can immediately mark would-be dead
1424 : * items LP_UNUSED.
1425 : *
1426 : * The number of tuples removed from the page is returned in
1427 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
1428 : * the final value of presult.lpdead_items can be thought of as the number
1429 : * of tuples that will later be deleted from indexes.
1430 : *
1431 : * We will update the VM after collecting LP_DEAD items and freezing
1432 : * tuples. Pruning will have determined whether or not the page is
1433 : * all-visible.
1434 : */
1435 90436 : prune_options = HEAP_PAGE_PRUNE_FREEZE;
1436 90436 : if (vacrel->nindexes == 0)
1437 10364 : prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1438 :
1439 90436 : heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1440 : &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1441 : &vacrel->offnum,
1442 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1443 :
1444 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
1445 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
1446 :
1447 90436 : if (presult.nfrozen > 0)
1448 : {
1449 : /*
1450 : * We don't increment the frozen_pages instrumentation counter when
1451 : * nfrozen == 0, since it only counts pages with newly frozen tuples
1452 : * (don't confuse that with pages newly set all-frozen in the VM).
1453 : */
1454 22368 : vacrel->frozen_pages++;
1455 : }
1456 :
1457 : /*
1458 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1459 : * the heap to determine all_visible and all_frozen for the page -- this
1460 : * is a specialized version of the logic from this function. Now that
1461 : * we've finished pruning and freezing, make sure that we're in total
1462 : * agreement with heap_page_is_all_visible() using an assertion.
1463 : */
1464 : #ifdef USE_ASSERT_CHECKING
1465 : /* Note that all_frozen value does not matter when !all_visible */
1466 : if (presult.all_visible)
1467 : {
1468 : TransactionId debug_cutoff;
1469 : bool debug_all_frozen;
1470 :
1471 : Assert(presult.lpdead_items == 0);
1472 :
1473 : if (!heap_page_is_all_visible(vacrel, buf,
1474 : &debug_cutoff, &debug_all_frozen))
1475 : Assert(false);
1476 :
1477 : Assert(presult.all_frozen == debug_all_frozen);
1478 :
1479 : Assert(!TransactionIdIsValid(debug_cutoff) ||
1480 : debug_cutoff == presult.vm_conflict_horizon);
1481 : }
1482 : #endif
1483 :
1484 : /*
1485 : * Now save details of the LP_DEAD items from the page in vacrel
1486 : */
1487 90436 : if (presult.lpdead_items > 0)
1488 : {
1489 20320 : vacrel->lpdead_item_pages++;
1490 :
1491 : /*
1492 : * deadoffsets are collected incrementally in
1493 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
1494 : * in an indeterminate order, but dead_items_add requires them to be
1495 : * sorted.
1496 : */
1497 20320 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
1498 : cmpOffsetNumbers);
1499 :
1500 20320 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
1501 : }
1502 :
1503 : /* Finally, add page-local counts to whole-VACUUM counts */
1504 90436 : vacrel->tuples_deleted += presult.ndeleted;
1505 90436 : vacrel->tuples_frozen += presult.nfrozen;
1506 90436 : vacrel->lpdead_items += presult.lpdead_items;
1507 90436 : vacrel->live_tuples += presult.live_tuples;
1508 90436 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
1509 :
1510 : /* Can't truncate this page */
1511 90436 : if (presult.hastup)
1512 79494 : vacrel->nonempty_pages = blkno + 1;
1513 :
1514 : /* Did we find LP_DEAD items? */
1515 90436 : *has_lpdead_items = (presult.lpdead_items > 0);
1516 :
1517 : Assert(!presult.all_visible || !(*has_lpdead_items));
1518 :
1519 : /*
1520 : * Handle setting visibility map bit based on information from the VM (as
1521 : * of last heap_vac_scan_next_block() call), and from all_visible and
1522 : * all_frozen variables
1523 : */
1524 90436 : if (!all_visible_according_to_vm && presult.all_visible)
1525 47354 : {
1526 47354 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
1527 :
1528 47354 : if (presult.all_frozen)
1529 : {
1530 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1531 33276 : flags |= VISIBILITYMAP_ALL_FROZEN;
1532 : }
1533 :
1534 : /*
1535 : * It should never be the case that the visibility map bit is set
1536 : * while the page-level bit is clear, but the reverse is allowed (if
1537 : * checksums are not enabled). Regardless, set both bits so that we
1538 : * get back in sync.
1539 : *
1540 : * NB: If the heap page is all-visible but the VM bit is not set, we
1541 : * don't need to dirty the heap page. However, if checksums are
1542 : * enabled, we do need to make sure that the heap page is dirtied
1543 : * before passing it to visibilitymap_set(), because it may be logged.
1544 : * Given that this situation should only happen in rare cases after a
1545 : * crash, it is not worth optimizing.
1546 : */
1547 47354 : PageSetAllVisible(page);
1548 47354 : MarkBufferDirty(buf);
1549 47354 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1550 : vmbuffer, presult.vm_conflict_horizon,
1551 : flags);
1552 : }
1553 :
1554 : /*
1555 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1556 : * page-level bit is clear. However, it's possible that the bit got
1557 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
1558 : * with buffer lock before concluding that the VM is corrupt.
1559 : */
1560 43082 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1561 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1562 : {
1563 0 : elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1564 : vacrel->relname, blkno);
1565 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1566 : VISIBILITYMAP_VALID_BITS);
1567 : }
1568 :
1569 : /*
1570 : * It's possible for the value returned by
1571 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1572 : * wrong for us to see tuples that appear to not be visible to everyone
1573 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
1574 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
1575 : * conservative and sometimes returns a value that's unnecessarily small,
1576 : * so if we see that contradiction it just means that the tuples that we
1577 : * think are not visible to everyone yet actually are, and the
1578 : * PD_ALL_VISIBLE flag is correct.
1579 : *
1580 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
1581 : * however.
1582 : */
1583 43082 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
1584 : {
1585 0 : elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1586 : vacrel->relname, blkno);
1587 0 : PageClearAllVisible(page);
1588 0 : MarkBufferDirty(buf);
1589 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1590 : VISIBILITYMAP_VALID_BITS);
1591 : }
1592 :
1593 : /*
1594 : * If the all-visible page is all-frozen but not marked as such yet, mark
1595 : * it as all-frozen. Note that all_frozen is only valid if all_visible is
1596 : * true, so we must check both all_visible and all_frozen.
1597 : */
1598 43082 : else if (all_visible_according_to_vm && presult.all_visible &&
1599 13986 : presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1600 : {
1601 : /*
1602 : * Avoid relying on all_visible_according_to_vm as a proxy for the
1603 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
1604 : * stale -- even when all_visible is set
1605 : */
1606 24 : if (!PageIsAllVisible(page))
1607 : {
1608 0 : PageSetAllVisible(page);
1609 0 : MarkBufferDirty(buf);
1610 : }
1611 :
1612 : /*
1613 : * Set the page all-frozen (and all-visible) in the VM.
1614 : *
1615 : * We can pass InvalidTransactionId as our cutoff_xid, since a
1616 : * snapshotConflictHorizon sufficient to make everything safe for REDO
1617 : * was logged when the page's tuples were frozen.
1618 : */
1619 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1620 24 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1621 : vmbuffer, InvalidTransactionId,
1622 : VISIBILITYMAP_ALL_VISIBLE |
1623 : VISIBILITYMAP_ALL_FROZEN);
1624 : }
1625 90436 : }
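 :
 : /*
 : * Recap of the four visibility map outcomes above (a summary of the
 : * branches in lazy_scan_prune, not additional behavior):
 : *
 : * 1. VM bit clear but pruning proved the page all-visible: set the
 : *    page-level bit and the VM bit, adding ALL_FROZEN when every
 : *    tuple is frozen.
 : * 2. VM bit set while the page-level bit is clear on recheck: warn
 : *    and clear the corrupt VM bits.
 : * 3. Page-level bit set even though LP_DEAD items remain: warn,
 : *    clear the page-level bit, and clear the VM bits.
 : * 4. Page already all-visible in the VM and now also all-frozen:
 : *    upgrade the VM to ALL_FROZEN; InvalidTransactionId suffices as
 : *    the cutoff because freezing already logged a conflict horizon.
 : */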
1626 :
1627 : /*
1628 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1629 : *
1630 : * Caller need only hold a pin and share lock on the buffer, unlike
1631 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1632 : * performed here, it's quite possible that an earlier opportunistic pruning
1633 : * operation left LP_DEAD items behind. We'll at least collect any such items
1634 : * in dead_items for removal from indexes.
1635 : *
1636 : * For aggressive VACUUM callers, we may return false to indicate that a full
1637 : * cleanup lock is required for processing by lazy_scan_prune. This is only
1638 : * necessary when the aggressive VACUUM needs to freeze an XID or MXID from
1639 : * one or more tuples on the page. We always return true for non-aggressive
1640 : * callers.
1641 : *
1642 : * If this function returns true, *has_lpdead_items gets set to true or false
1643 : * depending on whether, upon return from this function, any LP_DEAD items are
1644 : * present on the page. If this function returns false, *has_lpdead_items
1645 : * is not updated.
1646 : */
1647 : static bool
1648 8 : lazy_scan_noprune(LVRelState *vacrel,
1649 : Buffer buf,
1650 : BlockNumber blkno,
1651 : Page page,
1652 : bool *has_lpdead_items)
1653 : {
1654 : OffsetNumber offnum,
1655 : maxoff;
1656 : int lpdead_items,
1657 : live_tuples,
1658 : recently_dead_tuples,
1659 : missed_dead_tuples;
1660 : bool hastup;
1661 : HeapTupleHeader tupleheader;
1662 8 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1663 8 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1664 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1665 :
1666 : Assert(BufferGetBlockNumber(buf) == blkno);
1667 :
1668 8 : hastup = false; /* for now */
1669 :
1670 8 : lpdead_items = 0;
1671 8 : live_tuples = 0;
1672 8 : recently_dead_tuples = 0;
1673 8 : missed_dead_tuples = 0;
1674 :
1675 8 : maxoff = PageGetMaxOffsetNumber(page);
1676 180 : for (offnum = FirstOffsetNumber;
1677 : offnum <= maxoff;
1678 172 : offnum = OffsetNumberNext(offnum))
1679 : {
1680 : ItemId itemid;
1681 : HeapTupleData tuple;
1682 :
1683 172 : vacrel->offnum = offnum;
1684 172 : itemid = PageGetItemId(page, offnum);
1685 :
1686 172 : if (!ItemIdIsUsed(itemid))
1687 0 : continue;
1688 :
1689 172 : if (ItemIdIsRedirected(itemid))
1690 : {
1691 0 : hastup = true;
1692 0 : continue;
1693 : }
1694 :
1695 172 : if (ItemIdIsDead(itemid))
1696 : {
1697 : /*
1698 : * Deliberately don't set hastup=true here. See same point in
1699 : * lazy_scan_prune for an explanation.
1700 : */
1701 0 : deadoffsets[lpdead_items++] = offnum;
1702 0 : continue;
1703 : }
1704 :
1705 172 : hastup = true; /* page prevents rel truncation */
1706 172 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1707 172 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
1708 : &NoFreezePageRelfrozenXid,
1709 : &NoFreezePageRelminMxid))
1710 : {
1711 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
1712 128 : if (vacrel->aggressive)
1713 : {
1714 : /*
1715 : * Aggressive VACUUMs must always be able to advance rel's
1716 : * relfrozenxid to a value >= FreezeLimit (and be able to
1717 : * advance rel's relminmxid to a value >= MultiXactCutoff).
1718 : * The ongoing aggressive VACUUM won't be able to do that
1719 : * unless it can freeze an XID (or MXID) from this tuple now.
1720 : *
1721 : * The only safe option is to have caller perform processing
1722 : * of this page using lazy_scan_prune. Caller might have to
1723 : * wait a while for a cleanup lock, but it can't be helped.
1724 : */
1725 0 : vacrel->offnum = InvalidOffsetNumber;
1726 0 : return false;
1727 : }
1728 :
1729 : /*
1730 : * Non-aggressive VACUUMs are under no obligation to advance
1731 : * relfrozenxid (even by one XID). We can be much laxer here.
1732 : *
1733 : * Currently we always just accept an older final relfrozenxid
1734 : * and/or relminmxid value. We never make the caller wait or work a
1735 : * little harder, even when it likely makes sense to do so.
1736 : */
1737 : }
1738 :
1739 172 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
1740 172 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1741 172 : tuple.t_len = ItemIdGetLength(itemid);
1742 172 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
1743 :
1744 172 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1745 : buf))
1746 : {
1747 166 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1748 : case HEAPTUPLE_LIVE:
1749 :
1750 : /*
1751 : * Count both cases as live, just like lazy_scan_prune
1752 : */
1753 166 : live_tuples++;
1754 :
1755 166 : break;
1756 2 : case HEAPTUPLE_DEAD:
1757 :
1758 : /*
1759 : * There is some useful work for pruning to do that won't be
1760 : * done due to our failure to get a cleanup lock.
1761 : */
1762 2 : missed_dead_tuples++;
1763 2 : break;
1764 4 : case HEAPTUPLE_RECENTLY_DEAD:
1765 :
1766 : /*
1767 : * Count in recently_dead_tuples, just like lazy_scan_prune
1768 : */
1769 4 : recently_dead_tuples++;
1770 4 : break;
1771 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1772 :
1773 : /*
1774 : * Do not count these rows as live, just like lazy_scan_prune
1775 : */
1776 0 : break;
1777 0 : default:
1778 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1779 : break;
1780 : }
1781 : }
1782 :
1783 8 : vacrel->offnum = InvalidOffsetNumber;
1784 :
1785 : /*
1786 : * By here we know for sure that the caller can put off freezing and pruning
1787 : * this particular page until the next VACUUM. Remember its details now.
1788 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
1789 : */
1790 8 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
1791 8 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
1792 :
1793 : /* Save any LP_DEAD items found on the page in dead_items */
1794 8 : if (vacrel->nindexes == 0)
1795 : {
1796 : /* Using one-pass strategy (since table has no indexes) */
1797 0 : if (lpdead_items > 0)
1798 : {
1799 : /*
1800 : * Perfunctory handling for the corner case where a single pass
1801 : * strategy VACUUM cannot get a cleanup lock, and it turns out
1802 : * that there are one or more LP_DEAD items: just count the LP_DEAD
1803 : * items as missed_dead_tuples instead. (This is a bit dishonest,
1804 : * but it beats having to maintain specialized heap vacuuming code
1805 : * forever, for vanishingly little benefit.)
1806 : */
1807 0 : hastup = true;
1808 0 : missed_dead_tuples += lpdead_items;
1809 : }
1810 : }
1811 8 : else if (lpdead_items > 0)
1812 : {
1813 : /*
1814 : * Page has LP_DEAD items, and so any references/TIDs that remain in
1815 : * indexes will be deleted during index vacuuming (and then marked
1816 : * LP_UNUSED in the heap)
1817 : */
1818 0 : vacrel->lpdead_item_pages++;
1819 :
1820 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
1821 :
1822 0 : vacrel->lpdead_items += lpdead_items;
1823 : }
1824 :
1825 : /*
1826 : * Finally, add relevant page-local counts to whole-VACUUM counts
1827 : */
1828 8 : vacrel->live_tuples += live_tuples;
1829 8 : vacrel->recently_dead_tuples += recently_dead_tuples;
1830 8 : vacrel->missed_dead_tuples += missed_dead_tuples;
1831 8 : if (missed_dead_tuples > 0)
1832 2 : vacrel->missed_dead_pages++;
1833 :
1834 : /* Can't truncate this page */
1835 8 : if (hastup)
1836 8 : vacrel->nonempty_pages = blkno + 1;
1837 :
1838 : /* Did we find LP_DEAD items? */
1839 8 : *has_lpdead_items = (lpdead_items > 0);
1840 :
1841 : /* Caller won't need to call lazy_scan_prune with same page */
1842 8 : return true;
1843 : }
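 :
 : /*
 : * A sketch of how the return-value protocol is consumed (the real
 : * caller is lazy_scan_heap; the shape below is illustrative only):
 : *
 : * if (!lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
 : * {
 : *     ... false means an aggressive VACUUM must freeze this page:
 : *     upgrade the share lock to a cleanup lock, then process the
 : *     page with lazy_scan_prune instead ...
 : * }
 : */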
1844 :
1845 : /*
1846 : * Main entry point for index vacuuming and heap vacuuming.
1847 : *
1848 : * Removes items collected in dead_items from table's indexes, then marks the
1849 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
1850 : * for full details.
1851 : *
1852 : * Also empties dead_items, freeing up space for later TIDs.
1853 : *
1854 : * We may choose to bypass index vacuuming at this point, though only when the
1855 : * ongoing VACUUM operation is certain to need just one index scan/round of
1856 : * index vacuuming.
1857 : */
1858 : static void
1859 870 : lazy_vacuum(LVRelState *vacrel)
1860 : {
1861 : bool bypass;
1862 :
1863 : /* Should not end up here with no indexes */
1864 : Assert(vacrel->nindexes > 0);
1865 : Assert(vacrel->lpdead_item_pages > 0);
1866 :
1867 870 : if (!vacrel->do_index_vacuuming)
1868 : {
1869 : Assert(!vacrel->do_index_cleanup);
1870 12 : dead_items_reset(vacrel);
1871 12 : return;
1872 : }
1873 :
1874 : /*
1875 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
1876 : *
1877 : * We currently only do this in cases where the number of LP_DEAD items
1878 : * for the entire VACUUM operation is close to zero. This avoids sharp
1879 : * discontinuities in the duration and overhead of successive VACUUM
1880 : * operations that run against the same table with a fixed workload.
1881 : * Ideally, successive VACUUM operations will behave as if there are
1882 : * exactly zero LP_DEAD items in cases where there are close to zero.
1883 : *
1884 : * This is likely to be helpful with a table that is continually affected
1885 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
1886 : * have small aberrations that lead to just a few heap pages retaining
1887 : * only one or two LP_DEAD items. This is pretty common; even when the
1888 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
1889 : * impossible to predict whether HOT will be applied in 100% of cases.
1890 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
1891 : * HOT through careful tuning.
1892 : */
1893 858 : bypass = false;
1894 858 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
1895 : {
1896 : BlockNumber threshold;
1897 :
1898 : Assert(vacrel->num_index_scans == 0);
1899 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
1900 : Assert(vacrel->do_index_vacuuming);
1901 : Assert(vacrel->do_index_cleanup);
1902 :
1903 : /*
1904 : * The crossover point at which we'll start to do index vacuuming is
1905 : * expressed as a percentage of the total number of heap pages in the
1906 : * table that are known to have at least one LP_DEAD item. This is
1907 : * much more important than the total number of LP_DEAD items, since
1908 : * it's a proxy for the number of heap pages whose visibility map bits
1909 : * cannot be set on account of bypassing index and heap vacuuming.
1910 : *
1911 : * We apply one further precautionary test: the space currently used
1912 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
1913 : * not exceed 32MB. This limits the risk that we will bypass index
1914 : * vacuuming again and again until eventually there is a VACUUM whose
1915 : * dead_items space is not CPU cache resident.
1916 : *
1917 : * We don't take any special steps to remember the LP_DEAD items (such
1918 : * as counting them in our final update to the stats system) when the
1919 : * optimization is applied. Though the accounting used in analyze.c's
1920 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
1921 : * rows in its own stats report, that's okay. The discrepancy should
1922 : * be negligible. If this optimization is ever expanded to cover more
1923 : * cases then this may need to be reconsidered.
1924 : */
1925 842 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
1926 844 : bypass = (vacrel->lpdead_item_pages < threshold &&
1927 2 : (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
1928 : }
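 :
 : /*
 : * Worked example of the bypass test (assuming the compiled-in 2%
 : * BYPASS_THRESHOLD_PAGES): for a 100,000-page table the threshold is
 : * 2,000 pages, so index vacuuming is skipped only when fewer than
 : * 2,000 heap pages carry LP_DEAD items and the TID store remains
 : * under the 32MB cap tested above.
 : */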
1929 :
1930 858 : if (bypass)
1931 : {
1932 : /*
1933 : * There are almost zero TIDs. Behave as if there were precisely
1934 : * zero: bypass index vacuuming, but do index cleanup.
1935 : *
1936 : * We expect that the ongoing VACUUM operation will finish very
1937 : * quickly, so there is no point in considering the failsafe speedup
1938 : * against wraparound failure. (Index cleanup is expected to
1939 : * finish very quickly in cases where there were no ambulkdelete()
1940 : * calls.)
1941 : */
1942 2 : vacrel->do_index_vacuuming = false;
1943 : }
1944 856 : else if (lazy_vacuum_all_indexes(vacrel))
1945 : {
1946 : /*
1947 : * We successfully completed a round of index vacuuming. Do related
1948 : * heap vacuuming now.
1949 : */
1950 856 : lazy_vacuum_heap_rel(vacrel);
1951 : }
1952 : else
1953 : {
1954 : /*
1955 : * Failsafe case.
1956 : *
1957 : * We attempted index vacuuming, but didn't finish a full round/full
1958 : * index scan. This happens when relfrozenxid or relminmxid is too
1959 : * far in the past.
1960 : *
1961 : * From this point on the VACUUM operation will do no further index
1962 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
1963 : * back here again.
1964 : */
1965 : Assert(VacuumFailsafeActive);
1966 : }
1967 :
1968 : /*
1969 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
1970 : * vacuum)
1971 : */
1972 858 : dead_items_reset(vacrel);
1973 : }
1974 :
1975 : /*
1976 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
1977 : *
1978 : * Returns true in the common case when all indexes were successfully
1979 : * vacuumed. Returns false in rare cases where we determined that the ongoing
1980 : * VACUUM operation is at risk of taking too long to finish, leading to
1981 : * wraparound failure.
1982 : */
1983 : static bool
1984 856 : lazy_vacuum_all_indexes(LVRelState *vacrel)
1985 : {
1986 856 : bool allindexes = true;
1987 856 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
1988 856 : const int progress_start_index[] = {
1989 : PROGRESS_VACUUM_PHASE,
1990 : PROGRESS_VACUUM_INDEXES_TOTAL
1991 : };
1992 856 : const int progress_end_index[] = {
1993 : PROGRESS_VACUUM_INDEXES_TOTAL,
1994 : PROGRESS_VACUUM_INDEXES_PROCESSED,
1995 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
1996 : };
1997 : int64 progress_start_val[2];
1998 : int64 progress_end_val[3];
1999 :
2000 : Assert(vacrel->nindexes > 0);
2001 : Assert(vacrel->do_index_vacuuming);
2002 : Assert(vacrel->do_index_cleanup);
2003 :
2004 : /* Precheck for XID wraparound emergencies */
2005 856 : if (lazy_check_wraparound_failsafe(vacrel))
2006 : {
2007 : /* Wraparound emergency -- don't even start an index scan */
2008 0 : return false;
2009 : }
2010 :
2011 : /*
2012 : * Report that we are now vacuuming indexes and the number of indexes to
2013 : * vacuum.
2014 : */
2015 856 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2016 856 : progress_start_val[1] = vacrel->nindexes;
2017 856 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2018 :
2019 856 : if (!ParallelVacuumIsActive(vacrel))
2020 : {
2021 2508 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2022 : {
2023 1660 : Relation indrel = vacrel->indrels[idx];
2024 1660 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2025 :
2026 1660 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2027 : old_live_tuples,
2028 : vacrel);
2029 :
2030 : /* Report the number of indexes vacuumed */
2031 1660 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2032 1660 : idx + 1);
2033 :
2034 1660 : if (lazy_check_wraparound_failsafe(vacrel))
2035 : {
2036 : /* Wraparound emergency -- end current index scan */
2037 0 : allindexes = false;
2038 0 : break;
2039 : }
2040 : }
2041 : }
2042 : else
2043 : {
2044 : /* Outsource everything to parallel variant */
2045 8 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2046 : vacrel->num_index_scans);
2047 :
2048 : /*
2049 : * Do a postcheck to consider applying wraparound failsafe now. Note
2050 : * that parallel VACUUM only gets the precheck and this postcheck.
2051 : */
2052 8 : if (lazy_check_wraparound_failsafe(vacrel))
2053 0 : allindexes = false;
2054 : }
2055 :
2056 : /*
2057 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2058 : * each call here (except calls where we choose to do the failsafe). This
2059 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2060 : * of the failsafe triggering, which prevents the next call from taking
2061 : * place).
2062 : */
2063 : Assert(vacrel->num_index_scans > 0 ||
2064 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2065 : Assert(allindexes || VacuumFailsafeActive);
2066 :
2067 : /*
2068 : * Increase and report the number of index scans. Also, we reset
2069 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2070 : *
2071 : * We deliberately include the case where we started a round of bulk
2072 : * deletes that we weren't able to finish due to the failsafe triggering.
2073 : */
2074 856 : vacrel->num_index_scans++;
2075 856 : progress_end_val[0] = 0;
2076 856 : progress_end_val[1] = 0;
2077 856 : progress_end_val[2] = vacrel->num_index_scans;
2078 856 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2079 :
2080 856 : return allindexes;
2081 : }
2082 :
2083 : /*
2084 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2085 : *
2086 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2087 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2088 : *
2089 : * We may also be able to truncate the line pointer array of the heap pages we
2090 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2091 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2092 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2093 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2094 : * page's line pointer array).
2095 : *
2096 : * Note: the reason for doing this as a second pass is that we cannot remove
2097 : * the tuples until we've removed their index entries, and we want to process
2098 : * index entry removal in batches as large as possible.
2099 : */
2100 : static void
2101 856 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2102 : {
2103 856 : BlockNumber vacuumed_pages = 0;
2104 856 : Buffer vmbuffer = InvalidBuffer;
2105 : LVSavedErrInfo saved_err_info;
2106 : TidStoreIter *iter;
2107 : TidStoreIterResult *iter_result;
2108 :
2109 : Assert(vacrel->do_index_vacuuming);
2110 : Assert(vacrel->do_index_cleanup);
2111 : Assert(vacrel->num_index_scans > 0);
2112 :
2113 : /* Report that we are now vacuuming the heap */
2114 856 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2115 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2116 :
2117 : /* Update error traceback information */
2118 856 : update_vacuum_error_info(vacrel, &saved_err_info,
2119 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2120 : InvalidBlockNumber, InvalidOffsetNumber);
2121 :
2122 856 : iter = TidStoreBeginIterate(vacrel->dead_items);
2123 21076 : while ((iter_result = TidStoreIterateNext(iter)) != NULL)
2124 : {
2125 : BlockNumber blkno;
2126 : Buffer buf;
2127 : Page page;
2128 : Size freespace;
2129 :
2130 20220 : vacuum_delay_point();
2131 :
2132 20220 : blkno = iter_result->blkno;
2133 20220 : vacrel->blkno = blkno;
2134 :
2135 : /*
2136 : * Pin the visibility map page in case we need to mark the page
2137 : * all-visible. In most cases this will be very cheap, because we'll
2138 : * already have the correct page pinned anyway.
2139 : */
2140 20220 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2141 :
2142 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2143 20220 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2144 : vacrel->bstrategy);
2145 20220 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2146 20220 : lazy_vacuum_heap_page(vacrel, blkno, buf, iter_result->offsets,
2147 : iter_result->num_offsets, vmbuffer);
2148 :
2149 : /* Now that we've vacuumed the page, record its available space */
2150 20220 : page = BufferGetPage(buf);
2151 20220 : freespace = PageGetHeapFreeSpace(page);
2152 :
2153 20220 : UnlockReleaseBuffer(buf);
2154 20220 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2155 20220 : vacuumed_pages++;
2156 : }
2157 856 : TidStoreEndIterate(iter);
2158 :
2159 856 : vacrel->blkno = InvalidBlockNumber;
2160 856 : if (BufferIsValid(vmbuffer))
2161 856 : ReleaseBuffer(vmbuffer);
2162 :
2163 : /*
2164 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2165 : * the second heap pass. No more, no less.
2166 : */
2167 : Assert(vacrel->num_index_scans > 1 ||
2168 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2169 : vacuumed_pages == vacrel->lpdead_item_pages));
2170 :
2171 856 : ereport(DEBUG2,
2172 : (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2173 : vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2174 : vacuumed_pages)));
2175 :
2176 : /* Revert to the previous phase information for error traceback */
2177 856 : restore_vacuum_error_info(vacrel, &saved_err_info);
2178 856 : }
2179 :
2180 : /*
2181 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2182 : * vacrel->dead_items store.
2183 : *
2184 : * Caller must have an exclusive buffer lock on the buffer (though a full
2185 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2186 : * a pin on blkno's visibility map page.
2187 : */
2188 : static void
2189 20220 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2190 : OffsetNumber *deadoffsets, int num_offsets,
2191 : Buffer vmbuffer)
2192 : {
2193 20220 : Page page = BufferGetPage(buffer);
2194 : OffsetNumber unused[MaxHeapTuplesPerPage];
2195 20220 : int nunused = 0;
2196 : TransactionId visibility_cutoff_xid;
2197 : bool all_frozen;
2198 : LVSavedErrInfo saved_err_info;
2199 :
2200 : Assert(vacrel->do_index_vacuuming);
2201 :
2202 20220 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2203 :
2204 : /* Update error traceback information */
2205 20220 : update_vacuum_error_info(vacrel, &saved_err_info,
2206 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2207 : InvalidOffsetNumber);
2208 :
2209 20220 : START_CRIT_SECTION();
2210 :
2211 1344222 : for (int i = 0; i < num_offsets; i++)
2212 : {
2213 : ItemId itemid;
2214 1324002 : OffsetNumber toff = deadoffsets[i];
2215 :
2216 1324002 : itemid = PageGetItemId(page, toff);
2217 :
2218 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2219 1324002 : ItemIdSetUnused(itemid);
2220 1324002 : unused[nunused++] = toff;
2221 : }
2222 :
2223 : Assert(nunused > 0);
2224 :
2225 : /* Attempt to truncate line pointer array now */
2226 20220 : PageTruncateLinePointerArray(page);
2227 :
2228 : /*
2229 : * Mark buffer dirty before we write WAL.
2230 : */
2231 20220 : MarkBufferDirty(buffer);
2232 :
2233 : /* XLOG stuff */
2234 20220 : if (RelationNeedsWAL(vacrel->rel))
2235 : {
2236 18736 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2237 : InvalidTransactionId,
2238 : false, /* no cleanup lock required */
2239 : PRUNE_VACUUM_CLEANUP,
2240 : NULL, 0, /* frozen */
2241 : NULL, 0, /* redirected */
2242 : NULL, 0, /* dead */
2243 : unused, nunused);
2244 : }
2245 :
2246 : /*
2247 : * End the critical section, so we can safely do visibility tests (which
2248 : * may need to perform IO and allocate memory!). If we crash now the
2249 : * page (including the corresponding vm bit) might not be marked all
2250 : * visible, but that's fine. A later vacuum will fix that.
2251 : */
2252 20220 : END_CRIT_SECTION();
2253 :
2254 : /*
2255 : * Now that we have removed the LP_DEAD items from the page, once again
2256 : * check if the page has become all-visible. The page is already marked
2257 : * dirty, exclusively locked, and, if needed, a full page image has been
2258 : * emitted.
2259 : */
2260 : Assert(!PageIsAllVisible(page));
2261 20220 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2262 : &all_frozen))
2263 : {
2264 20164 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2265 :
2266 20164 : if (all_frozen)
2267 : {
2268 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2269 15152 : flags |= VISIBILITYMAP_ALL_FROZEN;
2270 : }
2271 :
2272 20164 : PageSetAllVisible(page);
2273 20164 : visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2274 : vmbuffer, visibility_cutoff_xid, flags);
2275 : }
2276 :
2277 : /* Revert to the previous phase information for error traceback */
2278 20220 : restore_vacuum_error_info(vacrel, &saved_err_info);
2279 20220 : }
2280 :
2281 : /*
2282 : * Trigger the failsafe to avoid wraparound failure when the vacrel table has a
2283 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2284 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2285 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2286 : *
2287 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2288 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2289 : * that it started out with.
2290 : *
2291 : * Returns true when failsafe has been triggered.
2292 : */
2293 : static bool
2294 21622 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2295 : {
2296 : /* Don't warn more than once per VACUUM */
2297 21622 : if (VacuumFailsafeActive)
2298 0 : return true;
2299 :
2300 21622 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2301 : {
2302 0 : const int progress_index[] = {
2303 : PROGRESS_VACUUM_INDEXES_TOTAL,
2304 : PROGRESS_VACUUM_INDEXES_PROCESSED
2305 : };
2306 0 : int64 progress_val[2] = {0, 0};
2307 :
2308 0 : VacuumFailsafeActive = true;
2309 :
2310 : /*
2311 : * Abandon use of a buffer access strategy to allow use of all of
2312 : * shared buffers. We assume the caller who allocated the memory for
2313 : * the BufferAccessStrategy will free it.
2314 : */
2315 0 : vacrel->bstrategy = NULL;
2316 :
2317 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2318 0 : vacrel->do_index_vacuuming = false;
2319 0 : vacrel->do_index_cleanup = false;
2320 0 : vacrel->do_rel_truncate = false;
2321 :
2322 : /* Reset the progress counters */
2323 0 : pgstat_progress_update_multi_param(2, progress_index, progress_val);
2324 :
2325 0 : ereport(WARNING,
2326 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2327 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2328 : vacrel->num_index_scans),
2329 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2330 : errhint("Consider increasing configuration parameter maintenance_work_mem or autovacuum_work_mem.\n"
2331 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2332 :
2333 : /* Stop applying cost limits from this point on */
2334 0 : VacuumCostActive = false;
2335 0 : VacuumCostBalance = 0;
2336 :
2337 0 : return true;
2338 : }
2339 :
2340 21622 : return false;
2341 : }
2342 :
2343 : /*
2344 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2345 : */
2346 : static void
2347 17194 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2348 : {
2349 17194 : double reltuples = vacrel->new_rel_tuples;
2350 17194 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2351 17194 : const int progress_start_index[] = {
2352 : PROGRESS_VACUUM_PHASE,
2353 : PROGRESS_VACUUM_INDEXES_TOTAL
2354 : };
2355 17194 : const int progress_end_index[] = {
2356 : PROGRESS_VACUUM_INDEXES_TOTAL,
2357 : PROGRESS_VACUUM_INDEXES_PROCESSED
2358 : };
2359 : int64 progress_start_val[2];
2360 17194 : int64 progress_end_val[2] = {0, 0};
2361 :
2362 : Assert(vacrel->do_index_cleanup);
2363 : Assert(vacrel->nindexes > 0);
2364 :
2365 : /*
2366 : * Report that we are now cleaning up indexes and the number of indexes to
2367 : * cleanup.
2368 : */
2369 17194 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2370 17194 : progress_start_val[1] = vacrel->nindexes;
2371 17194 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2372 :
2373 17194 : if (!ParallelVacuumIsActive(vacrel))
2374 : {
2375 43698 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2376 : {
2377 26522 : Relation indrel = vacrel->indrels[idx];
2378 26522 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2379 :
2380 53044 : vacrel->indstats[idx] =
2381 26522 : lazy_cleanup_one_index(indrel, istat, reltuples,
2382 : estimated_count, vacrel);
2383 :
2384 : /* Report the number of indexes cleaned up */
2385 26522 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2386 26522 : idx + 1);
2387 : }
2388 : }
2389 : else
2390 : {
2391 : /* Outsource everything to parallel variant */
2392 18 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2393 : vacrel->num_index_scans,
2394 : estimated_count);
2395 : }
2396 :
2397 : /* Reset the progress counters */
2398 17194 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2399 17194 : }
2400 :
2401 : /*
2402 : * lazy_vacuum_one_index() -- vacuum index relation.
2403 : *
2404 : * Delete all the index tuples containing a TID collected in
2405 : * vacrel->dead_items. Also update running statistics. Exact
2406 : * details depend on index AM's ambulkdelete routine.
2407 : *
2408 : * reltuples is the number of heap tuples to be passed to the
2409 : * bulkdelete callback. It's always assumed to be estimated.
2410 : * See indexam.sgml for more info.
2411 : *
2412 : * Returns bulk delete stats derived from input stats
2413 : */
2414 : static IndexBulkDeleteResult *
2415 1660 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2416 : double reltuples, LVRelState *vacrel)
2417 : {
2418 : IndexVacuumInfo ivinfo;
2419 : LVSavedErrInfo saved_err_info;
2420 :
2421 1660 : ivinfo.index = indrel;
2422 1660 : ivinfo.heaprel = vacrel->rel;
2423 1660 : ivinfo.analyze_only = false;
2424 1660 : ivinfo.report_progress = false;
2425 1660 : ivinfo.estimated_count = true;
2426 1660 : ivinfo.message_level = DEBUG2;
2427 1660 : ivinfo.num_heap_tuples = reltuples;
2428 1660 : ivinfo.strategy = vacrel->bstrategy;
2429 :
2430 : /*
2431 : * Update error traceback information.
2432 : *
2433 : * The index name is saved during this phase and restored immediately
2434 : * after this phase. See vacuum_error_callback.
2435 : */
2436 : Assert(vacrel->indname == NULL);
2437 1660 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2438 1660 : update_vacuum_error_info(vacrel, &saved_err_info,
2439 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2440 : InvalidBlockNumber, InvalidOffsetNumber);
2441 :
2442 : /* Do bulk deletion */
2443 1660 : istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items,
2444 : vacrel->dead_items_info);
2445 :
2446 : /* Revert to the previous phase information for error traceback */
2447 1660 : restore_vacuum_error_info(vacrel, &saved_err_info);
2448 1660 : pfree(vacrel->indname);
2449 1660 : vacrel->indname = NULL;
2450 :
2451 1660 : return istat;
2452 : }
2453 :
2454 : /*
2455 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2456 : *
2457 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
2458 : * of heap tuples and estimated_count is true if reltuples is an
2459 : * estimated value. See indexam.sgml for more info.
2460 : *
2461 : * Returns bulk delete stats derived from input stats
2462 : */
2463 : static IndexBulkDeleteResult *
2464 26522 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2465 : double reltuples, bool estimated_count,
2466 : LVRelState *vacrel)
2467 : {
2468 : IndexVacuumInfo ivinfo;
2469 : LVSavedErrInfo saved_err_info;
2470 :
2471 26522 : ivinfo.index = indrel;
2472 26522 : ivinfo.heaprel = vacrel->rel;
2473 26522 : ivinfo.analyze_only = false;
2474 26522 : ivinfo.report_progress = false;
2475 26522 : ivinfo.estimated_count = estimated_count;
2476 26522 : ivinfo.message_level = DEBUG2;
2477 :
2478 26522 : ivinfo.num_heap_tuples = reltuples;
2479 26522 : ivinfo.strategy = vacrel->bstrategy;
2480 :
2481 : /*
2482 : * Update error traceback information.
2483 : *
2484 : * The index name is saved during this phase and restored immediately
2485 : * after this phase. See vacuum_error_callback.
2486 : */
2487 : Assert(vacrel->indname == NULL);
2488 26522 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2489 26522 : update_vacuum_error_info(vacrel, &saved_err_info,
2490 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2491 : InvalidBlockNumber, InvalidOffsetNumber);
2492 :
2493 26522 : istat = vac_cleanup_one_index(&ivinfo, istat);
2494 :
2495 : /* Revert to the previous phase information for error traceback */
2496 26522 : restore_vacuum_error_info(vacrel, &saved_err_info);
2497 26522 : pfree(vacrel->indname);
2498 26522 : vacrel->indname = NULL;
2499 :
2500 26522 : return istat;
2501 : }
2502 :
2503 : /*
2504 : * should_attempt_truncation - should we attempt to truncate the heap?
2505 : *
2506 : * Don't even think about it unless we have a shot at releasing a goodly
2507 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
2508 : * an AccessExclusive lock must be replayed on any hot standby, where it can
2509 : * be particularly disruptive.
2510 : *
2511 : * Also don't attempt it if wraparound failsafe is in effect. The entire
2512 : * system might be refusing to allocate new XIDs at this point. The system
2513 : * definitely won't return to normal unless and until VACUUM actually advances
2514 : * the oldest relfrozenxid -- which hasn't happened for the target rel just yet.
2515 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2516 : * truncate the table under these circumstances, an XID exhaustion error might
2517 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2518 : * There is very little chance of truncation working out when the failsafe is
2519 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
2520 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2521 : * we're called.
2522 : */
2523 : static bool
2524 19098 : should_attempt_truncation(LVRelState *vacrel)
2525 : {
2526 : BlockNumber possibly_freeable;
2527 :
2528 19098 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2529 240 : return false;
2530 :
2531 18858 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2532 18858 : if (possibly_freeable > 0 &&
2533 258 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2534 258 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2535 246 : return true;
2536 :
2537 18612 : return false;
2538 : }
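 :
 : /*
 : * Worked example of the heuristic above (using the compiled-in
 : * REL_TRUNCATE_MINIMUM = 1000 and REL_TRUNCATE_FRACTION = 16): an
 : * 8,000-page table qualifies with just 8000/16 = 500 freeable tail
 : * pages via the rel_pages/REL_TRUNCATE_FRACTION arm, while a
 : * 1,000,000-page table needs only REL_TRUNCATE_MINIMUM = 1000 pages,
 : * far below 1000000/16 = 62500.
 : */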
2539 :
2540 : /*
2541 : * lazy_truncate_heap - try to truncate off any empty pages at the end
2542 : */
2543 : static void
2544 246 : lazy_truncate_heap(LVRelState *vacrel)
2545 : {
2546 246 : BlockNumber orig_rel_pages = vacrel->rel_pages;
2547 : BlockNumber new_rel_pages;
2548 : bool lock_waiter_detected;
2549 : int lock_retry;
2550 :
2551 : /* Report that we are now truncating */
2552 246 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2553 : PROGRESS_VACUUM_PHASE_TRUNCATE);
2554 :
2555 : /* Update error traceback information one last time */
2556 246 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2557 : vacrel->nonempty_pages, InvalidOffsetNumber);
2558 :
2559 : /*
2560 : * Loop until no more truncating can be done.
2561 : */
2562 : do
2563 : {
2564 : /*
2565 : * We need full exclusive lock on the relation in order to do
2566 : * truncation. If we can't get it, give up rather than waiting --- we
2567 : * don't want to block other backends, and we don't want to deadlock
2568 : * (which is quite possible considering we already hold a lower-grade
2569 : * lock).
2570 : */
2571 246 : lock_waiter_detected = false;
2572 246 : lock_retry = 0;
2573 : while (true)
2574 : {
2575 646 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
2576 242 : break;
2577 :
2578 : /*
2579 : * Check for interrupts while trying to (re-)acquire the exclusive
2580 : * lock.
2581 : */
2582 404 : CHECK_FOR_INTERRUPTS();
2583 :
2584 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2585 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2586 : {
2587 : /*
2588 : * We failed to establish the lock in the specified number of
2589 : * retries. This means we give up truncating.
2590 : */
2591 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
2592 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2593 : vacrel->relname)));
2594 6 : return;
2595 : }
2596 :
2597 400 : (void) WaitLatch(MyLatch,
2598 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2599 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2600 : WAIT_EVENT_VACUUM_TRUNCATE);
2601 400 : ResetLatch(MyLatch);
2602 : }
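 :
 : /*
 : * With the compiled-in timing constants the loop above polls roughly
 : * every 50ms and gives up after TIMEOUT/WAIT_INTERVAL = 5000/50 = 100
 : * failed attempts, i.e. about five seconds spent waiting for the
 : * exclusive lock.
 : */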
2603 :
2604 : /*
2605 : * Now that we have exclusive lock, look to see if the rel has grown
2606 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
2607 : * the newly added pages presumably contain non-deletable tuples.
2608 : */
2609 242 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2610 242 : if (new_rel_pages != orig_rel_pages)
2611 : {
2612 : /*
2613 : * Note: we intentionally don't update vacrel->rel_pages with the
2614 : * new rel size here. If we did, it would amount to assuming that
2615 : * the new pages are empty, which is unlikely. Leaving the numbers
2616 : * alone amounts to assuming that the new pages have the same
2617 : * tuple density as existing ones, which is less unlikely.
2618 : */
2619 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2620 0 : return;
2621 : }
2622 :
2623 : /*
2624 : * Scan backwards from the end to verify that the end pages actually
2625 : * contain no tuples. This is *necessary*, not optional, because
2626 : * other backends could have added tuples to these pages whilst we
2627 : * were vacuuming.
2628 : */
2629 242 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2630 242 : vacrel->blkno = new_rel_pages;
2631 :
2632 242 : if (new_rel_pages >= orig_rel_pages)
2633 : {
2634 : /* can't do anything after all */
2635 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2636 2 : return;
2637 : }
2638 :
2639 : /*
2640 : * Okay to truncate.
2641 : */
2642 240 : RelationTruncate(vacrel->rel, new_rel_pages);
2643 :
2644 : /*
2645 : * We can release the exclusive lock as soon as we have truncated.
2646 : * Other backends can't safely access the relation until they have
2647 : * processed the smgr invalidation that smgrtruncate sent out ... but
2648 : * that should happen as part of standard invalidation processing once
2649 : * they acquire lock on the relation.
2650 : */
2651 240 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2652 :
2653 : /*
2654 : * Update statistics. Here, it *is* correct to adjust rel_pages
2655 : * without also touching reltuples, since the tuple count wasn't
2656 : * changed by the truncation.
2657 : */
2658 240 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2659 240 : vacrel->rel_pages = new_rel_pages;
2660 :
2661 240 : ereport(vacrel->verbose ? INFO : DEBUG2,
2662 : (errmsg("table \"%s\": truncated %u to %u pages",
2663 : vacrel->relname,
2664 : orig_rel_pages, new_rel_pages)));
2665 240 : orig_rel_pages = new_rel_pages;
2666 240 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2667 : }
2668 :
2669 : /*
2670 : * Rescan end pages to verify that they are (still) empty of tuples.
2671 : *
2672 : * Returns number of nondeletable pages (last nonempty page + 1).
2673 : */
2674 : static BlockNumber
2675 242 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2676 : {
2677 : BlockNumber blkno;
2678 : BlockNumber prefetchedUntil;
2679 : instr_time starttime;
2680 :
2681 : /* Initialize the starttime if we check for conflicting lock requests */
2682 242 : INSTR_TIME_SET_CURRENT(starttime);
2683 :
2684 : /*
2685 : * Start checking blocks at what we believe relation end to be and move
2686 : * backwards. (Strange coding of loop control is needed because blkno is
2687 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2688 : * in forward direction, so that OS-level readahead can kick in.
2689 : */
2690 242 : blkno = vacrel->rel_pages;
2691 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
2692 : "prefetch size must be power of 2");
2693 242 : prefetchedUntil = InvalidBlockNumber;
2694 3946 : while (blkno > vacrel->nonempty_pages)
2695 : {
2696 : Buffer buf;
2697 : Page page;
2698 : OffsetNumber offnum,
2699 : maxoff;
2700 : bool hastup;
2701 :
2702 : /*
2703 : * Check if another process requests a lock on our relation. We are
2704 : * holding an AccessExclusiveLock here, so they will be waiting. We
2705 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2706 : * only check if that interval has elapsed once every 32 blocks to
2707 : * keep the number of system calls and actual shared lock table
2708 : * lookups to a minimum.
2709 : */
2710 3714 : if ((blkno % 32) == 0)
2711 : {
2712 : instr_time currenttime;
2713 : instr_time elapsed;
2714 :
2715 122 : INSTR_TIME_SET_CURRENT(currenttime);
2716 122 : elapsed = currenttime;
2717 122 : INSTR_TIME_SUBTRACT(elapsed, starttime);
2718 122 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2719 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
2720 : {
2721 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
2722 : {
2723 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
2724 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
2725 : vacrel->relname)));
2726 :
2727 0 : *lock_waiter_detected = true;
2728 0 : return blkno;
2729 : }
2730 0 : starttime = currenttime;
2731 : }
2732 : }
2733 :
2734 : /*
2735 : * We don't insert a vacuum delay point here, because we have an
2736 : * exclusive lock on the table which we want to hold for as short a
2737 : * time as possible. We still need to check for interrupts however.
2738 : */
2739 3714 : CHECK_FOR_INTERRUPTS();
2740 :
2741 3714 : blkno--;
2742 :
2743 : /* If we haven't prefetched this lot yet, do so now. */
2744 3714 : if (prefetchedUntil > blkno)
2745 : {
2746 : BlockNumber prefetchStart;
2747 : BlockNumber pblkno;
2748 :
2749 332 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2750 5544 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2751 : {
2752 5212 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2753 5212 : CHECK_FOR_INTERRUPTS();
2754 : }
2755 332 : prefetchedUntil = prefetchStart;
2756 : }
2757 :
2758 3714 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2759 : vacrel->bstrategy);
2760 :
2761 : /* In this phase we only need shared access to the buffer */
2762 3714 : LockBuffer(buf, BUFFER_LOCK_SHARE);
2763 :
2764 3714 : page = BufferGetPage(buf);
2765 :
2766 3714 : if (PageIsNew(page) || PageIsEmpty(page))
2767 : {
2768 1600 : UnlockReleaseBuffer(buf);
2769 1600 : continue;
2770 : }
2771 :
2772 2114 : hastup = false;
2773 2114 : maxoff = PageGetMaxOffsetNumber(page);
2774 4218 : for (offnum = FirstOffsetNumber;
2775 : offnum <= maxoff;
2776 2104 : offnum = OffsetNumberNext(offnum))
2777 : {
2778 : ItemId itemid;
2779 :
2780 2114 : itemid = PageGetItemId(page, offnum);
2781 :
2782 : /*
2783 : * Note: any non-unused item should be taken as a reason to keep
2784 : * this page. Even an LP_DEAD item makes truncation unsafe, since
2785 : * we must not have cleaned out its index entries.
2786 : */
2787 2114 : if (ItemIdIsUsed(itemid))
2788 : {
2789 10 : hastup = true;
2790 10 : break; /* can stop scanning */
2791 : }
2792 : } /* scan along page */
2793 :
2794 2114 : UnlockReleaseBuffer(buf);
2795 :
2796 : /* Done scanning if we found a tuple here */
2797 2114 : if (hastup)
2798 10 : return blkno + 1;
2799 : }
2800 :
2801 : /*
2802 : * If we fall out of the loop, all the previously-thought-to-be-empty
2803 : * pages still are; we need not bother to look at the last known-nonempty
2804 : * page.
2805 : */
2806 232 : return vacrel->nonempty_pages;
2807 : }
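 :
 : /*
 : * The prefetch arithmetic above depends on PREFETCH_SIZE being a power
 : * of two (hence the StaticAssertStmt): blkno & ~(PREFETCH_SIZE - 1)
 : * rounds blkno down to a PREFETCH_SIZE boundary. For example, assuming
 : * PREFETCH_SIZE is 32, blkno = 75 yields 75 & ~31 = 64, so blocks
 : * 64..75 are prefetched in forward order before being read backwards.
 : */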
2808 :
2809 : /*
2810 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
2811 : * shared memory). Sets both in vacrel for the caller.
2812 : *
2813 : * Also handles parallel initialization as part of allocating dead_items in
2814 : * DSM when required.
2815 : */
2816 : static void
2817 19098 : dead_items_alloc(LVRelState *vacrel, int nworkers)
2818 : {
2819 : VacDeadItemsInfo *dead_items_info;
2820 38244 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
2821 48 : autovacuum_work_mem != -1 ?
2822 19146 : autovacuum_work_mem : maintenance_work_mem;
2823 :
2824 : /*
2825 : * Initialize state for a parallel vacuum. As of now, only one worker can
2826 : * be used for an index, so we invoke parallelism only if there are at
2827 : * least two indexes on a table.
2828 : */
2829 19098 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
2830 : {
2831 : /*
2832 : * Since parallel workers cannot access data in temporary tables, we
2833 : * can't perform parallel vacuum on them.
2834 : */
2835 7340 : if (RelationUsesLocalBuffers(vacrel->rel))
2836 : {
2837 : /*
2838 : * Give warning only if the user explicitly tries to perform a
2839 : * parallel vacuum on the temporary table.
2840 : */
2841 6 : if (nworkers > 0)
2842 6 : ereport(WARNING,
2843 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
2844 : vacrel->relname)));
2845 : }
2846 : else
2847 7334 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
2848 : vacrel->nindexes, nworkers,
2849 : vac_work_mem,
2850 7334 : vacrel->verbose ? INFO : DEBUG2,
2851 : vacrel->bstrategy);
2852 :
2853 : /*
2854 : * If parallel mode started, dead_items and dead_items_info spaces are
2855 : * allocated in DSM.
2856 : */
2857 7340 : if (ParallelVacuumIsActive(vacrel))
2858 : {
2859 18 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
2860 : &vacrel->dead_items_info);
2861 18 : return;
2862 : }
2863 : }
2864 :
2865 : /*
2866 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
2867 : * locally.
2868 : */
2869 :
2870 19080 : dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
2871 19080 : dead_items_info->max_bytes = vac_work_mem * 1024L;
2872 19080 : dead_items_info->num_items = 0;
2873 19080 : vacrel->dead_items_info = dead_items_info;
2874 :
2875 19080 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
2876 : }
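 :
 : /*
 : * Worked example of the budget computed above (a sketch; 65536KB is
 : * merely the usual maintenance_work_mem default): vac_work_mem is
 : * measured in kilobytes, so max_bytes = 65536 * 1024 = 64MB, and that
 : * value becomes the size cap handed to TidStoreCreateLocal for the
 : * dead-TID store.
 : */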
2877 :
2878 : /*
2879 : * Add the given block number and offset numbers to dead_items.
2880 : */
2881 : static void
2882 20320 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
2883 : int num_offsets)
2884 : {
2885 20320 : TidStore *dead_items = vacrel->dead_items;
2886 :
2887 20320 : TidStoreSetBlockOffsets(dead_items, blkno, offsets, num_offsets);
2888 20320 : vacrel->dead_items_info->num_items += num_offsets;
2889 :
2890 : /* update the memory usage report */
2891 20320 : pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
2892 20320 : TidStoreMemoryUsage(dead_items));
2893 20320 : }
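/*
 * Hypothetical caller, sketching the calling convention only (the real
 * collection of dead items happens in lazy_scan_prune): gather the offsets
 * of LP_DEAD items on a page into a local array, then record them with a
 * single dead_items_add call.
 */
static void
record_page_dead_items(LVRelState *vacrel, Page page, BlockNumber blkno)
{
	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
	int			lpdead_items = 0;
	OffsetNumber offnum,
				maxoff = PageGetMaxOffsetNumber(page);

	for (offnum = FirstOffsetNumber; offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		ItemId		itemid = PageGetItemId(page, offnum);

		if (ItemIdIsDead(itemid))
			deadoffsets[lpdead_items++] = offnum;
	}

	if (lpdead_items > 0)
		dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
}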
2894 :
2895 : /*
2896 : * Forget all collected dead items.
2897 : */
2898 : static void
2899 870 : dead_items_reset(LVRelState *vacrel)
2900 : {
2901 870 : TidStore *dead_items = vacrel->dead_items;
2902 :
2903 870 : if (ParallelVacuumIsActive(vacrel))
2904 : {
2905 8 : parallel_vacuum_reset_dead_items(vacrel->pvs);
2906 8 : return;
2907 : }
2908 :
2909 : /* Recreate the tidstore with the same max_bytes limitation */
2910 862 : TidStoreDestroy(dead_items);
2911 862 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
2912 :
2913 : /* Reset the counter */
2914 862 : vacrel->dead_items_info->num_items = 0;
2915 : }
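/*
 * Sketch of the memory-budget check that leads to a reset (simplified from
 * lazy_scan_heap; in the real flow dead_items_reset is reached via
 * lazy_vacuum): once the TID store exceeds max_bytes, indexes and already
 * pruned heap pages are vacuumed and the store is emptied so the heap scan
 * can continue.  "maybe_vacuum_if_over_budget" is a hypothetical helper
 * name.
 */
static void
maybe_vacuum_if_over_budget(LVRelState *vacrel)
{
	if (TidStoreMemoryUsage(vacrel->dead_items) >
		vacrel->dead_items_info->max_bytes)
		lazy_vacuum(vacrel);	/* ends by calling dead_items_reset */
}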
2916 :
2917 : /*
2918 : * Perform cleanup for resources allocated in dead_items_alloc.
2919 : */
2920 : static void
2921 19098 : dead_items_cleanup(LVRelState *vacrel)
2922 : {
2923 19098 : if (!ParallelVacuumIsActive(vacrel))
2924 : {
2925 : /* Don't bother with pfree here */
2926 19080 : return;
2927 : }
2928 :
2929 : /* End parallel mode */
2930 18 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
2931 18 : vacrel->pvs = NULL;
2932 : }
2933 :
2934 : /*
2935 : * Check whether every tuple on the given page is visible to all current and
2936 : * future transactions. Also return the visibility_cutoff_xid, which is the
2937 : * highest xmin amongst the visible tuples. Set *all_frozen to true if every
2938 : * tuple on this page is frozen.
2939 : *
2940 : * This is a stripped-down version of lazy_scan_prune(). If you change
2941 : * anything here, make sure that the two stay in sync. Note that an assertion
2942 : * in lazy_scan_prune() calls us to verify that the two routines still agree.
2943 : * Be sure to avoid introducing new side-effects here.
2944 : */
2945 : static bool
2946 20220 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
2947 : TransactionId *visibility_cutoff_xid,
2948 : bool *all_frozen)
2949 : {
2950 20220 : Page page = BufferGetPage(buf);
2951 20220 : BlockNumber blockno = BufferGetBlockNumber(buf);
2952 : OffsetNumber offnum,
2953 : maxoff;
2954 20220 : bool all_visible = true;
2955 :
2956 20220 : *visibility_cutoff_xid = InvalidTransactionId;
2957 20220 : *all_frozen = true;
2958 :
2959 20220 : maxoff = PageGetMaxOffsetNumber(page);
2960 1062752 : for (offnum = FirstOffsetNumber;
2961 1042588 : offnum <= maxoff && all_visible;
2962 1042532 : offnum = OffsetNumberNext(offnum))
2963 : {
2964 : ItemId itemid;
2965 : HeapTupleData tuple;
2966 :
2967 : /*
2968 : * Set the offset number so that we can display it along with any
2969 : * error that occurs while processing this tuple.
2970 : */
2971 1042534 : vacrel->offnum = offnum;
2972 1042534 : itemid = PageGetItemId(page, offnum);
2973 :
2974 : /* Unused or redirect line pointers are of no interest */
2975 1042534 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2976 258718 : continue;
2977 :
2978 783816 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
2979 :
2980 : /*
2981 : * Dead line pointers can have index pointers pointing to them, so
2982 : * they can't be treated as visible.
2983 : */
2984 783816 : if (ItemIdIsDead(itemid))
2985 : {
2986 2 : all_visible = false;
2987 2 : *all_frozen = false;
2988 2 : break;
2989 : }
2990 :
2991 : Assert(ItemIdIsNormal(itemid));
2992 :
2993 783814 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2994 783814 : tuple.t_len = ItemIdGetLength(itemid);
2995 783814 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2996 :
2997 783814 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
2998 : buf))
2999 : {
3000 783796 : case HEAPTUPLE_LIVE:
3001 : {
3002 : TransactionId xmin;
3003 :
3004 : /* Check comments in lazy_scan_prune. */
3005 783796 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3006 : {
3007 0 : all_visible = false;
3008 0 : *all_frozen = false;
3009 0 : break;
3010 : }
3011 :
3012 : /*
3013 : * The inserter definitely committed. But is it old enough
3014 : * that everyone sees it as committed?
3015 : */
3016 783796 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3017 783796 : if (!TransactionIdPrecedes(xmin,
3018 : vacrel->cutoffs.OldestXmin))
3019 : {
3020 36 : all_visible = false;
3021 36 : *all_frozen = false;
3022 36 : break;
3023 : }
3024 :
3025 : /* Track newest xmin on page. */
3026 783760 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3027 : TransactionIdIsNormal(xmin))
3028 18630 : *visibility_cutoff_xid = xmin;
3029 :
3030 : /* Check whether this tuple is already frozen or not */
3031 954394 : if (all_visible && *all_frozen &&
3032 170634 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3033 5022 : *all_frozen = false;
3034 : }
3035 783760 : break;
3036 :
3037 18 : case HEAPTUPLE_DEAD:
3038 : case HEAPTUPLE_RECENTLY_DEAD:
3039 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3040 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3041 : {
3042 18 : all_visible = false;
3043 18 : *all_frozen = false;
3044 18 : break;
3045 : }
3046 0 : default:
3047 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3048 : break;
3049 : }
3050 : } /* scan along page */
3051 :
3052 : /* Clear the offset information once we have processed the given page. */
3053 20220 : vacrel->offnum = InvalidOffsetNumber;
3054 :
3055 20220 : return all_visible;
3056 : }
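/*
 * Sketch (simplified from lazy_vacuum_heap_page; WAL logging and buffer
 * dirtying are omitted) of how the result above feeds the visibility map:
 * if every tuple is visible, the page-level flag and VM bit are set, adding
 * the all-frozen bit when applicable.  "vmbuffer" is assumed to be pinned
 * by the caller, and "mark_page_all_visible" is a hypothetical helper name.
 */
static void
mark_page_all_visible(LVRelState *vacrel, Buffer buf, Buffer vmbuffer,
					  BlockNumber blkno)
{
	TransactionId visibility_cutoff_xid;
	bool		all_frozen;

	if (heap_page_is_all_visible(vacrel, buf,
								 &visibility_cutoff_xid, &all_frozen))
	{
		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;

		if (all_frozen)
		{
			/* no normal xmin is collected when every tuple is frozen */
			Assert(!TransactionIdIsValid(visibility_cutoff_xid));
			flags |= VISIBILITYMAP_ALL_FROZEN;
		}

		PageSetAllVisible(BufferGetPage(buf));
		visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
						  vmbuffer, visibility_cutoff_xid, flags);
	}
}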
3057 :
3058 : /*
3059 : * Update index statistics in pg_class if the statistics are accurate.
3060 : */
3061 : static void
3062 18834 : update_relstats_all_indexes(LVRelState *vacrel)
3063 : {
3064 18834 : Relation *indrels = vacrel->indrels;
3065 18834 : int nindexes = vacrel->nindexes;
3066 18834 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3067 :
3068 : Assert(vacrel->do_index_cleanup);
3069 :
3070 45446 : for (int idx = 0; idx < nindexes; idx++)
3071 : {
3072 26612 : Relation indrel = indrels[idx];
3073 26612 : IndexBulkDeleteResult *istat = indstats[idx];
3074 :
3075 26612 : if (istat == NULL || istat->estimated_count)
3076 24696 : continue;
3077 :
3078 : /* Update index statistics */
3079 1916 : vac_update_relstats(indrel,
3080 : istat->num_pages,
3081 : istat->num_index_tuples,
3082 : 0,
3083 : false,
3084 : InvalidTransactionId,
3085 : InvalidMultiXactId,
3086 : NULL, NULL, false);
3087 : }
3088 18834 : }
3089 :
3090 : /*
3091 : * Error context callback for errors occurring during vacuum. The error
3092 : * context messages for index phases should match the messages set in parallel
3093 : * vacuum. If you change this function for those phases, change
3094 : * parallel_vacuum_error_callback() as well.
3095 : */
3096 : static void
3097 34 : vacuum_error_callback(void *arg)
3098 : {
3099 34 : LVRelState *errinfo = arg;
3100 :
3101 34 : switch (errinfo->phase)
3102 : {
3103 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3104 0 : if (BlockNumberIsValid(errinfo->blkno))
3105 : {
3106 0 : if (OffsetNumberIsValid(errinfo->offnum))
3107 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3108 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3109 : else
3110 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3111 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3112 : }
3113 : else
3114 0 : errcontext("while scanning relation \"%s.%s\"",
3115 : errinfo->relnamespace, errinfo->relname);
3116 0 : break;
3117 :
3118 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3119 0 : if (BlockNumberIsValid(errinfo->blkno))
3120 : {
3121 0 : if (OffsetNumberIsValid(errinfo->offnum))
3122 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3123 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3124 : else
3125 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3126 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3127 : }
3128 : else
3129 0 : errcontext("while vacuuming relation \"%s.%s\"",
3130 : errinfo->relnamespace, errinfo->relname);
3131 0 : break;
3132 :
3133 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3134 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3135 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3136 0 : break;
3137 :
3138 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3139 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3140 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3141 0 : break;
3142 :
3143 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3144 6 : if (BlockNumberIsValid(errinfo->blkno))
3145 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3146 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3147 6 : break;
3148 :
3149 28 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3150 : default:
3151 28 : return; /* do nothing; the errinfo may not be
3152 : * initialized */
3153 : }
3154 : }
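/*
 * Sketch of the standard backend pattern for installing the callback above
 * (heap_vacuum_rel does the equivalent): push an ErrorContextCallback onto
 * error_context_stack for the duration of the operation, then pop it, so
 * that any ereport() raised in between gets the "while scanning/vacuuming
 * ..." context line.  "run_with_vacuum_errcontext" is a hypothetical
 * wrapper used only for illustration.
 */
static void
run_with_vacuum_errcontext(LVRelState *vacrel)
{
	ErrorContextCallback errcallback;

	errcallback.callback = vacuum_error_callback;
	errcallback.arg = vacrel;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	/* ... do work that may ereport() here ... */

	error_context_stack = errcallback.previous;
}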
3155 :
3156 : /*
3157 : * Updates the information required for the vacuum error callback. This also saves
3158 : * the current information, which can later be restored via restore_vacuum_error_info.
3159 : */
3160 : static void
3161 141122 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3162 : int phase, BlockNumber blkno, OffsetNumber offnum)
3163 : {
3164 141122 : if (saved_vacrel)
3165 : {
3166 49258 : saved_vacrel->offnum = vacrel->offnum;
3167 49258 : saved_vacrel->blkno = vacrel->blkno;
3168 49258 : saved_vacrel->phase = vacrel->phase;
3169 : }
3170 :
3171 141122 : vacrel->blkno = blkno;
3172 141122 : vacrel->offnum = offnum;
3173 141122 : vacrel->phase = phase;
3174 141122 : }
3175 :
3176 : /*
3177 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3178 : */
3179 : static void
3180 49258 : restore_vacuum_error_info(LVRelState *vacrel,
3181 : const LVSavedErrInfo *saved_vacrel)
3182 : {
3183 49258 : vacrel->blkno = saved_vacrel->blkno;
3184 49258 : vacrel->offnum = saved_vacrel->offnum;
3185 49258 : vacrel->phase = saved_vacrel->phase;
3186 49258 : }
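/*
 * Sketch of the intended save/update/restore pattern (simplified from the
 * per-page heap-vacuuming path): temporarily switch the reported phase and
 * block for one operation, then put back the caller's error-context state.
 * "vacuum_one_page_with_errcontext" is a hypothetical helper name.
 */
static void
vacuum_one_page_with_errcontext(LVRelState *vacrel, BlockNumber blkno)
{
	LVSavedErrInfo saved_err_info;

	update_vacuum_error_info(vacrel, &saved_err_info,
							 VACUUM_ERRCB_PHASE_VACUUM_HEAP,
							 blkno, InvalidOffsetNumber);

	/* ... remove dead items from this page ... */

	restore_vacuum_error_info(vacrel, &saved_err_info);
}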