Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuumlazy.c
4 : * Concurrent ("lazy") vacuuming.
5 : *
6 : * The major space usage for vacuuming is storage for the dead tuple IDs that
7 : * are to be removed from indexes. We want to ensure we can vacuum even the
8 : * very largest relations with finite memory space usage. To do that, we set
9 : * upper bounds on the memory that can be used for keeping track of dead TIDs
10 : * at once.
11 : *
12 : * We are willing to use at most maintenance_work_mem (or perhaps
13 : * autovacuum_work_mem) memory space to keep track of dead TIDs. If the
14 : * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
15 : * the pages that we've pruned). This frees up the memory space dedicated to
16 : * to store dead TIDs.
17 : *
18 : * In practice VACUUM will often complete its initial pass over the target
19 : * heap relation without ever running out of space to store TIDs. This means
20 : * that there only needs to be one call to lazy_vacuum, after the initial pass
21 : * completes.
22 : *
23 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : *
27 : * IDENTIFICATION
28 : * src/backend/access/heap/vacuumlazy.c
29 : *
30 : *-------------------------------------------------------------------------
31 : */
32 : #include "postgres.h"
33 :
34 : #include <math.h>
35 :
36 : #include "access/genam.h"
37 : #include "access/heapam.h"
38 : #include "access/heapam_xlog.h"
39 : #include "access/htup_details.h"
40 : #include "access/multixact.h"
41 : #include "access/tidstore.h"
42 : #include "access/transam.h"
43 : #include "access/visibilitymap.h"
44 : #include "access/xloginsert.h"
45 : #include "catalog/storage.h"
46 : #include "commands/dbcommands.h"
47 : #include "commands/progress.h"
48 : #include "commands/vacuum.h"
49 : #include "common/int.h"
50 : #include "executor/instrument.h"
51 : #include "miscadmin.h"
52 : #include "pgstat.h"
53 : #include "portability/instr_time.h"
54 : #include "postmaster/autovacuum.h"
55 : #include "storage/bufmgr.h"
56 : #include "storage/freespace.h"
57 : #include "storage/lmgr.h"
58 : #include "utils/lsyscache.h"
59 : #include "utils/memutils.h"
60 : #include "utils/pg_rusage.h"
61 : #include "utils/timestamp.h"
62 :
63 :
64 : /*
65 : * Space/time tradeoff parameters: do these need to be user-tunable?
66 : *
67 : * To consider truncating the relation, we want there to be at least
68 : * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
69 : * is less) potentially-freeable pages.
70 : */
71 : #define REL_TRUNCATE_MINIMUM 1000
72 : #define REL_TRUNCATE_FRACTION 16
73 :
74 : /*
75 : * Timing parameters for truncate locking heuristics.
76 : *
77 : * These were not exposed as user tunable GUC values because it didn't seem
78 : * that the potential for improvement was great enough to merit the cost of
79 : * supporting them.
80 : */
81 : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */
82 : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */
83 : #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */
84 :
85 : /*
86 : * Threshold that controls whether we bypass index vacuuming and heap
87 : * vacuuming as an optimization
88 : */
89 : #define BYPASS_THRESHOLD_PAGES 0.02 /* i.e. 2% of rel_pages */
90 :
91 : /*
92 : * Perform a failsafe check each time we scan another 4GB of pages.
93 : * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
94 : */
95 : #define FAILSAFE_EVERY_PAGES \
96 : ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
97 :
98 : /*
99 : * When a table has no indexes, vacuum the FSM after every 8GB, approximately
100 : * (it won't be exact because we only vacuum FSM after processing a heap page
101 : * that has some removable tuples). When there are indexes, this is ignored,
102 : * and we vacuum FSM after each index/heap cleaning pass.
103 : */
104 : #define VACUUM_FSM_EVERY_PAGES \
105 : ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
106 :
107 : /*
108 : * Before we consider skipping a page that's marked as clean in
109 : * visibility map, we must've seen at least this many clean pages.
110 : */
111 : #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
112 :
113 : /*
114 : * Size of the prefetch window for lazy vacuum backwards truncation scan.
115 : * Needs to be a power of 2.
116 : */
117 : #define PREFETCH_SIZE ((BlockNumber) 32)
118 :
119 : /*
120 : * Macro to check if we are in a parallel vacuum. If true, we are in the
121 : * parallel mode and the DSM segment is initialized.
122 : */
123 : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
124 :
125 : /* Phases of vacuum during which we report error context. */
126 : typedef enum
127 : {
128 : VACUUM_ERRCB_PHASE_UNKNOWN,
129 : VACUUM_ERRCB_PHASE_SCAN_HEAP,
130 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
131 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
132 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
133 : VACUUM_ERRCB_PHASE_TRUNCATE,
134 : } VacErrPhase;
135 :
136 : typedef struct LVRelState
137 : {
138 : /* Target heap relation and its indexes */
139 : Relation rel;
140 : Relation *indrels;
141 : int nindexes;
142 :
143 : /* Buffer access strategy and parallel vacuum state */
144 : BufferAccessStrategy bstrategy;
145 : ParallelVacuumState *pvs;
146 :
147 : /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
148 : bool aggressive;
149 : /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
150 : bool skipwithvm;
151 : /* Consider index vacuuming bypass optimization? */
152 : bool consider_bypass_optimization;
153 :
154 : /* Doing index vacuuming, index cleanup, rel truncation? */
155 : bool do_index_vacuuming;
156 : bool do_index_cleanup;
157 : bool do_rel_truncate;
158 :
159 : /* VACUUM operation's cutoffs for freezing and pruning */
160 : struct VacuumCutoffs cutoffs;
161 : GlobalVisState *vistest;
162 : /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
163 : TransactionId NewRelfrozenXid;
164 : MultiXactId NewRelminMxid;
165 : bool skippedallvis;
166 :
167 : /* Error reporting state */
168 : char *dbname;
169 : char *relnamespace;
170 : char *relname;
171 : char *indname; /* Current index name */
172 : BlockNumber blkno; /* used only for heap operations */
173 : OffsetNumber offnum; /* used only for heap operations */
174 : VacErrPhase phase;
175 : bool verbose; /* VACUUM VERBOSE? */
176 :
177 : /*
178 : * dead_items stores TIDs whose index tuples are deleted by index
179 : * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
180 : * that has been processed by lazy_scan_prune. Also needed by
181 : * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
182 : * LP_UNUSED during second heap pass.
183 : *
184 : * Both dead_items and dead_items_info are allocated in shared memory in
185 : * parallel vacuum cases.
186 : */
187 : TidStore *dead_items; /* TIDs whose index tuples we'll delete */
188 : VacDeadItemsInfo *dead_items_info;
189 :
190 : BlockNumber rel_pages; /* total number of pages */
191 : BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
192 : BlockNumber removed_pages; /* # pages removed by relation truncation */
193 : BlockNumber frozen_pages; /* # pages with newly frozen tuples */
194 : BlockNumber lpdead_item_pages; /* # pages with LP_DEAD items */
195 : BlockNumber missed_dead_pages; /* # pages with missed dead tuples */
196 : BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
197 :
198 : /* Statistics output by us, for table */
199 : double new_rel_tuples; /* new estimated total # of tuples */
200 : double new_live_tuples; /* new estimated total # of live tuples */
201 : /* Statistics output by index AMs */
202 : IndexBulkDeleteResult **indstats;
203 :
204 : /* Instrumentation counters */
205 : int num_index_scans;
206 : /* Counters that follow are only for scanned_pages */
207 : int64 tuples_deleted; /* # deleted from table */
208 : int64 tuples_frozen; /* # newly frozen */
209 : int64 lpdead_items; /* # deleted from indexes */
210 : int64 live_tuples; /* # live tuples remaining */
211 : int64 recently_dead_tuples; /* # dead, but not yet removable */
212 : int64 missed_dead_tuples; /* # removable, but not removed */
213 :
214 : /* State maintained by heap_vac_scan_next_block() */
215 : BlockNumber current_block; /* last block returned */
216 : BlockNumber next_unskippable_block; /* next unskippable block */
217 : bool next_unskippable_allvis; /* its visibility status */
218 : Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
219 : } LVRelState;
220 :
221 : /* Struct for saving and restoring vacuum error information. */
222 : typedef struct LVSavedErrInfo
223 : {
224 : BlockNumber blkno;
225 : OffsetNumber offnum;
226 : VacErrPhase phase;
227 : } LVSavedErrInfo;
228 :
229 :
230 : /* non-export function prototypes */
231 : static void lazy_scan_heap(LVRelState *vacrel);
232 : static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
233 : bool *all_visible_according_to_vm);
234 : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
235 : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
236 : BlockNumber blkno, Page page,
237 : bool sharelock, Buffer vmbuffer);
238 : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
239 : BlockNumber blkno, Page page,
240 : Buffer vmbuffer, bool all_visible_according_to_vm,
241 : bool *has_lpdead_items);
242 : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
243 : BlockNumber blkno, Page page,
244 : bool *has_lpdead_items);
245 : static void lazy_vacuum(LVRelState *vacrel);
246 : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
247 : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
248 : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
249 : Buffer buffer, OffsetNumber *offsets,
250 : int num_offsets, Buffer vmbuffer);
251 : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
252 : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
253 : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
254 : IndexBulkDeleteResult *istat,
255 : double reltuples,
256 : LVRelState *vacrel);
257 : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
258 : IndexBulkDeleteResult *istat,
259 : double reltuples,
260 : bool estimated_count,
261 : LVRelState *vacrel);
262 : static bool should_attempt_truncation(LVRelState *vacrel);
263 : static void lazy_truncate_heap(LVRelState *vacrel);
264 : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
265 : bool *lock_waiter_detected);
266 : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
267 : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
268 : int num_offsets);
269 : static void dead_items_reset(LVRelState *vacrel);
270 : static void dead_items_cleanup(LVRelState *vacrel);
271 : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
272 : TransactionId *visibility_cutoff_xid, bool *all_frozen);
273 : static void update_relstats_all_indexes(LVRelState *vacrel);
274 : static void vacuum_error_callback(void *arg);
275 : static void update_vacuum_error_info(LVRelState *vacrel,
276 : LVSavedErrInfo *saved_vacrel,
277 : int phase, BlockNumber blkno,
278 : OffsetNumber offnum);
279 : static void restore_vacuum_error_info(LVRelState *vacrel,
280 : const LVSavedErrInfo *saved_vacrel);
281 :
282 :
283 : /*
284 : * heap_vacuum_rel() -- perform VACUUM for one heap relation
285 : *
286 : * This routine sets things up for and then calls lazy_scan_heap, where
287 : * almost all work actually takes place. Finalizes everything after call
288 : * returns by managing relation truncation and updating rel's pg_class
289 : * entry. (Also updates pg_class entries for any indexes that need it.)
290 : *
291 : * At entry, we have already established a transaction and opened
292 : * and locked the relation.
293 : */
294 : void
295 19096 : heap_vacuum_rel(Relation rel, VacuumParams *params,
296 : BufferAccessStrategy bstrategy)
297 : {
298 : LVRelState *vacrel;
299 : bool verbose,
300 : instrument,
301 : skipwithvm,
302 : frozenxid_updated,
303 : minmulti_updated;
304 : BlockNumber orig_rel_pages,
305 : new_rel_pages,
306 : new_rel_allvisible;
307 : PGRUsage ru0;
308 19096 : TimestampTz starttime = 0;
309 19096 : PgStat_Counter startreadtime = 0,
310 19096 : startwritetime = 0;
311 19096 : WalUsage startwalusage = pgWalUsage;
312 19096 : int64 StartPageHit = VacuumPageHit,
313 19096 : StartPageMiss = VacuumPageMiss,
314 19096 : StartPageDirty = VacuumPageDirty;
315 : ErrorContextCallback errcallback;
316 19096 : char **indnames = NULL;
317 :
318 19096 : verbose = (params->options & VACOPT_VERBOSE) != 0;
319 19140 : instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
320 44 : params->log_min_duration >= 0));
321 19096 : if (instrument)
322 : {
323 66 : pg_rusage_init(&ru0);
324 66 : starttime = GetCurrentTimestamp();
325 66 : if (track_io_timing)
326 : {
327 0 : startreadtime = pgStatBlockReadTime;
328 0 : startwritetime = pgStatBlockWriteTime;
329 : }
330 : }
331 :
332 19096 : pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
333 : RelationGetRelid(rel));
334 :
335 : /*
336 : * Setup error traceback support for ereport() first. The idea is to set
337 : * up an error context callback to display additional information on any
338 : * error during a vacuum. During different phases of vacuum, we update
339 : * the state so that the error context callback always display current
340 : * information.
341 : *
342 : * Copy the names of heap rel into local memory for error reporting
343 : * purposes, too. It isn't always safe to assume that we can get the name
344 : * of each rel. It's convenient for code in lazy_scan_heap to always use
345 : * these temp copies.
346 : */
347 19096 : vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
348 19096 : vacrel->dbname = get_database_name(MyDatabaseId);
349 19096 : vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
350 19096 : vacrel->relname = pstrdup(RelationGetRelationName(rel));
351 19096 : vacrel->indname = NULL;
352 19096 : vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
353 19096 : vacrel->verbose = verbose;
354 19096 : errcallback.callback = vacuum_error_callback;
355 19096 : errcallback.arg = vacrel;
356 19096 : errcallback.previous = error_context_stack;
357 19096 : error_context_stack = &errcallback;
358 :
359 : /* Set up high level stuff about rel and its indexes */
360 19096 : vacrel->rel = rel;
361 19096 : vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
362 : &vacrel->indrels);
363 19096 : vacrel->bstrategy = bstrategy;
364 19096 : if (instrument && vacrel->nindexes > 0)
365 : {
366 : /* Copy index names used by instrumentation (not error reporting) */
367 54 : indnames = palloc(sizeof(char *) * vacrel->nindexes);
368 160 : for (int i = 0; i < vacrel->nindexes; i++)
369 106 : indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
370 : }
371 :
372 : /*
373 : * The index_cleanup param either disables index vacuuming and cleanup or
374 : * forces it to go ahead when we would otherwise apply the index bypass
375 : * optimization. The default is 'auto', which leaves the final decision
376 : * up to lazy_vacuum().
377 : *
378 : * The truncate param allows user to avoid attempting relation truncation,
379 : * though it can't force truncation to happen.
380 : */
381 : Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
382 : Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
383 : params->truncate != VACOPTVALUE_AUTO);
384 :
385 : /*
386 : * While VacuumFailSafeActive is reset to false before calling this, we
387 : * still need to reset it here due to recursive calls.
388 : */
389 19096 : VacuumFailsafeActive = false;
390 19096 : vacrel->consider_bypass_optimization = true;
391 19096 : vacrel->do_index_vacuuming = true;
392 19096 : vacrel->do_index_cleanup = true;
393 19096 : vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
394 19096 : if (params->index_cleanup == VACOPTVALUE_DISABLED)
395 : {
396 : /* Force disable index vacuuming up-front */
397 264 : vacrel->do_index_vacuuming = false;
398 264 : vacrel->do_index_cleanup = false;
399 : }
400 18832 : else if (params->index_cleanup == VACOPTVALUE_ENABLED)
401 : {
402 : /* Force index vacuuming. Note that failsafe can still bypass. */
403 32 : vacrel->consider_bypass_optimization = false;
404 : }
405 : else
406 : {
407 : /* Default/auto, make all decisions dynamically */
408 : Assert(params->index_cleanup == VACOPTVALUE_AUTO);
409 : }
410 :
411 : /* Initialize page counters explicitly (be tidy) */
412 19096 : vacrel->scanned_pages = 0;
413 19096 : vacrel->removed_pages = 0;
414 19096 : vacrel->frozen_pages = 0;
415 19096 : vacrel->lpdead_item_pages = 0;
416 19096 : vacrel->missed_dead_pages = 0;
417 19096 : vacrel->nonempty_pages = 0;
418 : /* dead_items_alloc allocates vacrel->dead_items later on */
419 :
420 : /* Allocate/initialize output statistics state */
421 19096 : vacrel->new_rel_tuples = 0;
422 19096 : vacrel->new_live_tuples = 0;
423 19096 : vacrel->indstats = (IndexBulkDeleteResult **)
424 19096 : palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
425 :
426 : /* Initialize remaining counters (be tidy) */
427 19096 : vacrel->num_index_scans = 0;
428 19096 : vacrel->tuples_deleted = 0;
429 19096 : vacrel->tuples_frozen = 0;
430 19096 : vacrel->lpdead_items = 0;
431 19096 : vacrel->live_tuples = 0;
432 19096 : vacrel->recently_dead_tuples = 0;
433 19096 : vacrel->missed_dead_tuples = 0;
434 :
435 : /*
436 : * Get cutoffs that determine which deleted tuples are considered DEAD,
437 : * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
438 : * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
439 : * happen in this order to ensure that the OldestXmin cutoff field works
440 : * as an upper bound on the XIDs stored in the pages we'll actually scan
441 : * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
442 : *
443 : * Next acquire vistest, a related cutoff that's used in pruning. We
444 : * expect vistest will always make heap_page_prune_and_freeze() remove any
445 : * deleted tuple whose xmax is < OldestXmin. lazy_scan_prune must never
446 : * become confused about whether a tuple should be frozen or removed. (In
447 : * the future we might want to teach lazy_scan_prune to recompute vistest
448 : * from time to time, to increase the number of dead tuples it can prune
449 : * away.)
450 : */
451 19096 : vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
452 19096 : vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
453 19096 : vacrel->vistest = GlobalVisTestFor(rel);
454 : /* Initialize state used to track oldest extant XID/MXID */
455 19096 : vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
456 19096 : vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
457 19096 : vacrel->skippedallvis = false;
458 19096 : skipwithvm = true;
459 19096 : if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
460 : {
461 : /*
462 : * Force aggressive mode, and disable skipping blocks using the
463 : * visibility map (even those set all-frozen)
464 : */
465 294 : vacrel->aggressive = true;
466 294 : skipwithvm = false;
467 : }
468 :
469 19096 : vacrel->skipwithvm = skipwithvm;
470 :
471 19096 : if (verbose)
472 : {
473 22 : if (vacrel->aggressive)
474 0 : ereport(INFO,
475 : (errmsg("aggressively vacuuming \"%s.%s.%s\"",
476 : vacrel->dbname, vacrel->relnamespace,
477 : vacrel->relname)));
478 : else
479 22 : ereport(INFO,
480 : (errmsg("vacuuming \"%s.%s.%s\"",
481 : vacrel->dbname, vacrel->relnamespace,
482 : vacrel->relname)));
483 : }
484 :
485 : /*
486 : * Allocate dead_items memory using dead_items_alloc. This handles
487 : * parallel VACUUM initialization as part of allocating shared memory
488 : * space used for dead_items. (But do a failsafe precheck first, to
489 : * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
490 : * is already dangerously old.)
491 : */
492 19096 : lazy_check_wraparound_failsafe(vacrel);
493 19096 : dead_items_alloc(vacrel, params->nworkers);
494 :
495 : /*
496 : * Call lazy_scan_heap to perform all required heap pruning, index
497 : * vacuuming, and heap vacuuming (plus related processing)
498 : */
499 19096 : lazy_scan_heap(vacrel);
500 :
501 : /*
502 : * Free resources managed by dead_items_alloc. This ends parallel mode in
503 : * passing when necessary.
504 : */
505 19096 : dead_items_cleanup(vacrel);
506 : Assert(!IsInParallelMode());
507 :
508 : /*
509 : * Update pg_class entries for each of rel's indexes where appropriate.
510 : *
511 : * Unlike the later update to rel's pg_class entry, this is not critical.
512 : * Maintains relpages/reltuples statistics used by the planner only.
513 : */
514 19096 : if (vacrel->do_index_cleanup)
515 18832 : update_relstats_all_indexes(vacrel);
516 :
517 : /* Done with rel's indexes */
518 19096 : vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
519 :
520 : /* Optionally truncate rel */
521 19096 : if (should_attempt_truncation(vacrel))
522 246 : lazy_truncate_heap(vacrel);
523 :
524 : /* Pop the error context stack */
525 19096 : error_context_stack = errcallback.previous;
526 :
527 : /* Report that we are now doing final cleanup */
528 19096 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
529 : PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
530 :
531 : /*
532 : * Prepare to update rel's pg_class entry.
533 : *
534 : * Aggressive VACUUMs must always be able to advance relfrozenxid to a
535 : * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
536 : * Non-aggressive VACUUMs may advance them by any amount, or not at all.
537 : */
538 : Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
539 : TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
540 : vacrel->cutoffs.relfrozenxid,
541 : vacrel->NewRelfrozenXid));
542 : Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
543 : MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
544 : vacrel->cutoffs.relminmxid,
545 : vacrel->NewRelminMxid));
546 19096 : if (vacrel->skippedallvis)
547 : {
548 : /*
549 : * Must keep original relfrozenxid in a non-aggressive VACUUM that
550 : * chose to skip an all-visible page range. The state that tracks new
551 : * values will have missed unfrozen XIDs from the pages we skipped.
552 : */
553 : Assert(!vacrel->aggressive);
554 52 : vacrel->NewRelfrozenXid = InvalidTransactionId;
555 52 : vacrel->NewRelminMxid = InvalidMultiXactId;
556 : }
557 :
558 : /*
559 : * For safety, clamp relallvisible to be not more than what we're setting
560 : * pg_class.relpages to
561 : */
562 19096 : new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
563 19096 : visibilitymap_count(rel, &new_rel_allvisible, NULL);
564 19096 : if (new_rel_allvisible > new_rel_pages)
565 0 : new_rel_allvisible = new_rel_pages;
566 :
567 : /*
568 : * Now actually update rel's pg_class entry.
569 : *
570 : * In principle new_live_tuples could be -1 indicating that we (still)
571 : * don't know the tuple count. In practice that can't happen, since we
572 : * scan every page that isn't skipped using the visibility map.
573 : */
574 19096 : vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
575 19096 : new_rel_allvisible, vacrel->nindexes > 0,
576 : vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
577 : &frozenxid_updated, &minmulti_updated, false);
578 :
579 : /*
580 : * Report results to the cumulative stats system, too.
581 : *
582 : * Deliberately avoid telling the stats system about LP_DEAD items that
583 : * remain in the table due to VACUUM bypassing index and heap vacuuming.
584 : * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
585 : * It seems like a good idea to err on the side of not vacuuming again too
586 : * soon in cases where the failsafe prevented significant amounts of heap
587 : * vacuuming.
588 : */
589 11130 : pgstat_report_vacuum(RelationGetRelid(rel),
590 19096 : rel->rd_rel->relisshared,
591 7966 : Max(vacrel->new_live_tuples, 0),
592 19096 : vacrel->recently_dead_tuples +
593 19096 : vacrel->missed_dead_tuples);
594 19096 : pgstat_progress_end_command();
595 :
596 19096 : if (instrument)
597 : {
598 66 : TimestampTz endtime = GetCurrentTimestamp();
599 :
600 82 : if (verbose || params->log_min_duration == 0 ||
601 16 : TimestampDifferenceExceeds(starttime, endtime,
602 : params->log_min_duration))
603 : {
604 : long secs_dur;
605 : int usecs_dur;
606 : WalUsage walusage;
607 : StringInfoData buf;
608 : char *msgfmt;
609 : int32 diff;
610 50 : int64 PageHitOp = VacuumPageHit - StartPageHit,
611 50 : PageMissOp = VacuumPageMiss - StartPageMiss,
612 50 : PageDirtyOp = VacuumPageDirty - StartPageDirty;
613 50 : double read_rate = 0,
614 50 : write_rate = 0;
615 :
616 50 : TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
617 50 : memset(&walusage, 0, sizeof(WalUsage));
618 50 : WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
619 :
620 50 : initStringInfo(&buf);
621 50 : if (verbose)
622 : {
623 : /*
624 : * Aggressiveness already reported earlier, in dedicated
625 : * VACUUM VERBOSE ereport
626 : */
627 : Assert(!params->is_wraparound);
628 22 : msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
629 : }
630 28 : else if (params->is_wraparound)
631 : {
632 : /*
633 : * While it's possible for a VACUUM to be both is_wraparound
634 : * and !aggressive, that's just a corner-case -- is_wraparound
635 : * implies aggressive. Produce distinct output for the corner
636 : * case all the same, just in case.
637 : */
638 0 : if (vacrel->aggressive)
639 0 : msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
640 : else
641 0 : msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
642 : }
643 : else
644 : {
645 28 : if (vacrel->aggressive)
646 6 : msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
647 : else
648 22 : msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
649 : }
650 50 : appendStringInfo(&buf, msgfmt,
651 : vacrel->dbname,
652 : vacrel->relnamespace,
653 : vacrel->relname,
654 : vacrel->num_index_scans);
655 94 : appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
656 : vacrel->removed_pages,
657 : new_rel_pages,
658 : vacrel->scanned_pages,
659 : orig_rel_pages == 0 ? 100.0 :
660 44 : 100.0 * vacrel->scanned_pages / orig_rel_pages);
661 50 : appendStringInfo(&buf,
662 50 : _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
663 50 : (long long) vacrel->tuples_deleted,
664 50 : (long long) vacrel->new_rel_tuples,
665 50 : (long long) vacrel->recently_dead_tuples);
666 50 : if (vacrel->missed_dead_tuples > 0)
667 0 : appendStringInfo(&buf,
668 0 : _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
669 0 : (long long) vacrel->missed_dead_tuples,
670 : vacrel->missed_dead_pages);
671 50 : diff = (int32) (ReadNextTransactionId() -
672 50 : vacrel->cutoffs.OldestXmin);
673 50 : appendStringInfo(&buf,
674 50 : _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
675 : vacrel->cutoffs.OldestXmin, diff);
676 50 : if (frozenxid_updated)
677 : {
678 30 : diff = (int32) (vacrel->NewRelfrozenXid -
679 30 : vacrel->cutoffs.relfrozenxid);
680 30 : appendStringInfo(&buf,
681 30 : _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
682 : vacrel->NewRelfrozenXid, diff);
683 : }
684 50 : if (minmulti_updated)
685 : {
686 20 : diff = (int32) (vacrel->NewRelminMxid -
687 20 : vacrel->cutoffs.relminmxid);
688 20 : appendStringInfo(&buf,
689 20 : _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
690 : vacrel->NewRelminMxid, diff);
691 : }
692 50 : appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
693 : vacrel->frozen_pages,
694 : orig_rel_pages == 0 ? 100.0 :
695 44 : 100.0 * vacrel->frozen_pages / orig_rel_pages,
696 50 : (long long) vacrel->tuples_frozen);
697 50 : if (vacrel->do_index_vacuuming)
698 : {
699 50 : if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
700 20 : appendStringInfoString(&buf, _("index scan not needed: "));
701 : else
702 30 : appendStringInfoString(&buf, _("index scan needed: "));
703 :
704 50 : msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
705 : }
706 : else
707 : {
708 0 : if (!VacuumFailsafeActive)
709 0 : appendStringInfoString(&buf, _("index scan bypassed: "));
710 : else
711 0 : appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
712 :
713 0 : msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
714 : }
715 50 : appendStringInfo(&buf, msgfmt,
716 : vacrel->lpdead_item_pages,
717 : orig_rel_pages == 0 ? 100.0 :
718 44 : 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
719 50 : (long long) vacrel->lpdead_items);
720 124 : for (int i = 0; i < vacrel->nindexes; i++)
721 : {
722 74 : IndexBulkDeleteResult *istat = vacrel->indstats[i];
723 :
724 74 : if (!istat)
725 12 : continue;
726 :
727 62 : appendStringInfo(&buf,
728 62 : _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
729 62 : indnames[i],
730 : istat->num_pages,
731 : istat->pages_newly_deleted,
732 : istat->pages_deleted,
733 : istat->pages_free);
734 : }
735 50 : if (track_io_timing)
736 : {
737 0 : double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
738 0 : double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
739 :
740 0 : appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
741 : read_ms, write_ms);
742 : }
743 50 : if (secs_dur > 0 || usecs_dur > 0)
744 : {
745 50 : read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
746 50 : (secs_dur + usecs_dur / 1000000.0);
747 50 : write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
748 50 : (secs_dur + usecs_dur / 1000000.0);
749 : }
750 50 : appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
751 : read_rate, write_rate);
752 50 : appendStringInfo(&buf,
753 50 : _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
754 : (long long) PageHitOp,
755 : (long long) PageMissOp,
756 : (long long) PageDirtyOp);
757 50 : appendStringInfo(&buf,
758 50 : _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
759 50 : (long long) walusage.wal_records,
760 50 : (long long) walusage.wal_fpi,
761 50 : (unsigned long long) walusage.wal_bytes);
762 50 : appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
763 :
764 50 : ereport(verbose ? INFO : LOG,
765 : (errmsg_internal("%s", buf.data)));
766 50 : pfree(buf.data);
767 : }
768 : }
769 :
770 : /* Cleanup index statistics and index names */
771 46138 : for (int i = 0; i < vacrel->nindexes; i++)
772 : {
773 27042 : if (vacrel->indstats[i])
774 1914 : pfree(vacrel->indstats[i]);
775 :
776 27042 : if (instrument)
777 106 : pfree(indnames[i]);
778 : }
779 19096 : }
780 :
781 : /*
782 : * lazy_scan_heap() -- workhorse function for VACUUM
783 : *
784 : * This routine prunes each page in the heap, and considers the need to
785 : * freeze remaining tuples with storage (not including pages that can be
786 : * skipped using the visibility map). Also performs related maintenance
787 : * of the FSM and visibility map. These steps all take place during an
788 : * initial pass over the target heap relation.
789 : *
790 : * Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
791 : * consists of deleting index tuples that point to LP_DEAD items left in
792 : * heap pages following pruning. Earlier initial pass over the heap will
793 : * have collected the TIDs whose index tuples need to be removed.
794 : *
795 : * Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
796 : * largely consists of marking LP_DEAD items (from vacrel->dead_items)
797 : * as LP_UNUSED. This has to happen in a second, final pass over the
798 : * heap, to preserve a basic invariant that all index AMs rely on: no
799 : * extant index tuple can ever be allowed to contain a TID that points to
800 : * an LP_UNUSED line pointer in the heap. We must disallow premature
801 : * recycling of line pointers to avoid index scans that get confused
802 : * about which TID points to which tuple immediately after recycling.
803 : * (Actually, this isn't a concern when target heap relation happens to
804 : * have no indexes, which allows us to safely apply the one-pass strategy
805 : * as an optimization).
806 : *
807 : * In practice we often have enough space to fit all TIDs, and so won't
808 : * need to call lazy_vacuum more than once, after our initial pass over
809 : * the heap has totally finished. Otherwise things are slightly more
810 : * complicated: our "initial pass" over the heap applies only to those
811 : * pages that were pruned before we needed to call lazy_vacuum, and our
812 : * "final pass" over the heap only vacuums these same heap pages.
813 : * However, we process indexes in full every time lazy_vacuum is called,
814 : * which makes index processing very inefficient when memory is in short
815 : * supply.
816 : */
817 : static void
818 19096 : lazy_scan_heap(LVRelState *vacrel)
819 : {
820 19096 : BlockNumber rel_pages = vacrel->rel_pages,
821 : blkno,
822 19096 : next_fsm_block_to_vacuum = 0;
823 : bool all_visible_according_to_vm;
824 :
825 19096 : TidStore *dead_items = vacrel->dead_items;
826 19096 : VacDeadItemsInfo *dead_items_info = vacrel->dead_items_info;
827 19096 : Buffer vmbuffer = InvalidBuffer;
828 19096 : const int initprog_index[] = {
829 : PROGRESS_VACUUM_PHASE,
830 : PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
831 : PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
832 : };
833 : int64 initprog_val[3];
834 :
835 : /* Report that we're scanning the heap, advertising total # of blocks */
836 19096 : initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
837 19096 : initprog_val[1] = rel_pages;
838 19096 : initprog_val[2] = dead_items_info->max_bytes;
839 19096 : pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
840 :
841 : /* Initialize for the first heap_vac_scan_next_block() call */
842 19096 : vacrel->current_block = InvalidBlockNumber;
843 19096 : vacrel->next_unskippable_block = InvalidBlockNumber;
844 19096 : vacrel->next_unskippable_allvis = false;
845 19096 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
846 :
847 110106 : while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
848 : {
849 : Buffer buf;
850 : Page page;
851 : bool has_lpdead_items;
852 91010 : bool got_cleanup_lock = false;
853 :
854 91010 : vacrel->scanned_pages++;
855 :
856 : /* Report as block scanned, update error traceback information */
857 91010 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
858 91010 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
859 : blkno, InvalidOffsetNumber);
860 :
861 91010 : vacuum_delay_point();
862 :
863 : /*
864 : * Regularly check if wraparound failsafe should trigger.
865 : *
866 : * There is a similar check inside lazy_vacuum_all_indexes(), but
867 : * relfrozenxid might start to look dangerously old before we reach
868 : * that point. This check also provides failsafe coverage for the
869 : * one-pass strategy, and the two-pass strategy with the index_cleanup
870 : * param set to 'off'.
871 : */
872 91010 : if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
873 0 : lazy_check_wraparound_failsafe(vacrel);
874 :
875 : /*
876 : * Consider if we definitely have enough space to process TIDs on page
877 : * already. If we are close to overrunning the available space for
878 : * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
879 : * this page.
880 : */
881 91010 : if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
882 : {
883 : /*
884 : * Before beginning index vacuuming, we release any pin we may
885 : * hold on the visibility map page. This isn't necessary for
886 : * correctness, but we do it anyway to avoid holding the pin
887 : * across a lengthy, unrelated operation.
888 : */
889 0 : if (BufferIsValid(vmbuffer))
890 : {
891 0 : ReleaseBuffer(vmbuffer);
892 0 : vmbuffer = InvalidBuffer;
893 : }
894 :
895 : /* Perform a round of index and heap vacuuming */
896 0 : vacrel->consider_bypass_optimization = false;
897 0 : lazy_vacuum(vacrel);
898 :
899 : /*
900 : * Vacuum the Free Space Map to make newly-freed space visible on
901 : * upper-level FSM pages. Note we have not yet processed blkno.
902 : */
903 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
904 : blkno);
905 0 : next_fsm_block_to_vacuum = blkno;
906 :
907 : /* Report that we are once again scanning the heap */
908 0 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
909 : PROGRESS_VACUUM_PHASE_SCAN_HEAP);
910 : }
911 :
912 : /*
913 : * Pin the visibility map page in case we need to mark the page
914 : * all-visible. In most cases this will be very cheap, because we'll
915 : * already have the correct page pinned anyway.
916 : */
917 91010 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
918 :
919 91010 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
920 : vacrel->bstrategy);
921 91010 : page = BufferGetPage(buf);
922 :
923 : /*
924 : * We need a buffer cleanup lock to prune HOT chains and defragment
925 : * the page in lazy_scan_prune. But when it's not possible to acquire
926 : * a cleanup lock right away, we may be able to settle for reduced
927 : * processing using lazy_scan_noprune.
928 : */
929 91010 : got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
930 :
931 91010 : if (!got_cleanup_lock)
932 12 : LockBuffer(buf, BUFFER_LOCK_SHARE);
933 :
934 : /* Check for new or empty pages before lazy_scan_[no]prune call */
935 91010 : if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
936 91010 : vmbuffer))
937 : {
938 : /* Processed as new/empty page (lock and pin released) */
939 1174 : continue;
940 : }
941 :
942 : /*
943 : * If we didn't get the cleanup lock, we can still collect LP_DEAD
944 : * items in the dead_items area for later vacuuming, count live and
945 : * recently dead tuples for vacuum logging, and determine if this
946 : * block could later be truncated. If we encounter any xid/mxids that
947 : * require advancing the relfrozenxid/relminxid, we'll have to wait
948 : * for a cleanup lock and call lazy_scan_prune().
949 : */
950 89836 : if (!got_cleanup_lock &&
951 12 : !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
952 : {
953 : /*
954 : * lazy_scan_noprune could not do all required processing. Wait
955 : * for a cleanup lock, and call lazy_scan_prune in the usual way.
956 : */
957 : Assert(vacrel->aggressive);
958 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
959 0 : LockBufferForCleanup(buf);
960 0 : got_cleanup_lock = true;
961 : }
962 :
963 : /*
964 : * If we have a cleanup lock, we must now prune, freeze, and count
965 : * tuples. We may have acquired the cleanup lock originally, or we may
966 : * have gone back and acquired it after lazy_scan_noprune() returned
967 : * false. Either way, the page hasn't been processed yet.
968 : *
969 : * Like lazy_scan_noprune(), lazy_scan_prune() will count
970 : * recently_dead_tuples and live tuples for vacuum logging, determine
971 : * if the block can later be truncated, and accumulate the details of
972 : * remaining LP_DEAD line pointers on the page into dead_items. These
973 : * dead items include those pruned by lazy_scan_prune() as well as
974 : * line pointers previously marked LP_DEAD.
975 : */
976 89836 : if (got_cleanup_lock)
977 89824 : lazy_scan_prune(vacrel, buf, blkno, page,
978 : vmbuffer, all_visible_according_to_vm,
979 : &has_lpdead_items);
980 :
981 : /*
982 : * Now drop the buffer lock and, potentially, update the FSM.
983 : *
984 : * Our goal is to update the freespace map the last time we touch the
985 : * page. If we'll process a block in the second pass, we may free up
986 : * additional space on the page, so it is better to update the FSM
987 : * after the second pass. If the relation has no indexes, or if index
988 : * vacuuming is disabled, there will be no second heap pass; if this
989 : * particular page has no dead items, the second heap pass will not
990 : * touch this page. So, in those cases, update the FSM now.
991 : *
992 : * Note: In corner cases, it's possible to miss updating the FSM
993 : * entirely. If index vacuuming is currently enabled, we'll skip the
994 : * FSM update now. But if failsafe mode is later activated, or there
995 : * are so few dead tuples that index vacuuming is bypassed, there will
996 : * also be no opportunity to update the FSM later, because we'll never
997 : * revisit this page. Since updating the FSM is desirable but not
998 : * absolutely required, that's OK.
999 : */
1000 89836 : if (vacrel->nindexes == 0
1001 79754 : || !vacrel->do_index_vacuuming
1002 78986 : || !has_lpdead_items)
1003 70090 : {
1004 70090 : Size freespace = PageGetHeapFreeSpace(page);
1005 :
1006 70090 : UnlockReleaseBuffer(buf);
1007 70090 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1008 :
1009 : /*
1010 : * Periodically perform FSM vacuuming to make newly-freed space
1011 : * visible on upper FSM pages. This is done after vacuuming if the
1012 : * table has indexes. There will only be newly-freed space if we
1013 : * held the cleanup lock and lazy_scan_prune() was called.
1014 : */
1015 70090 : if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
1016 0 : blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
1017 : {
1018 0 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
1019 : blkno);
1020 0 : next_fsm_block_to_vacuum = blkno;
1021 : }
1022 : }
1023 : else
1024 19746 : UnlockReleaseBuffer(buf);
1025 : }
1026 :
1027 19096 : vacrel->blkno = InvalidBlockNumber;
1028 19096 : if (BufferIsValid(vmbuffer))
1029 8084 : ReleaseBuffer(vmbuffer);
1030 :
1031 : /* report that everything is now scanned */
1032 19096 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
1033 :
1034 : /* now we can compute the new value for pg_class.reltuples */
1035 38192 : vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
1036 : vacrel->scanned_pages,
1037 19096 : vacrel->live_tuples);
1038 :
1039 : /*
1040 : * Also compute the total number of surviving heap entries. In the
1041 : * (unlikely) scenario that new_live_tuples is -1, take it as zero.
1042 : */
1043 19096 : vacrel->new_rel_tuples =
1044 19096 : Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
1045 19096 : vacrel->missed_dead_tuples;
1046 :
1047 : /*
1048 : * Do index vacuuming (call each index's ambulkdelete routine), then do
1049 : * related heap vacuuming
1050 : */
1051 19096 : if (dead_items_info->num_items > 0)
1052 870 : lazy_vacuum(vacrel);
1053 :
1054 : /*
1055 : * Vacuum the remainder of the Free Space Map. We must do this whether or
1056 : * not there were indexes, and whether or not we bypassed index vacuuming.
1057 : */
1058 19096 : if (blkno > next_fsm_block_to_vacuum)
1059 8084 : FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
1060 :
1061 : /* report all blocks vacuumed */
1062 19096 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
1063 :
1064 : /* Do final index cleanup (call each index's amvacuumcleanup routine) */
1065 19096 : if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
1066 17198 : lazy_cleanup_all_indexes(vacrel);
1067 19096 : }
1068 :
1069 : /*
1070 : * heap_vac_scan_next_block() -- get next block for vacuum to process
1071 : *
1072 : * lazy_scan_heap() calls here every time it needs to get the next block to
1073 : * prune and vacuum. The function uses the visibility map, vacuum options,
1074 : * and various thresholds to skip blocks which do not need to be processed and
1075 : * sets blkno to the next block to process.
1076 : *
1077 : * The block number and visibility status of the next block to process are set
1078 : * in *blkno and *all_visible_according_to_vm. The return value is false if
1079 : * there are no further blocks to process.
1080 : *
1081 : * vacrel is an in/out parameter here. Vacuum options and information about
1082 : * the relation are read. vacrel->skippedallvis is set if we skip a block
1083 : * that's all-visible but not all-frozen, to ensure that we don't update
1084 : * relfrozenxid in that case. vacrel also holds information about the next
1085 : * unskippable block, as bookkeeping for this function.
1086 : */
1087 : static bool
1088 110106 : heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
1089 : bool *all_visible_according_to_vm)
1090 : {
1091 : BlockNumber next_block;
1092 :
1093 : /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
1094 110106 : next_block = vacrel->current_block + 1;
1095 :
1096 : /* Have we reached the end of the relation? */
1097 110106 : if (next_block >= vacrel->rel_pages)
1098 : {
1099 19096 : if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
1100 : {
1101 5956 : ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
1102 5956 : vacrel->next_unskippable_vmbuffer = InvalidBuffer;
1103 : }
1104 19096 : *blkno = vacrel->rel_pages;
1105 19096 : return false;
1106 : }
1107 :
1108 : /*
1109 : * We must be in one of the three following states:
1110 : */
1111 91010 : if (next_block > vacrel->next_unskippable_block ||
1112 18362 : vacrel->next_unskippable_block == InvalidBlockNumber)
1113 : {
1114 : /*
1115 : * 1. We have just processed an unskippable block (or we're at the
1116 : * beginning of the scan). Find the next unskippable block using the
1117 : * visibility map.
1118 : */
1119 : bool skipsallvis;
1120 :
1121 80732 : find_next_unskippable_block(vacrel, &skipsallvis);
1122 :
1123 : /*
1124 : * We now know the next block that we must process. It can be the
1125 : * next block after the one we just processed, or something further
1126 : * ahead. If it's further ahead, we can jump to it, but we choose to
1127 : * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
1128 : * pages. Since we're reading sequentially, the OS should be doing
1129 : * readahead for us, so there's no gain in skipping a page now and
1130 : * then. Skipping such a range might even discourage sequential
1131 : * detection.
1132 : *
1133 : * This test also enables more frequent relfrozenxid advancement
1134 : * during non-aggressive VACUUMs. If the range has any all-visible
1135 : * pages then skipping makes updating relfrozenxid unsafe, which is a
1136 : * real downside.
1137 : */
1138 80732 : if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
1139 : {
1140 328 : next_block = vacrel->next_unskippable_block;
1141 328 : if (skipsallvis)
1142 52 : vacrel->skippedallvis = true;
1143 : }
1144 : }
1145 :
1146 : /* Now we must be in one of the two remaining states: */
1147 91010 : if (next_block < vacrel->next_unskippable_block)
1148 : {
1149 : /*
1150 : * 2. We are processing a range of blocks that we could have skipped
1151 : * but chose not to. We know that they are all-visible in the VM,
1152 : * otherwise they would've been unskippable.
1153 : */
1154 10278 : *blkno = vacrel->current_block = next_block;
1155 10278 : *all_visible_according_to_vm = true;
1156 10278 : return true;
1157 : }
1158 : else
1159 : {
1160 : /*
1161 : * 3. We reached the next unskippable block. Process it. On next
1162 : * iteration, we will be back in state 1.
1163 : */
1164 : Assert(next_block == vacrel->next_unskippable_block);
1165 :
1166 80732 : *blkno = vacrel->current_block = next_block;
1167 80732 : *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
1168 80732 : return true;
1169 : }
1170 : }
1171 :
1172 : /*
1173 : * Find the next unskippable block in a vacuum scan using the visibility map.
1174 : * The next unskippable block and its visibility information is updated in
1175 : * vacrel.
1176 : *
1177 : * Note: our opinion of which blocks can be skipped can go stale immediately.
1178 : * It's okay if caller "misses" a page whose all-visible or all-frozen marking
1179 : * was concurrently cleared, though. All that matters is that caller scan all
1180 : * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
1181 : * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
1182 : * older XIDs/MXIDs. The *skippedallvis flag will be set here when the choice
1183 : * to skip such a range is actually made, making everything safe.)
1184 : */
1185 : static void
1186 80732 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
1187 : {
1188 80732 : BlockNumber rel_pages = vacrel->rel_pages;
1189 80732 : BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
1190 80732 : Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
1191 : bool next_unskippable_allvis;
1192 :
1193 80732 : *skipsallvis = false;
1194 :
1195 : for (;;)
1196 36400 : {
1197 117132 : uint8 mapbits = visibilitymap_get_status(vacrel->rel,
1198 : next_unskippable_block,
1199 : &next_unskippable_vmbuffer);
1200 :
1201 117132 : next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
1202 :
1203 : /*
1204 : * A block is unskippable if it is not all visible according to the
1205 : * visibility map.
1206 : */
1207 117132 : if (!next_unskippable_allvis)
1208 : {
1209 : Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
1210 76932 : break;
1211 : }
1212 :
1213 : /*
1214 : * Caller must scan the last page to determine whether it has tuples
1215 : * (caller must have the opportunity to set vacrel->nonempty_pages).
1216 : * This rule avoids having lazy_truncate_heap() take access-exclusive
1217 : * lock on rel to attempt a truncation that fails anyway, just because
1218 : * there are tuples on the last page (it is likely that there will be
1219 : * tuples on other nearby pages as well, but those can be skipped).
1220 : *
1221 : * Implement this by always treating the last block as unsafe to skip.
1222 : */
1223 40200 : if (next_unskippable_block == rel_pages - 1)
1224 3072 : break;
1225 :
1226 : /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
1227 37128 : if (!vacrel->skipwithvm)
1228 728 : break;
1229 :
1230 : /*
1231 : * Aggressive VACUUM caller can't skip pages just because they are
1232 : * all-visible. They may still skip all-frozen pages, which can't
1233 : * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
1234 : */
1235 36400 : if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
1236 : {
1237 5924 : if (vacrel->aggressive)
1238 0 : break;
1239 :
1240 : /*
1241 : * All-visible block is safe to skip in non-aggressive case. But
1242 : * remember that the final range contains such a block for later.
1243 : */
1244 5924 : *skipsallvis = true;
1245 : }
1246 :
1247 36400 : next_unskippable_block++;
1248 : }
1249 :
1250 : /* write the local variables back to vacrel */
1251 80732 : vacrel->next_unskippable_block = next_unskippable_block;
1252 80732 : vacrel->next_unskippable_allvis = next_unskippable_allvis;
1253 80732 : vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
1254 80732 : }
1255 :
1256 : /*
1257 : * lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
1258 : *
1259 : * Must call here to handle both new and empty pages before calling
1260 : * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
1261 : * with new or empty pages.
1262 : *
1263 : * It's necessary to consider new pages as a special case, since the rules for
1264 : * maintaining the visibility map and FSM with empty pages are a little
1265 : * different (though new pages can be truncated away during rel truncation).
1266 : *
1267 : * Empty pages are not really a special case -- they're just heap pages that
1268 : * have no allocated tuples (including even LP_UNUSED items). You might
1269 : * wonder why we need to handle them here all the same. It's only necessary
1270 : * because of a corner-case involving a hard crash during heap relation
1271 : * extension. If we ever make relation-extension crash safe, then it should
1272 : * no longer be necessary to deal with empty pages here (or new pages, for
1273 : * that matter).
1274 : *
1275 : * Caller must hold at least a shared lock. We might need to escalate the
1276 : * lock in that case, so the type of lock caller holds needs to be specified
1277 : * using 'sharelock' argument.
1278 : *
1279 : * Returns false in common case where caller should go on to call
1280 : * lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
1281 : * that lazy_scan_heap is done processing the page, releasing lock on caller's
1282 : * behalf.
1283 : */
1284 : static bool
1285 91010 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
1286 : Page page, bool sharelock, Buffer vmbuffer)
1287 : {
1288 : Size freespace;
1289 :
1290 91010 : if (PageIsNew(page))
1291 : {
1292 : /*
1293 : * All-zeroes pages can be left over if either a backend extends the
1294 : * relation by a single page, but crashes before the newly initialized
1295 : * page has been written out, or when bulk-extending the relation
1296 : * (which creates a number of empty pages at the tail end of the
1297 : * relation), and then enters them into the FSM.
1298 : *
1299 : * Note we do not enter the page into the visibilitymap. That has the
1300 : * downside that we repeatedly visit this page in subsequent vacuums,
1301 : * but otherwise we'll never discover the space on a promoted standby.
1302 : * The harm of repeated checking ought to normally not be too bad. The
1303 : * space usually should be used at some point, otherwise there
1304 : * wouldn't be any regular vacuums.
1305 : *
1306 : * Make sure these pages are in the FSM, to ensure they can be reused.
1307 : * Do that by testing if there's any space recorded for the page. If
1308 : * not, enter it. We do so after releasing the lock on the heap page,
1309 : * the FSM is approximate, after all.
1310 : */
1311 1132 : UnlockReleaseBuffer(buf);
1312 :
1313 1132 : if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
1314 : {
1315 820 : freespace = BLCKSZ - SizeOfPageHeaderData;
1316 :
1317 820 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1318 : }
1319 :
1320 1132 : return true;
1321 : }
1322 :
1323 89878 : if (PageIsEmpty(page))
1324 : {
1325 : /*
1326 : * It seems likely that caller will always be able to get a cleanup
1327 : * lock on an empty page. But don't take any chances -- escalate to
1328 : * an exclusive lock (still don't need a cleanup lock, though).
1329 : */
1330 42 : if (sharelock)
1331 : {
1332 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
1333 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
1334 :
1335 0 : if (!PageIsEmpty(page))
1336 : {
1337 : /* page isn't new or empty -- keep lock and pin for now */
1338 0 : return false;
1339 : }
1340 : }
1341 : else
1342 : {
1343 : /* Already have a full cleanup lock (which is more than enough) */
1344 : }
1345 :
1346 : /*
1347 : * Unlike new pages, empty pages are always set all-visible and
1348 : * all-frozen.
1349 : */
1350 42 : if (!PageIsAllVisible(page))
1351 : {
1352 0 : START_CRIT_SECTION();
1353 :
1354 : /* mark buffer dirty before writing a WAL record */
1355 0 : MarkBufferDirty(buf);
1356 :
1357 : /*
1358 : * It's possible that another backend has extended the heap,
1359 : * initialized the page, and then failed to WAL-log the page due
1360 : * to an ERROR. Since heap extension is not WAL-logged, recovery
1361 : * might try to replay our record setting the page all-visible and
1362 : * find that the page isn't initialized, which will cause a PANIC.
1363 : * To prevent that, check whether the page has been previously
1364 : * WAL-logged, and if not, do that now.
1365 : */
1366 0 : if (RelationNeedsWAL(vacrel->rel) &&
1367 0 : PageGetLSN(page) == InvalidXLogRecPtr)
1368 0 : log_newpage_buffer(buf, true);
1369 :
1370 0 : PageSetAllVisible(page);
1371 0 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1372 : vmbuffer, InvalidTransactionId,
1373 : VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
1374 0 : END_CRIT_SECTION();
1375 : }
1376 :
1377 42 : freespace = PageGetHeapFreeSpace(page);
1378 42 : UnlockReleaseBuffer(buf);
1379 42 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
1380 42 : return true;
1381 : }
1382 :
1383 : /* page isn't new or empty -- keep lock and pin */
1384 89836 : return false;
1385 : }
1386 :
1387 : /* qsort comparator for sorting OffsetNumbers */
1388 : static int
1389 4079700 : cmpOffsetNumbers(const void *a, const void *b)
1390 : {
1391 4079700 : return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
1392 : }
1393 :
1394 : /*
1395 : * lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
1396 : *
1397 : * Caller must hold pin and buffer cleanup lock on the buffer.
1398 : *
1399 : * vmbuffer is the buffer containing the VM block with visibility information
1400 : * for the heap block, blkno. all_visible_according_to_vm is the saved
1401 : * visibility status of the heap block looked up earlier by the caller. We
1402 : * won't rely entirely on this status, as it may be out of date.
1403 : *
1404 : * *has_lpdead_items is set to true or false depending on whether, upon return
1405 : * from this function, any LP_DEAD items are still present on the page.
1406 : */
1407 : static void
1408 89824 : lazy_scan_prune(LVRelState *vacrel,
1409 : Buffer buf,
1410 : BlockNumber blkno,
1411 : Page page,
1412 : Buffer vmbuffer,
1413 : bool all_visible_according_to_vm,
1414 : bool *has_lpdead_items)
1415 : {
1416 89824 : Relation rel = vacrel->rel;
1417 : PruneFreezeResult presult;
1418 89824 : int prune_options = 0;
1419 :
1420 : Assert(BufferGetBlockNumber(buf) == blkno);
1421 :
1422 : /*
1423 : * Prune all HOT-update chains and potentially freeze tuples on this page.
1424 : *
1425 : * If the relation has no indexes, we can immediately mark would-be dead
1426 : * items LP_UNUSED.
1427 : *
1428 : * The number of tuples removed from the page is returned in
1429 : * presult.ndeleted. It should not be confused with presult.lpdead_items;
1430 : * presult.lpdead_items's final value can be thought of as the number of
1431 : * tuples that were deleted from indexes.
1432 : *
1433 : * We will update the VM after collecting LP_DEAD items and freezing
1434 : * tuples. Pruning will have determined whether or not the page is
1435 : * all-visible.
1436 : */
1437 89824 : prune_options = HEAP_PAGE_PRUNE_FREEZE;
1438 89824 : if (vacrel->nindexes == 0)
1439 10082 : prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
1440 :
1441 89824 : heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
1442 : &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
1443 : &vacrel->offnum,
1444 : &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
1445 :
1446 : Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
1447 : Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
1448 :
1449 89824 : if (presult.nfrozen > 0)
1450 : {
1451 : /*
1452 : * We don't increment the frozen_pages instrumentation counter when
1453 : * nfrozen == 0, since it only counts pages with newly frozen tuples
1454 : * (don't confuse that with pages newly set all-frozen in VM).
1455 : */
1456 22352 : vacrel->frozen_pages++;
1457 : }
1458 :
1459 : /*
1460 : * VACUUM will call heap_page_is_all_visible() during the second pass over
1461 : * the heap to determine all_visible and all_frozen for the page -- this
1462 : * is a specialized version of the logic from this function. Now that
1463 : * we've finished pruning and freezing, make sure that we're in total
1464 : * agreement with heap_page_is_all_visible() using an assertion.
1465 : */
1466 : #ifdef USE_ASSERT_CHECKING
1467 : /* Note that all_frozen value does not matter when !all_visible */
1468 : if (presult.all_visible)
1469 : {
1470 : TransactionId debug_cutoff;
1471 : bool debug_all_frozen;
1472 :
1473 : Assert(presult.lpdead_items == 0);
1474 :
1475 : if (!heap_page_is_all_visible(vacrel, buf,
1476 : &debug_cutoff, &debug_all_frozen))
1477 : Assert(false);
1478 :
1479 : Assert(presult.all_frozen == debug_all_frozen);
1480 :
1481 : Assert(!TransactionIdIsValid(debug_cutoff) ||
1482 : debug_cutoff == presult.vm_conflict_horizon);
1483 : }
1484 : #endif
1485 :
1486 : /*
1487 : * Now save details of the LP_DEAD items from the page in vacrel
1488 : */
1489 89824 : if (presult.lpdead_items > 0)
1490 : {
1491 19834 : vacrel->lpdead_item_pages++;
1492 :
1493 : /*
1494 : * deadoffsets are collected incrementally in
1495 : * heap_page_prune_and_freeze() as each dead line pointer is recorded,
1496 : * with an indeterminate order, but dead_items_add requires them to be
1497 : * sorted.
1498 : */
1499 19834 : qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
1500 : cmpOffsetNumbers);
1501 :
1502 19834 : dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
1503 : }
1504 :
1505 : /* Finally, add page-local counts to whole-VACUUM counts */
1506 89824 : vacrel->tuples_deleted += presult.ndeleted;
1507 89824 : vacrel->tuples_frozen += presult.nfrozen;
1508 89824 : vacrel->lpdead_items += presult.lpdead_items;
1509 89824 : vacrel->live_tuples += presult.live_tuples;
1510 89824 : vacrel->recently_dead_tuples += presult.recently_dead_tuples;
1511 :
1512 : /* Can't truncate this page */
1513 89824 : if (presult.hastup)
1514 79138 : vacrel->nonempty_pages = blkno + 1;
1515 :
1516 : /* Did we find LP_DEAD items? */
1517 89824 : *has_lpdead_items = (presult.lpdead_items > 0);
1518 :
1519 : Assert(!presult.all_visible || !(*has_lpdead_items));
1520 :
1521 : /*
1522 : * Handle setting visibility map bit based on information from the VM (as
1523 : * of last heap_vac_scan_next_block() call), and from all_visible and
1524 : * all_frozen variables
1525 : */
1526 89824 : if (!all_visible_according_to_vm && presult.all_visible)
1527 46992 : {
1528 46992 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
1529 :
1530 46992 : if (presult.all_frozen)
1531 : {
1532 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1533 33332 : flags |= VISIBILITYMAP_ALL_FROZEN;
1534 : }
1535 :
1536 : /*
1537 : * It should never be the case that the visibility map page is set
1538 : * while the page-level bit is clear, but the reverse is allowed (if
1539 : * checksums are not enabled). Regardless, set both bits so that we
1540 : * get back in sync.
1541 : *
1542 : * NB: If the heap page is all-visible but the VM bit is not set, we
1543 : * don't need to dirty the heap page. However, if checksums are
1544 : * enabled, we do need to make sure that the heap page is dirtied
1545 : * before passing it to visibilitymap_set(), because it may be logged.
1546 : * Given that this situation should only happen in rare cases after a
1547 : * crash, it is not worth optimizing.
1548 : */
1549 46992 : PageSetAllVisible(page);
1550 46992 : MarkBufferDirty(buf);
1551 46992 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1552 : vmbuffer, presult.vm_conflict_horizon,
1553 : flags);
1554 : }
1555 :
1556 : /*
1557 : * As of PostgreSQL 9.2, the visibility map bit should never be set if the
1558 : * page-level bit is clear. However, it's possible that the bit got
1559 : * cleared after heap_vac_scan_next_block() was called, so we must recheck
1560 : * with buffer lock before concluding that the VM is corrupt.
1561 : */
1562 42832 : else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
1563 0 : visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
1564 : {
1565 0 : elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
1566 : vacrel->relname, blkno);
1567 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1568 : VISIBILITYMAP_VALID_BITS);
1569 : }
1570 :
1571 : /*
1572 : * It's possible for the value returned by
1573 : * GetOldestNonRemovableTransactionId() to move backwards, so it's not
1574 : * wrong for us to see tuples that appear to not be visible to everyone
1575 : * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
1576 : * never moves backwards, but GetOldestNonRemovableTransactionId() is
1577 : * conservative and sometimes returns a value that's unnecessarily small,
1578 : * so if we see that contradiction it just means that the tuples that we
1579 : * think are not visible to everyone yet actually are, and the
1580 : * PD_ALL_VISIBLE flag is correct.
1581 : *
1582 : * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
1583 : * however.
1584 : */
1585 42832 : else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
1586 : {
1587 0 : elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
1588 : vacrel->relname, blkno);
1589 0 : PageClearAllVisible(page);
1590 0 : MarkBufferDirty(buf);
1591 0 : visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
1592 : VISIBILITYMAP_VALID_BITS);
1593 : }
1594 :
1595 : /*
1596 : * If the all-visible page is all-frozen but not marked as such yet, mark
1597 : * it as all-frozen. Note that all_frozen is only valid if all_visible is
1598 : * true, so we must check both all_visible and all_frozen.
1599 : */
1600 42832 : else if (all_visible_according_to_vm && presult.all_visible &&
1601 14036 : presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
1602 : {
1603 : /*
1604 : * Avoid relying on all_visible_according_to_vm as a proxy for the
1605 : * page-level PD_ALL_VISIBLE bit being set, since it might have become
1606 : * stale -- even when all_visible is set
1607 : */
1608 20 : if (!PageIsAllVisible(page))
1609 : {
1610 0 : PageSetAllVisible(page);
1611 0 : MarkBufferDirty(buf);
1612 : }
1613 :
1614 : /*
1615 : * Set the page all-frozen (and all-visible) in the VM.
1616 : *
1617 : * We can pass InvalidTransactionId as our cutoff_xid, since a
1618 : * snapshotConflictHorizon sufficient to make everything safe for REDO
1619 : * was logged when the page's tuples were frozen.
1620 : */
1621 : Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
1622 20 : visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
1623 : vmbuffer, InvalidTransactionId,
1624 : VISIBILITYMAP_ALL_VISIBLE |
1625 : VISIBILITYMAP_ALL_FROZEN);
1626 : }
1627 89824 : }
1628 :
1629 : /*
1630 : * lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
1631 : *
1632 : * Caller need only hold a pin and share lock on the buffer, unlike
1633 : * lazy_scan_prune, which requires a full cleanup lock. While pruning isn't
1634 : * performed here, it's quite possible that an earlier opportunistic pruning
1635 : * operation left LP_DEAD items behind. We'll at least collect any such items
1636 : * in dead_items for removal from indexes.
1637 : *
1638 : * For aggressive VACUUM callers, we may return false to indicate that a full
1639 : * cleanup lock is required for processing by lazy_scan_prune. This is only
1640 : * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
1641 : * one or more tuples on the page. We always return true for non-aggressive
1642 : * callers.
1643 : *
1644 : * If this function returns true, *has_lpdead_items gets set to true or false
1645 : * depending on whether, upon return from this function, any LP_DEAD items are
1646 : * present on the page. If this function returns false, *has_lpdead_items
1647 : * is not updated.
1648 : */
1649 : static bool
1650 12 : lazy_scan_noprune(LVRelState *vacrel,
1651 : Buffer buf,
1652 : BlockNumber blkno,
1653 : Page page,
1654 : bool *has_lpdead_items)
1655 : {
1656 : OffsetNumber offnum,
1657 : maxoff;
1658 : int lpdead_items,
1659 : live_tuples,
1660 : recently_dead_tuples,
1661 : missed_dead_tuples;
1662 : bool hastup;
1663 : HeapTupleHeader tupleheader;
1664 12 : TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
1665 12 : MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
1666 : OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
1667 :
1668 : Assert(BufferGetBlockNumber(buf) == blkno);
1669 :
1670 12 : hastup = false; /* for now */
1671 :
1672 12 : lpdead_items = 0;
1673 12 : live_tuples = 0;
1674 12 : recently_dead_tuples = 0;
1675 12 : missed_dead_tuples = 0;
1676 :
1677 12 : maxoff = PageGetMaxOffsetNumber(page);
1678 530 : for (offnum = FirstOffsetNumber;
1679 : offnum <= maxoff;
1680 518 : offnum = OffsetNumberNext(offnum))
1681 : {
1682 : ItemId itemid;
1683 : HeapTupleData tuple;
1684 :
1685 518 : vacrel->offnum = offnum;
1686 518 : itemid = PageGetItemId(page, offnum);
1687 :
1688 518 : if (!ItemIdIsUsed(itemid))
1689 0 : continue;
1690 :
1691 518 : if (ItemIdIsRedirected(itemid))
1692 : {
1693 0 : hastup = true;
1694 0 : continue;
1695 : }
1696 :
1697 518 : if (ItemIdIsDead(itemid))
1698 : {
1699 : /*
1700 : * Deliberately don't set hastup=true here. See same point in
1701 : * lazy_scan_prune for an explanation.
1702 : */
1703 0 : deadoffsets[lpdead_items++] = offnum;
1704 0 : continue;
1705 : }
1706 :
1707 518 : hastup = true; /* page prevents rel truncation */
1708 518 : tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
1709 518 : if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
1710 : &NoFreezePageRelfrozenXid,
1711 : &NoFreezePageRelminMxid))
1712 : {
1713 : /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
1714 128 : if (vacrel->aggressive)
1715 : {
1716 : /*
1717 : * Aggressive VACUUMs must always be able to advance rel's
1718 : * relfrozenxid to a value >= FreezeLimit (and be able to
1719 : * advance rel's relminmxid to a value >= MultiXactCutoff).
1720 : * The ongoing aggressive VACUUM won't be able to do that
1721 : * unless it can freeze an XID (or MXID) from this tuple now.
1722 : *
1723 : * The only safe option is to have caller perform processing
1724 : * of this page using lazy_scan_prune. Caller might have to
1725 : * wait a while for a cleanup lock, but it can't be helped.
1726 : */
1727 0 : vacrel->offnum = InvalidOffsetNumber;
1728 0 : return false;
1729 : }
1730 :
1731 : /*
1732 : * Non-aggressive VACUUMs are under no obligation to advance
1733 : * relfrozenxid (even by one XID). We can be much laxer here.
1734 : *
1735 : * Currently we always just accept an older final relfrozenxid
1736 : * and/or relminmxid value. We never make caller wait or work a
1737 : * little harder, even when it likely makes sense to do so.
1738 : */
1739 : }
1740 :
1741 518 : ItemPointerSet(&(tuple.t_self), blkno, offnum);
1742 518 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
1743 518 : tuple.t_len = ItemIdGetLength(itemid);
1744 518 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
1745 :
1746 518 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
1747 : buf))
1748 : {
1749 198 : case HEAPTUPLE_DELETE_IN_PROGRESS:
1750 : case HEAPTUPLE_LIVE:
1751 :
1752 : /*
1753 : * Count both cases as live, just like lazy_scan_prune
1754 : */
1755 198 : live_tuples++;
1756 :
1757 198 : break;
1758 2 : case HEAPTUPLE_DEAD:
1759 :
1760 : /*
1761 : * There is some useful work for pruning to do, that won't be
1762 : * done due to failure to get a cleanup lock.
1763 : */
1764 2 : missed_dead_tuples++;
1765 2 : break;
1766 318 : case HEAPTUPLE_RECENTLY_DEAD:
1767 :
1768 : /*
1769 : * Count in recently_dead_tuples, just like lazy_scan_prune
1770 : */
1771 318 : recently_dead_tuples++;
1772 318 : break;
1773 0 : case HEAPTUPLE_INSERT_IN_PROGRESS:
1774 :
1775 : /*
1776 : * Do not count these rows as live, just like lazy_scan_prune
1777 : */
1778 0 : break;
1779 0 : default:
1780 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
1781 : break;
1782 : }
1783 : }
1784 :
1785 12 : vacrel->offnum = InvalidOffsetNumber;
1786 :
1787 : /*
1788 : * By here we know for sure that caller can put off freezing and pruning
1789 : * this particular page until the next VACUUM. Remember its details now.
1790 : * (lazy_scan_prune expects a clean slate, so we have to do this last.)
1791 : */
1792 12 : vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
1793 12 : vacrel->NewRelminMxid = NoFreezePageRelminMxid;
1794 :
1795 : /* Save any LP_DEAD items found on the page in dead_items */
1796 12 : if (vacrel->nindexes == 0)
1797 : {
1798 : /* Using one-pass strategy (since table has no indexes) */
1799 0 : if (lpdead_items > 0)
1800 : {
1801 : /*
1802 : * Perfunctory handling for the corner case where a single pass
1803 : * strategy VACUUM cannot get a cleanup lock, and it turns out
1804 : * that there is one or more LP_DEAD items: just count the LP_DEAD
1805 : * items as missed_dead_tuples instead. (This is a bit dishonest,
1806 : * but it beats having to maintain specialized heap vacuuming code
1807 : * forever, for vanishingly little benefit.)
1808 : */
1809 0 : hastup = true;
1810 0 : missed_dead_tuples += lpdead_items;
1811 : }
1812 : }
1813 12 : else if (lpdead_items > 0)
1814 : {
1815 : /*
1816 : * Page has LP_DEAD items, and so any references/TIDs that remain in
1817 : * indexes will be deleted during index vacuuming (and then marked
1818 : * LP_UNUSED in the heap)
1819 : */
1820 0 : vacrel->lpdead_item_pages++;
1821 :
1822 0 : dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
1823 :
1824 0 : vacrel->lpdead_items += lpdead_items;
1825 : }
1826 :
1827 : /*
1828 : * Finally, add relevant page-local counts to whole-VACUUM counts
1829 : */
1830 12 : vacrel->live_tuples += live_tuples;
1831 12 : vacrel->recently_dead_tuples += recently_dead_tuples;
1832 12 : vacrel->missed_dead_tuples += missed_dead_tuples;
1833 12 : if (missed_dead_tuples > 0)
1834 2 : vacrel->missed_dead_pages++;
1835 :
1836 : /* Can't truncate this page */
1837 12 : if (hastup)
1838 12 : vacrel->nonempty_pages = blkno + 1;
1839 :
1840 : /* Did we find LP_DEAD items? */
1841 12 : *has_lpdead_items = (lpdead_items > 0);
1842 :
1843 : /* Caller won't need to call lazy_scan_prune with same page */
1844 12 : return true;
1845 : }
1846 :
1847 : /*
1848 : * Main entry point for index vacuuming and heap vacuuming.
1849 : *
1850 : * Removes items collected in dead_items from table's indexes, then marks the
1851 : * same items LP_UNUSED in the heap. See the comments above lazy_scan_heap
1852 : * for full details.
1853 : *
1854 : * Also empties dead_items, freeing up space for later TIDs.
1855 : *
1856 : * We may choose to bypass index vacuuming at this point, though only when the
1857 : * ongoing VACUUM operation will definitely only have one index scan/round of
1858 : * index vacuuming.
1859 : */
1860 : static void
1861 870 : lazy_vacuum(LVRelState *vacrel)
1862 : {
1863 : bool bypass;
1864 :
1865 : /* Should not end up here with no indexes */
1866 : Assert(vacrel->nindexes > 0);
1867 : Assert(vacrel->lpdead_item_pages > 0);
1868 :
1869 870 : if (!vacrel->do_index_vacuuming)
1870 : {
1871 : Assert(!vacrel->do_index_cleanup);
1872 12 : dead_items_reset(vacrel);
1873 12 : return;
1874 : }
1875 :
1876 : /*
1877 : * Consider bypassing index vacuuming (and heap vacuuming) entirely.
1878 : *
1879 : * We currently only do this in cases where the number of LP_DEAD items
1880 : * for the entire VACUUM operation is close to zero. This avoids sharp
1881 : * discontinuities in the duration and overhead of successive VACUUM
1882 : * operations that run against the same table with a fixed workload.
1883 : * Ideally, successive VACUUM operations will behave as if there are
1884 : * exactly zero LP_DEAD items in cases where there are close to zero.
1885 : *
1886 : * This is likely to be helpful with a table that is continually affected
1887 : * by UPDATEs that can mostly apply the HOT optimization, but occasionally
1888 : * have small aberrations that lead to just a few heap pages retaining
1889 : * only one or two LP_DEAD items. This is pretty common; even when the
1890 : * DBA goes out of their way to make UPDATEs use HOT, it is practically
1891 : * impossible to predict whether HOT will be applied in 100% of cases.
1892 : * It's far easier to ensure that 99%+ of all UPDATEs against a table use
1893 : * HOT through careful tuning.
1894 : */
1895 858 : bypass = false;
1896 858 : if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
1897 : {
1898 : BlockNumber threshold;
1899 :
1900 : Assert(vacrel->num_index_scans == 0);
1901 : Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
1902 : Assert(vacrel->do_index_vacuuming);
1903 : Assert(vacrel->do_index_cleanup);
1904 :
1905 : /*
1906 : * This crossover point at which we'll start to do index vacuuming is
1907 : * expressed as a percentage of the total number of heap pages in the
1908 : * table that are known to have at least one LP_DEAD item. This is
1909 : * much more important than the total number of LP_DEAD items, since
1910 : * it's a proxy for the number of heap pages whose visibility map bits
1911 : * cannot be set on account of bypassing index and heap vacuuming.
1912 : *
1913 : * We apply one further precautionary test: the space currently used
1914 : * to store the TIDs (TIDs that now all point to LP_DEAD items) must
1915 : * not exceed 32MB. This limits the risk that we will bypass index
1916 : * vacuuming again and again until eventually there is a VACUUM whose
1917 : * dead_items space is not CPU cache resident.
1918 : *
1919 : * We don't take any special steps to remember the LP_DEAD items (such
1920 : * as counting them in our final update to the stats system) when the
1921 : * optimization is applied. Though the accounting used in analyze.c's
1922 : * acquire_sample_rows() will recognize the same LP_DEAD items as dead
1923 : * rows in its own stats report, that's okay. The discrepancy should
1924 : * be negligible. If this optimization is ever expanded to cover more
1925 : * cases then this may need to be reconsidered.
1926 : */
1927 842 : threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
1928 844 : bypass = (vacrel->lpdead_item_pages < threshold &&
1929 2 : (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
1930 : }
1931 :
1932 858 : if (bypass)
1933 : {
1934 : /*
1935 : * There are almost zero TIDs. Behave as if there were precisely
1936 : * zero: bypass index vacuuming, but do index cleanup.
1937 : *
1938 : * We expect that the ongoing VACUUM operation will finish very
1939 : * quickly, so there is no point in considering speeding up as a
1940 : * failsafe against wraparound failure. (Index cleanup is expected to
1941 : * finish very quickly in cases where there were no ambulkdelete()
1942 : * calls.)
1943 : */
1944 2 : vacrel->do_index_vacuuming = false;
1945 : }
1946 856 : else if (lazy_vacuum_all_indexes(vacrel))
1947 : {
1948 : /*
1949 : * We successfully completed a round of index vacuuming. Do related
1950 : * heap vacuuming now.
1951 : */
1952 856 : lazy_vacuum_heap_rel(vacrel);
1953 : }
1954 : else
1955 : {
1956 : /*
1957 : * Failsafe case.
1958 : *
1959 : * We attempted index vacuuming, but didn't finish a full round/full
1960 : * index scan. This happens when relfrozenxid or relminmxid is too
1961 : * far in the past.
1962 : *
1963 : * From this point on the VACUUM operation will do no further index
1964 : * vacuuming or heap vacuuming. This VACUUM operation won't end up
1965 : * back here again.
1966 : */
1967 : Assert(VacuumFailsafeActive);
1968 : }
1969 :
1970 : /*
1971 : * Forget the LP_DEAD items that we just vacuumed (or just decided to not
1972 : * vacuum)
1973 : */
1974 858 : dead_items_reset(vacrel);
1975 : }
1976 :
1977 : /*
1978 : * lazy_vacuum_all_indexes() -- Main entry for index vacuuming
1979 : *
1980 : * Returns true in the common case when all indexes were successfully
1981 : * vacuumed. Returns false in rare cases where we determined that the ongoing
1982 : * VACUUM operation is at risk of taking too long to finish, leading to
1983 : * wraparound failure.
1984 : */
1985 : static bool
1986 856 : lazy_vacuum_all_indexes(LVRelState *vacrel)
1987 : {
1988 856 : bool allindexes = true;
1989 856 : double old_live_tuples = vacrel->rel->rd_rel->reltuples;
1990 856 : const int progress_start_index[] = {
1991 : PROGRESS_VACUUM_PHASE,
1992 : PROGRESS_VACUUM_INDEXES_TOTAL
1993 : };
1994 856 : const int progress_end_index[] = {
1995 : PROGRESS_VACUUM_INDEXES_TOTAL,
1996 : PROGRESS_VACUUM_INDEXES_PROCESSED,
1997 : PROGRESS_VACUUM_NUM_INDEX_VACUUMS
1998 : };
1999 : int64 progress_start_val[2];
2000 : int64 progress_end_val[3];
2001 :
2002 : Assert(vacrel->nindexes > 0);
2003 : Assert(vacrel->do_index_vacuuming);
2004 : Assert(vacrel->do_index_cleanup);
2005 :
2006 : /* Precheck for XID wraparound emergencies */
2007 856 : if (lazy_check_wraparound_failsafe(vacrel))
2008 : {
2009 : /* Wraparound emergency -- don't even start an index scan */
2010 0 : return false;
2011 : }
2012 :
2013 : /*
2014 : * Report that we are now vacuuming indexes and the number of indexes to
2015 : * vacuum.
2016 : */
2017 856 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
2018 856 : progress_start_val[1] = vacrel->nindexes;
2019 856 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2020 :
2021 856 : if (!ParallelVacuumIsActive(vacrel))
2022 : {
2023 2508 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2024 : {
2025 1658 : Relation indrel = vacrel->indrels[idx];
2026 1658 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2027 :
2028 1658 : vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
2029 : old_live_tuples,
2030 : vacrel);
2031 :
2032 : /* Report the number of indexes vacuumed */
2033 1658 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2034 1658 : idx + 1);
2035 :
2036 1658 : if (lazy_check_wraparound_failsafe(vacrel))
2037 : {
2038 : /* Wraparound emergency -- end current index scan */
2039 0 : allindexes = false;
2040 0 : break;
2041 : }
2042 : }
2043 : }
2044 : else
2045 : {
2046 : /* Outsource everything to parallel variant */
2047 6 : parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
2048 : vacrel->num_index_scans);
2049 :
2050 : /*
2051 : * Do a postcheck to consider applying wraparound failsafe now. Note
2052 : * that parallel VACUUM only gets the precheck and this postcheck.
2053 : */
2054 6 : if (lazy_check_wraparound_failsafe(vacrel))
2055 0 : allindexes = false;
2056 : }
2057 :
2058 : /*
2059 : * We delete all LP_DEAD items from the first heap pass in all indexes on
2060 : * each call here (except calls where we choose to do the failsafe). This
2061 : * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
2062 : * of the failsafe triggering, which prevents the next call from taking
2063 : * place).
2064 : */
2065 : Assert(vacrel->num_index_scans > 0 ||
2066 : vacrel->dead_items_info->num_items == vacrel->lpdead_items);
2067 : Assert(allindexes || VacuumFailsafeActive);
2068 :
2069 : /*
2070 : * Increase and report the number of index scans. Also, we reset
2071 : * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
2072 : *
2073 : * We deliberately include the case where we started a round of bulk
2074 : * deletes that we weren't able to finish due to the failsafe triggering.
2075 : */
2076 856 : vacrel->num_index_scans++;
2077 856 : progress_end_val[0] = 0;
2078 856 : progress_end_val[1] = 0;
2079 856 : progress_end_val[2] = vacrel->num_index_scans;
2080 856 : pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
2081 :
2082 856 : return allindexes;
2083 : }
2084 :
2085 : /*
2086 : * lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
2087 : *
2088 : * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
2089 : * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
2090 : *
2091 : * We may also be able to truncate the line pointer array of the heap pages we
2092 : * visit. If there is a contiguous group of LP_UNUSED items at the end of the
2093 : * array, it can be reclaimed as free space. These LP_UNUSED items usually
2094 : * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
2095 : * each page to LP_UNUSED, and then consider if it's possible to truncate the
2096 : * page's line pointer array).
2097 : *
2098 : * Note: the reason for doing this as a second pass is we cannot remove the
2099 : * tuples until we've removed their index entries, and we want to process
2100 : * index entry removal in batches as large as possible.
2101 : */
2102 : static void
2103 856 : lazy_vacuum_heap_rel(LVRelState *vacrel)
2104 : {
2105 856 : BlockNumber vacuumed_pages = 0;
2106 856 : Buffer vmbuffer = InvalidBuffer;
2107 : LVSavedErrInfo saved_err_info;
2108 : TidStoreIter *iter;
2109 : TidStoreIterResult *iter_result;
2110 :
2111 : Assert(vacrel->do_index_vacuuming);
2112 : Assert(vacrel->do_index_cleanup);
2113 : Assert(vacrel->num_index_scans > 0);
2114 :
2115 : /* Report that we are now vacuuming the heap */
2116 856 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2117 : PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
2118 :
2119 : /* Update error traceback information */
2120 856 : update_vacuum_error_info(vacrel, &saved_err_info,
2121 : VACUUM_ERRCB_PHASE_VACUUM_HEAP,
2122 : InvalidBlockNumber, InvalidOffsetNumber);
2123 :
2124 856 : iter = TidStoreBeginIterate(vacrel->dead_items);
2125 20600 : while ((iter_result = TidStoreIterateNext(iter)) != NULL)
2126 : {
2127 : BlockNumber blkno;
2128 : Buffer buf;
2129 : Page page;
2130 : Size freespace;
2131 :
2132 19744 : vacuum_delay_point();
2133 :
2134 19744 : blkno = iter_result->blkno;
2135 19744 : vacrel->blkno = blkno;
2136 :
2137 : /*
2138 : * Pin the visibility map page in case we need to mark the page
2139 : * all-visible. In most cases this will be very cheap, because we'll
2140 : * already have the correct page pinned anyway.
2141 : */
2142 19744 : visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
2143 :
2144 : /* We need a non-cleanup exclusive lock to mark dead_items unused */
2145 19744 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2146 : vacrel->bstrategy);
2147 19744 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2148 19744 : lazy_vacuum_heap_page(vacrel, blkno, buf, iter_result->offsets,
2149 : iter_result->num_offsets, vmbuffer);
2150 :
2151 : /* Now that we've vacuumed the page, record its available space */
2152 19744 : page = BufferGetPage(buf);
2153 19744 : freespace = PageGetHeapFreeSpace(page);
2154 :
2155 19744 : UnlockReleaseBuffer(buf);
2156 19744 : RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
2157 19744 : vacuumed_pages++;
2158 : }
2159 856 : TidStoreEndIterate(iter);
2160 :
2161 856 : vacrel->blkno = InvalidBlockNumber;
2162 856 : if (BufferIsValid(vmbuffer))
2163 856 : ReleaseBuffer(vmbuffer);
2164 :
2165 : /*
2166 : * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
2167 : * the second heap pass. No more, no less.
2168 : */
2169 : Assert(vacrel->num_index_scans > 1 ||
2170 : (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
2171 : vacuumed_pages == vacrel->lpdead_item_pages));
2172 :
2173 856 : ereport(DEBUG2,
2174 : (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
2175 : vacrel->relname, (long long) vacrel->dead_items_info->num_items,
2176 : vacuumed_pages)));
2177 :
2178 : /* Revert to the previous phase information for error traceback */
2179 856 : restore_vacuum_error_info(vacrel, &saved_err_info);
2180 856 : }
2181 :
2182 : /*
2183 : * lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
2184 : * vacrel->dead_items store.
2185 : *
2186 : * Caller must have an exclusive buffer lock on the buffer (though a full
2187 : * cleanup lock is also acceptable). vmbuffer must be valid and already have
2188 : * a pin on blkno's visibility map page.
2189 : */
2190 : static void
2191 19744 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
2192 : OffsetNumber *deadoffsets, int num_offsets,
2193 : Buffer vmbuffer)
2194 : {
2195 19744 : Page page = BufferGetPage(buffer);
2196 : OffsetNumber unused[MaxHeapTuplesPerPage];
2197 19744 : int nunused = 0;
2198 : TransactionId visibility_cutoff_xid;
2199 : bool all_frozen;
2200 : LVSavedErrInfo saved_err_info;
2201 :
2202 : Assert(vacrel->do_index_vacuuming);
2203 :
2204 19744 : pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
2205 :
2206 : /* Update error traceback information */
2207 19744 : update_vacuum_error_info(vacrel, &saved_err_info,
2208 : VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
2209 : InvalidOffsetNumber);
2210 :
2211 19744 : START_CRIT_SECTION();
2212 :
2213 1324230 : for (int i = 0; i < num_offsets; i++)
2214 : {
2215 : ItemId itemid;
2216 1304486 : OffsetNumber toff = deadoffsets[i];
2217 :
2218 1304486 : itemid = PageGetItemId(page, toff);
2219 :
2220 : Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
2221 1304486 : ItemIdSetUnused(itemid);
2222 1304486 : unused[nunused++] = toff;
2223 : }
2224 :
2225 : Assert(nunused > 0);
2226 :
2227 : /* Attempt to truncate line pointer array now */
2228 19744 : PageTruncateLinePointerArray(page);
2229 :
2230 : /*
2231 : * Mark buffer dirty before we write WAL.
2232 : */
2233 19744 : MarkBufferDirty(buffer);
2234 :
2235 : /* XLOG stuff */
2236 19744 : if (RelationNeedsWAL(vacrel->rel))
2237 : {
2238 18260 : log_heap_prune_and_freeze(vacrel->rel, buffer,
2239 : InvalidTransactionId,
2240 : false, /* no cleanup lock required */
2241 : PRUNE_VACUUM_CLEANUP,
2242 : NULL, 0, /* frozen */
2243 : NULL, 0, /* redirected */
2244 : NULL, 0, /* dead */
2245 : unused, nunused);
2246 : }
2247 :
2248 : /*
2249 : * End critical section, so we safely can do visibility tests (which
2250 : * possibly need to perform IO and allocate memory!). If we crash now the
2251 : * page (including the corresponding vm bit) might not be marked all
2252 : * visible, but that's fine. A later vacuum will fix that.
2253 : */
2254 19744 : END_CRIT_SECTION();
2255 :
2256 : /*
2257 : * Now that we have removed the LP_DEAD items from the page, once again
2258 : * check if the page has become all-visible. The page is already marked
2259 : * dirty, exclusively locked, and, if needed, a full page image has been
2260 : * emitted.
2261 : */
2262 : Assert(!PageIsAllVisible(page));
2263 19744 : if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
2264 : &all_frozen))
2265 : {
2266 19654 : uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
2267 :
2268 19654 : if (all_frozen)
2269 : {
2270 : Assert(!TransactionIdIsValid(visibility_cutoff_xid));
2271 14880 : flags |= VISIBILITYMAP_ALL_FROZEN;
2272 : }
2273 :
2274 19654 : PageSetAllVisible(page);
2275 19654 : visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
2276 : vmbuffer, visibility_cutoff_xid, flags);
2277 : }
2278 :
2279 : /* Revert to the previous phase information for error traceback */
2280 19744 : restore_vacuum_error_info(vacrel, &saved_err_info);
2281 19744 : }
2282 :
2283 : /*
2284 : * Trigger the failsafe to avoid wraparound failure when vacrel table has a
2285 : * relfrozenxid and/or relminmxid that is dangerously far in the past.
2286 : * Triggering the failsafe makes the ongoing VACUUM bypass any further index
2287 : * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
2288 : *
2289 : * Any remaining work (work that VACUUM cannot just bypass) is typically sped
2290 : * up when the failsafe triggers. VACUUM stops applying any cost-based delay
2291 : * that it started out with.
2292 : *
2293 : * Returns true when failsafe has been triggered.
2294 : */
2295 : static bool
2296 21616 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
2297 : {
2298 : /* Don't warn more than once per VACUUM */
2299 21616 : if (VacuumFailsafeActive)
2300 0 : return true;
2301 :
2302 21616 : if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
2303 : {
2304 0 : const int progress_index[] = {
2305 : PROGRESS_VACUUM_INDEXES_TOTAL,
2306 : PROGRESS_VACUUM_INDEXES_PROCESSED
2307 : };
2308 0 : int64 progress_val[2] = {0, 0};
2309 :
2310 0 : VacuumFailsafeActive = true;
2311 :
2312 : /*
2313 : * Abandon use of a buffer access strategy to allow use of all of
2314 : * shared buffers. We assume the caller who allocated the memory for
2315 : * the BufferAccessStrategy will free it.
2316 : */
2317 0 : vacrel->bstrategy = NULL;
2318 :
2319 : /* Disable index vacuuming, index cleanup, and heap rel truncation */
2320 0 : vacrel->do_index_vacuuming = false;
2321 0 : vacrel->do_index_cleanup = false;
2322 0 : vacrel->do_rel_truncate = false;
2323 :
2324 : /* Reset the progress counters */
2325 0 : pgstat_progress_update_multi_param(2, progress_index, progress_val);
2326 :
2327 0 : ereport(WARNING,
2328 : (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
2329 : vacrel->dbname, vacrel->relnamespace, vacrel->relname,
2330 : vacrel->num_index_scans),
2331 : errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
2332 : errhint("Consider increasing configuration parameter maintenance_work_mem or autovacuum_work_mem.\n"
2333 : "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
2334 :
2335 : /* Stop applying cost limits from this point on */
2336 0 : VacuumCostActive = false;
2337 0 : VacuumCostBalance = 0;
2338 :
2339 0 : return true;
2340 : }
2341 :
2342 21616 : return false;
2343 : }
2344 :
2345 : /*
2346 : * lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
2347 : */
2348 : static void
2349 17198 : lazy_cleanup_all_indexes(LVRelState *vacrel)
2350 : {
2351 17198 : double reltuples = vacrel->new_rel_tuples;
2352 17198 : bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
2353 17198 : const int progress_start_index[] = {
2354 : PROGRESS_VACUUM_PHASE,
2355 : PROGRESS_VACUUM_INDEXES_TOTAL
2356 : };
2357 17198 : const int progress_end_index[] = {
2358 : PROGRESS_VACUUM_INDEXES_TOTAL,
2359 : PROGRESS_VACUUM_INDEXES_PROCESSED
2360 : };
2361 : int64 progress_start_val[2];
2362 17198 : int64 progress_end_val[2] = {0, 0};
2363 :
2364 : Assert(vacrel->do_index_cleanup);
2365 : Assert(vacrel->nindexes > 0);
2366 :
2367 : /*
2368 : * Report that we are now cleaning up indexes and the number of indexes to
2369 : * cleanup.
2370 : */
2371 17198 : progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
2372 17198 : progress_start_val[1] = vacrel->nindexes;
2373 17198 : pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
2374 :
2375 17198 : if (!ParallelVacuumIsActive(vacrel))
2376 : {
2377 43702 : for (int idx = 0; idx < vacrel->nindexes; idx++)
2378 : {
2379 26522 : Relation indrel = vacrel->indrels[idx];
2380 26522 : IndexBulkDeleteResult *istat = vacrel->indstats[idx];
2381 :
2382 53044 : vacrel->indstats[idx] =
2383 26522 : lazy_cleanup_one_index(indrel, istat, reltuples,
2384 : estimated_count, vacrel);
2385 :
2386 : /* Report the number of indexes cleaned up */
2387 26522 : pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
2388 26522 : idx + 1);
2389 : }
2390 : }
2391 : else
2392 : {
2393 : /* Outsource everything to parallel variant */
2394 18 : parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
2395 : vacrel->num_index_scans,
2396 : estimated_count);
2397 : }
2398 :
2399 : /* Reset the progress counters */
2400 17198 : pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
2401 17198 : }
2402 :
2403 : /*
2404 : * lazy_vacuum_one_index() -- vacuum index relation.
2405 : *
2406 : * Delete all the index tuples containing a TID collected in
2407 : * vacrel->dead_items. Also update running statistics. Exact
2408 : * details depend on index AM's ambulkdelete routine.
2409 : *
2410 : * reltuples is the number of heap tuples to be passed to the
2411 : * bulkdelete callback. It's always assumed to be estimated.
2412 : * See indexam.sgml for more info.
2413 : *
2414 : * Returns bulk delete stats derived from input stats
2415 : */
2416 : static IndexBulkDeleteResult *
2417 1658 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2418 : double reltuples, LVRelState *vacrel)
2419 : {
2420 : IndexVacuumInfo ivinfo;
2421 : LVSavedErrInfo saved_err_info;
2422 :
2423 1658 : ivinfo.index = indrel;
2424 1658 : ivinfo.heaprel = vacrel->rel;
2425 1658 : ivinfo.analyze_only = false;
2426 1658 : ivinfo.report_progress = false;
2427 1658 : ivinfo.estimated_count = true;
2428 1658 : ivinfo.message_level = DEBUG2;
2429 1658 : ivinfo.num_heap_tuples = reltuples;
2430 1658 : ivinfo.strategy = vacrel->bstrategy;
2431 :
2432 : /*
2433 : * Update error traceback information.
2434 : *
2435 : * The index name is saved during this phase and restored immediately
2436 : * after this phase. See vacuum_error_callback.
2437 : */
2438 : Assert(vacrel->indname == NULL);
2439 1658 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2440 1658 : update_vacuum_error_info(vacrel, &saved_err_info,
2441 : VACUUM_ERRCB_PHASE_VACUUM_INDEX,
2442 : InvalidBlockNumber, InvalidOffsetNumber);
2443 :
2444 : /* Do bulk deletion */
2445 1658 : istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items,
2446 : vacrel->dead_items_info);
2447 :
2448 : /* Revert to the previous phase information for error traceback */
2449 1658 : restore_vacuum_error_info(vacrel, &saved_err_info);
2450 1658 : pfree(vacrel->indname);
2451 1658 : vacrel->indname = NULL;
2452 :
2453 1658 : return istat;
2454 : }
2455 :
2456 : /*
2457 : * lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2458 : *
2459 : * Calls index AM's amvacuumcleanup routine. reltuples is the number
2460 : * of heap tuples and estimated_count is true if reltuples is an
2461 : * estimated value. See indexam.sgml for more info.
2462 : *
2463 : * Returns bulk delete stats derived from input stats
2464 : */
2465 : static IndexBulkDeleteResult *
2466 26522 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
2467 : double reltuples, bool estimated_count,
2468 : LVRelState *vacrel)
2469 : {
2470 : IndexVacuumInfo ivinfo;
2471 : LVSavedErrInfo saved_err_info;
2472 :
2473 26522 : ivinfo.index = indrel;
2474 26522 : ivinfo.heaprel = vacrel->rel;
2475 26522 : ivinfo.analyze_only = false;
2476 26522 : ivinfo.report_progress = false;
2477 26522 : ivinfo.estimated_count = estimated_count;
2478 26522 : ivinfo.message_level = DEBUG2;
2479 :
2480 26522 : ivinfo.num_heap_tuples = reltuples;
2481 26522 : ivinfo.strategy = vacrel->bstrategy;
2482 :
2483 : /*
2484 : * Update error traceback information.
2485 : *
2486 : * The index name is saved during this phase and restored immediately
2487 : * after this phase. See vacuum_error_callback.
2488 : */
2489 : Assert(vacrel->indname == NULL);
2490 26522 : vacrel->indname = pstrdup(RelationGetRelationName(indrel));
2491 26522 : update_vacuum_error_info(vacrel, &saved_err_info,
2492 : VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
2493 : InvalidBlockNumber, InvalidOffsetNumber);
2494 :
2495 26522 : istat = vac_cleanup_one_index(&ivinfo, istat);
2496 :
2497 : /* Revert to the previous phase information for error traceback */
2498 26522 : restore_vacuum_error_info(vacrel, &saved_err_info);
2499 26522 : pfree(vacrel->indname);
2500 26522 : vacrel->indname = NULL;
2501 :
2502 26522 : return istat;
2503 : }
2504 :
2505 : /*
2506 : * should_attempt_truncation - should we attempt to truncate the heap?
2507 : *
2508 : * Don't even think about it unless we have a shot at releasing a goodly
2509 : * number of pages. Otherwise, the time taken isn't worth it, mainly because
2510 : * an AccessExclusive lock must be replayed on any hot standby, where it can
2511 : * be particularly disruptive.
2512 : *
2513 : * Also don't attempt it if wraparound failsafe is in effect. The entire
2514 : * system might be refusing to allocate new XIDs at this point. The system
2515 : * definitely won't return to normal unless and until VACUUM actually advances
2516 : * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
2517 : * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
2518 : * truncate the table under these circumstances, an XID exhaustion error might
2519 : * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
2520 : * There is very little chance of truncation working out when the failsafe is
2521 : * in effect in any case. lazy_scan_prune makes the optimistic assumption
2522 : * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
2523 : * we're called.
2524 : */
2525 : static bool
2526 19096 : should_attempt_truncation(LVRelState *vacrel)
2527 : {
2528 : BlockNumber possibly_freeable;
2529 :
2530 19096 : if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
2531 240 : return false;
2532 :
2533 18856 : possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
2534 18856 : if (possibly_freeable > 0 &&
2535 260 : (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
2536 260 : possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
2537 246 : return true;
2538 :
2539 18610 : return false;
2540 : }
2541 :
2542 : /*
2543 : * lazy_truncate_heap - try to truncate off any empty pages at the end
2544 : */
2545 : static void
2546 246 : lazy_truncate_heap(LVRelState *vacrel)
2547 : {
2548 246 : BlockNumber orig_rel_pages = vacrel->rel_pages;
2549 : BlockNumber new_rel_pages;
2550 : bool lock_waiter_detected;
2551 : int lock_retry;
2552 :
2553 : /* Report that we are now truncating */
2554 246 : pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
2555 : PROGRESS_VACUUM_PHASE_TRUNCATE);
2556 :
2557 : /* Update error traceback information one last time */
2558 246 : update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
2559 : vacrel->nonempty_pages, InvalidOffsetNumber);
2560 :
2561 : /*
2562 : * Loop until no more truncating can be done.
2563 : */
2564 : do
2565 : {
2566 : /*
2567 : * We need full exclusive lock on the relation in order to do
2568 : * truncation. If we can't get it, give up rather than waiting --- we
2569 : * don't want to block other backends, and we don't want to deadlock
2570 : * (which is quite possible considering we already hold a lower-grade
2571 : * lock).
2572 : */
2573 246 : lock_waiter_detected = false;
2574 246 : lock_retry = 0;
2575 : while (true)
2576 : {
2577 646 : if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
2578 242 : break;
2579 :
2580 : /*
2581 : * Check for interrupts while trying to (re-)acquire the exclusive
2582 : * lock.
2583 : */
2584 404 : CHECK_FOR_INTERRUPTS();
2585 :
2586 404 : if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
2587 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
2588 : {
2589 : /*
2590 : * We failed to establish the lock in the specified number of
2591 : * retries. This means we give up truncating.
2592 : */
2593 4 : ereport(vacrel->verbose ? INFO : DEBUG2,
2594 : (errmsg("\"%s\": stopping truncate due to conflicting lock request",
2595 : vacrel->relname)));
2596 6 : return;
2597 : }
2598 :
2599 400 : (void) WaitLatch(MyLatch,
2600 : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2601 : VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
2602 : WAIT_EVENT_VACUUM_TRUNCATE);
2603 400 : ResetLatch(MyLatch);
2604 : }
2605 :
2606 : /*
2607 : * Now that we have exclusive lock, look to see if the rel has grown
2608 : * whilst we were vacuuming with non-exclusive lock. If so, give up;
2609 : * the newly added pages presumably contain non-deletable tuples.
2610 : */
2611 242 : new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
2612 242 : if (new_rel_pages != orig_rel_pages)
2613 : {
2614 : /*
2615 : * Note: we intentionally don't update vacrel->rel_pages with the
2616 : * new rel size here. If we did, it would amount to assuming that
2617 : * the new pages are empty, which is unlikely. Leaving the numbers
2618 : * alone amounts to assuming that the new pages have the same
2619 : * tuple density as existing ones, which is less unlikely.
2620 : */
2621 0 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2622 0 : return;
2623 : }
2624 :
2625 : /*
2626 : * Scan backwards from the end to verify that the end pages actually
2627 : * contain no tuples. This is *necessary*, not optional, because
2628 : * other backends could have added tuples to these pages whilst we
2629 : * were vacuuming.
2630 : */
2631 242 : new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
2632 242 : vacrel->blkno = new_rel_pages;
2633 :
2634 242 : if (new_rel_pages >= orig_rel_pages)
2635 : {
2636 : /* can't do anything after all */
2637 2 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2638 2 : return;
2639 : }
2640 :
2641 : /*
2642 : * Okay to truncate.
2643 : */
2644 240 : RelationTruncate(vacrel->rel, new_rel_pages);
2645 :
2646 : /*
2647 : * We can release the exclusive lock as soon as we have truncated.
2648 : * Other backends can't safely access the relation until they have
2649 : * processed the smgr invalidation that smgrtruncate sent out ... but
2650 : * that should happen as part of standard invalidation processing once
2651 : * they acquire lock on the relation.
2652 : */
2653 240 : UnlockRelation(vacrel->rel, AccessExclusiveLock);
2654 :
2655 : /*
2656 : * Update statistics. Here, it *is* correct to adjust rel_pages
2657 : * without also touching reltuples, since the tuple count wasn't
2658 : * changed by the truncation.
2659 : */
2660 240 : vacrel->removed_pages += orig_rel_pages - new_rel_pages;
2661 240 : vacrel->rel_pages = new_rel_pages;
2662 :
2663 240 : ereport(vacrel->verbose ? INFO : DEBUG2,
2664 : (errmsg("table \"%s\": truncated %u to %u pages",
2665 : vacrel->relname,
2666 : orig_rel_pages, new_rel_pages)));
2667 240 : orig_rel_pages = new_rel_pages;
2668 240 : } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
2669 : }
2670 :
2671 : /*
2672 : * Rescan end pages to verify that they are (still) empty of tuples.
2673 : *
2674 : * Returns number of nondeletable pages (last nonempty page + 1).
2675 : */
2676 : static BlockNumber
2677 242 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
2678 : {
2679 : BlockNumber blkno;
2680 : BlockNumber prefetchedUntil;
2681 : instr_time starttime;
2682 :
2683 : /* Initialize the starttime if we check for conflicting lock requests */
2684 242 : INSTR_TIME_SET_CURRENT(starttime);
2685 :
2686 : /*
2687 : * Start checking blocks at what we believe relation end to be and move
2688 : * backwards. (Strange coding of loop control is needed because blkno is
2689 : * unsigned.) To make the scan faster, we prefetch a few blocks at a time
2690 : * in forward direction, so that OS-level readahead can kick in.
2691 : */
2692 242 : blkno = vacrel->rel_pages;
2693 : StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
2694 : "prefetch size must be power of 2");
2695 242 : prefetchedUntil = InvalidBlockNumber;
2696 3924 : while (blkno > vacrel->nonempty_pages)
2697 : {
2698 : Buffer buf;
2699 : Page page;
2700 : OffsetNumber offnum,
2701 : maxoff;
2702 : bool hastup;
2703 :
2704 : /*
2705 : * Check if another process requests a lock on our relation. We are
2706 : * holding an AccessExclusiveLock here, so they will be waiting. We
2707 : * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
2708 : * only check if that interval has elapsed once every 32 blocks to
2709 : * keep the number of system calls and actual shared lock table
2710 : * lookups to a minimum.
2711 : */
2712 3692 : if ((blkno % 32) == 0)
2713 : {
2714 : instr_time currenttime;
2715 : instr_time elapsed;
2716 :
2717 122 : INSTR_TIME_SET_CURRENT(currenttime);
2718 122 : elapsed = currenttime;
2719 122 : INSTR_TIME_SUBTRACT(elapsed, starttime);
2720 122 : if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
2721 : >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
2722 : {
2723 0 : if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
2724 : {
2725 0 : ereport(vacrel->verbose ? INFO : DEBUG2,
2726 : (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
2727 : vacrel->relname)));
2728 :
2729 0 : *lock_waiter_detected = true;
2730 0 : return blkno;
2731 : }
2732 0 : starttime = currenttime;
2733 : }
2734 : }
2735 :
2736 : /*
2737 : * We don't insert a vacuum delay point here, because we have an
2738 : * exclusive lock on the table which we want to hold for as short a
2739 : * time as possible. We still need to check for interrupts however.
2740 : */
2741 3692 : CHECK_FOR_INTERRUPTS();
2742 :
2743 3692 : blkno--;
2744 :
2745 : /* If we haven't prefetched this lot yet, do so now. */
2746 3692 : if (prefetchedUntil > blkno)
2747 : {
2748 : BlockNumber prefetchStart;
2749 : BlockNumber pblkno;
2750 :
2751 332 : prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
2752 5396 : for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
2753 : {
2754 5064 : PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
2755 5064 : CHECK_FOR_INTERRUPTS();
2756 : }
2757 332 : prefetchedUntil = prefetchStart;
2758 : }
2759 :
2760 3692 : buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
2761 : vacrel->bstrategy);
2762 :
2763 : /* In this phase we only need shared access to the buffer */
2764 3692 : LockBuffer(buf, BUFFER_LOCK_SHARE);
2765 :
2766 3692 : page = BufferGetPage(buf);
2767 :
2768 3692 : if (PageIsNew(page) || PageIsEmpty(page))
2769 : {
2770 1602 : UnlockReleaseBuffer(buf);
2771 1602 : continue;
2772 : }
2773 :
2774 2090 : hastup = false;
2775 2090 : maxoff = PageGetMaxOffsetNumber(page);
2776 4170 : for (offnum = FirstOffsetNumber;
2777 : offnum <= maxoff;
2778 2080 : offnum = OffsetNumberNext(offnum))
2779 : {
2780 : ItemId itemid;
2781 :
2782 2090 : itemid = PageGetItemId(page, offnum);
2783 :
2784 : /*
2785 : * Note: any non-unused item should be taken as a reason to keep
2786 : * this page. Even an LP_DEAD item makes truncation unsafe, since
2787 : * we must not have cleaned out its index entries.
2788 : */
2789 2090 : if (ItemIdIsUsed(itemid))
2790 : {
2791 10 : hastup = true;
2792 10 : break; /* can stop scanning */
2793 : }
2794 : } /* scan along page */
2795 :
2796 2090 : UnlockReleaseBuffer(buf);
2797 :
2798 : /* Done scanning if we found a tuple here */
2799 2090 : if (hastup)
2800 10 : return blkno + 1;
2801 : }
2802 :
2803 : /*
2804 : * If we fall out of the loop, all the previously-thought-to-be-empty
2805 : * pages still are; we need not bother to look at the last known-nonempty
2806 : * page.
2807 : */
2808 232 : return vacrel->nonempty_pages;
2809 : }
2810 :
2811 : /*
2812 : * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
2813 : * shared memory). Sets both in vacrel for caller.
2814 : *
2815 : * Also handles parallel initialization as part of allocating dead_items in
2816 : * DSM when required.
2817 : */
2818 : static void
2819 19096 : dead_items_alloc(LVRelState *vacrel, int nworkers)
2820 : {
2821 : VacDeadItemsInfo *dead_items_info;
2822 38236 : int vac_work_mem = AmAutoVacuumWorkerProcess() &&
2823 44 : autovacuum_work_mem != -1 ?
2824 19140 : autovacuum_work_mem : maintenance_work_mem;
2825 :
2826 : /*
2827 : * Initialize state for a parallel vacuum. As of now, only one worker can
2828 : * be used for an index, so we invoke parallelism only if there are at
2829 : * least two indexes on a table.
2830 : */
2831 19096 : if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
2832 : {
2833 : /*
2834 : * Since parallel workers cannot access data in temporary tables, we
2835 : * can't perform parallel vacuum on them.
2836 : */
2837 7342 : if (RelationUsesLocalBuffers(vacrel->rel))
2838 : {
2839 : /*
2840 : * Give warning only if the user explicitly tries to perform a
2841 : * parallel vacuum on the temporary table.
2842 : */
2843 6 : if (nworkers > 0)
2844 6 : ereport(WARNING,
2845 : (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
2846 : vacrel->relname)));
2847 : }
2848 : else
2849 7336 : vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
2850 : vacrel->nindexes, nworkers,
2851 : vac_work_mem,
2852 7336 : vacrel->verbose ? INFO : DEBUG2,
2853 : vacrel->bstrategy);
2854 :
2855 : /*
2856 : * If parallel mode started, dead_items and dead_items_info spaces are
2857 : * allocated in DSM.
2858 : */
2859 7342 : if (ParallelVacuumIsActive(vacrel))
2860 : {
2861 18 : vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
2862 : &vacrel->dead_items_info);
2863 18 : return;
2864 : }
2865 : }
2866 :
2867 : /*
2868 : * Serial VACUUM case. Allocate both dead_items and dead_items_info
2869 : * locally.
2870 : */
2871 :
2872 19078 : dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
2873 19078 : dead_items_info->max_bytes = vac_work_mem * 1024L;
2874 19078 : dead_items_info->num_items = 0;
2875 19078 : vacrel->dead_items_info = dead_items_info;
2876 :
2877 19078 : vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
2878 : }
2879 :
2880 : /*
2881 : * Add the given block number and offset numbers to dead_items.
2882 : */
2883 : static void
2884 19834 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
2885 : int num_offsets)
2886 : {
2887 19834 : TidStore *dead_items = vacrel->dead_items;
2888 :
2889 19834 : TidStoreSetBlockOffsets(dead_items, blkno, offsets, num_offsets);
2890 19834 : vacrel->dead_items_info->num_items += num_offsets;
2891 :
2892 : /* update the memory usage report */
2893 19834 : pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
2894 19834 : TidStoreMemoryUsage(dead_items));
2895 19834 : }
2896 :
2897 : /*
2898 : * Forget all collected dead items.
2899 : */
2900 : static void
2901 870 : dead_items_reset(LVRelState *vacrel)
2902 : {
2903 870 : TidStore *dead_items = vacrel->dead_items;
2904 :
2905 870 : if (ParallelVacuumIsActive(vacrel))
2906 : {
2907 6 : parallel_vacuum_reset_dead_items(vacrel->pvs);
2908 6 : return;
2909 : }
2910 :
2911 : /* Recreate the tidstore with the same max_bytes limitation */
2912 864 : TidStoreDestroy(dead_items);
2913 864 : vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
2914 :
2915 : /* Reset the counter */
2916 864 : vacrel->dead_items_info->num_items = 0;
2917 : }
2918 :
2919 : /*
2920 : * Perform cleanup for resources allocated in dead_items_alloc
2921 : */
2922 : static void
2923 19096 : dead_items_cleanup(LVRelState *vacrel)
2924 : {
2925 19096 : if (!ParallelVacuumIsActive(vacrel))
2926 : {
2927 : /* Don't bother with pfree here */
2928 19078 : return;
2929 : }
2930 :
2931 : /* End parallel mode */
2932 18 : parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
2933 18 : vacrel->pvs = NULL;
2934 : }
2935 :
2936 : /*
2937 : * Check if every tuple in the given page is visible to all current and future
2938 : * transactions. Also return the visibility_cutoff_xid which is the highest
2939 : * xmin amongst the visible tuples. Set *all_frozen to true if every tuple
2940 : * on this page is frozen.
2941 : *
2942 : * This is a stripped down version of lazy_scan_prune(). If you change
2943 : * anything here, make sure that everything stays in sync. Note that an
2944 : * assertion calls us to verify that everybody still agrees. Be sure to avoid
2945 : * introducing new side-effects here.
2946 : */
2947 : static bool
2948 19744 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
2949 : TransactionId *visibility_cutoff_xid,
2950 : bool *all_frozen)
2951 : {
2952 19744 : Page page = BufferGetPage(buf);
2953 19744 : BlockNumber blockno = BufferGetBlockNumber(buf);
2954 : OffsetNumber offnum,
2955 : maxoff;
2956 19744 : bool all_visible = true;
2957 :
2958 19744 : *visibility_cutoff_xid = InvalidTransactionId;
2959 19744 : *all_frozen = true;
2960 :
2961 19744 : maxoff = PageGetMaxOffsetNumber(page);
2962 1045022 : for (offnum = FirstOffsetNumber;
2963 1025358 : offnum <= maxoff && all_visible;
2964 1025278 : offnum = OffsetNumberNext(offnum))
2965 : {
2966 : ItemId itemid;
2967 : HeapTupleData tuple;
2968 :
2969 : /*
2970 : * Set the offset number so that we can display it along with any
2971 : * error that occurred while processing this tuple.
2972 : */
2973 1025278 : vacrel->offnum = offnum;
2974 1025278 : itemid = PageGetItemId(page, offnum);
2975 :
2976 : /* Unused or redirect line pointers are of no interest */
2977 1025278 : if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
2978 245592 : continue;
2979 :
2980 779686 : ItemPointerSet(&(tuple.t_self), blockno, offnum);
2981 :
2982 : /*
2983 : * Dead line pointers can have index pointers pointing to them. So
2984 : * they can't be treated as visible
2985 : */
2986 779686 : if (ItemIdIsDead(itemid))
2987 : {
2988 0 : all_visible = false;
2989 0 : *all_frozen = false;
2990 0 : break;
2991 : }
2992 :
2993 : Assert(ItemIdIsNormal(itemid));
2994 :
2995 779686 : tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
2996 779686 : tuple.t_len = ItemIdGetLength(itemid);
2997 779686 : tuple.t_tableOid = RelationGetRelid(vacrel->rel);
2998 :
2999 779686 : switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
3000 : buf))
3001 : {
3002 779638 : case HEAPTUPLE_LIVE:
3003 : {
3004 : TransactionId xmin;
3005 :
3006 : /* Check comments in lazy_scan_prune. */
3007 779638 : if (!HeapTupleHeaderXminCommitted(tuple.t_data))
3008 : {
3009 0 : all_visible = false;
3010 0 : *all_frozen = false;
3011 0 : break;
3012 : }
3013 :
3014 : /*
3015 : * The inserter definitely committed. But is it old enough
3016 : * that everyone sees it as committed?
3017 : */
3018 779638 : xmin = HeapTupleHeaderGetXmin(tuple.t_data);
3019 779638 : if (!TransactionIdPrecedes(xmin,
3020 : vacrel->cutoffs.OldestXmin))
3021 : {
3022 42 : all_visible = false;
3023 42 : *all_frozen = false;
3024 42 : break;
3025 : }
3026 :
3027 : /* Track newest xmin on page. */
3028 779596 : if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
3029 : TransactionIdIsNormal(xmin))
3030 17108 : *visibility_cutoff_xid = xmin;
3031 :
3032 : /* Check whether this tuple is already frozen or not */
3033 950860 : if (all_visible && *all_frozen &&
3034 171264 : heap_tuple_needs_eventual_freeze(tuple.t_data))
3035 4796 : *all_frozen = false;
3036 : }
3037 779596 : break;
3038 :
3039 48 : case HEAPTUPLE_DEAD:
3040 : case HEAPTUPLE_RECENTLY_DEAD:
3041 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3042 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3043 : {
3044 48 : all_visible = false;
3045 48 : *all_frozen = false;
3046 48 : break;
3047 : }
3048 0 : default:
3049 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
3050 : break;
3051 : }
3052 : } /* scan along page */
3053 :
3054 : /* Clear the offset information once we have processed the given page. */
3055 19744 : vacrel->offnum = InvalidOffsetNumber;
3056 :
3057 19744 : return all_visible;
3058 : }
3059 :
3060 : /*
3061 : * Update index statistics in pg_class if the statistics are accurate.
3062 : */
3063 : static void
3064 18832 : update_relstats_all_indexes(LVRelState *vacrel)
3065 : {
3066 18832 : Relation *indrels = vacrel->indrels;
3067 18832 : int nindexes = vacrel->nindexes;
3068 18832 : IndexBulkDeleteResult **indstats = vacrel->indstats;
3069 :
3070 : Assert(vacrel->do_index_cleanup);
3071 :
3072 45444 : for (int idx = 0; idx < nindexes; idx++)
3073 : {
3074 26612 : Relation indrel = indrels[idx];
3075 26612 : IndexBulkDeleteResult *istat = indstats[idx];
3076 :
3077 26612 : if (istat == NULL || istat->estimated_count)
3078 24710 : continue;
3079 :
3080 : /* Update index statistics */
3081 1902 : vac_update_relstats(indrel,
3082 : istat->num_pages,
3083 : istat->num_index_tuples,
3084 : 0,
3085 : false,
3086 : InvalidTransactionId,
3087 : InvalidMultiXactId,
3088 : NULL, NULL, false);
3089 : }
3090 18832 : }
3091 :
3092 : /*
3093 : * Error context callback for errors occurring during vacuum. The error
3094 : * context messages for index phases should match the messages set in parallel
3095 : * vacuum. If you change this function for those phases, change
3096 : * parallel_vacuum_error_callback() as well.
3097 : */
3098 : static void
3099 34 : vacuum_error_callback(void *arg)
3100 : {
3101 34 : LVRelState *errinfo = arg;
3102 :
3103 34 : switch (errinfo->phase)
3104 : {
3105 0 : case VACUUM_ERRCB_PHASE_SCAN_HEAP:
3106 0 : if (BlockNumberIsValid(errinfo->blkno))
3107 : {
3108 0 : if (OffsetNumberIsValid(errinfo->offnum))
3109 0 : errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
3110 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3111 : else
3112 0 : errcontext("while scanning block %u of relation \"%s.%s\"",
3113 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3114 : }
3115 : else
3116 0 : errcontext("while scanning relation \"%s.%s\"",
3117 : errinfo->relnamespace, errinfo->relname);
3118 0 : break;
3119 :
3120 0 : case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
3121 0 : if (BlockNumberIsValid(errinfo->blkno))
3122 : {
3123 0 : if (OffsetNumberIsValid(errinfo->offnum))
3124 0 : errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
3125 0 : errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
3126 : else
3127 0 : errcontext("while vacuuming block %u of relation \"%s.%s\"",
3128 : errinfo->blkno, errinfo->relnamespace, errinfo->relname);
3129 : }
3130 : else
3131 0 : errcontext("while vacuuming relation \"%s.%s\"",
3132 : errinfo->relnamespace, errinfo->relname);
3133 0 : break;
3134 :
3135 0 : case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
3136 0 : errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
3137 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3138 0 : break;
3139 :
3140 0 : case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
3141 0 : errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
3142 : errinfo->indname, errinfo->relnamespace, errinfo->relname);
3143 0 : break;
3144 :
3145 6 : case VACUUM_ERRCB_PHASE_TRUNCATE:
3146 6 : if (BlockNumberIsValid(errinfo->blkno))
3147 6 : errcontext("while truncating relation \"%s.%s\" to %u blocks",
3148 : errinfo->relnamespace, errinfo->relname, errinfo->blkno);
3149 6 : break;
3150 :
3151 28 : case VACUUM_ERRCB_PHASE_UNKNOWN:
3152 : default:
3153 28 : return; /* do nothing; the errinfo may not be
3154 : * initialized */
3155 : }
3156 : }
3157 :
3158 : /*
3159 : * Updates the information required for vacuum error callback. This also saves
3160 : * the current information which can be later restored via restore_vacuum_error_info.
3161 : */
3162 : static void
3163 140036 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
3164 : int phase, BlockNumber blkno, OffsetNumber offnum)
3165 : {
3166 140036 : if (saved_vacrel)
3167 : {
3168 48780 : saved_vacrel->offnum = vacrel->offnum;
3169 48780 : saved_vacrel->blkno = vacrel->blkno;
3170 48780 : saved_vacrel->phase = vacrel->phase;
3171 : }
3172 :
3173 140036 : vacrel->blkno = blkno;
3174 140036 : vacrel->offnum = offnum;
3175 140036 : vacrel->phase = phase;
3176 140036 : }
3177 :
3178 : /*
3179 : * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
3180 : */
3181 : static void
3182 48780 : restore_vacuum_error_info(LVRelState *vacrel,
3183 : const LVSavedErrInfo *saved_vacrel)
3184 : {
3185 48780 : vacrel->blkno = saved_vacrel->blkno;
3186 48780 : vacrel->offnum = saved_vacrel->offnum;
3187 48780 : vacrel->phase = saved_vacrel->phase;
3188 48780 : }
|