Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * REPACK, handled in repack.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/async.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/repack.h"
44 : #include "commands/vacuum.h"
45 : #include "miscadmin.h"
46 : #include "nodes/makefuncs.h"
47 : #include "pgstat.h"
48 : #include "postmaster/autovacuum.h"
49 : #include "postmaster/bgworker_internals.h"
50 : #include "postmaster/interrupt.h"
51 : #include "storage/bufmgr.h"
52 : #include "storage/lmgr.h"
53 : #include "storage/pmsignal.h"
54 : #include "storage/proc.h"
55 : #include "storage/procarray.h"
56 : #include "utils/acl.h"
57 : #include "utils/fmgroids.h"
58 : #include "utils/guc.h"
59 : #include "utils/guc_hooks.h"
60 : #include "utils/injection_point.h"
61 : #include "utils/memutils.h"
62 : #include "utils/snapmgr.h"
63 : #include "utils/syscache.h"
64 : #include "utils/wait_event.h"
65 :
66 : /*
67 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
68 : * This aims to avoid sending too many messages and waking up the leader too
69 : * frequently.
70 : */
71 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
72 :
73 : /*
74 : * GUC parameters
75 : */
76 : int vacuum_freeze_min_age;
77 : int vacuum_freeze_table_age;
78 : int vacuum_multixact_freeze_min_age;
79 : int vacuum_multixact_freeze_table_age;
80 : int vacuum_failsafe_age;
81 : int vacuum_multixact_failsafe_age;
82 : double vacuum_max_eager_freeze_failure_rate;
83 : bool track_cost_delay_timing;
84 : bool vacuum_truncate;
85 :
86 : /*
87 : * Variables for cost-based vacuum delay. The defaults differ between
88 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
89 : * vacuum code. They are initialized here to the defaults for client backends
90 : * executing VACUUM or ANALYZE.
91 : */
92 : double vacuum_cost_delay = 0;
93 : int vacuum_cost_limit = 200;
94 :
95 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
96 : int64 parallel_vacuum_worker_delay_ns = 0;
97 :
98 : /*
99 : * VacuumFailsafeActive is a defined as a global so that we can determine
100 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
101 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
102 : * for the table until after vacuuming has completed, regardless of other
103 : * settings.
104 : *
105 : * Only VACUUM code should inspect this variable and only table access methods
106 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
107 : * inspected to determine whether or not to allow cost-based delays. Table AMs
108 : * are free to set it if they desire this behavior, but it is false by default
109 : * and reset to false in between vacuuming each relation.
110 : */
111 : bool VacuumFailsafeActive = false;
112 :
113 : /*
114 : * Variables for cost-based parallel vacuum. See comments atop
115 : * compute_parallel_delay to understand how it works.
116 : */
117 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
118 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
119 : int VacuumCostBalanceLocal = 0;
120 :
121 : /* non-export function prototypes */
122 : static List *expand_vacuum_rel(VacuumRelation *vrel,
123 : MemoryContext vac_context, int options);
124 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
125 : static void vac_truncate_clog(TransactionId frozenXID,
126 : MultiXactId minMulti,
127 : TransactionId lastSaneFrozenXid,
128 : MultiXactId lastSaneMinMulti);
129 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
130 : BufferAccessStrategy bstrategy, bool isTopLevel);
131 : static double compute_parallel_delay(void);
132 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
133 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
134 :
135 : /*
136 : * GUC check function to ensure GUC value specified is within the allowable
137 : * range.
138 : */
139 : bool
140 1292 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
141 : GucSource source)
142 : {
143 : /* Value upper and lower hard limits are inclusive */
144 1292 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
145 1292 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
146 1292 : return true;
147 :
148 : /* Value does not fall within any allowable range */
149 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
150 : "vacuum_buffer_usage_limit",
151 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
152 :
153 0 : return false;
154 : }
155 :
156 : /*
157 : * Primary entry point for manual VACUUM and ANALYZE commands
158 : *
159 : * This is mainly a preparation wrapper for the real operations that will
160 : * happen in vacuum().
161 : */
162 : void
163 8732 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
164 : {
165 : VacuumParams params;
166 8732 : BufferAccessStrategy bstrategy = NULL;
167 8732 : bool verbose = false;
168 8732 : bool skip_locked = false;
169 8732 : bool analyze = false;
170 8732 : bool freeze = false;
171 8732 : bool full = false;
172 8732 : bool disable_page_skipping = false;
173 8732 : bool process_main = true;
174 8732 : bool process_toast = true;
175 : int ring_size;
176 8732 : bool skip_database_stats = false;
177 8732 : bool only_database_stats = false;
178 : MemoryContext vac_context;
179 : ListCell *lc;
180 :
181 : /* index_cleanup and truncate values unspecified for now */
182 8732 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
183 8732 : params.truncate = VACOPTVALUE_UNSPECIFIED;
184 :
185 : /* By default parallel vacuum is enabled */
186 8732 : params.nworkers = 0;
187 :
188 : /* Will be set later if we recurse to a TOAST table. */
189 8732 : params.toast_parent = InvalidOid;
190 :
191 : /*
192 : * Set this to an invalid value so it is clear whether or not a
193 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
194 : */
195 8732 : ring_size = -1;
196 :
197 : /* Parse options list */
198 18308 : foreach(lc, vacstmt->options)
199 : {
200 9600 : DefElem *opt = (DefElem *) lfirst(lc);
201 :
202 : /* Parse common options for VACUUM and ANALYZE */
203 9600 : if (strcmp(opt->defname, "verbose") == 0)
204 32 : verbose = defGetBoolean(opt);
205 9568 : else if (strcmp(opt->defname, "skip_locked") == 0)
206 182 : skip_locked = defGetBoolean(opt);
207 9386 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
208 : {
209 : const char *hintmsg;
210 : int result;
211 : char *vac_buffer_size;
212 :
213 36 : vac_buffer_size = defGetString(opt);
214 :
215 : /*
216 : * Check that the specified value is valid and the size falls
217 : * within the hard upper and lower limits if it is not 0.
218 : */
219 36 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
220 32 : (result != 0 &&
221 24 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
222 : {
223 12 : ereport(ERROR,
224 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
225 : errmsg("%s option must be 0 or between %d kB and %d kB",
226 : "BUFFER_USAGE_LIMIT",
227 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
228 : hintmsg ? errhint_internal("%s", _(hintmsg)) : 0));
229 : }
230 :
231 24 : ring_size = result;
232 : }
233 9350 : else if (!vacstmt->is_vacuumcmd)
234 4 : ereport(ERROR,
235 : (errcode(ERRCODE_SYNTAX_ERROR),
236 : errmsg("unrecognized %s option \"%s\"",
237 : "ANALYZE", opt->defname),
238 : parser_errposition(pstate, opt->location)));
239 :
240 : /* Parse options available on VACUUM */
241 9346 : else if (strcmp(opt->defname, "analyze") == 0)
242 1840 : analyze = defGetBoolean(opt);
243 7506 : else if (strcmp(opt->defname, "freeze") == 0)
244 1891 : freeze = defGetBoolean(opt);
245 5615 : else if (strcmp(opt->defname, "full") == 0)
246 234 : full = defGetBoolean(opt);
247 5381 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
248 120 : disable_page_skipping = defGetBoolean(opt);
249 5261 : else if (strcmp(opt->defname, "index_cleanup") == 0)
250 : {
251 : /* Interpret no string as the default, which is 'auto' */
252 98 : if (!opt->arg)
253 0 : params.index_cleanup = VACOPTVALUE_AUTO;
254 : else
255 : {
256 98 : char *sval = defGetString(opt);
257 :
258 : /* Try matching on 'auto' string, or fall back on boolean */
259 98 : if (pg_strcasecmp(sval, "auto") == 0)
260 4 : params.index_cleanup = VACOPTVALUE_AUTO;
261 : else
262 94 : params.index_cleanup = get_vacoptval_from_boolean(opt);
263 : }
264 : }
265 5163 : else if (strcmp(opt->defname, "process_main") == 0)
266 85 : process_main = defGetBoolean(opt);
267 5078 : else if (strcmp(opt->defname, "process_toast") == 0)
268 89 : process_toast = defGetBoolean(opt);
269 4989 : else if (strcmp(opt->defname, "truncate") == 0)
270 88 : params.truncate = get_vacoptval_from_boolean(opt);
271 4901 : else if (strcmp(opt->defname, "parallel") == 0)
272 : {
273 199 : int nworkers = defGetInt32(opt);
274 :
275 195 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
276 4 : ereport(ERROR,
277 : (errcode(ERRCODE_SYNTAX_ERROR),
278 : errmsg("%s option must be between 0 and %d",
279 : "PARALLEL",
280 : MAX_PARALLEL_WORKER_LIMIT),
281 : parser_errposition(pstate, opt->location)));
282 :
283 : /*
284 : * Disable parallel vacuum, if user has specified parallel degree
285 : * as zero.
286 : */
287 191 : if (nworkers == 0)
288 86 : params.nworkers = -1;
289 : else
290 105 : params.nworkers = nworkers;
291 : }
292 4702 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
293 4623 : skip_database_stats = defGetBoolean(opt);
294 79 : else if (strcmp(opt->defname, "only_database_stats") == 0)
295 79 : only_database_stats = defGetBoolean(opt);
296 : else
297 0 : ereport(ERROR,
298 : (errcode(ERRCODE_SYNTAX_ERROR),
299 : errmsg("unrecognized %s option \"%s\"",
300 : "VACUUM", opt->defname),
301 : parser_errposition(pstate, opt->location)));
302 : }
303 :
304 : /* Set vacuum options */
305 8708 : params.options =
306 8708 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
307 8708 : (verbose ? VACOPT_VERBOSE : 0) |
308 8708 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
309 8708 : (analyze ? VACOPT_ANALYZE : 0) |
310 8708 : (freeze ? VACOPT_FREEZE : 0) |
311 8708 : (full ? VACOPT_FULL : 0) |
312 8708 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
313 8708 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
314 8708 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
315 8708 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
316 8708 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
317 :
318 : /* sanity checks on options */
319 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
320 : Assert((params.options & VACOPT_VACUUM) ||
321 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
322 :
323 8708 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
324 4 : ereport(ERROR,
325 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
326 : errmsg("VACUUM FULL cannot be performed in parallel")));
327 :
328 : /*
329 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
330 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
331 : * we'll permit that.
332 : */
333 8704 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
334 4 : !(params.options & VACOPT_ANALYZE))
335 4 : ereport(ERROR,
336 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
337 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
338 :
339 : /*
340 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
341 : */
342 8700 : if (!(params.options & VACOPT_ANALYZE))
343 : {
344 7805 : foreach(lc, vacstmt->rels)
345 : {
346 3847 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
347 :
348 3847 : if (vrel->va_cols != NIL)
349 4 : ereport(ERROR,
350 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
351 : errmsg("ANALYZE option must be specified when a column list is provided")));
352 : }
353 : }
354 :
355 : /*
356 : * Sanity check DISABLE_PAGE_SKIPPING option.
357 : */
358 8696 : if ((params.options & VACOPT_FULL) != 0 &&
359 218 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
360 0 : ereport(ERROR,
361 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
362 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
363 :
364 : /* sanity check for PROCESS_TOAST */
365 8696 : if ((params.options & VACOPT_FULL) != 0 &&
366 218 : (params.options & VACOPT_PROCESS_TOAST) == 0)
367 4 : ereport(ERROR,
368 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
369 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
370 :
371 : /* sanity check for ONLY_DATABASE_STATS */
372 8692 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
373 : {
374 : Assert(params.options & VACOPT_VACUUM);
375 79 : if (vacstmt->rels != NIL)
376 4 : ereport(ERROR,
377 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
378 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
379 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
380 75 : if (params.options & ~(VACOPT_VACUUM |
381 : VACOPT_VERBOSE |
382 : VACOPT_PROCESS_MAIN |
383 : VACOPT_PROCESS_TOAST |
384 : VACOPT_ONLY_DATABASE_STATS))
385 0 : ereport(ERROR,
386 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
387 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
388 : }
389 :
390 : /*
391 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
392 : * them as -1 which means to use the default values.
393 : */
394 8688 : if (params.options & VACOPT_FREEZE)
395 : {
396 1891 : params.freeze_min_age = 0;
397 1891 : params.freeze_table_age = 0;
398 1891 : params.multixact_freeze_min_age = 0;
399 1891 : params.multixact_freeze_table_age = 0;
400 : }
401 : else
402 : {
403 6797 : params.freeze_min_age = -1;
404 6797 : params.freeze_table_age = -1;
405 6797 : params.multixact_freeze_min_age = -1;
406 6797 : params.multixact_freeze_table_age = -1;
407 : }
408 :
409 : /* user-invoked vacuum is never "for wraparound" */
410 8688 : params.is_wraparound = false;
411 :
412 : /*
413 : * user-invoked vacuum uses VACOPT_VERBOSE instead of
414 : * log_vacuum_min_duration and log_analyze_min_duration
415 : */
416 8688 : params.log_vacuum_min_duration = -1;
417 8688 : params.log_analyze_min_duration = -1;
418 :
419 : /*
420 : * Later, in vacuum_rel(), we check if a reloption override was specified.
421 : */
422 8688 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
423 :
424 : /*
425 : * Create special memory context for cross-transaction storage.
426 : *
427 : * Since it is a child of PortalContext, it will go away eventually even
428 : * if we suffer an error; there's no need for special abort cleanup logic.
429 : */
430 8688 : vac_context = AllocSetContextCreate(PortalContext,
431 : "Vacuum",
432 : ALLOCSET_DEFAULT_SIZES);
433 :
434 : /*
435 : * Make a buffer strategy object in the cross-transaction memory context.
436 : * We needn't bother making this for VACUUM (FULL) or VACUUM
437 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
438 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
439 : * when we see ANALYZE.
440 : */
441 8688 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
442 289 : VACOPT_FULL)) == 0 ||
443 289 : (params.options & VACOPT_ANALYZE) != 0)
444 : {
445 :
446 8403 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
447 :
448 : Assert(ring_size >= -1);
449 :
450 : /*
451 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
452 : * command, it overrides the value of VacuumBufferUsageLimit. Either
453 : * value may be 0, in which case GetAccessStrategyWithSize() will
454 : * return NULL, effectively allowing full use of shared buffers.
455 : */
456 8403 : if (ring_size == -1)
457 8383 : ring_size = VacuumBufferUsageLimit;
458 :
459 8403 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
460 :
461 8403 : MemoryContextSwitchTo(old_context);
462 : }
463 :
464 : /* Now go through the common routine */
465 8688 : vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel);
466 :
467 : /* Finally, clean up the vacuum memory context */
468 8599 : MemoryContextDelete(vac_context);
469 8599 : }
470 :
471 : /*
472 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
473 : *
474 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
475 : * we process all relevant tables in the database. For each VacuumRelation,
476 : * if a valid OID is supplied, the table with that OID is what to process;
477 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
478 : *
479 : * params contains a set of parameters that can be used to customize the
480 : * behavior.
481 : *
482 : * bstrategy may be passed in as NULL when the caller does not want to
483 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
484 : * otherwise, the caller must build a BufferAccessStrategy with the number of
485 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
486 : * using.
487 : *
488 : * isTopLevel should be passed down from ProcessUtility.
489 : *
490 : * It is the caller's responsibility that all parameters are allocated in a
491 : * memory context that will not disappear at transaction commit.
492 : */
493 : void
494 119714 : vacuum(List *relations, const VacuumParams *params, BufferAccessStrategy bstrategy,
495 : MemoryContext vac_context, bool isTopLevel)
496 : {
497 : static bool in_vacuum = false;
498 :
499 : const char *stmttype;
500 : volatile bool in_outer_xact,
501 : use_own_xacts;
502 :
503 119714 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
504 :
505 : /*
506 : * We cannot run VACUUM inside a user transaction block; if we were inside
507 : * a transaction, then our commit- and start-transaction-command calls
508 : * would not have the intended effect! There are numerous other subtle
509 : * dependencies on this, too.
510 : *
511 : * ANALYZE (without VACUUM) can run either way.
512 : */
513 119714 : if (params->options & VACOPT_VACUUM)
514 : {
515 116637 : PreventInTransactionBlock(isTopLevel, stmttype);
516 116624 : in_outer_xact = false;
517 : }
518 : else
519 3077 : in_outer_xact = IsInTransactionBlock(isTopLevel);
520 :
521 : /*
522 : * Check for and disallow recursive calls. This could happen when VACUUM
523 : * FULL or ANALYZE calls a hostile index expression that itself calls
524 : * ANALYZE.
525 : */
526 119701 : if (in_vacuum)
527 8 : ereport(ERROR,
528 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
529 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
530 : stmttype)));
531 :
532 : /*
533 : * Build list of relation(s) to process, putting any new data in
534 : * vac_context for safekeeping.
535 : */
536 119693 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
537 : {
538 : /* We don't process any tables in this case */
539 : Assert(relations == NIL);
540 : }
541 119618 : else if (relations != NIL)
542 : {
543 119493 : List *newrels = NIL;
544 : ListCell *lc;
545 :
546 239067 : foreach(lc, relations)
547 : {
548 119598 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
549 : List *sublist;
550 : MemoryContext old_context;
551 :
552 119598 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
553 119574 : old_context = MemoryContextSwitchTo(vac_context);
554 119574 : newrels = list_concat(newrels, sublist);
555 119574 : MemoryContextSwitchTo(old_context);
556 : }
557 119469 : relations = newrels;
558 : }
559 : else
560 125 : relations = get_all_vacuum_rels(vac_context, params->options);
561 :
562 : /*
563 : * Decide whether we need to start/commit our own transactions.
564 : *
565 : * For VACUUM (with or without ANALYZE): always do so, so that we can
566 : * release locks as soon as possible. (We could possibly use the outer
567 : * transaction for a one-table VACUUM, but handling TOAST tables would be
568 : * problematic.)
569 : *
570 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
571 : * start/commit our own transactions. Also, there's no need to do so if
572 : * only processing one relation. For multiple relations when not within a
573 : * transaction block, and also in an autovacuum worker, use own
574 : * transactions so we can release locks sooner.
575 : */
576 119669 : if (params->options & VACOPT_VACUUM)
577 116616 : use_own_xacts = true;
578 : else
579 : {
580 : Assert(params->options & VACOPT_ANALYZE);
581 3053 : if (AmAutoVacuumWorkerProcess())
582 179 : use_own_xacts = true;
583 2874 : else if (in_outer_xact)
584 164 : use_own_xacts = false;
585 2710 : else if (list_length(relations) > 1)
586 524 : use_own_xacts = true;
587 : else
588 2186 : use_own_xacts = false;
589 : }
590 :
591 : /*
592 : * vacuum_rel expects to be entered with no transaction active; it will
593 : * start and commit its own transaction. But we are called by an SQL
594 : * command, and so we are executing inside a transaction already. We
595 : * commit the transaction started in PostgresMain() here, and start
596 : * another one before exiting to match the commit waiting for us back in
597 : * PostgresMain().
598 : */
599 119669 : if (use_own_xacts)
600 : {
601 : Assert(!in_outer_xact);
602 :
603 : /* ActiveSnapshot is not set by autovacuum */
604 117319 : if (ActiveSnapshotSet())
605 6293 : PopActiveSnapshot();
606 :
607 : /* matches the StartTransaction in PostgresMain() */
608 117319 : CommitTransactionCommand();
609 : }
610 :
611 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
612 119669 : PG_TRY();
613 : {
614 : ListCell *cur;
615 :
616 119669 : in_vacuum = true;
617 119669 : VacuumFailsafeActive = false;
618 119669 : VacuumUpdateCosts();
619 119669 : VacuumCostBalance = 0;
620 119669 : VacuumCostBalanceLocal = 0;
621 119669 : VacuumSharedCostBalance = NULL;
622 119669 : VacuumActiveNWorkers = NULL;
623 :
624 : /*
625 : * Loop to process each selected relation.
626 : */
627 250044 : foreach(cur, relations)
628 : {
629 130420 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
630 :
631 130420 : if (params->options & VACOPT_VACUUM)
632 : {
633 122149 : if (!vacuum_rel(vrel->oid, vrel->relation, *params, bstrategy,
634 : isTopLevel))
635 62 : continue;
636 : }
637 :
638 130354 : if (params->options & VACOPT_ANALYZE)
639 : {
640 : /*
641 : * If using separate xacts, start one for analyze. Otherwise,
642 : * we can use the outer transaction.
643 : */
644 10253 : if (use_own_xacts)
645 : {
646 7919 : StartTransactionCommand();
647 : /* functions in indexes may want a snapshot set */
648 7919 : PushActiveSnapshot(GetTransactionSnapshot());
649 : }
650 :
651 10253 : analyze_rel(vrel->oid, vrel->relation, params,
652 : vrel->va_cols, in_outer_xact, bstrategy);
653 :
654 10212 : if (use_own_xacts)
655 : {
656 7893 : PopActiveSnapshot();
657 : /* standard_ProcessUtility() does CCI if !use_own_xacts */
658 7893 : CommandCounterIncrement();
659 7893 : CommitTransactionCommand();
660 : }
661 : else
662 : {
663 : /*
664 : * If we're not using separate xacts, better separate the
665 : * ANALYZE actions with CCIs. This avoids trouble if user
666 : * says "ANALYZE t, t".
667 : */
668 2319 : CommandCounterIncrement();
669 : }
670 : }
671 :
672 : /*
673 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
674 : * next relation.
675 : */
676 130313 : VacuumFailsafeActive = false;
677 : }
678 : }
679 44 : PG_FINALLY();
680 : {
681 119668 : in_vacuum = false;
682 119668 : VacuumCostActive = false;
683 119668 : VacuumFailsafeActive = false;
684 119668 : VacuumCostBalance = 0;
685 : }
686 119668 : PG_END_TRY();
687 :
688 : /*
689 : * Finish up processing.
690 : */
691 119624 : if (use_own_xacts)
692 : {
693 : /* here, we are not in a transaction */
694 :
695 : /*
696 : * This matches the CommitTransaction waiting for us in
697 : * PostgresMain().
698 : */
699 117289 : StartTransactionCommand();
700 : }
701 :
702 119624 : if ((params->options & VACOPT_VACUUM) &&
703 116595 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
704 : {
705 : /*
706 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
707 : */
708 1126 : vac_update_datfrozenxid();
709 : }
710 :
711 119624 : }
712 :
713 : /*
714 : * Check if the current user has privileges to vacuum or analyze the relation.
715 : * If not, issue a WARNING log message and return false to let the caller
716 : * decide what to do with this relation. This routine is used to decide if a
717 : * relation can be processed for VACUUM or ANALYZE.
718 : */
719 : bool
720 158275 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
721 : uint32 options)
722 : {
723 : char *relname;
724 :
725 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
726 :
727 : /*----------
728 : * A role has privileges to vacuum or analyze the relation if any of the
729 : * following are true:
730 : * - the role owns the current database and the relation is not shared
731 : * - the role has the MAINTAIN privilege on the relation
732 : *----------
733 : */
734 158275 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
735 181239 : !reltuple->relisshared) ||
736 25448 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
737 156103 : return true;
738 :
739 2172 : relname = NameStr(reltuple->relname);
740 :
741 2172 : if ((options & VACOPT_VACUUM) != 0)
742 : {
743 148 : ereport(WARNING,
744 : (errmsg("permission denied to vacuum \"%s\", skipping it",
745 : relname)));
746 :
747 : /*
748 : * For VACUUM ANALYZE, both logs could show up, but just generate
749 : * information for VACUUM as that would be the first one to be
750 : * processed.
751 : */
752 148 : return false;
753 : }
754 :
755 2024 : if ((options & VACOPT_ANALYZE) != 0)
756 2024 : ereport(WARNING,
757 : (errmsg("permission denied to analyze \"%s\", skipping it",
758 : relname)));
759 :
760 2024 : return false;
761 : }
762 :
763 :
764 : /*
765 : * vacuum_open_relation
766 : *
767 : * This routine is used for attempting to open and lock a relation which
768 : * is going to be vacuumed or analyzed. If the relation cannot be opened
769 : * or locked, a log is emitted if possible.
770 : */
771 : Relation
772 138218 : vacuum_open_relation(Oid relid, RangeVar *relation, uint32 options,
773 : bool verbose, LOCKMODE lmode)
774 : {
775 : Relation rel;
776 138218 : bool rel_lock = true;
777 : int elevel;
778 :
779 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
780 :
781 : /*
782 : * Open the relation and get the appropriate lock on it.
783 : *
784 : * There's a race condition here: the relation may have gone away since
785 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
786 : *
787 : * If we've been asked not to wait for the relation lock, acquire it first
788 : * in non-blocking mode, before calling try_relation_open().
789 : */
790 138218 : if (!(options & VACOPT_SKIP_LOCKED))
791 137556 : rel = try_relation_open(relid, lmode);
792 662 : else if (ConditionalLockRelationOid(relid, lmode))
793 652 : rel = try_relation_open(relid, NoLock);
794 : else
795 : {
796 10 : rel = NULL;
797 10 : rel_lock = false;
798 : }
799 :
800 : /* if relation is opened, leave */
801 138218 : if (rel)
802 138202 : return rel;
803 :
804 : /*
805 : * Relation could not be opened, hence generate if possible a log
806 : * informing on the situation.
807 : *
808 : * If the RangeVar is not defined, we do not have enough information to
809 : * provide a meaningful log statement. Chances are that the caller has
810 : * intentionally not provided this information so that this logging is
811 : * skipped, anyway.
812 : */
813 16 : if (relation == NULL)
814 9 : return NULL;
815 :
816 : /*
817 : * Determine the log level.
818 : *
819 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
820 : * statements in the permission checks; otherwise, only log if the caller
821 : * so requested.
822 : */
823 7 : if (!AmAutoVacuumWorkerProcess())
824 7 : elevel = WARNING;
825 0 : else if (verbose)
826 0 : elevel = LOG;
827 : else
828 0 : return NULL;
829 :
830 7 : if ((options & VACOPT_VACUUM) != 0)
831 : {
832 5 : if (!rel_lock)
833 3 : ereport(elevel,
834 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
835 : errmsg("skipping vacuum of \"%s\" --- lock not available",
836 : relation->relname)));
837 : else
838 2 : ereport(elevel,
839 : (errcode(ERRCODE_UNDEFINED_TABLE),
840 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
841 : relation->relname)));
842 :
843 : /*
844 : * For VACUUM ANALYZE, both logs could show up, but just generate
845 : * information for VACUUM as that would be the first one to be
846 : * processed.
847 : */
848 5 : return NULL;
849 : }
850 :
851 2 : if ((options & VACOPT_ANALYZE) != 0)
852 : {
853 2 : if (!rel_lock)
854 1 : ereport(elevel,
855 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
856 : errmsg("skipping analyze of \"%s\" --- lock not available",
857 : relation->relname)));
858 : else
859 1 : ereport(elevel,
860 : (errcode(ERRCODE_UNDEFINED_TABLE),
861 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
862 : relation->relname)));
863 : }
864 :
865 2 : return NULL;
866 : }
867 :
868 :
869 : /*
870 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
871 : * and optionally add VacuumRelations for partitions or inheritance children.
872 : *
873 : * If a VacuumRelation does not have an OID supplied and is a partitioned
874 : * table, an extra entry will be added to the output for each partition.
875 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
876 : * it does not want us to expand partitioned tables.
877 : *
878 : * We take care not to modify the input data structure, but instead build
879 : * new VacuumRelation(s) to return. (But note that they will reference
880 : * unmodified parts of the input, eg column lists.) New data structures
881 : * are made in vac_context.
882 : */
883 : static List *
884 119598 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
885 : int options)
886 : {
887 119598 : List *vacrels = NIL;
888 : MemoryContext oldcontext;
889 :
890 : /* If caller supplied OID, there's nothing we need do here. */
891 119598 : if (OidIsValid(vrel->oid))
892 : {
893 111026 : oldcontext = MemoryContextSwitchTo(vac_context);
894 111026 : vacrels = lappend(vacrels, vrel);
895 111026 : MemoryContextSwitchTo(oldcontext);
896 : }
897 : else
898 : {
899 : /*
900 : * Process a specific relation, and possibly partitions or child
901 : * tables thereof.
902 : */
903 : Oid relid;
904 : HeapTuple tuple;
905 : Form_pg_class classForm;
906 : bool include_children;
907 : bool is_partitioned_table;
908 : int rvr_opts;
909 :
910 : /*
911 : * Since autovacuum workers supply OIDs when calling vacuum(), no
912 : * autovacuum worker should reach this code.
913 : */
914 : Assert(!AmAutoVacuumWorkerProcess());
915 :
916 : /*
917 : * We transiently take AccessShareLock to protect the syscache lookup
918 : * below, as well as find_all_inheritors's expectation that the caller
919 : * holds some lock on the starting relation.
920 : */
921 8572 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
922 8572 : relid = RangeVarGetRelidExtended(vrel->relation,
923 : AccessShareLock,
924 : rvr_opts,
925 : NULL, NULL);
926 :
927 : /*
928 : * If the lock is unavailable, emit the same log statement that
929 : * vacuum_rel() and analyze_rel() would.
930 : */
931 8548 : if (!OidIsValid(relid))
932 : {
933 4 : if (options & VACOPT_VACUUM)
934 3 : ereport(WARNING,
935 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
936 : errmsg("skipping vacuum of \"%s\" --- lock not available",
937 : vrel->relation->relname)));
938 : else
939 1 : ereport(WARNING,
940 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
941 : errmsg("skipping analyze of \"%s\" --- lock not available",
942 : vrel->relation->relname)));
943 4 : return vacrels;
944 : }
945 :
946 : /*
947 : * To check whether the relation is a partitioned table and its
948 : * ownership, fetch its syscache entry.
949 : */
950 8544 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
951 8544 : if (!HeapTupleIsValid(tuple))
952 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
953 8544 : classForm = (Form_pg_class) GETSTRUCT(tuple);
954 :
955 : /*
956 : * Make a returnable VacuumRelation for this rel if the user has the
957 : * required privileges.
958 : */
959 8544 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
960 : {
961 8392 : oldcontext = MemoryContextSwitchTo(vac_context);
962 8392 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
963 : relid,
964 : vrel->va_cols));
965 8392 : MemoryContextSwitchTo(oldcontext);
966 : }
967 :
968 : /*
969 : * Vacuuming a partitioned table with ONLY will not do anything since
970 : * the partitioned table itself is empty. Issue a warning if the user
971 : * requests this.
972 : */
973 8544 : include_children = vrel->relation->inh;
974 8544 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
975 8544 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
976 4 : ereport(WARNING,
977 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
978 : vrel->relation->relname)));
979 :
980 8544 : ReleaseSysCache(tuple);
981 :
982 : /*
983 : * Unless the user has specified ONLY, make relation list entries for
984 : * its partitions or inheritance child tables. Note that the list
985 : * returned by find_all_inheritors() includes the passed-in OID, so we
986 : * have to skip that. There's no point in taking locks on the
987 : * individual partitions or child tables yet, and doing so would just
988 : * add unnecessary deadlock risk. For this last reason, we do not yet
989 : * check the ownership of the partitions/tables, which get added to
990 : * the list to process. Ownership will be checked later on anyway.
991 : */
992 8544 : if (include_children)
993 : {
994 8524 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
995 : ListCell *part_lc;
996 :
997 18501 : foreach(part_lc, part_oids)
998 : {
999 9977 : Oid part_oid = lfirst_oid(part_lc);
1000 :
1001 9977 : if (part_oid == relid)
1002 8524 : continue; /* ignore original table */
1003 :
1004 : /*
1005 : * We omit a RangeVar since it wouldn't be appropriate to
1006 : * complain about failure to open one of these relations
1007 : * later.
1008 : */
1009 1453 : oldcontext = MemoryContextSwitchTo(vac_context);
1010 1453 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1011 : part_oid,
1012 : vrel->va_cols));
1013 1453 : MemoryContextSwitchTo(oldcontext);
1014 : }
1015 : }
1016 :
1017 : /*
1018 : * Release lock again. This means that by the time we actually try to
1019 : * process the table, it might be gone or renamed. In the former case
1020 : * we'll silently ignore it; in the latter case we'll process it
1021 : * anyway, but we must beware that the RangeVar doesn't necessarily
1022 : * identify it anymore. This isn't ideal, perhaps, but there's little
1023 : * practical alternative, since we're typically going to commit this
1024 : * transaction and begin a new one between now and then. Moreover,
1025 : * holding locks on multiple relations would create significant risk
1026 : * of deadlock.
1027 : */
1028 8544 : UnlockRelationOid(relid, AccessShareLock);
1029 : }
1030 :
1031 119570 : return vacrels;
1032 : }
1033 :
1034 : /*
1035 : * Construct a list of VacuumRelations for all vacuumable rels in
1036 : * the current database. The list is built in vac_context.
1037 : */
1038 : static List *
1039 125 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1040 : {
1041 125 : List *vacrels = NIL;
1042 : Relation pgclass;
1043 : TableScanDesc scan;
1044 : HeapTuple tuple;
1045 :
1046 125 : pgclass = table_open(RelationRelationId, AccessShareLock);
1047 :
1048 125 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1049 :
1050 62238 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1051 : {
1052 62113 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1053 : MemoryContext oldcontext;
1054 62113 : Oid relid = classForm->oid;
1055 :
1056 : /*
1057 : * We include partitioned tables here; depending on which operation is
1058 : * to be performed, caller will decide whether to process or ignore
1059 : * them.
1060 : */
1061 62113 : if (classForm->relkind != RELKIND_RELATION &&
1062 50728 : classForm->relkind != RELKIND_MATVIEW &&
1063 50696 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1064 50583 : continue;
1065 :
1066 : /* Skip temp relations belonging to other sessions */
1067 11530 : if (classForm->relpersistence == RELPERSISTENCE_TEMP &&
1068 9 : !isTempOrTempToastNamespace(classForm->relnamespace))
1069 1 : continue;
1070 :
1071 : /* check permissions of relation */
1072 11529 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1073 1948 : continue;
1074 :
1075 : /*
1076 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1077 : * We omit a RangeVar since it wouldn't be appropriate to complain
1078 : * about failure to open one of these relations later.
1079 : */
1080 9581 : oldcontext = MemoryContextSwitchTo(vac_context);
1081 9581 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1082 : relid,
1083 : NIL));
1084 9581 : MemoryContextSwitchTo(oldcontext);
1085 : }
1086 :
1087 125 : table_endscan(scan);
1088 125 : table_close(pgclass, AccessShareLock);
1089 :
1090 125 : return vacrels;
1091 : }
1092 :
1093 : /*
1094 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1095 : *
1096 : * The target relation and VACUUM parameters are our inputs.
1097 : *
1098 : * Output parameters are the cutoffs that VACUUM caller should use.
1099 : *
1100 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1101 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1102 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1103 : * minimum).
1104 : */
1105 : bool
1106 127868 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1107 : struct VacuumCutoffs *cutoffs)
1108 : {
1109 : int freeze_min_age,
1110 : multixact_freeze_min_age,
1111 : freeze_table_age,
1112 : multixact_freeze_table_age,
1113 : effective_multixact_freeze_max_age;
1114 : TransactionId nextXID,
1115 : safeOldestXmin,
1116 : aggressiveXIDCutoff;
1117 : MultiXactId nextMXID,
1118 : safeOldestMxact,
1119 : aggressiveMXIDCutoff;
1120 :
1121 : /* Use mutable copies of freeze age parameters */
1122 127868 : freeze_min_age = params->freeze_min_age;
1123 127868 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1124 127868 : freeze_table_age = params->freeze_table_age;
1125 127868 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1126 :
1127 : /* Set pg_class fields in cutoffs */
1128 127868 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1129 127868 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1130 :
1131 : /*
1132 : * Acquire OldestXmin.
1133 : *
1134 : * We can always ignore processes running lazy vacuum. This is because we
1135 : * use these values only for deciding which tuples we must keep in the
1136 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1137 : * XID assigned), it's safe to ignore it. In theory it could be
1138 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1139 : * that only one vacuum process can be working on a particular table at
1140 : * any time, and that each vacuum is always an independent transaction.
1141 : */
1142 127868 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1143 :
1144 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1145 :
1146 : /* Acquire OldestMxact */
1147 127868 : cutoffs->OldestMxact = GetOldestMultiXactId();
1148 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1149 :
1150 : /* Acquire next XID/next MXID values used to apply age-based settings */
1151 127868 : nextXID = ReadNextTransactionId();
1152 127868 : nextMXID = ReadNextMultiXactId();
1153 :
1154 : /*
1155 : * Also compute the multixact age for which freezing is urgent. This is
1156 : * normally autovacuum_multixact_freeze_max_age, but may be less if
1157 : * multixact members are bloated.
1158 : */
1159 127868 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1160 :
1161 : /*
1162 : * Almost ready to set freeze output parameters; check if OldestXmin or
1163 : * OldestMxact are held back to an unsafe degree before we start on that
1164 : */
1165 127868 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1166 127868 : if (!TransactionIdIsNormal(safeOldestXmin))
1167 0 : safeOldestXmin = FirstNormalTransactionId;
1168 127868 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1169 127868 : if (safeOldestMxact < FirstMultiXactId)
1170 0 : safeOldestMxact = FirstMultiXactId;
1171 127868 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1172 92571 : ereport(WARNING,
1173 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1174 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1175 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1176 127868 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1177 0 : ereport(WARNING,
1178 : (errmsg("cutoff for freezing multixacts is far in the past"),
1179 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1180 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1181 :
1182 : /*
1183 : * Determine the minimum freeze age to use: as specified by the caller, or
1184 : * vacuum_freeze_min_age, but in any case not more than half
1185 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1186 : * wraparound won't occur too frequently.
1187 : */
1188 127868 : if (freeze_min_age < 0)
1189 7033 : freeze_min_age = vacuum_freeze_min_age;
1190 127868 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1191 : Assert(freeze_min_age >= 0);
1192 :
1193 : /* Compute FreezeLimit, being careful to generate a normal XID */
1194 127868 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1195 127868 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1196 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1197 : /* FreezeLimit must always be <= OldestXmin */
1198 127868 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1199 105210 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1200 :
1201 : /*
1202 : * Determine the minimum multixact freeze age to use: as specified by
1203 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1204 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1205 : * prevent MultiXact wraparound won't occur too frequently.
1206 : */
1207 127868 : if (multixact_freeze_min_age < 0)
1208 7033 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1209 127868 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1210 : effective_multixact_freeze_max_age / 2);
1211 : Assert(multixact_freeze_min_age >= 0);
1212 :
1213 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1214 127868 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1215 127868 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1216 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1217 : /* MultiXactCutoff must always be <= OldestMxact */
1218 127868 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1219 2 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1220 :
1221 : /*
1222 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1223 : *
1224 : * Determine the table freeze age to use: as specified by the caller, or
1225 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1226 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1227 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1228 : * anti-wraparound autovacuum is launched.
1229 : */
1230 127868 : if (freeze_table_age < 0)
1231 7033 : freeze_table_age = vacuum_freeze_table_age;
1232 127868 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1233 : Assert(freeze_table_age >= 0);
1234 127868 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1235 127868 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1236 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1237 127868 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1238 : aggressiveXIDCutoff))
1239 120853 : return true;
1240 :
1241 : /*
1242 : * Similar to the above, determine the table freeze age to use for
1243 : * multixacts: as specified by the caller, or the value of the
1244 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1245 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1246 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1247 : * multixacts before anti-wraparound autovacuum is launched.
1248 : */
1249 7015 : if (multixact_freeze_table_age < 0)
1250 6917 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1251 7015 : multixact_freeze_table_age =
1252 7015 : Min(multixact_freeze_table_age,
1253 : effective_multixact_freeze_max_age * 0.95);
1254 : Assert(multixact_freeze_table_age >= 0);
1255 7015 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1256 7015 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1257 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1258 7015 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1259 : aggressiveMXIDCutoff))
1260 0 : return true;
1261 :
1262 : /* Non-aggressive VACUUM */
1263 7015 : return false;
1264 : }
1265 :
1266 : /*
1267 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1268 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1269 : * dangerously far in the past.
1270 : *
1271 : * When we return true, VACUUM caller triggers the failsafe.
1272 : */
1273 : bool
1274 129708 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1275 : {
1276 129708 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1277 129708 : MultiXactId relminmxid = cutoffs->relminmxid;
1278 : TransactionId xid_skip_limit;
1279 : MultiXactId multi_skip_limit;
1280 : int skip_index_vacuum;
1281 :
1282 : Assert(TransactionIdIsNormal(relfrozenxid));
1283 : Assert(MultiXactIdIsValid(relminmxid));
1284 :
1285 : /*
1286 : * Determine the index skipping age to use. In any case no less than
1287 : * autovacuum_freeze_max_age * 1.05.
1288 : */
1289 129708 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1290 :
1291 129708 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1292 129708 : if (!TransactionIdIsNormal(xid_skip_limit))
1293 0 : xid_skip_limit = FirstNormalTransactionId;
1294 :
1295 129708 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1296 : {
1297 : /* The table's relfrozenxid is too old */
1298 26580 : return true;
1299 : }
1300 :
1301 : /*
1302 : * Similar to above, determine the index skipping age to use for
1303 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1304 : * 1.05.
1305 : */
1306 103128 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1307 : autovacuum_multixact_freeze_max_age * 1.05);
1308 :
1309 103128 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1310 103128 : if (multi_skip_limit < FirstMultiXactId)
1311 0 : multi_skip_limit = FirstMultiXactId;
1312 :
1313 103128 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1314 : {
1315 : /* The table's relminmxid is too old */
1316 0 : return true;
1317 : }
1318 :
1319 103128 : return false;
1320 : }
1321 :
1322 : /*
1323 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1324 : *
1325 : * If we scanned the whole relation then we should just use the count of
1326 : * live tuples seen; but if we did not, we should not blindly extrapolate
1327 : * from that number, since VACUUM may have scanned a quite nonrandom
1328 : * subset of the table. When we have only partial information, we take
1329 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1330 : * of the tuple density in the unscanned pages.
1331 : *
1332 : * Note: scanned_tuples should count only *live* tuples, since
1333 : * pg_class.reltuples is defined that way.
1334 : */
1335 : double
1336 127462 : vac_estimate_reltuples(Relation relation,
1337 : BlockNumber total_pages,
1338 : BlockNumber scanned_pages,
1339 : double scanned_tuples)
1340 : {
1341 127462 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1342 127462 : double old_rel_tuples = relation->rd_rel->reltuples;
1343 : double old_density;
1344 : double unscanned_pages;
1345 : double total_tuples;
1346 :
1347 : /* If we did scan the whole table, just use the count as-is */
1348 127462 : if (scanned_pages >= total_pages)
1349 123217 : return scanned_tuples;
1350 :
1351 : /*
1352 : * When successive VACUUM commands scan the same few pages again and
1353 : * again, without anything from the table really changing, there is a risk
1354 : * that our beliefs about tuple density will gradually become distorted.
1355 : * This might be caused by vacuumlazy.c implementation details, such as
1356 : * its tendency to always scan the last heap page. Handle that here.
1357 : *
1358 : * If the relation is _exactly_ the same size according to the existing
1359 : * pg_class entry, and only a few of its pages (less than 2%) were
1360 : * scanned, keep the existing value of reltuples. Also keep the existing
1361 : * value when only a subset of rel's pages <= a single page were scanned.
1362 : *
1363 : * (Note: we might be returning -1 here.)
1364 : */
1365 4245 : if (old_rel_pages == total_pages &&
1366 4221 : scanned_pages < (double) total_pages * 0.02)
1367 3065 : return old_rel_tuples;
1368 1180 : if (scanned_pages <= 1)
1369 1054 : return old_rel_tuples;
1370 :
1371 : /*
1372 : * If old density is unknown, we can't do much except scale up
1373 : * scanned_tuples to match total_pages.
1374 : */
1375 126 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1376 4 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1377 :
1378 : /*
1379 : * Okay, we've covered the corner cases. The normal calculation is to
1380 : * convert the old measurement to a density (tuples per page), then
1381 : * estimate the number of tuples in the unscanned pages using that figure,
1382 : * and finally add on the number of tuples in the scanned pages.
1383 : */
1384 122 : old_density = old_rel_tuples / old_rel_pages;
1385 122 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1386 122 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1387 122 : return floor(total_tuples + 0.5);
1388 : }
1389 :
1390 :
1391 : /*
1392 : * vac_update_relstats() -- update statistics for one relation
1393 : *
1394 : * Update the whole-relation statistics that are kept in its pg_class
1395 : * row. There are additional stats that will be updated if we are
1396 : * doing ANALYZE, but we always update these stats. This routine works
1397 : * for both index and heap relation entries in pg_class.
1398 : *
1399 : * We violate transaction semantics here by overwriting the rel's
1400 : * existing pg_class tuple with the new values. This is reasonably
1401 : * safe as long as we're sure that the new values are correct whether or
1402 : * not this transaction commits. The reason for doing this is that if
1403 : * we updated these tuples in the usual way, vacuuming pg_class itself
1404 : * wouldn't work very well --- by the time we got done with a vacuum
1405 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1406 : * course, this only works for fixed-size not-null columns, but these are.
1407 : *
1408 : * Another reason for doing it this way is that when we are in a lazy
1409 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1410 : * Somebody vacuuming pg_class might think they could delete a tuple
1411 : * marked with xmin = our xid.
1412 : *
1413 : * In addition to fundamentally nontransactional statistics such as
1414 : * relpages and relallvisible, we try to maintain certain lazily-updated
1415 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1416 : * It's safe to do this in VACUUM, which can't run in parallel with
1417 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1418 : * However, it's *not* safe to do it in an ANALYZE that's within an
1419 : * outer transaction, because for example the current transaction might
1420 : * have dropped the last index; then we'd think relhasindex should be
1421 : * cleared, but if the transaction later rolls back this would be wrong.
1422 : * So we refrain from updating the DDL flags if we're inside an outer
1423 : * transaction. This is OK since postponing the flag maintenance is
1424 : * always allowable.
1425 : *
1426 : * Note: num_tuples should count only *live* tuples, since
1427 : * pg_class.reltuples is defined that way.
1428 : *
1429 : * This routine is shared by VACUUM and ANALYZE.
1430 : */
1431 : void
1432 153179 : vac_update_relstats(Relation relation,
1433 : BlockNumber num_pages, double num_tuples,
1434 : BlockNumber num_all_visible_pages,
1435 : BlockNumber num_all_frozen_pages,
1436 : bool hasindex, TransactionId frozenxid,
1437 : MultiXactId minmulti,
1438 : bool *frozenxid_updated, bool *minmulti_updated,
1439 : bool in_outer_xact)
1440 : {
1441 153179 : Oid relid = RelationGetRelid(relation);
1442 : Relation rd;
1443 : ScanKeyData key[1];
1444 : HeapTuple ctup;
1445 : void *inplace_state;
1446 : Form_pg_class pgcform;
1447 : bool dirty,
1448 : futurexid,
1449 : futuremxid;
1450 : TransactionId oldfrozenxid;
1451 : MultiXactId oldminmulti;
1452 :
1453 153179 : rd = table_open(RelationRelationId, RowExclusiveLock);
1454 :
1455 : /* Fetch a copy of the tuple to scribble on */
1456 153179 : ScanKeyInit(&key[0],
1457 : Anum_pg_class_oid,
1458 : BTEqualStrategyNumber, F_OIDEQ,
1459 : ObjectIdGetDatum(relid));
1460 153179 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1461 : NULL, 1, key, &ctup, &inplace_state);
1462 153179 : if (!HeapTupleIsValid(ctup))
1463 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1464 : relid);
1465 153179 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1466 :
1467 : /* Apply statistical updates, if any, to copied tuple */
1468 :
1469 153179 : dirty = false;
1470 153179 : if (pgcform->relpages != (int32) num_pages)
1471 : {
1472 5686 : pgcform->relpages = (int32) num_pages;
1473 5686 : dirty = true;
1474 : }
1475 153179 : if (pgcform->reltuples != (float4) num_tuples)
1476 : {
1477 12445 : pgcform->reltuples = (float4) num_tuples;
1478 12445 : dirty = true;
1479 : }
1480 153179 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1481 : {
1482 3849 : pgcform->relallvisible = (int32) num_all_visible_pages;
1483 3849 : dirty = true;
1484 : }
1485 153179 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1486 : {
1487 3229 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1488 3229 : dirty = true;
1489 : }
1490 :
1491 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1492 :
1493 153179 : if (!in_outer_xact)
1494 : {
1495 : /*
1496 : * If we didn't find any indexes, reset relhasindex.
1497 : */
1498 152897 : if (pgcform->relhasindex && !hasindex)
1499 : {
1500 15 : pgcform->relhasindex = false;
1501 15 : dirty = true;
1502 : }
1503 :
1504 : /* We also clear relhasrules and relhastriggers if needed */
1505 152897 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1506 : {
1507 0 : pgcform->relhasrules = false;
1508 0 : dirty = true;
1509 : }
1510 152897 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1511 : {
1512 8 : pgcform->relhastriggers = false;
1513 8 : dirty = true;
1514 : }
1515 : }
1516 :
1517 : /*
1518 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1519 : * indicating it has no new data.
1520 : *
1521 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1522 : * stored relfrozenxid is "in the future" then it seems best to assume
1523 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1524 : * This should match vac_update_datfrozenxid() concerning what we consider
1525 : * to be "in the future".
1526 : */
1527 153179 : oldfrozenxid = pgcform->relfrozenxid;
1528 153179 : futurexid = false;
1529 153179 : if (frozenxid_updated)
1530 127460 : *frozenxid_updated = false;
1531 153179 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1532 : {
1533 33590 : bool update = false;
1534 :
1535 33590 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1536 33527 : update = true;
1537 63 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1538 0 : futurexid = update = true;
1539 :
1540 33590 : if (update)
1541 : {
1542 33527 : pgcform->relfrozenxid = frozenxid;
1543 33527 : dirty = true;
1544 33527 : if (frozenxid_updated)
1545 33527 : *frozenxid_updated = true;
1546 : }
1547 : }
1548 :
1549 : /* Similarly for relminmxid */
1550 153179 : oldminmulti = pgcform->relminmxid;
1551 153179 : futuremxid = false;
1552 153179 : if (minmulti_updated)
1553 127460 : *minmulti_updated = false;
1554 153179 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1555 : {
1556 220 : bool update = false;
1557 :
1558 220 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1559 220 : update = true;
1560 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1561 0 : futuremxid = update = true;
1562 :
1563 220 : if (update)
1564 : {
1565 220 : pgcform->relminmxid = minmulti;
1566 220 : dirty = true;
1567 220 : if (minmulti_updated)
1568 220 : *minmulti_updated = true;
1569 : }
1570 : }
1571 :
1572 : /* If anything changed, write out the tuple. */
1573 153179 : if (dirty)
1574 42307 : systable_inplace_update_finish(inplace_state, ctup);
1575 : else
1576 110872 : systable_inplace_update_cancel(inplace_state);
1577 :
1578 153179 : table_close(rd, RowExclusiveLock);
1579 :
1580 153179 : if (futurexid)
1581 0 : ereport(WARNING,
1582 : (errcode(ERRCODE_DATA_CORRUPTED),
1583 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1584 : oldfrozenxid, frozenxid,
1585 : RelationGetRelationName(relation))));
1586 153179 : if (futuremxid)
1587 0 : ereport(WARNING,
1588 : (errcode(ERRCODE_DATA_CORRUPTED),
1589 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1590 : oldminmulti, minmulti,
1591 : RelationGetRelationName(relation))));
1592 153179 : }
1593 :
1594 :
1595 : /*
1596 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1597 : *
1598 : * Update pg_database's datfrozenxid entry for our database to be the
1599 : * minimum of the pg_class.relfrozenxid values.
1600 : *
1601 : * Similarly, update our datminmxid to be the minimum of the
1602 : * pg_class.relminmxid values.
1603 : *
1604 : * If we are able to advance either pg_database value, also try to
1605 : * truncate pg_xact and pg_multixact.
1606 : *
1607 : * We violate transaction semantics here by overwriting the database's
1608 : * existing pg_database tuple with the new values. This is reasonably
1609 : * safe since the new values are correct whether or not this transaction
1610 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1611 : * behind after a VACUUM.
1612 : */
1613 : void
1614 2601 : vac_update_datfrozenxid(void)
1615 : {
1616 : HeapTuple tuple;
1617 : Form_pg_database dbform;
1618 : Relation relation;
1619 : SysScanDesc scan;
1620 : HeapTuple classTup;
1621 : TransactionId newFrozenXid;
1622 : MultiXactId newMinMulti;
1623 : TransactionId lastSaneFrozenXid;
1624 : MultiXactId lastSaneMinMulti;
1625 2601 : bool bogus = false;
1626 2601 : bool dirty = false;
1627 : ScanKeyData key[1];
1628 : void *inplace_state;
1629 :
1630 : /*
1631 : * Restrict this task to one backend per database. This avoids race
1632 : * conditions that would move datfrozenxid or datminmxid backward. It
1633 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1634 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1635 : */
1636 2601 : LockDatabaseFrozenIds(ExclusiveLock);
1637 :
1638 : /*
1639 : * Initialize the "min" calculation with
1640 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1641 : * approximation to the minimum relfrozenxid for not-yet-committed
1642 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1643 : * cannot produce a wrong minimum by starting with this.
1644 : */
1645 2601 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1646 :
1647 : /*
1648 : * Similarly, initialize the MultiXact "min" with the value that would be
1649 : * used on pg_class for new tables. See AddNewRelationTuple().
1650 : */
1651 2601 : newMinMulti = GetOldestMultiXactId();
1652 :
1653 : /*
1654 : * Identify the latest relfrozenxid and relminmxid values that we could
1655 : * validly see during the scan. These are conservative values, but it's
1656 : * not really worth trying to be more exact.
1657 : */
1658 2601 : lastSaneFrozenXid = ReadNextTransactionId();
1659 2601 : lastSaneMinMulti = ReadNextMultiXactId();
1660 :
1661 : /*
1662 : * We must seqscan pg_class to find the minimum Xid, because there is no
1663 : * index that can help us here.
1664 : *
1665 : * See vac_truncate_clog() for the race condition to prevent.
1666 : */
1667 2601 : relation = table_open(RelationRelationId, AccessShareLock);
1668 :
1669 2601 : scan = systable_beginscan(relation, InvalidOid, false,
1670 : NULL, 0, NULL);
1671 :
1672 1518281 : while ((classTup = systable_getnext(scan)) != NULL)
1673 : {
1674 1515680 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1675 1515680 : volatile TransactionId *relfrozenxid_p = &classForm->relfrozenxid;
1676 1515680 : volatile TransactionId *relminmxid_p = &classForm->relminmxid;
1677 1515680 : TransactionId relfrozenxid = *relfrozenxid_p;
1678 1515680 : TransactionId relminmxid = *relminmxid_p;
1679 :
1680 : /*
1681 : * Only consider relations able to hold unfrozen XIDs (anything else
1682 : * should have InvalidTransactionId in relfrozenxid anyway).
1683 : */
1684 1515680 : if (classForm->relkind != RELKIND_RELATION &&
1685 1193282 : classForm->relkind != RELKIND_MATVIEW &&
1686 1191640 : classForm->relkind != RELKIND_TOASTVALUE)
1687 : {
1688 : Assert(!TransactionIdIsValid(relfrozenxid));
1689 : Assert(!MultiXactIdIsValid(relminmxid));
1690 1028066 : continue;
1691 : }
1692 :
1693 : /*
1694 : * Some table AMs might not need per-relation xid / multixid horizons.
1695 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1696 : * to not be set (i.e. set to their respective Invalid*Id)
1697 : * independently. Thus validate and compute horizon for each only if
1698 : * set.
1699 : *
1700 : * If things are working properly, no relation should have a
1701 : * relfrozenxid or relminmxid that is "in the future". However, such
1702 : * cases have been known to arise due to bugs in pg_upgrade. If we
1703 : * see any entries that are "in the future", chicken out and don't do
1704 : * anything. This ensures we won't truncate clog & multixact SLRUs
1705 : * before those relations have been scanned and cleaned up.
1706 : */
1707 :
1708 487614 : if (TransactionIdIsValid(relfrozenxid))
1709 : {
1710 : Assert(TransactionIdIsNormal(relfrozenxid));
1711 :
1712 : /* check for values in the future */
1713 487614 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1714 : {
1715 0 : bogus = true;
1716 0 : break;
1717 : }
1718 :
1719 : /* determine new horizon */
1720 487614 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1721 2394 : newFrozenXid = relfrozenxid;
1722 : }
1723 :
1724 487614 : if (MultiXactIdIsValid(relminmxid))
1725 : {
1726 : /* check for values in the future */
1727 487614 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1728 : {
1729 0 : bogus = true;
1730 0 : break;
1731 : }
1732 :
1733 : /* determine new horizon */
1734 487614 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1735 215 : newMinMulti = relminmxid;
1736 : }
1737 : }
1738 :
1739 : /* we're done with pg_class */
1740 2601 : systable_endscan(scan);
1741 2601 : table_close(relation, AccessShareLock);
1742 :
1743 : /* chicken out if bogus data found */
1744 2601 : if (bogus)
1745 0 : return;
1746 :
1747 : Assert(TransactionIdIsNormal(newFrozenXid));
1748 : Assert(MultiXactIdIsValid(newMinMulti));
1749 :
1750 : /* Now fetch the pg_database tuple we need to update. */
1751 2601 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1752 :
1753 : /*
1754 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1755 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1756 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1757 : * For now, don't optimize that.
1758 : */
1759 2601 : ScanKeyInit(&key[0],
1760 : Anum_pg_database_oid,
1761 : BTEqualStrategyNumber, F_OIDEQ,
1762 : ObjectIdGetDatum(MyDatabaseId));
1763 :
1764 2601 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1765 : NULL, 1, key, &tuple, &inplace_state);
1766 :
1767 2601 : if (!HeapTupleIsValid(tuple))
1768 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1769 :
1770 2601 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1771 :
1772 : /*
1773 : * As in vac_update_relstats(), we ordinarily don't want to let
1774 : * datfrozenxid go backward; but if it's "in the future" then it must be
1775 : * corrupt and it seems best to overwrite it.
1776 : */
1777 2939 : if (dbform->datfrozenxid != newFrozenXid &&
1778 338 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1779 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1780 : {
1781 338 : dbform->datfrozenxid = newFrozenXid;
1782 338 : dirty = true;
1783 : }
1784 : else
1785 2263 : newFrozenXid = dbform->datfrozenxid;
1786 :
1787 : /* Ditto for datminmxid */
1788 2602 : if (dbform->datminmxid != newMinMulti &&
1789 1 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1790 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1791 : {
1792 1 : dbform->datminmxid = newMinMulti;
1793 1 : dirty = true;
1794 : }
1795 : else
1796 2600 : newMinMulti = dbform->datminmxid;
1797 :
1798 2601 : if (dirty)
1799 338 : systable_inplace_update_finish(inplace_state, tuple);
1800 : else
1801 2263 : systable_inplace_update_cancel(inplace_state);
1802 :
1803 2601 : heap_freetuple(tuple);
1804 2601 : table_close(relation, RowExclusiveLock);
1805 :
1806 : /*
1807 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1808 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1809 : * XID-wrap-limit info is stale, since this action will update that too.
1810 : */
1811 2601 : if (dirty || ForceTransactionIdLimitUpdate())
1812 1177 : vac_truncate_clog(newFrozenXid, newMinMulti,
1813 : lastSaneFrozenXid, lastSaneMinMulti);
1814 : }
1815 :
1816 :
1817 : /*
1818 : * vac_truncate_clog() -- attempt to truncate the commit log
1819 : *
1820 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1821 : * and use it to truncate the transaction commit log (pg_xact) and commit timestamps.
1822 : * Also update the XID wrap limit info maintained by varsup.c.
1823 : * Likewise for datminmxid, which drives multixact truncation and the multixact limit.
1824 : *
1825 : * The passed frozenXID and minMulti are the updated values for my own
1826 : * pg_database entry. They're used to initialize the "min" calculations.
1827 : * The caller also passes the "last sane" XID and MXID, since it has
1828 : * those at hand already.
1829 : *
1830 : * This routine is only invoked when we've managed to change our
1831 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1832 : * XID-wrap-limit info is stale. Nothing is truncated and no limit is updated if any scanned database looks wrapped around or has a horizon "in the future"; WrapLimitsVacuumLock is released on every return path.
1833 : */
1834 : static void
1835 1177 : vac_truncate_clog(TransactionId frozenXID,
1836 : MultiXactId minMulti,
1837 : TransactionId lastSaneFrozenXid,
1838 : MultiXactId lastSaneMinMulti)
1839 : {
1840 1177 : TransactionId nextXID = ReadNextTransactionId();
1841 : Relation relation;
1842 : TableScanDesc scan;
1843 : HeapTuple tuple;
1844 : Oid oldestxid_datoid; /* database whose datfrozenxid is the running minimum */
1845 : Oid minmulti_datoid; /* database whose datminmxid is the running minimum */
1846 1177 : bool bogus = false; /* saw a horizon beyond the "last sane" values */
1847 1177 : bool frozenAlreadyWrapped = false; /* saw a datfrozenxid that nextXID precedes */
1848 :
1849 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1850 1177 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1851 :
1852 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1853 1177 : oldestxid_datoid = MyDatabaseId;
1854 1177 : minmulti_datoid = MyDatabaseId;
1855 :
1856 : /*
1857 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid.
1858 : *
1859 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1860 : * the values could change while we look at them. Fetch each one just
1861 : * once to ensure sane behavior of the comparison logic. (Here, as in
1862 : * many other places, we assume that fetching or updating an XID in shared
1863 : * storage is atomic.)
1864 : *
1865 : * Note: we need not worry about a race condition with new entries being
1866 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1867 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1868 : * of the interlock against copying a DB containing an active backend.
1869 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1870 : * concurrently modify the datfrozenxid's of different databases, the
1871 : * worst possible outcome is that pg_xact is not truncated as aggressively
1872 : * as it could be.
1873 : */
1874 1177 : relation = table_open(DatabaseRelationId, AccessShareLock);
1875 :
1876 1177 : scan = table_beginscan_catalog(relation, 0, NULL);
1877 :
1878 4610 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1879 : {
1880 3433 : Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
1881 3433 : volatile TransactionId *datfrozenxid_p = &dbform->datfrozenxid; /* volatile: read each field exactly once */
1882 3433 : volatile TransactionId *datminmxid_p = &dbform->datminmxid;
1883 3433 : TransactionId datfrozenxid = *datfrozenxid_p;
1884 3433 : TransactionId datminmxid = *datminmxid_p; /* NOTE(review): holds a multixact ID (checked with MultiXactId* routines below); MultiXactId would presumably be the clearer type -- confirm */
1885 :
1886 : Assert(TransactionIdIsNormal(datfrozenxid));
1887 : Assert(MultiXactIdIsValid(datminmxid));
1888 :
1889 : /*
1890 : * If database is in the process of getting dropped, or has been
1891 : * interrupted while doing so, no connections to it are possible
1892 : * anymore. Therefore we don't need to take it into account here.
1893 : * Which is good, because it can't be processed by autovacuum either.
1894 : */
1895 3433 : if (database_is_invalid_form((Form_pg_database) dbform))
1896 : {
1897 3 : elog(DEBUG2,
1898 : "skipping invalid database \"%s\" while computing relfrozenxid",
1899 : NameStr(dbform->datname)); /* NOTE(review): message says "relfrozenxid" but this loop computes datfrozenxid/datminmxid minima -- confirm wording */
1900 3 : continue;
1901 : }
1902 :
1903 : /*
1904 : * If things are working properly, no database should have a
1905 : * datfrozenxid or datminmxid that is "in the future". However, such
1906 : * cases have been known to arise due to bugs in pg_upgrade. If we
1907 : * see any entries that are "in the future", chicken out and don't do
1908 : * anything. This ensures we won't truncate clog before those
1909 : * databases have been scanned and cleaned up. (We will issue the
1910 : * "already wrapped" warning if appropriate, though.)
1911 : */
1912 6860 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1913 3430 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1914 0 : bogus = true; /* no break: the remaining databases are still checked for wraparound below */
1915 :
1916 3430 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1917 0 : frozenAlreadyWrapped = true; /* such an entry is never folded into the minimum (note the else) */
1918 3430 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1919 : {
1920 273 : frozenXID = datfrozenxid;
1921 273 : oldestxid_datoid = dbform->oid;
1922 : }
1923 :
1924 3430 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1925 : {
1926 2 : minMulti = datminmxid;
1927 2 : minmulti_datoid = dbform->oid;
1928 : }
1929 : }
1930 :
1931 1177 : table_endscan(scan);
1932 :
1933 1177 : table_close(relation, AccessShareLock);
1934 :
1935 : /*
1936 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1937 : * the computed minimum XID might be bogus. This case should now be
1938 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1939 : * test anyway.
1940 : */
1941 1177 : if (frozenAlreadyWrapped)
1942 : {
1943 0 : ereport(WARNING,
1944 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1945 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1946 0 : LWLockRelease(WrapLimitsVacuumLock);
1947 0 : return;
1948 : }
1949 :
1950 : /* chicken out if data is bogus in any other way */
1951 1177 : if (bogus)
1952 : {
1953 0 : LWLockRelease(WrapLimitsVacuumLock);
1954 0 : return;
1955 : }
1956 :
1957 : /*
1958 : * Freeze any old transaction IDs in the async notification queue before
1959 : * CLOG truncation.
1960 : */
1961 1177 : AsyncNotifyFreezeXids(frozenXID);
1962 :
1963 : /*
1964 : * Advance the oldest value for commit timestamps before truncating, so
1965 : * that if a user requests a timestamp for a transaction we're truncating
1966 : * away right after this point, they get NULL instead of an ugly "file not
1967 : * found" error from slru.c. This doesn't matter for xact/multixact
1968 : * because they are not subject to arbitrary lookups from users.
1969 : */
1970 1177 : AdvanceOldestCommitTsXid(frozenXID);
1971 :
1972 : /*
1973 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1974 : */
1975 1177 : TruncateCLOG(frozenXID, oldestxid_datoid);
1976 1177 : TruncateCommitTs(frozenXID);
1977 1177 : TruncateMultiXact(minMulti, minmulti_datoid);
1978 :
1979 : /*
1980 : * Update the wrap limit for GetNewTransactionId and creation of new
1981 : * MultiXactIds. Note: these functions will also signal the postmaster
1982 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1983 : * signaling twice?
1984 : */
1985 1177 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1986 1177 : SetMultiXactIdLimit(minMulti, minmulti_datoid);
1987 :
1988 1177 : LWLockRelease(WrapLimitsVacuumLock);
1989 : }
1990 :
1991 :
1992 : /*
1993 : * vacuum_rel() -- vacuum one heap relation
1994 : *
1995 : * relid identifies the relation to vacuum. If relation is supplied,
1996 : * use the name therein for reporting any failure to open/lock the rel;
1997 : * do not use it once we've successfully opened the rel, since it might
1998 : * be stale. params is received by value; an untouched copy of it is what the TOAST recursion is given.
1999 : *
2000 : * Returns true if it's okay to proceed with a requested ANALYZE
2001 : * operation on this table. It returns false when the relation cannot be opened/locked, fails the privilege or relkind check, or is another backend's temp table; a partitioned table returns true without being processed.
2002 : *
2003 : * Doing one heap at a time incurs extra overhead, since we need to
2004 : * check that the heap exists again just before we vacuum it. The
2005 : * reason that we do this is so that vacuuming can be spread across
2006 : * many small transactions. Otherwise, two-phase locking would require
2007 : * us to lock the entire database during one pass of the vacuum cleaner.
2008 : *
2009 : * At entry and exit, we are not inside a transaction.
2010 : */
2011 : static bool
2012 127957 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
2013 : BufferAccessStrategy bstrategy, bool isTopLevel)
2014 : {
2015 : LOCKMODE lmode;
2016 : Relation rel;
2017 : LockRelId lockrelid;
2018 : Oid priv_relid; /* relation whose privileges are checked (the parent when recursing to TOAST) */
2019 : Oid toast_relid; /* TOAST table to recurse into, or InvalidOid */
2020 : Oid save_userid;
2021 : int save_sec_context;
2022 : int save_nestlevel;
2023 : VacuumParams toast_vacuum_params; /* copy of params taken before this function modifies it */
2024 :
2025 : /*
2026 : * This function scribbles on the parameters, so make a copy early to
2027 : * avoid affecting the TOAST table (if we do end up recursing to it).
2028 : */
2029 127957 : memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
2030 :
2031 : /* Begin a transaction for vacuuming this relation */
2032 127957 : StartTransactionCommand();
2033 :
2034 127957 : if (!(params.options & VACOPT_FULL))
2035 : {
2036 : /*
2037 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2038 : * other concurrent VACUUMs know that they can ignore this one while
2039 : * determining their OldestXmin. (The reason we don't set it during a
2040 : * full VACUUM is exactly that we may have to run user-defined
2041 : * functions for functional indexes, and we want to make sure that if
2042 : * they use the snapshot set above, any tuples it requires can't get
2043 : * removed from other tables. An index function that depends on the
2044 : * contents of other tables is arguably broken, but we won't break it
2045 : * here by violating transaction semantics.)
2046 : *
2047 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2048 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2049 : * in an emergency.
2050 : *
2051 : * Note: these flags remain set until CommitTransaction or
2052 : * AbortTransaction. We don't want to clear them until we reset
2053 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2054 : * might appear to go backwards, which is probably Not Good. (We also
2055 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2056 : * xmin doesn't become visible ahead of setting the flag.)
2057 : */
2058 127710 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2059 127710 : MyProc->statusFlags |= PROC_IN_VACUUM;
2060 127710 : if (params.is_wraparound)
2061 110714 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2062 127710 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2063 127710 : LWLockRelease(ProcArrayLock);
2064 : }
2065 :
2066 : /*
2067 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2068 : * cutoff xids in local memory wrapping around, and to have updated xmin
2069 : * horizons.
2070 : */
2071 127957 : PushActiveSnapshot(GetTransactionSnapshot());
2072 :
2073 : /*
2074 : * Check for user-requested abort. Note we want this to be inside a
2075 : * transaction, so xact.c doesn't issue useless WARNING.
2076 : */
2077 127957 : CHECK_FOR_INTERRUPTS();
2078 :
2079 : /*
2080 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2081 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2082 : * way, we can be sure that no other backend is vacuuming the same table.
2083 : */
2084 255914 : lmode = (params.options & VACOPT_FULL) ?
2085 127957 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2086 :
2087 : /* open the relation and get the appropriate lock on it */
2088 127957 : rel = vacuum_open_relation(relid, relation, params.options,
2089 127957 : params.log_vacuum_min_duration >= 0, lmode);
2090 :
2091 : /* leave if relation could not be opened or locked */
2092 127957 : if (!rel)
2093 : {
2094 12 : PopActiveSnapshot();
2095 12 : CommitTransactionCommand();
2096 12 : return false;
2097 : }
2098 :
2099 : /*
2100 : * When recursing to a TOAST table, check privileges on the parent. NB:
2101 : * This is only safe to do because we hold a session lock on the main
2102 : * relation that prevents concurrent deletion.
2103 : */
2104 127945 : if (OidIsValid(params.toast_parent))
2105 5808 : priv_relid = params.toast_parent;
2106 : else
2107 122137 : priv_relid = RelationGetRelid(rel);
2108 :
2109 : /*
2110 : * Check if relation needs to be skipped based on privileges. This check
2111 : * happens also when building the relation list to vacuum for a manual
2112 : * operation, and needs to be done additionally here as VACUUM could
2113 : * happen across multiple transactions where privileges could have changed
2114 : * in-between. Make sure to only generate logs for VACUUM in this case.
2115 : */
2116 127945 : if (!vacuum_is_permitted_for_relation(priv_relid,
2117 : rel->rd_rel,
2118 127945 : params.options & ~VACOPT_ANALYZE))
2119 : {
2120 48 : relation_close(rel, lmode);
2121 48 : PopActiveSnapshot();
2122 48 : CommitTransactionCommand();
2123 48 : return false;
2124 : }
2125 :
2126 : /*
2127 : * Check that it's of a vacuumable relkind.
2128 : */
2129 127897 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2130 45939 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2131 45934 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2132 125 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2133 : {
2134 1 : ereport(WARNING,
2135 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2136 : RelationGetRelationName(rel))));
2137 1 : relation_close(rel, lmode);
2138 1 : PopActiveSnapshot();
2139 1 : CommitTransactionCommand();
2140 1 : return false;
2141 : }
2142 :
2143 : /*
2144 : * Silently ignore tables that are temp tables of other backends ---
2145 : * trying to vacuum these will lead to great unhappiness, since their
2146 : * contents are probably not up-to-date on disk. (We don't throw a
2147 : * warning here; it would just lead to chatter during a database-wide
2148 : * VACUUM.)
2149 : */
2150 127896 : if (RELATION_IS_OTHER_TEMP(rel))
2151 : {
2152 1 : relation_close(rel, lmode);
2153 1 : PopActiveSnapshot();
2154 1 : CommitTransactionCommand();
2155 1 : return false;
2156 : }
2157 :
2158 : /*
2159 : * Silently ignore partitioned tables as there is no work to be done. The
2160 : * useful work is on their child partitions, which have been queued up for
2161 : * us separately.
2162 : */
2163 127895 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2164 : {
2165 124 : relation_close(rel, lmode);
2166 124 : PopActiveSnapshot();
2167 124 : CommitTransactionCommand();
2168 : /* It's OK to proceed with ANALYZE on this table */
2169 124 : return true;
2170 : }
2171 :
2172 : /*
2173 : * Get a session-level lock too. This will protect our access to the
2174 : * relation across multiple transactions, so that we can vacuum the
2175 : * relation's TOAST table (if any) secure in the knowledge that no one is
2176 : * deleting the parent relation.
2177 : *
2178 : * NOTE: this cannot block, even if someone else is waiting for access,
2179 : * because the lock manager knows that both lock requests are from the
2180 : * same process.
2181 : */
2182 127771 : lockrelid = rel->rd_lockInfo.lockRelId;
2183 127771 : LockRelationIdForSession(&lockrelid, lmode);
2184 :
2185 : /*
2186 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2187 : * specified in VACUUM command, or when running in an autovacuum worker
2188 : */
2189 127771 : if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
2190 : {
2191 : StdRdOptIndexCleanup vacuum_index_cleanup;
2192 :
2193 127628 : if (rel->rd_options == NULL)
2194 126271 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2195 : else
2196 1357 : vacuum_index_cleanup =
2197 1357 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2198 :
2199 127628 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2200 127600 : params.index_cleanup = VACOPTVALUE_AUTO;
2201 28 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2202 14 : params.index_cleanup = VACOPTVALUE_ENABLED;
2203 : else
2204 : {
2205 : Assert(vacuum_index_cleanup ==
2206 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2207 14 : params.index_cleanup = VACOPTVALUE_DISABLED;
2208 : }
2209 : }
2210 :
2211 : #ifdef USE_INJECTION_POINTS
2212 127771 : if (params.index_cleanup == VACOPTVALUE_AUTO)
2213 127604 : INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
2214 167 : else if (params.index_cleanup == VACOPTVALUE_DISABLED)
2215 144 : INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
2216 23 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
2217 23 : INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
2218 : #endif
2219 :
2220 : /*
2221 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2222 : * parameter was specified. This overrides the GUC value.
2223 : */
2224 127771 : if (rel->rd_options != NULL &&
2225 1365 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2226 0 : params.max_eager_freeze_failure_rate =
2227 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2228 :
2229 : /*
2230 : * Set truncate option based on truncate reloption or GUC if it wasn't
2231 : * specified in VACUUM command, or when running in an autovacuum worker
2232 : */
2233 127771 : if (params.truncate == VACOPTVALUE_UNSPECIFIED)
2234 : {
2235 127632 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2236 :
2237 127632 : if (opts && opts->vacuum_truncate != PG_TERNARY_UNSET)
2238 : {
2239 20 : if (opts->vacuum_truncate == PG_TERNARY_TRUE)
2240 6 : params.truncate = VACOPTVALUE_ENABLED;
2241 : else
2242 14 : params.truncate = VACOPTVALUE_DISABLED;
2243 : }
2244 127612 : else if (vacuum_truncate)
2245 127598 : params.truncate = VACOPTVALUE_ENABLED;
2246 : else
2247 14 : params.truncate = VACOPTVALUE_DISABLED;
2248 : }
2249 :
2250 : #ifdef USE_INJECTION_POINTS
2251 127771 : if (params.truncate == VACOPTVALUE_AUTO)
2252 0 : INJECTION_POINT("vacuum-truncate-auto", NULL);
2253 127771 : else if (params.truncate == VACOPTVALUE_DISABLED)
2254 166 : INJECTION_POINT("vacuum-truncate-disabled", NULL);
2255 127605 : else if (params.truncate == VACOPTVALUE_ENABLED)
2256 127605 : INJECTION_POINT("vacuum-truncate-enabled", NULL);
2257 : #endif
2258 :
2259 : /*
2260 : * Remember the relation's TOAST relation for later, if the caller asked
2261 : * us to process it. In VACUUM FULL, though, the toast table is
2262 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2263 : * unless PROCESS_MAIN is disabled.
2264 : */
2265 127771 : if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
2266 16843 : ((params.options & VACOPT_FULL) == 0 ||
2267 230 : (params.options & VACOPT_PROCESS_MAIN) == 0))
2268 16617 : toast_relid = rel->rd_rel->reltoastrelid;
2269 : else
2270 111154 : toast_relid = InvalidOid;
2271 :
2272 : /*
2273 : * Switch to the table owner's userid, so that any index functions are run
2274 : * as that user. Also lock down security-restricted operations and
2275 : * arrange to make GUC variable changes local to this command. (This is
2276 : * unnecessary, but harmless, for lazy VACUUM.)
2277 : */
2278 127771 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2279 127771 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2280 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2281 127771 : save_nestlevel = NewGUCNestLevel();
2282 127771 : RestrictSearchPath();
2283 :
2284 : /*
2285 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2286 : * relation. Otherwise, we can skip this part. If processing the TOAST
2287 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2288 : * to be set when we recurse to the TOAST table.
2289 : */
2290 127771 : if (params.options & VACOPT_PROCESS_MAIN)
2291 : {
2292 : /*
2293 : * Do the actual work --- either FULL or "lazy" vacuum
2294 : */
2295 127686 : if (params.options & VACOPT_FULL)
2296 : {
2297 226 : ClusterParams cluster_params = {0};
2298 :
2299 226 : if ((params.options & VACOPT_VERBOSE) != 0)
2300 1 : cluster_params.options |= CLUOPT_VERBOSE;
2301 :
2302 : /* VACUUM FULL is a variant of REPACK; see repack.c */
2303 226 : cluster_rel(REPACK_COMMAND_VACUUMFULL, rel, InvalidOid,
2304 : &cluster_params, isTopLevel);
2305 : /* cluster_rel closes the relation, but keeps lock */
2306 :
2307 222 : rel = NULL; /* so the relation_close() further down is skipped */
2308 : }
2309 : else
2310 127460 : table_relation_vacuum(rel, &params, bstrategy);
2311 : }
2312 :
2313 : /* Roll back any GUC changes executed by index functions */
2314 127767 : AtEOXact_GUC(false, save_nestlevel);
2315 :
2316 : /* Restore userid and security context */
2317 127767 : SetUserIdAndSecContext(save_userid, save_sec_context);
2318 :
2319 : /* all done with this class, but hold lock until commit */
2320 127767 : if (rel)
2321 127545 : relation_close(rel, NoLock);
2322 :
2323 : /*
2324 : * Complete the transaction and free all temporary memory used.
2325 : */
2326 127767 : PopActiveSnapshot();
2327 127767 : CommitTransactionCommand();
2328 :
2329 : /*
2330 : * If the relation has a secondary toast rel, vacuum that too while we
2331 : * still hold the session lock on the main table. Note however that
2332 : * "analyze" will not get done on the toast table. This is good, because
2333 : * the toaster always uses hardcoded index access and statistics are
2334 : * totally unimportant for toast relations.
2335 : */
2336 127767 : if (toast_relid != InvalidOid)
2337 : {
2338 : /*
2339 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2340 : * set toast_parent so that the privilege checks are done on the main
2341 : * relation. NB: This is only safe to do because we hold a session
2342 : * lock on the main relation that prevents concurrent deletion.
2343 : */
2344 5808 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2345 5808 : toast_vacuum_params.toast_parent = relid;
2346 :
2347 5808 : vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy,
2348 : isTopLevel);
2349 : }
2350 :
2351 : /*
2352 : * Now release the session-level lock on the main table.
2353 : */
2354 127767 : UnlockRelationIdForSession(&lockrelid, lmode);
2355 :
2356 : /* Report that we really did it. */
2357 127767 : return true;
2358 : }
2359 :
2360 :
2361 : /*
2362 : * Open all the vacuumable indexes of the given relation, obtaining the
2363 : * specified kind of lock on each. Return an array of Relation pointers for
2364 : * the indexes into *Irel, and the number of indexes into *nindexes. *Irel is palloc'd with room for every index of the relation, or set to NULL when it has none; *nindexes counts only the entries actually stored, so it can be 0 with a non-NULL *Irel.
2365 : *
2366 : * We consider an index vacuumable if it is marked insertable (indisready).
2367 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2368 : * execution, and what we have is too corrupt to be processable. We will
2369 : * vacuum even if the index isn't indisvalid; this is important because in a
2370 : * unique index, uniqueness checks will be performed anyway and had better not
2371 : * hit dangling index pointers.
2372 : */
2373 : void
2374 137098 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2375 : int *nindexes, Relation **Irel)
2376 : {
2377 : List *indexoidlist;
2378 : ListCell *indexoidscan;
2379 : int i; /* first the total index count, then the running count of ready indexes */
2380 :
2381 : Assert(lockmode != NoLock); /* callers must request a real lock on each index */
2382 :
2383 137098 : indexoidlist = RelationGetIndexList(relation);
2384 :
2385 : /* allocate enough memory for all indexes */
2386 137098 : i = list_length(indexoidlist);
2387 :
2388 137098 : if (i > 0)
2389 128930 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2390 : else
2391 8168 : *Irel = NULL; /* no indexes; vac_close_indexes() returns early on a NULL array */
2392 :
2393 : /* collect just the ready indexes */
2394 137098 : i = 0;
2395 344520 : foreach(indexoidscan, indexoidlist)
2396 : {
2397 207422 : Oid indexoid = lfirst_oid(indexoidscan);
2398 : Relation indrel;
2399 :
2400 207422 : indrel = index_open(indexoid, lockmode);
2401 207422 : if (indrel->rd_index->indisready)
2402 207422 : (*Irel)[i++] = indrel;
2403 : else
2404 0 : index_close(indrel, lockmode); /* not ready: close it again with the same lockmode it was opened with */
2405 : }
2406 :
2407 137098 : *nindexes = i;
2408 :
2409 137098 : list_free(indexoidlist);
2410 137098 : }
2411 :
2412 : /*
2413 : * Release the resources acquired by vac_open_indexes: index_close() each of the nindexes entries of Irel, then pfree the array. A NULL Irel is a no-op.
2414 : * Optionally release the locks (say NoLock to keep 'em); lockmode is handed to index_close() for every index.
2415 : */
2416 : void
2417 137681 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2418 : {
2419 137681 : if (Irel == NULL)
2420 8755 : return; /* no array to walk or free */
2421 :
2422 336340 : while (nindexes--) /* walks the array from the last entry down to the first */
2423 : {
2424 207414 : Relation ind = Irel[nindexes];
2425 :
2426 207414 : index_close(ind, lockmode);
2427 : }
2428 128926 : pfree(Irel);
2429 : }
2430 :
2431 : /*
2432 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2433 : *
2434 : * This should be called in each major loop of VACUUM processing,
2435 : * typically once per page processed.
2436 : */
2437 : void
2438 51745599 : vacuum_delay_point(bool is_analyze)
2439 : {
2440 51745599 : double msec = 0;
2441 :
2442 : /* Always check for interrupts */
2443 51745599 : CHECK_FOR_INTERRUPTS();
2444 :
2445 51745598 : if (InterruptPending)
2446 0 : return;
2447 :
2448 51745598 : if (IsParallelWorker())
2449 : {
2450 : /*
2451 : * Update cost-based vacuum delay parameters for a parallel autovacuum
2452 : * worker if any changes are detected. It might enable cost-based
2453 : * delay so it needs to be called before VacuumCostActive check.
2454 : */
2455 114 : parallel_vacuum_update_shared_delay_params();
2456 : }
2457 :
2458 51745598 : if (!VacuumCostActive && !ConfigReloadPending)
2459 46543094 : return;
2460 :
2461 : /*
2462 : * Autovacuum workers should reload the configuration file if requested.
2463 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2464 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2465 : * vacuumed or analyzed.
2466 : */
2467 5202504 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2468 : {
2469 1 : ConfigReloadPending = false;
2470 1 : ProcessConfigFile(PGC_SIGHUP);
2471 1 : VacuumUpdateCosts();
2472 :
2473 : /*
2474 : * Propagate cost-based vacuum delay parameters to shared memory if
2475 : * any of them have changed during the config reload.
2476 : */
2477 1 : parallel_vacuum_propagate_shared_delay_params();
2478 : }
2479 :
2480 : /*
2481 : * If we disabled cost-based delays after reloading the config file,
2482 : * return.
2483 : */
2484 5202504 : if (!VacuumCostActive)
2485 0 : return;
2486 :
2487 : /*
2488 : * For parallel vacuum, the delay is computed based on the shared cost
2489 : * balance. See compute_parallel_delay.
2490 : */
2491 5202504 : if (VacuumSharedCostBalance != NULL)
2492 210 : msec = compute_parallel_delay();
2493 5202294 : else if (VacuumCostBalance >= vacuum_cost_limit)
2494 3122 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2495 :
2496 : /* Nap if appropriate */
2497 5202504 : if (msec > 0)
2498 : {
2499 : instr_time delay_start;
2500 :
2501 3129 : if (msec > vacuum_cost_delay * 4)
2502 8 : msec = vacuum_cost_delay * 4;
2503 :
2504 3129 : if (track_cost_delay_timing)
2505 0 : INSTR_TIME_SET_CURRENT(delay_start);
2506 :
2507 3129 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2508 3129 : pg_usleep(msec * 1000);
2509 3129 : pgstat_report_wait_end();
2510 :
2511 3129 : if (track_cost_delay_timing)
2512 : {
2513 : instr_time delay_end;
2514 : instr_time delay;
2515 :
2516 0 : INSTR_TIME_SET_CURRENT(delay_end);
2517 0 : INSTR_TIME_SET_ZERO(delay);
2518 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2519 :
2520 : /*
2521 : * For parallel workers, we only report the delay time every once
2522 : * in a while to avoid overloading the leader with messages and
2523 : * interrupts.
2524 : */
2525 0 : if (IsParallelWorker())
2526 : {
2527 : static instr_time last_report_time;
2528 : instr_time time_since_last_report;
2529 :
2530 : Assert(!is_analyze);
2531 :
2532 : /* Accumulate the delay time */
2533 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2534 :
2535 : /* Calculate interval since last report */
2536 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2537 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2538 :
2539 : /* If we haven't reported in a while, do so now */
2540 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2541 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2542 : {
2543 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2544 : parallel_vacuum_worker_delay_ns);
2545 :
2546 : /* Reset variables */
2547 0 : last_report_time = delay_end;
2548 0 : parallel_vacuum_worker_delay_ns = 0;
2549 : }
2550 : }
2551 0 : else if (is_analyze)
2552 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2553 : INSTR_TIME_GET_NANOSEC(delay));
2554 : else
2555 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2556 : INSTR_TIME_GET_NANOSEC(delay));
2557 : }
2558 :
2559 : /*
2560 : * We don't want to ignore postmaster death during very long vacuums
2561 : * with vacuum_cost_delay configured. We can't use the usual
2562 : * WaitLatch() approach here because we want microsecond-based sleep
2563 : * durations above.
2564 : */
2565 3129 : if (IsUnderPostmaster && !PostmasterIsAlive())
2566 0 : exit(1);
2567 :
2568 3129 : VacuumCostBalance = 0;
2569 :
2570 : /*
2571 : * Balance and update limit values for autovacuum workers. We must do
2572 : * this periodically, as the number of workers across which we are
2573 : * balancing the limit may have changed.
2574 : *
2575 : * TODO: There may be better criteria for determining when to do this
2576 : * besides "check after napping".
2577 : */
2578 3129 : AutoVacuumUpdateCostLimit();
2579 :
2580 : /* Might have gotten an interrupt while sleeping */
2581 3129 : CHECK_FOR_INTERRUPTS();
2582 : }
2583 : }
2584 :
2585 : /*
2586 : * Computes the vacuum delay for parallel workers.
2587 : *
2588 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2589 : * worker to sleep in proportion to the share of work it's done. We achieve this
2590 : * by allowing all parallel vacuum workers including the leader process to
2591 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2592 : * We allow each worker to update it as and when it has incurred any cost and
2593 : * then based on that decide whether it needs to sleep. We compute the time
2594 : * to sleep for a worker based on the cost it has incurred
2595 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2596 : * that amount. This avoids putting to sleep those workers which have done less
2597 : * I/O than other workers and therefore ensure that workers
2598 : * which are doing more I/O got throttled more.
2599 : *
2600 : * We allow a worker to sleep only if it has performed I/O above a certain
2601 : * threshold, which is calculated based on the number of active workers
2602 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2603 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2604 : * the required throttling if we force a worker that has done more than 50%
2605 : * of its share of work to sleep.
2606 : */
2607 : static double
2608 210 : compute_parallel_delay(void)
2609 : {
2610 210 : double msec = 0;
2611 : uint32 shared_balance;
2612 : int nworkers;
2613 :
2614 : /* Parallel vacuum must be active */
2615 : Assert(VacuumSharedCostBalance);
2616 :
2617 210 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2618 :
2619 : /* At least count itself */
2620 : Assert(nworkers >= 1);
2621 :
2622 : /* Update the shared cost balance value atomically */
2623 210 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2624 :
2625 : /* Compute the total local balance for the current worker */
2626 210 : VacuumCostBalanceLocal += VacuumCostBalance;
2627 :
2628 210 : if ((shared_balance >= vacuum_cost_limit) &&
2629 21 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2630 : {
2631 : /* Compute sleep time based on the local cost balance */
2632 7 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2633 7 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2634 7 : VacuumCostBalanceLocal = 0;
2635 : }
2636 :
2637 : /*
2638 : * Reset the local balance as we accumulated it into the shared value.
2639 : */
2640 210 : VacuumCostBalance = 0;
2641 :
2642 210 : return msec;
2643 : }
2644 :
2645 : /*
2646 : * A wrapper function of defGetBoolean().
2647 : *
2648 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2649 : * of true and false.
2650 : */
2651 : static VacOptValue
2652 182 : get_vacoptval_from_boolean(DefElem *def)
2653 : {
2654 182 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2655 : }
2656 :
2657 : /*
2658 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2659 : *
2660 : * Returns bulk delete stats derived from input stats
2661 : */
2662 : IndexBulkDeleteResult *
2663 1496 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2664 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2665 : {
2666 : /* Do bulk deletion */
2667 1496 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2668 : dead_items);
2669 :
2670 1496 : ereport(ivinfo->message_level,
2671 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2672 : RelationGetRelationName(ivinfo->index),
2673 : dead_items_info->num_items)));
2674 :
2675 1496 : return istat;
2676 : }
2677 :
2678 : /*
2679 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2680 : *
2681 : * Returns bulk delete stats derived from input stats
2682 : */
2683 : IndexBulkDeleteResult *
2684 152544 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2685 : {
2686 152544 : istat = index_vacuum_cleanup(ivinfo, istat);
2687 :
2688 152544 : if (istat)
2689 1653 : ereport(ivinfo->message_level,
2690 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2691 : RelationGetRelationName(ivinfo->index),
2692 : istat->num_index_tuples,
2693 : istat->num_pages),
2694 : errdetail("%.0f index row versions were removed.\n"
2695 : "%u index pages were newly deleted.\n"
2696 : "%u index pages are currently deleted, of which %u are currently reusable.",
2697 : istat->tuples_removed,
2698 : istat->pages_newly_deleted,
2699 : istat->pages_deleted, istat->pages_free)));
2700 :
2701 152544 : return istat;
2702 : }
2703 :
2704 : /*
2705 : * vac_tid_reaped() -- is a particular tid deletable?
2706 : *
2707 : * This has the right signature to be an IndexBulkDeleteCallback.
2708 : */
2709 : static bool
2710 4046963 : vac_tid_reaped(ItemPointer itemptr, void *state)
2711 : {
2712 4046963 : TidStore *dead_items = (TidStore *) state;
2713 :
2714 4046963 : return TidStoreIsMember(dead_items, itemptr);
2715 : }
|