Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/injection_point.h"
60 : #include "utils/memutils.h"
61 : #include "utils/snapmgr.h"
62 : #include "utils/syscache.h"
63 :
64 : /*
65 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
66 : * This aims to avoid sending too many messages and waking up the leader too
67 : * frequently.
68 : */
69 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
70 :
71 : /*
72 : * GUC parameters
73 : */
74 : int vacuum_freeze_min_age;
75 : int vacuum_freeze_table_age;
76 : int vacuum_multixact_freeze_min_age;
77 : int vacuum_multixact_freeze_table_age;
78 : int vacuum_failsafe_age;
79 : int vacuum_multixact_failsafe_age;
80 : double vacuum_max_eager_freeze_failure_rate;
81 : bool track_cost_delay_timing;
82 : bool vacuum_truncate;
83 :
84 : /*
85 : * Variables for cost-based vacuum delay. The defaults differ between
86 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
87 : * vacuum code. They are initialized here to the defaults for client backends
88 : * executing VACUUM or ANALYZE.
89 : */
90 : double vacuum_cost_delay = 0;
91 : int vacuum_cost_limit = 200;
92 :
93 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
94 : int64 parallel_vacuum_worker_delay_ns = 0;
95 :
96 : /*
97 : * VacuumFailsafeActive is defined as a global so that we can determine
98 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
99 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
100 : * for the table until after vacuuming has completed, regardless of other
101 : * settings.
102 : *
103 : * Only VACUUM code should inspect this variable and only table access methods
104 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
105 : * inspected to determine whether or not to allow cost-based delays. Table AMs
106 : * are free to set it if they desire this behavior, but it is false by default
107 : * and reset to false in between vacuuming each relation.
108 : */
109 : bool VacuumFailsafeActive = false;
110 :
111 : /*
112 : * Variables for cost-based parallel vacuum. See comments atop
113 : * compute_parallel_delay to understand how it works.
114 : */
115 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
116 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
117 : int VacuumCostBalanceLocal = 0;
118 :
119 : /* non-export function prototypes */
120 : static List *expand_vacuum_rel(VacuumRelation *vrel,
121 : MemoryContext vac_context, int options);
122 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
123 : static void vac_truncate_clog(TransactionId frozenXID,
124 : MultiXactId minMulti,
125 : TransactionId lastSaneFrozenXid,
126 : MultiXactId lastSaneMinMulti);
127 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
128 : BufferAccessStrategy bstrategy);
129 : static double compute_parallel_delay(void);
130 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
131 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
132 :
133 : /*
134 : * GUC check function to ensure GUC value specified is within the allowable
135 : * range.
136 : */
137 : bool
138 2196 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
139 : GucSource source)
140 : {
141 : /* Value upper and lower hard limits are inclusive */
142 2196 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
143 2196 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
144 2196 : return true;
145 :
146 : /* Value does not fall within any allowable range */
147 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
148 : "vacuum_buffer_usage_limit",
149 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
150 :
151 0 : return false;
152 : }
153 :
154 : /*
155 : * Primary entry point for manual VACUUM and ANALYZE commands
156 : *
157 : * This is mainly a preparation wrapper for the real operations that will
158 : * happen in vacuum().
 *
 * pstate is used only to attach a parse location to option errors.
 * vacstmt supplies the option list (options), the list of target relations
 * (rels), and is_vacuumcmd, which selects between VACUUM and ANALYZE option
 * parsing.  isTopLevel is handed through unchanged to vacuum().
 *
 * In order, this function: parses the option list into locals and "params",
 * folds the booleans into params.options, rejects incompatible option
 * combinations with ERROR, fills in the freeze ages, creates the "Vacuum"
 * memory context (plus a buffer access strategy when one will be used),
 * calls vacuum(), and finally deletes the memory context.
159 : */
160 : void
161 13704 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
162 : {
163 : VacuumParams params;
164 13704 : BufferAccessStrategy bstrategy = NULL;
165 13704 : bool verbose = false;
166 13704 : bool skip_locked = false;
167 13704 : bool analyze = false;
168 13704 : bool freeze = false;
169 13704 : bool full = false;
170 13704 : bool disable_page_skipping = false;
171 13704 : bool process_main = true;
172 13704 : bool process_toast = true;
173 : int ring_size;
174 13704 : bool skip_database_stats = false;
175 13704 : bool only_database_stats = false;
176 : MemoryContext vac_context;
177 : ListCell *lc;
178 :
179 : /* index_cleanup and truncate values unspecified for now */
180 13704 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
181 13704 : params.truncate = VACOPTVALUE_UNSPECIFIED;
182 :
183 : /* By default parallel vacuum is enabled */
184 13704 : params.nworkers = 0;
185 :
186 : /* Will be set later if we recurse to a TOAST table. */
187 13704 : params.toast_parent = InvalidOid;
188 :
189 : /*
190 : * Set this to an invalid value so it is clear whether or not a
191 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
192 : */
193 13704 : ring_size = -1;
194 :
195 : /* Parse options list */
196 28282 : foreach(lc, vacstmt->options)
197 : {
198 14614 : DefElem *opt = (DefElem *) lfirst(lc);
199 :
200 : /* Parse common options for VACUUM and ANALYZE */
201 14614 : if (strcmp(opt->defname, "verbose") == 0)
202 40 : verbose = defGetBoolean(opt);
203 14574 : else if (strcmp(opt->defname, "skip_locked") == 0)
204 334 : skip_locked = defGetBoolean(opt);
205 14240 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
206 : {
207 : const char *hintmsg;
208 : int result;
209 : char *vac_buffer_size;
210 :
211 54 : vac_buffer_size = defGetString(opt);
212 :
213 : /*
214 : * Check that the specified value is valid and the size falls
215 : * within the hard upper and lower limits if it is not 0.
216 : */
217 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
218 48 : (result != 0 &&
219 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
220 : {
221 18 : ereport(ERROR,
222 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
223 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
224 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
225 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
226 : }
227 :
228 36 : ring_size = result;
229 : }
/*
 * Every option matched above is shared by both commands.  For a plain
 * ANALYZE command, any other option name is rejected here, so the branches
 * below are reached only for VACUUM.
 */
230 14186 : else if (!vacstmt->is_vacuumcmd)
231 6 : ereport(ERROR,
232 : (errcode(ERRCODE_SYNTAX_ERROR),
233 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
234 : parser_errposition(pstate, opt->location)));
235 :
236 : /* Parse options available on VACUUM */
237 14180 : else if (strcmp(opt->defname, "analyze") == 0)
238 2750 : analyze = defGetBoolean(opt);
239 11430 : else if (strcmp(opt->defname, "freeze") == 0)
240 2552 : freeze = defGetBoolean(opt);
241 8878 : else if (strcmp(opt->defname, "full") == 0)
242 386 : full = defGetBoolean(opt);
243 8492 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
244 214 : disable_page_skipping = defGetBoolean(opt);
245 8278 : else if (strcmp(opt->defname, "index_cleanup") == 0)
246 : {
247 : /* Interpret no string as the default, which is 'auto' */
248 174 : if (!opt->arg)
249 0 : params.index_cleanup = VACOPTVALUE_AUTO;
250 : else
251 : {
252 174 : char *sval = defGetString(opt);
253 :
254 : /* Try matching on 'auto' string, or fall back on boolean */
255 174 : if (pg_strcasecmp(sval, "auto") == 0)
256 6 : params.index_cleanup = VACOPTVALUE_AUTO;
257 : else
258 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
259 : }
260 : }
261 8104 : else if (strcmp(opt->defname, "process_main") == 0)
262 154 : process_main = defGetBoolean(opt);
263 7950 : else if (strcmp(opt->defname, "process_toast") == 0)
264 160 : process_toast = defGetBoolean(opt);
265 7790 : else if (strcmp(opt->defname, "truncate") == 0)
266 158 : params.truncate = get_vacoptval_from_boolean(opt);
267 7632 : else if (strcmp(opt->defname, "parallel") == 0)
268 : {
269 352 : if (opt->arg == NULL)
270 : {
271 6 : ereport(ERROR,
272 : (errcode(ERRCODE_SYNTAX_ERROR),
273 : errmsg("parallel option requires a value between 0 and %d",
274 : MAX_PARALLEL_WORKER_LIMIT),
275 : parser_errposition(pstate, opt->location)));
276 : }
277 : else
278 : {
279 : int nworkers;
280 :
281 346 : nworkers = defGetInt32(opt);
282 346 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
283 6 : ereport(ERROR,
284 : (errcode(ERRCODE_SYNTAX_ERROR),
285 : errmsg("parallel workers for vacuum must be between 0 and %d",
286 : MAX_PARALLEL_WORKER_LIMIT),
287 : parser_errposition(pstate, opt->location)));
288 :
289 : /*
290 : * Disable parallel vacuum, if user has specified parallel
291 : * degree as zero.
292 : */
293 340 : if (nworkers == 0)
294 156 : params.nworkers = -1;
295 : else
296 184 : params.nworkers = nworkers;
297 : }
298 : }
299 7280 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
300 7118 : skip_database_stats = defGetBoolean(opt);
301 162 : else if (strcmp(opt->defname, "only_database_stats") == 0)
302 162 : only_database_stats = defGetBoolean(opt);
303 : else
304 0 : ereport(ERROR,
305 : (errcode(ERRCODE_SYNTAX_ERROR),
306 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
307 : parser_errposition(pstate, opt->location)));
308 : }
309 :
310 : /* Set vacuum options */
/*
 * VACOPT_ANALYZE ends up set either because the command itself is ANALYZE
 * or because the "analyze" option was given to VACUUM.
 */
311 13668 : params.options =
312 13668 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
313 13668 : (verbose ? VACOPT_VERBOSE : 0) |
314 13668 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
315 13668 : (analyze ? VACOPT_ANALYZE : 0) |
316 13668 : (freeze ? VACOPT_FREEZE : 0) |
317 13668 : (full ? VACOPT_FULL : 0) |
318 13668 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
319 13668 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
320 13668 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
321 13668 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
322 13668 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
323 :
324 : /* sanity checks on options */
325 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
326 : Assert((params.options & VACOPT_VACUUM) ||
327 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
328 :
329 13668 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
330 6 : ereport(ERROR,
331 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
332 : errmsg("VACUUM FULL cannot be performed in parallel")));
333 :
334 : /*
335 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
336 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
337 : * we'll permit that.
338 : */
339 13662 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
340 6 : !(params.options & VACOPT_ANALYZE))
341 6 : ereport(ERROR,
342 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
343 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
344 :
345 : /*
346 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
347 : */
348 13656 : if (!(params.options & VACOPT_ANALYZE))
349 : {
350 12280 : foreach(lc, vacstmt->rels)
351 : {
352 6028 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
353 :
354 6028 : if (vrel->va_cols != NIL)
355 6 : ereport(ERROR,
356 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
357 : errmsg("ANALYZE option must be specified when a column list is provided")));
358 : }
359 : }
360 :
361 :
362 : /*
363 : * Sanity check DISABLE_PAGE_SKIPPING option.
364 : */
365 13650 : if ((params.options & VACOPT_FULL) != 0 &&
366 362 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
367 0 : ereport(ERROR,
368 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
369 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
370 :
371 : /* sanity check for PROCESS_TOAST */
372 13650 : if ((params.options & VACOPT_FULL) != 0 &&
373 362 : (params.options & VACOPT_PROCESS_TOAST) == 0)
374 6 : ereport(ERROR,
375 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
376 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
377 :
378 : /* sanity check for ONLY_DATABASE_STATS */
379 13644 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
380 : {
381 : Assert(params.options & VACOPT_VACUUM);
382 162 : if (vacstmt->rels != NIL)
383 6 : ereport(ERROR,
384 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
385 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
386 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
387 156 : if (params.options & ~(VACOPT_VACUUM |
388 : VACOPT_VERBOSE |
389 : VACOPT_PROCESS_MAIN |
390 : VACOPT_PROCESS_TOAST |
391 : VACOPT_ONLY_DATABASE_STATS))
392 0 : ereport(ERROR,
393 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
394 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
395 : }
396 :
397 : /*
398 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
399 : * them as -1 which means to use the default values.
400 : */
401 13638 : if (params.options & VACOPT_FREEZE)
402 : {
403 2552 : params.freeze_min_age = 0;
404 2552 : params.freeze_table_age = 0;
405 2552 : params.multixact_freeze_min_age = 0;
406 2552 : params.multixact_freeze_table_age = 0;
407 : }
408 : else
409 : {
410 11086 : params.freeze_min_age = -1;
411 11086 : params.freeze_table_age = -1;
412 11086 : params.multixact_freeze_min_age = -1;
413 11086 : params.multixact_freeze_table_age = -1;
414 : }
415 :
416 : /* user-invoked vacuum is never "for wraparound" */
417 13638 : params.is_wraparound = false;
418 :
419 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
420 13638 : params.log_min_duration = -1;
421 :
422 : /*
423 : * Later, in vacuum_rel(), we check if a reloption override was specified.
424 : */
425 13638 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
426 :
427 : /*
428 : * Create special memory context for cross-transaction storage.
429 : *
430 : * Since it is a child of PortalContext, it will go away eventually even
431 : * if we suffer an error; there's no need for special abort cleanup logic.
432 : */
433 13638 : vac_context = AllocSetContextCreate(PortalContext,
434 : "Vacuum",
435 : ALLOCSET_DEFAULT_SIZES);
436 :
437 : /*
438 : * Make a buffer strategy object in the cross-transaction memory context.
439 : * We needn't bother making this for VACUUM (FULL) or VACUUM
440 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
441 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
442 : * when we see ANALYZE.
443 : */
444 13638 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
445 512 : VACOPT_FULL)) == 0 ||
446 512 : (params.options & VACOPT_ANALYZE) != 0)
447 : {
448 :
449 13132 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
450 :
451 : Assert(ring_size >= -1);
452 :
453 : /*
454 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
455 : * command, it overrides the value of VacuumBufferUsageLimit. Either
456 : * value may be 0, in which case GetAccessStrategyWithSize() will
457 : * return NULL, effectively allowing full use of shared buffers.
458 : */
459 13132 : if (ring_size == -1)
460 13102 : ring_size = VacuumBufferUsageLimit;
461 :
462 13132 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
463 :
464 13132 : MemoryContextSwitchTo(old_context);
465 : }
466 :
467 : /* Now go through the common routine */
468 13638 : vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
469 :
470 : /* Finally, clean up the vacuum memory context */
471 13504 : MemoryContextDelete(vac_context);
472 13504 : }
473 :
474 : /*
475 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
476 : *
477 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
478 : * we process all relevant tables in the database. For each VacuumRelation,
479 : * if a valid OID is supplied, the table with that OID is what to process;
480 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
481 : *
482 : * params contains a set of parameters that can be used to customize the
483 : * behavior.
484 : *
485 : * bstrategy may be passed in as NULL when the caller does not want to
486 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
487 : * otherwise, the caller must build a BufferAccessStrategy with the number of
488 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
489 : * using.
490 : *
491 : * isTopLevel should be passed down from ProcessUtility.
492 : *
493 : * It is the caller's responsibility that all parameters are allocated in a
494 : * memory context that will not disappear at transaction commit.
495 : */
496 : void
497 206260 : vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
498 : MemoryContext vac_context, bool isTopLevel)
499 : {
500 : static bool in_vacuum = false;
501 :
502 : const char *stmttype;
503 : volatile bool in_outer_xact,
504 : use_own_xacts;
505 :
506 206260 : stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
507 :
508 : /*
509 : * We cannot run VACUUM inside a user transaction block; if we were inside
510 : * a transaction, then our commit- and start-transaction-command calls
511 : * would not have the intended effect! There are numerous other subtle
512 : * dependencies on this, too.
513 : *
514 : * ANALYZE (without VACUUM) can run either way.
515 : */
516 206260 : if (params.options & VACOPT_VACUUM)
517 : {
518 201380 : PreventInTransactionBlock(isTopLevel, stmttype);
519 201360 : in_outer_xact = false;
520 : }
521 : else
522 4880 : in_outer_xact = IsInTransactionBlock(isTopLevel);
523 :
524 : /*
525 : * Check for and disallow recursive calls. This could happen when VACUUM
526 : * FULL or ANALYZE calls a hostile index expression that itself calls
527 : * ANALYZE.
528 : */
529 206240 : if (in_vacuum)
530 12 : ereport(ERROR,
531 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
532 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
533 : stmttype)));
534 :
535 : /*
536 : * Build list of relation(s) to process, putting any new data in
537 : * vac_context for safekeeping.
538 : */
539 206228 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
540 : {
541 : /* We don't process any tables in this case */
542 : Assert(relations == NIL);
543 : }
544 206072 : else if (relations != NIL)
545 : {
546 205854 : List *newrels = NIL;
547 : ListCell *lc;
548 :
549 411806 : foreach(lc, relations)
550 : {
551 205988 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
552 : List *sublist;
553 : MemoryContext old_context;
554 :
555 205988 : sublist = expand_vacuum_rel(vrel, vac_context, params.options);
556 205952 : old_context = MemoryContextSwitchTo(vac_context);
557 205952 : newrels = list_concat(newrels, sublist);
558 205952 : MemoryContextSwitchTo(old_context);
559 : }
560 205818 : relations = newrels;
561 : }
562 : else
563 218 : relations = get_all_vacuum_rels(vac_context, params.options);
564 :
565 : /*
566 : * Decide whether we need to start/commit our own transactions.
567 : *
568 : * For VACUUM (with or without ANALYZE): always do so, so that we can
569 : * release locks as soon as possible. (We could possibly use the outer
570 : * transaction for a one-table VACUUM, but handling TOAST tables would be
571 : * problematic.)
572 : *
573 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
574 : * start/commit our own transactions. Also, there's no need to do so if
575 : * only processing one relation. For multiple relations when not within a
576 : * transaction block, and also in an autovacuum worker, use own
577 : * transactions so we can release locks sooner.
578 : */
579 206192 : if (params.options & VACOPT_VACUUM)
580 201348 : use_own_xacts = true;
581 : else
582 : {
583 : Assert(params.options & VACOPT_ANALYZE);
584 4844 : if (AmAutoVacuumWorkerProcess())
585 232 : use_own_xacts = true;
586 4612 : else if (in_outer_xact)
587 238 : use_own_xacts = false;
588 4374 : else if (list_length(relations) > 1)
589 766 : use_own_xacts = true;
590 : else
591 3608 : use_own_xacts = false;
592 : }
593 :
594 : /*
595 : * vacuum_rel expects to be entered with no transaction active; it will
596 : * start and commit its own transaction. But we are called by an SQL
597 : * command, and so we are executing inside a transaction already. We
598 : * commit the transaction started in PostgresMain() here, and start
599 : * another one before exiting to match the commit waiting for us back in
600 : * PostgresMain().
601 : */
602 206192 : if (use_own_xacts)
603 : {
604 : Assert(!in_outer_xact);
605 :
606 : /* ActiveSnapshot is not set by autovacuum */
607 202346 : if (ActiveSnapshotSet())
608 9724 : PopActiveSnapshot();
609 :
610 : /* matches the StartTransaction in PostgresMain() */
611 202346 : CommitTransactionCommand();
612 : }
613 :
614 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
615 206192 : PG_TRY();
616 : {
617 : ListCell *cur;
618 :
619 206192 : in_vacuum = true;
620 206192 : VacuumFailsafeActive = false;
621 206192 : VacuumUpdateCosts();
622 206192 : VacuumCostBalance = 0;
623 206192 : VacuumCostBalanceLocal = 0;
624 206192 : VacuumSharedCostBalance = NULL;
625 206192 : VacuumActiveNWorkers = NULL;
626 :
627 : /*
628 : * Loop to process each selected relation.
629 : */
630 429286 : foreach(cur, relations)
631 : {
632 223160 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
633 :
634 223160 : if (params.options & VACOPT_VACUUM)
635 : {
636 210134 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
637 102 : continue;
638 : }
639 :
640 223050 : if (params.options & VACOPT_ANALYZE)
641 : {
642 : /*
643 : * If using separate xacts, start one for analyze. Otherwise,
644 : * we can use the outer transaction.
645 : */
646 15990 : if (use_own_xacts)
647 : {
648 12194 : StartTransactionCommand();
649 : /* functions in indexes may want a snapshot set */
650 12194 : PushActiveSnapshot(GetTransactionSnapshot());
651 : }
652 :
653 15990 : analyze_rel(vrel->oid, vrel->relation, params,
654 : vrel->va_cols, in_outer_xact, bstrategy);
655 :
656 15932 : if (use_own_xacts)
657 : {
658 12156 : PopActiveSnapshot();
659 : /* standard_ProcessUtility() does CCI if !use_own_xacts */
660 12156 : CommandCounterIncrement();
661 12156 : CommitTransactionCommand();
662 : }
663 : else
664 : {
665 : /*
666 : * If we're not using separate xacts, better separate the
667 : * ANALYZE actions with CCIs. This avoids trouble if user
668 : * says "ANALYZE t, t".
669 : */
670 3776 : CommandCounterIncrement();
671 : }
672 : }
673 :
674 : /*
675 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
676 : * next relation.
677 : */
678 222992 : VacuumFailsafeActive = false;
679 : }
680 : }
681 66 : PG_FINALLY();
682 : {
683 206192 : in_vacuum = false;
684 206192 : VacuumCostActive = false;
685 206192 : VacuumFailsafeActive = false;
686 206192 : VacuumCostBalance = 0;
687 : }
688 206192 : PG_END_TRY();
689 :
690 : /*
691 : * Finish up processing.
692 : */
693 206126 : if (use_own_xacts)
694 : {
695 : /* here, we are not in a transaction */
696 :
697 : /*
698 : * This matches the CommitTransaction waiting for us in
699 : * PostgresMain().
700 : */
701 202300 : StartTransactionCommand();
702 : }
703 :
704 206126 : if ((params.options & VACOPT_VACUUM) &&
705 201314 : !(params.options & VACOPT_SKIP_DATABASE_STATS))
706 : {
707 : /*
708 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
709 : */
710 1808 : vac_update_datfrozenxid();
711 : }
712 :
713 206126 : }
714 :
715 : /*
716 : * Check if the current user has privileges to vacuum or analyze the relation.
717 : * If not, issue a WARNING log message and return false to let the caller
718 : * decide what to do with this relation. This routine is used to decide if a
719 : * relation can be processed for VACUUM or ANALYZE.
720 : */
721 : bool
722 266836 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
723 : bits32 options)
724 : {
725 : char *relname;
726 :
727 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
728 :
729 : /*----------
730 : * A role has privileges to vacuum or analyze the relation if any of the
731 : * following are true:
732 : * - the role owns the current database and the relation is not shared
733 : * - the role has the MAINTAIN privilege on the relation
734 : *----------
735 : */
736 266836 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
737 308642 : !reltuple->relisshared) ||
738 45364 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
739 263754 : return true;
740 :
741 3082 : relname = NameStr(reltuple->relname);
742 :
743 3082 : if ((options & VACOPT_VACUUM) != 0)
744 : {
745 224 : ereport(WARNING,
746 : (errmsg("permission denied to vacuum \"%s\", skipping it",
747 : relname)));
748 :
749 : /*
750 : * For VACUUM ANALYZE, both logs could show up, but just generate
751 : * information for VACUUM as that would be the first one to be
752 : * processed.
753 : */
754 224 : return false;
755 : }
756 :
757 2858 : if ((options & VACOPT_ANALYZE) != 0)
758 2858 : ereport(WARNING,
759 : (errmsg("permission denied to analyze \"%s\", skipping it",
760 : relname)));
761 :
762 2858 : return false;
763 : }
764 :
765 :
766 : /*
767 : * vacuum_open_relation
768 : *
769 : * This routine is used for attempting to open and lock a relation which
770 : * is going to be vacuumed or analyzed. If the relation cannot be opened
771 : * or locked, a log is emitted if possible.
772 : */
773 : Relation
774 235378 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
775 : bool verbose, LOCKMODE lmode)
776 : {
777 : Relation rel;
778 235378 : bool rel_lock = true;
779 : int elevel;
780 :
781 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
782 :
783 : /*
784 : * Open the relation and get the appropriate lock on it.
785 : *
786 : * There's a race condition here: the relation may have gone away since
787 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
788 : *
789 : * If we've been asked not to wait for the relation lock, acquire it first
790 : * in non-blocking mode, before calling try_relation_open().
791 : */
792 235378 : if (!(options & VACOPT_SKIP_LOCKED))
793 234358 : rel = try_relation_open(relid, lmode);
794 1020 : else if (ConditionalLockRelationOid(relid, lmode))
795 998 : rel = try_relation_open(relid, NoLock);
796 : else
797 : {
798 22 : rel = NULL;
799 22 : rel_lock = false;
800 : }
801 :
802 : /* if relation is opened, leave */
803 235378 : if (rel)
804 235344 : return rel;
805 :
806 : /*
807 : * Relation could not be opened, hence generate if possible a log
808 : * informing on the situation.
809 : *
810 : * If the RangeVar is not defined, we do not have enough information to
811 : * provide a meaningful log statement. Chances are that the caller has
812 : * intentionally not provided this information so that this logging is
813 : * skipped, anyway.
814 : */
815 34 : if (relation == NULL)
816 18 : return NULL;
817 :
818 : /*
819 : * Determine the log level.
820 : *
821 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
822 : * statements in the permission checks; otherwise, only log if the caller
823 : * so requested.
824 : */
825 16 : if (!AmAutoVacuumWorkerProcess())
826 14 : elevel = WARNING;
827 2 : else if (verbose)
828 2 : elevel = LOG;
829 : else
830 0 : return NULL;
831 :
832 16 : if ((options & VACOPT_VACUUM) != 0)
833 : {
834 12 : if (!rel_lock)
835 8 : ereport(elevel,
836 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
837 : errmsg("skipping vacuum of \"%s\" --- lock not available",
838 : relation->relname)));
839 : else
840 4 : ereport(elevel,
841 : (errcode(ERRCODE_UNDEFINED_TABLE),
842 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
843 : relation->relname)));
844 :
845 : /*
846 : * For VACUUM ANALYZE, both logs could show up, but just generate
847 : * information for VACUUM as that would be the first one to be
848 : * processed.
849 : */
850 12 : return NULL;
851 : }
852 :
853 4 : if ((options & VACOPT_ANALYZE) != 0)
854 : {
855 4 : if (!rel_lock)
856 2 : ereport(elevel,
857 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
858 : errmsg("skipping analyze of \"%s\" --- lock not available",
859 : relation->relname)));
860 : else
861 2 : ereport(elevel,
862 : (errcode(ERRCODE_UNDEFINED_TABLE),
863 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
864 : relation->relname)));
865 : }
866 :
867 4 : return NULL;
868 : }
869 :
870 :
871 : /*
872 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
873 : * and optionally add VacuumRelations for partitions or inheritance children.
874 : *
875 : * If a VacuumRelation does not have an OID supplied and is a partitioned
876 : * table, an extra entry will be added to the output for each partition.
877 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
878 : * it does not want us to expand partitioned tables.
879 : *
880 : * We take care not to modify the input data structure, but instead build
881 : * new VacuumRelation(s) to return. (But note that they will reference
882 : * unmodified parts of the input, eg column lists.) New data structures
883 : * are made in vac_context.
884 : */
885 : static List *
886 205988 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
887 : int options)
888 : {
889 205988 : List *vacrels = NIL;
890 : MemoryContext oldcontext;
891 :
892 : /* If caller supplied OID, there's nothing we need do here. */
893 205988 : if (OidIsValid(vrel->oid))
894 : {
895 192622 : oldcontext = MemoryContextSwitchTo(vac_context);
896 192622 : vacrels = lappend(vacrels, vrel);
897 192622 : MemoryContextSwitchTo(oldcontext);
898 : }
899 : else
900 : {
901 : /*
902 : * Process a specific relation, and possibly partitions or child
903 : * tables thereof.
904 : */
905 : Oid relid;
906 : HeapTuple tuple;
907 : Form_pg_class classForm;
908 : bool include_children;
909 : bool is_partitioned_table;
910 : int rvr_opts;
911 :
912 : /*
913 : * Since autovacuum workers supply OIDs when calling vacuum(), no
914 : * autovacuum worker should reach this code.
915 : */
916 : Assert(!AmAutoVacuumWorkerProcess());
917 :
918 : /*
919 : * We transiently take AccessShareLock to protect the syscache lookup
920 : * below, as well as find_all_inheritors's expectation that the caller
921 : * holds some lock on the starting relation.
922 : */
923 13366 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
924 13366 : relid = RangeVarGetRelidExtended(vrel->relation,
925 : AccessShareLock,
926 : rvr_opts,
927 : NULL, NULL);
928 :
929 : /*
930 : * If the lock is unavailable, emit the same log statement that
931 : * vacuum_rel() and analyze_rel() would.
932 : */
933 13330 : if (!OidIsValid(relid))
934 : {
935 8 : if (options & VACOPT_VACUUM)
936 6 : ereport(WARNING,
937 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
938 : errmsg("skipping vacuum of \"%s\" --- lock not available",
939 : vrel->relation->relname)));
940 : else
941 2 : ereport(WARNING,
942 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
943 : errmsg("skipping analyze of \"%s\" --- lock not available",
944 : vrel->relation->relname)));
945 8 : return vacrels;
946 : }
947 :
948 : /*
949 : * To check whether the relation is a partitioned table and its
950 : * ownership, fetch its syscache entry.
951 : */
952 13322 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
953 13322 : if (!HeapTupleIsValid(tuple))
954 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
955 13322 : classForm = (Form_pg_class) GETSTRUCT(tuple);
956 :
957 : /*
958 : * Make a returnable VacuumRelation for this rel if the user has the
959 : * required privileges.
960 : */
961 13322 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
962 : {
963 13090 : oldcontext = MemoryContextSwitchTo(vac_context);
964 13090 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
965 : relid,
966 : vrel->va_cols));
967 13090 : MemoryContextSwitchTo(oldcontext);
968 : }
969 :
970 : /*
971 : * Vacuuming a partitioned table with ONLY will not do anything since
972 : * the partitioned table itself is empty. Issue a warning if the user
973 : * requests this.
974 : */
975 13322 : include_children = vrel->relation->inh;
976 13322 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
977 13322 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
978 6 : ereport(WARNING,
979 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
980 : vrel->relation->relname)));
981 :
982 13322 : ReleaseSysCache(tuple);
983 :
984 : /*
985 : * Unless the user has specified ONLY, make relation list entries for
986 : * its partitions or inheritance child tables. Note that the list
987 : * returned by find_all_inheritors() includes the passed-in OID, so we
988 : * have to skip that. There's no point in taking locks on the
989 : * individual partitions or child tables yet, and doing so would just
990 : * add unnecessary deadlock risk. For this last reason, we do not yet
991 : * check the ownership of the partitions/tables, which get added to
992 : * the list to process. Ownership will be checked later on anyway.
993 : */
994 13322 : if (include_children)
995 : {
996 13292 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
997 : ListCell *part_lc;
998 :
999 28652 : foreach(part_lc, part_oids)
1000 : {
1001 15360 : Oid part_oid = lfirst_oid(part_lc);
1002 :
1003 15360 : if (part_oid == relid)
1004 13292 : continue; /* ignore original table */
1005 :
1006 : /*
1007 : * We omit a RangeVar since it wouldn't be appropriate to
1008 : * complain about failure to open one of these relations
1009 : * later.
1010 : */
1011 2068 : oldcontext = MemoryContextSwitchTo(vac_context);
1012 2068 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1013 : part_oid,
1014 : vrel->va_cols));
1015 2068 : MemoryContextSwitchTo(oldcontext);
1016 : }
1017 : }
1018 :
1019 : /*
1020 : * Release lock again. This means that by the time we actually try to
1021 : * process the table, it might be gone or renamed. In the former case
1022 : * we'll silently ignore it; in the latter case we'll process it
1023 : * anyway, but we must beware that the RangeVar doesn't necessarily
1024 : * identify it anymore. This isn't ideal, perhaps, but there's little
1025 : * practical alternative, since we're typically going to commit this
1026 : * transaction and begin a new one between now and then. Moreover,
1027 : * holding locks on multiple relations would create significant risk
1028 : * of deadlock.
1029 : */
1030 13322 : UnlockRelationOid(relid, AccessShareLock);
1031 : }
1032 :
1033 205944 : return vacrels;
1034 : }
1035 :
1036 : /*
1037 : * Construct a list of VacuumRelations for all vacuumable rels in
1038 : * the current database. The list is built in vac_context.
1039 : */
1040 : static List *
1041 218 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1042 : {
1043 218 : List *vacrels = NIL;
1044 : Relation pgclass;
1045 : TableScanDesc scan;
1046 : HeapTuple tuple;
1047 :
1048 218 : pgclass = table_open(RelationRelationId, AccessShareLock);
1049 :
1050 218 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1051 :
1052 98332 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1053 : {
1054 98114 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1055 : MemoryContext oldcontext;
1056 98114 : Oid relid = classForm->oid;
1057 :
1058 : /*
1059 : * We include partitioned tables here; depending on which operation is
1060 : * to be performed, caller will decide whether to process or ignore
1061 : * them.
1062 : */
1063 98114 : if (classForm->relkind != RELKIND_RELATION &&
1064 80156 : classForm->relkind != RELKIND_MATVIEW &&
1065 80108 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1066 79944 : continue;
1067 :
1068 : /* check permissions of relation */
1069 18170 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1070 2742 : continue;
1071 :
1072 : /*
1073 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1074 : * We omit a RangeVar since it wouldn't be appropriate to complain
1075 : * about failure to open one of these relations later.
1076 : */
1077 15428 : oldcontext = MemoryContextSwitchTo(vac_context);
1078 15428 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1079 : relid,
1080 : NIL));
1081 15428 : MemoryContextSwitchTo(oldcontext);
1082 : }
1083 :
1084 218 : table_endscan(scan);
1085 218 : table_close(pgclass, AccessShareLock);
1086 :
1087 218 : return vacrels;
1088 : }
1089 :
1090 : /*
1091 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1092 : *
1093 : * The target relation and VACUUM parameters are our inputs.
1094 : *
1095 : * Output parameters are the cutoffs that VACUUM caller should use.
1096 : *
1097 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1098 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1099 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1100 : * minimum).
1101 : */
1102 : bool
1103 219132 : vacuum_get_cutoffs(Relation rel, const VacuumParams params,
1104 : struct VacuumCutoffs *cutoffs)
1105 : {
1106 : int freeze_min_age,
1107 : multixact_freeze_min_age,
1108 : freeze_table_age,
1109 : multixact_freeze_table_age,
1110 : effective_multixact_freeze_max_age;
1111 : TransactionId nextXID,
1112 : safeOldestXmin,
1113 : aggressiveXIDCutoff;
1114 : MultiXactId nextMXID,
1115 : safeOldestMxact,
1116 : aggressiveMXIDCutoff;
1117 :
1118 : /* Use mutable copies of freeze age parameters */
1119 219132 : freeze_min_age = params.freeze_min_age;
1120 219132 : multixact_freeze_min_age = params.multixact_freeze_min_age;
1121 219132 : freeze_table_age = params.freeze_table_age;
1122 219132 : multixact_freeze_table_age = params.multixact_freeze_table_age;
1123 :
1124 : /* Set pg_class fields in cutoffs */
1125 219132 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1126 219132 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1127 :
1128 : /*
1129 : * Acquire OldestXmin.
1130 : *
1131 : * We can always ignore processes running lazy vacuum. This is because we
1132 : * use these values only for deciding which tuples we must keep in the
1133 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1134 : * XID assigned), it's safe to ignore it. In theory it could be
1135 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1136 : * that only one vacuum process can be working on a particular table at
1137 : * any time, and that each vacuum is always an independent transaction.
1138 : */
1139 219132 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1140 :
1141 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1142 :
1143 : /* Acquire OldestMxact */
1144 219132 : cutoffs->OldestMxact = GetOldestMultiXactId();
1145 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1146 :
1147 : /* Acquire next XID/next MXID values used to apply age-based settings */
1148 219132 : nextXID = ReadNextTransactionId();
1149 219132 : nextMXID = ReadNextMultiXactId();
1150 :
1151 : /*
1152 : * Also compute the multixact age for which freezing is urgent. This is
1153 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1154 : * short of multixact member space.
1155 : */
1156 219132 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1157 :
1158 : /*
1159 : * Almost ready to set freeze output parameters; check if OldestXmin or
1160 : * OldestMxact are held back to an unsafe degree before we start on that
1161 : */
1162 219132 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1163 219132 : if (!TransactionIdIsNormal(safeOldestXmin))
1164 0 : safeOldestXmin = FirstNormalTransactionId;
1165 219132 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1166 219132 : if (safeOldestMxact < FirstMultiXactId)
1167 0 : safeOldestMxact = FirstMultiXactId;
1168 219132 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1169 154650 : ereport(WARNING,
1170 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1171 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1172 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1173 219132 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1174 0 : ereport(WARNING,
1175 : (errmsg("cutoff for freezing multixacts is far in the past"),
1176 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1177 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1178 :
1179 : /*
1180 : * Determine the minimum freeze age to use: as specified by the caller, or
1181 : * vacuum_freeze_min_age, but in any case not more than half
1182 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1183 : * wraparound won't occur too frequently.
1184 : */
1185 219132 : if (freeze_min_age < 0)
1186 11218 : freeze_min_age = vacuum_freeze_min_age;
1187 219132 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1188 : Assert(freeze_min_age >= 0);
1189 :
1190 : /* Compute FreezeLimit, being careful to generate a normal XID */
1191 219132 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1192 219132 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1193 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1194 : /* FreezeLimit must always be <= OldestXmin */
1195 219132 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1196 181244 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1197 :
1198 : /*
1199 : * Determine the minimum multixact freeze age to use: as specified by
1200 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1201 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1202 : * prevent MultiXact wraparound won't occur too frequently.
1203 : */
1204 219132 : if (multixact_freeze_min_age < 0)
1205 11218 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1206 219132 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1207 : effective_multixact_freeze_max_age / 2);
1208 : Assert(multixact_freeze_min_age >= 0);
1209 :
1210 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1211 219132 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1212 219132 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1213 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1214 : /* MultiXactCutoff must always be <= OldestMxact */
1215 219132 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1216 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1217 :
1218 : /*
1219 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1220 : *
1221 : * Determine the table freeze age to use: as specified by the caller, or
1222 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1223 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1224 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1225 : * anti-wraparound autovacuum is launched.
1226 : */
1227 219132 : if (freeze_table_age < 0)
1228 11218 : freeze_table_age = vacuum_freeze_table_age;
1229 219132 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1230 : Assert(freeze_table_age >= 0);
1231 219132 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1232 219132 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1233 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1234 219132 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1235 : aggressiveXIDCutoff))
1236 208000 : return true;
1237 :
1238 : /*
1239 : * Similar to the above, determine the table freeze age to use for
1240 : * multixacts: as specified by the caller, or the value of the
1241 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1242 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1243 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1244 : * multixacts before anti-wraparound autovacuum is launched.
1245 : */
1246 11132 : if (multixact_freeze_table_age < 0)
1247 11000 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1248 11132 : multixact_freeze_table_age =
1249 11132 : Min(multixact_freeze_table_age,
1250 : effective_multixact_freeze_max_age * 0.95);
1251 : Assert(multixact_freeze_table_age >= 0);
1252 11132 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1253 11132 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1254 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1255 11132 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1256 : aggressiveMXIDCutoff))
1257 0 : return true;
1258 :
1259 : /* Non-aggressive VACUUM */
1260 11132 : return false;
1261 : }
1262 :
1263 : /*
1264 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1265 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1266 : * dangerously far in the past.
1267 : *
1268 : * When we return true, VACUUM caller triggers the failsafe.
1269 : */
1270 : bool
1271 222102 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1272 : {
1273 222102 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1274 222102 : MultiXactId relminmxid = cutoffs->relminmxid;
1275 : TransactionId xid_skip_limit;
1276 : MultiXactId multi_skip_limit;
1277 : int skip_index_vacuum;
1278 :
1279 : Assert(TransactionIdIsNormal(relfrozenxid));
1280 : Assert(MultiXactIdIsValid(relminmxid));
1281 :
1282 : /*
1283 : * Determine the index skipping age to use. In any case no less than
1284 : * autovacuum_freeze_max_age * 1.05.
1285 : */
1286 222102 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1287 :
1288 222102 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1289 222102 : if (!TransactionIdIsNormal(xid_skip_limit))
1290 0 : xid_skip_limit = FirstNormalTransactionId;
1291 :
1292 222102 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1293 : {
1294 : /* The table's relfrozenxid is too old */
1295 73110 : return true;
1296 : }
1297 :
1298 : /*
1299 : * Similar to above, determine the index skipping age to use for
1300 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1301 : * 1.05.
1302 : */
1303 148992 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1304 : autovacuum_multixact_freeze_max_age * 1.05);
1305 :
1306 148992 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1307 148992 : if (multi_skip_limit < FirstMultiXactId)
1308 0 : multi_skip_limit = FirstMultiXactId;
1309 :
1310 148992 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1311 : {
1312 : /* The table's relminmxid is too old */
1313 0 : return true;
1314 : }
1315 :
1316 148992 : return false;
1317 : }
1318 :
1319 : /*
1320 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1321 : *
1322 : * If we scanned the whole relation then we should just use the count of
1323 : * live tuples seen; but if we did not, we should not blindly extrapolate
1324 : * from that number, since VACUUM may have scanned a quite nonrandom
1325 : * subset of the table. When we have only partial information, we take
1326 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1327 : * of the tuple density in the unscanned pages.
1328 : *
1329 : * Note: scanned_tuples should count only *live* tuples, since
1330 : * pg_class.reltuples is defined that way.
1331 : */
1332 : double
1333 218576 : vac_estimate_reltuples(Relation relation,
1334 : BlockNumber total_pages,
1335 : BlockNumber scanned_pages,
1336 : double scanned_tuples)
1337 : {
1338 218576 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1339 218576 : double old_rel_tuples = relation->rd_rel->reltuples;
1340 : double old_density;
1341 : double unscanned_pages;
1342 : double total_tuples;
1343 :
1344 : /* If we did scan the whole table, just use the count as-is */
1345 218576 : if (scanned_pages >= total_pages)
1346 210816 : return scanned_tuples;
1347 :
1348 : /*
1349 : * When successive VACUUM commands scan the same few pages again and
1350 : * again, without anything from the table really changing, there is a risk
1351 : * that our beliefs about tuple density will gradually become distorted.
1352 : * This might be caused by vacuumlazy.c implementation details, such as
1353 : * its tendency to always scan the last heap page. Handle that here.
1354 : *
1355 : * If the relation is _exactly_ the same size according to the existing
1356 : * pg_class entry, and only a few of its pages (less than 2%) were
1357 : * scanned, keep the existing value of reltuples. Also keep the existing
1358 : * value when only a subset of rel's pages <= a single page were scanned.
1359 : *
1360 : * (Note: we might be returning -1 here.)
1361 : */
1362 7760 : if (old_rel_pages == total_pages &&
1363 7730 : scanned_pages < (double) total_pages * 0.02)
1364 5620 : return old_rel_tuples;
1365 2140 : if (scanned_pages <= 1)
1366 1924 : return old_rel_tuples;
1367 :
1368 : /*
1369 : * If old density is unknown, we can't do much except scale up
1370 : * scanned_tuples to match total_pages.
1371 : */
1372 216 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1373 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1374 :
1375 : /*
1376 : * Okay, we've covered the corner cases. The normal calculation is to
1377 : * convert the old measurement to a density (tuples per page), then
1378 : * estimate the number of tuples in the unscanned pages using that figure,
1379 : * and finally add on the number of tuples in the scanned pages.
1380 : */
1381 214 : old_density = old_rel_tuples / old_rel_pages;
1382 214 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1383 214 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1384 214 : return floor(total_tuples + 0.5);
1385 : }
1386 :
1387 :
1388 : /*
1389 : * vac_update_relstats() -- update statistics for one relation
1390 : *
1391 : * Update the whole-relation statistics that are kept in its pg_class
1392 : * row. There are additional stats that will be updated if we are
1393 : * doing ANALYZE, but we always update these stats. This routine works
1394 : * for both index and heap relation entries in pg_class.
1395 : *
1396 : * We violate transaction semantics here by overwriting the rel's
1397 : * existing pg_class tuple with the new values. This is reasonably
1398 : * safe as long as we're sure that the new values are correct whether or
1399 : * not this transaction commits. The reason for doing this is that if
1400 : * we updated these tuples in the usual way, vacuuming pg_class itself
1401 : * wouldn't work very well --- by the time we got done with a vacuum
1402 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1403 : * course, this only works for fixed-size not-null columns, but these are.
1404 : *
1405 : * Another reason for doing it this way is that when we are in a lazy
1406 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1407 : * Somebody vacuuming pg_class might think they could delete a tuple
1408 : * marked with xmin = our xid.
1409 : *
1410 : * In addition to fundamentally nontransactional statistics such as
1411 : * relpages and relallvisible, we try to maintain certain lazily-updated
1412 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1413 : * It's safe to do this in VACUUM, which can't run in parallel with
1414 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1415 : * However, it's *not* safe to do it in an ANALYZE that's within an
1416 : * outer transaction, because for example the current transaction might
1417 : * have dropped the last index; then we'd think relhasindex should be
1418 : * cleared, but if the transaction later rolls back this would be wrong.
1419 : * So we refrain from updating the DDL flags if we're inside an outer
1420 : * transaction. This is OK since postponing the flag maintenance is
1421 : * always allowable.
1422 : *
1423 : * Note: num_tuples should count only *live* tuples, since
1424 : * pg_class.reltuples is defined that way.
1425 : *
1426 : * This routine is shared by VACUUM and ANALYZE.
1427 : */
1428 : void
1429 259248 : vac_update_relstats(Relation relation,
1430 : BlockNumber num_pages, double num_tuples,
1431 : BlockNumber num_all_visible_pages,
1432 : BlockNumber num_all_frozen_pages,
1433 : bool hasindex, TransactionId frozenxid,
1434 : MultiXactId minmulti,
1435 : bool *frozenxid_updated, bool *minmulti_updated,
1436 : bool in_outer_xact)
1437 : {
1438 259248 : Oid relid = RelationGetRelid(relation);
1439 : Relation rd;
1440 : ScanKeyData key[1];
1441 : HeapTuple ctup;
1442 : void *inplace_state;
1443 : Form_pg_class pgcform;
1444 : bool dirty,
1445 : futurexid,
1446 : futuremxid;
1447 : TransactionId oldfrozenxid;
1448 : MultiXactId oldminmulti;
1449 :
1450 259248 : rd = table_open(RelationRelationId, RowExclusiveLock);
1451 :
1452 : /* Fetch a copy of the tuple to scribble on */
1453 259248 : ScanKeyInit(&key[0],
1454 : Anum_pg_class_oid,
1455 : BTEqualStrategyNumber, F_OIDEQ,
1456 : ObjectIdGetDatum(relid));
1457 259248 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1458 : NULL, 1, key, &ctup, &inplace_state);
1459 259246 : if (!HeapTupleIsValid(ctup))
1460 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1461 : relid);
1462 259246 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1463 :
1464 : /* Apply statistical updates, if any, to copied tuple */
1465 :
1466 259246 : dirty = false;
1467 259246 : if (pgcform->relpages != (int32) num_pages)
1468 : {
1469 9148 : pgcform->relpages = (int32) num_pages;
1470 9148 : dirty = true;
1471 : }
1472 259246 : if (pgcform->reltuples != (float4) num_tuples)
1473 : {
1474 19912 : pgcform->reltuples = (float4) num_tuples;
1475 19912 : dirty = true;
1476 : }
1477 259246 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1478 : {
1479 5734 : pgcform->relallvisible = (int32) num_all_visible_pages;
1480 5734 : dirty = true;
1481 : }
1482 259246 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1483 : {
1484 5196 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1485 5196 : dirty = true;
1486 : }
1487 :
1488 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1489 :
1490 259246 : if (!in_outer_xact)
1491 : {
1492 : /*
1493 : * If we didn't find any indexes, reset relhasindex.
1494 : */
1495 258932 : if (pgcform->relhasindex && !hasindex)
1496 : {
1497 18 : pgcform->relhasindex = false;
1498 18 : dirty = true;
1499 : }
1500 :
1501 : /* We also clear relhasrules and relhastriggers if needed */
1502 258932 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1503 : {
1504 0 : pgcform->relhasrules = false;
1505 0 : dirty = true;
1506 : }
1507 258932 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1508 : {
1509 6 : pgcform->relhastriggers = false;
1510 6 : dirty = true;
1511 : }
1512 : }
1513 :
1514 : /*
1515 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1516 : * indicating it has no new data.
1517 : *
1518 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1519 : * stored relfrozenxid is "in the future" then it seems best to assume
1520 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1521 : * This should match vac_update_datfrozenxid() concerning what we consider
1522 : * to be "in the future".
1523 : */
1524 259246 : oldfrozenxid = pgcform->relfrozenxid;
1525 259246 : futurexid = false;
1526 259246 : if (frozenxid_updated)
1527 218570 : *frozenxid_updated = false;
1528 259246 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1529 : {
1530 61586 : bool update = false;
1531 :
1532 61586 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1533 61494 : update = true;
1534 92 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1535 0 : futurexid = update = true;
1536 :
1537 61586 : if (update)
1538 : {
1539 61494 : pgcform->relfrozenxid = frozenxid;
1540 61494 : dirty = true;
1541 61494 : if (frozenxid_updated)
1542 61494 : *frozenxid_updated = true;
1543 : }
1544 : }
1545 :
1546 : /* Similarly for relminmxid */
1547 259246 : oldminmulti = pgcform->relminmxid;
1548 259246 : futuremxid = false;
1549 259246 : if (minmulti_updated)
1550 218570 : *minmulti_updated = false;
1551 259246 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1552 : {
1553 286 : bool update = false;
1554 :
1555 286 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1556 286 : update = true;
1557 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1558 0 : futuremxid = update = true;
1559 :
1560 286 : if (update)
1561 : {
1562 286 : pgcform->relminmxid = minmulti;
1563 286 : dirty = true;
1564 286 : if (minmulti_updated)
1565 286 : *minmulti_updated = true;
1566 : }
1567 : }
1568 :
1569 : /* If anything changed, write out the tuple. */
1570 259246 : if (dirty)
1571 75440 : systable_inplace_update_finish(inplace_state, ctup);
1572 : else
1573 183806 : systable_inplace_update_cancel(inplace_state);
1574 :
1575 259246 : table_close(rd, RowExclusiveLock);
1576 :
1577 259246 : if (futurexid)
1578 0 : ereport(WARNING,
1579 : (errcode(ERRCODE_DATA_CORRUPTED),
1580 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1581 : oldfrozenxid, frozenxid,
1582 : RelationGetRelationName(relation))));
1583 259246 : if (futuremxid)
1584 0 : ereport(WARNING,
1585 : (errcode(ERRCODE_DATA_CORRUPTED),
1586 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1587 : oldminmulti, minmulti,
1588 : RelationGetRelationName(relation))));
1589 259246 : }
1590 :
1591 :
1592 : /*
1593 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1594 : *
1595 : * Update pg_database's datfrozenxid entry for our database to be the
1596 : * minimum of the pg_class.relfrozenxid values.
1597 : *
1598 : * Similarly, update our datminmxid to be the minimum of the
1599 : * pg_class.relminmxid values.
1600 : *
1601 : * If we are able to advance either pg_database value, also try to
1602 : * truncate pg_xact and pg_multixact.
1603 : *
1604 : * We violate transaction semantics here by overwriting the database's
1605 : * existing pg_database tuple with the new values. This is reasonably
1606 : * safe since the new values are correct whether or not this transaction
1607 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1608 : * behind after a VACUUM.
1609 : */
1610 : void
1611 5214 : vac_update_datfrozenxid(void)
1612 : {
1613 : HeapTuple tuple;
1614 : Form_pg_database dbform;
1615 : Relation relation;
1616 : SysScanDesc scan;
1617 : HeapTuple classTup;
1618 : TransactionId newFrozenXid;
1619 : MultiXactId newMinMulti;
1620 : TransactionId lastSaneFrozenXid;
1621 : MultiXactId lastSaneMinMulti;
1622 5214 : bool bogus = false;
1623 5214 : bool dirty = false;
1624 : ScanKeyData key[1];
1625 : void *inplace_state;
1626 :
1627 : /*
1628 : * Restrict this task to one backend per database. This avoids race
1629 : * conditions that would move datfrozenxid or datminmxid backward. It
1630 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1631 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1632 : */
1633 5214 : LockDatabaseFrozenIds(ExclusiveLock);
1634 :
1635 : /*
1636 : * Initialize the "min" calculation with
1637 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1638 : * approximation to the minimum relfrozenxid for not-yet-committed
1639 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1640 : * cannot produce a wrong minimum by starting with this.
1641 : */
1642 5214 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1643 :
1644 : /*
1645 : * Similarly, initialize the MultiXact "min" with the value that would be
1646 : * used on pg_class for new tables. See AddNewRelationTuple().
1647 : */
1648 5214 : newMinMulti = GetOldestMultiXactId();
1649 :
1650 : /*
1651 : * Identify the latest relfrozenxid and relminmxid values that we could
1652 : * validly see during the scan. These are conservative values, but it's
1653 : * not really worth trying to be more exact.
1654 : */
1655 5214 : lastSaneFrozenXid = ReadNextTransactionId();
1656 5214 : lastSaneMinMulti = ReadNextMultiXactId();
1657 :
1658 : /*
1659 : * We must seqscan pg_class to find the minimum Xid, because there is no
1660 : * index that can help us here.
1661 : *
1662 : * See vac_truncate_clog() for the race condition to prevent.
1663 : */
1664 5214 : relation = table_open(RelationRelationId, AccessShareLock);
1665 :
1666 5214 : scan = systable_beginscan(relation, InvalidOid, false,
1667 : NULL, 0, NULL);
1668 :
1669 2608964 : while ((classTup = systable_getnext(scan)) != NULL)
1670 : {
1671 2603750 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1672 2603750 : TransactionId relfrozenxid = classForm->relfrozenxid;
1673 2603750 : TransactionId relminmxid = classForm->relminmxid;
1674 :
1675 : /*
1676 : * Only consider relations able to hold unfrozen XIDs (anything else
1677 : * should have InvalidTransactionId in relfrozenxid anyway).
1678 : */
1679 2603750 : if (classForm->relkind != RELKIND_RELATION &&
1680 2074304 : classForm->relkind != RELKIND_MATVIEW &&
1681 2071956 : classForm->relkind != RELKIND_TOASTVALUE)
1682 : {
1683 : Assert(!TransactionIdIsValid(relfrozenxid));
1684 : Assert(!MultiXactIdIsValid(relminmxid));
1685 1797212 : continue;
1686 : }
1687 :
1688 : /*
1689 : * Some table AMs might not need per-relation xid / multixid horizons.
1690 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1691 : * to not be set (i.e. set to their respective Invalid*Id)
1692 : * independently. Thus validate and compute horizon for each only if
1693 : * set.
1694 : *
1695 : * If things are working properly, no relation should have a
1696 : * relfrozenxid or relminmxid that is "in the future". However, such
1697 : * cases have been known to arise due to bugs in pg_upgrade. If we
1698 : * see any entries that are "in the future", chicken out and don't do
1699 : * anything. This ensures we won't truncate clog & multixact SLRUs
1700 : * before those relations have been scanned and cleaned up.
1701 : */
1702 :
1703 806538 : if (TransactionIdIsValid(relfrozenxid))
1704 : {
1705 : Assert(TransactionIdIsNormal(relfrozenxid));
1706 :
1707 : /* check for values in the future */
1708 806538 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1709 : {
1710 0 : bogus = true;
1711 0 : break;
1712 : }
1713 :
1714 : /* determine new horizon */
1715 806538 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1716 5062 : newFrozenXid = relfrozenxid;
1717 : }
1718 :
1719 806538 : if (MultiXactIdIsValid(relminmxid))
1720 : {
1721 : /* check for values in the future */
1722 806538 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1723 : {
1724 0 : bogus = true;
1725 0 : break;
1726 : }
1727 :
1728 : /* determine new horizon */
1729 806538 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1730 214 : newMinMulti = relminmxid;
1731 : }
1732 : }
1733 :
1734 : /* we're done with pg_class */
1735 5214 : systable_endscan(scan);
1736 5214 : table_close(relation, AccessShareLock);
1737 :
1738 : /* chicken out if bogus data found */
1739 5214 : if (bogus)
1740 0 : return;
1741 :
1742 : Assert(TransactionIdIsNormal(newFrozenXid));
1743 : Assert(MultiXactIdIsValid(newMinMulti));
1744 :
1745 : /* Now fetch the pg_database tuple we need to update. */
1746 5214 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1747 :
1748 : /*
1749 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1750 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1751 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1752 : * For now, don't optimize that.
1753 : */
1754 5214 : ScanKeyInit(&key[0],
1755 : Anum_pg_database_oid,
1756 : BTEqualStrategyNumber, F_OIDEQ,
1757 : ObjectIdGetDatum(MyDatabaseId));
1758 :
1759 5214 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1760 : NULL, 1, key, &tuple, &inplace_state);
1761 :
1762 5214 : if (!HeapTupleIsValid(tuple))
1763 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1764 :
1765 5214 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1766 :
1767 : /*
1768 : * As in vac_update_relstats(), we ordinarily don't want to let
1769 : * datfrozenxid go backward; but if it's "in the future" then it must be
1770 : * corrupt and it seems best to overwrite it.
1771 : */
1772 5918 : if (dbform->datfrozenxid != newFrozenXid &&
1773 704 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1774 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1775 : {
1776 704 : dbform->datfrozenxid = newFrozenXid;
1777 704 : dirty = true;
1778 : }
1779 : else
1780 4510 : newFrozenXid = dbform->datfrozenxid;
1781 :
1782 : /* Ditto for datminmxid */
1783 5216 : if (dbform->datminmxid != newMinMulti &&
1784 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1785 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1786 : {
1787 2 : dbform->datminmxid = newMinMulti;
1788 2 : dirty = true;
1789 : }
1790 : else
1791 5212 : newMinMulti = dbform->datminmxid;
1792 :
1793 5214 : if (dirty)
1794 704 : systable_inplace_update_finish(inplace_state, tuple);
1795 : else
1796 4510 : systable_inplace_update_cancel(inplace_state);
1797 :
1798 5214 : heap_freetuple(tuple);
1799 5214 : table_close(relation, RowExclusiveLock);
1800 :
1801 : /*
1802 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1803 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1804 : * XID-wrap-limit info is stale, since this action will update that too.
1805 : */
1806 5214 : if (dirty || ForceTransactionIdLimitUpdate())
1807 2176 : vac_truncate_clog(newFrozenXid, newMinMulti,
1808 : lastSaneFrozenXid, lastSaneMinMulti);
1809 : }
1810 :
1811 :
1812 : /*
1813 : * vac_truncate_clog() -- attempt to truncate the commit log
1814 : *
1815 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1816 : * and use it to truncate the transaction commit log (pg_xact).
1817 : * Also update the XID wrap limit info maintained by varsup.c.
1818 : * Likewise for datminmxid.
1819 : *
1820 : * The passed frozenXID and minMulti are the updated values for my own
1821 : * pg_database entry. They're used to initialize the "min" calculations.
1822 : * The caller also passes the "last sane" XID and MXID, since it has
1823 : * those at hand already.
1824 : *
1825 : * This routine is only invoked when we've managed to change our
1826 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1827 : * XID-wrap-limit info is stale.
1828 : */
1829 : static void
1830 2176 : vac_truncate_clog(TransactionId frozenXID,
1831 : MultiXactId minMulti,
1832 : TransactionId lastSaneFrozenXid,
1833 : MultiXactId lastSaneMinMulti)
1834 : {
1835 2176 : TransactionId nextXID = ReadNextTransactionId();
1836 : Relation relation;
1837 : TableScanDesc scan;
1838 : HeapTuple tuple;
1839 : Oid oldestxid_datoid;
1840 : Oid minmulti_datoid;
1841 2176 : bool bogus = false;
1842 2176 : bool frozenAlreadyWrapped = false;
1843 :
1844 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1845 2176 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1846 :
1847 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1848 2176 : oldestxid_datoid = MyDatabaseId;
1849 2176 : minmulti_datoid = MyDatabaseId;
1850 :
1851 : /*
1852 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1853 : *
1854 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1855 : * the values could change while we look at them. Fetch each one just
1856 : * once to ensure sane behavior of the comparison logic. (Here, as in
1857 : * many other places, we assume that fetching or updating an XID in shared
1858 : * storage is atomic.)
1859 : *
1860 : * Note: we need not worry about a race condition with new entries being
1861 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1862 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1863 : * of the interlock against copying a DB containing an active backend.
1864 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1865 : * concurrently modify the datfrozenxid's of different databases, the
1866 : * worst possible outcome is that pg_xact is not truncated as aggressively
1867 : * as it could be.
1868 : */
1869 2176 : relation = table_open(DatabaseRelationId, AccessShareLock);
1870 :
1871 2176 : scan = table_beginscan_catalog(relation, 0, NULL);
1872 :
1873 8530 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1874 : {
1875 6354 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1876 6354 : TransactionId datfrozenxid = dbform->datfrozenxid;
1877 6354 : TransactionId datminmxid = dbform->datminmxid;
1878 :
1879 : Assert(TransactionIdIsNormal(datfrozenxid));
1880 : Assert(MultiXactIdIsValid(datminmxid));
1881 :
1882 : /*
1883 : * If database is in the process of getting dropped, or has been
1884 : * interrupted while doing so, no connections to it are possible
1885 : * anymore. Therefore we don't need to take it into account here.
1886 : * Which is good, because it can't be processed by autovacuum either.
1887 : */
1888 6354 : if (database_is_invalid_form((Form_pg_database) dbform))
1889 : {
1890 2 : elog(DEBUG2,
1891 : "skipping invalid database \"%s\" while computing relfrozenxid",
1892 : NameStr(dbform->datname));
1893 2 : continue;
1894 : }
1895 :
1896 : /*
1897 : * If things are working properly, no database should have a
1898 : * datfrozenxid or datminmxid that is "in the future". However, such
1899 : * cases have been known to arise due to bugs in pg_upgrade. If we
1900 : * see any entries that are "in the future", chicken out and don't do
1901 : * anything. This ensures we won't truncate clog before those
1902 : * databases have been scanned and cleaned up. (We will issue the
1903 : * "already wrapped" warning if appropriate, though.)
1904 : */
1905 12704 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1906 6352 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1907 0 : bogus = true;
1908 :
1909 6352 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1910 0 : frozenAlreadyWrapped = true;
1911 6352 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1912 : {
1913 572 : frozenXID = datfrozenxid;
1914 572 : oldestxid_datoid = dbform->oid;
1915 : }
1916 :
1917 6352 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1918 : {
1919 4 : minMulti = datminmxid;
1920 4 : minmulti_datoid = dbform->oid;
1921 : }
1922 : }
1923 :
1924 2176 : table_endscan(scan);
1925 :
1926 2176 : table_close(relation, AccessShareLock);
1927 :
1928 : /*
1929 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1930 : * the computed minimum XID might be bogus. This case should now be
1931 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1932 : * test anyway.
1933 : */
1934 2176 : if (frozenAlreadyWrapped)
1935 : {
1936 0 : ereport(WARNING,
1937 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1938 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1939 0 : LWLockRelease(WrapLimitsVacuumLock);
1940 0 : return;
1941 : }
1942 :
1943 : /* chicken out if data is bogus in any other way */
1944 2176 : if (bogus)
1945 : {
1946 0 : LWLockRelease(WrapLimitsVacuumLock);
1947 0 : return;
1948 : }
1949 :
1950 : /*
1951 : * Advance the oldest value for commit timestamps before truncating, so
1952 : * that if a user requests a timestamp for a transaction we're truncating
1953 : * away right after this point, they get NULL instead of an ugly "file not
1954 : * found" error from slru.c. This doesn't matter for xact/multixact
1955 : * because they are not subject to arbitrary lookups from users.
1956 : */
1957 2176 : AdvanceOldestCommitTsXid(frozenXID);
1958 :
1959 : /*
1960 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1961 : */
1962 2176 : TruncateCLOG(frozenXID, oldestxid_datoid);
1963 2176 : TruncateCommitTs(frozenXID);
1964 2176 : TruncateMultiXact(minMulti, minmulti_datoid);
1965 :
1966 : /*
1967 : * Update the wrap limit for GetNewTransactionId and creation of new
1968 : * MultiXactIds. Note: these functions will also signal the postmaster
1969 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1970 : * signaling twice?
1971 : */
1972 2176 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1973 2176 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1974 :
1975 2176 : LWLockRelease(WrapLimitsVacuumLock);
1976 : }
1977 :
1978 :
1979 : /*
1980 : * vacuum_rel() -- vacuum one heap relation
1981 : *
1982 : * relid identifies the relation to vacuum. If relation is supplied,
1983 : * use the name therein for reporting any failure to open/lock the rel;
1984 : * do not use it once we've successfully opened the rel, since it might
1985 : * be stale.
1986 : *
1987 : * Returns true if it's okay to proceed with a requested ANALYZE
1988 : * operation on this table.
1989 : *
1990 : * Doing one heap at a time incurs extra overhead, since we need to
1991 : * check that the heap exists again just before we vacuum it. The
1992 : * reason that we do this is so that vacuuming can be spread across
1993 : * many small transactions. Otherwise, two-phase locking would require
1994 : * us to lock the entire database during one pass of the vacuum cleaner.
1995 : *
1996 : * At entry and exit, we are not inside a transaction.
1997 : */
1998 : static bool
1999 219388 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
2000 : BufferAccessStrategy bstrategy)
2001 : {
2002 : LOCKMODE lmode;
2003 : Relation rel;
2004 : LockRelId lockrelid;
2005 : Oid priv_relid;
2006 : Oid toast_relid;
2007 : Oid save_userid;
2008 : int save_sec_context;
2009 : int save_nestlevel;
2010 : VacuumParams toast_vacuum_params;
2011 :
2012 : /*
2013 : * This function scribbles on the parameters, so make a copy early to
2014 : * avoid affecting the TOAST table (if we do end up recursing to it).
2015 : */
2016 219388 : memcpy(&toast_vacuum_params, ¶ms, sizeof(VacuumParams));
2017 :
2018 : /* Begin a transaction for vacuuming this relation */
2019 219388 : StartTransactionCommand();
2020 :
2021 219388 : if (!(params.options & VACOPT_FULL))
2022 : {
2023 : /*
2024 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2025 : * other concurrent VACUUMs know that they can ignore this one while
2026 : * determining their OldestXmin. (The reason we don't set it during a
2027 : * full VACUUM is exactly that we may have to run user-defined
2028 : * functions for functional indexes, and we want to make sure that if
2029 : * they use the snapshot set above, any tuples it requires can't get
2030 : * removed from other tables. An index function that depends on the
2031 : * contents of other tables is arguably broken, but we won't break it
2032 : * here by violating transaction semantics.)
2033 : *
2034 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2035 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2036 : * in an emergency.
2037 : *
2038 : * Note: these flags remain set until CommitTransaction or
2039 : * AbortTransaction. We don't want to clear them until we reset
2040 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2041 : * might appear to go backwards, which is probably Not Good. (We also
2042 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2043 : * xmin doesn't become visible ahead of setting the flag.)
2044 : */
2045 218980 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2046 218980 : MyProc->statusFlags |= PROC_IN_VACUUM;
2047 218980 : if (params.is_wraparound)
2048 192214 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2049 218980 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2050 218980 : LWLockRelease(ProcArrayLock);
2051 : }
2052 :
2053 : /*
2054 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2055 : * cutoff xids in local memory wrapping around, and to have updated xmin
2056 : * horizons.
2057 : */
2058 219388 : PushActiveSnapshot(GetTransactionSnapshot());
2059 :
2060 : /*
2061 : * Check for user-requested abort. Note we want this to be inside a
2062 : * transaction, so xact.c doesn't issue useless WARNING.
2063 : */
2064 219388 : CHECK_FOR_INTERRUPTS();
2065 :
2066 : /*
2067 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2068 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2069 : * way, we can be sure that no other backend is vacuuming the same table.
2070 : */
2071 438776 : lmode = (params.options & VACOPT_FULL) ?
2072 219388 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2073 :
2074 : /* open the relation and get the appropriate lock on it */
2075 219388 : rel = vacuum_open_relation(relid, relation, params.options,
2076 219388 : params.log_min_duration >= 0, lmode);
2077 :
2078 : /* leave if relation could not be opened or locked */
2079 219388 : if (!rel)
2080 : {
2081 26 : PopActiveSnapshot();
2082 26 : CommitTransactionCommand();
2083 26 : return false;
2084 : }
2085 :
2086 : /*
2087 : * When recursing to a TOAST table, check privileges on the parent. NB:
2088 : * This is only safe to do because we hold a session lock on the main
2089 : * relation that prevents concurrent deletion.
2090 : */
2091 219362 : if (OidIsValid(params.toast_parent))
2092 9254 : priv_relid = params.toast_parent;
2093 : else
2094 210108 : priv_relid = RelationGetRelid(rel);
2095 :
2096 : /*
2097 : * Check if relation needs to be skipped based on privileges. This check
2098 : * happens also when building the relation list to vacuum for a manual
2099 : * operation, and needs to be done additionally here as VACUUM could
2100 : * happen across multiple transactions where privileges could have changed
2101 : * in-between. Make sure to only generate logs for VACUUM in this case.
2102 : */
2103 219362 : if (!vacuum_is_permitted_for_relation(priv_relid,
2104 : rel->rd_rel,
2105 219362 : params.options & ~VACOPT_ANALYZE))
2106 : {
2107 72 : relation_close(rel, lmode);
2108 72 : PopActiveSnapshot();
2109 72 : CommitTransactionCommand();
2110 72 : return false;
2111 : }
2112 :
2113 : /*
2114 : * Check that it's of a vacuumable relkind.
2115 : */
2116 219290 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2117 79518 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2118 79510 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2119 188 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2120 : {
2121 2 : ereport(WARNING,
2122 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2123 : RelationGetRelationName(rel))));
2124 2 : relation_close(rel, lmode);
2125 2 : PopActiveSnapshot();
2126 2 : CommitTransactionCommand();
2127 2 : return false;
2128 : }
2129 :
2130 : /*
2131 : * Silently ignore tables that are temp tables of other backends ---
2132 : * trying to vacuum these will lead to great unhappiness, since their
2133 : * contents are probably not up-to-date on disk. (We don't throw a
2134 : * warning here; it would just lead to chatter during a database-wide
2135 : * VACUUM.)
2136 : */
2137 219288 : if (RELATION_IS_OTHER_TEMP(rel))
2138 : {
2139 2 : relation_close(rel, lmode);
2140 2 : PopActiveSnapshot();
2141 2 : CommitTransactionCommand();
2142 2 : return false;
2143 : }
2144 :
2145 : /*
2146 : * Silently ignore partitioned tables as there is no work to be done. The
2147 : * useful work is on their child partitions, which have been queued up for
2148 : * us separately.
2149 : */
2150 219286 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2151 : {
2152 186 : relation_close(rel, lmode);
2153 186 : PopActiveSnapshot();
2154 186 : CommitTransactionCommand();
2155 : /* It's OK to proceed with ANALYZE on this table */
2156 186 : return true;
2157 : }
2158 :
2159 : /*
2160 : * Get a session-level lock too. This will protect our access to the
2161 : * relation across multiple transactions, so that we can vacuum the
2162 : * relation's TOAST table (if any) secure in the knowledge that no one is
2163 : * deleting the parent relation.
2164 : *
2165 : * NOTE: this cannot block, even if someone else is waiting for access,
2166 : * because the lock manager knows that both lock requests are from the
2167 : * same process.
2168 : */
2169 219100 : lockrelid = rel->rd_lockInfo.lockRelId;
2170 219100 : LockRelationIdForSession(&lockrelid, lmode);
2171 :
2172 : /*
2173 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2174 : * specified in VACUUM command, or when running in an autovacuum worker
2175 : */
2176 219100 : if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
2177 : {
2178 : StdRdOptIndexCleanup vacuum_index_cleanup;
2179 :
2180 218842 : if (rel->rd_options == NULL)
2181 216456 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2182 : else
2183 2386 : vacuum_index_cleanup =
2184 2386 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2185 :
2186 218842 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2187 218798 : params.index_cleanup = VACOPTVALUE_AUTO;
2188 44 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2189 22 : params.index_cleanup = VACOPTVALUE_ENABLED;
2190 : else
2191 : {
2192 : Assert(vacuum_index_cleanup ==
2193 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2194 22 : params.index_cleanup = VACOPTVALUE_DISABLED;
2195 : }
2196 : }
2197 :
2198 : #ifdef USE_INJECTION_POINTS
2199 219100 : if (params.index_cleanup == VACOPTVALUE_AUTO)
2200 218804 : INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
2201 296 : else if (params.index_cleanup == VACOPTVALUE_DISABLED)
2202 260 : INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
2203 36 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
2204 36 : INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
2205 : #endif
2206 :
2207 : /*
2208 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2209 : * parameter was specified. This overrides the GUC value.
2210 : */
2211 219100 : if (rel->rd_options != NULL &&
2212 2398 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2213 0 : params.max_eager_freeze_failure_rate =
2214 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2215 :
2216 : /*
2217 : * Set truncate option based on truncate reloption or GUC if it wasn't
2218 : * specified in VACUUM command, or when running in an autovacuum worker
2219 : */
2220 219100 : if (params.truncate == VACOPTVALUE_UNSPECIFIED)
2221 : {
2222 218848 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2223 :
2224 218848 : if (opts && opts->vacuum_truncate_set)
2225 : {
2226 32 : if (opts->vacuum_truncate)
2227 10 : params.truncate = VACOPTVALUE_ENABLED;
2228 : else
2229 22 : params.truncate = VACOPTVALUE_DISABLED;
2230 : }
2231 218816 : else if (vacuum_truncate)
2232 218794 : params.truncate = VACOPTVALUE_ENABLED;
2233 : else
2234 22 : params.truncate = VACOPTVALUE_DISABLED;
2235 : }
2236 :
2237 : #ifdef USE_INJECTION_POINTS
2238 219100 : if (params.truncate == VACOPTVALUE_AUTO)
2239 0 : INJECTION_POINT("vacuum-truncate-auto", NULL);
2240 219100 : else if (params.truncate == VACOPTVALUE_DISABLED)
2241 296 : INJECTION_POINT("vacuum-truncate-disabled", NULL);
2242 218804 : else if (params.truncate == VACOPTVALUE_ENABLED)
2243 218804 : INJECTION_POINT("vacuum-truncate-enabled", NULL);
2244 : #endif
2245 :
2246 : /*
2247 : * Remember the relation's TOAST relation for later, if the caller asked
2248 : * us to process it. In VACUUM FULL, though, the toast table is
2249 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2250 : * unless PROCESS_MAIN is disabled.
2251 : */
2252 219100 : if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
2253 26564 : ((params.options & VACOPT_FULL) == 0 ||
2254 380 : (params.options & VACOPT_PROCESS_MAIN) == 0))
2255 26190 : toast_relid = rel->rd_rel->reltoastrelid;
2256 : else
2257 192910 : toast_relid = InvalidOid;
2258 :
2259 : /*
2260 : * Switch to the table owner's userid, so that any index functions are run
2261 : * as that user. Also lock down security-restricted operations and
2262 : * arrange to make GUC variable changes local to this command. (This is
2263 : * unnecessary, but harmless, for lazy VACUUM.)
2264 : */
2265 219100 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2266 219100 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2267 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2268 219100 : save_nestlevel = NewGUCNestLevel();
2269 219100 : RestrictSearchPath();
2270 :
2271 : /*
2272 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2273 : * relation. Otherwise, we can skip this part. If processing the TOAST
2274 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2275 : * to be set when we recurse to the TOAST table.
2276 : */
2277 219100 : if (params.options & VACOPT_PROCESS_MAIN)
2278 : {
2279 : /*
2280 : * Do the actual work --- either FULL or "lazy" vacuum
2281 : */
2282 218946 : if (params.options & VACOPT_FULL)
2283 : {
2284 374 : ClusterParams cluster_params = {0};
2285 :
2286 374 : if ((params.options & VACOPT_VERBOSE) != 0)
2287 2 : cluster_params.options |= CLUOPT_VERBOSE;
2288 :
2289 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2290 374 : cluster_rel(rel, InvalidOid, &cluster_params);
2291 : /* cluster_rel closes the relation, but keeps lock */
2292 :
2293 368 : rel = NULL;
2294 : }
2295 : else
2296 218572 : table_relation_vacuum(rel, params, bstrategy);
2297 : }
2298 :
2299 : /* Roll back any GUC changes executed by index functions */
2300 219092 : AtEOXact_GUC(false, save_nestlevel);
2301 :
2302 : /* Restore userid and security context */
2303 219092 : SetUserIdAndSecContext(save_userid, save_sec_context);
2304 :
2305 : /* all done with this class, but hold lock until commit */
2306 219092 : if (rel)
2307 218724 : relation_close(rel, NoLock);
2308 :
2309 : /*
2310 : * Complete the transaction and free all temporary memory used.
2311 : */
2312 219092 : PopActiveSnapshot();
2313 219092 : CommitTransactionCommand();
2314 :
2315 : /*
2316 : * If the relation has a secondary toast rel, vacuum that too while we
2317 : * still hold the session lock on the main table. Note however that
2318 : * "analyze" will not get done on the toast table. This is good, because
2319 : * the toaster always uses hardcoded index access and statistics are
2320 : * totally unimportant for toast relations.
2321 : */
2322 219092 : if (toast_relid != InvalidOid)
2323 : {
2324 : /*
2325 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2326 : * set toast_parent so that the privilege checks are done on the main
2327 : * relation. NB: This is only safe to do because we hold a session
2328 : * lock on the main relation that prevents concurrent deletion.
2329 : */
2330 9254 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2331 9254 : toast_vacuum_params.toast_parent = relid;
2332 :
2333 9254 : vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
2334 : }
2335 :
2336 : /*
2337 : * Now release the session-level lock on the main table.
2338 : */
2339 219092 : UnlockRelationIdForSession(&lockrelid, lmode);
2340 :
2341 : /* Report that we really did it. */
2342 219092 : return true;
2343 : }
2344 :
2345 :
2346 : /*
2347 : * Open all the vacuumable indexes of the given relation, obtaining the
2348 : * specified kind of lock on each. Return an array of Relation pointers for
2349 : * the indexes into *Irel, and the number of indexes into *nindexes.
2350 : *
2351 : * We consider an index vacuumable if it is marked insertable (indisready).
2352 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2353 : * execution, and what we have is too corrupt to be processable. We will
2354 : * vacuum even if the index isn't indisvalid; this is important because in a
2355 : * unique index, uniqueness checks will be performed anyway and had better not
2356 : * hit dangling index pointers.
2357 : */
2358 : void
2359 233652 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2360 : int *nindexes, Relation **Irel)
2361 : {
2362 : List *indexoidlist;
2363 : ListCell *indexoidscan;
2364 : int i;
2365 :
2366 : Assert(lockmode != NoLock);
2367 :
2368 233652 : indexoidlist = RelationGetIndexList(relation);
2369 :
2370 : /* allocate enough memory for all indexes */
2371 233652 : i = list_length(indexoidlist);
2372 :
2373 233652 : if (i > 0)
2374 219948 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2375 : else
2376 13704 : *Irel = NULL;
2377 :
2378 : /* collect just the ready indexes */
2379 233652 : i = 0;
2380 583880 : foreach(indexoidscan, indexoidlist)
2381 : {
2382 350228 : Oid indexoid = lfirst_oid(indexoidscan);
2383 : Relation indrel;
2384 :
2385 350228 : indrel = index_open(indexoid, lockmode);
2386 350228 : if (indrel->rd_index->indisready)
2387 350228 : (*Irel)[i++] = indrel;
2388 : else
2389 0 : index_close(indrel, lockmode);
2390 : }
2391 :
2392 233652 : *nindexes = i;
2393 :
2394 233652 : list_free(indexoidlist);
2395 233652 : }
2396 :
2397 : /*
2398 : * Release the resources acquired by vac_open_indexes. Optionally release
2399 : * the locks (say NoLock to keep 'em).
2400 : */
2401 : void
2402 234468 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2403 : {
2404 234468 : if (Irel == NULL)
2405 14526 : return;
2406 :
2407 570158 : while (nindexes--)
2408 : {
2409 350216 : Relation ind = Irel[nindexes];
2410 :
2411 350216 : index_close(ind, lockmode);
2412 : }
2413 219942 : pfree(Irel);
2414 : }
2415 :
2416 : /*
2417 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2418 : *
2419 : * This should be called in each major loop of VACUUM processing,
2420 : * typically once per page processed.
2421 : */
2422 : void
2423 83034310 : vacuum_delay_point(bool is_analyze)
2424 : {
2425 83034310 : double msec = 0;
2426 :
2427 : /* Always check for interrupts */
2428 83034310 : CHECK_FOR_INTERRUPTS();
2429 :
2430 83034310 : if (InterruptPending ||
2431 83034310 : (!VacuumCostActive && !ConfigReloadPending))
2432 74847350 : return;
2433 :
2434 : /*
2435 : * Autovacuum workers should reload the configuration file if requested.
2436 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2437 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2438 : * vacuumed or analyzed.
2439 : */
2440 8186960 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2441 : {
2442 0 : ConfigReloadPending = false;
2443 0 : ProcessConfigFile(PGC_SIGHUP);
2444 0 : VacuumUpdateCosts();
2445 : }
2446 :
2447 : /*
2448 : * If we disabled cost-based delays after reloading the config file,
2449 : * return.
2450 : */
2451 8186960 : if (!VacuumCostActive)
2452 0 : return;
2453 :
2454 : /*
2455 : * For parallel vacuum, the delay is computed based on the shared cost
2456 : * balance. See compute_parallel_delay.
2457 : */
2458 8186960 : if (VacuumSharedCostBalance != NULL)
2459 0 : msec = compute_parallel_delay();
2460 8186960 : else if (VacuumCostBalance >= vacuum_cost_limit)
2461 4608 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2462 :
2463 : /* Nap if appropriate */
2464 8186960 : if (msec > 0)
2465 : {
2466 : instr_time delay_start;
2467 :
2468 4608 : if (msec > vacuum_cost_delay * 4)
2469 8 : msec = vacuum_cost_delay * 4;
2470 :
2471 4608 : if (track_cost_delay_timing)
2472 0 : INSTR_TIME_SET_CURRENT(delay_start);
2473 :
2474 4608 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2475 4608 : pg_usleep(msec * 1000);
2476 4608 : pgstat_report_wait_end();
2477 :
2478 4608 : if (track_cost_delay_timing)
2479 : {
2480 : instr_time delay_end;
2481 : instr_time delay;
2482 :
2483 0 : INSTR_TIME_SET_CURRENT(delay_end);
2484 0 : INSTR_TIME_SET_ZERO(delay);
2485 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2486 :
2487 : /*
2488 : * For parallel workers, we only report the delay time every once
2489 : * in a while to avoid overloading the leader with messages and
2490 : * interrupts.
2491 : */
2492 0 : if (IsParallelWorker())
2493 : {
2494 : static instr_time last_report_time;
2495 : instr_time time_since_last_report;
2496 :
2497 : Assert(!is_analyze);
2498 :
2499 : /* Accumulate the delay time */
2500 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2501 :
2502 : /* Calculate interval since last report */
2503 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2504 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2505 :
2506 : /* If we haven't reported in a while, do so now */
2507 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2508 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2509 : {
2510 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2511 : parallel_vacuum_worker_delay_ns);
2512 :
2513 : /* Reset variables */
2514 0 : last_report_time = delay_end;
2515 0 : parallel_vacuum_worker_delay_ns = 0;
2516 : }
2517 : }
2518 0 : else if (is_analyze)
2519 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2520 0 : INSTR_TIME_GET_NANOSEC(delay));
2521 : else
2522 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2523 0 : INSTR_TIME_GET_NANOSEC(delay));
2524 : }
2525 :
2526 : /*
2527 : * We don't want to ignore postmaster death during very long vacuums
2528 : * with vacuum_cost_delay configured. We can't use the usual
2529 : * WaitLatch() approach here because we want microsecond-based sleep
2530 : * durations above.
2531 : */
2532 4608 : if (IsUnderPostmaster && !PostmasterIsAlive())
2533 0 : exit(1);
2534 :
2535 4608 : VacuumCostBalance = 0;
2536 :
2537 : /*
2538 : * Balance and update limit values for autovacuum workers. We must do
2539 : * this periodically, as the number of workers across which we are
2540 : * balancing the limit may have changed.
2541 : *
2542 : * TODO: There may be better criteria for determining when to do this
2543 : * besides "check after napping".
2544 : */
2545 4608 : AutoVacuumUpdateCostLimit();
2546 :
2547 : /* Might have gotten an interrupt while sleeping */
2548 4608 : CHECK_FOR_INTERRUPTS();
2549 : }
2550 : }
2551 :
2552 : /*
2553 : * Computes the vacuum delay for parallel workers.
2554 : *
2555 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2556 : * worker to sleep in proportion to the share of work it's done. We achieve this
2557 : * by allowing all parallel vacuum workers including the leader process to
2558 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2559 : * We allow each worker to update it as and when it has incurred any cost and
2560 : * then based on that decide whether it needs to sleep. We compute the time
2561 : * to sleep for a worker based on the cost it has incurred
2562 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2563 : * that amount. This avoids putting to sleep those workers which have done less
2564 : * I/O than other workers and therefore ensure that workers
2565 : * which are doing more I/O got throttled more.
2566 : *
2567 : * We allow a worker to sleep only if it has performed I/O above a certain
2568 : * threshold, which is calculated based on the number of active workers
2569 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2570 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2571 : * the required throttling if we force a worker that has done more than 50%
2572 : * of its share of work to sleep.
2573 : */
2574 : static double
2575 0 : compute_parallel_delay(void)
2576 : {
2577 0 : double msec = 0;
2578 : uint32 shared_balance;
2579 : int nworkers;
2580 :
2581 : /* Parallel vacuum must be active */
2582 : Assert(VacuumSharedCostBalance);
2583 :
2584 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2585 :
2586 : /* At least count itself */
2587 : Assert(nworkers >= 1);
2588 :
2589 : /* Update the shared cost balance value atomically */
2590 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2591 :
2592 : /* Compute the total local balance for the current worker */
2593 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2594 :
2595 0 : if ((shared_balance >= vacuum_cost_limit) &&
2596 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2597 : {
2598 : /* Compute sleep time based on the local cost balance */
2599 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2600 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2601 0 : VacuumCostBalanceLocal = 0;
2602 : }
2603 :
2604 : /*
2605 : * Reset the local balance as we accumulated it into the shared value.
2606 : */
2607 0 : VacuumCostBalance = 0;
2608 :
2609 0 : return msec;
2610 : }
2611 :
2612 : /*
2613 : * A wrapper function of defGetBoolean().
2614 : *
2615 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2616 : * of true and false.
2617 : */
2618 : static VacOptValue
2619 326 : get_vacoptval_from_boolean(DefElem *def)
2620 : {
2621 326 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2622 : }
2623 :
2624 : /*
2625 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2626 : *
2627 : * Returns bulk delete stats derived from input stats
2628 : */
2629 : IndexBulkDeleteResult *
2630 2332 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2631 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2632 : {
2633 : /* Do bulk deletion */
2634 2332 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2635 : dead_items);
2636 :
2637 2332 : ereport(ivinfo->message_level,
2638 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2639 : RelationGetRelationName(ivinfo->index),
2640 : dead_items_info->num_items)));
2641 :
2642 2332 : return istat;
2643 : }
2644 :
2645 : /*
2646 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2647 : *
2648 : * Returns bulk delete stats derived from input stats
2649 : */
2650 : IndexBulkDeleteResult *
2651 217394 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2652 : {
2653 217394 : istat = index_vacuum_cleanup(ivinfo, istat);
2654 :
2655 217394 : if (istat)
2656 2576 : ereport(ivinfo->message_level,
2657 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2658 : RelationGetRelationName(ivinfo->index),
2659 : istat->num_index_tuples,
2660 : istat->num_pages),
2661 : errdetail("%.0f index row versions were removed.\n"
2662 : "%u index pages were newly deleted.\n"
2663 : "%u index pages are currently deleted, of which %u are currently reusable.",
2664 : istat->tuples_removed,
2665 : istat->pages_newly_deleted,
2666 : istat->pages_deleted, istat->pages_free)));
2667 :
2668 217394 : return istat;
2669 : }
2670 :
2671 : /*
2672 : * vac_tid_reaped() -- is a particular tid deletable?
2673 : *
2674 : * This has the right signature to be an IndexBulkDeleteCallback.
2675 : */
2676 : static bool
2677 6101302 : vac_tid_reaped(ItemPointer itemptr, void *state)
2678 : {
2679 6101302 : TidStore *dead_items = (TidStore *) state;
2680 :
2681 6101302 : return TidStoreIsMember(dead_items, itemptr);
2682 : }
|