Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/memutils.h"
60 : #include "utils/snapmgr.h"
61 : #include "utils/syscache.h"
62 :
63 : /*
64 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
65 : * This aims to avoid sending too many messages and waking up the leader too
66 : * frequently.
 *
 * The interval is defined as NS_PER_S; presumably that is one second
 * expressed in nanoseconds (confirm against the definition of NS_PER_S).
67 : */
68 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
69 :
70 : /*
71 : * GUC parameters
72 : */
73 : int vacuum_freeze_min_age;
74 : int vacuum_freeze_table_age;
75 : int vacuum_multixact_freeze_min_age;
76 : int vacuum_multixact_freeze_table_age;
77 : int vacuum_failsafe_age;
78 : int vacuum_multixact_failsafe_age;
79 : double vacuum_max_eager_freeze_failure_rate;
80 : bool track_cost_delay_timing;
81 : bool vacuum_truncate;
82 :
83 : /*
84 : * Variables for cost-based vacuum delay. The defaults differ between
85 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
86 : * vacuum code. They are initialized here to the defaults for client backends
87 : * executing VACUUM or ANALYZE.
88 : */
89 : double vacuum_cost_delay = 0;
90 : int vacuum_cost_limit = 200;
91 :
92 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
93 : int64 parallel_vacuum_worker_delay_ns = 0;
94 :
95 : /*
96 : * VacuumFailsafeActive is defined as a global so that we can determine
97 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
98 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
99 : * for the table until after vacuuming has completed, regardless of other
100 : * settings.
101 : *
102 : * Only VACUUM code should inspect this variable and only table access methods
103 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
104 : * inspected to determine whether or not to allow cost-based delays. Table AMs
105 : * are free to set it if they desire this behavior, but it is false by default
106 : * and reset to false in between vacuuming each relation.
 *
 * vacuum() clears it before processing the first relation, at the bottom of
 * each loop iteration, and again in its cleanup section.
107 : */
108 : bool VacuumFailsafeActive = false;
109 :
110 : /*
111 : * Variables for cost-based parallel vacuum. See comments atop
112 : * compute_parallel_delay to understand how it works.
 *
 * vacuum() resets all three (NULL, NULL and 0 respectively) before it starts
 * processing relations.
113 : */
114 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
115 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
116 : int VacuumCostBalanceLocal = 0;
117 :
118 : /* non-export function prototypes */
119 : static List *expand_vacuum_rel(VacuumRelation *vrel,
120 : MemoryContext vac_context, int options);
121 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
122 : static void vac_truncate_clog(TransactionId frozenXID,
123 : MultiXactId minMulti,
124 : TransactionId lastSaneFrozenXid,
125 : MultiXactId lastSaneMinMulti);
126 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
127 : BufferAccessStrategy bstrategy);
128 : static double compute_parallel_delay(void);
129 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
130 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
131 :
132 : /*
133 : * GUC check function to ensure GUC value specified is within the allowable
134 : * range.
 *
 * newval points at the proposed setting; it is only read here.  The extra
 * and source arguments are not used by this function.
 *
 * Returns true when *newval is 0 or lies within
 * [MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB] (both bounds
 * inclusive).  Otherwise records an error detail via GUC_check_errdetail()
 * naming "vacuum_buffer_usage_limit" and returns false.
135 : */
136 : bool
137 2132 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
138 : GucSource source)
139 : {
140 : /* Value upper and lower hard limits are inclusive */
141 2132 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
142 2132 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
143 2132 : return true;
144 :
145 : /* Value does not fall within any allowable range */
146 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
147 : "vacuum_buffer_usage_limit",
148 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
149 :
150 0 : return false;
151 : }
152 :
153 : /*
154 : * Primary entry point for manual VACUUM and ANALYZE commands
155 : *
156 : * This is mainly a preparation wrapper for the real operations that will
157 : * happen in vacuum().
 *
 * pstate is used only to attach a parse location to option errors.
 * vacstmt supplies the option list, the relation list, and whether the
 * command is VACUUM (is_vacuumcmd) or ANALYZE.  isTopLevel is passed through
 * to vacuum() unchanged.
 *
 * The function proceeds in these steps:
 *   1. walk vacstmt->options and translate each DefElem into a local flag or
 *      a VacuumParams field, raising ERROR for unknown or out-of-range values;
 *   2. fold the flags into params.options and reject unsupported option
 *      combinations;
 *   3. fill in the remaining VacuumParams fields;
 *   4. create the "Vacuum" memory context as a child of PortalContext and,
 *      when it will be used, build the buffer access strategy inside it;
 *   5. call vacuum() and finally delete the memory context.
158 : */
159 : void
160 13668 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
161 : {
162 : VacuumParams params;
163 13668 : BufferAccessStrategy bstrategy = NULL;
164 13668 : bool verbose = false;
165 13668 : bool skip_locked = false;
166 13668 : bool analyze = false;
167 13668 : bool freeze = false;
168 13668 : bool full = false;
169 13668 : bool disable_page_skipping = false;
170 13668 : bool process_main = true;
171 13668 : bool process_toast = true;
172 : int ring_size;
173 13668 : bool skip_database_stats = false;
174 13668 : bool only_database_stats = false;
175 : MemoryContext vac_context;
176 : ListCell *lc;
177 :
178 : /* index_cleanup and truncate values unspecified for now */
179 13668 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
180 13668 : params.truncate = VACOPTVALUE_UNSPECIFIED;
181 :
182 : /* By default parallel vacuum is enabled */
/* (0 is the "enabled" default; an explicit PARALLEL 0 is stored as -1 below) */
183 13668 : params.nworkers = 0;
184 :
185 : /* Will be set later if we recurse to a TOAST table. */
186 13668 : params.toast_parent = InvalidOid;
187 :
188 : /*
189 : * Set this to an invalid value so it is clear whether or not a
190 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
191 : */
192 13668 : ring_size = -1;
193 :
194 : /* Parse options list */
195 28210 : foreach(lc, vacstmt->options)
196 : {
197 14578 : DefElem *opt = (DefElem *) lfirst(lc);
198 :
199 : /* Parse common options for VACUUM and ANALYZE */
200 14578 : if (strcmp(opt->defname, "verbose") == 0)
201 38 : verbose = defGetBoolean(opt);
202 14540 : else if (strcmp(opt->defname, "skip_locked") == 0)
203 334 : skip_locked = defGetBoolean(opt);
204 14206 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
205 : {
206 : const char *hintmsg;
207 : int result;
208 : char *vac_buffer_size;
209 :
210 54 : vac_buffer_size = defGetString(opt);
211 :
212 : /*
213 : * Check that the specified value is valid and the size falls
214 : * within the hard upper and lower limits if it is not 0.
215 : */
216 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
217 48 : (result != 0 &&
218 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
219 : {
220 18 : ereport(ERROR,
221 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
222 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
223 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
224 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
225 : }
226 :
227 36 : ring_size = result;
228 : }
229 14152 : else if (!vacstmt->is_vacuumcmd)
230 6 : ereport(ERROR,
231 : (errcode(ERRCODE_SYNTAX_ERROR),
232 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
233 : parser_errposition(pstate, opt->location)));
234 :
235 : /* Parse options available on VACUUM */
236 14146 : else if (strcmp(opt->defname, "analyze") == 0)
237 2744 : analyze = defGetBoolean(opt);
238 11402 : else if (strcmp(opt->defname, "freeze") == 0)
239 2540 : freeze = defGetBoolean(opt);
240 8862 : else if (strcmp(opt->defname, "full") == 0)
241 386 : full = defGetBoolean(opt);
242 8476 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
243 202 : disable_page_skipping = defGetBoolean(opt);
244 8274 : else if (strcmp(opt->defname, "index_cleanup") == 0)
245 : {
246 : /* Interpret no string as the default, which is 'auto' */
247 174 : if (!opt->arg)
248 0 : params.index_cleanup = VACOPTVALUE_AUTO;
249 : else
250 : {
251 174 : char *sval = defGetString(opt);
252 :
253 : /* Try matching on 'auto' string, or fall back on boolean */
254 174 : if (pg_strcasecmp(sval, "auto") == 0)
255 6 : params.index_cleanup = VACOPTVALUE_AUTO;
256 : else
257 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
258 : }
259 : }
260 8100 : else if (strcmp(opt->defname, "process_main") == 0)
261 154 : process_main = defGetBoolean(opt);
262 7946 : else if (strcmp(opt->defname, "process_toast") == 0)
263 160 : process_toast = defGetBoolean(opt);
264 7786 : else if (strcmp(opt->defname, "truncate") == 0)
265 154 : params.truncate = get_vacoptval_from_boolean(opt);
266 7632 : else if (strcmp(opt->defname, "parallel") == 0)
267 : {
268 350 : if (opt->arg == NULL)
269 : {
270 6 : ereport(ERROR,
271 : (errcode(ERRCODE_SYNTAX_ERROR),
272 : errmsg("parallel option requires a value between 0 and %d",
273 : MAX_PARALLEL_WORKER_LIMIT),
274 : parser_errposition(pstate, opt->location)));
275 : }
276 : else
277 : {
278 : int nworkers;
279 :
280 344 : nworkers = defGetInt32(opt);
281 344 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
282 6 : ereport(ERROR,
283 : (errcode(ERRCODE_SYNTAX_ERROR),
284 : errmsg("parallel workers for vacuum must be between 0 and %d",
285 : MAX_PARALLEL_WORKER_LIMIT),
286 : parser_errposition(pstate, opt->location)));
287 :
288 : /*
289 : * Disable parallel vacuum, if user has specified parallel
290 : * degree as zero.
291 : */
292 338 : if (nworkers == 0)
293 154 : params.nworkers = -1;
294 : else
295 184 : params.nworkers = nworkers;
296 : }
297 : }
298 7282 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
299 7118 : skip_database_stats = defGetBoolean(opt);
300 164 : else if (strcmp(opt->defname, "only_database_stats") == 0)
301 164 : only_database_stats = defGetBoolean(opt);
302 : else
303 0 : ereport(ERROR,
304 : (errcode(ERRCODE_SYNTAX_ERROR),
305 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
306 : parser_errposition(pstate, opt->location)));
307 : }
308 :
309 : /* Set vacuum options */
310 13632 : params.options =
311 13632 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
312 13632 : (verbose ? VACOPT_VERBOSE : 0) |
313 13632 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
314 13632 : (analyze ? VACOPT_ANALYZE : 0) |
315 13632 : (freeze ? VACOPT_FREEZE : 0) |
316 13632 : (full ? VACOPT_FULL : 0) |
317 13632 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
318 13632 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
319 13632 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
320 13632 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
321 13632 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
322 :
323 : /* sanity checks on options */
324 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
325 : Assert((params.options & VACOPT_VACUUM) ||
326 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
327 :
328 13632 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
329 6 : ereport(ERROR,
330 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
331 : errmsg("VACUUM FULL cannot be performed in parallel")));
332 :
333 : /*
334 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
335 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
336 : * we'll permit that.
337 : */
338 13626 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
339 6 : !(params.options & VACOPT_ANALYZE))
340 6 : ereport(ERROR,
341 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
342 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
343 :
344 : /*
345 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
346 : */
347 13620 : if (!(params.options & VACOPT_ANALYZE))
348 : {
349 12244 : foreach(lc, vacstmt->rels)
350 : {
351 6010 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
352 :
353 6010 : if (vrel->va_cols != NIL)
354 6 : ereport(ERROR,
355 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
356 : errmsg("ANALYZE option must be specified when a column list is provided")));
357 : }
358 : }
359 :
360 :
361 : /*
362 : * Sanity check DISABLE_PAGE_SKIPPING option.
363 : */
364 13614 : if ((params.options & VACOPT_FULL) != 0 &&
365 362 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
366 0 : ereport(ERROR,
367 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
368 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
369 :
370 : /* sanity check for PROCESS_TOAST */
371 13614 : if ((params.options & VACOPT_FULL) != 0 &&
372 362 : (params.options & VACOPT_PROCESS_TOAST) == 0)
373 6 : ereport(ERROR,
374 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
375 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
376 :
377 : /* sanity check for ONLY_DATABASE_STATS */
378 13608 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
379 : {
380 : Assert(params.options & VACOPT_VACUUM);
381 164 : if (vacstmt->rels != NIL)
382 6 : ereport(ERROR,
383 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
385 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
386 158 : if (params.options & ~(VACOPT_VACUUM |
387 : VACOPT_VERBOSE |
388 : VACOPT_PROCESS_MAIN |
389 : VACOPT_PROCESS_TOAST |
390 : VACOPT_ONLY_DATABASE_STATS))
391 0 : ereport(ERROR,
392 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
393 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
394 : }
395 :
396 : /*
397 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
398 : * them as -1 which means to use the default values.
399 : */
400 13602 : if (params.options & VACOPT_FREEZE)
401 : {
402 2540 : params.freeze_min_age = 0;
403 2540 : params.freeze_table_age = 0;
404 2540 : params.multixact_freeze_min_age = 0;
405 2540 : params.multixact_freeze_table_age = 0;
406 : }
407 : else
408 : {
409 11062 : params.freeze_min_age = -1;
410 11062 : params.freeze_table_age = -1;
411 11062 : params.multixact_freeze_min_age = -1;
412 11062 : params.multixact_freeze_table_age = -1;
413 : }
414 :
415 : /* user-invoked vacuum is never "for wraparound" */
416 13602 : params.is_wraparound = false;
417 :
418 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
419 13602 : params.log_min_duration = -1;
420 :
421 : /*
422 : * Later, in vacuum_rel(), we check if a reloption override was specified.
423 : */
424 13602 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
425 :
426 : /*
427 : * Create special memory context for cross-transaction storage.
428 : *
429 : * Since it is a child of PortalContext, it will go away eventually even
430 : * if we suffer an error; there's no need for special abort cleanup logic.
431 : */
432 13602 : vac_context = AllocSetContextCreate(PortalContext,
433 : "Vacuum",
434 : ALLOCSET_DEFAULT_SIZES);
435 :
436 : /*
437 : * Make a buffer strategy object in the cross-transaction memory context.
438 : * We needn't bother making this for VACUUM (FULL) or VACUUM
439 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
440 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
441 : * when we see ANALYZE.
442 : */
443 13602 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
444 514 : VACOPT_FULL)) == 0 ||
445 514 : (params.options & VACOPT_ANALYZE) != 0)
446 : {
447 :
448 13094 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
449 :
450 : Assert(ring_size >= -1);
451 :
452 : /*
453 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
454 : * command, it overrides the value of VacuumBufferUsageLimit. Either
455 : * value may be 0, in which case GetAccessStrategyWithSize() will
456 : * return NULL, effectively allowing full use of shared buffers.
457 : */
458 13094 : if (ring_size == -1)
459 13064 : ring_size = VacuumBufferUsageLimit;
460 :
461 13094 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
462 :
463 13094 : MemoryContextSwitchTo(old_context);
464 : }
465 :
466 : /* Now go through the common routine */
467 13602 : vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
468 :
469 : /* Finally, clean up the vacuum memory context */
470 13468 : MemoryContextDelete(vac_context);
471 13468 : }
472 :
473 : /*
474 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
475 : *
476 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
477 : * we process all relevant tables in the database. For each VacuumRelation,
478 : * if a valid OID is supplied, the table with that OID is what to process;
479 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
480 : *
481 : * params contains a set of parameters that can be used to customize the
482 : * behavior.
483 : *
484 : * bstrategy may be passed in as NULL when the caller does not want to
485 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
486 : * otherwise, the caller must build a BufferAccessStrategy with the number of
487 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
488 : * using.
489 : *
490 : * isTopLevel should be passed down from ProcessUtility.
491 : *
492 : * It is the caller's responsibility that all parameters are allocated in a
493 : * memory context that will not disappear at transaction commit.
 *
 * vac_context is the memory context in which the expanded relation list is
 * built.
 *
 * Raises ERROR if called while another vacuum() invocation is still in
 * progress in this backend (tracked by the static in_vacuum flag).
494 : */
495 : void
496 110444 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
497 : MemoryContext vac_context, bool isTopLevel)
498 : {
/* Recursion guard: set while the relation loop below is running. */
499 : static bool in_vacuum = false;
500 :
501 : const char *stmttype;
/*
 * NOTE(review): these are written before and read inside/after the PG_TRY
 * block below, which is presumably why they are declared volatile.
 */
502 : volatile bool in_outer_xact,
503 : use_own_xacts;
504 :
505 : Assert(params != NULL);
506 :
507 110444 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
508 :
509 : /*
510 : * We cannot run VACUUM inside a user transaction block; if we were inside
511 : * a transaction, then our commit- and start-transaction-command calls
512 : * would not have the intended effect! There are numerous other subtle
513 : * dependencies on this, too.
514 : *
515 : * ANALYZE (without VACUUM) can run either way.
516 : */
517 110444 : if (params->options & VACOPT_VACUUM)
518 : {
519 105426 : PreventInTransactionBlock(isTopLevel, stmttype);
520 105406 : in_outer_xact = false;
521 : }
522 : else
523 5018 : in_outer_xact = IsInTransactionBlock(isTopLevel);
524 :
525 : /*
526 : * Check for and disallow recursive calls. This could happen when VACUUM
527 : * FULL or ANALYZE calls a hostile index expression that itself calls
528 : * ANALYZE.
529 : */
530 110424 : if (in_vacuum)
531 12 : ereport(ERROR,
532 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
533 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
534 : stmttype)));
535 :
536 : /*
537 : * Build list of relation(s) to process, putting any new data in
538 : * vac_context for safekeeping.
539 : */
540 110412 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
541 : {
542 : /* We don't process any tables in this case */
543 : Assert(relations == NIL);
544 : }
545 110254 : else if (relations != NIL)
546 : {
547 110050 : List *newrels = NIL;
548 : ListCell *lc;
549 :
550 220196 : foreach(lc, relations)
551 : {
552 110182 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
553 : List *sublist;
554 : MemoryContext old_context;
555 :
556 110182 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
/* Concatenate while in vac_context so any new list storage lives there. */
557 110146 : old_context = MemoryContextSwitchTo(vac_context);
558 110146 : newrels = list_concat(newrels, sublist);
559 110146 : MemoryContextSwitchTo(old_context);
560 : }
561 110014 : relations = newrels;
562 : }
563 : else
564 204 : relations = get_all_vacuum_rels(vac_context, params->options);
565 :
566 : /*
567 : * Decide whether we need to start/commit our own transactions.
568 : *
569 : * For VACUUM (with or without ANALYZE): always do so, so that we can
570 : * release locks as soon as possible. (We could possibly use the outer
571 : * transaction for a one-table VACUUM, but handling TOAST tables would be
572 : * problematic.)
573 : *
574 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
575 : * start/commit our own transactions. Also, there's no need to do so if
576 : * only processing one relation. For multiple relations when not within a
577 : * transaction block, and also in an autovacuum worker, use own
578 : * transactions so we can release locks sooner.
579 : */
580 110376 : if (params->options & VACOPT_VACUUM)
581 105394 : use_own_xacts = true;
582 : else
583 : {
584 : Assert(params->options & VACOPT_ANALYZE);
585 4982 : if (AmAutoVacuumWorkerProcess())
586 382 : use_own_xacts = true;
587 4600 : else if (in_outer_xact)
588 238 : use_own_xacts = false;
589 4362 : else if (list_length(relations) > 1)
590 752 : use_own_xacts = true;
591 : else
592 3610 : use_own_xacts = false;
593 : }
594 :
595 : /*
596 : * vacuum_rel expects to be entered with no transaction active; it will
597 : * start and commit its own transaction. But we are called by an SQL
598 : * command, and so we are executing inside a transaction already. We
599 : * commit the transaction started in PostgresMain() here, and start
600 : * another one before exiting to match the commit waiting for us back in
601 : * PostgresMain().
602 : */
603 110376 : if (use_own_xacts)
604 : {
605 : Assert(!in_outer_xact);
606 :
607 : /* ActiveSnapshot is not set by autovacuum */
608 106528 : if (ActiveSnapshotSet())
609 9686 : PopActiveSnapshot();
610 :
611 : /* matches the StartTransaction in PostgresMain() */
612 106528 : CommitTransactionCommand();
613 : }
614 :
615 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
616 110376 : PG_TRY();
617 : {
618 : ListCell *cur;
619 :
620 110376 : in_vacuum = true;
621 110376 : VacuumFailsafeActive = false;
622 110376 : VacuumUpdateCosts();
623 110376 : VacuumCostBalance = 0;
624 110376 : VacuumCostBalanceLocal = 0;
625 110376 : VacuumSharedCostBalance = NULL;
626 110376 : VacuumActiveNWorkers = NULL;
627 :
628 : /*
629 : * Loop to process each selected relation.
630 : */
631 237020 : foreach(cur, relations)
632 : {
633 126710 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
634 :
635 126710 : if (params->options & VACOPT_VACUUM)
636 : {
/*
 * A false result from vacuum_rel() skips the ANALYZE step for this
 * relation; the "continue" also bypasses the VacuumFailsafeActive
 * reset at the bottom of this loop iteration.
 */
637 113824 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
638 100 : continue;
639 : }
640 :
641 126602 : if (params->options & VACOPT_ANALYZE)
642 : {
643 : /*
644 : * If using separate xacts, start one for analyze. Otherwise,
645 : * we can use the outer transaction.
646 : */
647 15950 : if (use_own_xacts)
648 : {
649 12152 : StartTransactionCommand();
650 : /* functions in indexes may want a snapshot set */
651 12152 : PushActiveSnapshot(GetTransactionSnapshot());
652 : }
653 :
654 15950 : analyze_rel(vrel->oid, vrel->relation, params,
655 : vrel->va_cols, in_outer_xact, bstrategy);
656 :
657 15892 : if (use_own_xacts)
658 : {
659 12114 : PopActiveSnapshot();
660 12114 : CommitTransactionCommand();
661 : }
662 : else
663 : {
664 : /*
665 : * If we're not using separate xacts, better separate the
666 : * ANALYZE actions with CCIs. This avoids trouble if user
667 : * says "ANALYZE t, t".
668 : */
669 3778 : CommandCounterIncrement();
670 : }
671 : }
672 :
673 : /*
674 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
675 : * next relation.
676 : */
677 126544 : VacuumFailsafeActive = false;
678 : }
679 : }
680 66 : PG_FINALLY();
681 : {
/*
 * Clear the recursion guard and cost-accounting state.
 * NOTE(review): PG_FINALLY presumably runs on both the normal and the
 * error path -- confirm against the macro's definition.
 */
682 110376 : in_vacuum = false;
683 110376 : VacuumCostActive = false;
684 110376 : VacuumFailsafeActive = false;
685 110376 : VacuumCostBalance = 0;
686 : }
687 110376 : PG_END_TRY();
688 :
689 : /*
690 : * Finish up processing.
691 : */
692 110310 : if (use_own_xacts)
693 : {
694 : /* here, we are not in a transaction */
695 :
696 : /*
697 : * This matches the CommitTransaction waiting for us in
698 : * PostgresMain().
699 : */
700 106482 : StartTransactionCommand();
701 : }
702 :
703 110310 : if ((params->options & VACOPT_VACUUM) &&
704 105360 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
705 : {
706 : /*
707 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
708 : */
709 1784 : vac_update_datfrozenxid();
710 : }
711 :
712 110310 : }
713 :
714 : /*
715 : * Check if the current user has privileges to vacuum or analyze the relation.
716 : * If not, issue a WARNING log message and return false to let the caller
717 : * decide what to do with this relation. This routine is used to decide if a
718 : * relation can be processed for VACUUM or ANALYZE.
 *
 * relid is the relation's OID, used for the ACL check.  reltuple is the
 * relation's pg_class row; only its relisshared and relname fields are read
 * here.  options must include VACOPT_VACUUM and/or VACOPT_ANALYZE (asserted);
 * it only selects the wording of the warning.
 *
 * Returns true when the user may process the relation, false otherwise.
719 : */
720 : bool
721 167120 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
722 : bits32 options)
723 : {
724 : char *relname;
725 :
726 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
727 :
728 : /*----------
729 : * A role has privileges to vacuum or analyze the relation if any of the
730 : * following are true:
731 : * - the role owns the current database and the relation is not shared
732 : * - the role has the MAINTAIN privilege on the relation
733 : *----------
734 : */
735 167120 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
736 193664 : !reltuple->relisshared) ||
737 27318 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
738 166780 : return true;
739 :
/* Permission denied: report under the relation's name and return false. */
740 340 : relname = NameStr(reltuple->relname);
741 :
742 340 : if ((options & VACOPT_VACUUM) != 0)
743 : {
744 224 : ereport(WARNING,
745 : (errmsg("permission denied to vacuum \"%s\", skipping it",
746 : relname)));
747 :
748 : /*
749 : * For VACUUM ANALYZE, both logs could show up, but just generate
750 : * information for VACUUM as that would be the first one to be
751 : * processed.
752 : */
753 224 : return false;
754 : }
755 :
756 116 : if ((options & VACOPT_ANALYZE) != 0)
757 116 : ereport(WARNING,
758 : (errmsg("permission denied to analyze \"%s\", skipping it",
759 : relname)));
760 :
761 116 : return false;
762 : }
763 :
764 :
765 : /*
766 : * vacuum_open_relation
767 : *
768 : * This routine is used for attempting to open and lock a relation which
769 : * is going to be vacuumed or analyzed. If the relation cannot be opened
770 : * or locked, a log is emitted if possible.
 *
 * relid is the OID of the relation to open and lmode the lock level to take.
 * relation, if not NULL, supplies the name used in the "skipping ..."
 * message; passing NULL suppresses that message.  options must include
 * VACOPT_VACUUM and/or VACOPT_ANALYZE (asserted); VACOPT_SKIP_LOCKED selects
 * a non-blocking lock attempt.  verbose only matters in an autovacuum
 * worker, where it decides whether a LOG message is emitted at all.
 *
 * Returns the opened relation, or NULL if it could not be locked or opened.
771 : */
772 : Relation
773 139068 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
774 : bool verbose, LOCKMODE lmode)
775 : {
776 : Relation rel;
/* Cleared only when the conditional lock attempt fails. */
777 139068 : bool rel_lock = true;
778 : int elevel;
779 :
780 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
781 :
782 : /*
783 : * Open the relation and get the appropriate lock on it.
784 : *
785 : * There's a race condition here: the relation may have gone away since
786 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
787 : *
788 : * If we've been asked not to wait for the relation lock, acquire it first
789 : * in non-blocking mode, before calling try_relation_open().
790 : */
791 139068 : if (!(options & VACOPT_SKIP_LOCKED))
792 137720 : rel = try_relation_open(relid, lmode);
793 1348 : else if (ConditionalLockRelationOid(relid, lmode))
794 1328 : rel = try_relation_open(relid, NoLock);
795 : else
796 : {
797 20 : rel = NULL;
798 20 : rel_lock = false;
799 : }
800 :
801 : /* if relation is opened, leave */
802 139068 : if (rel)
803 139036 : return rel;
804 :
805 : /*
806 : * Relation could not be opened, hence generate if possible a log
807 : * informing on the situation.
808 : *
809 : * If the RangeVar is not defined, we do not have enough information to
810 : * provide a meaningful log statement. Chances are that the caller has
811 : * intentionally not provided this information so that this logging is
812 : * skipped, anyway.
813 : */
814 32 : if (relation == NULL)
815 18 : return NULL;
816 :
817 : /*
818 : * Determine the log level.
819 : *
820 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
821 : * statements in the permission checks; otherwise, only log if the caller
822 : * so requested.
823 : */
824 14 : if (!AmAutoVacuumWorkerProcess())
825 14 : elevel = WARNING;
826 0 : else if (verbose)
827 0 : elevel = LOG;
828 : else
829 0 : return NULL;
830 :
/* rel_lock tells "lock not available" apart from "relation no longer exists". */
831 14 : if ((options & VACOPT_VACUUM) != 0)
832 : {
833 10 : if (!rel_lock)
834 6 : ereport(elevel,
835 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
836 : errmsg("skipping vacuum of \"%s\" --- lock not available",
837 : relation->relname)));
838 : else
839 4 : ereport(elevel,
840 : (errcode(ERRCODE_UNDEFINED_TABLE),
841 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
842 : relation->relname)));
843 :
844 : /*
845 : * For VACUUM ANALYZE, both logs could show up, but just generate
846 : * information for VACUUM as that would be the first one to be
847 : * processed.
848 : */
849 10 : return NULL;
850 : }
851 :
852 4 : if ((options & VACOPT_ANALYZE) != 0)
853 : {
854 4 : if (!rel_lock)
855 2 : ereport(elevel,
856 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
857 : errmsg("skipping analyze of \"%s\" --- lock not available",
858 : relation->relname)));
859 : else
860 2 : ereport(elevel,
861 : (errcode(ERRCODE_UNDEFINED_TABLE),
862 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
863 : relation->relname)));
864 : }
865 :
866 4 : return NULL;
867 : }
868 :
869 :
870 : /*
871 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
872 : * and optionally add VacuumRelations for partitions or inheritance children.
873 : *
874 : * If a VacuumRelation does not have an OID supplied and is a partitioned
875 : * table, an extra entry will be added to the output for each partition.
876 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
877 : * it does not want us to expand partitioned tables.
878 : *
879 : * We take care not to modify the input data structure, but instead build
880 : * new VacuumRelation(s) to return. (But note that they will reference
881 : * unmodified parts of the input, eg column lists.) New data structures
882 : * are made in vac_context.
883 : */
884 : static List *
885 110182 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
886 : int options)
887 : {
888 110182 : List *vacrels = NIL;
889 : MemoryContext oldcontext;
890 :
891 : /* If caller supplied OID, there's nothing we need do here. */
892 110182 : if (OidIsValid(vrel->oid))
893 : {
894 96842 : oldcontext = MemoryContextSwitchTo(vac_context);
895 96842 : vacrels = lappend(vacrels, vrel);
896 96842 : MemoryContextSwitchTo(oldcontext);
897 : }
898 : else
899 : {
900 : /*
901 : * Process a specific relation, and possibly partitions or child
902 : * tables thereof.
903 : */
904 : Oid relid;
905 : HeapTuple tuple;
906 : Form_pg_class classForm;
907 : bool include_children;
908 : bool is_partitioned_table;
909 : int rvr_opts;
910 :
911 : /*
912 : * Since autovacuum workers supply OIDs when calling vacuum(), no
913 : * autovacuum worker should reach this code.
914 : */
915 : Assert(!AmAutoVacuumWorkerProcess());
916 :
917 : /*
918 : * We transiently take AccessShareLock to protect the syscache lookup
919 : * below, as well as find_all_inheritors's expectation that the caller
920 : * holds some lock on the starting relation.
921 : */
922 13340 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
923 13340 : relid = RangeVarGetRelidExtended(vrel->relation,
924 : AccessShareLock,
925 : rvr_opts,
926 : NULL, NULL);
927 :
928 : /*
929 : * If the lock is unavailable, emit the same log statement that
930 : * vacuum_rel() and analyze_rel() would.
931 : */
932 13304 : if (!OidIsValid(relid))
933 : {
934 8 : if (options & VACOPT_VACUUM)
935 6 : ereport(WARNING,
936 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
937 : errmsg("skipping vacuum of \"%s\" --- lock not available",
938 : vrel->relation->relname)));
939 : else
940 2 : ereport(WARNING,
941 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
942 : errmsg("skipping analyze of \"%s\" --- lock not available",
943 : vrel->relation->relname)));
944 8 : return vacrels;
945 : }
946 :
947 : /*
948 : * To check whether the relation is a partitioned table and its
949 : * ownership, fetch its syscache entry.
950 : */
951 13296 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
952 13296 : if (!HeapTupleIsValid(tuple))
953 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
954 13296 : classForm = (Form_pg_class) GETSTRUCT(tuple);
955 :
956 : /*
957 : * Make a returnable VacuumRelation for this rel if the user has the
958 : * required privileges.
959 : */
960 13296 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
961 : {
962 13064 : oldcontext = MemoryContextSwitchTo(vac_context);
963 13064 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
964 : relid,
965 : vrel->va_cols));
966 13064 : MemoryContextSwitchTo(oldcontext);
967 : }
968 :
969 : /*
970 : * Vacuuming a partitioned table with ONLY will not do anything since
971 : * the partitioned table itself is empty. Issue a warning if the user
972 : * requests this.
973 : */
974 13296 : include_children = vrel->relation->inh;
975 13296 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
976 13296 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
977 6 : ereport(WARNING,
978 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
979 : vrel->relation->relname)));
980 :
981 13296 : ReleaseSysCache(tuple);
982 :
983 : /*
984 : * Unless the user has specified ONLY, make relation list entries for
985 : * its partitions or inheritance child tables. Note that the list
986 : * returned by find_all_inheritors() includes the passed-in OID, so we
987 : * have to skip that. There's no point in taking locks on the
988 : * individual partitions or child tables yet, and doing so would just
989 : * add unnecessary deadlock risk. For this last reason, we do not yet
990 : * check the ownership of the partitions/tables, which get added to
991 : * the list to process. Ownership will be checked later on anyway.
992 : */
993 13296 : if (include_children)
994 : {
995 13266 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
996 : ListCell *part_lc;
997 :
998 28596 : foreach(part_lc, part_oids)
999 : {
1000 15330 : Oid part_oid = lfirst_oid(part_lc);
1001 :
1002 15330 : if (part_oid == relid)
1003 13266 : continue; /* ignore original table */
1004 :
1005 : /*
1006 : * We omit a RangeVar since it wouldn't be appropriate to
1007 : * complain about failure to open one of these relations
1008 : * later.
1009 : */
1010 2064 : oldcontext = MemoryContextSwitchTo(vac_context);
1011 2064 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1012 : part_oid,
1013 : vrel->va_cols));
1014 2064 : MemoryContextSwitchTo(oldcontext);
1015 : }
1016 : }
1017 :
1018 : /*
1019 : * Release lock again. This means that by the time we actually try to
1020 : * process the table, it might be gone or renamed. In the former case
1021 : * we'll silently ignore it; in the latter case we'll process it
1022 : * anyway, but we must beware that the RangeVar doesn't necessarily
1023 : * identify it anymore. This isn't ideal, perhaps, but there's little
1024 : * practical alternative, since we're typically going to commit this
1025 : * transaction and begin a new one between now and then. Moreover,
1026 : * holding locks on multiple relations would create significant risk
1027 : * of deadlock.
1028 : */
1029 13296 : UnlockRelationOid(relid, AccessShareLock);
1030 : }
1031 :
1032 110138 : return vacrels;
1033 : }
1034 :
1035 : /*
1036 : * Construct a list of VacuumRelations for all vacuumable rels in
1037 : * the current database. The list is built in vac_context.
1038 : */
1039 : static List *
1040 204 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1041 : {
1042 204 : List *vacrels = NIL;
1043 : Relation pgclass;
1044 : TableScanDesc scan;
1045 : HeapTuple tuple;
1046 :
1047 204 : pgclass = table_open(RelationRelationId, AccessShareLock);
1048 :
1049 204 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1050 :
1051 87204 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1052 : {
1053 87000 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1054 : MemoryContext oldcontext;
1055 87000 : Oid relid = classForm->oid;
1056 :
1057 : /*
1058 : * We include partitioned tables here; depending on which operation is
1059 : * to be performed, caller will decide whether to process or ignore
1060 : * them.
1061 : */
1062 87000 : if (classForm->relkind != RELKIND_RELATION &&
1063 72280 : classForm->relkind != RELKIND_MATVIEW &&
1064 72274 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1065 72212 : continue;
1066 :
1067 : /* check permissions of relation */
1068 14788 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1069 0 : continue;
1070 :
1071 : /*
1072 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1073 : * We omit a RangeVar since it wouldn't be appropriate to complain
1074 : * about failure to open one of these relations later.
1075 : */
1076 14788 : oldcontext = MemoryContextSwitchTo(vac_context);
1077 14788 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1078 : relid,
1079 : NIL));
1080 14788 : MemoryContextSwitchTo(oldcontext);
1081 : }
1082 :
1083 204 : table_endscan(scan);
1084 204 : table_close(pgclass, AccessShareLock);
1085 :
1086 204 : return vacrels;
1087 : }
1088 :
1089 : /*
1090 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1091 : *
1092 : * The target relation and VACUUM parameters are our inputs.
1093 : *
1094 : * Output parameters are the cutoffs that VACUUM caller should use.
1095 : *
1096 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1097 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1098 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1099 : * minimum).
1100 : */
1101 : bool
1102 122874 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1103 : struct VacuumCutoffs *cutoffs)
1104 : {
1105 : int freeze_min_age,
1106 : multixact_freeze_min_age,
1107 : freeze_table_age,
1108 : multixact_freeze_table_age,
1109 : effective_multixact_freeze_max_age;
1110 : TransactionId nextXID,
1111 : safeOldestXmin,
1112 : aggressiveXIDCutoff;
1113 : MultiXactId nextMXID,
1114 : safeOldestMxact,
1115 : aggressiveMXIDCutoff;
1116 :
1117 : /* Use mutable copies of freeze age parameters */
1118 122874 : freeze_min_age = params->freeze_min_age;
1119 122874 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1120 122874 : freeze_table_age = params->freeze_table_age;
1121 122874 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1122 :
1123 : /* Set pg_class fields in cutoffs */
1124 122874 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1125 122874 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1126 :
1127 : /*
1128 : * Acquire OldestXmin.
1129 : *
1130 : * We can always ignore processes running lazy vacuum. This is because we
1131 : * use these values only for deciding which tuples we must keep in the
1132 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1133 : * XID assigned), it's safe to ignore it. In theory it could be
1134 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1135 : * that only one vacuum process can be working on a particular table at
1136 : * any time, and that each vacuum is always an independent transaction.
1137 : */
1138 122874 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1139 :
1140 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1141 :
1142 : /* Acquire OldestMxact */
1143 122874 : cutoffs->OldestMxact = GetOldestMultiXactId();
1144 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1145 :
1146 : /* Acquire next XID/next MXID values used to apply age-based settings */
1147 122874 : nextXID = ReadNextTransactionId();
1148 122874 : nextMXID = ReadNextMultiXactId();
1149 :
1150 : /*
1151 : * Also compute the multixact age for which freezing is urgent. This is
1152 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1153 : * short of multixact member space.
1154 : */
1155 122874 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1156 :
1157 : /*
1158 : * Almost ready to set freeze output parameters; check if OldestXmin or
1159 : * OldestMxact are held back to an unsafe degree before we start on that
1160 : */
1161 122874 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1162 122874 : if (!TransactionIdIsNormal(safeOldestXmin))
1163 0 : safeOldestXmin = FirstNormalTransactionId;
1164 122874 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1165 122874 : if (safeOldestMxact < FirstMultiXactId)
1166 0 : safeOldestMxact = FirstMultiXactId;
1167 122874 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1168 61254 : ereport(WARNING,
1169 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1170 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1171 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1172 122874 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1173 0 : ereport(WARNING,
1174 : (errmsg("cutoff for freezing multixacts is far in the past"),
1175 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1176 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1177 :
1178 : /*
1179 : * Determine the minimum freeze age to use: as specified by the caller, or
1180 : * vacuum_freeze_min_age, but in any case not more than half
1181 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1182 : * wraparound won't occur too frequently.
1183 : */
1184 122874 : if (freeze_min_age < 0)
1185 11186 : freeze_min_age = vacuum_freeze_min_age;
1186 122874 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1187 : Assert(freeze_min_age >= 0);
1188 :
1189 : /* Compute FreezeLimit, being careful to generate a normal XID */
1190 122874 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1191 122874 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1192 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1193 : /* FreezeLimit must always be <= OldestXmin */
1194 122874 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1195 85456 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1196 :
1197 : /*
1198 : * Determine the minimum multixact freeze age to use: as specified by
1199 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1200 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1201 : * prevent MultiXact wraparound won't occur too frequently.
1202 : */
1203 122874 : if (multixact_freeze_min_age < 0)
1204 11186 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1205 122874 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1206 : effective_multixact_freeze_max_age / 2);
1207 : Assert(multixact_freeze_min_age >= 0);
1208 :
1209 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1210 122874 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1211 122874 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1212 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1213 : /* MultiXactCutoff must always be <= OldestMxact */
1214 122874 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1215 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1216 :
1217 : /*
1218 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1219 : *
1220 : * Determine the table freeze age to use: as specified by the caller, or
1221 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1222 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1223 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1224 : * anti-wraparound autovacuum is launched.
1225 : */
1226 122874 : if (freeze_table_age < 0)
1227 11186 : freeze_table_age = vacuum_freeze_table_age;
1228 122874 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1229 : Assert(freeze_table_age >= 0);
1230 122874 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1231 122874 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1232 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1233 122874 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1234 : aggressiveXIDCutoff))
1235 111688 : return true;
1236 :
1237 : /*
1238 : * Similar to the above, determine the table freeze age to use for
1239 : * multixacts: as specified by the caller, or the value of the
1240 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1241 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1242 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1243 : * multixacts before anti-wraparound autovacuum is launched.
1244 : */
1245 11186 : if (multixact_freeze_table_age < 0)
1246 10966 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1247 11186 : multixact_freeze_table_age =
1248 11186 : Min(multixact_freeze_table_age,
1249 : effective_multixact_freeze_max_age * 0.95);
1250 : Assert(multixact_freeze_table_age >= 0);
1251 11186 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1252 11186 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1253 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1254 11186 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1255 : aggressiveMXIDCutoff))
1256 0 : return true;
1257 :
1258 : /* Non-aggressive VACUUM */
1259 11186 : return false;
1260 : }
1261 :
1262 : /*
1263 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1264 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1265 : * dangerously far in the past.
1266 : *
1267 : * When we return true, VACUUM caller triggers the failsafe.
1268 : */
1269 : bool
1270 125912 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1271 : {
1272 125912 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1273 125912 : MultiXactId relminmxid = cutoffs->relminmxid;
1274 : TransactionId xid_skip_limit;
1275 : MultiXactId multi_skip_limit;
1276 : int skip_index_vacuum;
1277 :
1278 : Assert(TransactionIdIsNormal(relfrozenxid));
1279 : Assert(MultiXactIdIsValid(relminmxid));
1280 :
1281 : /*
1282 : * Determine the index skipping age to use. In any case no less than
1283 : * autovacuum_freeze_max_age * 1.05.
1284 : */
1285 125912 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1286 :
1287 125912 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1288 125912 : if (!TransactionIdIsNormal(xid_skip_limit))
1289 0 : xid_skip_limit = FirstNormalTransactionId;
1290 :
1291 125912 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1292 : {
1293 : /* The table's relfrozenxid is too old */
1294 19970 : return true;
1295 : }
1296 :
1297 : /*
1298 : * Similar to above, determine the index skipping age to use for
1299 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1300 : * 1.05.
1301 : */
1302 105942 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1303 : autovacuum_multixact_freeze_max_age * 1.05);
1304 :
1305 105942 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1306 105942 : if (multi_skip_limit < FirstMultiXactId)
1307 0 : multi_skip_limit = FirstMultiXactId;
1308 :
1309 105942 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1310 : {
1311 : /* The table's relminmxid is too old */
1312 0 : return true;
1313 : }
1314 :
1315 105942 : return false;
1316 : }
1317 :
1318 : /*
1319 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1320 : *
1321 : * If we scanned the whole relation then we should just use the count of
1322 : * live tuples seen; but if we did not, we should not blindly extrapolate
1323 : * from that number, since VACUUM may have scanned a quite nonrandom
1324 : * subset of the table. When we have only partial information, we take
1325 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1326 : * of the tuple density in the unscanned pages.
1327 : *
1328 : * Note: scanned_tuples should count only *live* tuples, since
1329 : * pg_class.reltuples is defined that way.
1330 : */
1331 : double
1332 122314 : vac_estimate_reltuples(Relation relation,
1333 : BlockNumber total_pages,
1334 : BlockNumber scanned_pages,
1335 : double scanned_tuples)
1336 : {
1337 122314 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1338 122314 : double old_rel_tuples = relation->rd_rel->reltuples;
1339 : double old_density;
1340 : double unscanned_pages;
1341 : double total_tuples;
1342 :
1343 : /* If we did scan the whole table, just use the count as-is */
1344 122314 : if (scanned_pages >= total_pages)
1345 118106 : return scanned_tuples;
1346 :
1347 : /*
1348 : * When successive VACUUM commands scan the same few pages again and
1349 : * again, without anything from the table really changing, there is a risk
1350 : * that our beliefs about tuple density will gradually become distorted.
1351 : * This might be caused by vacuumlazy.c implementation details, such as
1352 : * its tendency to always scan the last heap page. Handle that here.
1353 : *
1354 : * If the relation is _exactly_ the same size according to the existing
1355 : * pg_class entry, and only a few of its pages (less than 2%) were
1356 : * scanned, keep the existing value of reltuples. Also keep the existing
1357 : * value when only a subset of rel's pages <= a single page were scanned.
1358 : *
1359 : * (Note: we might be returning -1 here.)
1360 : */
1361 4208 : if (old_rel_pages == total_pages &&
1362 4170 : scanned_pages < (double) total_pages * 0.02)
1363 2930 : return old_rel_tuples;
1364 1278 : if (scanned_pages <= 1)
1365 1018 : return old_rel_tuples;
1366 :
1367 : /*
1368 : * If old density is unknown, we can't do much except scale up
1369 : * scanned_tuples to match total_pages.
1370 : */
1371 260 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1372 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1373 :
1374 : /*
1375 : * Okay, we've covered the corner cases. The normal calculation is to
1376 : * convert the old measurement to a density (tuples per page), then
1377 : * estimate the number of tuples in the unscanned pages using that figure,
1378 : * and finally add on the number of tuples in the scanned pages.
1379 : */
1380 258 : old_density = old_rel_tuples / old_rel_pages;
1381 258 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1382 258 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1383 258 : return floor(total_tuples + 0.5);
1384 : }
1385 :
1386 :
1387 : /*
1388 : * vac_update_relstats() -- update statistics for one relation
1389 : *
1390 : * Update the whole-relation statistics that are kept in its pg_class
1391 : * row. There are additional stats that will be updated if we are
1392 : * doing ANALYZE, but we always update these stats. This routine works
1393 : * for both index and heap relation entries in pg_class.
1394 : *
1395 : * We violate transaction semantics here by overwriting the rel's
1396 : * existing pg_class tuple with the new values. This is reasonably
1397 : * safe as long as we're sure that the new values are correct whether or
1398 : * not this transaction commits. The reason for doing this is that if
1399 : * we updated these tuples in the usual way, vacuuming pg_class itself
1400 : * wouldn't work very well --- by the time we got done with a vacuum
1401 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1402 : * course, this only works for fixed-size not-null columns, but these are.
1403 : *
1404 : * Another reason for doing it this way is that when we are in a lazy
1405 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1406 : * Somebody vacuuming pg_class might think they could delete a tuple
1407 : * marked with xmin = our xid.
1408 : *
1409 : * In addition to fundamentally nontransactional statistics such as
1410 : * relpages and relallvisible, we try to maintain certain lazily-updated
1411 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1412 : * It's safe to do this in VACUUM, which can't run in parallel with
1413 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1414 : * However, it's *not* safe to do it in an ANALYZE that's within an
1415 : * outer transaction, because for example the current transaction might
1416 : * have dropped the last index; then we'd think relhasindex should be
1417 : * cleared, but if the transaction later rolls back this would be wrong.
1418 : * So we refrain from updating the DDL flags if we're inside an outer
1419 : * transaction. This is OK since postponing the flag maintenance is
1420 : * always allowable.
1421 : *
1422 : * Note: num_tuples should count only *live* tuples, since
1423 : * pg_class.reltuples is defined that way.
1424 : *
1425 : * This routine is shared by VACUUM and ANALYZE.
1426 : */
1427 : void
1428 162722 : vac_update_relstats(Relation relation,
1429 : BlockNumber num_pages, double num_tuples,
1430 : BlockNumber num_all_visible_pages,
1431 : BlockNumber num_all_frozen_pages,
1432 : bool hasindex, TransactionId frozenxid,
1433 : MultiXactId minmulti,
1434 : bool *frozenxid_updated, bool *minmulti_updated,
1435 : bool in_outer_xact)
1436 : {
1437 162722 : Oid relid = RelationGetRelid(relation);
1438 : Relation rd;
1439 : ScanKeyData key[1];
1440 : HeapTuple ctup;
1441 : void *inplace_state;
1442 : Form_pg_class pgcform;
1443 : bool dirty,
1444 : futurexid,
1445 : futuremxid;
1446 : TransactionId oldfrozenxid;
1447 : MultiXactId oldminmulti;
1448 :
1449 162722 : rd = table_open(RelationRelationId, RowExclusiveLock);
1450 :
1451 : /* Fetch a copy of the tuple to scribble on */
1452 162722 : ScanKeyInit(&key[0],
1453 : Anum_pg_class_oid,
1454 : BTEqualStrategyNumber, F_OIDEQ,
1455 : ObjectIdGetDatum(relid));
1456 162722 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1457 : NULL, 1, key, &ctup, &inplace_state);
1458 162720 : if (!HeapTupleIsValid(ctup))
1459 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1460 : relid);
1461 162720 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1462 :
1463 : /* Apply statistical updates, if any, to copied tuple */
1464 :
1465 162720 : dirty = false;
1466 162720 : if (pgcform->relpages != (int32) num_pages)
1467 : {
1468 9128 : pgcform->relpages = (int32) num_pages;
1469 9128 : dirty = true;
1470 : }
1471 162720 : if (pgcform->reltuples != (float4) num_tuples)
1472 : {
1473 19436 : pgcform->reltuples = (float4) num_tuples;
1474 19436 : dirty = true;
1475 : }
1476 162720 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1477 : {
1478 5646 : pgcform->relallvisible = (int32) num_all_visible_pages;
1479 5646 : dirty = true;
1480 : }
1481 162720 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1482 : {
1483 5056 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1484 5056 : dirty = true;
1485 : }
1486 :
1487 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1488 :
1489 162720 : if (!in_outer_xact)
1490 : {
1491 : /*
1492 : * If we didn't find any indexes, reset relhasindex.
1493 : */
1494 162406 : if (pgcform->relhasindex && !hasindex)
1495 : {
1496 22 : pgcform->relhasindex = false;
1497 22 : dirty = true;
1498 : }
1499 :
1500 : /* We also clear relhasrules and relhastriggers if needed */
1501 162406 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1502 : {
1503 0 : pgcform->relhasrules = false;
1504 0 : dirty = true;
1505 : }
1506 162406 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1507 : {
1508 6 : pgcform->relhastriggers = false;
1509 6 : dirty = true;
1510 : }
1511 : }
1512 :
1513 : /*
1514 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1515 : * indicating it has no new data.
1516 : *
1517 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1518 : * stored relfrozenxid is "in the future" then it seems best to assume
1519 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1520 : * This should match vac_update_datfrozenxid() concerning what we consider
1521 : * to be "in the future".
1522 : */
1523 162720 : oldfrozenxid = pgcform->relfrozenxid;
1524 162720 : futurexid = false;
1525 162720 : if (frozenxid_updated)
1526 122308 : *frozenxid_updated = false;
1527 162720 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1528 : {
1529 58692 : bool update = false;
1530 :
1531 58692 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1532 58598 : update = true;
1533 94 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1534 0 : futurexid = update = true;
1535 :
1536 58692 : if (update)
1537 : {
1538 58598 : pgcform->relfrozenxid = frozenxid;
1539 58598 : dirty = true;
1540 58598 : if (frozenxid_updated)
1541 58598 : *frozenxid_updated = true;
1542 : }
1543 : }
1544 :
1545 : /* Similarly for relminmxid */
1546 162720 : oldminmulti = pgcform->relminmxid;
1547 162720 : futuremxid = false;
1548 162720 : if (minmulti_updated)
1549 122308 : *minmulti_updated = false;
1550 162720 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1551 : {
1552 430 : bool update = false;
1553 :
1554 430 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1555 430 : update = true;
1556 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1557 0 : futuremxid = update = true;
1558 :
1559 430 : if (update)
1560 : {
1561 430 : pgcform->relminmxid = minmulti;
1562 430 : dirty = true;
1563 430 : if (minmulti_updated)
1564 430 : *minmulti_updated = true;
1565 : }
1566 : }
1567 :
1568 : /* If anything changed, write out the tuple. */
1569 162720 : if (dirty)
1570 72348 : systable_inplace_update_finish(inplace_state, ctup);
1571 : else
1572 90372 : systable_inplace_update_cancel(inplace_state);
1573 :
1574 162720 : table_close(rd, RowExclusiveLock);
1575 :
1576 162720 : if (futurexid)
1577 0 : ereport(WARNING,
1578 : (errcode(ERRCODE_DATA_CORRUPTED),
1579 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1580 : oldfrozenxid, frozenxid,
1581 : RelationGetRelationName(relation))));
1582 162720 : if (futuremxid)
1583 0 : ereport(WARNING,
1584 : (errcode(ERRCODE_DATA_CORRUPTED),
1585 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1586 : oldminmulti, minmulti,
1587 : RelationGetRelationName(relation))));
1588 162720 : }
1589 :
1590 :
1591 : /*
1592 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1593 : *
1594 : * Update pg_database's datfrozenxid entry for our database to be the
1595 : * minimum of the pg_class.relfrozenxid values.
1596 : *
1597 : * Similarly, update our datminmxid to be the minimum of the
1598 : * pg_class.relminmxid values.
1599 : *
1600 : * If we are able to advance either pg_database value, also try to
1601 : * truncate pg_xact and pg_multixact.
1602 : *
1603 : * We violate transaction semantics here by overwriting the database's
1604 : * existing pg_database tuple with the new values. This is reasonably
1605 : * safe since the new values are correct whether or not this transaction
1606 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1607 : * behind after a VACUUM.
1608 : */
1609 : void
1610 4204 : vac_update_datfrozenxid(void)
1611 : {
1612 : HeapTuple tuple;
1613 : Form_pg_database dbform;
1614 : Relation relation;
1615 : SysScanDesc scan;
1616 : HeapTuple classTup;
1617 : TransactionId newFrozenXid;
1618 : MultiXactId newMinMulti;
1619 : TransactionId lastSaneFrozenXid;
1620 : MultiXactId lastSaneMinMulti;
1621 4204 : bool bogus = false;
1622 4204 : bool dirty = false;
1623 : ScanKeyData key[1];
1624 : void *inplace_state;
1625 :
1626 : /*
1627 : * Restrict this task to one backend per database. This avoids race
1628 : * conditions that would move datfrozenxid or datminmxid backward. It
1629 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1630 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1631 : */
1632 4204 : LockDatabaseFrozenIds(ExclusiveLock);
1633 :
1634 : /*
1635 : * Initialize the "min" calculation with
1636 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1637 : * approximation to the minimum relfrozenxid for not-yet-committed
1638 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1639 : * cannot produce a wrong minimum by starting with this.
1640 : */
1641 4204 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1642 :
1643 : /*
1644 : * Similarly, initialize the MultiXact "min" with the value that would be
1645 : * used on pg_class for new tables. See AddNewRelationTuple().
1646 : */
1647 4204 : newMinMulti = GetOldestMultiXactId();
1648 :
1649 : /*
1650 : * Identify the latest relfrozenxid and relminmxid values that we could
1651 : * validly see during the scan. These are conservative values, but it's
1652 : * not really worth trying to be more exact.
1653 : */
1654 4204 : lastSaneFrozenXid = ReadNextTransactionId();
1655 4204 : lastSaneMinMulti = ReadNextMultiXactId();
1656 :
1657 : /*
1658 : * We must seqscan pg_class to find the minimum Xid, because there is no
1659 : * index that can help us here.
1660 : *
1661 : * See vac_truncate_clog() for the race condition to prevent.
1662 : */
1663 4204 : relation = table_open(RelationRelationId, AccessShareLock);
1664 :
1665 4204 : scan = systable_beginscan(relation, InvalidOid, false,
1666 : NULL, 0, NULL);
1667 :
1668 2181444 : while ((classTup = systable_getnext(scan)) != NULL)
1669 : {
1670 2177240 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1671 2177240 : TransactionId relfrozenxid = classForm->relfrozenxid;
1672 2177240 : TransactionId relminmxid = classForm->relminmxid;
1673 :
1674 : /*
1675 : * Only consider relations able to hold unfrozen XIDs (anything else
1676 : * should have InvalidTransactionId in relfrozenxid anyway).
1677 : */
1678 2177240 : if (classForm->relkind != RELKIND_RELATION &&
1679 1723630 : classForm->relkind != RELKIND_MATVIEW &&
1680 1721372 : classForm->relkind != RELKIND_TOASTVALUE)
1681 : {
1682 : Assert(!TransactionIdIsValid(relfrozenxid));
1683 : Assert(!MultiXactIdIsValid(relminmxid));
1684 1483396 : continue;
1685 : }
1686 :
1687 : /*
1688 : * Some table AMs might not need per-relation xid / multixid horizons.
1689 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1690 : * to not be set (i.e. set to their respective Invalid*Id)
1691 : * independently. Thus validate and compute horizon for each only if
1692 : * set.
1693 : *
1694 : * If things are working properly, no relation should have a
1695 : * relfrozenxid or relminmxid that is "in the future". However, such
1696 : * cases have been known to arise due to bugs in pg_upgrade. If we
1697 : * see any entries that are "in the future", chicken out and don't do
1698 : * anything. This ensures we won't truncate clog & multixact SLRUs
1699 : * before those relations have been scanned and cleaned up.
1700 : */
1701 :
1702 693844 : if (TransactionIdIsValid(relfrozenxid))
1703 : {
1704 : Assert(TransactionIdIsNormal(relfrozenxid));
1705 :
1706 : /* check for values in the future */
1707 693844 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1708 : {
1709 0 : bogus = true;
1710 0 : break;
1711 : }
1712 :
1713 : /* determine new horizon */
1714 693844 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1715 4290 : newFrozenXid = relfrozenxid;
1716 : }
1717 :
1718 693844 : if (MultiXactIdIsValid(relminmxid))
1719 : {
1720 : /* check for values in the future */
1721 693844 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1722 : {
1723 0 : bogus = true;
1724 0 : break;
1725 : }
1726 :
1727 : /* determine new horizon */
1728 693844 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1729 212 : newMinMulti = relminmxid;
1730 : }
1731 : }
1732 :
1733 : /* we're done with pg_class */
1734 4204 : systable_endscan(scan);
1735 4204 : table_close(relation, AccessShareLock);
1736 :
1737 : /* chicken out if bogus data found */
1738 4204 : if (bogus)
1739 0 : return;
1740 :
1741 : Assert(TransactionIdIsNormal(newFrozenXid));
1742 : Assert(MultiXactIdIsValid(newMinMulti));
1743 :
1744 : /* Now fetch the pg_database tuple we need to update. */
1745 4204 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1746 :
1747 : /*
1748 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1749 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1750 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1751 : * For now, don't optimize that.
1752 : */
1753 4204 : ScanKeyInit(&key[0],
1754 : Anum_pg_database_oid,
1755 : BTEqualStrategyNumber, F_OIDEQ,
1756 : ObjectIdGetDatum(MyDatabaseId));
1757 :
1758 4204 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1759 : NULL, 1, key, &tuple, &inplace_state);
1760 :
1761 4204 : if (!HeapTupleIsValid(tuple))
1762 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1763 :
1764 4204 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1765 :
1766 : /*
1767 : * As in vac_update_relstats(), we ordinarily don't want to let
1768 : * datfrozenxid go backward; but if it's "in the future" then it must be
1769 : * corrupt and it seems best to overwrite it.
1770 : */
1771 4752 : if (dbform->datfrozenxid != newFrozenXid &&
1772 548 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1773 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1774 : {
1775 548 : dbform->datfrozenxid = newFrozenXid;
1776 548 : dirty = true;
1777 : }
1778 : else
1779 3656 : newFrozenXid = dbform->datfrozenxid;
1780 :
1781 : /* Ditto for datminmxid */
1782 4206 : if (dbform->datminmxid != newMinMulti &&
1783 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1784 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1785 : {
1786 2 : dbform->datminmxid = newMinMulti;
1787 2 : dirty = true;
1788 : }
1789 : else
1790 4202 : newMinMulti = dbform->datminmxid;
1791 :
1792 4204 : if (dirty)
1793 548 : systable_inplace_update_finish(inplace_state, tuple);
1794 : else
1795 3656 : systable_inplace_update_cancel(inplace_state);
1796 :
1797 4204 : heap_freetuple(tuple);
1798 4204 : table_close(relation, RowExclusiveLock);
1799 :
1800 : /*
1801 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1802 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1803 : * XID-wrap-limit info is stale, since this action will update that too.
1804 : */
1805 4204 : if (dirty || ForceTransactionIdLimitUpdate())
1806 1110 : vac_truncate_clog(newFrozenXid, newMinMulti,
1807 : lastSaneFrozenXid, lastSaneMinMulti);
1808 : }
1809 :
1810 :
1811 : /*
1812 : * vac_truncate_clog() -- attempt to truncate the commit log
1813 : *
1814 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1815 : * and use it to truncate the transaction commit log (pg_xact).
1816 : * Also update the XID wrap limit info maintained by varsup.c.
1817 : * Likewise for datminmxid.
1818 : *
1819 : * The passed frozenXID and minMulti are the updated values for my own
1820 : * pg_database entry. They're used to initialize the "min" calculations.
1821 : * The caller also passes the "last sane" XID and MXID, since it has
1822 : * those at hand already.
1823 : *
1824 : * This routine is only invoked when we've managed to change our
1825 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1826 : * XID-wrap-limit info is stale.
1827 : */
1828 : static void
1829 1110 : vac_truncate_clog(TransactionId frozenXID,
1830 : MultiXactId minMulti,
1831 : TransactionId lastSaneFrozenXid,
1832 : MultiXactId lastSaneMinMulti)
1833 : {
1834 1110 : TransactionId nextXID = ReadNextTransactionId();
1835 : Relation relation;
1836 : TableScanDesc scan;
1837 : HeapTuple tuple;
1838 : Oid oldestxid_datoid;
1839 : Oid minmulti_datoid;
1840 1110 : bool bogus = false;
1841 1110 : bool frozenAlreadyWrapped = false;
1842 :
1843 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1844 1110 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1845 :
1846 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1847 1110 : oldestxid_datoid = MyDatabaseId;
1848 1110 : minmulti_datoid = MyDatabaseId;
1849 :
1850 : /*
1851 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1852 : *
1853 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1854 : * the values could change while we look at them. Fetch each one just
1855 : * once to ensure sane behavior of the comparison logic. (Here, as in
1856 : * many other places, we assume that fetching or updating an XID in shared
1857 : * storage is atomic.)
1858 : *
1859 : * Note: we need not worry about a race condition with new entries being
1860 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1861 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1862 : * of the interlock against copying a DB containing an active backend.
1863 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1864 : * concurrently modify the datfrozenxid's of different databases, the
1865 : * worst possible outcome is that pg_xact is not truncated as aggressively
1866 : * as it could be.
1867 : */
1868 1110 : relation = table_open(DatabaseRelationId, AccessShareLock);
1869 :
1870 1110 : scan = table_beginscan_catalog(relation, 0, NULL);
1871 :
1872 4274 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1873 : {
1874 3164 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1875 3164 : TransactionId datfrozenxid = dbform->datfrozenxid;
1876 3164 : TransactionId datminmxid = dbform->datminmxid;
1877 :
1878 : Assert(TransactionIdIsNormal(datfrozenxid));
1879 : Assert(MultiXactIdIsValid(datminmxid));
1880 :
1881 : /*
1882 : * If database is in the process of getting dropped, or has been
1883 : * interrupted while doing so, no connections to it are possible
1884 : * anymore. Therefore we don't need to take it into account here.
1885 : * Which is good, because it can't be processed by autovacuum either.
1886 : */
1887 3164 : if (database_is_invalid_form((Form_pg_database) dbform))
1888 : {
1889 2 : elog(DEBUG2,
1890 : "skipping invalid database \"%s\" while computing relfrozenxid",
1891 : NameStr(dbform->datname));
1892 2 : continue;
1893 : }
1894 :
1895 : /*
1896 : * If things are working properly, no database should have a
1897 : * datfrozenxid or datminmxid that is "in the future". However, such
1898 : * cases have been known to arise due to bugs in pg_upgrade. If we
1899 : * see any entries that are "in the future", chicken out and don't do
1900 : * anything. This ensures we won't truncate clog before those
1901 : * databases have been scanned and cleaned up. (We will issue the
1902 : * "already wrapped" warning if appropriate, though.)
1903 : */
1904 6324 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1905 3162 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1906 0 : bogus = true;
1907 :
1908 3162 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1909 0 : frozenAlreadyWrapped = true;
1910 3162 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1911 : {
1912 476 : frozenXID = datfrozenxid;
1913 476 : oldestxid_datoid = dbform->oid;
1914 : }
1915 :
1916 3162 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1917 : {
1918 4 : minMulti = datminmxid;
1919 4 : minmulti_datoid = dbform->oid;
1920 : }
1921 : }
1922 :
1923 1110 : table_endscan(scan);
1924 :
1925 1110 : table_close(relation, AccessShareLock);
1926 :
1927 : /*
1928 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1929 : * the computed minimum XID might be bogus. This case should now be
1930 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1931 : * test anyway.
1932 : */
1933 1110 : if (frozenAlreadyWrapped)
1934 : {
1935 0 : ereport(WARNING,
1936 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1937 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1938 0 : LWLockRelease(WrapLimitsVacuumLock);
1939 0 : return;
1940 : }
1941 :
1942 : /* chicken out if data is bogus in any other way */
1943 1110 : if (bogus)
1944 : {
1945 0 : LWLockRelease(WrapLimitsVacuumLock);
1946 0 : return;
1947 : }
1948 :
1949 : /*
1950 : * Advance the oldest value for commit timestamps before truncating, so
1951 : * that if a user requests a timestamp for a transaction we're truncating
1952 : * away right after this point, they get NULL instead of an ugly "file not
1953 : * found" error from slru.c. This doesn't matter for xact/multixact
1954 : * because they are not subject to arbitrary lookups from users.
1955 : */
1956 1110 : AdvanceOldestCommitTsXid(frozenXID);
1957 :
1958 : /*
1959 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1960 : */
1961 1110 : TruncateCLOG(frozenXID, oldestxid_datoid);
1962 1110 : TruncateCommitTs(frozenXID);
1963 1110 : TruncateMultiXact(minMulti, minmulti_datoid);
1964 :
1965 : /*
1966 : * Update the wrap limit for GetNewTransactionId and creation of new
1967 : * MultiXactIds. Note: these functions will also signal the postmaster
1968 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1969 : * signaling twice?
1970 : */
1971 1110 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1972 1110 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1973 :
1974 1110 : LWLockRelease(WrapLimitsVacuumLock);
1975 : }
1976 :
1977 :
1978 : /*
1979 : * vacuum_rel() -- vacuum one heap relation
1980 : *
1981 : * relid identifies the relation to vacuum. If relation is supplied,
1982 : * use the name therein for reporting any failure to open/lock the rel;
1983 : * do not use it once we've successfully opened the rel, since it might
1984 : * be stale.
1985 : *
1986 : * Returns true if it's okay to proceed with a requested ANALYZE
1987 : * operation on this table.
1988 : *
1989 : * Doing one heap at a time incurs extra overhead, since we need to
1990 : * check that the heap exists again just before we vacuum it. The
1991 : * reason that we do this is so that vacuuming can be spread across
1992 : * many small transactions. Otherwise, two-phase locking would require
1993 : * us to lock the entire database during one pass of the vacuum cleaner.
1994 : *
1995 : * At entry and exit, we are not inside a transaction.
1996 : */
1997 : static bool
1998 123118 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
1999 : BufferAccessStrategy bstrategy)
2000 : {
2001 : LOCKMODE lmode;
2002 : Relation rel;
2003 : LockRelId lockrelid;
2004 : Oid priv_relid;
2005 : Oid toast_relid;
2006 : Oid save_userid;
2007 : int save_sec_context;
2008 : int save_nestlevel;
2009 :
2010 : Assert(params != NULL);
2011 :
2012 : /* Begin a transaction for vacuuming this relation */
2013 123118 : StartTransactionCommand();
2014 :
2015 123118 : if (!(params->options & VACOPT_FULL))
2016 : {
2017 : /*
2018 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2019 : * other concurrent VACUUMs know that they can ignore this one while
2020 : * determining their OldestXmin. (The reason we don't set it during a
2021 : * full VACUUM is exactly that we may have to run user-defined
2022 : * functions for functional indexes, and we want to make sure that if
2023 : * they use the snapshot set above, any tuples it requires can't get
2024 : * removed from other tables. An index function that depends on the
2025 : * contents of other tables is arguably broken, but we won't break it
2026 : * here by violating transaction semantics.)
2027 : *
2028 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2029 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2030 : * in an emergency.
2031 : *
2032 : * Note: these flags remain set until CommitTransaction or
2033 : * AbortTransaction. We don't want to clear them until we reset
2034 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2035 : * might appear to go backwards, which is probably Not Good. (We also
2036 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2037 : * xmin doesn't become visible ahead of setting the flag.)
2038 : */
2039 122710 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2040 122710 : MyProc->statusFlags |= PROC_IN_VACUUM;
2041 122710 : if (params->is_wraparound)
2042 96192 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2043 122710 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2044 122710 : LWLockRelease(ProcArrayLock);
2045 : }
2046 :
2047 : /*
2048 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2049 : * cutoff xids in local memory wrapping around, and to have updated xmin
2050 : * horizons.
2051 : */
2052 123118 : PushActiveSnapshot(GetTransactionSnapshot());
2053 :
2054 : /*
2055 : * Check for user-requested abort. Note we want this to be inside a
2056 : * transaction, so xact.c doesn't issue useless WARNING.
2057 : */
2058 123118 : CHECK_FOR_INTERRUPTS();
2059 :
2060 : /*
2061 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2062 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2063 : * way, we can be sure that no other backend is vacuuming the same table.
2064 : */
2065 246236 : lmode = (params->options & VACOPT_FULL) ?
2066 123118 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2067 :
2068 : /* open the relation and get the appropriate lock on it */
2069 123118 : rel = vacuum_open_relation(relid, relation, params->options,
2070 123118 : params->log_min_duration >= 0, lmode);
2071 :
2072 : /* leave if relation could not be opened or locked */
2073 123118 : if (!rel)
2074 : {
2075 24 : PopActiveSnapshot();
2076 24 : CommitTransactionCommand();
2077 24 : return false;
2078 : }
2079 :
2080 : /*
2081 : * When recursing to a TOAST table, check privileges on the parent. NB:
2082 : * This is only safe to do because we hold a session lock on the main
2083 : * relation that prevents concurrent deletion.
2084 : */
2085 123094 : if (OidIsValid(params->toast_parent))
2086 9294 : priv_relid = params->toast_parent;
2087 : else
2088 113800 : priv_relid = RelationGetRelid(rel);
2089 :
2090 : /*
2091 : * Check if relation needs to be skipped based on privileges. This check
2092 : * happens also when building the relation list to vacuum for a manual
2093 : * operation, and needs to be done additionally here as VACUUM could
2094 : * happen across multiple transactions where privileges could have changed
2095 : * in-between. Make sure to only generate logs for VACUUM in this case.
2096 : */
2097 123094 : if (!vacuum_is_permitted_for_relation(priv_relid,
2098 : rel->rd_rel,
2099 123094 : params->options & ~VACOPT_ANALYZE))
2100 : {
2101 72 : relation_close(rel, lmode);
2102 72 : PopActiveSnapshot();
2103 72 : CommitTransactionCommand();
2104 72 : return false;
2105 : }
2106 :
2107 : /*
2108 : * Check that it's of a vacuumable relkind.
2109 : */
2110 123022 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2111 45408 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2112 45400 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2113 182 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2114 : {
2115 2 : ereport(WARNING,
2116 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2117 : RelationGetRelationName(rel))));
2118 2 : relation_close(rel, lmode);
2119 2 : PopActiveSnapshot();
2120 2 : CommitTransactionCommand();
2121 2 : return false;
2122 : }
2123 :
2124 : /*
2125 : * Silently ignore tables that are temp tables of other backends ---
2126 : * trying to vacuum these will lead to great unhappiness, since their
2127 : * contents are probably not up-to-date on disk. (We don't throw a
2128 : * warning here; it would just lead to chatter during a database-wide
2129 : * VACUUM.)
2130 : */
2131 123020 : if (RELATION_IS_OTHER_TEMP(rel))
2132 : {
2133 2 : relation_close(rel, lmode);
2134 2 : PopActiveSnapshot();
2135 2 : CommitTransactionCommand();
2136 2 : return false;
2137 : }
2138 :
2139 : /*
2140 : * Silently ignore partitioned tables as there is no work to be done. The
2141 : * useful work is on their child partitions, which have been queued up for
2142 : * us separately.
2143 : */
2144 123018 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2145 : {
2146 180 : relation_close(rel, lmode);
2147 180 : PopActiveSnapshot();
2148 180 : CommitTransactionCommand();
2149 : /* It's OK to proceed with ANALYZE on this table */
2150 180 : return true;
2151 : }
2152 :
2153 : /*
2154 : * Get a session-level lock too. This will protect our access to the
2155 : * relation across multiple transactions, so that we can vacuum the
2156 : * relation's TOAST table (if any) secure in the knowledge that no one is
2157 : * deleting the parent relation.
2158 : *
2159 : * NOTE: this cannot block, even if someone else is waiting for access,
2160 : * because the lock manager knows that both lock requests are from the
2161 : * same process.
2162 : */
2163 122838 : lockrelid = rel->rd_lockInfo.lockRelId;
2164 122838 : LockRelationIdForSession(&lockrelid, lmode);
2165 :
2166 : /*
2167 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2168 : * specified in VACUUM command, or when running in an autovacuum worker
2169 : */
2170 122838 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2171 : {
2172 : StdRdOptIndexCleanup vacuum_index_cleanup;
2173 :
2174 104896 : if (rel->rd_options == NULL)
2175 103290 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2176 : else
2177 1606 : vacuum_index_cleanup =
2178 1606 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2179 :
2180 104896 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2181 104872 : params->index_cleanup = VACOPTVALUE_AUTO;
2182 24 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2183 12 : params->index_cleanup = VACOPTVALUE_ENABLED;
2184 : else
2185 : {
2186 : Assert(vacuum_index_cleanup ==
2187 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2188 12 : params->index_cleanup = VACOPTVALUE_DISABLED;
2189 : }
2190 : }
2191 :
2192 : /*
2193 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2194 : * parameter was specified. This overrides the GUC value.
2195 : */
2196 122838 : if (rel->rd_options != NULL &&
2197 1650 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2198 0 : params->max_eager_freeze_failure_rate =
2199 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2200 :
2201 : /*
2202 : * Set truncate option based on truncate reloption or GUC if it wasn't
2203 : * specified in VACUUM command, or when running in an autovacuum worker
2204 : */
2205 122838 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2206 : {
2207 104916 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2208 :
2209 104916 : if (opts && opts->vacuum_truncate_set)
2210 : {
2211 12 : if (opts->vacuum_truncate)
2212 6 : params->truncate = VACOPTVALUE_ENABLED;
2213 : else
2214 6 : params->truncate = VACOPTVALUE_DISABLED;
2215 : }
2216 104904 : else if (vacuum_truncate)
2217 104898 : params->truncate = VACOPTVALUE_ENABLED;
2218 : else
2219 6 : params->truncate = VACOPTVALUE_DISABLED;
2220 : }
2221 :
2222 : /*
2223 : * Remember the relation's TOAST relation for later, if the caller asked
2224 : * us to process it. In VACUUM FULL, though, the toast table is
2225 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2226 : * unless PROCESS_MAIN is disabled.
2227 : */
2228 122838 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2229 26230 : ((params->options & VACOPT_FULL) == 0 ||
2230 380 : (params->options & VACOPT_PROCESS_MAIN) == 0))
2231 25856 : toast_relid = rel->rd_rel->reltoastrelid;
2232 : else
2233 96982 : toast_relid = InvalidOid;
2234 :
2235 : /*
2236 : * Switch to the table owner's userid, so that any index functions are run
2237 : * as that user. Also lock down security-restricted operations and
2238 : * arrange to make GUC variable changes local to this command. (This is
2239 : * unnecessary, but harmless, for lazy VACUUM.)
2240 : */
2241 122838 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2242 122838 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2243 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2244 122838 : save_nestlevel = NewGUCNestLevel();
2245 122838 : RestrictSearchPath();
2246 :
2247 : /*
2248 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2249 : * relation. Otherwise, we can skip this part. If processing the TOAST
2250 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2251 : * to be set when we recurse to the TOAST table.
2252 : */
2253 122838 : if (params->options & VACOPT_PROCESS_MAIN)
2254 : {
2255 : /*
2256 : * Do the actual work --- either FULL or "lazy" vacuum
2257 : */
2258 122684 : if (params->options & VACOPT_FULL)
2259 : {
2260 374 : ClusterParams cluster_params = {0};
2261 :
2262 374 : if ((params->options & VACOPT_VERBOSE) != 0)
2263 2 : cluster_params.options |= CLUOPT_VERBOSE;
2264 :
2265 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2266 374 : cluster_rel(rel, InvalidOid, &cluster_params);
2267 : /* cluster_rel closes the relation, but keeps lock */
2268 :
2269 368 : rel = NULL;
2270 : }
2271 : else
2272 122310 : table_relation_vacuum(rel, params, bstrategy);
2273 : }
2274 :
2275 : /* Roll back any GUC changes executed by index functions */
2276 122830 : AtEOXact_GUC(false, save_nestlevel);
2277 :
2278 : /* Restore userid and security context */
2279 122830 : SetUserIdAndSecContext(save_userid, save_sec_context);
2280 :
2281 : /* all done with this class, but hold lock until commit */
2282 122830 : if (rel)
2283 122462 : relation_close(rel, NoLock);
2284 :
2285 : /*
2286 : * Complete the transaction and free all temporary memory used.
2287 : */
2288 122830 : PopActiveSnapshot();
2289 122830 : CommitTransactionCommand();
2290 :
2291 : /*
2292 : * If the relation has a secondary toast rel, vacuum that too while we
2293 : * still hold the session lock on the main table. Note however that
2294 : * "analyze" will not get done on the toast table. This is good, because
2295 : * the toaster always uses hardcoded index access and statistics are
2296 : * totally unimportant for toast relations.
2297 : */
2298 122830 : if (toast_relid != InvalidOid)
2299 : {
2300 : VacuumParams toast_vacuum_params;
2301 :
2302 : /*
2303 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2304 : * set toast_parent so that the privilege checks are done on the main
2305 : * relation. NB: This is only safe to do because we hold a session
2306 : * lock on the main relation that prevents concurrent deletion.
2307 : */
2308 9294 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2309 9294 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2310 9294 : toast_vacuum_params.toast_parent = relid;
2311 :
2312 9294 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
2313 : }
2314 :
2315 : /*
2316 : * Now release the session-level lock on the main table.
2317 : */
2318 122830 : UnlockRelationIdForSession(&lockrelid, lmode);
2319 :
2320 : /* Report that we really did it. */
2321 122830 : return true;
2322 : }
2323 :
2324 :
2325 : /*
2326 : * Open all the vacuumable indexes of the given relation, obtaining the
2327 : * specified kind of lock on each. Return an array of Relation pointers for
2328 : * the indexes into *Irel, and the number of indexes into *nindexes.
2329 : *
2330 : * We consider an index vacuumable if it is marked insertable (indisready).
2331 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2332 : * execution, and what we have is too corrupt to be processable. We will
2333 : * vacuum even if the index isn't indisvalid; this is important because in a
2334 : * unique index, uniqueness checks will be performed anyway and had better not
2335 : * hit dangling index pointers.
2336 : */
2337 : void
2338 137354 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2339 : int *nindexes, Relation **Irel)
2340 : {
2341 : List *indexoidlist;
2342 : ListCell *indexoidscan;
2343 : int i;
2344 :
2345 : Assert(lockmode != NoLock);
2346 :
2347 137354 : indexoidlist = RelationGetIndexList(relation);
2348 :
2349 : /* allocate enough memory for all indexes */
2350 137354 : i = list_length(indexoidlist);
2351 :
2352 137354 : if (i > 0)
2353 128152 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2354 : else
2355 9202 : *Irel = NULL;
2356 :
2357 : /* collect just the ready indexes */
2358 137354 : i = 0;
2359 341518 : foreach(indexoidscan, indexoidlist)
2360 : {
2361 204164 : Oid indexoid = lfirst_oid(indexoidscan);
2362 : Relation indrel;
2363 :
2364 204164 : indrel = index_open(indexoid, lockmode);
2365 204164 : if (indrel->rd_index->indisready)
2366 204164 : (*Irel)[i++] = indrel;
2367 : else
2368 0 : index_close(indrel, lockmode);
2369 : }
2370 :
2371 137354 : *nindexes = i;
2372 :
2373 137354 : list_free(indexoidlist);
2374 137354 : }
2375 :
2376 : /*
2377 : * Release the resources acquired by vac_open_indexes. Optionally release
2378 : * the locks (say NoLock to keep 'em).
2379 : */
2380 : void
2381 138160 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2382 : {
2383 138160 : if (Irel == NULL)
2384 10014 : return;
2385 :
2386 332298 : while (nindexes--)
2387 : {
2388 204152 : Relation ind = Irel[nindexes];
2389 :
2390 204152 : index_close(ind, lockmode);
2391 : }
2392 128146 : pfree(Irel);
2393 : }
2394 :
2395 : /*
2396 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2397 : *
2398 : * This should be called in each major loop of VACUUM processing,
2399 : * typically once per page processed.
2400 : */
2401 : void
2402 84597542 : vacuum_delay_point(bool is_analyze)
2403 : {
2404 84597542 : double msec = 0;
2405 :
2406 : /* Always check for interrupts */
2407 84597542 : CHECK_FOR_INTERRUPTS();
2408 :
2409 84597542 : if (InterruptPending ||
2410 84597542 : (!VacuumCostActive && !ConfigReloadPending))
2411 73175368 : return;
2412 :
2413 : /*
2414 : * Autovacuum workers should reload the configuration file if requested.
2415 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2416 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2417 : * vacuumed or analyzed.
2418 : */
2419 11422174 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2420 : {
2421 0 : ConfigReloadPending = false;
2422 0 : ProcessConfigFile(PGC_SIGHUP);
2423 0 : VacuumUpdateCosts();
2424 : }
2425 :
2426 : /*
2427 : * If we disabled cost-based delays after reloading the config file,
2428 : * return.
2429 : */
2430 11422174 : if (!VacuumCostActive)
2431 0 : return;
2432 :
2433 : /*
2434 : * For parallel vacuum, the delay is computed based on the shared cost
2435 : * balance. See compute_parallel_delay.
2436 : */
2437 11422174 : if (VacuumSharedCostBalance != NULL)
2438 0 : msec = compute_parallel_delay();
2439 11422174 : else if (VacuumCostBalance >= vacuum_cost_limit)
2440 5394 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2441 :
2442 : /* Nap if appropriate */
2443 11422174 : if (msec > 0)
2444 : {
2445 : instr_time delay_start;
2446 :
2447 5394 : if (msec > vacuum_cost_delay * 4)
2448 16 : msec = vacuum_cost_delay * 4;
2449 :
2450 5394 : if (track_cost_delay_timing)
2451 0 : INSTR_TIME_SET_CURRENT(delay_start);
2452 :
2453 5394 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2454 5394 : pg_usleep(msec * 1000);
2455 5394 : pgstat_report_wait_end();
2456 :
2457 5394 : if (track_cost_delay_timing)
2458 : {
2459 : instr_time delay_end;
2460 : instr_time delay;
2461 :
2462 0 : INSTR_TIME_SET_CURRENT(delay_end);
2463 0 : INSTR_TIME_SET_ZERO(delay);
2464 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2465 :
2466 : /*
2467 : * For parallel workers, we only report the delay time every once
2468 : * in a while to avoid overloading the leader with messages and
2469 : * interrupts.
2470 : */
2471 0 : if (IsParallelWorker())
2472 : {
2473 : static instr_time last_report_time;
2474 : instr_time time_since_last_report;
2475 :
2476 : Assert(!is_analyze);
2477 :
2478 : /* Accumulate the delay time */
2479 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2480 :
2481 : /* Calculate interval since last report */
2482 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2483 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2484 :
2485 : /* If we haven't reported in a while, do so now */
2486 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2487 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2488 : {
2489 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2490 : parallel_vacuum_worker_delay_ns);
2491 :
2492 : /* Reset variables */
2493 0 : last_report_time = delay_end;
2494 0 : parallel_vacuum_worker_delay_ns = 0;
2495 : }
2496 : }
2497 0 : else if (is_analyze)
2498 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2499 0 : INSTR_TIME_GET_NANOSEC(delay));
2500 : else
2501 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2502 0 : INSTR_TIME_GET_NANOSEC(delay));
2503 : }
2504 :
2505 : /*
2506 : * We don't want to ignore postmaster death during very long vacuums
2507 : * with vacuum_cost_delay configured. We can't use the usual
2508 : * WaitLatch() approach here because we want microsecond-based sleep
2509 : * durations above.
2510 : */
2511 5394 : if (IsUnderPostmaster && !PostmasterIsAlive())
2512 0 : exit(1);
2513 :
2514 5394 : VacuumCostBalance = 0;
2515 :
2516 : /*
2517 : * Balance and update limit values for autovacuum workers. We must do
2518 : * this periodically, as the number of workers across which we are
2519 : * balancing the limit may have changed.
2520 : *
2521 : * TODO: There may be better criteria for determining when to do this
2522 : * besides "check after napping".
2523 : */
2524 5394 : AutoVacuumUpdateCostLimit();
2525 :
2526 : /* Might have gotten an interrupt while sleeping */
2527 5394 : CHECK_FOR_INTERRUPTS();
2528 : }
2529 : }
2530 :
2531 : /*
2532 : * Computes the vacuum delay for parallel workers.
2533 : *
2534 : * The basic idea of a cost-based delay for parallel vacuum is to make each
2535 : * worker sleep in proportion to the share of work it has done. To achieve
2536 : * this, all parallel vacuum workers, including the leader process, share a
2537 : * cost balance (VacuumSharedCostBalance) that is updated atomically. On each
2538 : * call, a worker adds the cost it has incurred since the last reset
2539 : * (VacuumCostBalance) to both the shared balance and its own running total
2540 : * (VacuumCostBalanceLocal). If the worker must sleep, the sleep time is
2541 : * computed from VacuumCostBalanceLocal, and VacuumSharedCostBalance is then
2542 : * reduced by that amount. This avoids putting to sleep workers that have
2543 : * done less I/O than the others, and so ensures that workers doing more I/O
2544 : * are throttled more.
2545 : *
2546 : * A worker sleeps only if the shared cost balance has reached
2547 : * vacuum_cost_limit and its local balance exceeds a threshold based on the
2548 : * number of active workers (VacuumActiveNWorkers). Testing reveals that we
2549 : * achieve the required throttling if we force a worker that has done more
2550 : * than 50% of its share of work to sleep. Returns the computed sleep time
2551 : * (the local is named msec, so presumably milliseconds), or 0 for no sleep.
2552 : */
2553 : static double
2554 0 : compute_parallel_delay(void)
2555 : {
2556 0 : double msec = 0;
2557 : uint32 shared_balance;
2558 : int nworkers;
2559 :
2560 : /* Parallel vacuum must be active, i.e. the shared balance pointer is set */
2561 : Assert(VacuumSharedCostBalance);
2562 :
2563 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2564 :
2565 : /* The active-worker count must include at least this process itself */
2566 : Assert(nworkers >= 1);
2567 :
2568 : /* Atomically add our newly incurred cost to the shared cost balance */
2569 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2570 :
2571 : /* Accumulate the same cost into this worker's local running total */
2572 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2573 :
2574 0 : if ((shared_balance >= vacuum_cost_limit) &&
2575 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2576 : {
2577 : /* Sleep in proportion to the local balance, then take it off the shared one */
2578 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2579 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2580 0 : VacuumCostBalanceLocal = 0;
2581 : }
2582 :
2583 : /*
2584 : * Reset VacuumCostBalance, as it has been accumulated into the shared value.
2585 : */
2586 0 : VacuumCostBalance = 0;
2587 :
2588 0 : return msec;
2589 : }
2590 :
2591 : /*
2592 : * A wrapper function of defGetBoolean() that yields a VacOptValue.
2593 : *
2594 : * Returns VACOPTVALUE_ENABLED when defGetBoolean(def) is true, and
2595 : * VACOPTVALUE_DISABLED when it is false.
2596 : */
2597 : static VacOptValue
2598 322 : get_vacoptval_from_boolean(DefElem *def)
2599 : {
2600 322 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2601 : }
2602 :
2603 : /*
2604 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2605 : *
2606 : * Returns the result of index_bulk_delete(), which is given the input stats.
2607 : */
2608 : IndexBulkDeleteResult *
2609 2392 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2610 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2611 : {
2612 : /* Do bulk deletion; vac_tid_reaped() is the callback, dead_items its state */
2613 2392 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2614 : dead_items);
2615 :
2616 2392 : ereport(ivinfo->message_level,
2617 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2618 : RelationGetRelationName(ivinfo->index),
2619 : dead_items_info->num_items)));
2620 :
2621 2392 : return istat;
2622 : }
2623 :
2624 : /*
2625 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2626 : *
2627 : * Returns index_vacuum_cleanup()'s result; if it is NULL, nothing is logged.
2628 : */
2629 : IndexBulkDeleteResult *
2630 151658 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2631 : {
2632 151658 : istat = index_vacuum_cleanup(ivinfo, istat);
2633 :
2634 151658 : if (istat)
2635 2652 : ereport(ivinfo->message_level,
2636 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2637 : RelationGetRelationName(ivinfo->index),
2638 : istat->num_index_tuples,
2639 : istat->num_pages),
2640 : errdetail("%.0f index row versions were removed.\n"
2641 : "%u index pages were newly deleted.\n"
2642 : "%u index pages are currently deleted, of which %u are currently reusable.",
2643 : istat->tuples_removed,
2644 : istat->pages_newly_deleted,
2645 : istat->pages_deleted, istat->pages_free)));
2646 :
2647 151658 : return istat;
2648 : }
2649 :
2650 : /*
2651 : * vac_tid_reaped() -- is a particular tid deletable?
2652 : * "state" is the TidStore to check; returns TidStoreIsMember()'s result.
2653 : * This has the right signature to be an IndexBulkDeleteCallback.
2654 : */
2655 : static bool
2656 6843446 : vac_tid_reaped(ItemPointer itemptr, void *state)
2657 : {
2658 6843446 : TidStore *dead_items = (TidStore *) state;
2659 :
2660 6843446 : return TidStoreIsMember(dead_items, itemptr);
2661 : }
|