Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/memutils.h"
60 : #include "utils/snapmgr.h"
61 : #include "utils/syscache.h"
62 :
63 : /*
64 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
65 : * This aims to avoid sending too many messages and waking up the leader too
66 : * frequently.
67 : */
68 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
69 :
70 : /*
71 : * GUC parameters
72 : */
73 : int vacuum_freeze_min_age;
74 : int vacuum_freeze_table_age;
75 : int vacuum_multixact_freeze_min_age;
76 : int vacuum_multixact_freeze_table_age;
77 : int vacuum_failsafe_age;
78 : int vacuum_multixact_failsafe_age;
79 : double vacuum_max_eager_freeze_failure_rate;
80 : bool track_cost_delay_timing;
81 : bool vacuum_truncate;
82 :
83 : /*
84 : * Variables for cost-based vacuum delay. The defaults differ between
85 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
86 : * vacuum code. They are initialized here to the defaults for client backends
87 : * executing VACUUM or ANALYZE.
88 : */
89 : double vacuum_cost_delay = 0;
90 : int vacuum_cost_limit = 200;
91 :
92 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
93 : int64 parallel_vacuum_worker_delay_ns = 0;
94 :
95 : /*
96 : * VacuumFailsafeActive is defined as a global so that we can determine
97 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
98 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
99 : * for the table until after vacuuming has completed, regardless of other
100 : * settings.
101 : *
102 : * Only VACUUM code should inspect this variable and only table access methods
103 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
104 : * inspected to determine whether or not to allow cost-based delays. Table AMs
105 : * are free to set it if they desire this behavior, but it is false by default
106 : * and reset to false in between vacuuming each relation.
107 : */
108 : bool VacuumFailsafeActive = false;
109 :
110 : /*
111 : * Variables for cost-based parallel vacuum. See comments atop
112 : * compute_parallel_delay to understand how it works.
113 : */
114 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
115 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
116 : int VacuumCostBalanceLocal = 0;
117 :
118 : /* prototypes for functions private to this file (all declared static) */
119 : static List *expand_vacuum_rel(VacuumRelation *vrel,
120 : MemoryContext vac_context, int options);
121 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
122 : static void vac_truncate_clog(TransactionId frozenXID,
123 : MultiXactId minMulti,
124 : TransactionId lastSaneFrozenXid,
125 : MultiXactId lastSaneMinMulti);
126 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
127 : BufferAccessStrategy bstrategy);
128 : static double compute_parallel_delay(void);
129 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
130 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
131 :
132 : /*
133 : * GUC check function for vacuum_buffer_usage_limit: accept 0, or any value
134 : * from MIN_BAS_VAC_RING_SIZE_KB to MAX_BAS_VAC_RING_SIZE_KB inclusive.
135 : */
136 : bool
137 2168 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
138 : GucSource source)
139 : {
140 : /* 0 is always accepted; otherwise both hard limits are inclusive */
141 2168 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
142 2168 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
143 2168 : return true;
144 :
145 : /* Out of range: supply an error detail and reject the new value */
146 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
147 : "vacuum_buffer_usage_limit",
148 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
149 :
150 0 : return false;
151 : }
152 :
153 : /*
154 : * Primary entry point for manual VACUUM and ANALYZE commands
155 : *
156 : * This parses and validates the statement's options, fills in a VacuumParams
157 : * and buffer access strategy, and then hands off the real work to vacuum().
158 : */
159 : void
160 13690 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
161 : {
162 : VacuumParams params;
163 13690 : BufferAccessStrategy bstrategy = NULL;
164 13690 : bool verbose = false;
165 13690 : bool skip_locked = false;
166 13690 : bool analyze = false;
167 13690 : bool freeze = false;
168 13690 : bool full = false;
169 13690 : bool disable_page_skipping = false;
170 13690 : bool process_main = true;
171 13690 : bool process_toast = true;
172 : int ring_size;
173 13690 : bool skip_database_stats = false;
174 13690 : bool only_database_stats = false;
175 : MemoryContext vac_context;
176 : ListCell *lc;
177 :
178 : /* index_cleanup and truncate values unspecified for now */
179 13690 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
180 13690 : params.truncate = VACOPTVALUE_UNSPECIFIED;
181 :
182 : /* By default parallel vacuum is enabled; PARALLEL 0 below sets -1 (off) */
183 13690 : params.nworkers = 0;
184 :
185 : /* Will be set later if we recurse to a TOAST table. */
186 13690 : params.toast_parent = InvalidOid;
187 :
188 : /*
189 : * Set this to an invalid value so it is clear whether or not a
190 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
191 : */
192 13690 : ring_size = -1;
193 :
194 : /* Parse options list */
195 28262 : foreach(lc, vacstmt->options)
196 : {
197 14608 : DefElem *opt = (DefElem *) lfirst(lc);
198 :
199 : /* Parse common options for VACUUM and ANALYZE */
200 14608 : if (strcmp(opt->defname, "verbose") == 0)
201 38 : verbose = defGetBoolean(opt);
202 14570 : else if (strcmp(opt->defname, "skip_locked") == 0)
203 334 : skip_locked = defGetBoolean(opt);
204 14236 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
205 : {
206 : const char *hintmsg;
207 : int result;
208 : char *vac_buffer_size;
209 :
210 54 : vac_buffer_size = defGetString(opt);
211 :
212 : /*
213 : * Check that the specified value is valid and the size falls
214 : * within the hard upper and lower limits if it is not 0.
215 : */
216 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
217 48 : (result != 0 &&
218 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
219 : {
220 18 : ereport(ERROR,
221 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
222 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
223 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
224 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
225 : }
226 :
227 36 : ring_size = result;
228 : }
229 14182 : else if (!vacstmt->is_vacuumcmd)
230 6 : ereport(ERROR,
231 : (errcode(ERRCODE_SYNTAX_ERROR),
232 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
233 : parser_errposition(pstate, opt->location)));
234 :
235 : /* Parse options available on VACUUM */
236 14176 : else if (strcmp(opt->defname, "analyze") == 0)
237 2750 : analyze = defGetBoolean(opt);
238 11426 : else if (strcmp(opt->defname, "freeze") == 0)
239 2548 : freeze = defGetBoolean(opt);
240 8878 : else if (strcmp(opt->defname, "full") == 0)
241 386 : full = defGetBoolean(opt);
242 8492 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
243 214 : disable_page_skipping = defGetBoolean(opt);
244 8278 : else if (strcmp(opt->defname, "index_cleanup") == 0)
245 : {
246 : /* Interpret no string as the default, which is 'auto' */
247 174 : if (!opt->arg)
248 0 : params.index_cleanup = VACOPTVALUE_AUTO;
249 : else
250 : {
251 174 : char *sval = defGetString(opt);
252 :
253 : /* Try matching on 'auto' string, or fall back on boolean */
254 174 : if (pg_strcasecmp(sval, "auto") == 0)
255 6 : params.index_cleanup = VACOPTVALUE_AUTO;
256 : else
257 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
258 : }
259 : }
260 8104 : else if (strcmp(opt->defname, "process_main") == 0)
261 154 : process_main = defGetBoolean(opt);
262 7950 : else if (strcmp(opt->defname, "process_toast") == 0)
263 160 : process_toast = defGetBoolean(opt);
264 7790 : else if (strcmp(opt->defname, "truncate") == 0)
265 158 : params.truncate = get_vacoptval_from_boolean(opt);
266 7632 : else if (strcmp(opt->defname, "parallel") == 0)
267 : {
268 350 : if (opt->arg == NULL)
269 : {
270 6 : ereport(ERROR,
271 : (errcode(ERRCODE_SYNTAX_ERROR),
272 : errmsg("parallel option requires a value between 0 and %d",
273 : MAX_PARALLEL_WORKER_LIMIT),
274 : parser_errposition(pstate, opt->location)));
275 : }
276 : else
277 : {
278 : int nworkers;
279 :
280 344 : nworkers = defGetInt32(opt);
281 344 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
282 6 : ereport(ERROR,
283 : (errcode(ERRCODE_SYNTAX_ERROR),
284 : errmsg("parallel workers for vacuum must be between 0 and %d",
285 : MAX_PARALLEL_WORKER_LIMIT),
286 : parser_errposition(pstate, opt->location)));
287 :
288 : /*
289 : * Disable parallel vacuum, if user has specified parallel
290 : * degree as zero.
291 : */
292 338 : if (nworkers == 0)
293 154 : params.nworkers = -1;
294 : else
295 184 : params.nworkers = nworkers;
296 : }
297 : }
298 7282 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
299 7118 : skip_database_stats = defGetBoolean(opt);
300 164 : else if (strcmp(opt->defname, "only_database_stats") == 0)
301 164 : only_database_stats = defGetBoolean(opt);
302 : else
303 0 : ereport(ERROR,
304 : (errcode(ERRCODE_SYNTAX_ERROR),
305 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
306 : parser_errposition(pstate, opt->location)));
307 : }
308 :
309 : /* Set vacuum options */
310 13654 : params.options =
311 13654 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
312 13654 : (verbose ? VACOPT_VERBOSE : 0) |
313 13654 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
314 13654 : (analyze ? VACOPT_ANALYZE : 0) |
315 13654 : (freeze ? VACOPT_FREEZE : 0) |
316 13654 : (full ? VACOPT_FULL : 0) |
317 13654 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
318 13654 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
319 13654 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
320 13654 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
321 13654 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
322 :
323 : /* sanity checks on options */
324 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
325 : Assert((params.options & VACOPT_VACUUM) ||
326 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
327 :
328 13654 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
329 6 : ereport(ERROR,
330 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
331 : errmsg("VACUUM FULL cannot be performed in parallel")));
332 :
333 : /*
334 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
335 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
336 : * we'll permit that.
337 : */
338 13648 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
339 6 : !(params.options & VACOPT_ANALYZE))
340 6 : ereport(ERROR,
341 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
342 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
343 :
344 : /*
345 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
346 : */
347 13642 : if (!(params.options & VACOPT_ANALYZE))
348 : {
349 12246 : foreach(lc, vacstmt->rels)
350 : {
351 6010 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
352 :
353 6010 : if (vrel->va_cols != NIL)
354 6 : ereport(ERROR,
355 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
356 : errmsg("ANALYZE option must be specified when a column list is provided")));
357 : }
358 : }
359 :
360 :
361 : /*
362 : * Sanity check DISABLE_PAGE_SKIPPING option.
363 : */
364 13636 : if ((params.options & VACOPT_FULL) != 0 &&
365 362 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
366 0 : ereport(ERROR,
367 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
368 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
369 :
370 : /* sanity check for PROCESS_TOAST */
371 13636 : if ((params.options & VACOPT_FULL) != 0 &&
372 362 : (params.options & VACOPT_PROCESS_TOAST) == 0)
373 6 : ereport(ERROR,
374 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
375 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
376 :
377 : /* sanity check for ONLY_DATABASE_STATS */
378 13630 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
379 : {
380 : Assert(params.options & VACOPT_VACUUM);
381 164 : if (vacstmt->rels != NIL)
382 6 : ereport(ERROR,
383 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
385 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
386 158 : if (params.options & ~(VACOPT_VACUUM |
387 : VACOPT_VERBOSE |
388 : VACOPT_PROCESS_MAIN |
389 : VACOPT_PROCESS_TOAST |
390 : VACOPT_ONLY_DATABASE_STATS))
391 0 : ereport(ERROR,
392 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
393 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
394 : }
395 :
396 : /*
397 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
398 : * them as -1 which means to use the default values.
399 : */
400 13624 : if (params.options & VACOPT_FREEZE)
401 : {
402 2548 : params.freeze_min_age = 0;
403 2548 : params.freeze_table_age = 0;
404 2548 : params.multixact_freeze_min_age = 0;
405 2548 : params.multixact_freeze_table_age = 0;
406 : }
407 : else
408 : {
409 11076 : params.freeze_min_age = -1;
410 11076 : params.freeze_table_age = -1;
411 11076 : params.multixact_freeze_min_age = -1;
412 11076 : params.multixact_freeze_table_age = -1;
413 : }
414 :
415 : /* user-invoked vacuum is never "for wraparound" */
416 13624 : params.is_wraparound = false;
417 :
418 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
419 13624 : params.log_min_duration = -1;
420 :
421 : /*
422 : * Later, in vacuum_rel(), we check if a reloption override was specified.
423 : */
424 13624 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
425 :
426 : /*
427 : * Create special memory context for cross-transaction storage.
428 : *
429 : * Since it is a child of PortalContext, it will go away eventually even
430 : * if we suffer an error; there's no need for special abort cleanup logic.
431 : */
432 13624 : vac_context = AllocSetContextCreate(PortalContext,
433 : "Vacuum",
434 : ALLOCSET_DEFAULT_SIZES);
435 :
436 : /*
437 : * Make a buffer strategy object in the cross-transaction memory context.
438 : * We needn't bother making this for VACUUM (FULL) or VACUUM
439 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
440 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
441 : * when we see ANALYZE.
442 : */
443 13624 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
444 514 : VACOPT_FULL)) == 0 ||
445 514 : (params.options & VACOPT_ANALYZE) != 0)
446 : {
447 :
448 13116 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
449 :
450 : Assert(ring_size >= -1);
451 :
452 : /*
453 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
454 : * command, it overrides the value of VacuumBufferUsageLimit. Either
455 : * value may be 0, in which case GetAccessStrategyWithSize() will
456 : * return NULL, effectively allowing full use of shared buffers.
457 : */
458 13116 : if (ring_size == -1)
459 13086 : ring_size = VacuumBufferUsageLimit;
460 :
461 13116 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
462 :
463 13116 : MemoryContextSwitchTo(old_context);
464 : }
465 :
466 : /* Now go through the common routine */
467 13624 : vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
468 :
469 : /* Finally, clean up the vacuum memory context */
470 13492 : MemoryContextDelete(vac_context);
471 13492 : }
472 :
473 : /*
474 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
475 : *
476 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
477 : * we process all relevant tables in the database. For each VacuumRelation,
478 : * if a valid OID is supplied, the table with that OID is what to process;
479 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
480 : *
481 : * params contains a set of parameters that can be used to customize the
482 : * behavior.
483 : *
484 : * bstrategy may be passed in as NULL when the caller does not want to
485 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
486 : * otherwise, the caller must build a BufferAccessStrategy with the number of
487 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
488 : * using.
489 : *
490 : * isTopLevel should be passed down from ProcessUtility.
491 : *
492 : * It is the caller's responsibility that all parameters are allocated in a
493 : * memory context that will not disappear at transaction commit.
494 : */
495 : void
496 108586 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
497 : MemoryContext vac_context, bool isTopLevel)
498 : {
499 : static bool in_vacuum = false;
500 :
501 : const char *stmttype;
502 : volatile bool in_outer_xact,
503 : use_own_xacts;
504 :
505 : Assert(params != NULL);
506 :
507 108586 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
508 :
509 : /*
510 : * We cannot run VACUUM inside a user transaction block; if we were inside
511 : * a transaction, then our commit- and start-transaction-command calls
512 : * would not have the intended effect! There are numerous other subtle
513 : * dependencies on this, too.
514 : *
515 : * ANALYZE (without VACUUM) can run either way.
516 : */
517 108586 : if (params->options & VACOPT_VACUUM)
518 : {
519 103652 : PreventInTransactionBlock(isTopLevel, stmttype);
520 103632 : in_outer_xact = false;
521 : }
522 : else
523 4934 : in_outer_xact = IsInTransactionBlock(isTopLevel);
524 :
525 : /*
526 : * Check for and disallow recursive calls. This could happen when VACUUM
527 : * FULL or ANALYZE calls a hostile index expression that itself calls
528 : * ANALYZE.
529 : */
530 108566 : if (in_vacuum)
531 12 : ereport(ERROR,
532 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
533 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
534 : stmttype)));
535 :
536 : /*
537 : * Build list of relation(s) to process, putting any new data in
538 : * vac_context for safekeeping.
539 : */
540 108554 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
541 : {
542 : /* We don't process any tables in this case */
543 : Assert(relations == NIL);
544 : }
545 108396 : else if (relations != NIL)
546 : {
547 108182 : List *newrels = NIL;
548 : ListCell *lc;
549 :
550 216460 : foreach(lc, relations)
551 : {
552 108314 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
553 : List *sublist;
554 : MemoryContext old_context;
555 :
556 108314 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
557 108278 : old_context = MemoryContextSwitchTo(vac_context);
558 108278 : newrels = list_concat(newrels, sublist);
559 108278 : MemoryContextSwitchTo(old_context);
560 : }
561 108146 : relations = newrels;
562 : }
563 : else
564 214 : relations = get_all_vacuum_rels(vac_context, params->options);
565 :
566 : /*
567 : * Decide whether we need to start/commit our own transactions.
568 : *
569 : * For VACUUM (with or without ANALYZE): always do so, so that we can
570 : * release locks as soon as possible. (We could possibly use the outer
571 : * transaction for a one-table VACUUM, but handling TOAST tables would be
572 : * problematic.)
573 : *
574 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
575 : * start/commit our own transactions. Also, there's no need to do so if
576 : * only processing one relation. For multiple relations when not within a
577 : * transaction block, and also in an autovacuum worker, use own
578 : * transactions so we can release locks sooner.
579 : */
580 108518 : if (params->options & VACOPT_VACUUM)
581 103620 : use_own_xacts = true;
582 : else
583 : {
584 : Assert(params->options & VACOPT_ANALYZE);
585 4898 : if (AmAutoVacuumWorkerProcess())
586 284 : use_own_xacts = true;
587 4614 : else if (in_outer_xact)
588 238 : use_own_xacts = false;
589 4376 : else if (list_length(relations) > 1)
590 766 : use_own_xacts = true;
591 : else
592 3610 : use_own_xacts = false;
593 : }
594 :
595 : /*
596 : * vacuum_rel expects to be entered with no transaction active; it will
597 : * start and commit its own transaction. But we are called by an SQL
598 : * command, and so we are executing inside a transaction already. We
599 : * commit the transaction started in PostgresMain() here, and start
600 : * another one before exiting to match the commit waiting for us back in
601 : * PostgresMain().
602 : */
603 108518 : if (use_own_xacts)
604 : {
605 : Assert(!in_outer_xact);
606 :
607 : /* ActiveSnapshot is not set by autovacuum */
608 104670 : if (ActiveSnapshotSet())
609 9708 : PopActiveSnapshot();
610 :
611 : /* matches the StartTransaction in PostgresMain() */
612 104670 : CommitTransactionCommand();
613 : }
614 :
615 : /* Turn cost accounting on or off and set in_vacuum; PG_FINALLY undoes both */
616 108518 : PG_TRY();
617 : {
618 : ListCell *cur;
619 :
620 108518 : in_vacuum = true;
621 108518 : VacuumFailsafeActive = false;
622 108518 : VacuumUpdateCosts();
623 108518 : VacuumCostBalance = 0;
624 108518 : VacuumCostBalanceLocal = 0;
625 108518 : VacuumSharedCostBalance = NULL;
626 108518 : VacuumActiveNWorkers = NULL;
627 :
628 : /*
629 : * Loop to process each selected relation.
630 : */
631 233670 : foreach(cur, relations)
632 : {
633 125216 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
634 :
635 125216 : if (params->options & VACOPT_VACUUM)
636 : {
637 112268 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
638 100 : continue;
639 : }
640 :
641 125110 : if (params->options & VACOPT_ANALYZE)
642 : {
643 : /*
644 : * If using separate xacts, start one for analyze. Otherwise,
645 : * we can use the outer transaction.
646 : */
647 16008 : if (use_own_xacts)
648 : {
649 12210 : StartTransactionCommand();
650 : /* functions in indexes may want a snapshot set */
651 12210 : PushActiveSnapshot(GetTransactionSnapshot());
652 : }
653 :
654 16008 : analyze_rel(vrel->oid, vrel->relation, params,
655 : vrel->va_cols, in_outer_xact, bstrategy);
656 :
657 15950 : if (use_own_xacts)
658 : {
659 12172 : PopActiveSnapshot();
660 : /* standard_ProcessUtility() does CCI if !use_own_xacts */
661 12172 : CommandCounterIncrement();
662 12172 : CommitTransactionCommand();
663 : }
664 : else
665 : {
666 : /*
667 : * If we're not using separate xacts, better separate the
668 : * ANALYZE actions with CCIs. This avoids trouble if user
669 : * says "ANALYZE t, t".
670 : */
671 3778 : CommandCounterIncrement();
672 : }
673 : }
674 :
675 : /*
676 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
677 : * next relation.
678 : */
679 125052 : VacuumFailsafeActive = false;
680 : }
681 : }
682 64 : PG_FINALLY();
683 : {
684 108518 : in_vacuum = false;
685 108518 : VacuumCostActive = false;
686 108518 : VacuumFailsafeActive = false;
687 108518 : VacuumCostBalance = 0;
688 : }
689 108518 : PG_END_TRY();
690 :
691 : /*
692 : * Finish up processing.
693 : */
694 108454 : if (use_own_xacts)
695 : {
696 : /* at this point no transaction is open; we committed ours above */
697 :
698 : /*
699 : * This matches the CommitTransaction waiting for us in
700 : * PostgresMain().
701 : */
702 104626 : StartTransactionCommand();
703 : }
704 :
705 108454 : if ((params->options & VACOPT_VACUUM) &&
706 103588 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
707 : {
708 : /*
709 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
710 : */
711 1794 : vac_update_datfrozenxid();
712 : }
713 :
714 108454 : }
715 :
716 : /*
717 : * Check if the current user has privileges to vacuum or analyze the relation.
718 : * If not, issue a WARNING (worded for VACUUM if VACOPT_VACUUM is set, else for
719 : * ANALYZE) and return false to let the caller decide what to do with this
720 : * relation. options must include VACOPT_VACUUM and/or VACOPT_ANALYZE.
721 : */
722 : bool
723 168600 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
724 : bits32 options)
725 : {
726 : char *relname;
727 :
728 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
729 :
730 : /*----------
731 : * A role has privileges to vacuum or analyze the relation if any of the
732 : * following are true:
733 : * - the role owns the current database and the relation is not shared
734 : * - the role has the MAINTAIN privilege on the relation
735 : *----------
736 : */
737 168600 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
738 194028 : !reltuple->relisshared) ||
739 28974 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
740 165530 : return true;
741 :
742 3070 : relname = NameStr(reltuple->relname);
743 :
744 3070 : if ((options & VACOPT_VACUUM) != 0)
745 : {
746 224 : ereport(WARNING,
747 : (errmsg("permission denied to vacuum \"%s\", skipping it",
748 : relname)));
749 :
750 : /*
751 : * For VACUUM ANALYZE, both warnings could apply, but we report only
752 : * the VACUUM one since VACUUM is the first operation that would be
753 : * processed.
754 : */
755 224 : return false;
756 : }
757 :
758 2846 : if ((options & VACOPT_ANALYZE) != 0)
759 2846 : ereport(WARNING,
760 : (errmsg("permission denied to analyze \"%s\", skipping it",
761 : relname)));
762 :
763 2846 : return false;
764 : }
765 :
766 :
767 : /*
768 : * vacuum_open_relation
769 : *
770 : * Attempt to open and lock a relation which is going to be vacuumed or
771 : * analyzed. Returns the opened relation, or NULL if it could not be opened
772 : * or locked, in which case a log is emitted if possible.
773 : */
774 : Relation
775 137442 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
776 : bool verbose, LOCKMODE lmode)
777 : {
778 : Relation rel;
779 137442 : bool rel_lock = true;
780 : int elevel;
781 :
782 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
783 :
784 : /*
785 : * Open the relation and get the appropriate lock on it.
786 : *
787 : * There's a race condition here: the relation may have gone away since
788 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
789 : *
790 : * If we've been asked not to wait for the relation lock, acquire it first
791 : * in non-blocking mode, before calling try_relation_open().
792 : */
793 137442 : if (!(options & VACOPT_SKIP_LOCKED))
794 136266 : rel = try_relation_open(relid, lmode);
795 1176 : else if (ConditionalLockRelationOid(relid, lmode))
796 1152 : rel = try_relation_open(relid, NoLock);
797 : else
798 : {
799 24 : rel = NULL;
800 24 : rel_lock = false;
801 : }
802 :
803 : /* the relation was opened successfully, so we are done */
804 137442 : if (rel)
805 137406 : return rel;
806 :
807 : /*
808 : * Relation could not be opened, so emit a log message describing the
809 : * situation where possible.
810 : *
811 : * If the RangeVar is not defined, we do not have enough information to
812 : * provide a meaningful log statement. Chances are that the caller has
813 : * intentionally not provided this information so that this logging is
814 : * skipped, anyway.
815 : */
816 36 : if (relation == NULL)
817 18 : return NULL;
818 :
819 : /*
820 : * Determine the log level.
821 : *
822 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
823 : * statements in the permission checks; otherwise, only log if the caller
824 : * so requested.
825 : */
826 18 : if (!AmAutoVacuumWorkerProcess())
827 14 : elevel = WARNING;
828 4 : else if (verbose)
829 4 : elevel = LOG;
830 : else
831 0 : return NULL;
832 :
833 18 : if ((options & VACOPT_VACUUM) != 0)
834 : {
835 10 : if (!rel_lock)
836 6 : ereport(elevel,
837 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
838 : errmsg("skipping vacuum of \"%s\" --- lock not available",
839 : relation->relname)));
840 : else
841 4 : ereport(elevel,
842 : (errcode(ERRCODE_UNDEFINED_TABLE),
843 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
844 : relation->relname)));
845 :
846 : /*
847 : * For VACUUM ANALYZE, both messages could apply, but we report only
848 : * the VACUUM one since VACUUM is the first operation that would be
849 : * processed.
850 : */
851 10 : return NULL;
852 : }
853 :
854 8 : if ((options & VACOPT_ANALYZE) != 0)
855 : {
856 8 : if (!rel_lock)
857 6 : ereport(elevel,
858 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
859 : errmsg("skipping analyze of \"%s\" --- lock not available",
860 : relation->relname)));
861 : else
862 2 : ereport(elevel,
863 : (errcode(ERRCODE_UNDEFINED_TABLE),
864 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
865 : relation->relname)));
866 : }
867 :
868 8 : return NULL;
869 : }
870 :
871 :
872 : /*
873 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
874 : * and optionally add VacuumRelations for partitions or inheritance children.
875 : *
876 : * If a VacuumRelation does not have an OID supplied and is a partitioned
877 : * table, an extra entry will be added to the output for each partition.
878 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
879 : * it does not want us to expand partitioned tables.
880 : *
881 : * We take care not to modify the input data structure, but instead build
882 : * new VacuumRelation(s) to return. (But note that they will reference
883 : * unmodified parts of the input, eg column lists.) New data structures
884 : * are made in vac_context.
885 : */
886 : static List *
887 108314 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
888 : int options)
889 : {
890 108314 : List *vacrels = NIL;
891 : MemoryContext oldcontext;
892 :
893 : /* If caller supplied OID, there's nothing we need do here. */
894 108314 : if (OidIsValid(vrel->oid))
895 : {
896 94962 : oldcontext = MemoryContextSwitchTo(vac_context);
897 94962 : vacrels = lappend(vacrels, vrel);
898 94962 : MemoryContextSwitchTo(oldcontext);
899 : }
900 : else
901 : {
902 : /*
903 : * Process a specific relation, and possibly partitions or child
904 : * tables thereof.
905 : */
906 : Oid relid;
907 : HeapTuple tuple;
908 : Form_pg_class classForm;
909 : bool include_children;
910 : bool is_partitioned_table;
911 : int rvr_opts;
912 :
913 : /*
914 : * Since autovacuum workers supply OIDs when calling vacuum(), no
915 : * autovacuum worker should reach this code.
916 : */
917 : Assert(!AmAutoVacuumWorkerProcess());
918 :
919 : /*
920 : * We transiently take AccessShareLock to protect the syscache lookup
921 : * below, as well as find_all_inheritors's expectation that the caller
922 : * holds some lock on the starting relation.
923 : */
924 13352 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
925 13352 : relid = RangeVarGetRelidExtended(vrel->relation,
926 : AccessShareLock,
927 : rvr_opts,
928 : NULL, NULL);
929 :
930 : /*
931 : * If the lock is unavailable, emit the same log statement that
932 : * vacuum_rel() and analyze_rel() would.
933 : */
934 13316 : if (!OidIsValid(relid))
935 : {
936 8 : if (options & VACOPT_VACUUM)
937 6 : ereport(WARNING,
938 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
939 : errmsg("skipping vacuum of \"%s\" --- lock not available",
940 : vrel->relation->relname)));
941 : else
942 2 : ereport(WARNING,
943 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
944 : errmsg("skipping analyze of \"%s\" --- lock not available",
945 : vrel->relation->relname)));
946 8 : return vacrels;
947 : }
948 :
949 : /*
950 : * To check whether the relation is a partitioned table and its
951 : * ownership, fetch its syscache entry.
952 : */
953 13308 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
954 13308 : if (!HeapTupleIsValid(tuple))
955 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
956 13308 : classForm = (Form_pg_class) GETSTRUCT(tuple);
957 :
958 : /*
959 : * Make a returnable VacuumRelation for this rel if the user has the
960 : * required privileges.
961 : */
962 13308 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
963 : {
964 13076 : oldcontext = MemoryContextSwitchTo(vac_context);
965 13076 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
966 : relid,
967 : vrel->va_cols));
968 13076 : MemoryContextSwitchTo(oldcontext);
969 : }
970 :
971 : /*
972 : * Vacuuming a partitioned table with ONLY will not do anything since
973 : * the partitioned table itself is empty. Issue a warning if the user
974 : * requests this.
975 : */
976 13308 : include_children = vrel->relation->inh;
977 13308 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
978 13308 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
979 6 : ereport(WARNING,
980 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
981 : vrel->relation->relname)));
982 :
983 13308 : ReleaseSysCache(tuple);
984 :
985 : /*
986 : * Unless the user has specified ONLY, make relation list entries for
987 : * its partitions or inheritance child tables. Note that the list
988 : * returned by find_all_inheritors() includes the passed-in OID, so we
989 : * have to skip that. There's no point in taking locks on the
990 : * individual partitions or child tables yet, and doing so would just
991 : * add unnecessary deadlock risk. For this last reason, we do not yet
992 : * check the ownership of the partitions/tables, which get added to
993 : * the list to process. Ownership will be checked later on anyway.
994 : */
995 13308 : if (include_children)
996 : {
997 13278 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
998 : ListCell *part_lc;
999 :
1000 28626 : foreach(part_lc, part_oids)
1001 : {
1002 15348 : Oid part_oid = lfirst_oid(part_lc);
1003 :
1004 15348 : if (part_oid == relid)
1005 13278 : continue; /* ignore original table */
1006 :
1007 : /*
1008 : * We omit a RangeVar since it wouldn't be appropriate to
1009 : * complain about failure to open one of these relations
1010 : * later.
1011 : */
1012 2070 : oldcontext = MemoryContextSwitchTo(vac_context);
1013 2070 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1014 : part_oid,
1015 : vrel->va_cols));
1016 2070 : MemoryContextSwitchTo(oldcontext);
1017 : }
1018 : }
1019 :
1020 : /*
1021 : * Release lock again. This means that by the time we actually try to
1022 : * process the table, it might be gone or renamed. In the former case
1023 : * we'll silently ignore it; in the latter case we'll process it
1024 : * anyway, but we must beware that the RangeVar doesn't necessarily
1025 : * identify it anymore. This isn't ideal, perhaps, but there's little
1026 : * practical alternative, since we're typically going to commit this
1027 : * transaction and begin a new one between now and then. Moreover,
1028 : * holding locks on multiple relations would create significant risk
1029 : * of deadlock.
1030 : */
1031 13308 : UnlockRelationOid(relid, AccessShareLock);
1032 : }
1033 :
1034 108270 : return vacrels;
1035 : }
1036 :
1037 : /*
1038 : * Construct a list of VacuumRelations for all vacuumable rels in
1039 : * the current database. The list is built in vac_context.
1040 : */
1041 : static List *
1042 214 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1043 : {
1044 214 : List *vacrels = NIL;
1045 : Relation pgclass;
1046 : TableScanDesc scan;
1047 : HeapTuple tuple;
1048 :
1049 214 : pgclass = table_open(RelationRelationId, AccessShareLock);
1050 :
1051 214 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1052 :
1053 96644 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1054 : {
1055 96430 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1056 : MemoryContext oldcontext;
1057 96430 : Oid relid = classForm->oid;
1058 :
1059 : /*
1060 : * We include partitioned tables here; depending on which operation is
1061 : * to be performed, caller will decide whether to process or ignore
1062 : * them.
1063 : */
1064 96430 : if (classForm->relkind != RELKIND_RELATION &&
1065 78756 : classForm->relkind != RELKIND_MATVIEW &&
1066 78708 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1067 78544 : continue;
1068 :
1069 : /* check permissions of relation */
1070 17886 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1071 2730 : continue;
1072 :
1073 : /*
1074 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1075 : * We omit a RangeVar since it wouldn't be appropriate to complain
1076 : * about failure to open one of these relations later.
1077 : */
1078 15156 : oldcontext = MemoryContextSwitchTo(vac_context);
1079 15156 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1080 : relid,
1081 : NIL));
1082 15156 : MemoryContextSwitchTo(oldcontext);
1083 : }
1084 :
1085 214 : table_endscan(scan);
1086 214 : table_close(pgclass, AccessShareLock);
1087 :
1088 214 : return vacrels;
1089 : }
1090 :
1091 : /*
1092 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1093 : *
1094 : * The target relation and VACUUM parameters are our inputs.
1095 : *
1096 : * Output parameters are the cutoffs that VACUUM caller should use.
1097 : *
1098 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1099 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1100 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1101 : * minimum).
1102 : */
1103 : bool
1104 121182 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1105 : struct VacuumCutoffs *cutoffs)
1106 : {
1107 : int freeze_min_age,
1108 : multixact_freeze_min_age,
1109 : freeze_table_age,
1110 : multixact_freeze_table_age,
1111 : effective_multixact_freeze_max_age;
1112 : TransactionId nextXID,
1113 : safeOldestXmin,
1114 : aggressiveXIDCutoff;
1115 : MultiXactId nextMXID,
1116 : safeOldestMxact,
1117 : aggressiveMXIDCutoff;
1118 :
1119 : /* Use mutable copies of freeze age parameters */
1120 121182 : freeze_min_age = params->freeze_min_age;
1121 121182 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1122 121182 : freeze_table_age = params->freeze_table_age;
1123 121182 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1124 :
1125 : /* Set pg_class fields in cutoffs */
1126 121182 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1127 121182 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1128 :
1129 : /*
1130 : * Acquire OldestXmin.
1131 : *
1132 : * We can always ignore processes running lazy vacuum. This is because we
1133 : * use these values only for deciding which tuples we must keep in the
1134 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1135 : * XID assigned), it's safe to ignore it. In theory it could be
1136 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1137 : * that only one vacuum process can be working on a particular table at
1138 : * any time, and that each vacuum is always an independent transaction.
1139 : */
1140 121182 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1141 :
1142 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1143 :
1144 : /* Acquire OldestMxact */
1145 121182 : cutoffs->OldestMxact = GetOldestMultiXactId();
1146 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1147 :
1148 : /* Acquire next XID/next MXID values used to apply age-based settings */
1149 121182 : nextXID = ReadNextTransactionId();
1150 121182 : nextMXID = ReadNextMultiXactId();
1151 :
1152 : /*
1153 : * Also compute the multixact age for which freezing is urgent. This is
1154 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1155 : * short of multixact member space.
1156 : */
1157 121182 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1158 :
1159 : /*
1160 : * Almost ready to set freeze output parameters; check if OldestXmin or
1161 : * OldestMxact are held back to an unsafe degree before we start on that
1162 : */
1163 121182 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1164 121182 : if (!TransactionIdIsNormal(safeOldestXmin))
1165 0 : safeOldestXmin = FirstNormalTransactionId;
1166 121182 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1167 121182 : if (safeOldestMxact < FirstMultiXactId)
1168 0 : safeOldestMxact = FirstMultiXactId;
1169 121182 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1170 58156 : ereport(WARNING,
1171 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1172 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1173 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1174 121182 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1175 0 : ereport(WARNING,
1176 : (errmsg("cutoff for freezing multixacts is far in the past"),
1177 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1178 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1179 :
1180 : /*
1181 : * Determine the minimum freeze age to use: as specified by the caller, or
1182 : * vacuum_freeze_min_age, but in any case not more than half
1183 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1184 : * wraparound won't occur too frequently.
1185 : */
1186 121182 : if (freeze_min_age < 0)
1187 11192 : freeze_min_age = vacuum_freeze_min_age;
1188 121182 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1189 : Assert(freeze_min_age >= 0);
1190 :
1191 : /* Compute FreezeLimit, being careful to generate a normal XID */
1192 121182 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1193 121182 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1194 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1195 : /* FreezeLimit must always be <= OldestXmin */
1196 121182 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1197 83266 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1198 :
1199 : /*
1200 : * Determine the minimum multixact freeze age to use: as specified by
1201 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1202 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1203 : * prevent MultiXact wraparound won't occur too frequently.
1204 : */
1205 121182 : if (multixact_freeze_min_age < 0)
1206 11192 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1207 121182 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1208 : effective_multixact_freeze_max_age / 2);
1209 : Assert(multixact_freeze_min_age >= 0);
1210 :
1211 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1212 121182 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1213 121182 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1214 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1215 : /* MultiXactCutoff must always be <= OldestMxact */
1216 121182 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1217 6 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1218 :
1219 : /*
1220 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1221 : *
1222 : * Determine the table freeze age to use: as specified by the caller, or
1223 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1224 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1225 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1226 : * anti-wraparound autovacuum is launched.
1227 : */
1228 121182 : if (freeze_table_age < 0)
1229 11192 : freeze_table_age = vacuum_freeze_table_age;
1230 121182 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1231 : Assert(freeze_table_age >= 0);
1232 121182 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1233 121182 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1234 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1235 121182 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1236 : aggressiveXIDCutoff))
1237 110006 : return true;
1238 :
1239 : /*
1240 : * Similar to the above, determine the table freeze age to use for
1241 : * multixacts: as specified by the caller, or the value of the
1242 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1243 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1244 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1245 : * multixacts before anti-wraparound autovacuum is launched.
1246 : */
1247 11176 : if (multixact_freeze_table_age < 0)
1248 10974 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1249 11176 : multixact_freeze_table_age =
1250 11176 : Min(multixact_freeze_table_age,
1251 : effective_multixact_freeze_max_age * 0.95);
1252 : Assert(multixact_freeze_table_age >= 0);
1253 11176 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1254 11176 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1255 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1256 11176 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1257 : aggressiveMXIDCutoff))
1258 0 : return true;
1259 :
1260 : /* Non-aggressive VACUUM */
1261 11176 : return false;
1262 : }
1263 :
1264 : /*
1265 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1266 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1267 : * dangerously far in the past.
1268 : *
1269 : * When we return true, VACUUM caller triggers the failsafe.
1270 : */
1271 : bool
1272 124208 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1273 : {
1274 124208 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1275 124208 : MultiXactId relminmxid = cutoffs->relminmxid;
1276 : TransactionId xid_skip_limit;
1277 : MultiXactId multi_skip_limit;
1278 : int skip_index_vacuum;
1279 :
1280 : Assert(TransactionIdIsNormal(relfrozenxid));
1281 : Assert(MultiXactIdIsValid(relminmxid));
1282 :
1283 : /*
1284 : * Determine the index skipping age to use. In any case no less than
1285 : * autovacuum_freeze_max_age * 1.05.
1286 : */
1287 124208 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1288 :
1289 124208 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1290 124208 : if (!TransactionIdIsNormal(xid_skip_limit))
1291 0 : xid_skip_limit = FirstNormalTransactionId;
1292 :
1293 124208 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1294 : {
1295 : /* The table's relfrozenxid is too old */
1296 19238 : return true;
1297 : }
1298 :
1299 : /*
1300 : * Similar to above, determine the index skipping age to use for
1301 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1302 : * 1.05.
1303 : */
1304 104970 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1305 : autovacuum_multixact_freeze_max_age * 1.05);
1306 :
1307 104970 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1308 104970 : if (multi_skip_limit < FirstMultiXactId)
1309 0 : multi_skip_limit = FirstMultiXactId;
1310 :
1311 104970 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1312 : {
1313 : /* The table's relminmxid is too old */
1314 0 : return true;
1315 : }
1316 :
1317 104970 : return false;
1318 : }
1319 :
1320 : /*
1321 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1322 : *
1323 : * If we scanned the whole relation then we should just use the count of
1324 : * live tuples seen; but if we did not, we should not blindly extrapolate
1325 : * from that number, since VACUUM may have scanned a quite nonrandom
1326 : * subset of the table. When we have only partial information, we take
1327 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1328 : * of the tuple density in the unscanned pages.
1329 : *
1330 : * Note: scanned_tuples should count only *live* tuples, since
1331 : * pg_class.reltuples is defined that way.
1332 : */
1333 : double
1334 120624 : vac_estimate_reltuples(Relation relation,
1335 : BlockNumber total_pages,
1336 : BlockNumber scanned_pages,
1337 : double scanned_tuples)
1338 : {
1339 120624 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1340 120624 : double old_rel_tuples = relation->rd_rel->reltuples;
1341 : double old_density;
1342 : double unscanned_pages;
1343 : double total_tuples;
1344 :
1345 : /* If we did scan the whole table, just use the count as-is */
1346 120624 : if (scanned_pages >= total_pages)
1347 116440 : return scanned_tuples;
1348 :
1349 : /*
1350 : * When successive VACUUM commands scan the same few pages again and
1351 : * again, without anything from the table really changing, there is a risk
1352 : * that our beliefs about tuple density will gradually become distorted.
1353 : * This might be caused by vacuumlazy.c implementation details, such as
1354 : * its tendency to always scan the last heap page. Handle that here.
1355 : *
1356 : * If the relation is _exactly_ the same size according to the existing
1357 : * pg_class entry, and only a few of its pages (less than 2%) were
1358 : * scanned, keep the existing value of reltuples. Also keep the existing
1359 : * value when only a subset of rel's pages <= a single page were scanned.
1360 : *
1361 : * (Note: we might be returning -1 here.)
1362 : */
1363 4184 : if (old_rel_pages == total_pages &&
1364 4154 : scanned_pages < (double) total_pages * 0.02)
1365 2900 : return old_rel_tuples;
1366 1284 : if (scanned_pages <= 1)
1367 1012 : return old_rel_tuples;
1368 :
1369 : /*
1370 : * If old density is unknown, we can't do much except scale up
1371 : * scanned_tuples to match total_pages.
1372 : */
1373 272 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1374 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1375 :
1376 : /*
1377 : * Okay, we've covered the corner cases. The normal calculation is to
1378 : * convert the old measurement to a density (tuples per page), then
1379 : * estimate the number of tuples in the unscanned pages using that figure,
1380 : * and finally add on the number of tuples in the scanned pages.
1381 : */
1382 270 : old_density = old_rel_tuples / old_rel_pages;
1383 270 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1384 270 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1385 270 : return floor(total_tuples + 0.5);
1386 : }
1387 :
1388 :
1389 : /*
1390 : * vac_update_relstats() -- update statistics for one relation
1391 : *
1392 : * Update the whole-relation statistics that are kept in its pg_class
1393 : * row. There are additional stats that will be updated if we are
1394 : * doing ANALYZE, but we always update these stats. This routine works
1395 : * for both index and heap relation entries in pg_class.
1396 : *
1397 : * We violate transaction semantics here by overwriting the rel's
1398 : * existing pg_class tuple with the new values. This is reasonably
1399 : * safe as long as we're sure that the new values are correct whether or
1400 : * not this transaction commits. The reason for doing this is that if
1401 : * we updated these tuples in the usual way, vacuuming pg_class itself
1402 : * wouldn't work very well --- by the time we got done with a vacuum
1403 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1404 : * course, this only works for fixed-size not-null columns, but these are.
1405 : *
1406 : * Another reason for doing it this way is that when we are in a lazy
1407 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1408 : * Somebody vacuuming pg_class might think they could delete a tuple
1409 : * marked with xmin = our xid.
1410 : *
1411 : * In addition to fundamentally nontransactional statistics such as
1412 : * relpages and relallvisible, we try to maintain certain lazily-updated
1413 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1414 : * It's safe to do this in VACUUM, which can't run in parallel with
1415 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1416 : * However, it's *not* safe to do it in an ANALYZE that's within an
1417 : * outer transaction, because for example the current transaction might
1418 : * have dropped the last index; then we'd think relhasindex should be
1419 : * cleared, but if the transaction later rolls back this would be wrong.
1420 : * So we refrain from updating the DDL flags if we're inside an outer
1421 : * transaction. This is OK since postponing the flag maintenance is
1422 : * always allowable.
1423 : *
1424 : * Note: num_tuples should count only *live* tuples, since
1425 : * pg_class.reltuples is defined that way.
1426 : *
1427 : * This routine is shared by VACUUM and ANALYZE.
1428 : */
1429 : void
1430 161326 : vac_update_relstats(Relation relation,
1431 : BlockNumber num_pages, double num_tuples,
1432 : BlockNumber num_all_visible_pages,
1433 : BlockNumber num_all_frozen_pages,
1434 : bool hasindex, TransactionId frozenxid,
1435 : MultiXactId minmulti,
1436 : bool *frozenxid_updated, bool *minmulti_updated,
1437 : bool in_outer_xact)
1438 : {
1439 161326 : Oid relid = RelationGetRelid(relation);
1440 : Relation rd;
1441 : ScanKeyData key[1];
1442 : HeapTuple ctup;
1443 : void *inplace_state;
1444 : Form_pg_class pgcform;
1445 : bool dirty,
1446 : futurexid,
1447 : futuremxid;
1448 : TransactionId oldfrozenxid;
1449 : MultiXactId oldminmulti;
1450 :
1451 161326 : rd = table_open(RelationRelationId, RowExclusiveLock);
1452 :
1453 : /* Fetch a copy of the tuple to scribble on */
1454 161326 : ScanKeyInit(&key[0],
1455 : Anum_pg_class_oid,
1456 : BTEqualStrategyNumber, F_OIDEQ,
1457 : ObjectIdGetDatum(relid));
1458 161326 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1459 : NULL, 1, key, &ctup, &inplace_state);
1460 161326 : if (!HeapTupleIsValid(ctup))
1461 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1462 : relid);
1463 161326 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1464 :
1465 : /* Apply statistical updates, if any, to copied tuple */
1466 :
1467 161326 : dirty = false;
1468 161326 : if (pgcform->relpages != (int32) num_pages)
1469 : {
1470 9188 : pgcform->relpages = (int32) num_pages;
1471 9188 : dirty = true;
1472 : }
1473 161326 : if (pgcform->reltuples != (float4) num_tuples)
1474 : {
1475 19796 : pgcform->reltuples = (float4) num_tuples;
1476 19796 : dirty = true;
1477 : }
1478 161326 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1479 : {
1480 5740 : pgcform->relallvisible = (int32) num_all_visible_pages;
1481 5740 : dirty = true;
1482 : }
1483 161326 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1484 : {
1485 5142 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1486 5142 : dirty = true;
1487 : }
1488 :
1489 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1490 :
1491 161326 : if (!in_outer_xact)
1492 : {
1493 : /*
1494 : * If we didn't find any indexes, reset relhasindex.
1495 : */
1496 161012 : if (pgcform->relhasindex && !hasindex)
1497 : {
1498 20 : pgcform->relhasindex = false;
1499 20 : dirty = true;
1500 : }
1501 :
1502 : /* We also clear relhasrules and relhastriggers if needed */
1503 161012 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1504 : {
1505 0 : pgcform->relhasrules = false;
1506 0 : dirty = true;
1507 : }
1508 161012 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1509 : {
1510 6 : pgcform->relhastriggers = false;
1511 6 : dirty = true;
1512 : }
1513 : }
1514 :
1515 : /*
1516 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1517 : * indicating it has no new data.
1518 : *
1519 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1520 : * stored relfrozenxid is "in the future" then it seems best to assume
1521 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1522 : * This should match vac_update_datfrozenxid() concerning what we consider
1523 : * to be "in the future".
1524 : */
1525 161326 : oldfrozenxid = pgcform->relfrozenxid;
1526 161326 : futurexid = false;
1527 161326 : if (frozenxid_updated)
1528 120620 : *frozenxid_updated = false;
1529 161326 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1530 : {
1531 60090 : bool update = false;
1532 :
1533 60090 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1534 59990 : update = true;
1535 100 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1536 0 : futurexid = update = true;
1537 :
1538 60090 : if (update)
1539 : {
1540 59990 : pgcform->relfrozenxid = frozenxid;
1541 59990 : dirty = true;
1542 59990 : if (frozenxid_updated)
1543 59990 : *frozenxid_updated = true;
1544 : }
1545 : }
1546 :
1547 : /* Similarly for relminmxid */
1548 161326 : oldminmulti = pgcform->relminmxid;
1549 161326 : futuremxid = false;
1550 161326 : if (minmulti_updated)
1551 120620 : *minmulti_updated = false;
1552 161326 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1553 : {
1554 352 : bool update = false;
1555 :
1556 352 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1557 352 : update = true;
1558 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1559 0 : futuremxid = update = true;
1560 :
1561 352 : if (update)
1562 : {
1563 352 : pgcform->relminmxid = minmulti;
1564 352 : dirty = true;
1565 352 : if (minmulti_updated)
1566 352 : *minmulti_updated = true;
1567 : }
1568 : }
1569 :
1570 : /* If anything changed, write out the tuple. */
1571 161326 : if (dirty)
1572 73968 : systable_inplace_update_finish(inplace_state, ctup);
1573 : else
1574 87358 : systable_inplace_update_cancel(inplace_state);
1575 :
1576 161326 : table_close(rd, RowExclusiveLock);
1577 :
1578 161326 : if (futurexid)
1579 0 : ereport(WARNING,
1580 : (errcode(ERRCODE_DATA_CORRUPTED),
1581 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1582 : oldfrozenxid, frozenxid,
1583 : RelationGetRelationName(relation))));
1584 161326 : if (futuremxid)
1585 0 : ereport(WARNING,
1586 : (errcode(ERRCODE_DATA_CORRUPTED),
1587 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1588 : oldminmulti, minmulti,
1589 : RelationGetRelationName(relation))));
1590 161326 : }
1591 :
1592 :
1593 : /*
1594 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1595 : *
1596 : * Update pg_database's datfrozenxid entry for our database to be the
1597 : * minimum of the pg_class.relfrozenxid values.
1598 : *
1599 : * Similarly, update our datminmxid to be the minimum of the
1600 : * pg_class.relminmxid values.
1601 : *
1602 : * If we are able to advance either pg_database value, also try to
1603 : * truncate pg_xact and pg_multixact.
1604 : *
1605 : * We violate transaction semantics here by overwriting the database's
1606 : * existing pg_database tuple with the new values. This is reasonably
1607 : * safe since the new values are correct whether or not this transaction
1608 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1609 : * behind after a VACUUM.
1610 : */
1611 : void
1612 4130 : vac_update_datfrozenxid(void)
1613 : {
1614 : HeapTuple tuple;
1615 : Form_pg_database dbform;
1616 : Relation relation;
1617 : SysScanDesc scan;
1618 : HeapTuple classTup;
1619 : TransactionId newFrozenXid;
1620 : MultiXactId newMinMulti;
1621 : TransactionId lastSaneFrozenXid;
1622 : MultiXactId lastSaneMinMulti;
1623 4130 : bool bogus = false;
1624 4130 : bool dirty = false;
1625 : ScanKeyData key[1];
1626 : void *inplace_state;
1627 :
1628 : /*
1629 : * Restrict this task to one backend per database. This avoids race
1630 : * conditions that would move datfrozenxid or datminmxid backward. It
1631 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1632 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1633 : */
1634 4130 : LockDatabaseFrozenIds(ExclusiveLock);
1635 :
1636 : /*
1637 : * Initialize the "min" calculation with
1638 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1639 : * approximation to the minimum relfrozenxid for not-yet-committed
1640 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1641 : * cannot produce a wrong minimum by starting with this.
1642 : */
1643 4130 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1644 :
1645 : /*
1646 : * Similarly, initialize the MultiXact "min" with the value that would be
1647 : * used on pg_class for new tables. See AddNewRelationTuple().
1648 : */
1649 4130 : newMinMulti = GetOldestMultiXactId();
1650 :
1651 : /*
1652 : * Identify the latest relfrozenxid and relminmxid values that we could
1653 : * validly see during the scan. These are conservative values, but it's
1654 : * not really worth trying to be more exact.
1655 : */
1656 4130 : lastSaneFrozenXid = ReadNextTransactionId();
1657 4130 : lastSaneMinMulti = ReadNextMultiXactId();
1658 :
1659 : /*
1660 : * We must seqscan pg_class to find the minimum Xid, because there is no
1661 : * index that can help us here.
1662 : *
1663 : * See vac_truncate_clog() for the race condition to prevent.
1664 : */
1665 4130 : relation = table_open(RelationRelationId, AccessShareLock);
1666 :
1667 4130 : scan = systable_beginscan(relation, InvalidOid, false,
1668 : NULL, 0, NULL);
1669 :
1670 2153662 : while ((classTup = systable_getnext(scan)) != NULL)
1671 : {
1672 2149532 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1673 2149532 : TransactionId relfrozenxid = classForm->relfrozenxid;
1674 2149532 : TransactionId relminmxid = classForm->relminmxid;
1675 :
1676 : /*
1677 : * Only consider relations able to hold unfrozen XIDs (anything else
1678 : * should have InvalidTransactionId in relfrozenxid anyway).
1679 : */
1680 2149532 : if (classForm->relkind != RELKIND_RELATION &&
1681 1695568 : classForm->relkind != RELKIND_MATVIEW &&
1682 1693288 : classForm->relkind != RELKIND_TOASTVALUE)
1683 : {
1684 : Assert(!TransactionIdIsValid(relfrozenxid));
1685 : Assert(!MultiXactIdIsValid(relminmxid));
1686 1462048 : continue;
1687 : }
1688 :
1689 : /*
1690 : * Some table AMs might not need per-relation xid / multixid horizons.
1691 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1692 : * to not be set (i.e. set to their respective Invalid*Id)
1693 : * independently. Thus validate and compute horizon for each only if
1694 : * set.
1695 : *
1696 : * If things are working properly, no relation should have a
1697 : * relfrozenxid or relminmxid that is "in the future". However, such
1698 : * cases have been known to arise due to bugs in pg_upgrade. If we
1699 : * see any entries that are "in the future", chicken out and don't do
1700 : * anything. This ensures we won't truncate clog & multixact SLRUs
1701 : * before those relations have been scanned and cleaned up.
1702 : */
1703 :
1704 687484 : if (TransactionIdIsValid(relfrozenxid))
1705 : {
1706 : Assert(TransactionIdIsNormal(relfrozenxid));
1707 :
1708 : /* check for values in the future */
1709 687484 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1710 : {
1711 0 : bogus = true;
1712 0 : break;
1713 : }
1714 :
1715 : /* determine new horizon */
1716 687484 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1717 4298 : newFrozenXid = relfrozenxid;
1718 : }
1719 :
1720 687484 : if (MultiXactIdIsValid(relminmxid))
1721 : {
1722 : /* check for values in the future */
1723 687484 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1724 : {
1725 0 : bogus = true;
1726 0 : break;
1727 : }
1728 :
1729 : /* determine new horizon */
1730 687484 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1731 216 : newMinMulti = relminmxid;
1732 : }
1733 : }
1734 :
1735 : /* we're done with pg_class */
1736 4130 : systable_endscan(scan);
1737 4130 : table_close(relation, AccessShareLock);
1738 :
1739 : /* chicken out if bogus data found */
1740 4130 : if (bogus)
1741 0 : return;
1742 :
1743 : Assert(TransactionIdIsNormal(newFrozenXid));
1744 : Assert(MultiXactIdIsValid(newMinMulti));
1745 :
1746 : /* Now fetch the pg_database tuple we need to update. */
1747 4130 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1748 :
1749 : /*
1750 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1751 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1752 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1753 : * For now, don't optimize that.
1754 : */
1755 4130 : ScanKeyInit(&key[0],
1756 : Anum_pg_database_oid,
1757 : BTEqualStrategyNumber, F_OIDEQ,
1758 : ObjectIdGetDatum(MyDatabaseId));
1759 :
1760 4130 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1761 : NULL, 1, key, &tuple, &inplace_state);
1762 :
1763 4130 : if (!HeapTupleIsValid(tuple))
1764 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1765 :
1766 4130 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1767 :
1768 : /*
1769 : * As in vac_update_relstats(), we ordinarily don't want to let
1770 : * datfrozenxid go backward; but if it's "in the future" then it must be
1771 : * corrupt and it seems best to overwrite it.
1772 : */
1773 4696 : if (dbform->datfrozenxid != newFrozenXid &&
1774 566 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1775 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1776 : {
1777 566 : dbform->datfrozenxid = newFrozenXid;
1778 566 : dirty = true;
1779 : }
1780 : else
1781 3564 : newFrozenXid = dbform->datfrozenxid;
1782 :
1783 : /* Ditto for datminmxid */
1784 4132 : if (dbform->datminmxid != newMinMulti &&
1785 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1786 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1787 : {
1788 2 : dbform->datminmxid = newMinMulti;
1789 2 : dirty = true;
1790 : }
1791 : else
1792 4128 : newMinMulti = dbform->datminmxid;
1793 :
1794 4130 : if (dirty)
1795 566 : systable_inplace_update_finish(inplace_state, tuple);
1796 : else
1797 3564 : systable_inplace_update_cancel(inplace_state);
1798 :
1799 4130 : heap_freetuple(tuple);
1800 4130 : table_close(relation, RowExclusiveLock);
1801 :
1802 : /*
1803 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1804 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1805 : * XID-wrap-limit info is stale, since this action will update that too.
1806 : */
1807 4130 : if (dirty || ForceTransactionIdLimitUpdate())
1808 1144 : vac_truncate_clog(newFrozenXid, newMinMulti,
1809 : lastSaneFrozenXid, lastSaneMinMulti);
1810 : }
1811 :
1812 :
1813 : /*
1814 : * vac_truncate_clog() -- attempt to truncate the commit log
1815 : *
1816 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1817 : * and use it to truncate the transaction commit log (pg_xact).
1818 : * Also update the XID wrap limit info maintained by varsup.c.
1819 : * Likewise for datminmxid.
1820 : *
1821 : * The passed frozenXID and minMulti are the updated values for my own
1822 : * pg_database entry. They're used to initialize the "min" calculations.
1823 : * The caller also passes the "last sane" XID and MXID, since it has
1824 : * those at hand already.
1825 : *
1826 : * This routine is only invoked when we've managed to change our
1827 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1828 : * XID-wrap-limit info is stale.
1829 : */
1830 : static void
1831 1144 : vac_truncate_clog(TransactionId frozenXID,
1832 : MultiXactId minMulti,
1833 : TransactionId lastSaneFrozenXid,
1834 : MultiXactId lastSaneMinMulti)
1835 : {
1836 1144 : TransactionId nextXID = ReadNextTransactionId();
1837 : Relation relation;
1838 : TableScanDesc scan;
1839 : HeapTuple tuple;
1840 : Oid oldestxid_datoid;
1841 : Oid minmulti_datoid;
1842 1144 : bool bogus = false;
1843 1144 : bool frozenAlreadyWrapped = false;
1844 :
1845 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1846 1144 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1847 :
1848 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1849 1144 : oldestxid_datoid = MyDatabaseId;
1850 1144 : minmulti_datoid = MyDatabaseId;
1851 :
1852 : /*
1853 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1854 : *
1855 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1856 : * the values could change while we look at them. Fetch each one just
1857 : * once to ensure sane behavior of the comparison logic. (Here, as in
1858 : * many other places, we assume that fetching or updating an XID in shared
1859 : * storage is atomic.)
1860 : *
1861 : * Note: we need not worry about a race condition with new entries being
1862 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1863 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1864 : * of the interlock against copying a DB containing an active backend.
1865 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1866 : * concurrently modify the datfrozenxid's of different databases, the
1867 : * worst possible outcome is that pg_xact is not truncated as aggressively
1868 : * as it could be.
1869 : */
1870 1144 : relation = table_open(DatabaseRelationId, AccessShareLock);
1871 :
1872 1144 : scan = table_beginscan_catalog(relation, 0, NULL);
1873 :
1874 4406 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1875 : {
1876 3262 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1877 3262 : TransactionId datfrozenxid = dbform->datfrozenxid;
1878 3262 : TransactionId datminmxid = dbform->datminmxid;
1879 :
1880 : Assert(TransactionIdIsNormal(datfrozenxid));
1881 : Assert(MultiXactIdIsValid(datminmxid));
1882 :
1883 : /*
1884 : * If database is in the process of getting dropped, or has been
1885 : * interrupted while doing so, no connections to it are possible
1886 : * anymore. Therefore we don't need to take it into account here.
1887 : * Which is good, because it can't be processed by autovacuum either.
1888 : */
1889 3262 : if (database_is_invalid_form((Form_pg_database) dbform))
1890 : {
1891 2 : elog(DEBUG2,
1892 : "skipping invalid database \"%s\" while computing relfrozenxid",
1893 : NameStr(dbform->datname));
1894 2 : continue;
1895 : }
1896 :
1897 : /*
1898 : * If things are working properly, no database should have a
1899 : * datfrozenxid or datminmxid that is "in the future". However, such
1900 : * cases have been known to arise due to bugs in pg_upgrade. If we
1901 : * see any entries that are "in the future", chicken out and don't do
1902 : * anything. This ensures we won't truncate clog before those
1903 : * databases have been scanned and cleaned up. (We will issue the
1904 : * "already wrapped" warning if appropriate, though.)
1905 : */
1906 6520 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1907 3260 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1908 0 : bogus = true;
1909 :
1910 3260 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1911 0 : frozenAlreadyWrapped = true;
1912 3260 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1913 : {
1914 560 : frozenXID = datfrozenxid;
1915 560 : oldestxid_datoid = dbform->oid;
1916 : }
1917 :
1918 3260 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1919 : {
1920 4 : minMulti = datminmxid;
1921 4 : minmulti_datoid = dbform->oid;
1922 : }
1923 : }
1924 :
1925 1144 : table_endscan(scan);
1926 :
1927 1144 : table_close(relation, AccessShareLock);
1928 :
1929 : /*
1930 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1931 : * the computed minimum XID might be bogus. This case should now be
1932 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1933 : * test anyway.
1934 : */
1935 1144 : if (frozenAlreadyWrapped)
1936 : {
1937 0 : ereport(WARNING,
1938 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1939 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1940 0 : LWLockRelease(WrapLimitsVacuumLock);
1941 0 : return;
1942 : }
1943 :
1944 : /* chicken out if data is bogus in any other way */
1945 1144 : if (bogus)
1946 : {
1947 0 : LWLockRelease(WrapLimitsVacuumLock);
1948 0 : return;
1949 : }
1950 :
1951 : /*
1952 : * Advance the oldest value for commit timestamps before truncating, so
1953 : * that if a user requests a timestamp for a transaction we're truncating
1954 : * away right after this point, they get NULL instead of an ugly "file not
1955 : * found" error from slru.c. This doesn't matter for xact/multixact
1956 : * because they are not subject to arbitrary lookups from users.
1957 : */
1958 1144 : AdvanceOldestCommitTsXid(frozenXID);
1959 :
1960 : /*
1961 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1962 : */
1963 1144 : TruncateCLOG(frozenXID, oldestxid_datoid);
1964 1144 : TruncateCommitTs(frozenXID);
1965 1144 : TruncateMultiXact(minMulti, minmulti_datoid);
1966 :
1967 : /*
1968 : * Update the wrap limit for GetNewTransactionId and creation of new
1969 : * MultiXactIds. Note: these functions will also signal the postmaster
1970 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1971 : * signaling twice?
1972 : */
1973 1144 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1974 1144 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1975 :
1976 1144 : LWLockRelease(WrapLimitsVacuumLock);
1977 : }
1978 :
1979 :
1980 : /*
1981 : * vacuum_rel() -- vacuum one heap relation
1982 : *
1983 : * relid identifies the relation to vacuum. If relation is supplied,
1984 : * use the name therein for reporting any failure to open/lock the rel;
1985 : * do not use it once we've successfully opened the rel, since it might
1986 : * be stale.
1987 : *
1988 : * Returns true if it's okay to proceed with a requested ANALYZE
1989 : * operation on this table.
1990 : *
1991 : * Doing one heap at a time incurs extra overhead, since we need to
1992 : * check that the heap exists again just before we vacuum it. The
1993 : * reason that we do this is so that vacuuming can be spread across
1994 : * many small transactions. Otherwise, two-phase locking would require
1995 : * us to lock the entire database during one pass of the vacuum cleaner.
1996 : *
1997 : * At entry and exit, we are not inside a transaction.
1998 : */
1999 : static bool
2000 121434 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
2001 : BufferAccessStrategy bstrategy)
2002 : {
2003 : LOCKMODE lmode;
2004 : Relation rel;
2005 : LockRelId lockrelid;
2006 : Oid priv_relid;
2007 : Oid toast_relid;
2008 : Oid save_userid;
2009 : int save_sec_context;
2010 : int save_nestlevel;
2011 :
2012 : Assert(params != NULL);
2013 :
2014 : /* Begin a transaction for vacuuming this relation */
2015 121434 : StartTransactionCommand();
2016 :
2017 121434 : if (!(params->options & VACOPT_FULL))
2018 : {
2019 : /*
2020 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2021 : * other concurrent VACUUMs know that they can ignore this one while
2022 : * determining their OldestXmin. (The reason we don't set it during a
2023 : * full VACUUM is exactly that we may have to run user-defined
2024 : * functions for functional indexes, and we want to make sure that if
2025 : * they use the snapshot set above, any tuples it requires can't get
2026 : * removed from other tables. An index function that depends on the
2027 : * contents of other tables is arguably broken, but we won't break it
2028 : * here by violating transaction semantics.)
2029 : *
2030 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2031 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2032 : * in an emergency.
2033 : *
2034 : * Note: these flags remain set until CommitTransaction or
2035 : * AbortTransaction. We don't want to clear them until we reset
2036 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2037 : * might appear to go backwards, which is probably Not Good. (We also
2038 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2039 : * xmin doesn't become visible ahead of setting the flag.)
2040 : */
2041 121026 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2042 121026 : MyProc->statusFlags |= PROC_IN_VACUUM;
2043 121026 : if (params->is_wraparound)
2044 94450 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2045 121026 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2046 121026 : LWLockRelease(ProcArrayLock);
2047 : }
2048 :
2049 : /*
2050 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2051 : * cutoff xids in local memory wrapping around, and to have updated xmin
2052 : * horizons.
2053 : */
2054 121434 : PushActiveSnapshot(GetTransactionSnapshot());
2055 :
2056 : /*
2057 : * Check for user-requested abort. Note we want this to be inside a
2058 : * transaction, so xact.c doesn't issue useless WARNING.
2059 : */
2060 121434 : CHECK_FOR_INTERRUPTS();
2061 :
2062 : /*
2063 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2064 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2065 : * way, we can be sure that no other backend is vacuuming the same table.
2066 : */
2067 242868 : lmode = (params->options & VACOPT_FULL) ?
2068 121434 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2069 :
2070 : /* open the relation and get the appropriate lock on it */
2071 121434 : rel = vacuum_open_relation(relid, relation, params->options,
2072 121434 : params->log_min_duration >= 0, lmode);
2073 :
2074 : /* leave if relation could not be opened or locked */
2075 121434 : if (!rel)
2076 : {
2077 24 : PopActiveSnapshot();
2078 24 : CommitTransactionCommand();
2079 24 : return false;
2080 : }
2081 :
2082 : /*
2083 : * When recursing to a TOAST table, check privileges on the parent. NB:
2084 : * This is only safe to do because we hold a session lock on the main
2085 : * relation that prevents concurrent deletion.
2086 : */
2087 121410 : if (OidIsValid(params->toast_parent))
2088 9166 : priv_relid = params->toast_parent;
2089 : else
2090 112244 : priv_relid = RelationGetRelid(rel);
2091 :
2092 : /*
2093 : * Check if relation needs to be skipped based on privileges. This check
2094 : * happens also when building the relation list to vacuum for a manual
2095 : * operation, and needs to be done additionally here as VACUUM could
2096 : * happen across multiple transactions where privileges could have changed
2097 : * in-between. Make sure to only generate logs for VACUUM in this case.
2098 : */
2099 121410 : if (!vacuum_is_permitted_for_relation(priv_relid,
2100 : rel->rd_rel,
2101 121410 : params->options & ~VACOPT_ANALYZE))
2102 : {
2103 72 : relation_close(rel, lmode);
2104 72 : PopActiveSnapshot();
2105 72 : CommitTransactionCommand();
2106 72 : return false;
2107 : }
2108 :
2109 : /*
2110 : * Check that it's of a vacuumable relkind.
2111 : */
2112 121338 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2113 44138 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2114 44130 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2115 188 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2116 : {
2117 2 : ereport(WARNING,
2118 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2119 : RelationGetRelationName(rel))));
2120 2 : relation_close(rel, lmode);
2121 2 : PopActiveSnapshot();
2122 2 : CommitTransactionCommand();
2123 2 : return false;
2124 : }
2125 :
2126 : /*
2127 : * Silently ignore tables that are temp tables of other backends ---
2128 : * trying to vacuum these will lead to great unhappiness, since their
2129 : * contents are probably not up-to-date on disk. (We don't throw a
2130 : * warning here; it would just lead to chatter during a database-wide
2131 : * VACUUM.)
2132 : */
2133 121336 : if (RELATION_IS_OTHER_TEMP(rel))
2134 : {
2135 2 : relation_close(rel, lmode);
2136 2 : PopActiveSnapshot();
2137 2 : CommitTransactionCommand();
2138 2 : return false;
2139 : }
2140 :
2141 : /*
2142 : * Silently ignore partitioned tables as there is no work to be done. The
2143 : * useful work is on their child partitions, which have been queued up for
2144 : * us separately.
2145 : */
2146 121334 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2147 : {
2148 186 : relation_close(rel, lmode);
2149 186 : PopActiveSnapshot();
2150 186 : CommitTransactionCommand();
2151 : /* It's OK to proceed with ANALYZE on this table */
2152 186 : return true;
2153 : }
2154 :
2155 : /*
2156 : * Get a session-level lock too. This will protect our access to the
2157 : * relation across multiple transactions, so that we can vacuum the
2158 : * relation's TOAST table (if any) secure in the knowledge that no one is
2159 : * deleting the parent relation.
2160 : *
2161 : * NOTE: this cannot block, even if someone else is waiting for access,
2162 : * because the lock manager knows that both lock requests are from the
2163 : * same process.
2164 : */
2165 121148 : lockrelid = rel->rd_lockInfo.lockRelId;
2166 121148 : LockRelationIdForSession(&lockrelid, lmode);
2167 :
2168 : /*
2169 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2170 : * specified in VACUUM command, or when running in an autovacuum worker
2171 : */
2172 121148 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2173 : {
2174 : StdRdOptIndexCleanup vacuum_index_cleanup;
2175 :
2176 103122 : if (rel->rd_options == NULL)
2177 101502 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2178 : else
2179 1620 : vacuum_index_cleanup =
2180 1620 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2181 :
2182 103122 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2183 103098 : params->index_cleanup = VACOPTVALUE_AUTO;
2184 24 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2185 12 : params->index_cleanup = VACOPTVALUE_ENABLED;
2186 : else
2187 : {
2188 : Assert(vacuum_index_cleanup ==
2189 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2190 12 : params->index_cleanup = VACOPTVALUE_DISABLED;
2191 : }
2192 : }
2193 :
2194 : /*
2195 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2196 : * parameter was specified. This overrides the GUC value.
2197 : */
2198 121148 : if (rel->rd_options != NULL &&
2199 1664 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2200 0 : params->max_eager_freeze_failure_rate =
2201 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2202 :
2203 : /*
2204 : * Set truncate option based on truncate reloption or GUC if it wasn't
2205 : * specified in VACUUM command, or when running in an autovacuum worker
2206 : */
2207 121148 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2208 : {
2209 103138 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2210 :
2211 103138 : if (opts && opts->vacuum_truncate_set)
2212 : {
2213 12 : if (opts->vacuum_truncate)
2214 6 : params->truncate = VACOPTVALUE_ENABLED;
2215 : else
2216 6 : params->truncate = VACOPTVALUE_DISABLED;
2217 : }
2218 103126 : else if (vacuum_truncate)
2219 103120 : params->truncate = VACOPTVALUE_ENABLED;
2220 : else
2221 6 : params->truncate = VACOPTVALUE_DISABLED;
2222 : }
2223 :
2224 : /*
2225 : * Remember the relation's TOAST relation for later, if the caller asked
2226 : * us to process it. In VACUUM FULL, though, the toast table is
2227 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2228 : * unless PROCESS_MAIN is disabled.
2229 : */
2230 121148 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2231 26322 : ((params->options & VACOPT_FULL) == 0 ||
2232 380 : (params->options & VACOPT_PROCESS_MAIN) == 0))
2233 25948 : toast_relid = rel->rd_rel->reltoastrelid;
2234 : else
2235 95200 : toast_relid = InvalidOid;
2236 :
2237 : /*
2238 : * Switch to the table owner's userid, so that any index functions are run
2239 : * as that user. Also lock down security-restricted operations and
2240 : * arrange to make GUC variable changes local to this command. (This is
2241 : * unnecessary, but harmless, for lazy VACUUM.)
2242 : */
2243 121148 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2244 121148 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2245 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2246 121148 : save_nestlevel = NewGUCNestLevel();
2247 121148 : RestrictSearchPath();
2248 :
2249 : /*
2250 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2251 : * relation. Otherwise, we can skip this part. If processing the TOAST
2252 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2253 : * to be set when we recurse to the TOAST table.
2254 : */
2255 121148 : if (params->options & VACOPT_PROCESS_MAIN)
2256 : {
2257 : /*
2258 : * Do the actual work --- either FULL or "lazy" vacuum
2259 : */
2260 120994 : if (params->options & VACOPT_FULL)
2261 : {
2262 374 : ClusterParams cluster_params = {0};
2263 :
2264 374 : if ((params->options & VACOPT_VERBOSE) != 0)
2265 2 : cluster_params.options |= CLUOPT_VERBOSE;
2266 :
2267 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2268 374 : cluster_rel(rel, InvalidOid, &cluster_params);
2269 : /* cluster_rel closes the relation, but keeps lock */
2270 :
2271 368 : rel = NULL;
2272 : }
2273 : else
2274 120620 : table_relation_vacuum(rel, params, bstrategy);
2275 : }
2276 :
2277 : /* Roll back any GUC changes executed by index functions */
2278 121142 : AtEOXact_GUC(false, save_nestlevel);
2279 :
2280 : /* Restore userid and security context */
2281 121142 : SetUserIdAndSecContext(save_userid, save_sec_context);
2282 :
2283 : /* all done with this class, but hold lock until commit */
2284 121142 : if (rel)
2285 120774 : relation_close(rel, NoLock);
2286 :
2287 : /*
2288 : * Complete the transaction and free all temporary memory used.
2289 : */
2290 121142 : PopActiveSnapshot();
2291 121142 : CommitTransactionCommand();
2292 :
2293 : /*
2294 : * If the relation has a secondary toast rel, vacuum that too while we
2295 : * still hold the session lock on the main table. Note however that
2296 : * "analyze" will not get done on the toast table. This is good, because
2297 : * the toaster always uses hardcoded index access and statistics are
2298 : * totally unimportant for toast relations.
2299 : */
2300 121142 : if (toast_relid != InvalidOid)
2301 : {
2302 : VacuumParams toast_vacuum_params;
2303 :
2304 : /*
2305 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2306 : * set toast_parent so that the privilege checks are done on the main
2307 : * relation. NB: This is only safe to do because we hold a session
2308 : * lock on the main relation that prevents concurrent deletion.
2309 : */
2310 9166 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2311 9166 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2312 9166 : toast_vacuum_params.toast_parent = relid;
2313 :
2314 9166 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
2315 : }
2316 :
2317 : /*
2318 : * Now release the session-level lock on the main table.
2319 : */
2320 121142 : UnlockRelationIdForSession(&lockrelid, lmode);
2321 :
2322 : /* Report that we really did it. */
2323 121142 : return true;
2324 : }
2325 :
2326 :
2327 : /*
2328 : * Open all the vacuumable indexes of the given relation, obtaining the
2329 : * specified kind of lock on each. Return an array of Relation pointers for
2330 : * the indexes into *Irel, and the number of indexes into *nindexes.
2331 : *
2332 : * We consider an index vacuumable if it is marked insertable (indisready).
2333 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2334 : * execution, and what we have is too corrupt to be processable. We will
2335 : * vacuum even if the index isn't indisvalid; this is important because in a
2336 : * unique index, uniqueness checks will be performed anyway and had better not
2337 : * hit dangling index pointers.
2338 : */
2339 : void
2340 135716 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2341 : int *nindexes, Relation **Irel)
2342 : {
2343 : List *indexoidlist;
2344 : ListCell *indexoidscan;
2345 : int i;
2346 :
2347 : Assert(lockmode != NoLock);
2348 :
2349 135716 : indexoidlist = RelationGetIndexList(relation);
2350 :
2351 : /* allocate enough memory for all indexes */
2352 135716 : i = list_length(indexoidlist);
2353 :
2354 135716 : if (i > 0)
2355 126596 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2356 : else
2357 9120 : *Irel = NULL;
2358 :
2359 : /* collect just the ready indexes */
2360 135716 : i = 0;
2361 337988 : foreach(indexoidscan, indexoidlist)
2362 : {
2363 202272 : Oid indexoid = lfirst_oid(indexoidscan);
2364 : Relation indrel;
2365 :
2366 202272 : indrel = index_open(indexoid, lockmode);
2367 202272 : if (indrel->rd_index->indisready)
2368 202272 : (*Irel)[i++] = indrel;
2369 : else
2370 0 : index_close(indrel, lockmode);
2371 : }
2372 :
2373 135716 : *nindexes = i;
2374 :
2375 135716 : list_free(indexoidlist);
2376 135716 : }
2377 :
2378 : /*
2379 : * Release the resources acquired by vac_open_indexes. Optionally release
2380 : * the locks (say NoLock to keep 'em).
2381 : */
2382 : void
2383 136534 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2384 : {
2385 136534 : if (Irel == NULL)
2386 9944 : return;
2387 :
2388 328850 : while (nindexes--)
2389 : {
2390 202260 : Relation ind = Irel[nindexes];
2391 :
2392 202260 : index_close(ind, lockmode);
2393 : }
2394 126590 : pfree(Irel);
2395 : }
2396 :
2397 : /*
2398 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2399 : *
2400 : * This should be called in each major loop of VACUUM processing,
2401 : * typically once per page processed.
2402 : */
2403 : void
2404 83940184 : vacuum_delay_point(bool is_analyze)
2405 : {
2406 83940184 : double msec = 0;
2407 :
2408 : /* Always check for interrupts */
2409 83940184 : CHECK_FOR_INTERRUPTS();
2410 :
2411 83940184 : if (InterruptPending ||
2412 83940184 : (!VacuumCostActive && !ConfigReloadPending))
2413 73906886 : return;
2414 :
2415 : /*
2416 : * Autovacuum workers should reload the configuration file if requested.
2417 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2418 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2419 : * vacuumed or analyzed.
2420 : */
2421 10033298 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2422 : {
2423 0 : ConfigReloadPending = false;
2424 0 : ProcessConfigFile(PGC_SIGHUP);
2425 0 : VacuumUpdateCosts();
2426 : }
2427 :
2428 : /*
2429 : * If we disabled cost-based delays after reloading the config file,
2430 : * return.
2431 : */
2432 10033298 : if (!VacuumCostActive)
2433 0 : return;
2434 :
2435 : /*
2436 : * For parallel vacuum, the delay is computed based on the shared cost
2437 : * balance. See compute_parallel_delay.
2438 : */
2439 10033298 : if (VacuumSharedCostBalance != NULL)
2440 0 : msec = compute_parallel_delay();
2441 10033298 : else if (VacuumCostBalance >= vacuum_cost_limit)
2442 5208 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2443 :
2444 : /* Nap if appropriate */
2445 10033298 : if (msec > 0)
2446 : {
2447 : instr_time delay_start;
2448 :
2449 5208 : if (msec > vacuum_cost_delay * 4)
2450 18 : msec = vacuum_cost_delay * 4;
2451 :
2452 5208 : if (track_cost_delay_timing)
2453 0 : INSTR_TIME_SET_CURRENT(delay_start);
2454 :
2455 5208 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2456 5208 : pg_usleep(msec * 1000);
2457 5208 : pgstat_report_wait_end();
2458 :
2459 5208 : if (track_cost_delay_timing)
2460 : {
2461 : instr_time delay_end;
2462 : instr_time delay;
2463 :
2464 0 : INSTR_TIME_SET_CURRENT(delay_end);
2465 0 : INSTR_TIME_SET_ZERO(delay);
2466 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2467 :
2468 : /*
2469 : * For parallel workers, we only report the delay time every once
2470 : * in a while to avoid overloading the leader with messages and
2471 : * interrupts.
2472 : */
2473 0 : if (IsParallelWorker())
2474 : {
2475 : static instr_time last_report_time;
2476 : instr_time time_since_last_report;
2477 :
2478 : Assert(!is_analyze);
2479 :
2480 : /* Accumulate the delay time */
2481 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2482 :
2483 : /* Calculate interval since last report */
2484 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2485 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2486 :
2487 : /* If we haven't reported in a while, do so now */
2488 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2489 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2490 : {
2491 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2492 : parallel_vacuum_worker_delay_ns);
2493 :
2494 : /* Reset variables */
2495 0 : last_report_time = delay_end;
2496 0 : parallel_vacuum_worker_delay_ns = 0;
2497 : }
2498 : }
2499 0 : else if (is_analyze)
2500 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2501 0 : INSTR_TIME_GET_NANOSEC(delay));
2502 : else
2503 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2504 0 : INSTR_TIME_GET_NANOSEC(delay));
2505 : }
2506 :
2507 : /*
2508 : * We don't want to ignore postmaster death during very long vacuums
2509 : * with vacuum_cost_delay configured. We can't use the usual
2510 : * WaitLatch() approach here because we want microsecond-based sleep
2511 : * durations above.
2512 : */
2513 5208 : if (IsUnderPostmaster && !PostmasterIsAlive())
2514 0 : exit(1);
2515 :
2516 5208 : VacuumCostBalance = 0;
2517 :
2518 : /*
2519 : * Balance and update limit values for autovacuum workers. We must do
2520 : * this periodically, as the number of workers across which we are
2521 : * balancing the limit may have changed.
2522 : *
2523 : * TODO: There may be better criteria for determining when to do this
2524 : * besides "check after napping".
2525 : */
2526 5208 : AutoVacuumUpdateCostLimit();
2527 :
2528 : /* Might have gotten an interrupt while sleeping */
2529 5208 : CHECK_FOR_INTERRUPTS();
2530 : }
2531 : }
2532 :
2533 : /*
2534 : * Computes the vacuum delay for parallel workers.
2535 : *
2536 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2537 : * worker to sleep in proportion to the share of work it's done. We achieve this
2538 : * by allowing all parallel vacuum workers including the leader process to
2539 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2540 : * We allow each worker to update it as and when it has incurred any cost and
2541 : * then based on that decide whether it needs to sleep. We compute the time
2542 : * to sleep for a worker based on the cost it has incurred
2543 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2544 : * that amount. This avoids putting to sleep those workers which have done less
2545 : * I/O than other workers and therefore ensure that workers
2546 : * which are doing more I/O got throttled more.
2547 : *
2548 : * We allow a worker to sleep only if it has performed I/O above a certain
2549 : * threshold, which is calculated based on the number of active workers
2550 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2551 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2552 : * the required throttling if we force a worker that has done more than 50%
2553 : * of its share of work to sleep.
2554 : */
2555 : static double
2556 0 : compute_parallel_delay(void)
2557 : {
2558 0 : double msec = 0;
2559 : uint32 shared_balance;
2560 : int nworkers;
2561 :
2562 : /* Parallel vacuum must be active */
2563 : Assert(VacuumSharedCostBalance);
2564 :
2565 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2566 :
2567 : /* At least count itself */
2568 : Assert(nworkers >= 1);
2569 :
2570 : /* Update the shared cost balance value atomically */
2571 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2572 :
2573 : /* Compute the total local balance for the current worker */
2574 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2575 :
2576 0 : if ((shared_balance >= vacuum_cost_limit) &&
2577 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2578 : {
2579 : /* Compute sleep time based on the local cost balance */
2580 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2581 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2582 0 : VacuumCostBalanceLocal = 0;
2583 : }
2584 :
2585 : /*
2586 : * Reset the local balance as we accumulated it into the shared value.
2587 : */
2588 0 : VacuumCostBalance = 0;
2589 :
2590 0 : return msec;
2591 : }
2592 :
2593 : /*
2594 : * A wrapper function of defGetBoolean().
2595 : *
2596 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2597 : * of true and false.
2598 : */
2599 : static VacOptValue
2600 326 : get_vacoptval_from_boolean(DefElem *def)
2601 : {
2602 326 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2603 : }
2604 :
2605 : /*
2606 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2607 : *
2608 : * Returns bulk delete stats derived from input stats
2609 : */
2610 : IndexBulkDeleteResult *
2611 2382 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2612 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2613 : {
2614 : /* Do bulk deletion */
2615 2382 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2616 : dead_items);
2617 :
2618 2382 : ereport(ivinfo->message_level,
2619 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2620 : RelationGetRelationName(ivinfo->index),
2621 : dead_items_info->num_items)));
2622 :
2623 2382 : return istat;
2624 : }
2625 :
2626 : /*
2627 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2628 : *
2629 : * Returns bulk delete stats derived from input stats
2630 : */
2631 : IndexBulkDeleteResult *
2632 150504 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2633 : {
2634 150504 : istat = index_vacuum_cleanup(ivinfo, istat);
2635 :
2636 150504 : if (istat)
2637 2650 : ereport(ivinfo->message_level,
2638 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2639 : RelationGetRelationName(ivinfo->index),
2640 : istat->num_index_tuples,
2641 : istat->num_pages),
2642 : errdetail("%.0f index row versions were removed.\n"
2643 : "%u index pages were newly deleted.\n"
2644 : "%u index pages are currently deleted, of which %u are currently reusable.",
2645 : istat->tuples_removed,
2646 : istat->pages_newly_deleted,
2647 : istat->pages_deleted, istat->pages_free)));
2648 :
2649 150504 : return istat;
2650 : }
2651 :
2652 : /*
2653 : * vac_tid_reaped() -- is a particular tid deletable?
2654 : *
2655 : * This has the right signature to be an IndexBulkDeleteCallback.
2656 : */
2657 : static bool
2658 6476980 : vac_tid_reaped(ItemPointer itemptr, void *state)
2659 : {
2660 6476980 : TidStore *dead_items = (TidStore *) state;
2661 :
2662 6476980 : return TidStoreIsMember(dead_items, itemptr);
2663 : }
|