Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/memutils.h"
60 : #include "utils/snapmgr.h"
61 : #include "utils/syscache.h"
62 :
63 : /*
64 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
65 : * This aims to avoid sending too many messages and waking up the leader too
66 : * frequently.
67 : */
68 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
69 :
70 : /*
71 : * GUC parameters
72 : */
73 : int vacuum_freeze_min_age;
74 : int vacuum_freeze_table_age;
75 : int vacuum_multixact_freeze_min_age;
76 : int vacuum_multixact_freeze_table_age;
77 : int vacuum_failsafe_age;
78 : int vacuum_multixact_failsafe_age;
79 : double vacuum_max_eager_freeze_failure_rate;
80 : bool track_cost_delay_timing;
81 :
82 : /*
83 : * Variables for cost-based vacuum delay. The defaults differ between
84 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
85 : * vacuum code. They are initialized here to the defaults for client backends
86 : * executing VACUUM or ANALYZE.
87 : */
88 : double vacuum_cost_delay = 0;
89 : int vacuum_cost_limit = 200;
90 :
91 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
92 : int64 parallel_vacuum_worker_delay_ns = 0;
93 :
94 : /*
95 : * VacuumFailsafeActive is defined as a global so that we can determine
96 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
97 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
98 : * for the table until after vacuuming has completed, regardless of other
99 : * settings.
100 : *
101 : * Only VACUUM code should inspect this variable and only table access methods
102 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
103 : * inspected to determine whether or not to allow cost-based delays. Table AMs
104 : * are free to set it if they desire this behavior, but it is false by default
105 : * and reset to false in between vacuuming each relation.
106 : */
107 : bool VacuumFailsafeActive = false;
108 :
109 : /*
110 : * Variables for cost-based parallel vacuum. See comments atop
111 : * compute_parallel_delay to understand how it works.
112 : */
113 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
114 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
115 : int VacuumCostBalanceLocal = 0;
116 :
117 : /* non-export function prototypes */
118 : static List *expand_vacuum_rel(VacuumRelation *vrel,
119 : MemoryContext vac_context, int options);
120 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
121 : static void vac_truncate_clog(TransactionId frozenXID,
122 : MultiXactId minMulti,
123 : TransactionId lastSaneFrozenXid,
124 : MultiXactId lastSaneMinMulti);
125 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
126 : BufferAccessStrategy bstrategy);
127 : static double compute_parallel_delay(void);
128 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
129 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
130 :
131 : /*
132 : * GUC check function to ensure GUC value specified is within the allowable
133 : * range.
134 : */
135 : bool
136 1996 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
137 : GucSource source)
138 : {
139 : /* Value upper and lower hard limits are inclusive */
140 1996 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
141 1996 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
142 1996 : return true;
143 :
144 : /* Value does not fall within any allowable range */
145 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
146 : "vacuum_buffer_usage_limit",
147 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
148 :
149 0 : return false;
150 : }
151 :
152 : /*
153 : * Primary entry point for manual VACUUM and ANALYZE commands
154 : *
155 : * This is mainly a preparation wrapper for the real operations that will
156 : * happen in vacuum().
157 : */
158 : void
159 10778 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
160 : {
161 : VacuumParams params;
162 10778 : BufferAccessStrategy bstrategy = NULL;
163 10778 : bool verbose = false;
164 10778 : bool skip_locked = false;
165 10778 : bool analyze = false;
166 10778 : bool freeze = false;
167 10778 : bool full = false;
168 10778 : bool disable_page_skipping = false;
169 10778 : bool process_main = true;
170 10778 : bool process_toast = true;
171 : int ring_size;
172 10778 : bool skip_database_stats = false;
173 10778 : bool only_database_stats = false;
174 : MemoryContext vac_context;
175 : ListCell *lc;
176 :
177 : /* index_cleanup and truncate values unspecified for now */
178 10778 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
179 10778 : params.truncate = VACOPTVALUE_UNSPECIFIED;
180 :
181 : /* By default parallel vacuum is enabled */
182 10778 : params.nworkers = 0;
183 :
184 : /* Will be set later if we recurse to a TOAST table. */
185 10778 : params.toast_parent = InvalidOid;
186 :
187 : /*
188 : * Set this to an invalid value so it is clear whether or not a
189 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
190 : */
191 10778 : ring_size = -1;
192 :
193 : /* Parse options list */
194 19774 : foreach(lc, vacstmt->options)
195 : {
196 9032 : DefElem *opt = (DefElem *) lfirst(lc);
197 :
198 : /* Parse common options for VACUUM and ANALYZE */
199 9032 : if (strcmp(opt->defname, "verbose") == 0)
200 38 : verbose = defGetBoolean(opt);
201 8994 : else if (strcmp(opt->defname, "skip_locked") == 0)
202 334 : skip_locked = defGetBoolean(opt);
203 8660 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
204 : {
205 : const char *hintmsg;
206 : int result;
207 : char *vac_buffer_size;
208 :
209 54 : vac_buffer_size = defGetString(opt);
210 :
211 : /*
212 : * Check that the specified value is valid and the size falls
213 : * within the hard upper and lower limits if it is not 0.
214 : */
215 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
216 48 : (result != 0 &&
217 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
218 : {
219 18 : ereport(ERROR,
220 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
221 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
222 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
223 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
224 : }
225 :
226 36 : ring_size = result;
227 : }
228 8606 : else if (!vacstmt->is_vacuumcmd)
229 6 : ereport(ERROR,
230 : (errcode(ERRCODE_SYNTAX_ERROR),
231 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
232 : parser_errposition(pstate, opt->location)));
233 :
234 : /* Parse options available on VACUUM */
235 8600 : else if (strcmp(opt->defname, "analyze") == 0)
236 1382 : analyze = defGetBoolean(opt);
237 7218 : else if (strcmp(opt->defname, "freeze") == 0)
238 1168 : freeze = defGetBoolean(opt);
239 6050 : else if (strcmp(opt->defname, "full") == 0)
240 376 : full = defGetBoolean(opt);
241 5674 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
242 184 : disable_page_skipping = defGetBoolean(opt);
243 5490 : else if (strcmp(opt->defname, "index_cleanup") == 0)
244 : {
245 : /* Interpret no string as the default, which is 'auto' */
246 174 : if (!opt->arg)
247 0 : params.index_cleanup = VACOPTVALUE_AUTO;
248 : else
249 : {
250 174 : char *sval = defGetString(opt);
251 :
252 : /* Try matching on 'auto' string, or fall back on boolean */
253 174 : if (pg_strcasecmp(sval, "auto") == 0)
254 6 : params.index_cleanup = VACOPTVALUE_AUTO;
255 : else
256 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
257 : }
258 : }
259 5316 : else if (strcmp(opt->defname, "process_main") == 0)
260 154 : process_main = defGetBoolean(opt);
261 5162 : else if (strcmp(opt->defname, "process_toast") == 0)
262 160 : process_toast = defGetBoolean(opt);
263 5002 : else if (strcmp(opt->defname, "truncate") == 0)
264 148 : params.truncate = get_vacoptval_from_boolean(opt);
265 4854 : else if (strcmp(opt->defname, "parallel") == 0)
266 : {
267 338 : if (opt->arg == NULL)
268 : {
269 6 : ereport(ERROR,
270 : (errcode(ERRCODE_SYNTAX_ERROR),
271 : errmsg("parallel option requires a value between 0 and %d",
272 : MAX_PARALLEL_WORKER_LIMIT),
273 : parser_errposition(pstate, opt->location)));
274 : }
275 : else
276 : {
277 : int nworkers;
278 :
279 332 : nworkers = defGetInt32(opt);
280 332 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
281 6 : ereport(ERROR,
282 : (errcode(ERRCODE_SYNTAX_ERROR),
283 : errmsg("parallel workers for vacuum must be between 0 and %d",
284 : MAX_PARALLEL_WORKER_LIMIT),
285 : parser_errposition(pstate, opt->location)));
286 :
287 : /*
288 : * Disable parallel vacuum, if user has specified parallel
289 : * degree as zero.
290 : */
291 326 : if (nworkers == 0)
292 154 : params.nworkers = -1;
293 : else
294 172 : params.nworkers = nworkers;
295 : }
296 : }
297 4516 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
298 4398 : skip_database_stats = defGetBoolean(opt);
299 118 : else if (strcmp(opt->defname, "only_database_stats") == 0)
300 118 : only_database_stats = defGetBoolean(opt);
301 : else
302 0 : ereport(ERROR,
303 : (errcode(ERRCODE_SYNTAX_ERROR),
304 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
305 : parser_errposition(pstate, opt->location)));
306 : }
307 :
308 : /* Set vacuum options */
309 10742 : params.options =
310 10742 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
311 10742 : (verbose ? VACOPT_VERBOSE : 0) |
312 10742 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
313 10742 : (analyze ? VACOPT_ANALYZE : 0) |
314 10742 : (freeze ? VACOPT_FREEZE : 0) |
315 10742 : (full ? VACOPT_FULL : 0) |
316 10742 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
317 10742 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
318 10742 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
319 10742 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
320 10742 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
321 :
322 : /* sanity checks on options */
323 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
324 : Assert((params.options & VACOPT_VACUUM) ||
325 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
326 :
327 10742 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
328 6 : ereport(ERROR,
329 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
330 : errmsg("VACUUM FULL cannot be performed in parallel")));
331 :
332 : /*
333 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
334 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
335 : * we'll permit that.
336 : */
337 10736 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
338 6 : !(params.options & VACOPT_ANALYZE))
339 6 : ereport(ERROR,
340 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
341 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
342 :
343 : /*
344 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
345 : */
346 10730 : if (!(params.options & VACOPT_ANALYZE))
347 : {
348 9332 : foreach(lc, vacstmt->rels)
349 : {
350 4580 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
351 :
352 4580 : if (vrel->va_cols != NIL)
353 6 : ereport(ERROR,
354 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
355 : errmsg("ANALYZE option must be specified when a column list is provided")));
356 : }
357 : }
358 :
359 :
360 : /*
361 : * Sanity check DISABLE_PAGE_SKIPPING option.
362 : */
363 10724 : if ((params.options & VACOPT_FULL) != 0 &&
364 352 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
365 0 : ereport(ERROR,
366 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
367 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
368 :
369 : /* sanity check for PROCESS_TOAST */
370 10724 : if ((params.options & VACOPT_FULL) != 0 &&
371 352 : (params.options & VACOPT_PROCESS_TOAST) == 0)
372 6 : ereport(ERROR,
373 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
374 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
375 :
376 : /* sanity check for ONLY_DATABASE_STATS */
377 10718 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
378 : {
379 : Assert(params.options & VACOPT_VACUUM);
380 118 : if (vacstmt->rels != NIL)
381 6 : ereport(ERROR,
382 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
383 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
384 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
385 112 : if (params.options & ~(VACOPT_VACUUM |
386 : VACOPT_VERBOSE |
387 : VACOPT_PROCESS_MAIN |
388 : VACOPT_PROCESS_TOAST |
389 : VACOPT_ONLY_DATABASE_STATS))
390 0 : ereport(ERROR,
391 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
392 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
393 : }
394 :
395 : /*
396 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
397 : * them as -1 which means to use the default values.
398 : */
399 10712 : if (params.options & VACOPT_FREEZE)
400 : {
401 1168 : params.freeze_min_age = 0;
402 1168 : params.freeze_table_age = 0;
403 1168 : params.multixact_freeze_min_age = 0;
404 1168 : params.multixact_freeze_table_age = 0;
405 : }
406 : else
407 : {
408 9544 : params.freeze_min_age = -1;
409 9544 : params.freeze_table_age = -1;
410 9544 : params.multixact_freeze_min_age = -1;
411 9544 : params.multixact_freeze_table_age = -1;
412 : }
413 :
414 : /* user-invoked vacuum is never "for wraparound" */
415 10712 : params.is_wraparound = false;
416 :
417 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
418 10712 : params.log_min_duration = -1;
419 :
420 : /*
421 : * Later, in vacuum_rel(), we check if a reloption override was specified.
422 : */
423 10712 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
424 :
425 : /*
426 : * Create special memory context for cross-transaction storage.
427 : *
428 : * Since it is a child of PortalContext, it will go away eventually even
429 : * if we suffer an error; there's no need for special abort cleanup logic.
430 : */
431 10712 : vac_context = AllocSetContextCreate(PortalContext,
432 : "Vacuum",
433 : ALLOCSET_DEFAULT_SIZES);
434 :
435 : /*
436 : * Make a buffer strategy object in the cross-transaction memory context.
437 : * We needn't bother making this for VACUUM (FULL) or VACUUM
438 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
439 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
440 : * when we see ANALYZE.
441 : */
442 10712 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
443 458 : VACOPT_FULL)) == 0 ||
444 458 : (params.options & VACOPT_ANALYZE) != 0)
445 : {
446 :
447 10260 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
448 :
449 : Assert(ring_size >= -1);
450 :
451 : /*
452 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
453 : * command, it overrides the value of VacuumBufferUsageLimit. Either
454 : * value may be 0, in which case GetAccessStrategyWithSize() will
455 : * return NULL, effectively allowing full use of shared buffers.
456 : */
457 10260 : if (ring_size == -1)
458 10230 : ring_size = VacuumBufferUsageLimit;
459 :
460 10260 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
461 :
462 10260 : MemoryContextSwitchTo(old_context);
463 : }
464 :
465 : /* Now go through the common routine */
466 10712 : vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel);
467 :
468 : /* Finally, clean up the vacuum memory context */
469 10586 : MemoryContextDelete(vac_context);
470 10586 : }
471 :
472 : /*
473 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
474 : *
475 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
476 : * we process all relevant tables in the database. For each VacuumRelation,
477 : * if a valid OID is supplied, the table with that OID is what to process;
478 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
479 : *
480 : * params contains a set of parameters that can be used to customize the
481 : * behavior.
482 : *
483 : * bstrategy may be passed in as NULL when the caller does not want to
484 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
485 : * otherwise, the caller must build a BufferAccessStrategy with the number of
486 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
487 : * using.
488 : *
489 : * isTopLevel should be passed down from ProcessUtility.
490 : *
491 : * It is the caller's responsibility that all parameters are allocated in a
492 : * memory context that will not disappear at transaction commit.
493 : */
494 : void
495 108046 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
496 : MemoryContext vac_context, bool isTopLevel)
497 : {
498 : static bool in_vacuum = false;
499 :
500 : const char *stmttype;
501 : volatile bool in_outer_xact,
502 : use_own_xacts;
503 :
504 : Assert(params != NULL);
505 :
506 108046 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
507 :
508 : /*
509 : * We cannot run VACUUM inside a user transaction block; if we were inside
510 : * a transaction, then our commit- and start-transaction-command calls
511 : * would not have the intended effect! There are numerous other subtle
512 : * dependencies on this, too.
513 : *
514 : * ANALYZE (without VACUUM) can run either way.
515 : */
516 108046 : if (params->options & VACOPT_VACUUM)
517 : {
518 103314 : PreventInTransactionBlock(isTopLevel, stmttype);
519 103300 : in_outer_xact = false;
520 : }
521 : else
522 4732 : in_outer_xact = IsInTransactionBlock(isTopLevel);
523 :
524 : /*
525 : * Check for and disallow recursive calls. This could happen when VACUUM
526 : * FULL or ANALYZE calls a hostile index expression that itself calls
527 : * ANALYZE.
528 : */
529 108032 : if (in_vacuum)
530 12 : ereport(ERROR,
531 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
532 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
533 : stmttype)));
534 :
535 : /*
536 : * Build list of relation(s) to process, putting any new data in
537 : * vac_context for safekeeping.
538 : */
539 108020 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
540 : {
541 : /* We don't process any tables in this case */
542 : Assert(relations == NIL);
543 : }
544 107908 : else if (relations != NIL)
545 : {
546 107708 : List *newrels = NIL;
547 : ListCell *lc;
548 :
549 215506 : foreach(lc, relations)
550 : {
551 107834 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
552 : List *sublist;
553 : MemoryContext old_context;
554 :
555 107834 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
556 107798 : old_context = MemoryContextSwitchTo(vac_context);
557 107798 : newrels = list_concat(newrels, sublist);
558 107798 : MemoryContextSwitchTo(old_context);
559 : }
560 107672 : relations = newrels;
561 : }
562 : else
563 200 : relations = get_all_vacuum_rels(vac_context, params->options);
564 :
565 : /*
566 : * Decide whether we need to start/commit our own transactions.
567 : *
568 : * For VACUUM (with or without ANALYZE): always do so, so that we can
569 : * release locks as soon as possible. (We could possibly use the outer
570 : * transaction for a one-table VACUUM, but handling TOAST tables would be
571 : * problematic.)
572 : *
573 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
574 : * start/commit our own transactions. Also, there's no need to do so if
575 : * only processing one relation. For multiple relations when not within a
576 : * transaction block, and also in an autovacuum worker, use own
577 : * transactions so we can release locks sooner.
578 : */
579 107984 : if (params->options & VACOPT_VACUUM)
580 103288 : use_own_xacts = true;
581 : else
582 : {
583 : Assert(params->options & VACOPT_ANALYZE);
584 4696 : if (AmAutoVacuumWorkerProcess())
585 142 : use_own_xacts = true;
586 4554 : else if (in_outer_xact)
587 246 : use_own_xacts = false;
588 4308 : else if (list_length(relations) > 1)
589 730 : use_own_xacts = true;
590 : else
591 3578 : use_own_xacts = false;
592 : }
593 :
594 : /*
595 : * vacuum_rel expects to be entered with no transaction active; it will
596 : * start and commit its own transaction. But we are called by an SQL
597 : * command, and so we are executing inside a transaction already. We
598 : * commit the transaction started in PostgresMain() here, and start
599 : * another one before exiting to match the commit waiting for us back in
600 : * PostgresMain().
601 : */
602 107984 : if (use_own_xacts)
603 : {
604 : Assert(!in_outer_xact);
605 :
606 : /* ActiveSnapshot is not set by autovacuum */
607 104160 : if (ActiveSnapshotSet())
608 6826 : PopActiveSnapshot();
609 :
610 : /* matches the StartTransaction in PostgresMain() */
611 104160 : CommitTransactionCommand();
612 : }
613 :
614 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
615 107984 : PG_TRY();
616 : {
617 : ListCell *cur;
618 :
619 107984 : in_vacuum = true;
620 107984 : VacuumFailsafeActive = false;
621 107984 : VacuumUpdateCosts();
622 107984 : VacuumCostBalance = 0;
623 107984 : VacuumCostBalanceLocal = 0;
624 107984 : VacuumSharedCostBalance = NULL;
625 107984 : VacuumActiveNWorkers = NULL;
626 :
627 : /*
628 : * Loop to process each selected relation.
629 : */
630 232064 : foreach(cur, relations)
631 : {
632 124144 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
633 :
634 124144 : if (params->options & VACOPT_VACUUM)
635 : {
636 111364 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
637 100 : continue;
638 : }
639 :
640 124038 : if (params->options & VACOPT_ANALYZE)
641 : {
642 : /*
643 : * If using separate xacts, start one for analyze. Otherwise,
644 : * we can use the outer transaction.
645 : */
646 14318 : if (use_own_xacts)
647 : {
648 9944 : StartTransactionCommand();
649 : /* functions in indexes may want a snapshot set */
650 9944 : PushActiveSnapshot(GetTransactionSnapshot());
651 : }
652 :
653 14318 : analyze_rel(vrel->oid, vrel->relation, params,
654 : vrel->va_cols, in_outer_xact, bstrategy);
655 :
656 14260 : if (use_own_xacts)
657 : {
658 9906 : PopActiveSnapshot();
659 9906 : CommitTransactionCommand();
660 : }
661 : else
662 : {
663 : /*
664 : * If we're not using separate xacts, better separate the
665 : * ANALYZE actions with CCIs. This avoids trouble if user
666 : * says "ANALYZE t, t".
667 : */
668 4354 : CommandCounterIncrement();
669 : }
670 : }
671 :
672 : /*
673 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
674 : * next relation.
675 : */
676 123980 : VacuumFailsafeActive = false;
677 : }
678 : }
679 64 : PG_FINALLY();
680 : {
681 107984 : in_vacuum = false;
682 107984 : VacuumCostActive = false;
683 107984 : VacuumFailsafeActive = false;
684 107984 : VacuumCostBalance = 0;
685 : }
686 107984 : PG_END_TRY();
687 :
688 : /*
689 : * Finish up processing.
690 : */
691 107920 : if (use_own_xacts)
692 : {
693 : /* here, we are not in a transaction */
694 :
695 : /*
696 : * This matches the CommitTransaction waiting for us in
697 : * PostgresMain().
698 : */
699 104116 : StartTransactionCommand();
700 : }
701 :
702 107920 : if ((params->options & VACOPT_VACUUM) &&
703 103256 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
704 : {
705 : /*
706 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
707 : */
708 1668 : vac_update_datfrozenxid();
709 : }
710 :
711 107920 : }
712 :
713 : /*
714 : * Check if the current user has privileges to vacuum or analyze the relation.
715 : * If not, issue a WARNING log message and return false to let the caller
716 : * decide what to do with this relation. This routine is used to decide if a
717 : * relation can be processed for VACUUM or ANALYZE.
718 : */
719 : bool
720 158100 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
721 : bits32 options)
722 : {
723 : char *relname;
724 :
725 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
726 :
727 : /*----------
728 : * A role has privileges to vacuum or analyze the relation if any of the
729 : * following are true:
730 : * - the role owns the current database and the relation is not shared
731 : * - the role has the MAINTAIN privilege on the relation
732 : *----------
733 : */
734 158100 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
735 183236 : !reltuple->relisshared) ||
736 25910 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
737 157760 : return true;
738 :
739 340 : relname = NameStr(reltuple->relname);
740 :
741 340 : if ((options & VACOPT_VACUUM) != 0)
742 : {
743 224 : ereport(WARNING,
744 : (errmsg("permission denied to vacuum \"%s\", skipping it",
745 : relname)));
746 :
747 : /*
748 : * For VACUUM ANALYZE, both logs could show up, but just generate
749 : * information for VACUUM as that would be the first one to be
750 : * processed.
751 : */
752 224 : return false;
753 : }
754 :
755 116 : if ((options & VACOPT_ANALYZE) != 0)
756 116 : ereport(WARNING,
757 : (errmsg("permission denied to analyze \"%s\", skipping it",
758 : relname)));
759 :
760 116 : return false;
761 : }
762 :
763 :
764 : /*
765 : * vacuum_open_relation
766 : *
767 : * This routine is used for attempting to open and lock a relation which
768 : * is going to be vacuumed or analyzed. If the relation cannot be opened
769 : * or locked, a log is emitted if possible.
770 : */
771 : Relation
772 133090 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
773 : bool verbose, LOCKMODE lmode)
774 : {
775 : Relation rel;
776 133090 : bool rel_lock = true;
777 : int elevel;
778 :
779 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
780 :
781 : /*
782 : * Open the relation and get the appropriate lock on it.
783 : *
784 : * There's a race condition here: the relation may have gone away since
785 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
786 : *
787 : * If we've been asked not to wait for the relation lock, acquire it first
788 : * in non-blocking mode, before calling try_relation_open().
789 : */
790 133090 : if (!(options & VACOPT_SKIP_LOCKED))
791 132366 : rel = try_relation_open(relid, lmode);
792 724 : else if (ConditionalLockRelationOid(relid, lmode))
793 704 : rel = try_relation_open(relid, NoLock);
794 : else
795 : {
796 20 : rel = NULL;
797 20 : rel_lock = false;
798 : }
799 :
800 : /* if relation is opened, leave */
801 133090 : if (rel)
802 133058 : return rel;
803 :
804 : /*
805 : * Relation could not be opened, hence generate if possible a log
806 : * informing on the situation.
807 : *
808 : * If the RangeVar is not defined, we do not have enough information to
809 : * provide a meaningful log statement. Chances are that the caller has
810 : * intentionally not provided this information so that this logging is
811 : * skipped, anyway.
812 : */
813 32 : if (relation == NULL)
814 18 : return NULL;
815 :
816 : /*
817 : * Determine the log level.
818 : *
819 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
820 : * statements in the permission checks; otherwise, only log if the caller
821 : * so requested.
822 : */
823 14 : if (!AmAutoVacuumWorkerProcess())
824 14 : elevel = WARNING;
825 0 : else if (verbose)
826 0 : elevel = LOG;
827 : else
828 0 : return NULL;
829 :
830 14 : if ((options & VACOPT_VACUUM) != 0)
831 : {
832 10 : if (!rel_lock)
833 6 : ereport(elevel,
834 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
835 : errmsg("skipping vacuum of \"%s\" --- lock not available",
836 : relation->relname)));
837 : else
838 4 : ereport(elevel,
839 : (errcode(ERRCODE_UNDEFINED_TABLE),
840 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
841 : relation->relname)));
842 :
843 : /*
844 : * For VACUUM ANALYZE, both logs could show up, but just generate
845 : * information for VACUUM as that would be the first one to be
846 : * processed.
847 : */
848 10 : return NULL;
849 : }
850 :
851 4 : if ((options & VACOPT_ANALYZE) != 0)
852 : {
853 4 : if (!rel_lock)
854 2 : ereport(elevel,
855 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
856 : errmsg("skipping analyze of \"%s\" --- lock not available",
857 : relation->relname)));
858 : else
859 2 : ereport(elevel,
860 : (errcode(ERRCODE_UNDEFINED_TABLE),
861 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
862 : relation->relname)));
863 : }
864 :
865 4 : return NULL;
866 : }
867 :
868 :
869 : /*
870 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
871 : * and optionally add VacuumRelations for partitions or inheritance children.
872 : *
873 : * If a VacuumRelation does not have an OID supplied and is a partitioned
874 : * table, an extra entry will be added to the output for each partition.
875 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
876 : * it does not want us to expand partitioned tables.
877 : *
878 : * We take care not to modify the input data structure, but instead build
879 : * new VacuumRelation(s) to return. (But note that they will reference
880 : * unmodified parts of the input, eg column lists.) New data structures
881 : * are made in vac_context.
882 : */
883 : static List *
884 107834 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
885 : int options)
886 : {
887 107834 : List *vacrels = NIL;
888 : MemoryContext oldcontext;
889 :
890 : /* If caller supplied OID, there's nothing we need do here. */
891 107834 : if (OidIsValid(vrel->oid))
892 : {
893 97334 : oldcontext = MemoryContextSwitchTo(vac_context);
894 97334 : vacrels = lappend(vacrels, vrel);
895 97334 : MemoryContextSwitchTo(oldcontext);
896 : }
897 : else
898 : {
899 : /*
900 : * Process a specific relation, and possibly partitions or child
901 : * tables thereof.
902 : */
903 : Oid relid;
904 : HeapTuple tuple;
905 : Form_pg_class classForm;
906 : bool include_children;
907 : bool is_partitioned_table;
908 : int rvr_opts;
909 :
910 : /*
911 : * Since autovacuum workers supply OIDs when calling vacuum(), no
912 : * autovacuum worker should reach this code.
913 : */
914 : Assert(!AmAutoVacuumWorkerProcess());
915 :
916 : /*
917 : * We transiently take AccessShareLock to protect the syscache lookup
918 : * below, as well as find_all_inheritors's expectation that the caller
919 : * holds some lock on the starting relation.
920 : */
921 10500 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
922 10500 : relid = RangeVarGetRelidExtended(vrel->relation,
923 : AccessShareLock,
924 : rvr_opts,
925 : NULL, NULL);
926 :
927 : /*
928 : * If the lock is unavailable, emit the same log statement that
929 : * vacuum_rel() and analyze_rel() would.
930 : */
931 10464 : if (!OidIsValid(relid))
932 : {
933 8 : if (options & VACOPT_VACUUM)
934 6 : ereport(WARNING,
935 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
936 : errmsg("skipping vacuum of \"%s\" --- lock not available",
937 : vrel->relation->relname)));
938 : else
939 2 : ereport(WARNING,
940 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
941 : errmsg("skipping analyze of \"%s\" --- lock not available",
942 : vrel->relation->relname)));
943 8 : return vacrels;
944 : }
945 :
946 : /*
947 : * To check whether the relation is a partitioned table and its
948 : * ownership, fetch its syscache entry.
949 : */
950 10456 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
951 10456 : if (!HeapTupleIsValid(tuple))
952 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
953 10456 : classForm = (Form_pg_class) GETSTRUCT(tuple);
954 :
955 : /*
956 : * Make a returnable VacuumRelation for this rel if the user has the
957 : * required privileges.
958 : */
959 10456 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
960 : {
961 10224 : oldcontext = MemoryContextSwitchTo(vac_context);
962 10224 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
963 : relid,
964 : vrel->va_cols));
965 10224 : MemoryContextSwitchTo(oldcontext);
966 : }
967 :
968 : /*
969 : * Vacuuming a partitioned table with ONLY will not do anything since
970 : * the partitioned table itself is empty. Issue a warning if the user
971 : * requests this.
972 : */
973 10456 : include_children = vrel->relation->inh;
974 10456 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
975 10456 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
976 6 : ereport(WARNING,
977 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
978 : vrel->relation->relname)));
979 :
980 10456 : ReleaseSysCache(tuple);
981 :
982 : /*
983 : * Unless the user has specified ONLY, make relation list entries for
984 : * its partitions or inheritance child tables. Note that the list
985 : * returned by find_all_inheritors() includes the passed-in OID, so we
986 : * have to skip that. There's no point in taking locks on the
987 : * individual partitions or child tables yet, and doing so would just
988 : * add unnecessary deadlock risk. For this last reason, we do not yet
989 : * check the ownership of the partitions/tables, which get added to
990 : * the list to process. Ownership will be checked later on anyway.
991 : */
992 10456 : if (include_children)
993 : {
994 10426 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
995 : ListCell *part_lc;
996 :
997 22900 : foreach(part_lc, part_oids)
998 : {
999 12474 : Oid part_oid = lfirst_oid(part_lc);
1000 :
1001 12474 : if (part_oid == relid)
1002 10426 : continue; /* ignore original table */
1003 :
1004 : /*
1005 : * We omit a RangeVar since it wouldn't be appropriate to
1006 : * complain about failure to open one of these relations
1007 : * later.
1008 : */
1009 2048 : oldcontext = MemoryContextSwitchTo(vac_context);
1010 2048 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1011 : part_oid,
1012 : vrel->va_cols));
1013 2048 : MemoryContextSwitchTo(oldcontext);
1014 : }
1015 : }
1016 :
1017 : /*
1018 : * Release lock again. This means that by the time we actually try to
1019 : * process the table, it might be gone or renamed. In the former case
1020 : * we'll silently ignore it; in the latter case we'll process it
1021 : * anyway, but we must beware that the RangeVar doesn't necessarily
1022 : * identify it anymore. This isn't ideal, perhaps, but there's little
1023 : * practical alternative, since we're typically going to commit this
1024 : * transaction and begin a new one between now and then. Moreover,
1025 : * holding locks on multiple relations would create significant risk
1026 : * of deadlock.
1027 : */
1028 10456 : UnlockRelationOid(relid, AccessShareLock);
1029 : }
1030 :
1031 107790 : return vacrels;
1032 : }
1033 :
1034 : /*
1035 : * Construct a list of VacuumRelations for all vacuumable rels in
1036 : * the current database. The list is built in vac_context.
1037 : */
1038 : static List *
1039 200 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1040 : {
1041 200 : List *vacrels = NIL;
1042 : Relation pgclass;
1043 : TableScanDesc scan;
1044 : HeapTuple tuple;
1045 :
1046 200 : pgclass = table_open(RelationRelationId, AccessShareLock);
1047 :
1048 200 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1049 :
1050 85510 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1051 : {
1052 85310 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1053 : MemoryContext oldcontext;
1054 85310 : Oid relid = classForm->oid;
1055 :
1056 : /*
1057 : * We include partitioned tables here; depending on which operation is
1058 : * to be performed, caller will decide whether to process or ignore
1059 : * them.
1060 : */
1061 85310 : if (classForm->relkind != RELKIND_RELATION &&
1062 70816 : classForm->relkind != RELKIND_MATVIEW &&
1063 70810 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1064 70724 : continue;
1065 :
1066 : /* check permissions of relation */
1067 14586 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1068 0 : continue;
1069 :
1070 : /*
1071 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1072 : * We omit a RangeVar since it wouldn't be appropriate to complain
1073 : * about failure to open one of these relations later.
1074 : */
1075 14586 : oldcontext = MemoryContextSwitchTo(vac_context);
1076 14586 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1077 : relid,
1078 : NIL));
1079 14586 : MemoryContextSwitchTo(oldcontext);
1080 : }
1081 :
1082 200 : table_endscan(scan);
1083 200 : table_close(pgclass, AccessShareLock);
1084 :
1085 200 : return vacrels;
1086 : }
1087 :
1088 : /*
1089 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1090 : *
1091 : * The target relation and VACUUM parameters are our inputs.
1092 : *
1093 : * Output parameters are the cutoffs that VACUUM caller should use.
1094 : *
1095 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1096 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1097 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1098 : * minimum).
1099 : */
1100 : bool
1101 118528 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1102 : struct VacuumCutoffs *cutoffs)
1103 : {
1104 : int freeze_min_age,
1105 : multixact_freeze_min_age,
1106 : freeze_table_age,
1107 : multixact_freeze_table_age,
1108 : effective_multixact_freeze_max_age;
1109 : TransactionId nextXID,
1110 : safeOldestXmin,
1111 : aggressiveXIDCutoff;
1112 : MultiXactId nextMXID,
1113 : safeOldestMxact,
1114 : aggressiveMXIDCutoff;
1115 :
1116 : /* Use mutable copies of freeze age parameters */
1117 118528 : freeze_min_age = params->freeze_min_age;
1118 118528 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1119 118528 : freeze_table_age = params->freeze_table_age;
1120 118528 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1121 :
1122 : /* Set pg_class fields in cutoffs */
1123 118528 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1124 118528 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1125 :
1126 : /*
1127 : * Acquire OldestXmin.
1128 : *
1129 : * We can always ignore processes running lazy vacuum. This is because we
1130 : * use these values only for deciding which tuples we must keep in the
1131 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1132 : * XID assigned), it's safe to ignore it. In theory it could be
1133 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1134 : * that only one vacuum process can be working on a particular table at
1135 : * any time, and that each vacuum is always an independent transaction.
1136 : */
1137 118528 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1138 :
1139 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1140 :
1141 : /* Acquire OldestMxact */
1142 118528 : cutoffs->OldestMxact = GetOldestMultiXactId();
1143 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1144 :
1145 : /* Acquire next XID/next MXID values used to apply age-based settings */
1146 118528 : nextXID = ReadNextTransactionId();
1147 118528 : nextMXID = ReadNextMultiXactId();
1148 :
1149 : /*
1150 : * Also compute the multixact age for which freezing is urgent. This is
1151 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1152 : * short of multixact member space.
1153 : */
1154 118528 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1155 :
1156 : /*
1157 : * Almost ready to set freeze output parameters; check if OldestXmin or
1158 : * OldestMxact are held back to an unsafe degree before we start on that
1159 : */
1160 118528 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1161 118528 : if (!TransactionIdIsNormal(safeOldestXmin))
1162 0 : safeOldestXmin = FirstNormalTransactionId;
1163 118528 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1164 118528 : if (safeOldestMxact < FirstMultiXactId)
1165 0 : safeOldestMxact = FirstMultiXactId;
1166 118528 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1167 60952 : ereport(WARNING,
1168 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1169 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1170 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1171 118528 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1172 0 : ereport(WARNING,
1173 : (errmsg("cutoff for freezing multixacts is far in the past"),
1174 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1175 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1176 :
1177 : /*
1178 : * Determine the minimum freeze age to use: as specified by the caller, or
1179 : * vacuum_freeze_min_age, but in any case not more than half
1180 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1181 : * wraparound won't occur too frequently.
1182 : */
1183 118528 : if (freeze_min_age < 0)
1184 8940 : freeze_min_age = vacuum_freeze_min_age;
1185 118528 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1186 : Assert(freeze_min_age >= 0);
1187 :
1188 : /* Compute FreezeLimit, being careful to generate a normal XID */
1189 118528 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1190 118528 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1191 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1192 : /* FreezeLimit must always be <= OldestXmin */
1193 118528 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1194 85998 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1195 :
1196 : /*
1197 : * Determine the minimum multixact freeze age to use: as specified by
1198 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1199 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1200 : * prevent MultiXact wraparound won't occur too frequently.
1201 : */
1202 118528 : if (multixact_freeze_min_age < 0)
1203 8940 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1204 118528 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1205 : effective_multixact_freeze_max_age / 2);
1206 : Assert(multixact_freeze_min_age >= 0);
1207 :
1208 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1209 118528 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1210 118528 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1211 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1212 : /* MultiXactCutoff must always be <= OldestMxact */
1213 118528 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1214 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1215 :
1216 : /*
1217 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1218 : *
1219 : * Determine the table freeze age to use: as specified by the caller, or
1220 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1221 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1222 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1223 : * anti-wraparound autovacuum is launched.
1224 : */
1225 118528 : if (freeze_table_age < 0)
1226 8940 : freeze_table_age = vacuum_freeze_table_age;
1227 118528 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1228 : Assert(freeze_table_age >= 0);
1229 118528 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1230 118528 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1231 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1232 118528 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1233 : aggressiveXIDCutoff))
1234 109756 : return true;
1235 :
1236 : /*
1237 : * Similar to the above, determine the table freeze age to use for
1238 : * multixacts: as specified by the caller, or the value of the
1239 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1240 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1241 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1242 : * multixacts before anti-wraparound autovacuum is launched.
1243 : */
1244 8772 : if (multixact_freeze_table_age < 0)
1245 8720 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1246 8772 : multixact_freeze_table_age =
1247 8772 : Min(multixact_freeze_table_age,
1248 : effective_multixact_freeze_max_age * 0.95);
1249 : Assert(multixact_freeze_table_age >= 0);
1250 8772 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1251 8772 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1252 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1253 8772 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1254 : aggressiveMXIDCutoff))
1255 0 : return true;
1256 :
1257 : /* Non-aggressive VACUUM */
1258 8772 : return false;
1259 : }
1260 :
1261 : /*
1262 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1263 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1264 : * dangerously far in the past.
1265 : *
1266 : * When we return true, VACUUM caller triggers the failsafe.
1267 : */
1268 : bool
1269 120916 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1270 : {
1271 120916 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1272 120916 : MultiXactId relminmxid = cutoffs->relminmxid;
1273 : TransactionId xid_skip_limit;
1274 : MultiXactId multi_skip_limit;
1275 : int skip_index_vacuum;
1276 :
1277 : Assert(TransactionIdIsNormal(relfrozenxid));
1278 : Assert(MultiXactIdIsValid(relminmxid));
1279 :
1280 : /*
1281 : * Determine the index skipping age to use. In any case no less than
1282 : * autovacuum_freeze_max_age * 1.05.
1283 : */
1284 120916 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1285 :
1286 120916 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1287 120916 : if (!TransactionIdIsNormal(xid_skip_limit))
1288 0 : xid_skip_limit = FirstNormalTransactionId;
1289 :
1290 120916 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1291 : {
1292 : /* The table's relfrozenxid is too old */
1293 18678 : return true;
1294 : }
1295 :
1296 : /*
1297 : * Similar to above, determine the index skipping age to use for
1298 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1299 : * 1.05.
1300 : */
1301 102238 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1302 : autovacuum_multixact_freeze_max_age * 1.05);
1303 :
1304 102238 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1305 102238 : if (multi_skip_limit < FirstMultiXactId)
1306 0 : multi_skip_limit = FirstMultiXactId;
1307 :
1308 102238 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1309 : {
1310 : /* The table's relminmxid is too old */
1311 0 : return true;
1312 : }
1313 :
1314 102238 : return false;
1315 : }
1316 :
1317 : /*
1318 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1319 : *
1320 : * If we scanned the whole relation then we should just use the count of
1321 : * live tuples seen; but if we did not, we should not blindly extrapolate
1322 : * from that number, since VACUUM may have scanned a quite nonrandom
1323 : * subset of the table. When we have only partial information, we take
1324 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1325 : * of the tuple density in the unscanned pages.
1326 : *
1327 : * Note: scanned_tuples should count only *live* tuples, since
1328 : * pg_class.reltuples is defined that way.
1329 : */
1330 : double
1331 117980 : vac_estimate_reltuples(Relation relation,
1332 : BlockNumber total_pages,
1333 : BlockNumber scanned_pages,
1334 : double scanned_tuples)
1335 : {
1336 117980 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1337 117980 : double old_rel_tuples = relation->rd_rel->reltuples;
1338 : double old_density;
1339 : double unscanned_pages;
1340 : double total_tuples;
1341 :
1342 : /* If we did scan the whole table, just use the count as-is */
1343 117980 : if (scanned_pages >= total_pages)
1344 113930 : return scanned_tuples;
1345 :
1346 : /*
1347 : * When successive VACUUM commands scan the same few pages again and
1348 : * again, without anything from the table really changing, there is a risk
1349 : * that our beliefs about tuple density will gradually become distorted.
1350 : * This might be caused by vacuumlazy.c implementation details, such as
1351 : * its tendency to always scan the last heap page. Handle that here.
1352 : *
1353 : * If the relation is _exactly_ the same size according to the existing
1354 : * pg_class entry, and only a few of its pages (less than 2%) were
1355 : * scanned, keep the existing value of reltuples. Also keep the existing
1356 : * value when only a subset of rel's pages <= a single page were scanned.
1357 : *
1358 : * (Note: we might be returning -1 here.)
1359 : */
1360 4050 : if (old_rel_pages == total_pages &&
1361 4022 : scanned_pages < (double) total_pages * 0.02)
1362 2828 : return old_rel_tuples;
1363 1222 : if (scanned_pages <= 1)
1364 976 : return old_rel_tuples;
1365 :
1366 : /*
1367 : * If old density is unknown, we can't do much except scale up
1368 : * scanned_tuples to match total_pages.
1369 : */
1370 246 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1371 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1372 :
1373 : /*
1374 : * Okay, we've covered the corner cases. The normal calculation is to
1375 : * convert the old measurement to a density (tuples per page), then
1376 : * estimate the number of tuples in the unscanned pages using that figure,
1377 : * and finally add on the number of tuples in the scanned pages.
1378 : */
1379 244 : old_density = old_rel_tuples / old_rel_pages;
1380 244 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1381 244 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1382 244 : return floor(total_tuples + 0.5);
1383 : }
1384 :
1385 :
1386 : /*
1387 : * vac_update_relstats() -- update statistics for one relation
1388 : *
1389 : * Update the whole-relation statistics that are kept in its pg_class
1390 : * row. There are additional stats that will be updated if we are
1391 : * doing ANALYZE, but we always update these stats. This routine works
1392 : * for both index and heap relation entries in pg_class.
1393 : *
1394 : * We violate transaction semantics here by overwriting the rel's
1395 : * existing pg_class tuple with the new values. This is reasonably
1396 : * safe as long as we're sure that the new values are correct whether or
1397 : * not this transaction commits. The reason for doing this is that if
1398 : * we updated these tuples in the usual way, vacuuming pg_class itself
1399 : * wouldn't work very well --- by the time we got done with a vacuum
1400 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1401 : * course, this only works for fixed-size not-null columns, but these are.
1402 : *
1403 : * Another reason for doing it this way is that when we are in a lazy
1404 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1405 : * Somebody vacuuming pg_class might think they could delete a tuple
1406 : * marked with xmin = our xid.
1407 : *
1408 : * In addition to fundamentally nontransactional statistics such as
1409 : * relpages and relallvisible, we try to maintain certain lazily-updated
1410 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1411 : * It's safe to do this in VACUUM, which can't run in parallel with
1412 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1413 : * However, it's *not* safe to do it in an ANALYZE that's within an
1414 : * outer transaction, because for example the current transaction might
1415 : * have dropped the last index; then we'd think relhasindex should be
1416 : * cleared, but if the transaction later rolls back this would be wrong.
1417 : * So we refrain from updating the DDL flags if we're inside an outer
1418 : * transaction. This is OK since postponing the flag maintenance is
1419 : * always allowable.
1420 : *
1421 : * Note: num_tuples should count only *live* tuples, since
1422 : * pg_class.reltuples is defined that way.
1423 : *
1424 : * This routine is shared by VACUUM and ANALYZE.
1425 : */
1426 : void
1427 153684 : vac_update_relstats(Relation relation,
1428 : BlockNumber num_pages, double num_tuples,
1429 : BlockNumber num_all_visible_pages,
1430 : bool hasindex, TransactionId frozenxid,
1431 : MultiXactId minmulti,
1432 : bool *frozenxid_updated, bool *minmulti_updated,
1433 : bool in_outer_xact)
1434 : {
1435 153684 : Oid relid = RelationGetRelid(relation);
1436 : Relation rd;
1437 : ScanKeyData key[1];
1438 : HeapTuple ctup;
1439 : void *inplace_state;
1440 : Form_pg_class pgcform;
1441 : bool dirty,
1442 : futurexid,
1443 : futuremxid;
1444 : TransactionId oldfrozenxid;
1445 : MultiXactId oldminmulti;
1446 :
1447 153684 : rd = table_open(RelationRelationId, RowExclusiveLock);
1448 :
1449 : /* Fetch a copy of the tuple to scribble on */
1450 153684 : ScanKeyInit(&key[0],
1451 : Anum_pg_class_oid,
1452 : BTEqualStrategyNumber, F_OIDEQ,
1453 : ObjectIdGetDatum(relid));
1454 153684 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1455 : NULL, 1, key, &ctup, &inplace_state);
1456 153684 : if (!HeapTupleIsValid(ctup))
1457 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1458 : relid);
1459 153684 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1460 :
1461 : /* Apply statistical updates, if any, to copied tuple */
1462 :
1463 153684 : dirty = false;
1464 153684 : if (pgcform->relpages != (int32) num_pages)
1465 : {
1466 8406 : pgcform->relpages = (int32) num_pages;
1467 8406 : dirty = true;
1468 : }
1469 153684 : if (pgcform->reltuples != (float4) num_tuples)
1470 : {
1471 18044 : pgcform->reltuples = (float4) num_tuples;
1472 18044 : dirty = true;
1473 : }
1474 153684 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1475 : {
1476 5104 : pgcform->relallvisible = (int32) num_all_visible_pages;
1477 5104 : dirty = true;
1478 : }
1479 :
1480 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1481 :
1482 153684 : if (!in_outer_xact)
1483 : {
1484 : /*
1485 : * If we didn't find any indexes, reset relhasindex.
1486 : */
1487 151770 : if (pgcform->relhasindex && !hasindex)
1488 : {
1489 18 : pgcform->relhasindex = false;
1490 18 : dirty = true;
1491 : }
1492 :
1493 : /* We also clear relhasrules and relhastriggers if needed */
1494 151770 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1495 : {
1496 0 : pgcform->relhasrules = false;
1497 0 : dirty = true;
1498 : }
1499 151770 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1500 : {
1501 6 : pgcform->relhastriggers = false;
1502 6 : dirty = true;
1503 : }
1504 : }
1505 :
1506 : /*
1507 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1508 : * indicating it has no new data.
1509 : *
1510 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1511 : * stored relfrozenxid is "in the future" then it seems best to assume
1512 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1513 : * This should match vac_update_datfrozenxid() concerning what we consider
1514 : * to be "in the future".
1515 : */
1516 153684 : oldfrozenxid = pgcform->relfrozenxid;
1517 153684 : futurexid = false;
1518 153684 : if (frozenxid_updated)
1519 117976 : *frozenxid_updated = false;
1520 153684 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1521 : {
1522 55202 : bool update = false;
1523 :
1524 55202 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1525 55114 : update = true;
1526 88 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1527 0 : futurexid = update = true;
1528 :
1529 55202 : if (update)
1530 : {
1531 55114 : pgcform->relfrozenxid = frozenxid;
1532 55114 : dirty = true;
1533 55114 : if (frozenxid_updated)
1534 55114 : *frozenxid_updated = true;
1535 : }
1536 : }
1537 :
1538 : /* Similarly for relminmxid */
1539 153684 : oldminmulti = pgcform->relminmxid;
1540 153684 : futuremxid = false;
1541 153684 : if (minmulti_updated)
1542 117976 : *minmulti_updated = false;
1543 153684 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1544 : {
1545 274 : bool update = false;
1546 :
1547 274 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1548 274 : update = true;
1549 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1550 0 : futuremxid = update = true;
1551 :
1552 274 : if (update)
1553 : {
1554 274 : pgcform->relminmxid = minmulti;
1555 274 : dirty = true;
1556 274 : if (minmulti_updated)
1557 274 : *minmulti_updated = true;
1558 : }
1559 : }
1560 :
1561 : /* If anything changed, write out the tuple. */
1562 153684 : if (dirty)
1563 67806 : systable_inplace_update_finish(inplace_state, ctup);
1564 : else
1565 85878 : systable_inplace_update_cancel(inplace_state);
1566 :
1567 153684 : table_close(rd, RowExclusiveLock);
1568 :
1569 153684 : if (futurexid)
1570 0 : ereport(WARNING,
1571 : (errcode(ERRCODE_DATA_CORRUPTED),
1572 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1573 : oldfrozenxid, frozenxid,
1574 : RelationGetRelationName(relation))));
1575 153684 : if (futuremxid)
1576 0 : ereport(WARNING,
1577 : (errcode(ERRCODE_DATA_CORRUPTED),
1578 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1579 : oldminmulti, minmulti,
1580 : RelationGetRelationName(relation))));
1581 153684 : }
1582 :
1583 :
1584 : /*
1585 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1586 : *
1587 : * Update pg_database's datfrozenxid entry for our database to be the
1588 : * minimum of the pg_class.relfrozenxid values.
1589 : *
1590 : * Similarly, update our datminmxid to be the minimum of the
1591 : * pg_class.relminmxid values.
1592 : *
1593 : * If we are able to advance either pg_database value, also try to
1594 : * truncate pg_xact and pg_multixact.
1595 : *
1596 : * We violate transaction semantics here by overwriting the database's
1597 : * existing pg_database tuple with the new values. This is reasonably
1598 : * safe since the new values are correct whether or not this transaction
1599 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1600 : * behind after a VACUUM.
1601 : */
1602 : void
1603 4028 : vac_update_datfrozenxid(void)
1604 : {
1605 : HeapTuple tuple;
1606 : Form_pg_database dbform;
1607 : Relation relation;
1608 : SysScanDesc scan;
1609 : HeapTuple classTup;
1610 : TransactionId newFrozenXid;
1611 : MultiXactId newMinMulti;
1612 : TransactionId lastSaneFrozenXid;
1613 : MultiXactId lastSaneMinMulti;
1614 4028 : bool bogus = false;
1615 4028 : bool dirty = false;
1616 : ScanKeyData key[1];
1617 : void *inplace_state;
1618 :
1619 : /*
1620 : * Restrict this task to one backend per database. This avoids race
1621 : * conditions that would move datfrozenxid or datminmxid backward. It
1622 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1623 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1624 : */
1625 4028 : LockDatabaseFrozenIds(ExclusiveLock);
1626 :
1627 : /*
1628 : * Initialize the "min" calculation with
1629 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1630 : * approximation to the minimum relfrozenxid for not-yet-committed
1631 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1632 : * cannot produce a wrong minimum by starting with this.
1633 : */
1634 4028 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1635 :
1636 : /*
1637 : * Similarly, initialize the MultiXact "min" with the value that would be
1638 : * used on pg_class for new tables. See AddNewRelationTuple().
1639 : */
1640 4028 : newMinMulti = GetOldestMultiXactId();
1641 :
1642 : /*
1643 : * Identify the latest relfrozenxid and relminmxid values that we could
1644 : * validly see during the scan. These are conservative values, but it's
1645 : * not really worth trying to be more exact.
1646 : */
1647 4028 : lastSaneFrozenXid = ReadNextTransactionId();
1648 4028 : lastSaneMinMulti = ReadNextMultiXactId();
1649 :
1650 : /*
1651 : * We must seqscan pg_class to find the minimum Xid, because there is no
1652 : * index that can help us here.
1653 : *
1654 : * See vac_truncate_clog() for the race condition to prevent.
1655 : */
1656 4028 : relation = table_open(RelationRelationId, AccessShareLock);
1657 :
1658 4028 : scan = systable_beginscan(relation, InvalidOid, false,
1659 : NULL, 0, NULL);
1660 :
1661 2073422 : while ((classTup = systable_getnext(scan)) != NULL)
1662 : {
1663 2069394 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1664 2069394 : TransactionId relfrozenxid = classForm->relfrozenxid;
1665 2069394 : TransactionId relminmxid = classForm->relminmxid;
1666 :
1667 : /*
1668 : * Only consider relations able to hold unfrozen XIDs (anything else
1669 : * should have InvalidTransactionId in relfrozenxid anyway).
1670 : */
1671 2069394 : if (classForm->relkind != RELKIND_RELATION &&
1672 1639678 : classForm->relkind != RELKIND_MATVIEW &&
1673 1637530 : classForm->relkind != RELKIND_TOASTVALUE)
1674 : {
1675 : Assert(!TransactionIdIsValid(relfrozenxid));
1676 : Assert(!MultiXactIdIsValid(relminmxid));
1677 1411210 : continue;
1678 : }
1679 :
1680 : /*
1681 : * Some table AMs might not need per-relation xid / multixid horizons.
1682 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1683 : * to not be set (i.e. set to their respective Invalid*Id)
1684 : * independently. Thus validate and compute horizon for each only if
1685 : * set.
1686 : *
1687 : * If things are working properly, no relation should have a
1688 : * relfrozenxid or relminmxid that is "in the future". However, such
1689 : * cases have been known to arise due to bugs in pg_upgrade. If we
1690 : * see any entries that are "in the future", chicken out and don't do
1691 : * anything. This ensures we won't truncate clog & multixact SLRUs
1692 : * before those relations have been scanned and cleaned up.
1693 : */
1694 :
1695 658184 : if (TransactionIdIsValid(relfrozenxid))
1696 : {
1697 : Assert(TransactionIdIsNormal(relfrozenxid));
1698 :
1699 : /* check for values in the future */
1700 658184 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1701 : {
1702 0 : bogus = true;
1703 0 : break;
1704 : }
1705 :
1706 : /* determine new horizon */
1707 658184 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1708 4146 : newFrozenXid = relfrozenxid;
1709 : }
1710 :
1711 658184 : if (MultiXactIdIsValid(relminmxid))
1712 : {
1713 : /* check for values in the future */
1714 658184 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1715 : {
1716 0 : bogus = true;
1717 0 : break;
1718 : }
1719 :
1720 : /* determine new horizon */
1721 658184 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1722 200 : newMinMulti = relminmxid;
1723 : }
1724 : }
1725 :
1726 : /* we're done with pg_class */
1727 4028 : systable_endscan(scan);
1728 4028 : table_close(relation, AccessShareLock);
1729 :
1730 : /* chicken out if bogus data found */
1731 4028 : if (bogus)
1732 0 : return;
1733 :
1734 : Assert(TransactionIdIsNormal(newFrozenXid));
1735 : Assert(MultiXactIdIsValid(newMinMulti));
1736 :
1737 : /* Now fetch the pg_database tuple we need to update. */
1738 4028 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1739 :
1740 : /*
1741 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1742 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1743 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1744 : * For now, don't optimize that.
1745 : */
1746 4028 : ScanKeyInit(&key[0],
1747 : Anum_pg_database_oid,
1748 : BTEqualStrategyNumber, F_OIDEQ,
1749 : ObjectIdGetDatum(MyDatabaseId));
1750 :
1751 4028 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1752 : NULL, 1, key, &tuple, &inplace_state);
1753 :
1754 4028 : if (!HeapTupleIsValid(tuple))
1755 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1756 :
1757 4028 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1758 :
1759 : /*
1760 : * As in vac_update_relstats(), we ordinarily don't want to let
1761 : * datfrozenxid go backward; but if it's "in the future" then it must be
1762 : * corrupt and it seems best to overwrite it.
1763 : */
1764 4580 : if (dbform->datfrozenxid != newFrozenXid &&
1765 552 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1766 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1767 : {
1768 552 : dbform->datfrozenxid = newFrozenXid;
1769 552 : dirty = true;
1770 : }
1771 : else
1772 3476 : newFrozenXid = dbform->datfrozenxid;
1773 :
1774 : /* Ditto for datminmxid */
1775 4030 : if (dbform->datminmxid != newMinMulti &&
1776 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1777 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1778 : {
1779 2 : dbform->datminmxid = newMinMulti;
1780 2 : dirty = true;
1781 : }
1782 : else
1783 4026 : newMinMulti = dbform->datminmxid;
1784 :
1785 4028 : if (dirty)
1786 552 : systable_inplace_update_finish(inplace_state, tuple);
1787 : else
1788 3476 : systable_inplace_update_cancel(inplace_state);
1789 :
1790 4028 : heap_freetuple(tuple);
1791 4028 : table_close(relation, RowExclusiveLock);
1792 :
1793 : /*
1794 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1795 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1796 : * XID-wrap-limit info is stale, since this action will update that too.
1797 : */
1798 4028 : if (dirty || ForceTransactionIdLimitUpdate())
1799 1126 : vac_truncate_clog(newFrozenXid, newMinMulti,
1800 : lastSaneFrozenXid, lastSaneMinMulti);
1801 : }
1802 :
1803 :
1804 : /*
1805 : * vac_truncate_clog() -- attempt to truncate the commit log
1806 : *
1807 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1808 : * and use it to truncate the transaction commit log (pg_xact).
1809 : * Also update the XID wrap limit info maintained by varsup.c.
1810 : * Likewise for datminmxid.
1811 : *
1812 : * The passed frozenXID and minMulti are the updated values for my own
1813 : * pg_database entry. They're used to initialize the "min" calculations.
1814 : * The caller also passes the "last sane" XID and MXID, since it has
1815 : * those at hand already.
1816 : *
1817 : * This routine is only invoked when we've managed to change our
1818 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1819 : * XID-wrap-limit info is stale.
1820 : */
1821 : static void
1822 1126 : vac_truncate_clog(TransactionId frozenXID,
1823 : MultiXactId minMulti,
1824 : TransactionId lastSaneFrozenXid,
1825 : MultiXactId lastSaneMinMulti)
1826 : {
1827 1126 : TransactionId nextXID = ReadNextTransactionId();
1828 : Relation relation;
1829 : TableScanDesc scan;
1830 : HeapTuple tuple;
1831 : Oid oldestxid_datoid;
1832 : Oid minmulti_datoid;
1833 1126 : bool bogus = false;
1834 1126 : bool frozenAlreadyWrapped = false;
1835 :
1836 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1837 1126 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1838 :
1839 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1840 1126 : oldestxid_datoid = MyDatabaseId;
1841 1126 : minmulti_datoid = MyDatabaseId;
1842 :
1843 : /*
1844 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1845 : *
1846 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1847 : * the values could change while we look at them. Fetch each one just
1848 : * once to ensure sane behavior of the comparison logic. (Here, as in
1849 : * many other places, we assume that fetching or updating an XID in shared
1850 : * storage is atomic.)
1851 : *
1852 : * Note: we need not worry about a race condition with new entries being
1853 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1854 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1855 : * of the interlock against copying a DB containing an active backend.
1856 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1857 : * concurrently modify the datfrozenxid's of different databases, the
1858 : * worst possible outcome is that pg_xact is not truncated as aggressively
1859 : * as it could be.
1860 : */
1861 1126 : relation = table_open(DatabaseRelationId, AccessShareLock);
1862 :
1863 1126 : scan = table_beginscan_catalog(relation, 0, NULL);
1864 :
1865 4350 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1866 : {
1867 3224 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1868 3224 : TransactionId datfrozenxid = dbform->datfrozenxid;
1869 3224 : TransactionId datminmxid = dbform->datminmxid;
1870 :
1871 : Assert(TransactionIdIsNormal(datfrozenxid));
1872 : Assert(MultiXactIdIsValid(datminmxid));
1873 :
1874 : /*
1875 : * If database is in the process of getting dropped, or has been
1876 : * interrupted while doing so, no connections to it are possible
1877 : * anymore. Therefore we don't need to take it into account here.
1878 : * Which is good, because it can't be processed by autovacuum either.
1879 : */
1880 3224 : if (database_is_invalid_form((Form_pg_database) dbform))
1881 : {
1882 2 : elog(DEBUG2,
1883 : "skipping invalid database \"%s\" while computing relfrozenxid",
1884 : NameStr(dbform->datname));
1885 2 : continue;
1886 : }
1887 :
1888 : /*
1889 : * If things are working properly, no database should have a
1890 : * datfrozenxid or datminmxid that is "in the future". However, such
1891 : * cases have been known to arise due to bugs in pg_upgrade. If we
1892 : * see any entries that are "in the future", chicken out and don't do
1893 : * anything. This ensures we won't truncate clog before those
1894 : * databases have been scanned and cleaned up. (We will issue the
1895 : * "already wrapped" warning if appropriate, though.)
1896 : */
1897 6444 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1898 3222 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1899 0 : bogus = true;
1900 :
1901 3222 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1902 0 : frozenAlreadyWrapped = true;
1903 3222 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1904 : {
1905 450 : frozenXID = datfrozenxid;
1906 450 : oldestxid_datoid = dbform->oid;
1907 : }
1908 :
1909 3222 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1910 : {
1911 4 : minMulti = datminmxid;
1912 4 : minmulti_datoid = dbform->oid;
1913 : }
1914 : }
1915 :
1916 1126 : table_endscan(scan);
1917 :
1918 1126 : table_close(relation, AccessShareLock);
1919 :
1920 : /*
1921 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1922 : * the computed minimum XID might be bogus. This case should now be
1923 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1924 : * test anyway.
1925 : */
1926 1126 : if (frozenAlreadyWrapped)
1927 : {
1928 0 : ereport(WARNING,
1929 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1930 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1931 0 : LWLockRelease(WrapLimitsVacuumLock);
1932 0 : return;
1933 : }
1934 :
1935 : /* chicken out if data is bogus in any other way */
1936 1126 : if (bogus)
1937 : {
1938 0 : LWLockRelease(WrapLimitsVacuumLock);
1939 0 : return;
1940 : }
1941 :
1942 : /*
1943 : * Advance the oldest value for commit timestamps before truncating, so
1944 : * that if a user requests a timestamp for a transaction we're truncating
1945 : * away right after this point, they get NULL instead of an ugly "file not
1946 : * found" error from slru.c. This doesn't matter for xact/multixact
1947 : * because they are not subject to arbitrary lookups from users.
1948 : */
1949 1126 : AdvanceOldestCommitTsXid(frozenXID);
1950 :
1951 : /*
1952 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1953 : */
1954 1126 : TruncateCLOG(frozenXID, oldestxid_datoid);
1955 1126 : TruncateCommitTs(frozenXID);
1956 1126 : TruncateMultiXact(minMulti, minmulti_datoid);
1957 :
1958 : /*
1959 : * Update the wrap limit for GetNewTransactionId and creation of new
1960 : * MultiXactIds. Note: these functions will also signal the postmaster
1961 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1962 : * signaling twice?
1963 : */
1964 1126 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1965 1126 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1966 :
1967 1126 : LWLockRelease(WrapLimitsVacuumLock);
1968 : }
1969 :
1970 :
1971 : /*
1972 : * vacuum_rel() -- vacuum one heap relation
1973 : *
1974 : * relid identifies the relation to vacuum. If relation is supplied,
1975 : * use the name therein for reporting any failure to open/lock the rel;
1976 : * do not use it once we've successfully opened the rel, since it might
1977 : * be stale.
1978 : *
1979 : * Returns true if it's okay to proceed with a requested ANALYZE
1980 : * operation on this table.
1981 : *
1982 : * Doing one heap at a time incurs extra overhead, since we need to
1983 : * check that the heap exists again just before we vacuum it. The
1984 : * reason that we do this is so that vacuuming can be spread across
1985 : * many small transactions. Otherwise, two-phase locking would require
1986 : * us to lock the entire database during one pass of the vacuum cleaner.
1987 : *
1988 : * At entry and exit, we are not inside a transaction.
1989 : */
1990 : static bool
1991 118772 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
1992 : BufferAccessStrategy bstrategy)
1993 : {
1994 : LOCKMODE lmode;
1995 : Relation rel;
1996 : LockRelId lockrelid;
1997 : Oid priv_relid;
1998 : Oid toast_relid;
1999 : Oid save_userid;
2000 : int save_sec_context;
2001 : int save_nestlevel;
2002 :
2003 : Assert(params != NULL);
2004 :
2005 : /* Begin a transaction for vacuuming this relation */
2006 118772 : StartTransactionCommand();
2007 :
2008 118772 : if (!(params->options & VACOPT_FULL))
2009 : {
2010 : /*
2011 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2012 : * other concurrent VACUUMs know that they can ignore this one while
2013 : * determining their OldestXmin. (The reason we don't set it during a
2014 : * full VACUUM is exactly that we may have to run user-defined
2015 : * functions for functional indexes, and we want to make sure that if
2016 : * they use the snapshot set above, any tuples it requires can't get
2017 : * removed from other tables. An index function that depends on the
2018 : * contents of other tables is arguably broken, but we won't break it
2019 : * here by violating transaction semantics.)
2020 : *
2021 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2022 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2023 : * in an emergency.
2024 : *
2025 : * Note: these flags remain set until CommitTransaction or
2026 : * AbortTransaction. We don't want to clear them until we reset
2027 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2028 : * might appear to go backwards, which is probably Not Good. (We also
2029 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2030 : * xmin doesn't become visible ahead of setting the flag.)
2031 : */
2032 118374 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2033 118374 : MyProc->statusFlags |= PROC_IN_VACUUM;
2034 118374 : if (params->is_wraparound)
2035 97126 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2036 118374 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2037 118374 : LWLockRelease(ProcArrayLock);
2038 : }
2039 :
2040 : /*
2041 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2042 : * cutoff xids in local memory wrapping around, and to have updated xmin
2043 : * horizons.
2044 : */
2045 118772 : PushActiveSnapshot(GetTransactionSnapshot());
2046 :
2047 : /*
2048 : * Check for user-requested abort. Note we want this to be inside a
2049 : * transaction, so xact.c doesn't issue useless WARNING.
2050 : */
2051 118772 : CHECK_FOR_INTERRUPTS();
2052 :
2053 : /*
2054 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2055 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2056 : * way, we can be sure that no other backend is vacuuming the same table.
2057 : */
2058 237544 : lmode = (params->options & VACOPT_FULL) ?
2059 118772 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2060 :
2061 : /* open the relation and get the appropriate lock on it */
2062 118772 : rel = vacuum_open_relation(relid, relation, params->options,
2063 118772 : params->log_min_duration >= 0, lmode);
2064 :
2065 : /* leave if relation could not be opened or locked */
2066 118772 : if (!rel)
2067 : {
2068 24 : PopActiveSnapshot();
2069 24 : CommitTransactionCommand();
2070 24 : return false;
2071 : }
2072 :
2073 : /*
2074 : * When recursing to a TOAST table, check privileges on the parent. NB:
2075 : * This is only safe to do because we hold a session lock on the main
2076 : * relation that prevents concurrent deletion.
2077 : */
2078 118748 : if (OidIsValid(params->toast_parent))
2079 7408 : priv_relid = params->toast_parent;
2080 : else
2081 111340 : priv_relid = RelationGetRelid(rel);
2082 :
2083 : /*
2084 : * Check if relation needs to be skipped based on privileges. This check
2085 : * happens also when building the relation list to vacuum for a manual
2086 : * operation, and needs to be done additionally here as VACUUM could
2087 : * happen across multiple transactions where privileges could have changed
2088 : * in-between. Make sure to only generate logs for VACUUM in this case.
2089 : */
2090 118748 : if (!vacuum_is_permitted_for_relation(priv_relid,
2091 : rel->rd_rel,
2092 118748 : params->options & ~VACOPT_ANALYZE))
2093 : {
2094 72 : relation_close(rel, lmode);
2095 72 : PopActiveSnapshot();
2096 72 : CommitTransactionCommand();
2097 72 : return false;
2098 : }
2099 :
2100 : /*
2101 : * Check that it's of a vacuumable relkind.
2102 : */
2103 118676 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2104 43862 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2105 43854 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2106 180 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2107 : {
2108 2 : ereport(WARNING,
2109 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2110 : RelationGetRelationName(rel))));
2111 2 : relation_close(rel, lmode);
2112 2 : PopActiveSnapshot();
2113 2 : CommitTransactionCommand();
2114 2 : return false;
2115 : }
2116 :
2117 : /*
2118 : * Silently ignore tables that are temp tables of other backends ---
2119 : * trying to vacuum these will lead to great unhappiness, since their
2120 : * contents are probably not up-to-date on disk. (We don't throw a
2121 : * warning here; it would just lead to chatter during a database-wide
2122 : * VACUUM.)
2123 : */
2124 118674 : if (RELATION_IS_OTHER_TEMP(rel))
2125 : {
2126 2 : relation_close(rel, lmode);
2127 2 : PopActiveSnapshot();
2128 2 : CommitTransactionCommand();
2129 2 : return false;
2130 : }
2131 :
2132 : /*
2133 : * Silently ignore partitioned tables as there is no work to be done. The
2134 : * useful work is on their child partitions, which have been queued up for
2135 : * us separately.
2136 : */
2137 118672 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2138 : {
2139 178 : relation_close(rel, lmode);
2140 178 : PopActiveSnapshot();
2141 178 : CommitTransactionCommand();
2142 : /* It's OK to proceed with ANALYZE on this table */
2143 178 : return true;
2144 : }
2145 :
2146 : /*
2147 : * Get a session-level lock too. This will protect our access to the
2148 : * relation across multiple transactions, so that we can vacuum the
2149 : * relation's TOAST table (if any) secure in the knowledge that no one is
2150 : * deleting the parent relation.
2151 : *
2152 : * NOTE: this cannot block, even if someone else is waiting for access,
2153 : * because the lock manager knows that both lock requests are from the
2154 : * same process.
2155 : */
2156 118494 : lockrelid = rel->rd_lockInfo.lockRelId;
2157 118494 : LockRelationIdForSession(&lockrelid, lmode);
2158 :
2159 : /*
2160 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2161 : * specified in VACUUM command, or when running in an autovacuum worker
2162 : */
2163 118494 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2164 : {
2165 : StdRdOptIndexCleanup vacuum_index_cleanup;
2166 :
2167 102836 : if (rel->rd_options == NULL)
2168 101308 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2169 : else
2170 1528 : vacuum_index_cleanup =
2171 1528 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2172 :
2173 102836 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2174 102812 : params->index_cleanup = VACOPTVALUE_AUTO;
2175 24 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2176 12 : params->index_cleanup = VACOPTVALUE_ENABLED;
2177 : else
2178 : {
2179 : Assert(vacuum_index_cleanup ==
2180 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2181 12 : params->index_cleanup = VACOPTVALUE_DISABLED;
2182 : }
2183 : }
2184 :
2185 : /*
2186 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2187 : * parameter was specified. This overrides the GUC value.
2188 : */
2189 118494 : if (rel->rd_options != NULL &&
2190 1572 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2191 0 : params->max_eager_freeze_failure_rate =
2192 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2193 :
2194 : /*
2195 : * Set truncate option based on truncate reloption if it wasn't specified
2196 : * in VACUUM command, or when running in an autovacuum worker
2197 : */
2198 118494 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2199 : {
2200 102862 : if (rel->rd_options == NULL ||
2201 1528 : ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
2202 102856 : params->truncate = VACOPTVALUE_ENABLED;
2203 : else
2204 6 : params->truncate = VACOPTVALUE_DISABLED;
2205 : }
2206 :
2207 : /*
2208 : * Remember the relation's TOAST relation for later, if the caller asked
2209 : * us to process it. In VACUUM FULL, though, the toast table is
2210 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2211 : * unless PROCESS_MAIN is disabled.
2212 : */
2213 118494 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2214 21154 : ((params->options & VACOPT_FULL) == 0 ||
2215 370 : (params->options & VACOPT_PROCESS_MAIN) == 0))
2216 20790 : toast_relid = rel->rd_rel->reltoastrelid;
2217 : else
2218 97704 : toast_relid = InvalidOid;
2219 :
2220 : /*
2221 : * Switch to the table owner's userid, so that any index functions are run
2222 : * as that user. Also lock down security-restricted operations and
2223 : * arrange to make GUC variable changes local to this command. (This is
2224 : * unnecessary, but harmless, for lazy VACUUM.)
2225 : */
2226 118494 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2227 118494 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2228 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2229 118494 : save_nestlevel = NewGUCNestLevel();
2230 118494 : RestrictSearchPath();
2231 :
2232 : /*
2233 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2234 : * relation. Otherwise, we can skip this part. If processing the TOAST
2235 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2236 : * to be set when we recurse to the TOAST table.
2237 : */
2238 118494 : if (params->options & VACOPT_PROCESS_MAIN)
2239 : {
2240 : /*
2241 : * Do the actual work --- either FULL or "lazy" vacuum
2242 : */
2243 118340 : if (params->options & VACOPT_FULL)
2244 : {
2245 364 : ClusterParams cluster_params = {0};
2246 :
2247 364 : if ((params->options & VACOPT_VERBOSE) != 0)
2248 2 : cluster_params.options |= CLUOPT_VERBOSE;
2249 :
2250 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2251 364 : cluster_rel(rel, InvalidOid, &cluster_params);
2252 : /* cluster_rel closes the relation, but keeps lock */
2253 :
2254 358 : rel = NULL;
2255 : }
2256 : else
2257 117976 : table_relation_vacuum(rel, params, bstrategy);
2258 : }
2259 :
2260 : /* Roll back any GUC changes executed by index functions */
2261 118488 : AtEOXact_GUC(false, save_nestlevel);
2262 :
2263 : /* Restore userid and security context */
2264 118488 : SetUserIdAndSecContext(save_userid, save_sec_context);
2265 :
2266 : /* all done with this class, but hold lock until commit */
2267 118488 : if (rel)
2268 118130 : relation_close(rel, NoLock);
2269 :
2270 : /*
2271 : * Complete the transaction and free all temporary memory used.
2272 : */
2273 118488 : PopActiveSnapshot();
2274 118488 : CommitTransactionCommand();
2275 :
2276 : /*
2277 : * If the relation has a secondary toast rel, vacuum that too while we
2278 : * still hold the session lock on the main table. Note however that
2279 : * "analyze" will not get done on the toast table. This is good, because
2280 : * the toaster always uses hardcoded index access and statistics are
2281 : * totally unimportant for toast relations.
2282 : */
2283 118488 : if (toast_relid != InvalidOid)
2284 : {
2285 : VacuumParams toast_vacuum_params;
2286 :
2287 : /*
2288 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2289 : * set toast_parent so that the privilege checks are done on the main
2290 : * relation. NB: This is only safe to do because we hold a session
2291 : * lock on the main relation that prevents concurrent deletion.
2292 : */
2293 7408 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2294 7408 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2295 7408 : toast_vacuum_params.toast_parent = relid;
2296 :
2297 7408 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
2298 : }
2299 :
2300 : /*
2301 : * Now release the session-level lock on the main table.
2302 : */
2303 118488 : UnlockRelationIdForSession(&lockrelid, lmode);
2304 :
2305 : /* Report that we really did it. */
2306 118488 : return true;
2307 : }
2308 :
2309 :
2310 : /*
2311 : * Open all the vacuumable indexes of the given relation, obtaining the
2312 : * specified kind of lock on each. Return an array of Relation pointers for
2313 : * the indexes into *Irel, and the number of indexes into *nindexes.
2314 : *
2315 : * We consider an index vacuumable if it is marked insertable (indisready).
2316 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2317 : * execution, and what we have is too corrupt to be processable. We will
2318 : * vacuum even if the index isn't indisvalid; this is important because in a
2319 : * unique index, uniqueness checks will be performed anyway and had better not
2320 : * hit dangling index pointers.
2321 : */
2322 : void
2323 131376 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2324 : int *nindexes, Relation **Irel)
2325 : {
2326 : List *indexoidlist;
2327 : ListCell *indexoidscan;
2328 : int i;
2329 :
2330 : Assert(lockmode != NoLock);
2331 :
2332 131376 : indexoidlist = RelationGetIndexList(relation);
2333 :
2334 : /* allocate enough memory for all indexes */
2335 131376 : i = list_length(indexoidlist);
2336 :
2337 131376 : if (i > 0)
2338 122584 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2339 : else
2340 8792 : *Irel = NULL;
2341 :
2342 : /* collect just the ready indexes */
2343 131376 : i = 0;
2344 326290 : foreach(indexoidscan, indexoidlist)
2345 : {
2346 194914 : Oid indexoid = lfirst_oid(indexoidscan);
2347 : Relation indrel;
2348 :
2349 194914 : indrel = index_open(indexoid, lockmode);
2350 194914 : if (indrel->rd_index->indisready)
2351 194914 : (*Irel)[i++] = indrel;
2352 : else
2353 0 : index_close(indrel, lockmode);
2354 : }
2355 :
2356 131376 : *nindexes = i;
2357 :
2358 131376 : list_free(indexoidlist);
2359 131376 : }
2360 :
2361 : /*
2362 : * Release the resources acquired by vac_open_indexes. Optionally release
2363 : * the locks (say NoLock to keep 'em).
2364 : */
2365 : void
2366 132192 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2367 : {
2368 132192 : if (Irel == NULL)
2369 9614 : return;
2370 :
2371 317480 : while (nindexes--)
2372 : {
2373 194902 : Relation ind = Irel[nindexes];
2374 :
2375 194902 : index_close(ind, lockmode);
2376 : }
2377 122578 : pfree(Irel);
2378 : }
2379 :
2380 : /*
2381 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2382 : *
2383 : * This should be called in each major loop of VACUUM processing,
2384 : * typically once per page processed.
2385 : */
2386 : void
2387 71855172 : vacuum_delay_point(bool is_analyze)
2388 : {
2389 71855172 : double msec = 0;
2390 :
2391 : /* Always check for interrupts */
2392 71855172 : CHECK_FOR_INTERRUPTS();
2393 :
2394 71855172 : if (InterruptPending ||
2395 71855172 : (!VacuumCostActive && !ConfigReloadPending))
2396 66339984 : return;
2397 :
2398 : /*
2399 : * Autovacuum workers should reload the configuration file if requested.
2400 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2401 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2402 : * vacuumed or analyzed.
2403 : */
2404 5515188 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2405 : {
2406 0 : ConfigReloadPending = false;
2407 0 : ProcessConfigFile(PGC_SIGHUP);
2408 0 : VacuumUpdateCosts();
2409 : }
2410 :
2411 : /*
2412 : * If we disabled cost-based delays after reloading the config file,
2413 : * return.
2414 : */
2415 5515188 : if (!VacuumCostActive)
2416 0 : return;
2417 :
2418 : /*
2419 : * For parallel vacuum, the delay is computed based on the shared cost
2420 : * balance. See compute_parallel_delay.
2421 : */
2422 5515188 : if (VacuumSharedCostBalance != NULL)
2423 0 : msec = compute_parallel_delay();
2424 5515188 : else if (VacuumCostBalance >= vacuum_cost_limit)
2425 3482 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2426 :
2427 : /* Nap if appropriate */
2428 5515188 : if (msec > 0)
2429 : {
2430 : instr_time delay_start;
2431 :
2432 3482 : if (msec > vacuum_cost_delay * 4)
2433 6 : msec = vacuum_cost_delay * 4;
2434 :
2435 3482 : if (track_cost_delay_timing)
2436 0 : INSTR_TIME_SET_CURRENT(delay_start);
2437 :
2438 3482 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2439 3482 : pg_usleep(msec * 1000);
2440 3482 : pgstat_report_wait_end();
2441 :
2442 3482 : if (track_cost_delay_timing)
2443 : {
2444 : instr_time delay_end;
2445 : instr_time delay;
2446 :
2447 0 : INSTR_TIME_SET_CURRENT(delay_end);
2448 0 : INSTR_TIME_SET_ZERO(delay);
2449 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2450 :
2451 : /*
2452 : * For parallel workers, we only report the delay time every once
2453 : * in a while to avoid overloading the leader with messages and
2454 : * interrupts.
2455 : */
2456 0 : if (IsParallelWorker())
2457 : {
2458 : static instr_time last_report_time;
2459 : instr_time time_since_last_report;
2460 :
2461 : Assert(!is_analyze);
2462 :
2463 : /* Accumulate the delay time */
2464 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2465 :
2466 : /* Calculate interval since last report */
2467 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2468 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2469 :
2470 : /* If we haven't reported in a while, do so now */
2471 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2472 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2473 : {
2474 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2475 : parallel_vacuum_worker_delay_ns);
2476 :
2477 : /* Reset variables */
2478 0 : last_report_time = delay_end;
2479 0 : parallel_vacuum_worker_delay_ns = 0;
2480 : }
2481 : }
2482 0 : else if (is_analyze)
2483 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2484 0 : INSTR_TIME_GET_NANOSEC(delay));
2485 : else
2486 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2487 0 : INSTR_TIME_GET_NANOSEC(delay));
2488 : }
2489 :
2490 : /*
2491 : * We don't want to ignore postmaster death during very long vacuums
2492 : * with vacuum_cost_delay configured. We can't use the usual
2493 : * WaitLatch() approach here because we want microsecond-based sleep
2494 : * durations above.
2495 : */
2496 3482 : if (IsUnderPostmaster && !PostmasterIsAlive())
2497 0 : exit(1);
2498 :
2499 3482 : VacuumCostBalance = 0;
2500 :
2501 : /*
2502 : * Balance and update limit values for autovacuum workers. We must do
2503 : * this periodically, as the number of workers across which we are
2504 : * balancing the limit may have changed.
2505 : *
2506 : * TODO: There may be better criteria for determining when to do this
2507 : * besides "check after napping".
2508 : */
2509 3482 : AutoVacuumUpdateCostLimit();
2510 :
2511 : /* Might have gotten an interrupt while sleeping */
2512 3482 : CHECK_FOR_INTERRUPTS();
2513 : }
2514 : }
2515 :
2516 : /*
2517 : * Computes the vacuum delay for a participant in a parallel vacuum.
2518 : *
2519 : * The basic idea of a cost-based delay for parallel vacuum is to let each
2520 : * worker sleep in proportion to the share of work it has done. We achieve
2521 : * this by giving all parallel vacuum workers, including the leader process,
2522 : * a shared view of the cost balance (VacuumSharedCostBalance). Each call
2523 : * adds the freshly incurred cost (VacuumCostBalance) to the shared balance
2524 : * and to this process's running total (VacuumCostBalanceLocal), and then
2525 : * decides whether it needs to sleep. The time to sleep is computed from the
2526 : * local total, which is then subtracted from VacuumSharedCostBalance and
2527 : * reset to zero. This avoids putting to sleep those workers which have done
2528 : * less I/O than other workers, and therefore ensures that workers which are
2529 : * doing more I/O get throttled more.
2530 : *
2531 : * A worker sleeps only if the shared balance has reached vacuum_cost_limit
2532 : * and its local total exceeds a threshold based on the number of active
2533 : * workers (VacuumActiveNWorkers). Testing reveals that we achieve the
2534 : * required throttling if we force a worker that has done more than 50% of
2535 : * its share of work (vacuum_cost_limit / nworkers) to sleep. Returns the
2536 : * computed sleep time (msec), or 0 if the caller should not sleep this time.
2537 : */
2538 : static double
2539 0 : compute_parallel_delay(void)
2540 : {
2541 0 : double msec = 0;
2542 : uint32 shared_balance;
2543 : int nworkers;
2544 :
2545 : /* Parallel vacuum must be active, i.e. the shared balance must be set */
2546 : Assert(VacuumSharedCostBalance);
2547 :
2548 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2549 :
2550 : /* The active-worker count must include at least this process */
2551 : Assert(nworkers >= 1);
2552 :
2553 : /* Atomically add our new cost to the shared balance and fetch the result */
2554 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2555 :
2556 : /* Accumulate the same cost into this process's running local total */
2557 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2558 :
2559 0 : if ((shared_balance >= vacuum_cost_limit) &&
2560 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2561 : {
2562 : /* Sleep time scales with the local total, which is then deducted from the shared balance */
2563 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2564 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2565 0 : VacuumCostBalanceLocal = 0;
2566 : }
2567 :
2568 : /*
2569 : * Reset the per-call balance; it was already added to the shared value above.
2570 : */
2571 0 : VacuumCostBalance = 0;
2572 :
2573 0 : return msec;
2574 : }
2575 :
2576 : /*
2577 : * A wrapper around defGetBoolean() for options stored as a VacOptValue.
2578 : *
2579 : * Returns VACOPTVALUE_ENABLED when defGetBoolean(def) yields true, and
2580 : * VACOPTVALUE_DISABLED when it yields false.
2581 : */
2582 : static VacOptValue
2583 316 : get_vacoptval_from_boolean(DefElem *def)
2584 : {
2585 316 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2586 : }
2587 :
2588 : /*
2589 : * vac_bulkdel_one_index() -- bulk-deletion for one index relation.
2590 : * Runs index_bulk_delete() with vac_tid_reaped and dead_items as the callback,
2591 : * then reports dead_items_info->num_items at ivinfo->message_level. Returns the stats from index_bulk_delete().
2592 : */
2593 : IndexBulkDeleteResult *
2594 1976 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2595 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2596 : {
2597 : /* Do bulk deletion; vac_tid_reaped checks each index TID against dead_items */
2598 1976 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2599 : dead_items);
2600 :
2601 1976 : ereport(ivinfo->message_level,
2602 : (errmsg("scanned index \"%s\" to remove %lld row versions",
2603 : RelationGetRelationName(ivinfo->index),
2604 : (long long) dead_items_info->num_items)));
2605 :
2606 1976 : return istat;
2607 : }
2608 :
2609 : /*
2610 : * vac_cleanup_one_index() -- do post-vacuum cleanup for one index relation.
2611 : * Calls index_vacuum_cleanup() and, if it returns non-NULL stats, reports them
2612 : * at ivinfo->message_level. Returns that result, which may be NULL.
2613 : */
2614 : IndexBulkDeleteResult *
2615 146996 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2616 : {
2617 146996 : istat = index_vacuum_cleanup(ivinfo, istat);
2618 :
2619 146996 : if (istat) /* NULL result: there are no stats to report */
2620 2218 : ereport(ivinfo->message_level,
2621 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2622 : RelationGetRelationName(ivinfo->index),
2623 : istat->num_index_tuples,
2624 : istat->num_pages),
2625 : errdetail("%.0f index row versions were removed.\n"
2626 : "%u index pages were newly deleted.\n"
2627 : "%u index pages are currently deleted, of which %u are currently reusable.",
2628 : istat->tuples_removed,
2629 : istat->pages_newly_deleted,
2630 : istat->pages_deleted, istat->pages_free)));
2631 :
2632 146996 : return istat;
2633 : }
2634 :
2635 : /*
2636 : * vac_tid_reaped() -- is a particular tid deletable?
2637 : * Returns true if itemptr is a member of the TidStore passed in via state.
2638 : * This has the right signature to be an IndexBulkDeleteCallback.
2639 : */
2640 : static bool
2641 5454606 : vac_tid_reaped(ItemPointer itemptr, void *state)
2642 : {
2643 5454606 : TidStore *dead_items = (TidStore *) state; /* callback state is the dead-items TidStore */
2644 :
2645 5454606 : return TidStoreIsMember(dead_items, itemptr);
2646 : }
|