Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/async.h"
41 : #include "commands/cluster.h"
42 : #include "commands/defrem.h"
43 : #include "commands/progress.h"
44 : #include "commands/vacuum.h"
45 : #include "miscadmin.h"
46 : #include "nodes/makefuncs.h"
47 : #include "pgstat.h"
48 : #include "postmaster/autovacuum.h"
49 : #include "postmaster/bgworker_internals.h"
50 : #include "postmaster/interrupt.h"
51 : #include "storage/bufmgr.h"
52 : #include "storage/lmgr.h"
53 : #include "storage/pmsignal.h"
54 : #include "storage/proc.h"
55 : #include "storage/procarray.h"
56 : #include "utils/acl.h"
57 : #include "utils/fmgroids.h"
58 : #include "utils/guc.h"
59 : #include "utils/guc_hooks.h"
60 : #include "utils/injection_point.h"
61 : #include "utils/memutils.h"
62 : #include "utils/snapmgr.h"
63 : #include "utils/syscache.h"
64 :
65 : /*
66 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
67 : * This aims to avoid sending too many messages and waking up the leader too
68 : * frequently.
69 : */
70 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
71 :
72 : /*
73 : * GUC parameters
74 : */
75 : int vacuum_freeze_min_age;
76 : int vacuum_freeze_table_age;
77 : int vacuum_multixact_freeze_min_age;
78 : int vacuum_multixact_freeze_table_age;
79 : int vacuum_failsafe_age;
80 : int vacuum_multixact_failsafe_age;
81 : double vacuum_max_eager_freeze_failure_rate;
82 : bool track_cost_delay_timing;
83 : bool vacuum_truncate;
84 :
85 : /*
86 : * Variables for cost-based vacuum delay. The defaults differ between
87 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
88 : * vacuum code. They are initialized here to the defaults for client backends
89 : * executing VACUUM or ANALYZE.
90 : */
91 : double vacuum_cost_delay = 0;
92 : int vacuum_cost_limit = 200;
93 :
94 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
95 : int64 parallel_vacuum_worker_delay_ns = 0;
96 :
97 : /*
98 : * VacuumFailsafeActive is defined as a global so that we can determine
99 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
100 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
101 : * for the table until after vacuuming has completed, regardless of other
102 : * settings.
103 : *
104 : * Only VACUUM code should inspect this variable and only table access methods
105 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
106 : * inspected to determine whether or not to allow cost-based delays. Table AMs
107 : * are free to set it if they desire this behavior, but it is false by default
108 : * and reset to false in between vacuuming each relation.
109 : */
110 : bool VacuumFailsafeActive = false;
111 :
112 : /*
113 : * Variables for cost-based parallel vacuum. See comments atop
114 : * compute_parallel_delay to understand how it works.
115 : */
116 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
117 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
118 : int VacuumCostBalanceLocal = 0;
119 :
120 : /* Prototypes for functions that are local (static) to this file */
121 : static List *expand_vacuum_rel(VacuumRelation *vrel,
122 : MemoryContext vac_context, int options);
123 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
124 : static void vac_truncate_clog(TransactionId frozenXID,
125 : MultiXactId minMulti,
126 : TransactionId lastSaneFrozenXid,
127 : MultiXactId lastSaneMinMulti);
128 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
129 : BufferAccessStrategy bstrategy);
130 : static double compute_parallel_delay(void);
131 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
132 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
133 :
134 : /*
135 : * GUC check function for "vacuum_buffer_usage_limit": accepts 0, or a value
136 : * from MIN_BAS_VAC_RING_SIZE_KB to MAX_BAS_VAC_RING_SIZE_KB; else returns false.
137 : */
138 : bool
139 2274 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
140 : GucSource source)
141 : {
142 : /* Value upper and lower hard limits are inclusive */
143 2274 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
144 2274 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
145 2274 : return true;
146 :
147 : /* Out of range: describe the allowed values in the errdetail and reject */
148 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
149 : "vacuum_buffer_usage_limit",
150 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
151 :
152 0 : return false;
153 : }
154 :
155 : /*
156 : * Primary entry point for manual VACUUM and ANALYZE commands
157 : *
158 : * This is mainly a preparation wrapper: it parses the statement's options into
159 : * a VacuumParams, rejects invalid combinations, and then calls vacuum().
160 : */
161 : void
162 14356 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
163 : {
164 : VacuumParams params;
165 14356 : BufferAccessStrategy bstrategy = NULL;
166 14356 : bool verbose = false;
167 14356 : bool skip_locked = false;
168 14356 : bool analyze = false;
169 14356 : bool freeze = false;
170 14356 : bool full = false;
171 14356 : bool disable_page_skipping = false;
172 14356 : bool process_main = true;
173 14356 : bool process_toast = true;
174 : int ring_size;
175 14356 : bool skip_database_stats = false;
176 14356 : bool only_database_stats = false;
177 : MemoryContext vac_context;
178 : ListCell *lc;
179 :
180 : /* index_cleanup and truncate values unspecified for now */
181 14356 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
182 14356 : params.truncate = VACOPTVALUE_UNSPECIFIED;
183 :
184 : /* By default parallel vacuum is enabled (nworkers = 0; -1 below disables it) */
185 14356 : params.nworkers = 0;
186 :
187 : /* Will be set later if we recurse to a TOAST table. */
188 14356 : params.toast_parent = InvalidOid;
189 :
190 : /*
191 : * Set this to an invalid value so it is clear whether or not a
192 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
193 : */
194 14356 : ring_size = -1;
195 :
196 : /* Parse options list */
197 29746 : foreach(lc, vacstmt->options)
198 : {
199 15426 : DefElem *opt = (DefElem *) lfirst(lc);
200 :
201 : /* Parse common options for VACUUM and ANALYZE */
202 15426 : if (strcmp(opt->defname, "verbose") == 0)
203 42 : verbose = defGetBoolean(opt);
204 15384 : else if (strcmp(opt->defname, "skip_locked") == 0)
205 334 : skip_locked = defGetBoolean(opt);
206 15050 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
207 : {
208 : const char *hintmsg;
209 : int result;
210 : char *vac_buffer_size;
211 :
212 54 : vac_buffer_size = defGetString(opt);
213 :
214 : /*
215 : * Check that the specified value is valid and the size falls
216 : * within the hard upper and lower limits if it is not 0.
217 : */
218 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
219 48 : (result != 0 &&
220 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
221 : {
222 18 : ereport(ERROR,
223 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
224 : errmsg("%s option must be 0 or between %d kB and %d kB",
225 : "BUFFER_USAGE_LIMIT",
226 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
227 : hintmsg ? errhint_internal("%s", _(hintmsg)) : 0));
228 : }
229 :
230 36 : ring_size = result;
231 : }
232 14996 : else if (!vacstmt->is_vacuumcmd)
233 6 : ereport(ERROR,
234 : (errcode(ERRCODE_SYNTAX_ERROR),
235 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
236 : parser_errposition(pstate, opt->location)));
237 :
238 : /* Parse options available on VACUUM */
239 14990 : else if (strcmp(opt->defname, "analyze") == 0)
240 2762 : analyze = defGetBoolean(opt);
241 12228 : else if (strcmp(opt->defname, "freeze") == 0)
242 2960 : freeze = defGetBoolean(opt);
243 9268 : else if (strcmp(opt->defname, "full") == 0)
244 392 : full = defGetBoolean(opt);
245 8876 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
246 214 : disable_page_skipping = defGetBoolean(opt);
247 8662 : else if (strcmp(opt->defname, "index_cleanup") == 0)
248 : {
249 : /* Interpret no string as the default, which is 'auto' */
250 174 : if (!opt->arg)
251 0 : params.index_cleanup = VACOPTVALUE_AUTO;
252 : else
253 : {
254 174 : char *sval = defGetString(opt);
255 :
256 : /* Try matching on 'auto' string, or fall back on boolean */
257 174 : if (pg_strcasecmp(sval, "auto") == 0)
258 6 : params.index_cleanup = VACOPTVALUE_AUTO;
259 : else
260 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
261 : }
262 : }
263 8488 : else if (strcmp(opt->defname, "process_main") == 0)
264 154 : process_main = defGetBoolean(opt);
265 8334 : else if (strcmp(opt->defname, "process_toast") == 0)
266 160 : process_toast = defGetBoolean(opt);
267 8174 : else if (strcmp(opt->defname, "truncate") == 0)
268 158 : params.truncate = get_vacoptval_from_boolean(opt);
269 8016 : else if (strcmp(opt->defname, "parallel") == 0)
270 : {
271 352 : int nworkers = defGetInt32(opt);
272 :
273 346 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
274 6 : ereport(ERROR,
275 : (errcode(ERRCODE_SYNTAX_ERROR),
276 : errmsg("%s option must be between 0 and %d",
277 : "PARALLEL",
278 : MAX_PARALLEL_WORKER_LIMIT),
279 : parser_errposition(pstate, opt->location)));
280 :
281 : /*
282 : * Disable parallel vacuum, if user has specified parallel degree
283 : * as zero.
284 : */
285 340 : if (nworkers == 0)
286 156 : params.nworkers = -1;
287 : else
288 184 : params.nworkers = nworkers;
289 : }
290 7664 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
291 7526 : skip_database_stats = defGetBoolean(opt);
292 138 : else if (strcmp(opt->defname, "only_database_stats") == 0)
293 138 : only_database_stats = defGetBoolean(opt);
294 : else
295 0 : ereport(ERROR,
296 : (errcode(ERRCODE_SYNTAX_ERROR),
297 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
298 : parser_errposition(pstate, opt->location)));
299 : }
300 :
301 : /* Set vacuum options */
302 14320 : params.options =
303 14320 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
304 14320 : (verbose ? VACOPT_VERBOSE : 0) |
305 14320 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
306 14320 : (analyze ? VACOPT_ANALYZE : 0) |
307 14320 : (freeze ? VACOPT_FREEZE : 0) |
308 14320 : (full ? VACOPT_FULL : 0) |
309 14320 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
310 14320 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
311 14320 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
312 14320 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
313 14320 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
314 :
315 : /* sanity checks on options */
316 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
317 : Assert((params.options & VACOPT_VACUUM) ||
318 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
319 :
320 14320 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
321 6 : ereport(ERROR,
322 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
323 : errmsg("VACUUM FULL cannot be performed in parallel")));
324 :
325 : /*
326 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
327 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
328 : * we'll permit that.
329 : */
330 14314 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
331 6 : !(params.options & VACOPT_ANALYZE))
332 6 : ereport(ERROR,
333 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
334 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
335 :
336 : /*
337 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
338 : */
339 14308 : if (!(params.options & VACOPT_ANALYZE))
340 : {
341 13072 : foreach(lc, vacstmt->rels)
342 : {
343 6436 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
344 :
345 6436 : if (vrel->va_cols != NIL)
346 6 : ereport(ERROR,
347 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
348 : errmsg("ANALYZE option must be specified when a column list is provided")));
349 : }
350 : }
351 :
352 :
353 : /*
354 : * Sanity check DISABLE_PAGE_SKIPPING option.
355 : */
356 14302 : if ((params.options & VACOPT_FULL) != 0 &&
357 368 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
358 0 : ereport(ERROR,
359 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
360 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
361 :
362 : /* sanity check for PROCESS_TOAST */
363 14302 : if ((params.options & VACOPT_FULL) != 0 &&
364 368 : (params.options & VACOPT_PROCESS_TOAST) == 0)
365 6 : ereport(ERROR,
366 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
367 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
368 :
369 : /* sanity check for ONLY_DATABASE_STATS */
370 14296 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
371 : {
372 : Assert(params.options & VACOPT_VACUUM);
373 138 : if (vacstmt->rels != NIL)
374 6 : ereport(ERROR,
375 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
376 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
377 : /* PROCESS_MAIN/PROCESS_TOAST default to on above, so don't require people to turn them off explicitly */
378 132 : if (params.options & ~(VACOPT_VACUUM |
379 : VACOPT_VERBOSE |
380 : VACOPT_PROCESS_MAIN |
381 : VACOPT_PROCESS_TOAST |
382 : VACOPT_ONLY_DATABASE_STATS))
383 0 : ereport(ERROR,
384 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
385 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
386 : }
387 :
388 : /*
389 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
390 : * them as -1 which means to use the default values.
391 : */
392 14290 : if (params.options & VACOPT_FREEZE)
393 : {
394 2960 : params.freeze_min_age = 0;
395 2960 : params.freeze_table_age = 0;
396 2960 : params.multixact_freeze_min_age = 0;
397 2960 : params.multixact_freeze_table_age = 0;
398 : }
399 : else
400 : {
401 11330 : params.freeze_min_age = -1;
402 11330 : params.freeze_table_age = -1;
403 11330 : params.multixact_freeze_min_age = -1;
404 11330 : params.multixact_freeze_table_age = -1;
405 : }
406 :
407 : /* user-invoked vacuum is never "for wraparound" */
408 14290 : params.is_wraparound = false;
409 :
410 : /*
411 : * user-invoked vacuum uses VACOPT_VERBOSE instead of
412 : * log_vacuum_min_duration and log_analyze_min_duration
413 : */
414 14290 : params.log_vacuum_min_duration = -1;
415 14290 : params.log_analyze_min_duration = -1;
416 :
417 : /*
418 : * Later, in vacuum_rel(), we check if a reloption override was specified.
419 : */
420 14290 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
421 :
422 : /*
423 : * Create special memory context for cross-transaction storage.
424 : *
425 : * Since it is a child of PortalContext, it will go away eventually even
426 : * if we suffer an error; there's no need for special abort cleanup logic.
427 : */
428 14290 : vac_context = AllocSetContextCreate(PortalContext,
429 : "Vacuum",
430 : ALLOCSET_DEFAULT_SIZES);
431 :
432 : /*
433 : * Make a buffer strategy object in the cross-transaction memory context.
434 : * We needn't bother making this for VACUUM (FULL) or VACUUM
435 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
436 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
437 : * when we see ANALYZE.
438 : */
439 14290 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
440 494 : VACOPT_FULL)) == 0 ||
441 494 : (params.options & VACOPT_ANALYZE) != 0)
442 : {
443 :
444 13802 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
445 :
446 : Assert(ring_size >= -1);
447 :
448 : /*
449 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
450 : * command, it overrides the value of VacuumBufferUsageLimit. Either
451 : * value may be 0, in which case GetAccessStrategyWithSize() will
452 : * return NULL, effectively allowing full use of shared buffers.
453 : */
454 13802 : if (ring_size == -1)
455 13772 : ring_size = VacuumBufferUsageLimit;
456 :
457 13802 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
458 :
459 13802 : MemoryContextSwitchTo(old_context);
460 : }
461 :
462 : /* Now go through the common routine */
463 14290 : vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
464 :
465 : /* Finally, clean up the vacuum memory context */
466 14156 : MemoryContextDelete(vac_context);
467 14156 : }
468 :
469 : /*
470 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
471 : *
472 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
473 : * we process all relevant tables in the database. For each VacuumRelation,
474 : * if a valid OID is supplied, the table with that OID is what to process;
475 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
476 : *
477 : * params contains a set of parameters that can be used to customize the
478 : * behavior.
479 : *
480 : * bstrategy may be passed in as NULL when the caller does not want to
481 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
482 : * otherwise, the caller must build a BufferAccessStrategy with the number of
483 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
484 : * using.
485 : *
486 : * isTopLevel should be passed down from ProcessUtility.
487 : *
488 : * It is the caller's responsibility that all parameters are allocated in a
489 : * memory context that will not disappear at transaction commit.
490 : */
491 : void
492 240912 : vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
493 : MemoryContext vac_context, bool isTopLevel)
494 : {
495 : static bool in_vacuum = false;
496 :
497 : const char *stmttype;
498 : volatile bool in_outer_xact,
499 : use_own_xacts;
500 :
501 240912 : stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
502 :
503 : /*
504 : * We cannot run VACUUM inside a user transaction block; if we were inside
505 : * a transaction, then our commit- and start-transaction-command calls
506 : * would not have the intended effect! There are numerous other subtle
507 : * dependencies on this, too.
508 : *
509 : * ANALYZE (without VACUUM) can run either way.
510 : */
511 240912 : if (params.options & VACOPT_VACUUM)
512 : {
513 235716 : PreventInTransactionBlock(isTopLevel, stmttype);
514 235696 : in_outer_xact = false;
515 : }
516 : else
517 5196 : in_outer_xact = IsInTransactionBlock(isTopLevel);
518 :
519 : /*
520 : * Check for and disallow recursive calls. This could happen when VACUUM
521 : * FULL or ANALYZE calls a hostile index expression that itself calls
522 : * ANALYZE.
523 : */
524 240892 : if (in_vacuum)
525 12 : ereport(ERROR,
526 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
527 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
528 : stmttype)));
529 :
530 : /*
531 : * Build list of relation(s) to process, putting any new data in
532 : * vac_context for safekeeping.
533 : */
534 240880 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
535 : {
536 : /* We don't process any tables in this case */
537 : Assert(relations == NIL);
538 : }
539 240748 : else if (relations != NIL)
540 : {
541 240530 : List *newrels = NIL;
542 : ListCell *lc;
543 :
544 481188 : foreach(lc, relations)
545 : {
546 240694 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
547 : List *sublist;
548 : MemoryContext old_context;
549 :
550 240694 : sublist = expand_vacuum_rel(vrel, vac_context, params.options);
551 240658 : old_context = MemoryContextSwitchTo(vac_context);
552 240658 : newrels = list_concat(newrels, sublist);
553 240658 : MemoryContextSwitchTo(old_context);
554 : }
555 240494 : relations = newrels;
556 : }
557 : else
558 218 : relations = get_all_vacuum_rels(vac_context, params.options);
559 :
560 : /*
561 : * Decide whether we need to start/commit our own transactions.
562 : *
563 : * For VACUUM (with or without ANALYZE): always do so, so that we can
564 : * release locks as soon as possible. (We could possibly use the outer
565 : * transaction for a one-table VACUUM, but handling TOAST tables would be
566 : * problematic.)
567 : *
568 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
569 : * start/commit our own transactions. Also, there's no need to do so if
570 : * only processing one relation. For multiple relations when not within a
571 : * transaction block, and also in an autovacuum worker, use own
572 : * transactions so we can release locks sooner.
573 : */
574 240844 : if (params.options & VACOPT_VACUUM)
575 235684 : use_own_xacts = true;
576 : else
577 : {
578 : Assert(params.options & VACOPT_ANALYZE);
579 5160 : if (AmAutoVacuumWorkerProcess())
580 292 : use_own_xacts = true;
581 4868 : else if (in_outer_xact)
582 254 : use_own_xacts = false;
583 4614 : else if (list_length(relations) > 1)
584 810 : use_own_xacts = true;
585 : else
586 3804 : use_own_xacts = false;
587 : }
588 :
589 : /*
590 : * vacuum_rel expects to be entered with no transaction active; it will
591 : * start and commit its own transaction. But we are called by an SQL
592 : * command, and so we are executing inside a transaction already. We
593 : * commit the transaction started in PostgresMain() here, and start
594 : * another one before exiting to match the commit waiting for us back in
595 : * PostgresMain().
596 : */
597 240844 : if (use_own_xacts)
598 : {
599 : Assert(!in_outer_xact);
600 :
601 : /* ActiveSnapshot is not set by autovacuum */
602 236786 : if (ActiveSnapshotSet())
603 10164 : PopActiveSnapshot();
604 :
605 : /* matches the StartTransaction in PostgresMain() */
606 236786 : CommitTransactionCommand();
607 : }
608 :
609 : /* Turn vacuum cost accounting on or off, and set in_vacuum (cleared again in PG_FINALLY) */
610 240844 : PG_TRY();
611 : {
612 : ListCell *cur;
613 :
614 240844 : in_vacuum = true;
615 240844 : VacuumFailsafeActive = false;
616 240844 : VacuumUpdateCosts();
617 240844 : VacuumCostBalance = 0;
618 240844 : VacuumCostBalanceLocal = 0;
619 240844 : VacuumSharedCostBalance = NULL;
620 240844 : VacuumActiveNWorkers = NULL;
621 :
622 : /*
623 : * Loop to process each selected relation.
624 : */
625 498778 : foreach(cur, relations)
626 : {
627 258000 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
628 :
629 258000 : if (params.options & VACOPT_VACUUM)
630 : {
631 244506 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
632 100 : continue;
633 : }
634 :
635 257892 : if (params.options & VACOPT_ANALYZE)
636 : {
637 : /*
638 : * If using separate xacts, start one for analyze. Otherwise,
639 : * we can use the outer transaction.
640 : */
641 16470 : if (use_own_xacts)
642 : {
643 12438 : StartTransactionCommand();
644 : /* functions in indexes may want a snapshot set */
645 12438 : PushActiveSnapshot(GetTransactionSnapshot());
646 : }
647 :
648 16470 : analyze_rel(vrel->oid, vrel->relation, params,
649 : vrel->va_cols, in_outer_xact, bstrategy);
650 :
651 16412 : if (use_own_xacts)
652 : {
653 12400 : PopActiveSnapshot();
654 : /* standard_ProcessUtility() does CCI if !use_own_xacts */
655 12400 : CommandCounterIncrement();
656 12400 : CommitTransactionCommand();
657 : }
658 : else
659 : {
660 : /*
661 : * If we're not using separate xacts, better separate the
662 : * ANALYZE actions with CCIs. This avoids trouble if user
663 : * says "ANALYZE t, t".
664 : */
665 4012 : CommandCounterIncrement();
666 : }
667 : }
668 :
669 : /*
670 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
671 : * next relation.
672 : */
673 257834 : VacuumFailsafeActive = false;
674 : }
675 : }
676 66 : PG_FINALLY();
677 : {
678 240844 : in_vacuum = false;
679 240844 : VacuumCostActive = false;
680 240844 : VacuumFailsafeActive = false;
681 240844 : VacuumCostBalance = 0;
682 : }
683 240844 : PG_END_TRY();
684 :
685 : /*
686 : * Finish up processing.
687 : */
688 240778 : if (use_own_xacts)
689 : {
690 : /* here, we are not in a transaction */
691 :
692 : /*
693 : * This matches the CommitTransaction waiting for us in
694 : * PostgresMain().
695 : */
696 236740 : StartTransactionCommand();
697 : }
698 :
699 240778 : if ((params.options & VACOPT_VACUUM) &&
700 235650 : !(params.options & VACOPT_SKIP_DATABASE_STATS))
701 : {
702 : /*
703 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
704 : */
705 1796 : vac_update_datfrozenxid();
706 : }
707 :
708 240778 : }
709 :
710 : /*
711 : * Check if the current user has privileges to vacuum or analyze the relation.
712 : * Returns true if so. If not, issue a WARNING and return false to let the caller
713 : * decide what to do with this relation. This routine is used to decide if a
714 : * relation can be processed; options must include VACOPT_VACUUM or VACOPT_ANALYZE.
715 : */
716 : bool
717 302654 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
718 : bits32 options)
719 : {
720 : char *relname;
721 :
722 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
723 :
724 : /*----------
725 : * A role has privileges to vacuum or analyze the relation if any of the
726 : * following are true:
727 : * - the role owns the current database and the relation is not shared
728 : * - the role has the MAINTAIN privilege on the relation
729 : *----------
730 : */
731 302654 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
732 350056 : !reltuple->relisshared) ||
733 50972 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
734 299560 : return true;
735 :
736 3094 : relname = NameStr(reltuple->relname);
737 :
738 3094 : if ((options & VACOPT_VACUUM) != 0)
739 : {
740 224 : ereport(WARNING,
741 : (errmsg("permission denied to vacuum \"%s\", skipping it",
742 : relname)));
743 :
744 : /*
745 : * For VACUUM ANALYZE, both logs could show up, but just generate
746 : * information for VACUUM as that would be the first one to be
747 : * processed.
748 : */
749 224 : return false;
750 : }
751 :
752 2870 : if ((options & VACOPT_ANALYZE) != 0)
753 2870 : ereport(WARNING,
754 : (errmsg("permission denied to analyze \"%s\", skipping it",
755 : relname)));
756 :
757 2870 : return false;
758 : }
759 :
760 :
761 : /*
762 : * vacuum_open_relation
763 : *
764 : * Attempt to open and lock a relation which is going to be vacuumed or
765 : * analyzed. Returns the opened relation, or NULL if it cannot be opened
766 : * or locked, in which case a log is emitted if possible.
767 : */
768 : Relation
769 270464 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
770 : bool verbose, LOCKMODE lmode)
771 : {
772 : Relation rel;
773 270464 : bool rel_lock = true;
774 : int elevel;
775 :
776 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
777 :
778 : /*
779 : * Open the relation and get the appropriate lock on it.
780 : *
781 : * There's a race condition here: the relation may have gone away since
782 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
783 : *
784 : * If we've been asked not to wait for the relation lock, acquire it first
785 : * in non-blocking mode, before calling try_relation_open().
786 : */
787 270464 : if (!(options & VACOPT_SKIP_LOCKED))
788 269342 : rel = try_relation_open(relid, lmode);
789 1122 : else if (ConditionalLockRelationOid(relid, lmode))
790 1102 : rel = try_relation_open(relid, NoLock);
791 : else
792 : {
793 20 : rel = NULL;
794 20 : rel_lock = false;
795 : }
796 :
797 : /* if relation is opened, leave */
798 270464 : if (rel)
799 270432 : return rel;
800 :
801 : /*
802 : * Relation could not be opened, hence generate if possible a log
803 : * informing on the situation.
804 : *
805 : * If the RangeVar is not defined, we do not have enough information to
806 : * provide a meaningful log statement. Chances are that the caller has
807 : * intentionally not provided this information so that this logging is
808 : * skipped, anyway.
809 : */
810 32 : if (relation == NULL)
811 18 : return NULL;
812 :
813 : /*
814 : * Determine the log level.
815 : *
816 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
817 : * statements in the permission checks; otherwise, only log if the caller
818 : * so requested.
819 : */
820 14 : if (!AmAutoVacuumWorkerProcess())
821 14 : elevel = WARNING;
822 0 : else if (verbose)
823 0 : elevel = LOG;
824 : else
825 0 : return NULL;
826 :
827 14 : if ((options & VACOPT_VACUUM) != 0)
828 : {
829 10 : if (!rel_lock)
830 6 : ereport(elevel,
831 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
832 : errmsg("skipping vacuum of \"%s\" --- lock not available",
833 : relation->relname)));
834 : else
835 4 : ereport(elevel,
836 : (errcode(ERRCODE_UNDEFINED_TABLE),
837 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
838 : relation->relname)));
839 :
840 : /*
841 : * For VACUUM ANALYZE, both logs could show up, but just generate
842 : * information for VACUUM as that would be the first one to be
843 : * processed.
844 : */
845 10 : return NULL;
846 : }
847 :
848 4 : if ((options & VACOPT_ANALYZE) != 0)
849 : {
850 4 : if (!rel_lock)
851 2 : ereport(elevel,
852 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
853 : errmsg("skipping analyze of \"%s\" --- lock not available",
854 : relation->relname)));
855 : else
856 2 : ereport(elevel,
857 : (errcode(ERRCODE_UNDEFINED_TABLE),
858 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
859 : relation->relname)));
860 : }
861 :
862 4 : return NULL;
863 : }
864 :
865 :
866 : /*
867 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
868 : * and optionally add VacuumRelations for partitions or inheritance children.
869 : *
870 : * If a VacuumRelation does not have an OID supplied and is a partitioned
871 : * table, an extra entry will be added to the output for each partition.
872 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
873 : * it does not want us to expand partitioned tables.
874 : *
875 : * We take care not to modify the input data structure, but instead build
876 : * new VacuumRelation(s) to return. (But note that they will reference
877 : * unmodified parts of the input, eg column lists.) New data structures
878 : * are made in vac_context.
879 : */
880 : static List *
881 240694 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
882 : int options)
883 : {
884 240694 : List *vacrels = NIL;
885 : MemoryContext oldcontext;
886 :
887 : /* If caller supplied OID, there's nothing we need do here. */
888 240694 : if (OidIsValid(vrel->oid))
889 : {
890 226622 : oldcontext = MemoryContextSwitchTo(vac_context);
891 226622 : vacrels = lappend(vacrels, vrel);
892 226622 : MemoryContextSwitchTo(oldcontext);
893 : }
894 : else
895 : {
896 : /*
897 : * Process a specific relation, and possibly partitions or child
898 : * tables thereof.
899 : */
900 : Oid relid;
901 : HeapTuple tuple;
902 : Form_pg_class classForm;
903 : bool include_children;
904 : bool is_partitioned_table;
905 : int rvr_opts;
906 :
907 : /*
908 : * Since autovacuum workers supply OIDs when calling vacuum(), no
909 : * autovacuum worker should reach this code.
910 : */
911 : Assert(!AmAutoVacuumWorkerProcess());
912 :
913 : /*
914 : * We transiently take AccessShareLock to protect the syscache lookup
915 : * below, as well as find_all_inheritors's expectation that the caller
916 : * holds some lock on the starting relation.
917 : */
918 14072 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
919 14072 : relid = RangeVarGetRelidExtended(vrel->relation,
920 : AccessShareLock,
921 : rvr_opts,
922 : NULL, NULL);
923 :
924 : /*
925 : * If the lock is unavailable, emit the same log statement that
926 : * vacuum_rel() and analyze_rel() would.
927 : */
928 14036 : if (!OidIsValid(relid))
929 : {
930 8 : if (options & VACOPT_VACUUM)
931 6 : ereport(WARNING,
932 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
933 : errmsg("skipping vacuum of \"%s\" --- lock not available",
934 : vrel->relation->relname)));
935 : else
936 2 : ereport(WARNING,
937 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
938 : errmsg("skipping analyze of \"%s\" --- lock not available",
939 : vrel->relation->relname)));
940 8 : return vacrels;
941 : }
942 :
943 : /*
944 : * To check whether the relation is a partitioned table and its
945 : * ownership, fetch its syscache entry.
946 : */
947 14028 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
948 14028 : if (!HeapTupleIsValid(tuple))
949 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
950 14028 : classForm = (Form_pg_class) GETSTRUCT(tuple);
951 :
952 : /*
953 : * Make a returnable VacuumRelation for this rel if the user has the
954 : * required privileges.
955 : */
956 14028 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
957 : {
958 13796 : oldcontext = MemoryContextSwitchTo(vac_context);
959 13796 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
960 : relid,
961 : vrel->va_cols));
962 13796 : MemoryContextSwitchTo(oldcontext);
963 : }
964 :
965 : /*
966 : * Vacuuming a partitioned table with ONLY will not do anything since
967 : * the partitioned table itself is empty. Issue a warning if the user
968 : * requests this.
969 : */
970 14028 : include_children = vrel->relation->inh;
971 14028 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
972 14028 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
973 6 : ereport(WARNING,
974 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
975 : vrel->relation->relname)));
976 :
977 14028 : ReleaseSysCache(tuple);
978 :
979 : /*
980 : * Unless the user has specified ONLY, make relation list entries for
981 : * its partitions or inheritance child tables. Note that the list
982 : * returned by find_all_inheritors() includes the passed-in OID, so we
983 : * have to skip that. There's no point in taking locks on the
984 : * individual partitions or child tables yet, and doing so would just
985 : * add unnecessary deadlock risk. For this last reason, we do not yet
986 : * check the ownership of the partitions/tables, which get added to
987 : * the list to process. Ownership will be checked later on anyway.
988 : */
989 14028 : if (include_children)
990 : {
991 13998 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
992 : ListCell *part_lc;
993 :
994 30186 : foreach(part_lc, part_oids)
995 : {
996 16188 : Oid part_oid = lfirst_oid(part_lc);
997 :
998 16188 : if (part_oid == relid)
999 13998 : continue; /* ignore original table */
1000 :
1001 : /*
1002 : * We omit a RangeVar since it wouldn't be appropriate to
1003 : * complain about failure to open one of these relations
1004 : * later.
1005 : */
1006 2190 : oldcontext = MemoryContextSwitchTo(vac_context);
1007 2190 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1008 : part_oid,
1009 : vrel->va_cols));
1010 2190 : MemoryContextSwitchTo(oldcontext);
1011 : }
1012 : }
1013 :
1014 : /*
1015 : * Release lock again. This means that by the time we actually try to
1016 : * process the table, it might be gone or renamed. In the former case
1017 : * we'll silently ignore it; in the latter case we'll process it
1018 : * anyway, but we must beware that the RangeVar doesn't necessarily
1019 : * identify it anymore. This isn't ideal, perhaps, but there's little
1020 : * practical alternative, since we're typically going to commit this
1021 : * transaction and begin a new one between now and then. Moreover,
1022 : * holding locks on multiple relations would create significant risk
1023 : * of deadlock.
1024 : */
1025 14028 : UnlockRelationOid(relid, AccessShareLock);
1026 : }
1027 :
1028 240650 : return vacrels;
1029 : }
1030 :
1031 : /*
1032 : * Construct a list of VacuumRelations for all vacuumable rels in
1033 : * the current database. The list is built in vac_context.
1034 : */
1035 : static List *
1036 218 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1037 : {
1038 218 : List *vacrels = NIL;
1039 : Relation pgclass;
1040 : TableScanDesc scan;
1041 : HeapTuple tuple;
1042 :
1043 218 : pgclass = table_open(RelationRelationId, AccessShareLock);
1044 :
1045 218 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1046 :
1047 98792 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1048 : {
1049 98574 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1050 : MemoryContext oldcontext;
1051 98574 : Oid relid = classForm->oid;
1052 :
1053 : /*
1054 : * We include partitioned tables here; depending on which operation is
1055 : * to be performed, caller will decide whether to process or ignore
1056 : * them.
1057 : */
1058 98574 : if (classForm->relkind != RELKIND_RELATION &&
1059 80592 : classForm->relkind != RELKIND_MATVIEW &&
1060 80544 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1061 80380 : continue;
1062 :
1063 : /* check permissions of relation */
1064 18194 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1065 2754 : continue;
1066 :
1067 : /*
1068 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1069 : * We omit a RangeVar since it wouldn't be appropriate to complain
1070 : * about failure to open one of these relations later.
1071 : */
1072 15440 : oldcontext = MemoryContextSwitchTo(vac_context);
1073 15440 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1074 : relid,
1075 : NIL));
1076 15440 : MemoryContextSwitchTo(oldcontext);
1077 : }
1078 :
1079 218 : table_endscan(scan);
1080 218 : table_close(pgclass, AccessShareLock);
1081 :
1082 218 : return vacrels;
1083 : }
1084 :
1085 : /*
1086 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1087 : *
1088 : * The target relation and VACUUM parameters are our inputs.
1089 : *
1090 : * Output parameters are the cutoffs that VACUUM caller should use.
1091 : *
1092 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1093 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1094 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1095 : * minimum).
1096 : */
1097 : bool
1098 253742 : vacuum_get_cutoffs(Relation rel, const VacuumParams params,
1099 : struct VacuumCutoffs *cutoffs)
1100 : {
1101 : int freeze_min_age,
1102 : multixact_freeze_min_age,
1103 : freeze_table_age,
1104 : multixact_freeze_table_age,
1105 : effective_multixact_freeze_max_age;
1106 : TransactionId nextXID,
1107 : safeOldestXmin,
1108 : aggressiveXIDCutoff;
1109 : MultiXactId nextMXID,
1110 : safeOldestMxact,
1111 : aggressiveMXIDCutoff;
1112 :
1113 : /* Use mutable copies of freeze age parameters */
1114 253742 : freeze_min_age = params.freeze_min_age;
1115 253742 : multixact_freeze_min_age = params.multixact_freeze_min_age;
1116 253742 : freeze_table_age = params.freeze_table_age;
1117 253742 : multixact_freeze_table_age = params.multixact_freeze_table_age;
1118 :
1119 : /* Set pg_class fields in cutoffs */
1120 253742 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1121 253742 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1122 :
1123 : /*
1124 : * Acquire OldestXmin.
1125 : *
1126 : * We can always ignore processes running lazy vacuum. This is because we
1127 : * use these values only for deciding which tuples we must keep in the
1128 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1129 : * XID assigned), it's safe to ignore it. In theory it could be
1130 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1131 : * that only one vacuum process can be working on a particular table at
1132 : * any time, and that each vacuum is always an independent transaction.
1133 : */
1134 253742 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1135 :
1136 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1137 :
1138 : /* Acquire OldestMxact */
1139 253742 : cutoffs->OldestMxact = GetOldestMultiXactId();
1140 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1141 :
1142 : /* Acquire next XID/next MXID values used to apply age-based settings */
1143 253742 : nextXID = ReadNextTransactionId();
1144 253742 : nextMXID = ReadNextMultiXactId();
1145 :
1146 : /*
1147 : * Also compute the multixact age for which freezing is urgent. This is
1148 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1149 : * short of multixact member space.
1150 : */
1151 253742 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1152 :
1153 : /*
1154 : * Almost ready to set freeze output parameters; check if OldestXmin or
1155 : * OldestMxact are held back to an unsafe degree before we start on that
1156 : */
1157 253742 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1158 253742 : if (!TransactionIdIsNormal(safeOldestXmin))
1159 0 : safeOldestXmin = FirstNormalTransactionId;
1160 253742 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1161 253742 : if (safeOldestMxact < FirstMultiXactId)
1162 0 : safeOldestMxact = FirstMultiXactId;
1163 253742 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1164 191854 : ereport(WARNING,
1165 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1166 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1167 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1168 253742 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1169 0 : ereport(WARNING,
1170 : (errmsg("cutoff for freezing multixacts is far in the past"),
1171 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1172 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1173 :
1174 : /*
1175 : * Determine the minimum freeze age to use: as specified by the caller, or
1176 : * vacuum_freeze_min_age, but in any case not more than half
1177 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1178 : * wraparound won't occur too frequently.
1179 : */
1180 253742 : if (freeze_min_age < 0)
1181 11236 : freeze_min_age = vacuum_freeze_min_age;
1182 253742 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1183 : Assert(freeze_min_age >= 0);
1184 :
1185 : /* Compute FreezeLimit, being careful to generate a normal XID */
1186 253742 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1187 253742 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1188 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1189 : /* FreezeLimit must always be <= OldestXmin */
1190 253742 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1191 215810 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1192 :
1193 : /*
1194 : * Determine the minimum multixact freeze age to use: as specified by
1195 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1196 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1197 : * prevent MultiXact wraparound won't occur too frequently.
1198 : */
1199 253742 : if (multixact_freeze_min_age < 0)
1200 11236 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1201 253742 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1202 : effective_multixact_freeze_max_age / 2);
1203 : Assert(multixact_freeze_min_age >= 0);
1204 :
1205 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1206 253742 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1207 253742 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1208 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1209 : /* MultiXactCutoff must always be <= OldestMxact */
1210 253742 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1211 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1212 :
1213 : /*
1214 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1215 : *
1216 : * Determine the table freeze age to use: as specified by the caller, or
1217 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1218 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1219 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1220 : * anti-wraparound autovacuum is launched.
1221 : */
1222 253742 : if (freeze_table_age < 0)
1223 11236 : freeze_table_age = vacuum_freeze_table_age;
1224 253742 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1225 : Assert(freeze_table_age >= 0);
1226 253742 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1227 253742 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1228 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1229 253742 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1230 : aggressiveXIDCutoff))
1231 242538 : return true;
1232 :
1233 : /*
1234 : * Similar to the above, determine the table freeze age to use for
1235 : * multixacts: as specified by the caller, or the value of the
1236 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1237 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1238 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1239 : * multixacts before anti-wraparound autovacuum is launched.
1240 : */
1241 11204 : if (multixact_freeze_table_age < 0)
1242 11018 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1243 11204 : multixact_freeze_table_age =
1244 11204 : Min(multixact_freeze_table_age,
1245 : effective_multixact_freeze_max_age * 0.95);
1246 : Assert(multixact_freeze_table_age >= 0);
1247 11204 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1248 11204 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1249 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1250 11204 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1251 : aggressiveMXIDCutoff))
1252 0 : return true;
1253 :
1254 : /* Non-aggressive VACUUM */
1255 11204 : return false;
1256 : }
1257 :
1258 : /*
1259 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1260 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1261 : * dangerously far in the past.
1262 : *
1263 : * When we return true, VACUUM caller triggers the failsafe.
1264 : */
1265 : bool
1266 256812 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1267 : {
1268 256812 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1269 256812 : MultiXactId relminmxid = cutoffs->relminmxid;
1270 : TransactionId xid_skip_limit;
1271 : MultiXactId multi_skip_limit;
1272 : int skip_index_vacuum;
1273 :
1274 : Assert(TransactionIdIsNormal(relfrozenxid));
1275 : Assert(MultiXactIdIsValid(relminmxid));
1276 :
1277 : /*
1278 : * Determine the index skipping age to use. In any case no less than
1279 : * autovacuum_freeze_max_age * 1.05.
1280 : */
1281 256812 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1282 :
1283 256812 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1284 256812 : if (!TransactionIdIsNormal(xid_skip_limit))
1285 0 : xid_skip_limit = FirstNormalTransactionId;
1286 :
1287 256812 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1288 : {
1289 : /* The table's relfrozenxid is too old */
1290 47846 : return true;
1291 : }
1292 :
1293 : /*
1294 : * Similar to above, determine the index skipping age to use for
1295 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1296 : * 1.05.
1297 : */
1298 208966 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1299 : autovacuum_multixact_freeze_max_age * 1.05);
1300 :
1301 208966 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1302 208966 : if (multi_skip_limit < FirstMultiXactId)
1303 0 : multi_skip_limit = FirstMultiXactId;
1304 :
1305 208966 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1306 : {
1307 : /* The table's relminmxid is too old */
1308 0 : return true;
1309 : }
1310 :
1311 208966 : return false;
1312 : }
1313 :
1314 : /*
1315 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1316 : *
1317 : * If we scanned the whole relation then we should just use the count of
1318 : * live tuples seen; but if we did not, we should not blindly extrapolate
1319 : * from that number, since VACUUM may have scanned a quite nonrandom
1320 : * subset of the table. When we have only partial information, we take
1321 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1322 : * of the tuple density in the unscanned pages.
1323 : *
1324 : * Note: scanned_tuples should count only *live* tuples, since
1325 : * pg_class.reltuples is defined that way.
1326 : */
1327 : double
1328 253178 : vac_estimate_reltuples(Relation relation,
1329 : BlockNumber total_pages,
1330 : BlockNumber scanned_pages,
1331 : double scanned_tuples)
1332 : {
1333 253178 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1334 253178 : double old_rel_tuples = relation->rd_rel->reltuples;
1335 : double old_density;
1336 : double unscanned_pages;
1337 : double total_tuples;
1338 :
1339 : /* If we did scan the whole table, just use the count as-is */
1340 253178 : if (scanned_pages >= total_pages)
1341 243992 : return scanned_tuples;
1342 :
1343 : /*
1344 : * When successive VACUUM commands scan the same few pages again and
1345 : * again, without anything from the table really changing, there is a risk
1346 : * that our beliefs about tuple density will gradually become distorted.
1347 : * This might be caused by vacuumlazy.c implementation details, such as
1348 : * its tendency to always scan the last heap page. Handle that here.
1349 : *
1350 : * If the relation is _exactly_ the same size according to the existing
1351 : * pg_class entry, and only a few of its pages (less than 2%) were
1352 : * scanned, keep the existing value of reltuples. Also keep the existing
1353 : * value when only a subset of rel's pages <= a single page were scanned.
1354 : *
1355 : * (Note: we might be returning -1 here.)
1356 : */
1357 9186 : if (old_rel_pages == total_pages &&
1358 9156 : scanned_pages < (double) total_pages * 0.02)
1359 6524 : return old_rel_tuples;
1360 2662 : if (scanned_pages <= 1)
1361 2220 : return old_rel_tuples;
1362 :
1363 : /*
1364 : * If old density is unknown, we can't do much except scale up
1365 : * scanned_tuples to match total_pages.
1366 : */
1367 442 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1368 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1369 :
1370 : /*
1371 : * Okay, we've covered the corner cases. The normal calculation is to
1372 : * convert the old measurement to a density (tuples per page), then
1373 : * estimate the number of tuples in the unscanned pages using that figure,
1374 : * and finally add on the number of tuples in the scanned pages.
1375 : */
1376 440 : old_density = old_rel_tuples / old_rel_pages;
1377 440 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1378 440 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1379 440 : return floor(total_tuples + 0.5);
1380 : }
1381 :
1382 :
1383 : /*
1384 : * vac_update_relstats() -- update statistics for one relation
1385 : *
1386 : * Update the whole-relation statistics that are kept in its pg_class
1387 : * row. There are additional stats that will be updated if we are
1388 : * doing ANALYZE, but we always update these stats. This routine works
1389 : * for both index and heap relation entries in pg_class.
1390 : *
1391 : * We violate transaction semantics here by overwriting the rel's
1392 : * existing pg_class tuple with the new values. This is reasonably
1393 : * safe as long as we're sure that the new values are correct whether or
1394 : * not this transaction commits. The reason for doing this is that if
1395 : * we updated these tuples in the usual way, vacuuming pg_class itself
1396 : * wouldn't work very well --- by the time we got done with a vacuum
1397 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1398 : * course, this only works for fixed-size not-null columns, but these are.
1399 : *
1400 : * Another reason for doing it this way is that when we are in a lazy
1401 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1402 : * Somebody vacuuming pg_class might think they could delete a tuple
1403 : * marked with xmin = our xid.
1404 : *
1405 : * In addition to fundamentally nontransactional statistics such as
1406 : * relpages and relallvisible, we try to maintain certain lazily-updated
1407 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1408 : * It's safe to do this in VACUUM, which can't run in parallel with
1409 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1410 : * However, it's *not* safe to do it in an ANALYZE that's within an
1411 : * outer transaction, because for example the current transaction might
1412 : * have dropped the last index; then we'd think relhasindex should be
1413 : * cleared, but if the transaction later rolls back this would be wrong.
1414 : * So we refrain from updating the DDL flags if we're inside an outer
1415 : * transaction. This is OK since postponing the flag maintenance is
1416 : * always allowable.
1417 : *
1418 : * Note: num_tuples should count only *live* tuples, since
1419 : * pg_class.reltuples is defined that way.
1420 : *
1421 : * This routine is shared by VACUUM and ANALYZE.
1422 : */
1423 : void
1424 294710 : vac_update_relstats(Relation relation,
1425 : BlockNumber num_pages, double num_tuples,
1426 : BlockNumber num_all_visible_pages,
1427 : BlockNumber num_all_frozen_pages,
1428 : bool hasindex, TransactionId frozenxid,
1429 : MultiXactId minmulti,
1430 : bool *frozenxid_updated, bool *minmulti_updated,
1431 : bool in_outer_xact)
1432 : {
1433 294710 : Oid relid = RelationGetRelid(relation);
1434 : Relation rd;
1435 : ScanKeyData key[1];
1436 : HeapTuple ctup;
1437 : void *inplace_state;
1438 : Form_pg_class pgcform;
1439 : bool dirty,
1440 : futurexid,
1441 : futuremxid;
1442 : TransactionId oldfrozenxid;
1443 : MultiXactId oldminmulti;
1444 :
1445 294710 : rd = table_open(RelationRelationId, RowExclusiveLock);
1446 :
1447 : /* Fetch a copy of the tuple to scribble on */
1448 294710 : ScanKeyInit(&key[0],
1449 : Anum_pg_class_oid,
1450 : BTEqualStrategyNumber, F_OIDEQ,
1451 : ObjectIdGetDatum(relid));
1452 294710 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1453 : NULL, 1, key, &ctup, &inplace_state);
1454 294708 : if (!HeapTupleIsValid(ctup))
1455 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1456 : relid);
1457 294708 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1458 :
1459 : /* Apply statistical updates, if any, to copied tuple */
1460 :
1461 294708 : dirty = false;
1462 294708 : if (pgcform->relpages != (int32) num_pages)
1463 : {
1464 9508 : pgcform->relpages = (int32) num_pages;
1465 9508 : dirty = true;
1466 : }
1467 294708 : if (pgcform->reltuples != (float4) num_tuples)
1468 : {
1469 20328 : pgcform->reltuples = (float4) num_tuples;
1470 20328 : dirty = true;
1471 : }
1472 294708 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1473 : {
1474 5804 : pgcform->relallvisible = (int32) num_all_visible_pages;
1475 5804 : dirty = true;
1476 : }
1477 294708 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1478 : {
1479 5244 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1480 5244 : dirty = true;
1481 : }
1482 :
1483 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1484 :
1485 294708 : if (!in_outer_xact)
1486 : {
1487 : /*
1488 : * If we didn't find any indexes, reset relhasindex.
1489 : */
1490 294346 : if (pgcform->relhasindex && !hasindex)
1491 : {
1492 24 : pgcform->relhasindex = false;
1493 24 : dirty = true;
1494 : }
1495 :
1496 : /* We also clear relhasrules and relhastriggers if needed */
1497 294346 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1498 : {
1499 0 : pgcform->relhasrules = false;
1500 0 : dirty = true;
1501 : }
1502 294346 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1503 : {
1504 6 : pgcform->relhastriggers = false;
1505 6 : dirty = true;
1506 : }
1507 : }
1508 :
1509 : /*
1510 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1511 : * indicating it has no new data.
1512 : *
1513 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1514 : * stored relfrozenxid is "in the future" then it seems best to assume
1515 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1516 : * This should match vac_update_datfrozenxid() concerning what we consider
1517 : * to be "in the future".
1518 : */
1519 294708 : oldfrozenxid = pgcform->relfrozenxid;
1520 294708 : futurexid = false;
1521 294708 : if (frozenxid_updated)
1522 253172 : *frozenxid_updated = false;
1523 294708 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1524 : {
1525 59156 : bool update = false;
1526 :
1527 59156 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1528 59068 : update = true;
1529 88 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1530 0 : futurexid = update = true;
1531 :
1532 59156 : if (update)
1533 : {
1534 59068 : pgcform->relfrozenxid = frozenxid;
1535 59068 : dirty = true;
1536 59068 : if (frozenxid_updated)
1537 59068 : *frozenxid_updated = true;
1538 : }
1539 : }
1540 :
1541 : /* Similarly for relminmxid */
1542 294708 : oldminmulti = pgcform->relminmxid;
1543 294708 : futuremxid = false;
1544 294708 : if (minmulti_updated)
1545 253172 : *minmulti_updated = false;
1546 294708 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1547 : {
1548 316 : bool update = false;
1549 :
1550 316 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1551 316 : update = true;
1552 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1553 0 : futuremxid = update = true;
1554 :
1555 316 : if (update)
1556 : {
1557 316 : pgcform->relminmxid = minmulti;
1558 316 : dirty = true;
1559 316 : if (minmulti_updated)
1560 316 : *minmulti_updated = true;
1561 : }
1562 : }
1563 :
1564 : /* If anything changed, write out the tuple. */
1565 294708 : if (dirty)
1566 73460 : systable_inplace_update_finish(inplace_state, ctup);
1567 : else
1568 221248 : systable_inplace_update_cancel(inplace_state);
1569 :
1570 294708 : table_close(rd, RowExclusiveLock);
1571 :
1572 294708 : if (futurexid)
1573 0 : ereport(WARNING,
1574 : (errcode(ERRCODE_DATA_CORRUPTED),
1575 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1576 : oldfrozenxid, frozenxid,
1577 : RelationGetRelationName(relation))));
1578 294708 : if (futuremxid)
1579 0 : ereport(WARNING,
1580 : (errcode(ERRCODE_DATA_CORRUPTED),
1581 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1582 : oldminmulti, minmulti,
1583 : RelationGetRelationName(relation))));
1584 294708 : }
1585 :
1586 :
1587 : /*
1588 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1589 : *
1590 : * Update pg_database's datfrozenxid entry for our database to be the
1591 : * minimum of the pg_class.relfrozenxid values.
1592 : *
1593 : * Similarly, update our datminmxid to be the minimum of the
1594 : * pg_class.relminmxid values.
1595 : *
1596 : * If we are able to advance either pg_database value, also try to
1597 : * truncate pg_xact and pg_multixact.
1598 : *
1599 : * We violate transaction semantics here by overwriting the database's
1600 : * existing pg_database tuple with the new values. This is reasonably
1601 : * safe since the new values are correct whether or not this transaction
1602 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1603 : * behind after a VACUUM.
1604 : */
1605 : void
1606 6142 : vac_update_datfrozenxid(void)
1607 : {
1608 : HeapTuple tuple;
1609 : Form_pg_database dbform;
1610 : Relation relation;
1611 : SysScanDesc scan;
1612 : HeapTuple classTup;
1613 : TransactionId newFrozenXid;
1614 : MultiXactId newMinMulti;
1615 : TransactionId lastSaneFrozenXid;
1616 : MultiXactId lastSaneMinMulti;
1617 6142 : bool bogus = false;
1618 6142 : bool dirty = false;
1619 : ScanKeyData key[1];
1620 : void *inplace_state;
1621 :
1622 : /*
1623 : * Restrict this task to one backend per database. This avoids race
1624 : * conditions that would move datfrozenxid or datminmxid backward. It
1625 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1626 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1627 : */
1628 6142 : LockDatabaseFrozenIds(ExclusiveLock);
1629 :
1630 : /*
1631 : * Initialize the "min" calculation with
1632 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1633 : * approximation to the minimum relfrozenxid for not-yet-committed
1634 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1635 : * cannot produce a wrong minimum by starting with this.
1636 : */
1637 6142 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1638 :
1639 : /*
1640 : * Similarly, initialize the MultiXact "min" with the value that would be
1641 : * used on pg_class for new tables. See AddNewRelationTuple().
1642 : */
1643 6142 : newMinMulti = GetOldestMultiXactId();
1644 :
1645 : /*
1646 : * Identify the latest relfrozenxid and relminmxid values that we could
1647 : * validly see during the scan. These are conservative values, but it's
1648 : * not really worth trying to be more exact.
1649 : */
1650 6142 : lastSaneFrozenXid = ReadNextTransactionId();
1651 6142 : lastSaneMinMulti = ReadNextMultiXactId();
1652 :
1653 : /*
1654 : * We must seqscan pg_class to find the minimum Xid, because there is no
1655 : * index that can help us here.
1656 : *
1657 : * See vac_truncate_clog() for the race condition to prevent.
1658 : */
1659 6142 : relation = table_open(RelationRelationId, AccessShareLock);
1660 :
1661 6142 : scan = systable_beginscan(relation, InvalidOid, false,
1662 : NULL, 0, NULL);
1663 :
1664 3037486 : while ((classTup = systable_getnext(scan)) != NULL)
1665 : {
1666 3031344 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1667 3031344 : TransactionId relfrozenxid = classForm->relfrozenxid;
1668 3031344 : TransactionId relminmxid = classForm->relminmxid;
1669 :
1670 : /*
1671 : * Only consider relations able to hold unfrozen XIDs (anything else
1672 : * should have InvalidTransactionId in relfrozenxid anyway).
1673 : */
1674 3031344 : if (classForm->relkind != RELKIND_RELATION &&
1675 2426078 : classForm->relkind != RELKIND_MATVIEW &&
1676 2423664 : classForm->relkind != RELKIND_TOASTVALUE)
1677 : {
1678 : Assert(!TransactionIdIsValid(relfrozenxid));
1679 : Assert(!MultiXactIdIsValid(relminmxid));
1680 2108428 : continue;
1681 : }
1682 :
1683 : /*
1684 : * Some table AMs might not need per-relation xid / multixid horizons.
1685 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1686 : * to not be set (i.e. set to their respective Invalid*Id)
1687 : * independently. Thus validate and compute horizon for each only if
1688 : * set.
1689 : *
1690 : * If things are working properly, no relation should have a
1691 : * relfrozenxid or relminmxid that is "in the future". However, such
1692 : * cases have been known to arise due to bugs in pg_upgrade. If we
1693 : * see any entries that are "in the future", chicken out and don't do
1694 : * anything. This ensures we won't truncate clog & multixact SLRUs
1695 : * before those relations have been scanned and cleaned up.
1696 : */
1697 :
1698 922916 : if (TransactionIdIsValid(relfrozenxid))
1699 : {
1700 : Assert(TransactionIdIsNormal(relfrozenxid));
1701 :
1702 : /* check for values in the future */
1703 922916 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1704 : {
1705 0 : bogus = true;
1706 0 : break;
1707 : }
1708 :
1709 : /* determine new horizon */
1710 922916 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1711 5558 : newFrozenXid = relfrozenxid;
1712 : }
1713 :
1714 922916 : if (MultiXactIdIsValid(relminmxid))
1715 : {
1716 : /* check for values in the future */
1717 922916 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1718 : {
1719 0 : bogus = true;
1720 0 : break;
1721 : }
1722 :
1723 : /* determine new horizon */
1724 922916 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1725 228 : newMinMulti = relminmxid;
1726 : }
1727 : }
1728 :
1729 : /* we're done with pg_class */
1730 6142 : systable_endscan(scan);
1731 6142 : table_close(relation, AccessShareLock);
1732 :
1733 : /* chicken out if bogus data found */
1734 6142 : if (bogus)
1735 0 : return;
1736 :
1737 : Assert(TransactionIdIsNormal(newFrozenXid));
1738 : Assert(MultiXactIdIsValid(newMinMulti));
1739 :
1740 : /* Now fetch the pg_database tuple we need to update. */
1741 6142 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1742 :
1743 : /*
1744 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1745 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1746 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1747 : * For now, don't optimize that.
1748 : */
1749 6142 : ScanKeyInit(&key[0],
1750 : Anum_pg_database_oid,
1751 : BTEqualStrategyNumber, F_OIDEQ,
1752 : ObjectIdGetDatum(MyDatabaseId));
1753 :
1754 6142 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1755 : NULL, 1, key, &tuple, &inplace_state);
1756 :
1757 6142 : if (!HeapTupleIsValid(tuple))
1758 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1759 :
1760 6142 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1761 :
1762 : /*
1763 : * As in vac_update_relstats(), we ordinarily don't want to let
1764 : * datfrozenxid go backward; but if it's "in the future" then it must be
1765 : * corrupt and it seems best to overwrite it.
1766 : */
1767 6828 : if (dbform->datfrozenxid != newFrozenXid &&
1768 686 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1769 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1770 : {
1771 686 : dbform->datfrozenxid = newFrozenXid;
1772 686 : dirty = true;
1773 : }
1774 : else
1775 5456 : newFrozenXid = dbform->datfrozenxid;
1776 :
1777 : /* Ditto for datminmxid */
1778 6144 : if (dbform->datminmxid != newMinMulti &&
1779 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1780 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1781 : {
1782 2 : dbform->datminmxid = newMinMulti;
1783 2 : dirty = true;
1784 : }
1785 : else
1786 6140 : newMinMulti = dbform->datminmxid;
1787 :
1788 6142 : if (dirty)
1789 686 : systable_inplace_update_finish(inplace_state, tuple);
1790 : else
1791 5456 : systable_inplace_update_cancel(inplace_state);
1792 :
1793 6142 : heap_freetuple(tuple);
1794 6142 : table_close(relation, RowExclusiveLock);
1795 :
1796 : /*
1797 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1798 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1799 : * XID-wrap-limit info is stale, since this action will update that too.
1800 : */
1801 6142 : if (dirty || ForceTransactionIdLimitUpdate())
1802 2450 : vac_truncate_clog(newFrozenXid, newMinMulti,
1803 : lastSaneFrozenXid, lastSaneMinMulti);
1804 : }
1805 :
1806 :
1807 : /*
1808 : * vac_truncate_clog() -- attempt to truncate the commit log
1809 : *
1810 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1811 : * and use it to truncate the transaction commit log (pg_xact).
1812 : * Also update the XID wrap limit info maintained by varsup.c.
1813 : * Likewise for datminmxid.
1814 : *
1815 : * The passed frozenXID and minMulti are the updated values for my own
1816 : * pg_database entry. They're used to initialize the "min" calculations.
1817 : * The caller also passes the "last sane" XID and MXID, since it has
1818 : * those at hand already.
1819 : *
1820 : * This routine is only invoked when we've managed to change our
1821 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1822 : * XID-wrap-limit info is stale.
1823 : */
1824 : static void
1825 2450 : vac_truncate_clog(TransactionId frozenXID,
1826 : MultiXactId minMulti,
1827 : TransactionId lastSaneFrozenXid,
1828 : MultiXactId lastSaneMinMulti)
1829 : {
1830 2450 : TransactionId nextXID = ReadNextTransactionId();
1831 : Relation relation;
1832 : TableScanDesc scan;
1833 : HeapTuple tuple;
1834 : Oid oldestxid_datoid;
1835 : Oid minmulti_datoid;
1836 2450 : bool bogus = false;
1837 2450 : bool frozenAlreadyWrapped = false;
1838 :
1839 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1840 2450 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1841 :
1842 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1843 2450 : oldestxid_datoid = MyDatabaseId;
1844 2450 : minmulti_datoid = MyDatabaseId;
1845 :
1846 : /*
1847 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1848 : *
1849 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1850 : * the values could change while we look at them. Fetch each one just
1851 : * once to ensure sane behavior of the comparison logic. (Here, as in
1852 : * many other places, we assume that fetching or updating an XID in shared
1853 : * storage is atomic.)
1854 : *
1855 : * Note: we need not worry about a race condition with new entries being
1856 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1857 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1858 : * of the interlock against copying a DB containing an active backend.
1859 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1860 : * concurrently modify the datfrozenxid's of different databases, the
1861 : * worst possible outcome is that pg_xact is not truncated as aggressively
1862 : * as it could be.
1863 : */
1864 2450 : relation = table_open(DatabaseRelationId, AccessShareLock);
1865 :
1866 2450 : scan = table_beginscan_catalog(relation, 0, NULL);
1867 :
1868 9626 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1869 : {
1870 7176 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1871 7176 : TransactionId datfrozenxid = dbform->datfrozenxid;
1872 7176 : TransactionId datminmxid = dbform->datminmxid;
1873 :
1874 : Assert(TransactionIdIsNormal(datfrozenxid));
1875 : Assert(MultiXactIdIsValid(datminmxid));
1876 :
1877 : /*
1878 : * If database is in the process of getting dropped, or has been
1879 : * interrupted while doing so, no connections to it are possible
1880 : * anymore. Therefore we don't need to take it into account here.
1881 : * Which is good, because it can't be processed by autovacuum either.
1882 : */
1883 7176 : if (database_is_invalid_form((Form_pg_database) dbform))
1884 : {
1885 2 : elog(DEBUG2,
1886 : "skipping invalid database \"%s\" while computing relfrozenxid",
1887 : NameStr(dbform->datname));
1888 2 : continue;
1889 : }
1890 :
1891 : /*
1892 : * If things are working properly, no database should have a
1893 : * datfrozenxid or datminmxid that is "in the future". However, such
1894 : * cases have been known to arise due to bugs in pg_upgrade. If we
1895 : * see any entries that are "in the future", chicken out and don't do
1896 : * anything. This ensures we won't truncate clog before those
1897 : * databases have been scanned and cleaned up. (We will issue the
1898 : * "already wrapped" warning if appropriate, though.)
1899 : */
1900 14348 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1901 7174 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1902 0 : bogus = true;
1903 :
1904 7174 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1905 0 : frozenAlreadyWrapped = true;
1906 7174 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1907 : {
1908 510 : frozenXID = datfrozenxid;
1909 510 : oldestxid_datoid = dbform->oid;
1910 : }
1911 :
1912 7174 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1913 : {
1914 4 : minMulti = datminmxid;
1915 4 : minmulti_datoid = dbform->oid;
1916 : }
1917 : }
1918 :
1919 2450 : table_endscan(scan);
1920 :
1921 2450 : table_close(relation, AccessShareLock);
1922 :
1923 : /*
1924 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1925 : * the computed minimum XID might be bogus. This case should now be
1926 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1927 : * test anyway.
1928 : */
1929 2450 : if (frozenAlreadyWrapped)
1930 : {
1931 0 : ereport(WARNING,
1932 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1933 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1934 0 : LWLockRelease(WrapLimitsVacuumLock);
1935 0 : return;
1936 : }
1937 :
1938 : /* chicken out if data is bogus in any other way */
1939 2450 : if (bogus)
1940 : {
1941 0 : LWLockRelease(WrapLimitsVacuumLock);
1942 0 : return;
1943 : }
1944 :
1945 : /*
1946 : * Freeze any old transaction IDs in the async notification queue before
1947 : * CLOG truncation.
1948 : */
1949 2450 : AsyncNotifyFreezeXids(frozenXID);
1950 :
1951 : /*
1952 : * Advance the oldest value for commit timestamps before truncating, so
1953 : * that if a user requests a timestamp for a transaction we're truncating
1954 : * away right after this point, they get NULL instead of an ugly "file not
1955 : * found" error from slru.c. This doesn't matter for xact/multixact
1956 : * because they are not subject to arbitrary lookups from users.
1957 : */
1958 2450 : AdvanceOldestCommitTsXid(frozenXID);
1959 :
1960 : /*
1961 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1962 : */
1963 2450 : TruncateCLOG(frozenXID, oldestxid_datoid);
1964 2450 : TruncateCommitTs(frozenXID);
1965 2450 : TruncateMultiXact(minMulti, minmulti_datoid);
1966 :
1967 : /*
1968 : * Update the wrap limit for GetNewTransactionId and creation of new
1969 : * MultiXactIds. Note: these functions will also signal the postmaster
1970 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1971 : * signaling twice?
1972 : */
1973 2450 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1974 2450 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1975 :
1976 2450 : LWLockRelease(WrapLimitsVacuumLock);
1977 : }
1978 :
1979 :
1980 : /*
1981 : * vacuum_rel() -- vacuum one heap relation
1982 : *
1983 : * relid identifies the relation to vacuum. If relation is supplied,
1984 : * use the name therein for reporting any failure to open/lock the rel;
1985 : * do not use it once we've successfully opened the rel, since it might
1986 : * be stale.
1987 : *
1988 : * Returns true if it's okay to proceed with a requested ANALYZE
1989 : * operation on this table.
1990 : *
1991 : * Doing one heap at a time incurs extra overhead, since we need to
1992 : * check that the heap exists again just before we vacuum it. The
1993 : * reason that we do this is so that vacuuming can be spread across
1994 : * many small transactions. Otherwise, two-phase locking would require
1995 : * us to lock the entire database during one pass of the vacuum cleaner.
1996 : *
1997 : * At entry and exit, we are not inside a transaction.
1998 : */
1999 : static bool
2000 253994 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
2001 : BufferAccessStrategy bstrategy)
2002 : {
2003 : LOCKMODE lmode;
2004 : Relation rel;
2005 : LockRelId lockrelid;
2006 : Oid priv_relid;
2007 : Oid toast_relid;
2008 : Oid save_userid;
2009 : int save_sec_context;
2010 : int save_nestlevel;
2011 : VacuumParams toast_vacuum_params;
2012 :
2013 : /*
2014 : * This function scribbles on the parameters, so make a copy early to
2015 : * avoid affecting the TOAST table (if we do end up recursing to it).
2016 : */
2017 253994 : memcpy(&toast_vacuum_params, ¶ms, sizeof(VacuumParams));
2018 :
2019 : /* Begin a transaction for vacuuming this relation */
2020 253994 : StartTransactionCommand();
2021 :
2022 253994 : if (!(params.options & VACOPT_FULL))
2023 : {
2024 : /*
2025 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2026 : * other concurrent VACUUMs know that they can ignore this one while
2027 : * determining their OldestXmin. (The reason we don't set it during a
2028 : * full VACUUM is exactly that we may have to run user-defined
2029 : * functions for functional indexes, and we want to make sure that if
2030 : * they use the snapshot set above, any tuples it requires can't get
2031 : * removed from other tables. An index function that depends on the
2032 : * contents of other tables is arguably broken, but we won't break it
2033 : * here by violating transaction semantics.)
2034 : *
2035 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2036 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2037 : * in an emergency.
2038 : *
2039 : * Note: these flags remain set until CommitTransaction or
2040 : * AbortTransaction. We don't want to clear them until we reset
2041 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2042 : * might appear to go backwards, which is probably Not Good. (We also
2043 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2044 : * xmin doesn't become visible ahead of setting the flag.)
2045 : */
2046 253580 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2047 253580 : MyProc->statusFlags |= PROC_IN_VACUUM;
2048 253580 : if (params.is_wraparound)
2049 226112 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2050 253580 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2051 253580 : LWLockRelease(ProcArrayLock);
2052 : }
2053 :
2054 : /*
2055 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2056 : * cutoff xids in local memory wrapping around, and to have updated xmin
2057 : * horizons.
2058 : */
2059 253994 : PushActiveSnapshot(GetTransactionSnapshot());
2060 :
2061 : /*
2062 : * Check for user-requested abort. Note we want this to be inside a
2063 : * transaction, so xact.c doesn't issue useless WARNING.
2064 : */
2065 253994 : CHECK_FOR_INTERRUPTS();
2066 :
2067 : /*
2068 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2069 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2070 : * way, we can be sure that no other backend is vacuuming the same table.
2071 : */
2072 507988 : lmode = (params.options & VACOPT_FULL) ?
2073 253994 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2074 :
2075 : /* open the relation and get the appropriate lock on it */
2076 253994 : rel = vacuum_open_relation(relid, relation, params.options,
2077 253994 : params.log_vacuum_min_duration >= 0, lmode);
2078 :
2079 : /* leave if relation could not be opened or locked */
2080 253994 : if (!rel)
2081 : {
2082 24 : PopActiveSnapshot();
2083 24 : CommitTransactionCommand();
2084 24 : return false;
2085 : }
2086 :
2087 : /*
2088 : * When recursing to a TOAST table, check privileges on the parent. NB:
2089 : * This is only safe to do because we hold a session lock on the main
2090 : * relation that prevents concurrent deletion.
2091 : */
2092 253970 : if (OidIsValid(params.toast_parent))
2093 9488 : priv_relid = params.toast_parent;
2094 : else
2095 244482 : priv_relid = RelationGetRelid(rel);
2096 :
2097 : /*
2098 : * Check if relation needs to be skipped based on privileges. This check
2099 : * happens also when building the relation list to vacuum for a manual
2100 : * operation, and needs to be done additionally here as VACUUM could
2101 : * happen across multiple transactions where privileges could have changed
2102 : * in-between. Make sure to only generate logs for VACUUM in this case.
2103 : */
2104 253970 : if (!vacuum_is_permitted_for_relation(priv_relid,
2105 : rel->rd_rel,
2106 253970 : params.options & ~VACOPT_ANALYZE))
2107 : {
2108 72 : relation_close(rel, lmode);
2109 72 : PopActiveSnapshot();
2110 72 : CommitTransactionCommand();
2111 72 : return false;
2112 : }
2113 :
2114 : /*
2115 : * Check that it's of a vacuumable relkind.
2116 : */
2117 253898 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2118 92678 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2119 92670 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2120 188 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2121 : {
2122 2 : ereport(WARNING,
2123 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2124 : RelationGetRelationName(rel))));
2125 2 : relation_close(rel, lmode);
2126 2 : PopActiveSnapshot();
2127 2 : CommitTransactionCommand();
2128 2 : return false;
2129 : }
2130 :
2131 : /*
2132 : * Silently ignore tables that are temp tables of other backends ---
2133 : * trying to vacuum these will lead to great unhappiness, since their
2134 : * contents are probably not up-to-date on disk. (We don't throw a
2135 : * warning here; it would just lead to chatter during a database-wide
2136 : * VACUUM.)
2137 : */
2138 253896 : if (RELATION_IS_OTHER_TEMP(rel))
2139 : {
2140 2 : relation_close(rel, lmode);
2141 2 : PopActiveSnapshot();
2142 2 : CommitTransactionCommand();
2143 2 : return false;
2144 : }
2145 :
2146 : /*
2147 : * Silently ignore partitioned tables as there is no work to be done. The
2148 : * useful work is on their child partitions, which have been queued up for
2149 : * us separately.
2150 : */
2151 253894 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2152 : {
2153 186 : relation_close(rel, lmode);
2154 186 : PopActiveSnapshot();
2155 186 : CommitTransactionCommand();
2156 : /* It's OK to proceed with ANALYZE on this table */
2157 186 : return true;
2158 : }
2159 :
2160 : /*
2161 : * Get a session-level lock too. This will protect our access to the
2162 : * relation across multiple transactions, so that we can vacuum the
2163 : * relation's TOAST table (if any) secure in the knowledge that no one is
2164 : * deleting the parent relation.
2165 : *
2166 : * NOTE: this cannot block, even if someone else is waiting for access,
2167 : * because the lock manager knows that both lock requests are from the
2168 : * same process.
2169 : */
2170 253708 : lockrelid = rel->rd_lockInfo.lockRelId;
2171 253708 : LockRelationIdForSession(&lockrelid, lmode);
2172 :
2173 : /*
2174 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2175 : * specified in VACUUM command, or when running in an autovacuum worker
2176 : */
2177 253708 : if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
2178 : {
2179 : StdRdOptIndexCleanup vacuum_index_cleanup;
2180 :
2181 253450 : if (rel->rd_options == NULL)
2182 250056 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2183 : else
2184 3394 : vacuum_index_cleanup =
2185 3394 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2186 :
2187 253450 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2188 253406 : params.index_cleanup = VACOPTVALUE_AUTO;
2189 44 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2190 22 : params.index_cleanup = VACOPTVALUE_ENABLED;
2191 : else
2192 : {
2193 : Assert(vacuum_index_cleanup ==
2194 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2195 22 : params.index_cleanup = VACOPTVALUE_DISABLED;
2196 : }
2197 : }
2198 :
2199 : #ifdef USE_INJECTION_POINTS
2200 253708 : if (params.index_cleanup == VACOPTVALUE_AUTO)
2201 253412 : INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
2202 296 : else if (params.index_cleanup == VACOPTVALUE_DISABLED)
2203 260 : INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
2204 36 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
2205 36 : INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
2206 : #endif
2207 :
2208 : /*
2209 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2210 : * parameter was specified. This overrides the GUC value.
2211 : */
2212 253708 : if (rel->rd_options != NULL &&
2213 3406 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2214 0 : params.max_eager_freeze_failure_rate =
2215 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2216 :
2217 : /*
2218 : * Set truncate option based on truncate reloption or GUC if it wasn't
2219 : * specified in VACUUM command, or when running in an autovacuum worker
2220 : */
2221 253708 : if (params.truncate == VACOPTVALUE_UNSPECIFIED)
2222 : {
2223 253456 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2224 :
2225 253456 : if (opts && opts->vacuum_truncate_set)
2226 : {
2227 32 : if (opts->vacuum_truncate)
2228 10 : params.truncate = VACOPTVALUE_ENABLED;
2229 : else
2230 22 : params.truncate = VACOPTVALUE_DISABLED;
2231 : }
2232 253424 : else if (vacuum_truncate)
2233 253402 : params.truncate = VACOPTVALUE_ENABLED;
2234 : else
2235 22 : params.truncate = VACOPTVALUE_DISABLED;
2236 : }
2237 :
2238 : #ifdef USE_INJECTION_POINTS
2239 253708 : if (params.truncate == VACOPTVALUE_AUTO)
2240 0 : INJECTION_POINT("vacuum-truncate-auto", NULL);
2241 253708 : else if (params.truncate == VACOPTVALUE_DISABLED)
2242 296 : INJECTION_POINT("vacuum-truncate-disabled", NULL);
2243 253412 : else if (params.truncate == VACOPTVALUE_ENABLED)
2244 253412 : INJECTION_POINT("vacuum-truncate-enabled", NULL);
2245 : #endif
2246 :
2247 : /*
2248 : * Remember the relation's TOAST relation for later, if the caller asked
2249 : * us to process it. In VACUUM FULL, though, the toast table is
2250 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2251 : * unless PROCESS_MAIN is disabled.
2252 : */
2253 253708 : if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
2254 27230 : ((params.options & VACOPT_FULL) == 0 ||
2255 386 : (params.options & VACOPT_PROCESS_MAIN) == 0))
2256 26850 : toast_relid = rel->rd_rel->reltoastrelid;
2257 : else
2258 226858 : toast_relid = InvalidOid;
2259 :
2260 : /*
2261 : * Switch to the table owner's userid, so that any index functions are run
2262 : * as that user. Also lock down security-restricted operations and
2263 : * arrange to make GUC variable changes local to this command. (This is
2264 : * unnecessary, but harmless, for lazy VACUUM.)
2265 : */
2266 253708 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2267 253708 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2268 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2269 253708 : save_nestlevel = NewGUCNestLevel();
2270 253708 : RestrictSearchPath();
2271 :
2272 : /*
2273 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2274 : * relation. Otherwise, we can skip this part. If processing the TOAST
2275 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2276 : * to be set when we recurse to the TOAST table.
2277 : */
2278 253708 : if (params.options & VACOPT_PROCESS_MAIN)
2279 : {
2280 : /*
2281 : * Do the actual work --- either FULL or "lazy" vacuum
2282 : */
2283 253554 : if (params.options & VACOPT_FULL)
2284 : {
2285 380 : ClusterParams cluster_params = {0};
2286 :
2287 380 : if ((params.options & VACOPT_VERBOSE) != 0)
2288 2 : cluster_params.options |= CLUOPT_VERBOSE;
2289 :
2290 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2291 380 : cluster_rel(rel, InvalidOid, &cluster_params);
2292 : /* cluster_rel closes the relation, but keeps lock */
2293 :
2294 374 : rel = NULL;
2295 : }
2296 : else
2297 253174 : table_relation_vacuum(rel, params, bstrategy);
2298 : }
2299 :
2300 : /* Roll back any GUC changes executed by index functions */
2301 253700 : AtEOXact_GUC(false, save_nestlevel);
2302 :
2303 : /* Restore userid and security context */
2304 253700 : SetUserIdAndSecContext(save_userid, save_sec_context);
2305 :
2306 : /* all done with this class, but hold lock until commit */
2307 253700 : if (rel)
2308 253326 : relation_close(rel, NoLock);
2309 :
2310 : /*
2311 : * Complete the transaction and free all temporary memory used.
2312 : */
2313 253700 : PopActiveSnapshot();
2314 253700 : CommitTransactionCommand();
2315 :
2316 : /*
2317 : * If the relation has a secondary toast rel, vacuum that too while we
2318 : * still hold the session lock on the main table. Note however that
2319 : * "analyze" will not get done on the toast table. This is good, because
2320 : * the toaster always uses hardcoded index access and statistics are
2321 : * totally unimportant for toast relations.
2322 : */
2323 253700 : if (toast_relid != InvalidOid)
2324 : {
2325 : /*
2326 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2327 : * set toast_parent so that the privilege checks are done on the main
2328 : * relation. NB: This is only safe to do because we hold a session
2329 : * lock on the main relation that prevents concurrent deletion.
2330 : */
2331 9488 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2332 9488 : toast_vacuum_params.toast_parent = relid;
2333 :
2334 9488 : vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
2335 : }
2336 :
2337 : /*
2338 : * Now release the session-level lock on the main table.
2339 : */
2340 253700 : UnlockRelationIdForSession(&lockrelid, lmode);
2341 :
2342 : /* Report that we really did it. */
2343 253700 : return true;
2344 : }
2345 :
2346 :
2347 : /*
2348 : * Open all the vacuumable indexes of the given relation, obtaining the
2349 : * specified kind of lock on each. Return an array of Relation pointers for
2350 : * the indexes into *Irel, and the number of indexes into *nindexes.
2351 : *
2352 : * We consider an index vacuumable if it is marked insertable (indisready).
2353 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2354 : * execution, and what we have is too corrupt to be processable. We will
2355 : * vacuum even if the index isn't indisvalid; this is important because in a
2356 : * unique index, uniqueness checks will be performed anyway and had better not
2357 : * hit dangling index pointers.
2358 : */
2359 : void
2360 268678 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2361 : int *nindexes, Relation **Irel)
2362 : {
2363 : List *indexoidlist;
2364 : ListCell *indexoidscan;
2365 : int i;
2366 :
2367 : Assert(lockmode != NoLock);
2368 :
2369 268678 : indexoidlist = RelationGetIndexList(relation);
2370 :
2371 : /* allocate enough memory for all indexes */
2372 268678 : i = list_length(indexoidlist);
2373 :
2374 268678 : if (i > 0)
2375 253438 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2376 : else
2377 15240 : *Irel = NULL;
2378 :
2379 : /* collect just the ready indexes */
2380 268678 : i = 0;
2381 670478 : foreach(indexoidscan, indexoidlist)
2382 : {
2383 401800 : Oid indexoid = lfirst_oid(indexoidscan);
2384 : Relation indrel;
2385 :
2386 401800 : indrel = index_open(indexoid, lockmode);
2387 401800 : if (indrel->rd_index->indisready)
2388 401800 : (*Irel)[i++] = indrel;
2389 : else
2390 0 : index_close(indrel, lockmode);
2391 : }
2392 :
2393 268678 : *nindexes = i;
2394 :
2395 268678 : list_free(indexoidlist);
2396 268678 : }
2397 :
2398 : /*
2399 : * Release the resources acquired by vac_open_indexes. Optionally release
2400 : * the locks (say NoLock to keep 'em).
2401 : */
2402 : void
2403 269550 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2404 : {
2405 269550 : if (Irel == NULL)
2406 16118 : return;
2407 :
2408 655220 : while (nindexes--)
2409 : {
2410 401788 : Relation ind = Irel[nindexes];
2411 :
2412 401788 : index_close(ind, lockmode);
2413 : }
2414 253432 : pfree(Irel);
2415 : }
2416 :
2417 : /*
2418 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2419 : *
2420 : * This should be called in each major loop of VACUUM processing,
2421 : * typically once per page processed.
2422 : */
2423 : void
2424 86434340 : vacuum_delay_point(bool is_analyze)
2425 : {
2426 86434340 : double msec = 0;
2427 :
2428 : /* Always check for interrupts */
2429 86434340 : CHECK_FOR_INTERRUPTS();
2430 :
2431 86434340 : if (InterruptPending ||
2432 86434340 : (!VacuumCostActive && !ConfigReloadPending))
2433 76342944 : return;
2434 :
2435 : /*
2436 : * Autovacuum workers should reload the configuration file if requested.
2437 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2438 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2439 : * vacuumed or analyzed.
2440 : */
2441 10091396 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2442 : {
2443 0 : ConfigReloadPending = false;
2444 0 : ProcessConfigFile(PGC_SIGHUP);
2445 0 : VacuumUpdateCosts();
2446 : }
2447 :
2448 : /*
2449 : * If we disabled cost-based delays after reloading the config file,
2450 : * return.
2451 : */
2452 10091396 : if (!VacuumCostActive)
2453 0 : return;
2454 :
2455 : /*
2456 : * For parallel vacuum, the delay is computed based on the shared cost
2457 : * balance. See compute_parallel_delay.
2458 : */
2459 10091396 : if (VacuumSharedCostBalance != NULL)
2460 0 : msec = compute_parallel_delay();
2461 10091396 : else if (VacuumCostBalance >= vacuum_cost_limit)
2462 9650 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2463 :
2464 : /* Nap if appropriate */
2465 10091396 : if (msec > 0)
2466 : {
2467 : instr_time delay_start;
2468 :
2469 9650 : if (msec > vacuum_cost_delay * 4)
2470 12 : msec = vacuum_cost_delay * 4;
2471 :
2472 9650 : if (track_cost_delay_timing)
2473 0 : INSTR_TIME_SET_CURRENT(delay_start);
2474 :
2475 9650 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2476 9650 : pg_usleep(msec * 1000);
2477 9650 : pgstat_report_wait_end();
2478 :
2479 9650 : if (track_cost_delay_timing)
2480 : {
2481 : instr_time delay_end;
2482 : instr_time delay;
2483 :
2484 0 : INSTR_TIME_SET_CURRENT(delay_end);
2485 0 : INSTR_TIME_SET_ZERO(delay);
2486 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2487 :
2488 : /*
2489 : * For parallel workers, we only report the delay time every once
2490 : * in a while to avoid overloading the leader with messages and
2491 : * interrupts.
2492 : */
2493 0 : if (IsParallelWorker())
2494 : {
2495 : static instr_time last_report_time;
2496 : instr_time time_since_last_report;
2497 :
2498 : Assert(!is_analyze);
2499 :
2500 : /* Accumulate the delay time */
2501 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2502 :
2503 : /* Calculate interval since last report */
2504 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2505 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2506 :
2507 : /* If we haven't reported in a while, do so now */
2508 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2509 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2510 : {
2511 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2512 : parallel_vacuum_worker_delay_ns);
2513 :
2514 : /* Reset variables */
2515 0 : last_report_time = delay_end;
2516 0 : parallel_vacuum_worker_delay_ns = 0;
2517 : }
2518 : }
2519 0 : else if (is_analyze)
2520 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2521 0 : INSTR_TIME_GET_NANOSEC(delay));
2522 : else
2523 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2524 0 : INSTR_TIME_GET_NANOSEC(delay));
2525 : }
2526 :
2527 : /*
2528 : * We don't want to ignore postmaster death during very long vacuums
2529 : * with vacuum_cost_delay configured. We can't use the usual
2530 : * WaitLatch() approach here because we want microsecond-based sleep
2531 : * durations above.
2532 : */
2533 9650 : if (IsUnderPostmaster && !PostmasterIsAlive())
2534 0 : exit(1);
2535 :
2536 9650 : VacuumCostBalance = 0;
2537 :
2538 : /*
2539 : * Balance and update limit values for autovacuum workers. We must do
2540 : * this periodically, as the number of workers across which we are
2541 : * balancing the limit may have changed.
2542 : *
2543 : * TODO: There may be better criteria for determining when to do this
2544 : * besides "check after napping".
2545 : */
2546 9650 : AutoVacuumUpdateCostLimit();
2547 :
2548 : /* Might have gotten an interrupt while sleeping */
2549 9650 : CHECK_FOR_INTERRUPTS();
2550 : }
2551 : }
2552 :
2553 : /*
2554 : * Computes the vacuum delay for parallel workers.
2555 : *
2556 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2557 : * worker to sleep in proportion to the share of work it's done. We achieve this
2558 : * by allowing all parallel vacuum workers including the leader process to
2559 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2560 : * We allow each worker to update it as and when it has incurred any cost and
2561 : * then based on that decide whether it needs to sleep. We compute the time
2562 : * to sleep for a worker based on the cost it has incurred
2563 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2564 : * that amount. This avoids putting to sleep those workers which have done less
2565 : * I/O than other workers and therefore ensure that workers
2566 : * which are doing more I/O got throttled more.
2567 : *
2568 : * We allow a worker to sleep only if it has performed I/O above a certain
2569 : * threshold, which is calculated based on the number of active workers
2570 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2571 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2572 : * the required throttling if we force a worker that has done more than 50%
2573 : * of its share of work to sleep.
2574 : */
2575 : static double
2576 0 : compute_parallel_delay(void)
2577 : {
2578 0 : double msec = 0;
2579 : uint32 shared_balance;
2580 : int nworkers;
2581 :
2582 : /* Parallel vacuum must be active */
2583 : Assert(VacuumSharedCostBalance);
2584 :
2585 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2586 :
2587 : /* At least count itself */
2588 : Assert(nworkers >= 1);
2589 :
2590 : /* Update the shared cost balance value atomically */
2591 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2592 :
2593 : /* Compute the total local balance for the current worker */
2594 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2595 :
2596 0 : if ((shared_balance >= vacuum_cost_limit) &&
2597 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2598 : {
2599 : /* Compute sleep time based on the local cost balance */
2600 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2601 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2602 0 : VacuumCostBalanceLocal = 0;
2603 : }
2604 :
2605 : /*
2606 : * Reset the local balance as we accumulated it into the shared value.
2607 : */
2608 0 : VacuumCostBalance = 0;
2609 :
2610 0 : return msec;
2611 : }
2612 :
2613 : /*
2614 : * A wrapper function of defGetBoolean().
2615 : *
2616 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2617 : * of true and false.
2618 : */
2619 : static VacOptValue
2620 326 : get_vacoptval_from_boolean(DefElem *def)
2621 : {
2622 326 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2623 : }
2624 :
2625 : /*
2626 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2627 : *
2628 : * Returns bulk delete stats derived from input stats
2629 : */
2630 : IndexBulkDeleteResult *
2631 2410 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2632 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2633 : {
2634 : /* Do bulk deletion */
2635 2410 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2636 : dead_items);
2637 :
2638 2410 : ereport(ivinfo->message_level,
2639 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2640 : RelationGetRelationName(ivinfo->index),
2641 : dead_items_info->num_items)));
2642 :
2643 2410 : return istat;
2644 : }
2645 :
2646 : /*
2647 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2648 : *
2649 : * Returns bulk delete stats derived from input stats
2650 : */
2651 : IndexBulkDeleteResult *
2652 306728 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2653 : {
2654 306728 : istat = index_vacuum_cleanup(ivinfo, istat);
2655 :
2656 306728 : if (istat)
2657 2660 : ereport(ivinfo->message_level,
2658 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2659 : RelationGetRelationName(ivinfo->index),
2660 : istat->num_index_tuples,
2661 : istat->num_pages),
2662 : errdetail("%.0f index row versions were removed.\n"
2663 : "%u index pages were newly deleted.\n"
2664 : "%u index pages are currently deleted, of which %u are currently reusable.",
2665 : istat->tuples_removed,
2666 : istat->pages_newly_deleted,
2667 : istat->pages_deleted, istat->pages_free)));
2668 :
2669 306728 : return istat;
2670 : }
2671 :
2672 : /*
2673 : * vac_tid_reaped() -- is a particular tid deletable?
2674 : *
2675 : * This has the right signature to be an IndexBulkDeleteCallback.
2676 : */
2677 : static bool
2678 6583170 : vac_tid_reaped(ItemPointer itemptr, void *state)
2679 : {
2680 6583170 : TidStore *dead_items = (TidStore *) state;
2681 :
2682 6583170 : return TidStoreIsMember(dead_items, itemptr);
2683 : }
|