Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/pg_database.h"
39 : #include "catalog/pg_inherits.h"
40 : #include "commands/cluster.h"
41 : #include "commands/defrem.h"
42 : #include "commands/progress.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/injection_point.h"
60 : #include "utils/memutils.h"
61 : #include "utils/snapmgr.h"
62 : #include "utils/syscache.h"
63 :
64 : /*
65 : * Minimum interval for cost-based vacuum delay reports from a parallel worker.
66 : * This aims to avoid sending too many messages and waking up the leader too
67 : * frequently.
68 : */
69 : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S)
70 :
71 : /*
72 : * GUC parameters
73 : */
74 : int vacuum_freeze_min_age;
75 : int vacuum_freeze_table_age;
76 : int vacuum_multixact_freeze_min_age;
77 : int vacuum_multixact_freeze_table_age;
78 : int vacuum_failsafe_age;
79 : int vacuum_multixact_failsafe_age;
80 : double vacuum_max_eager_freeze_failure_rate;
81 : bool track_cost_delay_timing;
82 : bool vacuum_truncate;
83 :
84 : /*
85 : * Variables for cost-based vacuum delay. The defaults differ between
86 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
87 : * vacuum code. They are initialized here to the defaults for client backends
88 : * executing VACUUM or ANALYZE.
89 : */
90 : double vacuum_cost_delay = 0;
91 : int vacuum_cost_limit = 200;
92 :
93 : /* Variable for reporting cost-based vacuum delay from parallel workers. */
94 : int64 parallel_vacuum_worker_delay_ns = 0;
95 :
96 : /*
97 : * VacuumFailsafeActive is defined as a global so that we can determine
98 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
99 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
100 : * for the table until after vacuuming has completed, regardless of other
101 : * settings.
102 : *
103 : * Only VACUUM code should inspect this variable and only table access methods
104 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
105 : * inspected to determine whether or not to allow cost-based delays. Table AMs
106 : * are free to set it if they desire this behavior, but it is false by default
107 : * and reset to false in between vacuuming each relation.
108 : */
109 : bool VacuumFailsafeActive = false;
110 :
111 : /*
112 : * Variables for cost-based parallel vacuum. See comments atop
113 : * compute_parallel_delay to understand how it works.
114 : */
115 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
116 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
117 : int VacuumCostBalanceLocal = 0;
118 :
119 : /* non-export function prototypes */
120 : static List *expand_vacuum_rel(VacuumRelation *vrel,
121 : MemoryContext vac_context, int options);
122 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
123 : static void vac_truncate_clog(TransactionId frozenXID,
124 : MultiXactId minMulti,
125 : TransactionId lastSaneFrozenXid,
126 : MultiXactId lastSaneMinMulti);
127 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
128 : BufferAccessStrategy bstrategy);
129 : static double compute_parallel_delay(void);
130 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
131 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
132 :
133 : /*
134 : * GUC check function to ensure GUC value specified is within the allowable
135 : * range.
136 : */
137 : bool
138 2254 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
139 : GucSource source)
140 : {
141 : /* Value upper and lower hard limits are inclusive */
142 2254 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
143 2254 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
144 2254 : return true;
145 :
146 : /* Value does not fall within any allowable range */
147 0 : GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
148 : "vacuum_buffer_usage_limit",
149 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
150 :
151 0 : return false;
152 : }
153 :
154 : /*
155 : * Primary entry point for manual VACUUM and ANALYZE commands
156 : *
157 : * This is mainly a preparation wrapper for the real operations that will
158 : * happen in vacuum().
159 : */
160 : void
161 13794 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
162 : {
163 : VacuumParams params;
164 13794 : BufferAccessStrategy bstrategy = NULL;
165 13794 : bool verbose = false;
166 13794 : bool skip_locked = false;
167 13794 : bool analyze = false;
168 13794 : bool freeze = false;
169 13794 : bool full = false;
170 13794 : bool disable_page_skipping = false;
171 13794 : bool process_main = true;
172 13794 : bool process_toast = true;
173 : int ring_size;
174 13794 : bool skip_database_stats = false;
175 13794 : bool only_database_stats = false;
176 : MemoryContext vac_context;
177 : ListCell *lc;
178 :
179 : /* index_cleanup and truncate values unspecified for now */
180 13794 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
181 13794 : params.truncate = VACOPTVALUE_UNSPECIFIED;
182 :
183 : /* By default parallel vacuum is enabled */
184 13794 : params.nworkers = 0;
185 :
186 : /* Will be set later if we recurse to a TOAST table. */
187 13794 : params.toast_parent = InvalidOid;
188 :
189 : /*
190 : * Set this to an invalid value so it is clear whether or not a
191 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
192 : */
193 13794 : ring_size = -1;
194 :
195 : /* Parse options list */
196 28212 : foreach(lc, vacstmt->options)
197 : {
198 14454 : DefElem *opt = (DefElem *) lfirst(lc);
199 :
200 : /* Parse common options for VACUUM and ANALYZE */
201 14454 : if (strcmp(opt->defname, "verbose") == 0)
202 42 : verbose = defGetBoolean(opt);
203 14412 : else if (strcmp(opt->defname, "skip_locked") == 0)
204 334 : skip_locked = defGetBoolean(opt);
205 14078 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
206 : {
207 : const char *hintmsg;
208 : int result;
209 : char *vac_buffer_size;
210 :
211 54 : vac_buffer_size = defGetString(opt);
212 :
213 : /*
214 : * Check that the specified value is valid and the size falls
215 : * within the hard upper and lower limits if it is not 0.
216 : */
217 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
218 48 : (result != 0 &&
219 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
220 : {
221 18 : ereport(ERROR,
222 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
223 : errmsg("%s option must be 0 or between %d kB and %d kB",
224 : "BUFFER_USAGE_LIMIT",
225 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
226 : hintmsg ? errhint_internal("%s", _(hintmsg)) : 0));
227 : }
228 :
229 36 : ring_size = result;
230 : }
231 14024 : else if (!vacstmt->is_vacuumcmd)
232 6 : ereport(ERROR,
233 : (errcode(ERRCODE_SYNTAX_ERROR),
234 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
235 : parser_errposition(pstate, opt->location)));
236 :
237 : /* Parse options available on VACUUM */
238 14018 : else if (strcmp(opt->defname, "analyze") == 0)
239 2750 : analyze = defGetBoolean(opt);
240 11268 : else if (strcmp(opt->defname, "freeze") == 0)
241 2552 : freeze = defGetBoolean(opt);
242 8716 : else if (strcmp(opt->defname, "full") == 0)
243 392 : full = defGetBoolean(opt);
244 8324 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
245 214 : disable_page_skipping = defGetBoolean(opt);
246 8110 : else if (strcmp(opt->defname, "index_cleanup") == 0)
247 : {
248 : /* Interpret no string as the default, which is 'auto' */
249 174 : if (!opt->arg)
250 0 : params.index_cleanup = VACOPTVALUE_AUTO;
251 : else
252 : {
253 174 : char *sval = defGetString(opt);
254 :
255 : /* Try matching on 'auto' string, or fall back on boolean */
256 174 : if (pg_strcasecmp(sval, "auto") == 0)
257 6 : params.index_cleanup = VACOPTVALUE_AUTO;
258 : else
259 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
260 : }
261 : }
262 7936 : else if (strcmp(opt->defname, "process_main") == 0)
263 154 : process_main = defGetBoolean(opt);
264 7782 : else if (strcmp(opt->defname, "process_toast") == 0)
265 160 : process_toast = defGetBoolean(opt);
266 7622 : else if (strcmp(opt->defname, "truncate") == 0)
267 158 : params.truncate = get_vacoptval_from_boolean(opt);
268 7464 : else if (strcmp(opt->defname, "parallel") == 0)
269 : {
270 352 : int nworkers = defGetInt32(opt);
271 :
272 346 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
273 6 : ereport(ERROR,
274 : (errcode(ERRCODE_SYNTAX_ERROR),
275 : errmsg("%s option must be between 0 and %d",
276 : "PARALLEL",
277 : MAX_PARALLEL_WORKER_LIMIT),
278 : parser_errposition(pstate, opt->location)));
279 :
280 : /*
281 : * Disable parallel vacuum, if user has specified parallel degree
282 : * as zero.
283 : */
284 340 : if (nworkers == 0)
285 156 : params.nworkers = -1;
286 : else
287 184 : params.nworkers = nworkers;
288 : }
289 7112 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
290 6982 : skip_database_stats = defGetBoolean(opt);
291 130 : else if (strcmp(opt->defname, "only_database_stats") == 0)
292 130 : only_database_stats = defGetBoolean(opt);
293 : else
294 0 : ereport(ERROR,
295 : (errcode(ERRCODE_SYNTAX_ERROR),
296 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
297 : parser_errposition(pstate, opt->location)));
298 : }
299 :
300 : /* Set vacuum options */
301 13758 : params.options =
302 13758 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
303 13758 : (verbose ? VACOPT_VERBOSE : 0) |
304 13758 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
305 13758 : (analyze ? VACOPT_ANALYZE : 0) |
306 13758 : (freeze ? VACOPT_FREEZE : 0) |
307 13758 : (full ? VACOPT_FULL : 0) |
308 13758 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
309 13758 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
310 13758 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
311 13758 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
312 13758 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
313 :
314 : /* sanity checks on options */
315 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
316 : Assert((params.options & VACOPT_VACUUM) ||
317 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
318 :
319 13758 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
320 6 : ereport(ERROR,
321 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
322 : errmsg("VACUUM FULL cannot be performed in parallel")));
323 :
324 : /*
325 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
326 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
327 : * we'll permit that.
328 : */
329 13752 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
330 6 : !(params.options & VACOPT_ANALYZE))
331 6 : ereport(ERROR,
332 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
333 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
334 :
335 : /*
336 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
337 : */
338 13746 : if (!(params.options & VACOPT_ANALYZE))
339 : {
340 12012 : foreach(lc, vacstmt->rels)
341 : {
342 5910 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
343 :
344 5910 : if (vrel->va_cols != NIL)
345 6 : ereport(ERROR,
346 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
347 : errmsg("ANALYZE option must be specified when a column list is provided")));
348 : }
349 : }
350 :
351 :
352 : /*
353 : * Sanity check DISABLE_PAGE_SKIPPING option.
354 : */
355 13740 : if ((params.options & VACOPT_FULL) != 0 &&
356 368 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
357 0 : ereport(ERROR,
358 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
359 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
360 :
361 : /* sanity check for PROCESS_TOAST */
362 13740 : if ((params.options & VACOPT_FULL) != 0 &&
363 368 : (params.options & VACOPT_PROCESS_TOAST) == 0)
364 6 : ereport(ERROR,
365 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
366 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
367 :
368 : /* sanity check for ONLY_DATABASE_STATS */
369 13734 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
370 : {
371 : Assert(params.options & VACOPT_VACUUM);
372 130 : if (vacstmt->rels != NIL)
373 6 : ereport(ERROR,
374 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
375 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
376 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
377 124 : if (params.options & ~(VACOPT_VACUUM |
378 : VACOPT_VERBOSE |
379 : VACOPT_PROCESS_MAIN |
380 : VACOPT_PROCESS_TOAST |
381 : VACOPT_ONLY_DATABASE_STATS))
382 0 : ereport(ERROR,
383 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
385 : }
386 :
387 : /*
388 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
389 : * them as -1 which means to use the default values.
390 : */
391 13728 : if (params.options & VACOPT_FREEZE)
392 : {
393 2552 : params.freeze_min_age = 0;
394 2552 : params.freeze_table_age = 0;
395 2552 : params.multixact_freeze_min_age = 0;
396 2552 : params.multixact_freeze_table_age = 0;
397 : }
398 : else
399 : {
400 11176 : params.freeze_min_age = -1;
401 11176 : params.freeze_table_age = -1;
402 11176 : params.multixact_freeze_min_age = -1;
403 11176 : params.multixact_freeze_table_age = -1;
404 : }
405 :
406 : /* user-invoked vacuum is never "for wraparound" */
407 13728 : params.is_wraparound = false;
408 :
409 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
410 13728 : params.log_min_duration = -1;
411 :
412 : /*
413 : * Later, in vacuum_rel(), we check if a reloption override was specified.
414 : */
415 13728 : params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
416 :
417 : /*
418 : * Create special memory context for cross-transaction storage.
419 : *
420 : * Since it is a child of PortalContext, it will go away eventually even
421 : * if we suffer an error; there's no need for special abort cleanup logic.
422 : */
423 13728 : vac_context = AllocSetContextCreate(PortalContext,
424 : "Vacuum",
425 : ALLOCSET_DEFAULT_SIZES);
426 :
427 : /*
428 : * Make a buffer strategy object in the cross-transaction memory context.
429 : * We needn't bother making this for VACUUM (FULL) or VACUUM
430 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
431 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
432 : * when we see ANALYZE.
433 : */
434 13728 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
435 486 : VACOPT_FULL)) == 0 ||
436 486 : (params.options & VACOPT_ANALYZE) != 0)
437 : {
438 :
439 13248 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
440 :
441 : Assert(ring_size >= -1);
442 :
443 : /*
444 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
445 : * command, it overrides the value of VacuumBufferUsageLimit. Either
446 : * value may be 0, in which case GetAccessStrategyWithSize() will
447 : * return NULL, effectively allowing full use of shared buffers.
448 : */
449 13248 : if (ring_size == -1)
450 13218 : ring_size = VacuumBufferUsageLimit;
451 :
452 13248 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
453 :
454 13248 : MemoryContextSwitchTo(old_context);
455 : }
456 :
457 : /* Now go through the common routine */
458 13728 : vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
459 :
460 : /* Finally, clean up the vacuum memory context */
461 13596 : MemoryContextDelete(vac_context);
462 13596 : }
463 :
464 : /*
465 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
466 : *
467 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
468 : * we process all relevant tables in the database. For each VacuumRelation,
469 : * if a valid OID is supplied, the table with that OID is what to process;
470 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
471 : *
472 : * params contains a set of parameters that can be used to customize the
473 : * behavior.
474 : *
475 : * bstrategy may be passed in as NULL when the caller does not want to
476 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
477 : * otherwise, the caller must build a BufferAccessStrategy with the number of
478 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
479 : * using.
480 : *
481 : * isTopLevel should be passed down from ProcessUtility.
482 : *
483 : * It is the caller's responsibility that all parameters are allocated in a
484 : * memory context that will not disappear at transaction commit.
485 : */
486 : void
487 209398 : vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
488 : MemoryContext vac_context, bool isTopLevel)
489 : {
490 : static bool in_vacuum = false;
491 :
492 : const char *stmttype;
493 : volatile bool in_outer_xact,
494 : use_own_xacts;
495 :
496 209398 : stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
497 :
498 : /*
499 : * We cannot run VACUUM inside a user transaction block; if we were inside
500 : * a transaction, then our commit- and start-transaction-command calls
501 : * would not have the intended effect! There are numerous other subtle
502 : * dependencies on this, too.
503 : *
504 : * ANALYZE (without VACUUM) can run either way.
505 : */
506 209398 : if (params.options & VACOPT_VACUUM)
507 : {
508 204276 : PreventInTransactionBlock(isTopLevel, stmttype);
509 204256 : in_outer_xact = false;
510 : }
511 : else
512 5122 : in_outer_xact = IsInTransactionBlock(isTopLevel);
513 :
514 : /*
515 : * Check for and disallow recursive calls. This could happen when VACUUM
516 : * FULL or ANALYZE calls a hostile index expression that itself calls
517 : * ANALYZE.
518 : */
519 209378 : if (in_vacuum)
520 12 : ereport(ERROR,
521 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
522 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
523 : stmttype)));
524 :
525 : /*
526 : * Build list of relation(s) to process, putting any new data in
527 : * vac_context for safekeeping.
528 : */
529 209366 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
530 : {
531 : /* We don't process any tables in this case */
532 : Assert(relations == NIL);
533 : }
534 209242 : else if (relations != NIL)
535 : {
536 209024 : List *newrels = NIL;
537 : ListCell *lc;
538 :
539 418152 : foreach(lc, relations)
540 : {
541 209164 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
542 : List *sublist;
543 : MemoryContext old_context;
544 :
545 209164 : sublist = expand_vacuum_rel(vrel, vac_context, params.options);
546 209128 : old_context = MemoryContextSwitchTo(vac_context);
547 209128 : newrels = list_concat(newrels, sublist);
548 209128 : MemoryContextSwitchTo(old_context);
549 : }
550 208988 : relations = newrels;
551 : }
552 : else
553 218 : relations = get_all_vacuum_rels(vac_context, params.options);
554 :
555 : /*
556 : * Decide whether we need to start/commit our own transactions.
557 : *
558 : * For VACUUM (with or without ANALYZE): always do so, so that we can
559 : * release locks as soon as possible. (We could possibly use the outer
560 : * transaction for a one-table VACUUM, but handling TOAST tables would be
561 : * problematic.)
562 : *
563 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
564 : * start/commit our own transactions. Also, there's no need to do so if
565 : * only processing one relation. For multiple relations when not within a
566 : * transaction block, and also in an autovacuum worker, use own
567 : * transactions so we can release locks sooner.
568 : */
569 209330 : if (params.options & VACOPT_VACUUM)
570 204244 : use_own_xacts = true;
571 : else
572 : {
573 : Assert(params.options & VACOPT_ANALYZE);
574 5086 : if (AmAutoVacuumWorkerProcess())
575 234 : use_own_xacts = true;
576 4852 : else if (in_outer_xact)
577 238 : use_own_xacts = false;
578 4614 : else if (list_length(relations) > 1)
579 810 : use_own_xacts = true;
580 : else
581 3804 : use_own_xacts = false;
582 : }
583 :
584 : /*
585 : * vacuum_rel expects to be entered with no transaction active; it will
586 : * start and commit its own transaction. But we are called by an SQL
587 : * command, and so we are executing inside a transaction already. We
588 : * commit the transaction started in PostgresMain() here, and start
589 : * another one before exiting to match the commit waiting for us back in
590 : * PostgresMain().
591 : */
592 209330 : if (use_own_xacts)
593 : {
594 : Assert(!in_outer_xact);
595 :
596 : /* ActiveSnapshot is not set by autovacuum */
597 205288 : if (ActiveSnapshotSet())
598 9618 : PopActiveSnapshot();
599 :
600 : /* matches the StartTransaction in PostgresMain() */
601 205288 : CommitTransactionCommand();
602 : }
603 :
604 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
605 209330 : PG_TRY();
606 : {
607 : ListCell *cur;
608 :
609 209330 : in_vacuum = true;
610 209330 : VacuumFailsafeActive = false;
611 209330 : VacuumUpdateCosts();
612 209330 : VacuumCostBalance = 0;
613 209330 : VacuumCostBalanceLocal = 0;
614 209330 : VacuumSharedCostBalance = NULL;
615 209330 : VacuumActiveNWorkers = NULL;
616 :
617 : /*
618 : * Loop to process each selected relation.
619 : */
620 435724 : foreach(cur, relations)
621 : {
622 226458 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
623 :
624 226458 : if (params.options & VACOPT_VACUUM)
625 : {
626 213062 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
627 100 : continue;
628 : }
629 :
630 226352 : if (params.options & VACOPT_ANALYZE)
631 : {
632 : /*
633 : * If using separate xacts, start one for analyze. Otherwise,
634 : * we can use the outer transaction.
635 : */
636 16368 : if (use_own_xacts)
637 : {
638 12376 : StartTransactionCommand();
639 : /* functions in indexes may want a snapshot set */
640 12376 : PushActiveSnapshot(GetTransactionSnapshot());
641 : }
642 :
643 16368 : analyze_rel(vrel->oid, vrel->relation, params,
644 : vrel->va_cols, in_outer_xact, bstrategy);
645 :
646 16310 : if (use_own_xacts)
647 : {
648 12338 : PopActiveSnapshot();
649 : /* standard_ProcessUtility() does CCI if !use_own_xacts */
650 12338 : CommandCounterIncrement();
651 12338 : CommitTransactionCommand();
652 : }
653 : else
654 : {
655 : /*
656 : * If we're not using separate xacts, better separate the
657 : * ANALYZE actions with CCIs. This avoids trouble if user
658 : * says "ANALYZE t, t".
659 : */
660 3972 : CommandCounterIncrement();
661 : }
662 : }
663 :
664 : /*
665 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
666 : * next relation.
667 : */
668 226294 : VacuumFailsafeActive = false;
669 : }
670 : }
671 64 : PG_FINALLY();
672 : {
673 209330 : in_vacuum = false;
674 209330 : VacuumCostActive = false;
675 209330 : VacuumFailsafeActive = false;
676 209330 : VacuumCostBalance = 0;
677 : }
678 209330 : PG_END_TRY();
679 :
680 : /*
681 : * Finish up processing.
682 : */
683 209266 : if (use_own_xacts)
684 : {
685 : /* here, we are not in a transaction */
686 :
687 : /*
688 : * This matches the CommitTransaction waiting for us in
689 : * PostgresMain().
690 : */
691 205244 : StartTransactionCommand();
692 : }
693 :
694 209266 : if ((params.options & VACOPT_VACUUM) &&
695 204212 : !(params.options & VACOPT_SKIP_DATABASE_STATS))
696 : {
697 : /*
698 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
699 : */
700 1796 : vac_update_datfrozenxid();
701 : }
702 :
703 209266 : }
704 :
705 : /*
706 : * Check if the current user has privileges to vacuum or analyze the relation.
707 : * If not, issue a WARNING log message and return false to let the caller
708 : * decide what to do with this relation. This routine is used to decide if a
709 : * relation can be processed for VACUUM or ANALYZE.
710 : */
711 : bool
712 270194 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
713 : bits32 options)
714 : {
715 : char *relname;
716 :
717 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
718 :
719 : /*----------
720 : * A role has privileges to vacuum or analyze the relation if any of the
721 : * following are true:
722 : * - the role owns the current database and the relation is not shared
723 : * - the role has the MAINTAIN privilege on the relation
724 : *----------
725 : */
726 270194 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
727 312266 : !reltuple->relisshared) ||
728 45630 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
729 267112 : return true;
730 :
731 3082 : relname = NameStr(reltuple->relname);
732 :
733 3082 : if ((options & VACOPT_VACUUM) != 0)
734 : {
735 224 : ereport(WARNING,
736 : (errmsg("permission denied to vacuum \"%s\", skipping it",
737 : relname)));
738 :
739 : /*
740 : * For VACUUM ANALYZE, both logs could show up, but just generate
741 : * information for VACUUM as that would be the first one to be
742 : * processed.
743 : */
744 224 : return false;
745 : }
746 :
747 2858 : if ((options & VACOPT_ANALYZE) != 0)
748 2858 : ereport(WARNING,
749 : (errmsg("permission denied to analyze \"%s\", skipping it",
750 : relname)));
751 :
752 2858 : return false;
753 : }
754 :
755 :
756 : /*
757 : * vacuum_open_relation
758 : *
759 : * This routine is used for attempting to open and lock a relation which
760 : * is going to be vacuumed or analyzed. If the relation cannot be opened
761 : * or locked, a log is emitted if possible.
762 : */
763 : Relation
764 238606 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
765 : bool verbose, LOCKMODE lmode)
766 : {
767 : Relation rel;
768 238606 : bool rel_lock = true;
769 : int elevel;
770 :
771 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
772 :
773 : /*
774 : * Open the relation and get the appropriate lock on it.
775 : *
776 : * There's a race condition here: the relation may have gone away since
777 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
778 : *
779 : * If we've been asked not to wait for the relation lock, acquire it first
780 : * in non-blocking mode, before calling try_relation_open().
781 : */
782 238606 : if (!(options & VACOPT_SKIP_LOCKED))
783 237542 : rel = try_relation_open(relid, lmode);
784 1064 : else if (ConditionalLockRelationOid(relid, lmode))
785 1044 : rel = try_relation_open(relid, NoLock);
786 : else
787 : {
788 20 : rel = NULL;
789 20 : rel_lock = false;
790 : }
791 :
792 : /* if relation is opened, leave */
793 238606 : if (rel)
794 238574 : return rel;
795 :
796 : /*
797 : * Relation could not be opened, hence generate if possible a log
798 : * informing on the situation.
799 : *
800 : * If the RangeVar is not defined, we do not have enough information to
801 : * provide a meaningful log statement. Chances are that the caller has
802 : * intentionally not provided this information so that this logging is
803 : * skipped, anyway.
804 : */
805 32 : if (relation == NULL)
806 18 : return NULL;
807 :
808 : /*
809 : * Determine the log level.
810 : *
811 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
812 : * statements in the permission checks; otherwise, only log if the caller
813 : * so requested.
814 : */
815 14 : if (!AmAutoVacuumWorkerProcess())
816 14 : elevel = WARNING;
817 0 : else if (verbose)
818 0 : elevel = LOG;
819 : else
820 0 : return NULL;
821 :
822 14 : if ((options & VACOPT_VACUUM) != 0)
823 : {
824 10 : if (!rel_lock)
825 6 : ereport(elevel,
826 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
827 : errmsg("skipping vacuum of \"%s\" --- lock not available",
828 : relation->relname)));
829 : else
830 4 : ereport(elevel,
831 : (errcode(ERRCODE_UNDEFINED_TABLE),
832 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
833 : relation->relname)));
834 :
835 : /*
836 : * For VACUUM ANALYZE, both logs could show up, but just generate
837 : * information for VACUUM as that would be the first one to be
838 : * processed.
839 : */
840 10 : return NULL;
841 : }
842 :
843 4 : if ((options & VACOPT_ANALYZE) != 0)
844 : {
845 4 : if (!rel_lock)
846 2 : ereport(elevel,
847 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
848 : errmsg("skipping analyze of \"%s\" --- lock not available",
849 : relation->relname)));
850 : else
851 2 : ereport(elevel,
852 : (errcode(ERRCODE_UNDEFINED_TABLE),
853 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
854 : relation->relname)));
855 : }
856 :
857 4 : return NULL;
858 : }
859 :
860 :
861 : /*
862 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
863 : * and optionally add VacuumRelations for partitions or inheritance children.
864 : *
865 : * If a VacuumRelation does not have an OID supplied and is a partitioned
866 : * table, an extra entry will be added to the output for each partition.
867 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
868 : * it does not want us to expand partitioned tables.
869 : *
870 : * We take care not to modify the input data structure, but instead build
871 : * new VacuumRelation(s) to return. (But note that they will reference
872 : * unmodified parts of the input, eg column lists.) New data structures
873 : * are made in vac_context.
874 : */
875 : static List *
876 209164 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
877 : int options)
878 : {
879 209164 : List *vacrels = NIL;
880 : MemoryContext oldcontext;
881 :
882 : /* If caller supplied OID, there's nothing we need do here. */
883 209164 : if (OidIsValid(vrel->oid))
884 : {
885 195670 : oldcontext = MemoryContextSwitchTo(vac_context);
886 195670 : vacrels = lappend(vacrels, vrel);
887 195670 : MemoryContextSwitchTo(oldcontext);
888 : }
889 : else
890 : {
891 : /*
892 : * Process a specific relation, and possibly partitions or child
893 : * tables thereof.
894 : */
895 : Oid relid;
896 : HeapTuple tuple;
897 : Form_pg_class classForm;
898 : bool include_children;
899 : bool is_partitioned_table;
900 : int rvr_opts;
901 :
902 : /*
903 : * Since autovacuum workers supply OIDs when calling vacuum(), no
904 : * autovacuum worker should reach this code.
905 : */
906 : Assert(!AmAutoVacuumWorkerProcess());
907 :
908 : /*
909 : * We transiently take AccessShareLock to protect the syscache lookup
910 : * below, as well as find_all_inheritors's expectation that the caller
911 : * holds some lock on the starting relation.
912 : */
913 13494 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
914 13494 : relid = RangeVarGetRelidExtended(vrel->relation,
915 : AccessShareLock,
916 : rvr_opts,
917 : NULL, NULL);
918 :
919 : /*
920 : * If the lock is unavailable, emit the same log statement that
921 : * vacuum_rel() and analyze_rel() would.
922 : */
923 13458 : if (!OidIsValid(relid))
924 : {
925 8 : if (options & VACOPT_VACUUM)
926 6 : ereport(WARNING,
927 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
928 : errmsg("skipping vacuum of \"%s\" --- lock not available",
929 : vrel->relation->relname)));
930 : else
931 2 : ereport(WARNING,
932 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
933 : errmsg("skipping analyze of \"%s\" --- lock not available",
934 : vrel->relation->relname)));
935 8 : return vacrels;
936 : }
937 :
938 : /*
939 : * To check whether the relation is a partitioned table and its
940 : * ownership, fetch its syscache entry.
941 : */
942 13450 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
943 13450 : if (!HeapTupleIsValid(tuple))
944 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
945 13450 : classForm = (Form_pg_class) GETSTRUCT(tuple);
946 :
947 : /*
948 : * Make a returnable VacuumRelation for this rel if the user has the
949 : * required privileges.
950 : */
951 13450 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
952 : {
953 13218 : oldcontext = MemoryContextSwitchTo(vac_context);
954 13218 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
955 : relid,
956 : vrel->va_cols));
957 13218 : MemoryContextSwitchTo(oldcontext);
958 : }
959 :
960 : /*
961 : * Vacuuming a partitioned table with ONLY will not do anything since
962 : * the partitioned table itself is empty. Issue a warning if the user
963 : * requests this.
964 : */
965 13450 : include_children = vrel->relation->inh;
966 13450 : is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
967 13450 : if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
968 6 : ereport(WARNING,
969 : (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
970 : vrel->relation->relname)));
971 :
972 13450 : ReleaseSysCache(tuple);
973 :
974 : /*
975 : * Unless the user has specified ONLY, make relation list entries for
976 : * its partitions or inheritance child tables. Note that the list
977 : * returned by find_all_inheritors() includes the passed-in OID, so we
978 : * have to skip that. There's no point in taking locks on the
979 : * individual partitions or child tables yet, and doing so would just
980 : * add unnecessary deadlock risk. For this last reason, we do not yet
981 : * check the ownership of the partitions/tables, which get added to
982 : * the list to process. Ownership will be checked later on anyway.
983 : */
984 13450 : if (include_children)
985 : {
986 13420 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
987 : ListCell *part_lc;
988 :
989 29030 : foreach(part_lc, part_oids)
990 : {
991 15610 : Oid part_oid = lfirst_oid(part_lc);
992 :
993 15610 : if (part_oid == relid)
994 13420 : continue; /* ignore original table */
995 :
996 : /*
997 : * We omit a RangeVar since it wouldn't be appropriate to
998 : * complain about failure to open one of these relations
999 : * later.
1000 : */
1001 2190 : oldcontext = MemoryContextSwitchTo(vac_context);
1002 2190 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1003 : part_oid,
1004 : vrel->va_cols));
1005 2190 : MemoryContextSwitchTo(oldcontext);
1006 : }
1007 : }
1008 :
1009 : /*
1010 : * Release lock again. This means that by the time we actually try to
1011 : * process the table, it might be gone or renamed. In the former case
1012 : * we'll silently ignore it; in the latter case we'll process it
1013 : * anyway, but we must beware that the RangeVar doesn't necessarily
1014 : * identify it anymore. This isn't ideal, perhaps, but there's little
1015 : * practical alternative, since we're typically going to commit this
1016 : * transaction and begin a new one between now and then. Moreover,
1017 : * holding locks on multiple relations would create significant risk
1018 : * of deadlock.
1019 : */
1020 13450 : UnlockRelationOid(relid, AccessShareLock);
1021 : }
1022 :
1023 209120 : return vacrels;
1024 : }
1025 :
1026 : /*
1027 : * Construct a list of VacuumRelations for all vacuumable rels in
1028 : * the current database. The list is built in vac_context.
1029 : */
1030 : static List *
1031 218 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1032 : {
1033 218 : List *vacrels = NIL;
1034 : Relation pgclass;
1035 : TableScanDesc scan;
1036 : HeapTuple tuple;
1037 :
1038 218 : pgclass = table_open(RelationRelationId, AccessShareLock);
1039 :
1040 218 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1041 :
1042 98768 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1043 : {
1044 98550 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1045 : MemoryContext oldcontext;
1046 98550 : Oid relid = classForm->oid;
1047 :
1048 : /*
1049 : * We include partitioned tables here; depending on which operation is
1050 : * to be performed, caller will decide whether to process or ignore
1051 : * them.
1052 : */
1053 98550 : if (classForm->relkind != RELKIND_RELATION &&
1054 80592 : classForm->relkind != RELKIND_MATVIEW &&
1055 80544 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1056 80380 : continue;
1057 :
1058 : /* check permissions of relation */
1059 18170 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1060 2742 : continue;
1061 :
1062 : /*
1063 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1064 : * We omit a RangeVar since it wouldn't be appropriate to complain
1065 : * about failure to open one of these relations later.
1066 : */
1067 15428 : oldcontext = MemoryContextSwitchTo(vac_context);
1068 15428 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1069 : relid,
1070 : NIL));
1071 15428 : MemoryContextSwitchTo(oldcontext);
1072 : }
1073 :
1074 218 : table_endscan(scan);
1075 218 : table_close(pgclass, AccessShareLock);
1076 :
1077 218 : return vacrels;
1078 : }
1079 :
1080 : /*
1081 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1082 : *
1083 : * The target relation and VACUUM parameters are our inputs.
1084 : *
1085 : * Output parameters are the cutoffs that VACUUM caller should use.
1086 : *
1087 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1088 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1089 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1090 : * minimum).
1091 : */
1092 : bool
1093 221988 : vacuum_get_cutoffs(Relation rel, const VacuumParams params,
1094 : struct VacuumCutoffs *cutoffs)
1095 : {
1096 : int freeze_min_age,
1097 : multixact_freeze_min_age,
1098 : freeze_table_age,
1099 : multixact_freeze_table_age,
1100 : effective_multixact_freeze_max_age;
1101 : TransactionId nextXID,
1102 : safeOldestXmin,
1103 : aggressiveXIDCutoff;
1104 : MultiXactId nextMXID,
1105 : safeOldestMxact,
1106 : aggressiveMXIDCutoff;
1107 :
1108 : /* Use mutable copies of freeze age parameters */
1109 221988 : freeze_min_age = params.freeze_min_age;
1110 221988 : multixact_freeze_min_age = params.multixact_freeze_min_age;
1111 221988 : freeze_table_age = params.freeze_table_age;
1112 221988 : multixact_freeze_table_age = params.multixact_freeze_table_age;
1113 :
1114 : /* Set pg_class fields in cutoffs */
1115 221988 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1116 221988 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1117 :
1118 : /*
1119 : * Acquire OldestXmin.
1120 : *
1121 : * We can always ignore processes running lazy vacuum. This is because we
1122 : * use these values only for deciding which tuples we must keep in the
1123 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1124 : * XID assigned), it's safe to ignore it. In theory it could be
1125 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1126 : * that only one vacuum process can be working on a particular table at
1127 : * any time, and that each vacuum is always an independent transaction.
1128 : */
1129 221988 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1130 :
1131 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1132 :
1133 : /* Acquire OldestMxact */
1134 221988 : cutoffs->OldestMxact = GetOldestMultiXactId();
1135 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1136 :
1137 : /* Acquire next XID/next MXID values used to apply age-based settings */
1138 221988 : nextXID = ReadNextTransactionId();
1139 221988 : nextMXID = ReadNextMultiXactId();
1140 :
1141 : /*
1142 : * Also compute the multixact age for which freezing is urgent. This is
1143 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1144 : * short of multixact member space.
1145 : */
1146 221988 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1147 :
1148 : /*
1149 : * Almost ready to set freeze output parameters; check if OldestXmin or
1150 : * OldestMxact are held back to an unsafe degree before we start on that
1151 : */
1152 221988 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1153 221988 : if (!TransactionIdIsNormal(safeOldestXmin))
1154 0 : safeOldestXmin = FirstNormalTransactionId;
1155 221988 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1156 221988 : if (safeOldestMxact < FirstMultiXactId)
1157 0 : safeOldestMxact = FirstMultiXactId;
1158 221988 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1159 160964 : ereport(WARNING,
1160 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1161 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1162 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1163 221988 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1164 0 : ereport(WARNING,
1165 : (errmsg("cutoff for freezing multixacts is far in the past"),
1166 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1167 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1168 :
1169 : /*
1170 : * Determine the minimum freeze age to use: as specified by the caller, or
1171 : * vacuum_freeze_min_age, but in any case not more than half
1172 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1173 : * wraparound won't occur too frequently.
1174 : */
1175 221988 : if (freeze_min_age < 0)
1176 11016 : freeze_min_age = vacuum_freeze_min_age;
1177 221988 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1178 : Assert(freeze_min_age >= 0);
1179 :
1180 : /* Compute FreezeLimit, being careful to generate a normal XID */
1181 221988 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1182 221988 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1183 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1184 : /* FreezeLimit must always be <= OldestXmin */
1185 221988 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1186 184730 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1187 :
1188 : /*
1189 : * Determine the minimum multixact freeze age to use: as specified by
1190 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1191 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1192 : * prevent MultiXact wraparound won't occur too frequently.
1193 : */
1194 221988 : if (multixact_freeze_min_age < 0)
1195 11016 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1196 221988 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1197 : effective_multixact_freeze_max_age / 2);
1198 : Assert(multixact_freeze_min_age >= 0);
1199 :
1200 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1201 221988 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1202 221988 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1203 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1204 : /* MultiXactCutoff must always be <= OldestMxact */
1205 221988 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1206 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1207 :
1208 : /*
1209 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1210 : *
1211 : * Determine the table freeze age to use: as specified by the caller, or
1212 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1213 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1214 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1215 : * anti-wraparound autovacuum is launched.
1216 : */
1217 221988 : if (freeze_table_age < 0)
1218 11016 : freeze_table_age = vacuum_freeze_table_age;
1219 221988 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1220 : Assert(freeze_table_age >= 0);
1221 221988 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1222 221988 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1223 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1224 221988 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1225 : aggressiveXIDCutoff))
1226 210956 : return true;
1227 :
1228 : /*
1229 : * Similar to the above, determine the table freeze age to use for
1230 : * multixacts: as specified by the caller, or the value of the
1231 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1232 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1233 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1234 : * multixacts before anti-wraparound autovacuum is launched.
1235 : */
1236 11032 : if (multixact_freeze_table_age < 0)
1237 10872 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1238 11032 : multixact_freeze_table_age =
1239 11032 : Min(multixact_freeze_table_age,
1240 : effective_multixact_freeze_max_age * 0.95);
1241 : Assert(multixact_freeze_table_age >= 0);
1242 11032 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1243 11032 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1244 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1245 11032 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1246 : aggressiveMXIDCutoff))
1247 0 : return true;
1248 :
1249 : /* Non-aggressive VACUUM */
1250 11032 : return false;
1251 : }
1252 :
1253 : /*
1254 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1255 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1256 : * dangerously far in the past.
1257 : *
1258 : * When we return true, VACUUM caller triggers the failsafe.
1259 : */
1260 : bool
1261 225064 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1262 : {
1263 225064 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1264 225064 : MultiXactId relminmxid = cutoffs->relminmxid;
1265 : TransactionId xid_skip_limit;
1266 : MultiXactId multi_skip_limit;
1267 : int skip_index_vacuum;
1268 :
1269 : Assert(TransactionIdIsNormal(relfrozenxid));
1270 : Assert(MultiXactIdIsValid(relminmxid));
1271 :
1272 : /*
1273 : * Determine the index skipping age to use. In any case no less than
1274 : * autovacuum_freeze_max_age * 1.05.
1275 : */
1276 225064 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1277 :
1278 225064 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1279 225064 : if (!TransactionIdIsNormal(xid_skip_limit))
1280 0 : xid_skip_limit = FirstNormalTransactionId;
1281 :
1282 225064 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1283 : {
1284 : /* The table's relfrozenxid is too old */
1285 43292 : return true;
1286 : }
1287 :
1288 : /*
1289 : * Similar to above, determine the index skipping age to use for
1290 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1291 : * 1.05.
1292 : */
1293 181772 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1294 : autovacuum_multixact_freeze_max_age * 1.05);
1295 :
1296 181772 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1297 181772 : if (multi_skip_limit < FirstMultiXactId)
1298 0 : multi_skip_limit = FirstMultiXactId;
1299 :
1300 181772 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1301 : {
1302 : /* The table's relminmxid is too old */
1303 0 : return true;
1304 : }
1305 :
1306 181772 : return false;
1307 : }
1308 :
1309 : /*
1310 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1311 : *
1312 : * If we scanned the whole relation then we should just use the count of
1313 : * live tuples seen; but if we did not, we should not blindly extrapolate
1314 : * from that number, since VACUUM may have scanned a quite nonrandom
1315 : * subset of the table. When we have only partial information, we take
1316 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1317 : * of the tuple density in the unscanned pages.
1318 : *
1319 : * Note: scanned_tuples should count only *live* tuples, since
1320 : * pg_class.reltuples is defined that way.
1321 : */
1322 : double
1323 221422 : vac_estimate_reltuples(Relation relation,
1324 : BlockNumber total_pages,
1325 : BlockNumber scanned_pages,
1326 : double scanned_tuples)
1327 : {
1328 221422 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1329 221422 : double old_rel_tuples = relation->rd_rel->reltuples;
1330 : double old_density;
1331 : double unscanned_pages;
1332 : double total_tuples;
1333 :
1334 : /* If we did scan the whole table, just use the count as-is */
1335 221422 : if (scanned_pages >= total_pages)
1336 213444 : return scanned_tuples;
1337 :
1338 : /*
1339 : * When successive VACUUM commands scan the same few pages again and
1340 : * again, without anything from the table really changing, there is a risk
1341 : * that our beliefs about tuple density will gradually become distorted.
1342 : * This might be caused by vacuumlazy.c implementation details, such as
1343 : * its tendency to always scan the last heap page. Handle that here.
1344 : *
1345 : * If the relation is _exactly_ the same size according to the existing
1346 : * pg_class entry, and only a few of its pages (less than 2%) were
1347 : * scanned, keep the existing value of reltuples. Also keep the existing
1348 : * value when only a subset of rel's pages <= a single page were scanned.
1349 : *
1350 : * (Note: we might be returning -1 here.)
1351 : */
1352 7978 : if (old_rel_pages == total_pages &&
1353 7952 : scanned_pages < (double) total_pages * 0.02)
1354 5676 : return old_rel_tuples;
1355 2302 : if (scanned_pages <= 1)
1356 1936 : return old_rel_tuples;
1357 :
1358 : /*
1359 : * If old density is unknown, we can't do much except scale up
1360 : * scanned_tuples to match total_pages.
1361 : */
1362 366 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1363 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1364 :
1365 : /*
1366 : * Okay, we've covered the corner cases. The normal calculation is to
1367 : * convert the old measurement to a density (tuples per page), then
1368 : * estimate the number of tuples in the unscanned pages using that figure,
1369 : * and finally add on the number of tuples in the scanned pages.
1370 : */
1371 364 : old_density = old_rel_tuples / old_rel_pages;
1372 364 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1373 364 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1374 364 : return floor(total_tuples + 0.5);
1375 : }
1376 :
1377 :
1378 : /*
1379 : * vac_update_relstats() -- update statistics for one relation
1380 : *
1381 : * Update the whole-relation statistics that are kept in its pg_class
1382 : * row. There are additional stats that will be updated if we are
1383 : * doing ANALYZE, but we always update these stats. This routine works
1384 : * for both index and heap relation entries in pg_class.
1385 : *
1386 : * We violate transaction semantics here by overwriting the rel's
1387 : * existing pg_class tuple with the new values. This is reasonably
1388 : * safe as long as we're sure that the new values are correct whether or
1389 : * not this transaction commits. The reason for doing this is that if
1390 : * we updated these tuples in the usual way, vacuuming pg_class itself
1391 : * wouldn't work very well --- by the time we got done with a vacuum
1392 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1393 : * course, this only works for fixed-size not-null columns, but these are.
1394 : *
1395 : * Another reason for doing it this way is that when we are in a lazy
1396 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1397 : * Somebody vacuuming pg_class might think they could delete a tuple
1398 : * marked with xmin = our xid.
1399 : *
1400 : * In addition to fundamentally nontransactional statistics such as
1401 : * relpages and relallvisible, we try to maintain certain lazily-updated
1402 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1403 : * It's safe to do this in VACUUM, which can't run in parallel with
1404 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1405 : * However, it's *not* safe to do it in an ANALYZE that's within an
1406 : * outer transaction, because for example the current transaction might
1407 : * have dropped the last index; then we'd think relhasindex should be
1408 : * cleared, but if the transaction later rolls back this would be wrong.
1409 : * So we refrain from updating the DDL flags if we're inside an outer
1410 : * transaction. This is OK since postponing the flag maintenance is
1411 : * always allowable.
1412 : *
1413 : * Note: num_tuples should count only *live* tuples, since
1414 : * pg_class.reltuples is defined that way.
1415 : *
1416 : * This routine is shared by VACUUM and ANALYZE.
1417 : */
1418 : void
1419 262880 : vac_update_relstats(Relation relation,
1420 : BlockNumber num_pages, double num_tuples,
1421 : BlockNumber num_all_visible_pages,
1422 : BlockNumber num_all_frozen_pages,
1423 : bool hasindex, TransactionId frozenxid,
1424 : MultiXactId minmulti,
1425 : bool *frozenxid_updated, bool *minmulti_updated,
1426 : bool in_outer_xact)
1427 : {
1428 262880 : Oid relid = RelationGetRelid(relation);
1429 : Relation rd;
1430 : ScanKeyData key[1];
1431 : HeapTuple ctup;
1432 : void *inplace_state;
1433 : Form_pg_class pgcform;
1434 : bool dirty,
1435 : futurexid,
1436 : futuremxid;
1437 : TransactionId oldfrozenxid;
1438 : MultiXactId oldminmulti;
1439 :
1440 262880 : rd = table_open(RelationRelationId, RowExclusiveLock);
1441 :
1442 : /* Fetch a copy of the tuple to scribble on */
1443 262880 : ScanKeyInit(&key[0],
1444 : Anum_pg_class_oid,
1445 : BTEqualStrategyNumber, F_OIDEQ,
1446 : ObjectIdGetDatum(relid));
1447 262880 : systable_inplace_update_begin(rd, ClassOidIndexId, true,
1448 : NULL, 1, key, &ctup, &inplace_state);
1449 262880 : if (!HeapTupleIsValid(ctup))
1450 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1451 : relid);
1452 262880 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1453 :
1454 : /* Apply statistical updates, if any, to copied tuple */
1455 :
1456 262880 : dirty = false;
1457 262880 : if (pgcform->relpages != (int32) num_pages)
1458 : {
1459 9426 : pgcform->relpages = (int32) num_pages;
1460 9426 : dirty = true;
1461 : }
1462 262880 : if (pgcform->reltuples != (float4) num_tuples)
1463 : {
1464 20226 : pgcform->reltuples = (float4) num_tuples;
1465 20226 : dirty = true;
1466 : }
1467 262880 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1468 : {
1469 5778 : pgcform->relallvisible = (int32) num_all_visible_pages;
1470 5778 : dirty = true;
1471 : }
1472 262880 : if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
1473 : {
1474 5222 : pgcform->relallfrozen = (int32) num_all_frozen_pages;
1475 5222 : dirty = true;
1476 : }
1477 :
1478 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1479 :
1480 262880 : if (!in_outer_xact)
1481 : {
1482 : /*
1483 : * If we didn't find any indexes, reset relhasindex.
1484 : */
1485 262566 : if (pgcform->relhasindex && !hasindex)
1486 : {
1487 20 : pgcform->relhasindex = false;
1488 20 : dirty = true;
1489 : }
1490 :
1491 : /* We also clear relhasrules and relhastriggers if needed */
1492 262566 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1493 : {
1494 0 : pgcform->relhasrules = false;
1495 0 : dirty = true;
1496 : }
1497 262566 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1498 : {
1499 6 : pgcform->relhastriggers = false;
1500 6 : dirty = true;
1501 : }
1502 : }
1503 :
1504 : /*
1505 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1506 : * indicating it has no new data.
1507 : *
1508 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1509 : * stored relfrozenxid is "in the future" then it seems best to assume
1510 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1511 : * This should match vac_update_datfrozenxid() concerning what we consider
1512 : * to be "in the future".
1513 : */
1514 262880 : oldfrozenxid = pgcform->relfrozenxid;
1515 262880 : futurexid = false;
1516 262880 : if (frozenxid_updated)
1517 221418 : *frozenxid_updated = false;
1518 262880 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1519 : {
1520 58062 : bool update = false;
1521 :
1522 58062 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1523 57936 : update = true;
1524 126 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1525 0 : futurexid = update = true;
1526 :
1527 58062 : if (update)
1528 : {
1529 57936 : pgcform->relfrozenxid = frozenxid;
1530 57936 : dirty = true;
1531 57936 : if (frozenxid_updated)
1532 57936 : *frozenxid_updated = true;
1533 : }
1534 : }
1535 :
1536 : /* Similarly for relminmxid */
1537 262880 : oldminmulti = pgcform->relminmxid;
1538 262880 : futuremxid = false;
1539 262880 : if (minmulti_updated)
1540 221418 : *minmulti_updated = false;
1541 262880 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1542 : {
1543 310 : bool update = false;
1544 :
1545 310 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1546 310 : update = true;
1547 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1548 0 : futuremxid = update = true;
1549 :
1550 310 : if (update)
1551 : {
1552 310 : pgcform->relminmxid = minmulti;
1553 310 : dirty = true;
1554 310 : if (minmulti_updated)
1555 310 : *minmulti_updated = true;
1556 : }
1557 : }
1558 :
1559 : /* If anything changed, write out the tuple. */
1560 262880 : if (dirty)
1561 72260 : systable_inplace_update_finish(inplace_state, ctup);
1562 : else
1563 190620 : systable_inplace_update_cancel(inplace_state);
1564 :
1565 262880 : table_close(rd, RowExclusiveLock);
1566 :
1567 262880 : if (futurexid)
1568 0 : ereport(WARNING,
1569 : (errcode(ERRCODE_DATA_CORRUPTED),
1570 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1571 : oldfrozenxid, frozenxid,
1572 : RelationGetRelationName(relation))));
1573 262880 : if (futuremxid)
1574 0 : ereport(WARNING,
1575 : (errcode(ERRCODE_DATA_CORRUPTED),
1576 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1577 : oldminmulti, minmulti,
1578 : RelationGetRelationName(relation))));
1579 262880 : }
1580 :
1581 :
1582 : /*
1583 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1584 : *
1585 : * Update pg_database's datfrozenxid entry for our database to be the
1586 : * minimum of the pg_class.relfrozenxid values.
1587 : *
1588 : * Similarly, update our datminmxid to be the minimum of the
1589 : * pg_class.relminmxid values.
1590 : *
1591 : * If we are able to advance either pg_database value, also try to
1592 : * truncate pg_xact and pg_multixact.
1593 : *
1594 : * We violate transaction semantics here by overwriting the database's
1595 : * existing pg_database tuple with the new values. This is reasonably
1596 : * safe since the new values are correct whether or not this transaction
1597 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1598 : * behind after a VACUUM.
1599 : */
1600 : void
1601 5720 : vac_update_datfrozenxid(void)
1602 : {
1603 : HeapTuple tuple;
1604 : Form_pg_database dbform;
1605 : Relation relation;
1606 : SysScanDesc scan;
1607 : HeapTuple classTup;
1608 : TransactionId newFrozenXid;
1609 : MultiXactId newMinMulti;
1610 : TransactionId lastSaneFrozenXid;
1611 : MultiXactId lastSaneMinMulti;
1612 5720 : bool bogus = false;
1613 5720 : bool dirty = false;
1614 : ScanKeyData key[1];
1615 : void *inplace_state;
1616 :
1617 : /*
1618 : * Restrict this task to one backend per database. This avoids race
1619 : * conditions that would move datfrozenxid or datminmxid backward. It
1620 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1621 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1622 : */
1623 5720 : LockDatabaseFrozenIds(ExclusiveLock);
1624 :
1625 : /*
1626 : * Initialize the "min" calculation with
1627 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1628 : * approximation to the minimum relfrozenxid for not-yet-committed
1629 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1630 : * cannot produce a wrong minimum by starting with this.
1631 : */
1632 5720 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1633 :
1634 : /*
1635 : * Similarly, initialize the MultiXact "min" with the value that would be
1636 : * used on pg_class for new tables. See AddNewRelationTuple().
1637 : */
1638 5720 : newMinMulti = GetOldestMultiXactId();
1639 :
1640 : /*
1641 : * Identify the latest relfrozenxid and relminmxid values that we could
1642 : * validly see during the scan. These are conservative values, but it's
1643 : * not really worth trying to be more exact.
1644 : */
1645 5720 : lastSaneFrozenXid = ReadNextTransactionId();
1646 5720 : lastSaneMinMulti = ReadNextMultiXactId();
1647 :
1648 : /*
1649 : * We must seqscan pg_class to find the minimum Xid, because there is no
1650 : * index that can help us here.
1651 : *
1652 : * See vac_truncate_clog() for the race condition to prevent.
1653 : */
1654 5720 : relation = table_open(RelationRelationId, AccessShareLock);
1655 :
1656 5720 : scan = systable_beginscan(relation, InvalidOid, false,
1657 : NULL, 0, NULL);
1658 :
1659 2842506 : while ((classTup = systable_getnext(scan)) != NULL)
1660 : {
1661 2836786 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1662 2836786 : TransactionId relfrozenxid = classForm->relfrozenxid;
1663 2836786 : TransactionId relminmxid = classForm->relminmxid;
1664 :
1665 : /*
1666 : * Only consider relations able to hold unfrozen XIDs (anything else
1667 : * should have InvalidTransactionId in relfrozenxid anyway).
1668 : */
1669 2836786 : if (classForm->relkind != RELKIND_RELATION &&
1670 2266804 : classForm->relkind != RELKIND_MATVIEW &&
1671 2264468 : classForm->relkind != RELKIND_TOASTVALUE)
1672 : {
1673 : Assert(!TransactionIdIsValid(relfrozenxid));
1674 : Assert(!MultiXactIdIsValid(relminmxid));
1675 1968766 : continue;
1676 : }
1677 :
1678 : /*
1679 : * Some table AMs might not need per-relation xid / multixid horizons.
1680 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1681 : * to not be set (i.e. set to their respective Invalid*Id)
1682 : * independently. Thus validate and compute horizon for each only if
1683 : * set.
1684 : *
1685 : * If things are working properly, no relation should have a
1686 : * relfrozenxid or relminmxid that is "in the future". However, such
1687 : * cases have been known to arise due to bugs in pg_upgrade. If we
1688 : * see any entries that are "in the future", chicken out and don't do
1689 : * anything. This ensures we won't truncate clog & multixact SLRUs
1690 : * before those relations have been scanned and cleaned up.
1691 : */
1692 :
1693 868020 : if (TransactionIdIsValid(relfrozenxid))
1694 : {
1695 : Assert(TransactionIdIsNormal(relfrozenxid));
1696 :
1697 : /* check for values in the future */
1698 868020 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1699 : {
1700 0 : bogus = true;
1701 0 : break;
1702 : }
1703 :
1704 : /* determine new horizon */
1705 868020 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1706 5192 : newFrozenXid = relfrozenxid;
1707 : }
1708 :
1709 868020 : if (MultiXactIdIsValid(relminmxid))
1710 : {
1711 : /* check for values in the future */
1712 868020 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1713 : {
1714 0 : bogus = true;
1715 0 : break;
1716 : }
1717 :
1718 : /* determine new horizon */
1719 868020 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1720 218 : newMinMulti = relminmxid;
1721 : }
1722 : }
1723 :
1724 : /* we're done with pg_class */
1725 5720 : systable_endscan(scan);
1726 5720 : table_close(relation, AccessShareLock);
1727 :
1728 : /* chicken out if bogus data found */
1729 5720 : if (bogus)
1730 0 : return;
1731 :
1732 : Assert(TransactionIdIsNormal(newFrozenXid));
1733 : Assert(MultiXactIdIsValid(newMinMulti));
1734 :
1735 : /* Now fetch the pg_database tuple we need to update. */
1736 5720 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1737 :
1738 : /*
1739 : * Fetch a copy of the tuple to scribble on. We could check the syscache
1740 : * tuple first. If that concluded !dirty, we'd avoid waiting on
1741 : * concurrent heap_update() and would avoid exclusive-locking the buffer.
1742 : * For now, don't optimize that.
1743 : */
1744 5720 : ScanKeyInit(&key[0],
1745 : Anum_pg_database_oid,
1746 : BTEqualStrategyNumber, F_OIDEQ,
1747 : ObjectIdGetDatum(MyDatabaseId));
1748 :
1749 5720 : systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
1750 : NULL, 1, key, &tuple, &inplace_state);
1751 :
1752 5720 : if (!HeapTupleIsValid(tuple))
1753 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1754 :
1755 5720 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1756 :
1757 : /*
1758 : * As in vac_update_relstats(), we ordinarily don't want to let
1759 : * datfrozenxid go backward; but if it's "in the future" then it must be
1760 : * corrupt and it seems best to overwrite it.
1761 : */
1762 6404 : if (dbform->datfrozenxid != newFrozenXid &&
1763 684 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1764 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1765 : {
1766 684 : dbform->datfrozenxid = newFrozenXid;
1767 684 : dirty = true;
1768 : }
1769 : else
1770 5036 : newFrozenXid = dbform->datfrozenxid;
1771 :
1772 : /* Ditto for datminmxid */
1773 5722 : if (dbform->datminmxid != newMinMulti &&
1774 2 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1775 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1776 : {
1777 2 : dbform->datminmxid = newMinMulti;
1778 2 : dirty = true;
1779 : }
1780 : else
1781 5718 : newMinMulti = dbform->datminmxid;
1782 :
1783 5720 : if (dirty)
1784 684 : systable_inplace_update_finish(inplace_state, tuple);
1785 : else
1786 5036 : systable_inplace_update_cancel(inplace_state);
1787 :
1788 5720 : heap_freetuple(tuple);
1789 5720 : table_close(relation, RowExclusiveLock);
1790 :
1791 : /*
1792 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1793 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1794 : * XID-wrap-limit info is stale, since this action will update that too.
1795 : */
1796 5720 : if (dirty || ForceTransactionIdLimitUpdate())
1797 2164 : vac_truncate_clog(newFrozenXid, newMinMulti,
1798 : lastSaneFrozenXid, lastSaneMinMulti);
1799 : }
1800 :
1801 :
1802 : /*
1803 : * vac_truncate_clog() -- attempt to truncate the commit log
1804 : *
1805 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1806 : * and use it to truncate the transaction commit log (pg_xact).
1807 : * Also update the XID wrap limit info maintained by varsup.c.
1808 : * Likewise for datminmxid.
1809 : *
1810 : * The passed frozenXID and minMulti are the updated values for my own
1811 : * pg_database entry. They're used to initialize the "min" calculations.
1812 : * The caller also passes the "last sane" XID and MXID, since it has
1813 : * those at hand already.
1814 : *
1815 : * This routine is only invoked when we've managed to change our
1816 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1817 : * XID-wrap-limit info is stale.
1818 : */
1819 : static void
1820 2164 : vac_truncate_clog(TransactionId frozenXID,
1821 : MultiXactId minMulti,
1822 : TransactionId lastSaneFrozenXid,
1823 : MultiXactId lastSaneMinMulti)
1824 : {
1825 2164 : TransactionId nextXID = ReadNextTransactionId();
1826 : Relation relation;
1827 : TableScanDesc scan;
1828 : HeapTuple tuple;
1829 : Oid oldestxid_datoid;
1830 : Oid minmulti_datoid;
1831 2164 : bool bogus = false;
1832 2164 : bool frozenAlreadyWrapped = false;
1833 :
1834 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1835 2164 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1836 :
1837 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1838 2164 : oldestxid_datoid = MyDatabaseId;
1839 2164 : minmulti_datoid = MyDatabaseId;
1840 :
1841 : /*
1842 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1843 : *
1844 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1845 : * the values could change while we look at them. Fetch each one just
1846 : * once to ensure sane behavior of the comparison logic. (Here, as in
1847 : * many other places, we assume that fetching or updating an XID in shared
1848 : * storage is atomic.)
1849 : *
1850 : * Note: we need not worry about a race condition with new entries being
1851 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1852 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1853 : * of the interlock against copying a DB containing an active backend.
1854 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1855 : * concurrently modify the datfrozenxid's of different databases, the
1856 : * worst possible outcome is that pg_xact is not truncated as aggressively
1857 : * as it could be.
1858 : */
1859 2164 : relation = table_open(DatabaseRelationId, AccessShareLock);
1860 :
1861 2164 : scan = table_beginscan_catalog(relation, 0, NULL);
1862 :
1863 8482 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1864 : {
1865 6318 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1866 6318 : TransactionId datfrozenxid = dbform->datfrozenxid;
1867 6318 : TransactionId datminmxid = dbform->datminmxid;
1868 :
1869 : Assert(TransactionIdIsNormal(datfrozenxid));
1870 : Assert(MultiXactIdIsValid(datminmxid));
1871 :
1872 : /*
1873 : * If database is in the process of getting dropped, or has been
1874 : * interrupted while doing so, no connections to it are possible
1875 : * anymore. Therefore we don't need to take it into account here.
1876 : * Which is good, because it can't be processed by autovacuum either.
1877 : */
1878 6318 : if (database_is_invalid_form((Form_pg_database) dbform))
1879 : {
1880 2 : elog(DEBUG2,
1881 : "skipping invalid database \"%s\" while computing relfrozenxid",
1882 : NameStr(dbform->datname));
1883 2 : continue;
1884 : }
1885 :
1886 : /*
1887 : * If things are working properly, no database should have a
1888 : * datfrozenxid or datminmxid that is "in the future". However, such
1889 : * cases have been known to arise due to bugs in pg_upgrade. If we
1890 : * see any entries that are "in the future", chicken out and don't do
1891 : * anything. This ensures we won't truncate clog before those
1892 : * databases have been scanned and cleaned up. (We will issue the
1893 : * "already wrapped" warning if appropriate, though.)
1894 : */
1895 12632 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1896 6316 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1897 0 : bogus = true;
1898 :
1899 6316 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1900 0 : frozenAlreadyWrapped = true;
1901 6316 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1902 : {
1903 522 : frozenXID = datfrozenxid;
1904 522 : oldestxid_datoid = dbform->oid;
1905 : }
1906 :
1907 6316 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1908 : {
1909 4 : minMulti = datminmxid;
1910 4 : minmulti_datoid = dbform->oid;
1911 : }
1912 : }
1913 :
1914 2164 : table_endscan(scan);
1915 :
1916 2164 : table_close(relation, AccessShareLock);
1917 :
1918 : /*
1919 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1920 : * the computed minimum XID might be bogus. This case should now be
1921 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1922 : * test anyway.
1923 : */
1924 2164 : if (frozenAlreadyWrapped)
1925 : {
1926 0 : ereport(WARNING,
1927 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1928 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1929 0 : LWLockRelease(WrapLimitsVacuumLock);
1930 0 : return;
1931 : }
1932 :
1933 : /* chicken out if data is bogus in any other way */
1934 2164 : if (bogus)
1935 : {
1936 0 : LWLockRelease(WrapLimitsVacuumLock);
1937 0 : return;
1938 : }
1939 :
1940 : /*
1941 : * Advance the oldest value for commit timestamps before truncating, so
1942 : * that if a user requests a timestamp for a transaction we're truncating
1943 : * away right after this point, they get NULL instead of an ugly "file not
1944 : * found" error from slru.c. This doesn't matter for xact/multixact
1945 : * because they are not subject to arbitrary lookups from users.
1946 : */
1947 2164 : AdvanceOldestCommitTsXid(frozenXID);
1948 :
1949 : /*
1950 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1951 : */
1952 2164 : TruncateCLOG(frozenXID, oldestxid_datoid);
1953 2164 : TruncateCommitTs(frozenXID);
1954 2164 : TruncateMultiXact(minMulti, minmulti_datoid);
1955 :
1956 : /*
1957 : * Update the wrap limit for GetNewTransactionId and creation of new
1958 : * MultiXactIds. Note: these functions will also signal the postmaster
1959 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1960 : * signaling twice?
1961 : */
1962 2164 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1963 2164 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1964 :
1965 2164 : LWLockRelease(WrapLimitsVacuumLock);
1966 : }
1967 :
1968 :
1969 : /*
1970 : * vacuum_rel() -- vacuum one heap relation
1971 : *
1972 : * relid identifies the relation to vacuum. If relation is supplied,
1973 : * use the name therein for reporting any failure to open/lock the rel;
1974 : * do not use it once we've successfully opened the rel, since it might
1975 : * be stale.
1976 : *
1977 : * Returns true if it's okay to proceed with a requested ANALYZE
1978 : * operation on this table.
1979 : *
1980 : * Doing one heap at a time incurs extra overhead, since we need to
1981 : * check that the heap exists again just before we vacuum it. The
1982 : * reason that we do this is so that vacuuming can be spread across
1983 : * many small transactions. Otherwise, two-phase locking would require
1984 : * us to lock the entire database during one pass of the vacuum cleaner.
1985 : *
1986 : * At entry and exit, we are not inside a transaction.
1987 : */
1988 : static bool
1989 222238 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
1990 : BufferAccessStrategy bstrategy)
1991 : {
1992 : LOCKMODE lmode;
1993 : Relation rel;
1994 : LockRelId lockrelid;
1995 : Oid priv_relid;
1996 : Oid toast_relid;
1997 : Oid save_userid;
1998 : int save_sec_context;
1999 : int save_nestlevel;
2000 : VacuumParams toast_vacuum_params;
2001 :
2002 : /*
2003 : * This function scribbles on the parameters, so make a copy early to
2004 : * avoid affecting the TOAST table (if we do end up recursing to it).
2005 : */
2006 222238 : memcpy(&toast_vacuum_params, ¶ms, sizeof(VacuumParams));
2007 :
2008 : /* Begin a transaction for vacuuming this relation */
2009 222238 : StartTransactionCommand();
2010 :
2011 222238 : if (!(params.options & VACOPT_FULL))
2012 : {
2013 : /*
2014 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
2015 : * other concurrent VACUUMs know that they can ignore this one while
2016 : * determining their OldestXmin. (The reason we don't set it during a
2017 : * full VACUUM is exactly that we may have to run user-defined
2018 : * functions for functional indexes, and we want to make sure that if
2019 : * they use the snapshot set above, any tuples it requires can't get
2020 : * removed from other tables. An index function that depends on the
2021 : * contents of other tables is arguably broken, but we won't break it
2022 : * here by violating transaction semantics.)
2023 : *
2024 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
2025 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
2026 : * in an emergency.
2027 : *
2028 : * Note: these flags remain set until CommitTransaction or
2029 : * AbortTransaction. We don't want to clear them until we reset
2030 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
2031 : * might appear to go backwards, which is probably Not Good. (We also
2032 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
2033 : * xmin doesn't become visible ahead of setting the flag.)
2034 : */
2035 221824 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2036 221824 : MyProc->statusFlags |= PROC_IN_VACUUM;
2037 221824 : if (params.is_wraparound)
2038 195226 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
2039 221824 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2040 221824 : LWLockRelease(ProcArrayLock);
2041 : }
2042 :
2043 : /*
2044 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2045 : * cutoff xids in local memory wrapping around, and to have updated xmin
2046 : * horizons.
2047 : */
2048 222238 : PushActiveSnapshot(GetTransactionSnapshot());
2049 :
2050 : /*
2051 : * Check for user-requested abort. Note we want this to be inside a
2052 : * transaction, so xact.c doesn't issue useless WARNING.
2053 : */
2054 222238 : CHECK_FOR_INTERRUPTS();
2055 :
2056 : /*
2057 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2058 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2059 : * way, we can be sure that no other backend is vacuuming the same table.
2060 : */
2061 444476 : lmode = (params.options & VACOPT_FULL) ?
2062 222238 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2063 :
2064 : /* open the relation and get the appropriate lock on it */
2065 222238 : rel = vacuum_open_relation(relid, relation, params.options,
2066 222238 : params.log_min_duration >= 0, lmode);
2067 :
2068 : /* leave if relation could not be opened or locked */
2069 222238 : if (!rel)
2070 : {
2071 24 : PopActiveSnapshot();
2072 24 : CommitTransactionCommand();
2073 24 : return false;
2074 : }
2075 :
2076 : /*
2077 : * When recursing to a TOAST table, check privileges on the parent. NB:
2078 : * This is only safe to do because we hold a session lock on the main
2079 : * relation that prevents concurrent deletion.
2080 : */
2081 222214 : if (OidIsValid(params.toast_parent))
2082 9176 : priv_relid = params.toast_parent;
2083 : else
2084 213038 : priv_relid = RelationGetRelid(rel);
2085 :
2086 : /*
2087 : * Check if relation needs to be skipped based on privileges. This check
2088 : * happens also when building the relation list to vacuum for a manual
2089 : * operation, and needs to be done additionally here as VACUUM could
2090 : * happen across multiple transactions where privileges could have changed
2091 : * in-between. Make sure to only generate logs for VACUUM in this case.
2092 : */
2093 222214 : if (!vacuum_is_permitted_for_relation(priv_relid,
2094 : rel->rd_rel,
2095 222214 : params.options & ~VACOPT_ANALYZE))
2096 : {
2097 72 : relation_close(rel, lmode);
2098 72 : PopActiveSnapshot();
2099 72 : CommitTransactionCommand();
2100 72 : return false;
2101 : }
2102 :
2103 : /*
2104 : * Check that it's of a vacuumable relkind.
2105 : */
2106 222142 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2107 81024 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2108 81016 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2109 188 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2110 : {
2111 2 : ereport(WARNING,
2112 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2113 : RelationGetRelationName(rel))));
2114 2 : relation_close(rel, lmode);
2115 2 : PopActiveSnapshot();
2116 2 : CommitTransactionCommand();
2117 2 : return false;
2118 : }
2119 :
2120 : /*
2121 : * Silently ignore tables that are temp tables of other backends ---
2122 : * trying to vacuum these will lead to great unhappiness, since their
2123 : * contents are probably not up-to-date on disk. (We don't throw a
2124 : * warning here; it would just lead to chatter during a database-wide
2125 : * VACUUM.)
2126 : */
2127 222140 : if (RELATION_IS_OTHER_TEMP(rel))
2128 : {
2129 2 : relation_close(rel, lmode);
2130 2 : PopActiveSnapshot();
2131 2 : CommitTransactionCommand();
2132 2 : return false;
2133 : }
2134 :
2135 : /*
2136 : * Silently ignore partitioned tables as there is no work to be done. The
2137 : * useful work is on their child partitions, which have been queued up for
2138 : * us separately.
2139 : */
2140 222138 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2141 : {
2142 186 : relation_close(rel, lmode);
2143 186 : PopActiveSnapshot();
2144 186 : CommitTransactionCommand();
2145 : /* It's OK to proceed with ANALYZE on this table */
2146 186 : return true;
2147 : }
2148 :
2149 : /*
2150 : * Get a session-level lock too. This will protect our access to the
2151 : * relation across multiple transactions, so that we can vacuum the
2152 : * relation's TOAST table (if any) secure in the knowledge that no one is
2153 : * deleting the parent relation.
2154 : *
2155 : * NOTE: this cannot block, even if someone else is waiting for access,
2156 : * because the lock manager knows that both lock requests are from the
2157 : * same process.
2158 : */
2159 221952 : lockrelid = rel->rd_lockInfo.lockRelId;
2160 221952 : LockRelationIdForSession(&lockrelid, lmode);
2161 :
2162 : /*
2163 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2164 : * specified in VACUUM command, or when running in an autovacuum worker
2165 : */
2166 221952 : if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
2167 : {
2168 : StdRdOptIndexCleanup vacuum_index_cleanup;
2169 :
2170 221694 : if (rel->rd_options == NULL)
2171 218822 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2172 : else
2173 2872 : vacuum_index_cleanup =
2174 2872 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2175 :
2176 221694 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2177 221650 : params.index_cleanup = VACOPTVALUE_AUTO;
2178 44 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2179 22 : params.index_cleanup = VACOPTVALUE_ENABLED;
2180 : else
2181 : {
2182 : Assert(vacuum_index_cleanup ==
2183 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2184 22 : params.index_cleanup = VACOPTVALUE_DISABLED;
2185 : }
2186 : }
2187 :
2188 : #ifdef USE_INJECTION_POINTS
2189 221952 : if (params.index_cleanup == VACOPTVALUE_AUTO)
2190 221656 : INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
2191 296 : else if (params.index_cleanup == VACOPTVALUE_DISABLED)
2192 260 : INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
2193 36 : else if (params.index_cleanup == VACOPTVALUE_ENABLED)
2194 36 : INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
2195 : #endif
2196 :
2197 : /*
2198 : * Check if the vacuum_max_eager_freeze_failure_rate table storage
2199 : * parameter was specified. This overrides the GUC value.
2200 : */
2201 221952 : if (rel->rd_options != NULL &&
2202 2884 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
2203 0 : params.max_eager_freeze_failure_rate =
2204 0 : ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
2205 :
2206 : /*
2207 : * Set truncate option based on truncate reloption or GUC if it wasn't
2208 : * specified in VACUUM command, or when running in an autovacuum worker
2209 : */
2210 221952 : if (params.truncate == VACOPTVALUE_UNSPECIFIED)
2211 : {
2212 221700 : StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
2213 :
2214 221700 : if (opts && opts->vacuum_truncate_set)
2215 : {
2216 32 : if (opts->vacuum_truncate)
2217 10 : params.truncate = VACOPTVALUE_ENABLED;
2218 : else
2219 22 : params.truncate = VACOPTVALUE_DISABLED;
2220 : }
2221 221668 : else if (vacuum_truncate)
2222 221646 : params.truncate = VACOPTVALUE_ENABLED;
2223 : else
2224 22 : params.truncate = VACOPTVALUE_DISABLED;
2225 : }
2226 :
2227 : #ifdef USE_INJECTION_POINTS
2228 221952 : if (params.truncate == VACOPTVALUE_AUTO)
2229 0 : INJECTION_POINT("vacuum-truncate-auto", NULL);
2230 221952 : else if (params.truncate == VACOPTVALUE_DISABLED)
2231 296 : INJECTION_POINT("vacuum-truncate-disabled", NULL);
2232 221656 : else if (params.truncate == VACOPTVALUE_ENABLED)
2233 221656 : INJECTION_POINT("vacuum-truncate-enabled", NULL);
2234 : #endif
2235 :
2236 : /*
2237 : * Remember the relation's TOAST relation for later, if the caller asked
2238 : * us to process it. In VACUUM FULL, though, the toast table is
2239 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2240 : * unless PROCESS_MAIN is disabled.
2241 : */
2242 221952 : if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
2243 26368 : ((params.options & VACOPT_FULL) == 0 ||
2244 386 : (params.options & VACOPT_PROCESS_MAIN) == 0))
2245 25988 : toast_relid = rel->rd_rel->reltoastrelid;
2246 : else
2247 195964 : toast_relid = InvalidOid;
2248 :
2249 : /*
2250 : * Switch to the table owner's userid, so that any index functions are run
2251 : * as that user. Also lock down security-restricted operations and
2252 : * arrange to make GUC variable changes local to this command. (This is
2253 : * unnecessary, but harmless, for lazy VACUUM.)
2254 : */
2255 221952 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2256 221952 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2257 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2258 221952 : save_nestlevel = NewGUCNestLevel();
2259 221952 : RestrictSearchPath();
2260 :
2261 : /*
2262 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2263 : * relation. Otherwise, we can skip this part. If processing the TOAST
2264 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2265 : * to be set when we recurse to the TOAST table.
2266 : */
2267 221952 : if (params.options & VACOPT_PROCESS_MAIN)
2268 : {
2269 : /*
2270 : * Do the actual work --- either FULL or "lazy" vacuum
2271 : */
2272 221798 : if (params.options & VACOPT_FULL)
2273 : {
2274 380 : ClusterParams cluster_params = {0};
2275 :
2276 380 : if ((params.options & VACOPT_VERBOSE) != 0)
2277 2 : cluster_params.options |= CLUOPT_VERBOSE;
2278 :
2279 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2280 380 : cluster_rel(rel, InvalidOid, &cluster_params);
2281 : /* cluster_rel closes the relation, but keeps lock */
2282 :
2283 374 : rel = NULL;
2284 : }
2285 : else
2286 221418 : table_relation_vacuum(rel, params, bstrategy);
2287 : }
2288 :
2289 : /* Roll back any GUC changes executed by index functions */
2290 221946 : AtEOXact_GUC(false, save_nestlevel);
2291 :
2292 : /* Restore userid and security context */
2293 221946 : SetUserIdAndSecContext(save_userid, save_sec_context);
2294 :
2295 : /* all done with this class, but hold lock until commit */
2296 221946 : if (rel)
2297 221572 : relation_close(rel, NoLock);
2298 :
2299 : /*
2300 : * Complete the transaction and free all temporary memory used.
2301 : */
2302 221946 : PopActiveSnapshot();
2303 221946 : CommitTransactionCommand();
2304 :
2305 : /*
2306 : * If the relation has a secondary toast rel, vacuum that too while we
2307 : * still hold the session lock on the main table. Note however that
2308 : * "analyze" will not get done on the toast table. This is good, because
2309 : * the toaster always uses hardcoded index access and statistics are
2310 : * totally unimportant for toast relations.
2311 : */
2312 221946 : if (toast_relid != InvalidOid)
2313 : {
2314 : /*
2315 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2316 : * set toast_parent so that the privilege checks are done on the main
2317 : * relation. NB: This is only safe to do because we hold a session
2318 : * lock on the main relation that prevents concurrent deletion.
2319 : */
2320 9176 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2321 9176 : toast_vacuum_params.toast_parent = relid;
2322 :
2323 9176 : vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
2324 : }
2325 :
2326 : /*
2327 : * Now release the session-level lock on the main table.
2328 : */
2329 221946 : UnlockRelationIdForSession(&lockrelid, lmode);
2330 :
2331 : /* Report that we really did it. */
2332 221946 : return true;
2333 : }
2334 :
2335 :
2336 : /*
2337 : * Open all the vacuumable indexes of the given relation, obtaining the
2338 : * specified kind of lock on each. Return an array of Relation pointers for
2339 : * the indexes into *Irel, and the number of indexes into *nindexes.
2340 : *
2341 : * We consider an index vacuumable if it is marked insertable (indisready).
2342 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2343 : * execution, and what we have is too corrupt to be processable. We will
2344 : * vacuum even if the index isn't indisvalid; this is important because in a
2345 : * unique index, uniqueness checks will be performed anyway and had better not
2346 : * hit dangling index pointers.
2347 : */
2348 : void
2349 236822 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2350 : int *nindexes, Relation **Irel)
2351 : {
2352 : List *indexoidlist;
2353 : ListCell *indexoidscan;
2354 : int i;
2355 :
2356 : Assert(lockmode != NoLock);
2357 :
2358 236822 : indexoidlist = RelationGetIndexList(relation);
2359 :
2360 : /* allocate enough memory for all indexes */
2361 236822 : i = list_length(indexoidlist);
2362 :
2363 236822 : if (i > 0)
2364 223072 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2365 : else
2366 13750 : *Irel = NULL;
2367 :
2368 : /* collect just the ready indexes */
2369 236822 : i = 0;
2370 591050 : foreach(indexoidscan, indexoidlist)
2371 : {
2372 354228 : Oid indexoid = lfirst_oid(indexoidscan);
2373 : Relation indrel;
2374 :
2375 354228 : indrel = index_open(indexoid, lockmode);
2376 354228 : if (indrel->rd_index->indisready)
2377 354228 : (*Irel)[i++] = indrel;
2378 : else
2379 0 : index_close(indrel, lockmode);
2380 : }
2381 :
2382 236822 : *nindexes = i;
2383 :
2384 236822 : list_free(indexoidlist);
2385 236822 : }
2386 :
2387 : /*
2388 : * Release the resources acquired by vac_open_indexes. Optionally release
2389 : * the locks (say NoLock to keep 'em).
2390 : */
2391 : void
2392 237692 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2393 : {
2394 237692 : if (Irel == NULL)
2395 14626 : return;
2396 :
2397 577282 : while (nindexes--)
2398 : {
2399 354216 : Relation ind = Irel[nindexes];
2400 :
2401 354216 : index_close(ind, lockmode);
2402 : }
2403 223066 : pfree(Irel);
2404 : }
2405 :
2406 : /*
2407 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2408 : *
2409 : * This should be called in each major loop of VACUUM processing,
2410 : * typically once per page processed.
2411 : */
2412 : void
2413 84622346 : vacuum_delay_point(bool is_analyze)
2414 : {
2415 84622346 : double msec = 0;
2416 :
2417 : /* Always check for interrupts */
2418 84622346 : CHECK_FOR_INTERRUPTS();
2419 :
2420 84622346 : if (InterruptPending ||
2421 84622346 : (!VacuumCostActive && !ConfigReloadPending))
2422 76267410 : return;
2423 :
2424 : /*
2425 : * Autovacuum workers should reload the configuration file if requested.
2426 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2427 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2428 : * vacuumed or analyzed.
2429 : */
2430 8354936 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2431 : {
2432 0 : ConfigReloadPending = false;
2433 0 : ProcessConfigFile(PGC_SIGHUP);
2434 0 : VacuumUpdateCosts();
2435 : }
2436 :
2437 : /*
2438 : * If we disabled cost-based delays after reloading the config file,
2439 : * return.
2440 : */
2441 8354936 : if (!VacuumCostActive)
2442 0 : return;
2443 :
2444 : /*
2445 : * For parallel vacuum, the delay is computed based on the shared cost
2446 : * balance. See compute_parallel_delay.
2447 : */
2448 8354936 : if (VacuumSharedCostBalance != NULL)
2449 0 : msec = compute_parallel_delay();
2450 8354936 : else if (VacuumCostBalance >= vacuum_cost_limit)
2451 7152 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2452 :
2453 : /* Nap if appropriate */
2454 8354936 : if (msec > 0)
2455 : {
2456 : instr_time delay_start;
2457 :
2458 7152 : if (msec > vacuum_cost_delay * 4)
2459 10 : msec = vacuum_cost_delay * 4;
2460 :
2461 7152 : if (track_cost_delay_timing)
2462 0 : INSTR_TIME_SET_CURRENT(delay_start);
2463 :
2464 7152 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2465 7152 : pg_usleep(msec * 1000);
2466 7152 : pgstat_report_wait_end();
2467 :
2468 7152 : if (track_cost_delay_timing)
2469 : {
2470 : instr_time delay_end;
2471 : instr_time delay;
2472 :
2473 0 : INSTR_TIME_SET_CURRENT(delay_end);
2474 0 : INSTR_TIME_SET_ZERO(delay);
2475 0 : INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
2476 :
2477 : /*
2478 : * For parallel workers, we only report the delay time every once
2479 : * in a while to avoid overloading the leader with messages and
2480 : * interrupts.
2481 : */
2482 0 : if (IsParallelWorker())
2483 : {
2484 : static instr_time last_report_time;
2485 : instr_time time_since_last_report;
2486 :
2487 : Assert(!is_analyze);
2488 :
2489 : /* Accumulate the delay time */
2490 0 : parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
2491 :
2492 : /* Calculate interval since last report */
2493 0 : INSTR_TIME_SET_ZERO(time_since_last_report);
2494 0 : INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
2495 :
2496 : /* If we haven't reported in a while, do so now */
2497 0 : if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
2498 : PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
2499 : {
2500 0 : pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2501 : parallel_vacuum_worker_delay_ns);
2502 :
2503 : /* Reset variables */
2504 0 : last_report_time = delay_end;
2505 0 : parallel_vacuum_worker_delay_ns = 0;
2506 : }
2507 : }
2508 0 : else if (is_analyze)
2509 0 : pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
2510 0 : INSTR_TIME_GET_NANOSEC(delay));
2511 : else
2512 0 : pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
2513 0 : INSTR_TIME_GET_NANOSEC(delay));
2514 : }
2515 :
2516 : /*
2517 : * We don't want to ignore postmaster death during very long vacuums
2518 : * with vacuum_cost_delay configured. We can't use the usual
2519 : * WaitLatch() approach here because we want microsecond-based sleep
2520 : * durations above.
2521 : */
2522 7152 : if (IsUnderPostmaster && !PostmasterIsAlive())
2523 0 : exit(1);
2524 :
2525 7152 : VacuumCostBalance = 0;
2526 :
2527 : /*
2528 : * Balance and update limit values for autovacuum workers. We must do
2529 : * this periodically, as the number of workers across which we are
2530 : * balancing the limit may have changed.
2531 : *
2532 : * TODO: There may be better criteria for determining when to do this
2533 : * besides "check after napping".
2534 : */
2535 7152 : AutoVacuumUpdateCostLimit();
2536 :
2537 : /* Might have gotten an interrupt while sleeping */
2538 7152 : CHECK_FOR_INTERRUPTS();
2539 : }
2540 : }
2541 :
2542 : /*
2543 : * Computes the vacuum delay for parallel workers.
2544 : *
2545 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2546 : * worker to sleep in proportion to the share of work it's done. We achieve this
2547 : * by allowing all parallel vacuum workers including the leader process to
2548 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2549 : * We allow each worker to update it as and when it has incurred any cost and
2550 : * then based on that decide whether it needs to sleep. We compute the time
2551 : * to sleep for a worker based on the cost it has incurred
2552 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2553 : * that amount. This avoids putting to sleep those workers which have done less
2554 : * I/O than other workers and therefore ensure that workers
2555 : * which are doing more I/O got throttled more.
2556 : *
2557 : * We allow a worker to sleep only if it has performed I/O above a certain
2558 : * threshold, which is calculated based on the number of active workers
2559 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2560 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2561 : * the required throttling if we force a worker that has done more than 50%
2562 : * of its share of work to sleep.
2563 : */
2564 : static double
2565 0 : compute_parallel_delay(void)
2566 : {
2567 0 : double msec = 0;
2568 : uint32 shared_balance;
2569 : int nworkers;
2570 :
2571 : /* Parallel vacuum must be active */
2572 : Assert(VacuumSharedCostBalance);
2573 :
2574 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2575 :
2576 : /* At least count itself */
2577 : Assert(nworkers >= 1);
2578 :
2579 : /* Update the shared cost balance value atomically */
2580 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2581 :
2582 : /* Compute the total local balance for the current worker */
2583 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2584 :
2585 0 : if ((shared_balance >= vacuum_cost_limit) &&
2586 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2587 : {
2588 : /* Compute sleep time based on the local cost balance */
2589 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2590 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2591 0 : VacuumCostBalanceLocal = 0;
2592 : }
2593 :
2594 : /*
2595 : * Reset the local balance as we accumulated it into the shared value.
2596 : */
2597 0 : VacuumCostBalance = 0;
2598 :
2599 0 : return msec;
2600 : }
2601 :
2602 : /*
2603 : * A wrapper function of defGetBoolean().
2604 : *
2605 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2606 : * of true and false.
2607 : */
2608 : static VacOptValue
2609 326 : get_vacoptval_from_boolean(DefElem *def)
2610 : {
2611 326 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2612 : }
2613 :
2614 : /*
2615 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2616 : *
2617 : * Returns bulk delete stats derived from input stats
2618 : */
2619 : IndexBulkDeleteResult *
2620 2428 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2621 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2622 : {
2623 : /* Do bulk deletion */
2624 2428 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2625 : dead_items);
2626 :
2627 2428 : ereport(ivinfo->message_level,
2628 : (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
2629 : RelationGetRelationName(ivinfo->index),
2630 : dead_items_info->num_items)));
2631 :
2632 2428 : return istat;
2633 : }
2634 :
2635 : /*
2636 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2637 : *
2638 : * Returns bulk delete stats derived from input stats
2639 : */
2640 : IndexBulkDeleteResult *
2641 265888 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2642 : {
2643 265888 : istat = index_vacuum_cleanup(ivinfo, istat);
2644 :
2645 265888 : if (istat)
2646 2670 : ereport(ivinfo->message_level,
2647 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2648 : RelationGetRelationName(ivinfo->index),
2649 : istat->num_index_tuples,
2650 : istat->num_pages),
2651 : errdetail("%.0f index row versions were removed.\n"
2652 : "%u index pages were newly deleted.\n"
2653 : "%u index pages are currently deleted, of which %u are currently reusable.",
2654 : istat->tuples_removed,
2655 : istat->pages_newly_deleted,
2656 : istat->pages_deleted, istat->pages_free)));
2657 :
2658 265888 : return istat;
2659 : }
2660 :
2661 : /*
2662 : * vac_tid_reaped() -- is a particular tid deletable?
2663 : *
2664 : * This has the right signature to be an IndexBulkDeleteCallback.
2665 : */
2666 : static bool
2667 6254192 : vac_tid_reaped(ItemPointer itemptr, void *state)
2668 : {
2669 6254192 : TidStore *dead_items = (TidStore *) state;
2670 :
2671 6254192 : return TidStoreIsMember(dead_items, itemptr);
2672 : }
|