Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/index.h"
38 : #include "catalog/namespace.h"
39 : #include "catalog/pg_database.h"
40 : #include "catalog/pg_inherits.h"
41 : #include "commands/cluster.h"
42 : #include "commands/defrem.h"
43 : #include "commands/vacuum.h"
44 : #include "miscadmin.h"
45 : #include "nodes/makefuncs.h"
46 : #include "pgstat.h"
47 : #include "postmaster/autovacuum.h"
48 : #include "postmaster/bgworker_internals.h"
49 : #include "postmaster/interrupt.h"
50 : #include "storage/bufmgr.h"
51 : #include "storage/lmgr.h"
52 : #include "storage/pmsignal.h"
53 : #include "storage/proc.h"
54 : #include "storage/procarray.h"
55 : #include "utils/acl.h"
56 : #include "utils/fmgroids.h"
57 : #include "utils/guc.h"
58 : #include "utils/guc_hooks.h"
59 : #include "utils/memutils.h"
60 : #include "utils/snapmgr.h"
61 : #include "utils/syscache.h"
62 :
63 :
64 : /*
65 : * GUC parameters
66 : */
67 : int vacuum_freeze_min_age;
68 : int vacuum_freeze_table_age;
69 : int vacuum_multixact_freeze_min_age;
70 : int vacuum_multixact_freeze_table_age;
71 : int vacuum_failsafe_age;
72 : int vacuum_multixact_failsafe_age;
73 :
74 : /*
75 : * Variables for cost-based vacuum delay. The defaults differ between
76 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
77 : * vacuum code. They are initialized here to the defaults for client backends
78 : * executing VACUUM or ANALYZE.
79 : */
80 : double vacuum_cost_delay = 0;
81 : int vacuum_cost_limit = 200;
82 :
83 : /*
84 : * VacuumFailsafeActive is a defined as a global so that we can determine
85 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
86 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
87 : * for the table until after vacuuming has completed, regardless of other
88 : * settings.
89 : *
90 : * Only VACUUM code should inspect this variable and only table access methods
91 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
92 : * inspected to determine whether or not to allow cost-based delays. Table AMs
93 : * are free to set it if they desire this behavior, but it is false by default
94 : * and reset to false in between vacuuming each relation.
95 : */
96 : bool VacuumFailsafeActive = false;
97 :
98 : /*
99 : * Variables for cost-based parallel vacuum. See comments atop
100 : * compute_parallel_delay to understand how it works.
101 : */
102 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
103 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
104 : int VacuumCostBalanceLocal = 0;
105 :
106 : /* non-export function prototypes */
107 : static List *expand_vacuum_rel(VacuumRelation *vrel,
108 : MemoryContext vac_context, int options);
109 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
110 : static void vac_truncate_clog(TransactionId frozenXID,
111 : MultiXactId minMulti,
112 : TransactionId lastSaneFrozenXid,
113 : MultiXactId lastSaneMinMulti);
114 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
115 : BufferAccessStrategy bstrategy);
116 : static double compute_parallel_delay(void);
117 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
118 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
119 :
120 : /*
121 : * GUC check function to ensure GUC value specified is within the allowable
122 : * range.
123 : */
124 : bool
125 1830 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
126 : GucSource source)
127 : {
128 : /* Value upper and lower hard limits are inclusive */
129 1830 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
130 1830 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
131 1830 : return true;
132 :
133 : /* Value does not fall within any allowable range */
134 0 : GUC_check_errdetail("vacuum_buffer_usage_limit must be 0 or between %d kB and %d kB",
135 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
136 :
137 0 : return false;
138 : }
139 :
140 : /*
141 : * Primary entry point for manual VACUUM and ANALYZE commands
142 : *
143 : * This is mainly a preparation wrapper for the real operations that will
144 : * happen in vacuum().
145 : */
146 : void
147 10420 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
148 : {
149 : VacuumParams params;
150 10420 : BufferAccessStrategy bstrategy = NULL;
151 10420 : bool verbose = false;
152 10420 : bool skip_locked = false;
153 10420 : bool analyze = false;
154 10420 : bool freeze = false;
155 10420 : bool full = false;
156 10420 : bool disable_page_skipping = false;
157 10420 : bool process_main = true;
158 10420 : bool process_toast = true;
159 : int ring_size;
160 10420 : bool skip_database_stats = false;
161 10420 : bool only_database_stats = false;
162 : MemoryContext vac_context;
163 : ListCell *lc;
164 :
165 : /* index_cleanup and truncate values unspecified for now */
166 10420 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
167 10420 : params.truncate = VACOPTVALUE_UNSPECIFIED;
168 :
169 : /* By default parallel vacuum is enabled */
170 10420 : params.nworkers = 0;
171 :
172 : /* Will be set later if we recurse to a TOAST table. */
173 10420 : params.toast_parent = InvalidOid;
174 :
175 : /*
176 : * Set this to an invalid value so it is clear whether or not a
177 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
178 : */
179 10420 : ring_size = -1;
180 :
181 : /* Parse options list */
182 19356 : foreach(lc, vacstmt->options)
183 : {
184 8972 : DefElem *opt = (DefElem *) lfirst(lc);
185 :
186 : /* Parse common options for VACUUM and ANALYZE */
187 8972 : if (strcmp(opt->defname, "verbose") == 0)
188 38 : verbose = defGetBoolean(opt);
189 8934 : else if (strcmp(opt->defname, "skip_locked") == 0)
190 334 : skip_locked = defGetBoolean(opt);
191 8600 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
192 : {
193 : const char *hintmsg;
194 : int result;
195 : char *vac_buffer_size;
196 :
197 54 : vac_buffer_size = defGetString(opt);
198 :
199 : /*
200 : * Check that the specified value is valid and the size falls
201 : * within the hard upper and lower limits if it is not 0.
202 : */
203 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
204 48 : (result != 0 &&
205 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
206 : {
207 18 : ereport(ERROR,
208 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
209 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
210 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
211 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
212 : }
213 :
214 36 : ring_size = result;
215 : }
216 8546 : else if (!vacstmt->is_vacuumcmd)
217 6 : ereport(ERROR,
218 : (errcode(ERRCODE_SYNTAX_ERROR),
219 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
220 : parser_errposition(pstate, opt->location)));
221 :
222 : /* Parse options available on VACUUM */
223 8540 : else if (strcmp(opt->defname, "analyze") == 0)
224 1364 : analyze = defGetBoolean(opt);
225 7176 : else if (strcmp(opt->defname, "freeze") == 0)
226 1148 : freeze = defGetBoolean(opt);
227 6028 : else if (strcmp(opt->defname, "full") == 0)
228 356 : full = defGetBoolean(opt);
229 5672 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
230 182 : disable_page_skipping = defGetBoolean(opt);
231 5490 : else if (strcmp(opt->defname, "index_cleanup") == 0)
232 : {
233 : /* Interpret no string as the default, which is 'auto' */
234 174 : if (!opt->arg)
235 0 : params.index_cleanup = VACOPTVALUE_AUTO;
236 : else
237 : {
238 174 : char *sval = defGetString(opt);
239 :
240 : /* Try matching on 'auto' string, or fall back on boolean */
241 174 : if (pg_strcasecmp(sval, "auto") == 0)
242 6 : params.index_cleanup = VACOPTVALUE_AUTO;
243 : else
244 168 : params.index_cleanup = get_vacoptval_from_boolean(opt);
245 : }
246 : }
247 5316 : else if (strcmp(opt->defname, "process_main") == 0)
248 154 : process_main = defGetBoolean(opt);
249 5162 : else if (strcmp(opt->defname, "process_toast") == 0)
250 160 : process_toast = defGetBoolean(opt);
251 5002 : else if (strcmp(opt->defname, "truncate") == 0)
252 148 : params.truncate = get_vacoptval_from_boolean(opt);
253 4854 : else if (strcmp(opt->defname, "parallel") == 0)
254 : {
255 338 : if (opt->arg == NULL)
256 : {
257 6 : ereport(ERROR,
258 : (errcode(ERRCODE_SYNTAX_ERROR),
259 : errmsg("parallel option requires a value between 0 and %d",
260 : MAX_PARALLEL_WORKER_LIMIT),
261 : parser_errposition(pstate, opt->location)));
262 : }
263 : else
264 : {
265 : int nworkers;
266 :
267 332 : nworkers = defGetInt32(opt);
268 332 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
269 6 : ereport(ERROR,
270 : (errcode(ERRCODE_SYNTAX_ERROR),
271 : errmsg("parallel workers for vacuum must be between 0 and %d",
272 : MAX_PARALLEL_WORKER_LIMIT),
273 : parser_errposition(pstate, opt->location)));
274 :
275 : /*
276 : * Disable parallel vacuum, if user has specified parallel
277 : * degree as zero.
278 : */
279 326 : if (nworkers == 0)
280 154 : params.nworkers = -1;
281 : else
282 172 : params.nworkers = nworkers;
283 : }
284 : }
285 4516 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
286 4398 : skip_database_stats = defGetBoolean(opt);
287 118 : else if (strcmp(opt->defname, "only_database_stats") == 0)
288 118 : only_database_stats = defGetBoolean(opt);
289 : else
290 0 : ereport(ERROR,
291 : (errcode(ERRCODE_SYNTAX_ERROR),
292 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
293 : parser_errposition(pstate, opt->location)));
294 : }
295 :
296 : /* Set vacuum options */
297 10384 : params.options =
298 10384 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
299 10384 : (verbose ? VACOPT_VERBOSE : 0) |
300 10384 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
301 10384 : (analyze ? VACOPT_ANALYZE : 0) |
302 10384 : (freeze ? VACOPT_FREEZE : 0) |
303 10384 : (full ? VACOPT_FULL : 0) |
304 10384 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
305 10384 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
306 10384 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
307 10384 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
308 10384 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
309 :
310 : /* sanity checks on options */
311 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
312 : Assert((params.options & VACOPT_VACUUM) ||
313 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
314 :
315 10384 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
316 6 : ereport(ERROR,
317 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
318 : errmsg("VACUUM FULL cannot be performed in parallel")));
319 :
320 : /*
321 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
322 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
323 : * we'll permit that.
324 : */
325 10378 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
326 6 : !(params.options & VACOPT_ANALYZE))
327 6 : ereport(ERROR,
328 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
329 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
330 :
331 : /*
332 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
333 : */
334 10372 : if (!(params.options & VACOPT_ANALYZE))
335 : {
336 9030 : foreach(lc, vacstmt->rels)
337 : {
338 4436 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
339 :
340 4436 : if (vrel->va_cols != NIL)
341 6 : ereport(ERROR,
342 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
343 : errmsg("ANALYZE option must be specified when a column list is provided")));
344 : }
345 : }
346 :
347 :
348 : /*
349 : * Sanity check DISABLE_PAGE_SKIPPING option.
350 : */
351 10366 : if ((params.options & VACOPT_FULL) != 0 &&
352 332 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
353 0 : ereport(ERROR,
354 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
355 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
356 :
357 : /* sanity check for PROCESS_TOAST */
358 10366 : if ((params.options & VACOPT_FULL) != 0 &&
359 332 : (params.options & VACOPT_PROCESS_TOAST) == 0)
360 6 : ereport(ERROR,
361 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
362 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
363 :
364 : /* sanity check for ONLY_DATABASE_STATS */
365 10360 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
366 : {
367 : Assert(params.options & VACOPT_VACUUM);
368 118 : if (vacstmt->rels != NIL)
369 6 : ereport(ERROR,
370 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
371 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
372 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
373 112 : if (params.options & ~(VACOPT_VACUUM |
374 : VACOPT_VERBOSE |
375 : VACOPT_PROCESS_MAIN |
376 : VACOPT_PROCESS_TOAST |
377 : VACOPT_ONLY_DATABASE_STATS))
378 0 : ereport(ERROR,
379 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
380 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
381 : }
382 :
383 : /*
384 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
385 : * them as -1 which means to use the default values.
386 : */
387 10354 : if (params.options & VACOPT_FREEZE)
388 : {
389 1148 : params.freeze_min_age = 0;
390 1148 : params.freeze_table_age = 0;
391 1148 : params.multixact_freeze_min_age = 0;
392 1148 : params.multixact_freeze_table_age = 0;
393 : }
394 : else
395 : {
396 9206 : params.freeze_min_age = -1;
397 9206 : params.freeze_table_age = -1;
398 9206 : params.multixact_freeze_min_age = -1;
399 9206 : params.multixact_freeze_table_age = -1;
400 : }
401 :
402 : /* user-invoked vacuum is never "for wraparound" */
403 10354 : params.is_wraparound = false;
404 :
405 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
406 10354 : params.log_min_duration = -1;
407 :
408 : /*
409 : * Create special memory context for cross-transaction storage.
410 : *
411 : * Since it is a child of PortalContext, it will go away eventually even
412 : * if we suffer an error; there's no need for special abort cleanup logic.
413 : */
414 10354 : vac_context = AllocSetContextCreate(PortalContext,
415 : "Vacuum",
416 : ALLOCSET_DEFAULT_SIZES);
417 :
418 : /*
419 : * Make a buffer strategy object in the cross-transaction memory context.
420 : * We needn't bother making this for VACUUM (FULL) or VACUUM
421 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
422 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
423 : * when we see ANALYZE.
424 : */
425 10354 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
426 438 : VACOPT_FULL)) == 0 ||
427 438 : (params.options & VACOPT_ANALYZE) != 0)
428 : {
429 :
430 9922 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
431 :
432 : Assert(ring_size >= -1);
433 :
434 : /*
435 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
436 : * command, it overrides the value of VacuumBufferUsageLimit. Either
437 : * value may be 0, in which case GetAccessStrategyWithSize() will
438 : * return NULL, effectively allowing full use of shared buffers.
439 : */
440 9922 : if (ring_size == -1)
441 9892 : ring_size = VacuumBufferUsageLimit;
442 :
443 9922 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
444 :
445 9922 : MemoryContextSwitchTo(old_context);
446 : }
447 :
448 : /* Now go through the common routine */
449 10354 : vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel);
450 :
451 : /* Finally, clean up the vacuum memory context */
452 10230 : MemoryContextDelete(vac_context);
453 10230 : }
454 :
455 : /*
456 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
457 : *
458 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
459 : * we process all relevant tables in the database. For each VacuumRelation,
460 : * if a valid OID is supplied, the table with that OID is what to process;
461 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
462 : *
463 : * params contains a set of parameters that can be used to customize the
464 : * behavior.
465 : *
466 : * bstrategy may be passed in as NULL when the caller does not want to
467 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
468 : * otherwise, the caller must build a BufferAccessStrategy with the number of
469 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
470 : * using.
471 : *
472 : * isTopLevel should be passed down from ProcessUtility.
473 : *
474 : * It is the caller's responsibility that all parameters are allocated in a
475 : * memory context that will not disappear at transaction commit.
476 : */
477 : void
478 10402 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
479 : MemoryContext vac_context, bool isTopLevel)
480 : {
481 : static bool in_vacuum = false;
482 :
483 : const char *stmttype;
484 : volatile bool in_outer_xact,
485 : use_own_xacts;
486 :
487 : Assert(params != NULL);
488 :
489 10402 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
490 :
491 : /*
492 : * We cannot run VACUUM inside a user transaction block; if we were inside
493 : * a transaction, then our commit- and start-transaction-command calls
494 : * would not have the intended effect! There are numerous other subtle
495 : * dependencies on this, too.
496 : *
497 : * ANALYZE (without VACUUM) can run either way.
498 : */
499 10402 : if (params->options & VACOPT_VACUUM)
500 : {
501 5964 : PreventInTransactionBlock(isTopLevel, stmttype);
502 5952 : in_outer_xact = false;
503 : }
504 : else
505 4438 : in_outer_xact = IsInTransactionBlock(isTopLevel);
506 :
507 : /*
508 : * Check for and disallow recursive calls. This could happen when VACUUM
509 : * FULL or ANALYZE calls a hostile index expression that itself calls
510 : * ANALYZE.
511 : */
512 10390 : if (in_vacuum)
513 12 : ereport(ERROR,
514 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
515 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
516 : stmttype)));
517 :
518 : /*
519 : * Build list of relation(s) to process, putting any new data in
520 : * vac_context for safekeeping.
521 : */
522 10378 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
523 : {
524 : /* We don't process any tables in this case */
525 : Assert(relations == NIL);
526 : }
527 10266 : else if (relations != NIL)
528 : {
529 10098 : List *newrels = NIL;
530 : ListCell *lc;
531 :
532 20284 : foreach(lc, relations)
533 : {
534 10222 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
535 : List *sublist;
536 : MemoryContext old_context;
537 :
538 10222 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
539 10186 : old_context = MemoryContextSwitchTo(vac_context);
540 10186 : newrels = list_concat(newrels, sublist);
541 10186 : MemoryContextSwitchTo(old_context);
542 : }
543 10062 : relations = newrels;
544 : }
545 : else
546 168 : relations = get_all_vacuum_rels(vac_context, params->options);
547 :
548 : /*
549 : * Decide whether we need to start/commit our own transactions.
550 : *
551 : * For VACUUM (with or without ANALYZE): always do so, so that we can
552 : * release locks as soon as possible. (We could possibly use the outer
553 : * transaction for a one-table VACUUM, but handling TOAST tables would be
554 : * problematic.)
555 : *
556 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
557 : * start/commit our own transactions. Also, there's no need to do so if
558 : * only processing one relation. For multiple relations when not within a
559 : * transaction block, and also in an autovacuum worker, use own
560 : * transactions so we can release locks sooner.
561 : */
562 10342 : if (params->options & VACOPT_VACUUM)
563 5940 : use_own_xacts = true;
564 : else
565 : {
566 : Assert(params->options & VACOPT_ANALYZE);
567 4402 : if (AmAutoVacuumWorkerProcess())
568 30 : use_own_xacts = true;
569 4372 : else if (in_outer_xact)
570 214 : use_own_xacts = false;
571 4158 : else if (list_length(relations) > 1)
572 626 : use_own_xacts = true;
573 : else
574 3532 : use_own_xacts = false;
575 : }
576 :
577 : /*
578 : * vacuum_rel expects to be entered with no transaction active; it will
579 : * start and commit its own transaction. But we are called by an SQL
580 : * command, and so we are executing inside a transaction already. We
581 : * commit the transaction started in PostgresMain() here, and start
582 : * another one before exiting to match the commit waiting for us back in
583 : * PostgresMain().
584 : */
585 10342 : if (use_own_xacts)
586 : {
587 : Assert(!in_outer_xact);
588 :
589 : /* ActiveSnapshot is not set by autovacuum */
590 6596 : if (ActiveSnapshotSet())
591 6548 : PopActiveSnapshot();
592 :
593 : /* matches the StartTransaction in PostgresMain() */
594 6596 : CommitTransactionCommand();
595 : }
596 :
597 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
598 10342 : PG_TRY();
599 : {
600 : ListCell *cur;
601 :
602 10342 : in_vacuum = true;
603 10342 : VacuumFailsafeActive = false;
604 10342 : VacuumUpdateCosts();
605 10342 : VacuumCostBalance = 0;
606 10342 : VacuumPageHit = 0;
607 10342 : VacuumPageMiss = 0;
608 10342 : VacuumPageDirty = 0;
609 10342 : VacuumCostBalanceLocal = 0;
610 10342 : VacuumSharedCostBalance = NULL;
611 10342 : VacuumActiveNWorkers = NULL;
612 :
613 : /*
614 : * Loop to process each selected relation.
615 : */
616 34308 : foreach(cur, relations)
617 : {
618 24030 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
619 :
620 24030 : if (params->options & VACOPT_VACUUM)
621 : {
622 12992 : if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
623 98 : continue;
624 : }
625 :
626 23926 : if (params->options & VACOPT_ANALYZE)
627 : {
628 : /*
629 : * If using separate xacts, start one for analyze. Otherwise,
630 : * we can use the outer transaction.
631 : */
632 12430 : if (use_own_xacts)
633 : {
634 8746 : StartTransactionCommand();
635 : /* functions in indexes may want a snapshot set */
636 8746 : PushActiveSnapshot(GetTransactionSnapshot());
637 : }
638 :
639 12430 : analyze_rel(vrel->oid, vrel->relation, params,
640 : vrel->va_cols, in_outer_xact, bstrategy);
641 :
642 12372 : if (use_own_xacts)
643 : {
644 8708 : PopActiveSnapshot();
645 8708 : CommitTransactionCommand();
646 : }
647 : else
648 : {
649 : /*
650 : * If we're not using separate xacts, better separate the
651 : * ANALYZE actions with CCIs. This avoids trouble if user
652 : * says "ANALYZE t, t".
653 : */
654 3664 : CommandCounterIncrement();
655 : }
656 : }
657 :
658 : /*
659 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
660 : * next relation.
661 : */
662 23868 : VacuumFailsafeActive = false;
663 : }
664 : }
665 64 : PG_FINALLY();
666 : {
667 10342 : in_vacuum = false;
668 10342 : VacuumCostActive = false;
669 10342 : VacuumFailsafeActive = false;
670 10342 : VacuumCostBalance = 0;
671 : }
672 10342 : PG_END_TRY();
673 :
674 : /*
675 : * Finish up processing.
676 : */
677 10278 : if (use_own_xacts)
678 : {
679 : /* here, we are not in a transaction */
680 :
681 : /*
682 : * This matches the CommitTransaction waiting for us in
683 : * PostgresMain().
684 : */
685 6552 : StartTransactionCommand();
686 : }
687 :
688 10278 : if ((params->options & VACOPT_VACUUM) &&
689 5908 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
690 : {
691 : /*
692 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
693 : */
694 1494 : vac_update_datfrozenxid();
695 : }
696 :
697 10278 : }
698 :
699 : /*
700 : * Check if the current user has privileges to vacuum or analyze the relation.
701 : * If not, issue a WARNING log message and return false to let the caller
702 : * decide what to do with this relation. This routine is used to decide if a
703 : * relation can be processed for VACUUM or ANALYZE.
704 : */
705 : bool
706 54640 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
707 : bits32 options)
708 : {
709 : char *relname;
710 :
711 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
712 :
713 : /*----------
714 : * A role has privileges to vacuum or analyze the relation if any of the
715 : * following are true:
716 : * - the role owns the current database and the relation is not shared
717 : * - the role has the MAINTAIN privilege on the relation
718 : *----------
719 : */
720 54640 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
721 62360 : !reltuple->relisshared) ||
722 8494 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
723 54300 : return true;
724 :
725 340 : relname = NameStr(reltuple->relname);
726 :
727 340 : if ((options & VACOPT_VACUUM) != 0)
728 : {
729 224 : ereport(WARNING,
730 : (errmsg("permission denied to vacuum \"%s\", skipping it",
731 : relname)));
732 :
733 : /*
734 : * For VACUUM ANALYZE, both logs could show up, but just generate
735 : * information for VACUUM as that would be the first one to be
736 : * processed.
737 : */
738 224 : return false;
739 : }
740 :
741 116 : if ((options & VACOPT_ANALYZE) != 0)
742 116 : ereport(WARNING,
743 : (errmsg("permission denied to analyze \"%s\", skipping it",
744 : relname)));
745 :
746 116 : return false;
747 : }
748 :
749 :
750 : /*
751 : * vacuum_open_relation
752 : *
753 : * This routine is used for attempting to open and lock a relation which
754 : * is going to be vacuumed or analyzed. If the relation cannot be opened
755 : * or locked, a log is emitted if possible.
756 : */
757 : Relation
758 32240 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
759 : bool verbose, LOCKMODE lmode)
760 : {
761 : Relation rel;
762 32240 : bool rel_lock = true;
763 : int elevel;
764 :
765 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
766 :
767 : /*
768 : * Open the relation and get the appropriate lock on it.
769 : *
770 : * There's a race condition here: the relation may have gone away since
771 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
772 : *
773 : * If we've been asked not to wait for the relation lock, acquire it first
774 : * in non-blocking mode, before calling try_relation_open().
775 : */
776 32240 : if (!(options & VACOPT_SKIP_LOCKED))
777 31718 : rel = try_relation_open(relid, lmode);
778 522 : else if (ConditionalLockRelationOid(relid, lmode))
779 502 : rel = try_relation_open(relid, NoLock);
780 : else
781 : {
782 20 : rel = NULL;
783 20 : rel_lock = false;
784 : }
785 :
786 : /* if relation is opened, leave */
787 32240 : if (rel)
788 32208 : return rel;
789 :
790 : /*
791 : * Relation could not be opened, hence generate if possible a log
792 : * informing on the situation.
793 : *
794 : * If the RangeVar is not defined, we do not have enough information to
795 : * provide a meaningful log statement. Chances are that the caller has
796 : * intentionally not provided this information so that this logging is
797 : * skipped, anyway.
798 : */
799 32 : if (relation == NULL)
800 18 : return NULL;
801 :
802 : /*
803 : * Determine the log level.
804 : *
805 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
806 : * statements in the permission checks; otherwise, only log if the caller
807 : * so requested.
808 : */
809 14 : if (!AmAutoVacuumWorkerProcess())
810 14 : elevel = WARNING;
811 0 : else if (verbose)
812 0 : elevel = LOG;
813 : else
814 0 : return NULL;
815 :
816 14 : if ((options & VACOPT_VACUUM) != 0)
817 : {
818 10 : if (!rel_lock)
819 6 : ereport(elevel,
820 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
821 : errmsg("skipping vacuum of \"%s\" --- lock not available",
822 : relation->relname)));
823 : else
824 4 : ereport(elevel,
825 : (errcode(ERRCODE_UNDEFINED_TABLE),
826 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
827 : relation->relname)));
828 :
829 : /*
830 : * For VACUUM ANALYZE, both logs could show up, but just generate
831 : * information for VACUUM as that would be the first one to be
832 : * processed.
833 : */
834 10 : return NULL;
835 : }
836 :
837 4 : if ((options & VACOPT_ANALYZE) != 0)
838 : {
839 4 : if (!rel_lock)
840 2 : ereport(elevel,
841 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
842 : errmsg("skipping analyze of \"%s\" --- lock not available",
843 : relation->relname)));
844 : else
845 2 : ereport(elevel,
846 : (errcode(ERRCODE_UNDEFINED_TABLE),
847 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
848 : relation->relname)));
849 : }
850 :
851 4 : return NULL;
852 : }
853 :
854 :
855 : /*
856 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
857 : * and optionally add VacuumRelations for partitions of the table.
858 : *
859 : * If a VacuumRelation does not have an OID supplied and is a partitioned
860 : * table, an extra entry will be added to the output for each partition.
861 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
862 : * it does not want us to expand partitioned tables.
863 : *
864 : * We take care not to modify the input data structure, but instead build
865 : * new VacuumRelation(s) to return. (But note that they will reference
866 : * unmodified parts of the input, eg column lists.) New data structures
867 : * are made in vac_context.
868 : */
869 : static List *
870 10222 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
871 : int options)
872 : {
873 10222 : List *vacrels = NIL;
874 : MemoryContext oldcontext;
875 :
876 : /* If caller supplied OID, there's nothing we need do here. */
877 10222 : if (OidIsValid(vrel->oid))
878 : {
879 48 : oldcontext = MemoryContextSwitchTo(vac_context);
880 48 : vacrels = lappend(vacrels, vrel);
881 48 : MemoryContextSwitchTo(oldcontext);
882 : }
883 : else
884 : {
885 : /* Process a specific relation, and possibly partitions thereof */
886 : Oid relid;
887 : HeapTuple tuple;
888 : Form_pg_class classForm;
889 : bool include_parts;
890 : int rvr_opts;
891 :
892 : /*
893 : * Since autovacuum workers supply OIDs when calling vacuum(), no
894 : * autovacuum worker should reach this code.
895 : */
896 : Assert(!AmAutoVacuumWorkerProcess());
897 :
898 : /*
899 : * We transiently take AccessShareLock to protect the syscache lookup
900 : * below, as well as find_all_inheritors's expectation that the caller
901 : * holds some lock on the starting relation.
902 : */
903 10174 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
904 10174 : relid = RangeVarGetRelidExtended(vrel->relation,
905 : AccessShareLock,
906 : rvr_opts,
907 : NULL, NULL);
908 :
909 : /*
910 : * If the lock is unavailable, emit the same log statement that
911 : * vacuum_rel() and analyze_rel() would.
912 : */
913 10138 : if (!OidIsValid(relid))
914 : {
915 8 : if (options & VACOPT_VACUUM)
916 6 : ereport(WARNING,
917 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
918 : errmsg("skipping vacuum of \"%s\" --- lock not available",
919 : vrel->relation->relname)));
920 : else
921 2 : ereport(WARNING,
922 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
923 : errmsg("skipping analyze of \"%s\" --- lock not available",
924 : vrel->relation->relname)));
925 8 : return vacrels;
926 : }
927 :
928 : /*
929 : * To check whether the relation is a partitioned table and its
930 : * ownership, fetch its syscache entry.
931 : */
932 10130 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
933 10130 : if (!HeapTupleIsValid(tuple))
934 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
935 10130 : classForm = (Form_pg_class) GETSTRUCT(tuple);
936 :
937 : /*
938 : * Make a returnable VacuumRelation for this rel if the user has the
939 : * required privileges.
940 : */
941 10130 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
942 : {
943 9898 : oldcontext = MemoryContextSwitchTo(vac_context);
944 9898 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
945 : relid,
946 : vrel->va_cols));
947 9898 : MemoryContextSwitchTo(oldcontext);
948 : }
949 :
950 :
951 10130 : include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
952 10130 : ReleaseSysCache(tuple);
953 :
954 : /*
955 : * If it is, make relation list entries for its partitions. Note that
956 : * the list returned by find_all_inheritors() includes the passed-in
957 : * OID, so we have to skip that. There's no point in taking locks on
958 : * the individual partitions yet, and doing so would just add
959 : * unnecessary deadlock risk. For this last reason we do not check
960 : * yet the ownership of the partitions, which get added to the list to
961 : * process. Ownership will be checked later on anyway.
962 : */
963 10130 : if (include_parts)
964 : {
965 684 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
966 : ListCell *part_lc;
967 :
968 3198 : foreach(part_lc, part_oids)
969 : {
970 2514 : Oid part_oid = lfirst_oid(part_lc);
971 :
972 2514 : if (part_oid == relid)
973 684 : continue; /* ignore original table */
974 :
975 : /*
976 : * We omit a RangeVar since it wouldn't be appropriate to
977 : * complain about failure to open one of these relations
978 : * later.
979 : */
980 1830 : oldcontext = MemoryContextSwitchTo(vac_context);
981 1830 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
982 : part_oid,
983 : vrel->va_cols));
984 1830 : MemoryContextSwitchTo(oldcontext);
985 : }
986 : }
987 :
988 : /*
989 : * Release lock again. This means that by the time we actually try to
990 : * process the table, it might be gone or renamed. In the former case
991 : * we'll silently ignore it; in the latter case we'll process it
992 : * anyway, but we must beware that the RangeVar doesn't necessarily
993 : * identify it anymore. This isn't ideal, perhaps, but there's little
994 : * practical alternative, since we're typically going to commit this
995 : * transaction and begin a new one between now and then. Moreover,
996 : * holding locks on multiple relations would create significant risk
997 : * of deadlock.
998 : */
999 10130 : UnlockRelationOid(relid, AccessShareLock);
1000 : }
1001 :
1002 10178 : return vacrels;
1003 : }
1004 :
1005 : /*
1006 : * Construct a list of VacuumRelations for all vacuumable rels in
1007 : * the current database. The list is built in vac_context.
1008 : */
1009 : static List *
1010 168 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1011 : {
1012 168 : List *vacrels = NIL;
1013 : Relation pgclass;
1014 : TableScanDesc scan;
1015 : HeapTuple tuple;
1016 :
1017 168 : pgclass = table_open(RelationRelationId, AccessShareLock);
1018 :
1019 168 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1020 :
1021 71974 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1022 : {
1023 71806 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1024 : MemoryContext oldcontext;
1025 71806 : Oid relid = classForm->oid;
1026 :
1027 : /*
1028 : * We include partitioned tables here; depending on which operation is
1029 : * to be performed, caller will decide whether to process or ignore
1030 : * them.
1031 : */
1032 71806 : if (classForm->relkind != RELKIND_RELATION &&
1033 59552 : classForm->relkind != RELKIND_MATVIEW &&
1034 59546 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1035 59504 : continue;
1036 :
1037 : /* check permissions of relation */
1038 12302 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1039 0 : continue;
1040 :
1041 : /*
1042 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1043 : * We omit a RangeVar since it wouldn't be appropriate to complain
1044 : * about failure to open one of these relations later.
1045 : */
1046 12302 : oldcontext = MemoryContextSwitchTo(vac_context);
1047 12302 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1048 : relid,
1049 : NIL));
1050 12302 : MemoryContextSwitchTo(oldcontext);
1051 : }
1052 :
1053 168 : table_endscan(scan);
1054 168 : table_close(pgclass, AccessShareLock);
1055 :
1056 168 : return vacrels;
1057 : }
1058 :
1059 : /*
1060 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1061 : *
1062 : * The target relation and VACUUM parameters are our inputs.
1063 : *
1064 : * Output parameters are the cutoffs that VACUUM caller should use.
1065 : *
1066 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1067 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1068 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1069 : * minimum).
1070 : */
1071 : bool
1072 19598 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1073 : struct VacuumCutoffs *cutoffs)
1074 : {
1075 : int freeze_min_age,
1076 : multixact_freeze_min_age,
1077 : freeze_table_age,
1078 : multixact_freeze_table_age,
1079 : effective_multixact_freeze_max_age;
1080 : TransactionId nextXID,
1081 : safeOldestXmin,
1082 : aggressiveXIDCutoff;
1083 : MultiXactId nextMXID,
1084 : safeOldestMxact,
1085 : aggressiveMXIDCutoff;
1086 :
1087 : /* Use mutable copies of freeze age parameters */
1088 19598 : freeze_min_age = params->freeze_min_age;
1089 19598 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1090 19598 : freeze_table_age = params->freeze_table_age;
1091 19598 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1092 :
1093 : /* Set pg_class fields in cutoffs */
1094 19598 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1095 19598 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1096 :
1097 : /*
1098 : * Acquire OldestXmin.
1099 : *
1100 : * We can always ignore processes running lazy vacuum. This is because we
1101 : * use these values only for deciding which tuples we must keep in the
1102 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1103 : * XID assigned), it's safe to ignore it. In theory it could be
1104 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1105 : * that only one vacuum process can be working on a particular table at
1106 : * any time, and that each vacuum is always an independent transaction.
1107 : */
1108 19598 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1109 :
1110 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1111 :
1112 : /* Acquire OldestMxact */
1113 19598 : cutoffs->OldestMxact = GetOldestMultiXactId();
1114 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1115 :
1116 : /* Acquire next XID/next MXID values used to apply age-based settings */
1117 19598 : nextXID = ReadNextTransactionId();
1118 19598 : nextMXID = ReadNextMultiXactId();
1119 :
1120 : /*
1121 : * Also compute the multixact age for which freezing is urgent. This is
1122 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1123 : * short of multixact member space.
1124 : */
1125 19598 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1126 :
1127 : /*
1128 : * Almost ready to set freeze output parameters; check if OldestXmin or
1129 : * OldestMxact are held back to an unsafe degree before we start on that
1130 : */
1131 19598 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1132 19598 : if (!TransactionIdIsNormal(safeOldestXmin))
1133 0 : safeOldestXmin = FirstNormalTransactionId;
1134 19598 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1135 19598 : if (safeOldestMxact < FirstMultiXactId)
1136 0 : safeOldestMxact = FirstMultiXactId;
1137 19598 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1138 0 : ereport(WARNING,
1139 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1140 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1141 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1142 19598 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1143 0 : ereport(WARNING,
1144 : (errmsg("cutoff for freezing multixacts is far in the past"),
1145 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1146 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1147 :
1148 : /*
1149 : * Determine the minimum freeze age to use: as specified by the caller, or
1150 : * vacuum_freeze_min_age, but in any case not more than half
1151 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1152 : * wraparound won't occur too frequently.
1153 : */
1154 19598 : if (freeze_min_age < 0)
1155 8522 : freeze_min_age = vacuum_freeze_min_age;
1156 19598 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1157 : Assert(freeze_min_age >= 0);
1158 :
1159 : /* Compute FreezeLimit, being careful to generate a normal XID */
1160 19598 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1161 19598 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1162 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1163 : /* FreezeLimit must always be <= OldestXmin */
1164 19598 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1165 610 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1166 :
1167 : /*
1168 : * Determine the minimum multixact freeze age to use: as specified by
1169 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1170 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1171 : * prevent MultiXact wraparound won't occur too frequently.
1172 : */
1173 19598 : if (multixact_freeze_min_age < 0)
1174 8522 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1175 19598 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1176 : effective_multixact_freeze_max_age / 2);
1177 : Assert(multixact_freeze_min_age >= 0);
1178 :
1179 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1180 19598 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1181 19598 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1182 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1183 : /* MultiXactCutoff must always be <= OldestMxact */
1184 19598 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1185 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1186 :
1187 : /*
1188 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1189 : *
1190 : * Determine the table freeze age to use: as specified by the caller, or
1191 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1192 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1193 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1194 : * anti-wraparound autovacuum is launched.
1195 : */
1196 19598 : if (freeze_table_age < 0)
1197 8522 : freeze_table_age = vacuum_freeze_table_age;
1198 19598 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1199 : Assert(freeze_table_age >= 0);
1200 19598 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1201 19598 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1202 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1203 19598 : if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
1204 : aggressiveXIDCutoff))
1205 11062 : return true;
1206 :
1207 : /*
1208 : * Similar to the above, determine the table freeze age to use for
1209 : * multixacts: as specified by the caller, or the value of the
1210 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1211 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1212 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1213 : * multixacts before anti-wraparound autovacuum is launched.
1214 : */
1215 8536 : if (multixact_freeze_table_age < 0)
1216 8522 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1217 8536 : multixact_freeze_table_age =
1218 8536 : Min(multixact_freeze_table_age,
1219 : effective_multixact_freeze_max_age * 0.95);
1220 : Assert(multixact_freeze_table_age >= 0);
1221 8536 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1222 8536 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1223 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1224 8536 : if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
1225 : aggressiveMXIDCutoff))
1226 0 : return true;
1227 :
1228 : /* Non-aggressive VACUUM */
1229 8536 : return false;
1230 : }
1231 :
1232 : /*
1233 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1234 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1235 : * dangerously far in the past.
1236 : *
1237 : * When we return true, VACUUM caller triggers the failsafe.
1238 : */
1239 : bool
1240 21550 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1241 : {
1242 21550 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1243 21550 : MultiXactId relminmxid = cutoffs->relminmxid;
1244 : TransactionId xid_skip_limit;
1245 : MultiXactId multi_skip_limit;
1246 : int skip_index_vacuum;
1247 :
1248 : Assert(TransactionIdIsNormal(relfrozenxid));
1249 : Assert(MultiXactIdIsValid(relminmxid));
1250 :
1251 : /*
1252 : * Determine the index skipping age to use. In any case no less than
1253 : * autovacuum_freeze_max_age * 1.05.
1254 : */
1255 21550 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1256 :
1257 21550 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1258 21550 : if (!TransactionIdIsNormal(xid_skip_limit))
1259 0 : xid_skip_limit = FirstNormalTransactionId;
1260 :
1261 21550 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1262 : {
1263 : /* The table's relfrozenxid is too old */
1264 0 : return true;
1265 : }
1266 :
1267 : /*
1268 : * Similar to above, determine the index skipping age to use for
1269 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1270 : * 1.05.
1271 : */
1272 21550 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1273 : autovacuum_multixact_freeze_max_age * 1.05);
1274 :
1275 21550 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1276 21550 : if (multi_skip_limit < FirstMultiXactId)
1277 0 : multi_skip_limit = FirstMultiXactId;
1278 :
1279 21550 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1280 : {
1281 : /* The table's relminmxid is too old */
1282 0 : return true;
1283 : }
1284 :
1285 21550 : return false;
1286 : }
1287 :
1288 : /*
1289 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1290 : *
1291 : * If we scanned the whole relation then we should just use the count of
1292 : * live tuples seen; but if we did not, we should not blindly extrapolate
1293 : * from that number, since VACUUM may have scanned a quite nonrandom
1294 : * subset of the table. When we have only partial information, we take
1295 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1296 : * of the tuple density in the unscanned pages.
1297 : *
1298 : * Note: scanned_tuples should count only *live* tuples, since
1299 : * pg_class.reltuples is defined that way.
1300 : */
1301 : double
1302 19074 : vac_estimate_reltuples(Relation relation,
1303 : BlockNumber total_pages,
1304 : BlockNumber scanned_pages,
1305 : double scanned_tuples)
1306 : {
1307 19074 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1308 19074 : double old_rel_tuples = relation->rd_rel->reltuples;
1309 : double old_density;
1310 : double unscanned_pages;
1311 : double total_tuples;
1312 :
1313 : /* If we did scan the whole table, just use the count as-is */
1314 19074 : if (scanned_pages >= total_pages)
1315 18748 : return scanned_tuples;
1316 :
1317 : /*
1318 : * When successive VACUUM commands scan the same few pages again and
1319 : * again, without anything from the table really changing, there is a risk
1320 : * that our beliefs about tuple density will gradually become distorted.
1321 : * This might be caused by vacuumlazy.c implementation details, such as
1322 : * its tendency to always scan the last heap page. Handle that here.
1323 : *
1324 : * If the relation is _exactly_ the same size according to the existing
1325 : * pg_class entry, and only a few of its pages (less than 2%) were
1326 : * scanned, keep the existing value of reltuples. Also keep the existing
1327 : * value when only a subset of rel's pages <= a single page were scanned.
1328 : *
1329 : * (Note: we might be returning -1 here.)
1330 : */
1331 326 : if (old_rel_pages == total_pages &&
1332 306 : scanned_pages < (double) total_pages * 0.02)
1333 200 : return old_rel_tuples;
1334 126 : if (scanned_pages <= 1)
1335 96 : return old_rel_tuples;
1336 :
1337 : /*
1338 : * If old density is unknown, we can't do much except scale up
1339 : * scanned_tuples to match total_pages.
1340 : */
1341 30 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1342 2 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1343 :
1344 : /*
1345 : * Okay, we've covered the corner cases. The normal calculation is to
1346 : * convert the old measurement to a density (tuples per page), then
1347 : * estimate the number of tuples in the unscanned pages using that figure,
1348 : * and finally add on the number of tuples in the scanned pages.
1349 : */
1350 28 : old_density = old_rel_tuples / old_rel_pages;
1351 28 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1352 28 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1353 28 : return floor(total_tuples + 0.5);
1354 : }
1355 :
1356 :
1357 : /*
1358 : * vac_update_relstats() -- update statistics for one relation
1359 : *
1360 : * Update the whole-relation statistics that are kept in its pg_class
1361 : * row. There are additional stats that will be updated if we are
1362 : * doing ANALYZE, but we always update these stats. This routine works
1363 : * for both index and heap relation entries in pg_class.
1364 : *
1365 : * We violate transaction semantics here by overwriting the rel's
1366 : * existing pg_class tuple with the new values. This is reasonably
1367 : * safe as long as we're sure that the new values are correct whether or
1368 : * not this transaction commits. The reason for doing this is that if
1369 : * we updated these tuples in the usual way, vacuuming pg_class itself
1370 : * wouldn't work very well --- by the time we got done with a vacuum
1371 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1372 : * course, this only works for fixed-size not-null columns, but these are.
1373 : *
1374 : * Another reason for doing it this way is that when we are in a lazy
1375 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1376 : * Somebody vacuuming pg_class might think they could delete a tuple
1377 : * marked with xmin = our xid.
1378 : *
1379 : * In addition to fundamentally nontransactional statistics such as
1380 : * relpages and relallvisible, we try to maintain certain lazily-updated
1381 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1382 : * It's safe to do this in VACUUM, which can't run in parallel with
1383 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1384 : * However, it's *not* safe to do it in an ANALYZE that's within an
1385 : * outer transaction, because for example the current transaction might
1386 : * have dropped the last index; then we'd think relhasindex should be
1387 : * cleared, but if the transaction later rolls back this would be wrong.
1388 : * So we refrain from updating the DDL flags if we're inside an outer
1389 : * transaction. This is OK since postponing the flag maintenance is
1390 : * always allowable.
1391 : *
1392 : * Note: num_tuples should count only *live* tuples, since
1393 : * pg_class.reltuples is defined that way.
1394 : *
1395 : * This routine is shared by VACUUM and ANALYZE.
1396 : */
1397 : void
1398 49848 : vac_update_relstats(Relation relation,
1399 : BlockNumber num_pages, double num_tuples,
1400 : BlockNumber num_all_visible_pages,
1401 : bool hasindex, TransactionId frozenxid,
1402 : MultiXactId minmulti,
1403 : bool *frozenxid_updated, bool *minmulti_updated,
1404 : bool in_outer_xact)
1405 : {
1406 49848 : Oid relid = RelationGetRelid(relation);
1407 : Relation rd;
1408 : HeapTuple ctup;
1409 : Form_pg_class pgcform;
1410 : bool dirty,
1411 : futurexid,
1412 : futuremxid;
1413 : TransactionId oldfrozenxid;
1414 : MultiXactId oldminmulti;
1415 :
1416 49848 : rd = table_open(RelationRelationId, RowExclusiveLock);
1417 :
1418 : /* Fetch a copy of the tuple to scribble on */
1419 49848 : ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1420 49848 : if (!HeapTupleIsValid(ctup))
1421 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1422 : relid);
1423 49848 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1424 :
1425 : /* Apply statistical updates, if any, to copied tuple */
1426 :
1427 49848 : dirty = false;
1428 49848 : if (pgcform->relpages != (int32) num_pages)
1429 : {
1430 7338 : pgcform->relpages = (int32) num_pages;
1431 7338 : dirty = true;
1432 : }
1433 49848 : if (pgcform->reltuples != (float4) num_tuples)
1434 : {
1435 15728 : pgcform->reltuples = (float4) num_tuples;
1436 15728 : dirty = true;
1437 : }
1438 49848 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1439 : {
1440 4494 : pgcform->relallvisible = (int32) num_all_visible_pages;
1441 4494 : dirty = true;
1442 : }
1443 :
1444 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1445 :
1446 49848 : if (!in_outer_xact)
1447 : {
1448 : /*
1449 : * If we didn't find any indexes, reset relhasindex.
1450 : */
1451 49570 : if (pgcform->relhasindex && !hasindex)
1452 : {
1453 18 : pgcform->relhasindex = false;
1454 18 : dirty = true;
1455 : }
1456 :
1457 : /* We also clear relhasrules and relhastriggers if needed */
1458 49570 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1459 : {
1460 0 : pgcform->relhasrules = false;
1461 0 : dirty = true;
1462 : }
1463 49570 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1464 : {
1465 6 : pgcform->relhastriggers = false;
1466 6 : dirty = true;
1467 : }
1468 : }
1469 :
1470 : /*
1471 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1472 : * indicating it has no new data.
1473 : *
1474 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1475 : * stored relfrozenxid is "in the future" then it seems best to assume
1476 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1477 : * This should match vac_update_datfrozenxid() concerning what we consider
1478 : * to be "in the future".
1479 : */
1480 49848 : oldfrozenxid = pgcform->relfrozenxid;
1481 49848 : futurexid = false;
1482 49848 : if (frozenxid_updated)
1483 19070 : *frozenxid_updated = false;
1484 49848 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1485 : {
1486 16674 : bool update = false;
1487 :
1488 16674 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1489 16604 : update = true;
1490 70 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1491 0 : futurexid = update = true;
1492 :
1493 16674 : if (update)
1494 : {
1495 16604 : pgcform->relfrozenxid = frozenxid;
1496 16604 : dirty = true;
1497 16604 : if (frozenxid_updated)
1498 16604 : *frozenxid_updated = true;
1499 : }
1500 : }
1501 :
1502 : /* Similarly for relminmxid */
1503 49848 : oldminmulti = pgcform->relminmxid;
1504 49848 : futuremxid = false;
1505 49848 : if (minmulti_updated)
1506 19070 : *minmulti_updated = false;
1507 49848 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1508 : {
1509 24 : bool update = false;
1510 :
1511 24 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1512 24 : update = true;
1513 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1514 0 : futuremxid = update = true;
1515 :
1516 24 : if (update)
1517 : {
1518 24 : pgcform->relminmxid = minmulti;
1519 24 : dirty = true;
1520 24 : if (minmulti_updated)
1521 24 : *minmulti_updated = true;
1522 : }
1523 : }
1524 :
1525 : /* If anything changed, write out the tuple. */
1526 49848 : if (dirty)
1527 27612 : heap_inplace_update(rd, ctup);
1528 :
1529 49848 : table_close(rd, RowExclusiveLock);
1530 :
1531 49848 : if (futurexid)
1532 0 : ereport(WARNING,
1533 : (errcode(ERRCODE_DATA_CORRUPTED),
1534 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1535 : oldfrozenxid, frozenxid,
1536 : RelationGetRelationName(relation))));
1537 49848 : if (futuremxid)
1538 0 : ereport(WARNING,
1539 : (errcode(ERRCODE_DATA_CORRUPTED),
1540 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1541 : oldminmulti, minmulti,
1542 : RelationGetRelationName(relation))));
1543 49848 : }
1544 :
1545 :
1546 : /*
1547 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1548 : *
1549 : * Update pg_database's datfrozenxid entry for our database to be the
1550 : * minimum of the pg_class.relfrozenxid values.
1551 : *
1552 : * Similarly, update our datminmxid to be the minimum of the
1553 : * pg_class.relminmxid values.
1554 : *
1555 : * If we are able to advance either pg_database value, also try to
1556 : * truncate pg_xact and pg_multixact.
1557 : *
1558 : * We violate transaction semantics here by overwriting the database's
1559 : * existing pg_database tuple with the new values. This is reasonably
1560 : * safe since the new values are correct whether or not this transaction
1561 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1562 : * behind after a VACUUM.
1563 : */
1564 : void
1565 1512 : vac_update_datfrozenxid(void)
1566 : {
1567 : HeapTuple tuple;
1568 : Form_pg_database dbform;
1569 : Relation relation;
1570 : SysScanDesc scan;
1571 : HeapTuple classTup;
1572 : TransactionId newFrozenXid;
1573 : MultiXactId newMinMulti;
1574 : TransactionId lastSaneFrozenXid;
1575 : MultiXactId lastSaneMinMulti;
1576 1512 : bool bogus = false;
1577 1512 : bool dirty = false;
1578 : ScanKeyData key[1];
1579 :
1580 : /*
1581 : * Restrict this task to one backend per database. This avoids race
1582 : * conditions that would move datfrozenxid or datminmxid backward. It
1583 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1584 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1585 : */
1586 1512 : LockDatabaseFrozenIds(ExclusiveLock);
1587 :
1588 : /*
1589 : * Initialize the "min" calculation with
1590 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1591 : * approximation to the minimum relfrozenxid for not-yet-committed
1592 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1593 : * cannot produce a wrong minimum by starting with this.
1594 : */
1595 1512 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1596 :
1597 : /*
1598 : * Similarly, initialize the MultiXact "min" with the value that would be
1599 : * used on pg_class for new tables. See AddNewRelationTuple().
1600 : */
1601 1512 : newMinMulti = GetOldestMultiXactId();
1602 :
1603 : /*
1604 : * Identify the latest relfrozenxid and relminmxid values that we could
1605 : * validly see during the scan. These are conservative values, but it's
1606 : * not really worth trying to be more exact.
1607 : */
1608 1512 : lastSaneFrozenXid = ReadNextTransactionId();
1609 1512 : lastSaneMinMulti = ReadNextMultiXactId();
1610 :
1611 : /*
1612 : * We must seqscan pg_class to find the minimum Xid, because there is no
1613 : * index that can help us here.
1614 : *
1615 : * See vac_truncate_clog() for the race condition to prevent.
1616 : */
1617 1512 : relation = table_open(RelationRelationId, AccessShareLock);
1618 :
1619 1512 : scan = systable_beginscan(relation, InvalidOid, false,
1620 : NULL, 0, NULL);
1621 :
1622 981152 : while ((classTup = systable_getnext(scan)) != NULL)
1623 : {
1624 979640 : volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
1625 979640 : TransactionId relfrozenxid = classForm->relfrozenxid;
1626 979640 : TransactionId relminmxid = classForm->relminmxid;
1627 :
1628 : /*
1629 : * Only consider relations able to hold unfrozen XIDs (anything else
1630 : * should have InvalidTransactionId in relfrozenxid anyway).
1631 : */
1632 979640 : if (classForm->relkind != RELKIND_RELATION &&
1633 737980 : classForm->relkind != RELKIND_MATVIEW &&
1634 735950 : classForm->relkind != RELKIND_TOASTVALUE)
1635 : {
1636 : Assert(!TransactionIdIsValid(relfrozenxid));
1637 : Assert(!MultiXactIdIsValid(relminmxid));
1638 615710 : continue;
1639 : }
1640 :
1641 : /*
1642 : * Some table AMs might not need per-relation xid / multixid horizons.
1643 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1644 : * to not be set (i.e. set to their respective Invalid*Id)
1645 : * independently. Thus validate and compute horizon for each only if
1646 : * set.
1647 : *
1648 : * If things are working properly, no relation should have a
1649 : * relfrozenxid or relminmxid that is "in the future". However, such
1650 : * cases have been known to arise due to bugs in pg_upgrade. If we
1651 : * see any entries that are "in the future", chicken out and don't do
1652 : * anything. This ensures we won't truncate clog & multixact SLRUs
1653 : * before those relations have been scanned and cleaned up.
1654 : */
1655 :
1656 363930 : if (TransactionIdIsValid(relfrozenxid))
1657 : {
1658 : Assert(TransactionIdIsNormal(relfrozenxid));
1659 :
1660 : /* check for values in the future */
1661 363930 : if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
1662 : {
1663 0 : bogus = true;
1664 0 : break;
1665 : }
1666 :
1667 : /* determine new horizon */
1668 363930 : if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
1669 2828 : newFrozenXid = relfrozenxid;
1670 : }
1671 :
1672 363930 : if (MultiXactIdIsValid(relminmxid))
1673 : {
1674 : /* check for values in the future */
1675 363930 : if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
1676 : {
1677 0 : bogus = true;
1678 0 : break;
1679 : }
1680 :
1681 : /* determine new horizon */
1682 363930 : if (MultiXactIdPrecedes(relminmxid, newMinMulti))
1683 194 : newMinMulti = relminmxid;
1684 : }
1685 : }
1686 :
1687 : /* we're done with pg_class */
1688 1512 : systable_endscan(scan);
1689 1512 : table_close(relation, AccessShareLock);
1690 :
1691 : /* chicken out if bogus data found */
1692 1512 : if (bogus)
1693 0 : return;
1694 :
1695 : Assert(TransactionIdIsNormal(newFrozenXid));
1696 : Assert(MultiXactIdIsValid(newMinMulti));
1697 :
1698 : /* Now fetch the pg_database tuple we need to update. */
1699 1512 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1700 :
1701 : /*
1702 : * Get the pg_database tuple to scribble on. Note that this does not
1703 : * directly rely on the syscache to avoid issues with flattened toast
1704 : * values for the in-place update.
1705 : */
1706 1512 : ScanKeyInit(&key[0],
1707 : Anum_pg_database_oid,
1708 : BTEqualStrategyNumber, F_OIDEQ,
1709 : ObjectIdGetDatum(MyDatabaseId));
1710 :
1711 1512 : scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1712 : NULL, 1, key);
1713 1512 : tuple = systable_getnext(scan);
1714 1512 : tuple = heap_copytuple(tuple);
1715 1512 : systable_endscan(scan);
1716 :
1717 1512 : if (!HeapTupleIsValid(tuple))
1718 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1719 :
1720 1512 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1721 :
1722 : /*
1723 : * As in vac_update_relstats(), we ordinarily don't want to let
1724 : * datfrozenxid go backward; but if it's "in the future" then it must be
1725 : * corrupt and it seems best to overwrite it.
1726 : */
1727 1644 : if (dbform->datfrozenxid != newFrozenXid &&
1728 132 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1729 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1730 : {
1731 132 : dbform->datfrozenxid = newFrozenXid;
1732 132 : dirty = true;
1733 : }
1734 : else
1735 1380 : newFrozenXid = dbform->datfrozenxid;
1736 :
1737 : /* Ditto for datminmxid */
1738 1512 : if (dbform->datminmxid != newMinMulti &&
1739 0 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1740 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1741 : {
1742 0 : dbform->datminmxid = newMinMulti;
1743 0 : dirty = true;
1744 : }
1745 : else
1746 1512 : newMinMulti = dbform->datminmxid;
1747 :
1748 1512 : if (dirty)
1749 132 : heap_inplace_update(relation, tuple);
1750 :
1751 1512 : heap_freetuple(tuple);
1752 1512 : table_close(relation, RowExclusiveLock);
1753 :
1754 : /*
1755 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1756 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1757 : * XID-wrap-limit info is stale, since this action will update that too.
1758 : */
1759 1512 : if (dirty || ForceTransactionIdLimitUpdate())
1760 132 : vac_truncate_clog(newFrozenXid, newMinMulti,
1761 : lastSaneFrozenXid, lastSaneMinMulti);
1762 : }
1763 :
1764 :
1765 : /*
1766 : * vac_truncate_clog() -- attempt to truncate the commit log
1767 : *
1768 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1769 : * and use it to truncate the transaction commit log (pg_xact).
1770 : * Also update the XID wrap limit info maintained by varsup.c.
1771 : * Likewise for datminmxid.
1772 : *
1773 : * The passed frozenXID and minMulti are the updated values for my own
1774 : * pg_database entry. They're used to initialize the "min" calculations.
1775 : * The caller also passes the "last sane" XID and MXID, since it has
1776 : * those at hand already.
1777 : *
1778 : * This routine is only invoked when we've managed to change our
1779 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1780 : * XID-wrap-limit info is stale.
1781 : */
1782 : static void
1783 132 : vac_truncate_clog(TransactionId frozenXID,
1784 : MultiXactId minMulti,
1785 : TransactionId lastSaneFrozenXid,
1786 : MultiXactId lastSaneMinMulti)
1787 : {
1788 132 : TransactionId nextXID = ReadNextTransactionId();
1789 : Relation relation;
1790 : TableScanDesc scan;
1791 : HeapTuple tuple;
1792 : Oid oldestxid_datoid;
1793 : Oid minmulti_datoid;
1794 132 : bool bogus = false;
1795 132 : bool frozenAlreadyWrapped = false;
1796 :
1797 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1798 132 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1799 :
1800 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1801 132 : oldestxid_datoid = MyDatabaseId;
1802 132 : minmulti_datoid = MyDatabaseId;
1803 :
1804 : /*
1805 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1806 : *
1807 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1808 : * the values could change while we look at them. Fetch each one just
1809 : * once to ensure sane behavior of the comparison logic. (Here, as in
1810 : * many other places, we assume that fetching or updating an XID in shared
1811 : * storage is atomic.)
1812 : *
1813 : * Note: we need not worry about a race condition with new entries being
1814 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1815 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1816 : * of the interlock against copying a DB containing an active backend.
1817 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1818 : * concurrently modify the datfrozenxid's of different databases, the
1819 : * worst possible outcome is that pg_xact is not truncated as aggressively
1820 : * as it could be.
1821 : */
1822 132 : relation = table_open(DatabaseRelationId, AccessShareLock);
1823 :
1824 132 : scan = table_beginscan_catalog(relation, 0, NULL);
1825 :
1826 390 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1827 : {
1828 258 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1829 258 : TransactionId datfrozenxid = dbform->datfrozenxid;
1830 258 : TransactionId datminmxid = dbform->datminmxid;
1831 :
1832 : Assert(TransactionIdIsNormal(datfrozenxid));
1833 : Assert(MultiXactIdIsValid(datminmxid));
1834 :
1835 : /*
1836 : * If database is in the process of getting dropped, or has been
1837 : * interrupted while doing so, no connections to it are possible
1838 : * anymore. Therefore we don't need to take it into account here.
1839 : * Which is good, because it can't be processed by autovacuum either.
1840 : */
1841 258 : if (database_is_invalid_form((Form_pg_database) dbform))
1842 : {
1843 2 : elog(DEBUG2,
1844 : "skipping invalid database \"%s\" while computing relfrozenxid",
1845 : NameStr(dbform->datname));
1846 2 : continue;
1847 : }
1848 :
1849 : /*
1850 : * If things are working properly, no database should have a
1851 : * datfrozenxid or datminmxid that is "in the future". However, such
1852 : * cases have been known to arise due to bugs in pg_upgrade. If we
1853 : * see any entries that are "in the future", chicken out and don't do
1854 : * anything. This ensures we won't truncate clog before those
1855 : * databases have been scanned and cleaned up. (We will issue the
1856 : * "already wrapped" warning if appropriate, though.)
1857 : */
1858 512 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1859 256 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1860 0 : bogus = true;
1861 :
1862 256 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1863 0 : frozenAlreadyWrapped = true;
1864 256 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1865 : {
1866 58 : frozenXID = datfrozenxid;
1867 58 : oldestxid_datoid = dbform->oid;
1868 : }
1869 :
1870 256 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1871 : {
1872 0 : minMulti = datminmxid;
1873 0 : minmulti_datoid = dbform->oid;
1874 : }
1875 : }
1876 :
1877 132 : table_endscan(scan);
1878 :
1879 132 : table_close(relation, AccessShareLock);
1880 :
1881 : /*
1882 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1883 : * the computed minimum XID might be bogus. This case should now be
1884 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1885 : * test anyway.
1886 : */
1887 132 : if (frozenAlreadyWrapped)
1888 : {
1889 0 : ereport(WARNING,
1890 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1891 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1892 0 : LWLockRelease(WrapLimitsVacuumLock);
1893 0 : return;
1894 : }
1895 :
1896 : /* chicken out if data is bogus in any other way */
1897 132 : if (bogus)
1898 : {
1899 0 : LWLockRelease(WrapLimitsVacuumLock);
1900 0 : return;
1901 : }
1902 :
1903 : /*
1904 : * Advance the oldest value for commit timestamps before truncating, so
1905 : * that if a user requests a timestamp for a transaction we're truncating
1906 : * away right after this point, they get NULL instead of an ugly "file not
1907 : * found" error from slru.c. This doesn't matter for xact/multixact
1908 : * because they are not subject to arbitrary lookups from users.
1909 : */
1910 132 : AdvanceOldestCommitTsXid(frozenXID);
1911 :
1912 : /*
1913 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1914 : */
1915 132 : TruncateCLOG(frozenXID, oldestxid_datoid);
1916 132 : TruncateCommitTs(frozenXID);
1917 132 : TruncateMultiXact(minMulti, minmulti_datoid);
1918 :
1919 : /*
1920 : * Update the wrap limit for GetNewTransactionId and creation of new
1921 : * MultiXactIds. Note: these functions will also signal the postmaster
1922 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1923 : * signaling twice?
1924 : */
1925 132 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1926 132 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1927 :
1928 132 : LWLockRelease(WrapLimitsVacuumLock);
1929 : }
1930 :
1931 :
1932 : /*
1933 : * vacuum_rel() -- vacuum one heap relation
1934 : *
1935 : * relid identifies the relation to vacuum. If relation is supplied,
1936 : * use the name therein for reporting any failure to open/lock the rel;
1937 : * do not use it once we've successfully opened the rel, since it might
1938 : * be stale.
1939 : *
1940 : * Returns true if it's okay to proceed with a requested ANALYZE
1941 : * operation on this table.
1942 : *
1943 : * Doing one heap at a time incurs extra overhead, since we need to
1944 : * check that the heap exists again just before we vacuum it. The
1945 : * reason that we do this is so that vacuuming can be spread across
1946 : * many small transactions. Otherwise, two-phase locking would require
1947 : * us to lock the entire database during one pass of the vacuum cleaner.
1948 : *
1949 : * At entry and exit, we are not inside a transaction.
1950 : */
1951 : static bool
1952 19810 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
1953 : BufferAccessStrategy bstrategy)
1954 : {
1955 : LOCKMODE lmode;
1956 : Relation rel;
1957 : LockRelId lockrelid;
1958 : Oid priv_relid;
1959 : Oid toast_relid;
1960 : Oid save_userid;
1961 : int save_sec_context;
1962 : int save_nestlevel;
1963 :
1964 : Assert(params != NULL);
1965 :
1966 : /* Begin a transaction for vacuuming this relation */
1967 19810 : StartTransactionCommand();
1968 :
1969 19810 : if (!(params->options & VACOPT_FULL))
1970 : {
1971 : /*
1972 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1973 : * other concurrent VACUUMs know that they can ignore this one while
1974 : * determining their OldestXmin. (The reason we don't set it during a
1975 : * full VACUUM is exactly that we may have to run user-defined
1976 : * functions for functional indexes, and we want to make sure that if
1977 : * they use the snapshot set above, any tuples it requires can't get
1978 : * removed from other tables. An index function that depends on the
1979 : * contents of other tables is arguably broken, but we won't break it
1980 : * here by violating transaction semantics.)
1981 : *
1982 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1983 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
1984 : * in an emergency.
1985 : *
1986 : * Note: these flags remain set until CommitTransaction or
1987 : * AbortTransaction. We don't want to clear them until we reset
1988 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
1989 : * might appear to go backwards, which is probably Not Good. (We also
1990 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
1991 : * xmin doesn't become visible ahead of setting the flag.)
1992 : */
1993 19438 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1994 19438 : MyProc->statusFlags |= PROC_IN_VACUUM;
1995 19438 : if (params->is_wraparound)
1996 0 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1997 19438 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
1998 19438 : LWLockRelease(ProcArrayLock);
1999 : }
2000 :
2001 : /*
2002 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2003 : * cutoff xids in local memory wrapping around, and to have updated xmin
2004 : * horizons.
2005 : */
2006 19810 : PushActiveSnapshot(GetTransactionSnapshot());
2007 :
2008 : /*
2009 : * Check for user-requested abort. Note we want this to be inside a
2010 : * transaction, so xact.c doesn't issue useless WARNING.
2011 : */
2012 19810 : CHECK_FOR_INTERRUPTS();
2013 :
2014 : /*
2015 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2016 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2017 : * way, we can be sure that no other backend is vacuuming the same table.
2018 : */
2019 39620 : lmode = (params->options & VACOPT_FULL) ?
2020 19810 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2021 :
2022 : /* open the relation and get the appropriate lock on it */
2023 19810 : rel = vacuum_open_relation(relid, relation, params->options,
2024 19810 : params->log_min_duration >= 0, lmode);
2025 :
2026 : /* leave if relation could not be opened or locked */
2027 19810 : if (!rel)
2028 : {
2029 24 : PopActiveSnapshot();
2030 24 : CommitTransactionCommand();
2031 24 : return false;
2032 : }
2033 :
2034 : /*
2035 : * When recursing to a TOAST table, check privileges on the parent. NB:
2036 : * This is only safe to do because we hold a session lock on the main
2037 : * relation that prevents concurrent deletion.
2038 : */
2039 19786 : if (OidIsValid(params->toast_parent))
2040 6818 : priv_relid = params->toast_parent;
2041 : else
2042 12968 : priv_relid = RelationGetRelid(rel);
2043 :
2044 : /*
2045 : * Check if relation needs to be skipped based on privileges. This check
2046 : * happens also when building the relation list to vacuum for a manual
2047 : * operation, and needs to be done additionally here as VACUUM could
2048 : * happen across multiple transactions where privileges could have changed
2049 : * in-between. Make sure to only generate logs for VACUUM in this case.
2050 : */
2051 19786 : if (!vacuum_is_permitted_for_relation(priv_relid,
2052 : rel->rd_rel,
2053 19786 : params->options & ~VACOPT_ANALYZE))
2054 : {
2055 72 : relation_close(rel, lmode);
2056 72 : PopActiveSnapshot();
2057 72 : CommitTransactionCommand();
2058 72 : return false;
2059 : }
2060 :
2061 : /*
2062 : * Check that it's of a vacuumable relkind.
2063 : */
2064 19714 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2065 6978 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2066 6970 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2067 152 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2068 : {
2069 2 : ereport(WARNING,
2070 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2071 : RelationGetRelationName(rel))));
2072 2 : relation_close(rel, lmode);
2073 2 : PopActiveSnapshot();
2074 2 : CommitTransactionCommand();
2075 2 : return false;
2076 : }
2077 :
2078 : /*
2079 : * Silently ignore tables that are temp tables of other backends ---
2080 : * trying to vacuum these will lead to great unhappiness, since their
2081 : * contents are probably not up-to-date on disk. (We don't throw a
2082 : * warning here; it would just lead to chatter during a database-wide
2083 : * VACUUM.)
2084 : */
2085 19712 : if (RELATION_IS_OTHER_TEMP(rel))
2086 : {
2087 0 : relation_close(rel, lmode);
2088 0 : PopActiveSnapshot();
2089 0 : CommitTransactionCommand();
2090 0 : return false;
2091 : }
2092 :
2093 : /*
2094 : * Silently ignore partitioned tables as there is no work to be done. The
2095 : * useful work is on their child partitions, which have been queued up for
2096 : * us separately.
2097 : */
2098 19712 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2099 : {
2100 150 : relation_close(rel, lmode);
2101 150 : PopActiveSnapshot();
2102 150 : CommitTransactionCommand();
2103 : /* It's OK to proceed with ANALYZE on this table */
2104 150 : return true;
2105 : }
2106 :
2107 : /*
2108 : * Get a session-level lock too. This will protect our access to the
2109 : * relation across multiple transactions, so that we can vacuum the
2110 : * relation's TOAST table (if any) secure in the knowledge that no one is
2111 : * deleting the parent relation.
2112 : *
2113 : * NOTE: this cannot block, even if someone else is waiting for access,
2114 : * because the lock manager knows that both lock requests are from the
2115 : * same process.
2116 : */
2117 19562 : lockrelid = rel->rd_lockInfo.lockRelId;
2118 19562 : LockRelationIdForSession(&lockrelid, lmode);
2119 :
2120 : /*
2121 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2122 : * specified in VACUUM command, or when running in an autovacuum worker
2123 : */
2124 19562 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2125 : {
2126 : StdRdOptIndexCleanup vacuum_index_cleanup;
2127 :
2128 5494 : if (rel->rd_options == NULL)
2129 5238 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2130 : else
2131 256 : vacuum_index_cleanup =
2132 256 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2133 :
2134 5494 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2135 5470 : params->index_cleanup = VACOPTVALUE_AUTO;
2136 24 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2137 12 : params->index_cleanup = VACOPTVALUE_ENABLED;
2138 : else
2139 : {
2140 : Assert(vacuum_index_cleanup ==
2141 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2142 12 : params->index_cleanup = VACOPTVALUE_DISABLED;
2143 : }
2144 : }
2145 :
2146 : /*
2147 : * Set truncate option based on truncate reloption if it wasn't specified
2148 : * in VACUUM command, or when running in an autovacuum worker
2149 : */
2150 19562 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2151 : {
2152 5520 : if (rel->rd_options == NULL ||
2153 256 : ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
2154 5514 : params->truncate = VACOPTVALUE_ENABLED;
2155 : else
2156 6 : params->truncate = VACOPTVALUE_DISABLED;
2157 : }
2158 :
2159 : /*
2160 : * Remember the relation's TOAST relation for later, if the caller asked
2161 : * us to process it. In VACUUM FULL, though, the toast table is
2162 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2163 : * unless PROCESS_MAIN is disabled.
2164 : */
2165 19562 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2166 19396 : ((params->options & VACOPT_FULL) == 0 ||
2167 344 : (params->options & VACOPT_PROCESS_MAIN) == 0))
2168 19058 : toast_relid = rel->rd_rel->reltoastrelid;
2169 : else
2170 504 : toast_relid = InvalidOid;
2171 :
2172 : /*
2173 : * Switch to the table owner's userid, so that any index functions are run
2174 : * as that user. Also lock down security-restricted operations and
2175 : * arrange to make GUC variable changes local to this command. (This is
2176 : * unnecessary, but harmless, for lazy VACUUM.)
2177 : */
2178 19562 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2179 19562 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2180 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2181 19562 : save_nestlevel = NewGUCNestLevel();
2182 19562 : RestrictSearchPath();
2183 :
2184 : /*
2185 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2186 : * relation. Otherwise, we can skip this part. If processing the TOAST
2187 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2188 : * to be set when we recurse to the TOAST table.
2189 : */
2190 19562 : if (params->options & VACOPT_PROCESS_MAIN)
2191 : {
2192 : /*
2193 : * Do the actual work --- either FULL or "lazy" vacuum
2194 : */
2195 19408 : if (params->options & VACOPT_FULL)
2196 : {
2197 338 : ClusterParams cluster_params = {0};
2198 :
2199 : /* close relation before vacuuming, but hold lock until commit */
2200 338 : relation_close(rel, NoLock);
2201 338 : rel = NULL;
2202 :
2203 338 : if ((params->options & VACOPT_VERBOSE) != 0)
2204 2 : cluster_params.options |= CLUOPT_VERBOSE;
2205 :
2206 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2207 338 : cluster_rel(relid, InvalidOid, &cluster_params);
2208 : }
2209 : else
2210 19070 : table_relation_vacuum(rel, params, bstrategy);
2211 : }
2212 :
2213 : /* Roll back any GUC changes executed by index functions */
2214 19556 : AtEOXact_GUC(false, save_nestlevel);
2215 :
2216 : /* Restore userid and security context */
2217 19556 : SetUserIdAndSecContext(save_userid, save_sec_context);
2218 :
2219 : /* all done with this class, but hold lock until commit */
2220 19556 : if (rel)
2221 19224 : relation_close(rel, NoLock);
2222 :
2223 : /*
2224 : * Complete the transaction and free all temporary memory used.
2225 : */
2226 19556 : PopActiveSnapshot();
2227 19556 : CommitTransactionCommand();
2228 :
2229 : /*
2230 : * If the relation has a secondary toast rel, vacuum that too while we
2231 : * still hold the session lock on the main table. Note however that
2232 : * "analyze" will not get done on the toast table. This is good, because
2233 : * the toaster always uses hardcoded index access and statistics are
2234 : * totally unimportant for toast relations.
2235 : */
2236 19556 : if (toast_relid != InvalidOid)
2237 : {
2238 : VacuumParams toast_vacuum_params;
2239 :
2240 : /*
2241 : * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
2242 : * set toast_parent so that the privilege checks are done on the main
2243 : * relation. NB: This is only safe to do because we hold a session
2244 : * lock on the main relation that prevents concurrent deletion.
2245 : */
2246 6818 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2247 6818 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2248 6818 : toast_vacuum_params.toast_parent = relid;
2249 :
2250 6818 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
2251 : }
2252 :
2253 : /*
2254 : * Now release the session-level lock on the main table.
2255 : */
2256 19556 : UnlockRelationIdForSession(&lockrelid, lmode);
2257 :
2258 : /* Report that we really did it. */
2259 19556 : return true;
2260 : }
2261 :
2262 :
2263 : /*
2264 : * Open all the vacuumable indexes of the given relation, obtaining the
2265 : * specified kind of lock on each. Return an array of Relation pointers for
2266 : * the indexes into *Irel, and the number of indexes into *nindexes.
2267 : *
2268 : * We consider an index vacuumable if it is marked insertable (indisready).
2269 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2270 : * execution, and what we have is too corrupt to be processable. We will
2271 : * vacuum even if the index isn't indisvalid; this is important because in a
2272 : * unique index, uniqueness checks will be performed anyway and had better not
2273 : * hit dangling index pointers.
2274 : */
2275 : void
2276 30688 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2277 : int *nindexes, Relation **Irel)
2278 : {
2279 : List *indexoidlist;
2280 : ListCell *indexoidscan;
2281 : int i;
2282 :
2283 : Assert(lockmode != NoLock);
2284 :
2285 30688 : indexoidlist = RelationGetIndexList(relation);
2286 :
2287 : /* allocate enough memory for all indexes */
2288 30688 : i = list_length(indexoidlist);
2289 :
2290 30688 : if (i > 0)
2291 26380 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2292 : else
2293 4308 : *Irel = NULL;
2294 :
2295 : /* collect just the ready indexes */
2296 30688 : i = 0;
2297 74574 : foreach(indexoidscan, indexoidlist)
2298 : {
2299 43886 : Oid indexoid = lfirst_oid(indexoidscan);
2300 : Relation indrel;
2301 :
2302 43886 : indrel = index_open(indexoid, lockmode);
2303 43886 : if (indrel->rd_index->indisready)
2304 43886 : (*Irel)[i++] = indrel;
2305 : else
2306 0 : index_close(indrel, lockmode);
2307 : }
2308 :
2309 30688 : *nindexes = i;
2310 :
2311 30688 : list_free(indexoidlist);
2312 30688 : }
2313 :
2314 : /*
2315 : * Release the resources acquired by vac_open_indexes. Optionally release
2316 : * the locks (say NoLock to keep 'em).
2317 : */
2318 : void
2319 31362 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2320 : {
2321 31362 : if (Irel == NULL)
2322 4988 : return;
2323 :
2324 70248 : while (nindexes--)
2325 : {
2326 43874 : Relation ind = Irel[nindexes];
2327 :
2328 43874 : index_close(ind, lockmode);
2329 : }
2330 26374 : pfree(Irel);
2331 : }
2332 :
2333 : /*
2334 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2335 : *
2336 : * This should be called in each major loop of VACUUM processing,
2337 : * typically once per page processed.
2338 : */
2339 : void
2340 59492842 : vacuum_delay_point(void)
2341 : {
2342 59492842 : double msec = 0;
2343 :
2344 : /* Always check for interrupts */
2345 59492842 : CHECK_FOR_INTERRUPTS();
2346 :
2347 59492842 : if (InterruptPending ||
2348 59492842 : (!VacuumCostActive && !ConfigReloadPending))
2349 59135958 : return;
2350 :
2351 : /*
2352 : * Autovacuum workers should reload the configuration file if requested.
2353 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2354 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2355 : * vacuumed or analyzed.
2356 : */
2357 356884 : if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
2358 : {
2359 0 : ConfigReloadPending = false;
2360 0 : ProcessConfigFile(PGC_SIGHUP);
2361 0 : VacuumUpdateCosts();
2362 : }
2363 :
2364 : /*
2365 : * If we disabled cost-based delays after reloading the config file,
2366 : * return.
2367 : */
2368 356884 : if (!VacuumCostActive)
2369 0 : return;
2370 :
2371 : /*
2372 : * For parallel vacuum, the delay is computed based on the shared cost
2373 : * balance. See compute_parallel_delay.
2374 : */
2375 356884 : if (VacuumSharedCostBalance != NULL)
2376 0 : msec = compute_parallel_delay();
2377 356884 : else if (VacuumCostBalance >= vacuum_cost_limit)
2378 16 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2379 :
2380 : /* Nap if appropriate */
2381 356884 : if (msec > 0)
2382 : {
2383 16 : if (msec > vacuum_cost_delay * 4)
2384 0 : msec = vacuum_cost_delay * 4;
2385 :
2386 16 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2387 16 : pg_usleep(msec * 1000);
2388 16 : pgstat_report_wait_end();
2389 :
2390 : /*
2391 : * We don't want to ignore postmaster death during very long vacuums
2392 : * with vacuum_cost_delay configured. We can't use the usual
2393 : * WaitLatch() approach here because we want microsecond-based sleep
2394 : * durations above.
2395 : */
2396 16 : if (IsUnderPostmaster && !PostmasterIsAlive())
2397 0 : exit(1);
2398 :
2399 16 : VacuumCostBalance = 0;
2400 :
2401 : /*
2402 : * Balance and update limit values for autovacuum workers. We must do
2403 : * this periodically, as the number of workers across which we are
2404 : * balancing the limit may have changed.
2405 : *
2406 : * TODO: There may be better criteria for determining when to do this
2407 : * besides "check after napping".
2408 : */
2409 16 : AutoVacuumUpdateCostLimit();
2410 :
2411 : /* Might have gotten an interrupt while sleeping */
2412 16 : CHECK_FOR_INTERRUPTS();
2413 : }
2414 : }
2415 :
2416 : /*
2417 : * Computes the vacuum delay for parallel workers.
2418 : *
2419 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2420 : * worker to sleep in proportion to the share of work it's done. We achieve this
2421 : * by allowing all parallel vacuum workers including the leader process to
2422 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2423 : * We allow each worker to update it as and when it has incurred any cost and
2424 : * then based on that decide whether it needs to sleep. We compute the time
2425 : * to sleep for a worker based on the cost it has incurred
2426 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2427 : * that amount. This avoids putting to sleep those workers which have done less
2428 : * I/O than other workers and therefore ensure that workers
2429 : * which are doing more I/O got throttled more.
2430 : *
2431 : * We allow a worker to sleep only if it has performed I/O above a certain
2432 : * threshold, which is calculated based on the number of active workers
2433 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2434 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2435 : * the required throttling if we force a worker that has done more than 50%
2436 : * of its share of work to sleep.
2437 : */
2438 : static double
2439 0 : compute_parallel_delay(void)
2440 : {
2441 0 : double msec = 0;
2442 : uint32 shared_balance;
2443 : int nworkers;
2444 :
2445 : /* Parallel vacuum must be active */
2446 : Assert(VacuumSharedCostBalance);
2447 :
2448 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2449 :
2450 : /* At least count itself */
2451 : Assert(nworkers >= 1);
2452 :
2453 : /* Update the shared cost balance value atomically */
2454 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2455 :
2456 : /* Compute the total local balance for the current worker */
2457 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2458 :
2459 0 : if ((shared_balance >= vacuum_cost_limit) &&
2460 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2461 : {
2462 : /* Compute sleep time based on the local cost balance */
2463 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2464 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2465 0 : VacuumCostBalanceLocal = 0;
2466 : }
2467 :
2468 : /*
2469 : * Reset the local balance as we accumulated it into the shared value.
2470 : */
2471 0 : VacuumCostBalance = 0;
2472 :
2473 0 : return msec;
2474 : }
2475 :
2476 : /*
2477 : * A wrapper function of defGetBoolean().
2478 : *
2479 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2480 : * of true and false.
2481 : */
2482 : static VacOptValue
2483 316 : get_vacoptval_from_boolean(DefElem *def)
2484 : {
2485 316 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2486 : }
2487 :
2488 : /*
2489 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2490 : *
2491 : * Returns bulk delete stats derived from input stats
2492 : */
2493 : IndexBulkDeleteResult *
2494 1666 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2495 : TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
2496 : {
2497 : /* Do bulk deletion */
2498 1666 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2499 : (void *) dead_items);
2500 :
2501 1666 : ereport(ivinfo->message_level,
2502 : (errmsg("scanned index \"%s\" to remove %lld row versions",
2503 : RelationGetRelationName(ivinfo->index),
2504 : (long long) dead_items_info->num_items)));
2505 :
2506 1666 : return istat;
2507 : }
2508 :
2509 : /*
2510 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2511 : *
2512 : * Returns bulk delete stats derived from input stats
2513 : */
2514 : IndexBulkDeleteResult *
2515 26566 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2516 : {
2517 26566 : istat = index_vacuum_cleanup(ivinfo, istat);
2518 :
2519 26566 : if (istat)
2520 1896 : ereport(ivinfo->message_level,
2521 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2522 : RelationGetRelationName(ivinfo->index),
2523 : istat->num_index_tuples,
2524 : istat->num_pages),
2525 : errdetail("%.0f index row versions were removed.\n"
2526 : "%u index pages were newly deleted.\n"
2527 : "%u index pages are currently deleted, of which %u are currently reusable.",
2528 : istat->tuples_removed,
2529 : istat->pages_newly_deleted,
2530 : istat->pages_deleted, istat->pages_free)));
2531 :
2532 26566 : return istat;
2533 : }
2534 :
2535 : /*
2536 : * vac_tid_reaped() -- is a particular tid deletable?
2537 : *
2538 : * This has the right signature to be an IndexBulkDeleteCallback.
2539 : */
2540 : static bool
2541 4467172 : vac_tid_reaped(ItemPointer itemptr, void *state)
2542 : {
2543 4467172 : TidStore *dead_items = (TidStore *) state;
2544 :
2545 4467172 : return TidStoreIsMember(dead_items, itemptr);
2546 : }
|