Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * vacuum.c
4 : * The postgres vacuum cleaner.
5 : *
6 : * This file includes (a) control and dispatch code for VACUUM and ANALYZE
7 : * commands, (b) code to compute various vacuum thresholds, and (c) index
8 : * vacuum code.
9 : *
10 : * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
11 : * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
12 : * CLUSTER, handled in cluster.c.
13 : *
14 : *
15 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16 : * Portions Copyright (c) 1994, Regents of the University of California
17 : *
18 : *
19 : * IDENTIFICATION
20 : * src/backend/commands/vacuum.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include <math.h>
27 :
28 : #include "access/clog.h"
29 : #include "access/commit_ts.h"
30 : #include "access/genam.h"
31 : #include "access/heapam.h"
32 : #include "access/htup_details.h"
33 : #include "access/multixact.h"
34 : #include "access/tableam.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "catalog/namespace.h"
38 : #include "catalog/index.h"
39 : #include "catalog/pg_database.h"
40 : #include "catalog/pg_inherits.h"
41 : #include "catalog/pg_namespace.h"
42 : #include "commands/cluster.h"
43 : #include "commands/defrem.h"
44 : #include "commands/tablecmds.h"
45 : #include "commands/vacuum.h"
46 : #include "miscadmin.h"
47 : #include "nodes/makefuncs.h"
48 : #include "pgstat.h"
49 : #include "postmaster/autovacuum.h"
50 : #include "postmaster/bgworker_internals.h"
51 : #include "postmaster/interrupt.h"
52 : #include "storage/bufmgr.h"
53 : #include "storage/lmgr.h"
54 : #include "storage/pmsignal.h"
55 : #include "storage/proc.h"
56 : #include "storage/procarray.h"
57 : #include "utils/acl.h"
58 : #include "utils/fmgroids.h"
59 : #include "utils/guc.h"
60 : #include "utils/guc_hooks.h"
61 : #include "utils/memutils.h"
62 : #include "utils/pg_rusage.h"
63 : #include "utils/snapmgr.h"
64 : #include "utils/syscache.h"
65 :
66 :
67 : /*
68 : * GUC parameters (declared here without initializers)
69 : */
70 : int vacuum_freeze_min_age;
71 : int vacuum_freeze_table_age;
72 : int vacuum_multixact_freeze_min_age;
73 : int vacuum_multixact_freeze_table_age;
74 : int vacuum_failsafe_age;
75 : int vacuum_multixact_failsafe_age;
76 :
77 : /*
78 : * Variables for cost-based vacuum delay. The defaults differ between
79 : * autovacuum and vacuum. They should be set with the appropriate GUC value in
80 : * vacuum code. They are initialized here to the defaults for client backends
81 : * executing VACUUM or ANALYZE.
82 : */
83 : double vacuum_cost_delay = 0;
84 : int vacuum_cost_limit = 200;
85 :
86 : /*
87 : * VacuumFailsafeActive is defined as a global so that we can determine
88 : * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
89 : * If failsafe mode has been engaged, we will not re-enable cost-based delay
90 : * for the table until after vacuuming has completed, regardless of other
91 : * settings.
92 : *
93 : * Only VACUUM code should inspect this variable and only table access methods
94 : * should set it to true. In Table AM-agnostic VACUUM code, this variable is
95 : * inspected to determine whether or not to allow cost-based delays. Table AMs
96 : * are free to set it if they desire this behavior, but it is false by default
97 : * and reset to false in between vacuuming each relation.
98 : */
99 : bool VacuumFailsafeActive = false;
100 :
101 : /*
102 : * Variables for cost-based parallel vacuum. See the comments atop
103 : * compute_parallel_delay() to understand how they are used.
104 : */
105 : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
106 : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
107 : int VacuumCostBalanceLocal = 0;
108 :
109 : /* Prototypes for file-local (static) helper functions */
110 : static List *expand_vacuum_rel(VacuumRelation *vrel,
111 : MemoryContext vac_context, int options);
112 : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
113 : static void vac_truncate_clog(TransactionId frozenXID,
114 : MultiXactId minMulti,
115 : TransactionId lastSaneFrozenXid,
116 : MultiXactId lastSaneMinMulti);
117 : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
118 : bool skip_privs, BufferAccessStrategy bstrategy);
119 : static double compute_parallel_delay(void);
120 : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
121 : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
122 : static int vac_cmp_itemptr(const void *left, const void *right);
123 :
124 : /*
125 : * GUC check function to ensure GUC value specified is within the allowable
126 : * range.
127 : */
128 : bool
129 3698 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
130 : GucSource source)
131 : {
132 : /* Value upper and lower hard limits are inclusive */
133 3698 : if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
134 3698 : *newval <= MAX_BAS_VAC_RING_SIZE_KB))
135 3698 : return true;
136 :
137 : /* Value does not fall within any allowable range */
138 0 : GUC_check_errdetail("\"vacuum_buffer_usage_limit\" must be 0 or between %d kB and %d kB",
139 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
140 :
141 0 : return false;
142 : }
143 :
144 : /*
145 : * Primary entry point for manual VACUUM and ANALYZE commands
146 : *
147 : * This is mainly a preparation wrapper for the real operations that will
148 : * happen in vacuum().
149 : */
150 : void
151 10064 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
152 : {
153 : VacuumParams params;
154 10064 : BufferAccessStrategy bstrategy = NULL;
155 10064 : bool verbose = false;
156 10064 : bool skip_locked = false;
157 10064 : bool analyze = false;
158 10064 : bool freeze = false;
159 10064 : bool full = false;
160 10064 : bool disable_page_skipping = false;
161 10064 : bool process_main = true;
162 10064 : bool process_toast = true;
163 : int ring_size;
164 10064 : bool skip_database_stats = false;
165 10064 : bool only_database_stats = false;
166 : MemoryContext vac_context;
167 : ListCell *lc;
168 :
169 : /* index_cleanup and truncate values unspecified for now */
170 10064 : params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
171 10064 : params.truncate = VACOPTVALUE_UNSPECIFIED;
172 :
173 : /* By default parallel vacuum is enabled */
174 10064 : params.nworkers = 0;
175 :
176 : /*
177 : * Set this to an invalid value so it is clear whether or not a
178 : * BUFFER_USAGE_LIMIT was specified when making the access strategy.
179 : */
180 10064 : ring_size = -1;
181 :
182 : /* Parse options list */
183 16574 : foreach(lc, vacstmt->options)
184 : {
185 6546 : DefElem *opt = (DefElem *) lfirst(lc);
186 :
187 : /* Parse common options for VACUUM and ANALYZE */
188 6546 : if (strcmp(opt->defname, "verbose") == 0)
189 28 : verbose = defGetBoolean(opt);
190 6518 : else if (strcmp(opt->defname, "skip_locked") == 0)
191 334 : skip_locked = defGetBoolean(opt);
192 6184 : else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
193 : {
194 : const char *hintmsg;
195 : int result;
196 : char *vac_buffer_size;
197 :
198 54 : vac_buffer_size = defGetString(opt);
199 :
200 : /*
201 : * Check that the specified value is valid and the size falls
202 : * within the hard upper and lower limits if it is not 0.
203 : */
204 54 : if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
205 48 : (result != 0 &&
206 36 : (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
207 : {
208 18 : ereport(ERROR,
209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 : errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
211 : MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
212 : hintmsg ? errhint("%s", _(hintmsg)) : 0));
213 : }
214 :
215 36 : ring_size = result;
216 : }
217 6130 : else if (!vacstmt->is_vacuumcmd)
218 6 : ereport(ERROR,
219 : (errcode(ERRCODE_SYNTAX_ERROR),
220 : errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
221 : parser_errposition(pstate, opt->location)));
222 :
223 : /* Parse options available on VACUUM */
224 6124 : else if (strcmp(opt->defname, "analyze") == 0)
225 820 : analyze = defGetBoolean(opt);
226 5304 : else if (strcmp(opt->defname, "freeze") == 0)
227 1120 : freeze = defGetBoolean(opt);
228 4184 : else if (strcmp(opt->defname, "full") == 0)
229 346 : full = defGetBoolean(opt);
230 3838 : else if (strcmp(opt->defname, "disable_page_skipping") == 0)
231 180 : disable_page_skipping = defGetBoolean(opt);
232 3658 : else if (strcmp(opt->defname, "index_cleanup") == 0)
233 : {
234 : /* Interpret no string as the default, which is 'auto' */
235 172 : if (!opt->arg)
236 0 : params.index_cleanup = VACOPTVALUE_AUTO;
237 : else
238 : {
239 172 : char *sval = defGetString(opt);
240 :
241 : /* Try matching on 'auto' string, or fall back on boolean */
242 172 : if (pg_strcasecmp(sval, "auto") == 0)
243 6 : params.index_cleanup = VACOPTVALUE_AUTO;
244 : else
245 166 : params.index_cleanup = get_vacoptval_from_boolean(opt);
246 : }
247 : }
248 3486 : else if (strcmp(opt->defname, "process_main") == 0)
249 154 : process_main = defGetBoolean(opt);
250 3332 : else if (strcmp(opt->defname, "process_toast") == 0)
251 160 : process_toast = defGetBoolean(opt);
252 3172 : else if (strcmp(opt->defname, "truncate") == 0)
253 148 : params.truncate = get_vacoptval_from_boolean(opt);
254 3024 : else if (strcmp(opt->defname, "parallel") == 0)
255 : {
256 338 : if (opt->arg == NULL)
257 : {
258 6 : ereport(ERROR,
259 : (errcode(ERRCODE_SYNTAX_ERROR),
260 : errmsg("parallel option requires a value between 0 and %d",
261 : MAX_PARALLEL_WORKER_LIMIT),
262 : parser_errposition(pstate, opt->location)));
263 : }
264 : else
265 : {
266 : int nworkers;
267 :
268 332 : nworkers = defGetInt32(opt);
269 332 : if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
270 6 : ereport(ERROR,
271 : (errcode(ERRCODE_SYNTAX_ERROR),
272 : errmsg("parallel workers for vacuum must be between 0 and %d",
273 : MAX_PARALLEL_WORKER_LIMIT),
274 : parser_errposition(pstate, opt->location)));
275 :
276 : /*
277 : * Disable parallel vacuum, if user has specified parallel
278 : * degree as zero.
279 : */
280 326 : if (nworkers == 0)
281 154 : params.nworkers = -1;
282 : else
283 172 : params.nworkers = nworkers;
284 : }
285 : }
286 2686 : else if (strcmp(opt->defname, "skip_database_stats") == 0)
287 2604 : skip_database_stats = defGetBoolean(opt);
288 82 : else if (strcmp(opt->defname, "only_database_stats") == 0)
289 82 : only_database_stats = defGetBoolean(opt);
290 : else
291 0 : ereport(ERROR,
292 : (errcode(ERRCODE_SYNTAX_ERROR),
293 : errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
294 : parser_errposition(pstate, opt->location)));
295 : }
296 :
297 : /* Set vacuum options */
298 10028 : params.options =
299 10028 : (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
300 10028 : (verbose ? VACOPT_VERBOSE : 0) |
301 10028 : (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
302 10028 : (analyze ? VACOPT_ANALYZE : 0) |
303 10028 : (freeze ? VACOPT_FREEZE : 0) |
304 10028 : (full ? VACOPT_FULL : 0) |
305 10028 : (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
306 10028 : (process_main ? VACOPT_PROCESS_MAIN : 0) |
307 10028 : (process_toast ? VACOPT_PROCESS_TOAST : 0) |
308 10028 : (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
309 10028 : (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
310 :
311 : /* sanity checks on options */
312 : Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
313 : Assert((params.options & VACOPT_VACUUM) ||
314 : !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
315 :
316 10028 : if ((params.options & VACOPT_FULL) && params.nworkers > 0)
317 6 : ereport(ERROR,
318 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
319 : errmsg("VACUUM FULL cannot be performed in parallel")));
320 :
321 : /*
322 : * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
323 : * ERROR for that case. VACUUM (FULL, ANALYZE) does make use of it, so
324 : * we'll permit that.
325 : */
326 10022 : if (ring_size != -1 && (params.options & VACOPT_FULL) &&
327 6 : !(params.options & VACOPT_ANALYZE))
328 6 : ereport(ERROR,
329 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
330 : errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
331 :
332 : /*
333 : * Make sure VACOPT_ANALYZE is specified if any column lists are present.
334 : */
335 10016 : if (!(params.options & VACOPT_ANALYZE))
336 : {
337 7978 : foreach(lc, vacstmt->rels)
338 : {
339 3668 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
340 :
341 3668 : if (vrel->va_cols != NIL)
342 6 : ereport(ERROR,
343 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
344 : errmsg("ANALYZE option must be specified when a column list is provided")));
345 : }
346 : }
347 :
348 :
349 : /*
350 : * Sanity check DISABLE_PAGE_SKIPPING option.
351 : */
352 10010 : if ((params.options & VACOPT_FULL) != 0 &&
353 322 : (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
354 0 : ereport(ERROR,
355 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
356 : errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
357 :
358 : /* sanity check for PROCESS_TOAST */
359 10010 : if ((params.options & VACOPT_FULL) != 0 &&
360 322 : (params.options & VACOPT_PROCESS_TOAST) == 0)
361 6 : ereport(ERROR,
362 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
363 : errmsg("PROCESS_TOAST required with VACUUM FULL")));
364 :
365 : /* sanity check for ONLY_DATABASE_STATS */
366 10004 : if (params.options & VACOPT_ONLY_DATABASE_STATS)
367 : {
368 : Assert(params.options & VACOPT_VACUUM);
369 82 : if (vacstmt->rels != NIL)
370 6 : ereport(ERROR,
371 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
372 : errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
373 : /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
374 76 : if (params.options & ~(VACOPT_VACUUM |
375 : VACOPT_VERBOSE |
376 : VACOPT_PROCESS_MAIN |
377 : VACOPT_PROCESS_TOAST |
378 : VACOPT_ONLY_DATABASE_STATS))
379 0 : ereport(ERROR,
380 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
381 : errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
382 : }
383 :
384 : /*
385 : * All freeze ages are zero if the FREEZE option is given; otherwise pass
386 : * them as -1 which means to use the default values.
387 : */
388 9998 : if (params.options & VACOPT_FREEZE)
389 : {
390 1120 : params.freeze_min_age = 0;
391 1120 : params.freeze_table_age = 0;
392 1120 : params.multixact_freeze_min_age = 0;
393 1120 : params.multixact_freeze_table_age = 0;
394 : }
395 : else
396 : {
397 8878 : params.freeze_min_age = -1;
398 8878 : params.freeze_table_age = -1;
399 8878 : params.multixact_freeze_min_age = -1;
400 8878 : params.multixact_freeze_table_age = -1;
401 : }
402 :
403 : /* user-invoked vacuum is never "for wraparound" */
404 9998 : params.is_wraparound = false;
405 :
406 : /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
407 9998 : params.log_min_duration = -1;
408 :
409 : /*
410 : * Create special memory context for cross-transaction storage.
411 : *
412 : * Since it is a child of PortalContext, it will go away eventually even
413 : * if we suffer an error; there's no need for special abort cleanup logic.
414 : */
415 9998 : vac_context = AllocSetContextCreate(PortalContext,
416 : "Vacuum",
417 : ALLOCSET_DEFAULT_SIZES);
418 :
419 : /*
420 : * Make a buffer strategy object in the cross-transaction memory context.
421 : * We needn't bother making this for VACUUM (FULL) or VACUUM
422 : * (ONLY_DATABASE_STATS) as they'll not make use of it. VACUUM (FULL,
423 : * ANALYZE) is possible, so we'd better ensure that we make a strategy
424 : * when we see ANALYZE.
425 : */
426 9998 : if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
427 392 : VACOPT_FULL)) == 0 ||
428 392 : (params.options & VACOPT_ANALYZE) != 0)
429 : {
430 :
431 9612 : MemoryContext old_context = MemoryContextSwitchTo(vac_context);
432 :
433 : Assert(ring_size >= -1);
434 :
435 : /*
436 : * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
437 : * command, it overrides the value of VacuumBufferUsageLimit. Either
438 : * value may be 0, in which case GetAccessStrategyWithSize() will
439 : * return NULL, effectively allowing full use of shared buffers.
440 : */
441 9612 : if (ring_size == -1)
442 9582 : ring_size = VacuumBufferUsageLimit;
443 :
444 9612 : bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
445 :
446 9612 : MemoryContextSwitchTo(old_context);
447 : }
448 :
449 : /* Now go through the common routine */
450 9998 : vacuum(vacstmt->rels, ¶ms, bstrategy, vac_context, isTopLevel);
451 :
452 : /* Finally, clean up the vacuum memory context */
453 9872 : MemoryContextDelete(vac_context);
454 9872 : }
455 :
456 : /*
457 : * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
458 : *
459 : * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
460 : * we process all relevant tables in the database. For each VacuumRelation,
461 : * if a valid OID is supplied, the table with that OID is what to process;
462 : * otherwise, the VacuumRelation's RangeVar indicates what to process.
463 : *
464 : * params contains a set of parameters that can be used to customize the
465 : * behavior.
466 : *
467 : * bstrategy may be passed in as NULL when the caller does not want to
468 : * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
469 : * otherwise, the caller must build a BufferAccessStrategy with the number of
470 : * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
471 : * using.
472 : *
473 : * isTopLevel should be passed down from ProcessUtility.
474 : *
475 : * It is the caller's responsibility that all parameters are allocated in a
476 : * memory context that will not disappear at transaction commit.
477 : */
478 : void
479 10358 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
480 : MemoryContext vac_context, bool isTopLevel)
481 : {
482 : static bool in_vacuum = false;
483 :
484 : const char *stmttype;
485 : volatile bool in_outer_xact,
486 : use_own_xacts;
487 :
488 : Assert(params != NULL);
489 :
490 10358 : stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
491 :
492 : /*
493 : * We cannot run VACUUM inside a user transaction block; if we were inside
494 : * a transaction, then our commit- and start-transaction-command calls
495 : * would not have the intended effect! There are numerous other subtle
496 : * dependencies on this, too.
497 : *
498 : * ANALYZE (without VACUUM) can run either way.
499 : */
500 10358 : if (params->options & VACOPT_VACUUM)
501 : {
502 5304 : PreventInTransactionBlock(isTopLevel, stmttype);
503 5292 : in_outer_xact = false;
504 : }
505 : else
506 5054 : in_outer_xact = IsInTransactionBlock(isTopLevel);
507 :
508 : /*
509 : * Check for and disallow recursive calls. This could happen when VACUUM
510 : * FULL or ANALYZE calls a hostile index expression that itself calls
511 : * ANALYZE.
512 : */
513 10346 : if (in_vacuum)
514 12 : ereport(ERROR,
515 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
516 : errmsg("%s cannot be executed from VACUUM or ANALYZE",
517 : stmttype)));
518 :
519 : /*
520 : * Build list of relation(s) to process, putting any new data in
521 : * vac_context for safekeeping.
522 : */
523 10334 : if (params->options & VACOPT_ONLY_DATABASE_STATS)
524 : {
525 : /* We don't process any tables in this case */
526 : Assert(relations == NIL);
527 : }
528 10258 : else if (relations != NIL)
529 : {
530 9044 : List *newrels = NIL;
531 : ListCell *lc;
532 :
533 18176 : foreach(lc, relations)
534 : {
535 9168 : VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
536 : List *sublist;
537 : MemoryContext old_context;
538 :
539 9168 : sublist = expand_vacuum_rel(vrel, vac_context, params->options);
540 9132 : old_context = MemoryContextSwitchTo(vac_context);
541 9132 : newrels = list_concat(newrels, sublist);
542 9132 : MemoryContextSwitchTo(old_context);
543 : }
544 9008 : relations = newrels;
545 : }
546 : else
547 1214 : relations = get_all_vacuum_rels(vac_context, params->options);
548 :
549 : /*
550 : * Decide whether we need to start/commit our own transactions.
551 : *
552 : * For VACUUM (with or without ANALYZE): always do so, so that we can
553 : * release locks as soon as possible. (We could possibly use the outer
554 : * transaction for a one-table VACUUM, but handling TOAST tables would be
555 : * problematic.)
556 : *
557 : * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
558 : * start/commit our own transactions. Also, there's no need to do so if
559 : * only processing one relation. For multiple relations when not within a
560 : * transaction block, and also in an autovacuum worker, use own
561 : * transactions so we can release locks sooner.
562 : */
563 10298 : if (params->options & VACOPT_VACUUM)
564 5280 : use_own_xacts = true;
565 : else
566 : {
567 : Assert(params->options & VACOPT_ANALYZE);
568 5018 : if (IsAutoVacuumWorkerProcess())
569 174 : use_own_xacts = true;
570 4844 : else if (in_outer_xact)
571 208 : use_own_xacts = false;
572 4636 : else if (list_length(relations) > 1)
573 1152 : use_own_xacts = true;
574 : else
575 3484 : use_own_xacts = false;
576 : }
577 :
578 : /*
579 : * vacuum_rel expects to be entered with no transaction active; it will
580 : * start and commit its own transaction. But we are called by an SQL
581 : * command, and so we are executing inside a transaction already. We
582 : * commit the transaction started in PostgresMain() here, and start
583 : * another one before exiting to match the commit waiting for us back in
584 : * PostgresMain().
585 : */
586 10298 : if (use_own_xacts)
587 : {
588 : Assert(!in_outer_xact);
589 :
590 : /* ActiveSnapshot is not set by autovacuum */
591 6606 : if (ActiveSnapshotSet())
592 6246 : PopActiveSnapshot();
593 :
594 : /* matches the StartTransaction in PostgresMain() */
595 6606 : CommitTransactionCommand();
596 : }
597 :
598 : /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
599 10298 : PG_TRY();
600 : {
601 : ListCell *cur;
602 :
603 10298 : in_vacuum = true;
604 10298 : VacuumFailsafeActive = false;
605 10298 : VacuumUpdateCosts();
606 10298 : VacuumCostBalance = 0;
607 10298 : VacuumPageHit = 0;
608 10298 : VacuumPageMiss = 0;
609 10298 : VacuumPageDirty = 0;
610 10298 : VacuumCostBalanceLocal = 0;
611 10298 : VacuumSharedCostBalance = NULL;
612 10298 : VacuumActiveNWorkers = NULL;
613 :
614 : /*
615 : * Loop to process each selected relation.
616 : */
617 104266 : foreach(cur, relations)
618 : {
619 94034 : VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
620 :
621 94034 : if (params->options & VACOPT_VACUUM)
622 : {
623 47120 : if (!vacuum_rel(vrel->oid, vrel->relation, params, false,
624 : bstrategy))
625 62 : continue;
626 : }
627 :
628 93966 : if (params->options & VACOPT_ANALYZE)
629 : {
630 : /*
631 : * If using separate xacts, start one for analyze. Otherwise,
632 : * we can use the outer transaction.
633 : */
634 47960 : if (use_own_xacts)
635 : {
636 44312 : StartTransactionCommand();
637 : /* functions in indexes may want a snapshot set */
638 44312 : PushActiveSnapshot(GetTransactionSnapshot());
639 : }
640 :
641 47960 : analyze_rel(vrel->oid, vrel->relation, params,
642 : vrel->va_cols, in_outer_xact, bstrategy);
643 :
644 47900 : if (use_own_xacts)
645 : {
646 44274 : PopActiveSnapshot();
647 44274 : CommitTransactionCommand();
648 : }
649 : else
650 : {
651 : /*
652 : * If we're not using separate xacts, better separate the
653 : * ANALYZE actions with CCIs. This avoids trouble if user
654 : * says "ANALYZE t, t".
655 : */
656 3626 : CommandCounterIncrement();
657 : }
658 : }
659 :
660 : /*
661 : * Ensure VacuumFailsafeActive has been reset before vacuuming the
662 : * next relation.
663 : */
664 93906 : VacuumFailsafeActive = false;
665 : }
666 : }
667 66 : PG_FINALLY();
668 : {
669 10298 : in_vacuum = false;
670 10298 : VacuumCostActive = false;
671 10298 : VacuumFailsafeActive = false;
672 10298 : VacuumCostBalance = 0;
673 : }
674 10298 : PG_END_TRY();
675 :
676 : /*
677 : * Finish up processing.
678 : */
679 10232 : if (use_own_xacts)
680 : {
681 : /* here, we are not in a transaction */
682 :
683 : /*
684 : * This matches the CommitTransaction waiting for us in
685 : * PostgresMain().
686 : */
687 6562 : StartTransactionCommand();
688 : }
689 :
690 10232 : if ((params->options & VACOPT_VACUUM) &&
691 5248 : !(params->options & VACOPT_SKIP_DATABASE_STATS))
692 : {
693 : /*
694 : * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
695 : */
696 2460 : vac_update_datfrozenxid();
697 : }
698 :
699 10232 : }
700 :
701 : /*
702 : * Check if the current user has privileges to vacuum or analyze the relation.
703 : * If not, issue a WARNING log message and return false to let the caller
704 : * decide what to do with this relation. This routine is used to decide if a
705 : * relation can be processed for VACUUM or ANALYZE.
706 : */
707 : bool
708 187110 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
709 : bits32 options)
710 : {
711 : char *relname;
712 :
713 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
714 :
715 : /*----------
716 : * A role has privileges to vacuum or analyze the relation if any of the
717 : * following are true:
718 : * - the role is a superuser
719 : * - the role owns the relation
720 : * - the role owns the current database and the relation is not shared
721 : * - the role has been granted the MAINTAIN privilege on the relation
722 : * - the role has privileges to vacuum/analyze any of the relation's
723 : * partition ancestors
724 : *----------
725 : */
726 217656 : if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) && !reltuple->relisshared) ||
727 30976 : pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK ||
728 430 : has_partition_ancestor_privs(relid, GetUserId(), ACL_MAINTAIN))
729 186878 : return true;
730 :
731 232 : relname = NameStr(reltuple->relname);
732 :
733 232 : if ((options & VACOPT_VACUUM) != 0)
734 : {
735 152 : ereport(WARNING,
736 : (errmsg("permission denied to vacuum \"%s\", skipping it",
737 : relname)));
738 :
739 : /*
740 : * For VACUUM ANALYZE, both logs could show up, but just generate
741 : * information for VACUUM as that would be the first one to be
742 : * processed.
743 : */
744 152 : return false;
745 : }
746 :
747 80 : if ((options & VACOPT_ANALYZE) != 0)
748 80 : ereport(WARNING,
749 : (errmsg("permission denied to analyze \"%s\", skipping it",
750 : relname)));
751 :
752 80 : return false;
753 : }
754 :
755 :
756 : /*
757 : * vacuum_open_relation
758 : *
759 : * This routine is used for attempting to open and lock a relation which
760 : * is going to be vacuumed or analyzed. If the relation cannot be opened
761 : * or locked, a log is emitted if possible.
762 : */
763 : Relation
764 122150 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
765 : bool verbose, LOCKMODE lmode)
766 : {
767 : Relation rel;
768 122150 : bool rel_lock = true;
769 : int elevel;
770 :
771 : Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
772 :
773 : /*
774 : * Open the relation and get the appropriate lock on it.
775 : *
776 : * There's a race condition here: the relation may have gone away since
777 : * the last time we saw it. If so, we don't need to vacuum or analyze it.
778 : *
779 : * If we've been asked not to wait for the relation lock, acquire it first
780 : * in non-blocking mode, before calling try_relation_open().
781 : */
782 122150 : if (!(options & VACOPT_SKIP_LOCKED))
783 121154 : rel = try_relation_open(relid, lmode);
784 996 : else if (ConditionalLockRelationOid(relid, lmode))
785 968 : rel = try_relation_open(relid, NoLock);
786 : else
787 : {
788 28 : rel = NULL;
789 28 : rel_lock = false;
790 : }
791 :
792 : /* if relation is opened, leave */
793 122150 : if (rel)
794 122110 : return rel;
795 :
796 : /*
797 : * Relation could not be opened, hence generate if possible a log
798 : * informing on the situation.
799 : *
800 : * If the RangeVar is not defined, we do not have enough information to
801 : * provide a meaningful log statement. Chances are that the caller has
802 : * intentionally not provided this information so that this logging is
803 : * skipped, anyway.
804 : */
805 40 : if (relation == NULL)
806 18 : return NULL;
807 :
808 : /*
809 : * Determine the log level.
810 : *
811 : * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
812 : * statements in the permission checks; otherwise, only log if the caller
813 : * so requested.
814 : */
815 22 : if (!IsAutoVacuumWorkerProcess())
816 14 : elevel = WARNING;
817 8 : else if (verbose)
818 8 : elevel = LOG;
819 : else
820 0 : return NULL;
821 :
822 22 : if ((options & VACOPT_VACUUM) != 0)
823 : {
824 10 : if (!rel_lock)
825 6 : ereport(elevel,
826 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
827 : errmsg("skipping vacuum of \"%s\" --- lock not available",
828 : relation->relname)));
829 : else
830 4 : ereport(elevel,
831 : (errcode(ERRCODE_UNDEFINED_TABLE),
832 : errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
833 : relation->relname)));
834 :
835 : /*
836 : * For VACUUM ANALYZE, both logs could show up, but just generate
837 : * information for VACUUM as that would be the first one to be
838 : * processed.
839 : */
840 10 : return NULL;
841 : }
842 :
843 12 : if ((options & VACOPT_ANALYZE) != 0)
844 : {
845 12 : if (!rel_lock)
846 10 : ereport(elevel,
847 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
848 : errmsg("skipping analyze of \"%s\" --- lock not available",
849 : relation->relname)));
850 : else
851 2 : ereport(elevel,
852 : (errcode(ERRCODE_UNDEFINED_TABLE),
853 : errmsg("skipping analyze of \"%s\" --- relation no longer exists",
854 : relation->relname)));
855 : }
856 :
857 12 : return NULL;
858 : }
859 :
860 :
861 : /*
862 : * Given a VacuumRelation, fill in the table OID if it wasn't specified,
863 : * and optionally add VacuumRelations for partitions of the table.
864 : *
865 : * If a VacuumRelation does not have an OID supplied and is a partitioned
866 : * table, an extra entry will be added to the output for each partition.
867 : * Presently, only autovacuum supplies OIDs when calling vacuum(), and
868 : * it does not want us to expand partitioned tables.
869 : *
870 : * We take care not to modify the input data structure, but instead build
871 : * new VacuumRelation(s) to return. (But note that they will reference
872 : * unmodified parts of the input, eg column lists.) New data structures
873 : * are made in vac_context.
874 : */
875 : static List *
876 9168 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
877 : int options)
878 : {
879 9168 : List *vacrels = NIL;
880 : MemoryContext oldcontext;
881 :
882 : /* If caller supplied OID, there's nothing we need do here. */
883 9168 : if (OidIsValid(vrel->oid))
884 : {
885 360 : oldcontext = MemoryContextSwitchTo(vac_context);
886 360 : vacrels = lappend(vacrels, vrel);
887 360 : MemoryContextSwitchTo(oldcontext);
888 : }
889 : else
890 : {
891 : /* Process a specific relation, and possibly partitions thereof */
892 : Oid relid;
893 : HeapTuple tuple;
894 : Form_pg_class classForm;
895 : bool include_parts;
896 : int rvr_opts;
897 :
898 : /*
899 : * Since autovacuum workers supply OIDs when calling vacuum(), no
900 : * autovacuum worker should reach this code.
901 : */
902 : Assert(!IsAutoVacuumWorkerProcess());
903 :
904 : /*
905 : * We transiently take AccessShareLock to protect the syscache lookup
906 : * below, as well as find_all_inheritors's expectation that the caller
907 : * holds some lock on the starting relation.
908 : */
909 8808 : rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
910 8808 : relid = RangeVarGetRelidExtended(vrel->relation,
911 : AccessShareLock,
912 : rvr_opts,
913 : NULL, NULL);
914 :
915 : /*
916 : * If the lock is unavailable, emit the same log statement that
917 : * vacuum_rel() and analyze_rel() would.
918 : */
919 8772 : if (!OidIsValid(relid))
920 : {
921 8 : if (options & VACOPT_VACUUM)
922 6 : ereport(WARNING,
923 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
924 : errmsg("skipping vacuum of \"%s\" --- lock not available",
925 : vrel->relation->relname)));
926 : else
927 2 : ereport(WARNING,
928 : (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
929 : errmsg("skipping analyze of \"%s\" --- lock not available",
930 : vrel->relation->relname)));
931 8 : return vacrels;
932 : }
933 :
934 : /*
935 : * To check whether the relation is a partitioned table and its
936 : * ownership, fetch its syscache entry.
937 : */
938 8764 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
939 8764 : if (!HeapTupleIsValid(tuple))
940 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
941 8764 : classForm = (Form_pg_class) GETSTRUCT(tuple);
942 :
943 : /*
944 : * Make a returnable VacuumRelation for this rel if the user has the
945 : * required privileges.
946 : */
947 8764 : if (vacuum_is_permitted_for_relation(relid, classForm, options))
948 : {
949 8586 : oldcontext = MemoryContextSwitchTo(vac_context);
950 8586 : vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
951 : relid,
952 : vrel->va_cols));
953 8586 : MemoryContextSwitchTo(oldcontext);
954 : }
955 :
956 :
957 8764 : include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
958 8764 : ReleaseSysCache(tuple);
959 :
960 : /*
961 : * If it is, make relation list entries for its partitions. Note that
962 : * the list returned by find_all_inheritors() includes the passed-in
963 : * OID, so we have to skip that. There's no point in taking locks on
964 : * the individual partitions yet, and doing so would just add
965 : * unnecessary deadlock risk. For this last reason we do not check
966 : * yet the ownership of the partitions, which get added to the list to
967 : * process. Ownership will be checked later on anyway.
968 : */
969 8764 : if (include_parts)
970 : {
971 684 : List *part_oids = find_all_inheritors(relid, NoLock, NULL);
972 : ListCell *part_lc;
973 :
974 3198 : foreach(part_lc, part_oids)
975 : {
976 2514 : Oid part_oid = lfirst_oid(part_lc);
977 :
978 2514 : if (part_oid == relid)
979 684 : continue; /* ignore original table */
980 :
981 : /*
982 : * We omit a RangeVar since it wouldn't be appropriate to
983 : * complain about failure to open one of these relations
984 : * later.
985 : */
986 1830 : oldcontext = MemoryContextSwitchTo(vac_context);
987 1830 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
988 : part_oid,
989 : vrel->va_cols));
990 1830 : MemoryContextSwitchTo(oldcontext);
991 : }
992 : }
993 :
994 : /*
995 : * Release lock again. This means that by the time we actually try to
996 : * process the table, it might be gone or renamed. In the former case
997 : * we'll silently ignore it; in the latter case we'll process it
998 : * anyway, but we must beware that the RangeVar doesn't necessarily
999 : * identify it anymore. This isn't ideal, perhaps, but there's little
1000 : * practical alternative, since we're typically going to commit this
1001 : * transaction and begin a new one between now and then. Moreover,
1002 : * holding locks on multiple relations would create significant risk
1003 : * of deadlock.
1004 : */
1005 8764 : UnlockRelationOid(relid, AccessShareLock);
1006 : }
1007 :
1008 9124 : return vacrels;
1009 : }
1010 :
1011 : /*
1012 : * Construct a list of VacuumRelations for all vacuumable rels in
1013 : * the current database. The list is built in vac_context.
1014 : */
1015 : static List *
1016 1214 : get_all_vacuum_rels(MemoryContext vac_context, int options)
1017 : {
1018 1214 : List *vacrels = NIL;
1019 : Relation pgclass;
1020 : TableScanDesc scan;
1021 : HeapTuple tuple;
1022 :
1023 1214 : pgclass = table_open(RelationRelationId, AccessShareLock);
1024 :
1025 1214 : scan = table_beginscan_catalog(pgclass, 0, NULL);
1026 :
1027 504438 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1028 : {
1029 503224 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
1030 : MemoryContext oldcontext;
1031 503224 : Oid relid = classForm->oid;
1032 :
1033 : /*
1034 : * We include partitioned tables here; depending on which operation is
1035 : * to be performed, caller will decide whether to process or ignore
1036 : * them.
1037 : */
1038 503224 : if (classForm->relkind != RELKIND_RELATION &&
1039 419966 : classForm->relkind != RELKIND_MATVIEW &&
1040 419960 : classForm->relkind != RELKIND_PARTITIONED_TABLE)
1041 419918 : continue;
1042 :
1043 : /* check permissions of relation */
1044 83306 : if (!vacuum_is_permitted_for_relation(relid, classForm, options))
1045 0 : continue;
1046 :
1047 : /*
1048 : * Build VacuumRelation(s) specifying the table OIDs to be processed.
1049 : * We omit a RangeVar since it wouldn't be appropriate to complain
1050 : * about failure to open one of these relations later.
1051 : */
1052 83306 : oldcontext = MemoryContextSwitchTo(vac_context);
1053 83306 : vacrels = lappend(vacrels, makeVacuumRelation(NULL,
1054 : relid,
1055 : NIL));
1056 83306 : MemoryContextSwitchTo(oldcontext);
1057 : }
1058 :
1059 1214 : table_endscan(scan);
1060 1214 : table_close(pgclass, AccessShareLock);
1061 :
1062 1214 : return vacrels;
1063 : }
1064 :
1065 : /*
1066 : * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
1067 : *
1068 : * The target relation and VACUUM parameters are our inputs.
1069 : *
1070 : * Output parameters are the cutoffs that VACUUM caller should use.
1071 : *
1072 : * Return value indicates if vacuumlazy.c caller should make its VACUUM
1073 : * operation aggressive. An aggressive VACUUM must advance relfrozenxid up to
1074 : * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
1075 : * minimum).
1076 : */
1077 : bool
1078 74022 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
1079 : struct VacuumCutoffs *cutoffs)
1080 : {
1081 : int freeze_min_age,
1082 : multixact_freeze_min_age,
1083 : freeze_table_age,
1084 : multixact_freeze_table_age,
1085 : effective_multixact_freeze_max_age;
1086 : TransactionId nextXID,
1087 : safeOldestXmin,
1088 : aggressiveXIDCutoff;
1089 : MultiXactId nextMXID,
1090 : safeOldestMxact,
1091 : aggressiveMXIDCutoff;
1092 :
1093 : /* Use mutable copies of freeze age parameters */
1094 74022 : freeze_min_age = params->freeze_min_age;
1095 74022 : multixact_freeze_min_age = params->multixact_freeze_min_age;
1096 74022 : freeze_table_age = params->freeze_table_age;
1097 74022 : multixact_freeze_table_age = params->multixact_freeze_table_age;
1098 :
1099 : /* Set pg_class fields in cutoffs */
1100 74022 : cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
1101 74022 : cutoffs->relminmxid = rel->rd_rel->relminmxid;
1102 :
1103 : /*
1104 : * Acquire OldestXmin.
1105 : *
1106 : * We can always ignore processes running lazy vacuum. This is because we
1107 : * use these values only for deciding which tuples we must keep in the
1108 : * tables. Since lazy vacuum doesn't write its XID anywhere (usually no
1109 : * XID assigned), it's safe to ignore it. In theory it could be
1110 : * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
1111 : * that only one vacuum process can be working on a particular table at
1112 : * any time, and that each vacuum is always an independent transaction.
1113 : */
1114 74022 : cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
1115 :
1116 74022 : if (OldSnapshotThresholdActive())
1117 : {
1118 : TransactionId limit_xmin;
1119 : TimestampTz limit_ts;
1120 :
1121 6 : if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
1122 : &limit_xmin, &limit_ts))
1123 : {
1124 : /*
1125 : * TODO: We should only set the threshold if we are pruning on the
1126 : * basis of the increased limits. Not as crucial here as it is
1127 : * for opportunistic pruning (which often happens at a much higher
1128 : * frequency), but would still be a significant improvement.
1129 : */
1130 6 : SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
1131 6 : cutoffs->OldestXmin = limit_xmin;
1132 : }
1133 : }
1134 :
1135 : Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
1136 :
1137 : /* Acquire OldestMxact */
1138 74022 : cutoffs->OldestMxact = GetOldestMultiXactId();
1139 : Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
1140 :
1141 : /* Acquire next XID/next MXID values used to apply age-based settings */
1142 74022 : nextXID = ReadNextTransactionId();
1143 74022 : nextMXID = ReadNextMultiXactId();
1144 :
1145 : /*
1146 : * Also compute the multixact age for which freezing is urgent. This is
1147 : * normally autovacuum_multixact_freeze_max_age, but may be less if we are
1148 : * short of multixact member space.
1149 : */
1150 74022 : effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
1151 :
1152 : /*
1153 : * Almost ready to set freeze output parameters; check if OldestXmin or
1154 : * OldestMxact are held back to an unsafe degree before we start on that
1155 : */
1156 74022 : safeOldestXmin = nextXID - autovacuum_freeze_max_age;
1157 74022 : if (!TransactionIdIsNormal(safeOldestXmin))
1158 0 : safeOldestXmin = FirstNormalTransactionId;
1159 74022 : safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
1160 74022 : if (safeOldestMxact < FirstMultiXactId)
1161 0 : safeOldestMxact = FirstMultiXactId;
1162 74022 : if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
1163 0 : ereport(WARNING,
1164 : (errmsg("cutoff for removing and freezing tuples is far in the past"),
1165 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1166 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1167 74022 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
1168 0 : ereport(WARNING,
1169 : (errmsg("cutoff for freezing multixacts is far in the past"),
1170 : errhint("Close open transactions soon to avoid wraparound problems.\n"
1171 : "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
1172 :
1173 : /*
1174 : * Determine the minimum freeze age to use: as specified by the caller, or
1175 : * vacuum_freeze_min_age, but in any case not more than half
1176 : * autovacuum_freeze_max_age, so that autovacuums to prevent XID
1177 : * wraparound won't occur too frequently.
1178 : */
1179 74022 : if (freeze_min_age < 0)
1180 7280 : freeze_min_age = vacuum_freeze_min_age;
1181 74022 : freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
1182 : Assert(freeze_min_age >= 0);
1183 :
1184 : /* Compute FreezeLimit, being careful to generate a normal XID */
1185 74022 : cutoffs->FreezeLimit = nextXID - freeze_min_age;
1186 74022 : if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
1187 0 : cutoffs->FreezeLimit = FirstNormalTransactionId;
1188 : /* FreezeLimit must always be <= OldestXmin */
1189 74022 : if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
1190 600 : cutoffs->FreezeLimit = cutoffs->OldestXmin;
1191 :
1192 : /*
1193 : * Determine the minimum multixact freeze age to use: as specified by
1194 : * caller, or vacuum_multixact_freeze_min_age, but in any case not more
1195 : * than half effective_multixact_freeze_max_age, so that autovacuums to
1196 : * prevent MultiXact wraparound won't occur too frequently.
1197 : */
1198 74022 : if (multixact_freeze_min_age < 0)
1199 7280 : multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
1200 74022 : multixact_freeze_min_age = Min(multixact_freeze_min_age,
1201 : effective_multixact_freeze_max_age / 2);
1202 : Assert(multixact_freeze_min_age >= 0);
1203 :
1204 : /* Compute MultiXactCutoff, being careful to generate a valid value */
1205 74022 : cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
1206 74022 : if (cutoffs->MultiXactCutoff < FirstMultiXactId)
1207 0 : cutoffs->MultiXactCutoff = FirstMultiXactId;
1208 : /* MultiXactCutoff must always be <= OldestMxact */
1209 74022 : if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
1210 4 : cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
1211 :
1212 : /*
1213 : * Finally, figure out if caller needs to do an aggressive VACUUM or not.
1214 : *
1215 : * Determine the table freeze age to use: as specified by the caller, or
1216 : * the value of the vacuum_freeze_table_age GUC, but in any case not more
1217 : * than autovacuum_freeze_max_age * 0.95, so that if you have e.g nightly
1218 : * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
1219 : * anti-wraparound autovacuum is launched.
1220 : */
1221 74022 : if (freeze_table_age < 0)
1222 7280 : freeze_table_age = vacuum_freeze_table_age;
1223 74022 : freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
1224 : Assert(freeze_table_age >= 0);
1225 74022 : aggressiveXIDCutoff = nextXID - freeze_table_age;
1226 74022 : if (!TransactionIdIsNormal(aggressiveXIDCutoff))
1227 0 : aggressiveXIDCutoff = FirstNormalTransactionId;
1228 74022 : if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
1229 : aggressiveXIDCutoff))
1230 66560 : return true;
1231 :
1232 : /*
1233 : * Similar to the above, determine the table freeze age to use for
1234 : * multixacts: as specified by the caller, or the value of the
1235 : * vacuum_multixact_freeze_table_age GUC, but in any case not more than
1236 : * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
1237 : * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
1238 : * multixacts before anti-wraparound autovacuum is launched.
1239 : */
1240 7462 : if (multixact_freeze_table_age < 0)
1241 7280 : multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
1242 7462 : multixact_freeze_table_age =
1243 7462 : Min(multixact_freeze_table_age,
1244 : effective_multixact_freeze_max_age * 0.95);
1245 : Assert(multixact_freeze_table_age >= 0);
1246 7462 : aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
1247 7462 : if (aggressiveMXIDCutoff < FirstMultiXactId)
1248 0 : aggressiveMXIDCutoff = FirstMultiXactId;
1249 7462 : if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
1250 : aggressiveMXIDCutoff))
1251 0 : return true;
1252 :
1253 : /* Non-aggressive VACUUM */
1254 7462 : return false;
1255 : }
1256 :
1257 : /*
1258 : * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
1259 : * mechanism to determine if its table's relfrozenxid and relminmxid are now
1260 : * dangerously far in the past.
1261 : *
1262 : * When we return true, VACUUM caller triggers the failsafe.
1263 : */
1264 : bool
1265 84582 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
1266 : {
1267 84582 : TransactionId relfrozenxid = cutoffs->relfrozenxid;
1268 84582 : MultiXactId relminmxid = cutoffs->relminmxid;
1269 : TransactionId xid_skip_limit;
1270 : MultiXactId multi_skip_limit;
1271 : int skip_index_vacuum;
1272 :
1273 : Assert(TransactionIdIsNormal(relfrozenxid));
1274 : Assert(MultiXactIdIsValid(relminmxid));
1275 :
1276 : /*
1277 : * Determine the index skipping age to use. In any case no less than
1278 : * autovacuum_freeze_max_age * 1.05.
1279 : */
1280 84582 : skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
1281 :
1282 84582 : xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
1283 84582 : if (!TransactionIdIsNormal(xid_skip_limit))
1284 0 : xid_skip_limit = FirstNormalTransactionId;
1285 :
1286 84582 : if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
1287 : {
1288 : /* The table's relfrozenxid is too old */
1289 0 : return true;
1290 : }
1291 :
1292 : /*
1293 : * Similar to above, determine the index skipping age to use for
1294 : * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
1295 : * 1.05.
1296 : */
1297 84582 : skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
1298 : autovacuum_multixact_freeze_max_age * 1.05);
1299 :
1300 84582 : multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
1301 84582 : if (multi_skip_limit < FirstMultiXactId)
1302 0 : multi_skip_limit = FirstMultiXactId;
1303 :
1304 84582 : if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
1305 : {
1306 : /* The table's relminmxid is too old */
1307 0 : return true;
1308 : }
1309 :
1310 84582 : return false;
1311 : }
1312 :
1313 : /*
1314 : * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
1315 : *
1316 : * If we scanned the whole relation then we should just use the count of
1317 : * live tuples seen; but if we did not, we should not blindly extrapolate
1318 : * from that number, since VACUUM may have scanned a quite nonrandom
1319 : * subset of the table. When we have only partial information, we take
1320 : * the old value of pg_class.reltuples/pg_class.relpages as a measurement
1321 : * of the tuple density in the unscanned pages.
1322 : *
1323 : * Note: scanned_tuples should count only *live* tuples, since
1324 : * pg_class.reltuples is defined that way.
1325 : */
1326 : double
1327 73500 : vac_estimate_reltuples(Relation relation,
1328 : BlockNumber total_pages,
1329 : BlockNumber scanned_pages,
1330 : double scanned_tuples)
1331 : {
1332 73500 : BlockNumber old_rel_pages = relation->rd_rel->relpages;
1333 73500 : double old_rel_tuples = relation->rd_rel->reltuples;
1334 : double old_density;
1335 : double unscanned_pages;
1336 : double total_tuples;
1337 :
1338 : /* If we did scan the whole table, just use the count as-is */
1339 73500 : if (scanned_pages >= total_pages)
1340 73292 : return scanned_tuples;
1341 :
1342 : /*
1343 : * When successive VACUUM commands scan the same few pages again and
1344 : * again, without anything from the table really changing, there is a risk
1345 : * that our beliefs about tuple density will gradually become distorted.
1346 : * This might be caused by vacuumlazy.c implementation details, such as
1347 : * its tendency to always scan the last heap page. Handle that here.
1348 : *
1349 : * If the relation is _exactly_ the same size according to the existing
1350 : * pg_class entry, and only a few of its pages (less than 2%) were
1351 : * scanned, keep the existing value of reltuples. Also keep the existing
1352 : * value when only a subset of rel's pages <= a single page were scanned.
1353 : *
1354 : * (Note: we might be returning -1 here.)
1355 : */
1356 208 : if (old_rel_pages == total_pages &&
1357 182 : scanned_pages < (double) total_pages * 0.02)
1358 116 : return old_rel_tuples;
1359 92 : if (scanned_pages <= 1)
1360 64 : return old_rel_tuples;
1361 :
1362 : /*
1363 : * If old density is unknown, we can't do much except scale up
1364 : * scanned_tuples to match total_pages.
1365 : */
1366 28 : if (old_rel_tuples < 0 || old_rel_pages == 0)
1367 0 : return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
1368 :
1369 : /*
1370 : * Okay, we've covered the corner cases. The normal calculation is to
1371 : * convert the old measurement to a density (tuples per page), then
1372 : * estimate the number of tuples in the unscanned pages using that figure,
1373 : * and finally add on the number of tuples in the scanned pages.
1374 : */
1375 28 : old_density = old_rel_tuples / old_rel_pages;
1376 28 : unscanned_pages = (double) total_pages - (double) scanned_pages;
1377 28 : total_tuples = old_density * unscanned_pages + scanned_tuples;
1378 28 : return floor(total_tuples + 0.5);
1379 : }
1380 :
1381 :
1382 : /*
1383 : * vac_update_relstats() -- update statistics for one relation
1384 : *
1385 : * Update the whole-relation statistics that are kept in its pg_class
1386 : * row. There are additional stats that will be updated if we are
1387 : * doing ANALYZE, but we always update these stats. This routine works
1388 : * for both index and heap relation entries in pg_class.
1389 : *
1390 : * We violate transaction semantics here by overwriting the rel's
1391 : * existing pg_class tuple with the new values. This is reasonably
1392 : * safe as long as we're sure that the new values are correct whether or
1393 : * not this transaction commits. The reason for doing this is that if
1394 : * we updated these tuples in the usual way, vacuuming pg_class itself
1395 : * wouldn't work very well --- by the time we got done with a vacuum
1396 : * cycle, most of the tuples in pg_class would've been obsoleted. Of
1397 : * course, this only works for fixed-size not-null columns, but these are.
1398 : *
1399 : * Another reason for doing it this way is that when we are in a lazy
1400 : * VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
1401 : * Somebody vacuuming pg_class might think they could delete a tuple
1402 : * marked with xmin = our xid.
1403 : *
1404 : * In addition to fundamentally nontransactional statistics such as
1405 : * relpages and relallvisible, we try to maintain certain lazily-updated
1406 : * DDL flags such as relhasindex, by clearing them if no longer correct.
1407 : * It's safe to do this in VACUUM, which can't run in parallel with
1408 : * CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
1409 : * However, it's *not* safe to do it in an ANALYZE that's within an
1410 : * outer transaction, because for example the current transaction might
1411 : * have dropped the last index; then we'd think relhasindex should be
1412 : * cleared, but if the transaction later rolls back this would be wrong.
1413 : * So we refrain from updating the DDL flags if we're inside an outer
1414 : * transaction. This is OK since postponing the flag maintenance is
1415 : * always allowable.
1416 : *
1417 : * Note: num_tuples should count only *live* tuples, since
1418 : * pg_class.reltuples is defined that way.
1419 : *
1420 : * This routine is shared by VACUUM and ANALYZE.
1421 : */
1422 : void
1423 209318 : vac_update_relstats(Relation relation,
1424 : BlockNumber num_pages, double num_tuples,
1425 : BlockNumber num_all_visible_pages,
1426 : bool hasindex, TransactionId frozenxid,
1427 : MultiXactId minmulti,
1428 : bool *frozenxid_updated, bool *minmulti_updated,
1429 : bool in_outer_xact)
1430 : {
1431 209318 : Oid relid = RelationGetRelid(relation);
1432 : Relation rd;
1433 : HeapTuple ctup;
1434 : Form_pg_class pgcform;
1435 : bool dirty,
1436 : futurexid,
1437 : futuremxid;
1438 : TransactionId oldfrozenxid;
1439 : MultiXactId oldminmulti;
1440 :
1441 209318 : rd = table_open(RelationRelationId, RowExclusiveLock);
1442 :
1443 : /* Fetch a copy of the tuple to scribble on */
1444 209318 : ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1445 209318 : if (!HeapTupleIsValid(ctup))
1446 0 : elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
1447 : relid);
1448 209318 : pgcform = (Form_pg_class) GETSTRUCT(ctup);
1449 :
1450 : /* Apply statistical updates, if any, to copied tuple */
1451 :
1452 209318 : dirty = false;
1453 209318 : if (pgcform->relpages != (int32) num_pages)
1454 : {
1455 32078 : pgcform->relpages = (int32) num_pages;
1456 32078 : dirty = true;
1457 : }
1458 209318 : if (pgcform->reltuples != (float4) num_tuples)
1459 : {
1460 86742 : pgcform->reltuples = (float4) num_tuples;
1461 86742 : dirty = true;
1462 : }
1463 209318 : if (pgcform->relallvisible != (int32) num_all_visible_pages)
1464 : {
1465 26656 : pgcform->relallvisible = (int32) num_all_visible_pages;
1466 26656 : dirty = true;
1467 : }
1468 :
1469 : /* Apply DDL updates, but not inside an outer transaction (see above) */
1470 :
1471 209318 : if (!in_outer_xact)
1472 : {
1473 : /*
1474 : * If we didn't find any indexes, reset relhasindex.
1475 : */
1476 209046 : if (pgcform->relhasindex && !hasindex)
1477 : {
1478 12 : pgcform->relhasindex = false;
1479 12 : dirty = true;
1480 : }
1481 :
1482 : /* We also clear relhasrules and relhastriggers if needed */
1483 209046 : if (pgcform->relhasrules && relation->rd_rules == NULL)
1484 : {
1485 0 : pgcform->relhasrules = false;
1486 0 : dirty = true;
1487 : }
1488 209046 : if (pgcform->relhastriggers && relation->trigdesc == NULL)
1489 : {
1490 6 : pgcform->relhastriggers = false;
1491 6 : dirty = true;
1492 : }
1493 : }
1494 :
1495 : /*
1496 : * Update relfrozenxid, unless caller passed InvalidTransactionId
1497 : * indicating it has no new data.
1498 : *
1499 : * Ordinarily, we don't let relfrozenxid go backwards. However, if the
1500 : * stored relfrozenxid is "in the future" then it seems best to assume
1501 : * it's corrupt, and overwrite with the oldest remaining XID in the table.
1502 : * This should match vac_update_datfrozenxid() concerning what we consider
1503 : * to be "in the future".
1504 : */
1505 209318 : oldfrozenxid = pgcform->relfrozenxid;
1506 209318 : futurexid = false;
1507 209318 : if (frozenxid_updated)
1508 73496 : *frozenxid_updated = false;
1509 209318 : if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
1510 : {
1511 71692 : bool update = false;
1512 :
1513 71692 : if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
1514 71640 : update = true;
1515 52 : else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
1516 0 : futurexid = update = true;
1517 :
1518 71692 : if (update)
1519 : {
1520 71640 : pgcform->relfrozenxid = frozenxid;
1521 71640 : dirty = true;
1522 71640 : if (frozenxid_updated)
1523 71640 : *frozenxid_updated = true;
1524 : }
1525 : }
1526 :
1527 : /* Similarly for relminmxid */
1528 209318 : oldminmulti = pgcform->relminmxid;
1529 209318 : futuremxid = false;
1530 209318 : if (minmulti_updated)
1531 73496 : *minmulti_updated = false;
1532 209318 : if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
1533 : {
1534 58 : bool update = false;
1535 :
1536 58 : if (MultiXactIdPrecedes(oldminmulti, minmulti))
1537 58 : update = true;
1538 0 : else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
1539 0 : futuremxid = update = true;
1540 :
1541 58 : if (update)
1542 : {
1543 58 : pgcform->relminmxid = minmulti;
1544 58 : dirty = true;
1545 58 : if (minmulti_updated)
1546 58 : *minmulti_updated = true;
1547 : }
1548 : }
1549 :
1550 : /* If anything changed, write out the tuple. */
1551 209318 : if (dirty)
1552 131788 : heap_inplace_update(rd, ctup);
1553 :
1554 209318 : table_close(rd, RowExclusiveLock);
1555 :
1556 209318 : if (futurexid)
1557 0 : ereport(WARNING,
1558 : (errcode(ERRCODE_DATA_CORRUPTED),
1559 : errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
1560 : oldfrozenxid, frozenxid,
1561 : RelationGetRelationName(relation))));
1562 209318 : if (futuremxid)
1563 0 : ereport(WARNING,
1564 : (errcode(ERRCODE_DATA_CORRUPTED),
1565 : errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
1566 : oldminmulti, minmulti,
1567 : RelationGetRelationName(relation))));
1568 209318 : }
1569 :
1570 :
1571 : /*
1572 : * vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
1573 : *
1574 : * Update pg_database's datfrozenxid entry for our database to be the
1575 : * minimum of the pg_class.relfrozenxid values.
1576 : *
1577 : * Similarly, update our datminmxid to be the minimum of the
1578 : * pg_class.relminmxid values.
1579 : *
1580 : * If we are able to advance either pg_database value, also try to
1581 : * truncate pg_xact and pg_multixact.
1582 : *
1583 : * We violate transaction semantics here by overwriting the database's
1584 : * existing pg_database tuple with the new values. This is reasonably
1585 : * safe since the new values are correct whether or not this transaction
1586 : * commits. As with vac_update_relstats, this avoids leaving dead tuples
1587 : * behind after a VACUUM.
1588 : */
1589 : void
1590 2484 : vac_update_datfrozenxid(void)
1591 : {
1592 : HeapTuple tuple;
1593 : Form_pg_database dbform;
1594 : Relation relation;
1595 : SysScanDesc scan;
1596 : HeapTuple classTup;
1597 : TransactionId newFrozenXid;
1598 : MultiXactId newMinMulti;
1599 : TransactionId lastSaneFrozenXid;
1600 : MultiXactId lastSaneMinMulti;
1601 2484 : bool bogus = false;
1602 2484 : bool dirty = false;
1603 : ScanKeyData key[1];
1604 :
1605 : /*
1606 : * Restrict this task to one backend per database. This avoids race
1607 : * conditions that would move datfrozenxid or datminmxid backward. It
1608 : * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
1609 : * datfrozenxid passed to an earlier vac_truncate_clog() call.
1610 : */
1611 2484 : LockDatabaseFrozenIds(ExclusiveLock);
1612 :
1613 : /*
1614 : * Initialize the "min" calculation with
1615 : * GetOldestNonRemovableTransactionId(), which is a reasonable
1616 : * approximation to the minimum relfrozenxid for not-yet-committed
1617 : * pg_class entries for new tables; see AddNewRelationTuple(). So we
1618 : * cannot produce a wrong minimum by starting with this.
1619 : */
1620 2484 : newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
1621 :
1622 : /*
1623 : * Similarly, initialize the MultiXact "min" with the value that would be
1624 : * used on pg_class for new tables. See AddNewRelationTuple().
1625 : */
1626 2484 : newMinMulti = GetOldestMultiXactId();
1627 :
1628 : /*
1629 : * Identify the latest relfrozenxid and relminmxid values that we could
1630 : * validly see during the scan. These are conservative values, but it's
1631 : * not really worth trying to be more exact.
1632 : */
1633 2484 : lastSaneFrozenXid = ReadNextTransactionId();
1634 2484 : lastSaneMinMulti = ReadNextMultiXactId();
1635 :
1636 : /*
1637 : * We must seqscan pg_class to find the minimum Xid, because there is no
1638 : * index that can help us here.
1639 : */
1640 2484 : relation = table_open(RelationRelationId, AccessShareLock);
1641 :
1642 2484 : scan = systable_beginscan(relation, InvalidOid, false,
1643 : NULL, 0, NULL);
1644 :
1645 1345396 : while ((classTup = systable_getnext(scan)) != NULL)
1646 : {
1647 1342912 : Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
1648 :
1649 : /*
1650 : * Only consider relations able to hold unfrozen XIDs (anything else
1651 : * should have InvalidTransactionId in relfrozenxid anyway).
1652 : */
1653 1342912 : if (classForm->relkind != RELKIND_RELATION &&
1654 1050070 : classForm->relkind != RELKIND_MATVIEW &&
1655 1048130 : classForm->relkind != RELKIND_TOASTVALUE)
1656 : {
1657 : Assert(!TransactionIdIsValid(classForm->relfrozenxid));
1658 : Assert(!MultiXactIdIsValid(classForm->relminmxid));
1659 894218 : continue;
1660 : }
1661 :
1662 : /*
1663 : * Some table AMs might not need per-relation xid / multixid horizons.
1664 : * It therefore seems reasonable to allow relfrozenxid and relminmxid
1665 : * to not be set (i.e. set to their respective Invalid*Id)
1666 : * independently. Thus validate and compute horizon for each only if
1667 : * set.
1668 : *
1669 : * If things are working properly, no relation should have a
1670 : * relfrozenxid or relminmxid that is "in the future". However, such
1671 : * cases have been known to arise due to bugs in pg_upgrade. If we
1672 : * see any entries that are "in the future", chicken out and don't do
1673 : * anything. This ensures we won't truncate clog & multixact SLRUs
1674 : * before those relations have been scanned and cleaned up.
1675 : */
1676 :
1677 448694 : if (TransactionIdIsValid(classForm->relfrozenxid))
1678 : {
1679 : Assert(TransactionIdIsNormal(classForm->relfrozenxid));
1680 :
1681 : /* check for values in the future */
1682 448694 : if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
1683 : {
1684 0 : bogus = true;
1685 0 : break;
1686 : }
1687 :
1688 : /* determine new horizon */
1689 448694 : if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
1690 3380 : newFrozenXid = classForm->relfrozenxid;
1691 : }
1692 :
1693 448694 : if (MultiXactIdIsValid(classForm->relminmxid))
1694 : {
1695 : /* check for values in the future */
1696 448694 : if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
1697 : {
1698 0 : bogus = true;
1699 0 : break;
1700 : }
1701 :
1702 : /* determine new horizon */
1703 448694 : if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
1704 190 : newMinMulti = classForm->relminmxid;
1705 : }
1706 : }
1707 :
1708 : /* we're done with pg_class */
1709 2484 : systable_endscan(scan);
1710 2484 : table_close(relation, AccessShareLock);
1711 :
1712 : /* chicken out if bogus data found */
1713 2484 : if (bogus)
1714 0 : return;
1715 :
1716 : Assert(TransactionIdIsNormal(newFrozenXid));
1717 : Assert(MultiXactIdIsValid(newMinMulti));
1718 :
1719 : /* Now fetch the pg_database tuple we need to update. */
1720 2484 : relation = table_open(DatabaseRelationId, RowExclusiveLock);
1721 :
1722 : /*
1723 : * Get the pg_database tuple to scribble on. Note that this does not
1724 : * directly rely on the syscache to avoid issues with flattened toast
1725 : * values for the in-place update.
1726 : */
1727 2484 : ScanKeyInit(&key[0],
1728 : Anum_pg_database_oid,
1729 : BTEqualStrategyNumber, F_OIDEQ,
1730 : ObjectIdGetDatum(MyDatabaseId));
1731 :
1732 2484 : scan = systable_beginscan(relation, DatabaseOidIndexId, true,
1733 : NULL, 1, key);
1734 2484 : tuple = systable_getnext(scan);
1735 2484 : tuple = heap_copytuple(tuple);
1736 2484 : systable_endscan(scan);
1737 :
1738 2484 : if (!HeapTupleIsValid(tuple))
1739 0 : elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
1740 :
1741 2484 : dbform = (Form_pg_database) GETSTRUCT(tuple);
1742 :
1743 : /*
1744 : * As in vac_update_relstats(), we ordinarily don't want to let
1745 : * datfrozenxid go backward; but if it's "in the future" then it must be
1746 : * corrupt and it seems best to overwrite it.
1747 : */
1748 3118 : if (dbform->datfrozenxid != newFrozenXid &&
1749 634 : (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
1750 0 : TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
1751 : {
1752 634 : dbform->datfrozenxid = newFrozenXid;
1753 634 : dirty = true;
1754 : }
1755 : else
1756 1850 : newFrozenXid = dbform->datfrozenxid;
1757 :
1758 : /* Ditto for datminmxid */
1759 2484 : if (dbform->datminmxid != newMinMulti &&
1760 0 : (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
1761 0 : MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
1762 : {
1763 0 : dbform->datminmxid = newMinMulti;
1764 0 : dirty = true;
1765 : }
1766 : else
1767 2484 : newMinMulti = dbform->datminmxid;
1768 :
1769 2484 : if (dirty)
1770 634 : heap_inplace_update(relation, tuple);
1771 :
1772 2484 : heap_freetuple(tuple);
1773 2484 : table_close(relation, RowExclusiveLock);
1774 :
1775 : /*
1776 : * If we were able to advance datfrozenxid or datminmxid, see if we can
1777 : * truncate pg_xact and/or pg_multixact. Also do it if the shared
1778 : * XID-wrap-limit info is stale, since this action will update that too.
1779 : */
1780 2484 : if (dirty || ForceTransactionIdLimitUpdate())
1781 634 : vac_truncate_clog(newFrozenXid, newMinMulti,
1782 : lastSaneFrozenXid, lastSaneMinMulti);
1783 : }
1784 :
1785 :
1786 : /*
1787 : * vac_truncate_clog() -- attempt to truncate the commit log
1788 : *
1789 : * Scan pg_database to determine the system-wide oldest datfrozenxid,
1790 : * and use it to truncate the transaction commit log (pg_xact).
1791 : * Also update the XID wrap limit info maintained by varsup.c.
1792 : * Likewise for datminmxid.
1793 : *
1794 : * The passed frozenXID and minMulti are the updated values for my own
1795 : * pg_database entry. They're used to initialize the "min" calculations.
1796 : * The caller also passes the "last sane" XID and MXID, since it has
1797 : * those at hand already.
1798 : *
1799 : * This routine is only invoked when we've managed to change our
1800 : * DB's datfrozenxid/datminmxid values, or we found that the shared
1801 : * XID-wrap-limit info is stale.
1802 : */
1803 : static void
1804 634 : vac_truncate_clog(TransactionId frozenXID,
1805 : MultiXactId minMulti,
1806 : TransactionId lastSaneFrozenXid,
1807 : MultiXactId lastSaneMinMulti)
1808 : {
1809 634 : TransactionId nextXID = ReadNextTransactionId();
1810 : Relation relation;
1811 : TableScanDesc scan;
1812 : HeapTuple tuple;
1813 : Oid oldestxid_datoid;
1814 : Oid minmulti_datoid;
1815 634 : bool bogus = false;
1816 634 : bool frozenAlreadyWrapped = false;
1817 :
1818 : /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
1819 634 : LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
1820 :
1821 : /* init oldest datoids to sync with my frozenXID/minMulti values */
1822 634 : oldestxid_datoid = MyDatabaseId;
1823 634 : minmulti_datoid = MyDatabaseId;
1824 :
1825 : /*
1826 : * Scan pg_database to compute the minimum datfrozenxid/datminmxid
1827 : *
1828 : * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
1829 : * the values could change while we look at them. Fetch each one just
1830 : * once to ensure sane behavior of the comparison logic. (Here, as in
1831 : * many other places, we assume that fetching or updating an XID in shared
1832 : * storage is atomic.)
1833 : *
1834 : * Note: we need not worry about a race condition with new entries being
1835 : * inserted by CREATE DATABASE. Any such entry will have a copy of some
1836 : * existing DB's datfrozenxid, and that source DB cannot be ours because
1837 : * of the interlock against copying a DB containing an active backend.
1838 : * Hence the new entry will not reduce the minimum. Also, if two VACUUMs
1839 : * concurrently modify the datfrozenxid's of different databases, the
1840 : * worst possible outcome is that pg_xact is not truncated as aggressively
1841 : * as it could be.
1842 : */
1843 634 : relation = table_open(DatabaseRelationId, AccessShareLock);
1844 :
1845 634 : scan = table_beginscan_catalog(relation, 0, NULL);
1846 :
1847 1344 : while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
1848 : {
1849 710 : volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
1850 710 : TransactionId datfrozenxid = dbform->datfrozenxid;
1851 710 : TransactionId datminmxid = dbform->datminmxid;
1852 :
1853 : Assert(TransactionIdIsNormal(datfrozenxid));
1854 : Assert(MultiXactIdIsValid(datminmxid));
1855 :
1856 : /*
1857 : * If things are working properly, no database should have a
1858 : * datfrozenxid or datminmxid that is "in the future". However, such
1859 : * cases have been known to arise due to bugs in pg_upgrade. If we
1860 : * see any entries that are "in the future", chicken out and don't do
1861 : * anything. This ensures we won't truncate clog before those
1862 : * databases have been scanned and cleaned up. (We will issue the
1863 : * "already wrapped" warning if appropriate, though.)
1864 : */
1865 1420 : if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
1866 710 : MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
1867 0 : bogus = true;
1868 :
1869 710 : if (TransactionIdPrecedes(nextXID, datfrozenxid))
1870 0 : frozenAlreadyWrapped = true;
1871 710 : else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
1872 : {
1873 32 : frozenXID = datfrozenxid;
1874 32 : oldestxid_datoid = dbform->oid;
1875 : }
1876 :
1877 710 : if (MultiXactIdPrecedes(datminmxid, minMulti))
1878 : {
1879 0 : minMulti = datminmxid;
1880 0 : minmulti_datoid = dbform->oid;
1881 : }
1882 : }
1883 :
1884 634 : table_endscan(scan);
1885 :
1886 634 : table_close(relation, AccessShareLock);
1887 :
1888 : /*
1889 : * Do not truncate CLOG if we seem to have suffered wraparound already;
1890 : * the computed minimum XID might be bogus. This case should now be
1891 : * impossible due to the defenses in GetNewTransactionId, but we keep the
1892 : * test anyway.
1893 : */
1894 634 : if (frozenAlreadyWrapped)
1895 : {
1896 0 : ereport(WARNING,
1897 : (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
1898 : errdetail("You might have already suffered transaction-wraparound data loss.")));
1899 0 : return;
1900 : }
1901 :
1902 : /* chicken out if data is bogus in any other way */
1903 634 : if (bogus)
1904 0 : return;
1905 :
1906 : /*
1907 : * Advance the oldest value for commit timestamps before truncating, so
1908 : * that if a user requests a timestamp for a transaction we're truncating
1909 : * away right after this point, they get NULL instead of an ugly "file not
1910 : * found" error from slru.c. This doesn't matter for xact/multixact
1911 : * because they are not subject to arbitrary lookups from users.
1912 : */
1913 634 : AdvanceOldestCommitTsXid(frozenXID);
1914 :
1915 : /*
1916 : * Truncate CLOG, multixact and CommitTs to the oldest computed value.
1917 : */
1918 634 : TruncateCLOG(frozenXID, oldestxid_datoid);
1919 634 : TruncateCommitTs(frozenXID);
1920 634 : TruncateMultiXact(minMulti, minmulti_datoid);
1921 :
1922 : /*
1923 : * Update the wrap limit for GetNewTransactionId and creation of new
1924 : * MultiXactIds. Note: these functions will also signal the postmaster
1925 : * for an(other) autovac cycle if needed. XXX should we avoid possibly
1926 : * signaling twice?
1927 : */
1928 634 : SetTransactionIdLimit(frozenXID, oldestxid_datoid);
1929 634 : SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
1930 :
1931 634 : LWLockRelease(WrapLimitsVacuumLock);
1932 : }
1933 :
1934 :
1935 : /*
1936 : * vacuum_rel() -- vacuum one heap relation
1937 : *
1938 : * relid identifies the relation to vacuum. If relation is supplied,
1939 : * use the name therein for reporting any failure to open/lock the rel;
1940 : * do not use it once we've successfully opened the rel, since it might
1941 : * be stale.
1942 : *
1943 : * Returns true if it's okay to proceed with a requested ANALYZE
1944 : * operation on this table.
1945 : *
1946 : * Doing one heap at a time incurs extra overhead, since we need to
1947 : * check that the heap exists again just before we vacuum it. The
1948 : * reason that we do this is so that vacuuming can be spread across
1949 : * many small transactions. Otherwise, two-phase locking would require
1950 : * us to lock the entire database during one pass of the vacuum cleaner.
1951 : *
1952 : * At entry and exit, we are not inside a transaction.
1953 : */
1954 : static bool
1955 74190 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
1956 : bool skip_privs, BufferAccessStrategy bstrategy)
1957 : {
1958 : LOCKMODE lmode;
1959 : Relation rel;
1960 : LockRelId lockrelid;
1961 : Oid toast_relid;
1962 : Oid save_userid;
1963 : int save_sec_context;
1964 : int save_nestlevel;
1965 :
1966 : Assert(params != NULL);
1967 :
1968 : /* Begin a transaction for vacuuming this relation */
1969 74190 : StartTransactionCommand();
1970 :
1971 74190 : if (!(params->options & VACOPT_FULL))
1972 : {
1973 : /*
1974 : * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
1975 : * other concurrent VACUUMs know that they can ignore this one while
1976 : * determining their OldestXmin. (The reason we don't set it during a
1977 : * full VACUUM is exactly that we may have to run user-defined
1978 : * functions for functional indexes, and we want to make sure that if
1979 : * they use the snapshot set above, any tuples it requires can't get
1980 : * removed from other tables. An index function that depends on the
1981 : * contents of other tables is arguably broken, but we won't break it
1982 : * here by violating transaction semantics.)
1983 : *
1984 : * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
1985 : * autovacuum; it's used to avoid canceling a vacuum that was invoked
1986 : * in an emergency.
1987 : *
1988 : * Note: these flags remain set until CommitTransaction or
1989 : * AbortTransaction. We don't want to clear them until we reset
1990 : * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
1991 : * might appear to go backwards, which is probably Not Good. (We also
1992 : * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
1993 : * xmin doesn't become visible ahead of setting the flag.)
1994 : */
1995 73828 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
1996 73828 : MyProc->statusFlags |= PROC_IN_VACUUM;
1997 73828 : if (params->is_wraparound)
1998 0 : MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
1999 73828 : ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
2000 73828 : LWLockRelease(ProcArrayLock);
2001 : }
2002 :
2003 : /*
2004 : * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
2005 : * cutoff xids in local memory wrapping around, and to have updated xmin
2006 : * horizons.
2007 : */
2008 74190 : PushActiveSnapshot(GetTransactionSnapshot());
2009 :
2010 : /*
2011 : * Check for user-requested abort. Note we want this to be inside a
2012 : * transaction, so xact.c doesn't issue useless WARNING.
2013 : */
2014 74190 : CHECK_FOR_INTERRUPTS();
2015 :
2016 : /*
2017 : * Determine the type of lock we want --- hard exclusive lock for a FULL
2018 : * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
2019 : * way, we can be sure that no other backend is vacuuming the same table.
2020 : */
2021 148380 : lmode = (params->options & VACOPT_FULL) ?
2022 74190 : AccessExclusiveLock : ShareUpdateExclusiveLock;
2023 :
2024 : /* open the relation and get the appropriate lock on it */
2025 74190 : rel = vacuum_open_relation(relid, relation, params->options,
2026 74190 : params->log_min_duration >= 0, lmode);
2027 :
2028 : /* leave if relation could not be opened or locked */
2029 74190 : if (!rel)
2030 : {
2031 24 : PopActiveSnapshot();
2032 24 : CommitTransactionCommand();
2033 24 : return false;
2034 : }
2035 :
2036 : /*
2037 : * Check if relation needs to be skipped based on privileges. This check
2038 : * happens also when building the relation list to vacuum for a manual
2039 : * operation, and needs to be done additionally here as VACUUM could
2040 : * happen across multiple transactions where privileges could have changed
2041 : * in-between. Make sure to only generate logs for VACUUM in this case.
2042 : */
2043 74166 : if (!skip_privs &&
2044 47096 : !vacuum_is_permitted_for_relation(RelationGetRelid(rel),
2045 : rel->rd_rel,
2046 47096 : params->options & VACOPT_VACUUM))
2047 : {
2048 36 : relation_close(rel, lmode);
2049 36 : PopActiveSnapshot();
2050 36 : CommitTransactionCommand();
2051 36 : return false;
2052 : }
2053 :
2054 : /*
2055 : * Check that it's of a vacuumable relkind.
2056 : */
2057 74130 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
2058 27234 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
2059 27226 : rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
2060 152 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2061 : {
2062 2 : ereport(WARNING,
2063 : (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
2064 : RelationGetRelationName(rel))));
2065 2 : relation_close(rel, lmode);
2066 2 : PopActiveSnapshot();
2067 2 : CommitTransactionCommand();
2068 2 : return false;
2069 : }
2070 :
2071 : /*
2072 : * Silently ignore tables that are temp tables of other backends ---
2073 : * trying to vacuum these will lead to great unhappiness, since their
2074 : * contents are probably not up-to-date on disk. (We don't throw a
2075 : * warning here; it would just lead to chatter during a database-wide
2076 : * VACUUM.)
2077 : */
2078 74128 : if (RELATION_IS_OTHER_TEMP(rel))
2079 : {
2080 0 : relation_close(rel, lmode);
2081 0 : PopActiveSnapshot();
2082 0 : CommitTransactionCommand();
2083 0 : return false;
2084 : }
2085 :
2086 : /*
2087 : * Silently ignore partitioned tables as there is no work to be done. The
2088 : * useful work is on their child partitions, which have been queued up for
2089 : * us separately.
2090 : */
2091 74128 : if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
2092 : {
2093 150 : relation_close(rel, lmode);
2094 150 : PopActiveSnapshot();
2095 150 : CommitTransactionCommand();
2096 : /* It's OK to proceed with ANALYZE on this table */
2097 150 : return true;
2098 : }
2099 :
2100 : /*
2101 : * Get a session-level lock too. This will protect our access to the
2102 : * relation across multiple transactions, so that we can vacuum the
2103 : * relation's TOAST table (if any) secure in the knowledge that no one is
2104 : * deleting the parent relation.
2105 : *
2106 : * NOTE: this cannot block, even if someone else is waiting for access,
2107 : * because the lock manager knows that both lock requests are from the
2108 : * same process.
2109 : */
2110 73978 : lockrelid = rel->rd_lockInfo.lockRelId;
2111 73978 : LockRelationIdForSession(&lockrelid, lmode);
2112 :
2113 : /*
2114 : * Set index_cleanup option based on index_cleanup reloption if it wasn't
2115 : * specified in VACUUM command, or when running in an autovacuum worker
2116 : */
2117 73978 : if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
2118 : {
2119 : StdRdOptIndexCleanup vacuum_index_cleanup;
2120 :
2121 4920 : if (rel->rd_options == NULL)
2122 4660 : vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
2123 : else
2124 260 : vacuum_index_cleanup =
2125 260 : ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
2126 :
2127 4920 : if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
2128 4896 : params->index_cleanup = VACOPTVALUE_AUTO;
2129 24 : else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
2130 12 : params->index_cleanup = VACOPTVALUE_ENABLED;
2131 : else
2132 : {
2133 : Assert(vacuum_index_cleanup ==
2134 : STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
2135 12 : params->index_cleanup = VACOPTVALUE_DISABLED;
2136 : }
2137 : }
2138 :
2139 : /*
2140 : * Set truncate option based on truncate reloption if it wasn't specified
2141 : * in VACUUM command, or when running in an autovacuum worker
2142 : */
2143 73978 : if (params->truncate == VACOPTVALUE_UNSPECIFIED)
2144 : {
2145 4944 : if (rel->rd_options == NULL ||
2146 260 : ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
2147 4938 : params->truncate = VACOPTVALUE_ENABLED;
2148 : else
2149 6 : params->truncate = VACOPTVALUE_DISABLED;
2150 : }
2151 :
2152 : /*
2153 : * Remember the relation's TOAST relation for later, if the caller asked
2154 : * us to process it. In VACUUM FULL, though, the toast table is
2155 : * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
2156 : * unless PROCESS_MAIN is disabled.
2157 : */
2158 73978 : if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
2159 73644 : ((params->options & VACOPT_FULL) == 0 ||
2160 334 : (params->options & VACOPT_PROCESS_MAIN) == 0))
2161 73316 : toast_relid = rel->rd_rel->reltoastrelid;
2162 : else
2163 662 : toast_relid = InvalidOid;
2164 :
2165 : /*
2166 : * Switch to the table owner's userid, so that any index functions are run
2167 : * as that user. Also lock down security-restricted operations and
2168 : * arrange to make GUC variable changes local to this command. (This is
2169 : * unnecessary, but harmless, for lazy VACUUM.)
2170 : */
2171 73978 : GetUserIdAndSecContext(&save_userid, &save_sec_context);
2172 73978 : SetUserIdAndSecContext(rel->rd_rel->relowner,
2173 : save_sec_context | SECURITY_RESTRICTED_OPERATION);
2174 73978 : save_nestlevel = NewGUCNestLevel();
2175 :
2176 : /*
2177 : * If PROCESS_MAIN is set (the default), it's time to vacuum the main
2178 : * relation. Otherwise, we can skip this part. If processing the TOAST
2179 : * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
2180 : * to be set when we recurse to the TOAST table.
2181 : */
2182 73978 : if (params->options & VACOPT_PROCESS_MAIN)
2183 : {
2184 : /*
2185 : * Do the actual work --- either FULL or "lazy" vacuum
2186 : */
2187 73824 : if (params->options & VACOPT_FULL)
2188 : {
2189 328 : ClusterParams cluster_params = {0};
2190 :
2191 : /* close relation before vacuuming, but hold lock until commit */
2192 328 : relation_close(rel, NoLock);
2193 328 : rel = NULL;
2194 :
2195 328 : if ((params->options & VACOPT_VERBOSE) != 0)
2196 0 : cluster_params.options |= CLUOPT_VERBOSE;
2197 :
2198 : /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
2199 328 : cluster_rel(relid, InvalidOid, &cluster_params);
2200 : }
2201 : else
2202 73496 : table_relation_vacuum(rel, params, bstrategy);
2203 : }
2204 :
2205 : /* Roll back any GUC changes executed by index functions */
2206 73972 : AtEOXact_GUC(false, save_nestlevel);
2207 :
2208 : /* Restore userid and security context */
2209 73972 : SetUserIdAndSecContext(save_userid, save_sec_context);
2210 :
2211 : /* all done with this class, but hold lock until commit */
2212 73972 : if (rel)
2213 73650 : relation_close(rel, NoLock);
2214 :
2215 : /*
2216 : * Complete the transaction and free all temporary memory used.
2217 : */
2218 73972 : PopActiveSnapshot();
2219 73972 : CommitTransactionCommand();
2220 :
2221 : /*
2222 : * If the relation has a secondary toast rel, vacuum that too while we
2223 : * still hold the session lock on the main table. Note however that
2224 : * "analyze" will not get done on the toast table. This is good, because
2225 : * the toaster always uses hardcoded index access and statistics are
2226 : * totally unimportant for toast relations.
2227 : */
2228 73972 : if (toast_relid != InvalidOid)
2229 : {
2230 : VacuumParams toast_vacuum_params;
2231 :
2232 : /* force VACOPT_PROCESS_MAIN so vacuum_rel() processes it */
2233 27070 : memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
2234 27070 : toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
2235 :
2236 27070 : vacuum_rel(toast_relid, NULL, &toast_vacuum_params, true, bstrategy);
2237 : }
2238 :
2239 : /*
2240 : * Now release the session-level lock on the main table.
2241 : */
2242 73972 : UnlockRelationIdForSession(&lockrelid, lmode);
2243 :
2244 : /* Report that we really did it. */
2245 73972 : return true;
2246 : }
2247 :
2248 :
2249 : /*
2250 : * Open all the vacuumable indexes of the given relation, obtaining the
2251 : * specified kind of lock on each. Return an array of Relation pointers for
2252 : * the indexes into *Irel, and the number of indexes into *nindexes.
2253 : *
2254 : * We consider an index vacuumable if it is marked insertable (indisready).
2255 : * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
2256 : * execution, and what we have is too corrupt to be processable. We will
2257 : * vacuum even if the index isn't indisvalid; this is important because in a
2258 : * unique index, uniqueness checks will be performed anyway and had better not
2259 : * hit dangling index pointers.
2260 : */
2261 : void
2262 120134 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
2263 : int *nindexes, Relation **Irel)
2264 : {
2265 : List *indexoidlist;
2266 : ListCell *indexoidscan;
2267 : int i;
2268 :
2269 : Assert(lockmode != NoLock);
2270 :
2271 120134 : indexoidlist = RelationGetIndexList(relation);
2272 :
2273 : /* allocate enough memory for all indexes */
2274 120134 : i = list_length(indexoidlist);
2275 :
2276 120134 : if (i > 0)
2277 111682 : *Irel = (Relation *) palloc(i * sizeof(Relation));
2278 : else
2279 8452 : *Irel = NULL;
2280 :
2281 : /* collect just the ready indexes */
2282 120134 : i = 0;
2283 310816 : foreach(indexoidscan, indexoidlist)
2284 : {
2285 190682 : Oid indexoid = lfirst_oid(indexoidscan);
2286 : Relation indrel;
2287 :
2288 190682 : indrel = index_open(indexoid, lockmode);
2289 190682 : if (indrel->rd_index->indisready)
2290 190682 : (*Irel)[i++] = indrel;
2291 : else
2292 0 : index_close(indrel, lockmode);
2293 : }
2294 :
2295 120134 : *nindexes = i;
2296 :
2297 120134 : list_free(indexoidlist);
2298 120134 : }
2299 :
2300 : /*
2301 : * Release the resources acquired by vac_open_indexes. Optionally release
2302 : * the locks (say NoLock to keep 'em).
2303 : */
2304 : void
2305 120804 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
2306 : {
2307 120804 : if (Irel == NULL)
2308 9130 : return;
2309 :
2310 302342 : while (nindexes--)
2311 : {
2312 190668 : Relation ind = Irel[nindexes];
2313 :
2314 190668 : index_close(ind, lockmode);
2315 : }
2316 111674 : pfree(Irel);
2317 : }
2318 :
2319 : /*
2320 : * vacuum_delay_point --- check for interrupts and cost-based delay.
2321 : *
2322 : * This should be called in each major loop of VACUUM processing,
2323 : * typically once per page processed.
2324 : */
2325 : void
2326 234706102 : vacuum_delay_point(void)
2327 : {
2328 234706102 : double msec = 0;
2329 :
2330 : /* Always check for interrupts */
2331 234706102 : CHECK_FOR_INTERRUPTS();
2332 :
2333 234706102 : if (InterruptPending ||
2334 234706102 : (!VacuumCostActive && !ConfigReloadPending))
2335 229711414 : return;
2336 :
2337 : /*
2338 : * Autovacuum workers should reload the configuration file if requested.
2339 : * This allows changes to [autovacuum_]vacuum_cost_limit and
2340 : * [autovacuum_]vacuum_cost_delay to take effect while a table is being
2341 : * vacuumed or analyzed.
2342 : */
2343 4994688 : if (ConfigReloadPending && IsAutoVacuumWorkerProcess())
2344 : {
2345 0 : ConfigReloadPending = false;
2346 0 : ProcessConfigFile(PGC_SIGHUP);
2347 0 : VacuumUpdateCosts();
2348 : }
2349 :
2350 : /*
2351 : * If we disabled cost-based delays after reloading the config file,
2352 : * return.
2353 : */
2354 4994688 : if (!VacuumCostActive)
2355 0 : return;
2356 :
2357 : /*
2358 : * For parallel vacuum, the delay is computed based on the shared cost
2359 : * balance. See compute_parallel_delay.
2360 : */
2361 4994688 : if (VacuumSharedCostBalance != NULL)
2362 0 : msec = compute_parallel_delay();
2363 4994688 : else if (VacuumCostBalance >= vacuum_cost_limit)
2364 716 : msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
2365 :
2366 : /* Nap if appropriate */
2367 4994688 : if (msec > 0)
2368 : {
2369 716 : if (msec > vacuum_cost_delay * 4)
2370 10 : msec = vacuum_cost_delay * 4;
2371 :
2372 716 : pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
2373 716 : pg_usleep(msec * 1000);
2374 716 : pgstat_report_wait_end();
2375 :
2376 : /*
2377 : * We don't want to ignore postmaster death during very long vacuums
2378 : * with vacuum_cost_delay configured. We can't use the usual
2379 : * WaitLatch() approach here because we want microsecond-based sleep
2380 : * durations above.
2381 : */
2382 716 : if (IsUnderPostmaster && !PostmasterIsAlive())
2383 0 : exit(1);
2384 :
2385 716 : VacuumCostBalance = 0;
2386 :
2387 : /*
2388 : * Balance and update limit values for autovacuum workers. We must do
2389 : * this periodically, as the number of workers across which we are
2390 : * balancing the limit may have changed.
2391 : *
2392 : * TODO: There may be better criteria for determining when to do this
2393 : * besides "check after napping".
2394 : */
2395 716 : AutoVacuumUpdateCostLimit();
2396 :
2397 : /* Might have gotten an interrupt while sleeping */
2398 716 : CHECK_FOR_INTERRUPTS();
2399 : }
2400 : }
2401 :
2402 : /*
2403 : * Computes the vacuum delay for parallel workers.
2404 : *
2405 : * The basic idea of a cost-based delay for parallel vacuum is to allow each
2406 : * worker to sleep in proportion to the share of work it's done. We achieve this
2407 : * by allowing all parallel vacuum workers including the leader process to
2408 : * have a shared view of cost related parameters (mainly VacuumCostBalance).
2409 : * We allow each worker to update it as and when it has incurred any cost and
2410 : * then based on that decide whether it needs to sleep. We compute the time
2411 : * to sleep for a worker based on the cost it has incurred
2412 : * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
2413 : * that amount. This avoids putting to sleep those workers which have done less
2414 : * I/O than other workers and therefore ensure that workers
2415 : * which are doing more I/O got throttled more.
2416 : *
2417 : * We allow a worker to sleep only if it has performed I/O above a certain
2418 : * threshold, which is calculated based on the number of active workers
2419 : * (VacuumActiveNWorkers), and the overall cost balance is more than
2420 : * VacuumCostLimit set by the system. Testing reveals that we achieve
2421 : * the required throttling if we force a worker that has done more than 50%
2422 : * of its share of work to sleep.
2423 : */
2424 : static double
2425 0 : compute_parallel_delay(void)
2426 : {
2427 0 : double msec = 0;
2428 : uint32 shared_balance;
2429 : int nworkers;
2430 :
2431 : /* Parallel vacuum must be active */
2432 : Assert(VacuumSharedCostBalance);
2433 :
2434 0 : nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
2435 :
2436 : /* At least count itself */
2437 : Assert(nworkers >= 1);
2438 :
2439 : /* Update the shared cost balance value atomically */
2440 0 : shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
2441 :
2442 : /* Compute the total local balance for the current worker */
2443 0 : VacuumCostBalanceLocal += VacuumCostBalance;
2444 :
2445 0 : if ((shared_balance >= vacuum_cost_limit) &&
2446 0 : (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
2447 : {
2448 : /* Compute sleep time based on the local cost balance */
2449 0 : msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
2450 0 : pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
2451 0 : VacuumCostBalanceLocal = 0;
2452 : }
2453 :
2454 : /*
2455 : * Reset the local balance as we accumulated it into the shared value.
2456 : */
2457 0 : VacuumCostBalance = 0;
2458 :
2459 0 : return msec;
2460 : }
2461 :
2462 : /*
2463 : * A wrapper function of defGetBoolean().
2464 : *
2465 : * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
2466 : * of true and false.
2467 : */
2468 : static VacOptValue
2469 314 : get_vacoptval_from_boolean(DefElem *def)
2470 : {
2471 314 : return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
2472 : }
2473 :
2474 : /*
2475 : * vac_bulkdel_one_index() -- bulk-deletion for index relation.
2476 : *
2477 : * Returns bulk delete stats derived from input stats
2478 : */
2479 : IndexBulkDeleteResult *
2480 7590 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
2481 : VacDeadItems *dead_items)
2482 : {
2483 : /* Do bulk deletion */
2484 7590 : istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
2485 : (void *) dead_items);
2486 :
2487 7590 : ereport(ivinfo->message_level,
2488 : (errmsg("scanned index \"%s\" to remove %d row versions",
2489 : RelationGetRelationName(ivinfo->index),
2490 : dead_items->num_items)));
2491 :
2492 7590 : return istat;
2493 : }
2494 :
2495 : /*
2496 : * vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
2497 : *
2498 : * Returns bulk delete stats derived from input stats
2499 : */
2500 : IndexBulkDeleteResult *
2501 109312 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
2502 : {
2503 109312 : istat = index_vacuum_cleanup(ivinfo, istat);
2504 :
2505 109312 : if (istat)
2506 7864 : ereport(ivinfo->message_level,
2507 : (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
2508 : RelationGetRelationName(ivinfo->index),
2509 : istat->num_index_tuples,
2510 : istat->num_pages),
2511 : errdetail("%.0f index row versions were removed.\n"
2512 : "%u index pages were newly deleted.\n"
2513 : "%u index pages are currently deleted, of which %u are currently reusable.",
2514 : istat->tuples_removed,
2515 : istat->pages_newly_deleted,
2516 : istat->pages_deleted, istat->pages_free)));
2517 :
2518 109312 : return istat;
2519 : }
2520 :
2521 : /*
2522 : * Returns the total required space for VACUUM's dead_items array given a
2523 : * max_items value.
2524 : */
2525 : Size
2526 73496 : vac_max_items_to_alloc_size(int max_items)
2527 : {
2528 : Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
2529 :
2530 73496 : return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
2531 : }
2532 :
2533 : /*
2534 : * vac_tid_reaped() -- is a particular tid deletable?
2535 : *
2536 : * This has the right signature to be an IndexBulkDeleteCallback.
2537 : *
2538 : * Assumes dead_items array is sorted (in ascending TID order).
2539 : */
2540 : static bool
2541 16156432 : vac_tid_reaped(ItemPointer itemptr, void *state)
2542 : {
2543 16156432 : VacDeadItems *dead_items = (VacDeadItems *) state;
2544 : int64 litem,
2545 : ritem,
2546 : item;
2547 : ItemPointer res;
2548 :
2549 16156432 : litem = itemptr_encode(&dead_items->items[0]);
2550 16156432 : ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
2551 16156432 : item = itemptr_encode(itemptr);
2552 :
2553 : /*
2554 : * Doing a simple bound check before bsearch() is useful to avoid the
2555 : * extra cost of bsearch(), especially if dead items on the heap are
2556 : * concentrated in a certain range. Since this function is called for
2557 : * every index tuple, it pays to be really fast.
2558 : */
2559 16156432 : if (item < litem || item > ritem)
2560 7144938 : return false;
2561 :
2562 9011494 : res = (ItemPointer) bsearch(itemptr,
2563 9011494 : dead_items->items,
2564 9011494 : dead_items->num_items,
2565 : sizeof(ItemPointerData),
2566 : vac_cmp_itemptr);
2567 :
2568 9011494 : return (res != NULL);
2569 : }
2570 :
2571 : /*
2572 : * Comparator routines for use with qsort() and bsearch().
2573 : */
2574 : static int
2575 88144246 : vac_cmp_itemptr(const void *left, const void *right)
2576 : {
2577 : BlockNumber lblk,
2578 : rblk;
2579 : OffsetNumber loff,
2580 : roff;
2581 :
2582 88144246 : lblk = ItemPointerGetBlockNumber((ItemPointer) left);
2583 88144246 : rblk = ItemPointerGetBlockNumber((ItemPointer) right);
2584 :
2585 88144246 : if (lblk < rblk)
2586 28655538 : return -1;
2587 59488708 : if (lblk > rblk)
2588 29250668 : return 1;
2589 :
2590 30238040 : loff = ItemPointerGetOffsetNumber((ItemPointer) left);
2591 30238040 : roff = ItemPointerGetOffsetNumber((ItemPointer) right);
2592 :
2593 30238040 : if (loff < roff)
2594 14348116 : return -1;
2595 15889924 : if (loff > roff)
2596 12733422 : return 1;
2597 :
2598 3156502 : return 0;
2599 : }
|