LCOV - code coverage report
Current view: top level - src/backend/commands - vacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 679 762 89.1 %
Date: 2025-12-11 21:18:53 Functions: 21 22 95.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuum.c
       4             :  *    The postgres vacuum cleaner.
       5             :  *
       6             :  * This file includes (a) control and dispatch code for VACUUM and ANALYZE
       7             :  * commands, (b) code to compute various vacuum thresholds, and (c) index
       8             :  * vacuum code.
       9             :  *
      10             :  * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
      11             :  * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
      12             :  * CLUSTER, handled in cluster.c.
      13             :  *
      14             :  *
      15             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      16             :  * Portions Copyright (c) 1994, Regents of the University of California
      17             :  *
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/commands/vacuum.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "access/clog.h"
      29             : #include "access/commit_ts.h"
      30             : #include "access/genam.h"
      31             : #include "access/heapam.h"
      32             : #include "access/htup_details.h"
      33             : #include "access/multixact.h"
      34             : #include "access/tableam.h"
      35             : #include "access/transam.h"
      36             : #include "access/xact.h"
      37             : #include "catalog/namespace.h"
      38             : #include "catalog/pg_database.h"
      39             : #include "catalog/pg_inherits.h"
      40             : #include "commands/async.h"
      41             : #include "commands/cluster.h"
      42             : #include "commands/defrem.h"
      43             : #include "commands/progress.h"
      44             : #include "commands/vacuum.h"
      45             : #include "miscadmin.h"
      46             : #include "nodes/makefuncs.h"
      47             : #include "pgstat.h"
      48             : #include "postmaster/autovacuum.h"
      49             : #include "postmaster/bgworker_internals.h"
      50             : #include "postmaster/interrupt.h"
      51             : #include "storage/bufmgr.h"
      52             : #include "storage/lmgr.h"
      53             : #include "storage/pmsignal.h"
      54             : #include "storage/proc.h"
      55             : #include "storage/procarray.h"
      56             : #include "utils/acl.h"
      57             : #include "utils/fmgroids.h"
      58             : #include "utils/guc.h"
      59             : #include "utils/guc_hooks.h"
      60             : #include "utils/injection_point.h"
      61             : #include "utils/memutils.h"
      62             : #include "utils/snapmgr.h"
      63             : #include "utils/syscache.h"
      64             : 
      65             : /*
      66             :  * Minimum interval for cost-based vacuum delay reports from a parallel worker.
      67             :  * This aims to avoid sending too many messages and waking up the leader too
      68             :  * frequently.
      69             :  */
      70             : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS    (NS_PER_S)
      71             : 
      72             : /*
      73             :  * GUC parameters
      74             :  */
      75             : int         vacuum_freeze_min_age;
      76             : int         vacuum_freeze_table_age;
      77             : int         vacuum_multixact_freeze_min_age;
      78             : int         vacuum_multixact_freeze_table_age;
      79             : int         vacuum_failsafe_age;
      80             : int         vacuum_multixact_failsafe_age;
      81             : double      vacuum_max_eager_freeze_failure_rate;
      82             : bool        track_cost_delay_timing;
      83             : bool        vacuum_truncate;
      84             : 
      85             : /*
      86             :  * Variables for cost-based vacuum delay. The defaults differ between
      87             :  * autovacuum and vacuum. They should be set with the appropriate GUC value in
      88             :  * vacuum code. They are initialized here to the defaults for client backends
      89             :  * executing VACUUM or ANALYZE.
      90             :  */
      91             : double      vacuum_cost_delay = 0;
      92             : int         vacuum_cost_limit = 200;
      93             : 
      94             : /* Variable for reporting cost-based vacuum delay from parallel workers. */
      95             : int64       parallel_vacuum_worker_delay_ns = 0;
      96             : 
      97             : /*
      98             :  * VacuumFailsafeActive is defined as a global so that we can determine
      99             :  * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
     100             :  * If failsafe mode has been engaged, we will not re-enable cost-based delay
     101             :  * for the table until after vacuuming has completed, regardless of other
     102             :  * settings.
     103             :  *
     104             :  * Only VACUUM code should inspect this variable and only table access methods
     105             :  * should set it to true. In Table AM-agnostic VACUUM code, this variable is
     106             :  * inspected to determine whether or not to allow cost-based delays. Table AMs
     107             :  * are free to set it if they desire this behavior, but it is false by default
     108             :  * and reset to false in between vacuuming each relation.
     109             :  */
     110             : bool        VacuumFailsafeActive = false;
     111             : 
     112             : /*
     113             :  * Variables for cost-based parallel vacuum.  See comments atop
     114             :  * compute_parallel_delay to understand how it works.
     115             :  */
     116             : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
     117             : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
     118             : int         VacuumCostBalanceLocal = 0;
     119             : 
     120             : /* non-export function prototypes */
     121             : static List *expand_vacuum_rel(VacuumRelation *vrel,
     122             :                                MemoryContext vac_context, int options);
     123             : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
     124             : static void vac_truncate_clog(TransactionId frozenXID,
     125             :                               MultiXactId minMulti,
     126             :                               TransactionId lastSaneFrozenXid,
     127             :                               MultiXactId lastSaneMinMulti);
     128             : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
     129             :                        BufferAccessStrategy bstrategy);
     130             : static double compute_parallel_delay(void);
     131             : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
     132             : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
     133             : 
     134             : /*
     135             :  * GUC check function to ensure GUC value specified is within the allowable
     136             :  * range.
     137             :  */
     138             : bool
     139        2302 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
     140             :                                 GucSource source)
     141             : {
     142             :     /* Value upper and lower hard limits are inclusive */
     143        2302 :     if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
     144        2302 :                          *newval <= MAX_BAS_VAC_RING_SIZE_KB))
     145        2302 :         return true;
     146             : 
     147             :     /* Value does not fall within any allowable range */
     148           0 :     GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
     149             :                         "vacuum_buffer_usage_limit",
     150             :                         MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
     151             : 
     152           0 :     return false;
     153             : }
     154             : 
     155             : /*
     156             :  * Primary entry point for manual VACUUM and ANALYZE commands
     157             :  *
     158             :  * This is mainly a preparation wrapper for the real operations that will
     159             :  * happen in vacuum().
     160             :  */
     161             : void
     162       14794 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
     163             : {
     164             :     VacuumParams params;
     165       14794 :     BufferAccessStrategy bstrategy = NULL;
     166       14794 :     bool        verbose = false;
     167       14794 :     bool        skip_locked = false;
     168       14794 :     bool        analyze = false;
     169       14794 :     bool        freeze = false;
     170       14794 :     bool        full = false;
     171       14794 :     bool        disable_page_skipping = false;
     172       14794 :     bool        process_main = true;
     173       14794 :     bool        process_toast = true;
     174             :     int         ring_size;
     175       14794 :     bool        skip_database_stats = false;
     176       14794 :     bool        only_database_stats = false;
     177             :     MemoryContext vac_context;
     178             :     ListCell   *lc;
     179             : 
     180             :     /* index_cleanup and truncate values unspecified for now */
     181       14794 :     params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
     182       14794 :     params.truncate = VACOPTVALUE_UNSPECIFIED;
     183             : 
     184             :     /* By default parallel vacuum is enabled */
     185       14794 :     params.nworkers = 0;
     186             : 
     187             :     /* Will be set later if we recurse to a TOAST table. */
     188       14794 :     params.toast_parent = InvalidOid;
     189             : 
     190             :     /*
     191             :      * Set this to an invalid value so it is clear whether or not a
     192             :      * BUFFER_USAGE_LIMIT was specified when making the access strategy.
     193             :      */
     194       14794 :     ring_size = -1;
     195             : 
     196             :     /* Parse options list */
     197       31008 :     foreach(lc, vacstmt->options)
     198             :     {
     199       16250 :         DefElem    *opt = (DefElem *) lfirst(lc);
     200             : 
     201             :         /* Parse common options for VACUUM and ANALYZE */
     202       16250 :         if (strcmp(opt->defname, "verbose") == 0)
     203          42 :             verbose = defGetBoolean(opt);
     204       16208 :         else if (strcmp(opt->defname, "skip_locked") == 0)
     205         334 :             skip_locked = defGetBoolean(opt);
     206       15874 :         else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
     207             :         {
     208             :             const char *hintmsg;
     209             :             int         result;
     210             :             char       *vac_buffer_size;
     211             : 
     212          54 :             vac_buffer_size = defGetString(opt);
     213             : 
     214             :             /*
     215             :              * Check that the specified value is valid and the size falls
     216             :              * within the hard upper and lower limits if it is not 0.
     217             :              */
     218          54 :             if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
     219          48 :                 (result != 0 &&
     220          36 :                  (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
     221             :             {
     222          18 :                 ereport(ERROR,
     223             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     224             :                          errmsg("%s option must be 0 or between %d kB and %d kB",
     225             :                                 "BUFFER_USAGE_LIMIT",
     226             :                                 MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
     227             :                          hintmsg ? errhint_internal("%s", _(hintmsg)) : 0));
     228             :             }
     229             : 
     230          36 :             ring_size = result;
     231             :         }
     232       15820 :         else if (!vacstmt->is_vacuumcmd)
     233           6 :             ereport(ERROR,
     234             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     235             :                      errmsg("unrecognized %s option \"%s\"",
     236             :                             "ANALYZE", opt->defname),
     237             :                      parser_errposition(pstate, opt->location)));
     238             : 
     239             :         /* Parse options available on VACUUM */
     240       15814 :         else if (strcmp(opt->defname, "analyze") == 0)
     241        3034 :             analyze = defGetBoolean(opt);
     242       12780 :         else if (strcmp(opt->defname, "freeze") == 0)
     243        3098 :             freeze = defGetBoolean(opt);
     244        9682 :         else if (strcmp(opt->defname, "full") == 0)
     245         392 :             full = defGetBoolean(opt);
     246        9290 :         else if (strcmp(opt->defname, "disable_page_skipping") == 0)
     247         214 :             disable_page_skipping = defGetBoolean(opt);
     248        9076 :         else if (strcmp(opt->defname, "index_cleanup") == 0)
     249             :         {
     250             :             /* Interpret no string as the default, which is 'auto' */
     251         174 :             if (!opt->arg)
     252           0 :                 params.index_cleanup = VACOPTVALUE_AUTO;
     253             :             else
     254             :             {
     255         174 :                 char       *sval = defGetString(opt);
     256             : 
     257             :                 /* Try matching on 'auto' string, or fall back on boolean */
     258         174 :                 if (pg_strcasecmp(sval, "auto") == 0)
     259           6 :                     params.index_cleanup = VACOPTVALUE_AUTO;
     260             :                 else
     261         168 :                     params.index_cleanup = get_vacoptval_from_boolean(opt);
     262             :             }
     263             :         }
     264        8902 :         else if (strcmp(opt->defname, "process_main") == 0)
     265         154 :             process_main = defGetBoolean(opt);
     266        8748 :         else if (strcmp(opt->defname, "process_toast") == 0)
     267         160 :             process_toast = defGetBoolean(opt);
     268        8588 :         else if (strcmp(opt->defname, "truncate") == 0)
     269         158 :             params.truncate = get_vacoptval_from_boolean(opt);
     270        8430 :         else if (strcmp(opt->defname, "parallel") == 0)
     271             :         {
     272         352 :             int         nworkers = defGetInt32(opt);
     273             : 
     274         346 :             if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
     275           6 :                 ereport(ERROR,
     276             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     277             :                          errmsg("%s option must be between 0 and %d",
     278             :                                 "PARALLEL",
     279             :                                 MAX_PARALLEL_WORKER_LIMIT),
     280             :                          parser_errposition(pstate, opt->location)));
     281             : 
     282             :             /*
     283             :              * Disable parallel vacuum, if user has specified parallel degree
     284             :              * as zero.
     285             :              */
     286         340 :             if (nworkers == 0)
     287         156 :                 params.nworkers = -1;
     288             :             else
     289         184 :                 params.nworkers = nworkers;
     290             :         }
     291        8078 :         else if (strcmp(opt->defname, "skip_database_stats") == 0)
     292        7934 :             skip_database_stats = defGetBoolean(opt);
     293         144 :         else if (strcmp(opt->defname, "only_database_stats") == 0)
     294         144 :             only_database_stats = defGetBoolean(opt);
     295             :         else
     296           0 :             ereport(ERROR,
     297             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     298             :                      errmsg("unrecognized %s option \"%s\"",
     299             :                             "VACUUM", opt->defname),
     300             :                      parser_errposition(pstate, opt->location)));
     301             :     }
     302             : 
     303             :     /* Set vacuum options */
     304       14758 :     params.options =
     305       14758 :         (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
     306       14758 :         (verbose ? VACOPT_VERBOSE : 0) |
     307       14758 :         (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
     308       14758 :         (analyze ? VACOPT_ANALYZE : 0) |
     309       14758 :         (freeze ? VACOPT_FREEZE : 0) |
     310       14758 :         (full ? VACOPT_FULL : 0) |
     311       14758 :         (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
     312       14758 :         (process_main ? VACOPT_PROCESS_MAIN : 0) |
     313       14758 :         (process_toast ? VACOPT_PROCESS_TOAST : 0) |
     314       14758 :         (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
     315       14758 :         (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
     316             : 
     317             :     /* sanity checks on options */
     318             :     Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
     319             :     Assert((params.options & VACOPT_VACUUM) ||
     320             :            !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
     321             : 
     322       14758 :     if ((params.options & VACOPT_FULL) && params.nworkers > 0)
     323           6 :         ereport(ERROR,
     324             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     325             :                  errmsg("VACUUM FULL cannot be performed in parallel")));
     326             : 
     327             :     /*
     328             :      * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
     329             :      * ERROR for that case.  VACUUM (FULL, ANALYZE) does make use of it, so
     330             :      * we'll permit that.
     331             :      */
     332       14752 :     if (ring_size != -1 && (params.options & VACOPT_FULL) &&
     333           6 :         !(params.options & VACOPT_ANALYZE))
     334           6 :         ereport(ERROR,
     335             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     336             :                  errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
     337             : 
     338             :     /*
     339             :      * Make sure VACOPT_ANALYZE is specified if any column lists are present.
     340             :      */
     341       14746 :     if (!(params.options & VACOPT_ANALYZE))
     342             :     {
     343       13380 :         foreach(lc, vacstmt->rels)
     344             :         {
     345        6586 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     346             : 
     347        6586 :             if (vrel->va_cols != NIL)
     348           6 :                 ereport(ERROR,
     349             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     350             :                          errmsg("ANALYZE option must be specified when a column list is provided")));
     351             :         }
     352             :     }
     353             : 
     354             : 
     355             :     /*
     356             :      * Sanity check DISABLE_PAGE_SKIPPING option.
     357             :      */
     358       14740 :     if ((params.options & VACOPT_FULL) != 0 &&
     359         368 :         (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
     360           0 :         ereport(ERROR,
     361             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     362             :                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
     363             : 
     364             :     /* sanity check for PROCESS_TOAST */
     365       14740 :     if ((params.options & VACOPT_FULL) != 0 &&
     366         368 :         (params.options & VACOPT_PROCESS_TOAST) == 0)
     367           6 :         ereport(ERROR,
     368             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     369             :                  errmsg("PROCESS_TOAST required with VACUUM FULL")));
     370             : 
     371             :     /* sanity check for ONLY_DATABASE_STATS */
     372       14734 :     if (params.options & VACOPT_ONLY_DATABASE_STATS)
     373             :     {
     374             :         Assert(params.options & VACOPT_VACUUM);
     375         144 :         if (vacstmt->rels != NIL)
     376           6 :             ereport(ERROR,
     377             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     378             :                      errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
     379             :         /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
     380         138 :         if (params.options & ~(VACOPT_VACUUM |
     381             :                                VACOPT_VERBOSE |
     382             :                                VACOPT_PROCESS_MAIN |
     383             :                                VACOPT_PROCESS_TOAST |
     384             :                                VACOPT_ONLY_DATABASE_STATS))
     385           0 :             ereport(ERROR,
     386             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     387             :                      errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
     388             :     }
     389             : 
     390             :     /*
     391             :      * All freeze ages are zero if the FREEZE option is given; otherwise pass
     392             :      * them as -1 which means to use the default values.
     393             :      */
     394       14728 :     if (params.options & VACOPT_FREEZE)
     395             :     {
     396        3098 :         params.freeze_min_age = 0;
     397        3098 :         params.freeze_table_age = 0;
     398        3098 :         params.multixact_freeze_min_age = 0;
     399        3098 :         params.multixact_freeze_table_age = 0;
     400             :     }
     401             :     else
     402             :     {
     403       11630 :         params.freeze_min_age = -1;
     404       11630 :         params.freeze_table_age = -1;
     405       11630 :         params.multixact_freeze_min_age = -1;
     406       11630 :         params.multixact_freeze_table_age = -1;
     407             :     }
     408             : 
     409             :     /* user-invoked vacuum is never "for wraparound" */
     410       14728 :     params.is_wraparound = false;
     411             : 
     412             :     /*
     413             :      * user-invoked vacuum uses VACOPT_VERBOSE instead of
     414             :      * log_vacuum_min_duration and log_analyze_min_duration
     415             :      */
     416       14728 :     params.log_vacuum_min_duration = -1;
     417       14728 :     params.log_analyze_min_duration = -1;
     418             : 
     419             :     /*
     420             :      * Later, in vacuum_rel(), we check if a reloption override was specified.
     421             :      */
     422       14728 :     params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
     423             : 
     424             :     /*
     425             :      * Create special memory context for cross-transaction storage.
     426             :      *
     427             :      * Since it is a child of PortalContext, it will go away eventually even
     428             :      * if we suffer an error; there's no need for special abort cleanup logic.
     429             :      */
     430       14728 :     vac_context = AllocSetContextCreate(PortalContext,
     431             :                                         "Vacuum",
     432             :                                         ALLOCSET_DEFAULT_SIZES);
     433             : 
     434             :     /*
     435             :      * Make a buffer strategy object in the cross-transaction memory context.
     436             :      * We needn't bother making this for VACUUM (FULL) or VACUUM
     437             :      * (ONLY_DATABASE_STATS) as they'll not make use of it.  VACUUM (FULL,
     438             :      * ANALYZE) is possible, so we'd better ensure that we make a strategy
     439             :      * when we see ANALYZE.
     440             :      */
     441       14728 :     if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
     442         500 :                            VACOPT_FULL)) == 0 ||
     443         500 :         (params.options & VACOPT_ANALYZE) != 0)
     444             :     {
     445             : 
     446       14234 :         MemoryContext old_context = MemoryContextSwitchTo(vac_context);
     447             : 
     448             :         Assert(ring_size >= -1);
     449             : 
     450             :         /*
     451             :          * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
     452             :          * command, it overrides the value of VacuumBufferUsageLimit.  Either
     453             :          * value may be 0, in which case GetAccessStrategyWithSize() will
     454             :          * return NULL, effectively allowing full use of shared buffers.
     455             :          */
     456       14234 :         if (ring_size == -1)
     457       14204 :             ring_size = VacuumBufferUsageLimit;
     458             : 
     459       14234 :         bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
     460             : 
     461       14234 :         MemoryContextSwitchTo(old_context);
     462             :     }
     463             : 
     464             :     /* Now go through the common routine */
     465       14728 :     vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
     466             : 
     467             :     /* Finally, clean up the vacuum memory context */
     468       14594 :     MemoryContextDelete(vac_context);
     469       14594 : }
     470             : 
     471             : /*
     472             :  * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
     473             :  *
     474             :  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
     475             :  * we process all relevant tables in the database.  For each VacuumRelation,
     476             :  * if a valid OID is supplied, the table with that OID is what to process;
     477             :  * otherwise, the VacuumRelation's RangeVar indicates what to process.
     478             :  *
     479             :  * params contains a set of parameters that can be used to customize the
     480             :  * behavior.
     481             :  *
     482             :  * bstrategy may be passed in as NULL when the caller does not want to
     483             :  * restrict the number of shared_buffers that VACUUM / ANALYZE can use;
     484             :  * otherwise, the caller must build a BufferAccessStrategy with the number of
     485             :  * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
     486             :  * using.
     487             :  *
     488             :  * isTopLevel should be passed down from ProcessUtility.
     489             :  *
     490             :  * It is the caller's responsibility to ensure that all parameters are
     491             :  * allocated in a memory context that will not disappear at transaction commit.
     492             :  */
     493             : void
     494      244952 : vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
     495             :        MemoryContext vac_context, bool isTopLevel)
     496             : {   /* Overview: check transaction context, reject recursive entry, build the relation list, choose a transaction strategy, process each relation, then finish up. */
     497             :     static bool in_vacuum = false;  /* recursion guard: set at the start of the PG_TRY block, cleared in PG_FINALLY */
     498             : 
     499             :     const char *stmttype;   /* "VACUUM" or "ANALYZE"; used in messages below */
     500             :     volatile bool in_outer_xact,
     501             :                 use_own_xacts;
     502             : 
     503      244952 :     stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
     504             : 
     505             :     /*
     506             :      * We cannot run VACUUM inside a user transaction block; if we were inside
     507             :      * a transaction, then our commit- and start-transaction-command calls
     508             :      * would not have the intended effect!  There are numerous other subtle
     509             :      * dependencies on this, too.
     510             :      *
     511             :      * ANALYZE (without VACUUM) can run either way.
     512             :      */
     513      244952 :     if (params.options & VACOPT_VACUUM)
     514             :     {
     515      239750 :         PreventInTransactionBlock(isTopLevel, stmttype);
     516      239730 :         in_outer_xact = false;
     517             :     }
     518             :     else
     519        5202 :         in_outer_xact = IsInTransactionBlock(isTopLevel);
     520             : 
     521             :     /*
     522             :      * Check for and disallow recursive calls.  This could happen when VACUUM
     523             :      * FULL or ANALYZE calls a hostile index expression that itself calls
     524             :      * ANALYZE.
     525             :      */
     526      244932 :     if (in_vacuum)
     527          12 :         ereport(ERROR,
     528             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     529             :                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
     530             :                         stmttype)));
     531             : 
     532             :     /*
     533             :      * Build list of relation(s) to process, putting any new data in
     534             :      * vac_context for safekeeping.
     535             :      */
     536      244920 :     if (params.options & VACOPT_ONLY_DATABASE_STATS)
     537             :     {
     538             :         /* We don't process any tables in this case */
     539             :         Assert(relations == NIL);
     540             :     }
     541      244782 :     else if (relations != NIL)
     542             :     {
     543      244560 :         List       *newrels = NIL;
     544             :         ListCell   *lc;
     545             : 
     546      489248 :         foreach(lc, relations)
     547             :         {
     548      244724 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     549             :             List       *sublist;
     550             :             MemoryContext old_context;
     551             : 
     552      244724 :             sublist = expand_vacuum_rel(vrel, vac_context, params.options);
     553      244688 :             old_context = MemoryContextSwitchTo(vac_context);
     554      244688 :             newrels = list_concat(newrels, sublist);
     555      244688 :             MemoryContextSwitchTo(old_context);
     556             :         }
     557      244524 :         relations = newrels;
     558             :     }
     559             :     else
     560         222 :         relations = get_all_vacuum_rels(vac_context, params.options);
     561             : 
     562             :     /*
     563             :      * Decide whether we need to start/commit our own transactions.
     564             :      *
     565             :      * For VACUUM (with or without ANALYZE): always do so, so that we can
     566             :      * release locks as soon as possible.  (We could possibly use the outer
     567             :      * transaction for a one-table VACUUM, but handling TOAST tables would be
     568             :      * problematic.)
     569             :      *
     570             :      * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
     571             :      * start/commit our own transactions.  Also, there's no need to do so if
     572             :      * only processing one relation.  For multiple relations when not within a
     573             :      * transaction block, and also in an autovacuum worker, use own
     574             :      * transactions so we can release locks sooner.
     575             :      */
     576      244884 :     if (params.options & VACOPT_VACUUM)
     577      239718 :         use_own_xacts = true;
     578             :     else
     579             :     {
     580             :         Assert(params.options & VACOPT_ANALYZE);
     581        5166 :         if (AmAutoVacuumWorkerProcess())
     582         290 :             use_own_xacts = true;
     583        4876 :         else if (in_outer_xact)
     584         254 :             use_own_xacts = false;
     585        4622 :         else if (list_length(relations) > 1)
     586         812 :             use_own_xacts = true;
     587             :         else
     588        3810 :             use_own_xacts = false;
     589             :     }
     590             : 
     591             :     /*
     592             :      * vacuum_rel expects to be entered with no transaction active; it will
     593             :      * start and commit its own transaction.  But we are called by an SQL
     594             :      * command, and so we are executing inside a transaction already. We
     595             :      * commit the transaction started in PostgresMain() here, and start
     596             :      * another one before exiting to match the commit waiting for us back in
     597             :      * PostgresMain().
     598             :      */
     599      244884 :     if (use_own_xacts)
     600             :     {
     601             :         Assert(!in_outer_xact);
     602             : 
     603             :         /* ActiveSnapshot is not set by autovacuum */
     604      240820 :         if (ActiveSnapshotSet())
     605       10596 :             PopActiveSnapshot();
     606             : 
     607             :         /* matches the StartTransaction in PostgresMain() */
     608      240820 :         CommitTransactionCommand();
     609             :     }
     610             : 
     611             :     /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
     612      244884 :     PG_TRY();
     613             :     {
     614             :         ListCell   *cur;
     615             : 
     616      244884 :         in_vacuum = true;
     617      244884 :         VacuumFailsafeActive = false;
     618      244884 :         VacuumUpdateCosts();
     619      244884 :         VacuumCostBalance = 0;
     620      244884 :         VacuumCostBalanceLocal = 0;
     621      244884 :         VacuumSharedCostBalance = NULL;
     622      244884 :         VacuumActiveNWorkers = NULL;
     623             : 
     624             :         /*
     625             :          * Loop to process each selected relation.
     626             :          */
     627      507120 :         foreach(cur, relations)
     628             :         {
     629      262302 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
     630             : 
     631      262302 :             if (params.options & VACOPT_VACUUM)
     632             :             {
     633      248668 :                 if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
     634         100 :                     continue;   /* vacuum_rel() returned false: skip the ANALYZE step for this relation */
     635             :             }
     636             : 
     637      262194 :             if (params.options & VACOPT_ANALYZE)
     638             :             {
     639             :                 /*
     640             :                  * If using separate xacts, start one for analyze. Otherwise,
     641             :                  * we can use the outer transaction.
     642             :                  */
     643       16898 :                 if (use_own_xacts)
     644             :                 {
     645       12860 :                     StartTransactionCommand();
     646             :                     /* functions in indexes may want a snapshot set */
     647       12860 :                     PushActiveSnapshot(GetTransactionSnapshot());
     648             :                 }
     649             : 
     650       16898 :                 analyze_rel(vrel->oid, vrel->relation, params,
     651             :                             vrel->va_cols, in_outer_xact, bstrategy);
     652             : 
     653       16840 :                 if (use_own_xacts)
     654             :                 {
     655       12822 :                     PopActiveSnapshot();
     656             :                     /* standard_ProcessUtility() does CCI if !use_own_xacts */
     657       12822 :                     CommandCounterIncrement();
     658       12822 :                     CommitTransactionCommand();
     659             :                 }
     660             :                 else
     661             :                 {
     662             :                     /*
     663             :                      * If we're not using separate xacts, better separate the
     664             :                      * ANALYZE actions with CCIs.  This avoids trouble if user
     665             :                      * says "ANALYZE t, t".
     666             :                      */
     667        4018 :                     CommandCounterIncrement();
     668             :                 }
     669             :             }
     670             : 
     671             :             /*
     672             :              * Ensure VacuumFailsafeActive has been reset before vacuuming the
     673             :              * next relation.
     674             :              */
     675      262136 :             VacuumFailsafeActive = false;
     676             :         }
     677             :     }
     678          66 :     PG_FINALLY();
     679             :     {   /* reset the recursion guard and the cost-accounting globals touched above */
     680      244884 :         in_vacuum = false;
     681      244884 :         VacuumCostActive = false;
     682      244884 :         VacuumFailsafeActive = false;
     683      244884 :         VacuumCostBalance = 0;
     684             :     }
     685      244884 :     PG_END_TRY();
     686             : 
     687             :     /*
     688             :      * Finish up processing.
     689             :      */
     690      244818 :     if (use_own_xacts)
     691             :     {
     692             :         /* here, we are not in a transaction */
     693             : 
     694             :         /*
     695             :          * This matches the CommitTransaction waiting for us in
     696             :          * PostgresMain().
     697             :          */
     698      240774 :         StartTransactionCommand();
     699             :     }
     700             : 
     701      244818 :     if ((params.options & VACOPT_VACUUM) &&
     702      239684 :         !(params.options & VACOPT_SKIP_DATABASE_STATS))
     703             :     {
     704             :         /*
     705             :          * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
     706             :          */
     707        1818 :         vac_update_datfrozenxid();
     708             :     }
     709             : 
     710      244818 : }
     711             : 
     712             : /*
     713             :  * Check if the current user has privileges to vacuum or analyze the relation.
     714             :  * If not, issue a WARNING log message and return false to let the caller
     715             :  * decide what to do with this relation.  This routine is used to decide if a
     716             :  * relation can be processed for VACUUM or ANALYZE.  Returns true when the user is permitted; no message is emitted in that case.
     717             :  */
     718             : bool
     719      308258 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
     720             :                                  bits32 options)
     721             : {
     722             :     char       *relname;
     723             : 
     724             :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);  /* caller must request VACUUM and/or ANALYZE */
     725             : 
     726             :     /*----------
     727             :      * A role has privileges to vacuum or analyze the relation if any of the
     728             :      * following are true:
     729             :      *   - the role owns the current database and the relation is not shared
     730             :      *   - the role has the MAINTAIN privilege on the relation
     731             :      *----------
     732             :      */
     733      308258 :     if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
     734      356678 :          !reltuple->relisshared) ||
     735       51990 :         pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
     736      305164 :         return true;
     737             : 
     738        3094 :     relname = NameStr(reltuple->relname);   /* used only in the warning text below */
     739             : 
     740        3094 :     if ((options & VACOPT_VACUUM) != 0)
     741             :     {
     742         224 :         ereport(WARNING,
     743             :                 (errmsg("permission denied to vacuum \"%s\", skipping it",
     744             :                         relname)));
     745             : 
     746             :         /*
     747             :          * For VACUUM ANALYZE, both logs could show up, but just generate
     748             :          * information for VACUUM as that would be the first one to be
     749             :          * processed.
     750             :          */
     751         224 :         return false;
     752             :     }
     753             : 
     754        2870 :     if ((options & VACOPT_ANALYZE) != 0)    /* holds here whenever the Assert above holds, since the VACUUM case already returned */
     755        2870 :         ereport(WARNING,
     756             :                 (errmsg("permission denied to analyze \"%s\", skipping it",
     757             :                         relname)));
     758             : 
     759        2870 :     return false;
     760             : }
     761             : 
     762             : 
     763             : /*
     764             :  * vacuum_open_relation
     765             :  *
     766             :  * This routine is used for attempting to open and lock a relation which
     767             :  * is going to be vacuumed or analyzed.  If the relation cannot be opened
     768             :  * or locked, a message is logged when possible and NULL is returned.  'relation' may be NULL, in which case no message is logged; 'verbose' only matters in an autovacuum worker.
     769             :  */
     770             : Relation
     771      275368 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
     772             :                      bool verbose, LOCKMODE lmode)
     773             : {
     774             :     Relation    rel;
     775      275368 :     bool        rel_lock = true;    /* set to false only when the conditional lock attempt below fails */
     776             :     int         elevel;
     777             : 
     778             :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
     779             : 
     780             :     /*
     781             :      * Open the relation and get the appropriate lock on it.
     782             :      *
     783             :      * There's a race condition here: the relation may have gone away since
     784             :      * the last time we saw it.  If so, we don't need to vacuum or analyze it.
     785             :      *
     786             :      * If we've been asked not to wait for the relation lock, acquire it first
     787             :      * in non-blocking mode, before calling try_relation_open().
     788             :      */
     789      275368 :     if (!(options & VACOPT_SKIP_LOCKED))
     790      274208 :         rel = try_relation_open(relid, lmode);
     791        1160 :     else if (ConditionalLockRelationOid(relid, lmode))
     792        1140 :         rel = try_relation_open(relid, NoLock);
     793             :     else
     794             :     {
     795          20 :         rel = NULL;
     796          20 :         rel_lock = false;
     797             :     }
     798             : 
     799             :     /* if relation is opened, leave */
     800      275368 :     if (rel)
     801      275336 :         return rel;
     802             : 
     803             :     /*
     804             :      * Relation could not be opened, so log a message describing the
     805             :      * situation when possible.
     806             :      *
     807             :      * If the RangeVar is not defined, we do not have enough information to
     808             :      * provide a meaningful log statement.  Chances are that the caller has
     809             :      * intentionally not provided this information so that this logging is
     810             :      * skipped, anyway.
     811             :      */
     812          32 :     if (relation == NULL)
     813          18 :         return NULL;
     814             : 
     815             :     /*
     816             :      * Determine the log level.
     817             :      *
     818             :      * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
     819             :      * statements in the permission checks; otherwise, only log if the caller
     820             :      * so requested.
     821             :      */
     822          14 :     if (!AmAutoVacuumWorkerProcess())
     823          14 :         elevel = WARNING;
     824           0 :     else if (verbose)
     825           0 :         elevel = LOG;
     826             :     else
     827           0 :         return NULL;
     828             : 
     829          14 :     if ((options & VACOPT_VACUUM) != 0)
     830             :     {
     831          10 :         if (!rel_lock)
     832           6 :             ereport(elevel,
     833             :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     834             :                      errmsg("skipping vacuum of \"%s\" --- lock not available",
     835             :                             relation->relname)));
     836             :         else
     837           4 :             ereport(elevel,
     838             :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     839             :                      errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
     840             :                             relation->relname)));
     841             : 
     842             :         /*
     843             :          * For VACUUM ANALYZE, both logs could show up, but just generate
     844             :          * information for VACUUM as that would be the first one to be
     845             :          * processed.
     846             :          */
     847          10 :         return NULL;
     848             :     }
     849             : 
     850           4 :     if ((options & VACOPT_ANALYZE) != 0)    /* holds here whenever the Assert above holds, since the VACUUM case already returned */
     851             :     {
     852           4 :         if (!rel_lock)
     853           2 :             ereport(elevel,
     854             :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     855             :                      errmsg("skipping analyze of \"%s\" --- lock not available",
     856             :                             relation->relname)));
     857             :         else
     858           2 :             ereport(elevel,
     859             :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     860             :                      errmsg("skipping analyze of \"%s\" --- relation no longer exists",
     861             :                             relation->relname)));
     862             :     }
     863             : 
     864           4 :     return NULL;
     865             : }
     866             : 
     867             : 
     868             : /*
     869             :  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
     870             :  * and optionally add VacuumRelations for partitions or inheritance children.
     871             :  *
     872             :  * If a VacuumRelation does not have an OID supplied and is a partitioned
     873             :  * table, an extra entry will be added to the output for each partition.
     874             :  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
     875             :  * it does not want us to expand partitioned tables.
     876             :  *
     877             :  * We take care not to modify the input data structure, but instead build
     878             :  * new VacuumRelation(s) to return.  (But note that they will reference
     879             :  * unmodified parts of the input, eg column lists.)  New data structures
     880             :  * are made in vac_context.  The returned list may be NIL, e.g. when the lock on the named relation is not available.
     881             :  */
     882             : static List *
     883      244724 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
     884             :                   int options)
     885             : {
     886      244724 :     List       *vacrels = NIL;
     887             :     MemoryContext oldcontext;
     888             : 
     889             :     /* If caller supplied OID, just pass the input entry through as the only list member. */
     890      244724 :     if (OidIsValid(vrel->oid))
     891             :     {
     892      230224 :         oldcontext = MemoryContextSwitchTo(vac_context);
     893      230224 :         vacrels = lappend(vacrels, vrel);
     894      230224 :         MemoryContextSwitchTo(oldcontext);
     895             :     }
     896             :     else
     897             :     {
     898             :         /*
     899             :          * Process a specific relation, and possibly partitions or child
     900             :          * tables thereof.
     901             :          */
     902             :         Oid         relid;
     903             :         HeapTuple   tuple;
     904             :         Form_pg_class classForm;
     905             :         bool        include_children;
     906             :         bool        is_partitioned_table;
     907             :         int         rvr_opts;
     908             : 
     909             :         /*
     910             :          * Since autovacuum workers supply OIDs when calling vacuum(), no
     911             :          * autovacuum worker should reach this code.
     912             :          */
     913             :         Assert(!AmAutoVacuumWorkerProcess());
     914             : 
     915             :         /*
     916             :          * We transiently take AccessShareLock to protect the syscache lookup
     917             :          * below, as well as find_all_inheritors's expectation that the caller
     918             :          * holds some lock on the starting relation.
     919             :          */
     920       14500 :         rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
     921       14500 :         relid = RangeVarGetRelidExtended(vrel->relation,
     922             :                                          AccessShareLock,
     923             :                                          rvr_opts,
     924             :                                          NULL, NULL);
     925             : 
     926             :         /*
     927             :          * If the lock is unavailable, emit the same log statement that
     928             :          * vacuum_rel() and analyze_rel() would.
     929             :          */
     930       14464 :         if (!OidIsValid(relid))
     931             :         {
     932           8 :             if (options & VACOPT_VACUUM)
     933           6 :                 ereport(WARNING,
     934             :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     935             :                          errmsg("skipping vacuum of \"%s\" --- lock not available",
     936             :                                 vrel->relation->relname)));
     937             :             else
     938           2 :                 ereport(WARNING,
     939             :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     940             :                          errmsg("skipping analyze of \"%s\" --- lock not available",
     941             :                                 vrel->relation->relname)));
     942           8 :             return vacrels; /* still NIL at this point */
     943             :         }
     944             : 
     945             :         /*
     946             :          * To check whether the relation is a partitioned table and its
     947             :          * ownership, fetch its syscache entry.
     948             :          */
     949       14456 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     950       14456 :         if (!HeapTupleIsValid(tuple))
     951           0 :             elog(ERROR, "cache lookup failed for relation %u", relid);
     952       14456 :         classForm = (Form_pg_class) GETSTRUCT(tuple);
     953             : 
     954             :         /*
     955             :          * Make a returnable VacuumRelation for this rel if the user has the
     956             :          * required privileges.
     957             :          */
     958       14456 :         if (vacuum_is_permitted_for_relation(relid, classForm, options))
     959             :         {
     960       14224 :             oldcontext = MemoryContextSwitchTo(vac_context);
     961       14224 :             vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
     962             :                                                           relid,
     963             :                                                           vrel->va_cols));
     964       14224 :             MemoryContextSwitchTo(oldcontext);
     965             :         }
     966             : 
     967             :         /*
     968             :          * Vacuuming a partitioned table with ONLY will not do anything since
     969             :          * the partitioned table itself is empty.  Issue a warning if the user
     970             :          * requests this.
     971             :          */
     972       14456 :         include_children = vrel->relation->inh;
     973       14456 :         is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
     974       14456 :         if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
     975           6 :             ereport(WARNING,
     976             :                     (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
     977             :                             vrel->relation->relname)));
     978             : 
     979       14456 :         ReleaseSysCache(tuple); /* classForm is not used past this point */
     980             : 
     981             :         /*
     982             :          * Unless the user has specified ONLY, make relation list entries for
     983             :          * its partitions or inheritance child tables.  Note that the list
     984             :          * returned by find_all_inheritors() includes the passed-in OID, so we
     985             :          * have to skip that.  There's no point in taking locks on the
     986             :          * individual partitions or child tables yet, and doing so would just
     987             :          * add unnecessary deadlock risk.  For this last reason, we do not yet
     988             :          * check the ownership of the partitions/tables, which get added to
     989             :          * the list to process.  Ownership will be checked later on anyway.
     990             :          */
     991       14456 :         if (include_children)
     992             :         {
     993       14426 :             List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
     994             :             ListCell   *part_lc;
     995             : 
     996       31042 :             foreach(part_lc, part_oids)
     997             :             {
     998       16616 :                 Oid         part_oid = lfirst_oid(part_lc);
     999             : 
    1000       16616 :                 if (part_oid == relid)
    1001       14426 :                     continue;   /* ignore original table */
    1002             : 
    1003             :                 /*
    1004             :                  * We omit a RangeVar since it wouldn't be appropriate to
    1005             :                  * complain about failure to open one of these relations
    1006             :                  * later.
    1007             :                  */
    1008        2190 :                 oldcontext = MemoryContextSwitchTo(vac_context);
    1009        2190 :                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
    1010             :                                                               part_oid,
    1011             :                                                               vrel->va_cols));
    1012        2190 :                 MemoryContextSwitchTo(oldcontext);
    1013             :             }
    1014             :         }
    1015             : 
    1016             :         /*
    1017             :          * Release lock again.  This means that by the time we actually try to
    1018             :          * process the table, it might be gone or renamed.  In the former case
    1019             :          * we'll silently ignore it; in the latter case we'll process it
    1020             :          * anyway, but we must beware that the RangeVar doesn't necessarily
    1021             :          * identify it anymore.  This isn't ideal, perhaps, but there's little
    1022             :          * practical alternative, since we're typically going to commit this
    1023             :          * transaction and begin a new one between now and then.  Moreover,
    1024             :          * holding locks on multiple relations would create significant risk
    1025             :          * of deadlock.
    1026             :          */
    1027       14456 :         UnlockRelationOid(relid, AccessShareLock);
    1028             :     }
    1029             : 
    1030      244680 :     return vacrels;
    1031             : }
    1032             : 
    1033             : /*
    1034             :  * Construct a list of VacuumRelations for all vacuumable rels in
    1035             :  * the current database.  The list is built in vac_context.  Only pg_class entries whose relkind is RELKIND_RELATION, RELKIND_MATVIEW or RELKIND_PARTITIONED_TABLE, and which pass vacuum_is_permitted_for_relation(), are listed.
    1036             :  */
    1037             : static List *
    1038         222 : get_all_vacuum_rels(MemoryContext vac_context, int options)
    1039             : {
    1040         222 :     List       *vacrels = NIL;
    1041             :     Relation    pgclass;
    1042             :     TableScanDesc scan;
    1043             :     HeapTuple   tuple;
    1044             : 
    1045         222 :     pgclass = table_open(RelationRelationId, AccessShareLock);
    1046             : 
    1047         222 :     scan = table_beginscan_catalog(pgclass, 0, NULL);
    1048             : 
    1049      100470 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1050             :     {
    1051      100248 :         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
    1052             :         MemoryContext oldcontext;
    1053      100248 :         Oid         relid = classForm->oid;
    1054             : 
    1055             :         /*
    1056             :          * We include partitioned tables here; depending on which operation is
    1057             :          * to be performed, caller will decide whether to process or ignore
    1058             :          * them.
    1059             :          */
    1060      100248 :         if (classForm->relkind != RELKIND_RELATION &&
    1061       81994 :             classForm->relkind != RELKIND_MATVIEW &&
    1062       81946 :             classForm->relkind != RELKIND_PARTITIONED_TABLE)
    1063       81782 :             continue;   /* not one of the relkinds listed by this function */
    1064             : 
    1065             :         /* check permissions of relation */
    1066       18466 :         if (!vacuum_is_permitted_for_relation(relid, classForm, options))
    1067        2754 :             continue;
    1068             : 
    1069             :         /*
    1070             :          * Build VacuumRelation(s) specifying the table OIDs to be processed.
    1071             :          * We omit a RangeVar since it wouldn't be appropriate to complain
    1072             :          * about failure to open one of these relations later.
    1073             :          */
    1074       15712 :         oldcontext = MemoryContextSwitchTo(vac_context);
    1075       15712 :         vacrels = lappend(vacrels, makeVacuumRelation(NULL,
    1076             :                                                       relid,
    1077             :                                                       NIL));
    1078       15712 :         MemoryContextSwitchTo(oldcontext);
    1079             :     }
    1080             : 
    1081         222 :     table_endscan(scan);
    1082         222 :     table_close(pgclass, AccessShareLock);
    1083             : 
    1084         222 :     return vacrels;
    1085             : }
    1086             : 
    1087             : /*
    1088             :  * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
    1089             :  *
    1090             :  * Inputs are the target relation (rel) and VACUUM parameters (params); a
    1091             :  * negative freeze age in params selects the matching vacuum_* setting.
    1092             :  * The cutoffs that VACUUM caller should use are stored into *cutoffs.
    1093             :  *
    1094             :  * Return value indicates if vacuumlazy.c caller should make its VACUUM
    1095             :  * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
    1096             :  * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
    1097             :  * minimum).
    1098             :  */
    1099             : bool
    1100      258220 : vacuum_get_cutoffs(Relation rel, const VacuumParams params,
    1101             :                    struct VacuumCutoffs *cutoffs)
    1102             : {
    1103             :     int         freeze_min_age,
    1104             :                 multixact_freeze_min_age,
    1105             :                 freeze_table_age,
    1106             :                 multixact_freeze_table_age,
    1107             :                 effective_multixact_freeze_max_age;
    1108             :     TransactionId nextXID,
    1109             :                 safeOldestXmin,
    1110             :                 aggressiveXIDCutoff;
    1111             :     MultiXactId nextMXID,
    1112             :                 safeOldestMxact,
    1113             :                 aggressiveMXIDCutoff;
    1114             : 
    1115             :     /* Use mutable copies of freeze age parameters (defaults are applied below) */
    1116      258220 :     freeze_min_age = params.freeze_min_age;
    1117      258220 :     multixact_freeze_min_age = params.multixact_freeze_min_age;
    1118      258220 :     freeze_table_age = params.freeze_table_age;
    1119      258220 :     multixact_freeze_table_age = params.multixact_freeze_table_age;
    1120             : 
    1121             :     /* Set pg_class fields in cutoffs */
    1122      258220 :     cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
    1123      258220 :     cutoffs->relminmxid = rel->rd_rel->relminmxid;
    1124             : 
    1125             :     /*
    1126             :      * Acquire OldestXmin.
    1127             :      *
    1128             :      * We can always ignore processes running lazy vacuum.  This is because we
    1129             :      * use these values only for deciding which tuples we must keep in the
    1130             :      * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
    1131             :      * XID assigned), it's safe to ignore it.  In theory it could be
    1132             :      * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
    1133             :      * that only one vacuum process can be working on a particular table at
    1134             :      * any time, and that each vacuum is always an independent transaction.
    1135             :      */
    1136      258220 :     cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
    1137             : 
    1138             :     Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
    1139             : 
    1140             :     /* Acquire OldestMxact */
    1141      258220 :     cutoffs->OldestMxact = GetOldestMultiXactId();
    1142             :     Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
    1143             : 
    1144             :     /* Acquire next XID/next MXID values used to apply age-based settings */
    1145      258220 :     nextXID = ReadNextTransactionId();
    1146      258220 :     nextMXID = ReadNextMultiXactId();
    1147             : 
    1148             :     /*
    1149             :      * Also compute the multixact age for which freezing is urgent.  This is
    1150             :      * normally autovacuum_multixact_freeze_max_age, but may be less if
    1151             :      * multixact members are bloated.
    1152             :      */
    1153      258220 :     effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
    1154             : 
    1155             :     /*
    1156             :      * Almost ready to set freeze output parameters; first check if OldestXmin
    1157             :      * or OldestMxact is held back to an unsafe degree (this only warns).
    1158             :      */
    1159      258220 :     safeOldestXmin = nextXID - autovacuum_freeze_max_age;
    1160      258220 :     if (!TransactionIdIsNormal(safeOldestXmin))
    1161           0 :         safeOldestXmin = FirstNormalTransactionId;  /* clamp to a normal XID */
    1162      258220 :     safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
    1163      258220 :     if (safeOldestMxact < FirstMultiXactId)
    1164           0 :         safeOldestMxact = FirstMultiXactId; /* clamp to a valid MultiXactId */
    1165      258220 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
    1166      195124 :         ereport(WARNING,
    1167             :                 (errmsg("cutoff for removing and freezing tuples is far in the past"),
    1168             :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1169             :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1170      258220 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
    1171           0 :         ereport(WARNING,
    1172             :                 (errmsg("cutoff for freezing multixacts is far in the past"),
    1173             :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1174             :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1175             : 
    1176             :     /*
    1177             :      * Determine the minimum freeze age to use: as specified by the caller, or
    1178             :      * vacuum_freeze_min_age, but in any case not more than half
    1179             :      * autovacuum_freeze_max_age, so that autovacuums to prevent XID
    1180             :      * wraparound won't occur too frequently.
    1181             :      */
    1182      258220 :     if (freeze_min_age < 0)
    1183       11680 :         freeze_min_age = vacuum_freeze_min_age;
    1184      258220 :     freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
    1185             :     Assert(freeze_min_age >= 0);
    1186             : 
    1187             :     /* Compute FreezeLimit, being careful to generate a normal XID */
    1188      258220 :     cutoffs->FreezeLimit = nextXID - freeze_min_age;
    1189      258220 :     if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
    1190           0 :         cutoffs->FreezeLimit = FirstNormalTransactionId;
    1191             :     /* FreezeLimit must always be <= OldestXmin, so pull it back if it is newer */
    1192      258220 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
    1193      219256 :         cutoffs->FreezeLimit = cutoffs->OldestXmin;
    1194             : 
    1195             :     /*
    1196             :      * Determine the minimum multixact freeze age to use: as specified by
    1197             :      * caller, or vacuum_multixact_freeze_min_age, but in any case not more
    1198             :      * than half effective_multixact_freeze_max_age, so that autovacuums to
    1199             :      * prevent MultiXact wraparound won't occur too frequently.
    1200             :      */
    1201      258220 :     if (multixact_freeze_min_age < 0)
    1202       11680 :         multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
    1203      258220 :     multixact_freeze_min_age = Min(multixact_freeze_min_age,
    1204             :                                    effective_multixact_freeze_max_age / 2);
    1205             :     Assert(multixact_freeze_min_age >= 0);
    1206             : 
    1207             :     /* Compute MultiXactCutoff, being careful to generate a valid value */
    1208      258220 :     cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
    1209      258220 :     if (cutoffs->MultiXactCutoff < FirstMultiXactId)
    1210           0 :         cutoffs->MultiXactCutoff = FirstMultiXactId;
    1211             :     /* MultiXactCutoff must always be <= OldestMxact, so pull it back if it is newer */
    1212      258220 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
    1213           4 :         cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
    1214             : 
    1215             :     /*
    1216             :      * Finally, figure out if caller needs to do an aggressive VACUUM or not.
    1217             :      *
    1218             :      * Determine the table freeze age to use: as specified by the caller, or
    1219             :      * the value of the vacuum_freeze_table_age GUC, but in any case not more
    1220             :      * than autovacuum_freeze_max_age * 0.95, so that if you have e.g. nightly
    1221             :      * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
    1222             :      * anti-wraparound autovacuum is launched.
    1223             :      */
    1224      258220 :     if (freeze_table_age < 0)
    1225       11680 :         freeze_table_age = vacuum_freeze_table_age;
    1226      258220 :     freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
    1227             :     Assert(freeze_table_age >= 0);
    1228      258220 :     aggressiveXIDCutoff = nextXID - freeze_table_age;
    1229      258220 :     if (!TransactionIdIsNormal(aggressiveXIDCutoff))
    1230           0 :         aggressiveXIDCutoff = FirstNormalTransactionId;
    1231      258220 :     if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
    1232             :                                       aggressiveXIDCutoff))
    1233      246552 :         return true;        /* aggressive; the multixact test below is skipped */
    1234             : 
    1235             :     /*
    1236             :      * Similar to the above, determine the table freeze age to use for
    1237             :      * multixacts: as specified by the caller, or the value of the
    1238             :      * vacuum_multixact_freeze_table_age GUC, but in any case not more than
    1239             :      * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
    1240             :      * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
    1241             :      * multixacts before anti-wraparound autovacuum is launched.
    1242             :      */
    1243       11668 :     if (multixact_freeze_table_age < 0)
    1244       11462 :         multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
    1245       11668 :     multixact_freeze_table_age =
    1246       11668 :         Min(multixact_freeze_table_age,
    1247             :             effective_multixact_freeze_max_age * 0.95);
    1248             :     Assert(multixact_freeze_table_age >= 0);
    1249       11668 :     aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
    1250       11668 :     if (aggressiveMXIDCutoff < FirstMultiXactId)
    1251           0 :         aggressiveMXIDCutoff = FirstMultiXactId;
    1252       11668 :     if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
    1253             :                                     aggressiveMXIDCutoff))
    1254           0 :         return true;        /* aggressive because of relminmxid's age */
    1255             : 
    1256             :     /* Non-aggressive VACUUM: neither relfrozenxid nor relminmxid reached its cutoff */
    1257       11668 :     return false;
    1258             : }
    1259             : 
    1260             : /*
    1261             :  * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
    1262             :  * mechanism to determine if its table's relfrozenxid and relminmxid are now
    1263             :  * dangerously far in the past.
    1264             :  *
    1265             :  * Only cutoffs->relfrozenxid and cutoffs->relminmxid are read; relfrozenxid is tested first.  When we return true, VACUUM caller triggers the failsafe.
    1266             :  */
    1267             : bool
    1268      261394 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
    1269             : {
    1270      261394 :     TransactionId relfrozenxid = cutoffs->relfrozenxid;
    1271      261394 :     MultiXactId relminmxid = cutoffs->relminmxid;
    1272             :     TransactionId xid_skip_limit;
    1273             :     MultiXactId multi_skip_limit;
    1274             :     int         skip_index_vacuum;  /* age threshold; reused for XIDs, then multixacts */
    1275             : 
    1276             :     Assert(TransactionIdIsNormal(relfrozenxid));
    1277             :     Assert(MultiXactIdIsValid(relminmxid));
    1278             : 
    1279             :     /*
    1280             :      * Determine the index skipping age to use. In any case no less than
    1281             :      * autovacuum_freeze_max_age * 1.05 (a floating-point product, stored as int).
    1282             :      */
    1283      261394 :     skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
    1284             : 
    1285      261394 :     xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
    1286      261394 :     if (!TransactionIdIsNormal(xid_skip_limit))
    1287           0 :         xid_skip_limit = FirstNormalTransactionId;  /* clamp to a normal XID */
    1288             : 
    1289      261394 :     if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
    1290             :     {
    1291             :         /* The table's relfrozenxid is too old; relminmxid is not examined */
    1292       66794 :         return true;
    1293             :     }
    1294             : 
    1295             :     /*
    1296             :      * Similar to above, determine the index skipping age to use for
    1297             :      * multixact. In any case no less than autovacuum_multixact_freeze_max_age *
    1298             :      * 1.05.
    1299             :      */
    1300      194600 :     skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
    1301             :                             autovacuum_multixact_freeze_max_age * 1.05);
    1302             : 
    1303      194600 :     multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
    1304      194600 :     if (multi_skip_limit < FirstMultiXactId)
    1305           0 :         multi_skip_limit = FirstMultiXactId;    /* clamp to a valid MultiXactId */
    1306             : 
    1307      194600 :     if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
    1308             :     {
    1309             :         /* The table's relminmxid is too old */
    1310           0 :         return true;
    1311             :     }
    1312             : 
    1313      194600 :     return false;           /* neither value is past its failsafe limit */
    1314             : }
    1315             : 
    1316             : /*
    1317             :  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
    1318             :  *
    1319             :  *      If we scanned the whole relation then we should just use the count of
    1320             :  *      live tuples seen; but if we did not, we should not blindly extrapolate
    1321             :  *      from that number, since VACUUM may have scanned a quite nonrandom
    1322             :  *      subset of the table.  When we have only partial information, we take
    1323             :  *      the old value of pg_class.reltuples/pg_class.relpages as a measurement
    1324             :  *      of the tuple density in the unscanned pages.
    1325             :  *      In some cases the old pg_class.reltuples is returned unchanged (possibly -1).
    1326             :  *      Note: scanned_tuples should count only *live* tuples, since
    1327             :  *      pg_class.reltuples is defined that way.
    1328             :  */
    1329             : double
    1330      257654 : vac_estimate_reltuples(Relation relation,
    1331             :                        BlockNumber total_pages,
    1332             :                        BlockNumber scanned_pages,
    1333             :                        double scanned_tuples)
    1334             : {
    1335      257654 :     BlockNumber old_rel_pages = relation->rd_rel->relpages;
    1336      257654 :     double      old_rel_tuples = relation->rd_rel->reltuples;
    1337             :     double      old_density;
    1338             :     double      unscanned_pages;
    1339             :     double      total_tuples;
    1340             : 
    1341             :     /* If we did scan the whole table, just use the count as-is */
    1342      257654 :     if (scanned_pages >= total_pages)
    1343      248368 :         return scanned_tuples;
    1344             : 
    1345             :     /*
    1346             :      * When successive VACUUM commands scan the same few pages again and
    1347             :      * again, without anything from the table really changing, there is a risk
    1348             :      * that our beliefs about tuple density will gradually become distorted.
    1349             :      * This might be caused by vacuumlazy.c implementation details, such as
    1350             :      * its tendency to always scan the last heap page.  Handle that here.
    1351             :      *
    1352             :      * If the relation is _exactly_ the same size according to the existing
    1353             :      * pg_class entry, and only a few of its pages (less than 2%) were
    1354             :      * scanned, keep the existing value of reltuples.  Also keep the existing
    1355             :      * value when only a subset of rel's pages <= a single page were scanned.
    1356             :      *
    1357             :      * (Note: we might be returning -1 here.)
    1358             :      */
    1359        9286 :     if (old_rel_pages == total_pages &&
    1360        9256 :         scanned_pages < (double) total_pages * 0.02)
    1361        6646 :         return old_rel_tuples;
    1362        2640 :     if (scanned_pages <= 1)
    1363        2252 :         return old_rel_tuples;
    1364             : 
    1365             :     /*
    1366             :      * If old density is unknown, we can't do much except scale up
    1367             :      * scanned_tuples to match total_pages (also avoids dividing by zero below).
    1368             :      */
    1369         388 :     if (old_rel_tuples < 0 || old_rel_pages == 0)
    1370           2 :         return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
    1371             : 
    1372             :     /*
    1373             :      * Okay, we've covered the corner cases.  The normal calculation is to
    1374             :      * convert the old measurement to a density (tuples per page), then
    1375             :      * estimate the number of tuples in the unscanned pages using that figure,
    1376             :      * and finally add on the number of tuples in the scanned pages.
    1377             :      */
    1378         386 :     old_density = old_rel_tuples / old_rel_pages;
    1379         386 :     unscanned_pages = (double) total_pages - (double) scanned_pages;
    1380         386 :     total_tuples = old_density * unscanned_pages + scanned_tuples;
    1381         386 :     return floor(total_tuples + 0.5);   /* round to the nearest whole number */
    1382             : }
    1383             : 
    1384             : 
    1385             : /*
    1386             :  *  vac_update_relstats() -- update statistics for one relation
    1387             :  *
    1388             :  *      Update the whole-relation statistics that are kept in its pg_class
    1389             :  *      row.  There are additional stats that will be updated if we are
    1390             :  *      doing ANALYZE, but we always update these stats.  This routine works
    1391             :  *      for both index and heap relation entries in pg_class.
    1392             :  *
    1393             :  *      We violate transaction semantics here by overwriting the rel's
    1394             :  *      existing pg_class tuple with the new values.  This is reasonably
    1395             :  *      safe as long as we're sure that the new values are correct whether or
    1396             :  *      not this transaction commits.  The reason for doing this is that if
    1397             :  *      we updated these tuples in the usual way, vacuuming pg_class itself
    1398             :  *      wouldn't work very well --- by the time we got done with a vacuum
    1399             :  *      cycle, most of the tuples in pg_class would've been obsoleted.  Of
    1400             :  *      course, this only works for fixed-size not-null columns, but these are.
    1401             :  *
    1402             :  *      Another reason for doing it this way is that when we are in a lazy
    1403             :  *      VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
    1404             :  *      Somebody vacuuming pg_class might think they could delete a tuple
    1405             :  *      marked with xmin = our xid.
    1406             :  *
    1407             :  *      In addition to fundamentally nontransactional statistics such as
    1408             :  *      relpages and relallvisible, we try to maintain certain lazily-updated
    1409             :  *      DDL flags such as relhasindex, by clearing them if no longer correct.
    1410             :  *      It's safe to do this in VACUUM, which can't run in parallel with
    1411             :  *      CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
    1412             :  *      However, it's *not* safe to do it in an ANALYZE that's within an
    1413             :  *      outer transaction, because for example the current transaction might
    1414             :  *      have dropped the last index; then we'd think relhasindex should be
    1415             :  *      cleared, but if the transaction later rolls back this would be wrong.
    1416             :  *      So we refrain from updating the DDL flags if we're inside an outer
    1417             :  *      transaction.  This is OK since postponing the flag maintenance is
    1418             :  *      always allowable.
    1419             :  *
    1420             :  *      Note: num_tuples should count only *live* tuples, since
    1421             :  *      pg_class.reltuples is defined that way.
    1422             :  *      frozenxid_updated/minmulti_updated may be NULL; else each reports whether its field changed.
    1423             :  *      This routine is shared by VACUUM and ANALYZE.
    1424             :  */
    1425             : void
    1426      300448 : vac_update_relstats(Relation relation,
    1427             :                     BlockNumber num_pages, double num_tuples,
    1428             :                     BlockNumber num_all_visible_pages,
    1429             :                     BlockNumber num_all_frozen_pages,
    1430             :                     bool hasindex, TransactionId frozenxid,
    1431             :                     MultiXactId minmulti,
    1432             :                     bool *frozenxid_updated, bool *minmulti_updated,
    1433             :                     bool in_outer_xact)
    1434             : {
    1435      300448 :     Oid         relid = RelationGetRelid(relation);
    1436             :     Relation    rd;
    1437             :     ScanKeyData key[1];
    1438             :     HeapTuple   ctup;
    1439             :     void       *inplace_state;
    1440             :     Form_pg_class pgcform;
    1441             :     bool        dirty,
    1442             :                 futurexid,
    1443             :                 futuremxid;
    1444             :     TransactionId oldfrozenxid;
    1445             :     MultiXactId oldminmulti;
    1446             : 
    1447      300448 :     rd = table_open(RelationRelationId, RowExclusiveLock);
    1448             : 
    1449             :     /* Fetch a copy of the tuple to scribble on, looked up by the rel's OID */
    1450      300448 :     ScanKeyInit(&key[0],
    1451             :                 Anum_pg_class_oid,
    1452             :                 BTEqualStrategyNumber, F_OIDEQ,
    1453             :                 ObjectIdGetDatum(relid));
    1454      300448 :     systable_inplace_update_begin(rd, ClassOidIndexId, true,
    1455             :                                   NULL, 1, key, &ctup, &inplace_state);
    1456      300446 :     if (!HeapTupleIsValid(ctup))
    1457           0 :         elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
    1458             :              relid);
    1459      300446 :     pgcform = (Form_pg_class) GETSTRUCT(ctup);
    1460             : 
    1461             :     /* Apply statistical updates, if any, to copied tuple; dirty records whether any field changed */
    1462             : 
    1463      300446 :     dirty = false;
    1464      300446 :     if (pgcform->relpages != (int32) num_pages)
    1465             :     {
    1466        9628 :         pgcform->relpages = (int32) num_pages;
    1467        9628 :         dirty = true;
    1468             :     }
    1469      300446 :     if (pgcform->reltuples != (float4) num_tuples)
    1470             :     {
    1471       20812 :         pgcform->reltuples = (float4) num_tuples;
    1472       20812 :         dirty = true;
    1473             :     }
    1474      300446 :     if (pgcform->relallvisible != (int32) num_all_visible_pages)
    1475             :     {
    1476        5966 :         pgcform->relallvisible = (int32) num_all_visible_pages;
    1477        5966 :         dirty = true;
    1478             :     }
    1479      300446 :     if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
    1480             :     {
    1481        5384 :         pgcform->relallfrozen = (int32) num_all_frozen_pages;
    1482        5384 :         dirty = true;
    1483             :     }
    1484             : 
    1485             :     /* Apply DDL updates, but not inside an outer transaction (see above) */
    1486             : 
    1487      300446 :     if (!in_outer_xact)
    1488             :     {
    1489             :         /*
    1490             :          * If we didn't find any indexes, reset relhasindex.
    1491             :          */
    1492      300084 :         if (pgcform->relhasindex && !hasindex)
    1493             :         {
    1494          24 :             pgcform->relhasindex = false;
    1495          24 :             dirty = true;
    1496             :         }
    1497             : 
    1498             :         /* We also clear relhasrules and relhastriggers if needed (flags are only cleared here, never set) */
    1499      300084 :         if (pgcform->relhasrules && relation->rd_rules == NULL)
    1500             :         {
    1501           0 :             pgcform->relhasrules = false;
    1502           0 :             dirty = true;
    1503             :         }
    1504      300084 :         if (pgcform->relhastriggers && relation->trigdesc == NULL)
    1505             :         {
    1506           6 :             pgcform->relhastriggers = false;
    1507           6 :             dirty = true;
    1508             :         }
    1509             :     }
    1510             : 
    1511             :     /*
    1512             :      * Update relfrozenxid, unless caller passed InvalidTransactionId
    1513             :      * indicating it has no new data.
    1514             :      *
    1515             :      * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
    1516             :      * stored relfrozenxid is "in the future" then it seems best to assume
    1517             :      * it's corrupt, and overwrite with the oldest remaining XID in the table.
    1518             :      * This should match vac_update_datfrozenxid() concerning what we consider
    1519             :      * to be "in the future".
    1520             :      */
    1521      300446 :     oldfrozenxid = pgcform->relfrozenxid;
    1522      300446 :     futurexid = false;
    1523      300446 :     if (frozenxid_updated)
    1524      257648 :         *frozenxid_updated = false;
    1525      300446 :     if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
    1526             :     {
    1527       60088 :         bool        update = false;
    1528             : 
    1529       60088 :         if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
    1530       59972 :             update = true;  /* normal case: value moves forward */
    1531         116 :         else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
    1532           0 :             futurexid = update = true;  /* stored value is "in the future" */
    1533             : 
    1534       60088 :         if (update)
    1535             :         {
    1536       59972 :             pgcform->relfrozenxid = frozenxid;
    1537       59972 :             dirty = true;
    1538       59972 :             if (frozenxid_updated)
    1539       59972 :                 *frozenxid_updated = true;
    1540             :         }
    1541             :     }
    1542             : 
    1543             :     /* Similarly for relminmxid: skip if minmulti is invalid, never move backwards unless stored value is "in the future" */
    1544      300446 :     oldminmulti = pgcform->relminmxid;
    1545      300446 :     futuremxid = false;
    1546      300446 :     if (minmulti_updated)
    1547      257648 :         *minmulti_updated = false;
    1548      300446 :     if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
    1549             :     {
    1550         310 :         bool        update = false;
    1551             : 
    1552         310 :         if (MultiXactIdPrecedes(oldminmulti, minmulti))
    1553         310 :             update = true;  /* normal case: value moves forward */
    1554           0 :         else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
    1555           0 :             futuremxid = update = true; /* stored value is "in the future" */
    1556             : 
    1557         310 :         if (update)
    1558             :         {
    1559         310 :             pgcform->relminmxid = minmulti;
    1560         310 :             dirty = true;
    1561         310 :             if (minmulti_updated)
    1562         310 :                 *minmulti_updated = true;
    1563             :         }
    1564             :     }
    1565             : 
    1566             :     /* If anything changed, write out the tuple; otherwise cancel the in-place update. */
    1567      300446 :     if (dirty)
    1568       74786 :         systable_inplace_update_finish(inplace_state, ctup);
    1569             :     else
    1570      225660 :         systable_inplace_update_cancel(inplace_state);
    1571             : 
    1572      300446 :     table_close(rd, RowExclusiveLock);
    1573             :     /* Warn about overwritten "in the future" values only now, after pg_class has been closed */
    1574      300446 :     if (futurexid)
    1575           0 :         ereport(WARNING,
    1576             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1577             :                  errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
    1578             :                                  oldfrozenxid, frozenxid,
    1579             :                                  RelationGetRelationName(relation))));
    1580      300446 :     if (futuremxid)
    1581           0 :         ereport(WARNING,
    1582             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1583             :                  errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
    1584             :                                  oldminmulti, minmulti,
    1585             :                                  RelationGetRelationName(relation))));
    1586      300446 : }
    1587             : 
    1588             : 
    1589             : /*
    1590             :  *  vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
    1591             :  *
    1592             :  *      Update pg_database's datfrozenxid entry for our database to be the
    1593             :  *      minimum of the pg_class.relfrozenxid values, and likewise datminmxid
    1594             :  *      to be the minimum of the pg_class.relminmxid values.  If any pg_class
    1595             :  *      entry has a value "in the future", give up without changing anything.
    1596             :  *
    1597             :  *      If we are able to advance either pg_database value, or if the shared
    1598             :  *      XID-wrap-limit info is stale, also try to truncate pg_xact and
    1599             :  *      pg_multixact.
    1600             :  *
    1601             :  *      We violate transaction semantics here by overwriting the database's
    1602             :  *      existing pg_database tuple with the new values.  This is reasonably
    1603             :  *      safe since the new values are correct whether or not this transaction
    1604             :  *      commits.  As with vac_update_relstats, this avoids leaving dead tuples
    1605             :  *      behind after a VACUUM.
    1606             :  */
    1607             : void
    1608        6292 : vac_update_datfrozenxid(void)
    1609             : {
    1610             :     HeapTuple   tuple;
    1611             :     Form_pg_database dbform;
    1612             :     Relation    relation;
    1613             :     SysScanDesc scan;
    1614             :     HeapTuple   classTup;
    1615             :     TransactionId newFrozenXid;
    1616             :     MultiXactId newMinMulti;
    1617             :     TransactionId lastSaneFrozenXid;
    1618             :     MultiXactId lastSaneMinMulti;
    1619        6292 :     bool        bogus = false;
    1620        6292 :     bool        dirty = false;
    1621             :     ScanKeyData key[1];
    1622             :     void       *inplace_state;
    1623             : 
    1624             :     /*
    1625             :      * Restrict this task to one backend per database.  This avoids race
    1626             :      * conditions that would move datfrozenxid or datminmxid backward.  It
    1627             :      * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
    1628             :      * datfrozenxid passed to an earlier vac_truncate_clog() call.
    1629             :      */
    1630        6292 :     LockDatabaseFrozenIds(ExclusiveLock);
    1631             : 
    1632             :     /*
    1633             :      * Initialize the "min" calculation with
    1634             :      * GetOldestNonRemovableTransactionId(), which is a reasonable
    1635             :      * approximation to the minimum relfrozenxid for not-yet-committed
    1636             :      * pg_class entries for new tables; see AddNewRelationTuple().  So we
    1637             :      * cannot produce a wrong minimum by starting with this.
    1638             :      */
    1639        6292 :     newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
    1640             : 
    1641             :     /*
    1642             :      * Similarly, initialize the MultiXact "min" with the value that would be
    1643             :      * used on pg_class for new tables.  See AddNewRelationTuple().
    1644             :      */
    1645        6292 :     newMinMulti = GetOldestMultiXactId();
    1646             : 
    1647             :     /*
    1648             :      * Identify the latest relfrozenxid and relminmxid values that we could
    1649             :      * validly see during the scan.  These are conservative values, but it's
    1650             :      * not really worth trying to be more exact.
    1651             :      */
    1652        6292 :     lastSaneFrozenXid = ReadNextTransactionId();
    1653        6292 :     lastSaneMinMulti = ReadNextMultiXactId();
    1654             : 
    1655             :     /*
    1656             :      * We must seqscan pg_class to find the minimum Xid, because there is no
    1657             :      * index that can help us here.  Each XID field is copied into a local
    1658             :      * exactly once, through a volatile pointer, before being compared; see
    1659             :      * vac_truncate_clog() for the race condition to prevent.
    1660             :      */
    1661        6292 :     relation = table_open(RelationRelationId, AccessShareLock);
    1662             : 
    1663        6292 :     scan = systable_beginscan(relation, InvalidOid, false,
    1664             :                               NULL, 0, NULL);
    1665             : 
    1666     3101568 :     while ((classTup = systable_getnext(scan)) != NULL)
    1667             :     {
    1668     3095276 :         volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup);
    1669     3095276 :         TransactionId relfrozenxid = classForm->relfrozenxid;
    1670     3095276 :         MultiXactId relminmxid = classForm->relminmxid;
    1671             : 
    1672             :         /*
    1673             :          * Only consider relations able to hold unfrozen XIDs (anything else
    1674             :          * should have InvalidTransactionId in relfrozenxid anyway).
    1675             :          */
    1676     3095276 :         if (classForm->relkind != RELKIND_RELATION &&
    1677     2476502 :             classForm->relkind != RELKIND_MATVIEW &&
    1678     2474090 :             classForm->relkind != RELKIND_TOASTVALUE)
    1679             :         {
    1680             :             Assert(!TransactionIdIsValid(relfrozenxid));
    1681             :             Assert(!MultiXactIdIsValid(relminmxid));
    1682     2153658 :             continue;
    1683             :         }
    1684             : 
    1685             :         /*
    1686             :          * Some table AMs might not need per-relation xid / multixid horizons.
    1687             :          * It therefore seems reasonable to allow relfrozenxid and relminmxid
    1688             :          * to not be set (i.e. set to their respective Invalid*Id)
    1689             :          * independently. Thus validate and compute horizon for each only if
    1690             :          * set.
    1691             :          *
    1692             :          * If things are working properly, no relation should have a
    1693             :          * relfrozenxid or relminmxid that is "in the future".  However, such
    1694             :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1695             :          * see any entries that are "in the future", chicken out and don't do
    1696             :          * anything.  This ensures we won't truncate clog & multixact SLRUs
    1697             :          * before those relations have been scanned and cleaned up.
    1698             :          */
    1699             : 
    1700      941618 :         if (TransactionIdIsValid(relfrozenxid))
    1701             :         {
    1702             :             Assert(TransactionIdIsNormal(relfrozenxid));
    1703             : 
    1704             :             /* check for values in the future */
    1705      941618 :             if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
    1706             :             {
    1707           0 :                 bogus = true;
    1708           0 :                 break;
    1709             :             }
    1710             : 
    1711             :             /* determine new horizon */
    1712      941618 :             if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
    1713        5250 :                 newFrozenXid = relfrozenxid;
    1714             :         }
    1715             : 
    1716      941618 :         if (MultiXactIdIsValid(relminmxid))
    1717             :         {
    1718             :             /* check for values in the future */
    1719      941618 :             if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
    1720             :             {
    1721           0 :                 bogus = true;
    1722           0 :                 break;
    1723             :             }
    1724             : 
    1725             :             /* determine new horizon */
    1726      941618 :             if (MultiXactIdPrecedes(relminmxid, newMinMulti))
    1727         224 :                 newMinMulti = relminmxid;
    1728             :         }
    1729             :     }
    1730             : 
    1731             :     /* we're done with pg_class */
    1732        6292 :     systable_endscan(scan);
    1733        6292 :     table_close(relation, AccessShareLock);
    1734             : 
    1735             :     /* chicken out if bogus data found */
    1736        6292 :     if (bogus)
    1737           0 :         return;
    1738             : 
    1739             :     Assert(TransactionIdIsNormal(newFrozenXid));
    1740             :     Assert(MultiXactIdIsValid(newMinMulti));
    1741             : 
    1742             :     /* Now fetch the pg_database tuple we need to update. */
    1743        6292 :     relation = table_open(DatabaseRelationId, RowExclusiveLock);
    1744             : 
    1745             :     /*
    1746             :      * Fetch a copy of the tuple to scribble on.  We could check the syscache
    1747             :      * tuple first.  If that concluded !dirty, we'd avoid waiting on
    1748             :      * concurrent heap_update() and would avoid exclusive-locking the buffer.
    1749             :      * For now, don't optimize that.
    1750             :      */
    1751        6292 :     ScanKeyInit(&key[0],
    1752             :                 Anum_pg_database_oid,
    1753             :                 BTEqualStrategyNumber, F_OIDEQ,
    1754             :                 ObjectIdGetDatum(MyDatabaseId));
    1755             : 
    1756        6292 :     systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
    1757             :                                   NULL, 1, key, &tuple, &inplace_state);
    1758             : 
    1759        6292 :     if (!HeapTupleIsValid(tuple))
    1760           0 :         elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
    1761             : 
    1762        6292 :     dbform = (Form_pg_database) GETSTRUCT(tuple);
    1763             : 
    1764             :     /*
    1765             :      * As in vac_update_relstats(), we ordinarily don't want to let
    1766             :      * datfrozenxid go backward; but if it's "in the future" then it must be
    1767             :      * corrupt and it seems best to overwrite it.
    1768             :      */
    1769        6942 :     if (dbform->datfrozenxid != newFrozenXid &&
    1770         650 :         (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
    1771           0 :          TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
    1772             :     {
    1773         650 :         dbform->datfrozenxid = newFrozenXid;
    1774         650 :         dirty = true;
    1775             :     }
    1776             :     else
    1777        5642 :         newFrozenXid = dbform->datfrozenxid;    /* pass on the stored value */
    1778             : 
    1779             :     /* Ditto for datminmxid */
    1780        6294 :     if (dbform->datminmxid != newMinMulti &&
    1781           2 :         (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
    1782           0 :          MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
    1783             :     {
    1784           2 :         dbform->datminmxid = newMinMulti;
    1785           2 :         dirty = true;
    1786             :     }
    1787             :     else
    1788        6290 :         newMinMulti = dbform->datminmxid;   /* pass on the stored value */
    1789             : 
    1790        6292 :     if (dirty)
    1791         650 :         systable_inplace_update_finish(inplace_state, tuple);
    1792             :     else
    1793        5642 :         systable_inplace_update_cancel(inplace_state);
    1794             : 
    1795        6292 :     heap_freetuple(tuple);
    1796        6292 :     table_close(relation, RowExclusiveLock);
    1797             : 
    1798             :     /*
    1799             :      * If we were able to advance datfrozenxid or datminmxid, see if we can
    1800             :      * truncate pg_xact and/or pg_multixact.  Also do it if the shared
    1801             :      * XID-wrap-limit info is stale, since this action will update that too.
    1802             :      */
    1803        6292 :     if (dirty || ForceTransactionIdLimitUpdate())
    1804        2446 :         vac_truncate_clog(newFrozenXid, newMinMulti,
    1805             :                           lastSaneFrozenXid, lastSaneMinMulti);
    1806             : }
    1807             : 
    1808             : 
    1809             : /*
    1810             :  *  vac_truncate_clog() -- attempt to truncate the commit log
    1811             :  *
    1812             :  *      Scan pg_database to determine the system-wide oldest datfrozenxid,
    1813             :  *      and use it to truncate the transaction commit log (pg_xact).
    1814             :  *      Also update the XID wrap limit info maintained by varsup.c.
    1815             :  *      Likewise for datminmxid.
    1816             :  *
    1817             :  *      The passed frozenXID and minMulti are the updated values for my own
    1818             :  *      pg_database entry.  They're used to initialize the "min" calculations.
    1819             :  *      The caller also passes the "last sane" XID and MXID, since it has
    1820             :  *      those at hand already.
    1821             :  *
    1822             :  *      This routine is only invoked when we've changed our DB's datfrozenxid
    1823             :  *      or datminmxid, or found the shared XID-wrap-limit info stale.  We
    1824             :  *      bail out without truncating if any value looks "in the future".
    1825             :  */
    1826             : static void
    1827        2446 : vac_truncate_clog(TransactionId frozenXID,
    1828             :                   MultiXactId minMulti,
    1829             :                   TransactionId lastSaneFrozenXid,
    1830             :                   MultiXactId lastSaneMinMulti)
    1831             : {
    1832        2446 :     TransactionId nextXID = ReadNextTransactionId();
    1833             :     Relation    relation;
    1834             :     TableScanDesc scan;
    1835             :     HeapTuple   tuple;
    1836             :     Oid         oldestxid_datoid;
    1837             :     Oid         minmulti_datoid;
    1838        2446 :     bool        bogus = false;
    1839        2446 :     bool        frozenAlreadyWrapped = false;
    1840             : 
    1841             :     /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
    1842        2446 :     LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
    1843             : 
    1844             :     /* init oldest datoids to sync with my frozenXID/minMulti values */
    1845        2446 :     oldestxid_datoid = MyDatabaseId;
    1846        2446 :     minmulti_datoid = MyDatabaseId;
    1847             : 
    1848             :     /*
    1849             :      * Scan pg_database to compute the minimum datfrozenxid/datminmxid.
    1850             :      *
    1851             :      * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
    1852             :      * the values could change while we look at them.  Fetch each one just
    1853             :      * once to ensure sane behavior of the comparison logic.  (Here, as in
    1854             :      * many other places, we assume that fetching or updating an XID in shared
    1855             :      * storage is atomic.)
    1856             :      *
    1857             :      * Note: we need not worry about a race condition with new entries being
    1858             :      * inserted by CREATE DATABASE.  Any such entry will have a copy of some
    1859             :      * existing DB's datfrozenxid, and that source DB cannot be ours because
    1860             :      * of the interlock against copying a DB containing an active backend.
    1861             :      * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
    1862             :      * concurrently modify the datfrozenxid's of different databases, the
    1863             :      * worst possible outcome is that pg_xact is not truncated as aggressively
    1864             :      * as it could be.
    1865             :      */
    1866        2446 :     relation = table_open(DatabaseRelationId, AccessShareLock);
    1867             : 
    1868        2446 :     scan = table_beginscan_catalog(relation, 0, NULL);
    1869             : 
    1870        9606 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1871             :     {
    1872        7160 :         volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
    1873        7160 :         TransactionId datfrozenxid = dbform->datfrozenxid;
    1874        7160 :         MultiXactId datminmxid = dbform->datminmxid;
    1875             : 
    1876             :         Assert(TransactionIdIsNormal(datfrozenxid));
    1877             :         Assert(MultiXactIdIsValid(datminmxid));
    1878             : 
    1879             :         /*
    1880             :          * If database is in the process of getting dropped, or has been
    1881             :          * interrupted while doing so, no connections to it are possible
    1882             :          * anymore. Therefore we don't need to take it into account here.
    1883             :          * Which is good, because it can't be processed by autovacuum either.
    1884             :          */
    1885        7160 :         if (database_is_invalid_form((Form_pg_database) dbform))
    1886             :         {
    1887           2 :             elog(DEBUG2,
    1888             :                  "skipping invalid database \"%s\" while computing relfrozenxid",
    1889             :                  NameStr(dbform->datname));
    1890           2 :             continue;
    1891             :         }
    1892             : 
    1893             :         /*
    1894             :          * If things are working properly, no database should have a
    1895             :          * datfrozenxid or datminmxid that is "in the future".  However, such
    1896             :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1897             :          * see any entries that are "in the future", chicken out and don't do
    1898             :          * anything.  This ensures we won't truncate clog before those
    1899             :          * databases have been scanned and cleaned up.  (We will issue the
    1900             :          * "already wrapped" warning if appropriate, though.)
    1901             :          */
    1902       14316 :         if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
    1903        7158 :             MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
    1904           0 :             bogus = true;
    1905             : 
    1906        7158 :         if (TransactionIdPrecedes(nextXID, datfrozenxid))
    1907           0 :             frozenAlreadyWrapped = true;
    1908        7158 :         else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
    1909             :         {
    1910         450 :             frozenXID = datfrozenxid;
    1911         450 :             oldestxid_datoid = dbform->oid;
    1912             :         }
    1913             : 
    1914        7158 :         if (MultiXactIdPrecedes(datminmxid, minMulti))
    1915             :         {
    1916           4 :             minMulti = datminmxid;
    1917           4 :             minmulti_datoid = dbform->oid;
    1918             :         }
    1919             :     }
    1920             : 
    1921        2446 :     table_endscan(scan);
    1922             : 
    1923        2446 :     table_close(relation, AccessShareLock);
    1924             : 
    1925             :     /*
    1926             :      * Do not truncate CLOG if we seem to have suffered wraparound already;
    1927             :      * the computed minimum XID might be bogus.  This case should now be
    1928             :      * impossible due to the defenses in GetNewTransactionId, but we keep the
    1929             :      * test anyway.
    1930             :      */
    1931        2446 :     if (frozenAlreadyWrapped)
    1932             :     {
    1933           0 :         ereport(WARNING,
    1934             :                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
    1935             :                  errdetail("You might have already suffered transaction-wraparound data loss.")));
    1936           0 :         LWLockRelease(WrapLimitsVacuumLock);
    1937           0 :         return;
    1938             :     }
    1939             : 
    1940             :     /* chicken out if data is bogus in any other way */
    1941        2446 :     if (bogus)
    1942             :     {
    1943           0 :         LWLockRelease(WrapLimitsVacuumLock);
    1944           0 :         return;
    1945             :     }
    1946             : 
    1947             :     /*
    1948             :      * Freeze any old transaction IDs in the async notification queue before
    1949             :      * CLOG truncation.
    1950             :      */
    1951        2446 :     AsyncNotifyFreezeXids(frozenXID);
    1952             : 
    1953             :     /*
    1954             :      * Advance the oldest value for commit timestamps before truncating, so
    1955             :      * that if a user requests a timestamp for a transaction we're truncating
    1956             :      * away right after this point, they get NULL instead of an ugly "file not
    1957             :      * found" error from slru.c.  This doesn't matter for xact/multixact
    1958             :      * because they are not subject to arbitrary lookups from users.
    1959             :      */
    1960        2446 :     AdvanceOldestCommitTsXid(frozenXID);
    1961             : 
    1962             :     /*
    1963             :      * Truncate CLOG, multixact and CommitTs to the oldest computed value.
    1964             :      */
    1965        2446 :     TruncateCLOG(frozenXID, oldestxid_datoid);
    1966        2446 :     TruncateCommitTs(frozenXID);
    1967        2446 :     TruncateMultiXact(minMulti, minmulti_datoid);
    1968             : 
    1969             :     /*
    1970             :      * Update the wrap limit for GetNewTransactionId and creation of new
    1971             :      * MultiXactIds.  Note: these functions will also signal the postmaster
    1972             :      * for an(other) autovac cycle if needed.  XXX should we avoid possibly
    1973             :      * signaling twice?
    1974             :      */
    1975        2446 :     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
    1976        2446 :     SetMultiXactIdLimit(minMulti, minmulti_datoid);
    1977             : 
    1978        2446 :     LWLockRelease(WrapLimitsVacuumLock);
    1979             : }
    1980             : 
    1981             : 
    1982             : /*
    1983             :  *  vacuum_rel() -- vacuum one heap relation
    1984             :  *
    1985             :  *      relid identifies the relation to vacuum.  If relation is supplied,
    1986             :  *      use the name therein for reporting any failure to open/lock the rel;
    1987             :  *      do not use it once we've successfully opened the rel, since it might
    1988             :  *      be stale.
    1989             :  *
    1990             :  *      Returns true if it's okay to proceed with a requested ANALYZE
    1991             :  *      operation on this table.
    1992             :  *
    1993             :  *      Doing one heap at a time incurs extra overhead, since we need to
    1994             :  *      check that the heap exists again just before we vacuum it.  The
    1995             :  *      reason that we do this is so that vacuuming can be spread across
    1996             :  *      many small transactions.  Otherwise, two-phase locking would require
    1997             :  *      us to lock the entire database during one pass of the vacuum cleaner.
    1998             :  *
    1999             :  *      At entry and exit, we are not inside a transaction.
    2000             :  */
    2001             : static bool
    2002      258470 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
    2003             :            BufferAccessStrategy bstrategy)
    2004             : {
    2005             :     LOCKMODE    lmode;
    2006             :     Relation    rel;
    2007             :     LockRelId   lockrelid;
    2008             :     Oid         priv_relid;
    2009             :     Oid         toast_relid;
    2010             :     Oid         save_userid;
    2011             :     int         save_sec_context;
    2012             :     int         save_nestlevel;
    2013             :     VacuumParams toast_vacuum_params;
    2014             : 
    2015             :     /*
    2016             :      * This function scribbles on the parameters, so make a copy early to
    2017             :      * avoid affecting the TOAST table (if we do end up recursing to it).
    2018             :      */
    2019      258470 :     memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
    2020             : 
    2021             :     /* Begin a transaction for vacuuming this relation */
    2022      258470 :     StartTransactionCommand();
    2023             : 
    2024      258470 :     if (!(params.options & VACOPT_FULL))
    2025             :     {
    2026             :         /*
    2027             :          * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
    2028             :          * other concurrent VACUUMs know that they can ignore this one while
    2029             :          * determining their OldestXmin.  (The reason we don't set it during a
    2030             :          * full VACUUM is exactly that we may have to run user-defined
    2031             :          * functions for functional indexes, and we want to make sure that if
    2032             :          * they use the snapshot set above, any tuples it requires can't get
    2033             :          * removed from other tables.  An index function that depends on the
    2034             :          * contents of other tables is arguably broken, but we won't break it
    2035             :          * here by violating transaction semantics.)
    2036             :          *
    2037             :          * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
    2038             :          * autovacuum; it's used to avoid canceling a vacuum that was invoked
    2039             :          * in an emergency.
    2040             :          *
    2041             :          * Note: these flags remain set until CommitTransaction or
    2042             :          * AbortTransaction.  We don't want to clear them until we reset
    2043             :          * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
    2044             :          * might appear to go backwards, which is probably Not Good.  (We also
    2045             :          * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
    2046             :          * xmin doesn't become visible ahead of setting the flag.)
    2047             :          */
    2048      258056 :         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    2049      258056 :         MyProc->statusFlags |= PROC_IN_VACUUM;
    2050      258056 :         if (params.is_wraparound)
    2051      229692 :             MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
    2052      258056 :         ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
    2053      258056 :         LWLockRelease(ProcArrayLock);
    2054             :     }
    2055             : 
    2056             :     /*
    2057             :      * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
    2058             :      * cutoff xids in local memory wrapping around, and to have updated xmin
    2059             :      * horizons.
    2060             :      */
    2061      258470 :     PushActiveSnapshot(GetTransactionSnapshot());
    2062             : 
    2063             :     /*
    2064             :      * Check for user-requested abort.  Note we want this to be inside a
    2065             :      * transaction, so xact.c doesn't issue useless WARNING.
    2066             :      */
    2067      258470 :     CHECK_FOR_INTERRUPTS();
    2068             : 
    2069             :     /*
    2070             :      * Determine the type of lock we want --- hard exclusive lock for a FULL
    2071             :      * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
    2072             :      * way, we can be sure that no other backend is vacuuming the same table.
    2073             :      */
    2074      516940 :     lmode = (params.options & VACOPT_FULL) ?
    2075      258470 :         AccessExclusiveLock : ShareUpdateExclusiveLock;
    2076             : 
    2077             :     /* open the relation and get the appropriate lock on it */
    2078      258470 :     rel = vacuum_open_relation(relid, relation, params.options,
    2079      258470 :                                params.log_vacuum_min_duration >= 0, lmode);
    2080             : 
    2081             :     /* leave if relation could not be opened or locked */
    2082      258470 :     if (!rel)
    2083             :     {
    2084          24 :         PopActiveSnapshot();
    2085          24 :         CommitTransactionCommand();
    2086          24 :         return false;
    2087             :     }
    2088             : 
    2089             :     /*
    2090             :      * When recursing to a TOAST table, check privileges on the parent.  NB:
    2091             :      * This is only safe to do because we hold a session lock on the main
    2092             :      * relation that prevents concurrent deletion.
    2093             :      */
    2094      258446 :     if (OidIsValid(params.toast_parent))
    2095        9802 :         priv_relid = params.toast_parent;
    2096             :     else
    2097      248644 :         priv_relid = RelationGetRelid(rel);
    2098             : 
    2099             :     /*
    2100             :      * Check if relation needs to be skipped based on privileges.  This check
    2101             :      * happens also when building the relation list to vacuum for a manual
    2102             :      * operation, and needs to be done additionally here as VACUUM could
    2103             :      * happen across multiple transactions where privileges could have changed
    2104             :      * in-between.  Make sure to only generate logs for VACUUM in this case.
    2105             :      */
    2106      258446 :     if (!vacuum_is_permitted_for_relation(priv_relid,
    2107             :                                           rel->rd_rel,
    2108      258446 :                                           params.options & ~VACOPT_ANALYZE))
    2109             :     {
    2110          72 :         relation_close(rel, lmode);
    2111          72 :         PopActiveSnapshot();
    2112          72 :         CommitTransactionCommand();
    2113          72 :         return false;
    2114             :     }
    2115             : 
    2116             :     /*
    2117             :      * Check that it's of a vacuumable relkind.
    2118             :      */
    2119      258374 :     if (rel->rd_rel->relkind != RELKIND_RELATION &&
    2120       94306 :         rel->rd_rel->relkind != RELKIND_MATVIEW &&
    2121       94298 :         rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
    2122         188 :         rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
    2123             :     {
    2124           2 :         ereport(WARNING,
    2125             :                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
    2126             :                         RelationGetRelationName(rel))));
    2127           2 :         relation_close(rel, lmode);
    2128           2 :         PopActiveSnapshot();
    2129           2 :         CommitTransactionCommand();
    2130           2 :         return false;
    2131             :     }
    2132             : 
    2133             :     /*
    2134             :      * Silently ignore tables that are temp tables of other backends ---
    2135             :      * trying to vacuum these will lead to great unhappiness, since their
    2136             :      * contents are probably not up-to-date on disk.  (We don't throw a
    2137             :      * warning here; it would just lead to chatter during a database-wide
    2138             :      * VACUUM.)
    2139             :      */
    2140      258372 :     if (RELATION_IS_OTHER_TEMP(rel))
    2141             :     {
    2142           2 :         relation_close(rel, lmode);
    2143           2 :         PopActiveSnapshot();
    2144           2 :         CommitTransactionCommand();
    2145           2 :         return false;
    2146             :     }
    2147             : 
    2148             :     /*
    2149             :      * Silently ignore partitioned tables as there is no work to be done.  The
    2150             :      * useful work is on their child partitions, which have been queued up for
    2151             :      * us separately.
    2152             :      */
    2153      258370 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    2154             :     {
    2155         186 :         relation_close(rel, lmode);
    2156         186 :         PopActiveSnapshot();
    2157         186 :         CommitTransactionCommand();
    2158             :         /* It's OK to proceed with ANALYZE on this table */
    2159         186 :         return true;
    2160             :     }
    2161             : 
    2162             :     /*
    2163             :      * Get a session-level lock too. This will protect our access to the
    2164             :      * relation across multiple transactions, so that we can vacuum the
    2165             :      * relation's TOAST table (if any) secure in the knowledge that no one is
    2166             :      * deleting the parent relation.
    2167             :      *
    2168             :      * NOTE: this cannot block, even if someone else is waiting for access,
    2169             :      * because the lock manager knows that both lock requests are from the
    2170             :      * same process.
    2171             :      */
    2172      258184 :     lockrelid = rel->rd_lockInfo.lockRelId;
    2173      258184 :     LockRelationIdForSession(&lockrelid, lmode);
    2174             : 
    2175             :     /*
    2176             :      * Set index_cleanup option based on index_cleanup reloption if it wasn't
    2177             :      * specified in VACUUM command, or when running in an autovacuum worker
    2178             :      */
    2179      258184 :     if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
    2180             :     {
    2181             :         StdRdOptIndexCleanup vacuum_index_cleanup;
    2182             : 
    2183      257926 :         if (rel->rd_options == NULL)
    2184      254690 :             vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
    2185             :         else
    2186        3236 :             vacuum_index_cleanup =
    2187        3236 :                 ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
    2188             : 
    2189      257926 :         if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
    2190      257882 :             params.index_cleanup = VACOPTVALUE_AUTO;
    2191          44 :         else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
    2192          22 :             params.index_cleanup = VACOPTVALUE_ENABLED;
    2193             :         else
    2194             :         {
    2195             :             Assert(vacuum_index_cleanup ==
    2196             :                    STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
    2197          22 :             params.index_cleanup = VACOPTVALUE_DISABLED;
    2198             :         }
    2199             :     }
    2200             : 
    2201             : #ifdef USE_INJECTION_POINTS
    2202      258184 :     if (params.index_cleanup == VACOPTVALUE_AUTO)
    2203      257888 :         INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
    2204         296 :     else if (params.index_cleanup == VACOPTVALUE_DISABLED)
    2205         260 :         INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
    2206          36 :     else if (params.index_cleanup == VACOPTVALUE_ENABLED)
    2207          36 :         INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
    2208             : #endif
    2209             : 
    2210             :     /*
    2211             :      * Check if the vacuum_max_eager_freeze_failure_rate table storage
    2212             :      * parameter was specified. This overrides the GUC value.
    2213             :      */
    2214      258184 :     if (rel->rd_options != NULL &&
    2215        3248 :         ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
    2216           0 :         params.max_eager_freeze_failure_rate =
    2217           0 :             ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
    2218             : 
    2219             :     /*
    2220             :      * Set truncate option based on truncate reloption or GUC if it wasn't
    2221             :      * specified in VACUUM command, or when running in an autovacuum worker
    2222             :      */
    2223      258184 :     if (params.truncate == VACOPTVALUE_UNSPECIFIED)
    2224             :     {
    2225      257932 :         StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
    2226             : 
    2227      257932 :         if (opts && opts->vacuum_truncate_set)
    2228             :         {
    2229          32 :             if (opts->vacuum_truncate)
    2230          10 :                 params.truncate = VACOPTVALUE_ENABLED;
    2231             :             else
    2232          22 :                 params.truncate = VACOPTVALUE_DISABLED;
    2233             :         }
    2234      257900 :         else if (vacuum_truncate)
    2235      257878 :             params.truncate = VACOPTVALUE_ENABLED;
    2236             :         else
    2237          22 :             params.truncate = VACOPTVALUE_DISABLED;
    2238             :     }
    2239             : 
    2240             : #ifdef USE_INJECTION_POINTS
    2241      258184 :     if (params.truncate == VACOPTVALUE_AUTO)
    2242           0 :         INJECTION_POINT("vacuum-truncate-auto", NULL);
    2243      258184 :     else if (params.truncate == VACOPTVALUE_DISABLED)
    2244         296 :         INJECTION_POINT("vacuum-truncate-disabled", NULL);
    2245      257888 :     else if (params.truncate == VACOPTVALUE_ENABLED)
    2246      257888 :         INJECTION_POINT("vacuum-truncate-enabled", NULL);
    2247             : #endif
    2248             : 
    2249             :     /*
    2250             :      * Remember the relation's TOAST relation for later, if the caller asked
    2251             :      * us to process it.  In VACUUM FULL, though, the toast table is
    2252             :      * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
    2253             :      * unless PROCESS_MAIN is disabled.
    2254             :      */
    2255      258184 :     if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
    2256       28102 :         ((params.options & VACOPT_FULL) == 0 ||
    2257         386 :          (params.options & VACOPT_PROCESS_MAIN) == 0))
    2258       27722 :         toast_relid = rel->rd_rel->reltoastrelid;
    2259             :     else
    2260      230462 :         toast_relid = InvalidOid;
    2261             : 
    2262             :     /*
    2263             :      * Switch to the table owner's userid, so that any index functions are run
    2264             :      * as that user.  Also lock down security-restricted operations and
    2265             :      * arrange to make GUC variable changes local to this command. (This is
    2266             :      * unnecessary, but harmless, for lazy VACUUM.)
    2267             :      */
    2268      258184 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
    2269      258184 :     SetUserIdAndSecContext(rel->rd_rel->relowner,
    2270             :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
    2271      258184 :     save_nestlevel = NewGUCNestLevel();
    2272      258184 :     RestrictSearchPath();
    2273             : 
    2274             :     /*
    2275             :      * If PROCESS_MAIN is set (the default), it's time to vacuum the main
    2276             :      * relation.  Otherwise, we can skip this part.  If processing the TOAST
    2277             :      * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
    2278             :      * to be set when we recurse to the TOAST table.
    2279             :      */
    2280      258184 :     if (params.options & VACOPT_PROCESS_MAIN)
    2281             :     {
    2282             :         /*
    2283             :          * Do the actual work --- either FULL or "lazy" vacuum
    2284             :          */
    2285      258030 :         if (params.options & VACOPT_FULL)
    2286             :         {
    2287         380 :             ClusterParams cluster_params = {0};
    2288             : 
    2289         380 :             if ((params.options & VACOPT_VERBOSE) != 0)
    2290           2 :                 cluster_params.options |= CLUOPT_VERBOSE;
    2291             : 
    2292             :             /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
    2293         380 :             cluster_rel(rel, InvalidOid, &cluster_params);
    2294             :             /* cluster_rel closes the relation, but keeps lock */
    2295             : 
    2296         374 :             rel = NULL;
    2297             :         }
    2298             :         else
    2299      257650 :             table_relation_vacuum(rel, params, bstrategy);
    2300             :     }
    2301             : 
    2302             :     /* Roll back any GUC changes executed by index functions */
    2303      258176 :     AtEOXact_GUC(false, save_nestlevel);
    2304             : 
    2305             :     /* Restore userid and security context */
    2306      258176 :     SetUserIdAndSecContext(save_userid, save_sec_context);
    2307             : 
    2308             :     /* all done with this class, but hold lock until commit */
    2309      258176 :     if (rel)
    2310      257802 :         relation_close(rel, NoLock);
    2311             : 
    2312             :     /*
    2313             :      * Complete the transaction and free all temporary memory used.
    2314             :      */
    2315      258176 :     PopActiveSnapshot();
    2316      258176 :     CommitTransactionCommand();
    2317             : 
    2318             :     /*
    2319             :      * If the relation has a secondary toast rel, vacuum that too while we
    2320             :      * still hold the session lock on the main table.  Note however that
    2321             :      * "analyze" will not get done on the toast table.  This is good, because
    2322             :      * the toaster always uses hardcoded index access and statistics are
    2323             :      * totally unimportant for toast relations.
    2324             :      */
    2325      258176 :     if (toast_relid != InvalidOid)
    2326             :     {
    2327             :         /*
    2328             :          * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it.  Likewise,
    2329             :          * set toast_parent so that the privilege checks are done on the main
    2330             :          * relation.  NB: This is only safe to do because we hold a session
    2331             :          * lock on the main relation that prevents concurrent deletion.
    2332             :          */
    2333        9802 :         toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
    2334        9802 :         toast_vacuum_params.toast_parent = relid;
    2335             : 
    2336        9802 :         vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
    2337             :     }
    2338             : 
    2339             :     /*
    2340             :      * Now release the session-level lock on the main table.
    2341             :      */
    2342      258176 :     UnlockRelationIdForSession(&lockrelid, lmode);
    2343             : 
    2344             :     /* Report that we really did it. */
    2345      258176 :     return true;
    2346             : }
    2347             : 
    2348             : 
    2349             : /*
    2350             :  * Open all the vacuumable indexes of the given relation, obtaining the
    2351             :  * specified kind of lock on each.  Return an array of Relation pointers for
    2352             :  * the indexes into *Irel, and the number of indexes into *nindexes.
    2353             :  *
    2354             :  * We consider an index vacuumable if it is marked insertable (indisready).
    2355             :  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
    2356             :  * execution, and what we have is too corrupt to be processable.  We will
    2357             :  * vacuum even if the index isn't indisvalid; this is important because in a
    2358             :  * unique index, uniqueness checks will be performed anyway and had better not
    2359             :  * hit dangling index pointers.
                     :  *
                     :  * lockmode must not be NoLock (asserted).  *Irel is palloc'd with room for
                     :  * every entry of the relation's index list, so it can hold more slots than
                     :  * *nindexes reports when some indexes are skipped; it is set to NULL when
                     :  * the index list is empty.  An index that is not indisready is closed again
                     :  * with the same lockmode that was used to open it.
    2360             :  */
    2361             : void
    2362      273588 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
    2363             :                  int *nindexes, Relation **Irel)
    2364             : {
    2365             :     List       *indexoidlist;
    2366             :     ListCell   *indexoidscan;
    2367             :     int         i;
    2368             : 
    2369             :     Assert(lockmode != NoLock);
    2370             : 
    2371      273588 :     indexoidlist = RelationGetIndexList(relation);
    2372             : 
    2373             :     /* allocate enough memory for all indexes */
    2374      273588 :     i = list_length(indexoidlist);
    2375             : 
    2376      273588 :     if (i > 0)
    2377      258012 :         *Irel = (Relation *) palloc(i * sizeof(Relation));
    2378             :     else
    2379       15576 :         *Irel = NULL;
    2380             : 
    2381             :     /* collect just the ready indexes */
    2382      273588 :     i = 0;                      /* from here on, i counts the indexes kept */
    2383      683034 :     foreach(indexoidscan, indexoidlist)
    2384             :     {
    2385      409446 :         Oid         indexoid = lfirst_oid(indexoidscan);
    2386             :         Relation    indrel;
    2387             : 
    2388      409446 :         indrel = index_open(indexoid, lockmode);
    2389      409446 :         if (indrel->rd_index->indisready)
    2390      409446 :             (*Irel)[i++] = indrel;
    2391             :         else
    2392           0 :             index_close(indrel, lockmode);
    2393             :     }
    2394             : 
    2395      273588 :     *nindexes = i;
    2396             : 
    2397      273588 :     list_free(indexoidlist);
    2398      273588 : }
    2399             : 
    2400             : /*
    2401             :  * Release the resources acquired by vac_open_indexes.  Optionally release
    2402             :  * the locks (say NoLock to keep 'em).
                     :  *
                     :  * Closes the nindexes entries of Irel, walking the array from the last
                     :  * entry down to the first, and then pfrees the array itself.  A NULL Irel
                     :  * is accepted and makes the call a no-op.
    2403             :  */
    2404             : void
    2405      274458 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
    2406             : {
    2407      274458 :     if (Irel == NULL)
    2408       16452 :         return;
    2409             : 
    2410      667440 :     while (nindexes--)
    2411             :     {
    2412      409434 :         Relation    ind = Irel[nindexes];
    2413             : 
    2414      409434 :         index_close(ind, lockmode);
    2415             :     }
    2416      258006 :     pfree(Irel);
    2417             : }
    2418             : 
    2419             : /*
    2420             :  * vacuum_delay_point --- check for interrupts and cost-based delay.
    2421             :  *
    2422             :  * This should be called in each major loop of VACUUM processing,
    2423             :  * typically once per page processed.
                     :  *
                     :  * Returns right after the interrupt check when an interrupt is still
                     :  * pending, or when cost-based delay is inactive and no configuration
                     :  * reload is pending.  Otherwise a nap is taken only if the computed delay
                     :  * is positive, and the nap is capped at four times vacuum_cost_delay.
                     :  *
                     :  * is_analyze only affects delay-time reporting: when
                     :  * track_cost_delay_timing is set, the time slept is added to
                     :  * PROGRESS_ANALYZE_DELAY_TIME if is_analyze is true and to
                     :  * PROGRESS_VACUUM_DELAY_TIME otherwise.  Parallel workers accumulate their
                     :  * delay locally and report it to PROGRESS_VACUUM_DELAY_TIME at intervals;
                     :  * they must be called with is_analyze false (asserted).
    2424             :  */
    2425             : void
    2426    87674174 : vacuum_delay_point(bool is_analyze)
    2427             : {
    2428    87674174 :     double      msec = 0;
    2429             : 
    2430             :     /* Always check for interrupts */
    2431    87674174 :     CHECK_FOR_INTERRUPTS();
    2432             : 
    2433    87674174 :     if (InterruptPending ||
    2434    87674174 :         (!VacuumCostActive && !ConfigReloadPending))
    2435    78473992 :         return;                 /* nothing to delay for and nothing to reload */
    2436             : 
    2437             :     /*
    2438             :      * Autovacuum workers should reload the configuration file if requested.
    2439             :      * This allows changes to [autovacuum_]vacuum_cost_limit and
    2440             :      * [autovacuum_]vacuum_cost_delay to take effect while a table is being
    2441             :      * vacuumed or analyzed.
    2442             :      */
    2443     9200182 :     if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
    2444             :     {
    2445           0 :         ConfigReloadPending = false;
    2446           0 :         ProcessConfigFile(PGC_SIGHUP);
    2447           0 :         VacuumUpdateCosts();
    2448             :     }
    2449             : 
    2450             :     /*
    2451             :      * If we disabled cost-based delays after reloading the config file,
    2452             :      * return.
    2453             :      */
    2454     9200182 :     if (!VacuumCostActive)
    2455           0 :         return;
    2456             : 
    2457             :     /*
    2458             :      * For parallel vacuum, the delay is computed based on the shared cost
    2459             :      * balance.  See compute_parallel_delay.
    2460             :      */
    2461     9200182 :     if (VacuumSharedCostBalance != NULL)
    2462           0 :         msec = compute_parallel_delay();
    2463     9200182 :     else if (VacuumCostBalance >= vacuum_cost_limit)
    2464        8194 :         msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
    2465             : 
    2466             :     /* Nap if appropriate */
    2467     9200182 :     if (msec > 0)
    2468             :     {
    2469             :         instr_time  delay_start;
    2470             : 
    2471        8194 :         if (msec > vacuum_cost_delay * 4)
    2472          12 :             msec = vacuum_cost_delay * 4;   /* cap the nap at 4x the configured delay */
    2473             : 
    2474        8194 :         if (track_cost_delay_timing)
    2475           0 :             INSTR_TIME_SET_CURRENT(delay_start);
    2476             : 
    2477        8194 :         pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
    2478        8194 :         pg_usleep(msec * 1000);
    2479        8194 :         pgstat_report_wait_end();
    2480             : 
    2481        8194 :         if (track_cost_delay_timing)
    2482             :         {
    2483             :             instr_time  delay_end;
    2484             :             instr_time  delay;
    2485             : 
    2486           0 :             INSTR_TIME_SET_CURRENT(delay_end);
    2487           0 :             INSTR_TIME_SET_ZERO(delay);
    2488           0 :             INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
    2489             : 
    2490             :             /*
    2491             :              * For parallel workers, we only report the delay time every once
    2492             :              * in a while to avoid overloading the leader with messages and
    2493             :              * interrupts.
    2494             :              */
    2495           0 :             if (IsParallelWorker())
    2496             :             {
    2497             :                 static instr_time last_report_time;
    2498             :                 instr_time  time_since_last_report;
    2499             : 
    2500             :                 Assert(!is_analyze);
    2501             : 
    2502             :                 /* Accumulate the delay time */
    2503           0 :                 parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
    2504             : 
    2505             :                 /* Calculate interval since last report */
    2506           0 :                 INSTR_TIME_SET_ZERO(time_since_last_report);
    2507           0 :                 INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
    2508             : 
    2509             :                 /* If we haven't reported in a while, do so now */
    2510           0 :                 if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
    2511             :                     PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
    2512             :                 {
    2513           0 :                     pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
    2514             :                                                         parallel_vacuum_worker_delay_ns);
    2515             : 
    2516             :                     /* Reset variables */
    2517           0 :                     last_report_time = delay_end;
    2518           0 :                     parallel_vacuum_worker_delay_ns = 0;
    2519             :                 }
    2520             :             }
    2521           0 :             else if (is_analyze)
    2522           0 :                 pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
    2523           0 :                                            INSTR_TIME_GET_NANOSEC(delay));
    2524             :             else
    2525           0 :                 pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
    2526           0 :                                            INSTR_TIME_GET_NANOSEC(delay));
    2527             :         }
    2528             : 
    2529             :         /*
    2530             :          * We don't want to ignore postmaster death during very long vacuums
    2531             :          * with vacuum_cost_delay configured.  We can't use the usual
    2532             :          * WaitLatch() approach here because we want microsecond-based sleep
    2533             :          * durations above.
    2534             :          */
    2535        8194 :         if (IsUnderPostmaster && !PostmasterIsAlive())
    2536           0 :             exit(1);
    2537             : 
    2538        8194 :         VacuumCostBalance = 0;
    2539             : 
    2540             :         /*
    2541             :          * Balance and update limit values for autovacuum workers. We must do
    2542             :          * this periodically, as the number of workers across which we are
    2543             :          * balancing the limit may have changed.
    2544             :          *
    2545             :          * TODO: There may be better criteria for determining when to do this
    2546             :          * besides "check after napping".
    2547             :          */
    2548        8194 :         AutoVacuumUpdateCostLimit();
    2549             : 
    2550             :         /* Might have gotten an interrupt while sleeping */
    2551        8194 :         CHECK_FOR_INTERRUPTS();
    2552             :     }
    2553             : }
    2554             : 
    2555             : /*
    2556             :  * Computes the vacuum delay for parallel workers.
    2557             :  *
    2558             :  * The basic idea of a cost-based delay for parallel vacuum is to allow each
    2559             :  * worker to sleep in proportion to the share of work it's done.  We achieve this
    2560             :  * by allowing all parallel vacuum workers including the leader process to
    2561             :  * have a shared view of cost related parameters (mainly VacuumCostBalance).
    2562             :  * We allow each worker to update it as and when it has incurred any cost and
    2563             :  * then based on that decide whether it needs to sleep.  We compute the time
    2564             :  * to sleep for a worker based on the cost it has incurred
    2565             :  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
    2566             :  * that amount.  This avoids putting to sleep those workers which have done less
    2567             :  * I/O than other workers and therefore ensures that workers
    2568             :  * which are doing more I/O get throttled more.
    2569             :  *
    2570             :  * We allow a worker to sleep only if it has performed I/O above a certain
    2571             :  * threshold, which is calculated based on the number of active workers
    2572             :  * (VacuumActiveNWorkers), and the overall cost balance is more than
    2573             :  * vacuum_cost_limit set by the system.  Testing reveals that we achieve
    2574             :  * the required throttling if we force a worker that has done more than 50%
    2575             :  * of its share of work to sleep.
                     :  *
                     :  * Returns the delay that vacuum_delay_point should nap for, or 0 when no
                     :  * nap is due.  On every call VacuumCostBalance is added to both the shared
                     :  * balance and VacuumCostBalanceLocal and then reset to 0; when a nap is
                     :  * due, VacuumCostBalanceLocal is subtracted from the shared balance and
                     :  * reset to 0 as well.
    2576             :  */
    2577             : static double
    2578           0 : compute_parallel_delay(void)
    2579             : {
    2580           0 :     double      msec = 0;
    2581             :     uint32      shared_balance;
    2582             :     int         nworkers;
    2583             : 
    2584             :     /* Parallel vacuum must be active */
    2585             :     Assert(VacuumSharedCostBalance);
    2586             : 
    2587           0 :     nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
    2588             : 
    2589             :     /* At least count itself */
    2590             :     Assert(nworkers >= 1);
    2591             : 
    2592             :     /* Update the shared cost balance value atomically */
    2593           0 :     shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
    2594             : 
    2595             :     /* Compute the total local balance for the current worker */
    2596           0 :     VacuumCostBalanceLocal += VacuumCostBalance;
    2597             : 
    2598           0 :     if ((shared_balance >= vacuum_cost_limit) &&
    2599           0 :         (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
    2600             :     {
    2601             :         /* Compute sleep time based on the local cost balance */
    2602           0 :         msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
    2603           0 :         pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
    2604           0 :         VacuumCostBalanceLocal = 0;
    2605             :     }
    2606             : 
    2607             :     /*
    2608             :      * Reset the local balance as we accumulated it into the shared value.
    2609             :      */
    2610           0 :     VacuumCostBalance = 0;
    2611             : 
    2612           0 :     return msec;
    2613             : }
    2614             : 
    2615             : /*
    2616             :  * A wrapper function of defGetBoolean().
    2617             :  *
    2618             :  * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
    2619             :  * of true and false.
                     :  *
                     :  * def is handed to defGetBoolean() as-is; no other VacOptValue is ever
                     :  * returned from here.
    2620             :  */
    2621             : static VacOptValue
    2622         326 : get_vacoptval_from_boolean(DefElem *def)
    2623             : {
    2624         326 :     return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
    2625             : }
    2626             : 
    2627             : /*
    2628             :  *  vac_bulkdel_one_index() -- bulk-deletion for index relation.
    2629             :  *
    2630             :  * Returns bulk delete stats derived from input stats
                     :  *
                     :  * Calls index_bulk_delete() with vac_tid_reaped as the callback and
                     :  * dead_items as the callback state, then reports the index name and
                     :  * dead_items_info->num_items at ivinfo->message_level.  The value returned
                     :  * is whatever index_bulk_delete() returned for the istat passed in.
    2631             :  */
    2632             : IndexBulkDeleteResult *
    2633        2502 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
    2634             :                       TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
    2635             : {
    2636             :     /* Do bulk deletion */
    2637        2502 :     istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
    2638             :                               dead_items);
    2639             : 
    2640        2502 :     ereport(ivinfo->message_level,
    2641             :             (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
    2642             :                     RelationGetRelationName(ivinfo->index),
    2643             :                     dead_items_info->num_items)));
    2644             : 
    2645        2502 :     return istat;
    2646             : }
    2647             : 
    2648             : /*
    2649             :  *  vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
    2650             :  *
    2651             :  * Returns bulk delete stats derived from input stats
                     :  *
                     :  * The result of index_vacuum_cleanup() is returned unchanged.  The summary
                     :  * message (tuple and page counts at ivinfo->message_level) is emitted only
                     :  * when that result is non-NULL, so callers should be prepared for a NULL
                     :  * return value.
    2652             :  */
    2653             : IndexBulkDeleteResult *
    2654      285110 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
    2655             : {
    2656      285110 :     istat = index_vacuum_cleanup(ivinfo, istat);
    2657             : 
    2658      285110 :     if (istat)
    2659        2732 :         ereport(ivinfo->message_level,
    2660             :                 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
    2661             :                         RelationGetRelationName(ivinfo->index),
    2662             :                         istat->num_index_tuples,
    2663             :                         istat->num_pages),
    2664             :                  errdetail("%.0f index row versions were removed.\n"
    2665             :                            "%u index pages were newly deleted.\n"
    2666             :                            "%u index pages are currently deleted, of which %u are currently reusable.",
    2667             :                            istat->tuples_removed,
    2668             :                            istat->pages_newly_deleted,
    2669             :                            istat->pages_deleted, istat->pages_free)));
    2670             : 
    2671      285110 :     return istat;
    2672             : }
    2673             : 
    2674             : /*
    2675             :  *  vac_tid_reaped() -- is a particular tid deletable?
    2676             :  *
    2677             :  *      This has the right signature to be an IndexBulkDeleteCallback.
                     :  *
                     :  *      state is treated as the TidStore of dead items; the result is
                     :  *      simply whether itemptr is a member of that store.
    2678             :  */
    2679             : static bool
    2680     6476266 : vac_tid_reaped(ItemPointer itemptr, void *state)
    2681             : {
    2682     6476266 :     TidStore   *dead_items = (TidStore *) state;
    2683             : 
    2684     6476266 :     return TidStoreIsMember(dead_items, itemptr);
    2685             : }

Generated by: LCOV version 1.16