LCOV - code coverage report
Current view: top level - src/backend/commands - vacuum.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 91.2 % 770 702
Test Date: 2026-04-07 14:16:30 Functions: 100.0 % 22 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * vacuum.c
       4              :  *    The postgres vacuum cleaner.
       5              :  *
       6              :  * This file includes (a) control and dispatch code for VACUUM and ANALYZE
       7              :  * commands, (b) code to compute various vacuum thresholds, and (c) index
       8              :  * vacuum code.
       9              :  *
      10              :  * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
      11              :  * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
      12              :  * REPACK, handled in repack.c.
      13              :  *
      14              :  *
      15              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      16              :  * Portions Copyright (c) 1994, Regents of the University of California
      17              :  *
      18              :  *
      19              :  * IDENTIFICATION
      20              :  *    src/backend/commands/vacuum.c
      21              :  *
      22              :  *-------------------------------------------------------------------------
      23              :  */
      24              : #include "postgres.h"
      25              : 
      26              : #include <math.h>
      27              : 
      28              : #include "access/clog.h"
      29              : #include "access/commit_ts.h"
      30              : #include "access/genam.h"
      31              : #include "access/heapam.h"
      32              : #include "access/htup_details.h"
      33              : #include "access/multixact.h"
      34              : #include "access/tableam.h"
      35              : #include "access/transam.h"
      36              : #include "access/xact.h"
      37              : #include "catalog/namespace.h"
      38              : #include "catalog/pg_database.h"
      39              : #include "catalog/pg_inherits.h"
      40              : #include "commands/async.h"
      41              : #include "commands/defrem.h"
      42              : #include "commands/progress.h"
      43              : #include "commands/repack.h"
      44              : #include "commands/vacuum.h"
      45              : #include "miscadmin.h"
      46              : #include "nodes/makefuncs.h"
      47              : #include "pgstat.h"
      48              : #include "postmaster/autovacuum.h"
      49              : #include "postmaster/bgworker_internals.h"
      50              : #include "postmaster/interrupt.h"
      51              : #include "storage/bufmgr.h"
      52              : #include "storage/lmgr.h"
      53              : #include "storage/pmsignal.h"
      54              : #include "storage/proc.h"
      55              : #include "storage/procarray.h"
      56              : #include "utils/acl.h"
      57              : #include "utils/fmgroids.h"
      58              : #include "utils/guc.h"
      59              : #include "utils/guc_hooks.h"
      60              : #include "utils/injection_point.h"
      61              : #include "utils/memutils.h"
      62              : #include "utils/snapmgr.h"
      63              : #include "utils/syscache.h"
      64              : #include "utils/wait_event.h"
      65              : 
      66              : /*
      67              :  * Minimum interval for cost-based vacuum delay reports from a parallel worker.
      68              :  * This aims to avoid sending too many messages and waking up the leader too
      69              :  * frequently.
      70              :  */
      71              : #define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS    (NS_PER_S)
      72              : 
      73              : /*
      74              :  * GUC parameters
      75              :  */
      76              : int         vacuum_freeze_min_age;
      77              : int         vacuum_freeze_table_age;
      78              : int         vacuum_multixact_freeze_min_age;
      79              : int         vacuum_multixact_freeze_table_age;
      80              : int         vacuum_failsafe_age;
      81              : int         vacuum_multixact_failsafe_age;
      82              : double      vacuum_max_eager_freeze_failure_rate;
      83              : bool        track_cost_delay_timing;
      84              : bool        vacuum_truncate;
      85              : 
      86              : /*
      87              :  * Variables for cost-based vacuum delay. The defaults differ between
      88              :  * autovacuum and vacuum. They should be set with the appropriate GUC value in
      89              :  * vacuum code. They are initialized here to the defaults for client backends
      90              :  * executing VACUUM or ANALYZE.
      91              :  */
      92              : double      vacuum_cost_delay = 0;
      93              : int         vacuum_cost_limit = 200;
      94              : 
      95              : /* Variable for reporting cost-based vacuum delay from parallel workers. */
      96              : int64       parallel_vacuum_worker_delay_ns = 0;
      97              : 
      98              : /*
      99              :  * VacuumFailsafeActive is defined as a global so that we can determine
     100              :  * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
     101              :  * If failsafe mode has been engaged, we will not re-enable cost-based delay
     102              :  * for the table until after vacuuming has completed, regardless of other
     103              :  * settings.
     104              :  *
     105              :  * Only VACUUM code should inspect this variable and only table access methods
     106              :  * should set it to true. In Table AM-agnostic VACUUM code, this variable is
     107              :  * inspected to determine whether or not to allow cost-based delays. Table AMs
     108              :  * are free to set it if they desire this behavior, but it is false by default
     109              :  * and reset to false in between vacuuming each relation.
     110              :  */
     111              : bool        VacuumFailsafeActive = false;
     112              : 
     113              : /*
     114              :  * Variables for cost-based parallel vacuum.  See comments atop
     115              :  * compute_parallel_delay to understand how it works.
     116              :  */
     117              : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
     118              : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
     119              : int         VacuumCostBalanceLocal = 0;
     120              : 
     121              : /* non-export function prototypes */
     122              : static List *expand_vacuum_rel(VacuumRelation *vrel,
     123              :                                MemoryContext vac_context, int options);
     124              : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
     125              : static void vac_truncate_clog(TransactionId frozenXID,
     126              :                               MultiXactId minMulti,
     127              :                               TransactionId lastSaneFrozenXid,
     128              :                               MultiXactId lastSaneMinMulti);
     129              : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
     130              :                        BufferAccessStrategy bstrategy, bool isTopLevel);
     131              : static double compute_parallel_delay(void);
     132              : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
     133              : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
     134              : 
     135              : /*
     136              :  * GUC check hook: accept *newval when it is 0 or lies within the inclusive
     137              :  * range MIN_BAS_VAC_RING_SIZE_KB..MAX_BAS_VAC_RING_SIZE_KB; else set errdetail.
     138              :  */
     139              : bool
     140         1275 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
     141              :                                 GucSource source)
     142              : {
     143              :     /* Value upper and lower hard limits are inclusive */
     144         1275 :     if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
     145         1275 :                          *newval <= MAX_BAS_VAC_RING_SIZE_KB))
     146         1275 :         return true;
     147              : 
     148              :     /* Value does not fall within any allowable range */
     149            0 :     GUC_check_errdetail("\"%s\" must be 0 or between %d kB and %d kB.",
     150              :                         "vacuum_buffer_usage_limit",
     151              :                         MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
     152              : 
     153            0 :     return false;
     154              : }
     155              : 
     156              : /*
     157              :  * Primary entry point for manual VACUUM and ANALYZE commands
     158              :  *
     159              :  * Parses and validates the statement's options, fills in VacuumParams, builds
     160              :  * a buffer access strategy when one is needed, and then calls vacuum().
     161              :  */
     162              : void
     163         8704 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
     164              : {
     165              :     VacuumParams params;
     166         8704 :     BufferAccessStrategy bstrategy = NULL;
     167         8704 :     bool        verbose = false;
     168         8704 :     bool        skip_locked = false;
     169         8704 :     bool        analyze = false;
     170         8704 :     bool        freeze = false;
     171         8704 :     bool        full = false;
     172         8704 :     bool        disable_page_skipping = false;
     173         8704 :     bool        process_main = true;
     174         8704 :     bool        process_toast = true;
     175              :     int         ring_size;
     176         8704 :     bool        skip_database_stats = false;
     177         8704 :     bool        only_database_stats = false;
     178              :     MemoryContext vac_context;
     179              :     ListCell   *lc;
     180              : 
     181              :     /* index_cleanup and truncate values unspecified for now */
     182         8704 :     params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
     183         8704 :     params.truncate = VACOPTVALUE_UNSPECIFIED;
     184              : 
     185              :     /* By default parallel vacuum is enabled (0 = no PARALLEL option given) */
     186         8704 :     params.nworkers = 0;
     187              : 
     188              :     /* Will be set later if we recurse to a TOAST table. */
     189         8704 :     params.toast_parent = InvalidOid;
     190              : 
     191              :     /*
     192              :      * Set this to an invalid value so it is clear whether or not a
     193              :      * BUFFER_USAGE_LIMIT was specified when making the access strategy.
     194              :      */
     195         8704 :     ring_size = -1;
     196              : 
     197              :     /* Parse options list */
     198        18266 :     foreach(lc, vacstmt->options)
     199              :     {
     200         9586 :         DefElem    *opt = (DefElem *) lfirst(lc);
     201              : 
     202              :         /* Parse common options for VACUUM and ANALYZE */
     203         9586 :         if (strcmp(opt->defname, "verbose") == 0)
     204           25 :             verbose = defGetBoolean(opt);
     205         9561 :         else if (strcmp(opt->defname, "skip_locked") == 0)
     206          182 :             skip_locked = defGetBoolean(opt);
     207         9379 :         else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
     208              :         {
     209              :             const char *hintmsg;
     210              :             int         result;
     211              :             char       *vac_buffer_size;
     212              : 
     213           36 :             vac_buffer_size = defGetString(opt);
     214              : 
     215              :             /*
     216              :              * Check that the specified value is valid and the size falls
     217              :              * within the hard upper and lower limits if it is not 0.
     218              :              */
     219           36 :             if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
     220           32 :                 (result != 0 &&
     221           24 :                  (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
     222              :             {
     223           12 :                 ereport(ERROR,
     224              :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     225              :                          errmsg("%s option must be 0 or between %d kB and %d kB",
     226              :                                 "BUFFER_USAGE_LIMIT",
     227              :                                 MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
     228              :                          hintmsg ? errhint_internal("%s", _(hintmsg)) : 0));
     229              :             }
     230              : 
     231           24 :             ring_size = result;
     232              :         }
     233         9343 :         else if (!vacstmt->is_vacuumcmd)
     234            4 :             ereport(ERROR,
     235              :                     (errcode(ERRCODE_SYNTAX_ERROR),
     236              :                      errmsg("unrecognized %s option \"%s\"",
     237              :                             "ANALYZE", opt->defname),
     238              :                      parser_errposition(pstate, opt->location)));
     239              : 
     240              :         /* Parse options available only on VACUUM (ANALYZE errored out above) */
     241         9339 :         else if (strcmp(opt->defname, "analyze") == 0)
     242         1837 :             analyze = defGetBoolean(opt);
     243         7502 :         else if (strcmp(opt->defname, "freeze") == 0)
     244         1891 :             freeze = defGetBoolean(opt);
     245         5611 :         else if (strcmp(opt->defname, "full") == 0)
     246          230 :             full = defGetBoolean(opt);
     247         5381 :         else if (strcmp(opt->defname, "disable_page_skipping") == 0)
     248          120 :             disable_page_skipping = defGetBoolean(opt);
     249         5261 :         else if (strcmp(opt->defname, "index_cleanup") == 0)
     250              :         {
     251              :             /* Interpret no string as the default, which is 'auto' */
     252           98 :             if (!opt->arg)
     253            0 :                 params.index_cleanup = VACOPTVALUE_AUTO;
     254              :             else
     255              :             {
     256           98 :                 char       *sval = defGetString(opt);
     257              : 
     258              :                 /* Try matching on 'auto' string, or fall back on boolean */
     259           98 :                 if (pg_strcasecmp(sval, "auto") == 0)
     260            4 :                     params.index_cleanup = VACOPTVALUE_AUTO;
     261              :                 else
     262           94 :                     params.index_cleanup = get_vacoptval_from_boolean(opt);
     263              :             }
     264              :         }
     265         5163 :         else if (strcmp(opt->defname, "process_main") == 0)
     266           85 :             process_main = defGetBoolean(opt);
     267         5078 :         else if (strcmp(opt->defname, "process_toast") == 0)
     268           89 :             process_toast = defGetBoolean(opt);
     269         4989 :         else if (strcmp(opt->defname, "truncate") == 0)
     270           88 :             params.truncate = get_vacoptval_from_boolean(opt);
     271         4901 :         else if (strcmp(opt->defname, "parallel") == 0)
     272              :         {
     273          199 :             int         nworkers = defGetInt32(opt);
     274              : 
     275          195 :             if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
     276            4 :                 ereport(ERROR,
     277              :                         (errcode(ERRCODE_SYNTAX_ERROR),
     278              :                          errmsg("%s option must be between 0 and %d",
     279              :                                 "PARALLEL",
     280              :                                 MAX_PARALLEL_WORKER_LIMIT),
     281              :                          parser_errposition(pstate, opt->location)));
     282              : 
     283              :             /*
     284              :              * Disable parallel vacuum, if user has specified parallel degree
     285              :              * as zero.
     286              :              */
     287          191 :             if (nworkers == 0)
     288           86 :                 params.nworkers = -1;
     289              :             else
     290          105 :                 params.nworkers = nworkers;
     291              :         }
     292         4702 :         else if (strcmp(opt->defname, "skip_database_stats") == 0)
     293         4623 :             skip_database_stats = defGetBoolean(opt);
     294           79 :         else if (strcmp(opt->defname, "only_database_stats") == 0)
     295           79 :             only_database_stats = defGetBoolean(opt);
     296              :         else
     297            0 :             ereport(ERROR,
     298              :                     (errcode(ERRCODE_SYNTAX_ERROR),
     299              :                      errmsg("unrecognized %s option \"%s\"",
     300              :                             "VACUUM", opt->defname),
     301              :                      parser_errposition(pstate, opt->location)));
     302              :     }
     303              : 
     304              :     /* Set vacuum options */
     305         8680 :     params.options =
     306         8680 :         (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
     307         8680 :         (verbose ? VACOPT_VERBOSE : 0) |
     308         8680 :         (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
     309         8680 :         (analyze ? VACOPT_ANALYZE : 0) |
     310         8680 :         (freeze ? VACOPT_FREEZE : 0) |
     311         8680 :         (full ? VACOPT_FULL : 0) |
     312         8680 :         (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
     313         8680 :         (process_main ? VACOPT_PROCESS_MAIN : 0) |
     314         8680 :         (process_toast ? VACOPT_PROCESS_TOAST : 0) |
     315         8680 :         (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
     316         8680 :         (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
     317              : 
     318              :     /* sanity checks on options */
     319              :     Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
     320              :     Assert((params.options & VACOPT_VACUUM) ||
     321              :            !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
     322              : 
     323         8680 :     if ((params.options & VACOPT_FULL) && params.nworkers > 0)
     324            4 :         ereport(ERROR,
     325              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     326              :                  errmsg("VACUUM FULL cannot be performed in parallel")));
     327              : 
     328              :     /*
     329              :      * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
     330              :      * ERROR for that case.  VACUUM (FULL, ANALYZE) does make use of it, so
     331              :      * we'll permit that.
     332              :      */
     333         8676 :     if (ring_size != -1 && (params.options & VACOPT_FULL) &&
     334            4 :         !(params.options & VACOPT_ANALYZE))
     335            4 :         ereport(ERROR,
     336              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     337              :                  errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
     338              : 
     339              :     /*
     340              :      * Make sure VACOPT_ANALYZE is specified if any column lists are present.
     341              :      */
     342         8672 :     if (!(params.options & VACOPT_ANALYZE))
     343              :     {
     344         7793 :         foreach(lc, vacstmt->rels)
     345              :         {
     346         3841 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     347              : 
     348         3841 :             if (vrel->va_cols != NIL)
     349            4 :                 ereport(ERROR,
     350              :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     351              :                          errmsg("ANALYZE option must be specified when a column list is provided")));
     352              :         }
     353              :     }
     354              : 
     355              :     /*
     356              :      * Sanity check DISABLE_PAGE_SKIPPING option.
     357              :      */
     358         8668 :     if ((params.options & VACOPT_FULL) != 0 &&
     359          214 :         (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
     360            0 :         ereport(ERROR,
     361              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     362              :                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
     363              : 
     364              :     /* sanity check for PROCESS_TOAST */
     365         8668 :     if ((params.options & VACOPT_FULL) != 0 &&
     366          214 :         (params.options & VACOPT_PROCESS_TOAST) == 0)
     367            4 :         ereport(ERROR,
     368              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     369              :                  errmsg("PROCESS_TOAST required with VACUUM FULL")));
     370              : 
     371              :     /* sanity check for ONLY_DATABASE_STATS */
     372         8664 :     if (params.options & VACOPT_ONLY_DATABASE_STATS)
     373              :     {
     374              :         Assert(params.options & VACOPT_VACUUM);
     375           79 :         if (vacstmt->rels != NIL)
     376            4 :             ereport(ERROR,
     377              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     378              :                      errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
     379              :         /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
     380           75 :         if (params.options & ~(VACOPT_VACUUM |
     381              :                                VACOPT_VERBOSE |
     382              :                                VACOPT_PROCESS_MAIN |
     383              :                                VACOPT_PROCESS_TOAST |
     384              :                                VACOPT_ONLY_DATABASE_STATS))
     385            0 :             ereport(ERROR,
     386              :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     387              :                      errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
     388              :     }
     389              : 
     390              :     /*
     391              :      * All freeze ages are zero if the FREEZE option is given; otherwise pass
     392              :      * them as -1 which means to use the default values.
     393              :      */
     394         8660 :     if (params.options & VACOPT_FREEZE)
     395              :     {
     396         1891 :         params.freeze_min_age = 0;
     397         1891 :         params.freeze_table_age = 0;
     398         1891 :         params.multixact_freeze_min_age = 0;
     399         1891 :         params.multixact_freeze_table_age = 0;
     400              :     }
     401              :     else
     402              :     {
     403         6769 :         params.freeze_min_age = -1;
     404         6769 :         params.freeze_table_age = -1;
     405         6769 :         params.multixact_freeze_min_age = -1;
     406         6769 :         params.multixact_freeze_table_age = -1;
     407              :     }
     408              : 
     409              :     /* user-invoked vacuum is never "for wraparound" */
     410         8660 :     params.is_wraparound = false;
     411              : 
     412              :     /*
     413              :      * user-invoked vacuum uses VACOPT_VERBOSE instead of
     414              :      * log_vacuum_min_duration and log_analyze_min_duration
     415              :      */
     416         8660 :     params.log_vacuum_min_duration = -1;
     417         8660 :     params.log_analyze_min_duration = -1;
     418              : 
     419              :     /*
     420              :      * Later, in vacuum_rel(), we check if a reloption override was specified.
     421              :      */
     422         8660 :     params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
     423              : 
     424              :     /*
     425              :      * Create special memory context for cross-transaction storage.
     426              :      *
     427              :      * Since it is a child of PortalContext, it will go away eventually even
     428              :      * if we suffer an error; there's no need for special abort cleanup logic.
     429              :      */
     430         8660 :     vac_context = AllocSetContextCreate(PortalContext,
     431              :                                         "Vacuum",
     432              :                                         ALLOCSET_DEFAULT_SIZES);
     433              : 
     434              :     /*
     435              :      * Make a buffer strategy object in the cross-transaction memory context.
     436              :      * We needn't bother making this for VACUUM (FULL) or VACUUM
     437              :      * (ONLY_DATABASE_STATS) as they'll not make use of it.  VACUUM (FULL,
     438              :      * ANALYZE) is possible, so we'd better ensure that we make a strategy
     439              :      * when we see ANALYZE.
     440              :      */
     441         8660 :     if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
     442          285 :                            VACOPT_FULL)) == 0 ||
     443          285 :         (params.options & VACOPT_ANALYZE) != 0)
     444              :     {
     445              : 
     446         8379 :         MemoryContext old_context = MemoryContextSwitchTo(vac_context);
     447              : 
     448              :         Assert(ring_size >= -1);
     449              : 
     450              :         /*
     451              :          * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
     452              :          * command, it overrides the value of VacuumBufferUsageLimit.  Either
     453              :          * value may be 0, in which case GetAccessStrategyWithSize() will
     454              :          * return NULL, effectively allowing full use of shared buffers.
     455              :          */
     456         8379 :         if (ring_size == -1)
     457         8359 :             ring_size = VacuumBufferUsageLimit;
     458              : 
     459         8379 :         bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
     460              : 
     461         8379 :         MemoryContextSwitchTo(old_context);
     462              :     }
     463              : 
     464              :     /* Now go through the common routine */
     465         8660 :     vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
     466              : 
     467              :     /* Finally, clean up the vacuum memory context */
     468         8573 :     MemoryContextDelete(vac_context);
     469         8573 : }
     470              : 
     471              : /*
     472              :  * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
     473              :  *
     474              :  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
     475              :  * we process all relevant tables in the database.  For each VacuumRelation,
     476              :  * if a valid OID is supplied, the table with that OID is what to process;
     477              :  * otherwise, the VacuumRelation's RangeVar indicates what to process.
     478              :  *
     479              :  * params contains a set of parameters that can be used to customize the
     480              :  * behavior.
     481              :  *
     482              :  * bstrategy may be passed in as NULL when the caller does not want to
     483              :  * restrict the number of shared_buffers that VACUUM / ANALYZE can use,
     484              :  * otherwise, the caller must build a BufferAccessStrategy with the number of
     485              :  * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
     486              :  * using.
     487              :  *
     488              :  * isTopLevel should be passed down from ProcessUtility.
     489              :  *
     490              :  * It is the caller's responsibility that all parameters are allocated in a
     491              :  * memory context that will not disappear at transaction commit.
     492              :  */
     493              : void
     494       108702 : vacuum(List *relations, const VacuumParams *params, BufferAccessStrategy bstrategy,
     495              :        MemoryContext vac_context, bool isTopLevel)
     496              : {
     497              :     static bool in_vacuum = false;
     498              : 
     499              :     const char *stmttype;
     500              :     volatile bool in_outer_xact,
     501              :                 use_own_xacts;
     502              : 
     503       108702 :     stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
     504              : 
     505              :     /*
     506              :      * We cannot run VACUUM inside a user transaction block; if we were inside
     507              :      * a transaction, then our commit- and start-transaction-command calls
     508              :      * would not have the intended effect!  There are numerous other subtle
     509              :      * dependencies on this, too.
     510              :      *
     511              :      * ANALYZE (without VACUUM) can run either way.
     512              :      */
     513       108702 :     if (params->options & VACOPT_VACUUM)
     514              :     {
     515       105654 :         PreventInTransactionBlock(isTopLevel, stmttype);
     516       105641 :         in_outer_xact = false;
     517              :     }
     518              :     else
     519         3048 :         in_outer_xact = IsInTransactionBlock(isTopLevel);
     520              : 
     521              :     /*
     522              :      * Check for and disallow recursive calls.  This could happen when VACUUM
     523              :      * FULL or ANALYZE calls a hostile index expression that itself calls
     524              :      * ANALYZE.
     525              :      */
     526       108689 :     if (in_vacuum)
     527            8 :         ereport(ERROR,
     528              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     529              :                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
     530              :                         stmttype)));
     531              : 
     532              :     /*
     533              :      * Build list of relation(s) to process, putting any new data in
     534              :      * vac_context for safekeeping.
     535              :      */
     536       108681 :     if (params->options & VACOPT_ONLY_DATABASE_STATS)
     537              :     {
     538              :         /* We don't process any tables in this case */
     539              :         Assert(relations == NIL);
     540              :     }
     541       108606 :     else if (relations != NIL)
     542              :     {
     543       108481 :         List       *newrels = NIL;
     544              :         ListCell   *lc;
     545              : 
     546       217043 :         foreach(lc, relations)
     547              :         {
     548       108586 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     549              :             List       *sublist;
     550              :             MemoryContext old_context;
     551              : 
     552       108586 :             sublist = expand_vacuum_rel(vrel, vac_context, params->options);
     553       108562 :             old_context = MemoryContextSwitchTo(vac_context);
     554       108562 :             newrels = list_concat(newrels, sublist);
     555       108562 :             MemoryContextSwitchTo(old_context);
     556              :         }
     557       108457 :         relations = newrels;
     558              :     }
     559              :     else
     560          125 :         relations = get_all_vacuum_rels(vac_context, params->options);
     561              : 
     562              :     /*
     563              :      * Decide whether we need to start/commit our own transactions.
     564              :      *
     565              :      * For VACUUM (with or without ANALYZE): always do so, so that we can
     566              :      * release locks as soon as possible.  (We could possibly use the outer
     567              :      * transaction for a one-table VACUUM, but handling TOAST tables would be
     568              :      * problematic.)
     569              :      *
     570              :      * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
     571              :      * start/commit our own transactions.  Also, there's no need to do so if
     572              :      * only processing one relation.  For multiple relations when not within a
     573              :      * transaction block, and also in an autovacuum worker, use own
     574              :      * transactions so we can release locks sooner.
     575              :      */
     576       108657 :     if (params->options & VACOPT_VACUUM)
     577       105633 :         use_own_xacts = true;
     578              :     else
     579              :     {
     580              :         Assert(params->options & VACOPT_ANALYZE);
     581         3024 :         if (AmAutoVacuumWorkerProcess())
     582          169 :             use_own_xacts = true;
     583         2855 :         else if (in_outer_xact)
     584          164 :             use_own_xacts = false;
     585         2691 :         else if (list_length(relations) > 1)
     586          524 :             use_own_xacts = true;
     587              :         else
     588         2167 :             use_own_xacts = false;
     589              :     }
     590              : 
     591              :     /*
     592              :      * vacuum_rel expects to be entered with no transaction active; it will
     593              :      * start and commit its own transaction.  But we are called by an SQL
     594              :      * command, and so we are executing inside a transaction already. We
     595              :      * commit the transaction started in PostgresMain() here, and start
     596              :      * another one before exiting to match the commit waiting for us back in
     597              :      * PostgresMain().
     598              :      */
     599       108657 :     if (use_own_xacts)
     600              :     {
     601              :         Assert(!in_outer_xact);
     602              : 
     603              :         /* ActiveSnapshot is not set by autovacuum */
     604       106326 :         if (ActiveSnapshotSet())
     605         6284 :             PopActiveSnapshot();
     606              : 
     607              :         /* matches the StartTransaction in PostgresMain() */
     608       106326 :         CommitTransactionCommand();
     609              :     }
     610              : 
     611              :     /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
     612       108657 :     PG_TRY();
     613              :     {
     614              :         ListCell   *cur;
     615              : 
     616       108657 :         in_vacuum = true;
     617       108657 :         VacuumFailsafeActive = false;
     618       108657 :         VacuumUpdateCosts();
     619       108657 :         VacuumCostBalance = 0;
     620       108657 :         VacuumCostBalanceLocal = 0;
     621       108657 :         VacuumSharedCostBalance = NULL;
     622       108657 :         VacuumActiveNWorkers = NULL;
     623              : 
     624              :         /*
     625              :          * Loop to process each selected relation.
     626              :          */
     627       228024 :         foreach(cur, relations)
     628              :         {
     629       119409 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
     630              : 
     631       119409 :             if (params->options & VACOPT_VACUUM)
     632              :             {
     633       111167 :                 if (!vacuum_rel(vrel->oid, vrel->relation, *params, bstrategy,
     634              :                                 isTopLevel))
     635           62 :                     continue;
     636              :             }
     637              : 
     638       119343 :             if (params->options & VACOPT_ANALYZE)
     639              :             {
     640              :                 /*
     641              :                  * If using separate xacts, start one for analyze. Otherwise,
     642              :                  * we can use the outer transaction.
     643              :                  */
     644        10218 :                 if (use_own_xacts)
     645              :                 {
     646         7903 :                     StartTransactionCommand();
     647              :                     /* functions in indexes may want a snapshot set */
     648         7903 :                     PushActiveSnapshot(GetTransactionSnapshot());
     649              :                 }
     650              : 
     651        10218 :                 analyze_rel(vrel->oid, vrel->relation, params,
     652              :                             vrel->va_cols, in_outer_xact, bstrategy);
     653              : 
     654        10180 :                 if (use_own_xacts)
     655              :                 {
     656         7878 :                     PopActiveSnapshot();
     657              :                     /* standard_ProcessUtility() does CCI if !use_own_xacts */
     658         7878 :                     CommandCounterIncrement();
     659         7878 :                     CommitTransactionCommand();
     660              :                 }
     661              :                 else
     662              :                 {
     663              :                     /*
     664              :                      * If we're not using separate xacts, better separate the
     665              :                      * ANALYZE actions with CCIs.  This avoids trouble if user
     666              :                      * says "ANALYZE t, t".
     667              :                      */
     668         2302 :                     CommandCounterIncrement();
     669              :                 }
     670              :             }
     671              : 
     672              :             /*
     673              :              * Ensure VacuumFailsafeActive has been reset before vacuuming the
     674              :              * next relation.
     675              :              */
     676       119305 :             VacuumFailsafeActive = false;
     677              :         }
     678              :     }
     679           42 :     PG_FINALLY();
     680              :     {
     681       108657 :         in_vacuum = false;
     682       108657 :         VacuumCostActive = false;
     683       108657 :         VacuumFailsafeActive = false;
     684       108657 :         VacuumCostBalance = 0;
     685              :     }
     686       108657 :     PG_END_TRY();
     687              : 
     688              :     /*
     689              :      * Finish up processing.
     690              :      */
     691       108615 :     if (use_own_xacts)
     692              :     {
     693              :         /* here, we are not in a transaction */
     694              : 
     695              :         /*
     696              :          * This matches the CommitTransaction waiting for us in
     697              :          * PostgresMain().
     698              :          */
     699       106297 :         StartTransactionCommand();
     700              :     }
     701              : 
     702       108615 :     if ((params->options & VACOPT_VACUUM) &&
     703       105612 :         !(params->options & VACOPT_SKIP_DATABASE_STATS))
     704              :     {
     705              :         /*
     706              :          * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
     707              :          */
     708         1117 :         vac_update_datfrozenxid();
     709              :     }
     710              : 
     711       108615 : }
     712              : 
     713              : /*
     714              :  * Check if the current user has privileges to vacuum or analyze the relation.
     715              :  * If not, issue a WARNING log message and return false to let the caller
     716              :  * decide what to do with this relation.  This routine is used to decide if a
     717              :  * relation can be processed for VACUUM or ANALYZE.
     718              :  */
     719              : bool
     720       147231 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
     721              :                                  uint32 options)
     722              : {
     723              :     char       *relname;
     724              : 
     725              :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
     726              : 
     727              :     /*----------
     728              :      * A role has privileges to vacuum or analyze the relation if any of the
     729              :      * following are true:
     730              :      *   - the role owns the current database and the relation is not shared
     731              :      *   - the role has the MAINTAIN privilege on the relation
     732              :      *----------
     733              :      */
     734       147231 :     if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) &&
     735       168499 :          !reltuple->relisshared) ||
     736        23752 :         pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK)
     737       145059 :         return true;
     738              : 
     739         2172 :     relname = NameStr(reltuple->relname);
     740              : 
     741         2172 :     if ((options & VACOPT_VACUUM) != 0)
     742              :     {
     743          148 :         ereport(WARNING,
     744              :                 (errmsg("permission denied to vacuum \"%s\", skipping it",
     745              :                         relname)));
     746              : 
     747              :         /*
     748              :          * For VACUUM ANALYZE, both logs could show up, but just generate
     749              :          * information for VACUUM as that would be the first one to be
     750              :          * processed.
     751              :          */
     752          148 :         return false;
     753              :     }
     754              : 
     755         2024 :     if ((options & VACOPT_ANALYZE) != 0)
     756         2024 :         ereport(WARNING,
     757              :                 (errmsg("permission denied to analyze \"%s\", skipping it",
     758              :                         relname)));
     759              : 
     760         2024 :     return false;
     761              : }
     762              : 
     763              : 
     764              : /*
     765              :  * vacuum_open_relation
     766              :  *
     767              :  * This routine is used for attempting to open and lock a relation which
     768              :  * is going to be vacuumed or analyzed.  If the relation cannot be opened
     769              :  * or locked, a log is emitted if possible.
     770              :  */
     771              : Relation
     772       127201 : vacuum_open_relation(Oid relid, RangeVar *relation, uint32 options,
     773              :                      bool verbose, LOCKMODE lmode)
     774              : {
     775              :     Relation    rel;
     776       127201 :     bool        rel_lock = true;
     777              :     int         elevel;
     778              : 
     779              :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
     780              : 
     781              :     /*
     782              :      * Open the relation and get the appropriate lock on it.
     783              :      *
     784              :      * There's a race condition here: the relation may have gone away since
     785              :      * the last time we saw it.  If so, we don't need to vacuum or analyze it.
     786              :      *
     787              :      * If we've been asked not to wait for the relation lock, acquire it first
     788              :      * in non-blocking mode, before calling try_relation_open().
     789              :      */
     790       127201 :     if (!(options & VACOPT_SKIP_LOCKED))
     791       126585 :         rel = try_relation_open(relid, lmode);
     792          616 :     else if (ConditionalLockRelationOid(relid, lmode))
     793          606 :         rel = try_relation_open(relid, NoLock);
     794              :     else
     795              :     {
     796           10 :         rel = NULL;
     797           10 :         rel_lock = false;
     798              :     }
     799              : 
     800              :     /* if relation is opened, leave */
     801       127201 :     if (rel)
     802       127185 :         return rel;
     803              : 
     804              :     /*
     805              :      * Relation could not be opened, hence generate if possible a log
     806              :      * informing on the situation.
     807              :      *
     808              :      * If the RangeVar is not defined, we do not have enough information to
     809              :      * provide a meaningful log statement.  Chances are that the caller has
     810              :      * intentionally not provided this information so that this logging is
     811              :      * skipped, anyway.
     812              :      */
     813           16 :     if (relation == NULL)
     814            9 :         return NULL;
     815              : 
     816              :     /*
     817              :      * Determine the log level.
     818              :      *
     819              :      * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
     820              :      * statements in the permission checks; otherwise, only log if the caller
     821              :      * so requested.
     822              :      */
     823            7 :     if (!AmAutoVacuumWorkerProcess())
     824            7 :         elevel = WARNING;
     825            0 :     else if (verbose)
     826            0 :         elevel = LOG;
     827              :     else
     828            0 :         return NULL;
     829              : 
     830            7 :     if ((options & VACOPT_VACUUM) != 0)
     831              :     {
     832            5 :         if (!rel_lock)
     833            3 :             ereport(elevel,
     834              :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     835              :                      errmsg("skipping vacuum of \"%s\" --- lock not available",
     836              :                             relation->relname)));
     837              :         else
     838            2 :             ereport(elevel,
     839              :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     840              :                      errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
     841              :                             relation->relname)));
     842              : 
     843              :         /*
     844              :          * For VACUUM ANALYZE, both logs could show up, but just generate
     845              :          * information for VACUUM as that would be the first one to be
     846              :          * processed.
     847              :          */
     848            5 :         return NULL;
     849              :     }
     850              : 
     851            2 :     if ((options & VACOPT_ANALYZE) != 0)
     852              :     {
     853            2 :         if (!rel_lock)
     854            1 :             ereport(elevel,
     855              :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     856              :                      errmsg("skipping analyze of \"%s\" --- lock not available",
     857              :                             relation->relname)));
     858              :         else
     859            1 :             ereport(elevel,
     860              :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     861              :                      errmsg("skipping analyze of \"%s\" --- relation no longer exists",
     862              :                             relation->relname)));
     863              :     }
     864              : 
     865            2 :     return NULL;
     866              : }
     867              : 
     868              : 
     869              : /*
     870              :  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
     871              :  * and optionally add VacuumRelations for partitions or inheritance children.
     872              :  *
     873              :  * If a VacuumRelation does not have an OID supplied and is a partitioned
     874              :  * table, an extra entry will be added to the output for each partition.
     875              :  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
     876              :  * it does not want us to expand partitioned tables.
     877              :  *
     878              :  * We take care not to modify the input data structure, but instead build
     879              :  * new VacuumRelation(s) to return.  (But note that they will reference
     880              :  * unmodified parts of the input, eg column lists.)  New data structures
     881              :  * are made in vac_context.
     882              :  */
     883              : static List *
     884       108586 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
     885              :                   int options)
     886              : {
     887       108586 :     List       *vacrels = NIL;
     888              :     MemoryContext oldcontext;
     889              : 
     890              :     /* If caller supplied OID, there's nothing we need do here. */
     891       108586 :     if (OidIsValid(vrel->oid))
     892              :     {
     893       100042 :         oldcontext = MemoryContextSwitchTo(vac_context);
     894       100042 :         vacrels = lappend(vacrels, vrel);
     895       100042 :         MemoryContextSwitchTo(oldcontext);
     896              :     }
     897              :     else
     898              :     {
     899              :         /*
     900              :          * Process a specific relation, and possibly partitions or child
     901              :          * tables thereof.
     902              :          */
     903              :         Oid         relid;
     904              :         HeapTuple   tuple;
     905              :         Form_pg_class classForm;
     906              :         bool        include_children;
     907              :         bool        is_partitioned_table;
     908              :         int         rvr_opts;
     909              : 
     910              :         /*
     911              :          * Since autovacuum workers supply OIDs when calling vacuum(), no
     912              :          * autovacuum worker should reach this code.
     913              :          */
     914              :         Assert(!AmAutoVacuumWorkerProcess());
     915              : 
     916              :         /*
     917              :          * We transiently take AccessShareLock to protect the syscache lookup
     918              :          * below, as well as find_all_inheritors's expectation that the caller
     919              :          * holds some lock on the starting relation.
     920              :          */
     921         8544 :         rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
     922         8544 :         relid = RangeVarGetRelidExtended(vrel->relation,
     923              :                                          AccessShareLock,
     924              :                                          rvr_opts,
     925              :                                          NULL, NULL);
     926              : 
     927              :         /*
     928              :          * If the lock is unavailable, emit the same log statement that
     929              :          * vacuum_rel() and analyze_rel() would.
     930              :          */
     931         8520 :         if (!OidIsValid(relid))
     932              :         {
     933            4 :             if (options & VACOPT_VACUUM)
     934            3 :                 ereport(WARNING,
     935              :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     936              :                          errmsg("skipping vacuum of \"%s\" --- lock not available",
     937              :                                 vrel->relation->relname)));
     938              :             else
     939            1 :                 ereport(WARNING,
     940              :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     941              :                          errmsg("skipping analyze of \"%s\" --- lock not available",
     942              :                                 vrel->relation->relname)));
     943            4 :             return vacrels;
     944              :         }
     945              : 
     946              :         /*
     947              :          * To check whether the relation is a partitioned table and its
     948              :          * ownership, fetch its syscache entry.
     949              :          */
     950         8516 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     951         8516 :         if (!HeapTupleIsValid(tuple))
     952            0 :             elog(ERROR, "cache lookup failed for relation %u", relid);
     953         8516 :         classForm = (Form_pg_class) GETSTRUCT(tuple);
     954              : 
     955              :         /*
     956              :          * Make a returnable VacuumRelation for this rel if the user has the
     957              :          * required privileges.
     958              :          */
     959         8516 :         if (vacuum_is_permitted_for_relation(relid, classForm, options))
     960              :         {
     961         8364 :             oldcontext = MemoryContextSwitchTo(vac_context);
     962         8364 :             vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
     963              :                                                           relid,
     964              :                                                           vrel->va_cols));
     965         8364 :             MemoryContextSwitchTo(oldcontext);
     966              :         }
     967              : 
     968              :         /*
     969              :          * Vacuuming a partitioned table with ONLY will not do anything since
     970              :          * the partitioned table itself is empty.  Issue a warning if the user
     971              :          * requests this.
     972              :          */
     973         8516 :         include_children = vrel->relation->inh;
     974         8516 :         is_partitioned_table = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
     975         8516 :         if ((options & VACOPT_VACUUM) && is_partitioned_table && !include_children)
     976            4 :             ereport(WARNING,
     977              :                     (errmsg("VACUUM ONLY of partitioned table \"%s\" has no effect",
     978              :                             vrel->relation->relname)));
     979              : 
     980         8516 :         ReleaseSysCache(tuple);
     981              : 
     982              :         /*
     983              :          * Unless the user has specified ONLY, make relation list entries for
     984              :          * its partitions or inheritance child tables.  Note that the list
     985              :          * returned by find_all_inheritors() includes the passed-in OID, so we
     986              :          * have to skip that.  There's no point in taking locks on the
     987              :          * individual partitions or child tables yet, and doing so would just
     988              :          * add unnecessary deadlock risk.  For this last reason, we do not yet
     989              :          * check the ownership of the partitions/tables, which get added to
     990              :          * the list to process.  Ownership will be checked later on anyway.
     991              :          */
     992         8516 :         if (include_children)
     993              :         {
     994         8496 :             List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
     995              :             ListCell   *part_lc;
     996              : 
     997        18445 :             foreach(part_lc, part_oids)
     998              :             {
     999         9949 :                 Oid         part_oid = lfirst_oid(part_lc);
    1000              : 
    1001         9949 :                 if (part_oid == relid)
    1002         8496 :                     continue;   /* ignore original table */
    1003              : 
    1004              :                 /*
    1005              :                  * We omit a RangeVar since it wouldn't be appropriate to
    1006              :                  * complain about failure to open one of these relations
    1007              :                  * later.
    1008              :                  */
    1009         1453 :                 oldcontext = MemoryContextSwitchTo(vac_context);
    1010         1453 :                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
    1011              :                                                               part_oid,
    1012              :                                                               vrel->va_cols));
    1013         1453 :                 MemoryContextSwitchTo(oldcontext);
    1014              :             }
    1015              :         }
    1016              : 
    1017              :         /*
    1018              :          * Release lock again.  This means that by the time we actually try to
    1019              :          * process the table, it might be gone or renamed.  In the former case
    1020              :          * we'll silently ignore it; in the latter case we'll process it
    1021              :          * anyway, but we must beware that the RangeVar doesn't necessarily
    1022              :          * identify it anymore.  This isn't ideal, perhaps, but there's little
    1023              :          * practical alternative, since we're typically going to commit this
    1024              :          * transaction and begin a new one between now and then.  Moreover,
    1025              :          * holding locks on multiple relations would create significant risk
    1026              :          * of deadlock.
    1027              :          */
    1028         8516 :         UnlockRelationOid(relid, AccessShareLock);
    1029              :     }
    1030              : 
    1031       108558 :     return vacrels;
    1032              : }
    1033              : 
    1034              : /*
    1035              :  * Construct a list of VacuumRelations for all vacuumable rels in
    1036              :  * the current database.  The list is built in vac_context.
    1037              :  */
    1038              : static List *
    1039          125 : get_all_vacuum_rels(MemoryContext vac_context, int options)
    1040              : {
    1041          125 :     List       *vacrels = NIL;
    1042              :     Relation    pgclass;
    1043              :     TableScanDesc scan;
    1044              :     HeapTuple   tuple;
    1045              : 
    1046          125 :     pgclass = table_open(RelationRelationId, AccessShareLock);
    1047              : 
    1048          125 :     scan = table_beginscan_catalog(pgclass, 0, NULL);
    1049              : 
    1050        62238 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1051              :     {
    1052        62113 :         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
    1053              :         MemoryContext oldcontext;
    1054        62113 :         Oid         relid = classForm->oid;
    1055              : 
    1056              :         /*
    1057              :          * We include partitioned tables here; depending on which operation is
    1058              :          * to be performed, caller will decide whether to process or ignore
    1059              :          * them.
    1060              :          */
    1061        62113 :         if (classForm->relkind != RELKIND_RELATION &&
    1062        50728 :             classForm->relkind != RELKIND_MATVIEW &&
    1063        50696 :             classForm->relkind != RELKIND_PARTITIONED_TABLE)
    1064        50583 :             continue;
    1065              : 
    1066              :         /* skip relations that fail the permission check for these options */
    1067        11530 :         if (!vacuum_is_permitted_for_relation(relid, classForm, options))
    1068         1948 :             continue;
    1069              : 
    1070              :         /*
    1071              :          * Build a VacuumRelation for this table OID, built in vac_context.
    1072              :          * We omit a RangeVar since it wouldn't be appropriate to complain
    1073              :          * about failure to open one of these relations later.
    1074              :          */
    1075         9582 :         oldcontext = MemoryContextSwitchTo(vac_context);
    1076         9582 :         vacrels = lappend(vacrels, makeVacuumRelation(NULL,
    1077              :                                                       relid,
    1078              :                                                       NIL));
    1079         9582 :         MemoryContextSwitchTo(oldcontext);
    1080              :     }
    1081              : 
    1082          125 :     table_endscan(scan);
    1083          125 :     table_close(pgclass, AccessShareLock);
    1084              : 
    1085          125 :     return vacrels;
    1086              : }
    1087              : 
    1088              : /*
    1089              :  * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
    1090              :  *
    1091              :  * The target relation and VACUUM parameters are our inputs.
    1092              :  *
    1093              :  * Output parameters are the cutoffs that VACUUM caller should use.
    1094              :  *
    1095              :  * Return value indicates if vacuumlazy.c caller should make its VACUUM
    1096              :  * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
    1097              :  * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
    1098              :  * minimum).
    1099              :  */
    1100              : bool
    1101       116873 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
    1102              :                    struct VacuumCutoffs *cutoffs)
    1103              : {
    1104              :     int         freeze_min_age,
    1105              :                 multixact_freeze_min_age,
    1106              :                 freeze_table_age,
    1107              :                 multixact_freeze_table_age,
    1108              :                 effective_multixact_freeze_max_age;
    1109              :     TransactionId nextXID,
    1110              :                 safeOldestXmin,
    1111              :                 aggressiveXIDCutoff;
    1112              :     MultiXactId nextMXID,
    1113              :                 safeOldestMxact,
    1114              :                 aggressiveMXIDCutoff;
    1115              : 
    1116              :     /* Use mutable copies of freeze age parameters */
    1117       116873 :     freeze_min_age = params->freeze_min_age;
    1118       116873 :     multixact_freeze_min_age = params->multixact_freeze_min_age;
    1119       116873 :     freeze_table_age = params->freeze_table_age;
    1120       116873 :     multixact_freeze_table_age = params->multixact_freeze_table_age;
    1121              : 
    1122              :     /* Set pg_class fields in cutoffs */
    1123       116873 :     cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
    1124       116873 :     cutoffs->relminmxid = rel->rd_rel->relminmxid;
    1125              : 
    1126              :     /*
    1127              :      * Acquire OldestXmin.
    1128              :      *
    1129              :      * We can always ignore processes running lazy vacuum.  This is because we
    1130              :      * use these values only for deciding which tuples we must keep in the
    1131              :      * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
    1132              :      * XID assigned), it's safe to ignore it.  In theory it could be
    1133              :      * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
    1134              :      * that only one vacuum process can be working on a particular table at
    1135              :      * any time, and that each vacuum is always an independent transaction.
    1136              :      */
    1137       116873 :     cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
    1138              : 
    1139              :     Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
    1140              : 
    1141              :     /* Acquire OldestMxact */
    1142       116873 :     cutoffs->OldestMxact = GetOldestMultiXactId();
    1143              :     Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
    1144              : 
    1145              :     /* Acquire next XID/next MXID values used to apply age-based settings */
    1146       116873 :     nextXID = ReadNextTransactionId();
    1147       116873 :     nextMXID = ReadNextMultiXactId();
    1148              : 
    1149              :     /*
    1150              :      * Also compute the multixact age for which freezing is urgent.  This is
    1151              :      * normally autovacuum_multixact_freeze_max_age, but may be less if
    1152              :      * multixact members are bloated.
    1153              :      */
    1154       116873 :     effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
    1155              : 
    1156              :     /*
    1157              :      * Almost ready to set freeze output parameters; check if OldestXmin or
    1158              :      * OldestMxact are held back to an unsafe degree before we start on that.
    1159              :      */
    1160       116873 :     safeOldestXmin = nextXID - autovacuum_freeze_max_age;
    1161       116873 :     if (!TransactionIdIsNormal(safeOldestXmin))
    1162            0 :         safeOldestXmin = FirstNormalTransactionId;
    1163       116873 :     safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
    1164       116873 :     if (safeOldestMxact < FirstMultiXactId)
    1165            0 :         safeOldestMxact = FirstMultiXactId;
    1166       116873 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
    1167        81239 :         ereport(WARNING,
    1168              :                 (errmsg("cutoff for removing and freezing tuples is far in the past"),
    1169              :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1170              :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1171       116873 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
    1172            0 :         ereport(WARNING,
    1173              :                 (errmsg("cutoff for freezing multixacts is far in the past"),
    1174              :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1175              :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1176              : 
    1177              :     /*
    1178              :      * Determine the minimum freeze age to use: as specified by the caller, or
    1179              :      * vacuum_freeze_min_age, but in any case not more than half
    1180              :      * autovacuum_freeze_max_age, so that autovacuums to prevent XID
    1181              :      * wraparound won't occur too frequently.
    1182              :      */
    1183       116873 :     if (freeze_min_age < 0)
    1184         7029 :         freeze_min_age = vacuum_freeze_min_age;
    1185       116873 :     freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
    1186              :     Assert(freeze_min_age >= 0);
    1187              : 
    1188              :     /* Compute FreezeLimit, being careful to generate a normal XID */
    1189       116873 :     cutoffs->FreezeLimit = nextXID - freeze_min_age;
    1190       116873 :     if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
    1191            0 :         cutoffs->FreezeLimit = FirstNormalTransactionId;
    1192              :     /* FreezeLimit must always be <= OldestXmin */
    1193       116873 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
    1194        94068 :         cutoffs->FreezeLimit = cutoffs->OldestXmin;
    1195              : 
    1196              :     /*
    1197              :      * Determine the minimum multixact freeze age to use: as specified by
    1198              :      * caller, or vacuum_multixact_freeze_min_age, but in any case not more
    1199              :      * than half effective_multixact_freeze_max_age, so that autovacuums to
    1200              :      * prevent MultiXact wraparound won't occur too frequently.
    1201              :      */
    1202       116873 :     if (multixact_freeze_min_age < 0)
    1203         7029 :         multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
    1204       116873 :     multixact_freeze_min_age = Min(multixact_freeze_min_age,
    1205              :                                    effective_multixact_freeze_max_age / 2);
    1206              :     Assert(multixact_freeze_min_age >= 0);
    1207              : 
    1208              :     /* Compute MultiXactCutoff, being careful to generate a valid value */
    1209       116873 :     cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
    1210       116873 :     if (cutoffs->MultiXactCutoff < FirstMultiXactId)
    1211            0 :         cutoffs->MultiXactCutoff = FirstMultiXactId;
    1212              :     /* MultiXactCutoff must always be <= OldestMxact */
    1213       116873 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
    1214            2 :         cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
    1215              : 
    1216              :     /*
    1217              :      * Finally, figure out if caller needs to do an aggressive VACUUM or not.
    1218              :      *
    1219              :      * Determine the table freeze age to use: as specified by the caller, or
    1220              :      * the value of the vacuum_freeze_table_age GUC, but in any case not more
    1221              :      * than autovacuum_freeze_max_age * 0.95, so that if you have e.g. a nightly
    1222              :      * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
    1223              :      * anti-wraparound autovacuum is launched.
    1224              :      */
    1225       116873 :     if (freeze_table_age < 0)
    1226         7029 :         freeze_table_age = vacuum_freeze_table_age;
    1227       116873 :     freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
    1228              :     Assert(freeze_table_age >= 0);
    1229       116873 :     aggressiveXIDCutoff = nextXID - freeze_table_age;
    1230       116873 :     if (!TransactionIdIsNormal(aggressiveXIDCutoff))
    1231            0 :         aggressiveXIDCutoff = FirstNormalTransactionId;
    1232       116873 :     if (TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid,
    1233              :                                       aggressiveXIDCutoff))
    1234       109768 :         return true;
    1235              : 
    1236              :     /*
    1237              :      * Similar to the above, determine the table freeze age to use for
    1238              :      * multixacts: as specified by the caller, or the value of the
    1239              :      * vacuum_multixact_freeze_table_age GUC, but in any case not more than
    1240              :      * effective_multixact_freeze_max_age * 0.95, so that if you have e.g.
    1241              :      * a nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
    1242              :      * multixacts before anti-wraparound autovacuum is launched.
    1243              :      */
    1244         7105 :     if (multixact_freeze_table_age < 0)
    1245         7029 :         multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
    1246         7105 :     multixact_freeze_table_age =
    1247         7105 :         Min(multixact_freeze_table_age,
    1248              :             effective_multixact_freeze_max_age * 0.95);
    1249              :     Assert(multixact_freeze_table_age >= 0);
    1250         7105 :     aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
    1251         7105 :     if (aggressiveMXIDCutoff < FirstMultiXactId)
    1252            0 :         aggressiveMXIDCutoff = FirstMultiXactId;
    1253         7105 :     if (MultiXactIdPrecedesOrEquals(cutoffs->relminmxid,
    1254              :                                     aggressiveMXIDCutoff))
    1255            0 :         return true;
    1256              : 
    1257              :     /* Non-aggressive VACUUM */
    1258         7105 :     return false;
    1259              : }
    1260              : 
    1261              : /*
    1262              :  * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
    1263              :  * mechanism to determine if its table's relfrozenxid and relminmxid are now
    1264              :  * dangerously far in the past.
    1265              :  *
    1266              :  * When we return true, VACUUM caller triggers the failsafe.
    1267              :  */
    1268              : bool
    1269       118702 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
    1270              : {
    1271       118702 :     TransactionId relfrozenxid = cutoffs->relfrozenxid;
    1272       118702 :     MultiXactId relminmxid = cutoffs->relminmxid;
    1273              :     TransactionId xid_skip_limit;
    1274              :     MultiXactId multi_skip_limit;
    1275              :     int         skip_index_vacuum;
    1276              : 
    1277              :     Assert(TransactionIdIsNormal(relfrozenxid));
    1278              :     Assert(MultiXactIdIsValid(relminmxid));
    1279              : 
    1280              :     /*
    1281              :      * Determine the index skipping age to use: vacuum_failsafe_age, but in
    1282              :      * any case no less than autovacuum_freeze_max_age * 1.05.
    1283              :      */
    1284       118702 :     skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
    1285              : 
    1286       118702 :     xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
    1287       118702 :     if (!TransactionIdIsNormal(xid_skip_limit))
    1288            0 :         xid_skip_limit = FirstNormalTransactionId;
    1289              : 
    1290       118702 :     if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
    1291              :     {
    1292              :         /* The table's relfrozenxid is too old */
    1293        23684 :         return true;
    1294              :     }
    1295              : 
    1296              :     /*
    1297              :      * Similar to above, determine the index skipping age to use for
    1298              :      * multixacts: vacuum_multixact_failsafe_age, but in any case no less
    1299              :      * than autovacuum_multixact_freeze_max_age * 1.05.
    1300              :      */
    1301        95018 :     skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
    1302              :                             autovacuum_multixact_freeze_max_age * 1.05);
    1303              : 
    1304        95018 :     multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
    1305        95018 :     if (multi_skip_limit < FirstMultiXactId)
    1306            0 :         multi_skip_limit = FirstMultiXactId;
    1307              : 
    1308        95018 :     if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
    1309              :     {
    1310              :         /* The table's relminmxid is too old */
    1311            0 :         return true;
    1312              :     }
    1313              : 
    1314        95018 :     return false;
    1315              : }
    1316              : 
    1317              : /*
    1318              :  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
    1319              :  *
    1320              :  *      If we scanned the whole relation then we should just use the count of
    1321              :  *      live tuples seen; but if we did not, we should not blindly extrapolate
    1322              :  *      from that number, since VACUUM may have scanned a quite nonrandom
    1323              :  *      subset of the table.  When we have only partial information, we take
    1324              :  *      the old value of pg_class.reltuples/pg_class.relpages as a measurement
    1325              :  *      of the tuple density in the unscanned pages.
    1326              :  *
    1327              :  *      Note: scanned_tuples should count only *live* tuples, since
    1328              :  *      pg_class.reltuples is defined that way.
    1329              :  */
    1330              : double
    1331       116484 : vac_estimate_reltuples(Relation relation,
    1332              :                        BlockNumber total_pages,
    1333              :                        BlockNumber scanned_pages,
    1334              :                        double scanned_tuples)
    1335              : {
    1336       116484 :     BlockNumber old_rel_pages = relation->rd_rel->relpages;
    1337       116484 :     double      old_rel_tuples = relation->rd_rel->reltuples;
    1338              :     double      old_density;
    1339              :     double      unscanned_pages;
    1340              :     double      total_tuples;
    1341              : 
    1342              :     /* If we did scan the whole table, just use the count as-is */
    1343       116484 :     if (scanned_pages >= total_pages)
    1344       112641 :         return scanned_tuples;
    1345              : 
    1346              :     /*
    1347              :      * When successive VACUUM commands scan the same few pages again and
    1348              :      * again, without anything from the table really changing, there is a risk
    1349              :      * that our beliefs about tuple density will gradually become distorted.
    1350              :      * This might be caused by vacuumlazy.c implementation details, such as
    1351              :      * its tendency to always scan the last heap page.  Handle that here.
    1352              :      *
    1353              :      * If the relation is _exactly_ the same size according to the existing
    1354              :      * pg_class entry, and only a few of its pages (less than 2%) were
    1355              :      * scanned, keep the existing value of reltuples.  Also keep the existing
    1356              :      * value when the scanned subset of rel's pages is at most a single page.
    1357              :      *
    1358              :      * (Note: we might be returning -1 here.)
    1359              :      */
    1360         3843 :     if (old_rel_pages == total_pages &&
    1361         3821 :         scanned_pages < (double) total_pages * 0.02)
    1362         2776 :         return old_rel_tuples;
    1363         1067 :     if (scanned_pages <= 1)
    1364          955 :         return old_rel_tuples;
    1365              : 
    1366              :     /*
    1367              :      * If old density is unknown, we can't do much except scale up
    1368              :      * scanned_tuples to match total_pages.
    1369              :      */
    1370          112 :     if (old_rel_tuples < 0 || old_rel_pages == 0)
    1371            4 :         return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
    1372              : 
    1373              :     /*
    1374              :      * Okay, we've covered the corner cases.  The normal calculation is to
    1375              :      * convert the old measurement to a density (tuples per page), then
    1376              :      * estimate the number of tuples in the unscanned pages using that figure,
    1377              :      * and finally add on the number of tuples in the scanned pages.
    1378              :      */
    1379          108 :     old_density = old_rel_tuples / old_rel_pages;
    1380          108 :     unscanned_pages = (double) total_pages - (double) scanned_pages;
    1381          108 :     total_tuples = old_density * unscanned_pages + scanned_tuples;
    1382          108 :     return floor(total_tuples + 0.5);
    1383              : }
    1384              : 
    1385              : 
    1386              : /*
    1387              :  *  vac_update_relstats() -- update statistics for one relation
    1388              :  *
    1389              :  *      Update the whole-relation statistics that are kept in its pg_class
    1390              :  *      row.  There are additional stats that will be updated if we are
    1391              :  *      doing ANALYZE, but we always update these stats.  This routine works
    1392              :  *      for both index and heap relation entries in pg_class.
    1393              :  *
    1394              :  *      We violate transaction semantics here by overwriting the rel's
    1395              :  *      existing pg_class tuple with the new values.  This is reasonably
    1396              :  *      safe as long as we're sure that the new values are correct whether or
    1397              :  *      not this transaction commits.  The reason for doing this is that if
    1398              :  *      we updated these tuples in the usual way, vacuuming pg_class itself
    1399              :  *      wouldn't work very well --- by the time we got done with a vacuum
    1400              :  *      cycle, most of the tuples in pg_class would've been obsoleted.  Of
    1401              :  *      course, this only works for fixed-size not-null columns, but these are.
    1402              :  *
    1403              :  *      Another reason for doing it this way is that when we are in a lazy
    1404              :  *      VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
    1405              :  *      Somebody vacuuming pg_class might think they could delete a tuple
    1406              :  *      marked with xmin = our xid.
    1407              :  *
    1408              :  *      In addition to fundamentally nontransactional statistics such as
    1409              :  *      relpages and relallvisible, we try to maintain certain lazily-updated
    1410              :  *      DDL flags such as relhasindex, by clearing them if no longer correct.
    1411              :  *      It's safe to do this in VACUUM, which can't run in parallel with
    1412              :  *      CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
    1413              :  *      However, it's *not* safe to do it in an ANALYZE that's within an
    1414              :  *      outer transaction, because for example the current transaction might
    1415              :  *      have dropped the last index; then we'd think relhasindex should be
    1416              :  *      cleared, but if the transaction later rolls back this would be wrong.
    1417              :  *      So we refrain from updating the DDL flags if we're inside an outer
    1418              :  *      transaction.  This is OK since postponing the flag maintenance is
    1419              :  *      always allowable.
    1420              :  *
    1421              :  *      Note: num_tuples should count only *live* tuples, since
    1422              :  *      pg_class.reltuples is defined that way.
    1423              :  *
    1424              :  *      This routine is shared by VACUUM and ANALYZE.
    1425              :  */
    1426              : void
    1427       142163 : vac_update_relstats(Relation relation,
    1428              :                     BlockNumber num_pages, double num_tuples,
    1429              :                     BlockNumber num_all_visible_pages,
    1430              :                     BlockNumber num_all_frozen_pages,
    1431              :                     bool hasindex, TransactionId frozenxid,
    1432              :                     MultiXactId minmulti,
    1433              :                     bool *frozenxid_updated, bool *minmulti_updated,
    1434              :                     bool in_outer_xact)
    1435              : {
    1436       142163 :     Oid         relid = RelationGetRelid(relation);
    1437              :     Relation    rd;
    1438              :     ScanKeyData key[1];
    1439              :     HeapTuple   ctup;
    1440              :     void       *inplace_state;
    1441              :     Form_pg_class pgcform;
    1442              :     bool        dirty,
    1443              :                 futurexid,
    1444              :                 futuremxid;
    1445              :     TransactionId oldfrozenxid;
    1446              :     MultiXactId oldminmulti;
    1447              : 
    1448       142163 :     rd = table_open(RelationRelationId, RowExclusiveLock);
    1449              : 
    1450              :     /* Fetch a copy of the tuple to scribble on */
    1451       142163 :     ScanKeyInit(&key[0],
    1452              :                 Anum_pg_class_oid,
    1453              :                 BTEqualStrategyNumber, F_OIDEQ,
    1454              :                 ObjectIdGetDatum(relid));
    1455       142163 :     systable_inplace_update_begin(rd, ClassOidIndexId, true,
    1456              :                                   NULL, 1, key, &ctup, &inplace_state);
    1457       142163 :     if (!HeapTupleIsValid(ctup))
    1458            0 :         elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
    1459              :              relid);
    1460       142163 :     pgcform = (Form_pg_class) GETSTRUCT(ctup);
    1461              : 
    1462              :     /* Apply statistical updates, if any, to copied tuple */
    1463              : 
    1464       142163 :     dirty = false;
    1465       142163 :     if (pgcform->relpages != (int32) num_pages)
    1466              :     {
    1467         5634 :         pgcform->relpages = (int32) num_pages;
    1468         5634 :         dirty = true;
    1469              :     }
    1470       142163 :     if (pgcform->reltuples != (float4) num_tuples)
    1471              :     {
    1472        12353 :         pgcform->reltuples = (float4) num_tuples;
    1473        12353 :         dirty = true;
    1474              :     }
    1475       142163 :     if (pgcform->relallvisible != (int32) num_all_visible_pages)
    1476              :     {
    1477         3796 :         pgcform->relallvisible = (int32) num_all_visible_pages;
    1478         3796 :         dirty = true;
    1479              :     }
    1480       142163 :     if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
    1481              :     {
    1482         3197 :         pgcform->relallfrozen = (int32) num_all_frozen_pages;
    1483         3197 :         dirty = true;
    1484              :     }
    1485              : 
    1486              :     /* Apply DDL updates, but not inside an outer transaction (see above) */
    1487              : 
    1488       142163 :     if (!in_outer_xact)
    1489              :     {
    1490              :         /*
    1491              :          * If we didn't find any indexes, reset relhasindex.
    1492              :          */
    1493       141881 :         if (pgcform->relhasindex && !hasindex)
    1494              :         {
    1495           15 :             pgcform->relhasindex = false;
    1496           15 :             dirty = true;
    1497              :         }
    1498              : 
    1499              :         /* We also clear relhasrules and relhastriggers if needed */
    1500       141881 :         if (pgcform->relhasrules && relation->rd_rules == NULL)
    1501              :         {
    1502            0 :             pgcform->relhasrules = false;
    1503            0 :             dirty = true;
    1504              :         }
    1505       141881 :         if (pgcform->relhastriggers && relation->trigdesc == NULL)
    1506              :         {
    1507            8 :             pgcform->relhastriggers = false;
    1508            8 :             dirty = true;
    1509              :         }
    1510              :     }
    1511              : 
    1512              :     /*
    1513              :      * Update relfrozenxid, unless caller passed InvalidTransactionId
    1514              :      * indicating it has no new data.
    1515              :      *
    1516              :      * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
    1517              :      * stored relfrozenxid is "in the future" then it seems best to assume
    1518              :      * it's corrupt, and overwrite with the oldest remaining XID in the table.
    1519              :      * This should match vac_update_datfrozenxid() concerning what we consider
    1520              :      * to be "in the future".
    1521              :      */
    1522       142163 :     oldfrozenxid = pgcform->relfrozenxid;
    1523       142163 :     futurexid = false;
    1524       142163 :     if (frozenxid_updated)
    1525       116482 :         *frozenxid_updated = false;
    1526       142163 :     if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
    1527              :     {
    1528        33867 :         bool        update = false;
    1529              : 
    1530        33867 :         if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
    1531        33792 :             update = true;
    1532           75 :         else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
    1533            0 :             futurexid = update = true;
    1534              : 
    1535        33867 :         if (update)
    1536              :         {
    1537        33792 :             pgcform->relfrozenxid = frozenxid;
    1538        33792 :             dirty = true;
    1539        33792 :             if (frozenxid_updated)
    1540        33792 :                 *frozenxid_updated = true;
    1541              :         }
    1542              :     }
    1543              : 
    1544              :     /* Similarly for relminmxid; skipped if minmulti is not a valid MultiXactId */
    1545       142163 :     oldminmulti = pgcform->relminmxid;
    1546       142163 :     futuremxid = false;
    1547       142163 :     if (minmulti_updated)
    1548       116482 :         *minmulti_updated = false;
    1549       142163 :     if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
    1550              :     {
    1551          213 :         bool        update = false;
    1552              : 
    1553          213 :         if (MultiXactIdPrecedes(oldminmulti, minmulti))
    1554          213 :             update = true;
    1555            0 :         else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
    1556            0 :             futuremxid = update = true;
    1557              : 
    1558          213 :         if (update)
    1559              :         {
    1560          213 :             pgcform->relminmxid = minmulti;
    1561          213 :             dirty = true;
    1562          213 :             if (minmulti_updated)
    1563          213 :                 *minmulti_updated = true;
    1564              :         }
    1565              :     }
    1566              : 
    1567              :     /* If anything changed, write out the tuple; otherwise cancel the update. */
    1568       142163 :     if (dirty)
    1569        42473 :         systable_inplace_update_finish(inplace_state, ctup);
    1570              :     else
    1571        99690 :         systable_inplace_update_cancel(inplace_state);
    1572              : 
    1573       142163 :     table_close(rd, RowExclusiveLock);
    1574              : 
    1575       142163 :     if (futurexid)
    1576            0 :         ereport(WARNING,
    1577              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1578              :                  errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
    1579              :                                  oldfrozenxid, frozenxid,
    1580              :                                  RelationGetRelationName(relation))));
    1581       142163 :     if (futuremxid)
    1582            0 :         ereport(WARNING,
    1583              :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1584              :                  errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
    1585              :                                  oldminmulti, minmulti,
    1586              :                                  RelationGetRelationName(relation))));
    1587       142163 : }
    1588              : 
    1589              : 
    1590              : /*
    1591              :  *  vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
    1592              :  *
    1593              :  *      Update pg_database's datfrozenxid entry for our database to be the
    1594              :  *      minimum of the pg_class.relfrozenxid values.
    1595              :  *
    1596              :  *      Similarly, update our datminmxid to be the minimum of the
    1597              :  *      pg_class.relminmxid values.
    1598              :  *
    1599              :  *      If we are able to advance either pg_database value, also try to
    1600              :  *      truncate pg_xact and pg_multixact.
    1601              :  *
    1602              :  *      We violate transaction semantics here by overwriting the database's
    1603              :  *      existing pg_database tuple with the new values.  This is reasonably
    1604              :  *      safe since the new values are correct whether or not this transaction
    1605              :  *      commits.  As with vac_update_relstats, this avoids leaving dead tuples
    1606              :  *      behind after a VACUUM.
    1607              :  */
    1608              : void
    1609         2531 : vac_update_datfrozenxid(void)
    1610              : {
    1611              :     HeapTuple   tuple;
    1612              :     Form_pg_database dbform;
    1613              :     Relation    relation;
    1614              :     SysScanDesc scan;
    1615              :     HeapTuple   classTup;
    1616              :     TransactionId newFrozenXid;
    1617              :     MultiXactId newMinMulti;
    1618              :     TransactionId lastSaneFrozenXid;
    1619              :     MultiXactId lastSaneMinMulti;
    1620         2531 :     bool        bogus = false;
    1621         2531 :     bool        dirty = false;
    1622              :     ScanKeyData key[1];
    1623              :     void       *inplace_state;
    1624              : 
    1625              :     /*
    1626              :      * Restrict this task to one backend per database.  This avoids race
    1627              :      * conditions that would move datfrozenxid or datminmxid backward.  It
    1628              :      * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
    1629              :      * datfrozenxid passed to an earlier vac_truncate_clog() call.
    1630              :      */
    1631         2531 :     LockDatabaseFrozenIds(ExclusiveLock);
    1632              : 
    1633              :     /*
    1634              :      * Initialize the "min" calculation with
    1635              :      * GetOldestNonRemovableTransactionId(), which is a reasonable
    1636              :      * approximation to the minimum relfrozenxid for not-yet-committed
    1637              :      * pg_class entries for new tables; see AddNewRelationTuple().  So we
    1638              :      * cannot produce a wrong minimum by starting with this.
    1639              :      */
    1640         2531 :     newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
    1641              : 
    1642              :     /*
    1643              :      * Similarly, initialize the MultiXact "min" with the value that would be
    1644              :      * used on pg_class for new tables.  See AddNewRelationTuple().
    1645              :      */
    1646         2531 :     newMinMulti = GetOldestMultiXactId();
    1647              : 
    1648              :     /*
    1649              :      * Identify the latest relfrozenxid and relminmxid values that we could
    1650              :      * validly see during the scan.  These are conservative values, but it's
    1651              :      * not really worth trying to be more exact.
    1652              :      */
    1653         2531 :     lastSaneFrozenXid = ReadNextTransactionId();
    1654         2531 :     lastSaneMinMulti = ReadNextMultiXactId();
    1655              : 
    1656              :     /*
    1657              :      * We must seqscan pg_class to find the minimum Xid, because there is no
    1658              :      * index that can help us here.
    1659              :      *
    1660              :      * See vac_truncate_clog() for the race condition to prevent.
    1661              :      */
    1662         2531 :     relation = table_open(RelationRelationId, AccessShareLock);
    1663              : 
    1664         2531 :     scan = systable_beginscan(relation, InvalidOid, false,
    1665              :                               NULL, 0, NULL);
    1666              : 
    1667      1478387 :     while ((classTup = systable_getnext(scan)) != NULL)
    1668              :     {
    1669      1475856 :         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
    1670      1475856 :         volatile TransactionId *relfrozenxid_p = &classForm->relfrozenxid;
    1671      1475856 :         volatile MultiXactId *relminmxid_p = &classForm->relminmxid;
    1672      1475856 :         TransactionId relfrozenxid = *relfrozenxid_p;
    1673      1475856 :         MultiXactId relminmxid = *relminmxid_p;
    1674              : 
    1675              :         /*
    1676              :          * Only consider relations able to hold unfrozen XIDs (anything else
    1677              :          * should have InvalidTransactionId in relfrozenxid anyway).
    1678              :          */
    1679      1475856 :         if (classForm->relkind != RELKIND_RELATION &&
    1680      1162022 :             classForm->relkind != RELKIND_MATVIEW &&
    1681      1160420 :             classForm->relkind != RELKIND_TOASTVALUE)
    1682              :         {
    1683              :             Assert(!TransactionIdIsValid(relfrozenxid));
    1684              :             Assert(!MultiXactIdIsValid(relminmxid));
    1685      1000944 :             continue;
    1686              :         }
    1687              : 
    1688              :         /*
    1689              :          * Some table AMs might not need per-relation xid / multixid horizons.
    1690              :          * It therefore seems reasonable to allow relfrozenxid and relminmxid
    1691              :          * to not be set (i.e. set to their respective Invalid*Id)
    1692              :          * independently. Thus validate and compute horizon for each only if
    1693              :          * set.
    1694              :          *
    1695              :          * If things are working properly, no relation should have a
    1696              :          * relfrozenxid or relminmxid that is "in the future".  However, such
    1697              :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1698              :          * see any entries that are "in the future", chicken out and don't do
    1699              :          * anything.  This ensures we won't truncate clog & multixact SLRUs
    1700              :          * before those relations have been scanned and cleaned up.
    1701              :          */
    1702              : 
    1703       474912 :         if (TransactionIdIsValid(relfrozenxid))
    1704              :         {
    1705              :             Assert(TransactionIdIsNormal(relfrozenxid));
    1706              : 
    1707              :             /* check for values in the future */
    1708       474912 :             if (TransactionIdPrecedes(lastSaneFrozenXid, relfrozenxid))
    1709              :             {
    1710            0 :                 bogus = true;
    1711            0 :                 break;
    1712              :             }
    1713              : 
    1714              :             /* determine new horizon */
    1715       474912 :             if (TransactionIdPrecedes(relfrozenxid, newFrozenXid))
    1716         2491 :                 newFrozenXid = relfrozenxid;
    1717              :         }
    1718              : 
    1719       474912 :         if (MultiXactIdIsValid(relminmxid))
    1720              :         {
    1721              :             /* check for values in the future */
    1722       474912 :             if (MultiXactIdPrecedes(lastSaneMinMulti, relminmxid))
    1723              :             {
    1724            0 :                 bogus = true;
    1725            0 :                 break;
    1726              :             }
    1727              : 
    1728              :             /* determine new horizon */
    1729       474912 :             if (MultiXactIdPrecedes(relminmxid, newMinMulti))
    1730          208 :                 newMinMulti = relminmxid;
    1731              :         }
    1732              :     }
    1733              : 
    1734              :     /* we're done with pg_class */
    1735         2531 :     systable_endscan(scan);
    1736         2531 :     table_close(relation, AccessShareLock);
    1737              : 
    1738              :     /* chicken out if bogus data found */
    1739         2531 :     if (bogus)
    1740            0 :         return;
    1741              : 
    1742              :     Assert(TransactionIdIsNormal(newFrozenXid));
    1743              :     Assert(MultiXactIdIsValid(newMinMulti));
    1744              : 
    1745              :     /* Now fetch the pg_database tuple we need to update. */
    1746         2531 :     relation = table_open(DatabaseRelationId, RowExclusiveLock);
    1747              : 
    1748              :     /*
    1749              :      * Fetch a copy of the tuple to scribble on.  We could check the syscache
    1750              :      * tuple first.  If that concluded !dirty, we'd avoid waiting on
    1751              :      * concurrent heap_update() and would avoid exclusive-locking the buffer.
    1752              :      * For now, don't optimize that.
    1753              :      */
    1754         2531 :     ScanKeyInit(&key[0],
    1755              :                 Anum_pg_database_oid,
    1756              :                 BTEqualStrategyNumber, F_OIDEQ,
    1757              :                 ObjectIdGetDatum(MyDatabaseId));
    1758              : 
    1759         2531 :     systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
    1760              :                                   NULL, 1, key, &tuple, &inplace_state);
    1761              : 
    1762         2531 :     if (!HeapTupleIsValid(tuple))
    1763            0 :         elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
    1764              : 
    1765         2531 :     dbform = (Form_pg_database) GETSTRUCT(tuple);
    1766              : 
    1767              :     /*
    1768              :      * As in vac_update_relstats(), we ordinarily don't want to let
    1769              :      * datfrozenxid go backward; but if it's "in the future" then it must be
    1770              :      * corrupt and it seems best to overwrite it.
    1771              :      */
    1772         2870 :     if (dbform->datfrozenxid != newFrozenXid &&
    1773          339 :         (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
    1774            0 :          TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
    1775              :     {
    1776          339 :         dbform->datfrozenxid = newFrozenXid;
    1777          339 :         dirty = true;
    1778              :     }
    1779              :     else
    1780         2192 :         newFrozenXid = dbform->datfrozenxid;
    1781              : 
    1782              :     /* Ditto for datminmxid */
    1783         2532 :     if (dbform->datminmxid != newMinMulti &&
    1784            1 :         (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
    1785            0 :          MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
    1786              :     {
    1787            1 :         dbform->datminmxid = newMinMulti;
    1788            1 :         dirty = true;
    1789              :     }
    1790              :     else
    1791         2530 :         newMinMulti = dbform->datminmxid;
    1792              : 
    1793         2531 :     if (dirty)
    1794          339 :         systable_inplace_update_finish(inplace_state, tuple);
    1795              :     else
    1796         2192 :         systable_inplace_update_cancel(inplace_state);
    1797              : 
    1798         2531 :     heap_freetuple(tuple);
    1799         2531 :     table_close(relation, RowExclusiveLock);
    1800              : 
    1801              :     /*
    1802              :      * If we were able to advance datfrozenxid or datminmxid, see if we can
    1803              :      * truncate pg_xact and/or pg_multixact.  Also do it if the shared
    1804              :      * XID-wrap-limit info is stale, since this action will update that too.
    1805              :      */
    1806         2531 :     if (dirty || ForceTransactionIdLimitUpdate())
    1807         1066 :         vac_truncate_clog(newFrozenXid, newMinMulti,
    1808              :                           lastSaneFrozenXid, lastSaneMinMulti);
    1809              : }
    1810              : 
    1811              : 
    1812              : /*
    1813              :  *  vac_truncate_clog() -- attempt to truncate the commit log
    1814              :  *
    1815              :  *      Scan pg_database to determine the system-wide oldest datfrozenxid,
    1816              :  *      and use it to truncate the transaction commit log (pg_xact).
    1817              :  *      Also update the XID wrap limit info maintained by varsup.c.
    1818              :  *      Likewise for datminmxid.
    1819              :  *
    1820              :  *      The passed frozenXID and minMulti are the updated values for my own
    1821              :  *      pg_database entry. They're used to initialize the "min" calculations.
    1822              :  *      The caller also passes the "last sane" XID and MXID, since it has
    1823              :  *      those at hand already.
    1824              :  *
    1825              :  *      This routine is only invoked when we've managed to change our
    1826              :  *      DB's datfrozenxid/datminmxid values, or we found that the shared
    1827              :  *      XID-wrap-limit info is stale.
    1828              :  */
    1829              : static void
    1830         1066 : vac_truncate_clog(TransactionId frozenXID,
    1831              :                   MultiXactId minMulti,
    1832              :                   TransactionId lastSaneFrozenXid,
    1833              :                   MultiXactId lastSaneMinMulti)
    1834              : {
    1835         1066 :     TransactionId nextXID = ReadNextTransactionId();
    1836              :     Relation    relation;
    1837              :     TableScanDesc scan;
    1838              :     HeapTuple   tuple;
    1839              :     Oid         oldestxid_datoid;
    1840              :     Oid         minmulti_datoid;
    1841         1066 :     bool        bogus = false;
    1842         1066 :     bool        frozenAlreadyWrapped = false;
    1843              : 
    1844              :     /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
    1845         1066 :     LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
    1846              : 
    1847              :     /* init oldest datoids to sync with my frozenXID/minMulti values */
    1848         1066 :     oldestxid_datoid = MyDatabaseId;
    1849         1066 :     minmulti_datoid = MyDatabaseId;
    1850              : 
    1851              :     /*
    1852              :      * Scan pg_database to compute the minimum datfrozenxid/datminmxid
    1853              :      *
    1854              :      * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
    1855              :      * the values could change while we look at them.  Fetch each one just
    1856              :      * once to ensure sane behavior of the comparison logic.  (Here, as in
    1857              :      * many other places, we assume that fetching or updating an XID in shared
    1858              :      * storage is atomic.)
    1859              :      *
    1860              :      * Note: we need not worry about a race condition with new entries being
    1861              :      * inserted by CREATE DATABASE.  Any such entry will have a copy of some
    1862              :      * existing DB's datfrozenxid, and that source DB cannot be ours because
    1863              :      * of the interlock against copying a DB containing an active backend.
    1864              :      * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
    1865              :      * concurrently modify the datfrozenxid's of different databases, the
    1866              :      * worst possible outcome is that pg_xact is not truncated as aggressively
    1867              :      * as it could be.
    1868              :      */
    1869         1066 :     relation = table_open(DatabaseRelationId, AccessShareLock);
    1870              : 
    1871         1066 :     scan = table_beginscan_catalog(relation, 0, NULL);
    1872              : 
    1873         4166 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1874              :     {
    1875         3100 :         Form_pg_database dbform = (Form_pg_database) GETSTRUCT(tuple);
    1876         3100 :         volatile TransactionId *datfrozenxid_p = &dbform->datfrozenxid;
    1877         3100 :         volatile MultiXactId *datminmxid_p = &dbform->datminmxid;
    1878         3100 :         TransactionId datfrozenxid = *datfrozenxid_p;
    1879         3100 :         MultiXactId datminmxid = *datminmxid_p;
    1880              : 
    1881              :         Assert(TransactionIdIsNormal(datfrozenxid));
    1882              :         Assert(MultiXactIdIsValid(datminmxid));
    1883              : 
    1884              :         /*
    1885              :          * If database is in the process of getting dropped, or has been
    1886              :          * interrupted while doing so, no connections to it are possible
    1887              :          * anymore. Therefore we don't need to take it into account here.
    1888              :          * Which is good, because it can't be processed by autovacuum either.
    1889              :          */
    1890         3100 :         if (database_is_invalid_form(dbform))
    1891              :         {
    1892            3 :             elog(DEBUG2,
    1893              :                  "skipping invalid database \"%s\" while computing relfrozenxid",
    1894              :                  NameStr(dbform->datname));
    1895            3 :             continue;
    1896              :         }
    1897              : 
    1898              :         /*
    1899              :          * If things are working properly, no database should have a
    1900              :          * datfrozenxid or datminmxid that is "in the future".  However, such
    1901              :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1902              :          * see any entries that are "in the future", chicken out and don't do
    1903              :          * anything.  This ensures we won't truncate clog before those
    1904              :          * databases have been scanned and cleaned up.  (We will issue the
    1905              :          * "already wrapped" warning if appropriate, though.)
    1906              :          */
    1907         6194 :         if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
    1908         3097 :             MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
    1909            0 :             bogus = true;
    1910              : 
    1911         3097 :         if (TransactionIdPrecedes(nextXID, datfrozenxid))
    1912            0 :             frozenAlreadyWrapped = true;
    1913         3097 :         else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
    1914              :         {
    1915          282 :             frozenXID = datfrozenxid;
    1916          282 :             oldestxid_datoid = dbform->oid;
    1917              :         }
    1918              : 
    1919         3097 :         if (MultiXactIdPrecedes(datminmxid, minMulti))
    1920              :         {
    1921            2 :             minMulti = datminmxid;
    1922            2 :             minmulti_datoid = dbform->oid;
    1923              :         }
    1924              :     }
    1925              : 
    1926         1066 :     table_endscan(scan);
    1927              : 
    1928         1066 :     table_close(relation, AccessShareLock);
    1929              : 
    1930              :     /*
    1931              :      * Do not truncate CLOG if we seem to have suffered wraparound already;
    1932              :      * the computed minimum XID might be bogus.  This case should now be
    1933              :      * impossible due to the defenses in GetNewTransactionId, but we keep the
    1934              :      * test anyway.
    1935              :      */
    1936         1066 :     if (frozenAlreadyWrapped)
    1937              :     {
    1938            0 :         ereport(WARNING,
    1939              :                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
    1940              :                  errdetail("You might have already suffered transaction-wraparound data loss.")));
    1941            0 :         LWLockRelease(WrapLimitsVacuumLock);
    1942            0 :         return;
    1943              :     }
    1944              : 
    1945              :     /* chicken out if data is bogus in any other way */
    1946         1066 :     if (bogus)
    1947              :     {
    1948            0 :         LWLockRelease(WrapLimitsVacuumLock);
    1949            0 :         return;
    1950              :     }
    1951              : 
    1952              :     /*
    1953              :      * Freeze any old transaction IDs in the async notification queue before
    1954              :      * CLOG truncation.
    1955              :      */
    1956         1066 :     AsyncNotifyFreezeXids(frozenXID);
    1957              : 
    1958              :     /*
    1959              :      * Advance the oldest value for commit timestamps before truncating, so
    1960              :      * that if a user requests a timestamp for a transaction we're truncating
    1961              :      * away right after this point, they get NULL instead of an ugly "file not
    1962              :      * found" error from slru.c.  This doesn't matter for xact/multixact
    1963              :      * because they are not subject to arbitrary lookups from users.
    1964              :      */
    1965         1066 :     AdvanceOldestCommitTsXid(frozenXID);
    1966              : 
    1967              :     /*
    1968              :      * Truncate CLOG, multixact and CommitTs to the oldest computed value.
    1969              :      */
    1970         1066 :     TruncateCLOG(frozenXID, oldestxid_datoid);
    1971         1066 :     TruncateCommitTs(frozenXID);
    1972         1066 :     TruncateMultiXact(minMulti, minmulti_datoid);
    1973              : 
    1974              :     /*
    1975              :      * Update the wrap limit for GetNewTransactionId and creation of new
    1976              :      * MultiXactIds.  Note: these functions will also signal the postmaster
    1977              :      * for an(other) autovac cycle if needed.   XXX should we avoid possibly
    1978              :      * signaling twice?
    1979              :      */
    1980         1066 :     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
    1981         1066 :     SetMultiXactIdLimit(minMulti, minmulti_datoid);
    1982              : 
    1983         1066 :     LWLockRelease(WrapLimitsVacuumLock);
    1984              : }
    1985              : 
    1986              : 
    1987              : /*
    1988              :  *  vacuum_rel() -- vacuum one heap relation
    1989              :  *
    1990              :  *      relid identifies the relation to vacuum.  If relation is supplied,
    1991              :  *      use the name therein for reporting any failure to open/lock the rel;
    1992              :  *      do not use it once we've successfully opened the rel, since it might
    1993              :  *      be stale.
    1994              :  *
    1995              :  *      Returns true if it's okay to proceed with a requested ANALYZE
    1996              :  *      operation on this table.
    1997              :  *
    1998              :  *      Doing one heap at a time incurs extra overhead, since we need to
    1999              :  *      check that the heap exists again just before we vacuum it.  The
    2000              :  *      reason that we do this is so that vacuuming can be spread across
    2001              :  *      many small transactions.  Otherwise, two-phase locking would require
    2002              :  *      us to lock the entire database during one pass of the vacuum cleaner.
    2003              :  *
    2004              :  *      At entry and exit, we are not inside a transaction.
    2005              :  */
    2006              : static bool
    2007       116975 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
    2008              :            BufferAccessStrategy bstrategy, bool isTopLevel)
    2009              : {
    2010              :     LOCKMODE    lmode;
    2011              :     Relation    rel;
    2012              :     LockRelId   lockrelid;
    2013              :     Oid         priv_relid;
    2014              :     Oid         toast_relid;
    2015              :     Oid         save_userid;
    2016              :     int         save_sec_context;
    2017              :     int         save_nestlevel;
    2018              :     VacuumParams toast_vacuum_params;
    2019              : 
    2020              :     /*
    2021              :      * This function scribbles on the parameters, so make a copy early to
    2022              :      * avoid affecting the TOAST table (if we do end up recursing to it).
    2023              :      */
    2024       116975 :     memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
    2025              : 
    2026              :     /* Begin a transaction for vacuuming this relation */
    2027       116975 :     StartTransactionCommand();
    2028              : 
    2029       116975 :     if (!(params.options & VACOPT_FULL))
    2030              :     {
    2031              :         /*
    2032              :          * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
    2033              :          * other concurrent VACUUMs know that they can ignore this one while
    2034              :          * determining their OldestXmin.  (The reason we don't set it during a
    2035              :          * full VACUUM is exactly that we may have to run user-defined
    2036              :          * functions for functional indexes, and we want to make sure that if
    2037              :          * they use the snapshot set above, any tuples it requires can't get
    2038              :          * removed from other tables.  An index function that depends on the
    2039              :          * contents of other tables is arguably broken, but we won't break it
    2040              :          * here by violating transaction semantics.)
    2041              :          *
    2042              :          * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
    2043              :          * autovacuum; it's used to avoid canceling a vacuum that was invoked
    2044              :          * in an emergency.
    2045              :          *
    2046              :          * Note: these flags remain set until CommitTransaction or
    2047              :          * AbortTransaction.  We don't want to clear them until we reset
    2048              :          * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
    2049              :          * might appear to go backwards, which is probably Not Good.  (We also
    2050              :          * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
    2051              :          * xmin doesn't become visible ahead of setting the flag.)
    2052              :          */
    2053       116732 :         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    2054       116732 :         MyProc->statusFlags |= PROC_IN_VACUUM;
    2055       116732 :         if (params.is_wraparound)
    2056        99761 :             MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
    2057       116732 :         ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
    2058       116732 :         LWLockRelease(ProcArrayLock);
    2059              :     }
    2060              : 
    2061              :     /*
    2062              :      * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
    2063              :      * cutoff xids in local memory wrapping around, and to have updated xmin
    2064              :      * horizons.
    2065              :      */
    2066       116975 :     PushActiveSnapshot(GetTransactionSnapshot());
    2067              : 
    2068              :     /*
    2069              :      * Check for user-requested abort.  Note we want this to be inside a
    2070              :      * transaction, so xact.c doesn't issue useless WARNING.
    2071              :      */
    2072       116975 :     CHECK_FOR_INTERRUPTS();
    2073              : 
    2074              :     /*
    2075              :      * Determine the type of lock we want --- hard exclusive lock for a FULL
    2076              :      * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
    2077              :      * way, we can be sure that no other backend is vacuuming the same table.
    2078              :      */
    2079       233950 :     lmode = (params.options & VACOPT_FULL) ?
    2080       116975 :         AccessExclusiveLock : ShareUpdateExclusiveLock;
    2081              : 
    2082              :     /* open the relation and get the appropriate lock on it */
    2083       116975 :     rel = vacuum_open_relation(relid, relation, params.options,
    2084       116975 :                                params.log_vacuum_min_duration >= 0, lmode);
    2085              : 
    2086              :     /* leave if relation could not be opened or locked */
    2087       116975 :     if (!rel)
    2088              :     {
    2089           12 :         PopActiveSnapshot();
    2090           12 :         CommitTransactionCommand();
    2091           12 :         return false;
    2092              :     }
    2093              : 
    2094              :     /*
    2095              :      * When recursing to a TOAST table, check privileges on the parent.  NB:
    2096              :      * This is only safe to do because we hold a session lock on the main
    2097              :      * relation that prevents concurrent deletion.
    2098              :      */
    2099       116963 :     if (OidIsValid(params.toast_parent))
    2100         5808 :         priv_relid = params.toast_parent;
    2101              :     else
    2102       111155 :         priv_relid = RelationGetRelid(rel);
    2103              : 
    2104              :     /*
    2105              :      * Check if relation needs to be skipped based on privileges.  This check
    2106              :      * happens also when building the relation list to vacuum for a manual
    2107              :      * operation, and needs to be done additionally here as VACUUM could
    2108              :      * happen across multiple transactions where privileges could have changed
    2109              :      * in-between.  Make sure to only generate logs for VACUUM in this case.
    2110              :      */
    2111       116963 :     if (!vacuum_is_permitted_for_relation(priv_relid,
    2112              :                                           rel->rd_rel,
    2113       116963 :                                           params.options & ~VACOPT_ANALYZE))
    2114              :     {
    2115           48 :         relation_close(rel, lmode);
    2116           48 :         PopActiveSnapshot();
    2117           48 :         CommitTransactionCommand();
    2118           48 :         return false;
    2119              :     }
    2120              : 
    2121              :     /*
    2122              :      * Check that it's of a vacuumable relkind.
    2123              :      */
    2124       116915 :     if (rel->rd_rel->relkind != RELKIND_RELATION &&
    2125        41933 :         rel->rd_rel->relkind != RELKIND_MATVIEW &&
    2126        41928 :         rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
    2127          125 :         rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
    2128              :     {
    2129            1 :         ereport(WARNING,
    2130              :                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
    2131              :                         RelationGetRelationName(rel))));
    2132            1 :         relation_close(rel, lmode);
    2133            1 :         PopActiveSnapshot();
    2134            1 :         CommitTransactionCommand();
    2135            1 :         return false;
    2136              :     }
    2137              : 
    2138              :     /*
    2139              :      * Silently ignore tables that are temp tables of other backends ---
    2140              :      * trying to vacuum these will lead to great unhappiness, since their
    2141              :      * contents are probably not up-to-date on disk.  (We don't throw a
    2142              :      * warning here; it would just lead to chatter during a database-wide
    2143              :      * VACUUM.)
    2144              :      */
    2145       116914 :     if (RELATION_IS_OTHER_TEMP(rel))
    2146              :     {
    2147            1 :         relation_close(rel, lmode);
    2148            1 :         PopActiveSnapshot();
    2149            1 :         CommitTransactionCommand();
    2150            1 :         return false;
    2151              :     }
    2152              : 
    2153              :     /*
    2154              :      * Silently ignore partitioned tables as there is no work to be done.  The
    2155              :      * useful work is on their child partitions, which have been queued up for
    2156              :      * us separately.
    2157              :      */
    2158       116913 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    2159              :     {
    2160          124 :         relation_close(rel, lmode);
    2161          124 :         PopActiveSnapshot();
    2162          124 :         CommitTransactionCommand();
    2163              :         /* It's OK to proceed with ANALYZE on this table */
    2164          124 :         return true;
    2165              :     }
    2166              : 
    2167              :     /*
    2168              :      * Get a session-level lock too. This will protect our access to the
    2169              :      * relation across multiple transactions, so that we can vacuum the
    2170              :      * relation's TOAST table (if any) secure in the knowledge that no one is
    2171              :      * deleting the parent relation.
    2172              :      *
    2173              :      * NOTE: this cannot block, even if someone else is waiting for access,
    2174              :      * because the lock manager knows that both lock requests are from the
    2175              :      * same process.
    2176              :      */
    2177       116789 :     lockrelid = rel->rd_lockInfo.lockRelId;
    2178       116789 :     LockRelationIdForSession(&lockrelid, lmode);
    2179              : 
    2180              :     /*
    2181              :      * Set index_cleanup option based on index_cleanup reloption if it wasn't
    2182              :      * specified in VACUUM command, or when running in an autovacuum worker
    2183              :      */
    2184       116789 :     if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
    2185              :     {
    2186              :         StdRdOptIndexCleanup vacuum_index_cleanup;
    2187              : 
    2188       116646 :         if (rel->rd_options == NULL)
    2189       115425 :             vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
    2190              :         else
    2191         1221 :             vacuum_index_cleanup =
    2192         1221 :                 ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
    2193              : 
    2194       116646 :         if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
    2195       116618 :             params.index_cleanup = VACOPTVALUE_AUTO;
    2196           28 :         else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
    2197           14 :             params.index_cleanup = VACOPTVALUE_ENABLED;
    2198              :         else
    2199              :         {
    2200              :             Assert(vacuum_index_cleanup ==
    2201              :                    STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
    2202           14 :             params.index_cleanup = VACOPTVALUE_DISABLED;
    2203              :         }
    2204              :     }
    2205              : 
    2206              : #ifdef USE_INJECTION_POINTS
    2207       116789 :     if (params.index_cleanup == VACOPTVALUE_AUTO)
    2208       116622 :         INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
    2209          167 :     else if (params.index_cleanup == VACOPTVALUE_DISABLED)
    2210          144 :         INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
    2211           23 :     else if (params.index_cleanup == VACOPTVALUE_ENABLED)
    2212           23 :         INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
    2213              : #endif
    2214              : 
    2215              :     /*
    2216              :      * Check if the vacuum_max_eager_freeze_failure_rate table storage
    2217              :      * parameter was specified. This overrides the GUC value.
    2218              :      */
    2219       116789 :     if (rel->rd_options != NULL &&
    2220         1229 :         ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
    2221            0 :         params.max_eager_freeze_failure_rate =
    2222            0 :             ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
    2223              : 
    2224              :     /*
    2225              :      * Set truncate option based on truncate reloption or GUC if it wasn't
    2226              :      * specified in VACUUM command, or when running in an autovacuum worker
    2227              :      */
    2228       116789 :     if (params.truncate == VACOPTVALUE_UNSPECIFIED)
    2229              :     {
    2230       116650 :         StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
    2231              : 
    2232       116650 :         if (opts && opts->vacuum_truncate != PG_TERNARY_UNSET)
    2233              :         {
    2234           20 :             if (opts->vacuum_truncate == PG_TERNARY_TRUE)
    2235            6 :                 params.truncate = VACOPTVALUE_ENABLED;
    2236              :             else
    2237           14 :                 params.truncate = VACOPTVALUE_DISABLED;
    2238              :         }
    2239       116630 :         else if (vacuum_truncate)
    2240       116616 :             params.truncate = VACOPTVALUE_ENABLED;
    2241              :         else
    2242           14 :             params.truncate = VACOPTVALUE_DISABLED;
    2243              :     }
    2244              : 
    2245              : #ifdef USE_INJECTION_POINTS
    2246       116789 :     if (params.truncate == VACOPTVALUE_AUTO)
    2247            0 :         INJECTION_POINT("vacuum-truncate-auto", NULL);
    2248       116789 :     else if (params.truncate == VACOPTVALUE_DISABLED)
    2249          166 :         INJECTION_POINT("vacuum-truncate-disabled", NULL);
    2250       116623 :     else if (params.truncate == VACOPTVALUE_ENABLED)
    2251       116623 :         INJECTION_POINT("vacuum-truncate-enabled", NULL);
    2252              : #endif
    2253              : 
    2254              :     /*
    2255              :      * Remember the relation's TOAST relation for later, if the caller asked
    2256              :      * us to process it.  In VACUUM FULL, though, the toast table is
    2257              :      * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
    2258              :      * unless PROCESS_MAIN is disabled.
    2259              :      */
    2260       116789 :     if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
    2261        16835 :         ((params.options & VACOPT_FULL) == 0 ||
    2262          226 :          (params.options & VACOPT_PROCESS_MAIN) == 0))
    2263        16613 :         toast_relid = rel->rd_rel->reltoastrelid;
    2264              :     else
    2265       100176 :         toast_relid = InvalidOid;
    2266              : 
    2267              :     /*
    2268              :      * Switch to the table owner's userid, so that any index functions are run
    2269              :      * as that user.  Also lock down security-restricted operations and
    2270              :      * arrange to make GUC variable changes local to this command. (This is
    2271              :      * unnecessary, but harmless, for lazy VACUUM.)
    2272              :      */
    2273       116789 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
    2274       116789 :     SetUserIdAndSecContext(rel->rd_rel->relowner,
    2275              :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
    2276       116789 :     save_nestlevel = NewGUCNestLevel();
    2277       116789 :     RestrictSearchPath();
    2278              : 
    2279              :     /*
    2280              :      * If PROCESS_MAIN is set (the default), it's time to vacuum the main
    2281              :      * relation.  Otherwise, we can skip this part.  If processing the TOAST
    2282              :      * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
    2283              :      * to be set when we recurse to the TOAST table.
    2284              :      */
    2285       116789 :     if (params.options & VACOPT_PROCESS_MAIN)
    2286              :     {
    2287              :         /*
    2288              :          * Do the actual work --- either FULL or "lazy" vacuum
    2289              :          */
    2290       116704 :         if (params.options & VACOPT_FULL)
    2291              :         {
    2292          222 :             ClusterParams cluster_params = {0};
    2293              : 
    2294          222 :             if ((params.options & VACOPT_VERBOSE) != 0)
    2295            1 :                 cluster_params.options |= CLUOPT_VERBOSE;
    2296              : 
    2297              :             /* VACUUM FULL is a variant of REPACK; see repack.c */
    2298          222 :             cluster_rel(REPACK_COMMAND_VACUUMFULL, rel, InvalidOid,
    2299              :                         &cluster_params, isTopLevel);
    2300              :             /* cluster_rel closes the relation, but keeps lock */
    2301              : 
    2302          218 :             rel = NULL;
    2303              :         }
    2304              :         else
    2305       116482 :             table_relation_vacuum(rel, &params, bstrategy);
    2306              :     }
    2307              : 
    2308              :     /* Roll back any GUC changes executed by index functions */
    2309       116785 :     AtEOXact_GUC(false, save_nestlevel);
    2310              : 
    2311              :     /* Restore userid and security context */
    2312       116785 :     SetUserIdAndSecContext(save_userid, save_sec_context);
    2313              : 
    2314              :     /* all done with this class, but hold lock until commit */
    2315       116785 :     if (rel)
    2316       116567 :         relation_close(rel, NoLock);
    2317              : 
    2318              :     /*
    2319              :      * Complete the transaction and free all temporary memory used.
    2320              :      */
    2321       116785 :     PopActiveSnapshot();
    2322       116785 :     CommitTransactionCommand();
    2323              : 
    2324              :     /*
    2325              :      * If the relation has a secondary toast rel, vacuum that too while we
    2326              :      * still hold the session lock on the main table.  Note however that
    2327              :      * "analyze" will not get done on the toast table.  This is good, because
    2328              :      * the toaster always uses hardcoded index access and statistics are
    2329              :      * totally unimportant for toast relations.
    2330              :      */
    2331       116785 :     if (toast_relid != InvalidOid)
    2332              :     {
    2333              :         /*
    2334              :          * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it.  Likewise,
    2335              :          * set toast_parent so that the privilege checks are done on the main
    2336              :          * relation.  NB: This is only safe to do because we hold a session
    2337              :          * lock on the main relation that prevents concurrent deletion.
    2338              :          */
    2339         5808 :         toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
    2340         5808 :         toast_vacuum_params.toast_parent = relid;
    2341              : 
    2342         5808 :         vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy,
    2343              :                    isTopLevel);
    2344              :     }
    2345              : 
    2346              :     /*
    2347              :      * Now release the session-level lock on the main table.
    2348              :      */
    2349       116785 :     UnlockRelationIdForSession(&lockrelid, lmode);
    2350              : 
    2351              :     /* Report that we really did it. */
    2352       116785 :     return true;
    2353              : }
    2354              : 
    2355              : 
    2356              : /*
    2357              :  * Open all the vacuumable indexes of the given relation, obtaining the
    2358              :  * specified kind of lock on each.  Return an array of Relation pointers for
    2359              :  * the indexes into *Irel, and the number of indexes into *nindexes.
    2360              :  *
    2361              :  * We consider an index vacuumable if it is marked insertable (indisready).
    2362              :  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
    2363              :  * execution, and what we have is too corrupt to be processable.  We will
    2364              :  * vacuum even if the index isn't indisvalid; this is important because in a
    2365              :  * unique index, uniqueness checks will be performed anyway and had better not
    2366              :  * hit dangling index pointers.
    2367              :  */
    2368              : void
    2369       126096 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
    2370              :                  int *nindexes, Relation **Irel)
    2371              : {
    2372              :     List       *indexoidlist;
    2373              :     ListCell   *indexoidscan;
    2374              :     int         i;
    2375              : 
    2376              :     Assert(lockmode != NoLock);
    2377              : 
    2378       126096 :     indexoidlist = RelationGetIndexList(relation);
    2379              : 
    2380              :     /* allocate enough memory for all indexes */
    2381       126096 :     i = list_length(indexoidlist);
    2382              : 
    2383       126096 :     if (i > 0)
    2384       118430 :         *Irel = (Relation *) palloc(i * sizeof(Relation));
    2385              :     else
    2386         7666 :         *Irel = NULL;
    2387              : 
    2388              :     /* collect just the ready indexes */
    2389       126096 :     i = 0;
    2390       316847 :     foreach(indexoidscan, indexoidlist)
    2391              :     {
    2392       190751 :         Oid         indexoid = lfirst_oid(indexoidscan);
    2393              :         Relation    indrel;
    2394              : 
    2395       190751 :         indrel = index_open(indexoid, lockmode);
    2396       190751 :         if (indrel->rd_index->indisready)
    2397       190751 :             (*Irel)[i++] = indrel;
    2398              :         else
    2399            0 :             index_close(indrel, lockmode);
    2400              :     }
    2401              : 
    2402       126096 :     *nindexes = i;
    2403              : 
    2404       126096 :     list_free(indexoidlist);
    2405       126096 : }
    2406              : 
    2407              : /*
    2408              :  * Release the resources acquired by vac_open_indexes.  Optionally release
    2409              :  * the locks (say NoLock to keep 'em).
    2410              :  */
    2411              : void
    2412       126680 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
    2413              : {
    2414       126680 :     if (Irel == NULL)
    2415         8254 :         return;
    2416              : 
    2417       309169 :     while (nindexes--)
    2418              :     {
    2419       190743 :         Relation    ind = Irel[nindexes];
    2420              : 
    2421       190743 :         index_close(ind, lockmode);
    2422              :     }
    2423       118426 :     pfree(Irel);
    2424              : }
    2425              : 
    2426              : /*
    2427              :  * vacuum_delay_point --- check for interrupts and cost-based delay.
    2428              :  *
    2429              :  * This should be called in each major loop of VACUUM processing,
    2430              :  * typically once per page processed.
    2431              :  */
    2432              : void
    2433     51193451 : vacuum_delay_point(bool is_analyze)
    2434              : {
    2435     51193451 :     double      msec = 0;
    2436              : 
    2437              :     /* Always check for interrupts */
    2438     51193451 :     CHECK_FOR_INTERRUPTS();
    2439              : 
    2440     51193451 :     if (InterruptPending)
    2441            0 :         return;
    2442              : 
    2443     51193451 :     if (IsParallelWorker())
    2444              :     {
    2445              :         /*
    2446              :          * Update cost-based vacuum delay parameters for a parallel autovacuum
    2447              :          * worker if any changes are detected. It might enable cost-based
    2448              :          * delay so it needs to be called before VacuumCostActive check.
    2449              :          */
    2450          256 :         parallel_vacuum_update_shared_delay_params();
    2451              :     }
    2452              : 
    2453     51193451 :     if (!VacuumCostActive && !ConfigReloadPending)
    2454     46613655 :         return;
    2455              : 
    2456              :     /*
    2457              :      * Autovacuum workers should reload the configuration file if requested.
    2458              :      * This allows changes to [autovacuum_]vacuum_cost_limit and
    2459              :      * [autovacuum_]vacuum_cost_delay to take effect while a table is being
    2460              :      * vacuumed or analyzed.
    2461              :      */
    2462      4579796 :     if (ConfigReloadPending && AmAutoVacuumWorkerProcess())
    2463              :     {
    2464            1 :         ConfigReloadPending = false;
    2465            1 :         ProcessConfigFile(PGC_SIGHUP);
    2466            1 :         VacuumUpdateCosts();
    2467              : 
    2468              :         /*
    2469              :          * Propagate cost-based vacuum delay parameters to shared memory if
    2470              :          * any of them have changed during the config reload.
    2471              :          */
    2472            1 :         parallel_vacuum_propagate_shared_delay_params();
    2473              :     }
    2474              : 
    2475              :     /*
    2476              :      * If we disabled cost-based delays after reloading the config file,
    2477              :      * return.
    2478              :      */
    2479      4579796 :     if (!VacuumCostActive)
    2480            0 :         return;
    2481              : 
    2482              :     /*
    2483              :      * For parallel vacuum, the delay is computed based on the shared cost
    2484              :      * balance.  See compute_parallel_delay.
    2485              :      */
    2486      4579796 :     if (VacuumSharedCostBalance != NULL)
    2487          427 :         msec = compute_parallel_delay();
    2488      4579369 :     else if (VacuumCostBalance >= vacuum_cost_limit)
    2489         2664 :         msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
    2490              : 
    2491              :     /* Nap if appropriate */
    2492      4579796 :     if (msec > 0)
    2493              :     {
    2494              :         instr_time  delay_start;
    2495              : 
    2496         2685 :         if (msec > vacuum_cost_delay * 4)
    2497            5 :             msec = vacuum_cost_delay * 4;
    2498              : 
    2499         2685 :         if (track_cost_delay_timing)
    2500            0 :             INSTR_TIME_SET_CURRENT(delay_start);
    2501              : 
    2502         2685 :         pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
    2503         2685 :         pg_usleep(msec * 1000);
    2504         2685 :         pgstat_report_wait_end();
    2505              : 
    2506         2685 :         if (track_cost_delay_timing)
    2507              :         {
    2508              :             instr_time  delay_end;
    2509              :             instr_time  delay;
    2510              : 
    2511            0 :             INSTR_TIME_SET_CURRENT(delay_end);
    2512            0 :             INSTR_TIME_SET_ZERO(delay);
    2513            0 :             INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start);
    2514              : 
    2515              :             /*
    2516              :              * For parallel workers, we only report the delay time every once
    2517              :              * in a while to avoid overloading the leader with messages and
    2518              :              * interrupts.
    2519              :              */
    2520            0 :             if (IsParallelWorker())
    2521              :             {
    2522              :                 static instr_time last_report_time;
    2523              :                 instr_time  time_since_last_report;
    2524              : 
    2525              :                 Assert(!is_analyze);
    2526              : 
    2527              :                 /* Accumulate the delay time */
    2528            0 :                 parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay);
    2529              : 
    2530              :                 /* Calculate interval since last report */
    2531            0 :                 INSTR_TIME_SET_ZERO(time_since_last_report);
    2532            0 :                 INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time);
    2533              : 
    2534              :                 /* If we haven't reported in a while, do so now */
    2535            0 :                 if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >=
    2536              :                     PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS)
    2537              :                 {
    2538            0 :                     pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME,
    2539              :                                                         parallel_vacuum_worker_delay_ns);
    2540              : 
    2541              :                     /* Reset variables */
    2542            0 :                     last_report_time = delay_end;
    2543            0 :                     parallel_vacuum_worker_delay_ns = 0;
    2544              :                 }
    2545              :             }
    2546            0 :             else if (is_analyze)
    2547            0 :                 pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME,
    2548            0 :                                            INSTR_TIME_GET_NANOSEC(delay));
    2549              :             else
    2550            0 :                 pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME,
    2551            0 :                                            INSTR_TIME_GET_NANOSEC(delay));
    2552              :         }
    2553              : 
    2554              :         /*
    2555              :          * We don't want to ignore postmaster death during very long vacuums
    2556              :          * with vacuum_cost_delay configured.  We can't use the usual
    2557              :          * WaitLatch() approach here because we want microsecond-based sleep
    2558              :          * durations above.
    2559              :          */
    2560         2685 :         if (IsUnderPostmaster && !PostmasterIsAlive())
    2561            0 :             exit(1);
    2562              : 
    2563         2685 :         VacuumCostBalance = 0;
    2564              : 
    2565              :         /*
    2566              :          * Balance and update limit values for autovacuum workers. We must do
    2567              :          * this periodically, as the number of workers across which we are
    2568              :          * balancing the limit may have changed.
    2569              :          *
    2570              :          * TODO: There may be better criteria for determining when to do this
    2571              :          * besides "check after napping".
    2572              :          */
    2573         2685 :         AutoVacuumUpdateCostLimit();
    2574              : 
    2575              :         /* Might have gotten an interrupt while sleeping */
    2576         2685 :         CHECK_FOR_INTERRUPTS();
    2577              :     }
    2578              : }
    2579              : 
    2580              : /*
    2581              :  * Computes the vacuum delay for parallel workers.
    2582              :  *
    2583              :  * The basic idea of a cost-based delay for parallel vacuum is to allow each
    2584              :  * worker to sleep in proportion to the share of work it's done.  We achieve this
    2585              :  * by allowing all parallel vacuum workers including the leader process to
    2586              :  * have a shared view of cost related parameters (mainly VacuumCostBalance).
    2587              :  * We allow each worker to update it as and when it has incurred any cost and
    2588              :  * then based on that decide whether it needs to sleep.  We compute the time
    2589              :  * to sleep for a worker based on the cost it has incurred
    2590              :  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
    2591              :  * that amount.  This avoids putting to sleep those workers which have done less
    2592              :  * I/O than other workers and therefore ensure that workers
    2593              :  * which are doing more I/O got throttled more.
    2594              :  *
    2595              :  * We allow a worker to sleep only if it has performed I/O above a certain
    2596              :  * threshold, which is calculated based on the number of active workers
    2597              :  * (VacuumActiveNWorkers), and the overall cost balance is more than
    2598              :  * VacuumCostLimit set by the system.  Testing reveals that we achieve
    2599              :  * the required throttling if we force a worker that has done more than 50%
    2600              :  * of its share of work to sleep.
    2601              :  */
    2602              : static double
    2603          427 : compute_parallel_delay(void)
    2604              : {
    2605          427 :     double      msec = 0;
    2606              :     uint32      shared_balance;
    2607              :     int         nworkers;
    2608              : 
    2609              :     /* Parallel vacuum must be active, so the shared balance pointer is set */
    2610              :     Assert(VacuumSharedCostBalance);
    2611              : 
    2612          427 :     nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
    2613              : 
    2614              :     /* At least count itself; nworkers is also used as a divisor below */
    2615              :     Assert(nworkers >= 1);
    2616              : 
    2617              :     /* Atomically add the not-yet-reported cost to the shared balance */
    2618          427 :     shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
    2619              : 
    2620              :     /* Accumulate the same cost into this process's local running total */
    2621          427 :     VacuumCostBalanceLocal += VacuumCostBalance;
    2622              : 
    2623          427 :     if ((shared_balance >= vacuum_cost_limit) &&
    2624           85 :         (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
    2625              :     {
    2626              :         /* Sleep in proportion to the local total, then remove that total from the shared balance */
    2627           21 :         msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
    2628           21 :         pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
    2629           21 :         VacuumCostBalanceLocal = 0;
    2630              :     }
    2631              : 
    2632              :     /*
    2633              :      * Reset the pending cost: it was added to both the shared and local balances above.
    2634              :      */
    2635          427 :     VacuumCostBalance = 0;
    2636              : 
    2637          427 :     return msec;
    2638              : }
    2639              : 
    2640              : /*
    2641              :  * get_vacoptval_from_boolean() -- a wrapper function of defGetBoolean().
    2642              :  *
    2643              :  * Passes 'def' to defGetBoolean() and returns VACOPTVALUE_ENABLED when the
    2644              :  * result is true, VACOPTVALUE_DISABLED when it is false.
    2645              :  */
    2646              : static VacOptValue
    2647          182 : get_vacoptval_from_boolean(DefElem *def)
    2648              : {
    2649          182 :     return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
    2650              : }
    2651              : 
    2652              : /*
    2653              :  *  vac_bulkdel_one_index() -- bulk-deletion for index relation.
    2654              :  *
                      :  * Calls index_bulk_delete() with vac_tid_reaped() as the callback and
                      :  * 'dead_items' as the callback's state, then reports the index name and
                      :  * dead_items_info->num_items at ivinfo->message_level.
                      :  *
    2655              :  * Returns the stats pointer that index_bulk_delete() produced from 'istat'.
    2656              :  */
    2657              : IndexBulkDeleteResult *
    2658         1479 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
    2659              :                       TidStore *dead_items, VacDeadItemsInfo *dead_items_info)
    2660              : {
    2661              :     /* Do bulk deletion; vac_tid_reaped is the callback, dead_items its state */
    2662         1479 :     istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
    2663              :                               dead_items);
    2664              : 
    2665         1479 :     ereport(ivinfo->message_level,
    2666              :             (errmsg("scanned index \"%s\" to remove %" PRId64 " row versions",
    2667              :                     RelationGetRelationName(ivinfo->index),
    2668              :                     dead_items_info->num_items)));
    2669              : 
    2670         1479 :     return istat;
    2671              : }
    2672              : 
    2673              : /*
    2674              :  *  vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
    2675              :  *
                      :  * Calls index_vacuum_cleanup() with 'ivinfo' and 'istat'.  If that returns
                      :  * non-NULL stats, reports the index's tuple and page counts (total,
                      :  * removed, newly deleted, deleted, reusable) at ivinfo->message_level.
                      :  *
    2676              :  * Returns the result of index_vacuum_cleanup(), which is checked for NULL here.
    2677              :  */
    2678              : IndexBulkDeleteResult *
    2679       140310 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
    2680              : {
    2681       140310 :     istat = index_vacuum_cleanup(ivinfo, istat);
    2682              : 
    2683       140310 :     if (istat)
    2684         1632 :         ereport(ivinfo->message_level,
    2685              :                 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
    2686              :                         RelationGetRelationName(ivinfo->index),
    2687              :                         istat->num_index_tuples,
    2688              :                         istat->num_pages),
    2689              :                  errdetail("%.0f index row versions were removed.\n"
    2690              :                            "%u index pages were newly deleted.\n"
    2691              :                            "%u index pages are currently deleted, of which %u are currently reusable.",
    2692              :                            istat->tuples_removed,
    2693              :                            istat->pages_newly_deleted,
    2694              :                            istat->pages_deleted, istat->pages_free)));
    2695              : 
    2696       140310 :     return istat;
    2697              : }
    2698              : 
    2699              : /*
    2700              :  *  vac_tid_reaped() -- is a particular tid deletable?
    2701              :  *
    2702              :  *      This has the right signature to be an IndexBulkDeleteCallback.
                      :  *
                      :  *      'state' is cast to the TidStore to search; the return value is
                      :  *      whatever TidStoreIsMember() reports for 'itemptr' in that store.
    2703              :  */
    2704              : static bool
    2705      3918623 : vac_tid_reaped(ItemPointer itemptr, void *state)
    2706              : {
    2707      3918623 :     TidStore   *dead_items = (TidStore *) state;
    2708              : 
    2709      3918623 :     return TidStoreIsMember(dead_items, itemptr);
    2710              : }
        

Generated by: LCOV version 2.0-1