LCOV - code coverage report
Current view: top level - src/backend/commands - vacuum.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16beta1 Lines: 661 735 89.9 %
Date: 2023-05-30 18:12:27 Functions: 23 24 95.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuum.c
       4             :  *    The postgres vacuum cleaner.
       5             :  *
       6             :  * This file includes (a) control and dispatch code for VACUUM and ANALYZE
       7             :  * commands, (b) code to compute various vacuum thresholds, and (c) index
       8             :  * vacuum code.
       9             :  *
      10             :  * VACUUM for heap AM is implemented in vacuumlazy.c, parallel vacuum in
      11             :  * vacuumparallel.c, ANALYZE in analyze.c, and VACUUM FULL is a variant of
      12             :  * CLUSTER, handled in cluster.c.
      13             :  *
      14             :  *
      15             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      16             :  * Portions Copyright (c) 1994, Regents of the University of California
      17             :  *
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/commands/vacuum.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "access/clog.h"
      29             : #include "access/commit_ts.h"
      30             : #include "access/genam.h"
      31             : #include "access/heapam.h"
      32             : #include "access/htup_details.h"
      33             : #include "access/multixact.h"
      34             : #include "access/tableam.h"
      35             : #include "access/transam.h"
      36             : #include "access/xact.h"
      37             : #include "catalog/namespace.h"
      38             : #include "catalog/index.h"
      39             : #include "catalog/pg_database.h"
      40             : #include "catalog/pg_inherits.h"
      41             : #include "catalog/pg_namespace.h"
      42             : #include "commands/cluster.h"
      43             : #include "commands/defrem.h"
      44             : #include "commands/tablecmds.h"
      45             : #include "commands/vacuum.h"
      46             : #include "miscadmin.h"
      47             : #include "nodes/makefuncs.h"
      48             : #include "pgstat.h"
      49             : #include "postmaster/autovacuum.h"
      50             : #include "postmaster/bgworker_internals.h"
      51             : #include "postmaster/interrupt.h"
      52             : #include "storage/bufmgr.h"
      53             : #include "storage/lmgr.h"
      54             : #include "storage/pmsignal.h"
      55             : #include "storage/proc.h"
      56             : #include "storage/procarray.h"
      57             : #include "utils/acl.h"
      58             : #include "utils/fmgroids.h"
      59             : #include "utils/guc.h"
      60             : #include "utils/guc_hooks.h"
      61             : #include "utils/memutils.h"
      62             : #include "utils/pg_rusage.h"
      63             : #include "utils/snapmgr.h"
      64             : #include "utils/syscache.h"
      65             : 
      66             : 
      67             : /*
      68             :  * GUC parameters
      69             :  */
      70             : int         vacuum_freeze_min_age;
      71             : int         vacuum_freeze_table_age;
      72             : int         vacuum_multixact_freeze_min_age;
      73             : int         vacuum_multixact_freeze_table_age;
      74             : int         vacuum_failsafe_age;
      75             : int         vacuum_multixact_failsafe_age;
      76             : 
      77             : /*
      78             :  * Variables for cost-based vacuum delay. The defaults differ between
      79             :  * autovacuum and vacuum. They should be set with the appropriate GUC value in
      80             :  * vacuum code. They are initialized here to the defaults for client backends
      81             :  * executing VACUUM or ANALYZE.
      82             :  */
      83             : double      vacuum_cost_delay = 0;
      84             : int         vacuum_cost_limit = 200;
      85             : 
      86             : /*
      87             :  * VacuumFailsafeActive is defined as a global so that we can determine
      88             :  * whether or not to re-enable cost-based vacuum delay when vacuuming a table.
      89             :  * If failsafe mode has been engaged, we will not re-enable cost-based delay
      90             :  * for the table until after vacuuming has completed, regardless of other
      91             :  * settings.
      92             :  *
      93             :  * Only VACUUM code should inspect this variable and only table access methods
      94             :  * should set it to true. In Table AM-agnostic VACUUM code, this variable is
      95             :  * inspected to determine whether or not to allow cost-based delays. Table AMs
      96             :  * are free to set it if they desire this behavior, but it is false by default
      97             :  * and reset to false in between vacuuming each relation.
      98             :  */
      99             : bool        VacuumFailsafeActive = false;
     100             : 
     101             : /*
     102             :  * Variables for cost-based parallel vacuum.  See comments atop
     103             :  * compute_parallel_delay to understand how it works.
     104             :  */
     105             : pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
     106             : pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
     107             : int         VacuumCostBalanceLocal = 0;
     108             : 
     109             : /* non-export function prototypes */
     110             : static List *expand_vacuum_rel(VacuumRelation *vrel,
     111             :                                MemoryContext vac_context, int options);
     112             : static List *get_all_vacuum_rels(MemoryContext vac_context, int options);
     113             : static void vac_truncate_clog(TransactionId frozenXID,
     114             :                               MultiXactId minMulti,
     115             :                               TransactionId lastSaneFrozenXid,
     116             :                               MultiXactId lastSaneMinMulti);
     117             : static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
     118             :                        bool skip_privs, BufferAccessStrategy bstrategy);
     119             : static double compute_parallel_delay(void);
     120             : static VacOptValue get_vacoptval_from_boolean(DefElem *def);
     121             : static bool vac_tid_reaped(ItemPointer itemptr, void *state);
     122             : static int  vac_cmp_itemptr(const void *left, const void *right);
     123             : 
     124             : /*
     125             :  * GUC check function to ensure GUC value specified is within the allowable
     126             :  * range.
     127             :  */
     128             : bool
     129        3698 : check_vacuum_buffer_usage_limit(int *newval, void **extra,
     130             :                                 GucSource source)
     131             : {
     132             :     /* Value upper and lower hard limits are inclusive */
     133        3698 :     if (*newval == 0 || (*newval >= MIN_BAS_VAC_RING_SIZE_KB &&
     134        3698 :                          *newval <= MAX_BAS_VAC_RING_SIZE_KB))
     135        3698 :         return true;
     136             : 
     137             :     /* Value does not fall within any allowable range */
     138           0 :     GUC_check_errdetail("\"vacuum_buffer_usage_limit\" must be 0 or between %d kB and %d kB",
     139             :                         MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB);
     140             : 
     141           0 :     return false;
     142             : }
     143             : 
     144             : /*
     145             :  * Primary entry point for manual VACUUM and ANALYZE commands
     146             :  *
     147             :  * This is mainly a preparation wrapper for the real operations that will
     148             :  * happen in vacuum().
     149             :  */
     150             : void
     151       10064 : ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
     152             : {
     153             :     VacuumParams params;
     154       10064 :     BufferAccessStrategy bstrategy = NULL;
     155       10064 :     bool        verbose = false;
     156       10064 :     bool        skip_locked = false;
     157       10064 :     bool        analyze = false;
     158       10064 :     bool        freeze = false;
     159       10064 :     bool        full = false;
     160       10064 :     bool        disable_page_skipping = false;
     161       10064 :     bool        process_main = true;
     162       10064 :     bool        process_toast = true;
     163             :     int         ring_size;
     164       10064 :     bool        skip_database_stats = false;
     165       10064 :     bool        only_database_stats = false;
     166             :     MemoryContext vac_context;
     167             :     ListCell   *lc;
     168             : 
     169             :     /* index_cleanup and truncate values unspecified for now */
     170       10064 :     params.index_cleanup = VACOPTVALUE_UNSPECIFIED;
     171       10064 :     params.truncate = VACOPTVALUE_UNSPECIFIED;
     172             : 
     173             :     /* By default parallel vacuum is enabled */
     174       10064 :     params.nworkers = 0;
     175             : 
     176             :     /*
     177             :      * Set this to an invalid value so it is clear whether or not a
     178             :      * BUFFER_USAGE_LIMIT was specified when making the access strategy.
     179             :      */
     180       10064 :     ring_size = -1;
     181             : 
     182             :     /* Parse options list */
     183       16574 :     foreach(lc, vacstmt->options)
     184             :     {
     185        6546 :         DefElem    *opt = (DefElem *) lfirst(lc);
     186             : 
     187             :         /* Parse common options for VACUUM and ANALYZE */
     188        6546 :         if (strcmp(opt->defname, "verbose") == 0)
     189          28 :             verbose = defGetBoolean(opt);
     190        6518 :         else if (strcmp(opt->defname, "skip_locked") == 0)
     191         334 :             skip_locked = defGetBoolean(opt);
     192        6184 :         else if (strcmp(opt->defname, "buffer_usage_limit") == 0)
     193             :         {
     194             :             const char *hintmsg;
     195             :             int         result;
     196             :             char       *vac_buffer_size;
     197             : 
     198          54 :             vac_buffer_size = defGetString(opt);
     199             : 
     200             :             /*
     201             :              * Check that the specified value is valid and the size falls
     202             :              * within the hard upper and lower limits if it is not 0.
     203             :              */
     204          54 :             if (!parse_int(vac_buffer_size, &result, GUC_UNIT_KB, &hintmsg) ||
     205          48 :                 (result != 0 &&
     206          36 :                  (result < MIN_BAS_VAC_RING_SIZE_KB || result > MAX_BAS_VAC_RING_SIZE_KB)))
     207             :             {
     208          18 :                 ereport(ERROR,
     209             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     210             :                          errmsg("BUFFER_USAGE_LIMIT option must be 0 or between %d kB and %d kB",
     211             :                                 MIN_BAS_VAC_RING_SIZE_KB, MAX_BAS_VAC_RING_SIZE_KB),
     212             :                          hintmsg ? errhint("%s", _(hintmsg)) : 0));
     213             :             }
     214             : 
     215          36 :             ring_size = result;
     216             :         }
     217        6130 :         else if (!vacstmt->is_vacuumcmd)
     218           6 :             ereport(ERROR,
     219             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     220             :                      errmsg("unrecognized ANALYZE option \"%s\"", opt->defname),
     221             :                      parser_errposition(pstate, opt->location)));
     222             : 
     223             :         /* Parse options available on VACUUM */
     224        6124 :         else if (strcmp(opt->defname, "analyze") == 0)
     225         820 :             analyze = defGetBoolean(opt);
     226        5304 :         else if (strcmp(opt->defname, "freeze") == 0)
     227        1120 :             freeze = defGetBoolean(opt);
     228        4184 :         else if (strcmp(opt->defname, "full") == 0)
     229         346 :             full = defGetBoolean(opt);
     230        3838 :         else if (strcmp(opt->defname, "disable_page_skipping") == 0)
     231         180 :             disable_page_skipping = defGetBoolean(opt);
     232        3658 :         else if (strcmp(opt->defname, "index_cleanup") == 0)
     233             :         {
     234             :             /* Interpret no string as the default, which is 'auto' */
     235         172 :             if (!opt->arg)
     236           0 :                 params.index_cleanup = VACOPTVALUE_AUTO;
     237             :             else
     238             :             {
     239         172 :                 char       *sval = defGetString(opt);
     240             : 
     241             :                 /* Try matching on 'auto' string, or fall back on boolean */
     242         172 :                 if (pg_strcasecmp(sval, "auto") == 0)
     243           6 :                     params.index_cleanup = VACOPTVALUE_AUTO;
     244             :                 else
     245         166 :                     params.index_cleanup = get_vacoptval_from_boolean(opt);
     246             :             }
     247             :         }
     248        3486 :         else if (strcmp(opt->defname, "process_main") == 0)
     249         154 :             process_main = defGetBoolean(opt);
     250        3332 :         else if (strcmp(opt->defname, "process_toast") == 0)
     251         160 :             process_toast = defGetBoolean(opt);
     252        3172 :         else if (strcmp(opt->defname, "truncate") == 0)
     253         148 :             params.truncate = get_vacoptval_from_boolean(opt);
     254        3024 :         else if (strcmp(opt->defname, "parallel") == 0)
     255             :         {
     256         338 :             if (opt->arg == NULL)
     257             :             {
     258           6 :                 ereport(ERROR,
     259             :                         (errcode(ERRCODE_SYNTAX_ERROR),
     260             :                          errmsg("parallel option requires a value between 0 and %d",
     261             :                                 MAX_PARALLEL_WORKER_LIMIT),
     262             :                          parser_errposition(pstate, opt->location)));
     263             :             }
     264             :             else
     265             :             {
     266             :                 int         nworkers;
     267             : 
     268         332 :                 nworkers = defGetInt32(opt);
     269         332 :                 if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
     270           6 :                     ereport(ERROR,
     271             :                             (errcode(ERRCODE_SYNTAX_ERROR),
     272             :                              errmsg("parallel workers for vacuum must be between 0 and %d",
     273             :                                     MAX_PARALLEL_WORKER_LIMIT),
     274             :                              parser_errposition(pstate, opt->location)));
     275             : 
     276             :                 /*
     277             :                  * Disable parallel vacuum, if user has specified parallel
     278             :                  * degree as zero.
     279             :                  */
     280         326 :                 if (nworkers == 0)
     281         154 :                     params.nworkers = -1;
     282             :                 else
     283         172 :                     params.nworkers = nworkers;
     284             :             }
     285             :         }
     286        2686 :         else if (strcmp(opt->defname, "skip_database_stats") == 0)
     287        2604 :             skip_database_stats = defGetBoolean(opt);
     288          82 :         else if (strcmp(opt->defname, "only_database_stats") == 0)
     289          82 :             only_database_stats = defGetBoolean(opt);
     290             :         else
     291           0 :             ereport(ERROR,
     292             :                     (errcode(ERRCODE_SYNTAX_ERROR),
     293             :                      errmsg("unrecognized VACUUM option \"%s\"", opt->defname),
     294             :                      parser_errposition(pstate, opt->location)));
     295             :     }
     296             : 
     297             :     /* Set vacuum options */
     298       10028 :     params.options =
     299       10028 :         (vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE) |
     300       10028 :         (verbose ? VACOPT_VERBOSE : 0) |
     301       10028 :         (skip_locked ? VACOPT_SKIP_LOCKED : 0) |
     302       10028 :         (analyze ? VACOPT_ANALYZE : 0) |
     303       10028 :         (freeze ? VACOPT_FREEZE : 0) |
     304       10028 :         (full ? VACOPT_FULL : 0) |
     305       10028 :         (disable_page_skipping ? VACOPT_DISABLE_PAGE_SKIPPING : 0) |
     306       10028 :         (process_main ? VACOPT_PROCESS_MAIN : 0) |
     307       10028 :         (process_toast ? VACOPT_PROCESS_TOAST : 0) |
     308       10028 :         (skip_database_stats ? VACOPT_SKIP_DATABASE_STATS : 0) |
     309       10028 :         (only_database_stats ? VACOPT_ONLY_DATABASE_STATS : 0);
     310             : 
     311             :     /* sanity checks on options */
     312             :     Assert(params.options & (VACOPT_VACUUM | VACOPT_ANALYZE));
     313             :     Assert((params.options & VACOPT_VACUUM) ||
     314             :            !(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
     315             : 
     316       10028 :     if ((params.options & VACOPT_FULL) && params.nworkers > 0)
     317           6 :         ereport(ERROR,
     318             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     319             :                  errmsg("VACUUM FULL cannot be performed in parallel")));
     320             : 
     321             :     /*
     322             :      * BUFFER_USAGE_LIMIT does nothing for VACUUM (FULL) so just raise an
     323             :      * ERROR for that case.  VACUUM (FULL, ANALYZE) does make use of it, so
     324             :      * we'll permit that.
     325             :      */
     326       10022 :     if (ring_size != -1 && (params.options & VACOPT_FULL) &&
     327           6 :         !(params.options & VACOPT_ANALYZE))
     328           6 :         ereport(ERROR,
     329             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     330             :                  errmsg("BUFFER_USAGE_LIMIT cannot be specified for VACUUM FULL")));
     331             : 
     332             :     /*
     333             :      * Make sure VACOPT_ANALYZE is specified if any column lists are present.
     334             :      */
     335       10016 :     if (!(params.options & VACOPT_ANALYZE))
     336             :     {
     337        7978 :         foreach(lc, vacstmt->rels)
     338             :         {
     339        3668 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     340             : 
     341        3668 :             if (vrel->va_cols != NIL)
     342           6 :                 ereport(ERROR,
     343             :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     344             :                          errmsg("ANALYZE option must be specified when a column list is provided")));
     345             :         }
     346             :     }
     347             : 
     348             : 
     349             :     /*
     350             :      * Sanity check DISABLE_PAGE_SKIPPING option.
     351             :      */
     352       10010 :     if ((params.options & VACOPT_FULL) != 0 &&
     353         322 :         (params.options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
     354           0 :         ereport(ERROR,
     355             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     356             :                  errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
     357             : 
     358             :     /* sanity check for PROCESS_TOAST */
     359       10010 :     if ((params.options & VACOPT_FULL) != 0 &&
     360         322 :         (params.options & VACOPT_PROCESS_TOAST) == 0)
     361           6 :         ereport(ERROR,
     362             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     363             :                  errmsg("PROCESS_TOAST required with VACUUM FULL")));
     364             : 
     365             :     /* sanity check for ONLY_DATABASE_STATS */
     366       10004 :     if (params.options & VACOPT_ONLY_DATABASE_STATS)
     367             :     {
     368             :         Assert(params.options & VACOPT_VACUUM);
     369          82 :         if (vacstmt->rels != NIL)
     370           6 :             ereport(ERROR,
     371             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     372             :                      errmsg("ONLY_DATABASE_STATS cannot be specified with a list of tables")));
     373             :         /* don't require people to turn off PROCESS_TOAST/MAIN explicitly */
     374          76 :         if (params.options & ~(VACOPT_VACUUM |
     375             :                                VACOPT_VERBOSE |
     376             :                                VACOPT_PROCESS_MAIN |
     377             :                                VACOPT_PROCESS_TOAST |
     378             :                                VACOPT_ONLY_DATABASE_STATS))
     379           0 :             ereport(ERROR,
     380             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     381             :                      errmsg("ONLY_DATABASE_STATS cannot be specified with other VACUUM options")));
     382             :     }
     383             : 
     384             :     /*
     385             :      * All freeze ages are zero if the FREEZE option is given; otherwise pass
     386             :      * them as -1 which means to use the default values.
     387             :      */
     388        9998 :     if (params.options & VACOPT_FREEZE)
     389             :     {
     390        1120 :         params.freeze_min_age = 0;
     391        1120 :         params.freeze_table_age = 0;
     392        1120 :         params.multixact_freeze_min_age = 0;
     393        1120 :         params.multixact_freeze_table_age = 0;
     394             :     }
     395             :     else
     396             :     {
     397        8878 :         params.freeze_min_age = -1;
     398        8878 :         params.freeze_table_age = -1;
     399        8878 :         params.multixact_freeze_min_age = -1;
     400        8878 :         params.multixact_freeze_table_age = -1;
     401             :     }
     402             : 
     403             :     /* user-invoked vacuum is never "for wraparound" */
     404        9998 :     params.is_wraparound = false;
     405             : 
     406             :     /* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
     407        9998 :     params.log_min_duration = -1;
     408             : 
     409             :     /*
     410             :      * Create special memory context for cross-transaction storage.
     411             :      *
     412             :      * Since it is a child of PortalContext, it will go away eventually even
     413             :      * if we suffer an error; there's no need for special abort cleanup logic.
     414             :      */
     415        9998 :     vac_context = AllocSetContextCreate(PortalContext,
     416             :                                         "Vacuum",
     417             :                                         ALLOCSET_DEFAULT_SIZES);
     418             : 
     419             :     /*
     420             :      * Make a buffer strategy object in the cross-transaction memory context.
     421             :      * We needn't bother making this for VACUUM (FULL) or VACUUM
     422             :      * (ONLY_DATABASE_STATS) as they'll not make use of it.  VACUUM (FULL,
     423             :      * ANALYZE) is possible, so we'd better ensure that we make a strategy
     424             :      * when we see ANALYZE.
     425             :      */
     426        9998 :     if ((params.options & (VACOPT_ONLY_DATABASE_STATS |
     427         392 :                            VACOPT_FULL)) == 0 ||
     428         392 :         (params.options & VACOPT_ANALYZE) != 0)
     429             :     {
     430             : 
     431        9612 :         MemoryContext old_context = MemoryContextSwitchTo(vac_context);
     432             : 
     433             :         Assert(ring_size >= -1);
     434             : 
     435             :         /*
     436             :          * If BUFFER_USAGE_LIMIT was specified by the VACUUM or ANALYZE
     437             :          * command, it overrides the value of VacuumBufferUsageLimit.  Either
     438             :          * value may be 0, in which case GetAccessStrategyWithSize() will
     439             :          * return NULL, effectively allowing full use of shared buffers.
     440             :          */
     441        9612 :         if (ring_size == -1)
     442        9582 :             ring_size = VacuumBufferUsageLimit;
     443             : 
     444        9612 :         bstrategy = GetAccessStrategyWithSize(BAS_VACUUM, ring_size);
     445             : 
     446        9612 :         MemoryContextSwitchTo(old_context);
     447             :     }
     448             : 
     449             :     /* Now go through the common routine */
     450        9998 :     vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
     451             : 
     452             :     /* Finally, clean up the vacuum memory context */
     453        9872 :     MemoryContextDelete(vac_context);
     454        9872 : }
     455             : 
     456             : /*
     457             :  * Internal entry point for autovacuum and the VACUUM / ANALYZE commands.
     458             :  *
     459             :  * relations, if not NIL, is a list of VacuumRelation to process; otherwise,
     460             :  * we process all relevant tables in the database.  For each VacuumRelation,
     461             :  * if a valid OID is supplied, the table with that OID is what to process;
     462             :  * otherwise, the VacuumRelation's RangeVar indicates what to process.
     463             :  *
     464             :  * params contains a set of parameters that can be used to customize the
     465             :  * behavior.
     466             :  *
     467             :  * bstrategy may be passed in as NULL when the caller does not want to
     468             :  * restrict the number of shared_buffers that VACUUM / ANALYZE can use;
     469             :  * otherwise, the caller must build a BufferAccessStrategy with the number of
     470             :  * shared_buffers that VACUUM / ANALYZE should try to limit themselves to
     471             :  * using.
     472             :  *
     473             :  * isTopLevel should be passed down from ProcessUtility.
     474             :  *
     475             :  * It is the caller's responsibility to ensure that all parameters are
     476             :  * allocated in a memory context that will not disappear at transaction commit.
     477             :  */
     478             : void
     479       10358 : vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
     480             :        MemoryContext vac_context, bool isTopLevel)
     481             : {
     482             :     static bool in_vacuum = false;
     483             : 
     484             :     const char *stmttype;
     485             :     volatile bool in_outer_xact,
     486             :                 use_own_xacts;
     487             : 
     488             :     Assert(params != NULL);
     489             : 
     490       10358 :     stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
     491             : 
     492             :     /*
     493             :      * We cannot run VACUUM inside a user transaction block; if we were inside
     494             :      * a transaction, then our commit- and start-transaction-command calls
     495             :      * would not have the intended effect!  There are numerous other subtle
     496             :      * dependencies on this, too.
     497             :      *
     498             :      * ANALYZE (without VACUUM) can run either way.
     499             :      */
     500       10358 :     if (params->options & VACOPT_VACUUM)
     501             :     {
     502        5304 :         PreventInTransactionBlock(isTopLevel, stmttype);
     503        5292 :         in_outer_xact = false;
     504             :     }
     505             :     else
     506        5054 :         in_outer_xact = IsInTransactionBlock(isTopLevel);
     507             : 
     508             :     /*
     509             :      * Check for and disallow recursive calls.  This could happen when VACUUM
     510             :      * FULL or ANALYZE calls a hostile index expression that itself calls
     511             :      * ANALYZE.
     512             :      */
     513       10346 :     if (in_vacuum)
     514          12 :         ereport(ERROR,
     515             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     516             :                  errmsg("%s cannot be executed from VACUUM or ANALYZE",
     517             :                         stmttype)));
     518             : 
     519             :     /*
     520             :      * Build list of relation(s) to process, putting any new data in
     521             :      * vac_context for safekeeping.
     522             :      */
     523       10334 :     if (params->options & VACOPT_ONLY_DATABASE_STATS)
     524             :     {
     525             :         /* We don't process any tables in this case */
     526             :         Assert(relations == NIL);
     527             :     }
     528       10258 :     else if (relations != NIL)
     529             :     {
     530        9044 :         List       *newrels = NIL;
     531             :         ListCell   *lc;
     532             : 
     533       18176 :         foreach(lc, relations)
     534             :         {
     535        9168 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, lc);
     536             :             List       *sublist;
     537             :             MemoryContext old_context;
     538             : 
     539        9168 :             sublist = expand_vacuum_rel(vrel, vac_context, params->options);
     540        9132 :             old_context = MemoryContextSwitchTo(vac_context);
     541        9132 :             newrels = list_concat(newrels, sublist);
     542        9132 :             MemoryContextSwitchTo(old_context);
     543             :         }
     544        9008 :         relations = newrels;
     545             :     }
     546             :     else
     547        1214 :         relations = get_all_vacuum_rels(vac_context, params->options);
     548             : 
     549             :     /*
     550             :      * Decide whether we need to start/commit our own transactions.
     551             :      *
     552             :      * For VACUUM (with or without ANALYZE): always do so, so that we can
     553             :      * release locks as soon as possible.  (We could possibly use the outer
     554             :      * transaction for a one-table VACUUM, but handling TOAST tables would be
     555             :      * problematic.)
     556             :      *
     557             :      * For ANALYZE (no VACUUM): if inside a transaction block, we cannot
     558             :      * start/commit our own transactions.  Also, there's no need to do so if
     559             :      * only processing one relation.  For multiple relations when not within a
     560             :      * transaction block, and also in an autovacuum worker, use own
     561             :      * transactions so we can release locks sooner.
     562             :      */
     563       10298 :     if (params->options & VACOPT_VACUUM)
     564        5280 :         use_own_xacts = true;
     565             :     else
     566             :     {
     567             :         Assert(params->options & VACOPT_ANALYZE);
     568        5018 :         if (IsAutoVacuumWorkerProcess())
     569         174 :             use_own_xacts = true;
     570        4844 :         else if (in_outer_xact)
     571         208 :             use_own_xacts = false;
     572        4636 :         else if (list_length(relations) > 1)
     573        1152 :             use_own_xacts = true;
     574             :         else
     575        3484 :             use_own_xacts = false;
     576             :     }
     577             : 
     578             :     /*
     579             :      * vacuum_rel expects to be entered with no transaction active; it will
     580             :      * start and commit its own transaction.  But we are called by an SQL
     581             :      * command, and so we are executing inside a transaction already. We
     582             :      * commit the transaction started in PostgresMain() here, and start
     583             :      * another one before exiting to match the commit waiting for us back in
     584             :      * PostgresMain().
     585             :      */
     586       10298 :     if (use_own_xacts)
     587             :     {
     588             :         Assert(!in_outer_xact);
     589             : 
     590             :         /* ActiveSnapshot is not set by autovacuum */
     591        6606 :         if (ActiveSnapshotSet())
     592        6246 :             PopActiveSnapshot();
     593             : 
     594             :         /* matches the StartTransaction in PostgresMain() */
     595        6606 :         CommitTransactionCommand();
     596             :     }
     597             : 
     598             :     /* Turn vacuum cost accounting on or off, and set/clear in_vacuum */
     599       10298 :     PG_TRY();
     600             :     {
     601             :         ListCell   *cur;
     602             : 
     603       10298 :         in_vacuum = true;
     604       10298 :         VacuumFailsafeActive = false;
     605       10298 :         VacuumUpdateCosts();
     606       10298 :         VacuumCostBalance = 0;
     607       10298 :         VacuumPageHit = 0;
     608       10298 :         VacuumPageMiss = 0;
     609       10298 :         VacuumPageDirty = 0;
     610       10298 :         VacuumCostBalanceLocal = 0;
     611       10298 :         VacuumSharedCostBalance = NULL;
     612       10298 :         VacuumActiveNWorkers = NULL;
     613             : 
     614             :         /*
     615             :          * Loop to process each selected relation.
     616             :          */
     617      104266 :         foreach(cur, relations)
     618             :         {
     619       94034 :             VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
     620             : 
     621       94034 :             if (params->options & VACOPT_VACUUM)
     622             :             {
     623       47120 :                 if (!vacuum_rel(vrel->oid, vrel->relation, params, false,
     624             :                                 bstrategy))
     625          62 :                     continue;
     626             :             }
     627             : 
     628       93966 :             if (params->options & VACOPT_ANALYZE)
     629             :             {
     630             :                 /*
     631             :                  * If using separate xacts, start one for analyze. Otherwise,
     632             :                  * we can use the outer transaction.
     633             :                  */
     634       47960 :                 if (use_own_xacts)
     635             :                 {
     636       44312 :                     StartTransactionCommand();
     637             :                     /* functions in indexes may want a snapshot set */
     638       44312 :                     PushActiveSnapshot(GetTransactionSnapshot());
     639             :                 }
     640             : 
     641       47960 :                 analyze_rel(vrel->oid, vrel->relation, params,
     642             :                             vrel->va_cols, in_outer_xact, bstrategy);
     643             : 
     644       47900 :                 if (use_own_xacts)
     645             :                 {
     646       44274 :                     PopActiveSnapshot();
     647       44274 :                     CommitTransactionCommand();
     648             :                 }
     649             :                 else
     650             :                 {
     651             :                     /*
     652             :                      * If we're not using separate xacts, better separate the
     653             :                      * ANALYZE actions with CCIs.  This avoids trouble if user
     654             :                      * says "ANALYZE t, t".
     655             :                      */
     656        3626 :                     CommandCounterIncrement();
     657             :                 }
     658             :             }
     659             : 
     660             :             /*
     661             :              * Ensure VacuumFailsafeActive has been reset before vacuuming the
     662             :              * next relation.
     663             :              */
     664       93906 :             VacuumFailsafeActive = false;
     665             :         }
     666             :     }
     667          66 :     PG_FINALLY();
     668             :     {
     669       10298 :         in_vacuum = false;
     670       10298 :         VacuumCostActive = false;
     671       10298 :         VacuumFailsafeActive = false;
     672       10298 :         VacuumCostBalance = 0;
     673             :     }
     674       10298 :     PG_END_TRY();
     675             : 
     676             :     /*
     677             :      * Finish up processing.
     678             :      */
     679       10232 :     if (use_own_xacts)
     680             :     {
     681             :         /* here, we are not in a transaction */
     682             : 
     683             :         /*
     684             :          * This matches the CommitTransaction waiting for us in
     685             :          * PostgresMain().
     686             :          */
     687        6562 :         StartTransactionCommand();
     688             :     }
     689             : 
     690       10232 :     if ((params->options & VACOPT_VACUUM) &&
     691        5248 :         !(params->options & VACOPT_SKIP_DATABASE_STATS))
     692             :     {
     693             :         /*
     694             :          * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
     695             :          */
     696        2460 :         vac_update_datfrozenxid();
     697             :     }
     698             : 
     699       10232 : }
     700             : 
     701             : /*
     702             :  * Check if the current user has privileges to vacuum or analyze the relation.
     703             :  * If not, emit a WARNING naming the relation and return false, letting the
     704             :  * caller decide what to do with it.  "options" must include VACOPT_VACUUM
     705             :  * and/or VACOPT_ANALYZE; if both are set, only the VACUUM warning is issued.
     706             :  */
     707             : bool
     708      187110 : vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple,
     709             :                                  bits32 options)
     710             : {
     711             :     char       *relname;
     712             : 
     713             :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
     714             : 
     715             :     /*----------
     716             :      * A role may vacuum or analyze the relation if any of the following are
     717             :      * true (superuser/owner are presumably covered by the ACL check; verify):
     718             :      *   - the role is a superuser
     719             :      *   - the role owns the relation
     720             :      *   - the role owns the current database and the relation is not shared
     721             :      *   - the role has been granted the MAINTAIN privilege on the relation
     722             :      *   - the role has privileges to vacuum/analyze any of the relation's
     723             :      *     partition ancestors
     724             :      *----------
     725             :      */
     726      217656 :     if ((object_ownercheck(DatabaseRelationId, MyDatabaseId, GetUserId()) && !reltuple->relisshared) ||
     727       30976 :         pg_class_aclcheck(relid, GetUserId(), ACL_MAINTAIN) == ACLCHECK_OK ||
     728         430 :         has_partition_ancestor_privs(relid, GetUserId(), ACL_MAINTAIN))
     729      186878 :         return true;
     730             : 
     731         232 :     relname = NameStr(reltuple->relname);
     732             : 
     733         232 :     if ((options & VACOPT_VACUUM) != 0)
     734             :     {
     735         152 :         ereport(WARNING,
     736             :                 (errmsg("permission denied to vacuum \"%s\", skipping it",
     737             :                         relname)));
     738             : 
     739             :         /*
     740             :          * For VACUUM ANALYZE, both warnings would apply, but report only the
     741             :          * VACUUM one, since VACUUM is the operation that would be processed
     742             :          * first.
     743             :          */
     744         152 :         return false;
     745             :     }
     746             : 
     747          80 :     if ((options & VACOPT_ANALYZE) != 0)
     748          80 :         ereport(WARNING,
     749             :                 (errmsg("permission denied to analyze \"%s\", skipping it",
     750             :                         relname)));
     751             : 
     752          80 :     return false;
     753             : }
     754             : 
     755             : 
     756             : /*
     757             :  * vacuum_open_relation
     758             :  *
     759             :  * Try to open and lock (with lmode) a relation that is about to be vacuumed
     760             :  * or analyzed.  Returns the open relation, or NULL if it could not be opened
     761             :  * or locked, in which case a message is logged when possible.
     762             :  */
     763             : Relation
     764      122150 : vacuum_open_relation(Oid relid, RangeVar *relation, bits32 options,
     765             :                      bool verbose, LOCKMODE lmode)
     766             : {
     767             :     Relation    rel;
     768      122150 :     bool        rel_lock = true;
     769             :     int         elevel;
     770             : 
     771             :     Assert((options & (VACOPT_VACUUM | VACOPT_ANALYZE)) != 0);
     772             : 
     773             :     /*
     774             :      * Open the relation and get the appropriate lock on it.
     775             :      *
     776             :      * There's a race condition here: the relation may have gone away since
     777             :      * the last time we saw it.  If so, we don't need to vacuum or analyze it.
     778             :      *
     779             :      * If we've been asked not to wait for the relation lock, acquire it first
     780             :      * in non-blocking mode, before calling try_relation_open().
     781             :      */
     782      122150 :     if (!(options & VACOPT_SKIP_LOCKED))
     783      121154 :         rel = try_relation_open(relid, lmode);
     784         996 :     else if (ConditionalLockRelationOid(relid, lmode))
     785         968 :         rel = try_relation_open(relid, NoLock);
     786             :     else
     787             :     {
     788          28 :         rel = NULL;
     789          28 :         rel_lock = false;
     790             :     }
     791             : 
     792             :     /* the relation was opened successfully, so we are done */
     793      122150 :     if (rel)
     794      122110 :         return rel;
     795             : 
     796             :     /*
     797             :      * Relation could not be opened, so log a message describing why, if
     798             :      * possible.
     799             :      *
     800             :      * If the RangeVar is not defined, we do not have enough information to
     801             :      * provide a meaningful log statement.  Chances are that the caller has
     802             :      * intentionally not provided this information so that this logging is
     803             :      * skipped, anyway.
     804             :      */
     805          40 :     if (relation == NULL)
     806          18 :         return NULL;
     807             : 
     808             :     /*
     809             :      * Determine the log level.
     810             :      *
     811             :      * For manual VACUUM or ANALYZE, we emit a WARNING to match the log
     812             :      * statements in the permission checks; in an autovacuum worker, we emit
     813             :      * a LOG message only if "verbose" is set, and otherwise stay silent.
     814             :      */
     815          22 :     if (!IsAutoVacuumWorkerProcess())
     816          14 :         elevel = WARNING;
     817           8 :     else if (verbose)
     818           8 :         elevel = LOG;
     819             :     else
     820           0 :         return NULL;
     821             : 
     822          22 :     if ((options & VACOPT_VACUUM) != 0)
     823             :     {
     824          10 :         if (!rel_lock)
     825           6 :             ereport(elevel,
     826             :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     827             :                      errmsg("skipping vacuum of \"%s\" --- lock not available",
     828             :                             relation->relname)));
     829             :         else
     830           4 :             ereport(elevel,
     831             :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     832             :                      errmsg("skipping vacuum of \"%s\" --- relation no longer exists",
     833             :                             relation->relname)));
     834             : 
     835             :         /*
     836             :          * For VACUUM ANALYZE, both messages would apply, but report only the
     837             :          * VACUUM one, since VACUUM is the operation that would be processed
     838             :          * first.
     839             :          */
     840          10 :         return NULL;
     841             :     }
     842             : 
     843          12 :     if ((options & VACOPT_ANALYZE) != 0)
     844             :     {
     845          12 :         if (!rel_lock)
     846          10 :             ereport(elevel,
     847             :                     (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     848             :                      errmsg("skipping analyze of \"%s\" --- lock not available",
     849             :                             relation->relname)));
     850             :         else
     851           2 :             ereport(elevel,
     852             :                     (errcode(ERRCODE_UNDEFINED_TABLE),
     853             :                      errmsg("skipping analyze of \"%s\" --- relation no longer exists",
     854             :                             relation->relname)));
     855             :     }
     856             : 
     857          12 :     return NULL;
     858             : }
     859             : 
     860             : 
     861             : /*
     862             :  * Given a VacuumRelation, fill in the table OID if it wasn't specified,
     863             :  * and optionally add VacuumRelations for partitions of the table.
     864             :  *
     865             :  * If a VacuumRelation does not have an OID supplied and is a partitioned
     866             :  * table, an extra entry will be added to the output for each partition.
     867             :  * Presently, only autovacuum supplies OIDs when calling vacuum(), and
     868             :  * it does not want us to expand partitioned tables.
     869             :  *
     870             :  * We take care not to modify the input data structure, but instead build
     871             :  * new VacuumRelation(s) to return.  (But note that they will reference
     872             :  * unmodified parts of the input, eg column lists.)  New data structures
     873             :  * are made in vac_context.
     874             :  */
     875             : static List *
     876        9168 : expand_vacuum_rel(VacuumRelation *vrel, MemoryContext vac_context,
     877             :                   int options)
     878             : {
     879        9168 :     List       *vacrels = NIL;
     880             :     MemoryContext oldcontext;
     881             : 
     882             :     /* If caller supplied OID, just return the input entry unchanged. */
     883        9168 :     if (OidIsValid(vrel->oid))
     884             :     {
     885         360 :         oldcontext = MemoryContextSwitchTo(vac_context);
     886         360 :         vacrels = lappend(vacrels, vrel);
     887         360 :         MemoryContextSwitchTo(oldcontext);
     888             :     }
     889             :     else
     890             :     {
     891             :         /* Process a specific relation, and possibly partitions thereof */
     892             :         Oid         relid;
     893             :         HeapTuple   tuple;
     894             :         Form_pg_class classForm;
     895             :         bool        include_parts;
     896             :         int         rvr_opts;
     897             : 
     898             :         /*
     899             :          * Since autovacuum workers supply OIDs when calling vacuum(), no
     900             :          * autovacuum worker should reach this code.
     901             :          */
     902             :         Assert(!IsAutoVacuumWorkerProcess());
     903             : 
     904             :         /*
     905             :          * We transiently take AccessShareLock to protect the syscache lookup
     906             :          * below, as well as find_all_inheritors's expectation that the caller
     907             :          * holds some lock on the starting relation.
     908             :          */
     909        8808 :         rvr_opts = (options & VACOPT_SKIP_LOCKED) ? RVR_SKIP_LOCKED : 0;
     910        8808 :         relid = RangeVarGetRelidExtended(vrel->relation,
     911             :                                          AccessShareLock,
     912             :                                          rvr_opts,
     913             :                                          NULL, NULL);
     914             : 
     915             :         /*
     916             :          * If the lock is unavailable, emit the same log statement that
     917             :          * vacuum_rel() and analyze_rel() would, and return an empty list.
     918             :          */
     919        8772 :         if (!OidIsValid(relid))
     920             :         {
     921           8 :             if (options & VACOPT_VACUUM)
     922           6 :                 ereport(WARNING,
     923             :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     924             :                          errmsg("skipping vacuum of \"%s\" --- lock not available",
     925             :                                 vrel->relation->relname)));
     926             :             else
     927           2 :                 ereport(WARNING,
     928             :                         (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     929             :                          errmsg("skipping analyze of \"%s\" --- lock not available",
     930             :                                 vrel->relation->relname)));
     931           8 :             return vacrels;
     932             :         }
     933             : 
     934             :         /*
     935             :          * Fetch the relation's syscache entry, which we need both for the
     936             :          * privilege check and to see whether it is a partitioned table.
     937             :          */
     938        8764 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     939        8764 :         if (!HeapTupleIsValid(tuple))
     940           0 :             elog(ERROR, "cache lookup failed for relation %u", relid);
     941        8764 :         classForm = (Form_pg_class) GETSTRUCT(tuple);
     942             : 
     943             :         /*
     944             :          * Make a returnable VacuumRelation for this rel if the user has the
     945             :          * required privileges.
     946             :          */
     947        8764 :         if (vacuum_is_permitted_for_relation(relid, classForm, options))
     948             :         {
     949        8586 :             oldcontext = MemoryContextSwitchTo(vac_context);
     950        8586 :             vacrels = lappend(vacrels, makeVacuumRelation(vrel->relation,
     951             :                                                           relid,
     952             :                                                           vrel->va_cols));
     953        8586 :             MemoryContextSwitchTo(oldcontext);
     954             :         }
     955             : 
     956             : 
     957        8764 :         include_parts = (classForm->relkind == RELKIND_PARTITIONED_TABLE);
     958        8764 :         ReleaseSysCache(tuple);
     959             : 
     960             :         /*
     961             :          * If it is a partitioned table, make relation list entries for its
     962             :          * partitions.  Note that the list returned by find_all_inheritors()
     963             :          * includes the passed-in OID, so we have to skip that.  There's no
     964             :          * point in taking locks on the individual partitions yet, and doing
     965             :          * so would just add unnecessary deadlock risk.  For this last reason
     966             :          * we do not yet check the ownership of the partitions, which get
     967             :          * added to the list to process.  Ownership will be checked later on.
     968             :          */
     969        8764 :         if (include_parts)
     970             :         {
     971         684 :             List       *part_oids = find_all_inheritors(relid, NoLock, NULL);
     972             :             ListCell   *part_lc;
     973             : 
     974        3198 :             foreach(part_lc, part_oids)
     975             :             {
     976        2514 :                 Oid         part_oid = lfirst_oid(part_lc);
     977             : 
     978        2514 :                 if (part_oid == relid)
     979         684 :                     continue;   /* ignore original table */
     980             : 
     981             :                 /*
     982             :                  * We omit a RangeVar since it wouldn't be appropriate to
     983             :                  * complain about failure to open one of these relations
     984             :                  * later.
     985             :                  */
     986        1830 :                 oldcontext = MemoryContextSwitchTo(vac_context);
     987        1830 :                 vacrels = lappend(vacrels, makeVacuumRelation(NULL,
     988             :                                                               part_oid,
     989             :                                                               vrel->va_cols));
     990        1830 :                 MemoryContextSwitchTo(oldcontext);
     991             :             }
     992             :         }
     993             : 
     994             :         /*
     995             :          * Release lock again.  This means that by the time we actually try to
     996             :          * process the table, it might be gone or renamed.  In the former case
     997             :          * we'll silently ignore it; in the latter case we'll process it
     998             :          * anyway, but we must beware that the RangeVar doesn't necessarily
     999             :          * identify it anymore.  This isn't ideal, perhaps, but there's little
    1000             :          * practical alternative, since we're typically going to commit this
    1001             :          * transaction and begin a new one between now and then.  Moreover,
    1002             :          * holding locks on multiple relations would create significant risk
    1003             :          * of deadlock.
    1004             :          */
    1005        8764 :         UnlockRelationOid(relid, AccessShareLock);
    1006             :     }
    1007             : 
    1008        9124 :     return vacrels;
    1009             : }
    1010             : 
    1011             : /*
    1012             :  * Construct a list of VacuumRelations for all vacuumable rels in the current
    1013             :  * database that the user is permitted to process.  Built in vac_context.
    1014             :  */
    1015             : static List *
    1016        1214 : get_all_vacuum_rels(MemoryContext vac_context, int options)
    1017             : {
    1018        1214 :     List       *vacrels = NIL;
    1019             :     Relation    pgclass;
    1020             :     TableScanDesc scan;
    1021             :     HeapTuple   tuple;
    1022             : 
    1023        1214 :     pgclass = table_open(RelationRelationId, AccessShareLock);
    1024             : 
    1025        1214 :     scan = table_beginscan_catalog(pgclass, 0, NULL);
    1026             : 
    1027      504438 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1028             :     {
    1029      503224 :         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
    1030             :         MemoryContext oldcontext;
    1031      503224 :         Oid         relid = classForm->oid;
    1032             : 
    1033             :         /*
    1034             :          * Only plain tables, matviews and partitioned tables are candidates.
    1035             :          * Partitioned tables are included here; depending on the operation
    1036             :          * to be performed, caller will decide whether to process them.
    1037             :          */
    1038      503224 :         if (classForm->relkind != RELKIND_RELATION &&
    1039      419966 :             classForm->relkind != RELKIND_MATVIEW &&
    1040      419960 :             classForm->relkind != RELKIND_PARTITIONED_TABLE)
    1041      419918 :             continue;
    1042             : 
    1043             :         /* skip relations the user lacks permission to process */
    1044       83306 :         if (!vacuum_is_permitted_for_relation(relid, classForm, options))
    1045           0 :             continue;
    1046             : 
    1047             :         /*
    1048             :          * Build VacuumRelation(s) specifying the table OIDs to be processed.
    1049             :          * We omit a RangeVar since it wouldn't be appropriate to complain
    1050             :          * about failure to open one of these relations later.
    1051             :          */
    1052       83306 :         oldcontext = MemoryContextSwitchTo(vac_context);
    1053       83306 :         vacrels = lappend(vacrels, makeVacuumRelation(NULL,
    1054             :                                                       relid,
    1055             :                                                       NIL));
    1056       83306 :         MemoryContextSwitchTo(oldcontext);
    1057             :     }
    1058             : 
    1059        1214 :     table_endscan(scan);
    1060        1214 :     table_close(pgclass, AccessShareLock);
    1061             : 
    1062        1214 :     return vacrels;
    1063             : }
    1064             : 
    1065             : /*
    1066             :  * vacuum_get_cutoffs() -- compute OldestXmin and freeze cutoff points
    1067             :  *
    1068             :  * The target relation and VACUUM parameters are our inputs.
    1069             :  *
    1070             :  * Output parameters are the cutoffs that VACUUM caller should use.
    1071             :  *
    1072             :  * Return value indicates if vacuumlazy.c caller should make its VACUUM
    1073             :  * operation aggressive.  An aggressive VACUUM must advance relfrozenxid up to
    1074             :  * FreezeLimit (at a minimum), and relminmxid up to MultiXactCutoff (at a
    1075             :  * minimum).
    1076             :  */
    1077             : bool
    1078       74022 : vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
    1079             :                    struct VacuumCutoffs *cutoffs)
    1080             : {
    1081             :     int         freeze_min_age,
    1082             :                 multixact_freeze_min_age,
    1083             :                 freeze_table_age,
    1084             :                 multixact_freeze_table_age,
    1085             :                 effective_multixact_freeze_max_age;
    1086             :     TransactionId nextXID,
    1087             :                 safeOldestXmin,
    1088             :                 aggressiveXIDCutoff;
    1089             :     MultiXactId nextMXID,
    1090             :                 safeOldestMxact,
    1091             :                 aggressiveMXIDCutoff;
    1092             : 
    1093             :     /* Use mutable copies of freeze age parameters */
    1094       74022 :     freeze_min_age = params->freeze_min_age;
    1095       74022 :     multixact_freeze_min_age = params->multixact_freeze_min_age;
    1096       74022 :     freeze_table_age = params->freeze_table_age;
    1097       74022 :     multixact_freeze_table_age = params->multixact_freeze_table_age;
    1098             : 
    1099             :     /* Set pg_class fields in cutoffs */
    1100       74022 :     cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
    1101       74022 :     cutoffs->relminmxid = rel->rd_rel->relminmxid;
    1102             : 
    1103             :     /*
    1104             :      * Acquire OldestXmin.
    1105             :      *
    1106             :      * We can always ignore processes running lazy vacuum.  This is because we
    1107             :      * use these values only for deciding which tuples we must keep in the
    1108             :      * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
    1109             :      * XID assigned), it's safe to ignore it.  In theory it could be
    1110             :      * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
    1111             :      * that only one vacuum process can be working on a particular table at
    1112             :      * any time, and that each vacuum is always an independent transaction.
    1113             :      */
    1114       74022 :     cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
    1115             : 
    1116       74022 :     if (OldSnapshotThresholdActive())
    1117             :     {
    1118             :         TransactionId limit_xmin;
    1119             :         TimestampTz limit_ts;
    1120             : 
    1121           6 :         if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
    1122             :                                                 &limit_xmin, &limit_ts))
    1123             :         {
    1124             :             /*
    1125             :              * TODO: We should only set the threshold if we are pruning on the
    1126             :              * basis of the increased limits.  Not as crucial here as it is
    1127             :              * for opportunistic pruning (which often happens at a much higher
    1128             :              * frequency), but would still be a significant improvement.
    1129             :              */
    1130           6 :             SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
    1131           6 :             cutoffs->OldestXmin = limit_xmin;
    1132             :         }
    1133             :     }
    1134             : 
    1135             :     Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
    1136             : 
    1137             :     /* Acquire OldestMxact */
    1138       74022 :     cutoffs->OldestMxact = GetOldestMultiXactId();
    1139             :     Assert(MultiXactIdIsValid(cutoffs->OldestMxact));
    1140             : 
    1141             :     /* Acquire next XID/next MXID values used to apply age-based settings */
    1142       74022 :     nextXID = ReadNextTransactionId();
    1143       74022 :     nextMXID = ReadNextMultiXactId();
    1144             : 
    1145             :     /*
    1146             :      * Also compute the multixact age for which freezing is urgent.  This is
    1147             :      * normally autovacuum_multixact_freeze_max_age, but may be less if we are
    1148             :      * short of multixact member space.
    1149             :      */
    1150       74022 :     effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
    1151             : 
    1152             :     /*
    1153             :      * Almost ready to set freeze output parameters; check if OldestXmin or
    1154             :      * OldestMxact are held back to an unsafe degree before we start on that.
    1155             :      */
    1156       74022 :     safeOldestXmin = nextXID - autovacuum_freeze_max_age;
    1157       74022 :     if (!TransactionIdIsNormal(safeOldestXmin))
    1158           0 :         safeOldestXmin = FirstNormalTransactionId;
    1159       74022 :     safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
    1160       74022 :     if (safeOldestMxact < FirstMultiXactId)
    1161           0 :         safeOldestMxact = FirstMultiXactId;
    1162       74022 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, safeOldestXmin))
    1163           0 :         ereport(WARNING,
    1164             :                 (errmsg("cutoff for removing and freezing tuples is far in the past"),
    1165             :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1166             :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1167       74022 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, safeOldestMxact))
    1168           0 :         ereport(WARNING,
    1169             :                 (errmsg("cutoff for freezing multixacts is far in the past"),
    1170             :                  errhint("Close open transactions soon to avoid wraparound problems.\n"
    1171             :                          "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
    1172             : 
    1173             :     /*
    1174             :      * Determine the minimum freeze age to use: as specified by the caller, or
    1175             :      * vacuum_freeze_min_age, but in any case not more than half
    1176             :      * autovacuum_freeze_max_age, so that autovacuums to prevent XID
    1177             :      * wraparound won't occur too frequently.
    1178             :      */
    1179       74022 :     if (freeze_min_age < 0)
    1180        7280 :         freeze_min_age = vacuum_freeze_min_age;
    1181       74022 :     freeze_min_age = Min(freeze_min_age, autovacuum_freeze_max_age / 2);
    1182             :     Assert(freeze_min_age >= 0);
    1183             : 
    1184             :     /* Compute FreezeLimit, being careful to generate a normal XID */
    1185       74022 :     cutoffs->FreezeLimit = nextXID - freeze_min_age;
    1186       74022 :     if (!TransactionIdIsNormal(cutoffs->FreezeLimit))
    1187           0 :         cutoffs->FreezeLimit = FirstNormalTransactionId;
    1188             :     /* FreezeLimit must always be <= OldestXmin */
    1189       74022 :     if (TransactionIdPrecedes(cutoffs->OldestXmin, cutoffs->FreezeLimit))
    1190         600 :         cutoffs->FreezeLimit = cutoffs->OldestXmin;
    1191             : 
    1192             :     /*
    1193             :      * Determine the minimum multixact freeze age to use: as specified by
    1194             :      * caller, or vacuum_multixact_freeze_min_age, but in any case not more
    1195             :      * than half effective_multixact_freeze_max_age, so that autovacuums to
    1196             :      * prevent MultiXact wraparound won't occur too frequently.
    1197             :      */
    1198       74022 :     if (multixact_freeze_min_age < 0)
    1199        7280 :         multixact_freeze_min_age = vacuum_multixact_freeze_min_age;
    1200       74022 :     multixact_freeze_min_age = Min(multixact_freeze_min_age,
    1201             :                                    effective_multixact_freeze_max_age / 2);
    1202             :     Assert(multixact_freeze_min_age >= 0);
    1203             : 
    1204             :     /* Compute MultiXactCutoff, being careful to generate a valid value */
    1205       74022 :     cutoffs->MultiXactCutoff = nextMXID - multixact_freeze_min_age;
    1206       74022 :     if (cutoffs->MultiXactCutoff < FirstMultiXactId)
    1207           0 :         cutoffs->MultiXactCutoff = FirstMultiXactId;
    1208             :     /* MultiXactCutoff must always be <= OldestMxact */
    1209       74022 :     if (MultiXactIdPrecedes(cutoffs->OldestMxact, cutoffs->MultiXactCutoff))
    1210           4 :         cutoffs->MultiXactCutoff = cutoffs->OldestMxact;
    1211             : 
    1212             :     /*
    1213             :      * Finally, figure out if caller needs to do an aggressive VACUUM or not.
    1214             :      *
    1215             :      * Determine the table freeze age to use: as specified by the caller, or
    1216             :      * the value of the vacuum_freeze_table_age GUC, but in any case not more
    1217             :      * than autovacuum_freeze_max_age * 0.95, so that if you have e.g. a nightly
    1218             :      * VACUUM schedule, the nightly VACUUM gets a chance to freeze XIDs before
    1219             :      * anti-wraparound autovacuum is launched.
    1220             :      */
    1221       74022 :     if (freeze_table_age < 0)
    1222        7280 :         freeze_table_age = vacuum_freeze_table_age;
    1223       74022 :     freeze_table_age = Min(freeze_table_age, autovacuum_freeze_max_age * 0.95);
    1224             :     Assert(freeze_table_age >= 0);
    1225       74022 :     aggressiveXIDCutoff = nextXID - freeze_table_age;
    1226       74022 :     if (!TransactionIdIsNormal(aggressiveXIDCutoff))
    1227           0 :         aggressiveXIDCutoff = FirstNormalTransactionId;
    1228       74022 :     if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid,
    1229             :                                       aggressiveXIDCutoff))
    1230       66560 :         return true;
    1231             : 
    1232             :     /*
    1233             :      * Similar to the above, determine the table freeze age to use for
    1234             :      * multixacts: as specified by the caller, or the value of the
    1235             :      * vacuum_multixact_freeze_table_age GUC, but in any case not more than
    1236             :      * effective_multixact_freeze_max_age * 0.95, so that if you have e.g. a
    1237             :      * nightly VACUUM schedule, the nightly VACUUM gets a chance to freeze
    1238             :      * multixacts before anti-wraparound autovacuum is launched.
    1239             :      */
    1240        7462 :     if (multixact_freeze_table_age < 0)
    1241        7280 :         multixact_freeze_table_age = vacuum_multixact_freeze_table_age;
    1242        7462 :     multixact_freeze_table_age =
    1243        7462 :         Min(multixact_freeze_table_age,
    1244             :             effective_multixact_freeze_max_age * 0.95);
    1245             :     Assert(multixact_freeze_table_age >= 0);
    1246        7462 :     aggressiveMXIDCutoff = nextMXID - multixact_freeze_table_age;
    1247        7462 :     if (aggressiveMXIDCutoff < FirstMultiXactId)
    1248           0 :         aggressiveMXIDCutoff = FirstMultiXactId;
    1249        7462 :     if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid,
    1250             :                                     aggressiveMXIDCutoff))
    1251           0 :         return true;
    1252             : 
    1253             :     /* Non-aggressive VACUUM */
    1254        7462 :     return false;
    1255             : }
    1256             : 
    1257             : /*
    1258             :  * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe
    1259             :  * mechanism to determine if its table's relfrozenxid and relminmxid are now
    1260             :  * dangerously far in the past.
    1261             :  *
    1262             :  * When we return true, VACUUM caller triggers the failsafe.
    1263             :  */
    1264             : bool
    1265       84582 : vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs)
    1266             : {
    1267       84582 :     TransactionId relfrozenxid = cutoffs->relfrozenxid;
    1268       84582 :     MultiXactId relminmxid = cutoffs->relminmxid;
    1269             :     TransactionId xid_skip_limit;
    1270             :     MultiXactId multi_skip_limit;
    1271             :     int         skip_index_vacuum;
    1272             : 
    1273             :     Assert(TransactionIdIsNormal(relfrozenxid));
    1274             :     Assert(MultiXactIdIsValid(relminmxid));
    1275             : 
    1276             :     /*
    1277             :      * Determine the index skipping age to use: vacuum_failsafe_age, but in
    1278             :      * any case no less than autovacuum_freeze_max_age * 1.05.
    1279             :      */
    1280       84582 :     skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05);
    1281             : 
    1282       84582 :     xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum;
    1283       84582 :     if (!TransactionIdIsNormal(xid_skip_limit))
    1284           0 :         xid_skip_limit = FirstNormalTransactionId;
    1285             : 
    1286       84582 :     if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit))
    1287             :     {
    1288             :         /* The table's relfrozenxid is too old */
    1289           0 :         return true;
    1290             :     }
    1291             : 
    1292             :     /*
    1293             :      * Similar to above, determine the index skipping age to use for
    1294             :      * multixacts: vacuum_multixact_failsafe_age, but in any case no less than
    1295             :      * autovacuum_multixact_freeze_max_age * 1.05.
    1296             :      */
    1297       84582 :     skip_index_vacuum = Max(vacuum_multixact_failsafe_age,
    1298             :                             autovacuum_multixact_freeze_max_age * 1.05);
    1299             : 
    1300       84582 :     multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum;
    1301       84582 :     if (multi_skip_limit < FirstMultiXactId)
    1302           0 :         multi_skip_limit = FirstMultiXactId;
    1303             : 
    1304       84582 :     if (MultiXactIdPrecedes(relminmxid, multi_skip_limit))
    1305             :     {
    1306             :         /* The table's relminmxid is too old */
    1307           0 :         return true;
    1308             :     }
    1309             : 
    1310       84582 :     return false;
    1311             : }
    1312             : 
    1313             : /*
    1314             :  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
    1315             :  *
    1316             :  *      If we scanned the whole relation then we should just use the count of
    1317             :  *      live tuples seen; but if we did not, we should not blindly extrapolate
    1318             :  *      from that number, since VACUUM may have scanned a quite nonrandom
    1319             :  *      subset of the table.  When we have only partial information, we take
    1320             :  *      the old value of pg_class.reltuples/pg_class.relpages as a measurement
    1321             :  *      of the tuple density in the unscanned pages.
    1322             :  *
    1323             :  *      Note: scanned_tuples should count only *live* tuples, since
    1324             :  *      pg_class.reltuples is defined that way.
    1325             :  */
    1326             : double
    1327       73500 : vac_estimate_reltuples(Relation relation,
    1328             :                        BlockNumber total_pages,
    1329             :                        BlockNumber scanned_pages,
    1330             :                        double scanned_tuples)
    1331             : {
    1332       73500 :     BlockNumber old_rel_pages = relation->rd_rel->relpages;
    1333       73500 :     double      old_rel_tuples = relation->rd_rel->reltuples;
    1334             :     double      old_density;
    1335             :     double      unscanned_pages;
    1336             :     double      total_tuples;
    1337             : 
    1338             :     /* If we did scan the whole table, just use the count as-is */
    1339       73500 :     if (scanned_pages >= total_pages)
    1340       73292 :         return scanned_tuples;
    1341             : 
    1342             :     /*
    1343             :      * When successive VACUUM commands scan the same few pages again and
    1344             :      * again, without anything from the table really changing, there is a risk
    1345             :      * that our beliefs about tuple density will gradually become distorted.
    1346             :      * This might be caused by vacuumlazy.c implementation details, such as
    1347             :      * its tendency to always scan the last heap page.  Handle that here.
    1348             :      *
    1349             :      * If the relation is _exactly_ the same size according to the existing
    1350             :      * pg_class entry, and only a few of its pages (less than 2%) were
    1351             :      * scanned, keep the existing value of reltuples.  Also keep the existing
    1352             :      * value when the scanned subset of rel's pages was no more than a single page.
    1353             :      *
    1354             :      * (Note: we might be returning -1 here.)
    1355             :      */
    1356         208 :     if (old_rel_pages == total_pages &&
    1357         182 :         scanned_pages < (double) total_pages * 0.02)
    1358         116 :         return old_rel_tuples;
    1359          92 :     if (scanned_pages <= 1)
    1360          64 :         return old_rel_tuples;
    1361             : 
    1362             :     /*
    1363             :      * If old density is unknown, we can't do much except scale up
    1364             :      * scanned_tuples to match total_pages.
    1365             :      */
    1366          28 :     if (old_rel_tuples < 0 || old_rel_pages == 0)
    1367           0 :         return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
    1368             : 
    1369             :     /*
    1370             :      * Okay, we've covered the corner cases.  The normal calculation is to
    1371             :      * convert the old measurement to a density (tuples per page), then
    1372             :      * estimate the number of tuples in the unscanned pages using that figure,
    1373             :      * and finally add on the number of tuples in the scanned pages.
    1374             :      */
    1375          28 :     old_density = old_rel_tuples / old_rel_pages;
    1376          28 :     unscanned_pages = (double) total_pages - (double) scanned_pages;
    1377          28 :     total_tuples = old_density * unscanned_pages + scanned_tuples;
    1378          28 :     return floor(total_tuples + 0.5);
    1379             : }
    1380             : 
    1381             : 
    1382             : /*
    1383             :  *  vac_update_relstats() -- update statistics for one relation
    1384             :  *
    1385             :  *      Update the whole-relation statistics that are kept in its pg_class
    1386             :  *      row.  There are additional stats that will be updated if we are
    1387             :  *      doing ANALYZE, but we always update these stats.  This routine works
    1388             :  *      for both index and heap relation entries in pg_class.
    1389             :  *
    1390             :  *      We violate transaction semantics here by overwriting the rel's
    1391             :  *      existing pg_class tuple with the new values.  This is reasonably
    1392             :  *      safe as long as we're sure that the new values are correct whether or
    1393             :  *      not this transaction commits.  The reason for doing this is that if
    1394             :  *      we updated these tuples in the usual way, vacuuming pg_class itself
    1395             :  *      wouldn't work very well --- by the time we got done with a vacuum
    1396             :  *      cycle, most of the tuples in pg_class would've been obsoleted.  Of
    1397             :  *      course, this only works for fixed-size not-null columns, but these are.
    1398             :  *
    1399             :  *      Another reason for doing it this way is that when we are in a lazy
    1400             :  *      VACUUM and have PROC_IN_VACUUM set, we mustn't do any regular updates.
    1401             :  *      Somebody vacuuming pg_class might think they could delete a tuple
    1402             :  *      marked with xmin = our xid.
    1403             :  *
    1404             :  *      In addition to fundamentally nontransactional statistics such as
    1405             :  *      relpages and relallvisible, we try to maintain certain lazily-updated
    1406             :  *      DDL flags such as relhasindex, by clearing them if no longer correct.
    1407             :  *      It's safe to do this in VACUUM, which can't run in parallel with
    1408             :  *      CREATE INDEX/RULE/TRIGGER and can't be part of a transaction block.
    1409             :  *      However, it's *not* safe to do it in an ANALYZE that's within an
    1410             :  *      outer transaction, because for example the current transaction might
    1411             :  *      have dropped the last index; then we'd think relhasindex should be
    1412             :  *      cleared, but if the transaction later rolls back this would be wrong.
    1413             :  *      So we refrain from updating the DDL flags if we're inside an outer
    1414             :  *      transaction.  This is OK since postponing the flag maintenance is
    1415             :  *      always allowable.
    1416             :  *
    1417             :  *      Note: num_tuples should count only *live* tuples, since
    1418             :  *      pg_class.reltuples is defined that way.
    1419             :  *
    1420             :  *      This routine is shared by VACUUM and ANALYZE.
    1421             :  */
    1422             : void
    1423      209318 : vac_update_relstats(Relation relation,
    1424             :                     BlockNumber num_pages, double num_tuples,
    1425             :                     BlockNumber num_all_visible_pages,
    1426             :                     bool hasindex, TransactionId frozenxid,
    1427             :                     MultiXactId minmulti,
    1428             :                     bool *frozenxid_updated, bool *minmulti_updated,
    1429             :                     bool in_outer_xact)
    1430             : {
    1431      209318 :     Oid         relid = RelationGetRelid(relation);
    1432             :     Relation    rd;
    1433             :     HeapTuple   ctup;
    1434             :     Form_pg_class pgcform;
    1435             :     bool        dirty,
    1436             :                 futurexid,
    1437             :                 futuremxid;
    1438             :     TransactionId oldfrozenxid;
    1439             :     MultiXactId oldminmulti;
    1440             : 
    1441      209318 :     rd = table_open(RelationRelationId, RowExclusiveLock);
    1442             : 
    1443             :     /* Fetch a copy of the tuple to scribble on */
    1444      209318 :     ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
    1445      209318 :     if (!HeapTupleIsValid(ctup))
    1446           0 :         elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
    1447             :              relid);
    1448      209318 :     pgcform = (Form_pg_class) GETSTRUCT(ctup);
    1449             : 
    1450             :     /* Apply statistical updates, if any, to copied tuple */
    1451             : 
    1452      209318 :     dirty = false;
    1453      209318 :     if (pgcform->relpages != (int32) num_pages)
    1454             :     {
    1455       32078 :         pgcform->relpages = (int32) num_pages;
    1456       32078 :         dirty = true;
    1457             :     }
    1458      209318 :     if (pgcform->reltuples != (float4) num_tuples)
    1459             :     {
    1460       86742 :         pgcform->reltuples = (float4) num_tuples;
    1461       86742 :         dirty = true;
    1462             :     }
    1463      209318 :     if (pgcform->relallvisible != (int32) num_all_visible_pages)
    1464             :     {
    1465       26656 :         pgcform->relallvisible = (int32) num_all_visible_pages;
    1466       26656 :         dirty = true;
    1467             :     }
    1468             : 
    1469             :     /* Apply DDL updates, but not inside an outer transaction (see above) */
    1470             : 
    1471      209318 :     if (!in_outer_xact)
    1472             :     {
    1473             :         /*
    1474             :          * If we didn't find any indexes, reset relhasindex.
    1475             :          */
    1476      209046 :         if (pgcform->relhasindex && !hasindex)
    1477             :         {
    1478          12 :             pgcform->relhasindex = false;
    1479          12 :             dirty = true;
    1480             :         }
    1481             : 
    1482             :         /* We also clear relhasrules and relhastriggers if needed */
    1483      209046 :         if (pgcform->relhasrules && relation->rd_rules == NULL)
    1484             :         {
    1485           0 :             pgcform->relhasrules = false;
    1486           0 :             dirty = true;
    1487             :         }
    1488      209046 :         if (pgcform->relhastriggers && relation->trigdesc == NULL)
    1489             :         {
    1490           6 :             pgcform->relhastriggers = false;
    1491           6 :             dirty = true;
    1492             :         }
    1493             :     }
    1494             : 
    1495             :     /*
    1496             :      * Update relfrozenxid, unless caller passed InvalidTransactionId
    1497             :      * indicating it has no new data.
    1498             :      *
    1499             :      * Ordinarily, we don't let relfrozenxid go backwards.  However, if the
    1500             :      * stored relfrozenxid is "in the future" then it seems best to assume
    1501             :      * it's corrupt, and overwrite with the oldest remaining XID in the table.
    1502             :      * This should match vac_update_datfrozenxid() concerning what we consider
    1503             :      * to be "in the future".
    1504             :      */
    1505      209318 :     oldfrozenxid = pgcform->relfrozenxid;
    1506      209318 :     futurexid = false;
    1507      209318 :     if (frozenxid_updated)
    1508       73496 :         *frozenxid_updated = false;
    1509      209318 :     if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
    1510             :     {
    1511       71692 :         bool        update = false;
    1512             : 
    1513       71692 :         if (TransactionIdPrecedes(oldfrozenxid, frozenxid))
    1514       71640 :             update = true;
    1515          52 :         else if (TransactionIdPrecedes(ReadNextTransactionId(), oldfrozenxid))
    1516           0 :             futurexid = update = true;
    1517             : 
    1518       71692 :         if (update)
    1519             :         {
    1520       71640 :             pgcform->relfrozenxid = frozenxid;
    1521       71640 :             dirty = true;
    1522       71640 :             if (frozenxid_updated)
    1523       71640 :                 *frozenxid_updated = true;
    1524             :         }
    1525             :     }
    1526             : 
    1527             :     /* Similarly for relminmxid */
    1528      209318 :     oldminmulti = pgcform->relminmxid;
    1529      209318 :     futuremxid = false;
    1530      209318 :     if (minmulti_updated)
    1531       73496 :         *minmulti_updated = false;
    1532      209318 :     if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
    1533             :     {
    1534          58 :         bool        update = false;
    1535             : 
    1536          58 :         if (MultiXactIdPrecedes(oldminmulti, minmulti))
    1537          58 :             update = true;
    1538           0 :         else if (MultiXactIdPrecedes(ReadNextMultiXactId(), oldminmulti))
    1539           0 :             futuremxid = update = true;
    1540             : 
    1541          58 :         if (update)
    1542             :         {
    1543          58 :             pgcform->relminmxid = minmulti;
    1544          58 :             dirty = true;
    1545          58 :             if (minmulti_updated)
    1546          58 :                 *minmulti_updated = true;
    1547             :         }
    1548             :     }
    1549             : 
    1550             :     /* If anything changed, write out the tuple. */
    1551      209318 :     if (dirty)
    1552      131788 :         heap_inplace_update(rd, ctup);
    1553             : 
    1554      209318 :     table_close(rd, RowExclusiveLock);
    1555             : 
    1556      209318 :     if (futurexid)
    1557           0 :         ereport(WARNING,
    1558             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1559             :                  errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
    1560             :                                  oldfrozenxid, frozenxid,
    1561             :                                  RelationGetRelationName(relation))));
    1562      209318 :     if (futuremxid)
    1563           0 :         ereport(WARNING,
    1564             :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1565             :                  errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
    1566             :                                  oldminmulti, minmulti,
    1567             :                                  RelationGetRelationName(relation))));
    1568      209318 : }
    1569             : 
    1570             : 
    1571             : /*
    1572             :  *  vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
    1573             :  *
    1574             :  *      Update pg_database's datfrozenxid entry for our database to be the
    1575             :  *      minimum of the pg_class.relfrozenxid values.
    1576             :  *
    1577             :  *      Similarly, update our datminmxid to be the minimum of the
    1578             :  *      pg_class.relminmxid values.
    1579             :  *
    1580             :  *      If we are able to advance either pg_database value, also try to
    1581             :  *      truncate pg_xact and pg_multixact.
    1582             :  *
    1583             :  *      We violate transaction semantics here by overwriting the database's
    1584             :  *      existing pg_database tuple with the new values.  This is reasonably
    1585             :  *      safe since the new values are correct whether or not this transaction
    1586             :  *      commits.  As with vac_update_relstats, this avoids leaving dead tuples
    1587             :  *      behind after a VACUUM.
                     :  *
                     :  *      Takes no arguments and returns nothing.  The work is serialized per
                     :  *      database via LockDatabaseFrozenIds(ExclusiveLock).  If any pg_class
                     :  *      entry shows a relfrozenxid or relminmxid later than the next
                     :  *      XID/MultiXactId, the function returns after closing pg_class and
                     :  *      leaves pg_database untouched.
                     :  *
                     :  *      NOTE(review): no explicit unlock of the frozen-IDs lock appears in
                     :  *      this function; presumably it is released at transaction end --
                     :  *      confirm against LockDatabaseFrozenIds().
    1588             :  */
    1589             : void
    1590        2484 : vac_update_datfrozenxid(void)
    1591             : {
    1592             :     HeapTuple   tuple;
    1593             :     Form_pg_database dbform;
    1594             :     Relation    relation;
    1595             :     SysScanDesc scan;
    1596             :     HeapTuple   classTup;
    1597             :     TransactionId newFrozenXid;
    1598             :     MultiXactId newMinMulti;
    1599             :     TransactionId lastSaneFrozenXid;
    1600             :     MultiXactId lastSaneMinMulti;
    1601        2484 :     bool        bogus = false;  /* saw a pg_class value "in the future" */
    1602        2484 :     bool        dirty = false;  /* pg_database tuple was modified */
    1603             :     ScanKeyData key[1];
    1604             : 
    1605             :     /*
    1606             :      * Restrict this task to one backend per database.  This avoids race
    1607             :      * conditions that would move datfrozenxid or datminmxid backward.  It
    1608             :      * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
    1609             :      * datfrozenxid passed to an earlier vac_truncate_clog() call.
    1610             :      */
    1611        2484 :     LockDatabaseFrozenIds(ExclusiveLock);
    1612             : 
    1613             :     /*
    1614             :      * Initialize the "min" calculation with
    1615             :      * GetOldestNonRemovableTransactionId(), which is a reasonable
    1616             :      * approximation to the minimum relfrozenxid for not-yet-committed
    1617             :      * pg_class entries for new tables; see AddNewRelationTuple().  So we
    1618             :      * cannot produce a wrong minimum by starting with this.
    1619             :      */
    1620        2484 :     newFrozenXid = GetOldestNonRemovableTransactionId(NULL);
    1621             : 
    1622             :     /*
    1623             :      * Similarly, initialize the MultiXact "min" with the value that would be
    1624             :      * used on pg_class for new tables.  See AddNewRelationTuple().
    1625             :      */
    1626        2484 :     newMinMulti = GetOldestMultiXactId();
    1627             : 
    1628             :     /*
    1629             :      * Identify the latest relfrozenxid and relminmxid values that we could
    1630             :      * validly see during the scan.  These are conservative values, but it's
    1631             :      * not really worth trying to be more exact.
                     :      *
                     :      * The same two values are later used to sanity-check the stored
                     :      * pg_database values and are handed on to vac_truncate_clog().
    1632             :      */
    1633        2484 :     lastSaneFrozenXid = ReadNextTransactionId();
    1634        2484 :     lastSaneMinMulti = ReadNextMultiXactId();
    1635             : 
    1636             :     /*
    1637             :      * We must seqscan pg_class to find the minimum Xid, because there is no
    1638             :      * index that can help us here.
    1639             :      */
    1640        2484 :     relation = table_open(RelationRelationId, AccessShareLock);
    1641             : 
    1642        2484 :     scan = systable_beginscan(relation, InvalidOid, false,
    1643             :                               NULL, 0, NULL);   /* no index, no scan keys */
    1644             : 
    1645     1345396 :     while ((classTup = systable_getnext(scan)) != NULL)
    1646             :     {
    1647     1342912 :         Form_pg_class classForm = (Form_pg_class) GETSTRUCT(classTup);
    1648             : 
    1649             :         /*
    1650             :          * Only consider relations able to hold unfrozen XIDs (anything else
    1651             :          * should have InvalidTransactionId in relfrozenxid anyway).
    1652             :          */
    1653     1342912 :         if (classForm->relkind != RELKIND_RELATION &&
    1654     1050070 :             classForm->relkind != RELKIND_MATVIEW &&
    1655     1048130 :             classForm->relkind != RELKIND_TOASTVALUE)
    1656             :         {
    1657             :             Assert(!TransactionIdIsValid(classForm->relfrozenxid));
    1658             :             Assert(!MultiXactIdIsValid(classForm->relminmxid));
    1659      894218 :             continue;
    1660             :         }
    1661             : 
    1662             :         /*
    1663             :          * Some table AMs might not need per-relation xid / multixid horizons.
    1664             :          * It therefore seems reasonable to allow relfrozenxid and relminmxid
    1665             :          * to not be set (i.e. set to their respective Invalid*Id)
    1666             :          * independently. Thus validate and compute horizon for each only if
    1667             :          * set.
    1668             :          *
    1669             :          * If things are working properly, no relation should have a
    1670             :          * relfrozenxid or relminmxid that is "in the future".  However, such
    1671             :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1672             :          * see any entries that are "in the future", chicken out and don't do
    1673             :          * anything.  This ensures we won't truncate clog & multixact SLRUs
    1674             :          * before those relations have been scanned and cleaned up.
                     :          *
                     :          * "Chicken out" is implemented by setting bogus and breaking out of
                     :          * the scan loop; the scan and pg_class are still closed below before
                     :          * the function returns.
    1675             :          */
    1676             : 
    1677      448694 :         if (TransactionIdIsValid(classForm->relfrozenxid))
    1678             :         {
    1679             :             Assert(TransactionIdIsNormal(classForm->relfrozenxid));
    1680             : 
    1681             :             /* check for values in the future */
    1682      448694 :             if (TransactionIdPrecedes(lastSaneFrozenXid, classForm->relfrozenxid))
    1683             :             {
    1684           0 :                 bogus = true;
    1685           0 :                 break;
    1686             :             }
    1687             : 
    1688             :             /* determine new horizon */
    1689      448694 :             if (TransactionIdPrecedes(classForm->relfrozenxid, newFrozenXid))
    1690        3380 :                 newFrozenXid = classForm->relfrozenxid;
    1691             :         }
    1692             : 
    1693      448694 :         if (MultiXactIdIsValid(classForm->relminmxid))
    1694             :         {
    1695             :             /* check for values in the future */
    1696      448694 :             if (MultiXactIdPrecedes(lastSaneMinMulti, classForm->relminmxid))
    1697             :             {
    1698           0 :                 bogus = true;
    1699           0 :                 break;
    1700             :             }
    1701             : 
    1702             :             /* determine new horizon */
    1703      448694 :             if (MultiXactIdPrecedes(classForm->relminmxid, newMinMulti))
    1704         190 :                 newMinMulti = classForm->relminmxid;
    1705             :         }
    1706             :     }
    1707             : 
    1708             :     /* we're done with pg_class */
    1709        2484 :     systable_endscan(scan);
    1710        2484 :     table_close(relation, AccessShareLock);
    1711             : 
    1712             :     /* chicken out if bogus data found */
    1713        2484 :     if (bogus)
    1714           0 :         return;
    1715             : 
    1716             :     Assert(TransactionIdIsNormal(newFrozenXid));
    1717             :     Assert(MultiXactIdIsValid(newMinMulti));
    1718             : 
    1719             :     /* Now fetch the pg_database tuple we need to update. */
    1720        2484 :     relation = table_open(DatabaseRelationId, RowExclusiveLock);
    1721             : 
    1722             :     /*
    1723             :      * Get the pg_database tuple to scribble on.  Note that this does not
    1724             :      * directly rely on the syscache to avoid issues with flattened toast
    1725             :      * values for the in-place update.
                     :      *
                     :      * The row is looked up by OID through DatabaseOidIndexId, copied, and
                     :      * the scan is closed; validity is then tested on the copy.
    1726             :      */
    1727        2484 :     ScanKeyInit(&key[0],
    1728             :                 Anum_pg_database_oid,
    1729             :                 BTEqualStrategyNumber, F_OIDEQ,
    1730             :                 ObjectIdGetDatum(MyDatabaseId));
    1731             : 
    1732        2484 :     scan = systable_beginscan(relation, DatabaseOidIndexId, true,
    1733             :                               NULL, 1, key);
    1734        2484 :     tuple = systable_getnext(scan);
    1735        2484 :     tuple = heap_copytuple(tuple);  /* presumably yields an invalid tuple for invalid input -- confirm */
    1736        2484 :     systable_endscan(scan);
    1737             : 
    1738        2484 :     if (!HeapTupleIsValid(tuple))
    1739           0 :         elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
    1740             : 
    1741        2484 :     dbform = (Form_pg_database) GETSTRUCT(tuple);
    1742             : 
    1743             :     /*
    1744             :      * As in vac_update_relstats(), we ordinarily don't want to let
    1745             :      * datfrozenxid go backward; but if it's "in the future" then it must be
    1746             :      * corrupt and it seems best to overwrite it.
                     :      *
                     :      * When the stored value is kept, newFrozenXid is reset to it so that
                     :      * the value passed to vac_truncate_clog() matches what is in the tuple.
    1747             :      */
    1748        3118 :     if (dbform->datfrozenxid != newFrozenXid &&
    1749         634 :         (TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
    1750           0 :          TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
    1751             :     {
    1752         634 :         dbform->datfrozenxid = newFrozenXid;
    1753         634 :         dirty = true;
    1754             :     }
    1755             :     else
    1756        1850 :         newFrozenXid = dbform->datfrozenxid;
    1757             : 
    1758             :     /* Ditto for datminmxid */
    1759        2484 :     if (dbform->datminmxid != newMinMulti &&
    1760           0 :         (MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
    1761           0 :          MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
    1762             :     {
    1763           0 :         dbform->datminmxid = newMinMulti;
    1764           0 :         dirty = true;
    1765             :     }
    1766             :     else
    1767        2484 :         newMinMulti = dbform->datminmxid;
    1768             : 
    1769        2484 :     if (dirty)
    1770         634 :         heap_inplace_update(relation, tuple);   /* overwrite the existing row; see header comment */
    1771             : 
    1772        2484 :     heap_freetuple(tuple);
    1773        2484 :     table_close(relation, RowExclusiveLock);
    1774             : 
    1775             :     /*
    1776             :      * If we were able to advance datfrozenxid or datminmxid, see if we can
    1777             :      * truncate pg_xact and/or pg_multixact.  Also do it if the shared
    1778             :      * XID-wrap-limit info is stale, since this action will update that too.
    1779             :      */
    1780        2484 :     if (dirty || ForceTransactionIdLimitUpdate())
    1781         634 :         vac_truncate_clog(newFrozenXid, newMinMulti,
    1782             :                           lastSaneFrozenXid, lastSaneMinMulti);
    1783             : }
    1784             : 
    1785             : 
    1786             : /*
    1787             :  *  vac_truncate_clog() -- attempt to truncate the commit log
    1788             :  *
    1789             :  *      Scan pg_database to determine the system-wide oldest datfrozenxid,
    1790             :  *      and use it to truncate the transaction commit log (pg_xact).
    1791             :  *      Also update the XID wrap limit info maintained by varsup.c.
    1792             :  *      Likewise for datminmxid.
    1793             :  *
    1794             :  *      The passed frozenXID and minMulti are the updated values for my own
    1795             :  *      pg_database entry. They're used to initialize the "min" calculations.
    1796             :  *      The caller also passes the "last sane" XID and MXID, since it has
    1797             :  *      those at hand already.
    1798             :  *
    1799             :  *      This routine is only invoked when we've managed to change our
    1800             :  *      DB's datfrozenxid/datminmxid values, or we found that the shared
    1801             :  *      XID-wrap-limit info is stale.
                     :  *
                     :  *      WrapLimitsVacuumLock is held in exclusive mode for the whole routine
                     :  *      and must be released on every exit path, including the early returns
                     :  *      taken when wraparound or bogus pg_database values are detected.
    1802             :  */
    1803             : static void
    1804         634 : vac_truncate_clog(TransactionId frozenXID,
    1805             :                   MultiXactId minMulti,
    1806             :                   TransactionId lastSaneFrozenXid,
    1807             :                   MultiXactId lastSaneMinMulti)
    1808             : {
    1809         634 :     TransactionId nextXID = ReadNextTransactionId();
    1810             :     Relation    relation;
    1811             :     TableScanDesc scan;
    1812             :     HeapTuple   tuple;
    1813             :     Oid         oldestxid_datoid;
    1814             :     Oid         minmulti_datoid;
    1815         634 :     bool        bogus = false;
    1816         634 :     bool        frozenAlreadyWrapped = false;
    1817             : 
    1818             :     /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
    1819         634 :     LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
    1820             : 
    1821             :     /* init oldest datoids to sync with my frozenXID/minMulti values */
    1822         634 :     oldestxid_datoid = MyDatabaseId;
    1823         634 :     minmulti_datoid = MyDatabaseId;
    1824             : 
    1825             :     /*
    1826             :      * Scan pg_database to compute the minimum datfrozenxid/datminmxid
    1827             :      *
    1828             :      * Since vac_update_datfrozenxid updates datfrozenxid/datminmxid in-place,
    1829             :      * the values could change while we look at them.  Fetch each one just
    1830             :      * once to ensure sane behavior of the comparison logic.  (Here, as in
    1831             :      * many other places, we assume that fetching or updating an XID in shared
    1832             :      * storage is atomic.)
    1833             :      *
    1834             :      * Note: we need not worry about a race condition with new entries being
    1835             :      * inserted by CREATE DATABASE.  Any such entry will have a copy of some
    1836             :      * existing DB's datfrozenxid, and that source DB cannot be ours because
    1837             :      * of the interlock against copying a DB containing an active backend.
    1838             :      * Hence the new entry will not reduce the minimum.  Also, if two VACUUMs
    1839             :      * concurrently modify the datfrozenxid's of different databases, the
    1840             :      * worst possible outcome is that pg_xact is not truncated as aggressively
    1841             :      * as it could be.
    1842             :      */
    1843         634 :     relation = table_open(DatabaseRelationId, AccessShareLock);
    1844             : 
    1845         634 :     scan = table_beginscan_catalog(relation, 0, NULL);
    1846             : 
    1847        1344 :     while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1848             :     {
    1849         710 :         volatile FormData_pg_database *dbform = (Form_pg_database) GETSTRUCT(tuple);
    1850         710 :         TransactionId datfrozenxid = dbform->datfrozenxid;
    1851         710 :         TransactionId datminmxid = dbform->datminmxid;
    1852             : 
    1853             :         Assert(TransactionIdIsNormal(datfrozenxid));
    1854             :         Assert(MultiXactIdIsValid(datminmxid));
    1855             : 
    1856             :         /*
    1857             :          * If things are working properly, no database should have a
    1858             :          * datfrozenxid or datminmxid that is "in the future".  However, such
    1859             :          * cases have been known to arise due to bugs in pg_upgrade.  If we
    1860             :          * see any entries that are "in the future", chicken out and don't do
    1861             :          * anything.  This ensures we won't truncate clog before those
    1862             :          * databases have been scanned and cleaned up.  (We will issue the
    1863             :          * "already wrapped" warning if appropriate, though.)
    1864             :          */
    1865        1420 :         if (TransactionIdPrecedes(lastSaneFrozenXid, datfrozenxid) ||
    1866         710 :             MultiXactIdPrecedes(lastSaneMinMulti, datminmxid))
    1867           0 :             bogus = true;
    1868             : 
    1869         710 :         if (TransactionIdPrecedes(nextXID, datfrozenxid))
    1870           0 :             frozenAlreadyWrapped = true;
    1871         710 :         else if (TransactionIdPrecedes(datfrozenxid, frozenXID))
    1872             :         {
    1873          32 :             frozenXID = datfrozenxid;
    1874          32 :             oldestxid_datoid = dbform->oid;
    1875             :         }
    1876             : 
    1877         710 :         if (MultiXactIdPrecedes(datminmxid, minMulti))
    1878             :         {
    1879           0 :             minMulti = datminmxid;
    1880           0 :             minmulti_datoid = dbform->oid;
    1881             :         }
    1882             :     }
    1883             : 
    1884         634 :     table_endscan(scan);
    1885             : 
    1886         634 :     table_close(relation, AccessShareLock);
    1887             : 
    1888             :     /*
    1889             :      * Do not truncate CLOG if we seem to have suffered wraparound already;
    1890             :      * the computed minimum XID might be bogus.  This case should now be
    1891             :      * impossible due to the defenses in GetNewTransactionId, but we keep the
    1892             :      * test anyway.
    1893             :      */
    1894         634 :     if (frozenAlreadyWrapped)
    1895             :     {
    1896           0 :         ereport(WARNING,
    1897             :                 (errmsg("some databases have not been vacuumed in over 2 billion transactions"),
    1898             :                  errdetail("You might have already suffered transaction-wraparound data loss.")));
                     :         /* early exit: drop the lock, else it stays held after we return */
                     :         LWLockRelease(WrapLimitsVacuumLock);
    1899           0 :         return;
    1900             :     }
    1901             : 
    1902             :     /* chicken out if data is bogus in any other way */
    1903         634 :     if (bogus)
                     :     {
                     :         /* early exit: drop the lock here too */
                     :         LWLockRelease(WrapLimitsVacuumLock);
    1904           0 :         return;
                     :     }
    1905             : 
    1906             :     /*
    1907             :      * Advance the oldest value for commit timestamps before truncating, so
    1908             :      * that if a user requests a timestamp for a transaction we're truncating
    1909             :      * away right after this point, they get NULL instead of an ugly "file not
    1910             :      * found" error from slru.c.  This doesn't matter for xact/multixact
    1911             :      * because they are not subject to arbitrary lookups from users.
    1912             :      */
    1913         634 :     AdvanceOldestCommitTsXid(frozenXID);
    1914             : 
    1915             :     /*
    1916             :      * Truncate CLOG, multixact and CommitTs to the oldest computed value.
    1917             :      */
    1918         634 :     TruncateCLOG(frozenXID, oldestxid_datoid);
    1919         634 :     TruncateCommitTs(frozenXID);
    1920         634 :     TruncateMultiXact(minMulti, minmulti_datoid);
    1921             : 
    1922             :     /*
    1923             :      * Update the wrap limit for GetNewTransactionId and creation of new
    1924             :      * MultiXactIds.  Note: these functions will also signal the postmaster
    1925             :      * for an(other) autovac cycle if needed.   XXX should we avoid possibly
    1926             :      * signaling twice?
    1927             :      */
    1928         634 :     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
    1929         634 :     SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
    1930             : 
    1931         634 :     LWLockRelease(WrapLimitsVacuumLock);
    1932             : }
    1933             : 
    1934             : 
    1935             : /*
    1936             :  *  vacuum_rel() -- vacuum one heap relation
    1937             :  *
    1938             :  *      relid identifies the relation to vacuum.  If relation is supplied,
    1939             :  *      use the name therein for reporting any failure to open/lock the rel;
    1940             :  *      do not use it once we've successfully opened the rel, since it might
    1941             :  *      be stale.
    1942             :  *
    1943             :  *      Returns true if it's okay to proceed with a requested ANALYZE
    1944             :  *      operation on this table.
    1945             :  *
    1946             :  *      Doing one heap at a time incurs extra overhead, since we need to
    1947             :  *      check that the heap exists again just before we vacuum it.  The
    1948             :  *      reason that we do this is so that vacuuming can be spread across
    1949             :  *      many small transactions.  Otherwise, two-phase locking would require
    1950             :  *      us to lock the entire database during one pass of the vacuum cleaner.
    1951             :  *
    1952             :  *      At entry and exit, we are not inside a transaction.
    1953             :  */
    1954             : static bool
    1955       74190 : vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
    1956             :            bool skip_privs, BufferAccessStrategy bstrategy)
    1957             : {
    1958             :     LOCKMODE    lmode;
    1959             :     Relation    rel;
    1960             :     LockRelId   lockrelid;
    1961             :     Oid         toast_relid;
    1962             :     Oid         save_userid;
    1963             :     int         save_sec_context;
    1964             :     int         save_nestlevel;
    1965             : 
    1966             :     Assert(params != NULL);
    1967             : 
    1968             :     /* Begin a transaction for vacuuming this relation */
    1969       74190 :     StartTransactionCommand();
    1970             : 
    1971       74190 :     if (!(params->options & VACOPT_FULL))
    1972             :     {
    1973             :         /*
    1974             :          * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
    1975             :          * other concurrent VACUUMs know that they can ignore this one while
    1976             :          * determining their OldestXmin.  (The reason we don't set it during a
    1977             :          * full VACUUM is exactly that we may have to run user-defined
    1978             :          * functions for functional indexes, and we want to make sure that if
    1979             :          * they use the snapshot set above, any tuples it requires can't get
    1980             :          * removed from other tables.  An index function that depends on the
    1981             :          * contents of other tables is arguably broken, but we won't break it
    1982             :          * here by violating transaction semantics.)
    1983             :          *
    1984             :          * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
    1985             :          * autovacuum; it's used to avoid canceling a vacuum that was invoked
    1986             :          * in an emergency.
    1987             :          *
    1988             :          * Note: these flags remain set until CommitTransaction or
    1989             :          * AbortTransaction.  We don't want to clear them until we reset
    1990             :          * MyProc->xid/xmin, otherwise GetOldestNonRemovableTransactionId()
    1991             :          * might appear to go backwards, which is probably Not Good.  (We also
    1992             :          * set PROC_IN_VACUUM *before* taking our own snapshot, so that our
    1993             :          * xmin doesn't become visible ahead of setting the flag.)
    1994             :          */
    1995       73828 :         LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
    1996       73828 :         MyProc->statusFlags |= PROC_IN_VACUUM;
    1997       73828 :         if (params->is_wraparound)
    1998           0 :             MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
    1999       73828 :         ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
    2000       73828 :         LWLockRelease(ProcArrayLock);
    2001             :     }
    2002             : 
    2003             :     /*
    2004             :      * Need to acquire a snapshot to prevent pg_subtrans from being truncated,
    2005             :      * cutoff xids in local memory wrapping around, and to have updated xmin
    2006             :      * horizons.
    2007             :      */
    2008       74190 :     PushActiveSnapshot(GetTransactionSnapshot());
    2009             : 
    2010             :     /*
    2011             :      * Check for user-requested abort.  Note we want this to be inside a
    2012             :      * transaction, so xact.c doesn't issue useless WARNING.
    2013             :      */
    2014       74190 :     CHECK_FOR_INTERRUPTS();
    2015             : 
    2016             :     /*
    2017             :      * Determine the type of lock we want --- hard exclusive lock for a FULL
    2018             :      * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
    2019             :      * way, we can be sure that no other backend is vacuuming the same table.
    2020             :      */
    2021      148380 :     lmode = (params->options & VACOPT_FULL) ?
    2022       74190 :         AccessExclusiveLock : ShareUpdateExclusiveLock;
    2023             : 
    2024             :     /* open the relation and get the appropriate lock on it */
    2025       74190 :     rel = vacuum_open_relation(relid, relation, params->options,
    2026       74190 :                                params->log_min_duration >= 0, lmode);
    2027             : 
    2028             :     /* leave if relation could not be opened or locked */
    2029       74190 :     if (!rel)
    2030             :     {
    2031          24 :         PopActiveSnapshot();
    2032          24 :         CommitTransactionCommand();
    2033          24 :         return false;
    2034             :     }
    2035             : 
    2036             :     /*
    2037             :      * Check if relation needs to be skipped based on privileges.  This check
    2038             :      * happens also when building the relation list to vacuum for a manual
    2039             :      * operation, and needs to be done additionally here as VACUUM could
    2040             :      * happen across multiple transactions where privileges could have changed
    2041             :      * in-between.  Make sure to only generate logs for VACUUM in this case.
    2042             :      */
    2043       74166 :     if (!skip_privs &&
    2044       47096 :         !vacuum_is_permitted_for_relation(RelationGetRelid(rel),
    2045             :                                           rel->rd_rel,
    2046       47096 :                                           params->options & VACOPT_VACUUM))
    2047             :     {
    2048          36 :         relation_close(rel, lmode);
    2049          36 :         PopActiveSnapshot();
    2050          36 :         CommitTransactionCommand();
    2051          36 :         return false;
    2052             :     }
    2053             : 
    2054             :     /*
    2055             :      * Check that it's of a vacuumable relkind.
    2056             :      */
    2057       74130 :     if (rel->rd_rel->relkind != RELKIND_RELATION &&
    2058       27234 :         rel->rd_rel->relkind != RELKIND_MATVIEW &&
    2059       27226 :         rel->rd_rel->relkind != RELKIND_TOASTVALUE &&
    2060         152 :         rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
    2061             :     {
    2062           2 :         ereport(WARNING,
    2063             :                 (errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
    2064             :                         RelationGetRelationName(rel))));
    2065           2 :         relation_close(rel, lmode);
    2066           2 :         PopActiveSnapshot();
    2067           2 :         CommitTransactionCommand();
    2068           2 :         return false;
    2069             :     }
    2070             : 
    2071             :     /*
    2072             :      * Silently ignore tables that are temp tables of other backends ---
    2073             :      * trying to vacuum these will lead to great unhappiness, since their
    2074             :      * contents are probably not up-to-date on disk.  (We don't throw a
    2075             :      * warning here; it would just lead to chatter during a database-wide
    2076             :      * VACUUM.)
    2077             :      */
    2078       74128 :     if (RELATION_IS_OTHER_TEMP(rel))
    2079             :     {
    2080           0 :         relation_close(rel, lmode);
    2081           0 :         PopActiveSnapshot();
    2082           0 :         CommitTransactionCommand();
    2083           0 :         return false;
    2084             :     }
    2085             : 
    2086             :     /*
    2087             :      * Silently ignore partitioned tables as there is no work to be done.  The
    2088             :      * useful work is on their child partitions, which have been queued up for
    2089             :      * us separately.
    2090             :      */
    2091       74128 :     if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    2092             :     {
    2093         150 :         relation_close(rel, lmode);
    2094         150 :         PopActiveSnapshot();
    2095         150 :         CommitTransactionCommand();
    2096             :         /* It's OK to proceed with ANALYZE on this table */
    2097         150 :         return true;
    2098             :     }
    2099             : 
    2100             :     /*
    2101             :      * Get a session-level lock too. This will protect our access to the
    2102             :      * relation across multiple transactions, so that we can vacuum the
    2103             :      * relation's TOAST table (if any) secure in the knowledge that no one is
    2104             :      * deleting the parent relation.
    2105             :      *
    2106             :      * NOTE: this cannot block, even if someone else is waiting for access,
    2107             :      * because the lock manager knows that both lock requests are from the
    2108             :      * same process.
    2109             :      */
    2110       73978 :     lockrelid = rel->rd_lockInfo.lockRelId;
    2111       73978 :     LockRelationIdForSession(&lockrelid, lmode);
    2112             : 
    2113             :     /*
    2114             :      * Set index_cleanup option based on index_cleanup reloption if it wasn't
    2115             :      * specified in VACUUM command, or when running in an autovacuum worker
    2116             :      */
    2117       73978 :     if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
    2118             :     {
    2119             :         StdRdOptIndexCleanup vacuum_index_cleanup;
    2120             : 
    2121        4920 :         if (rel->rd_options == NULL)
    2122        4660 :             vacuum_index_cleanup = STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO;
    2123             :         else
    2124         260 :             vacuum_index_cleanup =
    2125         260 :                 ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
    2126             : 
    2127        4920 :         if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
    2128        4896 :             params->index_cleanup = VACOPTVALUE_AUTO;
    2129          24 :         else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
    2130          12 :             params->index_cleanup = VACOPTVALUE_ENABLED;
    2131             :         else
    2132             :         {
    2133             :             Assert(vacuum_index_cleanup ==
    2134             :                    STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
    2135          12 :             params->index_cleanup = VACOPTVALUE_DISABLED;
    2136             :         }
    2137             :     }
    2138             : 
    2139             :     /*
    2140             :      * Set truncate option based on truncate reloption if it wasn't specified
    2141             :      * in VACUUM command, or when running in an autovacuum worker
    2142             :      */
    2143       73978 :     if (params->truncate == VACOPTVALUE_UNSPECIFIED)
    2144             :     {
    2145        4944 :         if (rel->rd_options == NULL ||
    2146         260 :             ((StdRdOptions *) rel->rd_options)->vacuum_truncate)
    2147        4938 :             params->truncate = VACOPTVALUE_ENABLED;
    2148             :         else
    2149           6 :             params->truncate = VACOPTVALUE_DISABLED;
    2150             :     }
    2151             : 
    2152             :     /*
    2153             :      * Remember the relation's TOAST relation for later, if the caller asked
    2154             :      * us to process it.  In VACUUM FULL, though, the toast table is
    2155             :      * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
    2156             :      * unless PROCESS_MAIN is disabled.
    2157             :      */
    2158       73978 :     if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
    2159       73644 :         ((params->options & VACOPT_FULL) == 0 ||
    2160         334 :          (params->options & VACOPT_PROCESS_MAIN) == 0))
    2161       73316 :         toast_relid = rel->rd_rel->reltoastrelid;
    2162             :     else
    2163         662 :         toast_relid = InvalidOid;
    2164             : 
    2165             :     /*
    2166             :      * Switch to the table owner's userid, so that any index functions are run
    2167             :      * as that user.  Also lock down security-restricted operations and
    2168             :      * arrange to make GUC variable changes local to this command. (This is
    2169             :      * unnecessary, but harmless, for lazy VACUUM.)
    2170             :      */
    2171       73978 :     GetUserIdAndSecContext(&save_userid, &save_sec_context);
    2172       73978 :     SetUserIdAndSecContext(rel->rd_rel->relowner,
    2173             :                            save_sec_context | SECURITY_RESTRICTED_OPERATION);
    2174       73978 :     save_nestlevel = NewGUCNestLevel();
    2175             : 
    2176             :     /*
    2177             :      * If PROCESS_MAIN is set (the default), it's time to vacuum the main
    2178             :      * relation.  Otherwise, we can skip this part.  If processing the TOAST
    2179             :      * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
    2180             :      * to be set when we recurse to the TOAST table.
    2181             :      */
    2182       73978 :     if (params->options & VACOPT_PROCESS_MAIN)
    2183             :     {
    2184             :         /*
    2185             :          * Do the actual work --- either FULL or "lazy" vacuum
    2186             :          */
    2187       73824 :         if (params->options & VACOPT_FULL)
    2188             :         {
    2189         328 :             ClusterParams cluster_params = {0};
    2190             : 
    2191             :             /* close relation before vacuuming, but hold lock until commit */
    2192         328 :             relation_close(rel, NoLock);
    2193         328 :             rel = NULL;
    2194             : 
    2195         328 :             if ((params->options & VACOPT_VERBOSE) != 0)
    2196           0 :                 cluster_params.options |= CLUOPT_VERBOSE;
    2197             : 
    2198             :             /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
    2199         328 :             cluster_rel(relid, InvalidOid, &cluster_params);
    2200             :         }
    2201             :         else
    2202       73496 :             table_relation_vacuum(rel, params, bstrategy);
    2203             :     }
    2204             : 
    2205             :     /* Roll back any GUC changes executed by index functions */
    2206       73972 :     AtEOXact_GUC(false, save_nestlevel);
    2207             : 
    2208             :     /* Restore userid and security context */
    2209       73972 :     SetUserIdAndSecContext(save_userid, save_sec_context);
    2210             : 
    2211             :     /* all done with this class, but hold lock until commit */
    2212       73972 :     if (rel)
    2213       73650 :         relation_close(rel, NoLock);
    2214             : 
    2215             :     /*
    2216             :      * Complete the transaction and free all temporary memory used.
    2217             :      */
    2218       73972 :     PopActiveSnapshot();
    2219       73972 :     CommitTransactionCommand();
    2220             : 
    2221             :     /*
    2222             :      * If the relation has a secondary toast rel, vacuum that too while we
    2223             :      * still hold the session lock on the main table.  Note however that
    2224             :      * "analyze" will not get done on the toast table.  This is good, because
    2225             :      * the toaster always uses hardcoded index access and statistics are
    2226             :      * totally unimportant for toast relations.
    2227             :      */
    2228       73972 :     if (toast_relid != InvalidOid)
    2229             :     {
    2230             :         VacuumParams toast_vacuum_params;
    2231             : 
    2232             :         /* force VACOPT_PROCESS_MAIN so vacuum_rel() processes it */
    2233       27070 :         memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
    2234       27070 :         toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
    2235             : 
    2236       27070 :         vacuum_rel(toast_relid, NULL, &toast_vacuum_params, true, bstrategy);
    2237             :     }
    2238             : 
    2239             :     /*
    2240             :      * Now release the session-level lock on the main table.
    2241             :      */
    2242       73972 :     UnlockRelationIdForSession(&lockrelid, lmode);
    2243             : 
    2244             :     /* Report that we really did it. */
    2245       73972 :     return true;
    2246             : }
    2247             : 
    2248             : 
    2249             : /*
    2250             :  * Open all the vacuumable indexes of the given relation, obtaining the
    2251             :  * specified kind of lock on each.  Return a palloc'd array of Relation
    2252             :  * pointers into *Irel (NULL if it has no indexes) and their count into *nindexes.
    2253             :  *
    2254             :  * We consider an index vacuumable if it is marked insertable (indisready).
    2255             :  * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in
    2256             :  * execution, and what we have is too corrupt to be processable.  We will
    2257             :  * vacuum even if the index isn't indisvalid; this is important because in a
    2258             :  * unique index, uniqueness checks will be performed anyway and had better not
    2259             :  * hit dangling index pointers.
    2260             :  */
    2261             : void
    2262      120134 : vac_open_indexes(Relation relation, LOCKMODE lockmode,
    2263             :                  int *nindexes, Relation **Irel)
    2264             : {
    2265             :     List       *indexoidlist;
    2266             :     ListCell   *indexoidscan;
    2267             :     int         i;          /* list length first, then count of kept indexes */
    2268             : 
    2269             :     Assert(lockmode != NoLock);     /* callers must ask for a real lock */
    2270             : 
    2271      120134 :     indexoidlist = RelationGetIndexList(relation);
    2272             : 
    2273             :     /* allocate enough memory for all indexes, ready or not */
    2274      120134 :     i = list_length(indexoidlist);
    2275             : 
    2276      120134 :     if (i > 0)
    2277      111682 :         *Irel = (Relation *) palloc(i * sizeof(Relation));
    2278             :     else
    2279        8452 :         *Irel = NULL;       /* no indexes at all, so no array */
    2280             : 
    2281             :     /* collect just the ready indexes; i restarts as the output position */
    2282      120134 :     i = 0;
    2283      310816 :     foreach(indexoidscan, indexoidlist)
    2284             :     {
    2285      190682 :         Oid         indexoid = lfirst_oid(indexoidscan);
    2286             :         Relation    indrel;
    2287             : 
    2288      190682 :         indrel = index_open(indexoid, lockmode);
    2289      190682 :         if (indrel->rd_index->indisready)
    2290      190682 :             (*Irel)[i++] = indrel;
    2291             :         else
    2292           0 :             index_close(indrel, lockmode);  /* not ready: leave it out */
    2293             :     }
    2294             : 
    2295      120134 :     *nindexes = i;          /* entries actually stored; may be < list length */
    2296             : 
    2297      120134 :     list_free(indexoidlist);
    2298      120134 : }
    2299             : 
    2300             : /*
    2301             :  * Release the resources acquired by vac_open_indexes: close every index in Irel
    2302             :  * and pfree the array.  Optionally release the locks (say NoLock to keep 'em).
    2303             :  */
    2304             : void
    2305      120804 : vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
    2306             : {
    2307      120804 :     if (Irel == NULL)       /* no array was handed out: nothing to do */
    2308        9130 :         return;
    2309             : 
    2310      302342 :     while (nindexes--)      /* walks from the last entry back to the first */
    2311             :     {
    2312      190668 :         Relation    ind = Irel[nindexes];
    2313             : 
    2314      190668 :         index_close(ind, lockmode);
    2315             :     }
    2316      111674 :     pfree(Irel);            /* frees the pointer array itself */
    2317             : }
    2318             : 
    2319             : /*
    2320             :  * vacuum_delay_point --- check for interrupts and cost-based delay.
    2321             :  *
    2322             :  * This should be called in each major loop of VACUUM processing,
    2323             :  * typically once per page processed.
    2324             :  */
    2325             : void
    2326   234706102 : vacuum_delay_point(void)
    2327             : {
    2328   234706102 :     double      msec = 0;   /* nap length in milliseconds; 0 means no nap */
    2329             : 
    2330             :     /* Always check for interrupts */
    2331   234706102 :     CHECK_FOR_INTERRUPTS();
    2332             :     /* Quit early on a pending interrupt, or when cost delay is off and no reload is due */
    2333   234706102 :     if (InterruptPending ||
    2334   234706102 :         (!VacuumCostActive && !ConfigReloadPending))
    2335   229711414 :         return;
    2336             : 
    2337             :     /*
    2338             :      * Autovacuum workers should reload the configuration file if requested.
    2339             :      * This allows changes to [autovacuum_]vacuum_cost_limit and
    2340             :      * [autovacuum_]vacuum_cost_delay to take effect while a table is being
    2341             :      * vacuumed or analyzed.
    2342             :      */
    2343     4994688 :     if (ConfigReloadPending && IsAutoVacuumWorkerProcess())
    2344             :     {
    2345           0 :         ConfigReloadPending = false;
    2346           0 :         ProcessConfigFile(PGC_SIGHUP);
    2347           0 :         VacuumUpdateCosts();
    2348             :     }
    2349             : 
    2350             :     /*
    2351             :      * If we disabled cost-based delays after reloading the config file,
    2352             :      * return.
    2353             :      */
    2354     4994688 :     if (!VacuumCostActive)
    2355           0 :         return;
    2356             : 
    2357             :     /*
    2358             :      * For parallel vacuum, the delay is computed based on the shared cost
    2359             :      * balance.  See compute_parallel_delay.
    2360             :      */
    2361     4994688 :     if (VacuumSharedCostBalance != NULL)
    2362           0 :         msec = compute_parallel_delay();
    2363     4994688 :     else if (VacuumCostBalance >= vacuum_cost_limit)
    2364         716 :         msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;   /* delay scaled by balance/limit */
    2365             : 
    2366             :     /* Nap if appropriate */
    2367     4994688 :     if (msec > 0)
    2368             :     {
    2369         716 :         if (msec > vacuum_cost_delay * 4)   /* cap a single nap at 4x the configured delay */
    2370          10 :             msec = vacuum_cost_delay * 4;
    2371             : 
    2372         716 :         pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY);
    2373         716 :         pg_usleep(msec * 1000);     /* milliseconds -> microseconds */
    2374         716 :         pgstat_report_wait_end();
    2375             : 
    2376             :         /*
    2377             :          * We don't want to ignore postmaster death during very long vacuums
    2378             :          * with vacuum_cost_delay configured.  We can't use the usual
    2379             :          * WaitLatch() approach here because we want microsecond-based sleep
    2380             :          * durations above.
    2381             :          */
    2382         716 :         if (IsUnderPostmaster && !PostmasterIsAlive())
    2383           0 :             exit(1);
    2384             : 
    2385         716 :         VacuumCostBalance = 0;      /* start accumulating cost afresh after the nap */
    2386             : 
    2387             :         /*
    2388             :          * Balance and update limit values for autovacuum workers. We must do
    2389             :          * this periodically, as the number of workers across which we are
    2390             :          * balancing the limit may have changed.
    2391             :          *
    2392             :          * TODO: There may be better criteria for determining when to do this
    2393             :          * besides "check after napping".
    2394             :          */
    2395         716 :         AutoVacuumUpdateCostLimit();
    2396             : 
    2397             :         /* Might have gotten an interrupt while sleeping */
    2398         716 :         CHECK_FOR_INTERRUPTS();
    2399             :     }
    2400             : }
    2401             : 
    2402             : /*
    2403             :  * Computes the vacuum delay for parallel workers, in milliseconds (0 = no sleep).
    2404             :  *
    2405             :  * The basic idea of a cost-based delay for parallel vacuum is to allow each
    2406             :  * worker to sleep in proportion to the share of work it's done.  We achieve this
    2407             :  * by allowing all parallel vacuum workers including the leader process to
    2408             :  * have a shared view of cost related parameters (mainly VacuumCostBalance).
    2409             :  * We allow each worker to update it as and when it has incurred any cost and
    2410             :  * then based on that decide whether it needs to sleep.  We compute the time
    2411             :  * to sleep for a worker based on the cost it has incurred
    2412             :  * (VacuumCostBalanceLocal) and then reduce the VacuumSharedCostBalance by
    2413             :  * that amount.  This avoids putting to sleep those workers which have done less
    2414             :  * I/O than other workers, and therefore ensures that workers
    2415             :  * which are doing more I/O get throttled more.
    2416             :  *
    2417             :  * We allow a worker to sleep only if it has performed I/O above a certain
    2418             :  * threshold, which is calculated based on the number of active workers
    2419             :  * (VacuumActiveNWorkers), and the overall cost balance has reached
    2420             :  * vacuum_cost_limit set by the system.  Testing reveals that we achieve
    2421             :  * the required throttling if we force a worker that has done more than 50%
    2422             :  * of its share of work to sleep.
    2423             :  */
    2424             : static double
    2425           0 : compute_parallel_delay(void)
    2426             : {
    2427           0 :     double      msec = 0;   /* stays 0 unless the sleep conditions below hold */
    2428             :     uint32      shared_balance;
    2429             :     int         nworkers;
    2430             : 
    2431             :     /* Parallel vacuum must be active */
    2432             :     Assert(VacuumSharedCostBalance);
    2433             : 
    2434           0 :     nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
    2435             : 
    2436             :     /* At least count itself */
    2437             :     Assert(nworkers >= 1);
    2438             : 
    2439             :     /* Update the shared cost balance value atomically */
    2440           0 :     shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
    2441             : 
    2442             :     /* Compute the total local balance for the current worker */
    2443           0 :     VacuumCostBalanceLocal += VacuumCostBalance;
    2444             :     /* Sleep only if the shared balance reached the limit and we used over half our share */
    2445           0 :     if ((shared_balance >= vacuum_cost_limit) &&
    2446           0 :         (VacuumCostBalanceLocal > 0.5 * ((double) vacuum_cost_limit / nworkers)))
    2447             :     {
    2448             :         /* Compute sleep time based on the local cost balance */
    2449           0 :         msec = vacuum_cost_delay * VacuumCostBalanceLocal / vacuum_cost_limit;
    2450           0 :         pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);   /* take our part back out of the shared total */
    2451           0 :         VacuumCostBalanceLocal = 0;
    2452             :     }
    2453             : 
    2454             :     /*
    2455             :      * Reset VacuumCostBalance: it was folded into the shared and local totals above.
    2456             :      */
    2457           0 :     VacuumCostBalance = 0;
    2458             : 
    2459           0 :     return msec;
    2460             : }
    2461             : 
    2462             : /*
    2463             :  * A wrapper around defGetBoolean() that maps its result to a VacOptValue.
    2464             :  *
    2465             :  * This function returns VACOPTVALUE_ENABLED and VACOPTVALUE_DISABLED instead
    2466             :  * of true and false.
    2467             :  */
    2468             : static VacOptValue
    2469         314 : get_vacoptval_from_boolean(DefElem *def)
    2470             : {
    2471         314 :     return defGetBoolean(def) ? VACOPTVALUE_ENABLED : VACOPTVALUE_DISABLED;
    2472             : }
    2473             : 
    2474             : /*
    2475             :  *  vac_bulkdel_one_index() -- bulk-deletion for index relation.
    2476             :  *
    2477             :  * Returns the bulk delete stats that index_bulk_delete() produces from istat.
    2478             :  */
    2479             : IndexBulkDeleteResult *
    2480        7590 : vac_bulkdel_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat,
    2481             :                       VacDeadItems *dead_items)
    2482             : {
    2483             :     /* Do bulk deletion; vac_tid_reaped is the callback, dead_items its state */
    2484        7590 :     istat = index_bulk_delete(ivinfo, istat, vac_tid_reaped,
    2485             :                               (void *) dead_items);
    2486             :     /* Report at the level the caller selected in ivinfo->message_level */
    2487        7590 :     ereport(ivinfo->message_level,
    2488             :             (errmsg("scanned index \"%s\" to remove %d row versions",
    2489             :                     RelationGetRelationName(ivinfo->index),
    2490             :                     dead_items->num_items)));
    2491             : 
    2492        7590 :     return istat;
    2493             : }
    2494             : 
    2495             : /*
    2496             :  *  vac_cleanup_one_index() -- do post-vacuum cleanup for index relation.
    2497             :  *
    2498             :  * Returns the stats from index_vacuum_cleanup(); if that is NULL, nothing is logged.
    2499             :  */
    2500             : IndexBulkDeleteResult *
    2501      109312 : vac_cleanup_one_index(IndexVacuumInfo *ivinfo, IndexBulkDeleteResult *istat)
    2502             : {
    2503      109312 :     istat = index_vacuum_cleanup(ivinfo, istat);
    2504             :     /* Only report when the cleanup call handed back stats */
    2505      109312 :     if (istat)
    2506        7864 :         ereport(ivinfo->message_level,
    2507             :                 (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
    2508             :                         RelationGetRelationName(ivinfo->index),
    2509             :                         istat->num_index_tuples,
    2510             :                         istat->num_pages),
    2511             :                  errdetail("%.0f index row versions were removed.\n"
    2512             :                            "%u index pages were newly deleted.\n"
    2513             :                            "%u index pages are currently deleted, of which %u are currently reusable.",
    2514             :                            istat->tuples_removed,
    2515             :                            istat->pages_newly_deleted,
    2516             :                            istat->pages_deleted, istat->pages_free)));
    2517             : 
    2518      109312 :     return istat;
    2519             : }
    2520             : 
    2521             : /*
    2522             :  * Returns the total required space for VACUUM's dead_items array given a
    2523             :  * max_items value: the VacDeadItems header plus max_items ItemPointerData slots.
    2524             :  */
    2525             : Size
    2526       73496 : vac_max_items_to_alloc_size(int max_items)
    2527             : {
    2528             :     Assert(max_items <= MAXDEADITEMS(MaxAllocSize));
    2529             :     /* offsetof() gives the header size up to the items[] member */
    2530       73496 :     return offsetof(VacDeadItems, items) + sizeof(ItemPointerData) * max_items;
    2531             : }
    2532             : 
    2533             : /*
    2534             :  *  vac_tid_reaped() -- is a particular tid deletable?
    2535             :  *
    2536             :  *      This has the right signature to be an IndexBulkDeleteCallback.
    2537             :  *      state is the VacDeadItems to search; returns true if itemptr is in it.
    2538             :  *      Assumes dead_items array is sorted (in ascending TID order) and non-empty.
    2539             :  */
    2540             : static bool
    2541    16156432 : vac_tid_reaped(ItemPointer itemptr, void *state)
    2542             : {
    2543    16156432 :     VacDeadItems *dead_items = (VacDeadItems *) state;
    2544             :     int64       litem,
    2545             :                 ritem,
    2546             :                 item;
    2547             :     ItemPointer res;
    2548             :     /* Encode the first and last array entries and the probe TID for the bound check */
    2549    16156432 :     litem = itemptr_encode(&dead_items->items[0]);
    2550    16156432 :     ritem = itemptr_encode(&dead_items->items[dead_items->num_items - 1]);
    2551    16156432 :     item = itemptr_encode(itemptr);
    2552             : 
    2553             :     /*
    2554             :      * Doing a simple bound check before bsearch() is useful to avoid the
    2555             :      * extra cost of bsearch(), especially if dead items on the heap are
    2556             :      * concentrated in a certain range.  Since this function is called for
    2557             :      * every index tuple, it pays to be really fast.
    2558             :      */
    2559    16156432 :     if (item < litem || item > ritem)
    2560     7144938 :         return false;
    2561             :     /* Within bounds: binary-search the array using vac_cmp_itemptr's ordering */
    2562     9011494 :     res = (ItemPointer) bsearch(itemptr,
    2563     9011494 :                                 dead_items->items,
    2564     9011494 :                                 dead_items->num_items,
    2565             :                                 sizeof(ItemPointerData),
    2566             :                                 vac_cmp_itemptr);
    2567             : 
    2568     9011494 :     return (res != NULL);
    2569             : }
    2570             : 
    2571             : /*
    2572             :  * qsort()/bsearch() comparator: orders TIDs by block number, then offset number.
    2573             :  */
    2574             : static int
    2575    88144246 : vac_cmp_itemptr(const void *left, const void *right)
    2576             : {
    2577             :     BlockNumber lblk,
    2578             :                 rblk;
    2579             :     OffsetNumber loff,
    2580             :                 roff;
    2581             :     /* Primary key: the block number */
    2582    88144246 :     lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    2583    88144246 :     rblk = ItemPointerGetBlockNumber((ItemPointer) right);
    2584             : 
    2585    88144246 :     if (lblk < rblk)
    2586    28655538 :         return -1;
    2587    59488708 :     if (lblk > rblk)
    2588    29250668 :         return 1;
    2589             :     /* Same block: break the tie on the offset number */
    2590    30238040 :     loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    2591    30238040 :     roff = ItemPointerGetOffsetNumber((ItemPointer) right);
    2592             : 
    2593    30238040 :     if (loff < roff)
    2594    14348116 :         return -1;
    2595    15889924 :     if (loff > roff)
    2596    12733422 :         return 1;
    2597             : 
    2598     3156502 :     return 0;               /* same block and offset */
    2599             : }

Generated by: LCOV version 1.14