LCOV - code coverage report
Current view: top level - src/backend/access/heap - vacuumlazy.c
Test:     PostgreSQL 18devel
Date:     2024-11-21 08:14:44
Coverage: Lines: 746 of 851 hit (87.7 %)    Functions: 28 of 28 hit (100.0 %)
Legend:   Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuumlazy.c
       4             :  *    Concurrent ("lazy") vacuuming.
       5             :  *
       6             :  * The major space usage for vacuuming is storage for the dead tuple IDs that
       7             :  * are to be removed from indexes.  We want to ensure we can vacuum even the
       8             :  * very largest relations with finite memory space usage.  To do that, we set
       9             :  * upper bounds on the memory that can be used for keeping track of dead TIDs
      10             :  * at once.
      11             :  *
      12             :  * We are willing to use at most maintenance_work_mem (or perhaps
      13             :  * autovacuum_work_mem) memory space to keep track of dead TIDs.  If the
      14             :  * TID store is full, we must call lazy_vacuum to vacuum indexes (and to vacuum
      15             :  * the pages that we've pruned). This frees up the memory space dedicated to
       16             :  * storing dead TIDs.
      17             :  *
      18             :  * In practice VACUUM will often complete its initial pass over the target
      19             :  * heap relation without ever running out of space to store TIDs.  This means
      20             :  * that there only needs to be one call to lazy_vacuum, after the initial pass
      21             :  * completes.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  *
      27             :  * IDENTIFICATION
      28             :  *    src/backend/access/heap/vacuumlazy.c
      29             :  *
      30             :  *-------------------------------------------------------------------------
      31             :  */
      32             : #include "postgres.h"
      33             : 
      34             : #include <math.h>
      35             : 
      36             : #include "access/genam.h"
      37             : #include "access/heapam.h"
      38             : #include "access/htup_details.h"
      39             : #include "access/multixact.h"
      40             : #include "access/tidstore.h"
      41             : #include "access/transam.h"
      42             : #include "access/visibilitymap.h"
      43             : #include "access/xloginsert.h"
      44             : #include "catalog/storage.h"
      45             : #include "commands/dbcommands.h"
      46             : #include "commands/progress.h"
      47             : #include "commands/vacuum.h"
      48             : #include "common/int.h"
      49             : #include "executor/instrument.h"
      50             : #include "miscadmin.h"
      51             : #include "pgstat.h"
      52             : #include "portability/instr_time.h"
      53             : #include "postmaster/autovacuum.h"
      54             : #include "storage/bufmgr.h"
      55             : #include "storage/freespace.h"
      56             : #include "storage/lmgr.h"
      57             : #include "utils/lsyscache.h"
      58             : #include "utils/pg_rusage.h"
      59             : #include "utils/timestamp.h"
      60             : 
      61             : 
      62             : /*
      63             :  * Space/time tradeoff parameters: do these need to be user-tunable?
      64             :  *
      65             :  * To consider truncating the relation, we want there to be at least
      66             :  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
      67             :  * is less) potentially-freeable pages.
      68             :  */
      69             : #define REL_TRUNCATE_MINIMUM    1000
      70             : #define REL_TRUNCATE_FRACTION   16
      71             : 
      72             : /*
      73             :  * Timing parameters for truncate locking heuristics.
      74             :  *
      75             :  * These were not exposed as user tunable GUC values because it didn't seem
      76             :  * that the potential for improvement was great enough to merit the cost of
      77             :  * supporting them.
      78             :  */
      79             : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL     20  /* ms */
      80             : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL      50  /* ms */
      81             : #define VACUUM_TRUNCATE_LOCK_TIMEOUT            5000    /* ms */
      82             : 
      83             : /*
      84             :  * Threshold that controls whether we bypass index vacuuming and heap
      85             :  * vacuuming as an optimization
      86             :  */
      87             : #define BYPASS_THRESHOLD_PAGES  0.02    /* i.e. 2% of rel_pages */
      88             : 
      89             : /*
      90             :  * Perform a failsafe check each time we scan another 4GB of pages.
      91             :  * (Note that this is deliberately kept to a power-of-two, usually 2^19.)
      92             :  */
      93             : #define FAILSAFE_EVERY_PAGES \
      94             :     ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ))
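
As a quick check of the power-of-two remark above, assuming the default BLCKSZ of
8192 bytes (the block size is configurable at build time), the interval works out to:

    FAILSAFE_EVERY_PAGES = (4 * 1024 * 1024 * 1024) / 8192
                         = 524288
                         = 2^19 blocks, i.e. one failsafe check per ~4GB of heap scanned
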
      95             : 
      96             : /*
      97             :  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
      98             :  * (it won't be exact because we only vacuum FSM after processing a heap page
      99             :  * that has some removable tuples).  When there are indexes, this is ignored,
     100             :  * and we vacuum FSM after each index/heap cleaning pass.
     101             :  */
     102             : #define VACUUM_FSM_EVERY_PAGES \
     103             :     ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
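
The same arithmetic, again assuming the default 8192-byte block size, gives the FSM
vacuuming interval for tables with no indexes:

    VACUUM_FSM_EVERY_PAGES = (8 * 1024 * 1024 * 1024) / 8192
                           = 1048576 blocks, i.e. roughly every 8GB of heap processed
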
     104             : 
     105             : /*
     106             :  * Before we consider skipping a page that's marked as clean in
      107             :  * the visibility map, we must've seen at least this many clean pages.
     108             :  */
     109             : #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
     110             : 
     111             : /*
     112             :  * Size of the prefetch window for lazy vacuum backwards truncation scan.
     113             :  * Needs to be a power of 2.
     114             :  */
     115             : #define PREFETCH_SIZE           ((BlockNumber) 32)
     116             : 
     117             : /*
      118             :  * Macro to check if we are in a parallel vacuum.  If true, we are in
     119             :  * parallel mode and the DSM segment is initialized.
     120             :  */
     121             : #define ParallelVacuumIsActive(vacrel) ((vacrel)->pvs != NULL)
     122             : 
     123             : /* Phases of vacuum during which we report error context. */
     124             : typedef enum
     125             : {
     126             :     VACUUM_ERRCB_PHASE_UNKNOWN,
     127             :     VACUUM_ERRCB_PHASE_SCAN_HEAP,
     128             :     VACUUM_ERRCB_PHASE_VACUUM_INDEX,
     129             :     VACUUM_ERRCB_PHASE_VACUUM_HEAP,
     130             :     VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
     131             :     VACUUM_ERRCB_PHASE_TRUNCATE,
     132             : } VacErrPhase;
     133             : 
     134             : typedef struct LVRelState
     135             : {
     136             :     /* Target heap relation and its indexes */
     137             :     Relation    rel;
     138             :     Relation   *indrels;
     139             :     int         nindexes;
     140             : 
     141             :     /* Buffer access strategy and parallel vacuum state */
     142             :     BufferAccessStrategy bstrategy;
     143             :     ParallelVacuumState *pvs;
     144             : 
     145             :     /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */
     146             :     bool        aggressive;
     147             :     /* Use visibility map to skip? (disabled by DISABLE_PAGE_SKIPPING) */
     148             :     bool        skipwithvm;
     149             :     /* Consider index vacuuming bypass optimization? */
     150             :     bool        consider_bypass_optimization;
     151             : 
     152             :     /* Doing index vacuuming, index cleanup, rel truncation? */
     153             :     bool        do_index_vacuuming;
     154             :     bool        do_index_cleanup;
     155             :     bool        do_rel_truncate;
     156             : 
     157             :     /* VACUUM operation's cutoffs for freezing and pruning */
     158             :     struct VacuumCutoffs cutoffs;
     159             :     GlobalVisState *vistest;
     160             :     /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */
     161             :     TransactionId NewRelfrozenXid;
     162             :     MultiXactId NewRelminMxid;
     163             :     bool        skippedallvis;
     164             : 
     165             :     /* Error reporting state */
     166             :     char       *dbname;
     167             :     char       *relnamespace;
     168             :     char       *relname;
     169             :     char       *indname;        /* Current index name */
     170             :     BlockNumber blkno;          /* used only for heap operations */
     171             :     OffsetNumber offnum;        /* used only for heap operations */
     172             :     VacErrPhase phase;
     173             :     bool        verbose;        /* VACUUM VERBOSE? */
     174             : 
     175             :     /*
     176             :      * dead_items stores TIDs whose index tuples are deleted by index
     177             :      * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page
     178             :      * that has been processed by lazy_scan_prune.  Also needed by
     179             :      * lazy_vacuum_heap_rel, which marks the same LP_DEAD line pointers as
      180             :      * LP_UNUSED during the second heap pass.
     181             :      *
     182             :      * Both dead_items and dead_items_info are allocated in shared memory in
     183             :      * parallel vacuum cases.
     184             :      */
     185             :     TidStore   *dead_items;     /* TIDs whose index tuples we'll delete */
     186             :     VacDeadItemsInfo *dead_items_info;
     187             : 
     188             :     BlockNumber rel_pages;      /* total number of pages */
     189             :     BlockNumber scanned_pages;  /* # pages examined (not skipped via VM) */
     190             :     BlockNumber removed_pages;  /* # pages removed by relation truncation */
     191             :     BlockNumber frozen_pages;   /* # pages with newly frozen tuples */
     192             :     BlockNumber lpdead_item_pages;  /* # pages with LP_DEAD items */
     193             :     BlockNumber missed_dead_pages;  /* # pages with missed dead tuples */
     194             :     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
     195             : 
     196             :     /* Statistics output by us, for table */
     197             :     double      new_rel_tuples; /* new estimated total # of tuples */
     198             :     double      new_live_tuples;    /* new estimated total # of live tuples */
     199             :     /* Statistics output by index AMs */
     200             :     IndexBulkDeleteResult **indstats;
     201             : 
     202             :     /* Instrumentation counters */
     203             :     int         num_index_scans;
     204             :     /* Counters that follow are only for scanned_pages */
     205             :     int64       tuples_deleted; /* # deleted from table */
     206             :     int64       tuples_frozen;  /* # newly frozen */
     207             :     int64       lpdead_items;   /* # deleted from indexes */
     208             :     int64       live_tuples;    /* # live tuples remaining */
     209             :     int64       recently_dead_tuples;   /* # dead, but not yet removable */
     210             :     int64       missed_dead_tuples; /* # removable, but not removed */
     211             : 
     212             :     /* State maintained by heap_vac_scan_next_block() */
     213             :     BlockNumber current_block;  /* last block returned */
     214             :     BlockNumber next_unskippable_block; /* next unskippable block */
     215             :     bool        next_unskippable_allvis;    /* its visibility status */
     216             :     Buffer      next_unskippable_vmbuffer;  /* buffer containing its VM bit */
     217             : } LVRelState;
     218             : 
     219             : /* Struct for saving and restoring vacuum error information. */
     220             : typedef struct LVSavedErrInfo
     221             : {
     222             :     BlockNumber blkno;
     223             :     OffsetNumber offnum;
     224             :     VacErrPhase phase;
     225             : } LVSavedErrInfo;
     226             : 
     227             : 
     228             : /* non-export function prototypes */
     229             : static void lazy_scan_heap(LVRelState *vacrel);
     230             : static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
     231             :                                      bool *all_visible_according_to_vm);
     232             : static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
     233             : static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
     234             :                                    BlockNumber blkno, Page page,
     235             :                                    bool sharelock, Buffer vmbuffer);
     236             : static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
     237             :                             BlockNumber blkno, Page page,
     238             :                             Buffer vmbuffer, bool all_visible_according_to_vm,
     239             :                             bool *has_lpdead_items);
     240             : static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
     241             :                               BlockNumber blkno, Page page,
     242             :                               bool *has_lpdead_items);
     243             : static void lazy_vacuum(LVRelState *vacrel);
     244             : static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
     245             : static void lazy_vacuum_heap_rel(LVRelState *vacrel);
     246             : static void lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
     247             :                                   Buffer buffer, OffsetNumber *deadoffsets,
     248             :                                   int num_offsets, Buffer vmbuffer);
     249             : static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
     250             : static void lazy_cleanup_all_indexes(LVRelState *vacrel);
     251             : static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
     252             :                                                     IndexBulkDeleteResult *istat,
     253             :                                                     double reltuples,
     254             :                                                     LVRelState *vacrel);
     255             : static IndexBulkDeleteResult *lazy_cleanup_one_index(Relation indrel,
     256             :                                                      IndexBulkDeleteResult *istat,
     257             :                                                      double reltuples,
     258             :                                                      bool estimated_count,
     259             :                                                      LVRelState *vacrel);
     260             : static bool should_attempt_truncation(LVRelState *vacrel);
     261             : static void lazy_truncate_heap(LVRelState *vacrel);
     262             : static BlockNumber count_nondeletable_pages(LVRelState *vacrel,
     263             :                                             bool *lock_waiter_detected);
     264             : static void dead_items_alloc(LVRelState *vacrel, int nworkers);
     265             : static void dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
     266             :                            int num_offsets);
     267             : static void dead_items_reset(LVRelState *vacrel);
     268             : static void dead_items_cleanup(LVRelState *vacrel);
     269             : static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
     270             :                                      TransactionId *visibility_cutoff_xid, bool *all_frozen);
     271             : static void update_relstats_all_indexes(LVRelState *vacrel);
     272             : static void vacuum_error_callback(void *arg);
     273             : static void update_vacuum_error_info(LVRelState *vacrel,
     274             :                                      LVSavedErrInfo *saved_vacrel,
     275             :                                      int phase, BlockNumber blkno,
     276             :                                      OffsetNumber offnum);
     277             : static void restore_vacuum_error_info(LVRelState *vacrel,
     278             :                                       const LVSavedErrInfo *saved_vacrel);
     279             : 
     280             : 
     281             : /*
     282             :  *  heap_vacuum_rel() -- perform VACUUM for one heap relation
     283             :  *
     284             :  *      This routine sets things up for and then calls lazy_scan_heap, where
     285             :  *      almost all work actually takes place.  Finalizes everything after call
      286             :  *      almost all work actually takes place.  Finalizes everything after the call
     287             :  *      entry. (Also updates pg_class entries for any indexes that need it.)
     288             :  *
     289             :  *      At entry, we have already established a transaction and opened
     290             :  *      and locked the relation.
     291             :  */
     292             : void
     293       97130 : heap_vacuum_rel(Relation rel, VacuumParams *params,
     294             :                 BufferAccessStrategy bstrategy)
     295             : {
     296             :     LVRelState *vacrel;
     297             :     bool        verbose,
     298             :                 instrument,
     299             :                 skipwithvm,
     300             :                 frozenxid_updated,
     301             :                 minmulti_updated;
     302             :     BlockNumber orig_rel_pages,
     303             :                 new_rel_pages,
     304             :                 new_rel_allvisible;
     305             :     PGRUsage    ru0;
     306       97130 :     TimestampTz starttime = 0;
     307       97130 :     PgStat_Counter startreadtime = 0,
     308       97130 :                 startwritetime = 0;
     309       97130 :     WalUsage    startwalusage = pgWalUsage;
     310       97130 :     BufferUsage startbufferusage = pgBufferUsage;
     311             :     ErrorContextCallback errcallback;
     312       97130 :     char      **indnames = NULL;
     313             : 
     314       97130 :     verbose = (params->options & VACOPT_VERBOSE) != 0;
     315      173518 :     instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
     316       76388 :                               params->log_min_duration >= 0));
     317       97130 :     if (instrument)
     318             :     {
     319       76408 :         pg_rusage_init(&ru0);
     320       76408 :         starttime = GetCurrentTimestamp();
     321       76408 :         if (track_io_timing)
     322             :         {
     323           0 :             startreadtime = pgStatBlockReadTime;
     324           0 :             startwritetime = pgStatBlockWriteTime;
     325             :         }
     326             :     }
     327             : 
     328       97130 :     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
     329             :                                   RelationGetRelid(rel));
     330             : 
     331             :     /*
     332             :      * Setup error traceback support for ereport() first.  The idea is to set
     333             :      * up an error context callback to display additional information on any
     334             :      * error during a vacuum.  During different phases of vacuum, we update
      335             :      * the state so that the error context callback always displays current
     336             :      * information.
     337             :      *
      338             :      * Copy the names of the heap rel into local memory for error reporting
     339             :      * purposes, too.  It isn't always safe to assume that we can get the name
     340             :      * of each rel.  It's convenient for code in lazy_scan_heap to always use
     341             :      * these temp copies.
     342             :      */
     343       97130 :     vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
     344       97130 :     vacrel->dbname = get_database_name(MyDatabaseId);
     345       97130 :     vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
     346       97130 :     vacrel->relname = pstrdup(RelationGetRelationName(rel));
     347       97130 :     vacrel->indname = NULL;
     348       97130 :     vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
     349       97130 :     vacrel->verbose = verbose;
     350       97130 :     errcallback.callback = vacuum_error_callback;
     351       97130 :     errcallback.arg = vacrel;
     352       97130 :     errcallback.previous = error_context_stack;
     353       97130 :     error_context_stack = &errcallback;
     354             : 
     355             :     /* Set up high level stuff about rel and its indexes */
     356       97130 :     vacrel->rel = rel;
     357       97130 :     vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
     358             :                      &vacrel->indrels);
     359       97130 :     vacrel->bstrategy = bstrategy;
     360       97130 :     if (instrument && vacrel->nindexes > 0)
     361             :     {
     362             :         /* Copy index names used by instrumentation (not error reporting) */
     363       73210 :         indnames = palloc(sizeof(char *) * vacrel->nindexes);
     364      188138 :         for (int i = 0; i < vacrel->nindexes; i++)
     365      114928 :             indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
     366             :     }
     367             : 
     368             :     /*
     369             :      * The index_cleanup param either disables index vacuuming and cleanup or
     370             :      * forces it to go ahead when we would otherwise apply the index bypass
     371             :      * optimization.  The default is 'auto', which leaves the final decision
     372             :      * up to lazy_vacuum().
     373             :      *
      374             :      * The truncate param allows the user to avoid attempting relation truncation,
     375             :      * though it can't force truncation to happen.
     376             :      */
     377             :     Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
     378             :     Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
     379             :            params->truncate != VACOPTVALUE_AUTO);
     380             : 
     381             :     /*
      382             :      * While VacuumFailsafeActive is reset to false before calling this, we
     383             :      * still need to reset it here due to recursive calls.
     384             :      */
     385       97130 :     VacuumFailsafeActive = false;
     386       97130 :     vacrel->consider_bypass_optimization = true;
     387       97130 :     vacrel->do_index_vacuuming = true;
     388       97130 :     vacrel->do_index_cleanup = true;
     389       97130 :     vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
     390       97130 :     if (params->index_cleanup == VACOPTVALUE_DISABLED)
     391             :     {
     392             :         /* Force disable index vacuuming up-front */
     393         264 :         vacrel->do_index_vacuuming = false;
     394         264 :         vacrel->do_index_cleanup = false;
     395             :     }
     396       96866 :     else if (params->index_cleanup == VACOPTVALUE_ENABLED)
     397             :     {
     398             :         /* Force index vacuuming.  Note that failsafe can still bypass. */
     399          32 :         vacrel->consider_bypass_optimization = false;
     400             :     }
     401             :     else
     402             :     {
     403             :         /* Default/auto, make all decisions dynamically */
     404             :         Assert(params->index_cleanup == VACOPTVALUE_AUTO);
     405             :     }
     406             : 
     407             :     /* Initialize page counters explicitly (be tidy) */
     408       97130 :     vacrel->scanned_pages = 0;
     409       97130 :     vacrel->removed_pages = 0;
     410       97130 :     vacrel->frozen_pages = 0;
     411       97130 :     vacrel->lpdead_item_pages = 0;
     412       97130 :     vacrel->missed_dead_pages = 0;
     413       97130 :     vacrel->nonempty_pages = 0;
     414             :     /* dead_items_alloc allocates vacrel->dead_items later on */
     415             : 
     416             :     /* Allocate/initialize output statistics state */
     417       97130 :     vacrel->new_rel_tuples = 0;
     418       97130 :     vacrel->new_live_tuples = 0;
     419       97130 :     vacrel->indstats = (IndexBulkDeleteResult **)
     420       97130 :         palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
     421             : 
     422             :     /* Initialize remaining counters (be tidy) */
     423       97130 :     vacrel->num_index_scans = 0;
     424       97130 :     vacrel->tuples_deleted = 0;
     425       97130 :     vacrel->tuples_frozen = 0;
     426       97130 :     vacrel->lpdead_items = 0;
     427       97130 :     vacrel->live_tuples = 0;
     428       97130 :     vacrel->recently_dead_tuples = 0;
     429       97130 :     vacrel->missed_dead_tuples = 0;
     430             : 
     431             :     /*
     432             :      * Get cutoffs that determine which deleted tuples are considered DEAD,
     433             :      * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze.  Then determine
     434             :      * the extent of the blocks that we'll scan in lazy_scan_heap.  It has to
     435             :      * happen in this order to ensure that the OldestXmin cutoff field works
     436             :      * as an upper bound on the XIDs stored in the pages we'll actually scan
     437             :      * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
     438             :      *
     439             :      * Next acquire vistest, a related cutoff that's used in pruning.  We use
     440             :      * vistest in combination with OldestXmin to ensure that
     441             :      * heap_page_prune_and_freeze() always removes any deleted tuple whose
     442             :      * xmax is < OldestXmin.  lazy_scan_prune must never become confused about
     443             :      * whether a tuple should be frozen or removed.  (In the future we might
     444             :      * want to teach lazy_scan_prune to recompute vistest from time to time,
     445             :      * to increase the number of dead tuples it can prune away.)
     446             :      */
     447       97130 :     vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
     448       97130 :     vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
     449       97130 :     vacrel->vistest = GlobalVisTestFor(rel);
     450             :     /* Initialize state used to track oldest extant XID/MXID */
     451       97130 :     vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
     452       97130 :     vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
     453       97130 :     vacrel->skippedallvis = false;
     454       97130 :     skipwithvm = true;
     455       97130 :     if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
     456             :     {
     457             :         /*
     458             :          * Force aggressive mode, and disable skipping blocks using the
     459             :          * visibility map (even those set all-frozen)
     460             :          */
     461         298 :         vacrel->aggressive = true;
     462         298 :         skipwithvm = false;
     463             :     }
     464             : 
     465       97130 :     vacrel->skipwithvm = skipwithvm;
     466             : 
     467       97130 :     if (verbose)
     468             :     {
     469          20 :         if (vacrel->aggressive)
     470           0 :             ereport(INFO,
     471             :                     (errmsg("aggressively vacuuming \"%s.%s.%s\"",
     472             :                             vacrel->dbname, vacrel->relnamespace,
     473             :                             vacrel->relname)));
     474             :         else
     475          20 :             ereport(INFO,
     476             :                     (errmsg("vacuuming \"%s.%s.%s\"",
     477             :                             vacrel->dbname, vacrel->relnamespace,
     478             :                             vacrel->relname)));
     479             :     }
     480             : 
     481             :     /*
     482             :      * Allocate dead_items memory using dead_items_alloc.  This handles
     483             :      * parallel VACUUM initialization as part of allocating shared memory
     484             :      * space used for dead_items.  (But do a failsafe precheck first, to
     485             :      * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
     486             :      * is already dangerously old.)
     487             :      */
     488       97130 :     lazy_check_wraparound_failsafe(vacrel);
     489       97130 :     dead_items_alloc(vacrel, params->nworkers);
     490             : 
     491             :     /*
     492             :      * Call lazy_scan_heap to perform all required heap pruning, index
     493             :      * vacuuming, and heap vacuuming (plus related processing)
     494             :      */
     495       97130 :     lazy_scan_heap(vacrel);
     496             : 
     497             :     /*
     498             :      * Free resources managed by dead_items_alloc.  This ends parallel mode in
     499             :      * passing when necessary.
     500             :      */
     501       97130 :     dead_items_cleanup(vacrel);
     502             :     Assert(!IsInParallelMode());
     503             : 
     504             :     /*
     505             :      * Update pg_class entries for each of rel's indexes where appropriate.
     506             :      *
     507             :      * Unlike the later update to rel's pg_class entry, this is not critical.
     508             :      * Maintains relpages/reltuples statistics used by the planner only.
     509             :      */
     510       97130 :     if (vacrel->do_index_cleanup)
     511       81978 :         update_relstats_all_indexes(vacrel);
     512             : 
     513             :     /* Done with rel's indexes */
     514       97130 :     vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
     515             : 
     516             :     /* Optionally truncate rel */
     517       97130 :     if (should_attempt_truncation(vacrel))
     518         268 :         lazy_truncate_heap(vacrel);
     519             : 
     520             :     /* Pop the error context stack */
     521       97130 :     error_context_stack = errcallback.previous;
     522             : 
     523             :     /* Report that we are now doing final cleanup */
     524       97130 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     525             :                                  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
     526             : 
     527             :     /*
     528             :      * Prepare to update rel's pg_class entry.
     529             :      *
     530             :      * Aggressive VACUUMs must always be able to advance relfrozenxid to a
     531             :      * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
     532             :      * Non-aggressive VACUUMs may advance them by any amount, or not at all.
     533             :      */
     534             :     Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
     535             :            TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
     536             :                                          vacrel->cutoffs.relfrozenxid,
     537             :                                          vacrel->NewRelfrozenXid));
     538             :     Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
     539             :            MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
     540             :                                        vacrel->cutoffs.relminmxid,
     541             :                                        vacrel->NewRelminMxid));
     542       97130 :     if (vacrel->skippedallvis)
     543             :     {
     544             :         /*
     545             :          * Must keep original relfrozenxid in a non-aggressive VACUUM that
     546             :          * chose to skip an all-visible page range.  The state that tracks new
     547             :          * values will have missed unfrozen XIDs from the pages we skipped.
     548             :          */
     549             :         Assert(!vacrel->aggressive);
     550          50 :         vacrel->NewRelfrozenXid = InvalidTransactionId;
     551          50 :         vacrel->NewRelminMxid = InvalidMultiXactId;
     552             :     }
     553             : 
     554             :     /*
     555             :      * For safety, clamp relallvisible to be not more than what we're setting
     556             :      * pg_class.relpages to
     557             :      */
     558       97130 :     new_rel_pages = vacrel->rel_pages;   /* After possible rel truncation */
     559       97130 :     visibilitymap_count(rel, &new_rel_allvisible, NULL);
     560       97130 :     if (new_rel_allvisible > new_rel_pages)
     561           0 :         new_rel_allvisible = new_rel_pages;
     562             : 
     563             :     /*
     564             :      * Now actually update rel's pg_class entry.
     565             :      *
     566             :      * In principle new_live_tuples could be -1 indicating that we (still)
     567             :      * don't know the tuple count.  In practice that can't happen, since we
     568             :      * scan every page that isn't skipped using the visibility map.
     569             :      */
     570       97130 :     vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
     571       97130 :                         new_rel_allvisible, vacrel->nindexes > 0,
     572             :                         vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
     573             :                         &frozenxid_updated, &minmulti_updated, false);
     574             : 
     575             :     /*
     576             :      * Report results to the cumulative stats system, too.
     577             :      *
     578             :      * Deliberately avoid telling the stats system about LP_DEAD items that
     579             :      * remain in the table due to VACUUM bypassing index and heap vacuuming.
     580             :      * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
     581             :      * It seems like a good idea to err on the side of not vacuuming again too
     582             :      * soon in cases where the failsafe prevented significant amounts of heap
     583             :      * vacuuming.
     584             :      */
     585       58426 :     pgstat_report_vacuum(RelationGetRelid(rel),
     586       97130 :                          rel->rd_rel->relisshared,
     587       38704 :                          Max(vacrel->new_live_tuples, 0),
     588       97130 :                          vacrel->recently_dead_tuples +
     589       97130 :                          vacrel->missed_dead_tuples);
     590       97130 :     pgstat_progress_end_command();
     591             : 
     592       97130 :     if (instrument)
     593             :     {
     594       76408 :         TimestampTz endtime = GetCurrentTimestamp();
     595             : 
     596       76444 :         if (verbose || params->log_min_duration == 0 ||
     597          36 :             TimestampDifferenceExceeds(starttime, endtime,
     598             :                                        params->log_min_duration))
     599             :         {
     600             :             long        secs_dur;
     601             :             int         usecs_dur;
     602             :             WalUsage    walusage;
     603             :             BufferUsage bufferusage;
     604             :             StringInfoData buf;
     605             :             char       *msgfmt;
     606             :             int32       diff;
     607       76372 :             double      read_rate = 0,
     608       76372 :                         write_rate = 0;
     609             :             int64       total_blks_hit;
     610             :             int64       total_blks_read;
     611             :             int64       total_blks_dirtied;
     612             : 
     613       76372 :             TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
     614       76372 :             memset(&walusage, 0, sizeof(WalUsage));
     615       76372 :             WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
     616       76372 :             memset(&bufferusage, 0, sizeof(BufferUsage));
     617       76372 :             BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
     618             : 
     619       76372 :             total_blks_hit = bufferusage.shared_blks_hit +
     620       76372 :                 bufferusage.local_blks_hit;
     621       76372 :             total_blks_read = bufferusage.shared_blks_read +
     622       76372 :                 bufferusage.local_blks_read;
     623       76372 :             total_blks_dirtied = bufferusage.shared_blks_dirtied +
     624       76372 :                 bufferusage.local_blks_dirtied;
     625             : 
     626       76372 :             initStringInfo(&buf);
     627       76372 :             if (verbose)
     628             :             {
     629             :                 /*
     630             :                  * Aggressiveness already reported earlier, in dedicated
     631             :                  * VACUUM VERBOSE ereport
     632             :                  */
     633             :                 Assert(!params->is_wraparound);
     634          20 :                 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
     635             :             }
     636       76352 :             else if (params->is_wraparound)
     637             :             {
     638             :                 /*
     639             :                  * While it's possible for a VACUUM to be both is_wraparound
     640             :                  * and !aggressive, that's just a corner-case -- is_wraparound
     641             :                  * implies aggressive.  Produce distinct output for the corner
     642             :                  * case all the same, just in case.
     643             :                  */
     644       76306 :                 if (vacrel->aggressive)
     645       76306 :                     msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     646             :                 else
     647           0 :                     msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     648             :             }
     649             :             else
     650             :             {
     651          46 :                 if (vacrel->aggressive)
     652          12 :                     msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
     653             :                 else
     654          34 :                     msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
     655             :             }
     656       76372 :             appendStringInfo(&buf, msgfmt,
     657             :                              vacrel->dbname,
     658             :                              vacrel->relnamespace,
     659             :                              vacrel->relname,
     660             :                              vacrel->num_index_scans);
     661      106466 :             appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
     662             :                              vacrel->removed_pages,
     663             :                              new_rel_pages,
     664             :                              vacrel->scanned_pages,
     665             :                              orig_rel_pages == 0 ? 100.0 :
     666       30094 :                              100.0 * vacrel->scanned_pages / orig_rel_pages);
     667       76372 :             appendStringInfo(&buf,
     668       76372 :                              _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
     669       76372 :                              (long long) vacrel->tuples_deleted,
     670       76372 :                              (long long) vacrel->new_rel_tuples,
     671       76372 :                              (long long) vacrel->recently_dead_tuples);
     672       76372 :             if (vacrel->missed_dead_tuples > 0)
     673           0 :                 appendStringInfo(&buf,
     674           0 :                                  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
     675           0 :                                  (long long) vacrel->missed_dead_tuples,
     676             :                                  vacrel->missed_dead_pages);
     677       76372 :             diff = (int32) (ReadNextTransactionId() -
     678       76372 :                             vacrel->cutoffs.OldestXmin);
     679       76372 :             appendStringInfo(&buf,
     680       76372 :                              _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
     681             :                              vacrel->cutoffs.OldestXmin, diff);
     682       76372 :             if (frozenxid_updated)
     683             :             {
     684       32756 :                 diff = (int32) (vacrel->NewRelfrozenXid -
     685       32756 :                                 vacrel->cutoffs.relfrozenxid);
     686       32756 :                 appendStringInfo(&buf,
     687       32756 :                                  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
     688             :                                  vacrel->NewRelfrozenXid, diff);
     689             :             }
     690       76372 :             if (minmulti_updated)
     691             :             {
     692          26 :                 diff = (int32) (vacrel->NewRelminMxid -
     693          26 :                                 vacrel->cutoffs.relminmxid);
     694          26 :                 appendStringInfo(&buf,
     695          26 :                                  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
     696             :                                  vacrel->NewRelminMxid, diff);
     697             :             }
     698       76372 :             appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
     699             :                              vacrel->frozen_pages,
     700             :                              orig_rel_pages == 0 ? 100.0 :
     701       30094 :                              100.0 * vacrel->frozen_pages / orig_rel_pages,
     702       76372 :                              (long long) vacrel->tuples_frozen);
     703       76372 :             if (vacrel->do_index_vacuuming)
     704             :             {
     705       61704 :                 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
     706       61656 :                     appendStringInfoString(&buf, _("index scan not needed: "));
     707             :                 else
     708          48 :                     appendStringInfoString(&buf, _("index scan needed: "));
     709             : 
     710       61704 :                 msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
     711             :             }
     712             :             else
     713             :             {
     714       14668 :                 if (!VacuumFailsafeActive)
     715           0 :                     appendStringInfoString(&buf, _("index scan bypassed: "));
     716             :                 else
     717       14668 :                     appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
     718             : 
     719       14668 :                 msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
     720             :             }
     721       76372 :             appendStringInfo(&buf, msgfmt,
     722             :                              vacrel->lpdead_item_pages,
     723             :                              orig_rel_pages == 0 ? 100.0 :
     724       30094 :                              100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
     725       76372 :                              (long long) vacrel->lpdead_items);
     726      191214 :             for (int i = 0; i < vacrel->nindexes; i++)
     727             :             {
     728      114842 :                 IndexBulkDeleteResult *istat = vacrel->indstats[i];
     729             : 
     730      114842 :                 if (!istat)
     731      114750 :                     continue;
     732             : 
     733          92 :                 appendStringInfo(&buf,
     734          92 :                                  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
     735          92 :                                  indnames[i],
     736             :                                  istat->num_pages,
     737             :                                  istat->pages_newly_deleted,
     738             :                                  istat->pages_deleted,
     739             :                                  istat->pages_free);
     740             :             }
     741       76372 :             if (track_io_timing)
     742             :             {
     743           0 :                 double      read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
     744           0 :                 double      write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
     745             : 
     746           0 :                 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
     747             :                                  read_ms, write_ms);
     748             :             }
     749       76372 :             if (secs_dur > 0 || usecs_dur > 0)
     750             :             {
     751       76372 :                 read_rate = (double) BLCKSZ * total_blks_read /
     752       76372 :                     (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
     753       76372 :                 write_rate = (double) BLCKSZ * total_blks_dirtied /
     754       76372 :                     (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
     755             :             }
     756       76372 :             appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     757             :                              read_rate, write_rate);
     758       76372 :             appendStringInfo(&buf,
     759       76372 :                              _("buffer usage: %lld hits, %lld reads, %lld dirtied\n"),
     760             :                              (long long) total_blks_hit,
     761             :                              (long long) total_blks_read,
     762             :                              (long long) total_blks_dirtied);
     763       76372 :             appendStringInfo(&buf,
     764       76372 :                              _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
     765       76372 :                              (long long) walusage.wal_records,
     766       76372 :                              (long long) walusage.wal_fpi,
     767       76372 :                              (unsigned long long) walusage.wal_bytes);
     768       76372 :             appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
     769             : 
     770       76372 :             ereport(verbose ? INFO : LOG,
     771             :                     (errmsg_internal("%s", buf.data)));
     772       76372 :             pfree(buf.data);
     773             :         }
     774             :     }
     775             : 
     776             :     /* Cleanup index statistics and index names */
     777      241400 :     for (int i = 0; i < vacrel->nindexes; i++)
     778             :     {
     779      144270 :         if (vacrel->indstats[i])
     780        2192 :             pfree(vacrel->indstats[i]);
     781             : 
     782      144270 :         if (instrument)
     783      114928 :             pfree(indnames[i]);
     784             :     }
     785       97130 : }
     786             : 
     787             : /*
     788             :  *  lazy_scan_heap() -- workhorse function for VACUUM
     789             :  *
     790             :  *      This routine prunes each page in the heap, and considers the need to
     791             :  *      freeze remaining tuples with storage (not including pages that can be
     792             :  *      skipped using the visibility map).  Also performs related maintenance
     793             :  *      of the FSM and visibility map.  These steps all take place during an
     794             :  *      initial pass over the target heap relation.
     795             :  *
     796             :  *      Also invokes lazy_vacuum_all_indexes to vacuum indexes, which largely
     797             :  *      consists of deleting index tuples that point to LP_DEAD items left in
      798             :  *      heap pages following pruning.  The earlier initial pass over the heap will
     799             :  *      have collected the TIDs whose index tuples need to be removed.
     800             :  *
     801             :  *      Finally, invokes lazy_vacuum_heap_rel to vacuum heap pages, which
     802             :  *      largely consists of marking LP_DEAD items (from vacrel->dead_items)
     803             :  *      as LP_UNUSED.  This has to happen in a second, final pass over the
     804             :  *      heap, to preserve a basic invariant that all index AMs rely on: no
     805             :  *      extant index tuple can ever be allowed to contain a TID that points to
     806             :  *      an LP_UNUSED line pointer in the heap.  We must disallow premature
     807             :  *      recycling of line pointers to avoid index scans that get confused
     808             :  *      about which TID points to which tuple immediately after recycling.
      809             :  *      (Actually, this isn't a concern when the target heap relation happens to
     810             :  *      have no indexes, which allows us to safely apply the one-pass strategy
     811             :  *      as an optimization).
     812             :  *
     813             :  *      In practice we often have enough space to fit all TIDs, and so won't
     814             :  *      need to call lazy_vacuum more than once, after our initial pass over
     815             :  *      the heap has totally finished.  Otherwise things are slightly more
     816             :  *      complicated: our "initial pass" over the heap applies only to those
     817             :  *      pages that were pruned before we needed to call lazy_vacuum, and our
     818             :  *      "final pass" over the heap only vacuums these same heap pages.
     819             :  *      However, we process indexes in full every time lazy_vacuum is called,
     820             :  *      which makes index processing very inefficient when memory is in short
     821             :  *      supply.
     822             :  */
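
The strategy described above can be condensed into a rough outline. This is an
illustrative sketch of the comment, not the function's actual control flow, which also
interleaves visibility map updates, FSM vacuuming, failsafe checks, and progress
reporting:

    for each heap block not skipped via the visibility map:
        prune and freeze tuples; record the page's LP_DEAD TIDs in dead_items
        if dead_items has grown past its memory bound:
            lazy_vacuum():  vacuum every index, then revisit the already-pruned
                            heap pages and mark their LP_DEAD items LP_UNUSED
    if any dead_items remain once the scan completes:
        lazy_vacuum() a final time (unless the bypass optimization applies)
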
     823             : static void
     824       97130 : lazy_scan_heap(LVRelState *vacrel)
     825             : {
     826       97130 :     BlockNumber rel_pages = vacrel->rel_pages,
     827             :                 blkno,
     828       97130 :                 next_fsm_block_to_vacuum = 0;
     829             :     bool        all_visible_according_to_vm;
     830             : 
     831       97130 :     TidStore   *dead_items = vacrel->dead_items;
     832       97130 :     VacDeadItemsInfo *dead_items_info = vacrel->dead_items_info;
     833       97130 :     Buffer      vmbuffer = InvalidBuffer;
     834       97130 :     const int   initprog_index[] = {
     835             :         PROGRESS_VACUUM_PHASE,
     836             :         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
     837             :         PROGRESS_VACUUM_MAX_DEAD_TUPLE_BYTES
     838             :     };
     839             :     int64       initprog_val[3];
     840             : 
     841             :     /* Report that we're scanning the heap, advertising total # of blocks */
     842       97130 :     initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
     843       97130 :     initprog_val[1] = rel_pages;
     844       97130 :     initprog_val[2] = dead_items_info->max_bytes;
     845       97130 :     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
     846             : 
     847             :     /* Initialize for the first heap_vac_scan_next_block() call */
     848       97130 :     vacrel->current_block = InvalidBlockNumber;
     849       97130 :     vacrel->next_unskippable_block = InvalidBlockNumber;
     850       97130 :     vacrel->next_unskippable_allvis = false;
     851       97130 :     vacrel->next_unskippable_vmbuffer = InvalidBuffer;
     852             : 
     853      507676 :     while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
     854             :     {
     855             :         Buffer      buf;
     856             :         Page        page;
     857             :         bool        has_lpdead_items;
     858      410546 :         bool        got_cleanup_lock = false;
     859             : 
     860      410546 :         vacrel->scanned_pages++;
     861             : 
     862             :         /* Report as block scanned, update error traceback information */
     863      410546 :         pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
     864      410546 :         update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
     865             :                                  blkno, InvalidOffsetNumber);
     866             : 
     867      410546 :         vacuum_delay_point();
     868             : 
     869             :         /*
     870             :          * Regularly check if wraparound failsafe should trigger.
     871             :          *
     872             :          * There is a similar check inside lazy_vacuum_all_indexes(), but
     873             :          * relfrozenxid might start to look dangerously old before we reach
     874             :          * that point.  This check also provides failsafe coverage for the
     875             :          * one-pass strategy, and the two-pass strategy with the index_cleanup
     876             :          * param set to 'off'.
     877             :          */
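                      :         /*
                      :          * (FAILSAFE_EVERY_PAGES is defined earlier in this file as roughly
                      :          * 4GB worth of blocks with the default 8kB block size, which is
                      :          * presumably why the call below shows zero hits in this
                      :          * regression-test coverage run.)
                      :          */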
     878      410546 :         if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
     879           0 :             lazy_check_wraparound_failsafe(vacrel);
     880             : 
     881             :         /*
      882             :          * Consider whether we still have enough space to process this page's
      883             :          * TIDs.  If we are close to overrunning the available space for
     884             :          * dead_items TIDs, pause and do a cycle of vacuuming before we tackle
     885             :          * this page.
     886             :          */
     887      410546 :         if (TidStoreMemoryUsage(dead_items) > dead_items_info->max_bytes)
     888             :         {
     889             :             /*
     890             :              * Before beginning index vacuuming, we release any pin we may
     891             :              * hold on the visibility map page.  This isn't necessary for
     892             :              * correctness, but we do it anyway to avoid holding the pin
     893             :              * across a lengthy, unrelated operation.
     894             :              */
     895           0 :             if (BufferIsValid(vmbuffer))
     896             :             {
     897           0 :                 ReleaseBuffer(vmbuffer);
     898           0 :                 vmbuffer = InvalidBuffer;
     899             :             }
     900             : 
     901             :             /* Perform a round of index and heap vacuuming */
     902           0 :             vacrel->consider_bypass_optimization = false;
     903           0 :             lazy_vacuum(vacrel);
     904             : 
     905             :             /*
     906             :              * Vacuum the Free Space Map to make newly-freed space visible on
     907             :              * upper-level FSM pages.  Note we have not yet processed blkno.
     908             :              */
     909           0 :             FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
     910             :                                     blkno);
     911           0 :             next_fsm_block_to_vacuum = blkno;
     912             : 
     913             :             /* Report that we are once again scanning the heap */
     914           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     915             :                                          PROGRESS_VACUUM_PHASE_SCAN_HEAP);
     916             :         }
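                      :         /*
                      :          * (The branch above shows zero hits in this coverage run: it is
                      :          * only reached when the TID store fills up mid-scan, which in
                      :          * practice takes a heavily bloated table combined with a very small
                      :          * maintenance_work_mem or autovacuum_work_mem setting.)
                      :          */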
     917             : 
     918             :         /*
     919             :          * Pin the visibility map page in case we need to mark the page
     920             :          * all-visible.  In most cases this will be very cheap, because we'll
     921             :          * already have the correct page pinned anyway.
     922             :          */
     923      410546 :         visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
     924             : 
     925      410546 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     926             :                                  vacrel->bstrategy);
     927      410546 :         page = BufferGetPage(buf);
     928             : 
     929             :         /*
     930             :          * We need a buffer cleanup lock to prune HOT chains and defragment
     931             :          * the page in lazy_scan_prune.  But when it's not possible to acquire
     932             :          * a cleanup lock right away, we may be able to settle for reduced
     933             :          * processing using lazy_scan_noprune.
     934             :          */
     935      410546 :         got_cleanup_lock = ConditionalLockBufferForCleanup(buf);
     936             : 
     937      410546 :         if (!got_cleanup_lock)
     938          10 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     939             : 
     940             :         /* Check for new or empty pages before lazy_scan_[no]prune call */
     941      410546 :         if (lazy_scan_new_or_empty(vacrel, buf, blkno, page, !got_cleanup_lock,
     942      410546 :                                    vmbuffer))
     943             :         {
     944             :             /* Processed as new/empty page (lock and pin released) */
     945        1212 :             continue;
     946             :         }
     947             : 
     948             :         /*
     949             :          * If we didn't get the cleanup lock, we can still collect LP_DEAD
     950             :          * items in the dead_items area for later vacuuming, count live and
     951             :          * recently dead tuples for vacuum logging, and determine if this
     952             :          * block could later be truncated. If we encounter any xid/mxids that
      953             :          * require advancing the relfrozenxid/relminmxid, we'll have to wait
     954             :          * for a cleanup lock and call lazy_scan_prune().
     955             :          */
     956      409334 :         if (!got_cleanup_lock &&
     957          10 :             !lazy_scan_noprune(vacrel, buf, blkno, page, &has_lpdead_items))
     958             :         {
     959             :             /*
     960             :              * lazy_scan_noprune could not do all required processing.  Wait
     961             :              * for a cleanup lock, and call lazy_scan_prune in the usual way.
     962             :              */
     963             :             Assert(vacrel->aggressive);
     964           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     965           0 :             LockBufferForCleanup(buf);
     966           0 :             got_cleanup_lock = true;
     967             :         }
     968             : 
     969             :         /*
     970             :          * If we have a cleanup lock, we must now prune, freeze, and count
     971             :          * tuples. We may have acquired the cleanup lock originally, or we may
     972             :          * have gone back and acquired it after lazy_scan_noprune() returned
     973             :          * false. Either way, the page hasn't been processed yet.
     974             :          *
     975             :          * Like lazy_scan_noprune(), lazy_scan_prune() will count
     976             :          * recently_dead_tuples and live tuples for vacuum logging, determine
     977             :          * if the block can later be truncated, and accumulate the details of
     978             :          * remaining LP_DEAD line pointers on the page into dead_items. These
     979             :          * dead items include those pruned by lazy_scan_prune() as well as
     980             :          * line pointers previously marked LP_DEAD.
     981             :          */
     982      409334 :         if (got_cleanup_lock)
     983      409324 :             lazy_scan_prune(vacrel, buf, blkno, page,
     984             :                             vmbuffer, all_visible_according_to_vm,
     985             :                             &has_lpdead_items);
     986             : 
     987             :         /*
     988             :          * Now drop the buffer lock and, potentially, update the FSM.
     989             :          *
     990             :          * Our goal is to update the freespace map the last time we touch the
     991             :          * page. If we'll process a block in the second pass, we may free up
     992             :          * additional space on the page, so it is better to update the FSM
     993             :          * after the second pass. If the relation has no indexes, or if index
     994             :          * vacuuming is disabled, there will be no second heap pass; if this
     995             :          * particular page has no dead items, the second heap pass will not
     996             :          * touch this page. So, in those cases, update the FSM now.
     997             :          *
     998             :          * Note: In corner cases, it's possible to miss updating the FSM
     999             :          * entirely. If index vacuuming is currently enabled, we'll skip the
    1000             :          * FSM update now. But if failsafe mode is later activated, or there
    1001             :          * are so few dead tuples that index vacuuming is bypassed, there will
    1002             :          * also be no opportunity to update the FSM later, because we'll never
    1003             :          * revisit this page. Since updating the FSM is desirable but not
    1004             :          * absolutely required, that's OK.
    1005             :          */
    1006      409334 :         if (vacrel->nindexes == 0
    1007      390582 :             || !vacrel->do_index_vacuuming
    1008      313020 :             || !has_lpdead_items)
    1009      387300 :         {
    1010      387300 :             Size        freespace = PageGetHeapFreeSpace(page);
    1011             : 
    1012      387300 :             UnlockReleaseBuffer(buf);
    1013      387300 :             RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1014             : 
    1015             :             /*
    1016             :              * Periodically perform FSM vacuuming to make newly-freed space
     1017             :              * visible on upper FSM pages. (If the table has indexes, this instead
     1018             :              * happens after index and heap vacuuming.) There will only be newly-freed
     1019             :              * space if we held the cleanup lock and lazy_scan_prune() was called.
    1020             :              */
    1021      387300 :             if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
    1022           0 :                 blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
    1023             :             {
    1024           0 :                 FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
    1025             :                                         blkno);
    1026           0 :                 next_fsm_block_to_vacuum = blkno;
    1027             :             }
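                      :             /*
                      :              * (The block above shows zero hits: VACUUM_FSM_EVERY_PAGES is
                      :              * defined earlier in this file as roughly 8GB worth of blocks, so
                      :              * this periodic FSM vacuuming only fires during very large
                      :              * index-less VACUUMs.)
                      :              */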
    1028             :         }
    1029             :         else
    1030       22034 :             UnlockReleaseBuffer(buf);
    1031             :     }
    1032             : 
    1033       97130 :     vacrel->blkno = InvalidBlockNumber;
    1034       97130 :     if (BufferIsValid(vmbuffer))
    1035       38832 :         ReleaseBuffer(vmbuffer);
    1036             : 
    1037             :     /* report that everything is now scanned */
    1038       97130 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
    1039             : 
    1040             :     /* now we can compute the new value for pg_class.reltuples */
    1041      194260 :     vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
    1042             :                                                      vacrel->scanned_pages,
    1043       97130 :                                                      vacrel->live_tuples);
    1044             : 
    1045             :     /*
    1046             :      * Also compute the total number of surviving heap entries.  In the
    1047             :      * (unlikely) scenario that new_live_tuples is -1, take it as zero.
    1048             :      */
    1049       97130 :     vacrel->new_rel_tuples =
    1050       97130 :         Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
    1051       97130 :         vacrel->missed_dead_tuples;
    1052             : 
    1053             :     /*
    1054             :      * Do index vacuuming (call each index's ambulkdelete routine), then do
    1055             :      * related heap vacuuming
    1056             :      */
    1057       97130 :     if (dead_items_info->num_items > 0)
    1058        1012 :         lazy_vacuum(vacrel);
    1059             : 
    1060             :     /*
    1061             :      * Vacuum the remainder of the Free Space Map.  We must do this whether or
    1062             :      * not there were indexes, and whether or not we bypassed index vacuuming.
    1063             :      */
    1064       97130 :     if (blkno > next_fsm_block_to_vacuum)
    1065       38832 :         FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
    1066             : 
    1067             :     /* report all blocks vacuumed */
    1068       97130 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1069             : 
    1070             :     /* Do final index cleanup (call each index's amvacuumcleanup routine) */
    1071       97130 :     if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
    1072       77642 :         lazy_cleanup_all_indexes(vacrel);
    1073       97130 : }
    1074             : 
    1075             : /*
    1076             :  *  heap_vac_scan_next_block() -- get next block for vacuum to process
    1077             :  *
    1078             :  * lazy_scan_heap() calls here every time it needs to get the next block to
    1079             :  * prune and vacuum.  The function uses the visibility map, vacuum options,
    1080             :  * and various thresholds to skip blocks which do not need to be processed and
    1081             :  * sets blkno to the next block to process.
    1082             :  *
    1083             :  * The block number and visibility status of the next block to process are set
    1084             :  * in *blkno and *all_visible_according_to_vm.  The return value is false if
    1085             :  * there are no further blocks to process.
    1086             :  *
    1087             :  * vacrel is an in/out parameter here.  Vacuum options and information about
    1088             :  * the relation are read.  vacrel->skippedallvis is set if we skip a block
    1089             :  * that's all-visible but not all-frozen, to ensure that we don't update
    1090             :  * relfrozenxid in that case.  vacrel also holds information about the next
    1091             :  * unskippable block, as bookkeeping for this function.
    1092             :  */
    1093             : static bool
    1094      507676 : heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
    1095             :                          bool *all_visible_according_to_vm)
    1096             : {
    1097             :     BlockNumber next_block;
    1098             : 
    1099             :     /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
    1100      507676 :     next_block = vacrel->current_block + 1;
    1101             : 
    1102             :     /* Have we reached the end of the relation? */
    1103      507676 :     if (next_block >= vacrel->rel_pages)
    1104             :     {
    1105       97130 :         if (BufferIsValid(vacrel->next_unskippable_vmbuffer))
    1106             :         {
    1107       36492 :             ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
    1108       36492 :             vacrel->next_unskippable_vmbuffer = InvalidBuffer;
    1109             :         }
    1110       97130 :         *blkno = vacrel->rel_pages;
    1111       97130 :         return false;
    1112             :     }
    1113             : 
    1114             :     /*
    1115             :      * We must be in one of the three following states:
    1116             :      */
    1117      410546 :     if (next_block > vacrel->next_unskippable_block ||
    1118      145642 :         vacrel->next_unskippable_block == InvalidBlockNumber)
    1119             :     {
    1120             :         /*
    1121             :          * 1. We have just processed an unskippable block (or we're at the
    1122             :          * beginning of the scan).  Find the next unskippable block using the
    1123             :          * visibility map.
    1124             :          */
    1125             :         bool        skipsallvis;
    1126             : 
    1127      303736 :         find_next_unskippable_block(vacrel, &skipsallvis);
    1128             : 
    1129             :         /*
    1130             :          * We now know the next block that we must process.  It can be the
    1131             :          * next block after the one we just processed, or something further
    1132             :          * ahead.  If it's further ahead, we can jump to it, but we choose to
    1133             :          * do so only if we can skip at least SKIP_PAGES_THRESHOLD consecutive
    1134             :          * pages.  Since we're reading sequentially, the OS should be doing
    1135             :          * readahead for us, so there's no gain in skipping a page now and
    1136             :          * then.  Skipping such a range might even discourage sequential
    1137             :          * detection.
    1138             :          *
    1139             :          * This test also enables more frequent relfrozenxid advancement
    1140             :          * during non-aggressive VACUUMs.  If the range has any all-visible
    1141             :          * pages then skipping makes updating relfrozenxid unsafe, which is a
    1142             :          * real downside.
    1143             :          */
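                      :         /*
                      :          * (SKIP_PAGES_THRESHOLD is defined earlier in this file; it is
                      :          * currently 32 pages, i.e. 256kB with the default 8kB block size.)
                      :          */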
    1144      303736 :         if (vacrel->next_unskippable_block - next_block >= SKIP_PAGES_THRESHOLD)
    1145             :         {
    1146        3276 :             next_block = vacrel->next_unskippable_block;
    1147        3276 :             if (skipsallvis)
    1148          50 :                 vacrel->skippedallvis = true;
    1149             :         }
    1150             :     }
    1151             : 
    1152             :     /* Now we must be in one of the two remaining states: */
    1153      410546 :     if (next_block < vacrel->next_unskippable_block)
    1154             :     {
    1155             :         /*
    1156             :          * 2. We are processing a range of blocks that we could have skipped
    1157             :          * but chose not to.  We know that they are all-visible in the VM,
    1158             :          * otherwise they would've been unskippable.
    1159             :          */
    1160      106810 :         *blkno = vacrel->current_block = next_block;
    1161      106810 :         *all_visible_according_to_vm = true;
    1162      106810 :         return true;
    1163             :     }
    1164             :     else
    1165             :     {
    1166             :         /*
    1167             :          * 3. We reached the next unskippable block.  Process it.  On next
    1168             :          * iteration, we will be back in state 1.
    1169             :          */
    1170             :         Assert(next_block == vacrel->next_unskippable_block);
    1171             : 
    1172      303736 :         *blkno = vacrel->current_block = next_block;
    1173      303736 :         *all_visible_according_to_vm = vacrel->next_unskippable_allvis;
    1174      303736 :         return true;
    1175             :     }
    1176             : }
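                      : 
                      : /*
                      :  * A standalone sketch (not part of vacuumlazy.c) of the wraparound trick
                      :  * relied on at the top of heap_vac_scan_next_block(): BlockNumber is an
                      :  * unsigned 32-bit integer and InvalidBlockNumber is 0xFFFFFFFF, so on the
                      :  * first call "current_block + 1" wraps around to block 0.
                      :  */
                      : #include <assert.h>
                      : #include <stdint.h>
                      : 
                      : typedef uint32_t BlockNumber;   /* mirrors PostgreSQL's typedef */
                      : #define InvalidBlockNumber ((BlockNumber) 0xFFFFFFFF)
                      : 
                      : int
                      : main(void)
                      : {
                      :     BlockNumber current_block = InvalidBlockNumber;    /* "first call" state */
                      : 
                      :     /* unsigned arithmetic wraps, so the scan starts at block 0 */
                      :     assert(current_block + 1 == 0);
                      :     return 0;
                      : }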
    1177             : 
    1178             : /*
    1179             :  * Find the next unskippable block in a vacuum scan using the visibility map.
     1180             :  * The next unskippable block and its visibility information are updated in
    1181             :  * vacrel.
    1182             :  *
    1183             :  * Note: our opinion of which blocks can be skipped can go stale immediately.
    1184             :  * It's okay if caller "misses" a page whose all-visible or all-frozen marking
    1185             :  * was concurrently cleared, though.  All that matters is that caller scan all
    1186             :  * pages whose tuples might contain XIDs < OldestXmin, or MXIDs < OldestMxact.
    1187             :  * (Actually, non-aggressive VACUUMs can choose to skip all-visible pages with
    1188             :  * older XIDs/MXIDs.  The *skippedallvis flag will be set here when the choice
    1189             :  * to skip such a range is actually made, making everything safe.)
    1190             :  */
    1191             : static void
    1192      303736 : find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
    1193             : {
    1194      303736 :     BlockNumber rel_pages = vacrel->rel_pages;
    1195      303736 :     BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
    1196      303736 :     Buffer      next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
    1197             :     bool        next_unskippable_allvis;
    1198             : 
    1199      303736 :     *skipsallvis = false;
    1200             : 
    1201             :     for (;;)
    1202      336836 :     {
    1203      640572 :         uint8       mapbits = visibilitymap_get_status(vacrel->rel,
    1204             :                                                        next_unskippable_block,
    1205             :                                                        &next_unskippable_vmbuffer);
    1206             : 
    1207      640572 :         next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
    1208             : 
    1209             :         /*
    1210             :          * A block is unskippable if it is not all visible according to the
    1211             :          * visibility map.
    1212             :          */
    1213      640572 :         if (!next_unskippable_allvis)
    1214             :         {
    1215             :             Assert((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0);
    1216      270648 :             break;
    1217             :         }
    1218             : 
    1219             :         /*
    1220             :          * Caller must scan the last page to determine whether it has tuples
    1221             :          * (caller must have the opportunity to set vacrel->nonempty_pages).
    1222             :          * This rule avoids having lazy_truncate_heap() take access-exclusive
    1223             :          * lock on rel to attempt a truncation that fails anyway, just because
    1224             :          * there are tuples on the last page (it is likely that there will be
    1225             :          * tuples on other nearby pages as well, but those can be skipped).
    1226             :          *
    1227             :          * Implement this by always treating the last block as unsafe to skip.
    1228             :          */
    1229      369924 :         if (next_unskippable_block == rel_pages - 1)
    1230       32348 :             break;
    1231             : 
    1232             :         /* DISABLE_PAGE_SKIPPING makes all skipping unsafe */
    1233      337576 :         if (!vacrel->skipwithvm)
    1234         740 :             break;
    1235             : 
    1236             :         /*
    1237             :          * Aggressive VACUUM caller can't skip pages just because they are
    1238             :          * all-visible.  They may still skip all-frozen pages, which can't
    1239             :          * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
    1240             :          */
    1241      336836 :         if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
    1242             :         {
    1243        4930 :             if (vacrel->aggressive)
    1244           0 :                 break;
    1245             : 
    1246             :             /*
    1247             :              * All-visible block is safe to skip in non-aggressive case.  But
    1248             :              * remember that the final range contains such a block for later.
    1249             :              */
    1250        4930 :             *skipsallvis = true;
    1251             :         }
    1252             : 
    1253      336836 :         next_unskippable_block++;
    1254             :     }
    1255             : 
    1256             :     /* write the local variables back to vacrel */
    1257      303736 :     vacrel->next_unskippable_block = next_unskippable_block;
    1258      303736 :     vacrel->next_unskippable_allvis = next_unskippable_allvis;
    1259      303736 :     vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
    1260      303736 : }
    1261             : 
    1262             : /*
    1263             :  *  lazy_scan_new_or_empty() -- lazy_scan_heap() new/empty page handling.
    1264             :  *
    1265             :  * Must call here to handle both new and empty pages before calling
    1266             :  * lazy_scan_prune or lazy_scan_noprune, since they're not prepared to deal
    1267             :  * with new or empty pages.
    1268             :  *
    1269             :  * It's necessary to consider new pages as a special case, since the rules for
     1270             :  * maintaining the visibility map and FSM with new pages are a little
    1271             :  * different (though new pages can be truncated away during rel truncation).
    1272             :  *
    1273             :  * Empty pages are not really a special case -- they're just heap pages that
    1274             :  * have no allocated tuples (including even LP_UNUSED items).  You might
    1275             :  * wonder why we need to handle them here all the same.  It's only necessary
    1276             :  * because of a corner-case involving a hard crash during heap relation
    1277             :  * extension.  If we ever make relation-extension crash safe, then it should
    1278             :  * no longer be necessary to deal with empty pages here (or new pages, for
    1279             :  * that matter).
    1280             :  *
    1281             :  * Caller must hold at least a shared lock.  We might need to escalate the
    1282             :  * lock in that case, so the type of lock caller holds needs to be specified
     1283             :  * using the 'sharelock' argument.
    1284             :  *
    1285             :  * Returns false in common case where caller should go on to call
    1286             :  * lazy_scan_prune (or lazy_scan_noprune).  Otherwise returns true, indicating
     1287             :  * that lazy_scan_heap is done processing the page, releasing lock and pin
     1288             :  * on caller's behalf.
    1289             :  */
    1290             : static bool
    1291      410546 : lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
    1292             :                        Page page, bool sharelock, Buffer vmbuffer)
    1293             : {
    1294             :     Size        freespace;
    1295             : 
    1296      410546 :     if (PageIsNew(page))
    1297             :     {
    1298             :         /*
    1299             :          * All-zeroes pages can be left over if either a backend extends the
    1300             :          * relation by a single page, but crashes before the newly initialized
    1301             :          * page has been written out, or when bulk-extending the relation
    1302             :          * (which creates a number of empty pages at the tail end of the
    1303             :          * relation), and then enters them into the FSM.
    1304             :          *
    1305             :          * Note we do not enter the page into the visibilitymap. That has the
    1306             :          * downside that we repeatedly visit this page in subsequent vacuums,
    1307             :          * but otherwise we'll never discover the space on a promoted standby.
    1308             :          * The harm of repeated checking ought to normally not be too bad. The
    1309             :          * space usually should be used at some point, otherwise there
    1310             :          * wouldn't be any regular vacuums.
    1311             :          *
    1312             :          * Make sure these pages are in the FSM, to ensure they can be reused.
    1313             :          * Do that by testing if there's any space recorded for the page. If
    1314             :          * not, enter it. We do so after releasing the lock on the heap page,
     1315             :          * since the FSM is approximate, after all.
    1316             :          */
    1317        1170 :         UnlockReleaseBuffer(buf);
    1318             : 
    1319        1170 :         if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
    1320             :         {
    1321         858 :             freespace = BLCKSZ - SizeOfPageHeaderData;
    1322             : 
    1323         858 :             RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1324             :         }
    1325             : 
    1326        1170 :         return true;
    1327             :     }
    1328             : 
    1329      409376 :     if (PageIsEmpty(page))
    1330             :     {
    1331             :         /*
    1332             :          * It seems likely that caller will always be able to get a cleanup
    1333             :          * lock on an empty page.  But don't take any chances -- escalate to
    1334             :          * an exclusive lock (still don't need a cleanup lock, though).
    1335             :          */
    1336          42 :         if (sharelock)
    1337             :         {
    1338           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    1339           0 :             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    1340             : 
    1341           0 :             if (!PageIsEmpty(page))
    1342             :             {
    1343             :                 /* page isn't new or empty -- keep lock and pin for now */
    1344           0 :                 return false;
    1345             :             }
    1346             :         }
    1347             :         else
    1348             :         {
    1349             :             /* Already have a full cleanup lock (which is more than enough) */
    1350             :         }
    1351             : 
    1352             :         /*
    1353             :          * Unlike new pages, empty pages are always set all-visible and
    1354             :          * all-frozen.
    1355             :          */
    1356          42 :         if (!PageIsAllVisible(page))
    1357             :         {
    1358           0 :             START_CRIT_SECTION();
    1359             : 
    1360             :             /* mark buffer dirty before writing a WAL record */
    1361           0 :             MarkBufferDirty(buf);
    1362             : 
    1363             :             /*
    1364             :              * It's possible that another backend has extended the heap,
    1365             :              * initialized the page, and then failed to WAL-log the page due
    1366             :              * to an ERROR.  Since heap extension is not WAL-logged, recovery
    1367             :              * might try to replay our record setting the page all-visible and
    1368             :              * find that the page isn't initialized, which will cause a PANIC.
    1369             :              * To prevent that, check whether the page has been previously
    1370             :              * WAL-logged, and if not, do that now.
    1371             :              */
    1372           0 :             if (RelationNeedsWAL(vacrel->rel) &&
    1373           0 :                 PageGetLSN(page) == InvalidXLogRecPtr)
    1374           0 :                 log_newpage_buffer(buf, true);
    1375             : 
    1376           0 :             PageSetAllVisible(page);
    1377           0 :             visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1378             :                               vmbuffer, InvalidTransactionId,
    1379             :                               VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
    1380           0 :             END_CRIT_SECTION();
    1381             :         }
    1382             : 
    1383          42 :         freespace = PageGetHeapFreeSpace(page);
    1384          42 :         UnlockReleaseBuffer(buf);
    1385          42 :         RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    1386          42 :         return true;
    1387             :     }
    1388             : 
    1389             :     /* page isn't new or empty -- keep lock and pin */
    1390      409334 :     return false;
    1391             : }
    1392             : 
    1393             : /* qsort comparator for sorting OffsetNumbers */
    1394             : static int
    1395     5565892 : cmpOffsetNumbers(const void *a, const void *b)
    1396             : {
    1397     5565892 :     return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
    1398             : }
    1399             : 
    1400             : /*
    1401             :  *  lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
    1402             :  *
    1403             :  * Caller must hold pin and buffer cleanup lock on the buffer.
    1404             :  *
    1405             :  * vmbuffer is the buffer containing the VM block with visibility information
    1406             :  * for the heap block, blkno. all_visible_according_to_vm is the saved
    1407             :  * visibility status of the heap block looked up earlier by the caller. We
    1408             :  * won't rely entirely on this status, as it may be out of date.
    1409             :  *
    1410             :  * *has_lpdead_items is set to true or false depending on whether, upon return
    1411             :  * from this function, any LP_DEAD items are still present on the page.
    1412             :  */
    1413             : static void
    1414      409324 : lazy_scan_prune(LVRelState *vacrel,
    1415             :                 Buffer buf,
    1416             :                 BlockNumber blkno,
    1417             :                 Page page,
    1418             :                 Buffer vmbuffer,
    1419             :                 bool all_visible_according_to_vm,
    1420             :                 bool *has_lpdead_items)
    1421             : {
    1422      409324 :     Relation    rel = vacrel->rel;
    1423             :     PruneFreezeResult presult;
    1424      409324 :     int         prune_options = 0;
    1425             : 
    1426             :     Assert(BufferGetBlockNumber(buf) == blkno);
    1427             : 
    1428             :     /*
    1429             :      * Prune all HOT-update chains and potentially freeze tuples on this page.
    1430             :      *
    1431             :      * If the relation has no indexes, we can immediately mark would-be dead
    1432             :      * items LP_UNUSED.
    1433             :      *
    1434             :      * The number of tuples removed from the page is returned in
    1435             :      * presult.ndeleted.  It should not be confused with presult.lpdead_items;
    1436             :      * presult.lpdead_items's final value can be thought of as the number of
    1437             :      * tuples that were deleted from indexes.
    1438             :      *
    1439             :      * We will update the VM after collecting LP_DEAD items and freezing
    1440             :      * tuples. Pruning will have determined whether or not the page is
    1441             :      * all-visible.
    1442             :      */
    1443      409324 :     prune_options = HEAP_PAGE_PRUNE_FREEZE;
    1444      409324 :     if (vacrel->nindexes == 0)
    1445       18752 :         prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
    1446             : 
    1447      409324 :     heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
    1448             :                                &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
    1449             :                                &vacrel->offnum,
    1450             :                                &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
    1451             : 
    1452             :     Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
    1453             :     Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
    1454             : 
    1455      409324 :     if (presult.nfrozen > 0)
    1456             :     {
    1457             :         /*
    1458             :          * We don't increment the frozen_pages instrumentation counter when
    1459             :          * nfrozen == 0, since it only counts pages with newly frozen tuples
    1460             :          * (don't confuse that with pages newly set all-frozen in VM).
    1461             :          */
    1462       32318 :         vacrel->frozen_pages++;
    1463             :     }
    1464             : 
    1465             :     /*
    1466             :      * VACUUM will call heap_page_is_all_visible() during the second pass over
    1467             :      * the heap to determine all_visible and all_frozen for the page -- this
    1468             :      * is a specialized version of the logic from this function.  Now that
    1469             :      * we've finished pruning and freezing, make sure that we're in total
    1470             :      * agreement with heap_page_is_all_visible() using an assertion.
    1471             :      */
    1472             : #ifdef USE_ASSERT_CHECKING
    1473             :     /* Note that all_frozen value does not matter when !all_visible */
    1474             :     if (presult.all_visible)
    1475             :     {
    1476             :         TransactionId debug_cutoff;
    1477             :         bool        debug_all_frozen;
    1478             : 
    1479             :         Assert(presult.lpdead_items == 0);
    1480             : 
    1481             :         if (!heap_page_is_all_visible(vacrel, buf,
    1482             :                                       &debug_cutoff, &debug_all_frozen))
    1483             :             Assert(false);
    1484             : 
    1485             :         Assert(presult.all_frozen == debug_all_frozen);
    1486             : 
    1487             :         Assert(!TransactionIdIsValid(debug_cutoff) ||
    1488             :                debug_cutoff == presult.vm_conflict_horizon);
    1489             :     }
    1490             : #endif
    1491             : 
    1492             :     /*
    1493             :      * Now save details of the LP_DEAD items from the page in vacrel
    1494             :      */
    1495      409324 :     if (presult.lpdead_items > 0)
    1496             :     {
    1497       26732 :         vacrel->lpdead_item_pages++;
    1498             : 
    1499             :         /*
    1500             :          * deadoffsets are collected incrementally in
    1501             :          * heap_page_prune_and_freeze() as each dead line pointer is recorded,
    1502             :          * with an indeterminate order, but dead_items_add requires them to be
    1503             :          * sorted.
    1504             :          */
    1505       26732 :         qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
    1506             :               cmpOffsetNumbers);
    1507             : 
    1508       26732 :         dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
    1509             :     }
    1510             : 
    1511             :     /* Finally, add page-local counts to whole-VACUUM counts */
    1512      409324 :     vacrel->tuples_deleted += presult.ndeleted;
    1513      409324 :     vacrel->tuples_frozen += presult.nfrozen;
    1514      409324 :     vacrel->lpdead_items += presult.lpdead_items;
    1515      409324 :     vacrel->live_tuples += presult.live_tuples;
    1516      409324 :     vacrel->recently_dead_tuples += presult.recently_dead_tuples;
    1517             : 
    1518             :     /* Can't truncate this page */
    1519      409324 :     if (presult.hastup)
    1520      395558 :         vacrel->nonempty_pages = blkno + 1;
    1521             : 
    1522             :     /* Did we find LP_DEAD items? */
    1523      409324 :     *has_lpdead_items = (presult.lpdead_items > 0);
    1524             : 
    1525             :     Assert(!presult.all_visible || !(*has_lpdead_items));
    1526             : 
    1527             :     /*
    1528             :      * Handle setting visibility map bit based on information from the VM (as
    1529             :      * of last heap_vac_scan_next_block() call), and from all_visible and
    1530             :      * all_frozen variables
    1531             :      */
    1532      409324 :     if (!all_visible_according_to_vm && presult.all_visible)
    1533       53386 :     {
    1534       53386 :         uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    1535             : 
    1536       53386 :         if (presult.all_frozen)
    1537             :         {
    1538             :             Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
    1539       40104 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    1540             :         }
    1541             : 
    1542             :         /*
    1543             :          * It should never be the case that the visibility map page is set
    1544             :          * while the page-level bit is clear, but the reverse is allowed (if
    1545             :          * checksums are not enabled).  Regardless, set both bits so that we
    1546             :          * get back in sync.
    1547             :          *
    1548             :          * NB: If the heap page is all-visible but the VM bit is not set, we
    1549             :          * don't need to dirty the heap page.  However, if checksums are
    1550             :          * enabled, we do need to make sure that the heap page is dirtied
    1551             :          * before passing it to visibilitymap_set(), because it may be logged.
    1552             :          * Given that this situation should only happen in rare cases after a
    1553             :          * crash, it is not worth optimizing.
    1554             :          */
    1555       53386 :         PageSetAllVisible(page);
    1556       53386 :         MarkBufferDirty(buf);
    1557       53386 :         visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1558             :                           vmbuffer, presult.vm_conflict_horizon,
    1559             :                           flags);
    1560             :     }
    1561             : 
    1562             :     /*
    1563             :      * As of PostgreSQL 9.2, the visibility map bit should never be set if the
    1564             :      * page-level bit is clear.  However, it's possible that the bit got
    1565             :      * cleared after heap_vac_scan_next_block() was called, so we must recheck
    1566             :      * with buffer lock before concluding that the VM is corrupt.
    1567             :      */
    1568      355938 :     else if (all_visible_according_to_vm && !PageIsAllVisible(page) &&
    1569           0 :              visibilitymap_get_status(vacrel->rel, blkno, &vmbuffer) != 0)
    1570             :     {
    1571           0 :         elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
    1572             :              vacrel->relname, blkno);
    1573           0 :         visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
    1574             :                             VISIBILITYMAP_VALID_BITS);
    1575             :     }
    1576             : 
    1577             :     /*
    1578             :      * It's possible for the value returned by
    1579             :      * GetOldestNonRemovableTransactionId() to move backwards, so it's not
    1580             :      * wrong for us to see tuples that appear to not be visible to everyone
    1581             :      * yet, while PD_ALL_VISIBLE is already set. The real safe xmin value
    1582             :      * never moves backwards, but GetOldestNonRemovableTransactionId() is
    1583             :      * conservative and sometimes returns a value that's unnecessarily small,
    1584             :      * so if we see that contradiction it just means that the tuples that we
    1585             :      * think are not visible to everyone yet actually are, and the
    1586             :      * PD_ALL_VISIBLE flag is correct.
    1587             :      *
    1588             :      * There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
    1589             :      * however.
    1590             :      */
    1591      355938 :     else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
    1592             :     {
    1593           0 :         elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
    1594             :              vacrel->relname, blkno);
    1595           0 :         PageClearAllVisible(page);
    1596           0 :         MarkBufferDirty(buf);
    1597           0 :         visibilitymap_clear(vacrel->rel, blkno, vmbuffer,
    1598             :                             VISIBILITYMAP_VALID_BITS);
    1599             :     }
    1600             : 
    1601             :     /*
    1602             :      * If the all-visible page is all-frozen but not marked as such yet, mark
    1603             :      * it as all-frozen.  Note that all_frozen is only valid if all_visible is
    1604             :      * true, so we must check both all_visible and all_frozen.
    1605             :      */
    1606      355938 :     else if (all_visible_according_to_vm && presult.all_visible &&
    1607      139854 :              presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
    1608             :     {
    1609             :         /*
    1610             :          * Avoid relying on all_visible_according_to_vm as a proxy for the
    1611             :          * page-level PD_ALL_VISIBLE bit being set, since it might have become
    1612             :          * stale -- even when all_visible is set
    1613             :          */
    1614          22 :         if (!PageIsAllVisible(page))
    1615             :         {
    1616           0 :             PageSetAllVisible(page);
    1617           0 :             MarkBufferDirty(buf);
    1618             :         }
    1619             : 
    1620             :         /*
    1621             :          * Set the page all-frozen (and all-visible) in the VM.
    1622             :          *
    1623             :          * We can pass InvalidTransactionId as our cutoff_xid, since a
    1624             :          * snapshotConflictHorizon sufficient to make everything safe for REDO
    1625             :          * was logged when the page's tuples were frozen.
    1626             :          */
    1627             :         Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
    1628          22 :         visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
    1629             :                           vmbuffer, InvalidTransactionId,
    1630             :                           VISIBILITYMAP_ALL_VISIBLE |
    1631             :                           VISIBILITYMAP_ALL_FROZEN);
    1632             :     }
    1633      409324 : }
    1634             : 
    1635             : /*
    1636             :  *  lazy_scan_noprune() -- lazy_scan_prune() without pruning or freezing
    1637             :  *
    1638             :  * Caller need only hold a pin and share lock on the buffer, unlike
    1639             :  * lazy_scan_prune, which requires a full cleanup lock.  While pruning isn't
    1640             :  * performed here, it's quite possible that an earlier opportunistic pruning
    1641             :  * operation left LP_DEAD items behind.  We'll at least collect any such items
    1642             :  * in dead_items for removal from indexes.
    1643             :  *
    1644             :  * For aggressive VACUUM callers, we may return false to indicate that a full
    1645             :  * cleanup lock is required for processing by lazy_scan_prune.  This is only
    1646             :  * necessary when the aggressive VACUUM needs to freeze some tuple XIDs from
    1647             :  * one or more tuples on the page.  We always return true for non-aggressive
    1648             :  * callers.
    1649             :  *
    1650             :  * If this function returns true, *has_lpdead_items gets set to true or false
    1651             :  * depending on whether, upon return from this function, any LP_DEAD items are
    1652             :  * present on the page. If this function returns false, *has_lpdead_items
    1653             :  * is not updated.
    1654             :  */
    1655             : static bool
    1656          10 : lazy_scan_noprune(LVRelState *vacrel,
    1657             :                   Buffer buf,
    1658             :                   BlockNumber blkno,
    1659             :                   Page page,
    1660             :                   bool *has_lpdead_items)
    1661             : {
    1662             :     OffsetNumber offnum,
    1663             :                 maxoff;
    1664             :     int         lpdead_items,
    1665             :                 live_tuples,
    1666             :                 recently_dead_tuples,
    1667             :                 missed_dead_tuples;
    1668             :     bool        hastup;
    1669             :     HeapTupleHeader tupleheader;
    1670          10 :     TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
    1671          10 :     MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
    1672             :     OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
    1673             : 
    1674             :     Assert(BufferGetBlockNumber(buf) == blkno);
    1675             : 
    1676          10 :     hastup = false;             /* for now */
    1677             : 
    1678          10 :     lpdead_items = 0;
    1679          10 :     live_tuples = 0;
    1680          10 :     recently_dead_tuples = 0;
    1681          10 :     missed_dead_tuples = 0;
    1682             : 
    1683          10 :     maxoff = PageGetMaxOffsetNumber(page);
    1684         274 :     for (offnum = FirstOffsetNumber;
    1685             :          offnum <= maxoff;
    1686         264 :          offnum = OffsetNumberNext(offnum))
    1687             :     {
    1688             :         ItemId      itemid;
    1689             :         HeapTupleData tuple;
    1690             : 
    1691         264 :         vacrel->offnum = offnum;
    1692         264 :         itemid = PageGetItemId(page, offnum);
    1693             : 
    1694         264 :         if (!ItemIdIsUsed(itemid))
    1695          38 :             continue;
    1696             : 
    1697         228 :         if (ItemIdIsRedirected(itemid))
    1698             :         {
    1699           2 :             hastup = true;
    1700           2 :             continue;
    1701             :         }
    1702             : 
    1703         226 :         if (ItemIdIsDead(itemid))
    1704             :         {
    1705             :             /*
    1706             :              * Deliberately don't set hastup=true here.  See same point in
    1707             :              * lazy_scan_prune for an explanation.
    1708             :              */
    1709           0 :             deadoffsets[lpdead_items++] = offnum;
    1710           0 :             continue;
    1711             :         }
    1712             : 
    1713         226 :         hastup = true;          /* page prevents rel truncation */
    1714         226 :         tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
    1715         226 :         if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
    1716             :                                      &NoFreezePageRelfrozenXid,
    1717             :                                      &NoFreezePageRelminMxid))
    1718             :         {
    1719             :             /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */
    1720         128 :             if (vacrel->aggressive)
    1721             :             {
    1722             :                 /*
    1723             :                  * Aggressive VACUUMs must always be able to advance rel's
    1724             :                  * relfrozenxid to a value >= FreezeLimit (and be able to
    1725             :                  * advance rel's relminmxid to a value >= MultiXactCutoff).
    1726             :                  * The ongoing aggressive VACUUM won't be able to do that
    1727             :                  * unless it can freeze an XID (or MXID) from this tuple now.
    1728             :                  *
    1729             :                  * The only safe option is to have caller perform processing
    1730             :                  * of this page using lazy_scan_prune.  Caller might have to
    1731             :                  * wait a while for a cleanup lock, but it can't be helped.
    1732             :                  */
    1733           0 :                 vacrel->offnum = InvalidOffsetNumber;
    1734           0 :                 return false;
    1735             :             }
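                      :             /*
                      :              * (Zero hits above in this coverage run: reaching this requires an
                      :              * aggressive VACUUM to find a tuple that needs freezing on a page
                      :              * whose cleanup lock could not be acquired, a timing-dependent case
                      :              * the regression tests rarely produce.)
                      :              */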
    1736             : 
    1737             :             /*
    1738             :              * Non-aggressive VACUUMs are under no obligation to advance
    1739             :              * relfrozenxid (even by one XID).  We can be much laxer here.
    1740             :              *
    1741             :              * Currently we always just accept an older final relfrozenxid
    1742             :              * and/or relminmxid value.  We never make caller wait or work a
    1743             :              * little harder, even when it likely makes sense to do so.
    1744             :              */
    1745             :         }
    1746             : 
    1747         226 :         ItemPointerSet(&(tuple.t_self), blkno, offnum);
    1748         226 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    1749         226 :         tuple.t_len = ItemIdGetLength(itemid);
    1750         226 :         tuple.t_tableOid = RelationGetRelid(vacrel->rel);
    1751             : 
    1752         226 :         switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
    1753             :                                          buf))
    1754             :         {
    1755         220 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1756             :             case HEAPTUPLE_LIVE:
    1757             : 
    1758             :                 /*
    1759             :                  * Count both cases as live, just like lazy_scan_prune
    1760             :                  */
    1761         220 :                 live_tuples++;
    1762             : 
    1763         220 :                 break;
    1764           2 :             case HEAPTUPLE_DEAD:
    1765             : 
    1766             :                 /*
    1767             :                  * There is some useful work for pruning to do, that won't be
    1768             :                  * done due to failure to get a cleanup lock.
    1769             :                  */
    1770           2 :                 missed_dead_tuples++;
    1771           2 :                 break;
    1772           4 :             case HEAPTUPLE_RECENTLY_DEAD:
    1773             : 
    1774             :                 /*
    1775             :                  * Count in recently_dead_tuples, just like lazy_scan_prune
    1776             :                  */
    1777           4 :                 recently_dead_tuples++;
    1778           4 :                 break;
    1779           0 :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1780             : 
    1781             :                 /*
    1782             :                  * Do not count these rows as live, just like lazy_scan_prune
    1783             :                  */
    1784           0 :                 break;
    1785           0 :             default:
    1786           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1787             :                 break;
    1788             :         }
    1789             :     }
    1790             : 
    1791          10 :     vacrel->offnum = InvalidOffsetNumber;
    1792             : 
    1793             :     /*
    1794             :      * By here we know for sure that caller can put off freezing and pruning
    1795             :      * this particular page until the next VACUUM.  Remember its details now.
    1796             :      * (lazy_scan_prune expects a clean slate, so we have to do this last.)
    1797             :      */
    1798          10 :     vacrel->NewRelfrozenXid = NoFreezePageRelfrozenXid;
    1799          10 :     vacrel->NewRelminMxid = NoFreezePageRelminMxid;
    1800             : 
    1801             :     /* Save any LP_DEAD items found on the page in dead_items */
    1802          10 :     if (vacrel->nindexes == 0)
    1803             :     {
    1804             :         /* Using one-pass strategy (since table has no indexes) */
    1805           0 :         if (lpdead_items > 0)
    1806             :         {
    1807             :             /*
    1808             :              * Perfunctory handling for the corner case where a single pass
    1809             :              * strategy VACUUM cannot get a cleanup lock, and it turns out
    1810             :              * that there are one or more LP_DEAD items: just count the LP_DEAD
    1811             :              * items as missed_dead_tuples instead. (This is a bit dishonest,
    1812             :              * but it beats having to maintain specialized heap vacuuming code
    1813             :              * forever, for vanishingly little benefit.)
    1814             :              */
    1815           0 :             hastup = true;
    1816           0 :             missed_dead_tuples += lpdead_items;
    1817             :         }
    1818             :     }
    1819          10 :     else if (lpdead_items > 0)
    1820             :     {
    1821             :         /*
    1822             :          * Page has LP_DEAD items, and so any references/TIDs that remain in
    1823             :          * indexes will be deleted during index vacuuming (and then marked
    1824             :          * LP_UNUSED in the heap)
    1825             :          */
    1826           0 :         vacrel->lpdead_item_pages++;
    1827             : 
    1828           0 :         dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
    1829             : 
    1830           0 :         vacrel->lpdead_items += lpdead_items;
    1831             :     }
    1832             : 
    1833             :     /*
    1834             :      * Finally, add relevant page-local counts to whole-VACUUM counts
    1835             :      */
    1836          10 :     vacrel->live_tuples += live_tuples;
    1837          10 :     vacrel->recently_dead_tuples += recently_dead_tuples;
    1838          10 :     vacrel->missed_dead_tuples += missed_dead_tuples;
    1839          10 :     if (missed_dead_tuples > 0)
    1840           2 :         vacrel->missed_dead_pages++;
    1841             : 
    1842             :     /* Can't truncate this page */
    1843          10 :     if (hastup)
    1844          10 :         vacrel->nonempty_pages = blkno + 1;
    1845             : 
    1846             :     /* Did we find LP_DEAD items? */
    1847          10 :     *has_lpdead_items = (lpdead_items > 0);
    1848             : 
    1849             :     /* Caller won't need to call lazy_scan_prune with same page */
    1850          10 :     return true;
    1851             : }
    1852             : 
    1853             : /*
    1854             :  * Main entry point for index vacuuming and heap vacuuming.
    1855             :  *
    1856             :  * Removes items collected in dead_items from table's indexes, then marks the
    1857             :  * same items LP_UNUSED in the heap.  See the comments above lazy_scan_heap
    1858             :  * for full details.
    1859             :  *
    1860             :  * Also empties dead_items, freeing up space for later TIDs.
    1861             :  *
    1862             :  * We may choose to bypass index vacuuming at this point, though only when the
    1863             :  * ongoing VACUUM operation will definitely only have one index scan/round of
    1864             :  * index vacuuming.
    1865             :  */
    1866             : static void
    1867        1012 : lazy_vacuum(LVRelState *vacrel)
    1868             : {
    1869             :     bool        bypass;
    1870             : 
    1871             :     /* Should not end up here with no indexes */
    1872             :     Assert(vacrel->nindexes > 0);
    1873             :     Assert(vacrel->lpdead_item_pages > 0);
    1874             : 
    1875        1012 :     if (!vacrel->do_index_vacuuming)
    1876             :     {
    1877             :         Assert(!vacrel->do_index_cleanup);
    1878          30 :         dead_items_reset(vacrel);
    1879          30 :         return;
    1880             :     }
    1881             : 
    1882             :     /*
    1883             :      * Consider bypassing index vacuuming (and heap vacuuming) entirely.
    1884             :      *
    1885             :      * We currently only do this in cases where the number of LP_DEAD items
    1886             :      * for the entire VACUUM operation is close to zero.  This avoids sharp
    1887             :      * discontinuities in the duration and overhead of successive VACUUM
    1888             :      * operations that run against the same table with a fixed workload.
    1889             :      * Ideally, successive VACUUM operations will behave as if there are
    1890             :      * exactly zero LP_DEAD items in cases where there are close to zero.
    1891             :      *
    1892             :      * This is likely to be helpful with a table that is continually affected
    1893             :      * by UPDATEs that can mostly apply the HOT optimization, but occasionally
    1894             :      * have small aberrations that lead to just a few heap pages retaining
    1895             :      * only one or two LP_DEAD items.  This is pretty common; even when the
    1896             :      * DBA goes out of their way to make UPDATEs use HOT, it is practically
    1897             :      * impossible to predict whether HOT will be applied in 100% of cases.
    1898             :      * It's far easier to ensure that 99%+ of all UPDATEs against a table use
    1899             :      * HOT through careful tuning.
    1900             :      */
    1901         982 :     bypass = false;
    1902         982 :     if (vacrel->consider_bypass_optimization && vacrel->rel_pages > 0)
    1903             :     {
    1904             :         BlockNumber threshold;
    1905             : 
    1906             :         Assert(vacrel->num_index_scans == 0);
    1907             :         Assert(vacrel->lpdead_items == vacrel->dead_items_info->num_items);
    1908             :         Assert(vacrel->do_index_vacuuming);
    1909             :         Assert(vacrel->do_index_cleanup);
    1910             : 
    1911             :         /*
    1912             :          * This crossover point at which we'll start to do index vacuuming is
    1913             :          * expressed as a percentage of the total number of heap pages in the
    1914             :          * table that are known to have at least one LP_DEAD item.  This is
    1915             :          * much more important than the total number of LP_DEAD items, since
    1916             :          * it's a proxy for the number of heap pages whose visibility map bits
    1917             :          * cannot be set on account of bypassing index and heap vacuuming.
    1918             :          *
    1919             :          * We apply one further precautionary test: the space currently used
    1920             :          * to store the TIDs (TIDs that now all point to LP_DEAD items) must
    1921             :          * not exceed 32MB.  This limits the risk that we will bypass index
    1922             :          * vacuuming again and again until eventually there is a VACUUM whose
    1923             :          * dead_items space is not CPU cache resident.
    1924             :          *
    1925             :          * We don't take any special steps to remember the LP_DEAD items (such
    1926             :          * as counting them in our final update to the stats system) when the
    1927             :          * optimization is applied.  Though the accounting used in analyze.c's
    1928             :          * acquire_sample_rows() will recognize the same LP_DEAD items as dead
    1929             :          * rows in its own stats report, that's okay. The discrepancy should
    1930             :          * be negligible.  If this optimization is ever expanded to cover more
    1931             :          * cases then this may need to be reconsidered.
    1932             :          */
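                     :         /*
                     :          * For a concrete sense of scale (assuming BYPASS_THRESHOLD_PAGES
                     :          * is a small fraction such as 0.02, i.e. 2% of rel_pages): a
                     :          * 10,000-page table gets a threshold of 200, so index vacuuming
                     :          * is bypassed only when fewer than 200 heap pages have LP_DEAD
                     :          * items and the TID store is using less than 32MB.
                     :          */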
    1933         964 :         threshold = (double) vacrel->rel_pages * BYPASS_THRESHOLD_PAGES;
    1934         968 :         bypass = (vacrel->lpdead_item_pages < threshold &&
    1935           4 :                   (TidStoreMemoryUsage(vacrel->dead_items) < (32L * 1024L * 1024L)));
    1936             :     }
    1937             : 
    1938         982 :     if (bypass)
    1939             :     {
    1940             :         /*
    1941             :          * There are almost zero TIDs.  Behave as if there were precisely
    1942             :          * zero: bypass index vacuuming, but do index cleanup.
    1943             :          *
    1944             :          * We expect that the ongoing VACUUM operation will finish very
    1945             :          * quickly, so there is no point in considering speeding up as a
    1946             :          * failsafe against wraparound failure. (Index cleanup is expected to
    1947             :          * finish very quickly in cases where there were no ambulkdelete()
    1948             :          * calls.)
    1949             :          */
    1950           4 :         vacrel->do_index_vacuuming = false;
    1951             :     }
    1952         978 :     else if (lazy_vacuum_all_indexes(vacrel))
    1953             :     {
    1954             :         /*
    1955             :          * We successfully completed a round of index vacuuming.  Do related
    1956             :          * heap vacuuming now.
    1957             :          */
    1958         978 :         lazy_vacuum_heap_rel(vacrel);
    1959             :     }
    1960             :     else
    1961             :     {
    1962             :         /*
    1963             :          * Failsafe case.
    1964             :          *
    1965             :          * We attempted index vacuuming, but didn't finish a full round/full
    1966             :          * index scan.  This happens when relfrozenxid or relminmxid is too
    1967             :          * far in the past.
    1968             :          *
    1969             :          * From this point on the VACUUM operation will do no further index
    1970             :          * vacuuming or heap vacuuming.  This VACUUM operation won't end up
    1971             :          * back here again.
    1972             :          */
    1973             :         Assert(VacuumFailsafeActive);
    1974             :     }
    1975             : 
    1976             :     /*
    1977             :      * Forget the LP_DEAD items that we just vacuumed (or just decided to not
    1978             :      * vacuum)
    1979             :      */
    1980         982 :     dead_items_reset(vacrel);
    1981             : }
    1982             : 
    1983             : /*
    1984             :  *  lazy_vacuum_all_indexes() -- Main entry for index vacuuming
    1985             :  *
    1986             :  * Returns true in the common case when all indexes were successfully
    1987             :  * vacuumed.  Returns false in rare cases where we determined that the ongoing
    1988             :  * VACUUM operation is at risk of taking too long to finish, leading to
    1989             :  * wraparound failure.
    1990             :  */
    1991             : static bool
    1992         978 : lazy_vacuum_all_indexes(LVRelState *vacrel)
    1993             : {
    1994         978 :     bool        allindexes = true;
    1995         978 :     double      old_live_tuples = vacrel->rel->rd_rel->reltuples;
    1996         978 :     const int   progress_start_index[] = {
    1997             :         PROGRESS_VACUUM_PHASE,
    1998             :         PROGRESS_VACUUM_INDEXES_TOTAL
    1999             :     };
    2000         978 :     const int   progress_end_index[] = {
    2001             :         PROGRESS_VACUUM_INDEXES_TOTAL,
    2002             :         PROGRESS_VACUUM_INDEXES_PROCESSED,
    2003             :         PROGRESS_VACUUM_NUM_INDEX_VACUUMS
    2004             :     };
    2005             :     int64       progress_start_val[2];
    2006             :     int64       progress_end_val[3];
    2007             : 
    2008             :     Assert(vacrel->nindexes > 0);
    2009             :     Assert(vacrel->do_index_vacuuming);
    2010             :     Assert(vacrel->do_index_cleanup);
    2011             : 
    2012             :     /* Precheck for XID wraparound emergencies */
    2013         978 :     if (lazy_check_wraparound_failsafe(vacrel))
    2014             :     {
    2015             :         /* Wraparound emergency -- don't even start an index scan */
    2016           0 :         return false;
    2017             :     }
    2018             : 
    2019             :     /*
    2020             :      * Report that we are now vacuuming indexes and the number of indexes to
    2021             :      * vacuum.
    2022             :      */
    2023         978 :     progress_start_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_INDEX;
    2024         978 :     progress_start_val[1] = vacrel->nindexes;
    2025         978 :     pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
    2026             : 
    2027         978 :     if (!ParallelVacuumIsActive(vacrel))
    2028             :     {
    2029        2876 :         for (int idx = 0; idx < vacrel->nindexes; idx++)
    2030             :         {
    2031        1912 :             Relation    indrel = vacrel->indrels[idx];
    2032        1912 :             IndexBulkDeleteResult *istat = vacrel->indstats[idx];
    2033             : 
    2034        1912 :             vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat,
    2035             :                                                           old_live_tuples,
    2036             :                                                           vacrel);
    2037             : 
    2038             :             /* Report the number of indexes vacuumed */
    2039        1912 :             pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
    2040        1912 :                                          idx + 1);
    2041             : 
    2042        1912 :             if (lazy_check_wraparound_failsafe(vacrel))
    2043             :             {
    2044             :                 /* Wraparound emergency -- end current index scan */
    2045           0 :                 allindexes = false;
    2046           0 :                 break;
    2047             :             }
    2048             :         }
    2049             :     }
    2050             :     else
    2051             :     {
    2052             :         /* Outsource everything to parallel variant */
    2053          14 :         parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
    2054             :                                             vacrel->num_index_scans);
    2055             : 
    2056             :         /*
    2057             :          * Do a postcheck to consider applying wraparound failsafe now.  Note
    2058             :          * that parallel VACUUM only gets the precheck and this postcheck.
    2059             :          */
    2060          14 :         if (lazy_check_wraparound_failsafe(vacrel))
    2061           0 :             allindexes = false;
    2062             :     }
    2063             : 
    2064             :     /*
    2065             :      * We delete all LP_DEAD items from the first heap pass in all indexes on
    2066             :      * each call here (except calls where we choose to do the failsafe). This
    2067             :      * makes the next call to lazy_vacuum_heap_rel() safe (except in the event
    2068             :      * of the failsafe triggering, which prevents the next call from taking
    2069             :      * place).
    2070             :      */
    2071             :     Assert(vacrel->num_index_scans > 0 ||
    2072             :            vacrel->dead_items_info->num_items == vacrel->lpdead_items);
    2073             :     Assert(allindexes || VacuumFailsafeActive);
    2074             : 
    2075             :     /*
    2076             :      * Increase and report the number of index scans.  Also, we reset
    2077             :      * PROGRESS_VACUUM_INDEXES_TOTAL and PROGRESS_VACUUM_INDEXES_PROCESSED.
    2078             :      *
    2079             :      * We deliberately include the case where we started a round of bulk
    2080             :      * deletes that we weren't able to finish due to the failsafe triggering.
    2081             :      */
    2082         978 :     vacrel->num_index_scans++;
    2083         978 :     progress_end_val[0] = 0;
    2084         978 :     progress_end_val[1] = 0;
    2085         978 :     progress_end_val[2] = vacrel->num_index_scans;
    2086         978 :     pgstat_progress_update_multi_param(3, progress_end_index, progress_end_val);
    2087             : 
    2088         978 :     return allindexes;
    2089             : }
    2090             : 
    2091             : /*
    2092             :  *  lazy_vacuum_heap_rel() -- second pass over the heap for two pass strategy
    2093             :  *
    2094             :  * This routine marks LP_DEAD items in vacrel->dead_items as LP_UNUSED. Pages
    2095             :  * that never had lazy_scan_prune record LP_DEAD items are not visited at all.
    2096             :  *
    2097             :  * We may also be able to truncate the line pointer array of the heap pages we
    2098             :  * visit.  If there is a contiguous group of LP_UNUSED items at the end of the
    2099             :  * array, it can be reclaimed as free space.  These LP_UNUSED items usually
    2100             :  * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
    2101             :  * each page to LP_UNUSED, and then consider if it's possible to truncate the
    2102             :  * page's line pointer array).
    2103             :  *
    2104             :  * Note: the reason for doing this as a second pass is that we cannot remove
    2105             :  * the tuples until we've removed their index entries, and we want to process
    2106             :  * index entry removal in batches as large as possible.
    2107             :  */
    2108             : static void
    2109         978 : lazy_vacuum_heap_rel(LVRelState *vacrel)
    2110             : {
    2111         978 :     BlockNumber vacuumed_pages = 0;
    2112         978 :     Buffer      vmbuffer = InvalidBuffer;
    2113             :     LVSavedErrInfo saved_err_info;
    2114             :     TidStoreIter *iter;
    2115             :     TidStoreIterResult *iter_result;
    2116             : 
    2117             :     Assert(vacrel->do_index_vacuuming);
    2118             :     Assert(vacrel->do_index_cleanup);
    2119             :     Assert(vacrel->num_index_scans > 0);
    2120             : 
    2121             :     /* Report that we are now vacuuming the heap */
    2122         978 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2123             :                                  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
    2124             : 
    2125             :     /* Update error traceback information */
    2126         978 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2127             :                              VACUUM_ERRCB_PHASE_VACUUM_HEAP,
    2128             :                              InvalidBlockNumber, InvalidOffsetNumber);
    2129             : 
    2130         978 :     iter = TidStoreBeginIterate(vacrel->dead_items);
    2131       23008 :     while ((iter_result = TidStoreIterateNext(iter)) != NULL)
    2132             :     {
    2133             :         BlockNumber blkno;
    2134             :         Buffer      buf;
    2135             :         Page        page;
    2136             :         Size        freespace;
    2137             :         OffsetNumber offsets[MaxOffsetNumber];
    2138             :         int         num_offsets;
    2139             : 
    2140       22030 :         vacuum_delay_point();
    2141             : 
    2142       22030 :         blkno = iter_result->blkno;
    2143       22030 :         vacrel->blkno = blkno;
    2144             : 
    2145       22030 :         num_offsets = TidStoreGetBlockOffsets(iter_result, offsets, lengthof(offsets));
    2146             :         Assert(num_offsets <= lengthof(offsets));
    2147             : 
    2148             :         /*
    2149             :          * Pin the visibility map page in case we need to mark the page
    2150             :          * all-visible.  In most cases this will be very cheap, because we'll
    2151             :          * already have the correct page pinned anyway.
    2152             :          */
    2153       22030 :         visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
    2154             : 
    2155             :         /* We need a non-cleanup exclusive lock to mark dead_items unused */
    2156       22030 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
    2157             :                                  vacrel->bstrategy);
    2158       22030 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
    2159       22030 :         lazy_vacuum_heap_page(vacrel, blkno, buf, offsets,
    2160             :                               num_offsets, vmbuffer);
    2161             : 
    2162             :         /* Now that we've vacuumed the page, record its available space */
    2163       22030 :         page = BufferGetPage(buf);
    2164       22030 :         freespace = PageGetHeapFreeSpace(page);
    2165             : 
    2166       22030 :         UnlockReleaseBuffer(buf);
    2167       22030 :         RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
    2168       22030 :         vacuumed_pages++;
    2169             :     }
    2170         978 :     TidStoreEndIterate(iter);
    2171             : 
    2172         978 :     vacrel->blkno = InvalidBlockNumber;
    2173         978 :     if (BufferIsValid(vmbuffer))
    2174         978 :         ReleaseBuffer(vmbuffer);
    2175             : 
    2176             :     /*
    2177             :      * We set all LP_DEAD items from the first heap pass to LP_UNUSED during
    2178             :      * the second heap pass.  No more, no less.
    2179             :      */
    2180             :     Assert(vacrel->num_index_scans > 1 ||
    2181             :            (vacrel->dead_items_info->num_items == vacrel->lpdead_items &&
    2182             :             vacuumed_pages == vacrel->lpdead_item_pages));
    2183             : 
    2184         978 :     ereport(DEBUG2,
    2185             :             (errmsg("table \"%s\": removed %lld dead item identifiers in %u pages",
    2186             :                     vacrel->relname, (long long) vacrel->dead_items_info->num_items,
    2187             :                     vacuumed_pages)));
    2188             : 
    2189             :     /* Revert to the previous phase information for error traceback */
    2190         978 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2191         978 : }
    2192             : 
    2193             : /*
    2194             :  *  lazy_vacuum_heap_page() -- free page's LP_DEAD items listed in the
    2195             :  *                        vacrel->dead_items store.
    2196             :  *
    2197             :  * Caller must have an exclusive buffer lock on the buffer (though a full
    2198             :  * cleanup lock is also acceptable).  vmbuffer must be valid and already have
    2199             :  * a pin on blkno's visibility map page.
    2200             :  */
    2201             : static void
    2202       22030 : lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
    2203             :                       OffsetNumber *deadoffsets, int num_offsets,
    2204             :                       Buffer vmbuffer)
    2205             : {
    2206       22030 :     Page        page = BufferGetPage(buffer);
    2207             :     OffsetNumber unused[MaxHeapTuplesPerPage];
    2208       22030 :     int         nunused = 0;
    2209             :     TransactionId visibility_cutoff_xid;
    2210             :     bool        all_frozen;
    2211             :     LVSavedErrInfo saved_err_info;
    2212             : 
    2213             :     Assert(vacrel->do_index_vacuuming);
    2214             : 
    2215       22030 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    2216             : 
    2217             :     /* Update error traceback information */
    2218       22030 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2219             :                              VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
    2220             :                              InvalidOffsetNumber);
    2221             : 
    2222       22030 :     START_CRIT_SECTION();
    2223             : 
    2224     1473808 :     for (int i = 0; i < num_offsets; i++)
    2225             :     {
    2226             :         ItemId      itemid;
    2227     1451778 :         OffsetNumber toff = deadoffsets[i];
    2228             : 
    2229     1451778 :         itemid = PageGetItemId(page, toff);
    2230             : 
    2231             :         Assert(ItemIdIsDead(itemid) && !ItemIdHasStorage(itemid));
    2232     1451778 :         ItemIdSetUnused(itemid);
    2233     1451778 :         unused[nunused++] = toff;
    2234             :     }
    2235             : 
    2236             :     Assert(nunused > 0);
    2237             : 
    2238             :     /* Attempt to truncate line pointer array now */
    2239       22030 :     PageTruncateLinePointerArray(page);
    2240             : 
    2241             :     /*
    2242             :      * Mark buffer dirty before we write WAL.
    2243             :      */
    2244       22030 :     MarkBufferDirty(buffer);
    2245             : 
    2246             :     /* XLOG stuff */
    2247       22030 :     if (RelationNeedsWAL(vacrel->rel))
    2248             :     {
    2249       20546 :         log_heap_prune_and_freeze(vacrel->rel, buffer,
    2250             :                                   InvalidTransactionId,
    2251             :                                   false,    /* no cleanup lock required */
    2252             :                                   PRUNE_VACUUM_CLEANUP,
    2253             :                                   NULL, 0,  /* frozen */
    2254             :                                   NULL, 0,  /* redirected */
    2255             :                                   NULL, 0,  /* dead */
    2256             :                                   unused, nunused);
    2257             :     }
    2258             : 
    2259             :     /*
    2260             :      * End critical section, so we can safely do visibility tests (which
    2261             :      * possibly need to perform IO and allocate memory!). If we crash now the
    2262             :      * page (including the corresponding vm bit) might not be marked all
    2263             :      * visible, but that's fine. A later vacuum will fix that.
    2264             :      */
    2265       22030 :     END_CRIT_SECTION();
    2266             : 
    2267             :     /*
    2268             :      * Now that we have removed the LP_DEAD items from the page, once again
    2269             :      * check if the page has become all-visible.  The page is already marked
    2270             :      * dirty, exclusively locked, and, if needed, a full page image has been
    2271             :      * emitted.
    2272             :      */
    2273             :     Assert(!PageIsAllVisible(page));
    2274       22030 :     if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
    2275             :                                  &all_frozen))
    2276             :     {
    2277       21960 :         uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    2278             : 
    2279       21960 :         if (all_frozen)
    2280             :         {
    2281             :             Assert(!TransactionIdIsValid(visibility_cutoff_xid));
    2282       16966 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    2283             :         }
    2284             : 
    2285       21960 :         PageSetAllVisible(page);
    2286       21960 :         visibilitymap_set(vacrel->rel, blkno, buffer, InvalidXLogRecPtr,
    2287             :                           vmbuffer, visibility_cutoff_xid, flags);
    2288             :     }
    2289             : 
    2290             :     /* Revert to the previous phase information for error traceback */
    2291       22030 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2292       22030 : }
    2293             : 
    2294             : /*
    2295             :  * Trigger the failsafe to avoid wraparound failure when vacrel table has a
    2296             :  * relfrozenxid and/or relminmxid that is dangerously far in the past.
    2297             :  * Triggering the failsafe makes the ongoing VACUUM bypass any further index
    2298             :  * vacuuming and heap vacuuming.  Truncating the heap is also bypassed.
    2299             :  *
    2300             :  * Any remaining work (work that VACUUM cannot just bypass) is typically sped
    2301             :  * up when the failsafe triggers.  VACUUM stops applying any cost-based delay
    2302             :  * that it started out with.
    2303             :  *
    2304             :  * Returns true when failsafe has been triggered.
    2305             :  */
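                     : /*
                     :  * Sketch of the trigger condition, assuming vacuum_xid_failsafe_check()
                     :  * compares the age of the table's relfrozenxid and relminmxid against the
                     :  * vacuum_failsafe_age and vacuum_multixact_failsafe_age settings (both
                     :  * default to 1.6 billion):
                     :  *
                     :  *   trigger if age(relfrozenxid) > vacuum_failsafe_age
                     :  *       or age(relminmxid) > vacuum_multixact_failsafe_age
                     :  */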
    2306             : static bool
    2307      100034 : lazy_check_wraparound_failsafe(LVRelState *vacrel)
    2308             : {
    2309             :     /* Don't warn more than once per VACUUM */
    2310      100034 :     if (VacuumFailsafeActive)
    2311           0 :         return true;
    2312             : 
    2313      100034 :     if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
    2314             :     {
    2315       14888 :         const int   progress_index[] = {
    2316             :             PROGRESS_VACUUM_INDEXES_TOTAL,
    2317             :             PROGRESS_VACUUM_INDEXES_PROCESSED
    2318             :         };
    2319       14888 :         int64       progress_val[2] = {0, 0};
    2320             : 
    2321       14888 :         VacuumFailsafeActive = true;
    2322             : 
    2323             :         /*
    2324             :          * Abandon use of a buffer access strategy to allow use of all of
    2325             :          * shared buffers.  We assume the caller who allocated the memory for
    2326             :          * the BufferAccessStrategy will free it.
    2327             :          */
    2328       14888 :         vacrel->bstrategy = NULL;
    2329             : 
    2330             :         /* Disable index vacuuming, index cleanup, and heap rel truncation */
    2331       14888 :         vacrel->do_index_vacuuming = false;
    2332       14888 :         vacrel->do_index_cleanup = false;
    2333       14888 :         vacrel->do_rel_truncate = false;
    2334             : 
    2335             :         /* Reset the progress counters */
    2336       14888 :         pgstat_progress_update_multi_param(2, progress_index, progress_val);
    2337             : 
    2338       14888 :         ereport(WARNING,
    2339             :                 (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
    2340             :                         vacrel->dbname, vacrel->relnamespace, vacrel->relname,
    2341             :                         vacrel->num_index_scans),
    2342             :                  errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
    2343             :                  errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
    2344             :                          "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
    2345             : 
    2346             :         /* Stop applying cost limits from this point on */
    2347       14888 :         VacuumCostActive = false;
    2348       14888 :         VacuumCostBalance = 0;
    2349             : 
    2350       14888 :         return true;
    2351             :     }
    2352             : 
    2353       85146 :     return false;
    2354             : }
    2355             : 
    2356             : /*
    2357             :  *  lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
    2358             :  */
    2359             : static void
    2360       77642 : lazy_cleanup_all_indexes(LVRelState *vacrel)
    2361             : {
    2362       77642 :     double      reltuples = vacrel->new_rel_tuples;
    2363       77642 :     bool        estimated_count = vacrel->scanned_pages < vacrel->rel_pages;
    2364       77642 :     const int   progress_start_index[] = {
    2365             :         PROGRESS_VACUUM_PHASE,
    2366             :         PROGRESS_VACUUM_INDEXES_TOTAL
    2367             :     };
    2368       77642 :     const int   progress_end_index[] = {
    2369             :         PROGRESS_VACUUM_INDEXES_TOTAL,
    2370             :         PROGRESS_VACUUM_INDEXES_PROCESSED
    2371             :     };
    2372             :     int64       progress_start_val[2];
    2373       77642 :     int64       progress_end_val[2] = {0, 0};
    2374             : 
    2375             :     Assert(vacrel->do_index_cleanup);
    2376             :     Assert(vacrel->nindexes > 0);
    2377             : 
    2378             :     /*
    2379             :      * Report that we are now cleaning up indexes and the number of indexes to
    2380             :      * clean up.
    2381             :      */
    2382       77642 :     progress_start_val[0] = PROGRESS_VACUUM_PHASE_INDEX_CLEANUP;
    2383       77642 :     progress_start_val[1] = vacrel->nindexes;
    2384       77642 :     pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val);
    2385             : 
    2386       77642 :     if (!ParallelVacuumIsActive(vacrel))
    2387             :     {
    2388      199176 :         for (int idx = 0; idx < vacrel->nindexes; idx++)
    2389             :         {
    2390      121556 :             Relation    indrel = vacrel->indrels[idx];
    2391      121556 :             IndexBulkDeleteResult *istat = vacrel->indstats[idx];
    2392             : 
    2393      243112 :             vacrel->indstats[idx] =
    2394      121556 :                 lazy_cleanup_one_index(indrel, istat, reltuples,
    2395             :                                        estimated_count, vacrel);
    2396             : 
    2397             :             /* Report the number of indexes cleaned up */
    2398      121556 :             pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
    2399      121556 :                                          idx + 1);
    2400             :         }
    2401             :     }
    2402             :     else
    2403             :     {
    2404             :         /* Outsource everything to parallel variant */
    2405          22 :         parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples,
    2406             :                                             vacrel->num_index_scans,
    2407             :                                             estimated_count);
    2408             :     }
    2409             : 
    2410             :     /* Reset the progress counters */
    2411       77642 :     pgstat_progress_update_multi_param(2, progress_end_index, progress_end_val);
    2412       77642 : }
    2413             : 
    2414             : /*
    2415             :  *  lazy_vacuum_one_index() -- vacuum index relation.
    2416             :  *
    2417             :  *      Delete all the index tuples containing a TID collected in
    2418             :  *      vacrel->dead_items.  Also update running statistics. Exact
    2419             :  *      details depend on index AM's ambulkdelete routine.
    2420             :  *
    2421             :  *      reltuples is the number of heap tuples to be passed to the
    2422             :  *      bulkdelete callback.  It's always assumed to be estimated.
    2423             :  *      See indexam.sgml for more info.
    2424             :  *
    2425             :  * Returns bulk delete stats derived from input stats
    2426             :  */
    2427             : static IndexBulkDeleteResult *
    2428        1912 : lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
    2429             :                       double reltuples, LVRelState *vacrel)
    2430             : {
    2431             :     IndexVacuumInfo ivinfo;
    2432             :     LVSavedErrInfo saved_err_info;
    2433             : 
    2434        1912 :     ivinfo.index = indrel;
    2435        1912 :     ivinfo.heaprel = vacrel->rel;
    2436        1912 :     ivinfo.analyze_only = false;
    2437        1912 :     ivinfo.report_progress = false;
    2438        1912 :     ivinfo.estimated_count = true;
    2439        1912 :     ivinfo.message_level = DEBUG2;
    2440        1912 :     ivinfo.num_heap_tuples = reltuples;
    2441        1912 :     ivinfo.strategy = vacrel->bstrategy;
    2442             : 
    2443             :     /*
    2444             :      * Update error traceback information.
    2445             :      *
    2446             :      * The index name is saved during this phase and restored immediately
    2447             :      * after this phase.  See vacuum_error_callback.
    2448             :      */
    2449             :     Assert(vacrel->indname == NULL);
    2450        1912 :     vacrel->indname = pstrdup(RelationGetRelationName(indrel));
    2451        1912 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2452             :                              VACUUM_ERRCB_PHASE_VACUUM_INDEX,
    2453             :                              InvalidBlockNumber, InvalidOffsetNumber);
    2454             : 
    2455             :     /* Do bulk deletion */
    2456        1912 :     istat = vac_bulkdel_one_index(&ivinfo, istat, (void *) vacrel->dead_items,
    2457             :                                   vacrel->dead_items_info);
    2458             : 
    2459             :     /* Revert to the previous phase information for error traceback */
    2460        1912 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2461        1912 :     pfree(vacrel->indname);
    2462        1912 :     vacrel->indname = NULL;
    2463             : 
    2464        1912 :     return istat;
    2465             : }
    2466             : 
    2467             : /*
    2468             :  *  lazy_cleanup_one_index() -- do post-vacuum cleanup for index relation.
    2469             :  *
    2470             :  *      Calls index AM's amvacuumcleanup routine.  reltuples is the number
    2471             :  *      of heap tuples and estimated_count is true if reltuples is an
    2472             :  *      estimated value.  See indexam.sgml for more info.
    2473             :  *
    2474             :  * Returns bulk delete stats derived from input stats
    2475             :  */
    2476             : static IndexBulkDeleteResult *
    2477      121556 : lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
    2478             :                        double reltuples, bool estimated_count,
    2479             :                        LVRelState *vacrel)
    2480             : {
    2481             :     IndexVacuumInfo ivinfo;
    2482             :     LVSavedErrInfo saved_err_info;
    2483             : 
    2484      121556 :     ivinfo.index = indrel;
    2485      121556 :     ivinfo.heaprel = vacrel->rel;
    2486      121556 :     ivinfo.analyze_only = false;
    2487      121556 :     ivinfo.report_progress = false;
    2488      121556 :     ivinfo.estimated_count = estimated_count;
    2489      121556 :     ivinfo.message_level = DEBUG2;
    2490             : 
    2491      121556 :     ivinfo.num_heap_tuples = reltuples;
    2492      121556 :     ivinfo.strategy = vacrel->bstrategy;
    2493             : 
    2494             :     /*
    2495             :      * Update error traceback information.
    2496             :      *
    2497             :      * The index name is saved during this phase and restored immediately
    2498             :      * after this phase.  See vacuum_error_callback.
    2499             :      */
    2500             :     Assert(vacrel->indname == NULL);
    2501      121556 :     vacrel->indname = pstrdup(RelationGetRelationName(indrel));
    2502      121556 :     update_vacuum_error_info(vacrel, &saved_err_info,
    2503             :                              VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
    2504             :                              InvalidBlockNumber, InvalidOffsetNumber);
    2505             : 
    2506      121556 :     istat = vac_cleanup_one_index(&ivinfo, istat);
    2507             : 
    2508             :     /* Revert to the previous phase information for error traceback */
    2509      121556 :     restore_vacuum_error_info(vacrel, &saved_err_info);
    2510      121556 :     pfree(vacrel->indname);
    2511      121556 :     vacrel->indname = NULL;
    2512             : 
    2513      121556 :     return istat;
    2514             : }
    2515             : 
    2516             : /*
    2517             :  * should_attempt_truncation - should we attempt to truncate the heap?
    2518             :  *
    2519             :  * Don't even think about it unless we have a shot at releasing a goodly
    2520             :  * number of pages.  Otherwise, the time taken isn't worth it, mainly because
    2521             :  * an AccessExclusive lock must be replayed on any hot standby, where it can
    2522             :  * be particularly disruptive.
    2523             :  *
    2524             :  * Also don't attempt it if wraparound failsafe is in effect.  The entire
    2525             :  * system might be refusing to allocate new XIDs at this point.  The system
    2526             :  * definitely won't return to normal unless and until VACUUM actually advances
    2527             :  * the oldest relfrozenxid -- which hasn't happened for target rel just yet.
    2528             :  * If lazy_truncate_heap attempted to acquire an AccessExclusiveLock to
    2529             :  * truncate the table under these circumstances, an XID exhaustion error might
    2530             :  * make it impossible for VACUUM to fix the underlying XID exhaustion problem.
    2531             :  * There is very little chance of truncation working out when the failsafe is
    2532             :  * in effect in any case.  lazy_scan_prune makes the optimistic assumption
    2533             :  * that any LP_DEAD items it encounters will always be LP_UNUSED by the time
    2534             :  * we're called.
    2535             :  */
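                     : /*
                     :  * As a worked example of the test below (assuming REL_TRUNCATE_MINIMUM is
                     :  * 1000 pages and REL_TRUNCATE_FRACTION is 16): a large table becomes a
                     :  * truncation candidate once roughly 1000 tail pages look freeable, while
                     :  * an 800-page table qualifies once 800 / 16 = 50 tail pages do.
                     :  */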
    2536             : static bool
    2537       97130 : should_attempt_truncation(LVRelState *vacrel)
    2538             : {
    2539             :     BlockNumber possibly_freeable;
    2540             : 
    2541       97130 :     if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
    2542       15128 :         return false;
    2543             : 
    2544       82002 :     possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
    2545       82002 :     if (possibly_freeable > 0 &&
    2546         278 :         (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
    2547         278 :          possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION))
    2548         268 :         return true;
    2549             : 
    2550       81734 :     return false;
    2551             : }
    2552             : 
    2553             : /*
    2554             :  * lazy_truncate_heap - try to truncate off any empty pages at the end
    2555             :  */
    2556             : static void
    2557         268 : lazy_truncate_heap(LVRelState *vacrel)
    2558             : {
    2559         268 :     BlockNumber orig_rel_pages = vacrel->rel_pages;
    2560             :     BlockNumber new_rel_pages;
    2561             :     bool        lock_waiter_detected;
    2562             :     int         lock_retry;
    2563             : 
    2564             :     /* Report that we are now truncating */
    2565         268 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2566             :                                  PROGRESS_VACUUM_PHASE_TRUNCATE);
    2567             : 
    2568             :     /* Update error traceback information one last time */
    2569         268 :     update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
    2570             :                              vacrel->nonempty_pages, InvalidOffsetNumber);
    2571             : 
    2572             :     /*
    2573             :      * Loop until no more truncating can be done.
    2574             :      */
    2575             :     do
    2576             :     {
    2577             :         /*
    2578             :          * We need full exclusive lock on the relation in order to do
    2579             :          * truncation. If we can't get it, give up rather than waiting --- we
    2580             :          * don't want to block other backends, and we don't want to deadlock
    2581             :          * (which is quite possible considering we already hold a lower-grade
    2582             :          * lock).
    2583             :          */
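                     :         /*
                     :          * Assuming VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL is 50ms and
                     :          * VACUUM_TRUNCATE_LOCK_TIMEOUT is 5000ms, the retry loop below
                     :          * makes roughly 100 conditional lock attempts over about five
                     :          * seconds before giving up on truncation.
                     :          */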
    2584         268 :         lock_waiter_detected = false;
    2585         268 :         lock_retry = 0;
    2586             :         while (true)
    2587             :         {
    2588         672 :             if (ConditionalLockRelation(vacrel->rel, AccessExclusiveLock))
    2589         264 :                 break;
    2590             : 
    2591             :             /*
    2592             :              * Check for interrupts while trying to (re-)acquire the exclusive
    2593             :              * lock.
    2594             :              */
    2595         408 :             CHECK_FOR_INTERRUPTS();
    2596             : 
    2597         408 :             if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
    2598             :                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
    2599             :             {
    2600             :                 /*
    2601             :                  * We failed to establish the lock in the specified number of
    2602             :                  * retries. This means we give up truncating.
    2603             :                  */
    2604           4 :                 ereport(vacrel->verbose ? INFO : DEBUG2,
    2605             :                         (errmsg("\"%s\": stopping truncate due to conflicting lock request",
    2606             :                                 vacrel->relname)));
    2607           6 :                 return;
    2608             :             }
    2609             : 
    2610         404 :             (void) WaitLatch(MyLatch,
    2611             :                              WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
    2612             :                              VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL,
    2613             :                              WAIT_EVENT_VACUUM_TRUNCATE);
    2614         404 :             ResetLatch(MyLatch);
    2615             :         }
    2616             : 
    2617             :         /*
    2618             :          * Now that we have exclusive lock, look to see if the rel has grown
    2619             :          * whilst we were vacuuming with non-exclusive lock.  If so, give up;
    2620             :          * the newly added pages presumably contain non-deletable tuples.
    2621             :          */
    2622         264 :         new_rel_pages = RelationGetNumberOfBlocks(vacrel->rel);
    2623         264 :         if (new_rel_pages != orig_rel_pages)
    2624             :         {
    2625             :             /*
    2626             :              * Note: we intentionally don't update vacrel->rel_pages with the
    2627             :              * new rel size here.  If we did, it would amount to assuming that
    2628             :              * the new pages are empty, which is unlikely. Leaving the numbers
    2629             :              * alone amounts to assuming that the new pages have the same
    2630             :              * tuple density as existing ones, which is less unlikely.
    2631             :              */
    2632           0 :             UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2633           0 :             return;
    2634             :         }
    2635             : 
    2636             :         /*
    2637             :          * Scan backwards from the end to verify that the end pages actually
    2638             :          * contain no tuples.  This is *necessary*, not optional, because
    2639             :          * other backends could have added tuples to these pages whilst we
    2640             :          * were vacuuming.
    2641             :          */
    2642         264 :         new_rel_pages = count_nondeletable_pages(vacrel, &lock_waiter_detected);
    2643         264 :         vacrel->blkno = new_rel_pages;
    2644             : 
    2645         264 :         if (new_rel_pages >= orig_rel_pages)
    2646             :         {
    2647             :             /* can't do anything after all */
    2648           2 :             UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2649           2 :             return;
    2650             :         }
    2651             : 
    2652             :         /*
    2653             :          * Okay to truncate.
    2654             :          */
    2655         262 :         RelationTruncate(vacrel->rel, new_rel_pages);
    2656             : 
    2657             :         /*
    2658             :          * We can release the exclusive lock as soon as we have truncated.
    2659             :          * Other backends can't safely access the relation until they have
    2660             :          * processed the smgr invalidation that smgrtruncate sent out ... but
    2661             :          * that should happen as part of standard invalidation processing once
    2662             :          * they acquire lock on the relation.
    2663             :          */
    2664         262 :         UnlockRelation(vacrel->rel, AccessExclusiveLock);
    2665             : 
    2666             :         /*
    2667             :          * Update statistics.  Here, it *is* correct to adjust rel_pages
    2668             :          * without also touching reltuples, since the tuple count wasn't
    2669             :          * changed by the truncation.
    2670             :          */
    2671         262 :         vacrel->removed_pages += orig_rel_pages - new_rel_pages;
    2672         262 :         vacrel->rel_pages = new_rel_pages;
    2673             : 
    2674         262 :         ereport(vacrel->verbose ? INFO : DEBUG2,
    2675             :                 (errmsg("table \"%s\": truncated %u to %u pages",
    2676             :                         vacrel->relname,
    2677             :                         orig_rel_pages, new_rel_pages)));
    2678         262 :         orig_rel_pages = new_rel_pages;
    2679         262 :     } while (new_rel_pages > vacrel->nonempty_pages && lock_waiter_detected);
    2680             : }
    2681             : 
    2682             : /*
    2683             :  * Rescan end pages to verify that they are (still) empty of tuples.
    2684             :  *
    2685             :  * Returns number of nondeletable pages (last nonempty page + 1).
    2686             :  */
    2687             : static BlockNumber
    2688         264 : count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
    2689             : {
    2690             :     BlockNumber blkno;
    2691             :     BlockNumber prefetchedUntil;
    2692             :     instr_time  starttime;
    2693             : 
    2694             :     /* Initialize the starttime if we check for conflicting lock requests */
    2695         264 :     INSTR_TIME_SET_CURRENT(starttime);
    2696             : 
    2697             :     /*
    2698             :      * Start checking blocks at what we believe relation end to be and move
    2699             :      * backwards.  (Strange coding of loop control is needed because blkno is
    2700             :      * unsigned.)  To make the scan faster, we prefetch a few blocks at a time
    2701             :      * in forward direction, so that OS-level readahead can kick in.
    2702             :      */
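                     :     /*
                     :      * Concretely, assuming PREFETCH_SIZE is 32: blocks are examined
                     :      * backwards one at a time, but prefetch requests below are issued
                     :      * forwards over aligned groups of up to 32 blocks
                     :      * (prefetchStart = blkno & ~31), which keeps the I/O pattern
                     :      * sequential enough for OS readahead.
                     :      */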
    2703         264 :     blkno = vacrel->rel_pages;
    2704             :     StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
    2705             :                      "prefetch size must be power of 2");
    2706         264 :     prefetchedUntil = InvalidBlockNumber;
    2707        4110 :     while (blkno > vacrel->nonempty_pages)
    2708             :     {
    2709             :         Buffer      buf;
    2710             :         Page        page;
    2711             :         OffsetNumber offnum,
    2712             :                     maxoff;
    2713             :         bool        hastup;
    2714             : 
    2715             :         /*
    2716             :          * Check whether another process is requesting a lock on our relation. We are
    2717             :          * holding an AccessExclusiveLock here, so they will be waiting. We
    2718             :          * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
    2719             :          * only check if that interval has elapsed once every 32 blocks to
    2720             :          * keep the number of system calls and actual shared lock table
    2721             :          * lookups to a minimum.
    2722             :          */
    2723        3856 :         if ((blkno % 32) == 0)
    2724             :         {
    2725             :             instr_time  currenttime;
    2726             :             instr_time  elapsed;
    2727             : 
    2728         124 :             INSTR_TIME_SET_CURRENT(currenttime);
    2729         124 :             elapsed = currenttime;
    2730         124 :             INSTR_TIME_SUBTRACT(elapsed, starttime);
    2731         124 :             if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
    2732             :                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
    2733             :             {
    2734           0 :                 if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
    2735             :                 {
    2736           0 :                     ereport(vacrel->verbose ? INFO : DEBUG2,
    2737             :                             (errmsg("table \"%s\": suspending truncate due to conflicting lock request",
    2738             :                                     vacrel->relname)));
    2739             : 
    2740           0 :                     *lock_waiter_detected = true;
    2741           0 :                     return blkno;
    2742             :                 }
    2743           0 :                 starttime = currenttime;
    2744             :             }
    2745             :         }
    2746             : 
    2747             :         /*
    2748             :          * We don't insert a vacuum delay point here, because we have an
    2749             :          * exclusive lock on the table which we want to hold for as short a
    2750             :          * time as possible.  We still need to check for interrupts however.
    2751             :          */
    2752        3856 :         CHECK_FOR_INTERRUPTS();
    2753             : 
    2754        3856 :         blkno--;
    2755             : 
    2756             :         /* If we haven't prefetched this lot yet, do so now. */
    2757        3856 :         if (prefetchedUntil > blkno)
    2758             :         {
    2759             :             BlockNumber prefetchStart;
    2760             :             BlockNumber pblkno;
    2761             : 
    2762         354 :             prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    2763        5766 :             for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
    2764             :             {
    2765        5412 :                 PrefetchBuffer(vacrel->rel, MAIN_FORKNUM, pblkno);
    2766        5412 :                 CHECK_FOR_INTERRUPTS();
    2767             :             }
    2768         354 :             prefetchedUntil = prefetchStart;
    2769             :         }
    2770             : 
    2771        3856 :         buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
    2772             :                                  vacrel->bstrategy);
    2773             : 
    2774             :         /* In this phase we only need shared access to the buffer */
    2775        3856 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
    2776             : 
    2777        3856 :         page = BufferGetPage(buf);
    2778             : 
    2779        3856 :         if (PageIsNew(page) || PageIsEmpty(page))
    2780             :         {
    2781        1640 :             UnlockReleaseBuffer(buf);
    2782        1640 :             continue;
    2783             :         }
    2784             : 
    2785        2216 :         hastup = false;
    2786        2216 :         maxoff = PageGetMaxOffsetNumber(page);
    2787        4422 :         for (offnum = FirstOffsetNumber;
    2788             :              offnum <= maxoff;
    2789        2206 :              offnum = OffsetNumberNext(offnum))
    2790             :         {
    2791             :             ItemId      itemid;
    2792             : 
    2793        2216 :             itemid = PageGetItemId(page, offnum);
    2794             : 
    2795             :             /*
    2796             :              * Note: any non-unused item should be taken as a reason to keep
    2797             :              * this page.  Even an LP_DEAD item makes truncation unsafe, since
    2798             :              * we must not have cleaned out its index entries.
    2799             :              */
    2800        2216 :             if (ItemIdIsUsed(itemid))
    2801             :             {
    2802          10 :                 hastup = true;
    2803          10 :                 break;          /* can stop scanning */
    2804             :             }
    2805             :         }                       /* scan along page */
    2806             : 
    2807        2216 :         UnlockReleaseBuffer(buf);
    2808             : 
    2809             :         /* Done scanning if we found a tuple here */
    2810        2216 :         if (hastup)
    2811          10 :             return blkno + 1;
    2812             :     }
    2813             : 
    2814             :     /*
    2815             :      * If we fall out of the loop, all the previously-thought-to-be-empty
    2816             :      * pages still are; we need not bother to look at the last known-nonempty
    2817             :      * page.
    2818             :      */
    2819         254 :     return vacrel->nonempty_pages;
    2820             : }
    2821             : 
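/*
 * Illustrative sketch (not part of vacuumlazy.c): the backward scan above
 * prefetches blocks in forward, PREFETCH_SIZE-aligned windows so that
 * OS-level readahead can help.  The bitmask trick only works because
 * PREFETCH_SIZE is a power of two, which the StaticAssertStmt enforces.
 * Standalone, compilable illustration; the value 32 is assumed here purely
 * for the example.
 */
#include <stdio.h>

#define PREFETCH_SIZE 32u               /* assumed example value; must be a power of two */

int
main(void)
{
    unsigned int blkno = 1234;

    /* Round blkno down to the start of its PREFETCH_SIZE-aligned window */
    unsigned int prefetchStart = blkno & ~(PREFETCH_SIZE - 1);

    /* 1234 rounds down to 1216, so blocks 1216..1234 would be prefetched */
    printf("prefetch window: %u .. %u\n", prefetchStart, blkno);
    return 0;
}
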
    2822             : /*
    2823             :  * Allocate dead_items and dead_items_info (either using palloc, or in dynamic
    2824             :  * shared memory). Sets both in vacrel for caller.
    2825             :  *
    2826             :  * Also handles parallel initialization as part of allocating dead_items in
    2827             :  * DSM when required.
    2828             :  */
    2829             : static void
    2830       97130 : dead_items_alloc(LVRelState *vacrel, int nworkers)
    2831             : {
    2832             :     VacDeadItemsInfo *dead_items_info;
    2833      270648 :     int         vac_work_mem = AmAutoVacuumWorkerProcess() &&
    2834       76388 :         autovacuum_work_mem != -1 ?
    2835      173518 :         autovacuum_work_mem : maintenance_work_mem;
    2836             : 
    2837             :     /*
    2838             :      * Initialize state for a parallel vacuum.  As of now, only one worker can
    2839             :      * be used for an index, so we invoke parallelism only if there are at
    2840             :      * least two indexes on a table.
    2841             :      */
    2842       97130 :     if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming)
    2843             :     {
    2844             :         /*
    2845             :          * Since parallel workers cannot access data in temporary tables, we
    2846             :          * can't perform parallel vacuum on them.
    2847             :          */
    2848        7940 :         if (RelationUsesLocalBuffers(vacrel->rel))
    2849             :         {
    2850             :             /*
    2851             :              * Give warning only if the user explicitly tries to perform a
    2852             :              * parallel vacuum on the temporary table.
    2853             :              */
    2854           6 :             if (nworkers > 0)
    2855           6 :                 ereport(WARNING,
    2856             :                         (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
    2857             :                                 vacrel->relname)));
    2858             :         }
    2859             :         else
    2860        7934 :             vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
    2861             :                                                vacrel->nindexes, nworkers,
    2862             :                                                vac_work_mem,
    2863        7934 :                                                vacrel->verbose ? INFO : DEBUG2,
    2864             :                                                vacrel->bstrategy);
    2865             : 
    2866             :         /*
    2867             :          * If parallel mode started, dead_items and dead_items_info spaces are
    2868             :          * allocated in DSM.
    2869             :          */
    2870        7940 :         if (ParallelVacuumIsActive(vacrel))
    2871             :         {
    2872          22 :             vacrel->dead_items = parallel_vacuum_get_dead_items(vacrel->pvs,
    2873             :                                                                 &vacrel->dead_items_info);
    2874          22 :             return;
    2875             :         }
    2876             :     }
    2877             : 
    2878             :     /*
    2879             :      * Serial VACUUM case. Allocate both dead_items and dead_items_info
    2880             :      * locally.
    2881             :      */
    2882             : 
    2883       97108 :     dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
    2884       97108 :     dead_items_info->max_bytes = vac_work_mem * 1024L;
    2885       97108 :     dead_items_info->num_items = 0;
    2886       97108 :     vacrel->dead_items_info = dead_items_info;
    2887             : 
    2888       97108 :     vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
    2889             : }
    2890             : 
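/*
 * Illustrative sketch (not part of vacuumlazy.c): how the dead-TID memory
 * budget chosen above is derived.  Both GUCs are expressed in kilobytes,
 * hence the conversion to bytes.  Standalone, compilable example; the
 * settings below are assumed values, not taken from a running server.
 */
#include <stdio.h>

int
main(void)
{
    long maintenance_work_mem = 65536;  /* KB; assumed example setting (64MB) */
    long autovacuum_work_mem = -1;      /* -1 means "fall back to maintenance_work_mem" */
    int  am_autovacuum_worker = 1;      /* pretend we are an autovacuum worker */

    long vac_work_mem = (am_autovacuum_worker && autovacuum_work_mem != -1) ?
        autovacuum_work_mem : maintenance_work_mem;

    /* Matches dead_items_info->max_bytes = vac_work_mem * 1024L above */
    long max_bytes = vac_work_mem * 1024L;

    printf("dead-TID budget: %ld bytes\n", max_bytes);
    return 0;
}
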
    2891             : /*
    2892             :  * Add the given block number and offset numbers to dead_items.
    2893             :  */
    2894             : static void
    2895       26732 : dead_items_add(LVRelState *vacrel, BlockNumber blkno, OffsetNumber *offsets,
    2896             :                int num_offsets)
    2897             : {
    2898       26732 :     TidStore   *dead_items = vacrel->dead_items;
    2899       26732 :     const int   prog_index[2] = {
    2900             :         PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS,
    2901             :         PROGRESS_VACUUM_DEAD_TUPLE_BYTES
    2902             :     };
    2903             :     int64       prog_val[2];
    2904             : 
    2905       26732 :     TidStoreSetBlockOffsets(dead_items, blkno, offsets, num_offsets);
    2906       26732 :     vacrel->dead_items_info->num_items += num_offsets;
    2907             : 
    2908             :     /* update the progress information */
    2909       26732 :     prog_val[0] = vacrel->dead_items_info->num_items;
    2910       26732 :     prog_val[1] = TidStoreMemoryUsage(dead_items);
    2911       26732 :     pgstat_progress_update_multi_param(2, prog_index, prog_val);
    2912       26732 : }
    2913             : 
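/*
 * Illustrative sketch (hypothetical caller, not from vacuumlazy.c): a
 * pruning pass would typically collect the LP_DEAD offsets of one heap page
 * into a local array and hand them to dead_items_add() in a single call, so
 * the TID store sees at most one insertion per block.  Backend-only code;
 * "page" and "blkno" are assumed to be set up by the caller.
 */
static void
collect_dead_offsets_example(LVRelState *vacrel, Page page, BlockNumber blkno)
{
    OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
    int          ndead = 0;
    OffsetNumber offnum,
                 maxoff = PageGetMaxOffsetNumber(page);

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);

        /* Only LP_DEAD items still have index entries that need removal */
        if (ItemIdIsDead(itemid))
            deadoffsets[ndead++] = offnum;
    }

    if (ndead > 0)
        dead_items_add(vacrel, blkno, deadoffsets, ndead);
}
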
    2914             : /*
    2915             :  * Forget all collected dead items.
    2916             :  */
    2917             : static void
    2918        1012 : dead_items_reset(LVRelState *vacrel)
    2919             : {
    2920        1012 :     TidStore   *dead_items = vacrel->dead_items;
    2921             : 
    2922        1012 :     if (ParallelVacuumIsActive(vacrel))
    2923             :     {
    2924          14 :         parallel_vacuum_reset_dead_items(vacrel->pvs);
    2925          14 :         return;
    2926             :     }
    2927             : 
    2928             :     /* Recreate the tidstore with the same max_bytes limitation */
    2929         998 :     TidStoreDestroy(dead_items);
    2930         998 :     vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
    2931             : 
    2932             :     /* Reset the counter */
    2933         998 :     vacrel->dead_items_info->num_items = 0;
    2934             : }
    2935             : 
    2936             : /*
    2937             :  * Perform cleanup for resources allocated in dead_items_alloc
    2938             :  */
    2939             : static void
    2940       97130 : dead_items_cleanup(LVRelState *vacrel)
    2941             : {
    2942       97130 :     if (!ParallelVacuumIsActive(vacrel))
    2943             :     {
    2944             :         /* Don't bother with pfree here */
    2945       97108 :         return;
    2946             :     }
    2947             : 
    2948             :     /* End parallel mode */
    2949          22 :     parallel_vacuum_end(vacrel->pvs, vacrel->indstats);
    2950          22 :     vacrel->pvs = NULL;
    2951             : }
    2952             : 
    2953             : /*
    2954             :  * Check if every tuple in the given page is visible to all current and future
    2955             :  * transactions. Also return the visibility_cutoff_xid which is the highest
    2956             :  * xmin amongst the visible tuples.  Set *all_frozen to true if every tuple
    2957             :  * on this page is frozen.
    2958             :  *
    2959             :  * This is a stripped down version of lazy_scan_prune().  If you change
    2960             :  * anything here, make sure that everything stays in sync.  Note that an
    2961             :  * assertion calls us to verify that the two code paths still agree.  Be sure to avoid
    2962             :  * introducing new side-effects here.
    2963             :  */
    2964             : static bool
    2965       22030 : heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
    2966             :                          TransactionId *visibility_cutoff_xid,
    2967             :                          bool *all_frozen)
    2968             : {
    2969       22030 :     Page        page = BufferGetPage(buf);
    2970       22030 :     BlockNumber blockno = BufferGetBlockNumber(buf);
    2971             :     OffsetNumber offnum,
    2972             :                 maxoff;
    2973       22030 :     bool        all_visible = true;
    2974             : 
    2975       22030 :     *visibility_cutoff_xid = InvalidTransactionId;
    2976       22030 :     *all_frozen = true;
    2977             : 
    2978       22030 :     maxoff = PageGetMaxOffsetNumber(page);
    2979     1116990 :     for (offnum = FirstOffsetNumber;
    2980     1095030 :          offnum <= maxoff && all_visible;
    2981     1094960 :          offnum = OffsetNumberNext(offnum))
    2982             :     {
    2983             :         ItemId      itemid;
    2984             :         HeapTupleData tuple;
    2985             : 
    2986             :         /*
    2987             :          * Set the offset number so that we can display it along with any
    2988             :          * error that occurred while processing this tuple.
    2989             :          */
    2990     1094960 :         vacrel->offnum = offnum;
    2991     1094960 :         itemid = PageGetItemId(page, offnum);
    2992             : 
    2993             :         /* Unused or redirect line pointers are of no interest */
    2994     1094960 :         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
    2995      278412 :             continue;
    2996             : 
    2997      816548 :         ItemPointerSet(&(tuple.t_self), blockno, offnum);
    2998             : 
    2999             :         /*
    3000             :          * Dead line pointers can still have index entries pointing to them,
    3001             :          * so they can't be treated as visible.
    3002             :          */
    3003      816548 :         if (ItemIdIsDead(itemid))
    3004             :         {
    3005           0 :             all_visible = false;
    3006           0 :             *all_frozen = false;
    3007           0 :             break;
    3008             :         }
    3009             : 
    3010             :         Assert(ItemIdIsNormal(itemid));
    3011             : 
    3012      816548 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    3013      816548 :         tuple.t_len = ItemIdGetLength(itemid);
    3014      816548 :         tuple.t_tableOid = RelationGetRelid(vacrel->rel);
    3015             : 
    3016      816548 :         switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
    3017             :                                          buf))
    3018             :         {
    3019      816510 :             case HEAPTUPLE_LIVE:
    3020             :                 {
    3021             :                     TransactionId xmin;
    3022             : 
    3023             :                     /* Check comments in lazy_scan_prune. */
    3024      816510 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    3025             :                     {
    3026           0 :                         all_visible = false;
    3027           0 :                         *all_frozen = false;
    3028           0 :                         break;
    3029             :                     }
    3030             : 
    3031             :                     /*
    3032             :                      * The inserter definitely committed. But is it old enough
    3033             :                      * that everyone sees it as committed?
    3034             :                      */
    3035      816510 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    3036      816510 :                     if (!TransactionIdPrecedes(xmin,
    3037             :                                                vacrel->cutoffs.OldestXmin))
    3038             :                     {
    3039          32 :                         all_visible = false;
    3040          32 :                         *all_frozen = false;
    3041          32 :                         break;
    3042             :                     }
    3043             : 
    3044             :                     /* Track newest xmin on page. */
    3045      816478 :                     if (TransactionIdFollows(xmin, *visibility_cutoff_xid) &&
    3046             :                         TransactionIdIsNormal(xmin))
    3047       18642 :                         *visibility_cutoff_xid = xmin;
    3048             : 
    3049             :                     /* Check whether this tuple is already frozen or not */
    3050     1019864 :                     if (all_visible && *all_frozen &&
    3051      203386 :                         heap_tuple_needs_eventual_freeze(tuple.t_data))
    3052        5010 :                         *all_frozen = false;
    3053             :                 }
    3054      816478 :                 break;
    3055             : 
    3056          38 :             case HEAPTUPLE_DEAD:
    3057             :             case HEAPTUPLE_RECENTLY_DEAD:
    3058             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    3059             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    3060             :                 {
    3061          38 :                     all_visible = false;
    3062          38 :                     *all_frozen = false;
    3063          38 :                     break;
    3064             :                 }
    3065           0 :             default:
    3066           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    3067             :                 break;
    3068             :         }
    3069             :     }                           /* scan along page */
    3070             : 
    3071             :     /* Clear the offset information once we have processed the given page. */
    3072       22030 :     vacrel->offnum = InvalidOffsetNumber;
    3073             : 
    3074       22030 :     return all_visible;
    3075             : }
    3076             : 
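/*
 * Illustrative sketch (hypothetical caller, not from vacuumlazy.c): how the
 * outputs of heap_page_is_all_visible() map onto visibility map flags.  A
 * page may only be marked all-frozen if it is also all-visible.  Backend-only
 * code; the real callers also handle the cutoff XID and WAL considerations.
 */
static uint8
visibility_flags_for_page_example(LVRelState *vacrel, Buffer buf)
{
    TransactionId visibility_cutoff_xid;
    bool        all_frozen;
    uint8       flags = 0;

    if (heap_page_is_all_visible(vacrel, buf, &visibility_cutoff_xid,
                                 &all_frozen))
    {
        flags |= VISIBILITYMAP_ALL_VISIBLE;
        if (all_frozen)
            flags |= VISIBILITYMAP_ALL_FROZEN;
    }

    return flags;
}
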
    3077             : /*
    3078             :  * Update index statistics in pg_class if the statistics are accurate.
    3079             :  */
    3080             : static void
    3081       81978 : update_relstats_all_indexes(LVRelState *vacrel)
    3082             : {
    3083       81978 :     Relation   *indrels = vacrel->indrels;
    3084       81978 :     int         nindexes = vacrel->nindexes;
    3085       81978 :     IndexBulkDeleteResult **indstats = vacrel->indstats;
    3086             : 
    3087             :     Assert(vacrel->do_index_cleanup);
    3088             : 
    3089      203632 :     for (int idx = 0; idx < nindexes; idx++)
    3090             :     {
    3091      121654 :         Relation    indrel = indrels[idx];
    3092      121654 :         IndexBulkDeleteResult *istat = indstats[idx];
    3093             : 
    3094      121654 :         if (istat == NULL || istat->estimated_count)
    3095      119472 :             continue;
    3096             : 
    3097             :         /* Update index statistics */
    3098        2182 :         vac_update_relstats(indrel,
    3099             :                             istat->num_pages,
    3100             :                             istat->num_index_tuples,
    3101             :                             0,
    3102             :                             false,
    3103             :                             InvalidTransactionId,
    3104             :                             InvalidMultiXactId,
    3105             :                             NULL, NULL, false);
    3106             :     }
    3107       81978 : }
    3108             : 
    3109             : /*
    3110             :  * Error context callback for errors occurring during vacuum.  The error
    3111             :  * context messages for index phases should match the messages set in parallel
    3112             :  * vacuum.  If you change this function for those phases, change
    3113             :  * parallel_vacuum_error_callback() as well.
    3114             :  */
    3115             : static void
    3116       59116 : vacuum_error_callback(void *arg)
    3117             : {
    3118       59116 :     LVRelState *errinfo = arg;
    3119             : 
    3120       59116 :     switch (errinfo->phase)
    3121             :     {
    3122           0 :         case VACUUM_ERRCB_PHASE_SCAN_HEAP:
    3123           0 :             if (BlockNumberIsValid(errinfo->blkno))
    3124             :             {
    3125           0 :                 if (OffsetNumberIsValid(errinfo->offnum))
    3126           0 :                     errcontext("while scanning block %u offset %u of relation \"%s.%s\"",
    3127           0 :                                errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
    3128             :                 else
    3129           0 :                     errcontext("while scanning block %u of relation \"%s.%s\"",
    3130             :                                errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3131             :             }
    3132             :             else
    3133           0 :                 errcontext("while scanning relation \"%s.%s\"",
    3134             :                            errinfo->relnamespace, errinfo->relname);
    3135           0 :             break;
    3136             : 
    3137           0 :         case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
    3138           0 :             if (BlockNumberIsValid(errinfo->blkno))
    3139             :             {
    3140           0 :                 if (OffsetNumberIsValid(errinfo->offnum))
    3141           0 :                     errcontext("while vacuuming block %u offset %u of relation \"%s.%s\"",
    3142           0 :                                errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
    3143             :                 else
    3144           0 :                     errcontext("while vacuuming block %u of relation \"%s.%s\"",
    3145             :                                errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3146             :             }
    3147             :             else
    3148           0 :                 errcontext("while vacuuming relation \"%s.%s\"",
    3149             :                            errinfo->relnamespace, errinfo->relname);
    3150           0 :             break;
    3151             : 
    3152           0 :         case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
    3153           0 :             errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
    3154             :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3155           0 :             break;
    3156             : 
    3157           0 :         case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
    3158           0 :             errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
    3159             :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3160           0 :             break;
    3161             : 
    3162           6 :         case VACUUM_ERRCB_PHASE_TRUNCATE:
    3163           6 :             if (BlockNumberIsValid(errinfo->blkno))
    3164           6 :                 errcontext("while truncating relation \"%s.%s\" to %u blocks",
    3165             :                            errinfo->relnamespace, errinfo->relname, errinfo->blkno);
    3166           6 :             break;
    3167             : 
    3168       59110 :         case VACUUM_ERRCB_PHASE_UNKNOWN:
    3169             :         default:
    3170       59110 :             return;             /* do nothing; the errinfo may not be
    3171             :                                  * initialized */
    3172             :     }
    3173             : }
    3174             : 
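/*
 * Illustrative sketch (hypothetical, not from vacuumlazy.c): the standard
 * push/pop pattern by which an error context callback such as
 * vacuum_error_callback() gets installed.  Backend-only code; in the real
 * file the callback is registered once per vacuumed relation.
 */
static void
run_with_vacuum_errcontext_example(LVRelState *vacrel)
{
    ErrorContextCallback errcallback;

    /* Push our callback onto the error context stack */
    errcallback.callback = vacuum_error_callback;
    errcallback.arg = vacrel;
    errcallback.previous = error_context_stack;
    error_context_stack = &errcallback;

    /* ... do work; any ereport() raised here gets the extra context line ... */

    /* Pop the callback again once we are done */
    error_context_stack = errcallback.previous;
}
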
    3175             : /*
    3176             :  * Updates the information required for the vacuum error callback.  This also
    3177             :  * saves the current information, which can later be restored via restore_vacuum_error_info.
    3178             :  */
    3179             : static void
    3180      557290 : update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel,
    3181             :                          int phase, BlockNumber blkno, OffsetNumber offnum)
    3182             : {
    3183      557290 :     if (saved_vacrel)
    3184             :     {
    3185      146476 :         saved_vacrel->offnum = vacrel->offnum;
    3186      146476 :         saved_vacrel->blkno = vacrel->blkno;
    3187      146476 :         saved_vacrel->phase = vacrel->phase;
    3188             :     }
    3189             : 
    3190      557290 :     vacrel->blkno = blkno;
    3191      557290 :     vacrel->offnum = offnum;
    3192      557290 :     vacrel->phase = phase;
    3193      557290 : }
    3194             : 
    3195             : /*
    3196             :  * Restores the vacuum information saved via a prior call to update_vacuum_error_info.
    3197             :  */
    3198             : static void
    3199      146476 : restore_vacuum_error_info(LVRelState *vacrel,
    3200             :                           const LVSavedErrInfo *saved_vacrel)
    3201             : {
    3202      146476 :     vacrel->blkno = saved_vacrel->blkno;
    3203      146476 :     vacrel->offnum = saved_vacrel->offnum;
    3204      146476 :     vacrel->phase = saved_vacrel->phase;
    3205      146476 : }
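
/*
 * Illustrative sketch (hypothetical, not from vacuumlazy.c): the intended
 * save/restore pattern around the two helpers above.  A caller switches the
 * reported phase and block for the duration of a nested operation, then
 * restores whatever error information was in effect before.  Backend-only
 * code; the phase constant is one of those handled in vacuum_error_callback().
 */
static void
vacuum_one_page_example(LVRelState *vacrel, BlockNumber blkno)
{
    LVSavedErrInfo saved_err_info;

    /* Switch the reported phase/block, remembering the caller's state */
    update_vacuum_error_info(vacrel, &saved_err_info,
                             VACUUM_ERRCB_PHASE_VACUUM_HEAP, blkno,
                             InvalidOffsetNumber);

    /* ... vacuum the page; errors raised here report the new phase/block ... */

    /* Revert to the caller's phase, block and offset */
    restore_vacuum_error_info(vacrel, &saved_err_info);
}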

Generated by: LCOV version 1.14