LCOV - code coverage report
Current view: top level - src/backend/access/heap - vacuumlazy.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13beta1 Lines: 905 1015 89.2 %
Date: 2020-05-25 05:06:35 Functions: 33 33 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuumlazy.c
       4             :  *    Concurrent ("lazy") vacuuming.
       5             :  *
       6             :  *
       7             :  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
       8             :  * TIDs.  We want to ensure we can vacuum even the very largest relations with
       9             :  * finite memory space usage.  To do that, we set upper bounds on the number of
      10             :  * tuples we will keep track of at once.
      11             :  *
      12             :  * We are willing to use at most maintenance_work_mem (or perhaps
      13             :  * autovacuum_work_mem) memory space to keep track of dead tuples.  We
      14             :  * initially allocate an array of TIDs of that size, with an upper limit that
      15             :  * depends on table size (this limit ensures we don't allocate a huge area
      16             :  * uselessly for vacuuming small tables).  If the array threatens to overflow,
      17             :  * we suspend the heap scan phase and perform a pass of index cleanup and page
      18             :  * compaction, then resume the heap scan with an empty TID array.
      19             :  *
      20             :  * If we're processing a table with no indexes, we can just vacuum each page
      21             :  * as we go; there's no need to save up multiple tuples to minimize the number
      22             :  * of index scans performed.  So we don't use maintenance_work_mem memory for
      23             :  * the TID array, just enough to hold as many heap tuples as fit on one page.
      24             :  *
      25             :  * Lazy vacuum supports parallel execution with parallel worker processes.  In
      26             :  * a parallel vacuum, we perform both index vacuum and index cleanup with
      27             :  * parallel worker processes.  Individual indexes are processed by one vacuum
      28             :  * process.  At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
      29             :  * the parallel context and initialize the DSM segment that contains shared
      30             :  * information as well as the memory space for storing dead tuples.  When
      31             :  * starting either index vacuum or index cleanup, we launch parallel worker
      32             :  * processes.  Once all indexes are processed the parallel worker processes
      33             :  * exit.  After that, the leader process re-initializes the parallel context
      34             :  * so that it can use the same DSM for multiple passes of index vacuum and
      35             :  * for performing index cleanup.  For updating the index statistics, we need
      36             :  * to update the system table and since updates are not allowed during
      37             :  * parallel mode we update the index statistics after exiting from the
      38             :  * parallel mode.
      39             :  *
      40             :  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
      41             :  * Portions Copyright (c) 1994, Regents of the University of California
      42             :  *
      43             :  *
      44             :  * IDENTIFICATION
      45             :  *    src/backend/access/heap/vacuumlazy.c
      46             :  *
      47             :  *-------------------------------------------------------------------------
      48             :  */
      49             : #include "postgres.h"
      50             : 
      51             : #include <math.h>
      52             : 
      53             : #include "access/amapi.h"
      54             : #include "access/genam.h"
      55             : #include "access/heapam.h"
      56             : #include "access/heapam_xlog.h"
      57             : #include "access/htup_details.h"
      58             : #include "access/multixact.h"
      59             : #include "access/parallel.h"
      60             : #include "access/transam.h"
      61             : #include "access/visibilitymap.h"
      62             : #include "access/xact.h"
      63             : #include "access/xlog.h"
      64             : #include "catalog/storage.h"
      65             : #include "commands/dbcommands.h"
      66             : #include "commands/progress.h"
      67             : #include "commands/vacuum.h"
      68             : #include "executor/instrument.h"
      69             : #include "miscadmin.h"
      70             : #include "optimizer/paths.h"
      71             : #include "pgstat.h"
      72             : #include "portability/instr_time.h"
      73             : #include "postmaster/autovacuum.h"
      74             : #include "storage/bufmgr.h"
      75             : #include "storage/freespace.h"
      76             : #include "storage/lmgr.h"
      77             : #include "tcop/tcopprot.h"
      78             : #include "utils/lsyscache.h"
      79             : #include "utils/memutils.h"
      80             : #include "utils/pg_rusage.h"
      81             : #include "utils/timestamp.h"
      82             : 
      83             : 
      84             : /*
      85             :  * Space/time tradeoff parameters: do these need to be user-tunable?
      86             :  *
      87             :  * To consider truncating the relation, we want there to be at least
      88             :  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
      89             :  * is less) potentially-freeable pages.
      90             :  */
      91             : #define REL_TRUNCATE_MINIMUM    1000
      92             : #define REL_TRUNCATE_FRACTION   16
      93             : 
      94             : /*
      95             :  * Timing parameters for truncate locking heuristics.
      96             :  *
      97             :  * These were not exposed as user tunable GUC values because it didn't seem
      98             :  * that the potential for improvement was great enough to merit the cost of
      99             :  * supporting them.
     100             :  */
     101             : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL     20  /* ms */
     102             : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL      50  /* ms */
     103             : #define VACUUM_TRUNCATE_LOCK_TIMEOUT            5000    /* ms */
     104             : 
     105             : /*
     106             :  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
     107             :  * (it won't be exact because we only vacuum FSM after processing a heap page
     108             :  * that has some removable tuples).  When there are indexes, this is ignored,
     109             :  * and we vacuum FSM after each index/heap cleaning pass.
     110             :  */
     111             : #define VACUUM_FSM_EVERY_PAGES \
     112             :     ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
     113             : 
     114             : /*
     115             :  * Guesstimation of number of dead tuples per page.  This is used to
     116             :  * provide an upper limit to memory allocated when vacuuming small
     117             :  * tables.
     118             :  */
     119             : #define LAZY_ALLOC_TUPLES       MaxHeapTuplesPerPage
     120             : 
     121             : /*
     122             :  * Before we consider skipping a page that's marked as clean in
     123             :  * visibility map, we must've seen at least this many clean pages.
     124             :  */
     125             : #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
     126             : 
     127             : /*
     128             :  * Size of the prefetch window for lazy vacuum backwards truncation scan.
     129             :  * Needs to be a power of 2.
     130             :  */
     131             : #define PREFETCH_SIZE           ((BlockNumber) 32)
     132             : 
     133             : /*
     134             :  * DSM keys for parallel vacuum.  Unlike other parallel execution code, since
     135             :  * we don't need to worry about DSM keys conflicting with plan_node_id we can
     136             :  * use small integers.
     137             :  */
     138             : #define PARALLEL_VACUUM_KEY_SHARED          1
     139             : #define PARALLEL_VACUUM_KEY_DEAD_TUPLES     2
     140             : #define PARALLEL_VACUUM_KEY_QUERY_TEXT      3
     141             : #define PARALLEL_VACUUM_KEY_BUFFER_USAGE    4
     142             : #define PARALLEL_VACUUM_KEY_WAL_USAGE       5
     143             : 
     144             : /*
     145             :  * Macro to check if we are in a parallel vacuum.  If true, we are in the
     146             :  * parallel mode and the DSM segment is initialized.
     147             :  */
     148             : #define ParallelVacuumIsActive(lps) PointerIsValid(lps)
     149             : 
     150             : /* Phases of vacuum during which we report error context. */
     151             : typedef enum
     152             : {
     153             :     VACUUM_ERRCB_PHASE_UNKNOWN,
     154             :     VACUUM_ERRCB_PHASE_SCAN_HEAP,
     155             :     VACUUM_ERRCB_PHASE_VACUUM_INDEX,
     156             :     VACUUM_ERRCB_PHASE_VACUUM_HEAP,
     157             :     VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
     158             :     VACUUM_ERRCB_PHASE_TRUNCATE
     159             : } VacErrPhase;
     160             : 
     161             : /*
     162             :  * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
     163             :  * This is allocated in the DSM segment in parallel mode and in local memory
     164             :  * in non-parallel mode.
     165             :  */
     166             : typedef struct LVDeadTuples
     167             : {
     168             :     int         max_tuples;     /* # slots allocated in array */
     169             :     int         num_tuples;     /* current # of entries */
     170             :     /* List of TIDs of tuples we intend to delete */
     171             :     /* NB: this list is ordered by TID address */
     172             :     ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER];    /* array of
     173             :                                                          * ItemPointerData */
     174             : } LVDeadTuples;
     175             : 
     176             : /* The dead tuple space consists of LVDeadTuples and dead tuple TIDs */
     177             : #define SizeOfDeadTuples(cnt) \
     178             :     add_size(offsetof(LVDeadTuples, itemptrs), \
     179             :              mul_size(sizeof(ItemPointerData), cnt))
     180             : #define MAXDEADTUPLES(max_size) \
     181             :         (((max_size) - offsetof(LVDeadTuples, itemptrs)) / sizeof(ItemPointerData))
     182             : 
     183             : /*
     184             :  * Shared information among parallel workers.  So this is allocated in the DSM
     185             :  * segment.
     186             :  */
     187             : typedef struct LVShared
     188             : {
     189             :     /*
     190             :      * Target table relid and log level.  These fields are not modified during
     191             :      * the lazy vacuum.
     192             :      */
     193             :     Oid         relid;
     194             :     int         elevel;
     195             : 
     196             :     /*
     197             :      * An indication for vacuum workers to perform either index vacuum or
     198             :      * index cleanup.  first_time is true only if for_cleanup is true and
     199             :      * bulk-deletion is not performed yet.
     200             :      */
     201             :     bool        for_cleanup;
     202             :     bool        first_time;
     203             : 
     204             :     /*
     205             :      * Fields for both index vacuum and cleanup.
     206             :      *
     207             :      * reltuples is the total number of input heap tuples.  We set either old
     208             :      * live tuples in the index vacuum case or the new live tuples in the
     209             :      * index cleanup case.
     210             :      *
     211             :      * estimated_count is true if reltuples is an estimated value.
     212             :      */
     213             :     double      reltuples;
     214             :     bool        estimated_count;
     215             : 
     216             :     /*
     217             :      * In single process lazy vacuum we could consume more memory during index
     218             :      * vacuuming or cleanup apart from the memory for heap scanning.  In
     219             :      * parallel vacuum, since individual vacuum workers can consume memory
     220             :      * equal to maintenance_work_mem, the new maintenance_work_mem for each
     221             :      * worker is set such that the parallel operation doesn't consume more
     222             :      * memory than single process lazy vacuum.
     223             :      */
     224             :     int         maintenance_work_mem_worker;
     225             : 
     226             :     /*
     227             :      * Shared vacuum cost balance.  During parallel vacuum,
     228             :      * VacuumSharedCostBalance points to this value and it accumulates the
     229             :      * balance of each parallel vacuum worker.
     230             :      */
     231             :     pg_atomic_uint32 cost_balance;
     232             : 
     233             :     /*
     234             :      * Number of active parallel workers.  This is used for computing the
     235             :      * minimum threshold of the vacuum cost balance before a worker sleeps for
     236             :      * cost-based delay.
     237             :      */
     238             :     pg_atomic_uint32 active_nworkers;
     239             : 
     240             :     /*
     241             :      * Variables to control parallel vacuum.  We have a bitmap to indicate
     242             :      * which index has stats in shared memory.  The set bit in the map
     243             :      * indicates that the particular index supports a parallel vacuum.
     244             :      */
     245             :     pg_atomic_uint32 idx;       /* counter for vacuuming and clean up */
     246             :     uint32      offset;         /* sizeof header incl. bitmap */
     247             :     bits8       bitmap[FLEXIBLE_ARRAY_MEMBER];  /* bit map of NULLs */
     248             : 
     249             :     /* Shared index statistics data follows at end of struct */
     250             : } LVShared;
     251             : 
     252             : #define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
     253             : #define GetSharedIndStats(s) \
     254             :     ((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
     255             : #define IndStatsIsNull(s, i) \
     256             :     (!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
     257             : 
     258             : /*
     259             :  * Struct for an index bulk-deletion statistic used for parallel vacuum.  This
     260             :  * is allocated in the DSM segment.
     261             :  */
     262             : typedef struct LVSharedIndStats
     263             : {
     264             :     bool        updated;        /* are the stats updated? */
     265             :     IndexBulkDeleteResult stats;
     266             : } LVSharedIndStats;
     267             : 
     268             : /* Struct for maintaining a parallel vacuum state. */
     269             : typedef struct LVParallelState
     270             : {
     271             :     ParallelContext *pcxt;
     272             : 
     273             :     /* Shared information among parallel vacuum workers */
     274             :     LVShared   *lvshared;
     275             : 
     276             :     /* Points to buffer usage area in DSM */
     277             :     BufferUsage *buffer_usage;
     278             : 
     279             :     /* Points to WAL usage area in DSM */
     280             :     WalUsage   *wal_usage;
     281             : 
     282             :     /*
     283             :      * The number of indexes that support parallel index bulk-deletion and
     284             :      * parallel index cleanup respectively.
     285             :      */
     286             :     int         nindexes_parallel_bulkdel;
     287             :     int         nindexes_parallel_cleanup;
     288             :     int         nindexes_parallel_condcleanup;
     289             : } LVParallelState;
     290             : 
     291             : typedef struct LVRelStats
     292             : {
     293             :     char       *relnamespace;
     294             :     char       *relname;
     295             :     /* useindex = true means two-pass strategy; false means one-pass */
     296             :     bool        useindex;
     297             :     /* Overall statistics about rel */
     298             :     BlockNumber old_rel_pages;  /* previous value of pg_class.relpages */
     299             :     BlockNumber rel_pages;      /* total number of pages */
     300             :     BlockNumber scanned_pages;  /* number of pages we examined */
     301             :     BlockNumber pinskipped_pages;   /* # of pages we skipped due to a pin */
     302             :     BlockNumber frozenskipped_pages;    /* # of frozen pages we skipped */
     303             :     BlockNumber tupcount_pages; /* pages whose tuples we counted */
     304             :     double      old_live_tuples;    /* previous value of pg_class.reltuples */
     305             :     double      new_rel_tuples; /* new estimated total # of tuples */
     306             :     double      new_live_tuples;    /* new estimated total # of live tuples */
     307             :     double      new_dead_tuples;    /* new estimated total # of dead tuples */
     308             :     BlockNumber pages_removed;
     309             :     double      tuples_deleted;
     310             :     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
     311             :     LVDeadTuples *dead_tuples;
     312             :     int         num_index_scans;
     313             :     TransactionId latestRemovedXid;
     314             :     bool        lock_waiter_detected;
     315             : 
     316             :     /* Used for error callback */
     317             :     char       *indname;
     318             :     BlockNumber blkno;          /* used only for heap operations */
     319             :     VacErrPhase phase;
     320             : } LVRelStats;
     321             : 
     322             : /* A few variables that don't seem worth passing around as parameters */
     323             : static int  elevel = -1;
     324             : 
     325             : static TransactionId OldestXmin;
     326             : static TransactionId FreezeLimit;
     327             : static MultiXactId MultiXactCutoff;
     328             : 
     329             : static BufferAccessStrategy vac_strategy;
     330             : 
     331             : 
     332             : /* non-export function prototypes */
     333             : static void lazy_scan_heap(Relation onerel, VacuumParams *params,
     334             :                            LVRelStats *vacrelstats, Relation *Irel, int nindexes,
     335             :                            bool aggressive);
     336             : static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
     337             : static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
     338             : static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
     339             :                                     IndexBulkDeleteResult **stats,
     340             :                                     LVRelStats *vacrelstats, LVParallelState *lps,
     341             :                                     int nindexes);
     342             : static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
     343             :                               LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats);
     344             : static void lazy_cleanup_index(Relation indrel,
     345             :                                IndexBulkDeleteResult **stats,
     346             :                                double reltuples, bool estimated_count, LVRelStats *vacrelstats);
     347             : static int  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
     348             :                              int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
     349             : static bool should_attempt_truncation(VacuumParams *params,
     350             :                                       LVRelStats *vacrelstats);
     351             : static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
     352             : static BlockNumber count_nondeletable_pages(Relation onerel,
     353             :                                             LVRelStats *vacrelstats);
     354             : static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
     355             : static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
     356             :                                    ItemPointer itemptr);
     357             : static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
     358             : static int  vac_cmp_itemptr(const void *left, const void *right);
     359             : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
     360             :                                      TransactionId *visibility_cutoff_xid, bool *all_frozen);
     361             : static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
     362             :                                          LVRelStats *vacrelstats, LVParallelState *lps,
     363             :                                          int nindexes);
     364             : static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
     365             :                                   LVShared *lvshared, LVDeadTuples *dead_tuples,
     366             :                                   int nindexes, LVRelStats *vacrelstats);
     367             : static void vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
     368             :                                   LVRelStats *vacrelstats, LVParallelState *lps,
     369             :                                   int nindexes);
     370             : static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
     371             :                              LVShared *lvshared, LVSharedIndStats *shared_indstats,
     372             :                              LVDeadTuples *dead_tuples, LVRelStats *vacrelstats);
     373             : static void lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
     374             :                                      LVRelStats *vacrelstats, LVParallelState *lps,
     375             :                                      int nindexes);
     376             : static long compute_max_dead_tuples(BlockNumber relblocks, bool hasindex);
     377             : static int  compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
     378             :                                             bool *can_parallel_vacuum);
     379             : static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
     380             :                                      int nindexes);
     381             : static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
     382             :                                     int nindexes);
     383             : static LVParallelState *begin_parallel_vacuum(Oid relid, Relation *Irel,
     384             :                                               LVRelStats *vacrelstats, BlockNumber nblocks,
     385             :                                               int nindexes, int nrequested);
     386             : static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
     387             :                                 LVParallelState *lps, int nindexes);
     388             : static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
     389             : static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
     390             : static void vacuum_error_callback(void *arg);
     391             : static void update_vacuum_error_info(LVRelStats *errinfo, int phase,
     392             :                                      BlockNumber blkno, char *indname);
     393             : 
     394             : 
     395             : /*
     396             :  *  heap_vacuum_rel() -- perform VACUUM for one heap relation
     397             :  *
     398             :  *      This routine vacuums a single heap, cleans out its indexes, and
     399             :  *      updates its relpages and reltuples statistics.
     400             :  *
     401             :  *      At entry, we have already established a transaction and opened
     402             :  *      and locked the relation.
     403             :  */
     404             : void
     405       43502 : heap_vacuum_rel(Relation onerel, VacuumParams *params,
     406             :                 BufferAccessStrategy bstrategy)
     407             : {
     408             :     LVRelStats *vacrelstats;
     409             :     Relation   *Irel;
     410             :     int         nindexes;
     411             :     PGRUsage    ru0;
     412       43502 :     TimestampTz starttime = 0;
     413       43502 :     WalUsage    walusage_start = pgWalUsage;
     414       43502 :     WalUsage    walusage = {0, 0, 0};
     415             :     long        secs;
     416             :     int         usecs;
     417             :     double      read_rate,
     418             :                 write_rate;
     419             :     bool        aggressive;     /* should we scan all unfrozen pages? */
     420             :     bool        scanned_all_unfrozen;   /* actually scanned all such pages? */
     421             :     TransactionId xidFullScanLimit;
     422             :     MultiXactId mxactFullScanLimit;
     423             :     BlockNumber new_rel_pages;
     424             :     BlockNumber new_rel_allvisible;
     425             :     double      new_live_tuples;
     426             :     TransactionId new_frozen_xid;
     427             :     MultiXactId new_min_multi;
     428             :     ErrorContextCallback errcallback;
     429             : 
     430             :     Assert(params != NULL);
     431             :     Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT);
     432             :     Assert(params->truncate != VACOPT_TERNARY_DEFAULT);
     433             : 
     434             :     /* not every AM requires these to be valid, but heap does */
     435             :     Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
     436             :     Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
     437             : 
     438             :     /* measure elapsed time iff autovacuum logging requires it */
     439       43502 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     440             :     {
     441         110 :         pg_rusage_init(&ru0);
     442         110 :         starttime = GetCurrentTimestamp();
     443             :     }
     444             : 
     445       43502 :     if (params->options & VACOPT_VERBOSE)
     446          10 :         elevel = INFO;
     447             :     else
     448       43492 :         elevel = DEBUG2;
     449             : 
     450       43502 :     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
     451             :                                   RelationGetRelid(onerel));
     452             : 
     453       43502 :     vac_strategy = bstrategy;
     454             : 
     455       43502 :     vacuum_set_xid_limits(onerel,
     456             :                           params->freeze_min_age,
     457             :                           params->freeze_table_age,
     458             :                           params->multixact_freeze_min_age,
     459             :                           params->multixact_freeze_table_age,
     460             :                           &OldestXmin, &FreezeLimit, &xidFullScanLimit,
     461             :                           &MultiXactCutoff, &mxactFullScanLimit);
     462             : 
     463             :     /*
     464             :      * We request an aggressive scan if the table's frozen Xid is now older
     465             :      * than or equal to the requested Xid full-table scan limit; or if the
     466             :      * table's minimum MultiXactId is older than or equal to the requested
     467             :      * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
     468             :      */
     469       43502 :     aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
     470             :                                                xidFullScanLimit);
     471       43502 :     aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
     472             :                                               mxactFullScanLimit);
     473       43502 :     if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
     474         212 :         aggressive = true;
     475             : 
     476       43502 :     vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
     477             : 
     478       43502 :     vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel));
     479       43502 :     vacrelstats->relname = pstrdup(RelationGetRelationName(onerel));
     480       43502 :     vacrelstats->indname = NULL;
     481       43502 :     vacrelstats->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
     482       43502 :     vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
     483       43502 :     vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
     484       43502 :     vacrelstats->num_index_scans = 0;
     485       43502 :     vacrelstats->pages_removed = 0;
     486       43502 :     vacrelstats->lock_waiter_detected = false;
     487             : 
     488             :     /* Open all indexes of the relation */
     489       43502 :     vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
     490       84872 :     vacrelstats->useindex = (nindexes > 0 &&
     491       41370 :                              params->index_cleanup == VACOPT_TERNARY_ENABLED);
     492             : 
     493             :     /*
     494             :      * Setup error traceback support for ereport().  The idea is to set up an
     495             :      * error context callback to display additional information on any error
     496             :      * during a vacuum.  During different phases of vacuum (heap scan, heap
     497             :      * vacuum, index vacuum, index clean up, heap truncate), we update the
     498             :      * error context callback to display appropriate information.
     499             :      *
     500             :      * Note that the index vacuum and heap vacuum phases may be called
     501             :      * multiple times in the middle of the heap scan phase.  So the old phase
     502             :      * information is restored at the end of those phases.
     503             :      */
     504       43502 :     errcallback.callback = vacuum_error_callback;
     505       43502 :     errcallback.arg = vacrelstats;
     506       43502 :     errcallback.previous = error_context_stack;
     507       43502 :     error_context_stack = &errcallback;
     508             : 
     509             :     /* Do the vacuuming */
     510       43502 :     lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
     511             : 
     512             :     /* Done with indexes */
     513       43502 :     vac_close_indexes(nindexes, Irel, NoLock);
     514             : 
     515             :     /*
     516             :      * Compute whether we actually scanned the all unfrozen pages. If we did,
     517             :      * we can adjust relfrozenxid and relminmxid.
     518             :      *
     519             :      * NB: We need to check this before truncating the relation, because that
     520             :      * will change ->rel_pages.
     521             :      */
     522       87004 :     if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
     523       43502 :         < vacrelstats->rel_pages)
     524             :     {
     525             :         Assert(!aggressive);
     526          16 :         scanned_all_unfrozen = false;
     527             :     }
     528             :     else
     529       43486 :         scanned_all_unfrozen = true;
     530             : 
     531             :     /*
     532             :      * Optionally truncate the relation.
     533             :      */
     534       43502 :     if (should_attempt_truncation(params, vacrelstats))
     535             :     {
     536             :         /*
     537             :          * Update error traceback information.  This is the last phase during
     538             :          * which we add context information to errors, so we don't need to
     539             :          * revert to the previous phase.
     540             :          */
     541         118 :         update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_TRUNCATE,
     542             :                                  vacrelstats->nonempty_pages, NULL);
     543         118 :         lazy_truncate_heap(onerel, vacrelstats);
     544             :     }
     545             : 
     546             :     /* Pop the error context stack */
     547       43502 :     error_context_stack = errcallback.previous;
     548             : 
     549             :     /* Report that we are now doing final cleanup */
     550       43502 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     551             :                                  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
     552             : 
     553             :     /*
     554             :      * Update statistics in pg_class.
     555             :      *
     556             :      * A corner case here is that if we scanned no pages at all because every
     557             :      * page is all-visible, we should not update relpages/reltuples, because
     558             :      * we have no new information to contribute.  In particular this keeps us
     559             :      * from replacing relpages=reltuples=0 (which means "unknown tuple
     560             :      * density") with nonzero relpages and reltuples=0 (which means "zero
     561             :      * tuple density") unless there's some actual evidence for the latter.
     562             :      *
     563             :      * It's important that we use tupcount_pages and not scanned_pages for the
     564             :      * check described above; scanned_pages counts pages where we could not
     565             :      * get cleanup lock, and which were processed only for frozenxid purposes.
     566             :      *
     567             :      * We do update relallvisible even in the corner case, since if the table
     568             :      * is all-visible we'd definitely like to know that.  But clamp the value
     569             :      * to be not more than what we're setting relpages to.
     570             :      *
     571             :      * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
     572             :      * since then we don't know for certain that all tuples have a newer xmin.
     573             :      */
     574       43502 :     new_rel_pages = vacrelstats->rel_pages;
     575       43502 :     new_live_tuples = vacrelstats->new_live_tuples;
     576       43502 :     if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
     577             :     {
     578           2 :         new_rel_pages = vacrelstats->old_rel_pages;
     579           2 :         new_live_tuples = vacrelstats->old_live_tuples;
     580             :     }
     581             : 
     582       43502 :     visibilitymap_count(onerel, &new_rel_allvisible, NULL);
     583       43502 :     if (new_rel_allvisible > new_rel_pages)
     584           0 :         new_rel_allvisible = new_rel_pages;
     585             : 
     586       43502 :     new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
     587       43502 :     new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
     588             : 
     589       43502 :     vac_update_relstats(onerel,
     590             :                         new_rel_pages,
     591             :                         new_live_tuples,
     592             :                         new_rel_allvisible,
     593             :                         nindexes > 0,
     594             :                         new_frozen_xid,
     595             :                         new_min_multi,
     596             :                         false);
     597             : 
     598             :     /* report results to the stats collector, too */
     599       87004 :     pgstat_report_vacuum(RelationGetRelid(onerel),
     600       43502 :                          onerel->rd_rel->relisshared,
     601             :                          new_live_tuples,
     602       43502 :                          vacrelstats->new_dead_tuples);
     603       43502 :     pgstat_progress_end_command();
     604             : 
     605             :     /* and log the action if appropriate */
     606       43502 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     607             :     {
     608         110 :         TimestampTz endtime = GetCurrentTimestamp();
     609             : 
     610         110 :         if (params->log_min_duration == 0 ||
     611           0 :             TimestampDifferenceExceeds(starttime, endtime,
     612             :                                        params->log_min_duration))
     613             :         {
     614             :             StringInfoData buf;
     615             :             char       *msgfmt;
     616             : 
     617         110 :             TimestampDifference(starttime, endtime, &secs, &usecs);
     618             : 
     619         110 :             memset(&walusage, 0, sizeof(WalUsage));
     620         110 :             WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
     621             : 
     622         110 :             read_rate = 0;
     623         110 :             write_rate = 0;
     624         110 :             if ((secs > 0) || (usecs > 0))
     625             :             {
     626         220 :                 read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
     627         110 :                     (secs + usecs / 1000000.0);
     628         220 :                 write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
     629         110 :                     (secs + usecs / 1000000.0);
     630             :             }
     631             : 
     632             :             /*
     633             :              * This is pretty messy, but we split it up so that we can skip
     634             :              * emitting individual parts of the message when not applicable.
     635             :              */
     636         110 :             initStringInfo(&buf);
     637         110 :             if (params->is_wraparound)
     638             :             {
     639           0 :                 if (aggressive)
     640           0 :                     msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     641             :                 else
     642           0 :                     msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     643             :             }
     644             :             else
     645             :             {
     646         110 :                 if (aggressive)
     647           0 :                     msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
     648             :                 else
     649         110 :                     msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
     650             :             }
     651         110 :             appendStringInfo(&buf, msgfmt,
     652             :                              get_database_name(MyDatabaseId),
     653             :                              vacrelstats->relnamespace,
     654             :                              vacrelstats->relname,
     655             :                              vacrelstats->num_index_scans);
     656         110 :             appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
     657             :                              vacrelstats->pages_removed,
     658             :                              vacrelstats->rel_pages,
     659             :                              vacrelstats->pinskipped_pages,
     660             :                              vacrelstats->frozenskipped_pages);
     661         220 :             appendStringInfo(&buf,
     662         110 :                              _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
     663             :                              vacrelstats->tuples_deleted,
     664             :                              vacrelstats->new_rel_tuples,
     665             :                              vacrelstats->new_dead_tuples,
     666             :                              OldestXmin);
     667         220 :             appendStringInfo(&buf,
     668         110 :                              _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
     669             :                              (long long) VacuumPageHit,
     670             :                              (long long) VacuumPageMiss,
     671             :                              (long long) VacuumPageDirty);
     672         110 :             appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     673             :                              read_rate, write_rate);
     674         110 :             appendStringInfo(&buf, _("system usage: %s\n"), pg_rusage_show(&ru0));
     675         220 :             appendStringInfo(&buf,
     676         110 :                              _("WAL usage: %ld records, %ld full page images, "
     677             :                                UINT64_FORMAT " bytes"),
     678             :                              walusage.wal_records,
     679             :                              walusage.wal_fpi,
     680             :                              walusage.wal_bytes);
     681             : 
     682         110 :             ereport(LOG,
     683             :                     (errmsg_internal("%s", buf.data)));
     684         110 :             pfree(buf.data);
     685             :         }
     686             :     }
     687       43502 : }
     688             : 
     689             : /*
     690             :  * For Hot Standby we need to know the highest transaction id that will
     691             :  * be removed by any change. VACUUM proceeds in a number of passes so
     692             :  * we need to consider how each pass operates. The first phase runs
     693             :  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
     694             :  * progresses - these will have a latestRemovedXid on each record.
     695             :  * In some cases this removes all of the tuples to be removed, though
     696             :  * often we have dead tuples with index pointers so we must remember them
     697             :  * for removal in phase 3. Index records for those rows are removed
     698             :  * in phase 2 and index blocks do not have MVCC information attached.
     699             :  * So before we can allow removal of any index tuples we need to issue
     700             :  * a WAL record containing the latestRemovedXid of rows that will be
     701             :  * removed in phase three. This allows recovery queries to block at the
     702             :  * correct place, i.e. before phase two, rather than during phase three
     703             :  * which would be after the rows have become inaccessible.
     704             :  */
     705             : static void
     706        1844 : vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
     707             : {
     708             :     /*
     709             :      * Skip this for relations for which no WAL is to be written, or if we're
     710             :      * not trying to support archive recovery.
     711             :      */
     712        1844 :     if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
     713          24 :         return;
     714             : 
     715             :     /*
     716             :      * No need to write the record at all unless it contains a valid value
     717             :      */
     718        1820 :     if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
     719        1058 :         (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
     720             : }
     721             : 
     722             : /*
     723             :  *  lazy_scan_heap() -- scan an open heap relation
     724             :  *
     725             :  *      This routine prunes each page in the heap, which will among other
     726             :  *      things truncate dead tuples to dead line pointers, defragment the
     727             :  *      page, and set commit status bits (see heap_page_prune).  It also builds
     728             :  *      lists of dead tuples and pages with free space, calculates statistics
     729             :  *      on the number of live tuples in the heap, and marks pages as
     730             :  *      all-visible if appropriate.  When done, or when we run low on space for
     731             :  *      dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
     732             :  *      to reclaim dead line pointers.
     733             :  *
     734             :  *      If the table has at least two indexes, we execute both index vacuum
     735             :  *      and index cleanup with parallel workers unless parallel vacuum is
     736             :  *      disabled.  In a parallel vacuum, we enter parallel mode and then
     737             :  *      create both the parallel context and the DSM segment before starting
     738             :  *      heap scan so that we can record dead tuples to the DSM segment.  All
     739             :  *      parallel workers are launched at beginning of index vacuuming and
     740             :  *      index cleanup and they exit once done with all indexes.  At the end of
     741             :  *      this function we exit from parallel mode.  Index bulk-deletion results
     742             :  *      are stored in the DSM segment and we update index statistics for all
     743             :  *      the indexes after exiting from parallel mode since writes are not
     744             :  *      allowed during parallel mode.
     745             :  *
     746             :  *      If there are no indexes then we can reclaim line pointers on the fly;
     747             :  *      dead line pointers need only be retained until all index pointers that
     748             :  *      reference them have been killed.
     749             :  */
     750             : static void
     751       43502 : lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
     752             :                Relation *Irel, int nindexes, bool aggressive)
     753             : {
     754       43502 :     LVParallelState *lps = NULL;
     755             :     LVDeadTuples *dead_tuples;
     756             :     BlockNumber nblocks,
     757             :                 blkno;
     758             :     HeapTupleData tuple;
     759       43502 :     TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
     760       43502 :     TransactionId relminmxid = onerel->rd_rel->relminmxid;
     761             :     BlockNumber empty_pages,
     762             :                 vacuumed_pages,
     763             :                 next_fsm_block_to_vacuum;
     764             :     double      num_tuples,     /* total number of nonremovable tuples */
     765             :                 live_tuples,    /* live tuples (reltuples estimate) */
     766             :                 tups_vacuumed,  /* tuples cleaned up by vacuum */
     767             :                 nkeep,          /* dead-but-not-removable tuples */
     768             :                 nunused;        /* unused line pointers */
     769             :     IndexBulkDeleteResult **indstats;
     770             :     int         i;
     771             :     PGRUsage    ru0;
     772       43502 :     Buffer      vmbuffer = InvalidBuffer;
     773             :     BlockNumber next_unskippable_block;
     774             :     bool        skipping_blocks;
     775             :     xl_heap_freeze_tuple *frozen;
     776             :     StringInfoData buf;
     777       43502 :     const int   initprog_index[] = {
     778             :         PROGRESS_VACUUM_PHASE,
     779             :         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
     780             :         PROGRESS_VACUUM_MAX_DEAD_TUPLES
     781             :     };
     782             :     int64       initprog_val[3];
     783             : 
     784       43502 :     pg_rusage_init(&ru0);
     785             : 
     786       43502 :     if (aggressive)
     787       38134 :         ereport(elevel,
     788             :                 (errmsg("aggressively vacuuming \"%s.%s\"",
     789             :                         vacrelstats->relnamespace,
     790             :                         vacrelstats->relname)));
     791             :     else
     792        5368 :         ereport(elevel,
     793             :                 (errmsg("vacuuming \"%s.%s\"",
     794             :                         vacrelstats->relnamespace,
     795             :                         vacrelstats->relname)));
     796             : 
     797       43502 :     empty_pages = vacuumed_pages = 0;
     798       43502 :     next_fsm_block_to_vacuum = (BlockNumber) 0;
     799       43502 :     num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
     800             : 
     801             :     indstats = (IndexBulkDeleteResult **)
     802       43502 :         palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
     803             : 
     804       43502 :     nblocks = RelationGetNumberOfBlocks(onerel);
     805       43502 :     vacrelstats->rel_pages = nblocks;
     806       43502 :     vacrelstats->scanned_pages = 0;
     807       43502 :     vacrelstats->tupcount_pages = 0;
     808       43502 :     vacrelstats->nonempty_pages = 0;
     809       43502 :     vacrelstats->latestRemovedXid = InvalidTransactionId;
     810             : 
     811             :     /*
     812             :      * Initialize state for a parallel vacuum.  As of now, only one worker can
     813             :      * be used for an index, so we invoke parallelism only if there are at
     814             :      * least two indexes on a table.
     815             :      */
     816       43502 :     if (params->nworkers >= 0 && vacrelstats->useindex && nindexes > 1)
     817             :     {
     818             :         /*
     819             :          * Since parallel workers cannot access data in temporary tables, we
     820             :          * can't perform parallel vacuum on them.
     821             :          */
     822       18620 :         if (RelationUsesLocalBuffers(onerel))
     823             :         {
     824             :             /*
     825             :              * Give warning only if the user explicitly tries to perform a
     826             :              * parallel vacuum on the temporary table.
     827             :              */
     828           4 :             if (params->nworkers > 0)
     829           4 :                 ereport(WARNING,
     830             :                         (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
     831             :                                 vacrelstats->relname)));
     832             :         }
     833             :         else
     834       18616 :             lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
     835             :                                         vacrelstats, nblocks, nindexes,
     836             :                                         params->nworkers);
     837             :     }
     838             : 
     839             :     /*
     840             :      * Allocate the space for dead tuples in case parallel vacuum is not
     841             :      * initialized.
     842             :      */
     843       43502 :     if (!ParallelVacuumIsActive(lps))
     844       43490 :         lazy_space_alloc(vacrelstats, nblocks);
     845             : 
     846       43502 :     dead_tuples = vacrelstats->dead_tuples;
     847       43502 :     frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
     848             : 
     849             :     /* Report that we're scanning the heap, advertising total # of blocks */
     850       43502 :     initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
     851       43502 :     initprog_val[1] = nblocks;
     852       43502 :     initprog_val[2] = dead_tuples->max_tuples;
     853       43502 :     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
     854             : 
     855             :     /*
     856             :      * Except when aggressive is set, we want to skip pages that are
     857             :      * all-visible according to the visibility map, but only when we can skip
     858             :      * at least SKIP_PAGES_THRESHOLD consecutive pages.  Since we're reading
     859             :      * sequentially, the OS should be doing readahead for us, so there's no
     860             :      * gain in skipping a page now and then; that's likely to disable
     861             :      * readahead and so be counterproductive. Also, skipping even a single
     862             :      * page means that we can't update relfrozenxid, so we only want to do it
     863             :      * if we can skip a goodly number of pages.
     864             :      *
     865             :      * When aggressive is set, we can't skip pages just because they are
     866             :      * all-visible, but we can still skip pages that are all-frozen, since
     867             :      * such pages do not need freezing and do not affect the value that we can
     868             :      * safely set for relfrozenxid or relminmxid.
     869             :      *
     870             :      * Before entering the main loop, establish the invariant that
     871             :      * next_unskippable_block is the next block number >= blkno that we can't
     872             :      * skip based on the visibility map, either all-visible for a regular scan
     873             :      * or all-frozen for an aggressive scan.  We set it to nblocks if there's
     874             :      * no such block.  We also set up the skipping_blocks flag correctly at
     875             :      * this stage.
     876             :      *
     877             :      * Note: The value returned by visibilitymap_get_status could be slightly
     878             :      * out-of-date, since we make this test before reading the corresponding
     879             :      * heap page or locking the buffer.  This is OK.  If we mistakenly think
     880             :      * that the page is all-visible or all-frozen when in fact the flag's just
     881             :      * been cleared, we might fail to vacuum the page.  It's easy to see that
     882             :      * skipping a page when aggressive is not set is not a very big deal; we
     883             :      * might leave some dead tuples lying around, but the next vacuum will
     884             :      * find them.  But even when aggressive *is* set, it's still OK if we miss
     885             :      * a page whose all-frozen marking has just been cleared.  Any new XIDs
     886             :      * just added to that page are necessarily newer than the GlobalXmin we
     887             :      * computed, so they'll have no effect on the value to which we can safely
     888             :      * set relfrozenxid.  A similar argument applies for MXIDs and relminmxid.
     889             :      *
     890             :      * We will scan the table's last page, at least to the extent of
     891             :      * determining whether it has tuples or not, even if it should be skipped
     892             :      * according to the above rules; except when we've already determined that
     893             :      * it's not worth trying to truncate the table.  This avoids having
     894             :      * lazy_truncate_heap() take access-exclusive lock on the table to attempt
     895             :      * a truncation that just fails immediately because there are tuples in
     896             :      * the last page.  This is worth avoiding mainly because such a lock must
     897             :      * be replayed on any hot standby, where it can be disruptive.
     898             :      */
     899       43502 :     next_unskippable_block = 0;
     900       43502 :     if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     901             :     {
     902       56186 :         while (next_unskippable_block < nblocks)
     903             :         {
     904             :             uint8       vmstatus;
     905             : 
     906       29538 :             vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
     907             :                                                 &vmbuffer);
     908       29538 :             if (aggressive)
     909             :             {
     910       16784 :                 if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
     911       15190 :                     break;
     912             :             }
     913             :             else
     914             :             {
     915       12754 :                 if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
     916        1452 :                     break;
     917             :             }
     918       12896 :             vacuum_delay_point();
     919       12896 :             next_unskippable_block++;
     920             :         }
     921             :     }
     922             : 
     923       43502 :     if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
     924         140 :         skipping_blocks = true;
     925             :     else
     926       43362 :         skipping_blocks = false;
     927             : 
     928      247374 :     for (blkno = 0; blkno < nblocks; blkno++)
     929             :     {
     930             :         Buffer      buf;
     931             :         Page        page;
     932             :         OffsetNumber offnum,
     933             :                     maxoff;
     934             :         bool        tupgone,
     935             :                     hastup;
     936             :         int         prev_dead_count;
     937             :         int         nfrozen;
     938             :         Size        freespace;
     939      203872 :         bool        all_visible_according_to_vm = false;
     940             :         bool        all_visible;
     941      203872 :         bool        all_frozen = true;  /* provided all_visible is also true */
     942             :         bool        has_dead_tuples;
     943      203872 :         TransactionId visibility_cutoff_xid = InvalidTransactionId;
     944             : 
     945             :         /* see note above about forcing scanning of last page */
     946             : #define FORCE_CHECK_PAGE() \
     947             :         (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
     948             : 
     949      203872 :         pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
     950             : 
     951      203872 :         update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_SCAN_HEAP,
     952             :                                  blkno, NULL);
     953             : 
     954      203872 :         if (blkno == next_unskippable_block)
     955             :         {
     956             :             /* Time to advance next_unskippable_block */
     957      187666 :             next_unskippable_block++;
     958      187666 :             if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     959             :             {
     960      190106 :                 while (next_unskippable_block < nblocks)
     961             :                 {
     962             :                     uint8       vmskipflags;
     963             : 
     964      173464 :                     vmskipflags = visibilitymap_get_status(onerel,
     965             :                                                            next_unskippable_block,
     966             :                                                            &vmbuffer);
     967      173464 :                     if (aggressive)
     968             :                     {
     969      135166 :                         if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
     970      135166 :                             break;
     971             :                     }
     972             :                     else
     973             :                     {
     974       38298 :                         if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
     975       34988 :                             break;
     976             :                     }
     977        3310 :                     vacuum_delay_point();
     978        3310 :                     next_unskippable_block++;
     979             :                 }
     980             :             }
     981             : 
     982             :             /*
     983             :              * We know we can't skip the current block.  But set up
     984             :              * skipping_blocks to do the right thing at the following blocks.
     985             :              */
     986      187666 :             if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
     987          28 :                 skipping_blocks = true;
     988             :             else
     989      187638 :                 skipping_blocks = false;
     990             : 
     991             :             /*
     992             :              * Normally, the fact that we can't skip this block must mean that
     993             :              * it's not all-visible.  But in an aggressive vacuum we know only
     994             :              * that it's not all-frozen, so it might still be all-visible.
     995             :              */
     996      187666 :             if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
     997         800 :                 all_visible_according_to_vm = true;
     998             :         }
     999             :         else
    1000             :         {
    1001             :             /*
    1002             :              * The current block is potentially skippable; if we've seen a
    1003             :              * long enough run of skippable blocks to justify skipping it, and
    1004             :              * we're not forced to check it, then go ahead and skip.
    1005             :              * Otherwise, the page must be at least all-visible if not
    1006             :              * all-frozen, so we can set all_visible_according_to_vm = true.
    1007             :              */
    1008       16206 :             if (skipping_blocks && !FORCE_CHECK_PAGE())
    1009             :             {
    1010             :                 /*
    1011             :                  * Tricky, tricky.  If this is in aggressive vacuum, the page
    1012             :                  * must have been all-frozen at the time we checked whether it
    1013             :                  * was skippable, but it might not be any more.  We must be
    1014             :                  * careful to count it as a skipped all-frozen page in that
    1015             :                  * case, or else we'll think we can't update relfrozenxid and
    1016             :                  * relminmxid.  If it's not an aggressive vacuum, we don't
    1017             :                  * know whether it was all-frozen, so we have to recheck; but
    1018             :                  * in this case an approximate answer is OK.
    1019             :                  */
    1020       11264 :                 if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
    1021        9704 :                     vacrelstats->frozenskipped_pages++;
    1022       11272 :                 continue;
    1023             :             }
    1024        4942 :             all_visible_according_to_vm = true;
    1025             :         }
    1026             : 
    1027      192608 :         vacuum_delay_point();
    1028             : 
    1029             :         /*
    1030             :          * If we are close to overrunning the available space for dead-tuple
    1031             :          * TIDs, pause and do a cycle of vacuuming before we tackle this page.
    1032             :          */
    1033      192608 :         if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
    1034           0 :             dead_tuples->num_tuples > 0)
    1035             :         {
    1036             :             /*
    1037             :              * Before beginning index vacuuming, we release any pin we may
    1038             :              * hold on the visibility map page.  This isn't necessary for
    1039             :              * correctness, but we do it anyway to avoid holding the pin
    1040             :              * across a lengthy, unrelated operation.
    1041             :              */
    1042           0 :             if (BufferIsValid(vmbuffer))
    1043             :             {
    1044           0 :                 ReleaseBuffer(vmbuffer);
    1045           0 :                 vmbuffer = InvalidBuffer;
    1046             :             }
    1047             : 
    1048             :             /* Work on all the indexes, then the heap */
    1049           0 :             lazy_vacuum_all_indexes(onerel, Irel, indstats,
    1050             :                                     vacrelstats, lps, nindexes);
    1051             : 
    1052             :             /* Remove tuples from heap */
    1053           0 :             lazy_vacuum_heap(onerel, vacrelstats);
    1054             : 
    1055             :             /*
    1056             :              * Forget the now-vacuumed tuples, and press on, but be careful
    1057             :              * not to reset latestRemovedXid since we want that value to be
    1058             :              * valid.
    1059             :              */
    1060           0 :             dead_tuples->num_tuples = 0;
    1061             : 
    1062             :             /*
    1063             :              * Vacuum the Free Space Map to make newly-freed space visible on
    1064             :              * upper-level FSM pages.  Note we have not yet processed blkno.
    1065             :              */
    1066           0 :             FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
    1067           0 :             next_fsm_block_to_vacuum = blkno;
    1068             : 
    1069             :             /* Report that we are once again scanning the heap */
    1070           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1071             :                                          PROGRESS_VACUUM_PHASE_SCAN_HEAP);
    1072             :         }
    1073             : 
    1074             :         /*
    1075             :          * Pin the visibility map page in case we need to mark the page
    1076             :          * all-visible.  In most cases this will be very cheap, because we'll
    1077             :          * already have the correct page pinned anyway.  However, it's
    1078             :          * possible that (a) next_unskippable_block is covered by a different
    1079             :          * VM page than the current block or (b) we released our pin and did a
    1080             :          * cycle of index vacuuming.
    1081             :          *
    1082             :          */
    1083      192608 :         visibilitymap_pin(onerel, blkno, &vmbuffer);
    1084             : 
    1085      192608 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
    1086             :                                  RBM_NORMAL, vac_strategy);
    1087             : 
    1088             :         /* We need buffer cleanup lock so that we can prune HOT chains. */
    1089      192608 :         if (!ConditionalLockBufferForCleanup(buf))
    1090             :         {
    1091             :             /*
    1092             :              * If we're not performing an aggressive scan to guard against XID
    1093             :              * wraparound, and we don't want to forcibly check the page, then
    1094             :              * it's OK to skip vacuuming pages we get a lock conflict on. They
    1095             :              * will be dealt with in some future vacuum.
    1096             :              */
    1097           8 :             if (!aggressive && !FORCE_CHECK_PAGE())
    1098             :             {
    1099           6 :                 ReleaseBuffer(buf);
    1100           6 :                 vacrelstats->pinskipped_pages++;
    1101           6 :                 continue;
    1102             :             }
    1103             : 
    1104             :             /*
    1105             :              * Read the page with share lock to see if any xids on it need to
    1106             :              * be frozen.  If not we just skip the page, after updating our
    1107             :              * scan statistics.  If there are some, we wait for cleanup lock.
    1108             :              *
    1109             :              * We could defer the lock request further by remembering the page
    1110             :              * and coming back to it later, or we could even register
    1111             :              * ourselves for multiple buffers and then service whichever one
    1112             :              * is received first.  For now, this seems good enough.
    1113             :              *
    1114             :              * If we get here with aggressive false, then we're just forcibly
    1115             :              * checking the page, and so we don't want to insist on getting
    1116             :              * the lock; we only need to know if the page contains tuples, so
    1117             :              * that we can update nonempty_pages correctly.  It's convenient
    1118             :              * to use lazy_check_needs_freeze() for both situations, though.
    1119             :              */
    1120           2 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
    1121           2 :             if (!lazy_check_needs_freeze(buf, &hastup))
    1122             :             {
    1123           2 :                 UnlockReleaseBuffer(buf);
    1124           2 :                 vacrelstats->scanned_pages++;
    1125           2 :                 vacrelstats->pinskipped_pages++;
    1126           2 :                 if (hastup)
    1127           2 :                     vacrelstats->nonempty_pages = blkno + 1;
    1128           2 :                 continue;
    1129             :             }
    1130           0 :             if (!aggressive)
    1131             :             {
    1132             :                 /*
    1133             :                  * Here, we must not advance scanned_pages; that would amount
    1134             :                  * to claiming that the page contains no freezable tuples.
    1135             :                  */
    1136           0 :                 UnlockReleaseBuffer(buf);
    1137           0 :                 vacrelstats->pinskipped_pages++;
    1138           0 :                 if (hastup)
    1139           0 :                     vacrelstats->nonempty_pages = blkno + 1;
    1140           0 :                 continue;
    1141             :             }
    1142           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    1143           0 :             LockBufferForCleanup(buf);
    1144             :             /* drop through to normal processing */
    1145             :         }
    1146             : 
    1147      192600 :         vacrelstats->scanned_pages++;
    1148      192600 :         vacrelstats->tupcount_pages++;
    1149             : 
    1150      192600 :         page = BufferGetPage(buf);
    1151             : 
    1152      192600 :         if (PageIsNew(page))
    1153             :         {
    1154             :             /*
    1155             :              * All-zeroes pages can be left over if either a backend extends
    1156             :              * the relation by a single page, but crashes before the newly
    1157             :              * initialized page has been written out, or when bulk-extending
    1158             :              * the relation (which creates a number of empty pages at the tail
    1159             :              * end of the relation, but enters them into the FSM).
    1160             :              *
    1161             :              * Note we do not enter the page into the visibilitymap. That has
    1162             :              * the downside that we repeatedly visit this page in subsequent
    1163             :              * vacuums, but otherwise we'll never not discover the space on a
    1164             :              * promoted standby. The harm of repeated checking ought to
    1165             :              * normally not be too bad - the space usually should be used at
    1166             :              * some point, otherwise there wouldn't be any regular vacuums.
    1167             :              *
    1168             :              * Make sure these pages are in the FSM, to ensure they can be
    1169             :              * reused. Do that by testing if there's any space recorded for
    1170             :              * the page. If not, enter it. We do so after releasing the lock
    1171             :              * on the heap page, the FSM is approximate, after all.
    1172             :              */
    1173           0 :             UnlockReleaseBuffer(buf);
    1174             : 
    1175           0 :             empty_pages++;
    1176             : 
    1177           0 :             if (GetRecordedFreeSpace(onerel, blkno) == 0)
    1178             :             {
    1179             :                 Size        freespace;
    1180             : 
    1181           0 :                 freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
    1182           0 :                 RecordPageWithFreeSpace(onerel, blkno, freespace);
    1183             :             }
    1184           0 :             continue;
    1185             :         }
    1186             : 
    1187      192600 :         if (PageIsEmpty(page))
    1188             :         {
    1189           0 :             empty_pages++;
    1190           0 :             freespace = PageGetHeapFreeSpace(page);
    1191             : 
    1192             :             /*
    1193             :              * Empty pages are always all-visible and all-frozen (note that
    1194             :              * the same is currently not true for new pages, see above).
    1195             :              */
    1196           0 :             if (!PageIsAllVisible(page))
    1197             :             {
    1198           0 :                 START_CRIT_SECTION();
    1199             : 
    1200             :                 /* mark buffer dirty before writing a WAL record */
    1201           0 :                 MarkBufferDirty(buf);
    1202             : 
    1203             :                 /*
    1204             :                  * It's possible that another backend has extended the heap,
    1205             :                  * initialized the page, and then failed to WAL-log the page
    1206             :                  * due to an ERROR.  Since heap extension is not WAL-logged,
    1207             :                  * recovery might try to replay our record setting the page
    1208             :                  * all-visible and find that the page isn't initialized, which
    1209             :                  * will cause a PANIC.  To prevent that, check whether the
    1210             :                  * page has been previously WAL-logged, and if not, do that
    1211             :                  * now.
    1212             :                  */
    1213           0 :                 if (RelationNeedsWAL(onerel) &&
    1214           0 :                     PageGetLSN(page) == InvalidXLogRecPtr)
    1215           0 :                     log_newpage_buffer(buf, true);
    1216             : 
    1217           0 :                 PageSetAllVisible(page);
    1218           0 :                 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1219             :                                   vmbuffer, InvalidTransactionId,
    1220             :                                   VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
    1221           0 :                 END_CRIT_SECTION();
    1222             :             }
    1223             : 
    1224           0 :             UnlockReleaseBuffer(buf);
    1225           0 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
    1226           0 :             continue;
    1227             :         }
    1228             : 
    1229             :         /*
    1230             :          * Prune all HOT-update chains in this page.
    1231             :          *
    1232             :          * We count tuples removed by the pruning step as removed by VACUUM.
    1233             :          */
    1234      192600 :         tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
    1235             :                                          &vacrelstats->latestRemovedXid);
    1236             : 
    1237             :         /*
    1238             :          * Now scan the page to collect vacuumable items and check for tuples
    1239             :          * requiring freezing.
    1240             :          */
    1241      192600 :         all_visible = true;
    1242      192600 :         has_dead_tuples = false;
    1243      192600 :         nfrozen = 0;
    1244      192600 :         hastup = false;
    1245      192600 :         prev_dead_count = dead_tuples->num_tuples;
    1246      192600 :         maxoff = PageGetMaxOffsetNumber(page);
    1247             : 
    1248             :         /*
    1249             :          * Note: If you change anything in the loop below, also look at
    1250             :          * heap_page_is_all_visible to see if that needs to be changed.
    1251             :          */
    1252    12824150 :         for (offnum = FirstOffsetNumber;
    1253             :              offnum <= maxoff;
    1254    12631550 :              offnum = OffsetNumberNext(offnum))
    1255             :         {
    1256             :             ItemId      itemid;
    1257             : 
    1258    12631550 :             itemid = PageGetItemId(page, offnum);
    1259             : 
    1260             :             /* Unused items require no processing, but we count 'em */
    1261    12631550 :             if (!ItemIdIsUsed(itemid))
    1262             :             {
    1263      155330 :                 nunused += 1;
    1264      155330 :                 continue;
    1265             :             }
    1266             : 
    1267             :             /* Redirect items mustn't be touched */
    1268    12476220 :             if (ItemIdIsRedirected(itemid))
    1269             :             {
    1270       53360 :                 hastup = true;  /* this page won't be truncatable */
    1271       53360 :                 continue;
    1272             :             }
    1273             : 
    1274    12422860 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
    1275             : 
    1276             :             /*
    1277             :              * DEAD line pointers are to be vacuumed normally; but we don't
    1278             :              * count them in tups_vacuumed, else we'd be double-counting (at
    1279             :              * least in the common case where heap_page_prune() just freed up
    1280             :              * a non-HOT tuple).
    1281             :              */
    1282    12422860 :             if (ItemIdIsDead(itemid))
    1283             :             {
    1284     1157996 :                 lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
    1285     1157996 :                 all_visible = false;
    1286     1157996 :                 continue;
    1287             :             }
    1288             : 
    1289             :             Assert(ItemIdIsNormal(itemid));
    1290             : 
    1291    11264864 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    1292    11264864 :             tuple.t_len = ItemIdGetLength(itemid);
    1293    11264864 :             tuple.t_tableOid = RelationGetRelid(onerel);
    1294             : 
    1295    11264864 :             tupgone = false;
    1296             : 
    1297             :             /*
    1298             :              * The criteria for counting a tuple as live in this block need to
    1299             :              * match what analyze.c's acquire_sample_rows() does, otherwise
    1300             :              * VACUUM and ANALYZE may produce wildly different reltuples
    1301             :              * values, e.g. when there are many recently-dead tuples.
    1302             :              *
    1303             :              * The logic here is a bit simpler than acquire_sample_rows(), as
    1304             :              * VACUUM can't run inside a transaction block, which makes some
    1305             :              * cases impossible (e.g. in-progress insert from the same
    1306             :              * transaction).
    1307             :              */
    1308    11264864 :             switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
    1309             :             {
    1310           0 :                 case HEAPTUPLE_DEAD:
    1311             : 
    1312             :                     /*
    1313             :                      * Ordinarily, DEAD tuples would have been removed by
    1314             :                      * heap_page_prune(), but it's possible that the tuple
    1315             :                      * state changed since heap_page_prune() looked.  In
    1316             :                      * particular an INSERT_IN_PROGRESS tuple could have
    1317             :                      * changed to DEAD if the inserter aborted.  So this
    1318             :                      * cannot be considered an error condition.
    1319             :                      *
    1320             :                      * If the tuple is HOT-updated then it must only be
    1321             :                      * removed by a prune operation; so we keep it just as if
    1322             :                      * it were RECENTLY_DEAD.  Also, if it's a heap-only
    1323             :                      * tuple, we choose to keep it, because it'll be a lot
    1324             :                      * cheaper to get rid of it in the next pruning pass than
    1325             :                      * to treat it like an indexed tuple. Finally, if index
    1326             :                      * cleanup is disabled, the second heap pass will not
    1327             :                      * execute, and the tuple will not get removed, so we must
    1328             :                      * treat it like any other dead tuple that we choose to
    1329             :                      * keep.
    1330             :                      *
    1331             :                      * If this were to happen for a tuple that actually needed
    1332             :                      * to be deleted, we'd be in trouble, because it'd
    1333             :                      * possibly leave a tuple below the relation's xmin
    1334             :                      * horizon alive.  heap_prepare_freeze_tuple() is prepared
    1335             :                      * to detect that case and abort the transaction,
    1336             :                      * preventing corruption.
    1337             :                      */
    1338           0 :                     if (HeapTupleIsHotUpdated(&tuple) ||
    1339           0 :                         HeapTupleIsHeapOnly(&tuple) ||
    1340           0 :                         params->index_cleanup == VACOPT_TERNARY_DISABLED)
    1341           0 :                         nkeep += 1;
    1342             :                     else
    1343           0 :                         tupgone = true; /* we can delete the tuple */
    1344           0 :                     all_visible = false;
    1345           0 :                     break;
    1346    10848866 :                 case HEAPTUPLE_LIVE:
    1347             : 
    1348             :                     /*
    1349             :                      * Count it as live.  Not only is this natural, but it's
    1350             :                      * also what acquire_sample_rows() does.
    1351             :                      */
    1352    10848866 :                     live_tuples += 1;
    1353             : 
    1354             :                     /*
    1355             :                      * Is the tuple definitely visible to all transactions?
    1356             :                      *
    1357             :                      * NB: Like with per-tuple hint bits, we can't set the
    1358             :                      * PD_ALL_VISIBLE flag if the inserter committed
    1359             :                      * asynchronously. See SetHintBits for more info. Check
    1360             :                      * that the tuple is hinted xmin-committed because of
    1361             :                      * that.
    1362             :                      */
    1363    10848866 :                     if (all_visible)
    1364             :                     {
    1365             :                         TransactionId xmin;
    1366             : 
    1367    10205120 :                         if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    1368             :                         {
    1369          42 :                             all_visible = false;
    1370          42 :                             break;
    1371             :                         }
    1372             : 
    1373             :                         /*
    1374             :                          * The inserter definitely committed. But is it old
    1375             :                          * enough that everyone sees it as committed?
    1376             :                          */
    1377    10205078 :                         xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    1378    10205078 :                         if (!TransactionIdPrecedes(xmin, OldestXmin))
    1379             :                         {
    1380        3118 :                             all_visible = false;
    1381        3118 :                             break;
    1382             :                         }
    1383             : 
    1384             :                         /* Track newest xmin on page. */
    1385    10201960 :                         if (TransactionIdFollows(xmin, visibility_cutoff_xid))
    1386      488158 :                             visibility_cutoff_xid = xmin;
    1387             :                     }
    1388    10845706 :                     break;
    1389      413442 :                 case HEAPTUPLE_RECENTLY_DEAD:
    1390             : 
    1391             :                     /*
    1392             :                      * If tuple is recently deleted then we must not remove it
    1393             :                      * from relation.
    1394             :                      */
    1395      413442 :                     nkeep += 1;
    1396      413442 :                     all_visible = false;
    1397      413442 :                     break;
    1398        2504 :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1399             : 
    1400             :                     /*
    1401             :                      * This is an expected case during concurrent vacuum.
    1402             :                      *
    1403             :                      * We do not count these rows as live, because we expect
    1404             :                      * the inserting transaction to update the counters at
    1405             :                      * commit, and we assume that will happen only after we
    1406             :                      * report our results.  This assumption is a bit shaky,
    1407             :                      * but it is what acquire_sample_rows() does, so be
    1408             :                      * consistent.
    1409             :                      */
    1410        2504 :                     all_visible = false;
    1411        2504 :                     break;
    1412          52 :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1413             :                     /* This is an expected case during concurrent vacuum */
    1414          52 :                     all_visible = false;
    1415             : 
    1416             :                     /*
    1417             :                      * Count such rows as live.  As above, we assume the
    1418             :                      * deleting transaction will commit and update the
    1419             :                      * counters after we report.
    1420             :                      */
    1421          52 :                     live_tuples += 1;
    1422          52 :                     break;
    1423           0 :                 default:
    1424           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1425             :                     break;
    1426             :             }
    1427             : 
    1428    11264864 :             if (tupgone)
    1429             :             {
    1430           0 :                 lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
    1431           0 :                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
    1432             :                                                        &vacrelstats->latestRemovedXid);
    1433           0 :                 tups_vacuumed += 1;
    1434           0 :                 has_dead_tuples = true;
    1435             :             }
    1436             :             else
    1437             :             {
    1438             :                 bool        tuple_totally_frozen;
    1439             : 
    1440    11264864 :                 num_tuples += 1;
    1441    11264864 :                 hastup = true;
    1442             : 
    1443             :                 /*
    1444             :                  * Each non-removable tuple must be checked to see if it needs
    1445             :                  * freezing.  Note we already have exclusive buffer lock.
    1446             :                  */
    1447    11264864 :                 if (heap_prepare_freeze_tuple(tuple.t_data,
    1448             :                                               relfrozenxid, relminmxid,
    1449             :                                               FreezeLimit, MultiXactCutoff,
    1450    11264864 :                                               &frozen[nfrozen],
    1451             :                                               &tuple_totally_frozen))
    1452     4736526 :                     frozen[nfrozen++].offset = offnum;
    1453             : 
    1454    11264864 :                 if (!tuple_totally_frozen)
    1455     2226156 :                     all_frozen = false;
    1456             :             }
    1457             :         }                       /* scan along page */
    1458             : 
    1459             :         /*
    1460             :          * If we froze any tuples, mark the buffer dirty, and write a WAL
    1461             :          * record recording the changes.  We must log the changes to be
    1462             :          * crash-safe against future truncation of CLOG.
    1463             :          */
    1464      192600 :         if (nfrozen > 0)
    1465             :         {
    1466       86236 :             START_CRIT_SECTION();
    1467             : 
    1468       86236 :             MarkBufferDirty(buf);
    1469             : 
    1470             :             /* execute collected freezes */
    1471     4822762 :             for (i = 0; i < nfrozen; i++)
    1472             :             {
    1473             :                 ItemId      itemid;
    1474             :                 HeapTupleHeader htup;
    1475             : 
    1476     4736526 :                 itemid = PageGetItemId(page, frozen[i].offset);
    1477     4736526 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
    1478             : 
    1479     4736526 :                 heap_execute_freeze_tuple(htup, &frozen[i]);
    1480             :             }
    1481             : 
    1482             :             /* Now WAL-log freezing if necessary */
    1483       86236 :             if (RelationNeedsWAL(onerel))
    1484             :             {
    1485             :                 XLogRecPtr  recptr;
    1486             : 
    1487       86236 :                 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
    1488             :                                          frozen, nfrozen);
    1489       86236 :                 PageSetLSN(page, recptr);
    1490             :             }
    1491             : 
    1492       86236 :             END_CRIT_SECTION();
    1493             :         }
    1494             : 
    1495             :         /*
    1496             :          * If there are no indexes we can vacuum the page right now instead of
    1497             :          * doing a second scan. Also we don't do that but forget dead tuples
    1498             :          * when index cleanup is disabled.
    1499             :          */
    1500      192600 :         if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
    1501             :         {
    1502         520 :             if (nindexes == 0)
    1503             :             {
    1504             :                 /* Remove tuples from heap if the table has no index */
    1505         506 :                 lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
    1506         506 :                 vacuumed_pages++;
    1507         506 :                 has_dead_tuples = false;
    1508             :             }
    1509             :             else
    1510             :             {
    1511             :                 /*
    1512             :                  * Here, we have indexes but index cleanup is disabled.
    1513             :                  * Instead of vacuuming the dead tuples on the heap, we just
    1514             :                  * forget them.
    1515             :                  *
    1516             :                  * Note that vacrelstats->dead_tuples could have tuples which
    1517             :                  * became dead after HOT-pruning but are not marked dead yet.
    1518             :                  * We do not process them because it's a very rare condition,
    1519             :                  * and the next vacuum will process them anyway.
    1520             :                  */
    1521             :                 Assert(params->index_cleanup == VACOPT_TERNARY_DISABLED);
    1522             :             }
    1523             : 
    1524             :             /*
    1525             :              * Forget the now-vacuumed tuples, and press on, but be careful
    1526             :              * not to reset latestRemovedXid since we want that value to be
    1527             :              * valid.
    1528             :              */
    1529         520 :             dead_tuples->num_tuples = 0;
    1530             : 
    1531             :             /*
    1532             :              * Periodically do incremental FSM vacuuming to make newly-freed
    1533             :              * space visible on upper FSM pages.  Note: although we've cleaned
    1534             :              * the current block, we haven't yet updated its FSM entry (that
    1535             :              * happens further down), so passing end == blkno is correct.
    1536             :              */
    1537         520 :             if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
    1538             :             {
    1539           0 :                 FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
    1540             :                                         blkno);
    1541           0 :                 next_fsm_block_to_vacuum = blkno;
    1542             :             }
    1543             :         }
    1544             : 
    1545      192600 :         freespace = PageGetHeapFreeSpace(page);
    1546             : 
    1547             :         /* mark page all-visible, if appropriate */
    1548      192600 :         if (all_visible && !all_visible_according_to_vm)
    1549      159348 :         {
    1550      159348 :             uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    1551             : 
    1552      159348 :             if (all_frozen)
    1553      143060 :                 flags |= VISIBILITYMAP_ALL_FROZEN;
    1554             : 
    1555             :             /*
    1556             :              * It should never be the case that the visibility map page is set
    1557             :              * while the page-level bit is clear, but the reverse is allowed
    1558             :              * (if checksums are not enabled).  Regardless, set both bits so
    1559             :              * that we get back in sync.
    1560             :              *
    1561             :              * NB: If the heap page is all-visible but the VM bit is not set,
    1562             :              * we don't need to dirty the heap page.  However, if checksums
    1563             :              * are enabled, we do need to make sure that the heap page is
    1564             :              * dirtied before passing it to visibilitymap_set(), because it
    1565             :              * may be logged.  Given that this situation should only happen in
    1566             :              * rare cases after a crash, it is not worth optimizing.
    1567             :              */
    1568      159348 :             PageSetAllVisible(page);
    1569      159348 :             MarkBufferDirty(buf);
    1570      159348 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1571             :                               vmbuffer, visibility_cutoff_xid, flags);
    1572             :         }
    1573             : 
    1574             :         /*
    1575             :          * As of PostgreSQL 9.2, the visibility map bit should never be set if
    1576             :          * the page-level bit is clear.  However, it's possible that the bit
    1577             :          * got cleared after we checked it and before we took the buffer
    1578             :          * content lock, so we must recheck before jumping to the conclusion
    1579             :          * that something bad has happened.
    1580             :          */
    1581       33252 :         else if (all_visible_according_to_vm && !PageIsAllVisible(page)
    1582           0 :                  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
    1583             :         {
    1584           0 :             elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
    1585             :                  vacrelstats->relname, blkno);
    1586           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1587             :                                 VISIBILITYMAP_VALID_BITS);
    1588             :         }
    1589             : 
    1590             :         /*
    1591             :          * It's possible for the value returned by GetOldestXmin() to move
    1592             :          * backwards, so it's not wrong for us to see tuples that appear to
    1593             :          * not be visible to everyone yet, while PD_ALL_VISIBLE is already
    1594             :          * set. The real safe xmin value never moves backwards, but
    1595             :          * GetOldestXmin() is conservative and sometimes returns a value
    1596             :          * that's unnecessarily small, so if we see that contradiction it just
    1597             :          * means that the tuples that we think are not visible to everyone yet
    1598             :          * actually are, and the PD_ALL_VISIBLE flag is correct.
    1599             :          *
    1600             :          * There should never be dead tuples on a page with PD_ALL_VISIBLE
    1601             :          * set, however.
    1602             :          */
    1603       33252 :         else if (PageIsAllVisible(page) && has_dead_tuples)
    1604             :         {
    1605           0 :             elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
    1606             :                  vacrelstats->relname, blkno);
    1607           0 :             PageClearAllVisible(page);
    1608           0 :             MarkBufferDirty(buf);
    1609           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1610             :                                 VISIBILITYMAP_VALID_BITS);
    1611             :         }
    1612             : 
    1613             :         /*
    1614             :          * If the all-visible page is all-frozen but not marked as such yet,
    1615             :          * mark it as all-frozen.  Note that all_frozen is only valid if
    1616             :          * all_visible is true, so we must check both.
    1617             :          */
    1618       33252 :         else if (all_visible_according_to_vm && all_visible && all_frozen &&
    1619        5244 :                  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
    1620             :         {
    1621             :             /*
    1622             :              * We can pass InvalidTransactionId as the cutoff XID here,
    1623             :              * because setting the all-frozen bit doesn't cause recovery
    1624             :              * conflicts.
    1625             :              */
    1626          20 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1627             :                               vmbuffer, InvalidTransactionId,
    1628             :                               VISIBILITYMAP_ALL_FROZEN);
    1629             :         }
    1630             : 
    1631      192600 :         UnlockReleaseBuffer(buf);
    1632             : 
    1633             :         /* Remember the location of the last page with nonremovable tuples */
    1634      192600 :         if (hastup)
    1635      183310 :             vacrelstats->nonempty_pages = blkno + 1;
    1636             : 
    1637             :         /*
    1638             :          * If we remembered any tuples for deletion, then the page will be
    1639             :          * visited again by lazy_vacuum_heap, which will compute and record
    1640             :          * its post-compaction free space.  If not, then we're done with this
    1641             :          * page, so remember its free space as-is.  (This path will always be
    1642             :          * taken if there are no indexes.)
    1643             :          */
    1644      192600 :         if (dead_tuples->num_tuples == prev_dead_count)
    1645      172382 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
    1646             :     }
    1647             : 
    1648             :     /* report that everything is scanned and vacuumed */
    1649       43502 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
    1650             : 
    1651       43502 :     pfree(frozen);
    1652             : 
    1653             :     /* save stats for use later */
    1654       43502 :     vacrelstats->tuples_deleted = tups_vacuumed;
    1655       43502 :     vacrelstats->new_dead_tuples = nkeep;
    1656             : 
    1657             :     /* now we can compute the new value for pg_class.reltuples */
    1658       43502 :     vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
    1659             :                                                           nblocks,
    1660             :                                                           vacrelstats->tupcount_pages,
    1661             :                                                           live_tuples);
    1662             : 
    1663             :     /* also compute total number of surviving heap entries */
    1664       43502 :     vacrelstats->new_rel_tuples =
    1665       43502 :         vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
    1666             : 
    1667             :     /*
    1668             :      * Release any remaining pin on visibility map page.
    1669             :      */
    1670       43502 :     if (BufferIsValid(vmbuffer))
    1671             :     {
    1672       17818 :         ReleaseBuffer(vmbuffer);
    1673       17818 :         vmbuffer = InvalidBuffer;
    1674             :     }
    1675             : 
    1676             :     /* If any tuples need to be deleted, perform final vacuum cycle */
    1677             :     /* XXX put a threshold on min number of tuples here? */
    1678       43502 :     if (dead_tuples->num_tuples > 0)
    1679             :     {
    1680             :         /* Work on all the indexes, and then the heap */
    1681        1844 :         lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats,
    1682             :                                 lps, nindexes);
    1683             : 
    1684             :         /* Remove tuples from heap */
    1685        1844 :         lazy_vacuum_heap(onerel, vacrelstats);
    1686             :     }
    1687             : 
    1688             :     /*
    1689             :      * Vacuum the remainder of the Free Space Map.  We must do this whether or
    1690             :      * not there were indexes.
    1691             :      */
    1692       43502 :     if (blkno > next_fsm_block_to_vacuum)
    1693       17818 :         FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
    1694             : 
    1695             :     /* report all blocks vacuumed */
    1696       43502 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1697             : 
    1698             :     /* Do post-vacuum cleanup */
    1699       43502 :     if (vacrelstats->useindex)
    1700       41338 :         lazy_cleanup_all_indexes(Irel, indstats, vacrelstats, lps, nindexes);
    1701             : 
    1702             :     /*
    1703             :      * End parallel mode before updating index statistics as we cannot write
    1704             :      * during parallel mode.
    1705             :      */
    1706       43502 :     if (ParallelVacuumIsActive(lps))
    1707          12 :         end_parallel_vacuum(Irel, indstats, lps, nindexes);
    1708             : 
    1709             :     /* Update index statistics */
    1710       43502 :     update_index_statistics(Irel, indstats, nindexes);
    1711             : 
    1712             :     /* If no indexes, make log report that lazy_vacuum_heap would've made */
    1713       43502 :     if (vacuumed_pages)
    1714          52 :         ereport(elevel,
    1715             :                 (errmsg("\"%s\": removed %.0f row versions in %u pages",
    1716             :                         vacrelstats->relname,
    1717             :                         tups_vacuumed, vacuumed_pages)));
    1718             : 
    1719             :     /*
    1720             :      * This is pretty messy, but we split it up so that we can skip emitting
    1721             :      * individual parts of the message when not applicable.
    1722             :      */
    1723       43502 :     initStringInfo(&buf);
    1724       87004 :     appendStringInfo(&buf,
    1725       43502 :                      _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
    1726             :                      nkeep, OldestXmin);
    1727       43502 :     appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
    1728             :                      nunused);
    1729       43502 :     appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
    1730             :                                     "Skipped %u pages due to buffer pins, ",
    1731       43502 :                                     vacrelstats->pinskipped_pages),
    1732             :                      vacrelstats->pinskipped_pages);
    1733       43502 :     appendStringInfo(&buf, ngettext("%u frozen page.\n",
    1734             :                                     "%u frozen pages.\n",
    1735       43502 :                                     vacrelstats->frozenskipped_pages),
    1736             :                      vacrelstats->frozenskipped_pages);
    1737       43502 :     appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
    1738             :                                     "%u pages are entirely empty.\n",
    1739             :                                     empty_pages),
    1740             :                      empty_pages);
    1741       43502 :     appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
    1742             : 
    1743       43502 :     ereport(elevel,
    1744             :             (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
    1745             :                     vacrelstats->relname,
    1746             :                     tups_vacuumed, num_tuples,
    1747             :                     vacrelstats->scanned_pages, nblocks),
    1748             :              errdetail_internal("%s", buf.data)));
    1749       43502 :     pfree(buf.data);
    1750       43502 : }
    1751             : 
    1752             : /*
    1753             :  *  lazy_vacuum_all_indexes() -- vacuum all indexes of relation.
    1754             :  *
    1755             :  * We process the indexes serially unless we are doing parallel vacuum.
    1756             :  */
    1757             : static void
    1758        1844 : lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
    1759             :                         IndexBulkDeleteResult **stats,
    1760             :                         LVRelStats *vacrelstats, LVParallelState *lps,
    1761             :                         int nindexes)
    1762             : {
    1763             :     Assert(!IsParallelWorker());
    1764             :     Assert(nindexes > 0);
    1765             : 
    1766             :     /* Log cleanup info before we touch indexes */
    1767        1844 :     vacuum_log_cleanup_info(onerel, vacrelstats);
    1768             : 
    1769             :     /* Report that we are now vacuuming indexes */
    1770        1844 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1771             :                                  PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
    1772             : 
    1773             :     /* Perform index vacuuming with parallel workers for parallel vacuum. */
    1774        1844 :     if (ParallelVacuumIsActive(lps))
    1775             :     {
    1776             :         /* Tell parallel workers to do index vacuuming */
    1777           8 :         lps->lvshared->for_cleanup = false;
    1778           8 :         lps->lvshared->first_time = false;
    1779             : 
    1780             :         /*
    1781             :          * We can only provide an approximate value of num_heap_tuples in
    1782             :          * vacuum cases.
    1783             :          */
    1784           8 :         lps->lvshared->reltuples = vacrelstats->old_live_tuples;
    1785           8 :         lps->lvshared->estimated_count = true;
    1786             : 
    1787           8 :         lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
    1788             :     }
    1789             :     else
    1790             :     {
    1791             :         int         idx;
    1792             : 
    1793        5844 :         for (idx = 0; idx < nindexes; idx++)
    1794        4008 :             lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
    1795             :                               vacrelstats->old_live_tuples, vacrelstats);
    1796             :     }
    1797             : 
    1798             :     /* Increase and report the number of index scans */
    1799        1844 :     vacrelstats->num_index_scans++;
    1800        1844 :     pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS,
    1801        1844 :                                  vacrelstats->num_index_scans);
    1802        1844 : }
    1803             : 
    1804             : 
    1805             : /*
    1806             :  *  lazy_vacuum_heap() -- second pass over the heap
    1807             :  *
    1808             :  *      This routine marks dead tuples as unused and compacts out free
    1809             :  *      space on their pages.  Pages not having dead tuples recorded from
    1810             :  *      lazy_scan_heap are not visited at all.
    1811             :  *
    1812             :  * Note: the reason for doing this as a second pass is we cannot remove
    1813             :  * the tuples until we've removed their index entries, and we want to
    1814             :  * process index entry removal in batches as large as possible.
    1815             :  */
    1816             : static void
    1817        1844 : lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
    1818             : {
    1819             :     int         tupindex;
    1820             :     int         npages;
    1821             :     PGRUsage    ru0;
    1822        1844 :     Buffer      vmbuffer = InvalidBuffer;
    1823             :     LVRelStats  olderrinfo;
    1824             : 
    1825             :     /* Report that we are now vacuuming the heap */
    1826        1844 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1827             :                                  PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
    1828             : 
    1829             :     /* Update error traceback information */
    1830        1844 :     olderrinfo = *vacrelstats;
    1831        1844 :     update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
    1832             :                              InvalidBlockNumber, NULL);
    1833             : 
    1834        1844 :     pg_rusage_init(&ru0);
    1835        1844 :     npages = 0;
    1836             : 
    1837        1844 :     tupindex = 0;
    1838       22062 :     while (tupindex < vacrelstats->dead_tuples->num_tuples)
    1839             :     {
    1840             :         BlockNumber tblk;
    1841             :         Buffer      buf;
    1842             :         Page        page;
    1843             :         Size        freespace;
    1844             : 
    1845       20218 :         vacuum_delay_point();
    1846             : 
    1847       20218 :         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
    1848       20218 :         vacrelstats->blkno = tblk;
    1849       20218 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
    1850             :                                  vac_strategy);
    1851       20218 :         if (!ConditionalLockBufferForCleanup(buf))
    1852             :         {
    1853           0 :             ReleaseBuffer(buf);
    1854           0 :             ++tupindex;
    1855           0 :             continue;
    1856             :         }
    1857       20218 :         tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
    1858             :                                     &vmbuffer);
    1859             : 
    1860             :         /* Now that we've compacted the page, record its available space */
    1861       20218 :         page = BufferGetPage(buf);
    1862       20218 :         freespace = PageGetHeapFreeSpace(page);
    1863             : 
    1864       20218 :         UnlockReleaseBuffer(buf);
    1865       20218 :         RecordPageWithFreeSpace(onerel, tblk, freespace);
    1866       20218 :         npages++;
    1867             :     }
    1868             : 
    1869        1844 :     if (BufferIsValid(vmbuffer))
    1870             :     {
    1871        1828 :         ReleaseBuffer(vmbuffer);
    1872        1828 :         vmbuffer = InvalidBuffer;
    1873             :     }
    1874             : 
    1875        1844 :     ereport(elevel,
    1876             :             (errmsg("\"%s\": removed %d row versions in %d pages",
    1877             :                     vacrelstats->relname,
    1878             :                     tupindex, npages),
    1879             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    1880             : 
    1881             :     /* Revert to the previous phase information for error traceback */
    1882        3688 :     update_vacuum_error_info(vacrelstats,
    1883        1844 :                              olderrinfo.phase,
    1884             :                              olderrinfo.blkno,
    1885             :                              olderrinfo.indname);
    1886        1844 : }
    1887             : 
    1888             : /*
    1889             :  *  lazy_vacuum_page() -- free dead tuples on a page
    1890             :  *                   and repair its fragmentation.
    1891             :  *
    1892             :  * Caller must hold pin and buffer cleanup lock on the buffer.
    1893             :  *
    1894             :  * tupindex is the index in vacrelstats->dead_tuples of the first dead
    1895             :  * tuple for this page.  We assume the rest follow sequentially.
    1896             :  * The return value is the first tupindex after the tuples of this page.
    1897             :  */
    1898             : static int
    1899       20724 : lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
    1900             :                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
    1901             : {
    1902       20724 :     LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
    1903       20724 :     Page        page = BufferGetPage(buffer);
    1904             :     OffsetNumber unused[MaxOffsetNumber];
    1905       20724 :     int         uncnt = 0;
    1906             :     TransactionId visibility_cutoff_xid;
    1907             :     bool        all_frozen;
    1908             :     LVRelStats  olderrinfo;
    1909             : 
    1910       20724 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1911             : 
    1912             :     /* Update error traceback information */
    1913       20724 :     olderrinfo = *vacrelstats;
    1914       20724 :     update_vacuum_error_info(vacrelstats, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
    1915             :                              blkno, NULL);
    1916             : 
    1917       20724 :     START_CRIT_SECTION();
    1918             : 
    1919     1178636 :     for (; tupindex < dead_tuples->num_tuples; tupindex++)
    1920             :     {
    1921             :         BlockNumber tblk;
    1922             :         OffsetNumber toff;
    1923             :         ItemId      itemid;
    1924             : 
    1925     1176286 :         tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
    1926     1176286 :         if (tblk != blkno)
    1927       18374 :             break;              /* past end of tuples for this block */
    1928     1157912 :         toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
    1929     1157912 :         itemid = PageGetItemId(page, toff);
    1930     1157912 :         ItemIdSetUnused(itemid);
    1931     1157912 :         unused[uncnt++] = toff;
    1932             :     }
    1933             : 
    1934       20724 :     PageRepairFragmentation(page);
    1935             : 
    1936             :     /*
    1937             :      * Mark buffer dirty before we write WAL.
    1938             :      */
    1939       20724 :     MarkBufferDirty(buffer);
    1940             : 
    1941             :     /* XLOG stuff */
    1942       20724 :     if (RelationNeedsWAL(onerel))
    1943             :     {
    1944             :         XLogRecPtr  recptr;
    1945             : 
    1946       20724 :         recptr = log_heap_clean(onerel, buffer,
    1947             :                                 NULL, 0, NULL, 0,
    1948             :                                 unused, uncnt,
    1949             :                                 vacrelstats->latestRemovedXid);
    1950       20724 :         PageSetLSN(page, recptr);
    1951             :     }
    1952             : 
    1953             :     /*
    1954             :      * End critical section, so we safely can do visibility tests (which
    1955             :      * possibly need to perform IO and allocate memory!). If we crash now the
    1956             :      * page (including the corresponding vm bit) might not be marked all
    1957             :      * visible, but that's fine. A later vacuum will fix that.
    1958             :      */
    1959       20724 :     END_CRIT_SECTION();
    1960             : 
    1961             :     /*
    1962             :      * Now that we have removed the dead tuples from the page, once again
    1963             :      * check if the page has become all-visible.  The page is already marked
    1964             :      * dirty, exclusively locked, and, if needed, a full page image has been
    1965             :      * emitted in the log_heap_clean() above.
    1966             :      */
    1967       20724 :     if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
    1968             :                                  &all_frozen))
    1969       20402 :         PageSetAllVisible(page);
    1970             : 
    1971             :     /*
    1972             :      * All the changes to the heap page have been done. If the all-visible
    1973             :      * flag is now set, also set the VM all-visible bit (and, if possible, the
    1974             :      * all-frozen bit) unless this has already been done previously.
    1975             :      */
    1976       20724 :     if (PageIsAllVisible(page))
    1977             :     {
    1978       20402 :         uint8       vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
    1979       20402 :         uint8       flags = 0;
    1980             : 
    1981             :         /* Set the VM all-frozen bit to flag, if needed */
    1982       20402 :         if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
    1983       20402 :             flags |= VISIBILITYMAP_ALL_VISIBLE;
    1984       20402 :         if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
    1985       16614 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    1986             : 
    1987             :         Assert(BufferIsValid(*vmbuffer));
    1988       20402 :         if (flags != 0)
    1989       20402 :             visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
    1990             :                               *vmbuffer, visibility_cutoff_xid, flags);
    1991             :     }
    1992             : 
    1993             :     /* Revert to the previous phase information for error traceback */
    1994       41448 :     update_vacuum_error_info(vacrelstats,
    1995       20724 :                              olderrinfo.phase,
    1996             :                              olderrinfo.blkno,
    1997             :                              olderrinfo.indname);
    1998       20724 :     return tupindex;
    1999             : }
    2000             : 
    2001             : /*
    2002             :  *  lazy_check_needs_freeze() -- scan page to see if any tuples
    2003             :  *                   need to be cleaned to avoid wraparound
    2004             :  *
    2005             :  * Returns true if the page needs to be vacuumed using cleanup lock.
    2006             :  * Also returns a flag indicating whether page contains any tuples at all.
    2007             :  */
    2008             : static bool
    2009           2 : lazy_check_needs_freeze(Buffer buf, bool *hastup)
    2010             : {
    2011           2 :     Page        page = BufferGetPage(buf);
    2012             :     OffsetNumber offnum,
    2013             :                 maxoff;
    2014             :     HeapTupleHeader tupleheader;
    2015             : 
    2016           2 :     *hastup = false;
    2017             : 
    2018             :     /*
    2019             :      * New and empty pages, obviously, don't contain tuples. We could make
    2020             :      * sure that the page is registered in the FSM, but it doesn't seem worth
    2021             :      * waiting for a cleanup lock just for that, especially because it's
    2022             :      * likely that the pin holder will do so.
    2023             :      */
    2024           2 :     if (PageIsNew(page) || PageIsEmpty(page))
    2025           0 :         return false;
    2026             : 
    2027           2 :     maxoff = PageGetMaxOffsetNumber(page);
    2028          44 :     for (offnum = FirstOffsetNumber;
    2029             :          offnum <= maxoff;
    2030          42 :          offnum = OffsetNumberNext(offnum))
    2031             :     {
    2032             :         ItemId      itemid;
    2033             : 
    2034          42 :         itemid = PageGetItemId(page, offnum);
    2035             : 
    2036             :         /* this should match hastup test in count_nondeletable_pages() */
    2037          42 :         if (ItemIdIsUsed(itemid))
    2038          42 :             *hastup = true;
    2039             : 
    2040             :         /* dead and redirect items never need freezing */
    2041          42 :         if (!ItemIdIsNormal(itemid))
    2042           0 :             continue;
    2043             : 
    2044          42 :         tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
    2045             : 
    2046          42 :         if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
    2047             :                                     MultiXactCutoff, buf))
    2048           0 :             return true;
    2049             :     }                           /* scan along page */
    2050             : 
    2051           2 :     return false;
    2052             : }
    2053             : 
    2054             : /*
    2055             :  * Perform index vacuum or index cleanup with parallel workers.  This function
    2056             :  * must be used by the parallel vacuum leader process.  The caller must set
    2057             :  * lps->lvshared->for_cleanup to indicate whether to perform vacuum or
    2058             :  * cleanup.
    2059             :  */
    2060             : static void
    2061          20 : lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
    2062             :                              LVRelStats *vacrelstats, LVParallelState *lps,
    2063             :                              int nindexes)
    2064             : {
    2065             :     int         nworkers;
    2066             : 
    2067             :     Assert(!IsParallelWorker());
    2068             :     Assert(ParallelVacuumIsActive(lps));
    2069             :     Assert(nindexes > 0);
    2070             : 
    2071             :     /* Determine the number of parallel workers to launch */
    2072          20 :     if (lps->lvshared->for_cleanup)
    2073             :     {
    2074          12 :         if (lps->lvshared->first_time)
    2075           8 :             nworkers = lps->nindexes_parallel_cleanup +
    2076           4 :                 lps->nindexes_parallel_condcleanup;
    2077             :         else
    2078           8 :             nworkers = lps->nindexes_parallel_cleanup;
    2079             :     }
    2080             :     else
    2081           8 :         nworkers = lps->nindexes_parallel_bulkdel;
    2082             : 
    2083             :     /* The leader process will participate */
    2084          20 :     nworkers--;
    2085             : 
    2086             :     /*
    2087             :      * It is possible that parallel context is initialized with fewer workers
    2088             :      * than the number of indexes that need a separate worker in the current
    2089             :      * phase, so we need to consider it.  See compute_parallel_vacuum_workers.
    2090             :      */
    2091          20 :     nworkers = Min(nworkers, lps->pcxt->nworkers);
    2092             : 
    2093             :     /* Setup the shared cost-based vacuum delay and launch workers */
    2094          20 :     if (nworkers > 0)
    2095             :     {
    2096          16 :         if (vacrelstats->num_index_scans > 0)
    2097             :         {
    2098             :             /* Reset the parallel index processing counter */
    2099           4 :             pg_atomic_write_u32(&(lps->lvshared->idx), 0);
    2100             : 
    2101             :             /* Reinitialize the parallel context to relaunch parallel workers */
    2102           4 :             ReinitializeParallelDSM(lps->pcxt);
    2103             :         }
    2104             : 
    2105             :         /*
    2106             :          * Set up shared cost balance and the number of active workers for
    2107             :          * vacuum delay.  We need to do this before launching workers as
    2108             :          * otherwise, they might not see the updated values for these
    2109             :          * parameters.
    2110             :          */
    2111          16 :         pg_atomic_write_u32(&(lps->lvshared->cost_balance), VacuumCostBalance);
    2112          16 :         pg_atomic_write_u32(&(lps->lvshared->active_nworkers), 0);
    2113             : 
    2114             :         /*
    2115             :          * The number of workers can vary between bulkdelete and cleanup
    2116             :          * phase.
    2117             :          */
    2118          16 :         ReinitializeParallelWorkers(lps->pcxt, nworkers);
    2119             : 
    2120          16 :         LaunchParallelWorkers(lps->pcxt);
    2121             : 
    2122          16 :         if (lps->pcxt->nworkers_launched > 0)
    2123             :         {
    2124             :             /*
    2125             :              * Reset the local cost values for leader backend as we have
    2126             :              * already accumulated the remaining balance of heap.
    2127             :              */
    2128          16 :             VacuumCostBalance = 0;
    2129          16 :             VacuumCostBalanceLocal = 0;
    2130             : 
    2131             :             /* Enable shared cost balance for leader backend */
    2132          16 :             VacuumSharedCostBalance = &(lps->lvshared->cost_balance);
    2133          16 :             VacuumActiveNWorkers = &(lps->lvshared->active_nworkers);
    2134             :         }
    2135             : 
    2136          16 :         if (lps->lvshared->for_cleanup)
    2137           8 :             ereport(elevel,
    2138             :                     (errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
    2139             :                                      "launched %d parallel vacuum workers for index cleanup (planned: %d)",
    2140             :                                      lps->pcxt->nworkers_launched),
    2141             :                             lps->pcxt->nworkers_launched, nworkers)));
    2142             :         else
    2143           8 :             ereport(elevel,
    2144             :                     (errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
    2145             :                                      "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
    2146             :                                      lps->pcxt->nworkers_launched),
    2147             :                             lps->pcxt->nworkers_launched, nworkers)));
    2148             :     }
    2149             : 
    2150             :     /* Process the indexes that can be processed by only leader process */
    2151          20 :     vacuum_indexes_leader(Irel, stats, vacrelstats, lps, nindexes);
    2152             : 
    2153             :     /*
    2154             :      * Join as a parallel worker.  The leader process alone processes all the
    2155             :      * indexes in the case where no workers are launched.
    2156             :      */
    2157          20 :     parallel_vacuum_index(Irel, stats, lps->lvshared,
    2158             :                           vacrelstats->dead_tuples, nindexes, vacrelstats);
    2159             : 
    2160             :     /*
    2161             :      * Next, accumulate buffer and WAL usage.  (This must wait for the workers
    2162             :      * to finish, or we might get incomplete data.)
    2163             :      */
    2164          20 :     if (nworkers > 0)
    2165             :     {
    2166             :         int         i;
    2167             : 
    2168             :         /* Wait for all vacuum workers to finish */
    2169          16 :         WaitForParallelWorkersToFinish(lps->pcxt);
    2170             : 
    2171          40 :         for (i = 0; i < lps->pcxt->nworkers_launched; i++)
    2172          24 :             InstrAccumParallelQuery(&lps->buffer_usage[i], &lps->wal_usage[i]);
    2173             :     }
    2174             : 
    2175             :     /*
    2176             :      * Carry the shared balance value to heap scan and disable shared costing
    2177             :      */
    2178          20 :     if (VacuumSharedCostBalance)
    2179             :     {
    2180          16 :         VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
    2181          16 :         VacuumSharedCostBalance = NULL;
    2182          16 :         VacuumActiveNWorkers = NULL;
    2183             :     }
    2184          20 : }
    2185             : 
    2186             : /*
    2187             :  * Index vacuum/cleanup routine used by the leader process and parallel
    2188             :  * vacuum worker processes to process the indexes in parallel.
    2189             :  */
    2190             : static void
    2191          44 : parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
    2192             :                       LVShared *lvshared, LVDeadTuples *dead_tuples,
    2193             :                       int nindexes, LVRelStats *vacrelstats)
    2194             : {
    2195             :     /*
    2196             :      * Increment the active worker count if we are able to launch any worker.
    2197             :      */
    2198          44 :     if (VacuumActiveNWorkers)
    2199          40 :         pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
    2200             : 
    2201             :     /* Loop until all indexes are vacuumed */
    2202             :     for (;;)
    2203          80 :     {
    2204             :         int         idx;
    2205             :         LVSharedIndStats *shared_indstats;
    2206             : 
    2207             :         /* Get an index number to process */
    2208         124 :         idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
    2209             : 
    2210             :         /* Done for all indexes? */
    2211         124 :         if (idx >= nindexes)
    2212          44 :             break;
    2213             : 
    2214             :         /* Get the index statistics of this index from DSM */
    2215          80 :         shared_indstats = get_indstats(lvshared, idx);
    2216             : 
    2217             :         /*
    2218             :          * Skip processing indexes that don't participate in parallel
    2219             :          * operation
    2220             :          */
    2221         160 :         if (shared_indstats == NULL ||
    2222          80 :             skip_parallel_vacuum_index(Irel[idx], lvshared))
    2223          24 :             continue;
    2224             : 
    2225             :         /* Do vacuum or cleanup of the index */
    2226          56 :         vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
    2227             :                          dead_tuples, vacrelstats);
    2228             :     }
    2229             : 
    2230             :     /*
    2231             :      * We have completed the index vacuum so decrement the active worker
    2232             :      * count.
    2233             :      */
    2234          44 :     if (VacuumActiveNWorkers)
    2235          40 :         pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
    2236          44 : }
    2237             : 
    2238             : /*
    2239             :  * Vacuum or cleanup indexes that can be processed by only the leader process
    2240             :  * because these indexes don't support parallel operation at that phase.
    2241             :  */
    2242             : static void
    2243          20 : vacuum_indexes_leader(Relation *Irel, IndexBulkDeleteResult **stats,
    2244             :                       LVRelStats *vacrelstats, LVParallelState *lps,
    2245             :                       int nindexes)
    2246             : {
    2247             :     int         i;
    2248             : 
    2249             :     Assert(!IsParallelWorker());
    2250             : 
    2251             :     /*
    2252             :      * Increment the active worker count if we are able to launch any worker.
    2253             :      */
    2254          20 :     if (VacuumActiveNWorkers)
    2255          16 :         pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
    2256             : 
    2257         108 :     for (i = 0; i < nindexes; i++)
    2258             :     {
    2259             :         LVSharedIndStats *shared_indstats;
    2260             : 
    2261          88 :         shared_indstats = get_indstats(lps->lvshared, i);
    2262             : 
    2263             :         /* Process the indexes skipped by parallel workers */
    2264         176 :         if (shared_indstats == NULL ||
    2265          88 :             skip_parallel_vacuum_index(Irel[i], lps->lvshared))
    2266          32 :             vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
    2267             :                              shared_indstats, vacrelstats->dead_tuples,
    2268             :                              vacrelstats);
    2269             :     }
    2270             : 
    2271             :     /*
    2272             :      * We have completed the index vacuum so decrement the active worker
    2273             :      * count.
    2274             :      */
    2275          20 :     if (VacuumActiveNWorkers)
    2276          16 :         pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
    2277          20 : }
    2278             : 
    2279             : /*
    2280             :  * Vacuum or cleanup index either by leader process or by one of the worker
    2281             :  * process.  After processing the index this function copies the index
    2282             :  * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
    2283             :  * segment.
    2284             :  */
    2285             : static void
    2286          88 : vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
    2287             :                  LVShared *lvshared, LVSharedIndStats *shared_indstats,
    2288             :                  LVDeadTuples *dead_tuples, LVRelStats *vacrelstats)
    2289             : {
    2290          88 :     IndexBulkDeleteResult *bulkdelete_res = NULL;
    2291             : 
    2292          88 :     if (shared_indstats)
    2293             :     {
    2294             :         /* Get the space for IndexBulkDeleteResult */
    2295          88 :         bulkdelete_res = &(shared_indstats->stats);
    2296             : 
    2297             :         /*
    2298             :          * Update the pointer to the corresponding bulk-deletion result if
    2299             :          * someone has already updated it.
    2300             :          */
    2301          88 :         if (shared_indstats->updated && *stats == NULL)
    2302           0 :             *stats = bulkdelete_res;
    2303             :     }
    2304             : 
    2305             :     /* Do vacuum or cleanup of the index */
    2306          88 :     if (lvshared->for_cleanup)
    2307          56 :         lazy_cleanup_index(indrel, stats, lvshared->reltuples,
    2308          56 :                            lvshared->estimated_count, vacrelstats);
    2309             :     else
    2310          32 :         lazy_vacuum_index(indrel, stats, dead_tuples,
    2311             :                           lvshared->reltuples, vacrelstats);
    2312             : 
    2313             :     /*
    2314             :      * Copy the index bulk-deletion result returned from ambulkdelete and
    2315             :      * amvacuumcleanup to the DSM segment if it's the first cycle because they
    2316             :      * allocate locally and it's possible that an index will be vacuumed by a
    2317             :      * different vacuum process the next cycle.  Copying the result normally
    2318             :      * happens only the first time an index is vacuumed.  For any additional
    2319             :      * vacuum pass, we directly point to the result on the DSM segment and
    2320             :      * pass it to vacuum index APIs so that workers can update it directly.
    2321             :      *
    2322             :      * Since all vacuum workers write the bulk-deletion result at different
    2323             :      * slots we can write them without locking.
    2324             :      */
    2325          88 :     if (shared_indstats && !shared_indstats->updated && *stats != NULL)
    2326             :     {
    2327          52 :         memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
    2328          52 :         shared_indstats->updated = true;
    2329             : 
    2330             :         /*
    2331             :          * Now that stats[idx] points to the DSM segment, we don't need the
    2332             :          * locally allocated results.
    2333             :          */
    2334          52 :         pfree(*stats);
    2335          52 :         *stats = bulkdelete_res;
    2336             :     }
    2337          88 : }
    2338             : 
    2339             : /*
    2340             :  *  lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
    2341             :  *
    2342             :  * Cleanup indexes.  We process the indexes serially unless we are doing
    2343             :  * parallel vacuum.
    2344             :  */
    2345             : static void
    2346       41338 : lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
    2347             :                          LVRelStats *vacrelstats, LVParallelState *lps,
    2348             :                          int nindexes)
    2349             : {
    2350             :     int         idx;
    2351             : 
    2352             :     Assert(!IsParallelWorker());
    2353             :     Assert(nindexes > 0);
    2354             : 
    2355             :     /* Report that we are now cleaning up indexes */
    2356       41338 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2357             :                                  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
    2358             : 
    2359             :     /*
    2360             :      * If parallel vacuum is active we perform index cleanup with parallel
    2361             :      * workers.
    2362             :      */
    2363       41338 :     if (ParallelVacuumIsActive(lps))
    2364             :     {
    2365             :         /* Tell parallel workers to do index cleanup */
    2366          12 :         lps->lvshared->for_cleanup = true;
    2367          24 :         lps->lvshared->first_time =
    2368          24 :             (vacrelstats->num_index_scans == 0);
    2369             : 
    2370             :         /*
    2371             :          * Now we can provide a better estimate of total number of surviving
    2372             :          * tuples (we assume indexes are more interested in that than in the
    2373             :          * number of nominally live tuples).
    2374             :          */
    2375          12 :         lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
    2376          24 :         lps->lvshared->estimated_count =
    2377          24 :             (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
    2378             : 
    2379          12 :         lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
    2380             :     }
    2381             :     else
    2382             :     {
    2383      105116 :         for (idx = 0; idx < nindexes; idx++)
    2384       63790 :             lazy_cleanup_index(Irel[idx], &stats[idx],
    2385             :                                vacrelstats->new_rel_tuples,
    2386       63790 :                                vacrelstats->tupcount_pages < vacrelstats->rel_pages,
    2387             :                                vacrelstats);
    2388             :     }
    2389       41338 : }
    2390             : 
    2391             : /*
    2392             :  *  lazy_vacuum_index() -- vacuum one index relation.
    2393             :  *
    2394             :  *      Delete all the index entries pointing to tuples listed in
    2395             :  *      dead_tuples, and update running statistics.
    2396             :  *
    2397             :  *      reltuples is the number of heap tuples to be passed to the
    2398             :  *      bulkdelete callback.
    2399             :  */
    2400             : static void
    2401        4040 : lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
    2402             :                   LVDeadTuples *dead_tuples, double reltuples, LVRelStats *vacrelstats)
    2403             : {
    2404             :     IndexVacuumInfo ivinfo;
    2405             :     const char *msg;
    2406             :     PGRUsage    ru0;
    2407             :     LVRelStats  olderrinfo;
    2408             : 
    2409        4040 :     pg_rusage_init(&ru0);
    2410             : 
    2411        4040 :     ivinfo.index = indrel;
    2412        4040 :     ivinfo.analyze_only = false;
    2413        4040 :     ivinfo.report_progress = false;
    2414        4040 :     ivinfo.estimated_count = true;
    2415        4040 :     ivinfo.message_level = elevel;
    2416        4040 :     ivinfo.num_heap_tuples = reltuples;
    2417        4040 :     ivinfo.strategy = vac_strategy;
    2418             : 
    2419             :     /* Update error traceback information */
    2420        4040 :     olderrinfo = *vacrelstats;
    2421        4040 :     update_vacuum_error_info(vacrelstats,
    2422             :                              VACUUM_ERRCB_PHASE_VACUUM_INDEX,
    2423             :                              InvalidBlockNumber,
    2424        4040 :                              RelationGetRelationName(indrel));
    2425             : 
    2426             :     /* Do bulk deletion */
    2427        4040 :     *stats = index_bulk_delete(&ivinfo, *stats,
    2428             :                                lazy_tid_reaped, (void *) dead_tuples);
    2429             : 
    2430        4040 :     if (IsParallelWorker())
    2431           0 :         msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker");
    2432             :     else
    2433        4040 :         msg = gettext_noop("scanned index \"%s\" to remove %d row versions");
    2434             : 
    2435        4040 :     ereport(elevel,
    2436             :             (errmsg(msg,
    2437             :                     vacrelstats->indname,
    2438             :                     dead_tuples->num_tuples),
    2439             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    2440             : 
    2441             :     /* Revert to the previous phase information for error traceback */
    2442        8080 :     update_vacuum_error_info(vacrelstats,
    2443        4040 :                              olderrinfo.phase,
    2444             :                              olderrinfo.blkno,
    2445             :                              olderrinfo.indname);
    2446        4040 : }
    2447             : 
    2448             : /*
    2449             :  *  lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
    2450             :  *
    2451             :  *      reltuples is the number of heap tuples and estimated_count is true
    2452             :  *      if reltuples is an estimated value.
    2453             :  */
    2454             : static void
    2455       63846 : lazy_cleanup_index(Relation indrel,
    2456             :                    IndexBulkDeleteResult **stats,
    2457             :                    double reltuples, bool estimated_count, LVRelStats *vacrelstats)
    2458             : {
    2459             :     IndexVacuumInfo ivinfo;
    2460             :     const char *msg;
    2461             :     PGRUsage    ru0;
    2462             :     LVRelStats  olderrcbarg;
    2463             : 
    2464       63846 :     pg_rusage_init(&ru0);
    2465             : 
    2466       63846 :     ivinfo.index = indrel;
    2467       63846 :     ivinfo.analyze_only = false;
    2468       63846 :     ivinfo.report_progress = false;
    2469       63846 :     ivinfo.estimated_count = estimated_count;
    2470       63846 :     ivinfo.message_level = elevel;
    2471             : 
    2472       63846 :     ivinfo.num_heap_tuples = reltuples;
    2473       63846 :     ivinfo.strategy = vac_strategy;
    2474             : 
    2475             :     /* Update error traceback information */
    2476       63846 :     olderrcbarg = *vacrelstats;
    2477       63846 :     update_vacuum_error_info(vacrelstats,
    2478             :                              VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
    2479             :                              InvalidBlockNumber,
    2480       63846 :                              RelationGetRelationName(indrel));
    2481             : 
    2482       63846 :     *stats = index_vacuum_cleanup(&ivinfo, *stats);
    2483             : 
    2484             :     /* Revert back to the old phase information for error traceback */
    2485      127692 :     update_vacuum_error_info(vacrelstats,
    2486       63846 :                              olderrcbarg.phase,
    2487             :                              olderrcbarg.blkno,
    2488             :                              olderrcbarg.indname);
    2489       63846 :     if (!(*stats))
    2490        2010 :         return;
    2491             : 
    2492       61836 :     if (IsParallelWorker())
    2493           0 :         msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages as reported by parallel vacuum worker");
    2494             :     else
    2495       61836 :         msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages");
    2496             : 
    2497       61836 :     ereport(elevel,
    2498             :             (errmsg(msg,
    2499             :                     RelationGetRelationName(indrel),
    2500             :                     (*stats)->num_index_tuples,
    2501             :                     (*stats)->num_pages),
    2502             :              errdetail("%.0f index row versions were removed.\n"
    2503             :                        "%u index pages have been deleted, %u are currently reusable.\n"
    2504             :                        "%s.",
    2505             :                        (*stats)->tuples_removed,
    2506             :                        (*stats)->pages_deleted, (*stats)->pages_free,
    2507             :                        pg_rusage_show(&ru0))));
    2508             : }
    2509             : 
    2510             : /*
    2511             :  * should_attempt_truncation - should we attempt to truncate the heap?
    2512             :  *
    2513             :  * Don't even think about it unless we have a shot at releasing a goodly
    2514             :  * number of pages.  Otherwise, the time taken isn't worth it.
    2515             :  *
    2516             :  * Also don't attempt it if we are doing early pruning/vacuuming, because a
    2517             :  * scan which cannot find a truncated heap page cannot determine that the
    2518             :  * snapshot is too old to read that page.  We might be able to get away with
    2519             :  * truncating all except one of the pages, setting its LSN to (at least) the
    2520             :  * maximum of the truncated range if we also treated an index leaf tuple
    2521             :  * pointing to a missing heap page as something to trigger the "snapshot too
    2522             :  * old" error, but that seems fragile and seems like it deserves its own patch
    2523             :  * if we consider it.
    2524             :  *
    2525             :  * This is split out so that we can test whether truncation is going to be
    2526             :  * called for before we actually do it.  If you change the logic here, be
    2527             :  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
    2528             :  */
    2529             : static bool
    2530       43632 : should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
    2531             : {
    2532             :     BlockNumber possibly_freeable;
    2533             : 
    2534       43632 :     if (params->truncate == VACOPT_TERNARY_DISABLED)
    2535          16 :         return false;
    2536             : 
    2537       43616 :     possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    2538       43616 :     if (possibly_freeable > 0 &&
    2539         266 :         (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
    2540         266 :          possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
    2541         248 :         old_snapshot_threshold < 0)
    2542         248 :         return true;
    2543             :     else
    2544       43368 :         return false;
    2545             : }
    2546             : 
    2547             : /*
    2548             :  * lazy_truncate_heap - try to truncate off any empty pages at the end
    2549             :  */
    2550             : static void
    2551         118 : lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
    2552             : {
    2553         118 :     BlockNumber old_rel_pages = vacrelstats->rel_pages;
    2554             :     BlockNumber new_rel_pages;
    2555             :     int         lock_retry;
    2556             : 
    2557             :     /* Report that we are now truncating */
    2558         118 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    2559             :                                  PROGRESS_VACUUM_PHASE_TRUNCATE);
    2560             : 
    2561             :     /*
    2562             :      * Loop until no more truncating can be done.
    2563             :      */
    2564             :     do
    2565             :     {
    2566             :         PGRUsage    ru0;
    2567             : 
    2568         118 :         pg_rusage_init(&ru0);
    2569             : 
    2570             :         /*
    2571             :          * We need full exclusive lock on the relation in order to do
    2572             :          * truncation. If we can't get it, give up rather than waiting --- we
    2573             :          * don't want to block other backends, and we don't want to deadlock
    2574             :          * (which is quite possible considering we already hold a lower-grade
    2575             :          * lock).
    2576             :          */
    2577         118 :         vacrelstats->lock_waiter_detected = false;
    2578         118 :         lock_retry = 0;
    2579             :         while (true)
    2580             :         {
    2581         518 :             if (ConditionalLockRelation(onerel, AccessExclusiveLock))
    2582         114 :                 break;
    2583             : 
    2584             :             /*
    2585             :              * Check for interrupts while trying to (re-)acquire the exclusive
    2586             :              * lock.
    2587             :              */
    2588         404 :             CHECK_FOR_INTERRUPTS();
    2589             : 
    2590         404 :             if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
    2591             :                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
    2592             :             {
    2593             :                 /*
    2594             :                  * We failed to establish the lock in the specified number of
    2595             :                  * retries. This means we give up truncating.
    2596             :                  */
    2597           4 :                 vacrelstats->lock_waiter_detected = true;
    2598           4 :                 ereport(elevel,
    2599             :                         (errmsg("\"%s\": stopping truncate due to conflicting lock request",
    2600             :                                 vacrelstats->relname)));
    2601           4 :                 return;
    2602             :             }
    2603             : 
    2604         400 :             pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);
    2605             :         }
    2606             : 
    2607             :         /*
    2608             :          * Now that we have exclusive lock, look to see if the rel has grown
    2609             :          * whilst we were vacuuming with non-exclusive lock.  If so, give up;
    2610             :          * the newly added pages presumably contain non-deletable tuples.
    2611             :          */
    2612         114 :         new_rel_pages = RelationGetNumberOfBlocks(onerel);
    2613         114 :         if (new_rel_pages != old_rel_pages)
    2614             :         {
    2615             :             /*
    2616             :              * Note: we intentionally don't update vacrelstats->rel_pages with
    2617             :              * the new rel size here.  If we did, it would amount to assuming
    2618             :              * that the new pages are empty, which is unlikely. Leaving the
    2619             :              * numbers alone amounts to assuming that the new pages have the
    2620             :              * same tuple density as existing ones, which is less unlikely.
    2621             :              */
    2622           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    2623           0 :             return;
    2624             :         }
    2625             : 
    2626             :         /*
    2627             :          * Scan backwards from the end to verify that the end pages actually
    2628             :          * contain no tuples.  This is *necessary*, not optional, because
    2629             :          * other backends could have added tuples to these pages whilst we
    2630             :          * were vacuuming.
    2631             :          */
    2632         114 :         new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
    2633         114 :         vacrelstats->blkno = new_rel_pages;
    2634             : 
    2635         114 :         if (new_rel_pages >= old_rel_pages)
    2636             :         {
    2637             :             /* can't do anything after all */
    2638           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    2639           0 :             return;
    2640             :         }
    2641             : 
    2642             :         /*
    2643             :          * Okay to truncate.
    2644             :          */
    2645         114 :         RelationTruncate(onerel, new_rel_pages);
    2646             : 
    2647             :         /*
    2648             :          * We can release the exclusive lock as soon as we have truncated.
    2649             :          * Other backends can't safely access the relation until they have
    2650             :          * processed the smgr invalidation that smgrtruncate sent out ... but
    2651             :          * that should happen as part of standard invalidation processing once
    2652             :          * they acquire lock on the relation.
    2653             :          */
    2654         114 :         UnlockRelation(onerel, AccessExclusiveLock);
    2655             : 
    2656             :         /*
    2657             :          * Update statistics.  Here, it *is* correct to adjust rel_pages
    2658             :          * without also touching reltuples, since the tuple count wasn't
    2659             :          * changed by the truncation.
    2660             :          */
    2661         114 :         vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
    2662         114 :         vacrelstats->rel_pages = new_rel_pages;
    2663             : 
    2664         114 :         ereport(elevel,
    2665             :                 (errmsg("\"%s\": truncated %u to %u pages",
    2666             :                         vacrelstats->relname,
    2667             :                         old_rel_pages, new_rel_pages),
    2668             :                  errdetail_internal("%s",
    2669             :                                     pg_rusage_show(&ru0))));
    2670         114 :         old_rel_pages = new_rel_pages;
    2671         116 :     } while (new_rel_pages > vacrelstats->nonempty_pages &&
    2672         114 :              vacrelstats->lock_waiter_detected);
    2673             : }
    2674             : 
    2675             : /*
    2676             :  * Rescan end pages to verify that they are (still) empty of tuples.
    2677             :  *
    2678             :  * Returns number of nondeletable pages (last nonempty page + 1).
    2679             :  */
    2680             : static BlockNumber
    2681         114 : count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
    2682             : {
    2683             :     BlockNumber blkno;
    2684             :     BlockNumber prefetchedUntil;
    2685             :     instr_time  starttime;
    2686             : 
    2687             :     /* Initialize the starttime if we check for conflicting lock requests */
    2688         114 :     INSTR_TIME_SET_CURRENT(starttime);
    2689             : 
    2690             :     /*
    2691             :      * Start checking blocks at what we believe relation end to be and move
    2692             :      * backwards.  (Strange coding of loop control is needed because blkno is
    2693             :      * unsigned.)  To make the scan faster, we prefetch a few blocks at a time
    2694             :      * in forward direction, so that OS-level readahead can kick in.
    2695             :      */
    2696         114 :     blkno = vacrelstats->rel_pages;
    2697             :     StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
    2698             :                      "prefetch size must be power of 2");
    2699         114 :     prefetchedUntil = InvalidBlockNumber;
    2700        1158 :     while (blkno > vacrelstats->nonempty_pages)
    2701             :     {
    2702             :         Buffer      buf;
    2703             :         Page        page;
    2704             :         OffsetNumber offnum,
    2705             :                     maxoff;
    2706             :         bool        hastup;
    2707             : 
    2708             :         /*
    2709             :          * Check if another process requests a lock on our relation. We are
    2710             :          * holding an AccessExclusiveLock here, so they will be waiting. We
    2711             :          * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
    2712             :          * only check if that interval has elapsed once every 32 blocks to
    2713             :          * keep the number of system calls and actual shared lock table
    2714             :          * lookups to a minimum.
    2715             :          */
    2716        1046 :         if ((blkno % 32) == 0)
    2717             :         {
    2718             :             instr_time  currenttime;
    2719             :             instr_time  elapsed;
    2720             : 
    2721          32 :             INSTR_TIME_SET_CURRENT(currenttime);
    2722          32 :             elapsed = currenttime;
    2723          32 :             INSTR_TIME_SUBTRACT(elapsed, starttime);
    2724          32 :             if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
    2725             :                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
    2726             :             {
    2727           0 :                 if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
    2728             :                 {
    2729           0 :                     ereport(elevel,
    2730             :                             (errmsg("\"%s\": suspending truncate due to conflicting lock request",
    2731             :                                     vacrelstats->relname)));
    2732             : 
    2733           0 :                     vacrelstats->lock_waiter_detected = true;
    2734           0 :                     return blkno;
    2735             :                 }
    2736           0 :                 starttime = currenttime;
    2737             :             }
    2738             :         }
    2739             : 
    2740             :         /*
    2741             :          * We don't insert a vacuum delay point here, because we have an
    2742             :          * exclusive lock on the table which we want to hold for as short a
    2743             :          * time as possible.  We still need to check for interrupts however.
    2744             :          */
    2745        1046 :         CHECK_FOR_INTERRUPTS();
    2746             : 
    2747        1046 :         blkno--;
    2748             : 
    2749             :         /* If we haven't prefetched this lot yet, do so now. */
    2750        1046 :         if (prefetchedUntil > blkno)
    2751             :         {
    2752             :             BlockNumber prefetchStart;
    2753             :             BlockNumber pblkno;
    2754             : 
    2755         138 :             prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    2756        1760 :             for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
    2757             :             {
    2758        1622 :                 PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
    2759        1622 :                 CHECK_FOR_INTERRUPTS();
    2760             :             }
    2761         138 :             prefetchedUntil = prefetchStart;
    2762             :         }
    2763             : 
    2764        1046 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
    2765             :                                  RBM_NORMAL, vac_strategy);
    2766             : 
    2767             :         /* In this phase we only need shared access to the buffer */
    2768        1046 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
    2769             : 
    2770        1046 :         page = BufferGetPage(buf);
    2771             : 
    2772        1046 :         if (PageIsNew(page) || PageIsEmpty(page))
    2773             :         {
    2774           0 :             UnlockReleaseBuffer(buf);
    2775           0 :             continue;
    2776             :         }
    2777             : 
    2778        1046 :         hastup = false;
    2779        1046 :         maxoff = PageGetMaxOffsetNumber(page);
    2780      118858 :         for (offnum = FirstOffsetNumber;
    2781             :              offnum <= maxoff;
    2782      117812 :              offnum = OffsetNumberNext(offnum))
    2783             :         {
    2784             :             ItemId      itemid;
    2785             : 
    2786      117814 :             itemid = PageGetItemId(page, offnum);
    2787             : 
    2788             :             /*
    2789             :              * Note: any non-unused item should be taken as a reason to keep
    2790             :              * this page.  We formerly thought that DEAD tuples could be
    2791             :              * thrown away, but that's not so, because we'd not have cleaned
    2792             :              * out their index entries.
    2793             :              */
    2794      117814 :             if (ItemIdIsUsed(itemid))
    2795             :             {
    2796           2 :                 hastup = true;
    2797           2 :                 break;          /* can stop scanning */
    2798             :             }
    2799             :         }                       /* scan along page */
    2800             : 
    2801        1046 :         UnlockReleaseBuffer(buf);
    2802             : 
    2803             :         /* Done scanning if we found a tuple here */
    2804        1046 :         if (hastup)
    2805           2 :             return blkno + 1;
    2806             :     }
    2807             : 
    2808             :     /*
    2809             :      * If we fall out of the loop, all the previously-thought-to-be-empty
    2810             :      * pages still are; we need not bother to look at the last known-nonempty
    2811             :      * page.
    2812             :      */
    2813         112 :     return vacrelstats->nonempty_pages;
    2814             : }
    2815             : 
    2816             : /*
    2817             :  * Return the maximum number of dead tuples we can record.
    2818             :  */
    2819             : static long
    2820       43502 : compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
    2821             : {
    2822             :     long        maxtuples;
    2823       43502 :     int         vac_work_mem = IsAutoVacuumWorkerProcess() &&
    2824         200 :     autovacuum_work_mem != -1 ?
    2825       43702 :     autovacuum_work_mem : maintenance_work_mem;
    2826             : 
    2827       43502 :     if (useindex)
    2828             :     {
    2829       41338 :         maxtuples = MAXDEADTUPLES(vac_work_mem * 1024L);
    2830       41338 :         maxtuples = Min(maxtuples, INT_MAX);
    2831       41338 :         maxtuples = Min(maxtuples, MAXDEADTUPLES(MaxAllocSize));
    2832             : 
    2833             :         /* curious coding here to ensure the multiplication can't overflow */
    2834       41338 :         if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
    2835       41338 :             maxtuples = relblocks * LAZY_ALLOC_TUPLES;
    2836             : 
    2837             :         /* stay sane if small maintenance_work_mem */
    2838       41338 :         maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
    2839             :     }
    2840             :     else
    2841        2164 :         maxtuples = MaxHeapTuplesPerPage;
    2842             : 
    2843       43502 :     return maxtuples;
    2844             : }
    2845             : 
    2846             : /*
    2847             :  * lazy_space_alloc - space allocation decisions for lazy vacuum
    2848             :  *
    2849             :  * See the comments at the head of this file for rationale.
    2850             :  */
    2851             : static void
    2852       43490 : lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
    2853             : {
    2854       43490 :     LVDeadTuples *dead_tuples = NULL;
    2855             :     long        maxtuples;
    2856             : 
    2857       43490 :     maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
    2858             : 
    2859       43490 :     dead_tuples = (LVDeadTuples *) palloc(SizeOfDeadTuples(maxtuples));
    2860       43490 :     dead_tuples->num_tuples = 0;
    2861       43490 :     dead_tuples->max_tuples = (int) maxtuples;
    2862             : 
    2863       43490 :     vacrelstats->dead_tuples = dead_tuples;
    2864       43490 : }
    2865             : 
    2866             : /*
    2867             :  * lazy_record_dead_tuple - remember one deletable tuple
    2868             :  */
    2869             : static void
    2870     1157996 : lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
    2871             : {
    2872             :     /*
    2873             :      * The array shouldn't overflow under normal behavior, but perhaps it
    2874             :      * could if we are given a really small maintenance_work_mem. In that
    2875             :      * case, just forget the last few tuples (we'll get 'em next time).
    2876             :      */
    2877     1157996 :     if (dead_tuples->num_tuples < dead_tuples->max_tuples)
    2878             :     {
    2879     1157996 :         dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
    2880     1157996 :         dead_tuples->num_tuples++;
    2881     1157996 :         pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
    2882     1157996 :                                      dead_tuples->num_tuples);
    2883             :     }
    2884     1157996 : }
    2885             : 
    2886             : /*
    2887             :  *  lazy_tid_reaped() -- is a particular tid deletable?
    2888             :  *
    2889             :  *      This has the right signature to be an IndexBulkDeleteCallback.
    2890             :  *
    2891             :  *      Assumes dead_tuples array is in sorted order.
    2892             :  */
    2893             : static bool
    2894    12794038 : lazy_tid_reaped(ItemPointer itemptr, void *state)
    2895             : {
    2896    12794038 :     LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
    2897             :     ItemPointer res;
    2898             : 
    2899    25588076 :     res = (ItemPointer) bsearch((void *) itemptr,
    2900    12794038 :                                 (void *) dead_tuples->itemptrs,
    2901    12794038 :                                 dead_tuples->num_tuples,
    2902             :                                 sizeof(ItemPointerData),
    2903             :                                 vac_cmp_itemptr);
    2904             : 
    2905    12794038 :     return (res != NULL);
    2906             : }
    2907             : 
    2908             : /*
    2909             :  * Comparator routines for use with qsort() and bsearch().
    2910             :  */
    2911             : static int
    2912    77355750 : vac_cmp_itemptr(const void *left, const void *right)
    2913             : {
    2914             :     BlockNumber lblk,
    2915             :                 rblk;
    2916             :     OffsetNumber loff,
    2917             :                 roff;
    2918             : 
    2919    77355750 :     lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    2920    77355750 :     rblk = ItemPointerGetBlockNumber((ItemPointer) right);
    2921             : 
    2922    77355750 :     if (lblk < rblk)
    2923    45822838 :         return -1;
    2924    31532912 :     if (lblk > rblk)
    2925    18990686 :         return 1;
    2926             : 
    2927    12542226 :     loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    2928    12542226 :     roff = ItemPointerGetOffsetNumber((ItemPointer) right);
    2929             : 
    2930    12542226 :     if (loff < roff)
    2931     5907144 :         return -1;
    2932     6635082 :     if (loff > roff)
    2933     4956646 :         return 1;
    2934             : 
    2935     1678436 :     return 0;
    2936             : }
    2937             : 
    2938             : /*
    2939             :  * Check if every tuple in the given page is visible to all current and future
    2940             :  * transactions. Also return the visibility_cutoff_xid which is the highest
    2941             :  * xmin amongst the visible tuples.  Set *all_frozen to true if every tuple
    2942             :  * on this page is frozen.
    2943             :  */
    2944             : static bool
    2945       20724 : heap_page_is_all_visible(Relation rel, Buffer buf,
    2946             :                          TransactionId *visibility_cutoff_xid,
    2947             :                          bool *all_frozen)
    2948             : {
    2949       20724 :     Page        page = BufferGetPage(buf);
    2950       20724 :     BlockNumber blockno = BufferGetBlockNumber(buf);
    2951             :     OffsetNumber offnum,
    2952             :                 maxoff;
    2953       20724 :     bool        all_visible = true;
    2954             : 
    2955       20724 :     *visibility_cutoff_xid = InvalidTransactionId;
    2956       20724 :     *all_frozen = true;
    2957             : 
    2958             :     /*
    2959             :      * This is a stripped down version of the line pointer scan in
    2960             :      * lazy_scan_heap(). So if you change anything here, also check that code.
    2961             :      */
    2962       20724 :     maxoff = PageGetMaxOffsetNumber(page);
    2963     1641282 :     for (offnum = FirstOffsetNumber;
    2964     1620868 :          offnum <= maxoff && all_visible;
    2965     1620558 :          offnum = OffsetNumberNext(offnum))
    2966             :     {
    2967             :         ItemId      itemid;
    2968             :         HeapTupleData tuple;
    2969             : 
    2970     1620558 :         itemid = PageGetItemId(page, offnum);
    2971             : 
    2972             :         /* Unused or redirect line pointers are of no interest */
    2973     1620558 :         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
    2974     1262180 :             continue;
    2975             : 
    2976      358378 :         ItemPointerSet(&(tuple.t_self), blockno, offnum);
    2977             : 
    2978             :         /*
    2979             :          * Dead line pointers can have index pointers pointing to them. So
    2980             :          * they can't be treated as visible
    2981             :          */
    2982      358378 :         if (ItemIdIsDead(itemid))
    2983             :         {
    2984           0 :             all_visible = false;
    2985           0 :             *all_frozen = false;
    2986           0 :             break;
    2987             :         }
    2988             : 
    2989             :         Assert(ItemIdIsNormal(itemid));
    2990             : 
    2991      358378 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2992      358378 :         tuple.t_len = ItemIdGetLength(itemid);
    2993      358378 :         tuple.t_tableOid = RelationGetRelid(rel);
    2994             : 
    2995      358378 :         switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
    2996             :         {
    2997      358136 :             case HEAPTUPLE_LIVE:
    2998             :                 {
    2999             :                     TransactionId xmin;
    3000             : 
    3001             :                     /* Check comments in lazy_scan_heap. */
    3002      358136 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    3003             :                     {
    3004           0 :                         all_visible = false;
    3005           0 :                         *all_frozen = false;
    3006           0 :                         break;
    3007             :                     }
    3008             : 
    3009             :                     /*
    3010             :                      * The inserter definitely committed. But is it old enough
    3011             :                      * that everyone sees it as committed?
    3012             :                      */
    3013      358136 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    3014      358136 :                     if (!TransactionIdPrecedes(xmin, OldestXmin))
    3015             :                     {
    3016          80 :                         all_visible = false;
    3017          80 :                         *all_frozen = false;
    3018          80 :                         break;
    3019             :                     }
    3020             : 
    3021             :                     /* Track newest xmin on page. */
    3022      358056 :                     if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
    3023       30056 :                         *visibility_cutoff_xid = xmin;
    3024             : 
    3025             :                     /* Check whether this tuple is already frozen or not */
    3026      614526 :                     if (all_visible && *all_frozen &&
    3027      256470 :                         heap_tuple_needs_eventual_freeze(tuple.t_data))
    3028        3906 :                         *all_frozen = false;
    3029             :                 }
    3030      358056 :                 break;
    3031             : 
    3032         242 :             case HEAPTUPLE_DEAD:
    3033             :             case HEAPTUPLE_RECENTLY_DEAD:
    3034             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    3035             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    3036             :                 {
    3037         242 :                     all_visible = false;
    3038         242 :                     *all_frozen = false;
    3039         242 :                     break;
    3040             :                 }
    3041           0 :             default:
    3042           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    3043             :                 break;
    3044             :         }
    3045             :     }                           /* scan along page */
    3046             : 
    3047       20724 :     return all_visible;
    3048             : }
    3049             : 
    3050             : /*
    3051             :  * Compute the number of parallel worker processes to request.  Both index
    3052             :  * vacuum and index cleanup can be executed with parallel workers.  The index
    3053             :  * is eligible for parallel vacuum iff its size is greater than
    3054             :  * min_parallel_index_scan_size as invoking workers for very small indexes
    3055             :  * can hurt performance.
    3056             :  *
    3057             :  * nrequested is the number of parallel workers that user requested.  If
    3058             :  * nrequested is 0, we compute the parallel degree based on nindexes, that is
    3059             :  * the number of indexes that support parallel vacuum.  This function also
    3060             :  * sets can_parallel_vacuum to remember indexes that participate in parallel
    3061             :  * vacuum.
    3062             :  */
    3063             : static int
    3064       18616 : compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested,
    3065             :                                 bool *can_parallel_vacuum)
    3066             : {
    3067       18616 :     int         nindexes_parallel = 0;
    3068       18616 :     int         nindexes_parallel_bulkdel = 0;
    3069       18616 :     int         nindexes_parallel_cleanup = 0;
    3070             :     int         parallel_workers;
    3071             :     int         i;
    3072             : 
    3073             :     /*
    3074             :      * We don't allow performing parallel operation in standalone backend or
    3075             :      * when parallelism is disabled.
    3076             :      */
    3077       18616 :     if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
    3078       17184 :         return 0;
    3079             : 
    3080             :     /*
    3081             :      * Compute the number of indexes that can participate in parallel vacuum.
    3082             :      */
    3083        4638 :     for (i = 0; i < nindexes; i++)
    3084             :     {
    3085        3206 :         uint8       vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
    3086             : 
    3087        3206 :         if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
    3088        3206 :             RelationGetNumberOfBlocks(Irel[i]) < min_parallel_index_scan_size)
    3089        3142 :             continue;
    3090             : 
    3091          64 :         can_parallel_vacuum[i] = true;
    3092             : 
    3093          64 :         if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
    3094          56 :             nindexes_parallel_bulkdel++;
    3095          64 :         if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
    3096          48 :             ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
    3097          52 :             nindexes_parallel_cleanup++;
    3098             :     }
    3099             : 
    3100        1432 :     nindexes_parallel = Max(nindexes_parallel_bulkdel,
    3101             :                             nindexes_parallel_cleanup);
    3102             : 
    3103             :     /* The leader process takes one index */
    3104        1432 :     nindexes_parallel--;
    3105             : 
    3106             :     /* No index supports parallel vacuum */
    3107        1432 :     if (nindexes_parallel <= 0)
    3108        1420 :         return 0;
    3109             : 
    3110             :     /* Compute the parallel degree */
    3111          12 :     parallel_workers = (nrequested > 0) ?
    3112          12 :         Min(nrequested, nindexes_parallel) : nindexes_parallel;
    3113             : 
    3114             :     /* Cap by max_parallel_maintenance_workers */
    3115          12 :     parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
    3116             : 
    3117          12 :     return parallel_workers;
    3118             : }
    3119             : 
    3120             : /*
    3121             :  * Initialize variables for shared index statistics, set NULL bitmap and the
    3122             :  * size of stats for each index.
    3123             :  */
    3124             : static void
    3125          12 : prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
    3126             :                          int nindexes)
    3127             : {
    3128             :     int         i;
    3129             : 
    3130             :     /* Currently, we don't support parallel vacuum for autovacuum */
    3131             :     Assert(!IsAutoVacuumWorkerProcess());
    3132             : 
    3133             :     /* Set NULL for all indexes */
    3134          12 :     memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
    3135             : 
    3136          68 :     for (i = 0; i < nindexes; i++)
    3137             :     {
    3138          56 :         if (!can_parallel_vacuum[i])
    3139           0 :             continue;
    3140             : 
    3141             :         /* Set NOT NULL as this index does support parallelism */
    3142          56 :         lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
    3143             :     }
    3144          12 : }
    3145             : 
    3146             : /*
    3147             :  * Update index statistics in pg_class if the statistics are accurate.
    3148             :  */
    3149             : static void
    3150       43502 : update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
    3151             :                         int nindexes)
    3152             : {
    3153             :     int         i;
    3154             : 
    3155             :     Assert(!IsInParallelMode());
    3156             : 
    3157      107416 :     for (i = 0; i < nindexes; i++)
    3158             :     {
    3159       63914 :         if (stats[i] == NULL || stats[i]->estimated_count)
    3160        2078 :             continue;
    3161             : 
    3162             :         /* Update index statistics */
    3163      123672 :         vac_update_relstats(Irel[i],
    3164       61836 :                             stats[i]->num_pages,
    3165       61836 :                             stats[i]->num_index_tuples,
    3166             :                             0,
    3167             :                             false,
    3168             :                             InvalidTransactionId,
    3169             :                             InvalidMultiXactId,
    3170             :                             false);
    3171       61836 :         pfree(stats[i]);
    3172             :     }
    3173       43502 : }
    3174             : 
    3175             : /*
    3176             :  * This function prepares and returns parallel vacuum state if we can launch
    3177             :  * even one worker.  This function is responsible for entering parallel mode,
    3178             :  * create a parallel context, and then initialize the DSM segment.
    3179             :  */
    3180             : static LVParallelState *
    3181       18616 : begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
    3182             :                       BlockNumber nblocks, int nindexes, int nrequested)
    3183             : {
    3184       18616 :     LVParallelState *lps = NULL;
    3185             :     ParallelContext *pcxt;
    3186             :     LVShared   *shared;
    3187             :     LVDeadTuples *dead_tuples;
    3188             :     BufferUsage *buffer_usage;
    3189             :     WalUsage   *wal_usage;
    3190             :     bool       *can_parallel_vacuum;
    3191             :     long        maxtuples;
    3192             :     char       *sharedquery;
    3193             :     Size        est_shared;
    3194             :     Size        est_deadtuples;
    3195       18616 :     int         nindexes_mwm = 0;
    3196       18616 :     int         parallel_workers = 0;
    3197             :     int         querylen;
    3198             :     int         i;
    3199             : 
    3200             :     /*
    3201             :      * A parallel vacuum must be requested and there must be indexes on the
    3202             :      * relation
    3203             :      */
    3204             :     Assert(nrequested >= 0);
    3205             :     Assert(nindexes > 0);
    3206             : 
    3207             :     /*
    3208             :      * Compute the number of parallel vacuum workers to launch
    3209             :      */
    3210       18616 :     can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
    3211       18616 :     parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
    3212             :                                                        nrequested,
    3213             :                                                        can_parallel_vacuum);
    3214             : 
    3215             :     /* Can't perform vacuum in parallel */
    3216       18616 :     if (parallel_workers <= 0)
    3217             :     {
    3218       18604 :         pfree(can_parallel_vacuum);
    3219       18604 :         return lps;
    3220             :     }
    3221             : 
    3222          12 :     lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
    3223             : 
    3224          12 :     EnterParallelMode();
    3225          12 :     pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
    3226             :                                  parallel_workers);
    3227             :     Assert(pcxt->nworkers > 0);
    3228          12 :     lps->pcxt = pcxt;
    3229             : 
    3230             :     /* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
    3231          12 :     est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
    3232          68 :     for (i = 0; i < nindexes; i++)
    3233             :     {
    3234          56 :         uint8       vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
    3235             : 
    3236             :         /*
    3237             :          * Cleanup option should be either disabled, always performing in
    3238             :          * parallel or conditionally performing in parallel.
    3239             :          */
    3240             :         Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
    3241             :                ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
    3242             :         Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
    3243             : 
    3244             :         /* Skip indexes that don't participate in parallel vacuum */
    3245          56 :         if (!can_parallel_vacuum[i])
    3246           0 :             continue;
    3247             : 
    3248          56 :         if (Irel[i]->rd_indam->amusemaintenanceworkmem)
    3249           8 :             nindexes_mwm++;
    3250             : 
    3251          56 :         est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
    3252             : 
    3253             :         /*
    3254             :          * Remember the number of indexes that support parallel operation for
    3255             :          * each phase.
    3256             :          */
    3257          56 :         if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
    3258          48 :             lps->nindexes_parallel_bulkdel++;
    3259          56 :         if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
    3260          16 :             lps->nindexes_parallel_cleanup++;
    3261          56 :         if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
    3262          32 :             lps->nindexes_parallel_condcleanup++;
    3263             :     }
    3264          12 :     shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
    3265          12 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    3266             : 
    3267             :     /* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
    3268          12 :     maxtuples = compute_max_dead_tuples(nblocks, true);
    3269          12 :     est_deadtuples = MAXALIGN(SizeOfDeadTuples(maxtuples));
    3270          12 :     shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
    3271          12 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    3272             : 
    3273             :     /*
    3274             :      * Estimate space for BufferUsage and WalUsage --
    3275             :      * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
    3276             :      *
    3277             :      * If there are no extensions loaded that care, we could skip this.  We
    3278             :      * have no way of knowing whether anyone's looking at pgBufferUsage or
    3279             :      * pgWalUsage, so do it unconditionally.
    3280             :      */
    3281          12 :     shm_toc_estimate_chunk(&pcxt->estimator,
    3282             :                            mul_size(sizeof(BufferUsage), pcxt->nworkers));
    3283          12 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    3284          12 :     shm_toc_estimate_chunk(&pcxt->estimator,
    3285             :                            mul_size(sizeof(WalUsage), pcxt->nworkers));
    3286          12 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    3287             : 
    3288             :     /* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
    3289          12 :     querylen = strlen(debug_query_string);
    3290          12 :     shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
    3291          12 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
    3292             : 
    3293          12 :     InitializeParallelDSM(pcxt);
    3294             : 
    3295             :     /* Prepare shared information */
    3296          12 :     shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
    3297         432 :     MemSet(shared, 0, est_shared);
    3298          12 :     shared->relid = relid;
    3299          12 :     shared->elevel = elevel;
    3300          12 :     shared->maintenance_work_mem_worker =
    3301             :         (nindexes_mwm > 0) ?
    3302          12 :         maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
    3303             :         maintenance_work_mem;
    3304             : 
    3305          12 :     pg_atomic_init_u32(&(shared->cost_balance), 0);
    3306          12 :     pg_atomic_init_u32(&(shared->active_nworkers), 0);
    3307          12 :     pg_atomic_init_u32(&(shared->idx), 0);
    3308          12 :     shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
    3309          12 :     prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
    3310             : 
    3311          12 :     shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
    3312          12 :     lps->lvshared = shared;
    3313             : 
    3314             :     /* Prepare the dead tuple space */
    3315          12 :     dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
    3316          12 :     dead_tuples->max_tuples = maxtuples;
    3317          12 :     dead_tuples->num_tuples = 0;
    3318          12 :     MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
    3319          12 :     shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
    3320          12 :     vacrelstats->dead_tuples = dead_tuples;
    3321             : 
    3322             :     /*
    3323             :      * Allocate space for each worker's BufferUsage and WalUsage; no need to
    3324             :      * initialize
    3325             :      */
    3326          12 :     buffer_usage = shm_toc_allocate(pcxt->toc,
    3327          12 :                                     mul_size(sizeof(BufferUsage), pcxt->nworkers));
    3328          12 :     shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, buffer_usage);
    3329          12 :     lps->buffer_usage = buffer_usage;
    3330          12 :     wal_usage = shm_toc_allocate(pcxt->toc,
    3331          12 :                                  mul_size(sizeof(WalUsage), pcxt->nworkers));
    3332          12 :     shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_WAL_USAGE, wal_usage);
    3333          12 :     lps->wal_usage = wal_usage;
    3334             : 
    3335             :     /* Store query string for workers */
    3336          12 :     sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
    3337          12 :     memcpy(sharedquery, debug_query_string, querylen + 1);
    3338          12 :     sharedquery[querylen] = '\0';
    3339          12 :     shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
    3340             : 
    3341          12 :     pfree(can_parallel_vacuum);
    3342          12 :     return lps;
    3343             : }
    3344             : 
    3345             : /*
    3346             :  * Destroy the parallel context, and end parallel mode.
    3347             :  *
    3348             :  * Since writes are not allowed during parallel mode, copy the
    3349             :  * updated index statistics from DSM into local memory and then later use that
    3350             :  * to update the index statistics.  One might think that we can exit from
    3351             :  * parallel mode, update the index statistics and then destroy parallel
    3352             :  * context, but that won't be safe (see ExitParallelMode).
    3353             :  */
    3354             : static void
    3355          12 : end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
    3356             :                     LVParallelState *lps, int nindexes)
    3357             : {
    3358             :     int         i;
    3359             : 
    3360             :     Assert(!IsParallelWorker());
    3361             : 
    3362             :     /* Copy the updated statistics */
    3363          68 :     for (i = 0; i < nindexes; i++)
    3364             :     {
    3365          56 :         LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
    3366             : 
    3367             :         /*
    3368             :          * Skip unused slot.  The statistics of this index are already stored
    3369             :          * in local memory.
    3370             :          */
    3371          56 :         if (indstats == NULL)
    3372           0 :             continue;
    3373             : 
    3374          56 :         if (indstats->updated)
    3375             :         {
    3376          52 :             stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
    3377          52 :             memcpy(stats[i], &(indstats->stats), sizeof(IndexBulkDeleteResult));
    3378             :         }
    3379             :         else
    3380           4 :             stats[i] = NULL;
    3381             :     }
    3382             : 
    3383          12 :     DestroyParallelContext(lps->pcxt);
    3384          12 :     ExitParallelMode();
    3385             : 
    3386             :     /* Deactivate parallel vacuum */
    3387          12 :     pfree(lps);
    3388          12 :     lps = NULL;
    3389          12 : }
    3390             : 
    3391             : /* Return the Nth index statistics or NULL */
    3392             : static LVSharedIndStats *
    3393         224 : get_indstats(LVShared *lvshared, int n)
    3394             : {
    3395             :     int         i;
    3396             :     char       *p;
    3397             : 
    3398         224 :     if (IndStatsIsNull(lvshared, n))
    3399           0 :         return NULL;
    3400             : 
    3401         224 :     p = (char *) GetSharedIndStats(lvshared);
    3402         720 :     for (i = 0; i < n; i++)
    3403             :     {
    3404         496 :         if (IndStatsIsNull(lvshared, i))
    3405           0 :             continue;
    3406             : 
    3407         496 :         p += sizeof(LVSharedIndStats);
    3408             :     }
    3409             : 
    3410         224 :     return (LVSharedIndStats *) p;
    3411             : }
    3412             : 
    3413             : /*
    3414             :  * Returns true, if the given index can't participate in parallel index vacuum
    3415             :  * or parallel index cleanup, false, otherwise.
    3416             :  */
    3417             : static bool
    3418         168 : skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
    3419             : {
    3420         168 :     uint8       vacoptions = indrel->rd_indam->amparallelvacuumoptions;
    3421             : 
    3422             :     /* first_time must be true only if for_cleanup is true */
    3423             :     Assert(lvshared->for_cleanup || !lvshared->first_time);
    3424             : 
    3425         168 :     if (lvshared->for_cleanup)
    3426             :     {
    3427             :         /* Skip, if the index does not support parallel cleanup */
    3428         104 :         if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
    3429          72 :             ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
    3430          16 :             return true;
    3431             : 
    3432             :         /*
    3433             :          * Skip, if the index supports parallel cleanup conditionally, but we
    3434             :          * have already processed the index (for bulkdelete).  See the
    3435             :          * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
    3436             :          * when indexes support parallel cleanup conditionally.
    3437             :          */
    3438          88 :         if (!lvshared->first_time &&
    3439          48 :             ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
    3440          32 :             return true;
    3441             :     }
    3442          64 :     else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
    3443             :     {
    3444             :         /* Skip if the index does not support parallel bulk deletion */
    3445           8 :         return true;
    3446             :     }
    3447             : 
    3448         112 :     return false;
    3449             : }
    3450             : 
    3451             : /*
    3452             :  * Perform work within a launched parallel process.
    3453             :  *
    3454             :  * Since parallel vacuum workers perform only index vacuum or index cleanup,
    3455             :  * we don't need to report progress information.
    3456             :  */
    3457             : void
    3458          24 : parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
    3459             : {
    3460             :     Relation    onerel;
    3461             :     Relation   *indrels;
    3462             :     LVShared   *lvshared;
    3463             :     LVDeadTuples *dead_tuples;
    3464             :     BufferUsage *buffer_usage;
    3465             :     WalUsage   *wal_usage;
    3466             :     int         nindexes;
    3467             :     char       *sharedquery;
    3468             :     IndexBulkDeleteResult **stats;
    3469             :     LVRelStats  vacrelstats;
    3470             :     ErrorContextCallback errcallback;
    3471             : 
    3472          24 :     lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
    3473             :                                            false);
    3474          24 :     elevel = lvshared->elevel;
    3475             : 
    3476          24 :     ereport(DEBUG1,
    3477             :             (errmsg("starting parallel vacuum worker for %s",
    3478             :                     lvshared->for_cleanup ? "cleanup" : "bulk delete")));
    3479             : 
    3480             :     /* Set debug_query_string for individual workers */
    3481          24 :     sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false);
    3482          24 :     debug_query_string = sharedquery;
    3483          24 :     pgstat_report_activity(STATE_RUNNING, debug_query_string);
    3484             : 
    3485             :     /*
    3486             :      * Open table.  The lock mode is the same as the leader process.  It's
    3487             :      * okay because the lock mode does not conflict among the parallel
    3488             :      * workers.
    3489             :      */
    3490          24 :     onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
    3491             : 
    3492             :     /*
    3493             :      * Open all indexes. indrels are sorted in order by OID, which should be
    3494             :      * matched to the leader's one.
    3495             :      */
    3496          24 :     vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
    3497             :     Assert(nindexes > 0);
    3498             : 
    3499             :     /* Set dead tuple space */
    3500          24 :     dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
    3501             :                                                   PARALLEL_VACUUM_KEY_DEAD_TUPLES,
    3502             :                                                   false);
    3503             : 
    3504             :     /* Set cost-based vacuum delay */
    3505          24 :     VacuumCostActive = (VacuumCostDelay > 0);
    3506          24 :     VacuumCostBalance = 0;
    3507          24 :     VacuumPageHit = 0;
    3508          24 :     VacuumPageMiss = 0;
    3509          24 :     VacuumPageDirty = 0;
    3510          24 :     VacuumCostBalanceLocal = 0;
    3511          24 :     VacuumSharedCostBalance = &(lvshared->cost_balance);
    3512          24 :     VacuumActiveNWorkers = &(lvshared->active_nworkers);
    3513             : 
    3514             :     stats = (IndexBulkDeleteResult **)
    3515          24 :         palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
    3516             : 
    3517          24 :     if (lvshared->maintenance_work_mem_worker > 0)
    3518          24 :         maintenance_work_mem = lvshared->maintenance_work_mem_worker;
    3519             : 
    3520             :     /*
    3521             :      * Initialize vacrelstats for use as error callback arg by parallel
    3522             :      * worker.
    3523             :      */
    3524          24 :     vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(onerel));
    3525          24 :     vacrelstats.relname = pstrdup(RelationGetRelationName(onerel));
    3526          24 :     vacrelstats.indname = NULL;
    3527          24 :     vacrelstats.phase = VACUUM_ERRCB_PHASE_UNKNOWN; /* Not yet processing */
    3528             : 
    3529             :     /* Setup error traceback support for ereport() */
    3530          24 :     errcallback.callback = vacuum_error_callback;
    3531          24 :     errcallback.arg = &vacrelstats;
    3532          24 :     errcallback.previous = error_context_stack;
    3533          24 :     error_context_stack = &errcallback;
    3534             : 
    3535             :     /* Prepare to track buffer usage during parallel execution */
    3536          24 :     InstrStartParallelQuery();
    3537             : 
    3538             :     /* Process indexes to perform vacuum/cleanup */
    3539          24 :     parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes,
    3540             :                           &vacrelstats);
    3541             : 
    3542             :     /* Report buffer/WAL usage during parallel execution */
    3543          24 :     buffer_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_BUFFER_USAGE, false);
    3544          24 :     wal_usage = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_WAL_USAGE, false);
    3545          24 :     InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
    3546          24 :                           &wal_usage[ParallelWorkerNumber]);
    3547             : 
    3548             :     /* Pop the error context stack */
    3549          24 :     error_context_stack = errcallback.previous;
    3550             : 
    3551          24 :     vac_close_indexes(nindexes, indrels, RowExclusiveLock);
    3552          24 :     table_close(onerel, ShareUpdateExclusiveLock);
    3553          24 :     pfree(stats);
    3554          24 : }
    3555             : 
    3556             : /*
    3557             :  * Error context callback for errors occurring during vacuum.
    3558             :  */
    3559             : static void
    3560          34 : vacuum_error_callback(void *arg)
    3561             : {
    3562          34 :     LVRelStats *errinfo = arg;
    3563             : 
    3564          34 :     switch (errinfo->phase)
    3565             :     {
    3566          12 :         case VACUUM_ERRCB_PHASE_SCAN_HEAP:
    3567          12 :             if (BlockNumberIsValid(errinfo->blkno))
    3568          12 :                 errcontext("while scanning block %u of relation \"%s.%s\"",
    3569             :                            errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3570          12 :             break;
    3571             : 
    3572           0 :         case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
    3573           0 :             if (BlockNumberIsValid(errinfo->blkno))
    3574           0 :                 errcontext("while vacuuming block %u of relation \"%s.%s\"",
    3575             :                            errinfo->blkno, errinfo->relnamespace, errinfo->relname);
    3576           0 :             break;
    3577             : 
    3578           0 :         case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
    3579           0 :             errcontext("while vacuuming index \"%s\" of relation \"%s.%s\"",
    3580             :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3581           0 :             break;
    3582             : 
    3583           0 :         case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
    3584           0 :             errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
    3585             :                        errinfo->indname, errinfo->relnamespace, errinfo->relname);
    3586           0 :             break;
    3587             : 
    3588           6 :         case VACUUM_ERRCB_PHASE_TRUNCATE:
    3589           6 :             if (BlockNumberIsValid(errinfo->blkno))
    3590           6 :                 errcontext("while truncating relation \"%s.%s\" to %u blocks",
    3591             :                            errinfo->relnamespace, errinfo->relname, errinfo->blkno);
    3592           6 :             break;
    3593             : 
    3594          16 :         case VACUUM_ERRCB_PHASE_UNKNOWN:
    3595             :         default:
    3596          16 :             return;             /* do nothing; the errinfo may not be
    3597             :                                  * initialized */
    3598             :     }
    3599             : }
    3600             : 
    3601             : /* Update vacuum error callback for the current phase, block, and index. */
    3602             : static void
    3603      384898 : update_vacuum_error_info(LVRelStats *errinfo, int phase, BlockNumber blkno,
    3604             :                          char *indname)
    3605             : {
    3606      384898 :     errinfo->blkno = blkno;
    3607      384898 :     errinfo->phase = phase;
    3608             : 
    3609             :     /* Free index name from any previous phase */
    3610      384898 :     if (errinfo->indname)
    3611       67886 :         pfree(errinfo->indname);
    3612             : 
    3613             :     /* For index phases, save the name of the current index for the callback */
    3614      384898 :     errinfo->indname = indname ? pstrdup(indname) : NULL;
    3615      384898 : }

Generated by: LCOV version 1.13