LCOV - code coverage report
Current view: top level - src/backend/access/heap - vacuumlazy.c (source / functions)
Test: PostgreSQL 12beta2         Lines:     557 hit / 650 total = 85.7 %
Date: 2019-06-19 14:06:47        Functions:  16 hit /  16 total = 100.0 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuumlazy.c
       4             :  *    Concurrent ("lazy") vacuuming.
       5             :  *
       6             :  *
       7             :  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
       8             :  * TIDs.  We want to ensure we can vacuum even the very largest relations with
       9             :  * finite memory space usage.  To do that, we set upper bounds on the number of
      10             :  * tuples we will keep track of at once.
      11             :  *
      12             :  * We are willing to use at most maintenance_work_mem (or perhaps
      13             :  * autovacuum_work_mem) memory space to keep track of dead tuples.  We
      14             :  * initially allocate an array of TIDs of that size, with an upper limit that
      15             :  * depends on table size (this limit ensures we don't allocate a huge area
      16             :  * uselessly for vacuuming small tables).  If the array threatens to overflow,
      17             :  * we suspend the heap scan phase and perform a pass of index cleanup and page
      18             :  * compaction, then resume the heap scan with an empty TID array.
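                      :  * For example, assuming the default maintenance_work_mem of 64MB and the
                      :  * 6-byte ItemPointerData stored for each dead-tuple TID, the array can
                      :  * hold roughly 11 million TIDs, i.e. one index-cleanup pass per ~11
                      :  * million dead tuples on a sufficiently large table.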
      19             :  *
      20             :  * If we're processing a table with no indexes, we can just vacuum each page
      21             :  * as we go; there's no need to save up multiple tuples to minimize the number
      22             :  * of index scans performed.  So we don't use maintenance_work_mem memory for
      23             :  * the TID array, just enough to hold as many heap tuples as fit on one page.
      24             :  *
      25             :  *
      26             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/backend/access/heap/vacuumlazy.c
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres.h"
      36             : 
      37             : #include <math.h>
      38             : 
      39             : #include "access/genam.h"
      40             : #include "access/heapam.h"
      41             : #include "access/heapam_xlog.h"
      42             : #include "access/htup_details.h"
      43             : #include "access/multixact.h"
      44             : #include "access/transam.h"
      45             : #include "access/visibilitymap.h"
      46             : #include "access/xlog.h"
      47             : #include "catalog/storage.h"
      48             : #include "commands/dbcommands.h"
      49             : #include "commands/progress.h"
      50             : #include "commands/vacuum.h"
      51             : #include "miscadmin.h"
      52             : #include "pgstat.h"
      53             : #include "portability/instr_time.h"
      54             : #include "postmaster/autovacuum.h"
      55             : #include "storage/bufmgr.h"
      56             : #include "storage/freespace.h"
      57             : #include "storage/lmgr.h"
      58             : #include "utils/lsyscache.h"
      59             : #include "utils/memutils.h"
      60             : #include "utils/pg_rusage.h"
      61             : #include "utils/timestamp.h"
      62             : 
      63             : 
      64             : /*
      65             :  * Space/time tradeoff parameters: do these need to be user-tunable?
      66             :  *
      67             :  * To consider truncating the relation, we want there to be at least
      68             :  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
      69             :  * is less) potentially-freeable pages.
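                      :  * For example, with the constants below a 64000-page relation becomes a
                      :  * truncation candidate once at least Min(1000, 64000/16 = 4000) = 1000
                      :  * pages at its end are potentially freeable.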
      70             :  */
      71             : #define REL_TRUNCATE_MINIMUM    1000
      72             : #define REL_TRUNCATE_FRACTION   16
      73             : 
      74             : /*
      75             :  * Timing parameters for truncate locking heuristics.
      76             :  *
      77             :  * These were not exposed as user tunable GUC values because it didn't seem
      78             :  * that the potential for improvement was great enough to merit the cost of
      79             :  * supporting them.
      80             :  */
      81             : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL     20  /* ms */
      82             : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL      50  /* ms */
      83             : #define VACUUM_TRUNCATE_LOCK_TIMEOUT            5000    /* ms */
      84             : 
      85             : /*
      86             :  * When a table has no indexes, vacuum the FSM after every 8GB, approximately
      87             :  * (it won't be exact because we only vacuum FSM after processing a heap page
      88             :  * that has some removable tuples).  When there are indexes, this is ignored,
      89             :  * and we vacuum FSM after each index/heap cleaning pass.
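                      :  * (With the default BLCKSZ of 8192, the definition below works out to
                      :  * 1048576 heap pages between FSM vacuuming passes.)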
      90             :  */
      91             : #define VACUUM_FSM_EVERY_PAGES \
      92             :     ((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
      93             : 
      94             : /*
      95             :  * Guesstimation of number of dead tuples per page.  This is used to
      96             :  * provide an upper limit to memory allocated when vacuuming small
      97             :  * tables.
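                      :  * (MaxHeapTuplesPerPage is 291 with the default 8kB block size, so a
                      :  * table of N pages is charged for at most 291 * N dead-tuple slots.)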
      98             :  */
      99             : #define LAZY_ALLOC_TUPLES       MaxHeapTuplesPerPage
     100             : 
     101             : /*
     102             :  * Before we consider skipping a page that's marked as clean in
     103             :  * visibility map, we must've seen at least this many clean pages.
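                      :  * For example, with the threshold of 32 below, a run of only 10
                      :  * consecutive all-visible pages is still read; skipping such short runs
                      :  * would defeat OS readahead and would cost the ability to advance
                      :  * relfrozenxid for little benefit (see lazy_scan_heap).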
     104             :  */
     105             : #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
     106             : 
     107             : /*
     108             :  * Size of the prefetch window for lazy vacuum backwards truncation scan.
     109             :  * Needs to be a power of 2.
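                      :  * (Being a power of 2 lets the backwards truncation scan round a block
                      :  * number down to a prefetch-window boundary with a simple bit mask,
                      :  * blkno & ~(PREFETCH_SIZE - 1), rather than a division.)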
     110             :  */
     111             : #define PREFETCH_SIZE           ((BlockNumber) 32)
     112             : 
     113             : typedef struct LVRelStats
     114             : {
     115             :     /* useindex = true means two-pass strategy; false means one-pass */
     116             :     bool        useindex;
     117             :     /* Overall statistics about rel */
     118             :     BlockNumber old_rel_pages;  /* previous value of pg_class.relpages */
     119             :     BlockNumber rel_pages;      /* total number of pages */
     120             :     BlockNumber scanned_pages;  /* number of pages we examined */
     121             :     BlockNumber pinskipped_pages;   /* # of pages we skipped due to a pin */
     122             :     BlockNumber frozenskipped_pages;    /* # of frozen pages we skipped */
     123             :     BlockNumber tupcount_pages; /* pages whose tuples we counted */
     124             :     double      old_live_tuples;    /* previous value of pg_class.reltuples */
     125             :     double      new_rel_tuples; /* new estimated total # of tuples */
     126             :     double      new_live_tuples;    /* new estimated total # of live tuples */
     127             :     double      new_dead_tuples;    /* new estimated total # of dead tuples */
     128             :     BlockNumber pages_removed;
     129             :     double      tuples_deleted;
     130             :     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
     131             :     /* List of TIDs of tuples we intend to delete */
     132             :     /* NB: this list is ordered by TID address */
     133             :     int         num_dead_tuples;    /* current # of entries */
     134             :     int         max_dead_tuples;    /* # slots allocated in array */
     135             :     ItemPointer dead_tuples;    /* array of ItemPointerData */
     136             :     int         num_index_scans;
     137             :     TransactionId latestRemovedXid;
     138             :     bool        lock_waiter_detected;
     139             : } LVRelStats;
     140             : 
     141             : 
     142             : /* A few variables that don't seem worth passing around as parameters */
     143             : static int  elevel = -1;
     144             : 
     145             : static TransactionId OldestXmin;
     146             : static TransactionId FreezeLimit;
     147             : static MultiXactId MultiXactCutoff;
     148             : 
     149             : static BufferAccessStrategy vac_strategy;
     150             : 
     151             : 
     152             : /* non-export function prototypes */
     153             : static void lazy_scan_heap(Relation onerel, VacuumParams *params,
     154             :                            LVRelStats *vacrelstats, Relation *Irel, int nindexes,
     155             :                            bool aggressive);
     156             : static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
     157             : static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
     158             : static void lazy_vacuum_index(Relation indrel,
     159             :                               IndexBulkDeleteResult **stats,
     160             :                               LVRelStats *vacrelstats);
     161             : static void lazy_cleanup_index(Relation indrel,
     162             :                                IndexBulkDeleteResult *stats,
     163             :                                LVRelStats *vacrelstats);
     164             : static int  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
     165             :                              int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
     166             : static bool should_attempt_truncation(VacuumParams *params,
     167             :                                       LVRelStats *vacrelstats);
     168             : static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
     169             : static BlockNumber count_nondeletable_pages(Relation onerel,
     170             :                                             LVRelStats *vacrelstats);
     171             : static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
     172             : static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
     173             :                                    ItemPointer itemptr);
     174             : static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
     175             : static int  vac_cmp_itemptr(const void *left, const void *right);
     176             : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
     177             :                                      TransactionId *visibility_cutoff_xid, bool *all_frozen);
     178             : 
     179             : 
     180             : /*
     181             :  *  heap_vacuum_rel() -- perform VACUUM for one heap relation
     182             :  *
     183             :  *      This routine vacuums a single heap, cleans out its indexes, and
     184             :  *      updates its relpages and reltuples statistics.
     185             :  *
     186             :  *      At entry, we have already established a transaction and opened
     187             :  *      and locked the relation.
     188             :  */
     189             : void
     190       41420 : heap_vacuum_rel(Relation onerel, VacuumParams *params,
     191             :                 BufferAccessStrategy bstrategy)
     192             : {
     193             :     LVRelStats *vacrelstats;
     194             :     Relation   *Irel;
     195             :     int         nindexes;
     196             :     PGRUsage    ru0;
     197       41420 :     TimestampTz starttime = 0;
     198             :     long        secs;
     199             :     int         usecs;
     200             :     double      read_rate,
     201             :                 write_rate;
     202             :     bool        aggressive;     /* should we scan all unfrozen pages? */
     203             :     bool        scanned_all_unfrozen;   /* actually scanned all such pages? */
     204             :     TransactionId xidFullScanLimit;
     205             :     MultiXactId mxactFullScanLimit;
     206             :     BlockNumber new_rel_pages;
     207             :     BlockNumber new_rel_allvisible;
     208             :     double      new_live_tuples;
     209             :     TransactionId new_frozen_xid;
     210             :     MultiXactId new_min_multi;
     211             : 
     212             :     Assert(params != NULL);
     213             :     Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT);
     214             :     Assert(params->truncate != VACOPT_TERNARY_DEFAULT);
     215             : 
     216             :     /* not every AM requires these to be valid, but heap does */
     217             :     Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
     218             :     Assert(MultiXactIdIsValid(onerel->rd_rel->relminmxid));
     219             : 
     220             :     /* measure elapsed time iff autovacuum logging requires it */
     221       41420 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     222             :     {
     223         112 :         pg_rusage_init(&ru0);
     224         112 :         starttime = GetCurrentTimestamp();
     225             :     }
     226             : 
     227       41420 :     if (params->options & VACOPT_VERBOSE)
     228          10 :         elevel = INFO;
     229             :     else
     230       41410 :         elevel = DEBUG2;
     231             : 
     232       41420 :     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
     233             :                                   RelationGetRelid(onerel));
     234             : 
     235       41420 :     vac_strategy = bstrategy;
     236             : 
     237       41420 :     vacuum_set_xid_limits(onerel,
     238             :                           params->freeze_min_age,
     239             :                           params->freeze_table_age,
     240             :                           params->multixact_freeze_min_age,
     241             :                           params->multixact_freeze_table_age,
     242             :                           &OldestXmin, &FreezeLimit, &xidFullScanLimit,
     243             :                           &MultiXactCutoff, &mxactFullScanLimit);
     244             : 
     245             :     /*
     246             :      * We request an aggressive scan if the table's frozen Xid is now older
     247             :      * than or equal to the requested Xid full-table scan limit; or if the
     248             :      * table's minimum MultiXactId is older than or equal to the requested
     249             :      * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
     250             :      */
     251       41420 :     aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
     252             :                                                xidFullScanLimit);
     253       41420 :     aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
     254             :                                               mxactFullScanLimit);
     255       41420 :     if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
     256         228 :         aggressive = true;
     257             : 
     258             :     /*
     259             :      * Normally the relfrozenxid for an anti-wraparound vacuum will be old
     260             :      * enough to force an aggressive vacuum.  However, a concurrent vacuum
      261             :  * might have already done this work, so that the relfrozenxid in relcache has
     262             :      * been updated.  If that happens this vacuum is redundant, so skip it.
     263             :      */
     264       41420 :     if (params->is_wraparound && !aggressive)
     265             :     {
     266           0 :         ereport(DEBUG1,
     267             :                 (errmsg("skipping redundant vacuum to prevent wraparound of table \"%s.%s.%s\"",
     268             :                         get_database_name(MyDatabaseId),
     269             :                         get_namespace_name(RelationGetNamespace(onerel)),
     270             :                         RelationGetRelationName(onerel))));
     271           0 :         pgstat_progress_end_command();
     272           0 :         return;
     273             :     }
     274             : 
     275       41420 :     vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
     276             : 
     277       41420 :     vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
     278       41420 :     vacrelstats->old_live_tuples = onerel->rd_rel->reltuples;
     279       41420 :     vacrelstats->num_index_scans = 0;
     280       41420 :     vacrelstats->pages_removed = 0;
     281       41420 :     vacrelstats->lock_waiter_detected = false;
     282             : 
     283             :     /* Open all indexes of the relation */
     284       41420 :     vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
     285       79866 :     vacrelstats->useindex = (nindexes > 0 &&
     286       38446 :                              params->index_cleanup == VACOPT_TERNARY_ENABLED);
     287             : 
     288             :     /* Do the vacuuming */
     289       41420 :     lazy_scan_heap(onerel, params, vacrelstats, Irel, nindexes, aggressive);
     290             : 
     291             :     /* Done with indexes */
     292       41420 :     vac_close_indexes(nindexes, Irel, NoLock);
     293             : 
     294             :     /*
      295             :      * Compute whether we actually scanned all of the unfrozen pages. If we did,
     296             :      * we can adjust relfrozenxid and relminmxid.
     297             :      *
     298             :      * NB: We need to check this before truncating the relation, because that
     299             :      * will change ->rel_pages.
     300             :      */
     301       82840 :     if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
     302       41420 :         < vacrelstats->rel_pages)
     303             :     {
     304             :         Assert(!aggressive);
     305          10 :         scanned_all_unfrozen = false;
     306             :     }
     307             :     else
     308       41410 :         scanned_all_unfrozen = true;
     309             : 
     310             :     /*
     311             :      * Optionally truncate the relation.
     312             :      */
     313       41420 :     if (should_attempt_truncation(params, vacrelstats))
     314         132 :         lazy_truncate_heap(onerel, vacrelstats);
     315             : 
     316             :     /* Report that we are now doing final cleanup */
     317       41420 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     318             :                                  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
     319             : 
     320             :     /*
     321             :      * Update statistics in pg_class.
     322             :      *
     323             :      * A corner case here is that if we scanned no pages at all because every
     324             :      * page is all-visible, we should not update relpages/reltuples, because
     325             :      * we have no new information to contribute.  In particular this keeps us
     326             :      * from replacing relpages=reltuples=0 (which means "unknown tuple
     327             :      * density") with nonzero relpages and reltuples=0 (which means "zero
     328             :      * tuple density") unless there's some actual evidence for the latter.
     329             :      *
     330             :      * It's important that we use tupcount_pages and not scanned_pages for the
     331             :      * check described above; scanned_pages counts pages where we could not
     332             :      * get cleanup lock, and which were processed only for frozenxid purposes.
     333             :      *
     334             :      * We do update relallvisible even in the corner case, since if the table
     335             :      * is all-visible we'd definitely like to know that.  But clamp the value
     336             :      * to be not more than what we're setting relpages to.
     337             :      *
     338             :      * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
     339             :      * since then we don't know for certain that all tuples have a newer xmin.
     340             :      */
     341       41420 :     new_rel_pages = vacrelstats->rel_pages;
     342       41420 :     new_live_tuples = vacrelstats->new_live_tuples;
     343       41420 :     if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
     344             :     {
     345           2 :         new_rel_pages = vacrelstats->old_rel_pages;
     346           2 :         new_live_tuples = vacrelstats->old_live_tuples;
     347             :     }
     348             : 
     349       41420 :     visibilitymap_count(onerel, &new_rel_allvisible, NULL);
     350       41420 :     if (new_rel_allvisible > new_rel_pages)
     351           0 :         new_rel_allvisible = new_rel_pages;
     352             : 
     353       41420 :     new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
     354       41420 :     new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
     355             : 
     356       41420 :     vac_update_relstats(onerel,
     357             :                         new_rel_pages,
     358             :                         new_live_tuples,
     359             :                         new_rel_allvisible,
     360             :                         nindexes > 0,
     361             :                         new_frozen_xid,
     362             :                         new_min_multi,
     363             :                         false);
     364             : 
     365             :     /* report results to the stats collector, too */
     366       82840 :     pgstat_report_vacuum(RelationGetRelid(onerel),
     367       41420 :                          onerel->rd_rel->relisshared,
     368             :                          new_live_tuples,
     369       41420 :                          vacrelstats->new_dead_tuples);
     370       41420 :     pgstat_progress_end_command();
     371             : 
     372             :     /* and log the action if appropriate */
     373       41420 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     374             :     {
     375         112 :         TimestampTz endtime = GetCurrentTimestamp();
     376             : 
     377         112 :         if (params->log_min_duration == 0 ||
     378           0 :             TimestampDifferenceExceeds(starttime, endtime,
     379             :                                        params->log_min_duration))
     380             :         {
     381             :             StringInfoData buf;
     382             :             char       *msgfmt;
     383             : 
     384         112 :             TimestampDifference(starttime, endtime, &secs, &usecs);
     385             : 
     386         112 :             read_rate = 0;
     387         112 :             write_rate = 0;
     388         112 :             if ((secs > 0) || (usecs > 0))
     389             :             {
     390         224 :                 read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
     391         112 :                     (secs + usecs / 1000000.0);
     392         224 :                 write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
     393         112 :                     (secs + usecs / 1000000.0);
     394             :             }
     395             : 
     396             :             /*
     397             :              * This is pretty messy, but we split it up so that we can skip
     398             :              * emitting individual parts of the message when not applicable.
     399             :              */
     400         112 :             initStringInfo(&buf);
     401         112 :             if (params->is_wraparound)
     402             :             {
     403             :                 /* an anti-wraparound vacuum has to be aggressive */
     404             :                 Assert(aggressive);
     405           0 :                 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
     406             :             }
     407             :             else
     408             :             {
     409         112 :                 if (aggressive)
     410           0 :                     msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
     411             :                 else
     412         112 :                     msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
     413             :             }
     414         224 :             appendStringInfo(&buf, msgfmt,
     415             :                              get_database_name(MyDatabaseId),
     416         112 :                              get_namespace_name(RelationGetNamespace(onerel)),
     417         112 :                              RelationGetRelationName(onerel),
     418             :                              vacrelstats->num_index_scans);
     419         112 :             appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
     420             :                              vacrelstats->pages_removed,
     421             :                              vacrelstats->rel_pages,
     422             :                              vacrelstats->pinskipped_pages,
     423             :                              vacrelstats->frozenskipped_pages);
     424         224 :             appendStringInfo(&buf,
     425         112 :                              _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
     426             :                              vacrelstats->tuples_deleted,
     427             :                              vacrelstats->new_rel_tuples,
     428             :                              vacrelstats->new_dead_tuples,
     429             :                              OldestXmin);
     430         224 :             appendStringInfo(&buf,
     431         112 :                              _("buffer usage: %d hits, %d misses, %d dirtied\n"),
     432             :                              VacuumPageHit,
     433             :                              VacuumPageMiss,
     434             :                              VacuumPageDirty);
     435         112 :             appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     436             :                              read_rate, write_rate);
     437         112 :             appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
     438             : 
     439         112 :             ereport(LOG,
     440             :                     (errmsg_internal("%s", buf.data)));
     441         112 :             pfree(buf.data);
     442             :         }
     443             :     }
     444             : }
     445             : 
     446             : /*
     447             :  * For Hot Standby we need to know the highest transaction id that will
     448             :  * be removed by any change. VACUUM proceeds in a number of passes so
     449             :  * we need to consider how each pass operates. The first phase runs
     450             :  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
     451             :  * progresses - these will have a latestRemovedXid on each record.
     452             :  * In some cases this removes all of the tuples to be removed, though
     453             :  * often we have dead tuples with index pointers so we must remember them
     454             :  * for removal in phase 3. Index records for those rows are removed
     455             :  * in phase 2 and index blocks do not have MVCC information attached.
     456             :  * So before we can allow removal of any index tuples we need to issue
     457             :  * a WAL record containing the latestRemovedXid of rows that will be
     458             :  * removed in phase three. This allows recovery queries to block at the
     459             :  * correct place, i.e. before phase two, rather than during phase three
     460             :  * which would be after the rows have become inaccessible.
     461             :  */
     462             : static void
     463        2338 : vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
     464             : {
     465             :     /*
     466             :      * Skip this for relations for which no WAL is to be written, or if we're
     467             :      * not trying to support archive recovery.
     468             :      */
     469        2338 :     if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
     470          26 :         return;
     471             : 
     472             :     /*
     473             :      * No need to write the record at all unless it contains a valid value
     474             :      */
     475        2312 :     if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
     476        1318 :         (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
     477             : }
     478             : 
     479             : /*
     480             :  *  lazy_scan_heap() -- scan an open heap relation
     481             :  *
     482             :  *      This routine prunes each page in the heap, which will among other
     483             :  *      things truncate dead tuples to dead line pointers, defragment the
     484             :  *      page, and set commit status bits (see heap_page_prune).  It also builds
     485             :  *      lists of dead tuples and pages with free space, calculates statistics
     486             :  *      on the number of live tuples in the heap, and marks pages as
     487             :  *      all-visible if appropriate.  When done, or when we run low on space for
     488             :  *      dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
     489             :  *      to reclaim dead line pointers.
     490             :  *
     491             :  *      If there are no indexes then we can reclaim line pointers on the fly;
     492             :  *      dead line pointers need only be retained until all index pointers that
     493             :  *      reference them have been killed.
     494             :  */
     495             : static void
     496       41420 : lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
     497             :                Relation *Irel, int nindexes, bool aggressive)
     498             : {
     499             :     BlockNumber nblocks,
     500             :                 blkno;
     501             :     HeapTupleData tuple;
     502             :     char       *relname;
     503       41420 :     TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid;
     504       41420 :     TransactionId relminmxid = onerel->rd_rel->relminmxid;
     505             :     BlockNumber empty_pages,
     506             :                 vacuumed_pages,
     507             :                 next_fsm_block_to_vacuum;
     508             :     double      num_tuples,     /* total number of nonremovable tuples */
     509             :                 live_tuples,    /* live tuples (reltuples estimate) */
     510             :                 tups_vacuumed,  /* tuples cleaned up by vacuum */
     511             :                 nkeep,          /* dead-but-not-removable tuples */
     512             :                 nunused;        /* unused line pointers */
     513             :     IndexBulkDeleteResult **indstats;
     514             :     int         i;
     515             :     PGRUsage    ru0;
     516       41420 :     Buffer      vmbuffer = InvalidBuffer;
     517             :     BlockNumber next_unskippable_block;
     518             :     bool        skipping_blocks;
     519             :     xl_heap_freeze_tuple *frozen;
     520             :     StringInfoData buf;
     521       41420 :     const int   initprog_index[] = {
     522             :         PROGRESS_VACUUM_PHASE,
     523             :         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
     524             :         PROGRESS_VACUUM_MAX_DEAD_TUPLES
     525             :     };
     526             :     int64       initprog_val[3];
     527             : 
     528       41420 :     pg_rusage_init(&ru0);
     529             : 
     530       41420 :     relname = RelationGetRelationName(onerel);
     531       41420 :     if (aggressive)
     532       36580 :         ereport(elevel,
     533             :                 (errmsg("aggressively vacuuming \"%s.%s\"",
     534             :                         get_namespace_name(RelationGetNamespace(onerel)),
     535             :                         relname)));
     536             :     else
     537        4840 :         ereport(elevel,
     538             :                 (errmsg("vacuuming \"%s.%s\"",
     539             :                         get_namespace_name(RelationGetNamespace(onerel)),
     540             :                         relname)));
     541             : 
     542       41420 :     empty_pages = vacuumed_pages = 0;
     543       41420 :     next_fsm_block_to_vacuum = (BlockNumber) 0;
     544       41420 :     num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
     545             : 
     546       41420 :     indstats = (IndexBulkDeleteResult **)
     547       41420 :         palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
     548             : 
     549       41420 :     nblocks = RelationGetNumberOfBlocks(onerel);
     550       41420 :     vacrelstats->rel_pages = nblocks;
     551       41420 :     vacrelstats->scanned_pages = 0;
     552       41420 :     vacrelstats->tupcount_pages = 0;
     553       41420 :     vacrelstats->nonempty_pages = 0;
     554       41420 :     vacrelstats->latestRemovedXid = InvalidTransactionId;
     555             : 
     556       41420 :     lazy_space_alloc(vacrelstats, nblocks);
     557       41420 :     frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
     558             : 
     559             :     /* Report that we're scanning the heap, advertising total # of blocks */
     560       41420 :     initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
     561       41420 :     initprog_val[1] = nblocks;
     562       41420 :     initprog_val[2] = vacrelstats->max_dead_tuples;
     563       41420 :     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
     564             : 
     565             :     /*
     566             :      * Except when aggressive is set, we want to skip pages that are
     567             :      * all-visible according to the visibility map, but only when we can skip
     568             :      * at least SKIP_PAGES_THRESHOLD consecutive pages.  Since we're reading
     569             :      * sequentially, the OS should be doing readahead for us, so there's no
     570             :      * gain in skipping a page now and then; that's likely to disable
     571             :      * readahead and so be counterproductive. Also, skipping even a single
     572             :      * page means that we can't update relfrozenxid, so we only want to do it
     573             :      * if we can skip a goodly number of pages.
     574             :      *
     575             :      * When aggressive is set, we can't skip pages just because they are
     576             :      * all-visible, but we can still skip pages that are all-frozen, since
     577             :      * such pages do not need freezing and do not affect the value that we can
     578             :      * safely set for relfrozenxid or relminmxid.
     579             :      *
     580             :      * Before entering the main loop, establish the invariant that
     581             :      * next_unskippable_block is the next block number >= blkno that we can't
     582             :      * skip based on the visibility map, either all-visible for a regular scan
     583             :      * or all-frozen for an aggressive scan.  We set it to nblocks if there's
     584             :      * no such block.  We also set up the skipping_blocks flag correctly at
     585             :      * this stage.
     586             :      *
     587             :      * Note: The value returned by visibilitymap_get_status could be slightly
     588             :      * out-of-date, since we make this test before reading the corresponding
     589             :      * heap page or locking the buffer.  This is OK.  If we mistakenly think
     590             :      * that the page is all-visible or all-frozen when in fact the flag's just
     591             :      * been cleared, we might fail to vacuum the page.  It's easy to see that
     592             :      * skipping a page when aggressive is not set is not a very big deal; we
     593             :      * might leave some dead tuples lying around, but the next vacuum will
     594             :      * find them.  But even when aggressive *is* set, it's still OK if we miss
     595             :      * a page whose all-frozen marking has just been cleared.  Any new XIDs
     596             :      * just added to that page are necessarily newer than the GlobalXmin we
     597             :      * computed, so they'll have no effect on the value to which we can safely
     598             :      * set relfrozenxid.  A similar argument applies for MXIDs and relminmxid.
     599             :      *
     600             :      * We will scan the table's last page, at least to the extent of
     601             :      * determining whether it has tuples or not, even if it should be skipped
     602             :      * according to the above rules; except when we've already determined that
     603             :      * it's not worth trying to truncate the table.  This avoids having
     604             :      * lazy_truncate_heap() take access-exclusive lock on the table to attempt
     605             :      * a truncation that just fails immediately because there are tuples in
     606             :      * the last page.  This is worth avoiding mainly because such a lock must
     607             :      * be replayed on any hot standby, where it can be disruptive.
     608             :      */
     609       41420 :     next_unskippable_block = 0;
     610       41420 :     if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     611             :     {
     612       93414 :         while (next_unskippable_block < nblocks)
     613             :         {
     614             :             uint8       vmstatus;
     615             : 
     616       26854 :             vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
     617             :                                                 &vmbuffer);
     618       26854 :             if (aggressive)
     619             :             {
     620       16032 :                 if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
     621       14470 :                     break;
     622             :             }
     623             :             else
     624             :             {
     625       10822 :                 if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
     626        1354 :                     break;
     627             :             }
     628       11030 :             vacuum_delay_point();
     629       11030 :             next_unskippable_block++;
     630             :         }
     631             :     }
     632             : 
     633       41420 :     if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
     634         120 :         skipping_blocks = true;
     635             :     else
     636       41300 :         skipping_blocks = false;
     637             : 
     638      225448 :     for (blkno = 0; blkno < nblocks; blkno++)
     639             :     {
     640             :         Buffer      buf;
     641             :         Page        page;
     642             :         OffsetNumber offnum,
     643             :                     maxoff;
     644             :         bool        tupgone,
     645             :                     hastup;
     646             :         int         prev_dead_count;
     647             :         int         nfrozen;
     648             :         Size        freespace;
     649      184028 :         bool        all_visible_according_to_vm = false;
     650             :         bool        all_visible;
     651      184028 :         bool        all_frozen = true;  /* provided all_visible is also true */
     652             :         bool        has_dead_tuples;
     653      184028 :         TransactionId visibility_cutoff_xid = InvalidTransactionId;
     654             : 
     655             :         /* see note above about forcing scanning of last page */
     656             : #define FORCE_CHECK_PAGE() \
     657             :         (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats))
     658             : 
     659      184028 :         pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
     660             : 
     661      184028 :         if (blkno == next_unskippable_block)
     662             :         {
     663             :             /* Time to advance next_unskippable_block */
     664      170936 :             next_unskippable_block++;
     665      170936 :             if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     666             :             {
     667      342174 :                 while (next_unskippable_block < nblocks)
     668             :                 {
     669             :                     uint8       vmskipflags;
     670             : 
     671      156294 :                     vmskipflags = visibilitymap_get_status(onerel,
     672             :                                                            next_unskippable_block,
     673             :                                                            &vmbuffer);
     674      156294 :                     if (aggressive)
     675             :                     {
     676      118398 :                         if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
     677      118398 :                             break;
     678             :                     }
     679             :                     else
     680             :                     {
     681       37896 :                         if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
     682       35834 :                             break;
     683             :                     }
     684        2062 :                     vacuum_delay_point();
     685        2062 :                     next_unskippable_block++;
     686             :                 }
     687             :             }
     688             : 
     689             :             /*
     690             :              * We know we can't skip the current block.  But set up
     691             :              * skipping_blocks to do the right thing at the following blocks.
     692             :              */
     693      170936 :             if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
     694           6 :                 skipping_blocks = true;
     695             :             else
     696      170930 :                 skipping_blocks = false;
     697             : 
     698             :             /*
     699             :              * Normally, the fact that we can't skip this block must mean that
     700             :              * it's not all-visible.  But in an aggressive vacuum we know only
     701             :              * that it's not all-frozen, so it might still be all-visible.
     702             :              */
     703      170936 :             if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
     704         786 :                 all_visible_according_to_vm = true;
     705             :         }
     706             :         else
     707             :         {
     708             :             /*
     709             :              * The current block is potentially skippable; if we've seen a
     710             :              * long enough run of skippable blocks to justify skipping it, and
     711             :              * we're not forced to check it, then go ahead and skip.
     712             :              * Otherwise, the page must be at least all-visible if not
     713             :              * all-frozen, so we can set all_visible_according_to_vm = true.
     714             :              */
     715       13092 :             if (skipping_blocks && !FORCE_CHECK_PAGE())
     716             :             {
     717             :                 /*
     718             :                  * Tricky, tricky.  If this is in aggressive vacuum, the page
     719             :                  * must have been all-frozen at the time we checked whether it
     720             :                  * was skippable, but it might not be any more.  We must be
     721             :                  * careful to count it as a skipped all-frozen page in that
     722             :                  * case, or else we'll think we can't update relfrozenxid and
     723             :                  * relminmxid.  If it's not an aggressive vacuum, we don't
     724             :                  * know whether it was all-frozen, so we have to recheck; but
     725             :                  * in this case an approximate answer is OK.
     726             :                  */
     727        8052 :                 if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
     728        6480 :                     vacrelstats->frozenskipped_pages++;
     729       16262 :                 continue;
     730             :             }
     731        5040 :             all_visible_according_to_vm = true;
     732             :         }
     733             : 
     734      175976 :         vacuum_delay_point();
     735             : 
     736             :         /*
     737             :          * If we are close to overrunning the available space for dead-tuple
     738             :          * TIDs, pause and do a cycle of vacuuming before we tackle this page.
     739             :          */
     740      175976 :         if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
     741           0 :             vacrelstats->num_dead_tuples > 0)
     742             :         {
     743           0 :             const int   hvp_index[] = {
     744             :                 PROGRESS_VACUUM_PHASE,
     745             :                 PROGRESS_VACUUM_NUM_INDEX_VACUUMS
     746             :             };
     747             :             int64       hvp_val[2];
     748             : 
     749             :             /*
     750             :              * Before beginning index vacuuming, we release any pin we may
     751             :              * hold on the visibility map page.  This isn't necessary for
     752             :              * correctness, but we do it anyway to avoid holding the pin
     753             :              * across a lengthy, unrelated operation.
     754             :              */
     755           0 :             if (BufferIsValid(vmbuffer))
     756             :             {
     757           0 :                 ReleaseBuffer(vmbuffer);
     758           0 :                 vmbuffer = InvalidBuffer;
     759             :             }
     760             : 
     761             :             /* Log cleanup info before we touch indexes */
     762           0 :             vacuum_log_cleanup_info(onerel, vacrelstats);
     763             : 
     764             :             /* Report that we are now vacuuming indexes */
     765           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     766             :                                          PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
     767             : 
     768             :             /* Remove index entries */
     769           0 :             for (i = 0; i < nindexes; i++)
     770           0 :                 lazy_vacuum_index(Irel[i],
     771           0 :                                   &indstats[i],
     772             :                                   vacrelstats);
     773             : 
     774             :             /*
     775             :              * Report that we are now vacuuming the heap.  We also increase
     776             :              * the number of index scans here; note that by using
     777             :              * pgstat_progress_update_multi_param we can update both
     778             :              * parameters atomically.
     779             :              */
     780           0 :             hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP;
     781           0 :             hvp_val[1] = vacrelstats->num_index_scans + 1;
     782           0 :             pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
     783             : 
     784             :             /* Remove tuples from heap */
     785           0 :             lazy_vacuum_heap(onerel, vacrelstats);
     786             : 
     787             :             /*
     788             :              * Forget the now-vacuumed tuples, and press on, but be careful
     789             :              * not to reset latestRemovedXid since we want that value to be
     790             :              * valid.
     791             :              */
     792           0 :             vacrelstats->num_dead_tuples = 0;
     793           0 :             vacrelstats->num_index_scans++;
     794             : 
     795             :             /*
     796             :              * Vacuum the Free Space Map to make newly-freed space visible on
     797             :              * upper-level FSM pages.  Note we have not yet processed blkno.
     798             :              */
     799           0 :             FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
     800           0 :             next_fsm_block_to_vacuum = blkno;
     801             : 
     802             :             /* Report that we are once again scanning the heap */
     803           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     804             :                                          PROGRESS_VACUUM_PHASE_SCAN_HEAP);
     805             :         }
     806             : 
     807             :         /*
     808             :          * Pin the visibility map page in case we need to mark the page
     809             :          * all-visible.  In most cases this will be very cheap, because we'll
     810             :          * already have the correct page pinned anyway.  However, it's
     811             :          * possible that (a) next_unskippable_block is covered by a different
     812             :          * VM page than the current block or (b) we released our pin and did a
     813             :          * cycle of index vacuuming.
     814             :          *
     815             :          */
     816      175976 :         visibilitymap_pin(onerel, blkno, &vmbuffer);
     817             : 
     818      175976 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
     819             :                                  RBM_NORMAL, vac_strategy);
     820             : 
     821             :         /* We need buffer cleanup lock so that we can prune HOT chains. */
     822      175976 :         if (!ConditionalLockBufferForCleanup(buf))
     823             :         {
     824             :             /*
     825             :              * If we're not performing an aggressive scan to guard against XID
     826             :              * wraparound, and we don't want to forcibly check the page, then
     827             :              * it's OK to skip vacuuming pages we get a lock conflict on. They
     828             :              * will be dealt with in some future vacuum.
     829             :              */
     830           4 :             if (!aggressive && !FORCE_CHECK_PAGE())
     831             :             {
     832           2 :                 ReleaseBuffer(buf);
     833           2 :                 vacrelstats->pinskipped_pages++;
     834           2 :                 continue;
     835             :             }
     836             : 
     837             :             /*
     838             :              * Read the page with share lock to see if any xids on it need to
     839             :              * be frozen.  If not we just skip the page, after updating our
     840             :              * scan statistics.  If there are some, we wait for cleanup lock.
     841             :              *
     842             :              * We could defer the lock request further by remembering the page
     843             :              * and coming back to it later, or we could even register
     844             :              * ourselves for multiple buffers and then service whichever one
     845             :              * is received first.  For now, this seems good enough.
     846             :              *
     847             :              * If we get here with aggressive false, then we're just forcibly
     848             :              * checking the page, and so we don't want to insist on getting
     849             :              * the lock; we only need to know if the page contains tuples, so
     850             :              * that we can update nonempty_pages correctly.  It's convenient
     851             :              * to use lazy_check_needs_freeze() for both situations, though.
     852             :              */
     853           2 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     854           2 :             if (!lazy_check_needs_freeze(buf, &hastup))
     855             :             {
     856           2 :                 UnlockReleaseBuffer(buf);
     857           2 :                 vacrelstats->scanned_pages++;
     858           2 :                 vacrelstats->pinskipped_pages++;
     859           2 :                 if (hastup)
     860           2 :                     vacrelstats->nonempty_pages = blkno + 1;
     861           2 :                 continue;
     862             :             }
     863           0 :             if (!aggressive)
     864             :             {
     865             :                 /*
     866             :                  * Here, we must not advance scanned_pages; that would amount
     867             :                  * to claiming that the page contains no freezable tuples.
     868             :                  */
     869           0 :                 UnlockReleaseBuffer(buf);
     870           0 :                 vacrelstats->pinskipped_pages++;
     871           0 :                 if (hastup)
     872           0 :                     vacrelstats->nonempty_pages = blkno + 1;
     873           0 :                 continue;
     874             :             }
     875           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     876           0 :             LockBufferForCleanup(buf);
     877             :             /* drop through to normal processing */
     878             :         }
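
        /*
         * Illustrative summary of the branch above: when the cleanup lock is
         * not immediately available, the page is skipped outright unless this
         * is an aggressive vacuum or FORCE_CHECK_PAGE() applies; otherwise the
         * page is inspected under a share lock, and VACUUM only blocks in
         * LockBufferForCleanup() when an aggressive scan finds tuples that
         * actually need freezing.
         */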
     879             : 
     880      175972 :         vacrelstats->scanned_pages++;
     881      175972 :         vacrelstats->tupcount_pages++;
     882             : 
     883      175972 :         page = BufferGetPage(buf);
     884             : 
     885      175972 :         if (PageIsNew(page))
     886             :         {
     887             :             bool        still_new;
     888             : 
     889             :             /*
      890             :              * All-zeroes pages can be left over either when a backend extends
      891             :              * the relation by a single page but crashes before the newly
     892             :              * initialized page has been written out, or when bulk-extending
     893             :              * the relation (which creates a number of empty pages at the tail
     894             :              * end of the relation, but enters them into the FSM).
     895             :              *
     896             :              * Make sure these pages are in the FSM, to ensure they can be
     897             :              * reused. Do that by testing if there's any space recorded for
     898             :              * the page. If not, enter it.
     899             :              *
     900             :              * Note we do not enter the page into the visibilitymap. That has
     901             :              * the downside that we repeatedly visit this page in subsequent
      902             :              * vacuums, but otherwise we'd never discover the space on a
     903             :              * promoted standby. The harm of repeated checking ought to
     904             :              * normally not be too bad - the space usually should be used at
     905             :              * some point, otherwise there wouldn't be any regular vacuums.
     906             :              */
     907             : 
     908             :             /*
      909             :              * Check the FSM only after releasing the lock; the FSM is
      910             :              * approximate, after all.
     911             :              */
     912         154 :             still_new = PageIsNew(page);
     913         154 :             UnlockReleaseBuffer(buf);
     914             : 
     915         154 :             if (still_new)
     916             :             {
     917         154 :                 empty_pages++;
     918             : 
     919         154 :                 if (GetRecordedFreeSpace(onerel, blkno) == 0)
     920             :                 {
     921             :                     Size        freespace;
     922             : 
     923           0 :                     freespace = BufferGetPageSize(buf) - SizeOfPageHeaderData;
     924           0 :                     RecordPageWithFreeSpace(onerel, blkno, freespace);
     925             :                 }
     926             :             }
     927         154 :             continue;
     928             :         }
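
        /*
         * Illustrative arithmetic for the branch above: with the default 8 kB
         * block size, the free space recorded for a still-new page is
         * BLCKSZ - SizeOfPageHeaderData = 8192 - 24 = 8168 bytes, i.e. the
         * whole page minus its header.
         */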
     929             : 
     930      175818 :         if (PageIsEmpty(page))
     931             :         {
     932           0 :             empty_pages++;
     933           0 :             freespace = PageGetHeapFreeSpace(page);
     934             : 
     935             :             /*
     936             :              * Empty pages are always all-visible and all-frozen (note that
     937             :              * the same is currently not true for new pages, see above).
     938             :              */
     939           0 :             if (!PageIsAllVisible(page))
     940             :             {
     941           0 :                 START_CRIT_SECTION();
     942             : 
     943             :                 /* mark buffer dirty before writing a WAL record */
     944           0 :                 MarkBufferDirty(buf);
     945             : 
     946             :                 /*
     947             :                  * It's possible that another backend has extended the heap,
     948             :                  * initialized the page, and then failed to WAL-log the page
     949             :                  * due to an ERROR.  Since heap extension is not WAL-logged,
     950             :                  * recovery might try to replay our record setting the page
     951             :                  * all-visible and find that the page isn't initialized, which
     952             :                  * will cause a PANIC.  To prevent that, check whether the
     953             :                  * page has been previously WAL-logged, and if not, do that
     954             :                  * now.
     955             :                  */
     956           0 :                 if (RelationNeedsWAL(onerel) &&
     957           0 :                     PageGetLSN(page) == InvalidXLogRecPtr)
     958           0 :                     log_newpage_buffer(buf, true);
     959             : 
     960           0 :                 PageSetAllVisible(page);
     961           0 :                 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
     962             :                                   vmbuffer, InvalidTransactionId,
     963             :                                   VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
     964           0 :                 END_CRIT_SECTION();
     965             :             }
     966             : 
     967           0 :             UnlockReleaseBuffer(buf);
     968           0 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
     969           0 :             continue;
     970             :         }
     971             : 
     972             :         /*
     973             :          * Prune all HOT-update chains in this page.
     974             :          *
     975             :          * We count tuples removed by the pruning step as removed by VACUUM.
     976             :          */
     977      175818 :         tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
     978             :                                          &vacrelstats->latestRemovedXid);
     979             : 
     980             :         /*
     981             :          * Now scan the page to collect vacuumable items and check for tuples
     982             :          * requiring freezing.
     983             :          */
     984      175818 :         all_visible = true;
     985      175818 :         has_dead_tuples = false;
     986      175818 :         nfrozen = 0;
     987      175818 :         hastup = false;
     988      175818 :         prev_dead_count = vacrelstats->num_dead_tuples;
     989      175818 :         maxoff = PageGetMaxOffsetNumber(page);
     990             : 
     991             :         /*
     992             :          * Note: If you change anything in the loop below, also look at
     993             :          * heap_page_is_all_visible to see if that needs to be changed.
     994             :          */
     995    12064100 :         for (offnum = FirstOffsetNumber;
     996             :              offnum <= maxoff;
     997    11712464 :              offnum = OffsetNumberNext(offnum))
     998             :         {
     999             :             ItemId      itemid;
    1000             : 
    1001    11712464 :             itemid = PageGetItemId(page, offnum);
    1002             : 
    1003             :             /* Unused items require no processing, but we count 'em */
    1004    11712464 :             if (!ItemIdIsUsed(itemid))
    1005             :             {
    1006      131148 :                 nunused += 1;
    1007      131148 :                 continue;
    1008             :             }
    1009             : 
    1010             :             /* Redirect items mustn't be touched */
    1011    11581316 :             if (ItemIdIsRedirected(itemid))
    1012             :             {
    1013       46302 :                 hastup = true;  /* this page won't be truncatable */
    1014       46302 :                 continue;
    1015             :             }
    1016             : 
    1017    11535014 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
    1018             : 
    1019             :             /*
    1020             :              * DEAD line pointers are to be vacuumed normally; but we don't
    1021             :              * count them in tups_vacuumed, else we'd be double-counting (at
    1022             :              * least in the common case where heap_page_prune() just freed up
    1023             :              * a non-HOT tuple).
    1024             :              */
    1025    11535014 :             if (ItemIdIsDead(itemid))
    1026             :             {
    1027     1144982 :                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
    1028     1144982 :                 all_visible = false;
    1029     1144982 :                 continue;
    1030             :             }
    1031             : 
    1032             :             Assert(ItemIdIsNormal(itemid));
    1033             : 
    1034    10390032 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    1035    10390032 :             tuple.t_len = ItemIdGetLength(itemid);
    1036    10390032 :             tuple.t_tableOid = RelationGetRelid(onerel);
    1037             : 
    1038    10390032 :             tupgone = false;
    1039             : 
    1040             :             /*
    1041             :              * The criteria for counting a tuple as live in this block need to
    1042             :              * match what analyze.c's acquire_sample_rows() does, otherwise
    1043             :              * VACUUM and ANALYZE may produce wildly different reltuples
    1044             :              * values, e.g. when there are many recently-dead tuples.
    1045             :              *
    1046             :              * The logic here is a bit simpler than acquire_sample_rows(), as
    1047             :              * VACUUM can't run inside a transaction block, which makes some
    1048             :              * cases impossible (e.g. in-progress insert from the same
    1049             :              * transaction).
    1050             :              */
    1051    10390032 :             switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
    1052             :             {
    1053             :                 case HEAPTUPLE_DEAD:
    1054             : 
    1055             :                     /*
    1056             :                      * Ordinarily, DEAD tuples would have been removed by
    1057             :                      * heap_page_prune(), but it's possible that the tuple
    1058             :                      * state changed since heap_page_prune() looked.  In
    1059             :                      * particular an INSERT_IN_PROGRESS tuple could have
    1060             :                      * changed to DEAD if the inserter aborted.  So this
    1061             :                      * cannot be considered an error condition.
    1062             :                      *
    1063             :                      * If the tuple is HOT-updated then it must only be
    1064             :                      * removed by a prune operation; so we keep it just as if
    1065             :                      * it were RECENTLY_DEAD.  Also, if it's a heap-only
    1066             :                      * tuple, we choose to keep it, because it'll be a lot
    1067             :                      * cheaper to get rid of it in the next pruning pass than
    1068             :                      * to treat it like an indexed tuple. Finally, if index
    1069             :                      * cleanup is disabled, the second heap pass will not
    1070             :                      * execute, and the tuple will not get removed, so we must
    1071             :                      * treat it like any other dead tuple that we choose to
    1072             :                      * keep.
    1073             :                      *
    1074             :                      * If this were to happen for a tuple that actually needed
    1075             :                      * to be deleted, we'd be in trouble, because it'd
    1076             :                      * possibly leave a tuple below the relation's xmin
    1077             :                      * horizon alive.  heap_prepare_freeze_tuple() is prepared
    1078             :                      * to detect that case and abort the transaction,
    1079             :                      * preventing corruption.
    1080             :                      */
    1081           0 :                     if (HeapTupleIsHotUpdated(&tuple) ||
    1082           0 :                         HeapTupleIsHeapOnly(&tuple) ||
    1083           0 :                         params->index_cleanup == VACOPT_TERNARY_DISABLED)
    1084           0 :                         nkeep += 1;
    1085             :                     else
    1086           0 :                         tupgone = true; /* we can delete the tuple */
    1087           0 :                     all_visible = false;
    1088           0 :                     break;
    1089             :                 case HEAPTUPLE_LIVE:
    1090             : 
    1091             :                     /*
    1092             :                      * Count it as live.  Not only is this natural, but it's
    1093             :                      * also what acquire_sample_rows() does.
    1094             :                      */
    1095     9587936 :                     live_tuples += 1;
    1096             : 
    1097             :                     /*
    1098             :                      * Is the tuple definitely visible to all transactions?
    1099             :                      *
    1100             :                      * NB: Like with per-tuple hint bits, we can't set the
    1101             :                      * PD_ALL_VISIBLE flag if the inserter committed
    1102             :                      * asynchronously. See SetHintBits for more info. Check
    1103             :                      * that the tuple is hinted xmin-committed because of
    1104             :                      * that.
    1105             :                      */
    1106     9587936 :                     if (all_visible)
    1107             :                     {
    1108             :                         TransactionId xmin;
    1109             : 
    1110     9039578 :                         if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    1111             :                         {
    1112          34 :                             all_visible = false;
    1113          34 :                             break;
    1114             :                         }
    1115             : 
    1116             :                         /*
    1117             :                          * The inserter definitely committed. But is it old
    1118             :                          * enough that everyone sees it as committed?
    1119             :                          */
    1120     9039544 :                         xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    1121     9039544 :                         if (!TransactionIdPrecedes(xmin, OldestXmin))
    1122             :                         {
    1123        1920 :                             all_visible = false;
    1124        1920 :                             break;
    1125             :                         }
    1126             : 
    1127             :                         /* Track newest xmin on page. */
    1128     9037624 :                         if (TransactionIdFollows(xmin, visibility_cutoff_xid))
    1129      416880 :                             visibility_cutoff_xid = xmin;
    1130             :                     }
    1131     9585982 :                     break;
    1132             :                 case HEAPTUPLE_RECENTLY_DEAD:
    1133             : 
    1134             :                     /*
     1135             :                      * If the tuple is only recently deleted, then we must not
     1136             :                      * remove it from the relation.
    1137             :                      */
    1138      801448 :                     nkeep += 1;
    1139      801448 :                     all_visible = false;
    1140      801448 :                     break;
    1141             :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1142             : 
    1143             :                     /*
    1144             :                      * This is an expected case during concurrent vacuum.
    1145             :                      *
    1146             :                      * We do not count these rows as live, because we expect
    1147             :                      * the inserting transaction to update the counters at
    1148             :                      * commit, and we assume that will happen only after we
    1149             :                      * report our results.  This assumption is a bit shaky,
    1150             :                      * but it is what acquire_sample_rows() does, so be
    1151             :                      * consistent.
    1152             :                      */
    1153         646 :                     all_visible = false;
    1154         646 :                     break;
    1155             :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1156             :                     /* This is an expected case during concurrent vacuum */
    1157           2 :                     all_visible = false;
    1158             : 
    1159             :                     /*
    1160             :                      * Count such rows as live.  As above, we assume the
    1161             :                      * deleting transaction will commit and update the
    1162             :                      * counters after we report.
    1163             :                      */
    1164           2 :                     live_tuples += 1;
    1165           2 :                     break;
    1166             :                 default:
    1167           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1168             :                     break;
    1169             :             }
    1170             : 
    1171    10390032 :             if (tupgone)
    1172             :             {
    1173           0 :                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
    1174           0 :                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
    1175             :                                                        &vacrelstats->latestRemovedXid);
    1176           0 :                 tups_vacuumed += 1;
    1177           0 :                 has_dead_tuples = true;
    1178             :             }
    1179             :             else
    1180             :             {
    1181             :                 bool        tuple_totally_frozen;
    1182             : 
    1183    10390032 :                 num_tuples += 1;
    1184    10390032 :                 hastup = true;
    1185             : 
    1186             :                 /*
    1187             :                  * Each non-removable tuple must be checked to see if it needs
    1188             :                  * freezing.  Note we already have exclusive buffer lock.
    1189             :                  */
    1190    10390032 :                 if (heap_prepare_freeze_tuple(tuple.t_data,
    1191             :                                               relfrozenxid, relminmxid,
    1192             :                                               FreezeLimit, MultiXactCutoff,
    1193    10390032 :                                               &frozen[nfrozen],
    1194             :                                               &tuple_totally_frozen))
    1195     5222210 :                     frozen[nfrozen++].offset = offnum;
    1196             : 
    1197    10390032 :                 if (!tuple_totally_frozen)
    1198     2375368 :                     all_frozen = false;
    1199             :             }
    1200             :         }                       /* scan along page */
    1201             : 
    1202             :         /*
    1203             :          * If we froze any tuples, mark the buffer dirty, and write a WAL
    1204             :          * record recording the changes.  We must log the changes to be
    1205             :          * crash-safe against future truncation of CLOG.
    1206             :          */
    1207      175818 :         if (nfrozen > 0)
    1208             :         {
    1209       84718 :             START_CRIT_SECTION();
    1210             : 
    1211       84718 :             MarkBufferDirty(buf);
    1212             : 
    1213             :             /* execute collected freezes */
    1214     5306928 :             for (i = 0; i < nfrozen; i++)
    1215             :             {
    1216             :                 ItemId      itemid;
    1217             :                 HeapTupleHeader htup;
    1218             : 
    1219     5222210 :                 itemid = PageGetItemId(page, frozen[i].offset);
    1220     5222210 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
    1221             : 
    1222     5222210 :                 heap_execute_freeze_tuple(htup, &frozen[i]);
    1223             :             }
    1224             : 
    1225             :             /* Now WAL-log freezing if necessary */
    1226       84718 :             if (RelationNeedsWAL(onerel))
    1227             :             {
    1228             :                 XLogRecPtr  recptr;
    1229             : 
    1230       84716 :                 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
    1231             :                                          frozen, nfrozen);
    1232       84716 :                 PageSetLSN(page, recptr);
    1233             :             }
    1234             : 
    1235       84718 :             END_CRIT_SECTION();
    1236             :         }
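
        /*
         * Note on the pattern above: the buffer is marked dirty inside the
         * critical section before the WAL record is emitted, and the page LSN
         * is then set to the returned record location, so the buffer manager
         * cannot write the page out before the corresponding WAL has been
         * flushed ("WAL before data").  lazy_vacuum_page() below follows the
         * same dirty / log / PageSetLSN sequence for its cleanup record.
         */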
    1237             : 
    1238             :         /*
     1239             :          * If there are no indexes, we can vacuum the page right now instead
     1240             :          * of doing a second scan.  Likewise, when index cleanup is disabled
     1241             :          * we skip the second scan, but then we simply forget the dead tuples.
    1242             :          */
    1243      175818 :         if (!vacrelstats->useindex && vacrelstats->num_dead_tuples > 0)
    1244             :         {
    1245         490 :             if (nindexes == 0)
    1246             :             {
    1247             :                 /* Remove tuples from heap if the table has no index */
    1248         490 :                 lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
    1249         490 :                 vacuumed_pages++;
    1250         490 :                 has_dead_tuples = false;
    1251             :             }
    1252             :             else
    1253             :             {
    1254             :                 /*
    1255             :                  * Here, we have indexes but index cleanup is disabled.
    1256             :                  * Instead of vacuuming the dead tuples on the heap, we just
    1257             :                  * forget them.
    1258             :                  *
    1259             :                  * Note that vacrelstats->dead_tuples could have tuples which
    1260             :                  * became dead after HOT-pruning but are not marked dead yet.
    1261             :                  * We do not process them because it's a very rare condition,
    1262             :                  * and the next vacuum will process them anyway.
    1263             :                  */
    1264             :                 Assert(params->index_cleanup == VACOPT_TERNARY_DISABLED);
    1265             :             }
    1266             : 
    1267             :             /*
    1268             :              * Forget the now-vacuumed tuples, and press on, but be careful
    1269             :              * not to reset latestRemovedXid since we want that value to be
    1270             :              * valid.
    1271             :              */
    1272         490 :             vacrelstats->num_dead_tuples = 0;
    1273             : 
    1274             :             /*
    1275             :              * Periodically do incremental FSM vacuuming to make newly-freed
    1276             :              * space visible on upper FSM pages.  Note: although we've cleaned
    1277             :              * the current block, we haven't yet updated its FSM entry (that
    1278             :              * happens further down), so passing end == blkno is correct.
    1279             :              */
    1280         490 :             if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
    1281             :             {
    1282           0 :                 FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum,
    1283             :                                         blkno);
    1284           0 :                 next_fsm_block_to_vacuum = blkno;
    1285             :             }
    1286             :         }
    1287             : 
    1288      175818 :         freespace = PageGetHeapFreeSpace(page);
    1289             : 
    1290             :         /* mark page all-visible, if appropriate */
    1291      175818 :         if (all_visible && !all_visible_according_to_vm)
    1292      139726 :         {
    1293      139726 :             uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    1294             : 
    1295      139726 :             if (all_frozen)
    1296      124418 :                 flags |= VISIBILITYMAP_ALL_FROZEN;
    1297             : 
    1298             :             /*
     1299             :              * It should never be the case that the visibility map bit is set
     1300             :              * while the page-level bit is clear, but the reverse is allowed
     1301             :              * (if checksums are not enabled).  Regardless, set both bits
    1302             :              * so that we get back in sync.
    1303             :              *
    1304             :              * NB: If the heap page is all-visible but the VM bit is not set,
    1305             :              * we don't need to dirty the heap page.  However, if checksums
    1306             :              * are enabled, we do need to make sure that the heap page is
    1307             :              * dirtied before passing it to visibilitymap_set(), because it
    1308             :              * may be logged.  Given that this situation should only happen in
    1309             :              * rare cases after a crash, it is not worth optimizing.
    1310             :              */
    1311      139726 :             PageSetAllVisible(page);
    1312      139726 :             MarkBufferDirty(buf);
    1313      139726 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1314             :                               vmbuffer, visibility_cutoff_xid, flags);
    1315             :         }
    1316             : 
    1317             :         /*
    1318             :          * As of PostgreSQL 9.2, the visibility map bit should never be set if
    1319             :          * the page-level bit is clear.  However, it's possible that the bit
    1320             :          * got cleared after we checked it and before we took the buffer
    1321             :          * content lock, so we must recheck before jumping to the conclusion
    1322             :          * that something bad has happened.
    1323             :          */
    1324       36092 :         else if (all_visible_according_to_vm && !PageIsAllVisible(page)
    1325           0 :                  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
    1326             :         {
    1327           0 :             elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
    1328             :                  relname, blkno);
    1329           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1330             :                                 VISIBILITYMAP_VALID_BITS);
    1331             :         }
    1332             : 
    1333             :         /*
    1334             :          * It's possible for the value returned by GetOldestXmin() to move
    1335             :          * backwards, so it's not wrong for us to see tuples that appear to
    1336             :          * not be visible to everyone yet, while PD_ALL_VISIBLE is already
    1337             :          * set. The real safe xmin value never moves backwards, but
    1338             :          * GetOldestXmin() is conservative and sometimes returns a value
    1339             :          * that's unnecessarily small, so if we see that contradiction it just
    1340             :          * means that the tuples that we think are not visible to everyone yet
    1341             :          * actually are, and the PD_ALL_VISIBLE flag is correct.
    1342             :          *
    1343             :          * There should never be dead tuples on a page with PD_ALL_VISIBLE
    1344             :          * set, however.
    1345             :          */
    1346       36092 :         else if (PageIsAllVisible(page) && has_dead_tuples)
    1347             :         {
    1348           0 :             elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
    1349             :                  relname, blkno);
    1350           0 :             PageClearAllVisible(page);
    1351           0 :             MarkBufferDirty(buf);
    1352           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1353             :                                 VISIBILITYMAP_VALID_BITS);
    1354             :         }
    1355             : 
    1356             :         /*
     1357             :          * If the all-visible page turns out to be all-frozen but is not yet
     1358             :          * marked as such, mark it now.  Note that all_frozen is only valid
    1359             :          * if all_visible is true, so we must check both.
    1360             :          */
    1361       41506 :         else if (all_visible_according_to_vm && all_visible && all_frozen &&
    1362        5414 :                  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
    1363             :         {
    1364             :             /*
    1365             :              * We can pass InvalidTransactionId as the cutoff XID here,
    1366             :              * because setting the all-frozen bit doesn't cause recovery
    1367             :              * conflicts.
    1368             :              */
    1369          12 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1370             :                               vmbuffer, InvalidTransactionId,
    1371             :                               VISIBILITYMAP_ALL_FROZEN);
    1372             :         }
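
        /*
         * The if/else-if chain above covers the possible disagreements between
         * the page and its visibility map bits:
         * (1) the page proved all-visible but the VM bit was clear, so the
         *     page flag and the VM bit(s) are set;
         * (2) the VM claims all-visible while PD_ALL_VISIBLE is clear, which
         *     should not happen, so the VM bits are cleared with a WARNING;
         * (3) PD_ALL_VISIBLE is set although dead tuples remain, also
         *     unexpected, so the page flag and VM bits are cleared with a
         *     WARNING;
         * (4) the page was already all-visible in the VM and has now been
         *     found all-frozen as well, so only the all-frozen bit is added.
         */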
    1373             : 
    1374      175818 :         UnlockReleaseBuffer(buf);
    1375             : 
    1376             :         /* Remember the location of the last page with nonremovable tuples */
    1377      175818 :         if (hastup)
    1378      166794 :             vacrelstats->nonempty_pages = blkno + 1;
    1379             : 
    1380             :         /*
    1381             :          * If we remembered any tuples for deletion, then the page will be
    1382             :          * visited again by lazy_vacuum_heap, which will compute and record
    1383             :          * its post-compaction free space.  If not, then we're done with this
    1384             :          * page, so remember its free space as-is.  (This path will always be
    1385             :          * taken if there are no indexes.)
    1386             :          */
    1387      175818 :         if (vacrelstats->num_dead_tuples == prev_dead_count)
    1388      154244 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
    1389             :     }
    1390             : 
    1391             :     /* report that everything is scanned and vacuumed */
    1392       41420 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
    1393             : 
    1394       41420 :     pfree(frozen);
    1395             : 
    1396             :     /* save stats for use later */
    1397       41420 :     vacrelstats->tuples_deleted = tups_vacuumed;
    1398       41420 :     vacrelstats->new_dead_tuples = nkeep;
    1399             : 
    1400             :     /* now we can compute the new value for pg_class.reltuples */
    1401       41420 :     vacrelstats->new_live_tuples = vac_estimate_reltuples(onerel,
    1402             :                                                           nblocks,
    1403             :                                                           vacrelstats->tupcount_pages,
    1404             :                                                           live_tuples);
    1405             : 
    1406             :     /* also compute total number of surviving heap entries */
    1407       41420 :     vacrelstats->new_rel_tuples =
    1408       41420 :         vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
    1409             : 
    1410             :     /*
    1411             :      * Release any remaining pin on visibility map page.
    1412             :      */
    1413       41420 :     if (BufferIsValid(vmbuffer))
    1414             :     {
    1415       16892 :         ReleaseBuffer(vmbuffer);
    1416       16892 :         vmbuffer = InvalidBuffer;
    1417             :     }
    1418             : 
    1419             :     /* If any tuples need to be deleted, perform final vacuum cycle */
    1420             :     /* XXX put a threshold on min number of tuples here? */
    1421       41420 :     if (vacrelstats->num_dead_tuples > 0)
    1422             :     {
    1423        2338 :         const int   hvp_index[] = {
    1424             :             PROGRESS_VACUUM_PHASE,
    1425             :             PROGRESS_VACUUM_NUM_INDEX_VACUUMS
    1426             :         };
    1427             :         int64       hvp_val[2];
    1428             : 
    1429             :         /* Log cleanup info before we touch indexes */
    1430        2338 :         vacuum_log_cleanup_info(onerel, vacrelstats);
    1431             : 
    1432             :         /* Report that we are now vacuuming indexes */
    1433        2338 :         pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1434             :                                      PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
    1435             : 
    1436             :         /* Remove index entries */
    1437        7290 :         for (i = 0; i < nindexes; i++)
    1438        4952 :             lazy_vacuum_index(Irel[i],
    1439        4952 :                               &indstats[i],
    1440             :                               vacrelstats);
    1441             : 
    1442             :         /* Report that we are now vacuuming the heap */
    1443        2338 :         hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP;
    1444        2338 :         hvp_val[1] = vacrelstats->num_index_scans + 1;
    1445        2338 :         pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
    1446             : 
    1447             :         /* Remove tuples from heap */
    1448        2338 :         pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1449             :                                      PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
    1450        2338 :         lazy_vacuum_heap(onerel, vacrelstats);
    1451        2338 :         vacrelstats->num_index_scans++;
    1452             :     }
    1453             : 
    1454             :     /*
    1455             :      * Vacuum the remainder of the Free Space Map.  We must do this whether or
    1456             :      * not there were indexes.
    1457             :      */
    1458       41420 :     if (blkno > next_fsm_block_to_vacuum)
    1459       16892 :         FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
    1460             : 
    1461             :     /* report all blocks vacuumed; and that we're cleaning up */
    1462       41420 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1463       41420 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1464             :                                  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
    1465             : 
    1466             :     /* Do post-vacuum cleanup and statistics update for each index */
    1467       41420 :     if (vacrelstats->useindex)
    1468             :     {
    1469       96828 :         for (i = 0; i < nindexes; i++)
    1470       58390 :             lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
    1471             :     }
    1472             : 
    1473             :     /* If no indexes, make log report that lazy_vacuum_heap would've made */
    1474       41420 :     if (vacuumed_pages)
    1475          56 :         ereport(elevel,
    1476             :                 (errmsg("\"%s\": removed %.0f row versions in %u pages",
    1477             :                         RelationGetRelationName(onerel),
    1478             :                         tups_vacuumed, vacuumed_pages)));
    1479             : 
    1480             :     /*
    1481             :      * This is pretty messy, but we split it up so that we can skip emitting
    1482             :      * individual parts of the message when not applicable.
    1483             :      */
    1484       41420 :     initStringInfo(&buf);
    1485       82840 :     appendStringInfo(&buf,
    1486       41420 :                      _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
    1487             :                      nkeep, OldestXmin);
    1488       41420 :     appendStringInfo(&buf, _("There were %.0f unused item identifiers.\n"),
    1489             :                      nunused);
    1490       82840 :     appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
    1491             :                                     "Skipped %u pages due to buffer pins, ",
    1492       41420 :                                     vacrelstats->pinskipped_pages),
    1493             :                      vacrelstats->pinskipped_pages);
    1494       82840 :     appendStringInfo(&buf, ngettext("%u frozen page.\n",
    1495             :                                     "%u frozen pages.\n",
    1496       41420 :                                     vacrelstats->frozenskipped_pages),
    1497             :                      vacrelstats->frozenskipped_pages);
    1498       41420 :     appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
    1499             :                                     "%u pages are entirely empty.\n",
    1500             :                                     empty_pages),
    1501             :                      empty_pages);
    1502       41420 :     appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
    1503             : 
    1504       41420 :     ereport(elevel,
    1505             :             (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
    1506             :                     RelationGetRelationName(onerel),
    1507             :                     tups_vacuumed, num_tuples,
    1508             :                     vacrelstats->scanned_pages, nblocks),
    1509             :              errdetail_internal("%s", buf.data)));
    1510       41420 :     pfree(buf.data);
    1511       41420 : }
    1512             : 
    1513             : 
    1514             : /*
    1515             :  *  lazy_vacuum_heap() -- second pass over the heap
    1516             :  *
    1517             :  *      This routine marks dead tuples as unused and compacts out free
    1518             :  *      space on their pages.  Pages not having dead tuples recorded from
    1519             :  *      lazy_scan_heap are not visited at all.
    1520             :  *
    1521             :  * Note: the reason for doing this as a second pass is we cannot remove
    1522             :  * the tuples until we've removed their index entries, and we want to
    1523             :  * process index entry removal in batches as large as possible.
    1524             :  */
    1525             : static void
    1526        2338 : lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
    1527             : {
    1528             :     int         tupindex;
    1529             :     int         npages;
    1530             :     PGRUsage    ru0;
    1531        2338 :     Buffer      vmbuffer = InvalidBuffer;
    1532             : 
    1533        2338 :     pg_rusage_init(&ru0);
    1534        2338 :     npages = 0;
    1535             : 
    1536        2338 :     tupindex = 0;
    1537       26250 :     while (tupindex < vacrelstats->num_dead_tuples)
    1538             :     {
    1539             :         BlockNumber tblk;
    1540             :         Buffer      buf;
    1541             :         Page        page;
    1542             :         Size        freespace;
    1543             : 
    1544       21574 :         vacuum_delay_point();
    1545             : 
    1546       21574 :         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
    1547       21574 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
    1548             :                                  vac_strategy);
    1549       21574 :         if (!ConditionalLockBufferForCleanup(buf))
    1550             :         {
    1551           0 :             ReleaseBuffer(buf);
    1552           0 :             ++tupindex;
    1553           0 :             continue;
    1554             :         }
    1555       21574 :         tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
    1556             :                                     &vmbuffer);
    1557             : 
    1558             :         /* Now that we've compacted the page, record its available space */
    1559       21574 :         page = BufferGetPage(buf);
    1560       21574 :         freespace = PageGetHeapFreeSpace(page);
    1561             : 
    1562       21574 :         UnlockReleaseBuffer(buf);
    1563       21574 :         RecordPageWithFreeSpace(onerel, tblk, freespace);
    1564       21574 :         npages++;
    1565             :     }
    1566             : 
    1567        2338 :     if (BufferIsValid(vmbuffer))
    1568             :     {
    1569        2322 :         ReleaseBuffer(vmbuffer);
    1570        2322 :         vmbuffer = InvalidBuffer;
    1571             :     }
    1572             : 
    1573        2338 :     ereport(elevel,
    1574             :             (errmsg("\"%s\": removed %d row versions in %d pages",
    1575             :                     RelationGetRelationName(onerel),
    1576             :                     tupindex, npages),
    1577             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    1578        2338 : }
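
/*
 * Note on the loop above: if ConditionalLockBufferForCleanup() fails, only the
 * current dead TID is stepped over; the next iteration will usually point at
 * the same block and try again, and any line pointers left unvacuumed because
 * of a persistent pin simply remain dead, to be reclaimed by a later vacuum.
 */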
    1579             : 
    1580             : /*
    1581             :  *  lazy_vacuum_page() -- free dead tuples on a page
    1582             :  *                   and repair its fragmentation.
    1583             :  *
    1584             :  * Caller must hold pin and buffer cleanup lock on the buffer.
    1585             :  *
    1586             :  * tupindex is the index in vacrelstats->dead_tuples of the first dead
    1587             :  * tuple for this page.  We assume the rest follow sequentially.
    1588             :  * The return value is the first tupindex after the tuples of this page.
    1589             :  */
    1590             : static int
    1591       22064 : lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
    1592             :                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
    1593             : {
    1594       22064 :     Page        page = BufferGetPage(buffer);
    1595             :     OffsetNumber unused[MaxOffsetNumber];
    1596       22064 :     int         uncnt = 0;
    1597             :     TransactionId visibility_cutoff_xid;
    1598             :     bool        all_frozen;
    1599             : 
    1600       22064 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1601             : 
    1602       22064 :     START_CRIT_SECTION();
    1603             : 
    1604     1167046 :     for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    1605             :     {
    1606             :         BlockNumber tblk;
    1607             :         OffsetNumber toff;
    1608             :         ItemId      itemid;
    1609             : 
    1610     1164218 :         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
    1611     1164218 :         if (tblk != blkno)
    1612       19236 :             break;              /* past end of tuples for this block */
    1613     1144982 :         toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
    1614     1144982 :         itemid = PageGetItemId(page, toff);
    1615     1144982 :         ItemIdSetUnused(itemid);
    1616     1144982 :         unused[uncnt++] = toff;
    1617             :     }
    1618             : 
    1619       22064 :     PageRepairFragmentation(page);
    1620             : 
    1621             :     /*
    1622             :      * Mark buffer dirty before we write WAL.
    1623             :      */
    1624       22064 :     MarkBufferDirty(buffer);
    1625             : 
    1626             :     /* XLOG stuff */
    1627       22064 :     if (RelationNeedsWAL(onerel))
    1628             :     {
    1629             :         XLogRecPtr  recptr;
    1630             : 
    1631       22064 :         recptr = log_heap_clean(onerel, buffer,
    1632             :                                 NULL, 0, NULL, 0,
    1633             :                                 unused, uncnt,
    1634             :                                 vacrelstats->latestRemovedXid);
    1635       22064 :         PageSetLSN(page, recptr);
    1636             :     }
    1637             : 
    1638             :     /*
     1639             :      * End the critical section, so that we can safely do visibility tests (which
    1640             :      * possibly need to perform IO and allocate memory!). If we crash now the
    1641             :      * page (including the corresponding vm bit) might not be marked all
    1642             :      * visible, but that's fine. A later vacuum will fix that.
    1643             :      */
    1644       22064 :     END_CRIT_SECTION();
    1645             : 
    1646             :     /*
    1647             :      * Now that we have removed the dead tuples from the page, once again
    1648             :      * check if the page has become all-visible.  The page is already marked
    1649             :      * dirty, exclusively locked, and, if needed, a full page image has been
    1650             :      * emitted in the log_heap_clean() above.
    1651             :      */
    1652       22064 :     if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
    1653             :                                  &all_frozen))
    1654       21448 :         PageSetAllVisible(page);
    1655             : 
    1656             :     /*
    1657             :      * All the changes to the heap page have been done. If the all-visible
    1658             :      * flag is now set, also set the VM all-visible bit (and, if possible, the
    1659             :      * all-frozen bit) unless this has already been done previously.
    1660             :      */
    1661       22064 :     if (PageIsAllVisible(page))
    1662             :     {
    1663       21448 :         uint8       vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
    1664       21448 :         uint8       flags = 0;
    1665             : 
     1666             :         /* Add any VM bits that are not yet set but should be */
    1667       21448 :         if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
    1668       21448 :             flags |= VISIBILITYMAP_ALL_VISIBLE;
    1669       21448 :         if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
    1670       17678 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    1671             : 
    1672             :         Assert(BufferIsValid(*vmbuffer));
    1673       21448 :         if (flags != 0)
    1674       21448 :             visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
    1675             :                               *vmbuffer, visibility_cutoff_xid, flags);
    1676             :     }
    1677             : 
    1678       22064 :     return tupindex;
    1679             : }
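
/*
 * A minimal standalone sketch (not part of vacuumlazy.c) of the tupindex
 * convention used by lazy_vacuum_page(): dead TIDs are kept in block order, so
 * each call consumes the contiguous run of entries belonging to one block and
 * returns the index of the first entry for the next block.  The DeadTid type
 * and consume_block_run() below are made-up illustration names, not real
 * PostgreSQL identifiers.
 */
#include <stdio.h>

typedef struct DeadTid
{
	unsigned	block;			/* hypothetical stand-in for a block number */
	unsigned	offset;			/* hypothetical stand-in for a line pointer */
} DeadTid;

/* Consume all entries for 'block' starting at 'tupindex'; return next index. */
static int
consume_block_run(const DeadTid *dead, int ndead, int tupindex, unsigned block)
{
	for (; tupindex < ndead; tupindex++)
	{
		if (dead[tupindex].block != block)
			break;				/* past end of tuples for this block */
		printf("would mark (%u,%u) unused\n",
			   dead[tupindex].block, dead[tupindex].offset);
	}
	return tupindex;
}

int
main(void)
{
	/* sorted by (block, offset), as the first heap pass records them */
	DeadTid		dead[] = {{3, 1}, {3, 7}, {5, 2}, {9, 4}, {9, 5}};
	int			ndead = 5;
	int			tupindex = 0;

	while (tupindex < ndead)
		tupindex = consume_block_run(dead, ndead, tupindex,
									 dead[tupindex].block);
	return 0;
}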
    1680             : 
    1681             : /*
    1682             :  *  lazy_check_needs_freeze() -- scan page to see if any tuples
    1683             :  *                   need to be cleaned to avoid wraparound
    1684             :  *
    1685             :  * Returns true if the page needs to be vacuumed using cleanup lock.
    1686             :  * Also returns a flag indicating whether page contains any tuples at all.
    1687             :  */
    1688             : static bool
    1689           2 : lazy_check_needs_freeze(Buffer buf, bool *hastup)
    1690             : {
    1691           2 :     Page        page = BufferGetPage(buf);
    1692             :     OffsetNumber offnum,
    1693             :                 maxoff;
    1694             :     HeapTupleHeader tupleheader;
    1695             : 
    1696           2 :     *hastup = false;
    1697             : 
    1698             :     /*
    1699             :      * New and empty pages, obviously, don't contain tuples. We could make
    1700             :      * sure that the page is registered in the FSM, but it doesn't seem worth
    1701             :      * waiting for a cleanup lock just for that, especially because it's
    1702             :      * likely that the pin holder will do so.
    1703             :      */
    1704           2 :     if (PageIsNew(page) || PageIsEmpty(page))
    1705           0 :         return false;
    1706             : 
    1707           2 :     maxoff = PageGetMaxOffsetNumber(page);
    1708          46 :     for (offnum = FirstOffsetNumber;
    1709             :          offnum <= maxoff;
    1710          42 :          offnum = OffsetNumberNext(offnum))
    1711             :     {
    1712             :         ItemId      itemid;
    1713             : 
    1714          42 :         itemid = PageGetItemId(page, offnum);
    1715             : 
    1716             :         /* this should match hastup test in count_nondeletable_pages() */
    1717          42 :         if (ItemIdIsUsed(itemid))
    1718          42 :             *hastup = true;
    1719             : 
    1720             :         /* dead and redirect items never need freezing */
    1721          42 :         if (!ItemIdIsNormal(itemid))
    1722           0 :             continue;
    1723             : 
    1724          42 :         tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
    1725             : 
    1726          42 :         if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
    1727             :                                     MultiXactCutoff, buf))
    1728           0 :             return true;
    1729             :     }                           /* scan along page */
    1730             : 
    1731           2 :     return false;
    1732             : }
    1733             : 
    1734             : 
    1735             : /*
    1736             :  *  lazy_vacuum_index() -- vacuum one index relation.
    1737             :  *
    1738             :  *      Delete all the index entries pointing to tuples listed in
    1739             :  *      vacrelstats->dead_tuples, and update running statistics.
    1740             :  */
    1741             : static void
    1742        4952 : lazy_vacuum_index(Relation indrel,
    1743             :                   IndexBulkDeleteResult **stats,
    1744             :                   LVRelStats *vacrelstats)
    1745             : {
    1746             :     IndexVacuumInfo ivinfo;
    1747             :     PGRUsage    ru0;
    1748             : 
    1749        4952 :     pg_rusage_init(&ru0);
    1750             : 
    1751        4952 :     ivinfo.index = indrel;
    1752        4952 :     ivinfo.analyze_only = false;
    1753        4952 :     ivinfo.report_progress = false;
    1754        4952 :     ivinfo.estimated_count = true;
    1755        4952 :     ivinfo.message_level = elevel;
    1756             :     /* We can only provide an approximate value of num_heap_tuples here */
    1757        4952 :     ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
    1758        4952 :     ivinfo.strategy = vac_strategy;
    1759             : 
    1760             :     /* Do bulk deletion */
    1761        4952 :     *stats = index_bulk_delete(&ivinfo, *stats,
    1762             :                                lazy_tid_reaped, (void *) vacrelstats);
    1763             : 
    1764        4952 :     ereport(elevel,
    1765             :             (errmsg("scanned index \"%s\" to remove %d row versions",
    1766             :                     RelationGetRelationName(indrel),
    1767             :                     vacrelstats->num_dead_tuples),
    1768             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    1769        4952 : }
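
/*
 * A minimal standalone sketch (not part of vacuumlazy.c): index_bulk_delete()
 * calls back into lazy_tid_reaped() for every index entry to ask whether its
 * heap TID appears in the sorted dead-tuple array.  The code below only
 * illustrates the kind of binary search such a callback can use; DeadTid,
 * tid_compare() and tid_reaped() are made-up names, not the real PostgreSQL
 * types or functions.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct DeadTid
{
	unsigned	block;
	unsigned	offset;
} DeadTid;

static int
tid_compare(DeadTid a, DeadTid b)
{
	if (a.block != b.block)
		return (a.block < b.block) ? -1 : 1;
	if (a.offset != b.offset)
		return (a.offset < b.offset) ? -1 : 1;
	return 0;
}

/* Return true if 'tid' appears in the sorted array dead[0..ndead-1]. */
static bool
tid_reaped(DeadTid tid, const DeadTid *dead, int ndead)
{
	int			lo = 0;
	int			hi = ndead - 1;

	while (lo <= hi)
	{
		int			mid = lo + (hi - lo) / 2;
		int			cmp = tid_compare(tid, dead[mid]);

		if (cmp == 0)
			return true;
		if (cmp < 0)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	return false;
}

int
main(void)
{
	DeadTid		dead[] = {{3, 1}, {3, 7}, {5, 2}, {9, 4}};
	DeadTid		probe = {5, 2};

	printf("reaped: %s\n", tid_reaped(probe, dead, 4) ? "yes" : "no");
	return 0;
}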
    1770             : 
    1771             : /*
    1772             :  *  lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
    1773             :  */
    1774             : static void
    1775       58390 : lazy_cleanup_index(Relation indrel,
    1776             :                    IndexBulkDeleteResult *stats,
    1777             :                    LVRelStats *vacrelstats)
    1778             : {
    1779             :     IndexVacuumInfo ivinfo;
    1780             :     PGRUsage    ru0;
    1781             : 
    1782       58390 :     pg_rusage_init(&ru0);
    1783             : 
    1784       58390 :     ivinfo.index = indrel;
    1785       58390 :     ivinfo.analyze_only = false;
    1786       58390 :     ivinfo.report_progress = false;
    1787       58390 :     ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
    1788       58390 :     ivinfo.message_level = elevel;
    1789             : 
    1790             :     /*
     1791             :      * Now we can provide a better estimate of the total number of surviving
    1792             :      * tuples (we assume indexes are more interested in that than in the
    1793             :      * number of nominally live tuples).
    1794             :      */
    1795       58390 :     ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
    1796       58390 :     ivinfo.strategy = vac_strategy;
    1797             : 
    1798       58390 :     stats = index_vacuum_cleanup(&ivinfo, stats);
    1799             : 
    1800       58390 :     if (!stats)
    1801        1722 :         return;
    1802             : 
    1803             :     /*
    1804             :      * Now update statistics in pg_class, but only if the index says the count
    1805             :      * is accurate.
    1806             :      */
    1807       56668 :     if (!stats->estimated_count)
    1808       56668 :         vac_update_relstats(indrel,
    1809             :                             stats->num_pages,
    1810             :                             stats->num_index_tuples,
    1811             :                             0,
    1812             :                             false,
    1813             :                             InvalidTransactionId,
    1814             :                             InvalidMultiXactId,
    1815             :                             false);
    1816             : 
    1817       56668 :     ereport(elevel,
    1818             :             (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
    1819             :                     RelationGetRelationName(indrel),
    1820             :                     stats->num_index_tuples,
    1821             :                     stats->num_pages),
    1822             :              errdetail("%.0f index row versions were removed.\n"
    1823             :                        "%u index pages have been deleted, %u are currently reusable.\n"
    1824             :                        "%s.",
    1825             :                        stats->tuples_removed,
    1826             :                        stats->pages_deleted, stats->pages_free,
    1827             :                        pg_rusage_show(&ru0))));
    1828             : 
    1829       56668 :     pfree(stats);
    1830             : }
    1831             : 
    1832             : /*
    1833             :  * should_attempt_truncation - should we attempt to truncate the heap?
    1834             :  *
    1835             :  * Don't even think about it unless we have a shot at releasing a goodly
    1836             :  * number of pages.  Otherwise, the time taken isn't worth it.
    1837             :  *
    1838             :  * Also don't attempt it if we are doing early pruning/vacuuming, because a
    1839             :  * scan which cannot find a truncated heap page cannot determine that the
    1840             :  * snapshot is too old to read that page.  We might be able to get away with
    1841             :  * truncating all except one of the pages, setting its LSN to (at least) the
    1842             :  * maximum of the truncated range if we also treated an index leaf tuple
    1843             :  * pointing to a missing heap page as something to trigger the "snapshot too
    1844             :  * old" error, but that seems fragile and seems like it deserves its own patch
    1845             :  * if we consider it.
    1846             :  *
    1847             :  * This is split out so that we can test whether truncation is going to be
    1848             :  * called for before we actually do it.  If you change the logic here, be
    1849             :  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
    1850             :  */
    1851             : static bool
    1852       41524 : should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats)
    1853             : {
    1854             :     BlockNumber possibly_freeable;
    1855             : 
    1856       41524 :     if (params->truncate == VACOPT_TERNARY_DISABLED)
    1857          16 :         return false;
    1858             : 
    1859       41508 :     possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    1860       41508 :     if (possibly_freeable > 0 &&
    1861         254 :         (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
    1862         490 :          possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
    1863         236 :         old_snapshot_threshold < 0)
    1864         236 :         return true;
    1865             :     else
    1866       41272 :         return false;
    1867             : }
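
The truncation threshold above is easier to see with numbers. The sketch below restates the size test alone, assuming the usual definitions REL_TRUNCATE_MINIMUM = 1000 and REL_TRUNCATE_FRACTION = 16 from earlier in this file (not shown in this excerpt), and ignoring the TRUNCATE-option and old_snapshot_threshold checks; worth_truncating() is an illustrative name, not a PostgreSQL function.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool
    worth_truncating(uint32_t rel_pages, uint32_t nonempty_pages)
    {
        uint32_t    possibly_freeable = rel_pages - nonempty_pages;

        return possibly_freeable > 0 &&
            (possibly_freeable >= 1000 ||               /* REL_TRUNCATE_MINIMUM */
             possibly_freeable >= rel_pages / 16);      /* REL_TRUNCATE_FRACTION */
    }

    int
    main(void)
    {
        assert(worth_truncating(10000, 9200));   /* 800 free pages >= 10000/16 = 625 */
        assert(!worth_truncating(10000, 9700));  /* 300 free pages: below both thresholds */
        assert(worth_truncating(500, 0));        /* entirely empty table: 500 >= 500/16 */
        return 0;
    }

So a large table must have either 1000 removable tail pages or at least a sixteenth of its pages removable before VACUUM bothers taking the exclusive lock.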
    1868             : 
    1869             : /*
    1870             :  * lazy_truncate_heap - try to truncate off any empty pages at the end
    1871             :  */
    1872             : static void
    1873         132 : lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
    1874             : {
    1875         132 :     BlockNumber old_rel_pages = vacrelstats->rel_pages;
    1876             :     BlockNumber new_rel_pages;
    1877             :     PGRUsage    ru0;
    1878             :     int         lock_retry;
    1879             : 
    1880         132 :     pg_rusage_init(&ru0);
    1881             : 
    1882             :     /* Report that we are now truncating */
    1883         132 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1884             :                                  PROGRESS_VACUUM_PHASE_TRUNCATE);
    1885             : 
    1886             :     /*
    1887             :      * Loop until no more truncating can be done.
    1888             :      */
    1889             :     do
    1890             :     {
    1891             :         /*
    1892             :          * We need full exclusive lock on the relation in order to do
    1893             :          * truncation. If we can't get it, give up rather than waiting --- we
    1894             :          * don't want to block other backends, and we don't want to deadlock
    1895             :          * (which is quite possible considering we already hold a lower-grade
    1896             :          * lock).
    1897             :          */
    1898         132 :         vacrelstats->lock_waiter_detected = false;
    1899         132 :         lock_retry = 0;
    1900             :         while (true)
    1901             :         {
    1902         932 :             if (ConditionalLockRelation(onerel, AccessExclusiveLock))
    1903         128 :                 break;
    1904             : 
    1905             :             /*
    1906             :              * Check for interrupts while trying to (re-)acquire the exclusive
    1907             :              * lock.
    1908             :              */
    1909         404 :             CHECK_FOR_INTERRUPTS();
    1910             : 
    1911         404 :             if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
    1912             :                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
    1913             :             {
    1914             :                 /*
    1915             :                  * We failed to establish the lock in the specified number of
    1916             :                  * retries. This means we give up truncating.
    1917             :                  */
    1918           4 :                 vacrelstats->lock_waiter_detected = true;
    1919           4 :                 ereport(elevel,
    1920             :                         (errmsg("\"%s\": stopping truncate due to conflicting lock request",
    1921             :                                 RelationGetRelationName(onerel))));
    1922           8 :                 return;
    1923             :             }
    1924             : 
    1925         400 :             pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);
    1926             :         }
    1927             : 
    1928             :         /*
    1929             :          * Now that we have exclusive lock, look to see if the rel has grown
    1930             :          * whilst we were vacuuming with non-exclusive lock.  If so, give up;
    1931             :          * the newly added pages presumably contain non-deletable tuples.
    1932             :          */
    1933         128 :         new_rel_pages = RelationGetNumberOfBlocks(onerel);
    1934         128 :         if (new_rel_pages != old_rel_pages)
    1935             :         {
    1936             :             /*
    1937             :              * Note: we intentionally don't update vacrelstats->rel_pages with
    1938             :              * the new rel size here.  If we did, it would amount to assuming
    1939             :              * that the new pages are empty, which is unlikely. Leaving the
    1940             :              * numbers alone amounts to assuming that the new pages have the
    1941             :              * same tuple density as existing ones, which is less unlikely.
    1942             :              */
    1943           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    1944           0 :             return;
    1945             :         }
    1946             : 
    1947             :         /*
    1948             :          * Scan backwards from the end to verify that the end pages actually
    1949             :          * contain no tuples.  This is *necessary*, not optional, because
    1950             :          * other backends could have added tuples to these pages whilst we
    1951             :          * were vacuuming.
    1952             :          */
    1953         128 :         new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
    1954             : 
    1955         128 :         if (new_rel_pages >= old_rel_pages)
    1956             :         {
    1957             :             /* can't do anything after all */
    1958           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    1959           0 :             return;
    1960             :         }
    1961             : 
    1962             :         /*
    1963             :          * Okay to truncate.
    1964             :          */
    1965         128 :         RelationTruncate(onerel, new_rel_pages);
    1966             : 
    1967             :         /*
    1968             :          * We can release the exclusive lock as soon as we have truncated.
    1969             :          * Other backends can't safely access the relation until they have
    1970             :          * processed the smgr invalidation that smgrtruncate sent out ... but
    1971             :          * that should happen as part of standard invalidation processing once
    1972             :          * they acquire lock on the relation.
    1973             :          */
    1974         128 :         UnlockRelation(onerel, AccessExclusiveLock);
    1975             : 
    1976             :         /*
    1977             :          * Update statistics.  Here, it *is* correct to adjust rel_pages
    1978             :          * without also touching reltuples, since the tuple count wasn't
    1979             :          * changed by the truncation.
    1980             :          */
    1981         128 :         vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
    1982         128 :         vacrelstats->rel_pages = new_rel_pages;
    1983             : 
    1984         128 :         ereport(elevel,
    1985             :                 (errmsg("\"%s\": truncated %u to %u pages",
    1986             :                         RelationGetRelationName(onerel),
    1987             :                         old_rel_pages, new_rel_pages),
    1988             :                  errdetail_internal("%s",
    1989             :                                     pg_rusage_show(&ru0))));
    1990         128 :         old_rel_pages = new_rel_pages;
    1991         130 :     } while (new_rel_pages > vacrelstats->nonempty_pages &&
    1992         128 :              vacrelstats->lock_waiter_detected);
    1993             : }
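
The retry loop above is a bounded polling pattern: take the AccessExclusiveLock only if it is immediately available, sleep a short interval, and abandon truncation once the accumulated wait reaches the timeout, so other backends never queue behind VACUUM's lock request. Below is a standalone model of that pattern, assuming the VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL and VACUUM_TRUNCATE_LOCK_TIMEOUT macros defined earlier in the file are 50 ms and 5000 ms; try_exclusive_lock() is a stub standing in for ConditionalLockRelation(), and the real loop additionally calls CHECK_FOR_INTERRUPTS() between attempts.

    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    #define WAIT_INTERVAL_MS 50     /* assumed VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL */
    #define LOCK_TIMEOUT_MS  5000   /* assumed VACUUM_TRUNCATE_LOCK_TIMEOUT */

    /* Stand-in for ConditionalLockRelation(): here it succeeds on the third try. */
    static bool
    try_exclusive_lock(void)
    {
        static int  calls = 0;

        return ++calls >= 3;
    }

    /* Poll for the lock, sleeping between attempts, and give up at the timeout. */
    static bool
    acquire_with_retries(void)
    {
        int         retries = 0;

        while (!try_exclusive_lock())
        {
            if (++retries > LOCK_TIMEOUT_MS / WAIT_INTERVAL_MS)
                return false;   /* caller reports "stopping truncate" and bails out */
            usleep(WAIT_INTERVAL_MS * 1000L);
        }
        return true;
    }

    int
    main(void)
    {
        printf("lock acquired: %s\n", acquire_with_retries() ? "yes" : "no");
        return 0;
    }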
    1994             : 
    1995             : /*
    1996             :  * Rescan end pages to verify that they are (still) empty of tuples.
    1997             :  *
    1998             :  * Returns number of nondeletable pages (last nonempty page + 1).
    1999             :  */
    2000             : static BlockNumber
    2001         128 : count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
    2002             : {
    2003             :     BlockNumber blkno;
    2004             :     BlockNumber prefetchedUntil;
    2005             :     instr_time  starttime;
    2006             : 
    2007             :     /* Initialize the starttime if we check for conflicting lock requests */
    2008         128 :     INSTR_TIME_SET_CURRENT(starttime);
    2009             : 
    2010             :     /*
    2011             :      * Start checking blocks at what we believe relation end to be and move
    2012             :      * backwards.  (Strange coding of loop control is needed because blkno is
    2013             :      * unsigned.)  To make the scan faster, we prefetch a few blocks at a time
    2014             :      * in forward direction, so that OS-level readahead can kick in.
    2015             :      */
    2016         128 :     blkno = vacrelstats->rel_pages;
    2017             :     StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
    2018             :                      "prefetch size must be power of 2");
    2019         128 :     prefetchedUntil = InvalidBlockNumber;
    2020        1482 :     while (blkno > vacrelstats->nonempty_pages)
    2021             :     {
    2022             :         Buffer      buf;
    2023             :         Page        page;
    2024             :         OffsetNumber offnum,
    2025             :                     maxoff;
    2026             :         bool        hastup;
    2027             : 
    2028             :         /*
    2029             :          * Check if another process requests a lock on our relation. We are
    2030             :          * holding an AccessExclusiveLock here, so they will be waiting. We
    2031             :          * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
    2032             :          * only check if that interval has elapsed once every 32 blocks to
    2033             :          * keep the number of system calls and actual shared lock table
    2034             :          * lookups to a minimum.
    2035             :          */
    2036        1228 :         if ((blkno % 32) == 0)
    2037             :         {
    2038             :             instr_time  currenttime;
    2039             :             instr_time  elapsed;
    2040             : 
    2041          38 :             INSTR_TIME_SET_CURRENT(currenttime);
    2042          38 :             elapsed = currenttime;
    2043          38 :             INSTR_TIME_SUBTRACT(elapsed, starttime);
    2044          38 :             if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
    2045             :                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
    2046             :             {
    2047           0 :                 if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
    2048             :                 {
    2049           0 :                     ereport(elevel,
    2050             :                             (errmsg("\"%s\": suspending truncate due to conflicting lock request",
    2051             :                                     RelationGetRelationName(onerel))));
    2052             : 
    2053           0 :                     vacrelstats->lock_waiter_detected = true;
    2054           0 :                     return blkno;
    2055             :                 }
    2056           0 :                 starttime = currenttime;
    2057             :             }
    2058             :         }
    2059             : 
    2060             :         /*
    2061             :          * We don't insert a vacuum delay point here, because we have an
    2062             :          * exclusive lock on the table which we want to hold for as short a
    2063             :          * time as possible.  We still need to check for interrupts however.
    2064             :          */
    2065        1228 :         CHECK_FOR_INTERRUPTS();
    2066             : 
    2067        1228 :         blkno--;
    2068             : 
    2069             :         /* If we haven't prefetched this lot yet, do so now. */
    2070        1228 :         if (prefetchedUntil > blkno)
    2071             :         {
    2072             :             BlockNumber prefetchStart;
    2073             :             BlockNumber pblkno;
    2074             : 
    2075         162 :             prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    2076        2128 :             for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
    2077             :             {
    2078        1966 :                 PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
    2079        1966 :                 CHECK_FOR_INTERRUPTS();
    2080             :             }
    2081         162 :             prefetchedUntil = prefetchStart;
    2082             :         }
    2083             : 
    2084        1228 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
    2085             :                                  RBM_NORMAL, vac_strategy);
    2086             : 
    2087             :         /* In this phase we only need shared access to the buffer */
    2088        1228 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
    2089             : 
    2090        1228 :         page = BufferGetPage(buf);
    2091             : 
    2092        1228 :         if (PageIsNew(page) || PageIsEmpty(page))
    2093             :         {
    2094          90 :             UnlockReleaseBuffer(buf);
    2095          90 :             continue;
    2096             :         }
    2097             : 
    2098        1138 :         hastup = false;
    2099        1138 :         maxoff = PageGetMaxOffsetNumber(page);
    2100      130366 :         for (offnum = FirstOffsetNumber;
    2101             :              offnum <= maxoff;
    2102      128090 :              offnum = OffsetNumberNext(offnum))
    2103             :         {
    2104             :             ItemId      itemid;
    2105             : 
    2106      128092 :             itemid = PageGetItemId(page, offnum);
    2107             : 
    2108             :             /*
    2109             :              * Note: any non-unused item should be taken as a reason to keep
    2110             :              * this page.  We formerly thought that DEAD tuples could be
    2111             :              * thrown away, but that's not so, because we'd not have cleaned
    2112             :              * out their index entries.
    2113             :              */
    2114      128092 :             if (ItemIdIsUsed(itemid))
    2115             :             {
    2116           2 :                 hastup = true;
    2117           2 :                 break;          /* can stop scanning */
    2118             :             }
    2119             :         }                       /* scan along page */
    2120             : 
    2121        1138 :         UnlockReleaseBuffer(buf);
    2122             : 
    2123             :         /* Done scanning if we found a tuple here */
    2124        1138 :         if (hastup)
    2125           2 :             return blkno + 1;
    2126             :     }
    2127             : 
    2128             :     /*
    2129             :      * If we fall out of the loop, all the previously-thought-to-be-empty
    2130             :      * pages still are; we need not bother to look at the last known-nonempty
    2131             :      * page.
    2132             :      */
    2133         126 :     return vacrelstats->nonempty_pages;
    2134             : }
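
One detail worth spelling out is the prefetch arithmetic: blkno & ~(PREFETCH_SIZE - 1) rounds the current block number down to the previous multiple of PREFETCH_SIZE, which is why the static assertion above insists on a power of two. The backward scan therefore issues forward prefetches one aligned chunk at a time, letting OS readahead help. A minimal standalone check, assuming PREFETCH_SIZE is 32 as defined earlier in this file:

    #include <assert.h>
    #include <stdint.h>

    #define DEMO_PREFETCH_SIZE 32u          /* must be a power of two */

    static uint32_t
    prefetch_start(uint32_t blkno)
    {
        /* Clear the low log2(PREFETCH_SIZE) bits: round down to a multiple of 32. */
        return blkno & ~(DEMO_PREFETCH_SIZE - 1);
    }

    int
    main(void)
    {
        assert(prefetch_start(75) == 64);   /* pages 64..75 get prefetched forward */
        assert(prefetch_start(64) == 64);   /* already aligned */
        assert(prefetch_start(31) == 0);    /* first chunk starts at block 0 */
        return 0;
    }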
    2135             : 
    2136             : /*
    2137             :  * lazy_space_alloc - space allocation decisions for lazy vacuum
    2138             :  *
    2139             :  * See the comments at the head of this file for rationale.
    2140             :  */
    2141             : static void
    2142       41420 : lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
    2143             : {
    2144             :     long        maxtuples;
    2145       83038 :     int         vac_work_mem = IsAutoVacuumWorkerProcess() &&
    2146         198 :     autovacuum_work_mem != -1 ?
    2147       41420 :     autovacuum_work_mem : maintenance_work_mem;
    2148             : 
    2149       41420 :     if (vacrelstats->useindex)
    2150             :     {
    2151       38438 :         maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
    2152       38438 :         maxtuples = Min(maxtuples, INT_MAX);
    2153       38438 :         maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
    2154             : 
    2155             :         /* curious coding here to ensure the multiplication can't overflow */
    2156       38438 :         if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
    2157       38438 :             maxtuples = relblocks * LAZY_ALLOC_TUPLES;
    2158             : 
    2159             :         /* stay sane if small maintenance_work_mem */
    2160       38438 :         maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
    2161             :     }
    2162             :     else
    2163             :     {
    2164        2982 :         maxtuples = MaxHeapTuplesPerPage;
    2165             :     }
    2166             : 
    2167       41420 :     vacrelstats->num_dead_tuples = 0;
    2168       41420 :     vacrelstats->max_dead_tuples = (int) maxtuples;
    2169       41420 :     vacrelstats->dead_tuples = (ItemPointer)
    2170       41420 :         palloc(maxtuples * sizeof(ItemPointerData));
    2171       41420 : }
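
To make the sizing concrete: with the default 64 MB maintenance_work_mem, a 6-byte ItemPointerData, and 8 kB heap pages (so MaxHeapTuplesPerPage, and hence LAZY_ALLOC_TUPLES, works out to 291), the TID array could hold roughly 11.2 million entries, but for a small table it is clamped to relblocks * 291 slots. The arithmetic, restated as a standalone program with those assumed constants:

    #include <stdio.h>

    int
    main(void)
    {
        long    work_mem_kb = 64 * 1024;    /* default maintenance_work_mem, 64 MB */
        long    tid_size = 6;               /* sizeof(ItemPointerData), assumed */
        long    tuples_per_page = 291;      /* MaxHeapTuplesPerPage for 8 kB pages */
        long    relblocks = 1000;           /* example: a 1000-page table */

        long    maxtuples = (work_mem_kb * 1024L) / tid_size;   /* ~11.18 million */

        /* Small table: never allocate more slots than the heap could possibly need
         * (division first, mirroring the overflow-avoiding test above). */
        if (maxtuples / tuples_per_page > relblocks)
            maxtuples = relblocks * tuples_per_page;            /* 291000 slots */

        printf("dead-tuple slots: %ld (%ld kB)\n",
               maxtuples, maxtuples * tid_size / 1024);         /* 291000 (1705 kB) */
        return 0;
    }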
    2172             : 
    2173             : /*
    2174             :  * lazy_record_dead_tuple - remember one deletable tuple
    2175             :  */
    2176             : static void
    2177     1144982 : lazy_record_dead_tuple(LVRelStats *vacrelstats,
    2178             :                        ItemPointer itemptr)
    2179             : {
    2180             :     /*
    2181             :      * The array shouldn't overflow under normal behavior, but perhaps it
    2182             :      * could if we are given a really small maintenance_work_mem. In that
    2183             :      * case, just forget the last few tuples (we'll get 'em next time).
    2184             :      */
    2185     1144982 :     if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
    2186             :     {
    2187     1144982 :         vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
    2188     1144982 :         vacrelstats->num_dead_tuples++;
    2189     1144982 :         pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
    2190     1144982 :                                      vacrelstats->num_dead_tuples);
    2191             :     }
    2192     1144982 : }
    2193             : 
    2194             : /*
    2195             :  *  lazy_tid_reaped() -- is a particular tid deletable?
    2196             :  *
    2197             :  *      This has the right signature to be an IndexBulkDeleteCallback.
    2198             :  *
    2199             :  *      Assumes dead_tuples array is in sorted order.
    2200             :  */
    2201             : static bool
    2202    11900364 : lazy_tid_reaped(ItemPointer itemptr, void *state)
    2203             : {
    2204    11900364 :     LVRelStats *vacrelstats = (LVRelStats *) state;
    2205             :     ItemPointer res;
    2206             : 
    2207    23800728 :     res = (ItemPointer) bsearch((void *) itemptr,
    2208    11900364 :                                 (void *) vacrelstats->dead_tuples,
    2209    11900364 :                                 vacrelstats->num_dead_tuples,
    2210             :                                 sizeof(ItemPointerData),
    2211             :                                 vac_cmp_itemptr);
    2212             : 
    2213    11900364 :     return (res != NULL);
    2214             : }
    2215             : 
    2216             : /*
    2217             :  * Comparator routines for use with qsort() and bsearch().
    2218             :  */
    2219             : static int
    2220    83137822 : vac_cmp_itemptr(const void *left, const void *right)
    2221             : {
    2222             :     BlockNumber lblk,
    2223             :                 rblk;
    2224             :     OffsetNumber loff,
    2225             :                 roff;
    2226             : 
    2227    83137822 :     lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    2228    83137822 :     rblk = ItemPointerGetBlockNumber((ItemPointer) right);
    2229             : 
    2230    83137822 :     if (lblk < rblk)
    2231    52803494 :         return -1;
    2232    30334328 :     if (lblk > rblk)
    2233    17765908 :         return 1;
    2234             : 
    2235    12568420 :     loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    2236    12568420 :     roff = ItemPointerGetOffsetNumber((ItemPointer) right);
    2237             : 
    2238    12568420 :     if (loff < roff)
    2239     5996930 :         return -1;
    2240     6571490 :     if (loff > roff)
    2241     4952964 :         return 1;
    2242             : 
    2243     1618526 :     return 0;
    2244             : }
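
Taken together, lazy_tid_reaped() and vac_cmp_itemptr() implement a membership test over a TID list that is already in (block, offset) order, because lazy_scan_heap() appends dead TIDs in physical scan order; each index entry then costs one binary search. A simplified standalone model of that lookup, using illustrative Demo* names rather than the PostgreSQL types:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct DemoTid
    {
        unsigned    blk;        /* block number */
        unsigned short off;     /* line pointer offset within the block */
    } DemoTid;

    /* Same ordering as vac_cmp_itemptr(): block number first, then offset. */
    static int
    demo_cmp_tid(const void *left, const void *right)
    {
        const DemoTid *l = left;
        const DemoTid *r = right;

        if (l->blk != r->blk)
            return (l->blk < r->blk) ? -1 : 1;
        if (l->off != r->off)
            return (l->off < r->off) ? -1 : 1;
        return 0;
    }

    /* One binary search per probe, as lazy_tid_reaped() does per index entry. */
    static bool
    demo_tid_is_dead(const DemoTid *tid, const DemoTid *dead, size_t ndead)
    {
        return bsearch(tid, dead, ndead, sizeof(DemoTid), demo_cmp_tid) != NULL;
    }

    int
    main(void)
    {
        DemoTid     dead[] = {{3, 1}, {3, 7}, {8, 2}};  /* already in TID order */
        DemoTid     hit = {3, 7};
        DemoTid     miss = {8, 5};

        assert(demo_tid_is_dead(&hit, dead, 3));
        assert(!demo_tid_is_dead(&miss, dead, 3));
        return 0;
    }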
    2245             : 
    2246             : /*
    2247             :  * Check if every tuple in the given page is visible to all current and future
    2248             :  * transactions. Also return the visibility_cutoff_xid which is the highest
    2249             :  * xmin amongst the visible tuples.  Set *all_frozen to true if every tuple
    2250             :  * on this page is frozen.
    2251             :  */
    2252             : static bool
    2253       22064 : heap_page_is_all_visible(Relation rel, Buffer buf,
    2254             :                          TransactionId *visibility_cutoff_xid,
    2255             :                          bool *all_frozen)
    2256             : {
    2257       22064 :     Page        page = BufferGetPage(buf);
    2258       22064 :     BlockNumber blockno = BufferGetBlockNumber(buf);
    2259             :     OffsetNumber offnum,
    2260             :                 maxoff;
    2261       22064 :     bool        all_visible = true;
    2262             : 
    2263       22064 :     *visibility_cutoff_xid = InvalidTransactionId;
    2264       22064 :     *all_frozen = true;
    2265             : 
    2266             :     /*
    2267             :      * This is a stripped down version of the line pointer scan in
    2268             :      * lazy_scan_heap(). So if you change anything here, also check that code.
    2269             :      */
    2270       22064 :     maxoff = PageGetMaxOffsetNumber(page);
    2271     1689848 :     for (offnum = FirstOffsetNumber;
    2272     1646336 :          offnum <= maxoff && all_visible;
    2273     1645720 :          offnum = OffsetNumberNext(offnum))
    2274             :     {
    2275             :         ItemId      itemid;
    2276             :         HeapTupleData tuple;
    2277             : 
    2278     1645720 :         itemid = PageGetItemId(page, offnum);
    2279             : 
    2280             :         /* Unused or redirect line pointers are of no interest */
    2281     1645720 :         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
    2282     1245996 :             continue;
    2283             : 
    2284      399724 :         ItemPointerSet(&(tuple.t_self), blockno, offnum);
    2285             : 
    2286             :         /*
    2287             :          * Dead line pointers can have index pointers pointing to them. So
     2288             :          * they can't be treated as visible.
    2289             :          */
    2290      399724 :         if (ItemIdIsDead(itemid))
    2291             :         {
    2292           0 :             all_visible = false;
    2293           0 :             *all_frozen = false;
    2294           0 :             break;
    2295             :         }
    2296             : 
    2297             :         Assert(ItemIdIsNormal(itemid));
    2298             : 
    2299      399724 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2300      399724 :         tuple.t_len = ItemIdGetLength(itemid);
    2301      399724 :         tuple.t_tableOid = RelationGetRelid(rel);
    2302             : 
    2303      399724 :         switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
    2304             :         {
    2305             :             case HEAPTUPLE_LIVE:
    2306             :                 {
    2307             :                     TransactionId xmin;
    2308             : 
    2309             :                     /* Check comments in lazy_scan_heap. */
    2310      399212 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    2311             :                     {
    2312           0 :                         all_visible = false;
    2313           0 :                         *all_frozen = false;
    2314           0 :                         break;
    2315             :                     }
    2316             : 
    2317             :                     /*
    2318             :                      * The inserter definitely committed. But is it old enough
    2319             :                      * that everyone sees it as committed?
    2320             :                      */
    2321      399212 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    2322      399212 :                     if (!TransactionIdPrecedes(xmin, OldestXmin))
    2323             :                     {
    2324         104 :                         all_visible = false;
    2325         104 :                         *all_frozen = false;
    2326         104 :                         break;
    2327             :                     }
    2328             : 
    2329             :                     /* Track newest xmin on page. */
    2330      399108 :                     if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
    2331       31446 :                         *visibility_cutoff_xid = xmin;
    2332             : 
    2333             :                     /* Check whether this tuple is already frozen or not */
    2334      693970 :                     if (all_visible && *all_frozen &&
    2335      294862 :                         heap_tuple_needs_eventual_freeze(tuple.t_data))
    2336        3952 :                         *all_frozen = false;
    2337             :                 }
    2338      399108 :                 break;
    2339             : 
    2340             :             case HEAPTUPLE_DEAD:
    2341             :             case HEAPTUPLE_RECENTLY_DEAD:
    2342             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    2343             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    2344             :                 {
    2345         512 :                     all_visible = false;
    2346         512 :                     *all_frozen = false;
    2347         512 :                     break;
    2348             :                 }
    2349             :             default:
    2350           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    2351             :                 break;
    2352             :         }
    2353             :     }                           /* scan along page */
    2354             : 
    2355       22064 :     return all_visible;
    2356             : }
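
The page-level summary computed by heap_page_is_all_visible() boils down to this: every remaining tuple must be LIVE with a committed xmin older than OldestXmin, the newest such xmin is returned as the page's visibility cutoff, and *all_frozen drops to false as soon as one tuple still needs freezing. The sketch below models only that reduction, with illustrative types, treating transaction ids as plain integers and ignoring wraparound, redirect/dead line pointers, and in-progress states.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct DemoTuple
    {
        uint32_t    xmin;       /* inserting transaction id */
        bool        committed;  /* inserter known committed? */
        bool        frozen;     /* tuple already frozen? */
    } DemoTuple;

    /*
     * Returns true only if every tuple is visible to all current and future
     * snapshots; also reports the newest xmin seen and whether all are frozen.
     */
    static bool
    demo_page_all_visible(const DemoTuple *tup, size_t ntup, uint32_t oldest_xmin,
                          uint32_t *cutoff_xid, bool *all_frozen)
    {
        *cutoff_xid = 0;
        *all_frozen = true;

        for (size_t i = 0; i < ntup; i++)
        {
            if (!tup[i].committed || tup[i].xmin >= oldest_xmin)
            {
                *all_frozen = false;
                return false;   /* some snapshot may not yet see this tuple */
            }
            if (tup[i].xmin > *cutoff_xid)
                *cutoff_xid = tup[i].xmin;      /* track newest xmin on the page */
            if (!tup[i].frozen)
                *all_frozen = false;
        }
        return true;
    }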

Generated by: LCOV version 1.13