LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_handler.c (source / functions)
Test: PostgreSQL 18devel
Date: 2025-04-01 15:15:16
Coverage: Lines: 740 of 798 hit (92.7 %) - Functions: 34 of 34 hit (100.0 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heapam_handler.c
       4             :  *    heap table access method code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/heapam_handler.c
      12             :  *
      13             :  *
      14             :  * NOTES
      15             :  *    This file wires up the lower-level heapam.c et al. routines with the
      16             :  *    tableam abstraction.
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/genam.h"
      23             : #include "access/heapam.h"
      24             : #include "access/heaptoast.h"
      25             : #include "access/multixact.h"
      26             : #include "access/rewriteheap.h"
      27             : #include "access/syncscan.h"
      28             : #include "access/tableam.h"
      29             : #include "access/tsmapi.h"
      30             : #include "access/visibilitymap.h"
      31             : #include "access/xact.h"
      32             : #include "catalog/catalog.h"
      33             : #include "catalog/index.h"
      34             : #include "catalog/storage.h"
      35             : #include "catalog/storage_xlog.h"
      36             : #include "commands/progress.h"
      37             : #include "executor/executor.h"
      38             : #include "miscadmin.h"
      39             : #include "pgstat.h"
      40             : #include "storage/bufmgr.h"
      41             : #include "storage/bufpage.h"
      42             : #include "storage/lmgr.h"
      43             : #include "storage/predicate.h"
      44             : #include "storage/procarray.h"
      45             : #include "storage/smgr.h"
      46             : #include "utils/builtins.h"
      47             : #include "utils/rel.h"
      48             : 
      49             : static void reform_and_rewrite_tuple(HeapTuple tuple,
      50             :                                      Relation OldHeap, Relation NewHeap,
      51             :                                      Datum *values, bool *isnull, RewriteState rwstate);
      52             : 
      53             : static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
      54             :                                    HeapTuple tuple,
      55             :                                    OffsetNumber tupoffset);
      56             : 
      57             : static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
      58             : 
      59             : static bool BitmapHeapScanNextBlock(TableScanDesc scan,
      60             :                                     bool *recheck,
      61             :                                     uint64 *lossy_pages, uint64 *exact_pages);
      62             : 
      63             : 
      64             : /* ------------------------------------------------------------------------
      65             :  * Slot related callbacks for heap AM
      66             :  * ------------------------------------------------------------------------
      67             :  */
      68             : 
      69             : static const TupleTableSlotOps *
      70    26308012 : heapam_slot_callbacks(Relation relation)
      71             : {
      72    26308012 :     return &TTSOpsBufferHeapTuple;
      73             : }
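
/*
 * Illustrative sketch (editorial, not part of the measured source): callers
 * do not invoke heapam_slot_callbacks directly; they go through the tableam
 * wrappers, roughly:
 *
 *     const TupleTableSlotOps *ops = table_slot_callbacks(rel);
 *     TupleTableSlot *slot = table_slot_create(rel, NULL);
 *
 * which consult rel->rd_tableam->slot_callbacks and, for heap relations,
 * yield TTSOpsBufferHeapTuple slots that can reference a tuple pinned in a
 * shared buffer without copying it.
 */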
      74             : 
      75             : 
      76             : /* ------------------------------------------------------------------------
      77             :  * Index Scan Callbacks for heap AM
      78             :  * ------------------------------------------------------------------------
      79             :  */
      80             : 
      81             : static IndexFetchTableData *
      82    25190238 : heapam_index_fetch_begin(Relation rel)
      83             : {
      84    25190238 :     IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
      85             : 
      86    25190238 :     hscan->xs_base.rel = rel;
      87    25190238 :     hscan->xs_cbuf = InvalidBuffer;
      88             : 
      89    25190238 :     return &hscan->xs_base;
      90             : }
      91             : 
      92             : static void
      93    46156924 : heapam_index_fetch_reset(IndexFetchTableData *scan)
      94             : {
      95    46156924 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
      96             : 
      97    46156924 :     if (BufferIsValid(hscan->xs_cbuf))
      98             :     {
      99    21472070 :         ReleaseBuffer(hscan->xs_cbuf);
     100    21472070 :         hscan->xs_cbuf = InvalidBuffer;
     101             :     }
     102    46156924 : }
     103             : 
     104             : static void
     105    25188552 : heapam_index_fetch_end(IndexFetchTableData *scan)
     106             : {
     107    25188552 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     108             : 
     109    25188552 :     heapam_index_fetch_reset(scan);
     110             : 
     111    25188552 :     pfree(hscan);
     112    25188552 : }
     113             : 
     114             : static bool
     115    35839764 : heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
     116             :                          ItemPointer tid,
     117             :                          Snapshot snapshot,
     118             :                          TupleTableSlot *slot,
     119             :                          bool *call_again, bool *all_dead)
     120             : {
     121    35839764 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     122    35839764 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     123             :     bool        got_heap_tuple;
     124             : 
     125             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     126             : 
     127             :     /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
     128    35839764 :     if (!*call_again)
     129             :     {
     130             :         /* Switch to correct buffer if we don't have it already */
     131    35681626 :         Buffer      prev_buf = hscan->xs_cbuf;
     132             : 
     133    35681626 :         hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
     134             :                                               hscan->xs_base.rel,
     135             :                                               ItemPointerGetBlockNumber(tid));
     136             : 
     137             :         /*
     138             :          * Prune page, but only if we weren't already on this page
     139             :          */
     140    35681620 :         if (prev_buf != hscan->xs_cbuf)
     141    24696078 :             heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
     142             :     }
     143             : 
     144             :     /* Obtain share-lock on the buffer so we can examine visibility */
     145    35839758 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
     146    35839758 :     got_heap_tuple = heap_hot_search_buffer(tid,
     147             :                                             hscan->xs_base.rel,
     148             :                                             hscan->xs_cbuf,
     149             :                                             snapshot,
     150             :                                             &bslot->base.tupdata,
     151             :                                             all_dead,
     152    35839758 :                                             !*call_again);
     153    35839754 :     bslot->base.tupdata.t_self = *tid;
     154    35839754 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
     155             : 
     156    35839754 :     if (got_heap_tuple)
     157             :     {
     158             :         /*
     159             :          * Only in a non-MVCC snapshot can more than one member of the HOT
     160             :          * chain be visible.
     161             :          */
     162    23280438 :         *call_again = !IsMVCCSnapshot(snapshot);
     163             : 
     164    23280438 :         slot->tts_tableOid = RelationGetRelid(scan->rel);
     165    23280438 :         ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
     166             :     }
     167             :     else
     168             :     {
     169             :         /* We've reached the end of the HOT chain. */
     170    12559316 :         *call_again = false;
     171             :     }
     172             : 
     173    35839754 :     return got_heap_tuple;
     174             : }
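
/*
 * Illustrative sketch (assumed caller pattern, not part of the measured
 * source): an index scan obtains TIDs from the index AM and fetches the
 * matching heap tuple through the tableam wrappers, re-calling while
 * call_again is set to walk the rest of a HOT chain, roughly:
 *
 *     IndexFetchTableData *fetch = table_index_fetch_begin(rel);
 *     bool found, call_again = false, all_dead = false;
 *
 *     do
 *     {
 *         found = table_index_fetch_tuple(fetch, tid, snapshot, slot,
 *                                         &call_again, &all_dead);
 *     } while (!found && call_again);
 *
 *     table_index_fetch_end(fetch);
 */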
     175             : 
     176             : 
     177             : /* ------------------------------------------------------------------------
     178             :  * Callbacks for non-modifying operations on individual tuples for heap AM
     179             :  * ------------------------------------------------------------------------
     180             :  */
     181             : 
     182             : static bool
     183      352828 : heapam_fetch_row_version(Relation relation,
     184             :                          ItemPointer tid,
     185             :                          Snapshot snapshot,
     186             :                          TupleTableSlot *slot)
     187             : {
     188      352828 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     189             :     Buffer      buffer;
     190             : 
     191             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     192             : 
     193      352828 :     bslot->base.tupdata.t_self = *tid;
     194      352828 :     if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false))
     195             :     {
     196             :         /* store in slot, transferring existing pin */
     197      352140 :         ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
     198      352140 :         slot->tts_tableOid = RelationGetRelid(relation);
     199             : 
     200      352140 :         return true;
     201             :     }
     202             : 
     203         688 :     return false;
     204             : }
     205             : 
     206             : static bool
     207         706 : heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
     208             : {
     209         706 :     HeapScanDesc hscan = (HeapScanDesc) scan;
     210             : 
     211        1394 :     return ItemPointerIsValid(tid) &&
     212         688 :         ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
     213             : }
     214             : 
     215             : static bool
     216      227058 : heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
     217             :                                 Snapshot snapshot)
     218             : {
     219      227058 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     220             :     bool        res;
     221             : 
     222             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     223             :     Assert(BufferIsValid(bslot->buffer));
     224             : 
     225             :     /*
     226             :      * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
     227             :      * Caller should be holding pin, but not lock.
     228             :      */
     229      227058 :     LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
     230      227058 :     res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
     231             :                                        bslot->buffer);
     232      227058 :     LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
     233             : 
     234      227058 :     return res;
     235             : }
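
/*
 * Illustrative sketch (assumed usage): callers reach the function above via
 * table_tuple_satisfies_snapshot() and, per the comment above, must hold a
 * pin but no lock on the slot's underlying buffer:
 *
 *     if (table_tuple_satisfies_snapshot(rel, slot, snapshot))
 *         ... the tuple is visible to snapshot ...
 */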
     236             : 
     237             : 
     238             : /* ----------------------------------------------------------------------------
     239             :  *  Functions for manipulations of physical tuples for heap AM.
     240             :  * ----------------------------------------------------------------------------
     241             :  */
     242             : 
     243             : static void
     244    14155098 : heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
     245             :                     int options, BulkInsertState bistate)
     246             : {
     247    14155098 :     bool        shouldFree = true;
     248    14155098 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     249             : 
     250             :     /* Update the tuple with table oid */
     251    14155098 :     slot->tts_tableOid = RelationGetRelid(relation);
     252    14155098 :     tuple->t_tableOid = slot->tts_tableOid;
     253             : 
     254             :     /* Perform the insertion, and copy the resulting ItemPointer */
     255    14155098 :     heap_insert(relation, tuple, cid, options, bistate);
     256    14155064 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     257             : 
     258    14155064 :     if (shouldFree)
     259     2938238 :         pfree(tuple);
     260    14155064 : }
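
/*
 * Illustrative sketch (assumed usage): ModifyTable and COPY reach the
 * function above through the generic wrapper; on return the slot's tts_tid
 * holds the tuple's new location, e.g.:
 *
 *     table_tuple_insert(rel, slot, GetCurrentCommandId(true), 0, NULL);
 *
 * where 0 is the options bitmask and NULL means no BulkInsertState.
 */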
     261             : 
     262             : static void
     263        4128 : heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
     264             :                                 CommandId cid, int options,
     265             :                                 BulkInsertState bistate, uint32 specToken)
     266             : {
     267        4128 :     bool        shouldFree = true;
     268        4128 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     269             : 
     270             :     /* Update the tuple with table oid */
     271        4128 :     slot->tts_tableOid = RelationGetRelid(relation);
     272        4128 :     tuple->t_tableOid = slot->tts_tableOid;
     273             : 
     274        4128 :     HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
     275        4128 :     options |= HEAP_INSERT_SPECULATIVE;
     276             : 
     277             :     /* Perform the insertion, and copy the resulting ItemPointer */
     278        4128 :     heap_insert(relation, tuple, cid, options, bistate);
     279        4128 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     280             : 
     281        4128 :     if (shouldFree)
     282          60 :         pfree(tuple);
     283        4128 : }
     284             : 
     285             : static void
     286        4122 : heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
     287             :                                   uint32 specToken, bool succeeded)
     288             : {
     289        4122 :     bool        shouldFree = true;
     290        4122 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     291             : 
     292             :     /* adjust the tuple's state accordingly */
     293        4122 :     if (succeeded)
     294        4112 :         heap_finish_speculative(relation, &slot->tts_tid);
     295             :     else
     296          10 :         heap_abort_speculative(relation, &slot->tts_tid);
     297             : 
     298        4122 :     if (shouldFree)
     299          60 :         pfree(tuple);
     300        4122 : }
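
/*
 * Illustrative sketch (assumed flow): the two speculative callbacks above
 * implement the table AM side of INSERT ... ON CONFLICT, roughly:
 *
 *     token = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
 *     table_tuple_insert_speculative(rel, slot, cid, 0, NULL, token);
 *     conflict = ...recheck unique indexes...;
 *     table_tuple_complete_speculative(rel, slot, token, !conflict);
 *
 * On conflict the speculatively inserted tuple is killed via
 * heap_abort_speculative() rather than by a full transaction abort.
 */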
     301             : 
     302             : static TM_Result
     303     1726496 : heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
     304             :                     Snapshot snapshot, Snapshot crosscheck, bool wait,
     305             :                     TM_FailureData *tmfd, bool changingPart)
     306             : {
     307             :     /*
      308             :      * Currently, deletion of index tuples is handled by VACUUM.  If the
      309             :      * storage were to clean up dead tuples by itself, that would also be
      310             :      * the time to delete the corresponding index tuples.
     311             :      */
     312     1726496 :     return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
     313             : }
     314             : 
     315             : 
     316             : static TM_Result
     317      385484 : heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
     318             :                     CommandId cid, Snapshot snapshot, Snapshot crosscheck,
     319             :                     bool wait, TM_FailureData *tmfd,
     320             :                     LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
     321             : {
     322      385484 :     bool        shouldFree = true;
     323      385484 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     324             :     TM_Result   result;
     325             : 
     326             :     /* Update the tuple with table oid */
     327      385484 :     slot->tts_tableOid = RelationGetRelid(relation);
     328      385484 :     tuple->t_tableOid = slot->tts_tableOid;
     329             : 
     330      385484 :     result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
     331             :                          tmfd, lockmode, update_indexes);
     332      385460 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     333             : 
     334             :     /*
     335             :      * Decide whether new index entries are needed for the tuple
     336             :      *
     337             :      * Note: heap_update returns the tid (location) of the new tuple in the
     338             :      * t_self field.
     339             :      *
     340             :      * If the update is not HOT, we must update all indexes. If the update is
     341             :      * HOT, it could be that we updated summarized columns, so we either
     342             :      * update only summarized indexes, or none at all.
     343             :      */
     344      385460 :     if (result != TM_Ok)
     345             :     {
     346             :         Assert(*update_indexes == TU_None);
     347         304 :         *update_indexes = TU_None;
     348             :     }
     349      385156 :     else if (!HeapTupleIsHeapOnly(tuple))
     350             :         Assert(*update_indexes == TU_All);
     351             :     else
     352             :         Assert((*update_indexes == TU_Summarizing) ||
     353             :                (*update_indexes == TU_None));
     354             : 
     355      385460 :     if (shouldFree)
     356       63882 :         pfree(tuple);
     357             : 
     358      385460 :     return result;
     359             : }
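
/*
 * Illustrative sketch (assumed caller behavior, pseudocode): the executor
 * acts on the update_indexes result of the function above roughly as:
 *
 *     switch (update_indexes)
 *     {
 *         case TU_All:         insert entries into all indexes
 *         case TU_Summarizing: insert only into summarizing (e.g. BRIN)
 *                              indexes, since the update was HOT
 *         case TU_None:        no index maintenance needed
 *     }
 */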
     360             : 
     361             : static TM_Result
     362      169384 : heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
     363             :                   TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
     364             :                   LockWaitPolicy wait_policy, uint8 flags,
     365             :                   TM_FailureData *tmfd)
     366             : {
     367      169384 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     368             :     TM_Result   result;
     369             :     Buffer      buffer;
     370      169384 :     HeapTuple   tuple = &bslot->base.tupdata;
     371             :     bool        follow_updates;
     372             : 
     373      169384 :     follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
     374      169384 :     tmfd->traversed = false;
     375             : 
     376             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     377             : 
     378      169690 : tuple_lock_retry:
     379      169690 :     tuple->t_self = *tid;
     380      169690 :     result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
     381             :                              follow_updates, &buffer, tmfd);
     382             : 
     383      169672 :     if (result == TM_Updated &&
     384         374 :         (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
     385             :     {
     386             :         /* Should not encounter speculative tuple on recheck */
     387             :         Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
     388             : 
     389         348 :         ReleaseBuffer(buffer);
     390             : 
     391         348 :         if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
     392             :         {
     393             :             SnapshotData SnapshotDirty;
     394             :             TransactionId priorXmax;
     395             : 
     396             :             /* it was updated, so look at the updated version */
     397         348 :             *tid = tmfd->ctid;
     398             :             /* updated row should have xmin matching this xmax */
     399         348 :             priorXmax = tmfd->xmax;
     400             : 
     401             :             /* signal that a tuple later in the chain is getting locked */
     402         348 :             tmfd->traversed = true;
     403             : 
     404             :             /*
     405             :              * fetch target tuple
     406             :              *
     407             :              * Loop here to deal with updated or busy tuples
     408             :              */
     409         348 :             InitDirtySnapshot(SnapshotDirty);
     410             :             for (;;)
     411             :             {
     412         400 :                 if (ItemPointerIndicatesMovedPartitions(tid))
     413          18 :                     ereport(ERROR,
     414             :                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
     415             :                              errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
     416             : 
     417         382 :                 tuple->t_self = *tid;
     418         382 :                 if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer, true))
     419             :                 {
     420             :                     /*
     421             :                      * If xmin isn't what we're expecting, the slot must have
     422             :                      * been recycled and reused for an unrelated tuple.  This
     423             :                      * implies that the latest version of the row was deleted,
     424             :                      * so we need do nothing.  (Should be safe to examine xmin
     425             :                      * without getting buffer's content lock.  We assume
     426             :                      * reading a TransactionId to be atomic, and Xmin never
     427             :                      * changes in an existing tuple, except to invalid or
     428             :                      * frozen, and neither of those can match priorXmax.)
     429             :                      */
     430         324 :                     if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     431             :                                              priorXmax))
     432             :                     {
     433           0 :                         ReleaseBuffer(buffer);
     434          22 :                         return TM_Deleted;
     435             :                     }
     436             : 
     437             :                     /* otherwise xmin should not be dirty... */
     438         324 :                     if (TransactionIdIsValid(SnapshotDirty.xmin))
     439           0 :                         ereport(ERROR,
     440             :                                 (errcode(ERRCODE_DATA_CORRUPTED),
     441             :                                  errmsg_internal("t_xmin %u is uncommitted in tuple (%u,%u) to be updated in table \"%s\"",
     442             :                                                  SnapshotDirty.xmin,
     443             :                                                  ItemPointerGetBlockNumber(&tuple->t_self),
     444             :                                                  ItemPointerGetOffsetNumber(&tuple->t_self),
     445             :                                                  RelationGetRelationName(relation))));
     446             : 
     447             :                     /*
     448             :                      * If tuple is being updated by other transaction then we
     449             :                      * have to wait for its commit/abort, or die trying.
     450             :                      */
     451         324 :                     if (TransactionIdIsValid(SnapshotDirty.xmax))
     452             :                     {
     453           4 :                         ReleaseBuffer(buffer);
     454           4 :                         switch (wait_policy)
     455             :                         {
     456           0 :                             case LockWaitBlock:
     457           0 :                                 XactLockTableWait(SnapshotDirty.xmax,
     458             :                                                   relation, &tuple->t_self,
     459             :                                                   XLTW_FetchUpdated);
     460           0 :                                 break;
     461           2 :                             case LockWaitSkip:
     462           2 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false))
     463             :                                     /* skip instead of waiting */
     464           2 :                                     return TM_WouldBlock;
     465           0 :                                 break;
     466           2 :                             case LockWaitError:
     467           2 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
     468           2 :                                     ereport(ERROR,
     469             :                                             (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     470             :                                              errmsg("could not obtain lock on row in relation \"%s\"",
     471             :                                                     RelationGetRelationName(relation))));
     472           0 :                                 break;
     473             :                         }
     474           0 :                         continue;   /* loop back to repeat heap_fetch */
     475             :                     }
     476             : 
     477             :                     /*
     478             :                      * If tuple was inserted by our own transaction, we have
     479             :                      * to check cmin against cid: cmin >= current CID means
     480             :                      * our command cannot see the tuple, so we should ignore
     481             :                      * it. Otherwise heap_lock_tuple() will throw an error,
     482             :                      * and so would any later attempt to update or delete the
     483             :                      * tuple.  (We need not check cmax because
     484             :                      * HeapTupleSatisfiesDirty will consider a tuple deleted
     485             :                      * by our transaction dead, regardless of cmax.)  We just
     486             :                      * checked that priorXmax == xmin, so we can test that
     487             :                      * variable instead of doing HeapTupleHeaderGetXmin again.
     488             :                      */
     489         334 :                     if (TransactionIdIsCurrentTransactionId(priorXmax) &&
     490          14 :                         HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
     491             :                     {
     492          14 :                         tmfd->xmax = priorXmax;
     493             : 
     494             :                         /*
     495             :                          * Cmin is the problematic value, so store that. See
     496             :                          * above.
     497             :                          */
     498          14 :                         tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
     499          14 :                         ReleaseBuffer(buffer);
     500          14 :                         return TM_SelfModified;
     501             :                     }
     502             : 
     503             :                     /*
     504             :                      * This is a live tuple, so try to lock it again.
     505             :                      */
     506         306 :                     ReleaseBuffer(buffer);
     507         306 :                     goto tuple_lock_retry;
     508             :                 }
     509             : 
     510             :                 /*
     511             :                  * If the referenced slot was actually empty, the latest
     512             :                  * version of the row must have been deleted, so we need do
     513             :                  * nothing.
     514             :                  */
     515          58 :                 if (tuple->t_data == NULL)
     516             :                 {
     517             :                     Assert(!BufferIsValid(buffer));
     518           0 :                     return TM_Deleted;
     519             :                 }
     520             : 
     521             :                 /*
     522             :                  * As above, if xmin isn't what we're expecting, do nothing.
     523             :                  */
     524          58 :                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     525             :                                          priorXmax))
     526             :                 {
     527           0 :                     ReleaseBuffer(buffer);
     528           0 :                     return TM_Deleted;
     529             :                 }
     530             : 
     531             :                 /*
     532             :                  * If we get here, the tuple was found but failed
     533             :                  * SnapshotDirty. Assuming the xmin is either a committed xact
     534             :                  * or our own xact (as it certainly should be if we're trying
     535             :                  * to modify the tuple), this must mean that the row was
     536             :                  * updated or deleted by either a committed xact or our own
     537             :                  * xact.  If it was deleted, we can ignore it; if it was
     538             :                  * updated then chain up to the next version and repeat the
     539             :                  * whole process.
     540             :                  *
     541             :                  * As above, it should be safe to examine xmax and t_ctid
     542             :                  * without the buffer content lock, because they can't be
     543             :                  * changing.  We'd better hold a buffer pin though.
     544             :                  */
     545          58 :                 if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
     546             :                 {
     547             :                     /* deleted, so forget about it */
     548           6 :                     ReleaseBuffer(buffer);
     549           6 :                     return TM_Deleted;
     550             :                 }
     551             : 
     552             :                 /* updated, so look at the updated row */
     553          52 :                 *tid = tuple->t_data->t_ctid;
     554             :                 /* updated row should have xmin matching this xmax */
     555          52 :                 priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
     556          52 :                 ReleaseBuffer(buffer);
     557             :                 /* loop back to fetch next in chain */
     558             :             }
     559             :         }
     560             :         else
     561             :         {
     562             :             /* tuple was deleted, so give up */
     563           0 :             return TM_Deleted;
     564             :         }
     565             :     }
     566             : 
     567      169324 :     slot->tts_tableOid = RelationGetRelid(relation);
     568      169324 :     tuple->t_tableOid = slot->tts_tableOid;
     569             : 
     570             :     /* store in slot, transferring existing pin */
     571      169324 :     ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
     572             : 
     573      169324 :     return result;
     574             : }
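
/*
 * Illustrative sketch (assumed usage): EvalPlanQual-style callers pass
 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION so that, on a concurrent update, the
 * function above chases the ctid chain to the latest row version:
 *
 *     result = table_tuple_lock(rel, tid, snapshot, slot, cid,
 *                               LockTupleExclusive, LockWaitBlock,
 *                               TUPLE_LOCK_FLAG_FIND_LAST_VERSION, &tmfd);
 *     if (result == TM_Ok && tmfd.traversed)
 *         ... a newer version was locked; recheck the query's quals ...
 */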
     575             : 
     576             : 
     577             : /* ------------------------------------------------------------------------
     578             :  * DDL related callbacks for heap AM.
     579             :  * ------------------------------------------------------------------------
     580             :  */
     581             : 
     582             : static void
     583       62218 : heapam_relation_set_new_filelocator(Relation rel,
     584             :                                     const RelFileLocator *newrlocator,
     585             :                                     char persistence,
     586             :                                     TransactionId *freezeXid,
     587             :                                     MultiXactId *minmulti)
     588             : {
     589             :     SMgrRelation srel;
     590             : 
     591             :     /*
     592             :      * Initialize to the minimum XID that could put tuples in the table. We
     593             :      * know that no xacts older than RecentXmin are still running, so that
     594             :      * will do.
     595             :      */
     596       62218 :     *freezeXid = RecentXmin;
     597             : 
     598             :     /*
     599             :      * Similarly, initialize the minimum Multixact to the first value that
     600             :      * could possibly be stored in tuples in the table.  Running transactions
     601             :      * could reuse values from their local cache, so we are careful to
     602             :      * consider all currently running multis.
     603             :      *
     604             :      * XXX this could be refined further, but is it worth the hassle?
     605             :      */
     606       62218 :     *minmulti = GetOldestMultiXactId();
     607             : 
     608       62218 :     srel = RelationCreateStorage(*newrlocator, persistence, true);
     609             : 
     610             :     /*
     611             :      * If required, set up an init fork for an unlogged table so that it can
     612             :      * be correctly reinitialized on restart.
     613             :      */
     614       62218 :     if (persistence == RELPERSISTENCE_UNLOGGED)
     615             :     {
     616             :         Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
     617             :                rel->rd_rel->relkind == RELKIND_TOASTVALUE);
     618         234 :         smgrcreate(srel, INIT_FORKNUM, false);
     619         234 :         log_smgrcreate(newrlocator, INIT_FORKNUM);
     620             :     }
     621             : 
     622       62218 :     smgrclose(srel);
     623       62218 : }
     624             : 
     625             : static void
     626         576 : heapam_relation_nontransactional_truncate(Relation rel)
     627             : {
     628         576 :     RelationTruncate(rel, 0);
     629         576 : }
     630             : 
     631             : static void
     632          98 : heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
     633             : {
     634             :     SMgrRelation dstrel;
     635             : 
     636             :     /*
     637             :      * Since we copy the file directly without looking at the shared buffers,
     638             :      * we'd better first flush out any pages of the source relation that are
     639             :      * in shared buffers.  We assume no new changes will be made while we are
     640             :      * holding exclusive lock on the rel.
     641             :      */
     642          98 :     FlushRelationBuffers(rel);
     643             : 
     644             :     /*
     645             :      * Create and copy all forks of the relation, and schedule unlinking of
     646             :      * old physical files.
     647             :      *
     648             :      * NOTE: any conflict in relfilenumber value will be caught in
     649             :      * RelationCreateStorage().
     650             :      */
     651          98 :     dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true);
     652             : 
     653             :     /* copy main fork */
     654          98 :     RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM,
     655          98 :                         rel->rd_rel->relpersistence);
     656             : 
     657             :     /* copy those extra forks that exist */
     658         392 :     for (ForkNumber forkNum = MAIN_FORKNUM + 1;
     659         294 :          forkNum <= MAX_FORKNUM; forkNum++)
     660             :     {
     661         294 :         if (smgrexists(RelationGetSmgr(rel), forkNum))
     662             :         {
     663          18 :             smgrcreate(dstrel, forkNum, false);
     664             : 
     665             :             /*
     666             :              * WAL log creation if the relation is persistent, or this is the
     667             :              * init fork of an unlogged relation.
     668             :              */
     669          18 :             if (RelationIsPermanent(rel) ||
     670           6 :                 (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
     671             :                  forkNum == INIT_FORKNUM))
     672          12 :                 log_smgrcreate(newrlocator, forkNum);
     673          18 :             RelationCopyStorage(RelationGetSmgr(rel), dstrel, forkNum,
     674          18 :                                 rel->rd_rel->relpersistence);
     675             :         }
     676             :     }
     677             : 
     678             : 
     679             :     /* drop old relation, and close new one */
     680          98 :     RelationDropStorage(rel);
     681          98 :     smgrclose(dstrel);
     682          98 : }
     683             : 
     684             : static void
     685         562 : heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
     686             :                                  Relation OldIndex, bool use_sort,
     687             :                                  TransactionId OldestXmin,
     688             :                                  TransactionId *xid_cutoff,
     689             :                                  MultiXactId *multi_cutoff,
     690             :                                  double *num_tuples,
     691             :                                  double *tups_vacuumed,
     692             :                                  double *tups_recently_dead)
     693             : {
     694             :     RewriteState rwstate;
     695             :     IndexScanDesc indexScan;
     696             :     TableScanDesc tableScan;
     697             :     HeapScanDesc heapScan;
     698             :     bool        is_system_catalog;
     699             :     Tuplesortstate *tuplesort;
     700         562 :     TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
     701         562 :     TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
     702             :     TupleTableSlot *slot;
     703             :     int         natts;
     704             :     Datum      *values;
     705             :     bool       *isnull;
     706             :     BufferHeapTupleTableSlot *hslot;
     707         562 :     BlockNumber prev_cblock = InvalidBlockNumber;
     708             : 
     709             :     /* Remember if it's a system catalog */
     710         562 :     is_system_catalog = IsSystemRelation(OldHeap);
     711             : 
     712             :     /*
     713             :      * Valid smgr_targblock implies something already wrote to the relation.
     714             :      * This may be harmless, but this function hasn't planned for it.
     715             :      */
     716             :     Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
     717             : 
     718             :     /* Preallocate values/isnull arrays */
     719         562 :     natts = newTupDesc->natts;
     720         562 :     values = (Datum *) palloc(natts * sizeof(Datum));
     721         562 :     isnull = (bool *) palloc(natts * sizeof(bool));
     722             : 
     723             :     /* Initialize the rewrite operation */
     724         562 :     rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
     725             :                                  *multi_cutoff);
     726             : 
     727             : 
     728             :     /* Set up sorting if wanted */
     729         562 :     if (use_sort)
     730         110 :         tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
     731             :                                             maintenance_work_mem,
     732             :                                             NULL, TUPLESORT_NONE);
     733             :     else
     734         452 :         tuplesort = NULL;
     735             : 
     736             :     /*
     737             :      * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
     738             :      * that still need to be copied, we scan with SnapshotAny and use
     739             :      * HeapTupleSatisfiesVacuum for the visibility test.
     740             :      */
     741         562 :     if (OldIndex != NULL && !use_sort)
     742          78 :     {
     743          78 :         const int   ci_index[] = {
     744             :             PROGRESS_CLUSTER_PHASE,
     745             :             PROGRESS_CLUSTER_INDEX_RELID
     746             :         };
     747             :         int64       ci_val[2];
     748             : 
      749             :         /* Set the progress columns: scan phase and old-index OID */
     750          78 :         ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
     751          78 :         ci_val[1] = RelationGetRelid(OldIndex);
     752          78 :         pgstat_progress_update_multi_param(2, ci_index, ci_val);
     753             : 
     754          78 :         tableScan = NULL;
     755          78 :         heapScan = NULL;
     756          78 :         indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, NULL, 0, 0);
     757          78 :         index_rescan(indexScan, NULL, 0, NULL, 0);
     758             :     }
     759             :     else
     760             :     {
     761             :         /* In scan-and-sort mode and also VACUUM FULL, set phase */
     762         484 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     763             :                                      PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
     764             : 
     765         484 :         tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
     766         484 :         heapScan = (HeapScanDesc) tableScan;
     767         484 :         indexScan = NULL;
     768             : 
     769             :         /* Set total heap blocks */
     770         484 :         pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
     771         484 :                                      heapScan->rs_nblocks);
     772             :     }
     773             : 
     774         562 :     slot = table_slot_create(OldHeap, NULL);
     775         562 :     hslot = (BufferHeapTupleTableSlot *) slot;
     776             : 
     777             :     /*
     778             :      * Scan through the OldHeap, either in OldIndex order or sequentially;
     779             :      * copy each tuple into the NewHeap, or transiently to the tuplesort
     780             :      * module.  Note that we don't bother sorting dead tuples (they won't get
     781             :      * to the new table anyway).
     782             :      */
     783             :     for (;;)
     784      780586 :     {
     785             :         HeapTuple   tuple;
     786             :         Buffer      buf;
     787             :         bool        isdead;
     788             : 
     789      781148 :         CHECK_FOR_INTERRUPTS();
     790             : 
     791      781148 :         if (indexScan != NULL)
     792             :         {
     793         186 :             if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
     794          78 :                 break;
     795             : 
     796             :             /* Since we used no scan keys, should never need to recheck */
     797         108 :             if (indexScan->xs_recheck)
     798           0 :                 elog(ERROR, "CLUSTER does not support lossy index conditions");
     799             :         }
     800             :         else
     801             :         {
     802      780962 :             if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
     803             :             {
     804             :                 /*
     805             :                  * If the last pages of the scan were empty, we would go to
     806             :                  * the next phase while heap_blks_scanned != heap_blks_total.
     807             :                  * Instead, to ensure that heap_blks_scanned is equivalent to
     808             :                  * heap_blks_total after the table scan phase, this parameter
     809             :                  * is manually updated to the correct value when the table
     810             :                  * scan finishes.
     811             :                  */
     812         484 :                 pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
     813         484 :                                              heapScan->rs_nblocks);
     814         484 :                 break;
     815             :             }
     816             : 
     817             :             /*
     818             :              * In scan-and-sort mode and also VACUUM FULL, set heap blocks
     819             :              * scanned
     820             :              *
     821             :              * Note that heapScan may start at an offset and wrap around, i.e.
     822             :              * rs_startblock may be >0, and rs_cblock may end with a number
     823             :              * below rs_startblock. To prevent showing this wraparound to the
     824             :              * user, we offset rs_cblock by rs_startblock (modulo rs_nblocks).
     825             :              */
     826      780478 :             if (prev_cblock != heapScan->rs_cblock)
     827             :             {
     828       11208 :                 pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
     829       11208 :                                              (heapScan->rs_cblock +
     830       11208 :                                               heapScan->rs_nblocks -
     831       11208 :                                               heapScan->rs_startblock
     832       11208 :                                               ) % heapScan->rs_nblocks + 1);
     833       11208 :                 prev_cblock = heapScan->rs_cblock;
     834             :             }
     835             :         }
     836             : 
     837      780586 :         tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
     838      780586 :         buf = hslot->buffer;
     839             : 
     840      780586 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
     841             : 
     842      780586 :         switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
     843             :         {
     844       32488 :             case HEAPTUPLE_DEAD:
     845             :                 /* Definitely dead */
     846       32488 :                 isdead = true;
     847       32488 :                 break;
     848       55128 :             case HEAPTUPLE_RECENTLY_DEAD:
     849       55128 :                 *tups_recently_dead += 1;
     850             :                 /* fall through */
     851      747902 :             case HEAPTUPLE_LIVE:
     852             :                 /* Live or recently dead, must copy it */
     853      747902 :                 isdead = false;
     854      747902 :                 break;
     855         150 :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     856             : 
     857             :                 /*
     858             :                  * Since we hold exclusive lock on the relation, normally the
     859             :                  * only way to see this is if it was inserted earlier in our
     860             :                  * own transaction.  However, it can happen in system
     861             :                  * catalogs, since we tend to release write lock before commit
     862             :                  * there.  Give a warning if neither case applies; but in any
     863             :                  * case we had better copy it.
     864             :                  */
     865         150 :                 if (!is_system_catalog &&
     866          20 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
     867           0 :                     elog(WARNING, "concurrent insert in progress within table \"%s\"",
     868             :                          RelationGetRelationName(OldHeap));
     869             :                 /* treat as live */
     870         150 :                 isdead = false;
     871         150 :                 break;
     872          46 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
     873             : 
     874             :                 /*
     875             :                  * Similar situation to INSERT_IN_PROGRESS case.
     876             :                  */
     877          46 :                 if (!is_system_catalog &&
     878          30 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
     879           0 :                     elog(WARNING, "concurrent delete in progress within table \"%s\"",
     880             :                          RelationGetRelationName(OldHeap));
     881             :                 /* treat as recently dead */
     882          46 :                 *tups_recently_dead += 1;
     883          46 :                 isdead = false;
     884          46 :                 break;
     885           0 :             default:
     886           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     887             :                 isdead = false; /* keep compiler quiet */
     888             :                 break;
     889             :         }
     890             : 
     891      780586 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     892             : 
     893      780586 :         if (isdead)
     894             :         {
     895       32488 :             *tups_vacuumed += 1;
     896             :             /* heap rewrite module still needs to see it... */
     897       32488 :             if (rewrite_heap_dead_tuple(rwstate, tuple))
     898             :             {
     899             :                 /* A previous recently-dead tuple is now known dead */
     900           0 :                 *tups_vacuumed += 1;
     901           0 :                 *tups_recently_dead -= 1;
     902             :             }
     903       32488 :             continue;
     904             :         }
     905             : 
     906      748098 :         *num_tuples += 1;
     907      748098 :         if (tuplesort != NULL)
     908             :         {
     909      547380 :             tuplesort_putheaptuple(tuplesort, tuple);
     910             : 
     911             :             /*
     912             :              * In scan-and-sort mode, report increase in number of tuples
     913             :              * scanned
     914             :              */
     915      547380 :             pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
     916      547380 :                                          *num_tuples);
     917             :         }
     918             :         else
     919             :         {
     920      200718 :             const int   ct_index[] = {
     921             :                 PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
     922             :                 PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
     923             :             };
     924             :             int64       ct_val[2];
     925             : 
     926      200718 :             reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
     927             :                                      values, isnull, rwstate);
     928             : 
     929             :             /*
     930             :              * In indexscan mode and also VACUUM FULL, report increase in
     931             :              * number of tuples scanned and written
     932             :              */
     933      200718 :             ct_val[0] = *num_tuples;
     934      200718 :             ct_val[1] = *num_tuples;
     935      200718 :             pgstat_progress_update_multi_param(2, ct_index, ct_val);
     936             :         }
     937             :     }
     938             : 
     939         562 :     if (indexScan != NULL)
     940          78 :         index_endscan(indexScan);
     941         562 :     if (tableScan != NULL)
     942         484 :         table_endscan(tableScan);
     943         562 :     if (slot)
     944         562 :         ExecDropSingleTupleTableSlot(slot);
     945             : 
     946             :     /*
     947             :      * In scan-and-sort mode, complete the sort, then read out all live tuples
      948             :      * from the tuplesort and write them to the new relation.
     949             :      */
     950         562 :     if (tuplesort != NULL)
     951             :     {
     952         110 :         double      n_tuples = 0;
     953             : 
     954             :         /* Report that we are now sorting tuples */
     955         110 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     956             :                                      PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
     957             : 
     958         110 :         tuplesort_performsort(tuplesort);
     959             : 
     960             :         /* Report that we are now writing new heap */
     961         110 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     962             :                                      PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
     963             : 
     964             :         for (;;)
     965      547380 :         {
     966             :             HeapTuple   tuple;
     967             : 
     968      547490 :             CHECK_FOR_INTERRUPTS();
     969             : 
     970      547490 :             tuple = tuplesort_getheaptuple(tuplesort, true);
     971      547490 :             if (tuple == NULL)
     972         110 :                 break;
     973             : 
     974      547380 :             n_tuples += 1;
     975      547380 :             reform_and_rewrite_tuple(tuple,
     976             :                                      OldHeap, NewHeap,
     977             :                                      values, isnull,
     978             :                                      rwstate);
     979             :             /* Report n_tuples */
     980      547380 :             pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
     981             :                                          n_tuples);
     982             :         }
     983             : 
     984         110 :         tuplesort_end(tuplesort);
     985             :     }
     986             : 
     987             :     /* Write out any remaining tuples, and fsync if needed */
     988         562 :     end_heap_rewrite(rwstate);
     989             : 
     990             :     /* Clean up */
     991         562 :     pfree(values);
     992         562 :     pfree(isnull);
     993         562 : }
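/*
 * A condensed sketch of the tuplesort produce/consume protocol that the
 * scan-and-sort path above follows, with the CLUSTER-specific rewriting and
 * progress reporting stripped out.  write_one_tuple() is a hypothetical
 * consumer standing in for reform_and_rewrite_tuple(); the tuplesort calls
 * themselves are the real API.
 */
static void
drain_sorted_heap_tuples(Tuplesortstate *tuplesort)
{
    /* no more input will be added: finish the sort */
    tuplesort_performsort(tuplesort);

    for (;;)
    {
        HeapTuple   tuple;

        CHECK_FOR_INTERRUPTS();

        /* read back in sorted order; NULL means the sort is exhausted */
        tuple = tuplesort_getheaptuple(tuplesort, true);
        if (tuple == NULL)
            break;

        write_one_tuple(tuple);     /* hypothetical consumer */
    }

    /* release the sort's memory and temporary files */
    tuplesort_end(tuplesort);
}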
     994             : 
     995             : /*
     996             :  * Prepare to analyze the next block in the read stream.  Returns false if
      997             :  * the stream is exhausted and true otherwise.  The scan must have been
      998             :  * started with the SO_TYPE_ANALYZE option.
      999             :  *
     1000             :  * On success, this routine holds a buffer pin and share lock on the heap
     1001             :  * page; they are held until heapam_scan_analyze_next_tuple() returns false,
     1002             :  * that is, until all the items on the heap page have been analyzed.
    1003             :  */
    1004             : static bool
    1005      144890 : heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
    1006             : {
    1007      144890 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    1008             : 
    1009             :     /*
    1010             :      * We must maintain a pin on the target page's buffer to ensure that
    1011             :      * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
     1012             :      * under us.  The buffer comes from the read stream already pinned.  We
     1013             :      * also choose to hold sharelock on the buffer throughout --- we could
     1014             :      * release and re-acquire sharelock for each tuple, but since we aren't
     1015             :      * doing much work per tuple, the extra lock traffic is probably better avoided.
    1016             :      */
    1017      144890 :     hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
    1018      144890 :     if (!BufferIsValid(hscan->rs_cbuf))
    1019       16188 :         return false;
    1020             : 
    1021      128702 :     LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1022             : 
    1023      128702 :     hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
    1024      128702 :     hscan->rs_cindex = FirstOffsetNumber;
    1025      128702 :     return true;
    1026             : }
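/*
 * A sketch of the expected caller-side protocol (cf. acquire_sample_rows()
 * in analyze.c), going through the tableam wrappers rather than calling the
 * handler functions directly.  Creation of the scan, read stream and slot is
 * assumed to have happened already; the actual row sampling is omitted.
 */
static void
analyze_all_blocks(TableScanDesc scan, ReadStream *stream,
                   TransactionId OldestXmin, TupleTableSlot *slot)
{
    double      liverows = 0;
    double      deadrows = 0;

    /* one pinned, share-locked heap page per outer iteration */
    while (table_scan_analyze_next_block(scan, stream))
    {
        /* pin and lock are released when this returns false */
        while (table_scan_analyze_next_tuple(scan, OldestXmin,
                                             &liverows, &deadrows, slot))
        {
            /* the candidate row is in "slot"; a real caller samples it here */
        }
    }
}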
    1027             : 
    1028             : static bool
    1029    10285642 : heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
    1030             :                                double *liverows, double *deadrows,
    1031             :                                TupleTableSlot *slot)
    1032             : {
    1033    10285642 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    1034             :     Page        targpage;
    1035             :     OffsetNumber maxoffset;
    1036             :     BufferHeapTupleTableSlot *hslot;
    1037             : 
    1038             :     Assert(TTS_IS_BUFFERTUPLE(slot));
    1039             : 
    1040    10285642 :     hslot = (BufferHeapTupleTableSlot *) slot;
    1041    10285642 :     targpage = BufferGetPage(hscan->rs_cbuf);
    1042    10285642 :     maxoffset = PageGetMaxOffsetNumber(targpage);
    1043             : 
    1044             :     /* Inner loop over all tuples on the selected page */
    1045    10759554 :     for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
    1046             :     {
    1047             :         ItemId      itemid;
    1048    10630852 :         HeapTuple   targtuple = &hslot->base.tupdata;
    1049    10630852 :         bool        sample_it = false;
    1050             : 
    1051    10630852 :         itemid = PageGetItemId(targpage, hscan->rs_cindex);
    1052             : 
    1053             :         /*
    1054             :          * We ignore unused and redirect line pointers.  DEAD line pointers
    1055             :          * should be counted as dead, because we need vacuum to run to get rid
    1056             :          * of them.  Note that this rule agrees with the way that
    1057             :          * heap_page_prune_and_freeze() counts things.
    1058             :          */
    1059    10630852 :         if (!ItemIdIsNormal(itemid))
    1060             :         {
    1061      287106 :             if (ItemIdIsDead(itemid))
    1062      154872 :                 *deadrows += 1;
    1063      287106 :             continue;
    1064             :         }
    1065             : 
    1066    10343746 :         ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
    1067             : 
    1068    10343746 :         targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
    1069    10343746 :         targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
    1070    10343746 :         targtuple->t_len = ItemIdGetLength(itemid);
    1071             : 
    1072    10343746 :         switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
    1073             :                                          hscan->rs_cbuf))
    1074             :         {
    1075     9765690 :             case HEAPTUPLE_LIVE:
    1076     9765690 :                 sample_it = true;
    1077     9765690 :                 *liverows += 1;
    1078     9765690 :                 break;
    1079             : 
    1080      185074 :             case HEAPTUPLE_DEAD:
    1081             :             case HEAPTUPLE_RECENTLY_DEAD:
    1082             :                 /* Count dead and recently-dead rows */
    1083      185074 :                 *deadrows += 1;
    1084      185074 :                 break;
    1085             : 
    1086      278240 :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1087             : 
    1088             :                 /*
    1089             :                  * Insert-in-progress rows are not counted.  We assume that
    1090             :                  * when the inserting transaction commits or aborts, it will
    1091             :                  * send a stats message to increment the proper count.  This
    1092             :                  * works right only if that transaction ends after we finish
    1093             :                  * analyzing the table; if things happen in the other order,
    1094             :                  * its stats update will be overwritten by ours.  However, the
    1095             :                  * error will be large only if the other transaction runs long
    1096             :                  * enough to insert many tuples, so assuming it will finish
    1097             :                  * after us is the safer option.
    1098             :                  *
    1099             :                  * A special case is that the inserting transaction might be
    1100             :                  * our own.  In this case we should count and sample the row,
    1101             :                  * to accommodate users who load a table and analyze it in one
    1102             :                  * transaction.  (pgstat_report_analyze has to adjust the
    1103             :                  * numbers we report to the cumulative stats system to make
    1104             :                  * this come out right.)
    1105             :                  */
    1106      278240 :                 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
    1107             :                 {
    1108      278224 :                     sample_it = true;
    1109      278224 :                     *liverows += 1;
    1110             :                 }
    1111      278240 :                 break;
    1112             : 
    1113      114742 :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1114             : 
    1115             :                 /*
    1116             :                  * We count and sample delete-in-progress rows the same as
    1117             :                  * live ones, so that the stats counters come out right if the
    1118             :                  * deleting transaction commits after us, per the same
    1119             :                  * reasoning given above.
    1120             :                  *
    1121             :                  * If the delete was done by our own transaction, however, we
    1122             :                  * must count the row as dead to make pgstat_report_analyze's
    1123             :                  * stats adjustments come out right.  (Note: this works out
    1124             :                  * properly when the row was both inserted and deleted in our
    1125             :                  * xact.)
    1126             :                  *
    1127             :                  * The net effect of these choices is that we act as though an
    1128             :                  * IN_PROGRESS transaction hasn't happened yet, except if it
    1129             :                  * is our own transaction, which we assume has happened.
    1130             :                  *
    1131             :                  * This approach ensures that we behave sanely if we see both
    1132             :                  * the pre-image and post-image rows for a row being updated
    1133             :                  * by a concurrent transaction: we will sample the pre-image
    1134             :                  * but not the post-image.  We also get sane results if the
    1135             :                  * concurrent transaction never commits.
    1136             :                  */
    1137      114742 :                 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
    1138        1716 :                     *deadrows += 1;
    1139             :                 else
    1140             :                 {
    1141      113026 :                     sample_it = true;
    1142      113026 :                     *liverows += 1;
    1143             :                 }
    1144      114742 :                 break;
    1145             : 
    1146           0 :             default:
    1147           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1148             :                 break;
    1149             :         }
    1150             : 
    1151    10343746 :         if (sample_it)
    1152             :         {
    1153    10156940 :             ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
    1154    10156940 :             hscan->rs_cindex++;
    1155             : 
    1156             :             /* note that we leave the buffer locked here! */
    1157    10156940 :             return true;
    1158             :         }
    1159             :     }
    1160             : 
    1161             :     /* Now release the lock and pin on the page */
    1162      128702 :     UnlockReleaseBuffer(hscan->rs_cbuf);
    1163      128702 :     hscan->rs_cbuf = InvalidBuffer;
    1164             : 
    1165             :     /* also prevent old slot contents from having pin on page */
    1166      128702 :     ExecClearTuple(slot);
    1167             : 
    1168      128702 :     return false;
    1169             : }
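/*
 * Condensed restatement of the counting rules implemented above, for
 * reference only; it is not the actual implementation.  "by_our_xact"
 * stands in for the TransactionIdIsCurrentTransactionId() checks on xmin
 * or the update xid, and the real code additionally stores sampled rows
 * into the caller's slot.
 */
static void
classify_for_analyze(HTSV_Result visibility, bool by_our_xact,
                     double *liverows, double *deadrows, bool *sample_it)
{
    *sample_it = false;

    switch (visibility)
    {
        case HEAPTUPLE_LIVE:
            *liverows += 1;
            *sample_it = true;
            break;

        case HEAPTUPLE_DEAD:
        case HEAPTUPLE_RECENTLY_DEAD:
            *deadrows += 1;
            break;

        case HEAPTUPLE_INSERT_IN_PROGRESS:
            /* count only rows our own transaction is inserting */
            if (by_our_xact)
            {
                *liverows += 1;
                *sample_it = true;
            }
            break;

        case HEAPTUPLE_DELETE_IN_PROGRESS:
            /* our own pending deletes are dead; others' are still live */
            if (by_our_xact)
                *deadrows += 1;
            else
            {
                *liverows += 1;
                *sample_it = true;
            }
            break;

        default:
            elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
            break;
    }
}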
    1170             : 
    1171             : static double
    1172       54506 : heapam_index_build_range_scan(Relation heapRelation,
    1173             :                               Relation indexRelation,
    1174             :                               IndexInfo *indexInfo,
    1175             :                               bool allow_sync,
    1176             :                               bool anyvisible,
    1177             :                               bool progress,
    1178             :                               BlockNumber start_blockno,
    1179             :                               BlockNumber numblocks,
    1180             :                               IndexBuildCallback callback,
    1181             :                               void *callback_state,
    1182             :                               TableScanDesc scan)
    1183             : {
    1184             :     HeapScanDesc hscan;
    1185             :     bool        is_system_catalog;
    1186             :     bool        checking_uniqueness;
    1187             :     HeapTuple   heapTuple;
    1188             :     Datum       values[INDEX_MAX_KEYS];
    1189             :     bool        isnull[INDEX_MAX_KEYS];
    1190             :     double      reltuples;
    1191             :     ExprState  *predicate;
    1192             :     TupleTableSlot *slot;
    1193             :     EState     *estate;
    1194             :     ExprContext *econtext;
    1195             :     Snapshot    snapshot;
    1196       54506 :     bool        need_unregister_snapshot = false;
    1197             :     TransactionId OldestXmin;
    1198       54506 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1199       54506 :     BlockNumber root_blkno = InvalidBlockNumber;
    1200             :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1201             : 
    1202             :     /*
    1203             :      * sanity checks
    1204             :      */
    1205             :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1206             : 
    1207             :     /* Remember if it's a system catalog */
    1208       54506 :     is_system_catalog = IsSystemRelation(heapRelation);
    1209             : 
    1210             :     /* See whether we're verifying uniqueness/exclusion properties */
    1211       68858 :     checking_uniqueness = (indexInfo->ii_Unique ||
    1212       14352 :                            indexInfo->ii_ExclusionOps != NULL);
    1213             : 
    1214             :     /*
    1215             :      * "Any visible" mode is not compatible with uniqueness checks; make sure
    1216             :      * only one of those is requested.
    1217             :      */
    1218             :     Assert(!(anyvisible && checking_uniqueness));
    1219             : 
    1220             :     /*
    1221             :      * Need an EState for evaluation of index expressions and partial-index
    1222             :      * predicates.  Also a slot to hold the current tuple.
    1223             :      */
    1224       54506 :     estate = CreateExecutorState();
    1225       54506 :     econtext = GetPerTupleExprContext(estate);
    1226       54506 :     slot = table_slot_create(heapRelation, NULL);
    1227             : 
    1228             :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1229       54506 :     econtext->ecxt_scantuple = slot;
    1230             : 
    1231             :     /* Set up execution state for predicate, if any. */
    1232       54506 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1233             : 
    1234             :     /*
    1235             :      * Prepare for scan of the base relation.  In a normal index build, we use
    1236             :      * SnapshotAny because we must retrieve all tuples and do our own time
    1237             :      * qual checks (because we have to index RECENTLY_DEAD tuples). In a
    1238             :      * concurrent build, or during bootstrap, we take a regular MVCC snapshot
    1239             :      * and index whatever's live according to that.
    1240             :      */
    1241       54506 :     OldestXmin = InvalidTransactionId;
    1242             : 
    1243             :     /* okay to ignore lazy VACUUMs here */
    1244       54506 :     if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
    1245       39278 :         OldestXmin = GetOldestNonRemovableTransactionId(heapRelation);
    1246             : 
    1247       54506 :     if (!scan)
    1248             :     {
    1249             :         /*
    1250             :          * Serial index build.
    1251             :          *
    1252             :          * Must begin our own heap scan in this case.  We may also need to
    1253             :          * register a snapshot whose lifetime is under our direct control.
    1254             :          */
    1255       54054 :         if (!TransactionIdIsValid(OldestXmin))
    1256             :         {
    1257       15138 :             snapshot = RegisterSnapshot(GetTransactionSnapshot());
    1258       15138 :             need_unregister_snapshot = true;
    1259             :         }
    1260             :         else
    1261       38916 :             snapshot = SnapshotAny;
    1262             : 
    1263       54054 :         scan = table_beginscan_strat(heapRelation,  /* relation */
    1264             :                                      snapshot,  /* snapshot */
    1265             :                                      0, /* number of keys */
    1266             :                                      NULL,  /* scan key */
    1267             :                                      true,  /* buffer access strategy OK */
    1268             :                                      allow_sync);   /* syncscan OK? */
    1269             :     }
    1270             :     else
    1271             :     {
    1272             :         /*
    1273             :          * Parallel index build.
    1274             :          *
    1275             :          * Parallel case never registers/unregisters own snapshot.  Snapshot
    1276             :          * is taken from parallel heap scan, and is SnapshotAny or an MVCC
    1277             :          * snapshot, based on same criteria as serial case.
    1278             :          */
    1279             :         Assert(!IsBootstrapProcessingMode());
    1280             :         Assert(allow_sync);
    1281         452 :         snapshot = scan->rs_snapshot;
    1282             :     }
    1283             : 
    1284       54506 :     hscan = (HeapScanDesc) scan;
    1285             : 
    1286             :     /*
    1287             :      * Must have called GetOldestNonRemovableTransactionId() if using
    1288             :      * SnapshotAny.  Shouldn't have for an MVCC snapshot. (It's especially
    1289             :      * worth checking this for parallel builds, since ambuild routines that
    1290             :      * support parallel builds must work these details out for themselves.)
    1291             :      */
    1292             :     Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
    1293             :     Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
    1294             :            !TransactionIdIsValid(OldestXmin));
    1295             :     Assert(snapshot == SnapshotAny || !anyvisible);
    1296             : 
    1297             :     /* Publish number of blocks to scan */
    1298       54506 :     if (progress)
    1299             :     {
    1300             :         BlockNumber nblocks;
    1301             : 
    1302       51284 :         if (hscan->rs_base.rs_parallel != NULL)
    1303             :         {
    1304             :             ParallelBlockTableScanDesc pbscan;
    1305             : 
    1306         168 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1307         168 :             nblocks = pbscan->phs_nblocks;
    1308             :         }
    1309             :         else
    1310       51116 :             nblocks = hscan->rs_nblocks;
    1311             : 
    1312       51284 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1313             :                                      nblocks);
    1314             :     }
    1315             : 
    1316             :     /* set our scan endpoints */
    1317       54506 :     if (!allow_sync)
    1318        3706 :         heap_setscanlimits(scan, start_blockno, numblocks);
    1319             :     else
    1320             :     {
    1321             :         /* syncscan can only be requested on whole relation */
    1322             :         Assert(start_blockno == 0);
    1323             :         Assert(numblocks == InvalidBlockNumber);
    1324             :     }
    1325             : 
    1326       54506 :     reltuples = 0;
    1327             : 
    1328             :     /*
    1329             :      * Scan all tuples in the base relation.
    1330             :      */
    1331    17303678 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1332             :     {
    1333             :         bool        tupleIsAlive;
    1334             : 
    1335    17249184 :         CHECK_FOR_INTERRUPTS();
    1336             : 
    1337             :         /* Report scan progress, if asked to. */
    1338    17249184 :         if (progress)
    1339             :         {
    1340    14801810 :             BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
    1341             : 
    1342    14801810 :             if (blocks_done != previous_blkno)
    1343             :             {
    1344      192136 :                 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1345             :                                              blocks_done);
    1346      192136 :                 previous_blkno = blocks_done;
    1347             :             }
    1348             :         }
    1349             : 
    1350             :         /*
    1351             :          * When dealing with a HOT-chain of updated tuples, we want to index
    1352             :          * the values of the live tuple (if any), but index it under the TID
    1353             :          * of the chain's root tuple.  This approach is necessary to preserve
    1354             :          * the HOT-chain structure in the heap. So we need to be able to find
    1355             :          * the root item offset for every tuple that's in a HOT-chain.  When
    1356             :          * first reaching a new page of the relation, call
    1357             :          * heap_get_root_tuples() to build a map of root item offsets on the
    1358             :          * page.
    1359             :          *
    1360             :          * It might look unsafe to use this information across buffer
    1361             :          * lock/unlock.  However, we hold ShareLock on the table so no
    1362             :          * ordinary insert/update/delete should occur; and we hold pin on the
    1363             :          * buffer continuously while visiting the page, so no pruning
    1364             :          * operation can occur either.
    1365             :          *
    1366             :          * In cases with only ShareUpdateExclusiveLock on the table, it's
    1367             :          * possible for some HOT tuples to appear that we didn't know about
    1368             :          * when we first read the page.  To handle that case, we re-obtain the
    1369             :          * list of root offsets when a HOT tuple points to a root item that we
    1370             :          * don't know about.
    1371             :          *
    1372             :          * Also, although our opinions about tuple liveness could change while
    1373             :          * we scan the page (due to concurrent transaction commits/aborts),
    1374             :          * the chain root locations won't, so this info doesn't need to be
    1375             :          * rebuilt after waiting for another transaction.
    1376             :          *
    1377             :          * Note the implied assumption that there is no more than one live
    1378             :          * tuple per HOT-chain --- else we could create more than one index
    1379             :          * entry pointing to the same root tuple.
    1380             :          */
    1381    17249184 :         if (hscan->rs_cblock != root_blkno)
    1382             :         {
    1383      214702 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1384             : 
    1385      214702 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1386      214702 :             heap_get_root_tuples(page, root_offsets);
    1387      214702 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1388             : 
    1389      214702 :             root_blkno = hscan->rs_cblock;
    1390             :         }
    1391             : 
    1392    17249184 :         if (snapshot == SnapshotAny)
    1393             :         {
    1394             :             /* do our own time qual check */
    1395             :             bool        indexIt;
    1396             :             TransactionId xwait;
    1397             : 
    1398    14665206 :     recheck:
    1399             : 
    1400             :             /*
    1401             :              * We could possibly get away with not locking the buffer here,
    1402             :              * since caller should hold ShareLock on the relation, but let's
    1403             :              * be conservative about it.  (This remark is still correct even
    1404             :              * with HOT-pruning: our pin on the buffer prevents pruning.)
    1405             :              */
    1406    14665206 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1407             : 
    1408             :             /*
    1409             :              * The criteria for counting a tuple as live in this block need to
     1410             :              * match what heapam_scan_analyze_next_tuple() above does,
    1411             :              * otherwise CREATE INDEX and ANALYZE may produce wildly different
    1412             :              * reltuples values, e.g. when there are many recently-dead
    1413             :              * tuples.
    1414             :              */
    1415    14665206 :             switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
    1416             :                                              hscan->rs_cbuf))
    1417             :             {
    1418        1816 :                 case HEAPTUPLE_DEAD:
    1419             :                     /* Definitely dead, we can ignore it */
    1420        1816 :                     indexIt = false;
    1421        1816 :                     tupleIsAlive = false;
    1422        1816 :                     break;
    1423    10901574 :                 case HEAPTUPLE_LIVE:
    1424             :                     /* Normal case, index and unique-check it */
    1425    10901574 :                     indexIt = true;
    1426    10901574 :                     tupleIsAlive = true;
    1427             :                     /* Count it as live, too */
    1428    10901574 :                     reltuples += 1;
    1429    10901574 :                     break;
    1430      223408 :                 case HEAPTUPLE_RECENTLY_DEAD:
    1431             : 
    1432             :                     /*
    1433             :                      * If tuple is recently deleted then we must index it
    1434             :                      * anyway to preserve MVCC semantics.  (Pre-existing
    1435             :                      * transactions could try to use the index after we finish
    1436             :                      * building it, and may need to see such tuples.)
    1437             :                      *
    1438             :                      * However, if it was HOT-updated then we must only index
    1439             :                      * the live tuple at the end of the HOT-chain.  Since this
    1440             :                      * breaks semantics for pre-existing snapshots, mark the
    1441             :                      * index as unusable for them.
    1442             :                      *
    1443             :                      * We don't count recently-dead tuples in reltuples, even
    1444             :                      * if we index them; see heapam_scan_analyze_next_tuple().
    1445             :                      */
    1446      223408 :                     if (HeapTupleIsHotUpdated(heapTuple))
    1447             :                     {
    1448         212 :                         indexIt = false;
    1449             :                         /* mark the index as unsafe for old snapshots */
    1450         212 :                         indexInfo->ii_BrokenHotChain = true;
    1451             :                     }
    1452             :                     else
    1453      223196 :                         indexIt = true;
    1454             :                     /* In any case, exclude the tuple from unique-checking */
    1455      223408 :                     tupleIsAlive = false;
    1456      223408 :                     break;
    1457     3538330 :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1458             : 
    1459             :                     /*
    1460             :                      * In "anyvisible" mode, this tuple is visible and we
    1461             :                      * don't need any further checks.
    1462             :                      */
    1463     3538330 :                     if (anyvisible)
    1464             :                     {
    1465       61472 :                         indexIt = true;
    1466       61472 :                         tupleIsAlive = true;
    1467       61472 :                         reltuples += 1;
    1468       61472 :                         break;
    1469             :                     }
    1470             : 
    1471             :                     /*
    1472             :                      * Since caller should hold ShareLock or better, normally
    1473             :                      * the only way to see this is if it was inserted earlier
    1474             :                      * in our own transaction.  However, it can happen in
    1475             :                      * system catalogs, since we tend to release write lock
    1476             :                      * before commit there.  Give a warning if neither case
    1477             :                      * applies.
    1478             :                      */
    1479     3476858 :                     xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
    1480     3476858 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1481             :                     {
    1482           6 :                         if (!is_system_catalog)
    1483           0 :                             elog(WARNING, "concurrent insert in progress within table \"%s\"",
    1484             :                                  RelationGetRelationName(heapRelation));
    1485             : 
    1486             :                         /*
    1487             :                          * If we are performing uniqueness checks, indexing
    1488             :                          * such a tuple could lead to a bogus uniqueness
    1489             :                          * failure.  In that case we wait for the inserting
    1490             :                          * transaction to finish and check again.
    1491             :                          */
    1492           6 :                         if (checking_uniqueness)
    1493             :                         {
    1494             :                             /*
    1495             :                              * Must drop the lock on the buffer before we wait
    1496             :                              */
    1497           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1498           0 :                             XactLockTableWait(xwait, heapRelation,
    1499             :                                               &heapTuple->t_self,
    1500             :                                               XLTW_InsertIndexUnique);
    1501           0 :                             CHECK_FOR_INTERRUPTS();
    1502           0 :                             goto recheck;
    1503             :                         }
    1504             :                     }
    1505             :                     else
    1506             :                     {
    1507             :                         /*
    1508             :                          * For consistency with
    1509             :                          * heapam_scan_analyze_next_tuple(), count
    1510             :                          * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
    1511             :                          * when inserted by our own transaction.
    1512             :                          */
    1513     3476852 :                         reltuples += 1;
    1514             :                     }
    1515             : 
    1516             :                     /*
    1517             :                      * We must index such tuples, since if the index build
    1518             :                      * commits then they're good.
    1519             :                      */
    1520     3476858 :                     indexIt = true;
    1521     3476858 :                     tupleIsAlive = true;
    1522     3476858 :                     break;
    1523          78 :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1524             : 
    1525             :                     /*
    1526             :                      * As with INSERT_IN_PROGRESS case, this is unexpected
    1527             :                      * unless it's our own deletion or a system catalog; but
    1528             :                      * in anyvisible mode, this tuple is visible.
    1529             :                      */
    1530          78 :                     if (anyvisible)
    1531             :                     {
    1532           0 :                         indexIt = true;
    1533           0 :                         tupleIsAlive = false;
    1534           0 :                         reltuples += 1;
    1535           0 :                         break;
    1536             :                     }
    1537             : 
    1538          78 :                     xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
    1539          78 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1540             :                     {
    1541           0 :                         if (!is_system_catalog)
    1542           0 :                             elog(WARNING, "concurrent delete in progress within table \"%s\"",
    1543             :                                  RelationGetRelationName(heapRelation));
    1544             : 
    1545             :                         /*
    1546             :                          * If we are performing uniqueness checks, assuming
    1547             :                          * the tuple is dead could lead to missing a
    1548             :                          * uniqueness violation.  In that case we wait for the
    1549             :                          * deleting transaction to finish and check again.
    1550             :                          *
    1551             :                          * Also, if it's a HOT-updated tuple, we should not
    1552             :                          * index it but rather the live tuple at the end of
    1553             :                          * the HOT-chain.  However, the deleting transaction
    1554             :                          * could abort, possibly leaving this tuple as live
    1555             :                          * after all, in which case it has to be indexed. The
    1556             :                          * only way to know what to do is to wait for the
    1557             :                          * deleting transaction to finish and check again.
    1558             :                          */
    1559           0 :                         if (checking_uniqueness ||
    1560           0 :                             HeapTupleIsHotUpdated(heapTuple))
    1561             :                         {
    1562             :                             /*
    1563             :                              * Must drop the lock on the buffer before we wait
    1564             :                              */
    1565           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1566           0 :                             XactLockTableWait(xwait, heapRelation,
    1567             :                                               &heapTuple->t_self,
    1568             :                                               XLTW_InsertIndexUnique);
    1569           0 :                             CHECK_FOR_INTERRUPTS();
    1570           0 :                             goto recheck;
    1571             :                         }
    1572             : 
    1573             :                         /*
    1574             :                          * Otherwise index it but don't check for uniqueness,
    1575             :                          * the same as a RECENTLY_DEAD tuple.
    1576             :                          */
    1577           0 :                         indexIt = true;
    1578             : 
    1579             :                         /*
    1580             :                          * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
    1581             :                          * if they were not deleted by the current
    1582             :                          * transaction.  That's what
    1583             :                          * heapam_scan_analyze_next_tuple() does, and we want
    1584             :                          * the behavior to be consistent.
    1585             :                          */
    1586           0 :                         reltuples += 1;
    1587             :                     }
    1588          78 :                     else if (HeapTupleIsHotUpdated(heapTuple))
    1589             :                     {
    1590             :                         /*
    1591             :                          * It's a HOT-updated tuple deleted by our own xact.
    1592             :                          * We can assume the deletion will commit (else the
    1593             :                          * index contents don't matter), so treat the same as
    1594             :                          * RECENTLY_DEAD HOT-updated tuples.
    1595             :                          */
    1596           0 :                         indexIt = false;
    1597             :                         /* mark the index as unsafe for old snapshots */
    1598           0 :                         indexInfo->ii_BrokenHotChain = true;
    1599             :                     }
    1600             :                     else
    1601             :                     {
    1602             :                         /*
    1603             :                          * It's a regular tuple deleted by our own xact. Index
    1604             :                          * it, but don't check for uniqueness nor count in
    1605             :                          * reltuples, the same as a RECENTLY_DEAD tuple.
    1606             :                          */
    1607          78 :                         indexIt = true;
    1608             :                     }
    1609             :                     /* In any case, exclude the tuple from unique-checking */
    1610          78 :                     tupleIsAlive = false;
    1611          78 :                     break;
    1612           0 :                 default:
    1613           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1614             :                     indexIt = tupleIsAlive = false; /* keep compiler quiet */
    1615             :                     break;
    1616             :             }
    1617             : 
    1618    14665206 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1619             : 
    1620    14665206 :             if (!indexIt)
    1621        2028 :                 continue;
    1622             :         }
    1623             :         else
    1624             :         {
    1625             :             /* heap_getnext did the time qual check */
    1626     2583978 :             tupleIsAlive = true;
    1627     2583978 :             reltuples += 1;
    1628             :         }
    1629             : 
    1630    17247156 :         MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1631             : 
    1632             :         /* Set up for predicate or expression evaluation */
    1633    17247156 :         ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
    1634             : 
    1635             :         /*
    1636             :          * In a partial index, discard tuples that don't satisfy the
    1637             :          * predicate.
    1638             :          */
    1639    17247156 :         if (predicate != NULL)
    1640             :         {
    1641      138558 :             if (!ExecQual(predicate, econtext))
    1642       49668 :                 continue;
    1643             :         }
    1644             : 
    1645             :         /*
    1646             :          * For the current heap tuple, extract all the attributes we use in
    1647             :          * this index, and note which are null.  This also performs evaluation
    1648             :          * of any expressions needed.
    1649             :          */
    1650    17197488 :         FormIndexDatum(indexInfo,
    1651             :                        slot,
    1652             :                        estate,
    1653             :                        values,
    1654             :                        isnull);
    1655             : 
    1656             :         /*
    1657             :          * You'd think we should go ahead and build the index tuple here, but
    1658             :          * some index AMs want to do further processing on the data first.  So
    1659             :          * pass the values[] and isnull[] arrays, instead.
    1660             :          */
    1661             : 
    1662    17197476 :         if (HeapTupleIsHeapOnly(heapTuple))
    1663             :         {
    1664             :             /*
    1665             :              * For a heap-only tuple, pretend its TID is that of the root. See
    1666             :              * src/backend/access/heap/README.HOT for discussion.
    1667             :              */
    1668             :             ItemPointerData tid;
    1669             :             OffsetNumber offnum;
    1670             : 
    1671        8472 :             offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
    1672             : 
    1673             :             /*
    1674             :              * If a HOT tuple points to a root that we don't know about,
    1675             :              * obtain root items afresh.  If that still fails, report it as
    1676             :              * corruption.
    1677             :              */
    1678        8472 :             if (root_offsets[offnum - 1] == InvalidOffsetNumber)
    1679             :             {
    1680           0 :                 Page        page = BufferGetPage(hscan->rs_cbuf);
    1681             : 
    1682           0 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1683           0 :                 heap_get_root_tuples(page, root_offsets);
    1684           0 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1685             :             }
    1686             : 
    1687        8472 :             if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
    1688           0 :                 ereport(ERROR,
    1689             :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1690             :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1691             :                                          ItemPointerGetBlockNumber(&heapTuple->t_self),
    1692             :                                          offnum,
    1693             :                                          RelationGetRelationName(heapRelation))));
    1694             : 
    1695        8472 :             ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
    1696        8472 :                            root_offsets[offnum - 1]);
    1697             : 
    1698             :             /* Call the AM's callback routine to process the tuple */
    1699        8472 :             callback(indexRelation, &tid, values, isnull, tupleIsAlive,
    1700             :                      callback_state);
    1701             :         }
    1702             :         else
    1703             :         {
    1704             :             /* Call the AM's callback routine to process the tuple */
    1705    17189004 :             callback(indexRelation, &heapTuple->t_self, values, isnull,
    1706             :                      tupleIsAlive, callback_state);
    1707             :         }
    1708             :     }
    1709             : 
    1710             :     /* Report scan progress one last time. */
    1711       54494 :     if (progress)
    1712             :     {
    1713             :         BlockNumber blks_done;
    1714             : 
    1715       51272 :         if (hscan->rs_base.rs_parallel != NULL)
    1716             :         {
    1717             :             ParallelBlockTableScanDesc pbscan;
    1718             : 
    1719         168 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1720         168 :             blks_done = pbscan->phs_nblocks;
    1721             :         }
    1722             :         else
    1723       51104 :             blks_done = hscan->rs_nblocks;
    1724             : 
    1725       51272 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1726             :                                      blks_done);
    1727             :     }
    1728             : 
    1729       54494 :     table_endscan(scan);
    1730             : 
    1731             :     /* we can now forget our snapshot, if set and registered by us */
    1732       54494 :     if (need_unregister_snapshot)
    1733       15132 :         UnregisterSnapshot(snapshot);
    1734             : 
    1735       54494 :     ExecDropSingleTupleTableSlot(slot);
    1736             : 
    1737       54494 :     FreeExecutorState(estate);
    1738             : 
    1739             :     /* These may have been pointing to the now-gone estate */
    1740       54494 :     indexInfo->ii_ExpressionsState = NIL;
    1741       54494 :     indexInfo->ii_PredicateState = NULL;
    1742             : 
    1743       54494 :     return reltuples;
    1744             : }
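/*
 * A sketch of how an index AM typically reaches the function above: its
 * ambuild routine calls table_index_build_scan(), which for heap tables
 * dispatches here and invokes the supplied callback once per tuple to be
 * indexed.  my_build_callback(), my_ambuild_scan() and the buildstate
 * argument are illustrative names, not part of this file.
 */
static void
my_build_callback(Relation index, ItemPointer tid, Datum *values,
                  bool *isnull, bool tupleIsAlive, void *state)
{
    /* form an index tuple from values/isnull and insert it under "tid" */
}

static double
my_ambuild_scan(Relation heapRelation, Relation indexRelation,
                IndexInfo *indexInfo, void *buildstate)
{
    return table_index_build_scan(heapRelation, indexRelation, indexInfo,
                                  true,     /* allow_sync */
                                  true,     /* progress */
                                  my_build_callback,
                                  buildstate,
                                  NULL);    /* serial build: start own scan */
}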
    1745             : 
    1746             : static void
    1747         714 : heapam_index_validate_scan(Relation heapRelation,
    1748             :                            Relation indexRelation,
    1749             :                            IndexInfo *indexInfo,
    1750             :                            Snapshot snapshot,
    1751             :                            ValidateIndexState *state)
    1752             : {
    1753             :     TableScanDesc scan;
    1754             :     HeapScanDesc hscan;
    1755             :     HeapTuple   heapTuple;
    1756             :     Datum       values[INDEX_MAX_KEYS];
    1757             :     bool        isnull[INDEX_MAX_KEYS];
    1758             :     ExprState  *predicate;
    1759             :     TupleTableSlot *slot;
    1760             :     EState     *estate;
    1761             :     ExprContext *econtext;
    1762         714 :     BlockNumber root_blkno = InvalidBlockNumber;
    1763             :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1764             :     bool        in_index[MaxHeapTuplesPerPage];
    1765         714 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1766             : 
    1767             :     /* state variables for the merge */
    1768         714 :     ItemPointer indexcursor = NULL;
    1769             :     ItemPointerData decoded;
    1770         714 :     bool        tuplesort_empty = false;
    1771             : 
    1772             :     /*
    1773             :      * sanity checks
    1774             :      */
    1775             :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1776             : 
    1777             :     /*
    1778             :      * Need an EState for evaluation of index expressions and partial-index
    1779             :      * predicates.  Also a slot to hold the current tuple.
    1780             :      */
    1781         714 :     estate = CreateExecutorState();
    1782         714 :     econtext = GetPerTupleExprContext(estate);
    1783         714 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
    1784             :                                     &TTSOpsHeapTuple);
    1785             : 
    1786             :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1787         714 :     econtext->ecxt_scantuple = slot;
    1788             : 
    1789             :     /* Set up execution state for predicate, if any. */
    1790         714 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1791             : 
    1792             :     /*
    1793             :      * Prepare for scan of the base relation.  We need just those tuples
    1794             :      * satisfying the passed-in reference snapshot.  We must disable syncscan
    1795             :      * here, because it's critical that we read from block zero forward to
    1796             :      * match the sorted TIDs.
    1797             :      */
    1798         714 :     scan = table_beginscan_strat(heapRelation,  /* relation */
    1799             :                                  snapshot,  /* snapshot */
    1800             :                                  0, /* number of keys */
    1801             :                                  NULL,  /* scan key */
    1802             :                                  true,  /* buffer access strategy OK */
    1803             :                                  false);    /* syncscan not OK */
    1804         714 :     hscan = (HeapScanDesc) scan;
    1805             : 
    1806         714 :     pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1807         714 :                                  hscan->rs_nblocks);
    1808             : 
    1809             :     /*
    1810             :      * Scan all tuples matching the snapshot.
    1811             :      */
    1812      253208 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1813             :     {
    1814      252494 :         ItemPointer heapcursor = &heapTuple->t_self;
    1815             :         ItemPointerData rootTuple;
    1816             :         OffsetNumber root_offnum;
    1817             : 
    1818      252494 :         CHECK_FOR_INTERRUPTS();
    1819             : 
    1820      252494 :         state->htups += 1;
    1821             : 
    1822      252494 :         if ((previous_blkno == InvalidBlockNumber) ||
    1823      252054 :             (hscan->rs_cblock != previous_blkno))
    1824             :         {
    1825        4952 :             pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1826        4952 :                                          hscan->rs_cblock);
    1827        4952 :             previous_blkno = hscan->rs_cblock;
    1828             :         }
    1829             : 
    1830             :         /*
    1831             :          * As commented in table_index_build_scan, we should index heap-only
    1832             :          * tuples under the TIDs of their root tuples; so when we advance onto
    1833             :          * a new heap page, build a map of root item offsets on the page.
    1834             :          *
    1835             :          * This complicates merging against the tuplesort output: we will
    1836             :          * visit the live tuples in order by their offsets, but the root
    1837             :          * offsets that we need to compare against the index contents might be
    1838             :          * ordered differently.  So we might have to "look back" within the
    1839             :          * tuplesort output, but only within the current page.  We handle that
    1840             :          * by keeping a bool array in_index[] showing all the
    1841             :          * already-passed-over tuplesort output TIDs of the current page. We
    1842             :          * clear that array here, when advancing onto a new heap page.
    1843             :          */
    1844      252494 :         if (hscan->rs_cblock != root_blkno)
    1845             :         {
    1846        4952 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1847             : 
    1848        4952 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1849        4952 :             heap_get_root_tuples(page, root_offsets);
    1850        4952 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1851             : 
    1852        4952 :             memset(in_index, 0, sizeof(in_index));
    1853             : 
    1854        4952 :             root_blkno = hscan->rs_cblock;
    1855             :         }
    1856             : 
    1857             :         /* Convert actual tuple TID to root TID */
    1858      252494 :         rootTuple = *heapcursor;
    1859      252494 :         root_offnum = ItemPointerGetOffsetNumber(heapcursor);
    1860             : 
    1861      252494 :         if (HeapTupleIsHeapOnly(heapTuple))
    1862             :         {
    1863          24 :             root_offnum = root_offsets[root_offnum - 1];
    1864          24 :             if (!OffsetNumberIsValid(root_offnum))
    1865           0 :                 ereport(ERROR,
    1866             :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1867             :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1868             :                                          ItemPointerGetBlockNumber(heapcursor),
    1869             :                                          ItemPointerGetOffsetNumber(heapcursor),
    1870             :                                          RelationGetRelationName(heapRelation))));
    1871          24 :             ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
    1872             :         }
    1873             : 
    1874             :         /*
    1875             :          * "merge" by skipping through the index tuples until we find or pass
    1876             :          * the current root tuple.
    1877             :          */
    1878      575228 :         while (!tuplesort_empty &&
    1879      574734 :                (!indexcursor ||
    1880      574734 :                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
    1881             :         {
    1882             :             Datum       ts_val;
    1883             :             bool        ts_isnull;
    1884             : 
    1885      322734 :             if (indexcursor)
    1886             :             {
    1887             :                 /*
    1888             :                  * Remember index items seen earlier on the current heap page
    1889             :                  */
    1890      322294 :                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
    1891      316138 :                     in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
    1892             :             }
    1893             : 
    1894      322734 :             tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
    1895             :                                                   false, &ts_val, &ts_isnull,
    1896      322734 :                                                   NULL);
    1897             :             Assert(tuplesort_empty || !ts_isnull);
    1898      322734 :             if (!tuplesort_empty)
    1899             :             {
    1900      322702 :                 itemptr_decode(&decoded, DatumGetInt64(ts_val));
    1901      322702 :                 indexcursor = &decoded;
    1902             :             }
    1903             :             else
    1904             :             {
    1905             :                 /* Be tidy */
    1906          32 :                 indexcursor = NULL;
    1907             :             }
    1908             :         }
    1909             : 
    1910             :         /*
    1911             :          * If the tuplesort has overshot *and* we didn't see a match earlier,
    1912             :          * then this tuple is missing from the index, so insert it.
    1913             :          */
    1914      504934 :         if ((tuplesort_empty ||
    1915      252440 :              ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
    1916         134 :             !in_index[root_offnum - 1])
    1917             :         {
    1918         116 :             MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1919             : 
    1920             :             /* Set up for predicate or expression evaluation */
    1921         116 :             ExecStoreHeapTuple(heapTuple, slot, false);
    1922             : 
    1923             :             /*
    1924             :              * In a partial index, discard tuples that don't satisfy the
    1925             :              * predicate.
    1926             :              */
    1927         116 :             if (predicate != NULL)
    1928             :             {
    1929          48 :                 if (!ExecQual(predicate, econtext))
    1930          48 :                     continue;
    1931             :             }
    1932             : 
    1933             :             /*
    1934             :              * For the current heap tuple, extract all the attributes we use
    1935             :              * in this index, and note which are null.  This also performs
    1936             :              * evaluation of any expressions needed.
    1937             :              */
    1938          68 :             FormIndexDatum(indexInfo,
    1939             :                            slot,
    1940             :                            estate,
    1941             :                            values,
    1942             :                            isnull);
    1943             : 
    1944             :             /*
    1945             :              * You'd think we should go ahead and build the index tuple here,
    1946             :              * but some index AMs want to do further processing on the data
    1947             :              * first. So pass the values[] and isnull[] arrays, instead.
    1948             :              */
    1949             : 
    1950             :             /*
    1951             :              * If the tuple is already committed dead, you might think we
    1952             :              * could suppress uniqueness checking, but this is no longer true
    1953             :              * in the presence of HOT, because the insert is actually a proxy
    1954             :              * for a uniqueness check on the whole HOT-chain.  That is, the
    1955             :              * tuple we have here could be dead because it was already
    1956             :              * HOT-updated, and if so the updating transaction will not have
    1957             :              * thought it should insert index entries.  The index AM will
    1958             :              * check the whole HOT-chain and correctly detect a conflict if
    1959             :              * there is one.
    1960             :              */
    1961             : 
    1962          68 :             index_insert(indexRelation,
    1963             :                          values,
    1964             :                          isnull,
    1965             :                          &rootTuple,
    1966             :                          heapRelation,
    1967          68 :                          indexInfo->ii_Unique ?
    1968             :                          UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
    1969             :                          false,
    1970             :                          indexInfo);
    1971             : 
    1972          68 :             state->tups_inserted += 1;
    1973             :         }
    1974             :     }
    1975             : 
    1976         714 :     table_endscan(scan);
    1977             : 
    1978         714 :     ExecDropSingleTupleTableSlot(slot);
    1979             : 
    1980         714 :     FreeExecutorState(estate);
    1981             : 
    1982             :     /* These may have been pointing to the now-gone estate */
    1983         714 :     indexInfo->ii_ExpressionsState = NIL;
    1984         714 :     indexInfo->ii_PredicateState = NULL;
    1985         714 : }
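
/*
 * Illustrative sketch (hypothetical names; not part of heapam_handler.c):
 * the merge strategy used by the validate scan above, in miniature.  Index
 * TIDs arrive in sorted order from a stream, while heap root offsets within
 * one page can be visited out of order; every stream entry we skip past is
 * remembered in a per-page seen[] array, which is consulted before declaring
 * a heap entry missing from the index.
 */
#include <stdbool.h>

static bool
heap_entry_missing_sketch(int root_off,   /* 1-based offset on this page */
                          bool seen[],    /* cleared when the page changes */
                          int *cursor, bool *have_cursor,
                          bool (*stream_next)(int *out))
{
    /* Skip stream entries strictly below the current root offset. */
    while (*have_cursor && *cursor < root_off)
    {
        seen[*cursor - 1] = true;       /* remember what we passed over */
        *have_cursor = stream_next(cursor);
    }

    /* Present at the cursor, or already passed over earlier on the page? */
    if (*have_cursor && *cursor == root_off)
        return false;
    return !seen[root_off - 1];
}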
    1986             : 
    1987             : /*
    1988             :  * Return the number of blocks that have been read by this scan since
    1989             :  * starting.  This is meant for progress reporting rather than to be fully
    1990             :  * accurate: in a parallel scan, workers can be concurrently reading blocks
    1991             :  * further ahead than what we report.
    1992             :  */
    1993             : static BlockNumber
    1994    14801810 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
    1995             : {
    1996    14801810 :     ParallelBlockTableScanDesc bpscan = NULL;
    1997             :     BlockNumber startblock;
    1998             :     BlockNumber blocks_done;
    1999             : 
    2000    14801810 :     if (hscan->rs_base.rs_parallel != NULL)
    2001             :     {
    2002     2393428 :         bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    2003     2393428 :         startblock = bpscan->phs_startblock;
    2004             :     }
    2005             :     else
    2006    12408382 :         startblock = hscan->rs_startblock;
    2007             : 
    2008             :     /*
    2009             :      * Might have wrapped around the end of the relation, if startblock was
    2010             :      * not zero.
    2011             :      */
    2012    14801810 :     if (hscan->rs_cblock > startblock)
    2013    14271706 :         blocks_done = hscan->rs_cblock - startblock;
    2014             :     else
    2015             :     {
    2016             :         BlockNumber nblocks;
    2017             : 
    2018      530104 :         nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
    2019      530104 :         blocks_done = nblocks - startblock +
    2020      530104 :             hscan->rs_cblock;
    2021             :     }
    2022             : 
    2023    14801810 :     return blocks_done;
    2024             : }
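
/*
 * Illustrative sketch (hypothetical names; not part of heapam_handler.c):
 * the wraparound arithmetic above, restated standalone.  A scan that started
 * at startblock (possibly nonzero, e.g. when synchronized scans pick a
 * midpoint) and currently sits on cblock has read cblock - startblock blocks
 * until it wraps; after wrapping past an nblocks-long relation, the distance
 * is (nblocks - startblock) + cblock.
 */
static unsigned
blocks_done_sketch(unsigned startblock, unsigned cblock, unsigned nblocks)
{
    if (cblock > startblock)
        return cblock - startblock;
    return (nblocks - startblock) + cblock;     /* wrapped around */
}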
    2025             : 
    2026             : 
    2027             : /* ------------------------------------------------------------------------
    2028             :  * Miscellaneous callbacks for the heap AM
    2029             :  * ------------------------------------------------------------------------
    2030             :  */
    2031             : 
    2032             : /*
    2033             :  * Check to see whether the table needs a TOAST table.  It does only if
    2034             :  * (1) there are any toastable attributes, and (2) the maximum length
    2035             :  * of a tuple could exceed TOAST_TUPLE_THRESHOLD.  (We don't want to
    2036             :  * create a toast table for something like "f1 varchar(20)".)
    2037             :  */
    2038             : static bool
    2039       42662 : heapam_relation_needs_toast_table(Relation rel)
    2040             : {
    2041       42662 :     int32       data_length = 0;
    2042       42662 :     bool        maxlength_unknown = false;
    2043       42662 :     bool        has_toastable_attrs = false;
    2044       42662 :     TupleDesc   tupdesc = rel->rd_att;
    2045             :     int32       tuple_length;
    2046             :     int         i;
    2047             : 
    2048      169434 :     for (i = 0; i < tupdesc->natts; i++)
    2049             :     {
    2050      126772 :         Form_pg_attribute att = TupleDescAttr(tupdesc, i);
    2051             : 
    2052      126772 :         if (att->attisdropped)
    2053        1014 :             continue;
    2054      125758 :         if (att->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
    2055         814 :             continue;
    2056      124944 :         data_length = att_align_nominal(data_length, att->attalign);
    2057      124944 :         if (att->attlen > 0)
    2058             :         {
    2059             :             /* Fixed-length types are never toastable */
    2060       93278 :             data_length += att->attlen;
    2061             :         }
    2062             :         else
    2063             :         {
    2064       31666 :             int32       maxlen = type_maximum_size(att->atttypid,
    2065             :                                                    att->atttypmod);
    2066             : 
    2067       31666 :             if (maxlen < 0)
    2068       29248 :                 maxlength_unknown = true;
    2069             :             else
    2070        2418 :                 data_length += maxlen;
    2071       31666 :             if (att->attstorage != TYPSTORAGE_PLAIN)
    2072       30566 :                 has_toastable_attrs = true;
    2073             :         }
    2074             :     }
    2075       42662 :     if (!has_toastable_attrs)
    2076       24456 :         return false;           /* nothing to toast? */
    2077       18206 :     if (maxlength_unknown)
    2078       16416 :         return true;            /* any unlimited-length attrs? */
    2079        1790 :     tuple_length = MAXALIGN(SizeofHeapTupleHeader +
    2080        1790 :                             BITMAPLEN(tupdesc->natts)) +
    2081        1790 :         MAXALIGN(data_length);
    2082        1790 :     return (tuple_length > TOAST_TUPLE_THRESHOLD);
    2083             : }
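
/*
 * Illustrative sketch (hypothetical types; the real attribute metadata lives
 * in pg_attribute): the shape of the decision above.  A TOAST table is
 * needed only when some column is toastable *and* the worst-case tuple width
 * is either unbounded or above the threshold.
 */
#include <stdbool.h>

typedef struct ColSketch
{
    int         maxlen;         /* -1 if unbounded, e.g. unconstrained text */
    bool        toastable;      /* storage strategy is not PLAIN */
} ColSketch;

static bool
needs_toast_sketch(const ColSketch *cols, int ncols,
                   int header_bytes, int threshold)
{
    bool        has_toastable = false;
    bool        unbounded = false;
    int         width = header_bytes;

    for (int i = 0; i < ncols; i++)
    {
        if (cols[i].maxlen < 0)
            unbounded = true;
        else
            width += cols[i].maxlen;
        if (cols[i].toastable)
            has_toastable = true;
    }
    if (!has_toastable)
        return false;           /* nothing to toast */
    return unbounded || width > threshold;
}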
    2084             : 
    2085             : /*
    2086             :  * TOAST tables for heap relations are just heap relations.
    2087             :  */
    2088             : static Oid
    2089       16966 : heapam_relation_toast_am(Relation rel)
    2090             : {
    2091       16966 :     return rel->rd_rel->relam;
    2092             : }
    2093             : 
    2094             : 
    2095             : /* ------------------------------------------------------------------------
    2096             :  * Planner related callbacks for the heap AM
    2097             :  * ------------------------------------------------------------------------
    2098             :  */
    2099             : 
    2100             : #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
    2101             :     (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
    2102             : #define HEAP_USABLE_BYTES_PER_PAGE \
    2103             :     (BLCKSZ - SizeOfPageHeaderData)
    2104             : 
    2105             : static void
    2106      427768 : heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
    2107             :                          BlockNumber *pages, double *tuples,
    2108             :                          double *allvisfrac)
    2109             : {
    2110      427768 :     table_block_relation_estimate_size(rel, attr_widths, pages,
    2111             :                                        tuples, allvisfrac,
    2112             :                                        HEAP_OVERHEAD_BYTES_PER_TUPLE,
    2113             :                                        HEAP_USABLE_BYTES_PER_PAGE);
    2114      427768 : }
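
/*
 * Illustrative sketch (hypothetical helper name): how the two constants
 * above feed the size estimate.  table_block_relation_estimate_size() (not
 * reproduced here) derives a tuple-density estimate along these lines when
 * it has no statistics to go on.
 */
static double
tuples_per_page_sketch(double avg_tuple_width,
                       double overhead_bytes_per_tuple,
                       double usable_bytes_per_page)
{
    /* one header + line pointer per tuple, packed into the usable space */
    return usable_bytes_per_page /
        (avg_tuple_width + overhead_bytes_per_tuple);
}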
    2115             : 
    2116             : 
    2117             : /* ------------------------------------------------------------------------
    2118             :  * Executor related callbacks for the heap AM
    2119             :  * ------------------------------------------------------------------------
    2120             :  */
    2121             : 
    2122             : static bool
    2123     6651496 : heapam_scan_bitmap_next_tuple(TableScanDesc scan,
    2124             :                               TupleTableSlot *slot,
    2125             :                               bool *recheck,
    2126             :                               uint64 *lossy_pages,
    2127             :                               uint64 *exact_pages)
    2128             : {
    2129     6651496 :     BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
    2130     6651496 :     HeapScanDesc hscan = (HeapScanDesc) bscan;
    2131             :     OffsetNumber targoffset;
    2132             :     Page        page;
    2133             :     ItemId      lp;
    2134             : 
    2135             :     /*
    2136             :      * Out of range?  If so, nothing more to look at on this page
    2137             :      */
    2138     7023238 :     while (hscan->rs_cindex >= hscan->rs_ntuples)
    2139             :     {
    2140             :         /*
    2141             :          * Emit empty tuples before advancing to the next block
    2142             :          */
    2143      994168 :         if (bscan->rs_empty_tuples_pending > 0)
    2144             :         {
    2145             :             /*
    2146             :              * If we don't have to fetch the tuple, just return nulls.
    2147             :              */
    2148      597246 :             ExecStoreAllNullTuple(slot);
    2149      597246 :             bscan->rs_empty_tuples_pending--;
    2150             : 
    2151             :             /*
    2152             :              * We do not recheck all NULL tuples. Because the streaming read
    2153             :              * API only yields TBMIterateResults for blocks actually fetched
    2154             :              * from the heap, we must unset `recheck` ourselves here to ensure
    2155             :              * correct results.
     2156             :              * correct results.
     2157             :              *
    2158             :              * emit and then emits them after emitting tuples from the next
    2159             :              * fetched block. If no blocks need fetching, we'll emit the
    2160             :              * accrued count at the end of the scan.
    2161             :              */
    2162      597246 :             *recheck = false;
    2163      597246 :             return true;
    2164             :         }
    2165             : 
    2166             :         /*
    2167             :          * Returns false if the bitmap is exhausted and there are no further
    2168             :          * blocks we need to scan.
    2169             :          */
    2170      396922 :         if (!BitmapHeapScanNextBlock(scan, recheck, lossy_pages, exact_pages))
    2171       25174 :             return false;
    2172             :     }
    2173             : 
    2174     6029070 :     targoffset = hscan->rs_vistuples[hscan->rs_cindex];
    2175     6029070 :     page = BufferGetPage(hscan->rs_cbuf);
    2176     6029070 :     lp = PageGetItemId(page, targoffset);
    2177             :     Assert(ItemIdIsNormal(lp));
    2178             : 
    2179     6029070 :     hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
    2180     6029070 :     hscan->rs_ctup.t_len = ItemIdGetLength(lp);
    2181     6029070 :     hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
    2182     6029070 :     ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
    2183             : 
    2184     6029070 :     pgstat_count_heap_fetch(scan->rs_rd);
    2185             : 
    2186             :     /*
    2187             :      * Set up the result slot to point to this tuple.  Note that the slot
    2188             :      * acquires a pin on the buffer.
    2189             :      */
    2190     6029070 :     ExecStoreBufferHeapTuple(&hscan->rs_ctup,
    2191             :                              slot,
    2192             :                              hscan->rs_cbuf);
    2193             : 
    2194     6029070 :     hscan->rs_cindex++;
    2195             : 
    2196     6029070 :     return true;
    2197             : }
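
/*
 * Illustrative sketch (hypothetical state and callback; not part of
 * heapam_handler.c): the "empty tuples pending" pattern used above.  A
 * producer that decides not to fetch an all-visible block merely bumps a
 * counter; the consumer drains that counter, emitting placeholder rows,
 * before asking for the next real block.
 */
#include <stdbool.h>

typedef struct EmptyPendingSketch
{
    long        empty_pending;  /* rows owed for blocks we skipped */
} EmptyPendingSketch;

static bool
next_row_sketch(EmptyPendingSketch *state, bool (*next_block)(void))
{
    if (state->empty_pending > 0)
    {
        state->empty_pending--; /* caller emits an all-null row */
        return true;
    }
    return next_block();        /* false once everything is exhausted */
}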
    2198             : 
    2199             : static bool
    2200       12908 : heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
    2201             : {
    2202       12908 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2203       12908 :     TsmRoutine *tsm = scanstate->tsmroutine;
    2204             :     BlockNumber blockno;
    2205             : 
    2206             :     /* return false immediately if relation is empty */
    2207       12908 :     if (hscan->rs_nblocks == 0)
    2208           0 :         return false;
    2209             : 
    2210             :     /* release previous scan buffer, if any */
    2211       12908 :     if (BufferIsValid(hscan->rs_cbuf))
    2212             :     {
    2213       12732 :         ReleaseBuffer(hscan->rs_cbuf);
    2214       12732 :         hscan->rs_cbuf = InvalidBuffer;
    2215             :     }
    2216             : 
    2217       12908 :     if (tsm->NextSampleBlock)
    2218        4442 :         blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
    2219             :     else
    2220             :     {
    2221             :         /* scanning table sequentially */
    2222             : 
    2223        8466 :         if (hscan->rs_cblock == InvalidBlockNumber)
    2224             :         {
    2225             :             Assert(!hscan->rs_inited);
    2226          78 :             blockno = hscan->rs_startblock;
    2227             :         }
    2228             :         else
    2229             :         {
    2230             :             Assert(hscan->rs_inited);
    2231             : 
    2232        8388 :             blockno = hscan->rs_cblock + 1;
    2233             : 
    2234        8388 :             if (blockno >= hscan->rs_nblocks)
    2235             :             {
    2236             :                 /* wrap to beginning of rel, might not have started at 0 */
    2237          78 :                 blockno = 0;
    2238             :             }
    2239             : 
    2240             :             /*
    2241             :              * Report our new scan position for synchronization purposes.
    2242             :              *
    2243             :              * Note: we do this before checking for end of scan so that the
    2244             :              * final state of the position hint is back at the start of the
    2245             :              * rel.  That's not strictly necessary, but otherwise when you run
    2246             :              * the same query multiple times the starting position would shift
    2247             :              * a little bit backwards on every invocation, which is confusing.
    2248             :              * We don't guarantee any specific ordering in general, though.
    2249             :              */
    2250        8388 :             if (scan->rs_flags & SO_ALLOW_SYNC)
    2251           0 :                 ss_report_location(scan->rs_rd, blockno);
    2252             : 
    2253        8388 :             if (blockno == hscan->rs_startblock)
    2254             :             {
    2255          78 :                 blockno = InvalidBlockNumber;
    2256             :             }
    2257             :         }
    2258             :     }
    2259             : 
    2260       12908 :     hscan->rs_cblock = blockno;
    2261             : 
    2262       12908 :     if (!BlockNumberIsValid(blockno))
    2263             :     {
    2264         170 :         hscan->rs_inited = false;
    2265         170 :         return false;
    2266             :     }
    2267             : 
    2268             :     Assert(hscan->rs_cblock < hscan->rs_nblocks);
    2269             : 
    2270             :     /*
    2271             :      * Be sure to check for interrupts at least once per page.  Checks at
    2272             :      * higher code levels won't be able to stop a sample scan that encounters
    2273             :      * many pages' worth of consecutive dead tuples.
    2274             :      */
    2275       12738 :     CHECK_FOR_INTERRUPTS();
    2276             : 
    2277             :     /* Read page using selected strategy */
    2278       12738 :     hscan->rs_cbuf = ReadBufferExtended(hscan->rs_base.rs_rd, MAIN_FORKNUM,
    2279             :                                         blockno, RBM_NORMAL, hscan->rs_strategy);
    2280             : 
    2281             :     /* in pagemode, prune the page and determine visible tuple offsets */
    2282       12738 :     if (hscan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
    2283        8550 :         heap_prepare_pagescan(scan);
    2284             : 
    2285       12738 :     hscan->rs_inited = true;
    2286       12738 :     return true;
    2287             : }
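
/*
 * Illustrative sketch (hypothetical names; not part of heapam_handler.c):
 * the sequential advancement above.  Starting from startblock, move one
 * block at a time, wrap past the end of the relation back to block 0, and
 * stop once the scan comes around to startblock again.
 */
#define SKETCH_INVALID_BLOCK ((unsigned) -1)

static unsigned
next_sample_block_sketch(unsigned cur, unsigned startblock, unsigned nblocks)
{
    unsigned    next = cur + 1;

    if (next >= nblocks)
        next = 0;               /* wrap; the scan may not start at 0 */
    if (next == startblock)
        return SKETCH_INVALID_BLOCK;    /* back at the start: done */
    return next;
}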
    2288             : 
    2289             : static bool
    2290      253892 : heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
    2291             :                               TupleTableSlot *slot)
    2292             : {
    2293      253892 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2294      253892 :     TsmRoutine *tsm = scanstate->tsmroutine;
    2295      253892 :     BlockNumber blockno = hscan->rs_cblock;
    2296      253892 :     bool        pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
    2297             : 
    2298             :     Page        page;
    2299             :     bool        all_visible;
    2300             :     OffsetNumber maxoffset;
    2301             : 
    2302             :     /*
    2303             :      * When not using pagemode, we must lock the buffer during tuple
    2304             :      * visibility checks.
    2305             :      */
    2306      253892 :     if (!pagemode)
    2307        4194 :         LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    2308             : 
    2309      253892 :     page = (Page) BufferGetPage(hscan->rs_cbuf);
    2310      506684 :     all_visible = PageIsAllVisible(page) &&
    2311      252792 :         !scan->rs_snapshot->takenDuringRecovery;
    2312      253892 :     maxoffset = PageGetMaxOffsetNumber(page);
    2313             : 
    2314             :     for (;;)
    2315           0 :     {
    2316             :         OffsetNumber tupoffset;
    2317             : 
    2318      253892 :         CHECK_FOR_INTERRUPTS();
    2319             : 
    2320             :         /* Ask the tablesample method which tuples to check on this page. */
    2321      253892 :         tupoffset = tsm->NextSampleTuple(scanstate,
    2322             :                                          blockno,
    2323             :                                          maxoffset);
    2324             : 
    2325      253892 :         if (OffsetNumberIsValid(tupoffset))
    2326             :         {
    2327             :             ItemId      itemid;
    2328             :             bool        visible;
    2329      241160 :             HeapTuple   tuple = &(hscan->rs_ctup);
    2330             : 
    2331             :             /* Skip invalid tuple pointers. */
    2332      241160 :             itemid = PageGetItemId(page, tupoffset);
    2333      241160 :             if (!ItemIdIsNormal(itemid))
    2334           0 :                 continue;
    2335             : 
    2336      241160 :             tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2337      241160 :             tuple->t_len = ItemIdGetLength(itemid);
    2338      241160 :             ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
    2339             : 
    2341      241160 :             if (all_visible)
    2342      240348 :                 visible = true;
    2343             :             else
    2344         812 :                 visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
    2345             :                                                  tuple, tupoffset);
    2346             : 
    2347             :             /* in pagemode, heap_prepare_pagescan did this for us */
    2348      241160 :             if (!pagemode)
    2349           6 :                 HeapCheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
    2350             :                                                     hscan->rs_cbuf, scan->rs_snapshot);
    2351             : 
    2352             :             /* Try next tuple from same page. */
    2353      241160 :             if (!visible)
    2354           0 :                 continue;
    2355             : 
    2356             :             /* Found visible tuple, return it. */
    2357      241160 :             if (!pagemode)
    2358           6 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    2359             : 
    2360      241160 :             ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
    2361             : 
    2362             :             /* Count successfully-fetched tuples as heap fetches */
    2363      241160 :             pgstat_count_heap_getnext(scan->rs_rd);
    2364             : 
    2365      241160 :             return true;
    2366             :         }
    2367             :         else
    2368             :         {
    2369             :             /*
    2370             :              * If we get here, it means we've exhausted the items on this page
    2371             :              * and it's time to move to the next.
    2372             :              */
    2373       12732 :             if (!pagemode)
    2374        4188 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    2375             : 
    2376       12732 :             ExecClearTuple(slot);
    2377       12732 :             return false;
    2378             :         }
    2379             :     }
    2380             : 
    2381             :     Assert(0);
    2382             : }
    2383             : 
    2384             : 
    2385             : /* ----------------------------------------------------------------------------
    2386             :  *  Helper functions for the above.
    2387             :  * ----------------------------------------------------------------------------
    2388             :  */
    2389             : 
    2390             : /*
    2391             :  * Reconstruct and rewrite the given tuple
    2392             :  *
    2393             :  * We cannot simply copy the tuple as-is, for several reasons:
    2394             :  *
    2395             :  * 1. We'd like to squeeze out the values of any dropped columns, both
    2396             :  * to save space and to ensure we have no corner-case failures. (It's
    2397             :  * possible for example that the new table hasn't got a TOAST table
    2398             :  * and so is unable to store any large values of dropped cols.)
    2399             :  *
    2400             :  * 2. The tuple might not even be legal for the new table; this is
    2401             :  * currently only known to happen as an after-effect of ALTER TABLE
    2402             :  * SET WITHOUT OIDS.
    2403             :  *
    2404             :  * So, we must reconstruct the tuple from component Datums.
    2405             :  */
    2406             : static void
    2407      748098 : reform_and_rewrite_tuple(HeapTuple tuple,
    2408             :                          Relation OldHeap, Relation NewHeap,
    2409             :                          Datum *values, bool *isnull, RewriteState rwstate)
    2410             : {
    2411      748098 :     TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
    2412      748098 :     TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
    2413             :     HeapTuple   copiedTuple;
    2414             :     int         i;
    2415             : 
    2416      748098 :     heap_deform_tuple(tuple, oldTupDesc, values, isnull);
    2417             : 
    2418             :     /* Be sure to null out any dropped columns */
    2419     6415546 :     for (i = 0; i < newTupDesc->natts; i++)
    2420             :     {
    2421     5667448 :         if (TupleDescCompactAttr(newTupDesc, i)->attisdropped)
    2422           0 :             isnull[i] = true;
    2423             :     }
    2424             : 
    2425      748098 :     copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
    2426             : 
    2427             :     /* The heap rewrite module does the rest */
    2428      748098 :     rewrite_heap_tuple(rwstate, tuple, copiedTuple);
    2429             : 
    2430      748098 :     heap_freetuple(copiedTuple);
    2431      748098 : }
    2432             : 
    2433             : /*
    2434             :  * Check visibility of the tuple.
    2435             :  */
    2436             : static bool
    2437         812 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
    2438             :                        HeapTuple tuple,
    2439             :                        OffsetNumber tupoffset)
    2440             : {
    2441         812 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2442             : 
    2443         812 :     if (scan->rs_flags & SO_ALLOW_PAGEMODE)
    2444             :     {
    2445         806 :         uint32      start = 0,
    2446         806 :                     end = hscan->rs_ntuples;
    2447             : 
    2448             :         /*
    2449             :          * In page-at-a-time mode, heap_prepare_pagescan() already did
    2450             :          * checks, so just look at the info it left in rs_vistuples[].
    2451             :          *
    2452             :          * We use a binary search over the known-sorted array.  Note: we could
    2453             :          * save some effort if we insisted that NextSampleTuple select tuples
    2454             :          * in increasing order, but it's not clear that there would be enough
    2455             :          * gain to justify the restriction.
    2456             :          */
    2457        1562 :         while (start < end)
    2458             :         {
    2459        1562 :             uint32      mid = start + (end - start) / 2;
    2460        1562 :             OffsetNumber curoffset = hscan->rs_vistuples[mid];
    2461             : 
    2462        1562 :             if (tupoffset == curoffset)
    2463         806 :                 return true;
    2464         756 :             else if (tupoffset < curoffset)
    2465         442 :                 end = mid;
    2466             :             else
    2467         314 :                 start = mid + 1;
    2468             :         }
    2469             : 
    2470           0 :         return false;
    2471             :     }
    2472             :     else
    2473             :     {
    2474             :         /* Otherwise, we have to check the tuple individually. */
    2475           6 :         return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
    2476             :                                             buffer);
    2477             :     }
    2478             : }
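
/*
 * Illustrative sketch (hypothetical function name): the binary search above,
 * standalone.  Given the sorted array of visible offsets that
 * heap_prepare_pagescan() leaves in rs_vistuples[], report whether target is
 * among them.
 */
#include <stdbool.h>

static bool
offset_is_visible_sketch(const unsigned short *sorted, unsigned n,
                         unsigned short target)
{
    unsigned    start = 0,
                end = n;

    while (start < end)
    {
        unsigned    mid = start + (end - start) / 2;

        if (target == sorted[mid])
            return true;
        else if (target < sorted[mid])
            end = mid;
        else
            start = mid + 1;
    }
    return false;
}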
    2479             : 
    2480             : /*
    2481             :  * Helper function to get the next block of a bitmap heap scan.  Returns true
    2482             :  * when it got the next block and saved it in the scan descriptor, and false
    2483             :  * when the bitmap and/or relation are exhausted.
    2484             :  */
    2485             : static bool
    2486      396922 : BitmapHeapScanNextBlock(TableScanDesc scan,
    2487             :                         bool *recheck,
    2488             :                         uint64 *lossy_pages, uint64 *exact_pages)
    2489             : {
    2490      396922 :     BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
    2491      396922 :     HeapScanDesc hscan = (HeapScanDesc) bscan;
    2492             :     BlockNumber block;
    2493             :     void       *per_buffer_data;
    2494             :     Buffer      buffer;
    2495             :     Snapshot    snapshot;
    2496             :     int         ntup;
    2497             :     TBMIterateResult *tbmres;
    2498             :     OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
    2499      396922 :     int         noffsets = -1;
    2500             : 
    2501             :     Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
    2502             :     Assert(hscan->rs_read_stream);
    2503             : 
    2504      396922 :     hscan->rs_cindex = 0;
    2505      396922 :     hscan->rs_ntuples = 0;
    2506             : 
    2507             :     /* Release buffer containing previous block. */
    2508      396922 :     if (BufferIsValid(hscan->rs_cbuf))
    2509             :     {
    2510      371246 :         ReleaseBuffer(hscan->rs_cbuf);
    2511      371246 :         hscan->rs_cbuf = InvalidBuffer;
    2512             :     }
    2513             : 
    2514      396922 :     hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream,
    2515             :                                              &per_buffer_data);
    2516             : 
    2517      396922 :     if (BufferIsInvalid(hscan->rs_cbuf))
    2518             :     {
    2519       25270 :         if (BufferIsValid(bscan->rs_vmbuffer))
    2520             :         {
    2521         120 :             ReleaseBuffer(bscan->rs_vmbuffer);
    2522         120 :             bscan->rs_vmbuffer = InvalidBuffer;
    2523             :         }
    2524             : 
    2525             :         /*
    2526             :          * The bitmap is exhausted. Now emit any remaining empty tuples. The
    2527             :          * read stream API only returns TBMIterateResults for blocks actually
    2528             :          * fetched from the heap. Our callback will accrue a count of empty
    2529             :          * tuples to emit for all blocks we skipped fetching. So, if we skip
    2530             :          * fetching heap blocks at the end of the relation (or no heap blocks
     2531             :          * are fetched), we need to ensure we emit empty tuples before ending
     2532             :          * the scan.  We don't recheck empty tuples, so ensure `recheck` is
    2533             :          * unset.
    2534             :          */
    2535       25270 :         *recheck = false;
    2536       25270 :         return bscan->rs_empty_tuples_pending > 0;
    2537             :     }
    2538             : 
    2539             :     Assert(per_buffer_data);
    2540             : 
    2541      371652 :     tbmres = per_buffer_data;
    2542             : 
    2543             :     Assert(BlockNumberIsValid(tbmres->blockno));
    2544             :     Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
    2545             : 
    2546             :     /* Exact pages need their tuple offsets extracted. */
    2547      371652 :     if (!tbmres->lossy)
    2548      211252 :         noffsets = tbm_extract_page_tuple(tbmres, offsets,
    2549             :                                           TBM_MAX_TUPLES_PER_PAGE);
    2550             : 
    2551      371652 :     *recheck = tbmres->recheck;
    2552             : 
    2553      371652 :     block = hscan->rs_cblock = tbmres->blockno;
    2554      371652 :     buffer = hscan->rs_cbuf;
    2555      371652 :     snapshot = scan->rs_snapshot;
    2556             : 
    2557      371652 :     ntup = 0;
    2558             : 
    2559             :     /*
    2560             :      * Prune and repair fragmentation for the whole page, if possible.
    2561             :      */
    2562      371652 :     heap_page_prune_opt(scan->rs_rd, buffer);
    2563             : 
    2564             :     /*
    2565             :      * We must hold share lock on the buffer content while examining tuple
    2566             :      * visibility.  Afterwards, however, the tuples we have found to be
    2567             :      * visible are guaranteed good as long as we hold the buffer pin.
    2568             :      */
    2569      371652 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
    2570             : 
    2571             :     /*
    2572             :      * We need two separate strategies for lossy and non-lossy cases.
    2573             :      */
    2574      371652 :     if (!tbmres->lossy)
    2575             :     {
    2576             :         /*
    2577             :          * Bitmap is non-lossy, so we just look through the offsets listed in
    2578             :          * tbmres; but we have to follow any HOT chain starting at each such
    2579             :          * offset.
    2580             :          */
    2581             :         int         curslot;
    2582             : 
    2583             :         /* We must have extracted the tuple offsets by now */
    2584             :         Assert(noffsets > -1);
    2585             : 
    2586     5259416 :         for (curslot = 0; curslot < noffsets; curslot++)
    2587             :         {
    2588     5048170 :             OffsetNumber offnum = offsets[curslot];
    2589             :             ItemPointerData tid;
    2590             :             HeapTupleData heapTuple;
    2591             : 
    2592     5048170 :             ItemPointerSet(&tid, block, offnum);
    2593     5048170 :             if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
    2594             :                                        &heapTuple, NULL, true))
    2595     4808102 :                 hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
    2596             :         }
    2597             :     }
    2598             :     else
    2599             :     {
    2600             :         /*
    2601             :          * Bitmap is lossy, so we must examine each line pointer on the page.
    2602             :          * But we can ignore HOT chains, since we'll check each tuple anyway.
    2603             :          */
    2604      160400 :         Page        page = BufferGetPage(buffer);
    2605      160400 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
    2606             :         OffsetNumber offnum;
    2607             : 
    2608     1384850 :         for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    2609             :         {
    2610             :             ItemId      lp;
    2611             :             HeapTupleData loctup;
    2612             :             bool        valid;
    2613             : 
    2614     1224450 :             lp = PageGetItemId(page, offnum);
    2615     1224450 :             if (!ItemIdIsNormal(lp))
    2616           0 :                 continue;
    2617     1224450 :             loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
    2618     1224450 :             loctup.t_len = ItemIdGetLength(lp);
    2619     1224450 :             loctup.t_tableOid = scan->rs_rd->rd_id;
    2620     1224450 :             ItemPointerSet(&loctup.t_self, block, offnum);
    2621     1224450 :             valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
    2622     1224450 :             if (valid)
    2623             :             {
    2624     1224324 :                 hscan->rs_vistuples[ntup++] = offnum;
    2625     1224324 :                 PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
    2626     1224324 :                                  HeapTupleHeaderGetXmin(loctup.t_data));
    2627             :             }
    2628     1224450 :             HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
    2629             :                                                 buffer, snapshot);
    2630             :         }
    2631             :     }
    2632             : 
    2633      371646 :     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2634             : 
    2635             :     Assert(ntup <= MaxHeapTuplesPerPage);
    2636      371646 :     hscan->rs_ntuples = ntup;
    2637             : 
    2638      371646 :     if (tbmres->lossy)
    2639      160400 :         (*lossy_pages)++;
    2640             :     else
    2641      211246 :         (*exact_pages)++;
    2642             : 
    2643             :     /*
    2644             :      * Return true to indicate that a valid block was found and the bitmap is
    2645             :      * not exhausted. If there are no visible tuples on this page,
     2646             :      * hscan->rs_ntuples will be 0, and heapam_scan_bitmap_next_tuple()'s
     2647             :      * loop will call this function again to advance to the next
    2648             :      * block in the bitmap.
    2649             :      */
    2650      371646 :     return true;
    2651             : }
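
/*
 * Illustrative sketch (hypothetical visibility callback; not part of
 * heapam_handler.c): the exact/lossy split above.  An exact bitmap page
 * carries the offsets of interest, so only those are probed; a lossy page
 * only says "somewhere on this block", so every line pointer from 1 to
 * maxoff must be checked.
 */
#include <stdbool.h>

static int
collect_visible_sketch(bool lossy,
                       const unsigned short *offsets, int noffsets,
                       unsigned short maxoff,
                       bool (*visible)(unsigned short off),
                       unsigned short *out)
{
    int         ntup = 0;

    if (!lossy)
    {
        for (int i = 0; i < noffsets; i++)
            if (visible(offsets[i]))
                out[ntup++] = offsets[i];
    }
    else
    {
        for (unsigned short off = 1; off <= maxoff; off++)
            if (visible(off))
                out[ntup++] = off;
    }
    return ntup;
}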
    2652             : 
    2653             : /* ------------------------------------------------------------------------
    2654             :  * Definition of the heap table access method.
    2655             :  * ------------------------------------------------------------------------
    2656             :  */
    2657             : 
    2658             : static const TableAmRoutine heapam_methods = {
    2659             :     .type = T_TableAmRoutine,
    2660             : 
    2661             :     .slot_callbacks = heapam_slot_callbacks,
    2662             : 
    2663             :     .scan_begin = heap_beginscan,
    2664             :     .scan_end = heap_endscan,
    2665             :     .scan_rescan = heap_rescan,
    2666             :     .scan_getnextslot = heap_getnextslot,
    2667             : 
    2668             :     .scan_set_tidrange = heap_set_tidrange,
    2669             :     .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
    2670             : 
    2671             :     .parallelscan_estimate = table_block_parallelscan_estimate,
    2672             :     .parallelscan_initialize = table_block_parallelscan_initialize,
    2673             :     .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
    2674             : 
    2675             :     .index_fetch_begin = heapam_index_fetch_begin,
    2676             :     .index_fetch_reset = heapam_index_fetch_reset,
    2677             :     .index_fetch_end = heapam_index_fetch_end,
    2678             :     .index_fetch_tuple = heapam_index_fetch_tuple,
    2679             : 
    2680             :     .tuple_insert = heapam_tuple_insert,
    2681             :     .tuple_insert_speculative = heapam_tuple_insert_speculative,
    2682             :     .tuple_complete_speculative = heapam_tuple_complete_speculative,
    2683             :     .multi_insert = heap_multi_insert,
    2684             :     .tuple_delete = heapam_tuple_delete,
    2685             :     .tuple_update = heapam_tuple_update,
    2686             :     .tuple_lock = heapam_tuple_lock,
    2687             : 
    2688             :     .tuple_fetch_row_version = heapam_fetch_row_version,
    2689             :     .tuple_get_latest_tid = heap_get_latest_tid,
    2690             :     .tuple_tid_valid = heapam_tuple_tid_valid,
    2691             :     .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
    2692             :     .index_delete_tuples = heap_index_delete_tuples,
    2693             : 
    2694             :     .relation_set_new_filelocator = heapam_relation_set_new_filelocator,
    2695             :     .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
    2696             :     .relation_copy_data = heapam_relation_copy_data,
    2697             :     .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
    2698             :     .relation_vacuum = heap_vacuum_rel,
    2699             :     .scan_analyze_next_block = heapam_scan_analyze_next_block,
    2700             :     .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
    2701             :     .index_build_range_scan = heapam_index_build_range_scan,
    2702             :     .index_validate_scan = heapam_index_validate_scan,
    2703             : 
    2704             :     .relation_size = table_block_relation_size,
    2705             :     .relation_needs_toast_table = heapam_relation_needs_toast_table,
    2706             :     .relation_toast_am = heapam_relation_toast_am,
    2707             :     .relation_fetch_toast_slice = heap_fetch_toast_slice,
    2708             : 
    2709             :     .relation_estimate_size = heapam_estimate_rel_size,
    2710             : 
    2711             :     .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
    2712             :     .scan_sample_next_block = heapam_scan_sample_next_block,
    2713             :     .scan_sample_next_tuple = heapam_scan_sample_next_tuple
    2714             : };
    2715             : 
    2716             : 
    2717             : const TableAmRoutine *
    2718    18958122 : GetHeapamTableAmRoutine(void)
    2719             : {
    2720    18958122 :     return &heapam_methods;
    2721             : }
    2722             : 
    2723             : Datum
    2724     2302164 : heap_tableam_handler(PG_FUNCTION_ARGS)
    2725             : {
    2726     2302164 :     PG_RETURN_POINTER(&heapam_methods);
    2727             : }
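
/*
 * Illustrative sketch (hypothetical AM name): any table access method
 * handler follows the pattern of heap_tableam_handler above -- fill a static
 * const TableAmRoutine with callbacks and return its address from a function
 * of SQL type table_am_handler.  Skeleton only; a real AM must supply every
 * callback the TableAmRoutine contract requires.
 */
#include "postgres.h"
#include "access/tableam.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(my_tableam_handler);

static const TableAmRoutine my_methods = {
    .type = T_TableAmRoutine,
    /* ... assign scan, modify, DDL and planner callbacks here ... */
};

Datum
my_tableam_handler(PG_FUNCTION_ARGS)
{
    PG_RETURN_POINTER(&my_methods);
}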

Generated by: LCOV version 1.14