LCOV - code coverage report
Current view: top level - src/backend/access/heap - heapam_handler.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 676 741 91.2 %
Date: 2019-11-13 22:07:24 Functions: 34 34 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * heapam_handler.c
       4             :  *    heap table access method code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/heapam_handler.c
      12             :  *
      13             :  *
      14             :  * NOTES
      15             :  *    This files wires up the lower level heapam.c et al routines with the
      16             :  *    tableam abstraction.
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/genam.h"
      23             : #include "access/heapam.h"
      24             : #include "access/heaptoast.h"
      25             : #include "access/multixact.h"
      26             : #include "access/rewriteheap.h"
      27             : #include "access/tableam.h"
      28             : #include "access/tsmapi.h"
      29             : #include "access/xact.h"
      30             : #include "catalog/catalog.h"
      31             : #include "catalog/index.h"
      32             : #include "catalog/storage.h"
      33             : #include "catalog/storage_xlog.h"
      34             : #include "commands/progress.h"
      35             : #include "executor/executor.h"
      36             : #include "miscadmin.h"
      37             : #include "pgstat.h"
      38             : #include "storage/bufmgr.h"
      39             : #include "storage/bufpage.h"
      40             : #include "storage/lmgr.h"
      41             : #include "storage/predicate.h"
      42             : #include "storage/procarray.h"
      43             : #include "storage/smgr.h"
      44             : #include "utils/builtins.h"
      45             : #include "utils/rel.h"
      46             : 
      47             : static void reform_and_rewrite_tuple(HeapTuple tuple,
      48             :                                      Relation OldHeap, Relation NewHeap,
      49             :                                      Datum *values, bool *isnull, RewriteState rwstate);
      50             : 
      51             : static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
      52             :                                    HeapTuple tuple,
      53             :                                    OffsetNumber tupoffset);
      54             : 
      55             : static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
      56             : 
      57             : static const TableAmRoutine heapam_methods;
      58             : 
      59             : 
      60             : /* ------------------------------------------------------------------------
      61             :  * Slot related callbacks for heap AM
      62             :  * ------------------------------------------------------------------------
      63             :  */
      64             : 
      65             : static const TupleTableSlotOps *
      66    11749950 : heapam_slot_callbacks(Relation relation)
      67             : {
      68    11749950 :     return &TTSOpsBufferHeapTuple;
      69             : }
      70             : 
      71             : 
      72             : /* ------------------------------------------------------------------------
      73             :  * Index Scan Callbacks for heap AM
      74             :  * ------------------------------------------------------------------------
      75             :  */
      76             : 
      77             : static IndexFetchTableData *
      78    10190704 : heapam_index_fetch_begin(Relation rel)
      79             : {
      80    10190704 :     IndexFetchHeapData *hscan = palloc0(sizeof(IndexFetchHeapData));
      81             : 
      82    10190704 :     hscan->xs_base.rel = rel;
      83    10190704 :     hscan->xs_cbuf = InvalidBuffer;
      84             : 
      85    10190704 :     return &hscan->xs_base;
      86             : }
      87             : 
      88             : static void
      89    24386902 : heapam_index_fetch_reset(IndexFetchTableData *scan)
      90             : {
      91    24386902 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
      92             : 
      93    24386902 :     if (BufferIsValid(hscan->xs_cbuf))
      94             :     {
      95     7489604 :         ReleaseBuffer(hscan->xs_cbuf);
      96     7489604 :         hscan->xs_cbuf = InvalidBuffer;
      97             :     }
      98    24386902 : }
      99             : 
     100             : static void
     101    10189982 : heapam_index_fetch_end(IndexFetchTableData *scan)
     102             : {
     103    10189982 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     104             : 
     105    10189982 :     heapam_index_fetch_reset(scan);
     106             : 
     107    10189982 :     pfree(hscan);
     108    10189982 : }
     109             : 
     110             : static bool
     111    14978534 : heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
     112             :                          ItemPointer tid,
     113             :                          Snapshot snapshot,
     114             :                          TupleTableSlot *slot,
     115             :                          bool *call_again, bool *all_dead)
     116             : {
     117    14978534 :     IndexFetchHeapData *hscan = (IndexFetchHeapData *) scan;
     118    14978534 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     119             :     bool        got_heap_tuple;
     120             : 
     121             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     122             : 
     123             :     /* We can skip the buffer-switching logic if we're in mid-HOT chain. */
     124    14978534 :     if (!*call_again)
     125             :     {
     126             :         /* Switch to correct buffer if we don't have it already */
     127    14854282 :         Buffer      prev_buf = hscan->xs_cbuf;
     128             : 
     129    14854282 :         hscan->xs_cbuf = ReleaseAndReadBuffer(hscan->xs_cbuf,
     130             :                                               hscan->xs_base.rel,
     131    14854282 :                                               ItemPointerGetBlockNumber(tid));
     132             : 
     133             :         /*
     134             :          * Prune page, but only if we weren't already on this page
     135             :          */
     136    14854282 :         if (prev_buf != hscan->xs_cbuf)
     137     8805928 :             heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
     138             :     }
     139             : 
     140             :     /* Obtain share-lock on the buffer so we can examine visibility */
     141    14978534 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_SHARE);
     142    14978534 :     got_heap_tuple = heap_hot_search_buffer(tid,
     143             :                                             hscan->xs_base.rel,
     144             :                                             hscan->xs_cbuf,
     145             :                                             snapshot,
     146             :                                             &bslot->base.tupdata,
     147             :                                             all_dead,
     148    14978534 :                                             !*call_again);
     149    14978534 :     bslot->base.tupdata.t_self = *tid;
     150    14978534 :     LockBuffer(hscan->xs_cbuf, BUFFER_LOCK_UNLOCK);
     151             : 
     152    14978534 :     if (got_heap_tuple)
     153             :     {
     154             :         /*
     155             :          * Only in a non-MVCC snapshot can more than one member of the HOT
     156             :          * chain be visible.
     157             :          */
     158    14304524 :         *call_again = !IsMVCCSnapshot(snapshot);
     159             : 
     160    14304524 :         slot->tts_tableOid = RelationGetRelid(scan->rel);
     161    14304524 :         ExecStoreBufferHeapTuple(&bslot->base.tupdata, slot, hscan->xs_cbuf);
     162             :     }
     163             :     else
     164             :     {
     165             :         /* We've reached the end of the HOT chain. */
     166      674010 :         *call_again = false;
     167             :     }
     168             : 
     169    14978534 :     return got_heap_tuple;
     170             : }
     171             : 
     172             : 
     173             : /* ------------------------------------------------------------------------
     174             :  * Callbacks for non-modifying operations on individual tuples for heap AM
     175             :  * ------------------------------------------------------------------------
     176             :  */
     177             : 
     178             : static bool
     179       14610 : heapam_fetch_row_version(Relation relation,
     180             :                          ItemPointer tid,
     181             :                          Snapshot snapshot,
     182             :                          TupleTableSlot *slot)
     183             : {
     184       14610 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     185             :     Buffer      buffer;
     186             : 
     187             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     188             : 
     189       14610 :     bslot->base.tupdata.t_self = *tid;
     190       14610 :     if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer))
     191             :     {
     192             :         /* store in slot, transferring existing pin */
     193       14592 :         ExecStorePinnedBufferHeapTuple(&bslot->base.tupdata, slot, buffer);
     194       14592 :         slot->tts_tableOid = RelationGetRelid(relation);
     195             : 
     196       14592 :         return true;
     197             :     }
     198             : 
     199          18 :     return false;
     200             : }
     201             : 
     202             : static bool
     203         340 : heapam_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
     204             : {
     205         340 :     HeapScanDesc hscan = (HeapScanDesc) scan;
     206             : 
     207         680 :     return ItemPointerIsValid(tid) &&
     208         340 :         ItemPointerGetBlockNumber(tid) < hscan->rs_nblocks;
     209             : }
     210             : 
     211             : static bool
     212      110404 : heapam_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
     213             :                                 Snapshot snapshot)
     214             : {
     215      110404 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     216             :     bool        res;
     217             : 
     218             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     219             :     Assert(BufferIsValid(bslot->buffer));
     220             : 
     221             :     /*
     222             :      * We need buffer pin and lock to call HeapTupleSatisfiesVisibility.
     223             :      * Caller should be holding pin, but not lock.
     224             :      */
     225      110404 :     LockBuffer(bslot->buffer, BUFFER_LOCK_SHARE);
     226      110404 :     res = HeapTupleSatisfiesVisibility(bslot->base.tuple, snapshot,
     227             :                                        bslot->buffer);
     228      110404 :     LockBuffer(bslot->buffer, BUFFER_LOCK_UNLOCK);
     229             : 
     230      110404 :     return res;
     231             : }
     232             : 
     233             : 
     234             : /* ----------------------------------------------------------------------------
     235             :  *  Functions for manipulations of physical tuples for heap AM.
     236             :  * ----------------------------------------------------------------------------
     237             :  */
     238             : 
     239             : static void
     240    12521084 : heapam_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid,
     241             :                     int options, BulkInsertState bistate)
     242             : {
     243    12521084 :     bool        shouldFree = true;
     244    12521084 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     245             : 
     246             :     /* Update the tuple with table oid */
     247    12521084 :     slot->tts_tableOid = RelationGetRelid(relation);
     248    12521084 :     tuple->t_tableOid = slot->tts_tableOid;
     249             : 
     250             :     /* Perform the insertion, and copy the resulting ItemPointer */
     251    12521084 :     heap_insert(relation, tuple, cid, options, bistate);
     252    12521062 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     253             : 
     254    12521062 :     if (shouldFree)
     255     1531746 :         pfree(tuple);
     256    12521062 : }
     257             : 
     258             : static void
     259        3894 : heapam_tuple_insert_speculative(Relation relation, TupleTableSlot *slot,
     260             :                                 CommandId cid, int options,
     261             :                                 BulkInsertState bistate, uint32 specToken)
     262             : {
     263        3894 :     bool        shouldFree = true;
     264        3894 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     265             : 
     266             :     /* Update the tuple with table oid */
     267        3894 :     slot->tts_tableOid = RelationGetRelid(relation);
     268        3894 :     tuple->t_tableOid = slot->tts_tableOid;
     269             : 
     270        3894 :     HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
     271        3894 :     options |= HEAP_INSERT_SPECULATIVE;
     272             : 
     273             :     /* Perform the insertion, and copy the resulting ItemPointer */
     274        3894 :     heap_insert(relation, tuple, cid, options, bistate);
     275        3894 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     276             : 
     277        3894 :     if (shouldFree)
     278          46 :         pfree(tuple);
     279        3894 : }
     280             : 
     281             : static void
     282        3890 : heapam_tuple_complete_speculative(Relation relation, TupleTableSlot *slot,
     283             :                                   uint32 specToken, bool succeeded)
     284             : {
     285        3890 :     bool        shouldFree = true;
     286        3890 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     287             : 
     288             :     /* adjust the tuple's state accordingly */
     289        3890 :     if (succeeded)
     290        3882 :         heap_finish_speculative(relation, &slot->tts_tid);
     291             :     else
     292           8 :         heap_abort_speculative(relation, &slot->tts_tid);
     293             : 
     294        3890 :     if (shouldFree)
     295          46 :         pfree(tuple);
     296        3890 : }
     297             : 
     298             : static TM_Result
     299      831718 : heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid,
     300             :                     Snapshot snapshot, Snapshot crosscheck, bool wait,
     301             :                     TM_FailureData *tmfd, bool changingPart)
     302             : {
     303             :     /*
     304             :      * Currently Deleting of index tuples are handled at vacuum, in case if
     305             :      * the storage itself is cleaning the dead tuples by itself, it is the
     306             :      * time to call the index tuple deletion also.
     307             :      */
     308      831718 :     return heap_delete(relation, tid, cid, crosscheck, wait, tmfd, changingPart);
     309             : }
     310             : 
     311             : 
     312             : static TM_Result
     313      118568 : heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot,
     314             :                     CommandId cid, Snapshot snapshot, Snapshot crosscheck,
     315             :                     bool wait, TM_FailureData *tmfd,
     316             :                     LockTupleMode *lockmode, bool *update_indexes)
     317             : {
     318      118568 :     bool        shouldFree = true;
     319      118568 :     HeapTuple   tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree);
     320             :     TM_Result   result;
     321             : 
     322             :     /* Update the tuple with table oid */
     323      118568 :     slot->tts_tableOid = RelationGetRelid(relation);
     324      118568 :     tuple->t_tableOid = slot->tts_tableOid;
     325             : 
     326      118568 :     result = heap_update(relation, otid, tuple, cid, crosscheck, wait,
     327             :                          tmfd, lockmode);
     328      118544 :     ItemPointerCopy(&tuple->t_self, &slot->tts_tid);
     329             : 
     330             :     /*
     331             :      * Decide whether new index entries are needed for the tuple
     332             :      *
     333             :      * Note: heap_update returns the tid (location) of the new tuple in the
     334             :      * t_self field.
     335             :      *
     336             :      * If it's a HOT update, we mustn't insert new index entries.
     337             :      */
     338      118544 :     *update_indexes = result == TM_Ok && !HeapTupleIsHeapOnly(tuple);
     339             : 
     340      118544 :     if (shouldFree)
     341         266 :         pfree(tuple);
     342             : 
     343      118544 :     return result;
     344             : }
     345             : 
     346             : static TM_Result
     347       18218 : heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
     348             :                   TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
     349             :                   LockWaitPolicy wait_policy, uint8 flags,
     350             :                   TM_FailureData *tmfd)
     351             : {
     352       18218 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     353             :     TM_Result   result;
     354             :     Buffer      buffer;
     355       18218 :     HeapTuple   tuple = &bslot->base.tupdata;
     356             :     bool        follow_updates;
     357             : 
     358       18218 :     follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0;
     359       18218 :     tmfd->traversed = false;
     360             : 
     361             :     Assert(TTS_IS_BUFFERTUPLE(slot));
     362             : 
     363             : tuple_lock_retry:
     364       18370 :     tuple->t_self = *tid;
     365       18370 :     result = heap_lock_tuple(relation, tuple, cid, mode, wait_policy,
     366             :                              follow_updates, &buffer, tmfd);
     367             : 
     368       18566 :     if (result == TM_Updated &&
     369         214 :         (flags & TUPLE_LOCK_FLAG_FIND_LAST_VERSION))
     370             :     {
     371         188 :         ReleaseBuffer(buffer);
     372             :         /* Should not encounter speculative tuple on recheck */
     373             :         Assert(!HeapTupleHeaderIsSpeculative(tuple->t_data));
     374             : 
     375         188 :         if (!ItemPointerEquals(&tmfd->ctid, &tuple->t_self))
     376             :         {
     377             :             SnapshotData SnapshotDirty;
     378             :             TransactionId priorXmax;
     379             : 
     380             :             /* it was updated, so look at the updated version */
     381         188 :             *tid = tmfd->ctid;
     382             :             /* updated row should have xmin matching this xmax */
     383         188 :             priorXmax = tmfd->xmax;
     384             : 
     385             :             /* signal that a tuple later in the chain is getting locked */
     386         188 :             tmfd->traversed = true;
     387             : 
     388             :             /*
     389             :              * fetch target tuple
     390             :              *
     391             :              * Loop here to deal with updated or busy tuples
     392             :              */
     393         188 :             InitDirtySnapshot(SnapshotDirty);
     394             :             for (;;)
     395             :             {
     396         284 :                 if (ItemPointerIndicatesMovedPartitions(tid))
     397          16 :                     ereport(ERROR,
     398             :                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
     399             :                              errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
     400             : 
     401         220 :                 tuple->t_self = *tid;
     402         220 :                 if (heap_fetch(relation, &SnapshotDirty, tuple, &buffer))
     403             :                 {
     404             :                     /*
     405             :                      * If xmin isn't what we're expecting, the slot must have
     406             :                      * been recycled and reused for an unrelated tuple.  This
     407             :                      * implies that the latest version of the row was deleted,
     408             :                      * so we need do nothing.  (Should be safe to examine xmin
     409             :                      * without getting buffer's content lock.  We assume
     410             :                      * reading a TransactionId to be atomic, and Xmin never
     411             :                      * changes in an existing tuple, except to invalid or
     412             :                      * frozen, and neither of those can match priorXmax.)
     413             :                      */
     414         166 :                     if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     415             :                                              priorXmax))
     416             :                     {
     417           0 :                         ReleaseBuffer(buffer);
     418          18 :                         return TM_Deleted;
     419             :                     }
     420             : 
     421             :                     /* otherwise xmin should not be dirty... */
     422         166 :                     if (TransactionIdIsValid(SnapshotDirty.xmin))
     423           0 :                         ereport(ERROR,
     424             :                                 (errcode(ERRCODE_DATA_CORRUPTED),
     425             :                                  errmsg_internal("t_xmin is uncommitted in tuple to be updated")));
     426             : 
     427             :                     /*
     428             :                      * If tuple is being updated by other transaction then we
     429             :                      * have to wait for its commit/abort, or die trying.
     430             :                      */
     431         166 :                     if (TransactionIdIsValid(SnapshotDirty.xmax))
     432             :                     {
     433           4 :                         ReleaseBuffer(buffer);
     434           4 :                         switch (wait_policy)
     435             :                         {
     436             :                             case LockWaitBlock:
     437           0 :                                 XactLockTableWait(SnapshotDirty.xmax,
     438             :                                                   relation, &tuple->t_self,
     439             :                                                   XLTW_FetchUpdated);
     440           0 :                                 break;
     441             :                             case LockWaitSkip:
     442           2 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
     443             :                                     /* skip instead of waiting */
     444           2 :                                     return TM_WouldBlock;
     445           0 :                                 break;
     446             :                             case LockWaitError:
     447           2 :                                 if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
     448           2 :                                     ereport(ERROR,
     449             :                                             (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
     450             :                                              errmsg("could not obtain lock on row in relation \"%s\"",
     451             :                                                     RelationGetRelationName(relation))));
     452           0 :                                 break;
     453             :                         }
     454           0 :                         continue;   /* loop back to repeat heap_fetch */
     455             :                     }
     456             : 
     457             :                     /*
     458             :                      * If tuple was inserted by our own transaction, we have
     459             :                      * to check cmin against cid: cmin >= current CID means
     460             :                      * our command cannot see the tuple, so we should ignore
     461             :                      * it. Otherwise heap_lock_tuple() will throw an error,
     462             :                      * and so would any later attempt to update or delete the
     463             :                      * tuple.  (We need not check cmax because
     464             :                      * HeapTupleSatisfiesDirty will consider a tuple deleted
     465             :                      * by our transaction dead, regardless of cmax.)  We just
     466             :                      * checked that priorXmax == xmin, so we can test that
     467             :                      * variable instead of doing HeapTupleHeaderGetXmin again.
     468             :                      */
     469         172 :                     if (TransactionIdIsCurrentTransactionId(priorXmax) &&
     470          10 :                         HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
     471             :                     {
     472          10 :                         tmfd->xmax = priorXmax;
     473             : 
     474             :                         /*
     475             :                          * Cmin is the problematic value, so store that. See
     476             :                          * above.
     477             :                          */
     478          10 :                         tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
     479          10 :                         ReleaseBuffer(buffer);
     480          10 :                         return TM_SelfModified;
     481             :                     }
     482             : 
     483             :                     /*
     484             :                      * This is a live tuple, so try to lock it again.
     485             :                      */
     486         152 :                     ReleaseBuffer(buffer);
     487         152 :                     goto tuple_lock_retry;
     488             :                 }
     489             : 
     490             :                 /*
     491             :                  * If the referenced slot was actually empty, the latest
     492             :                  * version of the row must have been deleted, so we need do
     493             :                  * nothing.
     494             :                  */
     495          54 :                 if (tuple->t_data == NULL)
     496             :                 {
     497           0 :                     return TM_Deleted;
     498             :                 }
     499             : 
     500             :                 /*
     501             :                  * As above, if xmin isn't what we're expecting, do nothing.
     502             :                  */
     503          54 :                 if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
     504             :                                          priorXmax))
     505             :                 {
     506           0 :                     if (BufferIsValid(buffer))
     507           0 :                         ReleaseBuffer(buffer);
     508           0 :                     return TM_Deleted;
     509             :                 }
     510             : 
     511             :                 /*
     512             :                  * If we get here, the tuple was found but failed
     513             :                  * SnapshotDirty. Assuming the xmin is either a committed xact
     514             :                  * or our own xact (as it certainly should be if we're trying
     515             :                  * to modify the tuple), this must mean that the row was
     516             :                  * updated or deleted by either a committed xact or our own
     517             :                  * xact.  If it was deleted, we can ignore it; if it was
     518             :                  * updated then chain up to the next version and repeat the
     519             :                  * whole process.
     520             :                  *
     521             :                  * As above, it should be safe to examine xmax and t_ctid
     522             :                  * without the buffer content lock, because they can't be
     523             :                  * changing.
     524             :                  */
     525          54 :                 if (ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
     526             :                 {
     527             :                     /* deleted, so forget about it */
     528           6 :                     if (BufferIsValid(buffer))
     529           0 :                         ReleaseBuffer(buffer);
     530           6 :                     return TM_Deleted;
     531             :                 }
     532             : 
     533             :                 /* updated, so look at the updated row */
     534          48 :                 *tid = tuple->t_data->t_ctid;
     535             :                 /* updated row should have xmin matching this xmax */
     536          48 :                 priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
     537          48 :                 if (BufferIsValid(buffer))
     538           0 :                     ReleaseBuffer(buffer);
     539             :                 /* loop back to fetch next in chain */
     540             :             }
     541             :         }
     542             :         else
     543             :         {
     544             :             /* tuple was deleted, so give up */
     545           0 :             return TM_Deleted;
     546             :         }
     547             :     }
     548             : 
     549       18164 :     slot->tts_tableOid = RelationGetRelid(relation);
     550       18164 :     tuple->t_tableOid = slot->tts_tableOid;
     551             : 
     552             :     /* store in slot, transferring existing pin */
     553       18164 :     ExecStorePinnedBufferHeapTuple(tuple, slot, buffer);
     554             : 
     555       18164 :     return result;
     556             : }
     557             : 
     558             : static void
     559        2660 : heapam_finish_bulk_insert(Relation relation, int options)
     560             : {
     561             :     /*
     562             :      * If we skipped writing WAL, then we need to sync the heap (but not
     563             :      * indexes since those use WAL anyway / don't go through tableam)
     564             :      */
     565        2660 :     if (options & HEAP_INSERT_SKIP_WAL)
     566          22 :         heap_sync(relation);
     567        2660 : }
     568             : 
     569             : 
     570             : /* ------------------------------------------------------------------------
     571             :  * DDL related callbacks for heap AM.
     572             :  * ------------------------------------------------------------------------
     573             :  */
     574             : 
     575             : static void
     576       61776 : heapam_relation_set_new_filenode(Relation rel,
     577             :                                  const RelFileNode *newrnode,
     578             :                                  char persistence,
     579             :                                  TransactionId *freezeXid,
     580             :                                  MultiXactId *minmulti)
     581             : {
     582             :     SMgrRelation srel;
     583             : 
     584             :     /*
     585             :      * Initialize to the minimum XID that could put tuples in the table. We
     586             :      * know that no xacts older than RecentXmin are still running, so that
     587             :      * will do.
     588             :      */
     589       61776 :     *freezeXid = RecentXmin;
     590             : 
     591             :     /*
     592             :      * Similarly, initialize the minimum Multixact to the first value that
     593             :      * could possibly be stored in tuples in the table.  Running transactions
     594             :      * could reuse values from their local cache, so we are careful to
     595             :      * consider all currently running multis.
     596             :      *
     597             :      * XXX this could be refined further, but is it worth the hassle?
     598             :      */
     599       61776 :     *minmulti = GetOldestMultiXactId();
     600             : 
     601       61776 :     srel = RelationCreateStorage(*newrnode, persistence);
     602             : 
     603             :     /*
     604             :      * If required, set up an init fork for an unlogged table so that it can
     605             :      * be correctly reinitialized on restart.  An immediate sync is required
     606             :      * even if the page has been logged, because the write did not go through
     607             :      * shared_buffers and therefore a concurrent checkpoint may have moved the
     608             :      * redo pointer past our xlog record.  Recovery may as well remove it
     609             :      * while replaying, for example, XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE
     610             :      * record. Therefore, logging is necessary even if wal_level=minimal.
     611             :      */
     612       61776 :     if (persistence == RELPERSISTENCE_UNLOGGED)
     613             :     {
     614             :         Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
     615             :                rel->rd_rel->relkind == RELKIND_MATVIEW ||
     616             :                rel->rd_rel->relkind == RELKIND_TOASTVALUE);
     617         120 :         smgrcreate(srel, INIT_FORKNUM, false);
     618         120 :         log_smgrcreate(newrnode, INIT_FORKNUM);
     619         120 :         smgrimmedsync(srel, INIT_FORKNUM);
     620             :     }
     621             : 
     622       61776 :     smgrclose(srel);
     623       61776 : }
     624             : 
     625             : static void
     626         288 : heapam_relation_nontransactional_truncate(Relation rel)
     627             : {
     628         288 :     RelationTruncate(rel, 0);
     629         288 : }
     630             : 
     631             : static void
     632          22 : heapam_relation_copy_data(Relation rel, const RelFileNode *newrnode)
     633             : {
     634             :     SMgrRelation dstrel;
     635             : 
     636          22 :     dstrel = smgropen(*newrnode, rel->rd_backend);
     637          22 :     RelationOpenSmgr(rel);
     638             : 
     639             :     /*
     640             :      * Since we copy the file directly without looking at the shared buffers,
     641             :      * we'd better first flush out any pages of the source relation that are
     642             :      * in shared buffers.  We assume no new changes will be made while we are
     643             :      * holding exclusive lock on the rel.
     644             :      */
     645          22 :     FlushRelationBuffers(rel);
     646             : 
     647             :     /*
     648             :      * Create and copy all forks of the relation, and schedule unlinking of
     649             :      * old physical files.
     650             :      *
     651             :      * NOTE: any conflict in relfilenode value will be caught in
     652             :      * RelationCreateStorage().
     653             :      */
     654          22 :     RelationCreateStorage(*newrnode, rel->rd_rel->relpersistence);
     655             : 
     656             :     /* copy main fork */
     657          22 :     RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
     658          22 :                         rel->rd_rel->relpersistence);
     659             : 
     660             :     /* copy those extra forks that exist */
     661         110 :     for (ForkNumber forkNum = MAIN_FORKNUM + 1;
     662          66 :          forkNum <= MAX_FORKNUM; forkNum++)
     663             :     {
     664          66 :         if (smgrexists(rel->rd_smgr, forkNum))
     665             :         {
     666           0 :             smgrcreate(dstrel, forkNum, false);
     667             : 
     668             :             /*
     669             :              * WAL log creation if the relation is persistent, or this is the
     670             :              * init fork of an unlogged relation.
     671             :              */
     672           0 :             if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
     673           0 :                 (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
     674             :                  forkNum == INIT_FORKNUM))
     675           0 :                 log_smgrcreate(newrnode, forkNum);
     676           0 :             RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
     677           0 :                                 rel->rd_rel->relpersistence);
     678             :         }
     679             :     }
     680             : 
     681             : 
     682             :     /* drop old relation, and close new one */
     683          22 :     RelationDropStorage(rel);
     684          22 :     smgrclose(dstrel);
     685          22 : }
     686             : 
     687             : static void
     688         308 : heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
     689             :                                  Relation OldIndex, bool use_sort,
     690             :                                  TransactionId OldestXmin,
     691             :                                  TransactionId *xid_cutoff,
     692             :                                  MultiXactId *multi_cutoff,
     693             :                                  double *num_tuples,
     694             :                                  double *tups_vacuumed,
     695             :                                  double *tups_recently_dead)
     696             : {
     697             :     RewriteState rwstate;
     698             :     IndexScanDesc indexScan;
     699             :     TableScanDesc tableScan;
     700             :     HeapScanDesc heapScan;
     701             :     bool        use_wal;
     702             :     bool        is_system_catalog;
     703             :     Tuplesortstate *tuplesort;
     704         308 :     TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
     705         308 :     TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
     706             :     TupleTableSlot *slot;
     707             :     int         natts;
     708             :     Datum      *values;
     709             :     bool       *isnull;
     710             :     BufferHeapTupleTableSlot *hslot;
     711             : 
     712             :     /* Remember if it's a system catalog */
     713         308 :     is_system_catalog = IsSystemRelation(OldHeap);
     714             : 
     715             :     /*
     716             :      * We need to log the copied data in WAL iff WAL archiving/streaming is
     717             :      * enabled AND it's a WAL-logged rel.
     718             :      */
     719         308 :     use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
     720             : 
     721             :     /* use_wal off requires smgr_targblock be initially invalid */
     722             :     Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
     723             : 
     724             :     /* Preallocate values/isnull arrays */
     725         308 :     natts = newTupDesc->natts;
     726         308 :     values = (Datum *) palloc(natts * sizeof(Datum));
     727         308 :     isnull = (bool *) palloc(natts * sizeof(bool));
     728             : 
     729             :     /* Initialize the rewrite operation */
     730         308 :     rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
     731             :                                  *multi_cutoff, use_wal);
     732             : 
     733             : 
     734             :     /* Set up sorting if wanted */
     735         308 :     if (use_sort)
     736          34 :         tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
     737             :                                             maintenance_work_mem,
     738             :                                             NULL, false);
     739             :     else
     740         274 :         tuplesort = NULL;
     741             : 
     742             :     /*
     743             :      * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
     744             :      * that still need to be copied, we scan with SnapshotAny and use
     745             :      * HeapTupleSatisfiesVacuum for the visibility test.
     746             :      */
     747         308 :     if (OldIndex != NULL && !use_sort)
     748          24 :     {
     749          24 :         const int   ci_index[] = {
     750             :             PROGRESS_CLUSTER_PHASE,
     751             :             PROGRESS_CLUSTER_INDEX_RELID
     752             :         };
     753             :         int64       ci_val[2];
     754             : 
     755             :         /* Set phase and OIDOldIndex to columns */
     756          24 :         ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
     757          24 :         ci_val[1] = RelationGetRelid(OldIndex);
     758          24 :         pgstat_progress_update_multi_param(2, ci_index, ci_val);
     759             : 
     760          24 :         tableScan = NULL;
     761          24 :         heapScan = NULL;
     762          24 :         indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
     763          24 :         index_rescan(indexScan, NULL, 0, NULL, 0);
     764             :     }
     765             :     else
     766             :     {
     767             :         /* In scan-and-sort mode and also VACUUM FULL, set phase */
     768         284 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     769             :                                      PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
     770             : 
     771         284 :         tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
     772         284 :         heapScan = (HeapScanDesc) tableScan;
     773         284 :         indexScan = NULL;
     774             : 
     775             :         /* Set total heap blocks */
     776         284 :         pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
     777         284 :                                      heapScan->rs_nblocks);
     778             :     }
     779             : 
     780         308 :     slot = table_slot_create(OldHeap, NULL);
     781         308 :     hslot = (BufferHeapTupleTableSlot *) slot;
     782             : 
     783             :     /*
     784             :      * Scan through the OldHeap, either in OldIndex order or sequentially;
     785             :      * copy each tuple into the NewHeap, or transiently to the tuplesort
     786             :      * module.  Note that we don't bother sorting dead tuples (they won't get
     787             :      * to the new table anyway).
     788             :      */
     789             :     for (;;)
     790      154478 :     {
     791             :         HeapTuple   tuple;
     792             :         Buffer      buf;
     793             :         bool        isdead;
     794             : 
     795      154786 :         CHECK_FOR_INTERRUPTS();
     796             : 
     797      154786 :         if (indexScan != NULL)
     798             :         {
     799          88 :             if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
     800          24 :                 break;
     801             : 
     802             :             /* Since we used no scan keys, should never need to recheck */
     803          64 :             if (indexScan->xs_recheck)
     804           0 :                 elog(ERROR, "CLUSTER does not support lossy index conditions");
     805             :         }
     806             :         else
     807             :         {
     808      154698 :             if (!table_scan_getnextslot(tableScan, ForwardScanDirection, slot))
     809         284 :                 break;
     810             : 
     811             :             /*
     812             :              * In scan-and-sort mode and also VACUUM FULL, set heap blocks
     813             :              * scanned
     814             :              */
     815      154414 :             pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
     816      154414 :                                          heapScan->rs_cblock + 1);
     817             :         }
     818             : 
     819      154478 :         tuple = ExecFetchSlotHeapTuple(slot, false, NULL);
     820      154478 :         buf = hslot->buffer;
     821             : 
     822      154478 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
     823             : 
     824      154478 :         switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
     825             :         {
     826             :             case HEAPTUPLE_DEAD:
     827             :                 /* Definitely dead */
     828       10338 :                 isdead = true;
     829       10338 :                 break;
     830             :             case HEAPTUPLE_RECENTLY_DEAD:
     831        5022 :                 *tups_recently_dead += 1;
     832             :                 /* fall through */
     833             :             case HEAPTUPLE_LIVE:
     834             :                 /* Live or recently dead, must copy it */
     835      143988 :                 isdead = false;
     836      143988 :                 break;
     837             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     838             : 
     839             :                 /*
     840             :                  * Since we hold exclusive lock on the relation, normally the
     841             :                  * only way to see this is if it was inserted earlier in our
     842             :                  * own transaction.  However, it can happen in system
     843             :                  * catalogs, since we tend to release write lock before commit
     844             :                  * there.  Give a warning if neither case applies; but in any
     845             :                  * case we had better copy it.
     846             :                  */
     847         128 :                 if (!is_system_catalog &&
     848          12 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
     849           0 :                     elog(WARNING, "concurrent insert in progress within table \"%s\"",
     850             :                          RelationGetRelationName(OldHeap));
     851             :                 /* treat as live */
     852         116 :                 isdead = false;
     853         116 :                 break;
     854             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
     855             : 
     856             :                 /*
     857             :                  * Similar situation to INSERT_IN_PROGRESS case.
     858             :                  */
     859          56 :                 if (!is_system_catalog &&
     860          20 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
     861           0 :                     elog(WARNING, "concurrent delete in progress within table \"%s\"",
     862             :                          RelationGetRelationName(OldHeap));
     863             :                 /* treat as recently dead */
     864          36 :                 *tups_recently_dead += 1;
     865          36 :                 isdead = false;
     866          36 :                 break;
     867             :             default:
     868           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     869             :                 isdead = false; /* keep compiler quiet */
     870             :                 break;
     871             :         }
     872             : 
     873      154478 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     874             : 
     875      154478 :         if (isdead)
     876             :         {
     877       10338 :             *tups_vacuumed += 1;
     878             :             /* heap rewrite module still needs to see it... */
     879       10338 :             if (rewrite_heap_dead_tuple(rwstate, tuple))
     880             :             {
     881             :                 /* A previous recently-dead tuple is now known dead */
     882           0 :                 *tups_vacuumed += 1;
     883           0 :                 *tups_recently_dead -= 1;
     884             :             }
     885       10338 :             continue;
     886             :         }
     887             : 
     888      144140 :         *num_tuples += 1;
     889      144140 :         if (tuplesort != NULL)
     890             :         {
     891       46118 :             tuplesort_putheaptuple(tuplesort, tuple);
     892             : 
     893             :             /*
     894             :              * In scan-and-sort mode, report increase in number of tuples
     895             :              * scanned
     896             :              */
     897       46118 :             pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
     898       46118 :                                          *num_tuples);
     899             :         }
     900             :         else
     901             :         {
     902       98022 :             const int   ct_index[] = {
     903             :                 PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
     904             :                 PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
     905             :             };
     906             :             int64       ct_val[2];
     907             : 
     908       98022 :             reform_and_rewrite_tuple(tuple, OldHeap, NewHeap,
     909             :                                      values, isnull, rwstate);
     910             : 
     911             :             /*
     912             :              * In indexscan mode and also VACUUM FULL, report increase in
     913             :              * number of tuples scanned and written
     914             :              */
     915       98022 :             ct_val[0] = *num_tuples;
     916       98022 :             ct_val[1] = *num_tuples;
     917       98022 :             pgstat_progress_update_multi_param(2, ct_index, ct_val);
     918             :         }
     919             :     }
     920             : 
     921         308 :     if (indexScan != NULL)
     922          24 :         index_endscan(indexScan);
     923         308 :     if (tableScan != NULL)
     924         284 :         table_endscan(tableScan);
     925         308 :     if (slot)
     926         308 :         ExecDropSingleTupleTableSlot(slot);
     927             : 
     928             :     /*
     929             :      * In scan-and-sort mode, complete the sort, then read out all live tuples
     930             :      * from the tuplestore and write them to the new relation.
     931             :      */
     932         308 :     if (tuplesort != NULL)
     933             :     {
     934          34 :         double      n_tuples = 0;
     935             : 
     936             :         /* Report that we are now sorting tuples */
     937          34 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     938             :                                      PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
     939             : 
     940          34 :         tuplesort_performsort(tuplesort);
     941             : 
     942             :         /* Report that we are now writing new heap */
     943          34 :         pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
     944             :                                      PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
     945             : 
     946             :         for (;;)
     947       46118 :         {
     948             :             HeapTuple   tuple;
     949             : 
     950       46152 :             CHECK_FOR_INTERRUPTS();
     951             : 
     952       46152 :             tuple = tuplesort_getheaptuple(tuplesort, true);
     953       46152 :             if (tuple == NULL)
     954          34 :                 break;
     955             : 
     956       46118 :             n_tuples += 1;
     957       46118 :             reform_and_rewrite_tuple(tuple,
     958             :                                      OldHeap, NewHeap,
     959             :                                      values, isnull,
     960             :                                      rwstate);
     961             :             /* Report n_tuples */
     962       46118 :             pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
     963             :                                          n_tuples);
     964             :         }
     965             : 
     966          34 :         tuplesort_end(tuplesort);
     967             :     }
     968             : 
     969             :     /* Write out any remaining tuples, and fsync if needed */
     970         308 :     end_heap_rewrite(rwstate);
     971             : 
     972             :     /* Clean up */
     973         308 :     pfree(values);
     974         308 :     pfree(isnull);
     975         308 : }
     976             : 
     977             : static bool
     978      207644 : heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
     979             :                                BufferAccessStrategy bstrategy)
     980             : {
     981      207644 :     HeapScanDesc hscan = (HeapScanDesc) scan;
     982             : 
     983             :     /*
     984             :      * We must maintain a pin on the target page's buffer to ensure that
     985             :      * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
     986             :      * under us.  Hence, pin the page until we are done looking at it.  We
     987             :      * also choose to hold sharelock on the buffer throughout --- we could
     988             :      * release and re-acquire sharelock for each tuple, but since we aren't
     989             :      * doing much work per tuple, the extra lock traffic is probably better
     990             :      * avoided.
     991             :      */
     992      207644 :     hscan->rs_cblock = blockno;
     993      207644 :     hscan->rs_cindex = FirstOffsetNumber;
     994      207644 :     hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
     995             :                                         blockno, RBM_NORMAL, bstrategy);
     996      207644 :     LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
     997             : 
     998             :     /* in heap all blocks can contain tuples, so always return true */
     999      207644 :     return true;
    1000             : }
    1001             : 
    1002             : static bool
    1003    15571468 : heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
    1004             :                                double *liverows, double *deadrows,
    1005             :                                TupleTableSlot *slot)
    1006             : {
    1007    15571468 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    1008             :     Page        targpage;
    1009             :     OffsetNumber maxoffset;
    1010             :     BufferHeapTupleTableSlot *hslot;
    1011             : 
    1012             :     Assert(TTS_IS_BUFFERTUPLE(slot));
    1013             : 
    1014    15571468 :     hslot = (BufferHeapTupleTableSlot *) slot;
    1015    15571468 :     targpage = BufferGetPage(hscan->rs_cbuf);
    1016    15571468 :     maxoffset = PageGetMaxOffsetNumber(targpage);
    1017             : 
    1018             :     /* Inner loop over all tuples on the selected page */
    1019    17269728 :     for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
    1020             :     {
    1021             :         ItemId      itemid;
    1022    17062084 :         HeapTuple   targtuple = &hslot->base.tupdata;
    1023    17062084 :         bool        sample_it = false;
    1024             : 
    1025    17062084 :         itemid = PageGetItemId(targpage, hscan->rs_cindex);
    1026             : 
    1027             :         /*
    1028             :          * We ignore unused and redirect line pointers.  DEAD line pointers
    1029             :          * should be counted as dead, because we need vacuum to run to get rid
    1030             :          * of them.  Note that this rule agrees with the way that
    1031             :          * heap_page_prune() counts things.
    1032             :          */
    1033    17062084 :         if (!ItemIdIsNormal(itemid))
    1034             :         {
    1035     1585144 :             if (ItemIdIsDead(itemid))
    1036      220480 :                 *deadrows += 1;
    1037     1585144 :             continue;
    1038             :         }
    1039             : 
    1040    15476940 :         ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
    1041             : 
    1042    15476940 :         targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
    1043    15476940 :         targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
    1044    15476940 :         targtuple->t_len = ItemIdGetLength(itemid);
    1045             : 
    1046    15476940 :         switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
    1047             :                                          hscan->rs_cbuf))
    1048             :         {
    1049             :             case HEAPTUPLE_LIVE:
    1050    15175652 :                 sample_it = true;
    1051    15175652 :                 *liverows += 1;
    1052    15175652 :                 break;
    1053             : 
    1054             :             case HEAPTUPLE_DEAD:
    1055             :             case HEAPTUPLE_RECENTLY_DEAD:
    1056             :                 /* Count dead and recently-dead rows */
    1057      108088 :                 *deadrows += 1;
    1058      108088 :                 break;
    1059             : 
    1060             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    1061             : 
    1062             :                 /*
    1063             :                  * Insert-in-progress rows are not counted.  We assume that
    1064             :                  * when the inserting transaction commits or aborts, it will
    1065             :                  * send a stats message to increment the proper count.  This
    1066             :                  * works right only if that transaction ends after we finish
    1067             :                  * analyzing the table; if things happen in the other order,
    1068             :                  * its stats update will be overwritten by ours.  However, the
    1069             :                  * error will be large only if the other transaction runs long
    1070             :                  * enough to insert many tuples, so assuming it will finish
    1071             :                  * after us is the safer option.
    1072             :                  *
    1073             :                  * A special case is that the inserting transaction might be
    1074             :                  * our own.  In this case we should count and sample the row,
    1075             :                  * to accommodate users who load a table and analyze it in one
    1076             :                  * transaction.  (pgstat_report_analyze has to adjust the
    1077             :                  * numbers we send to the stats collector to make this come
    1078             :                  * out right.)
    1079             :                  */
    1080      191992 :                 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
    1081             :                 {
    1082      188108 :                     sample_it = true;
    1083      188108 :                     *liverows += 1;
    1084             :                 }
    1085      191992 :                 break;
    1086             : 
    1087             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1088             : 
    1089             :                 /*
    1090             :                  * We count and sample delete-in-progress rows the same as
    1091             :                  * live ones, so that the stats counters come out right if the
    1092             :                  * deleting transaction commits after us, per the same
    1093             :                  * reasoning given above.
    1094             :                  *
    1095             :                  * If the delete was done by our own transaction, however, we
    1096             :                  * must count the row as dead to make pgstat_report_analyze's
    1097             :                  * stats adjustments come out right.  (Note: this works out
    1098             :                  * properly when the row was both inserted and deleted in our
    1099             :                  * xact.)
    1100             :                  *
    1101             :                  * The net effect of these choices is that we act as though an
    1102             :                  * IN_PROGRESS transaction hasn't happened yet, except if it
    1103             :                  * is our own transaction, which we assume has happened.
    1104             :                  *
    1105             :                  * This approach ensures that we behave sanely if we see both
    1106             :                  * the pre-image and post-image rows for a row being updated
    1107             :                  * by a concurrent transaction: we will sample the pre-image
    1108             :                  * but not the post-image.  We also get sane results if the
    1109             :                  * concurrent transaction never commits.
    1110             :                  */
    1111        1208 :                 if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
    1112        1144 :                     *deadrows += 1;
    1113             :                 else
    1114             :                 {
    1115          64 :                     sample_it = true;
    1116          64 :                     *liverows += 1;
    1117             :                 }
    1118        1208 :                 break;
    1119             : 
    1120             :             default:
    1121           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1122             :                 break;
    1123             :         }
    1124             : 
    1125    15476940 :         if (sample_it)
    1126             :         {
    1127    15363824 :             ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
    1128    15363824 :             hscan->rs_cindex++;
    1129             : 
    1130             :             /* note that we leave the buffer locked here! */
    1131    15363824 :             return true;
    1132             :         }
    1133             :     }
    1134             : 
    1135             :     /* Now release the lock and pin on the page */
    1136      207644 :     UnlockReleaseBuffer(hscan->rs_cbuf);
    1137      207644 :     hscan->rs_cbuf = InvalidBuffer;
    1138             : 
    1139             :     /* also prevent old slot contents from having pin on page */
    1140      207644 :     ExecClearTuple(slot);
    1141             : 
    1142      207644 :     return false;
    1143             : }
    1144             : 
    1145             : static double
    1146       72090 : heapam_index_build_range_scan(Relation heapRelation,
    1147             :                               Relation indexRelation,
    1148             :                               IndexInfo *indexInfo,
    1149             :                               bool allow_sync,
    1150             :                               bool anyvisible,
    1151             :                               bool progress,
    1152             :                               BlockNumber start_blockno,
    1153             :                               BlockNumber numblocks,
    1154             :                               IndexBuildCallback callback,
    1155             :                               void *callback_state,
    1156             :                               TableScanDesc scan)
    1157             : {
    1158             :     HeapScanDesc hscan;
    1159             :     bool        is_system_catalog;
    1160             :     bool        checking_uniqueness;
    1161             :     HeapTuple   heapTuple;
    1162             :     Datum       values[INDEX_MAX_KEYS];
    1163             :     bool        isnull[INDEX_MAX_KEYS];
    1164             :     double      reltuples;
    1165             :     ExprState  *predicate;
    1166             :     TupleTableSlot *slot;
    1167             :     EState     *estate;
    1168             :     ExprContext *econtext;
    1169             :     Snapshot    snapshot;
    1170       72090 :     bool        need_unregister_snapshot = false;
    1171             :     TransactionId OldestXmin;
    1172       72090 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1173       72090 :     BlockNumber root_blkno = InvalidBlockNumber;
    1174             :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1175             : 
    1176             :     /*
    1177             :      * sanity checks
    1178             :      */
    1179             :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1180             : 
    1181             :     /* Remember if it's a system catalog */
    1182       72090 :     is_system_catalog = IsSystemRelation(heapRelation);
    1183             : 
    1184             :     /* See whether we're verifying uniqueness/exclusion properties */
    1185       80550 :     checking_uniqueness = (indexInfo->ii_Unique ||
    1186        8460 :                            indexInfo->ii_ExclusionOps != NULL);
    1187             : 
    1188             :     /*
    1189             :      * "Any visible" mode is not compatible with uniqueness checks; make sure
    1190             :      * only one of those is requested.
    1191             :      */
    1192             :     Assert(!(anyvisible && checking_uniqueness));
    1193             : 
    1194             :     /*
    1195             :      * Need an EState for evaluation of index expressions and partial-index
    1196             :      * predicates.  Also a slot to hold the current tuple.
    1197             :      */
    1198       72090 :     estate = CreateExecutorState();
    1199       72090 :     econtext = GetPerTupleExprContext(estate);
    1200       72090 :     slot = table_slot_create(heapRelation, NULL);
    1201             : 
    1202             :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1203       72090 :     econtext->ecxt_scantuple = slot;
    1204             : 
    1205             :     /* Set up execution state for predicate, if any. */
    1206       72090 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1207             : 
    1208             :     /*
    1209             :      * Prepare for scan of the base relation.  In a normal index build, we use
    1210             :      * SnapshotAny because we must retrieve all tuples and do our own time
    1211             :      * qual checks (because we have to index RECENTLY_DEAD tuples). In a
    1212             :      * concurrent build, or during bootstrap, we take a regular MVCC snapshot
    1213             :      * and index whatever's live according to that.
    1214             :      */
    1215       72090 :     OldestXmin = InvalidTransactionId;
    1216             : 
    1217             :     /* okay to ignore lazy VACUUMs here */
    1218       72090 :     if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
    1219       22970 :         OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
    1220             : 
    1221       72090 :     if (!scan)
    1222             :     {
    1223             :         /*
    1224             :          * Serial index build.
    1225             :          *
    1226             :          * Must begin our own heap scan in this case.  We may also need to
    1227             :          * register a snapshot whose lifetime is under our direct control.
    1228             :          */
    1229       71910 :         if (!TransactionIdIsValid(OldestXmin))
    1230             :         {
    1231       49116 :             snapshot = RegisterSnapshot(GetTransactionSnapshot());
    1232       49116 :             need_unregister_snapshot = true;
    1233             :         }
    1234             :         else
    1235       22794 :             snapshot = SnapshotAny;
    1236             : 
    1237       71910 :         scan = table_beginscan_strat(heapRelation,  /* relation */
    1238             :                                      snapshot,  /* snapshot */
    1239             :                                      0, /* number of keys */
    1240             :                                      NULL,  /* scan key */
    1241             :                                      true,  /* buffer access strategy OK */
    1242             :                                      allow_sync);   /* syncscan OK? */
    1243             :     }
    1244             :     else
    1245             :     {
    1246             :         /*
    1247             :          * Parallel index build.
    1248             :          *
    1249             :          * Parallel case never registers/unregisters own snapshot.  Snapshot
    1250             :          * is taken from parallel heap scan, and is SnapshotAny or an MVCC
    1251             :          * snapshot, based on same criteria as serial case.
    1252             :          */
    1253             :         Assert(!IsBootstrapProcessingMode());
    1254             :         Assert(allow_sync);
    1255         180 :         snapshot = scan->rs_snapshot;
    1256             :     }
    1257             : 
    1258       72090 :     hscan = (HeapScanDesc) scan;
    1259             : 
    1260             :     /* Publish number of blocks to scan */
    1261       72090 :     if (progress)
    1262             :     {
    1263             :         BlockNumber nblocks;
    1264             : 
    1265       71952 :         if (hscan->rs_base.rs_parallel != NULL)
    1266             :         {
    1267             :             ParallelBlockTableScanDesc pbscan;
    1268             : 
    1269          84 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1270          84 :             nblocks = pbscan->phs_nblocks;
    1271             :         }
    1272             :         else
    1273       71868 :             nblocks = hscan->rs_nblocks;
    1274             : 
    1275       71952 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1276             :                                      nblocks);
    1277             :     }
    1278             : 
    1279             :     /*
    1280             :      * Must call GetOldestXmin() with SnapshotAny.  Should never call
    1281             :      * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
    1282             :      * this for parallel builds, since ambuild routines that support parallel
    1283             :      * builds must work these details out for themselves.)
    1284             :      */
    1285             :     Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
    1286             :     Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
    1287             :            !TransactionIdIsValid(OldestXmin));
    1288             :     Assert(snapshot == SnapshotAny || !anyvisible);
    1289             : 
    1290             :     /* set our scan endpoints */
    1291       72090 :     if (!allow_sync)
    1292         274 :         heap_setscanlimits(scan, start_blockno, numblocks);
    1293             :     else
    1294             :     {
    1295             :         /* syncscan can only be requested on whole relation */
    1296             :         Assert(start_blockno == 0);
    1297             :         Assert(numblocks == InvalidBlockNumber);
    1298             :     }
    1299             : 
    1300       72090 :     reltuples = 0;
    1301             : 
    1302             :     /*
    1303             :      * Scan all tuples in the base relation.
    1304             :      */
    1305    15410698 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1306             :     {
    1307             :         bool        tupleIsAlive;
    1308             : 
    1309    15266522 :         CHECK_FOR_INTERRUPTS();
    1310             : 
    1311             :         /* Report scan progress, if asked to. */
    1312    15266522 :         if (progress)
    1313             :         {
    1314    13275608 :             BlockNumber blocks_done = heapam_scan_get_blocks_done(hscan);
    1315             : 
    1316    13275608 :             if (blocks_done != previous_blkno)
    1317             :             {
    1318      205610 :                 pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1319             :                                              blocks_done);
    1320      205610 :                 previous_blkno = blocks_done;
    1321             :             }
    1322             :         }
    1323             : 
    1324             :         /*
    1325             :          * When dealing with a HOT-chain of updated tuples, we want to index
    1326             :          * the values of the live tuple (if any), but index it under the TID
    1327             :          * of the chain's root tuple.  This approach is necessary to preserve
    1328             :          * the HOT-chain structure in the heap. So we need to be able to find
    1329             :          * the root item offset for every tuple that's in a HOT-chain.  When
    1330             :          * first reaching a new page of the relation, call
    1331             :          * heap_get_root_tuples() to build a map of root item offsets on the
    1332             :          * page.
    1333             :          *
    1334             :          * It might look unsafe to use this information across buffer
    1335             :          * lock/unlock.  However, we hold ShareLock on the table so no
    1336             :          * ordinary insert/update/delete should occur; and we hold pin on the
    1337             :          * buffer continuously while visiting the page, so no pruning
    1338             :          * operation can occur either.
    1339             :          *
    1340             :          * Also, although our opinions about tuple liveness could change while
    1341             :          * we scan the page (due to concurrent transaction commits/aborts),
    1342             :          * the chain root locations won't, so this info doesn't need to be
    1343             :          * rebuilt after waiting for another transaction.
    1344             :          *
    1345             :          * Note the implied assumption that there is no more than one live
    1346             :          * tuple per HOT-chain --- else we could create more than one index
    1347             :          * entry pointing to the same root tuple.
    1348             :          */
    1349    15266522 :         if (hscan->rs_cblock != root_blkno)
    1350             :         {
    1351      220964 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1352             : 
    1353      220964 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1354      220964 :             heap_get_root_tuples(page, root_offsets);
    1355      220964 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1356             : 
    1357      220964 :             root_blkno = hscan->rs_cblock;
    1358             :         }
    1359             : 
    1360    15266522 :         if (snapshot == SnapshotAny)
    1361             :         {
    1362             :             /* do our own time qual check */
    1363             :             bool        indexIt;
    1364             :             TransactionId xwait;
    1365             : 
    1366             :     recheck:
    1367             : 
    1368             :             /*
    1369             :              * We could possibly get away with not locking the buffer here,
    1370             :              * since caller should hold ShareLock on the relation, but let's
    1371             :              * be conservative about it.  (This remark is still correct even
    1372             :              * with HOT-pruning: our pin on the buffer prevents pruning.)
    1373             :              */
    1374     9688148 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1375             : 
    1376             :             /*
    1377             :              * The criteria for counting a tuple as live in this block need to
    1378             :              * match what analyze.c's heapam_scan_analyze_next_tuple() does,
    1379             :              * otherwise CREATE INDEX and ANALYZE may produce wildly different
    1380             :              * reltuples values, e.g. when there are many recently-dead
    1381             :              * tuples.
    1382             :              */
    1383     9688148 :             switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
    1384             :                                              hscan->rs_cbuf))
    1385             :             {
    1386             :                 case HEAPTUPLE_DEAD:
    1387             :                     /* Definitely dead, we can ignore it */
    1388        3112 :                     indexIt = false;
    1389        3112 :                     tupleIsAlive = false;
    1390        3112 :                     break;
    1391             :                 case HEAPTUPLE_LIVE:
    1392             :                     /* Normal case, index and unique-check it */
    1393     6306052 :                     indexIt = true;
    1394     6306052 :                     tupleIsAlive = true;
    1395             :                     /* Count it as live, too */
    1396     6306052 :                     reltuples += 1;
    1397     6306052 :                     break;
    1398             :                 case HEAPTUPLE_RECENTLY_DEAD:
    1399             : 
    1400             :                     /*
    1401             :                      * If tuple is recently deleted then we must index it
    1402             :                      * anyway to preserve MVCC semantics.  (Pre-existing
    1403             :                      * transactions could try to use the index after we finish
    1404             :                      * building it, and may need to see such tuples.)
    1405             :                      *
    1406             :                      * However, if it was HOT-updated then we must only index
    1407             :                      * the live tuple at the end of the HOT-chain.  Since this
    1408             :                      * breaks semantics for pre-existing snapshots, mark the
    1409             :                      * index as unusable for them.
    1410             :                      *
    1411             :                      * We don't count recently-dead tuples in reltuples, even
    1412             :                      * if we index them; see heapam_scan_analyze_next_tuple().
    1413             :                      */
    1414      133598 :                     if (HeapTupleIsHotUpdated(heapTuple))
    1415             :                     {
    1416         136 :                         indexIt = false;
    1417             :                         /* mark the index as unsafe for old snapshots */
    1418         136 :                         indexInfo->ii_BrokenHotChain = true;
    1419             :                     }
    1420             :                     else
    1421      133462 :                         indexIt = true;
    1422             :                     /* In any case, exclude the tuple from unique-checking */
    1423      133598 :                     tupleIsAlive = false;
    1424      133598 :                     break;
    1425             :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1426             : 
    1427             :                     /*
    1428             :                      * In "anyvisible" mode, this tuple is visible and we
    1429             :                      * don't need any further checks.
    1430             :                      */
    1431     3245318 :                     if (anyvisible)
    1432             :                     {
    1433           4 :                         indexIt = true;
    1434           4 :                         tupleIsAlive = true;
    1435           4 :                         reltuples += 1;
    1436           4 :                         break;
    1437             :                     }
    1438             : 
    1439             :                     /*
    1440             :                      * Since caller should hold ShareLock or better, normally
    1441             :                      * the only way to see this is if it was inserted earlier
    1442             :                      * in our own transaction.  However, it can happen in
    1443             :                      * system catalogs, since we tend to release write lock
    1444             :                      * before commit there.  Give a warning if neither case
    1445             :                      * applies.
    1446             :                      */
    1447     3245314 :                     xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
    1448     3245314 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1449             :                     {
    1450           0 :                         if (!is_system_catalog)
    1451           0 :                             elog(WARNING, "concurrent insert in progress within table \"%s\"",
    1452             :                                  RelationGetRelationName(heapRelation));
    1453             : 
    1454             :                         /*
    1455             :                          * If we are performing uniqueness checks, indexing
    1456             :                          * such a tuple could lead to a bogus uniqueness
    1457             :                          * failure.  In that case we wait for the inserting
    1458             :                          * transaction to finish and check again.
    1459             :                          */
    1460           0 :                         if (checking_uniqueness)
    1461             :                         {
    1462             :                             /*
    1463             :                              * Must drop the lock on the buffer before we wait
    1464             :                              */
    1465           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1466           0 :                             XactLockTableWait(xwait, heapRelation,
    1467             :                                               &heapTuple->t_self,
    1468             :                                               XLTW_InsertIndexUnique);
    1469           0 :                             CHECK_FOR_INTERRUPTS();
    1470           0 :                             goto recheck;
    1471             :                         }
    1472             :                     }
    1473             :                     else
    1474             :                     {
    1475             :                         /*
    1476             :                          * For consistency with
    1477             :                          * heapam_scan_analyze_next_tuple(), count
    1478             :                          * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
    1479             :                          * when inserted by our own transaction.
    1480             :                          */
    1481     3245314 :                         reltuples += 1;
    1482             :                     }
    1483             : 
    1484             :                     /*
    1485             :                      * We must index such tuples, since if the index build
    1486             :                      * commits then they're good.
    1487             :                      */
    1488     3245314 :                     indexIt = true;
    1489     3245314 :                     tupleIsAlive = true;
    1490     3245314 :                     break;
    1491             :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1492             : 
    1493             :                     /*
    1494             :                      * As with INSERT_IN_PROGRESS case, this is unexpected
    1495             :                      * unless it's our own deletion or a system catalog; but
    1496             :                      * in anyvisible mode, this tuple is visible.
    1497             :                      */
    1498          68 :                     if (anyvisible)
    1499             :                     {
    1500           0 :                         indexIt = true;
    1501           0 :                         tupleIsAlive = false;
    1502           0 :                         reltuples += 1;
    1503           0 :                         break;
    1504             :                     }
    1505             : 
    1506          68 :                     xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
    1507          68 :                     if (!TransactionIdIsCurrentTransactionId(xwait))
    1508             :                     {
    1509           0 :                         if (!is_system_catalog)
    1510           0 :                             elog(WARNING, "concurrent delete in progress within table \"%s\"",
    1511             :                                  RelationGetRelationName(heapRelation));
    1512             : 
    1513             :                         /*
    1514             :                          * If we are performing uniqueness checks, assuming
    1515             :                          * the tuple is dead could lead to missing a
    1516             :                          * uniqueness violation.  In that case we wait for the
    1517             :                          * deleting transaction to finish and check again.
    1518             :                          *
    1519             :                          * Also, if it's a HOT-updated tuple, we should not
    1520             :                          * index it but rather the live tuple at the end of
    1521             :                          * the HOT-chain.  However, the deleting transaction
    1522             :                          * could abort, possibly leaving this tuple as live
    1523             :                          * after all, in which case it has to be indexed. The
    1524             :                          * only way to know what to do is to wait for the
    1525             :                          * deleting transaction to finish and check again.
    1526             :                          */
    1527           0 :                         if (checking_uniqueness ||
    1528           0 :                             HeapTupleIsHotUpdated(heapTuple))
    1529             :                         {
    1530             :                             /*
    1531             :                              * Must drop the lock on the buffer before we wait
    1532             :                              */
    1533           0 :                             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1534           0 :                             XactLockTableWait(xwait, heapRelation,
    1535             :                                               &heapTuple->t_self,
    1536             :                                               XLTW_InsertIndexUnique);
    1537           0 :                             CHECK_FOR_INTERRUPTS();
    1538           0 :                             goto recheck;
    1539             :                         }
    1540             : 
    1541             :                         /*
    1542             :                          * Otherwise index it but don't check for uniqueness,
    1543             :                          * the same as a RECENTLY_DEAD tuple.
    1544             :                          */
    1545           0 :                         indexIt = true;
    1546             : 
    1547             :                         /*
    1548             :                          * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
    1549             :                          * if they were not deleted by the current
    1550             :                          * transaction.  That's what
    1551             :                          * heapam_scan_analyze_next_tuple() does, and we want
    1552             :                          * the behavior to be consistent.
    1553             :                          */
    1554           0 :                         reltuples += 1;
    1555             :                     }
    1556          68 :                     else if (HeapTupleIsHotUpdated(heapTuple))
    1557             :                     {
    1558             :                         /*
    1559             :                          * It's a HOT-updated tuple deleted by our own xact.
    1560             :                          * We can assume the deletion will commit (else the
    1561             :                          * index contents don't matter), so treat the same as
    1562             :                          * RECENTLY_DEAD HOT-updated tuples.
    1563             :                          */
    1564           0 :                         indexIt = false;
    1565             :                         /* mark the index as unsafe for old snapshots */
    1566           0 :                         indexInfo->ii_BrokenHotChain = true;
    1567             :                     }
    1568             :                     else
    1569             :                     {
    1570             :                         /*
    1571             :                          * It's a regular tuple deleted by our own xact. Index
    1572             :                          * it, but don't check for uniqueness nor count in
    1573             :                          * reltuples, the same as a RECENTLY_DEAD tuple.
    1574             :                          */
    1575          68 :                         indexIt = true;
    1576             :                     }
    1577             :                     /* In any case, exclude the tuple from unique-checking */
    1578          68 :                     tupleIsAlive = false;
    1579          68 :                     break;
    1580             :                 default:
    1581           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1582             :                     indexIt = tupleIsAlive = false; /* keep compiler quiet */
    1583             :                     break;
    1584             :             }
    1585             : 
    1586     9688148 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1587             : 
    1588     9688148 :             if (!indexIt)
    1589        3248 :                 continue;
    1590             :         }
    1591             :         else
    1592             :         {
    1593             :             /* heap_getnext did the time qual check */
    1594     5578374 :             tupleIsAlive = true;
    1595     5578374 :             reltuples += 1;
    1596             :         }
    1597             : 
    1598    15263274 :         MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1599             : 
    1600             :         /* Set up for predicate or expression evaluation */
    1601    15263274 :         ExecStoreBufferHeapTuple(heapTuple, slot, hscan->rs_cbuf);
    1602             : 
    1603             :         /*
    1604             :          * In a partial index, discard tuples that don't satisfy the
    1605             :          * predicate.
    1606             :          */
    1607    15263274 :         if (predicate != NULL)
    1608             :         {
    1609       52256 :             if (!ExecQual(predicate, econtext))
    1610       15720 :                 continue;
    1611             :         }
    1612             : 
    1613             :         /*
    1614             :          * For the current heap tuple, extract all the attributes we use in
    1615             :          * this index, and note which are null.  This also performs evaluation
    1616             :          * of any expressions needed.
    1617             :          */
    1618    15247554 :         FormIndexDatum(indexInfo,
    1619             :                        slot,
    1620             :                        estate,
    1621             :                        values,
    1622             :                        isnull);
    1623             : 
    1624             :         /*
    1625             :          * You'd think we should go ahead and build the index tuple here, but
    1626             :          * some index AMs want to do further processing on the data first.  So
    1627             :          * pass the values[] and isnull[] arrays, instead.
    1628             :          */
    1629             : 
    1630    15247550 :         if (HeapTupleIsHeapOnly(heapTuple))
    1631             :         {
    1632             :             /*
    1633             :              * For a heap-only tuple, pretend its TID is that of the root. See
    1634             :              * src/backend/access/heap/README.HOT for discussion.
    1635             :              */
    1636             :             ItemPointerData tid;
    1637             :             OffsetNumber offnum;
    1638             : 
    1639       12282 :             offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
    1640             : 
    1641       12282 :             if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
    1642           0 :                 ereport(ERROR,
    1643             :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1644             :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1645             :                                          ItemPointerGetBlockNumber(&heapTuple->t_self),
    1646             :                                          offnum,
    1647             :                                          RelationGetRelationName(heapRelation))));
    1648             : 
    1649       12282 :             ItemPointerSet(&tid, ItemPointerGetBlockNumber(&heapTuple->t_self),
    1650             :                            root_offsets[offnum - 1]);
    1651             : 
    1652             :             /* Call the AM's callback routine to process the tuple */
    1653       12282 :             callback(indexRelation, &tid, values, isnull, tupleIsAlive,
    1654             :                      callback_state);
    1655             :         }
    1656             :         else
    1657             :         {
    1658             :             /* Call the AM's callback routine to process the tuple */
    1659    15235268 :             callback(indexRelation, &heapTuple->t_self, values, isnull,
    1660             :                      tupleIsAlive, callback_state);
    1661             :         }
    1662             :     }
    1663             : 
    1664             :     /* Report scan progress one last time. */
    1665       72086 :     if (progress)
    1666             :     {
    1667             :         BlockNumber blks_done;
    1668             : 
    1669       71948 :         if (hscan->rs_base.rs_parallel != NULL)
    1670             :         {
    1671             :             ParallelBlockTableScanDesc pbscan;
    1672             : 
    1673          84 :             pbscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1674          84 :             blks_done = pbscan->phs_nblocks;
    1675             :         }
    1676             :         else
    1677       71864 :             blks_done = hscan->rs_nblocks;
    1678             : 
    1679       71948 :         pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1680             :                                      blks_done);
    1681             :     }
    1682             : 
    1683       72086 :     table_endscan(scan);
    1684             : 
    1685             :     /* we can now forget our snapshot, if set and registered by us */
    1686       72086 :     if (need_unregister_snapshot)
    1687       49116 :         UnregisterSnapshot(snapshot);
    1688             : 
    1689       72086 :     ExecDropSingleTupleTableSlot(slot);
    1690             : 
    1691       72086 :     FreeExecutorState(estate);
    1692             : 
    1693             :     /* These may have been pointing to the now-gone estate */
    1694       72086 :     indexInfo->ii_ExpressionsState = NIL;
    1695       72086 :     indexInfo->ii_PredicateState = NULL;
    1696             : 
    1697       72086 :     return reltuples;
    1698             : }
    1699             : 
    1700             : static void
    1701         160 : heapam_index_validate_scan(Relation heapRelation,
    1702             :                            Relation indexRelation,
    1703             :                            IndexInfo *indexInfo,
    1704             :                            Snapshot snapshot,
    1705             :                            ValidateIndexState *state)
    1706             : {
    1707             :     TableScanDesc scan;
    1708             :     HeapScanDesc hscan;
    1709             :     HeapTuple   heapTuple;
    1710             :     Datum       values[INDEX_MAX_KEYS];
    1711             :     bool        isnull[INDEX_MAX_KEYS];
    1712             :     ExprState  *predicate;
    1713             :     TupleTableSlot *slot;
    1714             :     EState     *estate;
    1715             :     ExprContext *econtext;
    1716         160 :     BlockNumber root_blkno = InvalidBlockNumber;
    1717             :     OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    1718             :     bool        in_index[MaxHeapTuplesPerPage];
    1719         160 :     BlockNumber previous_blkno = InvalidBlockNumber;
    1720             : 
    1721             :     /* state variables for the merge */
    1722         160 :     ItemPointer indexcursor = NULL;
    1723             :     ItemPointerData decoded;
    1724         160 :     bool        tuplesort_empty = false;
    1725             : 
    1726             :     /*
    1727             :      * sanity checks
    1728             :      */
    1729             :     Assert(OidIsValid(indexRelation->rd_rel->relam));
    1730             : 
    1731             :     /*
    1732             :      * Need an EState for evaluation of index expressions and partial-index
    1733             :      * predicates.  Also a slot to hold the current tuple.
    1734             :      */
    1735         160 :     estate = CreateExecutorState();
    1736         160 :     econtext = GetPerTupleExprContext(estate);
    1737         160 :     slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation),
    1738             :                                     &TTSOpsHeapTuple);
    1739             : 
    1740             :     /* Arrange for econtext's scan tuple to be the tuple under test */
    1741         160 :     econtext->ecxt_scantuple = slot;
    1742             : 
    1743             :     /* Set up execution state for predicate, if any. */
    1744         160 :     predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
    1745             : 
    1746             :     /*
    1747             :      * Prepare for scan of the base relation.  We need just those tuples
    1748             :      * satisfying the passed-in reference snapshot.  We must disable syncscan
    1749             :      * here, because it's critical that we read from block zero forward to
    1750             :      * match the sorted TIDs.
    1751             :      */
    1752         160 :     scan = table_beginscan_strat(heapRelation,  /* relation */
    1753             :                                  snapshot,  /* snapshot */
    1754             :                                  0, /* number of keys */
    1755             :                                  NULL,  /* scan key */
    1756             :                                  true,  /* buffer access strategy OK */
    1757             :                                  false);    /* syncscan not OK */
    1758         160 :     hscan = (HeapScanDesc) scan;
    1759             : 
    1760         160 :     pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_TOTAL,
    1761         160 :                                  hscan->rs_nblocks);
    1762             : 
    1763             :     /*
    1764             :      * Scan all tuples matching the snapshot.
    1765             :      */
    1766       10920 :     while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1767             :     {
    1768       10600 :         ItemPointer heapcursor = &heapTuple->t_self;
    1769             :         ItemPointerData rootTuple;
    1770             :         OffsetNumber root_offnum;
    1771             : 
    1772       10600 :         CHECK_FOR_INTERRUPTS();
    1773             : 
    1774       10600 :         state->htups += 1;
    1775             : 
    1776       21112 :         if ((previous_blkno == InvalidBlockNumber) ||
    1777       10512 :             (hscan->rs_cblock != previous_blkno))
    1778             :         {
    1779         140 :             pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
    1780         140 :                                          hscan->rs_cblock);
    1781         140 :             previous_blkno = hscan->rs_cblock;
    1782             :         }
    1783             : 
    1784             :         /*
    1785             :          * As commented in table_index_build_scan, we should index heap-only
    1786             :          * tuples under the TIDs of their root tuples; so when we advance onto
    1787             :          * a new heap page, build a map of root item offsets on the page.
    1788             :          *
    1789             :          * This complicates merging against the tuplesort output: we will
    1790             :          * visit the live tuples in order by their offsets, but the root
    1791             :          * offsets that we need to compare against the index contents might be
    1792             :          * ordered differently.  So we might have to "look back" within the
    1793             :          * tuplesort output, but only within the current page.  We handle that
    1794             :          * by keeping a bool array in_index[] showing all the
    1795             :          * already-passed-over tuplesort output TIDs of the current page. We
    1796             :          * clear that array here, when advancing onto a new heap page.
    1797             :          */
    1798       10600 :         if (hscan->rs_cblock != root_blkno)
    1799             :         {
    1800         140 :             Page        page = BufferGetPage(hscan->rs_cbuf);
    1801             : 
    1802         140 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    1803         140 :             heap_get_root_tuples(page, root_offsets);
    1804         140 :             LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    1805             : 
    1806         140 :             memset(in_index, 0, sizeof(in_index));
    1807             : 
    1808         140 :             root_blkno = hscan->rs_cblock;
    1809             :         }
    1810             : 
    1811             :         /* Convert actual tuple TID to root TID */
    1812       10600 :         rootTuple = *heapcursor;
    1813       10600 :         root_offnum = ItemPointerGetOffsetNumber(heapcursor);
    1814             : 
    1815       10600 :         if (HeapTupleIsHeapOnly(heapTuple))
    1816             :         {
    1817           8 :             root_offnum = root_offsets[root_offnum - 1];
    1818           8 :             if (!OffsetNumberIsValid(root_offnum))
    1819           0 :                 ereport(ERROR,
    1820             :                         (errcode(ERRCODE_DATA_CORRUPTED),
    1821             :                          errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
    1822             :                                          ItemPointerGetBlockNumber(heapcursor),
    1823             :                                          ItemPointerGetOffsetNumber(heapcursor),
    1824             :                                          RelationGetRelationName(heapRelation))));
    1825           8 :             ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
    1826             :         }
    1827             : 
    1828             :         /*
    1829             :          * "merge" by skipping through the index tuples until we find or pass
    1830             :          * the current root tuple.
    1831             :          */
    1832       31780 :         while (!tuplesort_empty &&
    1833       21068 :                (!indexcursor ||
    1834       21068 :                 ItemPointerCompare(indexcursor, &rootTuple) < 0))
    1835             :         {
    1836             :             Datum       ts_val;
    1837             :             bool        ts_isnull;
    1838             : 
    1839       10580 :             if (indexcursor)
    1840             :             {
    1841             :                 /*
    1842             :                  * Remember index items seen earlier on the current heap page
    1843             :                  */
    1844       10492 :                 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
    1845       10440 :                     in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
    1846             :             }
    1847             : 
    1848       21160 :             tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
    1849       10580 :                                                   &ts_val, &ts_isnull, NULL);
    1850             :             Assert(tuplesort_empty || !ts_isnull);
    1851       10580 :             if (!tuplesort_empty)
    1852             :             {
    1853       10568 :                 itemptr_decode(&decoded, DatumGetInt64(ts_val));
    1854       10568 :                 indexcursor = &decoded;
    1855             : 
    1856             :                 /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
    1857             : #ifndef USE_FLOAT8_BYVAL
    1858             :                 pfree(DatumGetPointer(ts_val));
    1859             : #endif
    1860             :             }
    1861             :             else
    1862             :             {
    1863             :                 /* Be tidy */
    1864          12 :                 indexcursor = NULL;
    1865             :             }
    1866             :         }
    1867             : 
    1868             :         /*
    1869             :          * If the tuplesort has overshot *and* we didn't see a match earlier,
    1870             :          * then this tuple is missing from the index, so insert it.
    1871             :          */
    1872       21176 :         if ((tuplesort_empty ||
    1873       10616 :              ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
    1874          40 :             !in_index[root_offnum - 1])
    1875             :         {
    1876          32 :             MemoryContextReset(econtext->ecxt_per_tuple_memory);
    1877             : 
    1878             :             /* Set up for predicate or expression evaluation */
    1879          32 :             ExecStoreHeapTuple(heapTuple, slot, false);
    1880             : 
    1881             :             /*
    1882             :              * In a partial index, discard tuples that don't satisfy the
    1883             :              * predicate.
    1884             :              */
    1885          32 :             if (predicate != NULL)
    1886             :             {
    1887          32 :                 if (!ExecQual(predicate, econtext))
    1888          32 :                     continue;
    1889             :             }
    1890             : 
    1891             :             /*
    1892             :              * For the current heap tuple, extract all the attributes we use
    1893             :              * in this index, and note which are null.  This also performs
    1894             :              * evaluation of any expressions needed.
    1895             :              */
    1896           0 :             FormIndexDatum(indexInfo,
    1897             :                            slot,
    1898             :                            estate,
    1899             :                            values,
    1900             :                            isnull);
    1901             : 
    1902             :             /*
    1903             :              * You'd think we should go ahead and build the index tuple here,
    1904             :              * but some index AMs want to do further processing on the data
    1905             :              * first. So pass the values[] and isnull[] arrays, instead.
    1906             :              */
    1907             : 
    1908             :             /*
    1909             :              * If the tuple is already committed dead, you might think we
    1910             :              * could suppress uniqueness checking, but this is no longer true
    1911             :              * in the presence of HOT, because the insert is actually a proxy
    1912             :              * for a uniqueness check on the whole HOT-chain.  That is, the
    1913             :              * tuple we have here could be dead because it was already
    1914             :              * HOT-updated, and if so the updating transaction will not have
    1915             :              * thought it should insert index entries.  The index AM will
    1916             :              * check the whole HOT-chain and correctly detect a conflict if
    1917             :              * there is one.
    1918             :              */
    1919             : 
    1920           0 :             index_insert(indexRelation,
    1921             :                          values,
    1922             :                          isnull,
    1923             :                          &rootTuple,
    1924             :                          heapRelation,
    1925           0 :                          indexInfo->ii_Unique ?
    1926             :                          UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
    1927             :                          indexInfo);
    1928             : 
    1929           0 :             state->tups_inserted += 1;
    1930             :         }
    1931             :     }
    1932             : 
    1933         160 :     table_endscan(scan);
    1934             : 
    1935         160 :     ExecDropSingleTupleTableSlot(slot);
    1936             : 
    1937         160 :     FreeExecutorState(estate);
    1938             : 
    1939             :     /* These may have been pointing to the now-gone estate */
    1940         160 :     indexInfo->ii_ExpressionsState = NIL;
    1941         160 :     indexInfo->ii_PredicateState = NULL;
    1942         160 : }
    1943             : 
    1944             : /*
    1945             :  * Return the number of blocks that have been read by this scan since
    1946             :  * starting.  This is meant for progress reporting rather than be fully
    1947             :  * accurate: in a parallel scan, workers can be concurrently reading blocks
    1948             :  * further ahead than what we report.
    1949             :  */
    1950             : static BlockNumber
    1951    13275608 : heapam_scan_get_blocks_done(HeapScanDesc hscan)
    1952             : {
    1953    13275608 :     ParallelBlockTableScanDesc bpscan = NULL;
    1954             :     BlockNumber startblock;
    1955             :     BlockNumber blocks_done;
    1956             : 
    1957    13275608 :     if (hscan->rs_base.rs_parallel != NULL)
    1958             :     {
    1959     1969314 :         bpscan = (ParallelBlockTableScanDesc) hscan->rs_base.rs_parallel;
    1960     1969314 :         startblock = bpscan->phs_startblock;
    1961             :     }
    1962             :     else
    1963    11306294 :         startblock = hscan->rs_startblock;
    1964             : 
    1965             :     /*
    1966             :      * Might have wrapped around the end of the relation, if startblock was
    1967             :      * not zero.
    1968             :      */
    1969    13275608 :     if (hscan->rs_cblock > startblock)
    1970    12316926 :         blocks_done = hscan->rs_cblock - startblock;
    1971             :     else
    1972             :     {
    1973             :         BlockNumber nblocks;
    1974             : 
    1975      958682 :         nblocks = bpscan != NULL ? bpscan->phs_nblocks : hscan->rs_nblocks;
    1976     1917364 :         blocks_done = nblocks - startblock +
    1977      958682 :             hscan->rs_cblock;
    1978             :     }
    1979             : 
    1980    13275608 :     return blocks_done;
    1981             : }
    1982             : 
    1983             : 
    1984             : /* ------------------------------------------------------------------------
    1985             :  * Miscellaneous callbacks for the heap AM
    1986             :  * ------------------------------------------------------------------------
    1987             :  */
    1988             : 
    1989             : /*
    1990             :  * Check to see whether the table needs a TOAST table.  It does only if
    1991             :  * (1) there are any toastable attributes, and (2) the maximum length
    1992             :  * of a tuple could exceed TOAST_TUPLE_THRESHOLD.  (We don't want to
    1993             :  * create a toast table for something like "f1 varchar(20)".)
    1994             :  */
    1995             : static bool
    1996       33038 : heapam_relation_needs_toast_table(Relation rel)
    1997             : {
    1998       33038 :     int32       data_length = 0;
    1999       33038 :     bool        maxlength_unknown = false;
    2000       33038 :     bool        has_toastable_attrs = false;
    2001       33038 :     TupleDesc   tupdesc = rel->rd_att;
    2002             :     int32       tuple_length;
    2003             :     int         i;
    2004             : 
    2005      198898 :     for (i = 0; i < tupdesc->natts; i++)
    2006             :     {
    2007      165860 :         Form_pg_attribute att = TupleDescAttr(tupdesc, i);
    2008             : 
    2009      165860 :         if (att->attisdropped)
    2010         626 :             continue;
    2011      165234 :         data_length = att_align_nominal(data_length, att->attalign);
    2012      165234 :         if (att->attlen > 0)
    2013             :         {
    2014             :             /* Fixed-length types are never toastable */
    2015      120536 :             data_length += att->attlen;
    2016             :         }
    2017             :         else
    2018             :         {
    2019       44698 :             int32       maxlen = type_maximum_size(att->atttypid,
    2020             :                                                    att->atttypmod);
    2021             : 
    2022       44698 :             if (maxlen < 0)
    2023       42952 :                 maxlength_unknown = true;
    2024             :             else
    2025        1746 :                 data_length += maxlen;
    2026       44698 :             if (att->attstorage != 'p')
    2027       42728 :                 has_toastable_attrs = true;
    2028             :         }
    2029             :     }
    2030       33038 :     if (!has_toastable_attrs)
    2031       12386 :         return false;           /* nothing to toast? */
    2032       20652 :     if (maxlength_unknown)
    2033       19320 :         return true;            /* any unlimited-length attrs? */
    2034        2664 :     tuple_length = MAXALIGN(SizeofHeapTupleHeader +
    2035        1332 :                             BITMAPLEN(tupdesc->natts)) +
    2036        1332 :         MAXALIGN(data_length);
    2037        1332 :     return (tuple_length > TOAST_TUPLE_THRESHOLD);
    2038             : }
    2039             : 
    2040             : 
    2041             : /* ------------------------------------------------------------------------
    2042             :  * Planner related callbacks for the heap AM
    2043             :  * ------------------------------------------------------------------------
    2044             :  */
    2045             : 
    2046             : #define HEAP_OVERHEAD_BYTES_PER_TUPLE \
    2047             :     (MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
    2048             : #define HEAP_USABLE_BYTES_PER_PAGE \
    2049             :     (BLCKSZ - SizeOfPageHeaderData)
    2050             : 
    2051             : static void
    2052      251866 : heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
    2053             :                          BlockNumber *pages, double *tuples,
    2054             :                          double *allvisfrac)
    2055             : {
    2056      251866 :     table_block_relation_estimate_size(rel, attr_widths, pages,
    2057             :                                        tuples, allvisfrac,
    2058             :                                        HEAP_OVERHEAD_BYTES_PER_TUPLE,
    2059             :                                        HEAP_USABLE_BYTES_PER_PAGE);
    2060      251866 : }
    2061             : 
    2062             : 
    2063             : /* ------------------------------------------------------------------------
    2064             :  * Executor related callbacks for the heap AM
    2065             :  * ------------------------------------------------------------------------
    2066             :  */
    2067             : 
    2068             : static bool
    2069      119754 : heapam_scan_bitmap_next_block(TableScanDesc scan,
    2070             :                               TBMIterateResult *tbmres)
    2071             : {
    2072      119754 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2073      119754 :     BlockNumber page = tbmres->blockno;
    2074             :     Buffer      buffer;
    2075             :     Snapshot    snapshot;
    2076             :     int         ntup;
    2077             : 
    2078      119754 :     hscan->rs_cindex = 0;
    2079      119754 :     hscan->rs_ntuples = 0;
    2080             : 
    2081             :     /*
    2082             :      * Ignore any claimed entries past what we think is the end of the
    2083             :      * relation. It may have been extended after the start of our scan (we
    2084             :      * only hold an AccessShareLock, and it could be inserts from this
    2085             :      * backend).
    2086             :      */
    2087      119754 :     if (page >= hscan->rs_nblocks)
    2088          12 :         return false;
    2089             : 
    2090             :     /*
    2091             :      * Acquire pin on the target heap page, trading in any pin we held before.
    2092             :      */
    2093      119742 :     hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
    2094             :                                           scan->rs_rd,
    2095             :                                           page);
    2096      119742 :     hscan->rs_cblock = page;
    2097      119742 :     buffer = hscan->rs_cbuf;
    2098      119742 :     snapshot = scan->rs_snapshot;
    2099             : 
    2100      119742 :     ntup = 0;
    2101             : 
    2102             :     /*
    2103             :      * Prune and repair fragmentation for the whole page, if possible.
    2104             :      */
    2105      119742 :     heap_page_prune_opt(scan->rs_rd, buffer);
    2106             : 
    2107             :     /*
    2108             :      * We must hold share lock on the buffer content while examining tuple
    2109             :      * visibility.  Afterwards, however, the tuples we have found to be
    2110             :      * visible are guaranteed good as long as we hold the buffer pin.
    2111             :      */
    2112      119742 :     LockBuffer(buffer, BUFFER_LOCK_SHARE);
    2113             : 
    2114             :     /*
    2115             :      * We need two separate strategies for lossy and non-lossy cases.
    2116             :      */
    2117      119742 :     if (tbmres->ntuples >= 0)
    2118             :     {
    2119             :         /*
    2120             :          * Bitmap is non-lossy, so we just look through the offsets listed in
    2121             :          * tbmres; but we have to follow any HOT chain starting at each such
    2122             :          * offset.
    2123             :          */
    2124             :         int         curslot;
    2125             : 
    2126     1587718 :         for (curslot = 0; curslot < tbmres->ntuples; curslot++)
    2127             :         {
    2128     1548098 :             OffsetNumber offnum = tbmres->offsets[curslot];
    2129             :             ItemPointerData tid;
    2130             :             HeapTupleData heapTuple;
    2131             : 
    2132     1548098 :             ItemPointerSet(&tid, page, offnum);
    2133     1548098 :             if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
    2134             :                                        &heapTuple, NULL, true))
    2135     1448016 :                 hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
    2136             :         }
    2137             :     }
    2138             :     else
    2139             :     {
    2140             :         /*
    2141             :          * Bitmap is lossy, so we must examine each line pointer on the page.
    2142             :          * But we can ignore HOT chains, since we'll check each tuple anyway.
    2143             :          */
    2144       80116 :         Page        dp = (Page) BufferGetPage(buffer);
    2145       80116 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
    2146             :         OffsetNumber offnum;
    2147             : 
    2148      570716 :         for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    2149             :         {
    2150             :             ItemId      lp;
    2151             :             HeapTupleData loctup;
    2152             :             bool        valid;
    2153             : 
    2154      490600 :             lp = PageGetItemId(dp, offnum);
    2155      490600 :             if (!ItemIdIsNormal(lp))
    2156           0 :                 continue;
    2157      490600 :             loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
    2158      490600 :             loctup.t_len = ItemIdGetLength(lp);
    2159      490600 :             loctup.t_tableOid = scan->rs_rd->rd_id;
    2160      490600 :             ItemPointerSet(&loctup.t_self, page, offnum);
    2161      490600 :             valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
    2162      490600 :             if (valid)
    2163             :             {
    2164      490600 :                 hscan->rs_vistuples[ntup++] = offnum;
    2165      490600 :                 PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
    2166             :             }
    2167      490600 :             CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
    2168             :                                             buffer, snapshot);
    2169             :         }
    2170             :     }
    2171             : 
    2172      119736 :     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
    2173             : 
    2174             :     Assert(ntup <= MaxHeapTuplesPerPage);
    2175      119736 :     hscan->rs_ntuples = ntup;
    2176             : 
    2177      119736 :     return ntup > 0;
    2178             : }
    2179             : 
    2180             : static bool
    2181     2055082 : heapam_scan_bitmap_next_tuple(TableScanDesc scan,
    2182             :                               TBMIterateResult *tbmres,
    2183             :                               TupleTableSlot *slot)
    2184             : {
    2185     2055082 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2186             :     OffsetNumber targoffset;
    2187             :     Page        dp;
    2188             :     ItemId      lp;
    2189             : 
    2190             :     /*
    2191             :      * Out of range?  If so, nothing more to look at on this page
    2192             :      */
    2193     2055082 :     if (hscan->rs_cindex < 0 || hscan->rs_cindex >= hscan->rs_ntuples)
    2194      119598 :         return false;
    2195             : 
    2196     1935484 :     targoffset = hscan->rs_vistuples[hscan->rs_cindex];
    2197     1935484 :     dp = (Page) BufferGetPage(hscan->rs_cbuf);
    2198     1935484 :     lp = PageGetItemId(dp, targoffset);
    2199             :     Assert(ItemIdIsNormal(lp));
    2200             : 
    2201     1935484 :     hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
    2202     1935484 :     hscan->rs_ctup.t_len = ItemIdGetLength(lp);
    2203     1935484 :     hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
    2204     1935484 :     ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
    2205             : 
    2206     1935484 :     pgstat_count_heap_fetch(scan->rs_rd);
    2207             : 
    2208             :     /*
    2209             :      * Set up the result slot to point to this tuple.  Note that the slot
    2210             :      * acquires a pin on the buffer.
    2211             :      */
    2212     1935484 :     ExecStoreBufferHeapTuple(&hscan->rs_ctup,
    2213             :                              slot,
    2214             :                              hscan->rs_cbuf);
    2215             : 
    2216     1935484 :     hscan->rs_cindex++;
    2217             : 
    2218     1935484 :     return true;
    2219             : }
    2220             : 
    2221             : static bool
    2222        8654 : heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
    2223             : {
    2224        8654 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2225        8654 :     TsmRoutine *tsm = scanstate->tsmroutine;
    2226             :     BlockNumber blockno;
    2227             : 
    2228             :     /* return false immediately if relation is empty */
    2229        8654 :     if (hscan->rs_nblocks == 0)
    2230           0 :         return false;
    2231             : 
    2232        8654 :     if (tsm->NextSampleBlock)
    2233             :     {
    2234        3010 :         blockno = tsm->NextSampleBlock(scanstate, hscan->rs_nblocks);
    2235        3010 :         hscan->rs_cblock = blockno;
    2236             :     }
    2237             :     else
    2238             :     {
    2239             :         /* scanning table sequentially */
    2240             : 
    2241        5644 :         if (hscan->rs_cblock == InvalidBlockNumber)
    2242             :         {
    2243             :             Assert(!hscan->rs_inited);
    2244          52 :             blockno = hscan->rs_startblock;
    2245             :         }
    2246             :         else
    2247             :         {
    2248             :             Assert(hscan->rs_inited);
    2249             : 
    2250        5592 :             blockno = hscan->rs_cblock + 1;
    2251             : 
    2252        5592 :             if (blockno >= hscan->rs_nblocks)
    2253             :             {
    2254             :                 /* wrap to beginning of rel, might not have started at 0 */
    2255          52 :                 blockno = 0;
    2256             :             }
    2257             : 
    2258             :             /*
    2259             :              * Report our new scan position for synchronization purposes.
    2260             :              *
    2261             :              * Note: we do this before checking for end of scan so that the
    2262             :              * final state of the position hint is back at the start of the
    2263             :              * rel.  That's not strictly necessary, but otherwise when you run
    2264             :              * the same query multiple times the starting position would shift
    2265             :              * a little bit backwards on every invocation, which is confusing.
    2266             :              * We don't guarantee any specific ordering in general, though.
    2267             :              */
    2268        5592 :             if (scan->rs_flags & SO_ALLOW_SYNC)
    2269           0 :                 ss_report_location(scan->rs_rd, blockno);
    2270             : 
    2271        5592 :             if (blockno == hscan->rs_startblock)
    2272             :             {
    2273          52 :                 blockno = InvalidBlockNumber;
    2274             :             }
    2275             :         }
    2276             :     }
    2277             : 
    2278        8654 :     if (!BlockNumberIsValid(blockno))
    2279             :     {
    2280         122 :         if (BufferIsValid(hscan->rs_cbuf))
    2281         102 :             ReleaseBuffer(hscan->rs_cbuf);
    2282         122 :         hscan->rs_cbuf = InvalidBuffer;
    2283         122 :         hscan->rs_cblock = InvalidBlockNumber;
    2284         122 :         hscan->rs_inited = false;
    2285             : 
    2286         122 :         return false;
    2287             :     }
    2288             : 
    2289        8532 :     heapgetpage(scan, blockno);
    2290        8532 :     hscan->rs_inited = true;
    2291             : 
    2292        8532 :     return true;
    2293             : }
    2294             : 
    2295             : static bool
    2296      169452 : heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
    2297             :                               TupleTableSlot *slot)
    2298             : {
    2299      169452 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2300      169452 :     TsmRoutine *tsm = scanstate->tsmroutine;
    2301      169452 :     BlockNumber blockno = hscan->rs_cblock;
    2302      169452 :     bool        pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
    2303             : 
    2304             :     Page        page;
    2305             :     bool        all_visible;
    2306             :     OffsetNumber maxoffset;
    2307             : 
    2308             :     /*
    2309             :      * When not using pagemode, we must lock the buffer during tuple
    2310             :      * visibility checks.
    2311             :      */
    2312      169452 :     if (!pagemode)
    2313        2796 :         LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
    2314             : 
    2315      169452 :     page = (Page) BufferGetPage(hscan->rs_cbuf);
    2316      337980 :     all_visible = PageIsAllVisible(page) &&
    2317      168528 :         !scan->rs_snapshot->takenDuringRecovery;
    2318      169452 :     maxoffset = PageGetMaxOffsetNumber(page);
    2319             : 
    2320             :     for (;;)
    2321           0 :     {
    2322             :         OffsetNumber tupoffset;
    2323             : 
    2324      169452 :         CHECK_FOR_INTERRUPTS();
    2325             : 
    2326             :         /* Ask the tablesample method which tuples to check on this page. */
    2327      169452 :         tupoffset = tsm->NextSampleTuple(scanstate,
    2328             :                                          blockno,
    2329             :                                          maxoffset);
    2330             : 
    2331      169452 :         if (OffsetNumberIsValid(tupoffset))
    2332             :         {
    2333             :             ItemId      itemid;
    2334             :             bool        visible;
    2335      160928 :             HeapTuple   tuple = &(hscan->rs_ctup);
    2336             : 
    2337             :             /* Skip invalid tuple pointers. */
    2338      160928 :             itemid = PageGetItemId(page, tupoffset);
    2339      160928 :             if (!ItemIdIsNormal(itemid))
    2340           0 :                 continue;
    2341             : 
    2342      160928 :             tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2343      160928 :             tuple->t_len = ItemIdGetLength(itemid);
    2344      160928 :             ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
    2345             : 
    2346             : 
    2347      160928 :             if (all_visible)
    2348      160232 :                 visible = true;
    2349             :             else
    2350         696 :                 visible = SampleHeapTupleVisible(scan, hscan->rs_cbuf,
    2351             :                                                  tuple, tupoffset);
    2352             : 
    2353             :             /* in pagemode, heapgetpage did this for us */
    2354      160928 :             if (!pagemode)
    2355           4 :                 CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
    2356             :                                                 hscan->rs_cbuf, scan->rs_snapshot);
    2357             : 
    2358             :             /* Try next tuple from same page. */
    2359      160928 :             if (!visible)
    2360           0 :                 continue;
    2361             : 
    2362             :             /* Found visible tuple, return it. */
    2363      160928 :             if (!pagemode)
    2364           4 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    2365             : 
    2366      160928 :             ExecStoreBufferHeapTuple(tuple, slot, hscan->rs_cbuf);
    2367             : 
    2368             :             /* Count successfully-fetched tuples as heap fetches */
    2369      160928 :             pgstat_count_heap_getnext(scan->rs_rd);
    2370             : 
    2371      160928 :             return true;
    2372             :         }
    2373             :         else
    2374             :         {
    2375             :             /*
    2376             :              * If we get here, it means we've exhausted the items on this page
    2377             :              * and it's time to move to the next.
    2378             :              */
    2379        8524 :             if (!pagemode)
    2380        2792 :                 LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
    2381             : 
    2382        8524 :             ExecClearTuple(slot);
    2383        8524 :             return false;
    2384             :         }
    2385             :     }
    2386             : 
    2387             :     Assert(0);
    2388             : }
    2389             : 
    2390             : 
    2391             : /* ----------------------------------------------------------------------------
    2392             :  *  Helper functions for the above.
    2393             :  * ----------------------------------------------------------------------------
    2394             :  */
    2395             : 
    2396             : /*
    2397             :  * Reconstruct and rewrite the given tuple
    2398             :  *
    2399             :  * We cannot simply copy the tuple as-is, for several reasons:
    2400             :  *
    2401             :  * 1. We'd like to squeeze out the values of any dropped columns, both
    2402             :  * to save space and to ensure we have no corner-case failures. (It's
    2403             :  * possible for example that the new table hasn't got a TOAST table
    2404             :  * and so is unable to store any large values of dropped cols.)
    2405             :  *
    2406             :  * 2. The tuple might not even be legal for the new table; this is
    2407             :  * currently only known to happen as an after-effect of ALTER TABLE
    2408             :  * SET WITHOUT OIDS.
    2409             :  *
    2410             :  * So, we must reconstruct the tuple from component Datums.
    2411             :  */
    2412             : static void
    2413      144140 : reform_and_rewrite_tuple(HeapTuple tuple,
    2414             :                          Relation OldHeap, Relation NewHeap,
    2415             :                          Datum *values, bool *isnull, RewriteState rwstate)
    2416             : {
    2417      144140 :     TupleDesc   oldTupDesc = RelationGetDescr(OldHeap);
    2418      144140 :     TupleDesc   newTupDesc = RelationGetDescr(NewHeap);
    2419             :     HeapTuple   copiedTuple;
    2420             :     int         i;
    2421             : 
    2422      144140 :     heap_deform_tuple(tuple, oldTupDesc, values, isnull);
    2423             : 
    2424             :     /* Be sure to null out any dropped columns */
    2425     2365936 :     for (i = 0; i < newTupDesc->natts; i++)
    2426             :     {
    2427     2221796 :         if (TupleDescAttr(newTupDesc, i)->attisdropped)
    2428           0 :             isnull[i] = true;
    2429             :     }
    2430             : 
    2431      144140 :     copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
    2432             : 
    2433             :     /* The heap rewrite module does the rest */
    2434      144140 :     rewrite_heap_tuple(rwstate, tuple, copiedTuple);
    2435             : 
    2436      144140 :     heap_freetuple(copiedTuple);
    2437      144140 : }
    2438             : 
    2439             : /*
    2440             :  * Check visibility of the tuple.
    2441             :  */
    2442             : static bool
    2443         696 : SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
    2444             :                        HeapTuple tuple,
    2445             :                        OffsetNumber tupoffset)
    2446             : {
    2447         696 :     HeapScanDesc hscan = (HeapScanDesc) scan;
    2448             : 
    2449         696 :     if (scan->rs_flags & SO_ALLOW_PAGEMODE)
    2450             :     {
    2451             :         /*
    2452             :          * In pageatatime mode, heapgetpage() already did visibility checks,
    2453             :          * so just look at the info it left in rs_vistuples[].
    2454             :          *
    2455             :          * We use a binary search over the known-sorted array.  Note: we could
    2456             :          * save some effort if we insisted that NextSampleTuple select tuples
    2457             :          * in increasing order, but it's not clear that there would be enough
    2458             :          * gain to justify the restriction.
    2459             :          */
    2460         692 :         int         start = 0,
    2461         692 :                     end = hscan->rs_ntuples - 1;
    2462             : 
    2463        2048 :         while (start <= end)
    2464             :         {
    2465        1356 :             int         mid = (start + end) / 2;
    2466        1356 :             OffsetNumber curoffset = hscan->rs_vistuples[mid];
    2467             : 
    2468        1356 :             if (tupoffset == curoffset)
    2469         692 :                 return true;
    2470         664 :             else if (tupoffset < curoffset)
    2471         260 :                 end = mid - 1;
    2472             :             else
    2473         404 :                 start = mid + 1;
    2474             :         }
    2475             : 
    2476           0 :         return false;
    2477             :     }
    2478             :     else
    2479             :     {
    2480             :         /* Otherwise, we have to check the tuple individually. */
    2481           4 :         return HeapTupleSatisfiesVisibility(tuple, scan->rs_snapshot,
    2482             :                                             buffer);
    2483             :     }
    2484             : }
    2485             : 
    2486             : 
    2487             : /* ------------------------------------------------------------------------
    2488             :  * Definition of the heap table access method.
    2489             :  * ------------------------------------------------------------------------
    2490             :  */
    2491             : 
    2492             : static const TableAmRoutine heapam_methods = {
    2493             :     .type = T_TableAmRoutine,
    2494             : 
    2495             :     .slot_callbacks = heapam_slot_callbacks,
    2496             : 
    2497             :     .scan_begin = heap_beginscan,
    2498             :     .scan_end = heap_endscan,
    2499             :     .scan_rescan = heap_rescan,
    2500             :     .scan_getnextslot = heap_getnextslot,
    2501             : 
    2502             :     .parallelscan_estimate = table_block_parallelscan_estimate,
    2503             :     .parallelscan_initialize = table_block_parallelscan_initialize,
    2504             :     .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
    2505             : 
    2506             :     .index_fetch_begin = heapam_index_fetch_begin,
    2507             :     .index_fetch_reset = heapam_index_fetch_reset,
    2508             :     .index_fetch_end = heapam_index_fetch_end,
    2509             :     .index_fetch_tuple = heapam_index_fetch_tuple,
    2510             : 
    2511             :     .tuple_insert = heapam_tuple_insert,
    2512             :     .tuple_insert_speculative = heapam_tuple_insert_speculative,
    2513             :     .tuple_complete_speculative = heapam_tuple_complete_speculative,
    2514             :     .multi_insert = heap_multi_insert,
    2515             :     .tuple_delete = heapam_tuple_delete,
    2516             :     .tuple_update = heapam_tuple_update,
    2517             :     .tuple_lock = heapam_tuple_lock,
    2518             :     .finish_bulk_insert = heapam_finish_bulk_insert,
    2519             : 
    2520             :     .tuple_fetch_row_version = heapam_fetch_row_version,
    2521             :     .tuple_get_latest_tid = heap_get_latest_tid,
    2522             :     .tuple_tid_valid = heapam_tuple_tid_valid,
    2523             :     .tuple_satisfies_snapshot = heapam_tuple_satisfies_snapshot,
    2524             :     .compute_xid_horizon_for_tuples = heap_compute_xid_horizon_for_tuples,
    2525             : 
    2526             :     .relation_set_new_filenode = heapam_relation_set_new_filenode,
    2527             :     .relation_nontransactional_truncate = heapam_relation_nontransactional_truncate,
    2528             :     .relation_copy_data = heapam_relation_copy_data,
    2529             :     .relation_copy_for_cluster = heapam_relation_copy_for_cluster,
    2530             :     .relation_vacuum = heap_vacuum_rel,
    2531             :     .scan_analyze_next_block = heapam_scan_analyze_next_block,
    2532             :     .scan_analyze_next_tuple = heapam_scan_analyze_next_tuple,
    2533             :     .index_build_range_scan = heapam_index_build_range_scan,
    2534             :     .index_validate_scan = heapam_index_validate_scan,
    2535             : 
    2536             :     .relation_size = table_block_relation_size,
    2537             :     .relation_needs_toast_table = heapam_relation_needs_toast_table,
    2538             : 
    2539             :     .relation_estimate_size = heapam_estimate_rel_size,
    2540             : 
    2541             :     .scan_bitmap_next_block = heapam_scan_bitmap_next_block,
    2542             :     .scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
    2543             :     .scan_sample_next_block = heapam_scan_sample_next_block,
    2544             :     .scan_sample_next_tuple = heapam_scan_sample_next_tuple
    2545             : };
    2546             : 
    2547             : 
    2548             : const TableAmRoutine *
    2549    16812036 : GetHeapamTableAmRoutine(void)
    2550             : {
    2551    16812036 :     return &heapam_methods;
    2552             : }
    2553             : 
    2554             : Datum
    2555      787618 : heap_tableam_handler(PG_FUNCTION_ARGS)
    2556             : {
    2557      787618 :     PG_RETURN_POINTER(&heapam_methods);
    2558             : }

Generated by: LCOV version 1.13