LCOV - code coverage report
Current view: top level - src/include/access - tableam.h (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 97.3 % 146 142
Test Date: 2026-03-11 20:15:07 Functions: 100.0 % 49 49
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * tableam.h
       4              :  *    POSTGRES table access method definitions.
       5              :  *
       6              :  *
       7              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       8              :  * Portions Copyright (c) 1994, Regents of the University of California
       9              :  *
      10              :  * src/include/access/tableam.h
      11              :  *
      12              :  * NOTES
      13              :  *      See tableam.sgml for higher level documentation.
      14              :  *
      15              :  *-------------------------------------------------------------------------
      16              :  */
      17              : #ifndef TABLEAM_H
      18              : #define TABLEAM_H
      19              : 
      20              : #include "access/relscan.h"
      21              : #include "access/sdir.h"
      22              : #include "access/xact.h"
      23              : #include "commands/vacuum.h"
      24              : #include "executor/tuptable.h"
      25              : #include "storage/read_stream.h"
      26              : #include "utils/rel.h"
      27              : #include "utils/snapshot.h"
      28              : 
      29              : 
      30              : #define DEFAULT_TABLE_ACCESS_METHOD "heap"
      31              : 
      32              : /* GUCs */
      33              : extern PGDLLIMPORT char *default_table_access_method;
      34              : extern PGDLLIMPORT bool synchronize_seqscans;
      35              : 
      36              : 
      37              : /* forward references in this file */
      38              : typedef struct BulkInsertStateData BulkInsertStateData;
      39              : typedef struct IndexInfo IndexInfo;
      40              : typedef struct SampleScanState SampleScanState;
      41              : typedef struct ValidateIndexState ValidateIndexState;
      42              : 
      43              : /*
      44              :  * Bitmask values for the flags argument to the scan_begin callback.
      45              :  */
      46              : typedef enum ScanOptions
      47              : {
      48              :     /* one of SO_TYPE_* may be specified */
      49              :     SO_TYPE_SEQSCAN = 1 << 0,
      50              :     SO_TYPE_BITMAPSCAN = 1 << 1,
      51              :     SO_TYPE_SAMPLESCAN = 1 << 2,
      52              :     SO_TYPE_TIDSCAN = 1 << 3,
      53              :     SO_TYPE_TIDRANGESCAN = 1 << 4,
      54              :     SO_TYPE_ANALYZE = 1 << 5,
      55              : 
      56              :     /* several of SO_ALLOW_* may be specified */
      57              :     /* allow or disallow use of access strategy */
      58              :     SO_ALLOW_STRAT = 1 << 6,
      59              :     /* report location to syncscan logic? */
      60              :     SO_ALLOW_SYNC = 1 << 7,
      61              :     /* verify visibility page-at-a-time? */
      62              :     SO_ALLOW_PAGEMODE = 1 << 8,
      63              : 
      64              :     /* unregister snapshot at scan end? */
      65              :     SO_TEMP_SNAPSHOT = 1 << 9,
      66              : }           ScanOptions;
      67              : 
      68              : /*
      69              :  * Result codes for table_{update,delete,lock_tuple}, and for visibility
      70              :  * routines inside table AMs.
      71              :  */
      72              : typedef enum TM_Result
      73              : {
      74              :     /*
      75              :      * Signals that the action succeeded (i.e. update/delete performed, lock
      76              :      * was acquired)
      77              :      */
      78              :     TM_Ok,
      79              : 
      80              :     /* The affected tuple wasn't visible to the relevant snapshot */
      81              :     TM_Invisible,
      82              : 
      83              :     /* The affected tuple was already modified by the calling backend */
      84              :     TM_SelfModified,
      85              : 
      86              :     /*
      87              :      * The affected tuple was updated by another transaction. This includes
      88              :      * the case where tuple was moved to another partition.
      89              :      */
      90              :     TM_Updated,
      91              : 
      92              :     /* The affected tuple was deleted by another transaction */
      93              :     TM_Deleted,
      94              : 
      95              :     /*
      96              :      * The affected tuple is currently being modified by another session. This
      97              :      * will only be returned if table_(update/delete/lock_tuple) are
      98              :      * instructed not to wait.
      99              :      */
     100              :     TM_BeingModified,
     101              : 
     102              :     /* lock couldn't be acquired, action skipped. Only used by lock_tuple */
     103              :     TM_WouldBlock,
     104              : } TM_Result;
     105              : 
     106              : /*
     107              :  * Result codes for table_update(..., update_indexes*..).
     108              :  * Used to determine which indexes to update.
     109              :  */
     110              : typedef enum TU_UpdateIndexes
     111              : {
     112              :     /* No indexed columns were updated (incl. TID addressing of tuple) */
     113              :     TU_None,
     114              : 
     115              :     /* A non-summarizing indexed column was updated, or the TID has changed */
     116              :     TU_All,
     117              : 
     118              :     /* Only summarized columns were updated, TID is unchanged */
     119              :     TU_Summarizing,
     120              : } TU_UpdateIndexes;
     121              : 
     122              : /*
     123              :  * When table_tuple_update, table_tuple_delete, or table_tuple_lock fail
     124              :  * because the target tuple is already outdated, they fill in this struct to
     125              :  * provide information to the caller about what happened. When those functions
     126              :  * succeed, the contents of this struct should not be relied upon, except for
     127              :  * `traversed`, which may be set in both success and failure cases.
     128              :  *
     129              :  * ctid is the target's ctid link: it is the same as the target's TID if the
     130              :  * target was deleted, or the location of the replacement tuple if the target
     131              :  * was updated.
     132              :  *
     133              :  * xmax is the outdating transaction's XID.  If the caller wants to visit the
     134              :  * replacement tuple, it must check that this matches before believing the
     135              :  * replacement is really a match.  This is InvalidTransactionId if the target
     136              :  * was !LP_NORMAL (expected only for a TID retrieved from syscache).
     137              :  *
     138              :  * cmax is the outdating command's CID, but only when the failure code is
     139              :  * TM_SelfModified (i.e., something in the current transaction outdated the
     140              :  * tuple); otherwise cmax is zero.  (We make this restriction because
     141              :  * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
     142              :  * transactions.)
     143              :  *
     144              :  * traversed indicates if an update chain was followed in order to try to lock
     145              :  * the target tuple.  (This may be set in both success and failure cases.)
     146              :  */
     147              : typedef struct TM_FailureData
     148              : {
     149              :     ItemPointerData ctid;
     150              :     TransactionId xmax;
     151              :     CommandId   cmax;
     152              :     bool        traversed;
     153              : } TM_FailureData;
     154              : 
     155              : /*
     156              :  * State used when calling table_index_delete_tuples().
     157              :  *
     158              :  * Represents the status of table tuples, referenced by table TID and taken by
     159              :  * index AM from index tuples.  State consists of high level parameters of the
     160              :  * deletion operation, plus two mutable palloc()'d arrays for information
     161              :  * about the status of individual table tuples.  These are conceptually one
     162              :  * single array.  Using two arrays keeps the TM_IndexDelete struct small,
     163              :  * which makes sorting the first array (the deltids array) fast.
     164              :  *
     165              :  * Some index AM callers perform simple index tuple deletion (by specifying
     166              :  * bottomup = false), and include only known-dead deltids.  These known-dead
     167              :  * entries are all marked knowndeletable = true directly (typically these are
     168              :  * TIDs from LP_DEAD-marked index tuples), but that isn't strictly required.
     169              :  *
     170              :  * Callers that specify bottomup = true are "bottom-up index deletion"
     171              :  * callers.  The considerations for the tableam are more subtle with these
     172              :  * callers because they ask the tableam to perform highly speculative work,
     173              :  * and might only expect the tableam to check a small fraction of all entries.
     174              :  * Caller is not allowed to specify knowndeletable = true for any entry
     175              :  * because everything is highly speculative.  Bottom-up caller provides
     176              :  * context and hints to tableam -- see comments below for details on how index
     177              :  * AMs and tableams should coordinate during bottom-up index deletion.
     178              :  *
     179              :  * Simple index deletion callers may ask the tableam to perform speculative
     180              :  * work, too.  This is a little like bottom-up deletion, but not too much.
     181              :  * The tableam will only perform speculative work when it's practically free
     182              :  * to do so in passing for simple deletion caller (while always performing
     183              :  * whatever work is needed to enable knowndeletable/LP_DEAD index tuples to
     184              :  * be deleted within index AM).  This is the real reason why it's possible for
     185              :  * simple index deletion caller to specify knowndeletable = false up front
     186              :  * (this means "check if it's possible for me to delete corresponding index
     187              :  * tuple when it's cheap to do so in passing").  The index AM should only
     188              :  * include "extra" entries for index tuples whose TIDs point to a table block
     189              :  * that tableam is expected to have to visit anyway (in the event of a block
     190              :  * orientated tableam).  The tableam isn't strictly obligated to check these
     191              :  * "extra" TIDs, but a block-based AM should always manage to do so in
     192              :  * practice.
     193              :  *
     194              :  * The final contents of the deltids/status arrays are interesting to callers
     195              :  * that ask tableam to perform speculative work (i.e. when _any_ items have
     196              :  * knowndeletable set to false up front).  These index AM callers will
     197              :  * naturally need to consult final state to determine which index tuples are
     198              :  * in fact deletable.
     199              :  *
     200              :  * The index AM can keep track of which index tuple relates to which deltid by
     201              :  * setting idxoffnum (and/or relying on each entry being uniquely identifiable
     202              :  * using tid), which is important when the final contents of the array will
     203              :  * need to be interpreted -- the array can shrink from initial size after
     204              :  * tableam processing and/or have entries in a new order (tableam may sort
     205              :  * deltids array for its own reasons).  Bottom-up callers may find that final
     206              :  * ndeltids is 0 on return from call to tableam, in which case no index tuple
     207              :  * deletions are possible.  Simple deletion callers can rely on any entries
     208              :  * they know to be deletable appearing in the final array as deletable.
     209              :  */
     210              : typedef struct TM_IndexDelete
     211              : {
     212              :     ItemPointerData tid;        /* table TID from index tuple */
     213              :     int16       id;             /* Offset into TM_IndexStatus array */
     214              : } TM_IndexDelete;
     215              : 
     216              : typedef struct TM_IndexStatus
     217              : {
     218              :     OffsetNumber idxoffnum;     /* Index am page offset number */
     219              :     bool        knowndeletable; /* Currently known to be deletable? */
     220              : 
     221              :     /* Bottom-up index deletion specific fields follow */
     222              :     bool        promising;      /* Promising (duplicate) index tuple? */
     223              :     int16       freespace;      /* Space freed in index if deleted */
     224              : } TM_IndexStatus;
     225              : 
     226              : /*
     227              :  * Index AM/tableam coordination is central to the design of bottom-up index
     228              :  * deletion.  The index AM provides hints about where to look to the tableam
     229              :  * by marking some entries as "promising".  Index AM does this with duplicate
     230              :  * index tuples that are strongly suspected to be old versions left behind by
     231              :  * UPDATEs that did not logically modify indexed values.  Index AM may find it
     232              :  * helpful to only mark entries as promising when they're thought to have been
     233              :  * affected by such an UPDATE in the recent past.
     234              :  *
     235              :  * Bottom-up index deletion casts a wide net at first, usually by including
     236              :  * all TIDs on a target index page.  It is up to the tableam to worry about
     237              :  * the cost of checking transaction status information.  The tableam is in
     238              :  * control, but needs careful guidance from the index AM.  Index AM requests
     239              :  * that bottomupfreespace target be met, while tableam measures progress
     240              :  * towards that goal by tallying the per-entry freespace value for known
     241              :  * deletable entries. (All !bottomup callers can just set these space related
     242              :  * fields to zero.)
     243              :  */
     244              : typedef struct TM_IndexDeleteOp
     245              : {
     246              :     Relation    irel;           /* Target index relation */
     247              :     BlockNumber iblknum;        /* Index block number (for error reports) */
     248              :     bool        bottomup;       /* Bottom-up (not simple) deletion? */
     249              :     int         bottomupfreespace;  /* Bottom-up space target */
     250              : 
     251              :     /* Mutable per-TID information follows (index AM initializes entries) */
     252              :     int         ndeltids;       /* Current # of deltids/status elements */
     253              :     TM_IndexDelete *deltids;
     254              :     TM_IndexStatus *status;
     255              : } TM_IndexDeleteOp;
     256              : 
     257              : /* "options" flag bits for table_tuple_insert */
     258              : /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */
     259              : #define TABLE_INSERT_SKIP_FSM       0x0002
     260              : #define TABLE_INSERT_FROZEN         0x0004
     261              : #define TABLE_INSERT_NO_LOGICAL     0x0008
     262              : 
     263              : /* flag bits for table_tuple_lock */
     264              : /* Follow tuples whose update is in progress if lock modes don't conflict  */
     265              : #define TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS (1 << 0)
     266              : /* Follow update chain and lock latest version of tuple */
     267              : #define TUPLE_LOCK_FLAG_FIND_LAST_VERSION       (1 << 1)
     268              : 
     269              : 
     270              : /* Typedef for callback function for table_index_build_scan */
     271              : typedef void (*IndexBuildCallback) (Relation index,
     272              :                                     ItemPointer tid,
     273              :                                     Datum *values,
     274              :                                     bool *isnull,
     275              :                                     bool tupleIsAlive,
     276              :                                     void *state);
     277              : 
     278              : /*
     279              :  * API struct for a table AM.  Note this must be allocated in a
     280              :  * server-lifetime manner, typically as a static const struct, which then gets
     281              :  * returned by FormData_pg_am.amhandler.
     282              :  *
     283              :  * In most cases it's not appropriate to call the callbacks directly, use the
     284              :  * table_* wrapper functions instead.
     285              :  *
     286              :  * GetTableAmRoutine() asserts that required callbacks are filled in, remember
     287              :  * to update when adding a callback.
     288              :  */
     289              : typedef struct TableAmRoutine
     290              : {
     291              :     /* this must be set to T_TableAmRoutine */
     292              :     NodeTag     type;
     293              : 
     294              : 
     295              :     /* ------------------------------------------------------------------------
     296              :      * Slot related callbacks.
     297              :      * ------------------------------------------------------------------------
     298              :      */
     299              : 
     300              :     /*
     301              :      * Return slot implementation suitable for storing a tuple of this AM.
     302              :      */
     303              :     const TupleTableSlotOps *(*slot_callbacks) (Relation rel);
     304              : 
     305              : 
     306              :     /* ------------------------------------------------------------------------
     307              :      * Table scan callbacks.
     308              :      * ------------------------------------------------------------------------
     309              :      */
     310              : 
     311              :     /*
     312              :      * Start a scan of `rel`.  The callback has to return a TableScanDesc,
     313              :      * which will typically be embedded in a larger, AM specific, struct.
     314              :      *
     315              :      * If nkeys != 0, the results need to be filtered by those scan keys.
     316              :      *
     317              :      * pscan, if not NULL, will have already been initialized with
     318              :      * parallelscan_initialize(), and has to be for the same relation. Will
     319              :      * only be set coming from table_beginscan_parallel().
     320              :      *
     321              :      * `flags` is a bitmask indicating the type of scan (ScanOptions's
     322              :      * SO_TYPE_*, currently only one may be specified), options controlling
     323              :      * the scan's behaviour (ScanOptions's SO_ALLOW_*, several may be
     324              :      * specified, an AM may ignore unsupported ones) and whether the snapshot
     325              :      * needs to be deallocated at scan_end (ScanOptions's SO_TEMP_SNAPSHOT).
     326              :      */
     327              :     TableScanDesc (*scan_begin) (Relation rel,
     328              :                                  Snapshot snapshot,
     329              :                                  int nkeys, ScanKeyData *key,
     330              :                                  ParallelTableScanDesc pscan,
     331              :                                  uint32 flags);
     332              : 
     333              :     /*
     334              :      * Release resources and deallocate scan. If TableScanDesc.temp_snap,
     335              :      * TableScanDesc.rs_snapshot needs to be unregistered.
     336              :      */
     337              :     void        (*scan_end) (TableScanDesc scan);
     338              : 
     339              :     /*
     340              :      * Restart relation scan.  If set_params is set to true, allow_{strat,
     341              :      * sync, pagemode} (see scan_begin) changes should be taken into account.
     342              :      */
     343              :     void        (*scan_rescan) (TableScanDesc scan, ScanKeyData *key,
     344              :                                 bool set_params, bool allow_strat,
     345              :                                 bool allow_sync, bool allow_pagemode);
     346              : 
     347              :     /*
     348              :      * Return next tuple from `scan`, store in slot.
     349              :      */
     350              :     bool        (*scan_getnextslot) (TableScanDesc scan,
     351              :                                      ScanDirection direction,
     352              :                                      TupleTableSlot *slot);
     353              : 
     354              :     /*-----------
     355              :      * Optional functions to provide scanning for ranges of ItemPointers.
     356              :      * Implementations must either provide both of these functions, or neither
     357              :      * of them.
     358              :      *
     359              :      * Implementations of scan_set_tidrange must themselves handle
     360              :      * ItemPointers of any value. i.e, they must handle each of the following:
     361              :      *
     362              :      * 1) mintid or maxtid is beyond the end of the table; and
     363              :      * 2) mintid is above maxtid; and
     364              :      * 3) item offset for mintid or maxtid is beyond the maximum offset
     365              :      * allowed by the AM.
     366              :      *
     367              :      * Implementations can assume that scan_set_tidrange is always called
     368              :      * before scan_getnextslot_tidrange or after scan_rescan and before any
     369              :      * further calls to scan_getnextslot_tidrange.
     370              :      */
     371              :     void        (*scan_set_tidrange) (TableScanDesc scan,
     372              :                                       ItemPointer mintid,
     373              :                                       ItemPointer maxtid);
     374              : 
     375              :     /*
     376              :      * Return next tuple from `scan` that's in the range of TIDs defined by
     377              :      * scan_set_tidrange.
     378              :      */
     379              :     bool        (*scan_getnextslot_tidrange) (TableScanDesc scan,
     380              :                                               ScanDirection direction,
     381              :                                               TupleTableSlot *slot);
     382              : 
     383              :     /* ------------------------------------------------------------------------
     384              :      * Parallel table scan related functions.
     385              :      * ------------------------------------------------------------------------
     386              :      */
     387              : 
     388              :     /*
     389              :      * Estimate the size of shared memory needed for a parallel scan of this
     390              :      * relation. The snapshot does not need to be accounted for.
     391              :      */
     392              :     Size        (*parallelscan_estimate) (Relation rel);
     393              : 
     394              :     /*
     395              :      * Initialize ParallelTableScanDesc for a parallel scan of this relation.
     396              :      * `pscan` will be sized according to parallelscan_estimate() for the same
     397              :      * relation.
     398              :      */
     399              :     Size        (*parallelscan_initialize) (Relation rel,
     400              :                                             ParallelTableScanDesc pscan);
     401              : 
     402              :     /*
     403              :      * Reinitialize `pscan` for a new scan. `rel` will be the same relation as
     404              :      * when `pscan` was initialized by parallelscan_initialize.
     405              :      */
     406              :     void        (*parallelscan_reinitialize) (Relation rel,
     407              :                                               ParallelTableScanDesc pscan);
     408              : 
     409              : 
     410              :     /* ------------------------------------------------------------------------
     411              :      * Index Scan Callbacks
     412              :      * ------------------------------------------------------------------------
     413              :      */
     414              : 
     415              :     /*
     416              :      * Prepare to fetch tuples from the relation, as needed when fetching
     417              :      * tuples for an index scan.  The callback has to return an
     418              :      * IndexFetchTableData, which the AM will typically embed in a larger
     419              :      * structure with additional information.
     420              :      *
     421              :      * Tuples for an index scan can then be fetched via index_fetch_tuple.
     422              :      */
     423              :     struct IndexFetchTableData *(*index_fetch_begin) (Relation rel);
     424              : 
     425              :     /*
     426              :      * Reset index fetch. Typically this will release cross index fetch
     427              :      * resources held in IndexFetchTableData.
     428              :      */
     429              :     void        (*index_fetch_reset) (struct IndexFetchTableData *data);
     430              : 
     431              :     /*
     432              :      * Release resources and deallocate index fetch.
     433              :      */
     434              :     void        (*index_fetch_end) (struct IndexFetchTableData *data);
     435              : 
     436              :     /*
     437              :      * Fetch tuple at `tid` into `slot`, after doing a visibility test
     438              :      * according to `snapshot`. If a tuple was found and passed the visibility
     439              :      * test, return true, false otherwise.
     440              :      *
     441              :      * Note that AMs that do not necessarily update indexes when indexed
     442              :      * columns do not change, need to return the current/correct version of
     443              :      * the tuple that is visible to the snapshot, even if the tid points to an
     444              :      * older version of the tuple.
     445              :      *
     446              :      * *call_again is false on the first call to index_fetch_tuple for a tid.
     447              :      * If there potentially is another tuple matching the tid, *call_again
     448              :      * needs to be set to true by index_fetch_tuple, signaling to the caller
     449              :      * that index_fetch_tuple should be called again for the same tid.
     450              :      *
     451              :      * *all_dead, if all_dead is not NULL, should be set to true by
     452              :      * index_fetch_tuple iff it is guaranteed that no backend needs to see
     453              :      * that tuple. Index AMs can use that to avoid returning that tid in
     454              :      * future searches.
     455              :      */
     456              :     bool        (*index_fetch_tuple) (struct IndexFetchTableData *scan,
     457              :                                       ItemPointer tid,
     458              :                                       Snapshot snapshot,
     459              :                                       TupleTableSlot *slot,
     460              :                                       bool *call_again, bool *all_dead);
     461              : 
     462              : 
     463              :     /* ------------------------------------------------------------------------
     464              :      * Callbacks for non-modifying operations on individual tuples
     465              :      * ------------------------------------------------------------------------
     466              :      */
     467              : 
     468              :     /*
     469              :      * Fetch tuple at `tid` into `slot`, after doing a visibility test
     470              :      * according to `snapshot`. If a tuple was found and passed the visibility
     471              :      * test, returns true, false otherwise.
     472              :      */
     473              :     bool        (*tuple_fetch_row_version) (Relation rel,
     474              :                                             ItemPointer tid,
     475              :                                             Snapshot snapshot,
     476              :                                             TupleTableSlot *slot);
     477              : 
     478              :     /*
     479              :      * Is tid valid for a scan of this relation.
     480              :      */
     481              :     bool        (*tuple_tid_valid) (TableScanDesc scan,
     482              :                                     ItemPointer tid);
     483              : 
     484              :     /*
     485              :      * Return the latest version of the tuple at `tid`, by updating `tid` to
     486              :      * point at the newest version.
     487              :      */
     488              :     void        (*tuple_get_latest_tid) (TableScanDesc scan,
     489              :                                          ItemPointer tid);
     490              : 
     491              :     /*
     492              :      * Does the tuple in `slot` satisfy `snapshot`?  The slot needs to be of
     493              :      * the appropriate type for the AM.
     494              :      */
     495              :     bool        (*tuple_satisfies_snapshot) (Relation rel,
     496              :                                              TupleTableSlot *slot,
     497              :                                              Snapshot snapshot);
     498              : 
     499              :     /* see table_index_delete_tuples() */
     500              :     TransactionId (*index_delete_tuples) (Relation rel,
     501              :                                           TM_IndexDeleteOp *delstate);
     502              : 
     503              : 
     504              :     /* ------------------------------------------------------------------------
     505              :      * Manipulations of physical tuples.
     506              :      * ------------------------------------------------------------------------
     507              :      */
     508              : 
     509              :     /* see table_tuple_insert() for reference about parameters */
     510              :     void        (*tuple_insert) (Relation rel, TupleTableSlot *slot,
     511              :                                  CommandId cid, int options,
     512              :                                  BulkInsertStateData *bistate);
     513              : 
     514              :     /* see table_tuple_insert_speculative() for reference about parameters */
     515              :     void        (*tuple_insert_speculative) (Relation rel,
     516              :                                              TupleTableSlot *slot,
     517              :                                              CommandId cid,
     518              :                                              int options,
     519              :                                              BulkInsertStateData *bistate,
     520              :                                              uint32 specToken);
     521              : 
     522              :     /* see table_tuple_complete_speculative() for reference about parameters */
     523              :     void        (*tuple_complete_speculative) (Relation rel,
     524              :                                                TupleTableSlot *slot,
     525              :                                                uint32 specToken,
     526              :                                                bool succeeded);
     527              : 
     528              :     /* see table_multi_insert() for reference about parameters */
     529              :     void        (*multi_insert) (Relation rel, TupleTableSlot **slots, int nslots,
     530              :                                  CommandId cid, int options, BulkInsertStateData *bistate);
     531              : 
     532              :     /* see table_tuple_delete() for reference about parameters */
     533              :     TM_Result   (*tuple_delete) (Relation rel,
     534              :                                  ItemPointer tid,
     535              :                                  CommandId cid,
     536              :                                  Snapshot snapshot,
     537              :                                  Snapshot crosscheck,
     538              :                                  bool wait,
     539              :                                  TM_FailureData *tmfd,
     540              :                                  bool changingPart);
     541              : 
     542              :     /* see table_tuple_update() for reference about parameters */
     543              :     TM_Result   (*tuple_update) (Relation rel,
     544              :                                  ItemPointer otid,
     545              :                                  TupleTableSlot *slot,
     546              :                                  CommandId cid,
     547              :                                  Snapshot snapshot,
     548              :                                  Snapshot crosscheck,
     549              :                                  bool wait,
     550              :                                  TM_FailureData *tmfd,
     551              :                                  LockTupleMode *lockmode,
     552              :                                  TU_UpdateIndexes *update_indexes);
     553              : 
     554              :     /* see table_tuple_lock() for reference about parameters */
     555              :     TM_Result   (*tuple_lock) (Relation rel,
     556              :                                ItemPointer tid,
     557              :                                Snapshot snapshot,
     558              :                                TupleTableSlot *slot,
     559              :                                CommandId cid,
     560              :                                LockTupleMode mode,
     561              :                                LockWaitPolicy wait_policy,
     562              :                                uint8 flags,
     563              :                                TM_FailureData *tmfd);
     564              : 
     565              :     /*
     566              :      * Perform operations necessary to complete insertions made via
     567              :      * tuple_insert and multi_insert with a BulkInsertState specified. In-tree
     568              :      * access methods ceased to use this.
     569              :      *
     570              :      * Typically callers of tuple_insert and multi_insert will just pass all
     571              :      * the flags that apply to them, and each AM has to decide which of them
     572              :      * make sense for it, and then only take actions in finish_bulk_insert for
     573              :      * those flags, and ignore others.
     574              :      *
     575              :      * Optional callback.
     576              :      */
     577              :     void        (*finish_bulk_insert) (Relation rel, int options);
     578              : 
     579              : 
     580              :     /* ------------------------------------------------------------------------
     581              :      * DDL related functionality.
     582              :      * ------------------------------------------------------------------------
     583              :      */
     584              : 
     585              :     /*
     586              :      * This callback needs to create new relation storage for `rel`, with
     587              :      * appropriate durability behaviour for `persistence`.
     588              :      *
     589              :      * Note that only the subset of the relcache filled by
     590              :      * RelationBuildLocalRelation() can be relied upon and that the relation's
     591              :      * catalog entries will either not yet exist (new relation), or will still
     592              :      * reference the old relfilelocator.
     593              :      *
     594              :      * As output *freezeXid, *minmulti must be set to the values appropriate
     595              :      * for pg_class.{relfrozenxid, relminmxid}. For AMs that don't need those
     596              :      * fields to be filled they can be set to InvalidTransactionId and
     597              :      * InvalidMultiXactId, respectively.
     598              :      *
     599              :      * See also table_relation_set_new_filelocator().
     600              :      */
     601              :     void        (*relation_set_new_filelocator) (Relation rel,
     602              :                                                  const RelFileLocator *newrlocator,
     603              :                                                  char persistence,
     604              :                                                  TransactionId *freezeXid,
     605              :                                                  MultiXactId *minmulti);
     606              : 
     607              :     /*
     608              :      * This callback needs to remove all contents from `rel`'s current
     609              :      * relfilelocator. No provisions for transactional behaviour need to be
     610              :      * made.  Often this can be implemented by truncating the underlying
     611              :      * storage to its minimal size.
     612              :      *
     613              :      * See also table_relation_nontransactional_truncate().
     614              :      */
     615              :     void        (*relation_nontransactional_truncate) (Relation rel);
     616              : 
     617              :     /*
     618              :      * See table_relation_copy_data().
     619              :      *
     620              :      * This can typically be implemented by directly copying the underlying
     621              :      * storage, unless it contains references to the tablespace internally.
     622              :      */
     623              :     void        (*relation_copy_data) (Relation rel,
     624              :                                        const RelFileLocator *newrlocator);
     625              : 
     626              :     /* See table_relation_copy_for_cluster() */
     627              :     void        (*relation_copy_for_cluster) (Relation OldTable,
     628              :                                               Relation NewTable,
     629              :                                               Relation OldIndex,
     630              :                                               bool use_sort,
     631              :                                               TransactionId OldestXmin,
     632              :                                               TransactionId *xid_cutoff,
     633              :                                               MultiXactId *multi_cutoff,
     634              :                                               double *num_tuples,
     635              :                                               double *tups_vacuumed,
     636              :                                               double *tups_recently_dead);
     637              : 
     638              :     /*
     639              :      * React to VACUUM command on the relation. The VACUUM can be triggered by
     640              :      * a user or by autovacuum. The specific actions performed by the AM will
     641              :      * depend heavily on the individual AM.
     642              :      *
     643              :      * On entry a transaction is already established, and the relation is
     644              :      * locked with a ShareUpdateExclusive lock.
     645              :      *
     646              :      * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through
     647              :      * this routine, even if (for ANALYZE) it is part of the same VACUUM
     648              :      * command.
     649              :      *
     650              :      * There probably, in the future, needs to be a separate callback to
     651              :      * integrate with autovacuum's scheduling.
     652              :      */
     653              :     void        (*relation_vacuum) (Relation rel,
     654              :                                     const VacuumParams params,
     655              :                                     BufferAccessStrategy bstrategy);
     656              : 
     657              :     /*
     658              :      * Prepare to analyze block `blockno` of `scan`. The scan has been started
     659              :      * with table_beginscan_analyze().  See also
     660              :      * table_scan_analyze_next_block().
     661              :      *
     662              :      * The callback may acquire resources like locks that are held until
     663              :      * table_scan_analyze_next_tuple() returns false. It e.g. can make sense
     664              :      * to hold a lock until all tuples on a block have been analyzed by
     665              :      * scan_analyze_next_tuple.
     666              :      *
     667              :      * The callback can return false if the block is not suitable for
     668              :      * sampling, e.g. because it's a metapage that could never contain tuples.
     669              :      *
     670              :      * XXX: This obviously is primarily suited for block-based AMs. It's not
     671              :      * clear what a good interface for non block based AMs would be, so there
     672              :      * isn't one yet.
     673              :      */
     674              :     bool        (*scan_analyze_next_block) (TableScanDesc scan,
     675              :                                             ReadStream *stream);
     676              : 
     677              :     /*
     678              :      * See table_scan_analyze_next_tuple().
     679              :      *
     680              :      * Not every AM might have a meaningful concept of dead rows, in which
     681              :      * case it's OK to not increment *deadrows - but note that that may
     682              :      * influence autovacuum scheduling (see comment for relation_vacuum
     683              :      * callback).
     684              :      */
     685              :     bool        (*scan_analyze_next_tuple) (TableScanDesc scan,
     686              :                                             double *liverows,
     687              :                                             double *deadrows,
     688              :                                             TupleTableSlot *slot);
     689              : 
     690              :     /* see table_index_build_range_scan for reference about parameters */
     691              :     double      (*index_build_range_scan) (Relation table_rel,
     692              :                                            Relation index_rel,
     693              :                                            IndexInfo *index_info,
     694              :                                            bool allow_sync,
     695              :                                            bool anyvisible,
     696              :                                            bool progress,
     697              :                                            BlockNumber start_blockno,
     698              :                                            BlockNumber numblocks,
     699              :                                            IndexBuildCallback callback,
     700              :                                            void *callback_state,
     701              :                                            TableScanDesc scan);
     702              : 
     703              :     /* see table_index_validate_scan for reference about parameters */
     704              :     void        (*index_validate_scan) (Relation table_rel,
     705              :                                         Relation index_rel,
     706              :                                         IndexInfo *index_info,
     707              :                                         Snapshot snapshot,
     708              :                                         ValidateIndexState *state);
     709              : 
     710              : 
     711              :     /* ------------------------------------------------------------------------
     712              :      * Miscellaneous functions.
     713              :      * ------------------------------------------------------------------------
     714              :      */
     715              : 
     716              :     /*
     717              :      * See table_relation_size().
     718              :      *
     719              :      * Note that currently a few callers use the MAIN_FORKNUM size to figure
     720              :      * out the range of potentially interesting blocks (brin, analyze). It's
     721              :      * probable that we'll need to revise the interface for those at some
     722              :      * point.
     723              :      */
     724              :     uint64      (*relation_size) (Relation rel, ForkNumber forkNumber);
     725              : 
     726              : 
     727              :     /*
     728              :      * This callback should return true if the relation requires a TOAST table
     729              :      * and false if it does not.  It may wish to examine the relation's tuple
     730              :      * descriptor before making a decision, but if it uses some other method
     731              :      * of storing large values (or if it does not support them) it can simply
     732              :      * return false.
     733              :      */
     734              :     bool        (*relation_needs_toast_table) (Relation rel);
     735              : 
     736              :     /*
     737              :      * This callback should return the OID of the table AM that implements
     738              :      * TOAST tables for this AM.  If the relation_needs_toast_table callback
     739              :      * always returns false, this callback is not required.
     740              :      */
     741              :     Oid         (*relation_toast_am) (Relation rel);
     742              : 
     743              :     /*
     744              :      * This callback is invoked when detoasting a value stored in a toast
     745              :      * table implemented by this AM.  See table_relation_fetch_toast_slice()
     746              :      * for more details.
     747              :      */
     748              :     void        (*relation_fetch_toast_slice) (Relation toastrel, Oid valueid,
     749              :                                                int32 attrsize,
     750              :                                                int32 sliceoffset,
     751              :                                                int32 slicelength,
     752              :                                                varlena *result);
     753              : 
     754              : 
     755              :     /* ------------------------------------------------------------------------
     756              :      * Planner related functions.
     757              :      * ------------------------------------------------------------------------
     758              :      */
     759              : 
     760              :     /*
     761              :      * See table_relation_estimate_size().
     762              :      *
     763              :      * While block oriented, it shouldn't be too hard for an AM that doesn't
     764              :      * internally use blocks to convert into a usable representation.
     765              :      *
     766              :      * This differs from the relation_size callback by returning size
     767              :      * estimates (both relation size and tuple count) for planning purposes,
     768              :      * rather than returning a currently correct estimate.
     769              :      */
     770              :     void        (*relation_estimate_size) (Relation rel, int32 *attr_widths,
     771              :                                            BlockNumber *pages, double *tuples,
     772              :                                            double *allvisfrac);
     773              : 
     774              : 
     775              :     /* ------------------------------------------------------------------------
     776              :      * Executor related functions.
     777              :      * ------------------------------------------------------------------------
     778              :      */
     779              : 
     780              :     /*
     781              :      * Fetch the next tuple of a bitmap table scan into `slot` and return true
     782              :      * if a visible tuple was found, false otherwise.
     783              :      *
     784              :      * `lossy_pages` is incremented if the bitmap is lossy for the selected
     785              :      * page; otherwise, `exact_pages` is incremented. These are tracked for
     786              :      * display in EXPLAIN ANALYZE output.
     787              :      *
     788              :      * Prefetching additional data from the bitmap is left to the table AM.
     789              :      *
     790              :      * This is an optional callback.
     791              :      */
     792              :     bool        (*scan_bitmap_next_tuple) (TableScanDesc scan,
     793              :                                            TupleTableSlot *slot,
     794              :                                            bool *recheck,
     795              :                                            uint64 *lossy_pages,
     796              :                                            uint64 *exact_pages);
     797              : 
     798              :     /*
     799              :      * Prepare to fetch tuples from the next block in a sample scan. Return
     800              :      * false if the sample scan is finished, true otherwise. `scan` was
     801              :      * started via table_beginscan_sampling().
     802              :      *
     803              :      * Typically this will first determine the target block by calling the
     804              :      * TsmRoutine's NextSampleBlock() callback if not NULL, or alternatively
     805              :      * perform a sequential scan over all blocks.  The determined block is
     806              :      * then typically read and pinned.
     807              :      *
     808              :      * As the TsmRoutine interface is block based, a block needs to be passed
     809              :      * to NextSampleBlock(). If that's not appropriate for an AM, it
     810              :      * internally needs to perform mapping between the internal and a block
     811              :      * based representation.
     812              :      *
     813              :      * Note that it's not acceptable to hold deadlock prone resources such as
     814              :      * lwlocks until scan_sample_next_tuple() has exhausted the tuples on the
     815              :      * block - the tuple is likely to be returned to an upper query node, and
     816              :      * the next call could be off a long while. Holding buffer pins and such
     817              :      * is obviously OK.
     818              :      *
     819              :      * Currently it is required to implement this interface, as there's no
     820              :      * alternative way (contrary e.g. to bitmap scans) to implement sample
     821              :      * scans. If infeasible to implement, the AM may raise an error.
     822              :      */
     823              :     bool        (*scan_sample_next_block) (TableScanDesc scan,
     824              :                                            SampleScanState *scanstate);
     825              : 
     826              :     /*
     827              :      * This callback, only called after scan_sample_next_block has returned
     828              :      * true, should determine the next tuple to be returned from the selected
     829              :      * block using the TsmRoutine's NextSampleTuple() callback.
     830              :      *
     831              :      * The callback needs to perform visibility checks, and only return
     832              :      * visible tuples. That obviously can mean calling NextSampleTuple()
     833              :      * multiple times.
     834              :      *
     835              :      * The TsmRoutine interface assumes that there's a maximum offset on a
     836              :      * given page, so if that doesn't apply to an AM, it needs to emulate that
     837              :      * assumption somehow.
     838              :      */
     839              :     bool        (*scan_sample_next_tuple) (TableScanDesc scan,
     840              :                                            SampleScanState *scanstate,
     841              :                                            TupleTableSlot *slot);
     842              : 
     843              : } TableAmRoutine;
     844              : 
     845              : 
     846              : /* ----------------------------------------------------------------------------
     847              :  * Slot functions.
     848              :  * ----------------------------------------------------------------------------
     849              :  */
     850              : 
     851              : /*
     852              :  * Returns slot callbacks suitable for holding tuples of the appropriate type
     853              :  * for the relation.  Works for tables, views, foreign tables and partitioned
     854              :  * tables.
     855              :  */
     856              : extern const TupleTableSlotOps *table_slot_callbacks(Relation relation);
     857              : 
     858              : /*
     859              :  * Returns slot using the callbacks returned by table_slot_callbacks(), and
     860              :  * registers it on *reglist.
     861              :  */
     862              : extern TupleTableSlot *table_slot_create(Relation relation, List **reglist);
     863              : 
     864              : 
     865              : /* ----------------------------------------------------------------------------
     866              :  * Table scan functions.
     867              :  * ----------------------------------------------------------------------------
     868              :  */
     869              : 
     870              : /*
     871              :  * A wrapper around the Table Access Method scan_begin callback, to centralize
     872              :  * error checking. All calls to ->scan_begin() should go through this
     873              :  * function.
     874              :  */
     875              : static TableScanDesc
     876       408797 : table_beginscan_common(Relation rel, Snapshot snapshot, int nkeys,
     877              :                        ScanKeyData *key, ParallelTableScanDesc pscan,
     878              :                        uint32 flags)
     879              : {
     880              :     /*
     881              :      * We don't allow scans to be started while CheckXidAlive is set, except
     882              :      * via systable_beginscan() et al.  See detailed comments in xact.c where
     883              :      * these variables are declared.
     884              :      */
     885       408797 :     if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
     886            0 :         elog(ERROR, "scan started during logical decoding");
     887              : 
     888       408797 :     return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, pscan, flags);
     889              : }
     890              : 
     891              : /*
     892              :  * Start a scan of `rel`. Returned tuples pass a visibility test of
     893              :  * `snapshot`, and if nkeys != 0, the results are filtered by those scan keys.
     894              :  */
     895              : static inline TableScanDesc
     896       113924 : table_beginscan(Relation rel, Snapshot snapshot,
     897              :                 int nkeys, ScanKeyData *key)
     898              : {
     899       113924 :     uint32      flags = SO_TYPE_SEQSCAN |
     900              :         SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
     901              : 
     902       113924 :     return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
     903              : }
     904              : 
     905              : /*
     906              :  * Like table_beginscan(), but for scanning catalog. It'll automatically use a
     907              :  * snapshot appropriate for scanning catalog relations.
     908              :  */
     909              : extern TableScanDesc table_beginscan_catalog(Relation relation, int nkeys,
     910              :                                              ScanKeyData *key);
     911              : 
     912              : /*
     913              :  * Like table_beginscan(), but table_beginscan_strat() offers an extended API
     914              :  * that lets the caller control whether a nondefault buffer access strategy
     915              :  * can be used, and whether syncscan can be chosen (possibly resulting in the
     916              :  * scan not starting from block zero).  Both of these default to true with
     917              :  * plain table_beginscan.
     918              :  */
     919              : static inline TableScanDesc
     920       232599 : table_beginscan_strat(Relation rel, Snapshot snapshot,
     921              :                       int nkeys, ScanKeyData *key,
     922              :                       bool allow_strat, bool allow_sync)
     923              : {
     924       232599 :     uint32      flags = SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE;
     925              : 
     926       232599 :     if (allow_strat)
     927       232599 :         flags |= SO_ALLOW_STRAT;
     928       232599 :     if (allow_sync)
     929        27671 :         flags |= SO_ALLOW_SYNC;
     930              : 
     931       232599 :     return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
     932              : }
     933              : 
     934              : /*
     935              :  * table_beginscan_bm is an alternative entry point for setting up a
     936              :  * TableScanDesc for a bitmap heap scan.  Although that scan technology is
     937              :  * really quite unlike a standard seqscan, there is just enough commonality to
     938              :  * make it worth using the same data structure.
     939              :  */
     940              : static inline TableScanDesc
     941        11225 : table_beginscan_bm(Relation rel, Snapshot snapshot,
     942              :                    int nkeys, ScanKeyData *key)
     943              : {
     944        11225 :     uint32      flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
     945              : 
     946        11225 :     return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
     947              : }
     948              : 
     949              : /*
     950              :  * table_beginscan_sampling is an alternative entry point for setting up a
     951              :  * TableScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
     952              :  * using the same data structure although the behavior is rather different.
     953              :  * In addition to the options offered by table_beginscan_strat, this call
     954              :  * also allows control of whether page-mode visibility checking is used.
     955              :  */
     956              : static inline TableScanDesc
     957           73 : table_beginscan_sampling(Relation rel, Snapshot snapshot,
     958              :                          int nkeys, ScanKeyData *key,
     959              :                          bool allow_strat, bool allow_sync,
     960              :                          bool allow_pagemode)
     961              : {
     962           73 :     uint32      flags = SO_TYPE_SAMPLESCAN;
     963              : 
     964           73 :     if (allow_strat)
     965           67 :         flags |= SO_ALLOW_STRAT;
     966           73 :     if (allow_sync)
     967           33 :         flags |= SO_ALLOW_SYNC;
     968           73 :     if (allow_pagemode)
     969           61 :         flags |= SO_ALLOW_PAGEMODE;
     970              : 
     971           73 :     return table_beginscan_common(rel, snapshot, nkeys, key, NULL, flags);
     972              : }
     973              : 
     974              : /*
     975              :  * table_beginscan_tid is an alternative entry point for setting up a
     976              :  * TableScanDesc for a Tid scan. As with bitmap scans, it's worth using
     977              :  * the same data structure although the behavior is rather different.
     978              :  */
     979              : static inline TableScanDesc
     980          393 : table_beginscan_tid(Relation rel, Snapshot snapshot)
     981              : {
     982          393 :     uint32      flags = SO_TYPE_TIDSCAN;
     983              : 
     984          393 :     return table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
     985              : }
     986              : 
     987              : /*
     988              :  * table_beginscan_analyze is an alternative entry point for setting up a
     989              :  * TableScanDesc for an ANALYZE scan.  As with bitmap scans, it's worth using
     990              :  * the same data structure although the behavior is rather different.
     991              :  */
     992              : static inline TableScanDesc
     993         8925 : table_beginscan_analyze(Relation rel)
     994              : {
     995         8925 :     uint32      flags = SO_TYPE_ANALYZE;
     996              : 
     997         8925 :     return table_beginscan_common(rel, NULL, 0, NULL, NULL, flags);
     998              : }
     999              : 
    1000              : /*
    1001              :  * End relation scan.
    1002              :  */
    1003              : static inline void
    1004       406378 : table_endscan(TableScanDesc scan)
    1005              : {
    1006       406378 :     scan->rs_rd->rd_tableam->scan_end(scan);
    1007       406378 : }
    1008              : 
    1009              : /*
    1010              :  * Restart a relation scan.
    1011              :  */
    1012              : static inline void
    1013       852982 : table_rescan(TableScanDesc scan, ScanKeyData *key)
    1014              : {
    1015       852982 :     scan->rs_rd->rd_tableam->scan_rescan(scan, key, false, false, false, false);
    1016       852982 : }
    1017              : 
    1018              : /*
    1019              :  * Restart a relation scan after changing params.
    1020              :  *
    1021              :  * This call allows changing the buffer strategy, syncscan, and pagemode
    1022              :  * options before starting a fresh scan.  Note that although the actual use of
    1023              :  * syncscan might change (effectively, enabling or disabling reporting), the
    1024              :  * previously selected startblock will be kept.
    1025              :  */
    1026              : static inline void
    1027           15 : table_rescan_set_params(TableScanDesc scan, ScanKeyData *key,
    1028              :                         bool allow_strat, bool allow_sync, bool allow_pagemode)
    1029              : {
    1030           15 :     scan->rs_rd->rd_tableam->scan_rescan(scan, key, true,
    1031              :                                          allow_strat, allow_sync,
    1032              :                                          allow_pagemode);
    1033           15 : }
    1034              : 
    1035              : /*
    1036              :  * Return next tuple from `scan`, store in slot.
    1037              :  */
    1038              : static inline bool
    1039     49815559 : table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
    1040              : {
    1041     49815559 :     slot->tts_tableOid = RelationGetRelid(sscan->rs_rd);
    1042              : 
    1043              :     /* We don't expect actual scans using NoMovementScanDirection */
    1044              :     Assert(direction == ForwardScanDirection ||
    1045              :            direction == BackwardScanDirection);
    1046              : 
    1047     49815559 :     return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
    1048              : }
    1049              : 
    1050              : /* ----------------------------------------------------------------------------
    1051              :  * TID Range scanning related functions.
    1052              :  * ----------------------------------------------------------------------------
    1053              :  */
    1054              : 
    1055              : /*
    1056              :  * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
    1057              :  * for a TID range scan.
    1058              :  */
    1059              : static inline TableScanDesc
    1060          930 : table_beginscan_tidrange(Relation rel, Snapshot snapshot,
    1061              :                          ItemPointer mintid,
    1062              :                          ItemPointer maxtid)
    1063              : {
    1064              :     TableScanDesc sscan;
    1065          930 :     uint32      flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
    1066              : 
    1067          930 :     sscan = table_beginscan_common(rel, snapshot, 0, NULL, NULL, flags);
    1068              : 
    1069              :     /* Set the range of TIDs to scan */
    1070          930 :     sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
    1071              : 
    1072          930 :     return sscan;
    1073              : }
    1074              : 
    1075              : /*
    1076              :  * table_rescan_tidrange resets the scan position and sets the minimum and
    1077              :  * maximum TID range to scan for a TableScanDesc created by
    1078              :  * table_beginscan_tidrange.
    1079              :  */
    1080              : static inline void
    1081          105 : table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
    1082              :                       ItemPointer maxtid)
    1083              : {
    1084              :     /* Ensure table_beginscan_tidrange() was used. */
    1085              :     Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
    1086              : 
    1087          105 :     sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
    1088          105 :     sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
    1089          105 : }
    1090              : 
    1091              : /*
    1092              :  * Fetch the next tuple from `sscan` for a TID range scan created by
    1093              :  * table_beginscan_tidrange().  Stores the tuple in `slot` and returns true,
    1094              :  * or returns false if no more tuples exist in the range.
    1095              :  */
    1096              : static inline bool
    1097         5636 : table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
    1098              :                                 TupleTableSlot *slot)
    1099              : {
    1100              :     /* Ensure table_beginscan_tidrange() was used. */
    1101              :     Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
    1102              : 
    1103              :     /* We don't expect actual scans using NoMovementScanDirection */
    1104              :     Assert(direction == ForwardScanDirection ||
    1105              :            direction == BackwardScanDirection);
    1106              : 
    1107         5636 :     return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
    1108              :                                                                direction,
    1109              :                                                                slot);
    1110              : }
    1111              : 
    1112              : 
    1113              : /* ----------------------------------------------------------------------------
    1114              :  * Parallel table scan related functions.
    1115              :  * ----------------------------------------------------------------------------
    1116              :  */
    1117              : 
    1118              : /*
    1119              :  * Estimate the size of shared memory needed for a parallel scan of this
    1120              :  * relation.
    1121              :  */
    1122              : extern Size table_parallelscan_estimate(Relation rel, Snapshot snapshot);
    1123              : 
    1124              : /*
    1125              :  * Initialize ParallelTableScanDesc for a parallel scan of this
    1126              :  * relation. `pscan` needs to be sized according to parallelscan_estimate()
    1127              :  * for the same relation.  Call this just once in the leader process; then,
    1128              :  * individual workers attach via table_beginscan_parallel.
    1129              :  */
    1130              : extern void table_parallelscan_initialize(Relation rel,
    1131              :                                           ParallelTableScanDesc pscan,
    1132              :                                           Snapshot snapshot);
    1133              : 
    1134              : /*
    1135              :  * Begin a parallel scan. `pscan` needs to have been initialized with
    1136              :  * table_parallelscan_initialize(), for the same relation. The initialization
    1137              :  * does not need to have happened in this backend.
    1138              :  *
    1139              :  * Caller must hold a suitable lock on the relation.
    1140              :  */
    1141              : extern TableScanDesc table_beginscan_parallel(Relation relation,
    1142              :                                               ParallelTableScanDesc pscan);
    1143              : 
    1144              : /*
    1145              :  * Begin a parallel tid range scan. `pscan` needs to have been initialized
    1146              :  * with table_parallelscan_initialize(), for the same relation. The
    1147              :  * initialization does not need to have happened in this backend.
    1148              :  *
    1149              :  * Caller must hold a suitable lock on the relation.
    1150              :  */
    1151              : extern TableScanDesc table_beginscan_parallel_tidrange(Relation relation,
    1152              :                                                        ParallelTableScanDesc pscan);
    1153              : 
    1154              : /*
    1155              :  * Restart a parallel scan.  Call this in the leader process.  Caller is
    1156              :  * responsible for making sure that all workers have finished the scan
    1157              :  * beforehand.
    1158              :  */
    1159              : static inline void
    1160          114 : table_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan)
    1161              : {
    1162          114 :     rel->rd_tableam->parallelscan_reinitialize(rel, pscan);
    1163          114 : }
    1164              : 
    1165              : 
    1166              : /* ----------------------------------------------------------------------------
    1167              :  *  Index scan related functions.
    1168              :  * ----------------------------------------------------------------------------
    1169              :  */
    1170              : 
    1171              : /*
    1172              :  * Prepare to fetch tuples from the relation, as needed when fetching tuples
    1173              :  * for an index scan.
    1174              :  *
    1175              :  * Tuples for an index scan can then be fetched via table_index_fetch_tuple().
    1176              :  */
    1177              : static inline IndexFetchTableData *
    1178     13914122 : table_index_fetch_begin(Relation rel)
    1179              : {
    1180              :     /*
    1181              :      * We don't allow scans to be started while CheckXidAlive is set, except
    1182              :      * via systable_beginscan() et al.  See detailed comments in xact.c where
    1183              :      * these variables are declared.
    1184              :      */
    1185     13914122 :     if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
    1186            0 :         elog(ERROR, "scan started during logical decoding");
    1187              : 
    1188     13914122 :     return rel->rd_tableam->index_fetch_begin(rel);
    1189              : }
    1190              : 
    1191              : /*
    1192              :  * Reset index fetch. Typically this will release cross index fetch resources
    1193              :  * held in IndexFetchTableData.
    1194              :  */
    1195              : static inline void
    1196     12350670 : table_index_fetch_reset(struct IndexFetchTableData *scan)
    1197              : {
    1198     12350670 :     scan->rel->rd_tableam->index_fetch_reset(scan);
    1199     12350670 : }
    1200              : 
    1201              : /*
    1202              :  * Release resources and deallocate index fetch.
    1203              :  */
    1204              : static inline void
    1205     13913220 : table_index_fetch_end(struct IndexFetchTableData *scan)
    1206              : {
    1207     13913220 :     scan->rel->rd_tableam->index_fetch_end(scan);
    1208     13913220 : }
    1209              : 
    1210              : /*
    1211              :  * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing
    1212              :  * a visibility test according to `snapshot`. If a tuple was found and passed
    1213              :  * the visibility test, returns true, false otherwise. Note that *tid may be
    1214              :  * modified when we return true (see later remarks on multiple row versions
    1215              :  * reachable via a single index entry).
    1216              :  *
    1217              :  * *call_again needs to be false on the first call to table_index_fetch_tuple() for
    1218              :  * a tid. If there potentially is another tuple matching the tid, *call_again
    1219              :  * will be set to true, signaling that table_index_fetch_tuple() should be called
    1220              :  * again for the same tid.
    1221              :  *
    1222              :  * *all_dead, if all_dead is not NULL, will be set to true by
    1223              :  * table_index_fetch_tuple() iff it is guaranteed that no backend needs to see
    1224              :  * that tuple. Index AMs can use that to avoid returning that tid in future
    1225              :  * searches.
    1226              :  *
    1227              :  * The difference between this function and table_tuple_fetch_row_version()
    1228              :  * is that this function returns the currently visible version of a row if
    1229              :  * the AM supports storing multiple row versions reachable via a single index
    1230              :  * entry (like heap's HOT). Whereas table_tuple_fetch_row_version() only
    1231              :  * evaluates the tuple exactly at `tid`. Outside of index entry ->table tuple
    1232              :  * lookups, table_tuple_fetch_row_version() is what's usually needed.
    1233              :  */
    1234              : static inline bool
    1235     19861852 : table_index_fetch_tuple(struct IndexFetchTableData *scan,
    1236              :                         ItemPointer tid,
    1237              :                         Snapshot snapshot,
    1238              :                         TupleTableSlot *slot,
    1239              :                         bool *call_again, bool *all_dead)
    1240              : {
    1241     19861852 :     return scan->rel->rd_tableam->index_fetch_tuple(scan, tid, snapshot,
    1242              :                                                     slot, call_again,
    1243              :                                                     all_dead);
    1244              : }
    1245              : 
    1246              : /*
    1247              :  * This is a convenience wrapper around table_index_fetch_tuple() which
    1248              :  * returns whether there are table tuple items corresponding to an index
    1249              :  * entry.  This likely is only useful to verify if there's a conflict in a
    1250              :  * unique index.
    1251              :  */
    1252              : extern bool table_index_fetch_tuple_check(Relation rel,
    1253              :                                           ItemPointer tid,
    1254              :                                           Snapshot snapshot,
    1255              :                                           bool *all_dead);
    1256              : 
    1257              : 
    1258              : /* ------------------------------------------------------------------------
    1259              :  * Functions for non-modifying operations on individual tuples
    1260              :  * ------------------------------------------------------------------------
    1261              :  */
    1262              : 
    1263              : 
    1264              : /*
    1265              :  * Fetch tuple at `tid` into `slot`, after doing a visibility test according to
    1266              :  * `snapshot`. If a tuple was found and passed the visibility test, returns
    1267              :  * true, false otherwise.
    1268              :  *
    1269              :  * See table_index_fetch_tuple's comment about what the difference between
    1270              :  * these functions is. It is correct to use this function outside of index
    1271              :  * entry->table tuple lookups.
    1272              :  */
    1273              : static inline bool
    1274       182976 : table_tuple_fetch_row_version(Relation rel,
    1275              :                               ItemPointer tid,
    1276              :                               Snapshot snapshot,
    1277              :                               TupleTableSlot *slot)
    1278              : {
    1279              :     /*
    1280              :      * We don't expect direct calls to table_tuple_fetch_row_version with
    1281              :      * valid CheckXidAlive for catalog or regular tables.  See detailed
    1282              :      * comments in xact.c where these variables are declared.
    1283              :      */
    1284       182976 :     if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
    1285            0 :         elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding");
    1286              : 
    1287       182976 :     return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot);
    1288              : }
    1289              : 
    1290              : /*
    1291              :  * Verify that `tid` is a potentially valid tuple identifier. That doesn't
    1292              :  * mean that the pointed to row needs to exist or be visible, but that
    1293              :  * attempting to fetch the row (e.g. with table_tuple_get_latest_tid() or
    1294              :  * table_tuple_fetch_row_version()) should not error out if called with that
    1295              :  * tid.
    1296              :  *
    1297              :  * `scan` needs to have been started via table_beginscan().
    1298              :  */
    1299              : static inline bool
    1300          227 : table_tuple_tid_valid(TableScanDesc scan, ItemPointer tid)
    1301              : {
    1302          227 :     return scan->rs_rd->rd_tableam->tuple_tid_valid(scan, tid);
    1303              : }
    1304              : 
    1305              : /*
    1306              :  * Return the latest version of the tuple at `tid`, by updating `tid` to
    1307              :  * point at the newest version.
    1308              :  */
    1309              : extern void table_tuple_get_latest_tid(TableScanDesc scan, ItemPointer tid);
    1310              : 
    1311              : /*
    1312              :  * Return true iff tuple in slot satisfies the snapshot.
    1313              :  *
    1314              :  * This assumes the slot's tuple is valid, and of the appropriate type for the
    1315              :  * AM.
    1316              :  *
    1317              :  * Some AMs might modify the data underlying the tuple as a side-effect. If so
    1318              :  * they ought to mark the relevant buffer dirty.
    1319              :  */
    1320              : static inline bool
    1321       128805 : table_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot,
    1322              :                                Snapshot snapshot)
    1323              : {
    1324       128805 :     return rel->rd_tableam->tuple_satisfies_snapshot(rel, slot, snapshot);
    1325              : }
    1326              : 
    1327              : /*
    1328              :  * Determine which index tuples are safe to delete based on their table TID.
    1329              :  *
    1330              :  * Determines which entries from index AM caller's TM_IndexDeleteOp state
    1331              :  * point to vacuumable table tuples.  Entries that are found by tableam to be
    1332              :  * vacuumable are naturally safe for index AM to delete, and so get directly
    1333              :  * marked as deletable.  See comments above TM_IndexDelete and comments above
    1334              :  * TM_IndexDeleteOp for full details.
    1335              :  *
    1336              :  * Returns a snapshotConflictHorizon transaction ID that caller places in
    1337              :  * its index deletion WAL record.  This might be used during subsequent REDO
    1338              :  * of the WAL record when in Hot Standby mode -- a recovery conflict for the
    1339              :  * index deletion operation might be required on the standby.
    1340              :  */
    1341              : static inline TransactionId
    1342         6067 : table_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
    1343              : {
    1344         6067 :     return rel->rd_tableam->index_delete_tuples(rel, delstate);
    1345              : }
    1346              : 
    1347              : 
    1348              : /* ----------------------------------------------------------------------------
    1349              :  *  Functions for manipulations of physical tuples.
    1350              :  * ----------------------------------------------------------------------------
    1351              :  */
    1352              : 
    1353              : /*
    1354              :  * Insert a tuple from a slot into table AM routine.
    1355              :  *
    1356              :  * The options bitmask allows the caller to specify options that may change the
    1357              :  * behaviour of the AM. The AM will ignore options that it does not support.
    1358              :  *
    1359              :  * If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
    1360              :  * free space in the relation. This can save some cycles when we know the
    1361              :  * relation is new and doesn't contain useful amounts of free space.
    1362              :  * TABLE_INSERT_SKIP_FSM is commonly passed directly to
    1363              :  * RelationGetBufferForTuple. See that method for more information.
    1364              :  *
    1365              :  * TABLE_INSERT_FROZEN should only be specified for inserts into
    1366              :  * relation storage created during the current subtransaction and when
    1367              :  * there are no prior snapshots or pre-existing portals open.
    1368              :  * This causes rows to be frozen, which is an MVCC violation and
    1369              :  * requires explicit options chosen by user.
    1370              :  *
    1371              :  * TABLE_INSERT_NO_LOGICAL force-disables the emitting of logical decoding
    1372              :  * information for the tuple. This should solely be used during table rewrites
    1373              :  * where RelationIsLogicallyLogged(relation) is not yet accurate for the new
    1374              :  * relation.
    1375              :  *
    1376              :  * Note that most of these options will be applied when inserting into the
    1377              :  * heap's TOAST table, too, if the tuple requires any out-of-line data.
    1378              :  *
    1379              :  * The BulkInsertState object (if any; bistate can be NULL for default
    1380              :  * behavior) is also just passed through to RelationGetBufferForTuple. If
    1381              :  * `bistate` is provided, table_finish_bulk_insert() needs to be called.
    1382              :  *
    1383              :  * On return the slot's tts_tid and tts_tableOid are updated to reflect the
    1384              :  * insertion. But note that any toasting of fields within the slot is NOT
    1385              :  * reflected in the slots contents.
    1386              :  */
    1387              : static inline void
    1388      7448691 : table_tuple_insert(Relation rel, TupleTableSlot *slot, CommandId cid,
    1389              :                    int options, BulkInsertStateData *bistate)
    1390              : {
    1391      7448691 :     rel->rd_tableam->tuple_insert(rel, slot, cid, options,
    1392              :                                   bistate);
    1393      7448673 : }
    1394              : 
    1395              : /*
    1396              :  * Perform a "speculative insertion". These can be backed out afterwards
    1397              :  * without aborting the whole transaction.  Other sessions can wait for the
    1398              :  * speculative insertion to be confirmed, turning it into a regular tuple, or
    1399              :  * aborted, as if it never existed.  Speculatively inserted tuples behave as
    1400              :  * "value locks" of short duration, used to implement INSERT .. ON CONFLICT.
    1401              :  *
    1402              :  * A transaction having performed a speculative insertion has to either abort,
    1403              :  * or finish the speculative insertion with
    1404              :  * table_tuple_complete_speculative(succeeded = ...).
    1405              :  */
    1406              : static inline void
    1407         2133 : table_tuple_insert_speculative(Relation rel, TupleTableSlot *slot,
    1408              :                                CommandId cid, int options,
    1409              :                                BulkInsertStateData *bistate,
    1410              :                                uint32 specToken)
    1411              : {
    1412         2133 :     rel->rd_tableam->tuple_insert_speculative(rel, slot, cid, options,
    1413              :                                               bistate, specToken);
    1414         2133 : }
    1415              : 
    1416              : /*
    1417              :  * Complete "speculative insertion" started in the same transaction. If
    1418              :  * succeeded is true, the tuple is fully inserted, if false, it's removed.
    1419              :  */
    1420              : static inline void
    1421         2130 : table_tuple_complete_speculative(Relation rel, TupleTableSlot *slot,
    1422              :                                  uint32 specToken, bool succeeded)
    1423              : {
    1424         2130 :     rel->rd_tableam->tuple_complete_speculative(rel, slot, specToken,
    1425              :                                                 succeeded);
    1426         2130 : }
    1427              : 
    1428              : /*
    1429              :  * Insert multiple tuples into a table.
    1430              :  *
    1431              :  * This is like table_tuple_insert(), but inserts multiple tuples in one
    1432              :  * operation. That's often faster than calling table_tuple_insert() in a loop,
    1433              :  * because e.g. the AM can reduce WAL logging and page locking overhead.
    1434              :  *
    1435              :  * Except for taking `nslots` tuples as input, and an array of TupleTableSlots
    1436              :  * in `slots`, the parameters for table_multi_insert() are the same as for
    1437              :  * table_tuple_insert().
    1438              :  *
    1439              :  * Note: this leaks memory into the current memory context. You can create a
    1440              :  * temporary context before calling this, if that's a problem.
    1441              :  */
    1442              : static inline void
    1443         1242 : table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots,
    1444              :                    CommandId cid, int options, BulkInsertStateData *bistate)
    1445              : {
    1446         1242 :     rel->rd_tableam->multi_insert(rel, slots, nslots,
    1447              :                                   cid, options, bistate);
    1448         1242 : }
    1449              : 
    1450              : /*
    1451              :  * Delete a tuple.
    1452              :  *
    1453              :  * NB: do not call this directly unless prepared to deal with
    1454              :  * concurrent-update conditions.  Use simple_table_tuple_delete instead.
    1455              :  *
    1456              :  * Input parameters:
    1457              :  *  rel - table to be modified (caller must hold suitable lock)
    1458              :  *  tid - TID of tuple to be deleted
    1459              :  *  cid - delete command ID (used for visibility test, and stored into
    1460              :  *      cmax if successful)
    1461              :  *  crosscheck - if not InvalidSnapshot, also check tuple against this
    1462              :  *  wait - true if should wait for any conflicting update to commit/abort
    1463              :  *  changingPart - true iff the tuple is being moved to another partition
    1464              :  *      table due to an update of the partition key. Otherwise, false.
    1465              :  *
    1466              :  * Output parameters:
    1467              :  *  tmfd - filled in failure cases (see below)
    1468              :  *
    1469              :  * Normal, successful return value is TM_Ok, which means we did actually
    1470              :  * delete it.  Failure return codes are TM_SelfModified, TM_Updated, and
    1471              :  * TM_BeingModified (the last only possible if wait == false).
    1472              :  *
    1473              :  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
    1474              :  * t_xmax, and, if possible, t_cmax.  See comments for struct
    1475              :  * TM_FailureData for additional info.
    1476              :  */
    1477              : static inline TM_Result
    1478       866483 : table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
    1479              :                    Snapshot snapshot, Snapshot crosscheck, bool wait,
    1480              :                    TM_FailureData *tmfd, bool changingPart)
    1481              : {
    1482       866483 :     return rel->rd_tableam->tuple_delete(rel, tid, cid,
    1483              :                                          snapshot, crosscheck,
    1484              :                                          wait, tmfd, changingPart);
    1485              : }
    1486              : 
    1487              : /*
    1488              :  * Update a tuple.
    1489              :  *
    1490              :  * NB: do not call this directly unless you are prepared to deal with
    1491              :  * concurrent-update conditions.  Use simple_table_tuple_update instead.
    1492              :  *
    1493              :  * Input parameters:
    1494              :  *  rel - table to be modified (caller must hold suitable lock)
    1495              :  *  otid - TID of old tuple to be replaced
    1496              :  *  cid - update command ID (used for visibility test, and stored into
    1497              :  *      cmax/cmin if successful)
    1498              :  *  crosscheck - if not InvalidSnapshot, also check old tuple against this
    1499              :  *  wait - true if should wait for any conflicting update to commit/abort
    1500              :  *
    1501              :  * Output parameters:
    1502              :  *  slot - newly constructed tuple data to store
    1503              :  *  tmfd - filled in failure cases (see below)
    1504              :  *  lockmode - filled with lock mode acquired on tuple
    1505              :  *  update_indexes - in success cases this is set if new index entries
    1506              :  *      are required for this tuple; see TU_UpdateIndexes
    1507              :  *
    1508              :  * Normal, successful return value is TM_Ok, which means we did actually
    1509              :  * update it.  Failure return codes are TM_SelfModified, TM_Updated, and
    1510              :  * TM_BeingModified (the last only possible if wait == false).
    1511              :  *
    1512              :  * On success, the slot's tts_tid and tts_tableOid are updated to match the new
    1513              :  * stored tuple; in particular, slot->tts_tid is set to the TID where the
    1514              :  * new tuple was inserted, and its HEAP_ONLY_TUPLE flag is set iff a HOT
    1515              :  * update was done.  However, any TOAST changes in the new tuple's
    1516              :  * data are not reflected into *newtup.
    1517              :  *
    1518              :  * In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
    1519              :  * t_xmax, and, if possible, t_cmax.  See comments for struct TM_FailureData
    1520              :  * for additional info.
    1521              :  */
    1522              : static inline TM_Result
    1523       198859 : table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot,
    1524              :                    CommandId cid, Snapshot snapshot, Snapshot crosscheck,
    1525              :                    bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode,
    1526              :                    TU_UpdateIndexes *update_indexes)
    1527              : {
    1528       198859 :     return rel->rd_tableam->tuple_update(rel, otid, slot,
    1529              :                                          cid, snapshot, crosscheck,
    1530              :                                          wait, tmfd,
    1531              :                                          lockmode, update_indexes);
    1532              : }
    1533              : 
    1534              : /*
    1535              :  * Lock a tuple in the specified mode.
    1536              :  *
    1537              :  * Input parameters:
    1538              :  *  rel: relation containing tuple (caller must hold suitable lock)
    1539              :  *  tid: TID of tuple to lock (updated if an update chain was followed)
    1540              :  *  snapshot: snapshot to use for visibility determinations
    1541              :  *  cid: current command ID (used for visibility test, and stored into
    1542              :  *      tuple's cmax if lock is successful)
    1543              :  *  mode: lock mode desired
    1544              :  *  wait_policy: what to do if tuple lock is not available
    1545              :  *  flags:
    1546              :  *      If TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS, follow the update chain to
    1547              :  *      also lock descendant tuples if lock modes don't conflict.
    1548              :  *      If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock
    1549              :  *      latest version.
    1550              :  *
    1551              :  * Output parameters:
    1552              :  *  *slot: contains the target tuple
    1553              :  *  *tmfd: filled in failure cases (see below)
    1554              :  *
    1555              :  * Function result may be:
    1556              :  *  TM_Ok: lock was successfully acquired
    1557              :  *  TM_Invisible: lock failed because tuple was never visible to us
    1558              :  *  TM_SelfModified: lock failed because tuple updated by self
    1559              :  *  TM_Updated: lock failed because tuple updated by other xact
    1560              :  *  TM_Deleted: lock failed because tuple deleted by other xact
    1561              :  *  TM_WouldBlock: lock couldn't be acquired and wait_policy is skip
    1562              :  *
    1563              :  * In the failure cases other than TM_Invisible and TM_Deleted, the routine
    1564              :  * fills *tmfd with the tuple's t_ctid, t_xmax, and, if possible, t_cmax.
    1565              :  * Additionally, in both success and failure cases, tmfd->traversed is set if
    1566              :  * an update chain was followed.  See comments for struct TM_FailureData for
    1567              :  * additional info.
    1568              :  */
    1569              : static inline TM_Result
    1570       158325 : table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
    1571              :                  TupleTableSlot *slot, CommandId cid, LockTupleMode mode,
    1572              :                  LockWaitPolicy wait_policy, uint8 flags,
    1573              :                  TM_FailureData *tmfd)
    1574              : {
    1575       158325 :     return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot,
    1576              :                                        cid, mode, wait_policy,
    1577              :                                        flags, tmfd);
    1578              : }
    1579              : 
    1580              : /*
    1581              :  * Perform operations necessary to complete insertions made via
    1582              :  * tuple_insert and multi_insert with a BulkInsertState specified.
    1583              :  */
    1584              : static inline void
    1585         2502 : table_finish_bulk_insert(Relation rel, int options)
    1586              : {
    1587              :     /* optional callback */
    1588         2502 :     if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
    1589            0 :         rel->rd_tableam->finish_bulk_insert(rel, options);
    1590         2502 : }
    1591              : 
    1592              : 
    1593              : /* ------------------------------------------------------------------------
    1594              :  * DDL related functionality.
    1595              :  * ------------------------------------------------------------------------
    1596              :  */
    1597              : 
    1598              : /*
    1599              :  * Create storage for `rel` in `newrlocator`, with persistence set to
    1600              :  * `persistence`.
    1601              :  *
    1602              :  * This is used both during relation creation and various DDL operations to
    1603              :  * create new rel storage that can be filled from scratch.  When creating
    1604              :  * new storage for an existing relfilelocator, this should be called before the
    1605              :  * relcache entry has been updated.
    1606              :  *
    1607              :  * *freezeXid, *minmulti are set to the xid / multixact horizon for the table
    1608              :  * that pg_class.{relfrozenxid, relminmxid} have to be set to.
    1609              :  */
    1610              : static inline void
    1611        34075 : table_relation_set_new_filelocator(Relation rel,
    1612              :                                    const RelFileLocator *newrlocator,
    1613              :                                    char persistence,
    1614              :                                    TransactionId *freezeXid,
    1615              :                                    MultiXactId *minmulti)
    1616              : {
    1617        34075 :     rel->rd_tableam->relation_set_new_filelocator(rel, newrlocator,
    1618              :                                                   persistence, freezeXid,
    1619              :                                                   minmulti);
    1620        34075 : }
    1621              : 
    1622              : /*
    1623              :  * Remove all table contents from `rel`, in a non-transactional manner.
    1624              :  * Non-transactional meaning that there's no need to support rollbacks. This
    1625              :  * commonly only is used to perform truncations for relation storage created in
    1626              :  * the current transaction.
    1627              :  */
    1628              : static inline void
    1629          312 : table_relation_nontransactional_truncate(Relation rel)
    1630              : {
    1631          312 :     rel->rd_tableam->relation_nontransactional_truncate(rel);
    1632          312 : }
    1633              : 
    1634              : /*
    1635              :  * Copy data from `rel` into the new relfilelocator `newrlocator`. The new
    1636              :  * relfilelocator may not have storage associated before this function is
    1637              :  * called. This is only supposed to be used for low level operations like
    1638              :  * changing a relation's tablespace.
    1639              :  */
    1640              : static inline void
    1641           49 : table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator)
    1642              : {
    1643           49 :     rel->rd_tableam->relation_copy_data(rel, newrlocator);
    1644           49 : }
    1645              : 
    1646              : /*
    1647              :  * Copy data from `OldTable` into `NewTable`, as part of a CLUSTER or VACUUM
    1648              :  * FULL.
    1649              :  *
    1650              :  * Additional Input parameters:
    1651              :  * - use_sort - if true, the table contents are sorted appropriate for
    1652              :  *   `OldIndex`; if false and OldIndex is not InvalidOid, the data is copied
    1653              :  *   in that index's order; if false and OldIndex is InvalidOid, no sorting is
    1654              :  *   performed
    1655              :  * - OldIndex - see use_sort
    1656              :  * - OldestXmin - computed by vacuum_get_cutoffs(), even when
    1657              :  *   not needed for the relation's AM
    1658              :  * - *xid_cutoff - ditto
    1659              :  * - *multi_cutoff - ditto
    1660              :  *
    1661              :  * Output parameters:
    1662              :  * - *xid_cutoff - rel's new relfrozenxid value, may be invalid
    1663              :  * - *multi_cutoff - rel's new relminmxid value, may be invalid
    1664              :  * - *tups_vacuumed - stats, for logging, if appropriate for AM
    1665              :  * - *tups_recently_dead - stats, for logging, if appropriate for AM
    1666              :  */
    1667              : static inline void
    1668          319 : table_relation_copy_for_cluster(Relation OldTable, Relation NewTable,
    1669              :                                 Relation OldIndex,
    1670              :                                 bool use_sort,
    1671              :                                 TransactionId OldestXmin,
    1672              :                                 TransactionId *xid_cutoff,
    1673              :                                 MultiXactId *multi_cutoff,
    1674              :                                 double *num_tuples,
    1675              :                                 double *tups_vacuumed,
    1676              :                                 double *tups_recently_dead)
    1677              : {
    1678          319 :     OldTable->rd_tableam->relation_copy_for_cluster(OldTable, NewTable, OldIndex,
    1679              :                                                     use_sort, OldestXmin,
    1680              :                                                     xid_cutoff, multi_cutoff,
    1681              :                                                     num_tuples, tups_vacuumed,
    1682              :                                                     tups_recently_dead);
    1683          319 : }
    1684              : 
    1685              : /*
    1686              :  * Perform VACUUM on the relation. The VACUUM can be triggered by a user or by
    1687              :  * autovacuum. The specific actions performed by the AM will depend heavily on
    1688              :  * the individual AM.
    1689              :  *
    1690              :  * On entry a transaction needs to already been established, and the
    1691              :  * table is locked with a ShareUpdateExclusive lock.
    1692              :  *
    1693              :  * Note that neither VACUUM FULL (and CLUSTER), nor ANALYZE go through this
    1694              :  * routine, even if (for ANALYZE) it is part of the same VACUUM command.
    1695              :  */
    1696              : static inline void
    1697       110297 : table_relation_vacuum(Relation rel, const VacuumParams params,
    1698              :                       BufferAccessStrategy bstrategy)
    1699              : {
    1700       110297 :     rel->rd_tableam->relation_vacuum(rel, params, bstrategy);
    1701       110297 : }
    1702              : 
    1703              : /*
    1704              :  * Prepare to analyze the next block in the read stream. The scan needs to
     1705              :  * have been started with table_beginscan_analyze().  Note that this routine
    1706              :  * might acquire resources like locks that are held until
    1707              :  * table_scan_analyze_next_tuple() returns false.
    1708              :  *
    1709              :  * Returns false if block is unsuitable for sampling, true otherwise.
    1710              :  */
    1711              : static inline bool
    1712        79152 : table_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
    1713              : {
    1714        79152 :     return scan->rs_rd->rd_tableam->scan_analyze_next_block(scan, stream);
    1715              : }
    1716              : 
    1717              : /*
    1718              :  * Iterate over tuples in the block selected with
    1719              :  * table_scan_analyze_next_block() (which needs to have returned true, and
    1720              :  * this routine may not have returned false for the same block before). If a
    1721              :  * tuple that's suitable for sampling is found, true is returned and a tuple
    1722              :  * is stored in `slot`.
    1723              :  *
    1724              :  * *liverows and *deadrows are incremented according to the encountered
    1725              :  * tuples.
    1726              :  */
    1727              : static inline bool
    1728      5505998 : table_scan_analyze_next_tuple(TableScanDesc scan,
    1729              :                               double *liverows, double *deadrows,
    1730              :                               TupleTableSlot *slot)
    1731              : {
    1732      5505998 :     return scan->rs_rd->rd_tableam->scan_analyze_next_tuple(scan,
    1733              :                                                             liverows, deadrows,
    1734              :                                                             slot);
    1735              : }
    1736              : 
    1737              : /*
    1738              :  * table_index_build_scan - scan the table to find tuples to be indexed
    1739              :  *
    1740              :  * This is called back from an access-method-specific index build procedure
    1741              :  * after the AM has done whatever setup it needs.  The parent table relation
    1742              :  * is scanned to find tuples that should be entered into the index.  Each
    1743              :  * such tuple is passed to the AM's callback routine, which does the right
    1744              :  * things to add it to the new index.  After we return, the AM's index
    1745              :  * build procedure does whatever cleanup it needs.
    1746              :  *
    1747              :  * The total count of live tuples is returned.  This is for updating pg_class
    1748              :  * statistics.  (It's annoying not to be able to do that here, but we want to
    1749              :  * merge that update with others; see index_update_stats.)  Note that the
    1750              :  * index AM itself must keep track of the number of index tuples; we don't do
    1751              :  * so here because the AM might reject some of the tuples for its own reasons,
    1752              :  * such as being unable to store NULLs.
    1753              :  *
    1754              :  * If 'progress', the PROGRESS_SCAN_BLOCKS_TOTAL counter is updated when
    1755              :  * starting the scan, and PROGRESS_SCAN_BLOCKS_DONE is updated as we go along.
    1756              :  *
    1757              :  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
    1758              :  * any potentially broken HOT chains.  Currently, we set this if there are any
    1759              :  * RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without trying
    1760              :  * very hard to detect whether they're really incompatible with the chain tip.
    1761              :  * This only really makes sense for heap AM, it might need to be generalized
    1762              :  * for other AMs later.
    1763              :  */
    1764              : static inline double
    1765        27865 : table_index_build_scan(Relation table_rel,
    1766              :                        Relation index_rel,
    1767              :                        IndexInfo *index_info,
    1768              :                        bool allow_sync,
    1769              :                        bool progress,
    1770              :                        IndexBuildCallback callback,
    1771              :                        void *callback_state,
    1772              :                        TableScanDesc scan)
    1773              : {
    1774        27865 :     return table_rel->rd_tableam->index_build_range_scan(table_rel,
    1775              :                                                          index_rel,
    1776              :                                                          index_info,
    1777              :                                                          allow_sync,
    1778              :                                                          false,
    1779              :                                                          progress,
    1780              :                                                          0,
    1781              :                                                          InvalidBlockNumber,
    1782              :                                                          callback,
    1783              :                                                          callback_state,
    1784              :                                                          scan);
    1785              : }
    1786              : 
    1787              : /*
    1788              :  * As table_index_build_scan(), except that instead of scanning the complete
    1789              :  * table, only the given number of blocks are scanned.  Scan to end-of-rel can
    1790              :  * be signaled by passing InvalidBlockNumber as numblocks.  Note that
    1791              :  * restricting the range to scan cannot be done when requesting syncscan.
    1792              :  *
    1793              :  * When "anyvisible" mode is requested, all tuples visible to any transaction
    1794              :  * are indexed and counted as live, including those inserted or deleted by
    1795              :  * transactions that are still in progress.
    1796              :  */
    1797              : static inline double
    1798         1472 : table_index_build_range_scan(Relation table_rel,
    1799              :                              Relation index_rel,
    1800              :                              IndexInfo *index_info,
    1801              :                              bool allow_sync,
    1802              :                              bool anyvisible,
    1803              :                              bool progress,
    1804              :                              BlockNumber start_blockno,
    1805              :                              BlockNumber numblocks,
    1806              :                              IndexBuildCallback callback,
    1807              :                              void *callback_state,
    1808              :                              TableScanDesc scan)
    1809              : {
    1810         1472 :     return table_rel->rd_tableam->index_build_range_scan(table_rel,
    1811              :                                                          index_rel,
    1812              :                                                          index_info,
    1813              :                                                          allow_sync,
    1814              :                                                          anyvisible,
    1815              :                                                          progress,
    1816              :                                                          start_blockno,
    1817              :                                                          numblocks,
    1818              :                                                          callback,
    1819              :                                                          callback_state,
    1820              :                                                          scan);
    1821              : }
    1822              : 
    1823              : /*
    1824              :  * table_index_validate_scan - second table scan for concurrent index build
    1825              :  *
    1826              :  * See validate_index() for an explanation.
    1827              :  */
    1828              : static inline void
    1829          356 : table_index_validate_scan(Relation table_rel,
    1830              :                           Relation index_rel,
    1831              :                           IndexInfo *index_info,
    1832              :                           Snapshot snapshot,
    1833              :                           ValidateIndexState *state)
    1834              : {
    1835          356 :     table_rel->rd_tableam->index_validate_scan(table_rel,
    1836              :                                                index_rel,
    1837              :                                                index_info,
    1838              :                                                snapshot,
    1839              :                                                state);
    1840          356 : }
    1841              : 
    1842              : 
    1843              : /* ----------------------------------------------------------------------------
    1844              :  * Miscellaneous functionality
    1845              :  * ----------------------------------------------------------------------------
    1846              :  */
    1847              : 
    1848              : /*
    1849              :  * Return the current size of `rel` in bytes. If `forkNumber` is
    1850              :  * InvalidForkNumber, return the relation's overall size, otherwise the size
    1851              :  * for the indicated fork.
    1852              :  *
    1853              :  * Note that the overall size might not be the equivalent of the sum of sizes
    1854              :  * for the individual forks for some AMs, e.g. because the AMs storage does
    1855              :  * not neatly map onto the builtin types of forks.
    1856              :  */
    1857              : static inline uint64
    1858      1709615 : table_relation_size(Relation rel, ForkNumber forkNumber)
    1859              : {
    1860      1709615 :     return rel->rd_tableam->relation_size(rel, forkNumber);
    1861              : }
    1862              : 
    1863              : /*
    1864              :  * table_relation_needs_toast_table - does this relation need a toast table?
    1865              :  */
    1866              : static inline bool
    1867        23384 : table_relation_needs_toast_table(Relation rel)
    1868              : {
    1869        23384 :     return rel->rd_tableam->relation_needs_toast_table(rel);
    1870              : }
    1871              : 
    1872              : /*
    1873              :  * Return the OID of the AM that should be used to implement the TOAST table
    1874              :  * for this relation.
    1875              :  */
    1876              : static inline Oid
    1877         9046 : table_relation_toast_am(Relation rel)
    1878              : {
    1879         9046 :     return rel->rd_tableam->relation_toast_am(rel);
    1880              : }
    1881              : 
    1882              : /*
    1883              :  * Fetch all or part of a TOAST value from a TOAST table.
    1884              :  *
    1885              :  * If this AM is never used to implement a TOAST table, then this callback
    1886              :  * is not needed. But, if toasted values are ever stored in a table of this
    1887              :  * type, then you will need this callback.
    1888              :  *
    1889              :  * toastrel is the relation in which the toasted value is stored.
    1890              :  *
    1891              :  * valueid identifies which toast value is to be fetched. For the heap,
    1892              :  * this corresponds to the values stored in the chunk_id column.
    1893              :  *
    1894              :  * attrsize is the total size of the toast value to be fetched.
    1895              :  *
    1896              :  * sliceoffset is the offset within the toast value of the first byte that
    1897              :  * should be fetched.
    1898              :  *
    1899              :  * slicelength is the number of bytes from the toast value that should be
    1900              :  * fetched.
    1901              :  *
    1902              :  * result is caller-allocated space into which the fetched bytes should be
    1903              :  * stored.
    1904              :  */
    1905              : static inline void
    1906        15044 : table_relation_fetch_toast_slice(Relation toastrel, Oid valueid,
    1907              :                                  int32 attrsize, int32 sliceoffset,
    1908              :                                  int32 slicelength, varlena *result)
    1909              : {
    1910        15044 :     toastrel->rd_tableam->relation_fetch_toast_slice(toastrel, valueid,
    1911              :                                                      attrsize,
    1912              :                                                      sliceoffset, slicelength,
    1913              :                                                      result);
    1914        15044 : }
    1915              : 
    1916              : 
    1917              : /* ----------------------------------------------------------------------------
    1918              :  * Planner related functionality
    1919              :  * ----------------------------------------------------------------------------
    1920              :  */
    1921              : 
    1922              : /*
    1923              :  * Estimate the current size of the relation, as an AM specific workhorse for
    1924              :  * estimate_rel_size(). Look there for an explanation of the parameters.
    1925              :  */
    1926              : static inline void
    1927       252507 : table_relation_estimate_size(Relation rel, int32 *attr_widths,
    1928              :                              BlockNumber *pages, double *tuples,
    1929              :                              double *allvisfrac)
    1930              : {
    1931       252507 :     rel->rd_tableam->relation_estimate_size(rel, attr_widths, pages, tuples,
    1932              :                                             allvisfrac);
    1933       252507 : }
    1934              : 
    1935              : 
    1936              : /* ----------------------------------------------------------------------------
    1937              :  * Executor related functionality
    1938              :  * ----------------------------------------------------------------------------
    1939              :  */
    1940              : 
    1941              : /*
    1942              :  * Fetch / check / return tuples as part of a bitmap table scan. `scan` needs
    1943              :  * to have been started via table_beginscan_bm(). Fetch the next tuple of a
    1944              :  * bitmap table scan into `slot` and return true if a visible tuple was found,
    1945              :  * false otherwise.
    1946              :  *
    1947              :  * `recheck` is set by the table AM to indicate whether or not the tuple in
    1948              :  * `slot` should be rechecked. Tuples from lossy pages will always need to be
    1949              :  * rechecked, but some non-lossy pages' tuples may also require recheck.
    1950              :  *
    1951              :  * `lossy_pages` is incremented if the block's representation in the bitmap is
    1952              :  * lossy; otherwise, `exact_pages` is incremented.
    1953              :  */
    1954              : static inline bool
    1955      3465091 : table_scan_bitmap_next_tuple(TableScanDesc scan,
    1956              :                              TupleTableSlot *slot,
    1957              :                              bool *recheck,
    1958              :                              uint64 *lossy_pages,
    1959              :                              uint64 *exact_pages)
    1960              : {
    1961      3465091 :     return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
    1962              :                                                            slot,
    1963              :                                                            recheck,
    1964              :                                                            lossy_pages,
    1965              :                                                            exact_pages);
    1966              : }
    1967              : 
    1968              : /*
    1969              :  * Prepare to fetch tuples from the next block in a sample scan. Returns false
    1970              :  * if the sample scan is finished, true otherwise. `scan` needs to have been
    1971              :  * started via table_beginscan_sampling().
    1972              :  *
    1973              :  * This will call the TsmRoutine's NextSampleBlock() callback if necessary
    1974              :  * (i.e. NextSampleBlock is not NULL), or perform a sequential scan over the
    1975              :  * underlying relation.
    1976              :  */
    1977              : static inline bool
    1978         6457 : table_scan_sample_next_block(TableScanDesc scan,
    1979              :                              SampleScanState *scanstate)
    1980              : {
    1981         6457 :     return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
    1982              : }
    1983              : 
    1984              : /*
    1985              :  * Fetch the next sample tuple into `slot` and return true if a visible tuple
    1986              :  * was found, false otherwise. table_scan_sample_next_block() needs to
    1987              :  * previously have selected a block (i.e. returned true), and no previous
    1988              :  * table_scan_sample_next_tuple() for the same block may have returned false.
    1989              :  *
    1990              :  * This will call the TsmRoutine's NextSampleTuple() callback.
    1991              :  */
    1992              : static inline bool
    1993       126949 : table_scan_sample_next_tuple(TableScanDesc scan,
    1994              :                              SampleScanState *scanstate,
    1995              :                              TupleTableSlot *slot)
    1996              : {
    1997       126949 :     return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate,
    1998              :                                                            slot);
    1999              : }
    2000              : 
    2001              : 
    2002              : /* ----------------------------------------------------------------------------
    2003              :  * Functions to make modifications a bit simpler.
    2004              :  * ----------------------------------------------------------------------------
    2005              :  */
    2006              : 
    2007              : extern void simple_table_tuple_insert(Relation rel, TupleTableSlot *slot);
    2008              : extern void simple_table_tuple_delete(Relation rel, ItemPointer tid,
    2009              :                                       Snapshot snapshot);
    2010              : extern void simple_table_tuple_update(Relation rel, ItemPointer otid,
    2011              :                                       TupleTableSlot *slot, Snapshot snapshot,
    2012              :                                       TU_UpdateIndexes *update_indexes);
    2013              : 
    2014              : 
    2015              : /* ----------------------------------------------------------------------------
    2016              :  * Helper functions to implement parallel scans for block oriented AMs.
    2017              :  * ----------------------------------------------------------------------------
    2018              :  */
    2019              : 
    2020              : extern Size table_block_parallelscan_estimate(Relation rel);
    2021              : extern Size table_block_parallelscan_initialize(Relation rel,
    2022              :                                                 ParallelTableScanDesc pscan);
    2023              : extern void table_block_parallelscan_reinitialize(Relation rel,
    2024              :                                                   ParallelTableScanDesc pscan);
    2025              : extern BlockNumber table_block_parallelscan_nextpage(Relation rel,
    2026              :                                                      ParallelBlockTableScanWorker pbscanwork,
    2027              :                                                      ParallelBlockTableScanDesc pbscan);
    2028              : extern void table_block_parallelscan_startblock_init(Relation rel,
    2029              :                                                      ParallelBlockTableScanWorker pbscanwork,
    2030              :                                                      ParallelBlockTableScanDesc pbscan,
    2031              :                                                      BlockNumber startblock,
    2032              :                                                      BlockNumber numblocks);
    2033              : 
    2034              : 
    2035              : /* ----------------------------------------------------------------------------
    2036              :  * Helper functions to implement relation sizing for block oriented AMs.
    2037              :  * ----------------------------------------------------------------------------
    2038              :  */
    2039              : 
    2040              : extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
    2041              : extern void table_block_relation_estimate_size(Relation rel,
    2042              :                                                int32 *attr_widths,
    2043              :                                                BlockNumber *pages,
    2044              :                                                double *tuples,
    2045              :                                                double *allvisfrac,
    2046              :                                                Size overhead_bytes_per_tuple,
    2047              :                                                Size usable_bytes_per_page);
    2048              : 
    2049              : /* ----------------------------------------------------------------------------
    2050              :  * Functions in tableamapi.c
    2051              :  * ----------------------------------------------------------------------------
    2052              :  */
    2053              : 
    2054              : extern const TableAmRoutine *GetTableAmRoutine(Oid amhandler);
    2055              : 
    2056              : /* ----------------------------------------------------------------------------
    2057              :  * Functions in heapam_handler.c
    2058              :  * ----------------------------------------------------------------------------
    2059              :  */
    2060              : 
    2061              : extern const TableAmRoutine *GetHeapamTableAmRoutine(void);
    2062              : 
    2063              : #endif                          /* TABLEAM_H */
        

Generated by: LCOV version 2.0-1