LCOV - code coverage report
Current view: top level - src/backend/access/index - genam.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 94.7 % 244 231
Test Date: 2026-04-07 14:16:30 Functions: 100.0 % 15 15
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * genam.c
       4              :  *    general index access method routines
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/backend/access/index/genam.c
      12              :  *
      13              :  * NOTES
      14              :  *    many of the old access method routines have been turned into
      15              :  *    macros and moved to genam.h -cim 4/30/91
      16              :  *
      17              :  *-------------------------------------------------------------------------
      18              :  */
      19              : 
      20              : #include "postgres.h"
      21              : 
      22              : #include "access/genam.h"
      23              : #include "access/heapam.h"
      24              : #include "access/relscan.h"
      25              : #include "access/tableam.h"
      26              : #include "access/transam.h"
      27              : #include "catalog/index.h"
      28              : #include "lib/stringinfo.h"
      29              : #include "miscadmin.h"
      30              : #include "storage/bufmgr.h"
      31              : #include "storage/procarray.h"
      32              : #include "utils/acl.h"
      33              : #include "utils/injection_point.h"
      34              : #include "utils/lsyscache.h"
      35              : #include "utils/rel.h"
      36              : #include "utils/rls.h"
      37              : #include "utils/ruleutils.h"
      38              : #include "utils/snapmgr.h"
      39              : 
      40              : 
      41              : /* ----------------------------------------------------------------
      42              :  *      general access method routines
      43              :  *
      44              :  *      All indexed access methods use an identical scan structure.
      45              :  *      We don't know how the various AMs do locking, however, so we don't
      46              :  *      do anything about that here.
      47              :  *
      48              :  *      The intent is that an AM implementor will define a beginscan routine
      49              :  *      that calls RelationGetIndexScan, to fill in the scan, and then does
      50              :  *      whatever kind of locking he wants.
      51              :  *
      52              :  *      At the end of a scan, the AM's endscan routine undoes the locking,
      53              :  *      but does *not* call IndexScanEnd --- the higher-level index_endscan
      54              :  *      routine does that.  (We can't do it in the AM because index_endscan
      55              :  *      still needs to touch the IndexScanDesc after calling the AM.)
      56              :  *
      57              :  *      Because of this, the AM does not have a choice whether to call
      58              :  *      RelationGetIndexScan or not; its beginscan routine must return an
      59              :  *      object made by RelationGetIndexScan.  This is kinda ugly but not
      60              :  *      worth cleaning up now.
      61              :  * ----------------------------------------------------------------
      62              :  */
      63              : 
      64              : /* ----------------
      65              :  *  RelationGetIndexScan -- Create and fill an IndexScanDesc.
      66              :  *
      67              :  *      This routine creates an index scan structure and sets up initial
      68              :  *      contents for it.
      69              :  *
      70              :  *      Parameters:
      71              :  *              indexRelation -- index relation for scan.
      72              :  *              nkeys -- count of scan keys (index qual conditions).
      73              :  *              norderbys -- count of index order-by operators.
      74              :  *
      75              :  *      Returns:
      76              :  *              An initialized IndexScanDesc.
      77              :  * ----------------
      78              :  */
      79              : IndexScanDesc
      80      9859819 : RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
      81              : {
      82              :     IndexScanDesc scan;
      83              : 
      84      9859819 :     scan = palloc_object(IndexScanDescData);
      85              : 
      86      9859819 :     scan->heapRelation = NULL;   /* may be set later */
      87      9859819 :     scan->xs_heapfetch = NULL;
      88      9859819 :     scan->indexRelation = indexRelation;
      89      9859819 :     scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
      90      9859819 :     scan->numberOfKeys = nkeys;
      91      9859819 :     scan->numberOfOrderBys = norderbys;
      92              : 
      93              :     /*
      94              :      * We allocate key workspace here, but it won't get filled until amrescan.
      95              :      */
      96      9859819 :     if (nkeys > 0)
      97      9852051 :         scan->keyData = palloc_array(ScanKeyData, nkeys);
      98              :     else
      99         7768 :         scan->keyData = NULL;
     100      9859819 :     if (norderbys > 0)
     101          119 :         scan->orderByData = palloc_array(ScanKeyData, norderbys);
     102              :     else
     103      9859700 :         scan->orderByData = NULL;
     104              : 
     105      9859819 :     scan->xs_want_itup = false; /* may be set later */
     106              : 
     107              :     /*
     108              :      * During recovery we ignore killed tuples and don't bother to kill them
     109              :      * either. We do this because the xmin on the primary node could easily be
     110              :      * later than the xmin on the standby node, so that what the primary
     111              :      * thinks is killed is supposed to be visible on standby. So for correct
     112              :      * MVCC for queries during recovery we must ignore these hints and check
     113              :      * all tuples. Do *not* set ignore_killed_tuples to true when running in a
     114              :      * transaction that was started during recovery. xactStartedInRecovery
     115              :      * should not be altered by index AMs.
     116              :      */
     117      9859819 :     scan->kill_prior_tuple = false;
     118      9859819 :     scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
     119      9859819 :     scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
     120              : 
     121      9859819 :     scan->opaque = NULL;
     122      9859819 :     scan->instrument = NULL;
     123              : 
     124      9859819 :     scan->xs_itup = NULL;
     125      9859819 :     scan->xs_itupdesc = NULL;
     126      9859819 :     scan->xs_hitup = NULL;
     127      9859819 :     scan->xs_hitupdesc = NULL;
     128              : 
     129      9859819 :     return scan;
     130              : }
     131              : 
     132              : /* ----------------
     133              :  *  IndexScanEnd -- End an index scan.
     134              :  *
     135              :  *      This routine just releases the storage acquired by
     136              :  *      RelationGetIndexScan().  Any AM-level resources are
     137              :  *      assumed to already have been released by the AM's
     138              :  *      endscan routine.
     139              :  *
     140              :  *  Returns:
     141              :  *      None.
     142              :  * ----------------
     143              :  */
     144              : void
     145      9858497 : IndexScanEnd(IndexScanDesc scan)
     146              : {
     147      9858497 :     if (scan->keyData != NULL)
     148      9850752 :         pfree(scan->keyData);
     149      9858497 :     if (scan->orderByData != NULL)
     150          115 :         pfree(scan->orderByData);
     151              : 
     152      9858497 :     pfree(scan);
     153      9858497 : }
     154              : 
     155              : /*
     156              :  * BuildIndexValueDescription
     157              :  *
     158              :  * Construct a string describing the contents of an index entry, in the
     159              :  * form "(key_name, ...)=(key_value, ...)".  This is currently used
     160              :  * for building unique-constraint and exclusion-constraint error messages,
     161              :  * as well as logical replication conflict error messages, so only the key
     162              :  * columns of the index are checked and printed.
     163              :  *
     164              :  * Note that if the user does not have permissions to view all of the
     165              :  * columns involved then a NULL is returned.  Returning a partial key seems
     166              :  * unlikely to be useful and we have no way to know which of the columns the
     167              :  * user provided (unlike in ExecBuildSlotValueDescription).
     168              :  *
     169              :  * The passed-in values/nulls arrays are the "raw" input to the index AM,
     170              :  * e.g. results of FormIndexDatum --- this is not necessarily what is stored
     171              :  * in the index, but it's what the user perceives to be stored.
     172              :  *
     173              :  * Note: if you change anything here, check whether
     174              :  * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
     175              :  * change.
     176              :  */
     177              : char *
     178          725 : BuildIndexValueDescription(Relation indexRelation,
     179              :                            const Datum *values, const bool *isnull)
     180              : {
     181              :     StringInfoData buf;
     182              :     Form_pg_index idxrec;
     183              :     int         indnkeyatts;
     184              :     int         i;
     185              :     int         keyno;
     186          725 :     Oid         indexrelid = RelationGetRelid(indexRelation);
     187              :     Oid         indrelid;
     188              :     AclResult   aclresult;
     189              : 
     190          725 :     indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
     191              : 
     192              :     /*
     193              :      * Check permissions: if the user does not have access to view all of the
     194              :      * key columns then return NULL to avoid leaking data.
     195              :      *
     196              :      * First check if RLS is enabled for the relation.  If so, return NULL to
     197              :      * avoid leaking data.
     198              :      *
     199              :      * Next we need to check table-level SELECT access and then, if there is
     200              :      * no access there, check column-level permissions.
     201              :      */
     202          725 :     idxrec = indexRelation->rd_index;
     203          725 :     indrelid = idxrec->indrelid;
     204              :     Assert(indexrelid == idxrec->indexrelid);
     205              : 
     206              :     /* RLS check: if RLS is enabled then we don't return anything. */
     207          725 :     if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
     208            8 :         return NULL;
     209              : 
     210              :     /* Table-level SELECT is enough, if the user has it */
     211          717 :     aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
     212          717 :     if (aclresult != ACLCHECK_OK)
     213              :     {
     214              :         /*
     215              :          * No table-level access, so step through the columns in the index and
     216              :          * make sure the user has SELECT rights on all of them.
     217              :          */
     218           16 :         for (keyno = 0; keyno < indnkeyatts; keyno++)
     219              :         {
     220           16 :             AttrNumber  attnum = idxrec->indkey.values[keyno];
     221              : 
     222              :             /*
     223              :              * Note that if attnum == InvalidAttrNumber, then this is an index
     224              :              * based on an expression and we return no detail rather than try
     225              :              * to figure out what column(s) the expression includes and if the
     226              :              * user has SELECT rights on them.
     227              :              */
     228           32 :             if (attnum == InvalidAttrNumber ||
     229           16 :                 pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
     230              :                                       ACL_SELECT) != ACLCHECK_OK)
     231              :             {
     232              :                 /* No access, so return without building a description */
     233            8 :                 return NULL;
     234              :             }
     235              :         }
     236              :     }
     237              : 
     238          709 :     initStringInfo(&buf);
     239          709 :     appendStringInfo(&buf, "(%s)=(",
     240              :                      pg_get_indexdef_columns(indexrelid, true));
     241              : 
     242         1674 :     for (i = 0; i < indnkeyatts; i++)
     243              :     {
     244              :         char       *val;
     245              : 
     246          965 :         if (isnull[i])
     247           12 :             val = "null";
     248              :         else
     249              :         {
     250              :             Oid         foutoid;
     251              :             bool        typisvarlena;
     252              : 
     253              :             /*
     254              :              * The provided data is not necessarily of the type stored in the
     255              :              * index; rather it is of the index opclass's input type. So look
     256              :              * at rd_opcintype not the index tupdesc.
     257              :              *
     258              :              * Note: this is a bit shaky for opclasses that have pseudotype
     259              :              * input types such as ANYARRAY or RECORD.  Currently, the
     260              :              * typoutput functions associated with the pseudotypes will work
     261              :              * okay, but we might have to try harder in future.
     262              :              */
     263          953 :             getTypeOutputInfo(indexRelation->rd_opcintype[i],
     264              :                               &foutoid, &typisvarlena);
     265          953 :             val = OidOutputFunctionCall(foutoid, values[i]);
     266              :         }
     267              : 
     268          965 :         if (i > 0)
     269          256 :             appendStringInfoString(&buf, ", ");
     270          965 :         appendStringInfoString(&buf, val);
     271              :     }
     272              : 
     273          709 :     appendStringInfoChar(&buf, ')');
     274              : 
     275          709 :     return buf.data;
     276              : }
     277              : 
     278              : /*
     279              :  * Get the snapshotConflictHorizon from the table entries pointed to by the
     280              :  * index tuples being deleted using an AM-generic approach.
     281              :  *
     282              :  * This is a table_index_delete_tuples() shim used by index AMs that only need
     283              :  * to consult the tableam to get a snapshotConflictHorizon value, and only
     284              :  * expect to delete index tuples that are already known deletable (typically
     285              :  * due to having LP_DEAD bits set).  When a snapshotConflictHorizon value
     286              :  * isn't needed in index AM's deletion WAL record, it is safe for it to skip
     287              :  * calling here entirely.
     288              :  *
     289              :  * We assume that caller index AM uses the standard IndexTuple representation,
     290              :  * with table TIDs stored in the t_tid field.  We also expect (and assert)
     291              :  * that the line pointers on page for 'itemnos' offsets are already marked
     292              :  * LP_DEAD.
     293              :  */
     294              : TransactionId
     295            4 : index_compute_xid_horizon_for_tuples(Relation irel,
     296              :                                      Relation hrel,
     297              :                                      Buffer ibuf,
     298              :                                      OffsetNumber *itemnos,
     299              :                                      int nitems)
     300              : {
     301              :     TM_IndexDeleteOp delstate;
     302            4 :     TransactionId snapshotConflictHorizon = InvalidTransactionId;
     303            4 :     Page        ipage = BufferGetPage(ibuf);
     304              :     IndexTuple  itup;
     305              : 
     306              :     Assert(nitems > 0);
     307              : 
     308            4 :     delstate.irel = irel;
     309            4 :     delstate.iblknum = BufferGetBlockNumber(ibuf);
     310            4 :     delstate.bottomup = false;
     311            4 :     delstate.bottomupfreespace = 0;
     312            4 :     delstate.ndeltids = 0;
     313            4 :     delstate.deltids = palloc_array(TM_IndexDelete, nitems);
     314            4 :     delstate.status = palloc_array(TM_IndexStatus, nitems);
     315              : 
     316              :     /* identify what the index tuples about to be deleted point to */
     317          888 :     for (int i = 0; i < nitems; i++)
     318              :     {
     319          884 :         OffsetNumber offnum = itemnos[i];
     320              :         ItemId      iitemid;
     321              : 
     322          884 :         iitemid = PageGetItemId(ipage, offnum);
     323          884 :         itup = (IndexTuple) PageGetItem(ipage, iitemid);
     324              : 
     325              :         Assert(ItemIdIsDead(iitemid));
     326              : 
     327          884 :         ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
     328          884 :         delstate.deltids[i].id = delstate.ndeltids;
     329          884 :         delstate.status[i].idxoffnum = offnum;
     330          884 :         delstate.status[i].knowndeletable = true;   /* LP_DEAD-marked */
     331          884 :         delstate.status[i].promising = false;   /* unused */
     332          884 :         delstate.status[i].freespace = 0;   /* unused */
     333              : 
     334          884 :         delstate.ndeltids++;
     335              :     }
     336              : 
     337              :     /* determine the actual xid horizon */
     338            4 :     snapshotConflictHorizon = table_index_delete_tuples(hrel, &delstate);
     339              : 
     340              :     /* assert tableam agrees that all items are deletable */
     341              :     Assert(delstate.ndeltids == nitems);
     342              : 
     343            4 :     pfree(delstate.deltids);
     344            4 :     pfree(delstate.status);
     345              : 
     346            4 :     return snapshotConflictHorizon;
     347              : }
     348              : 
     349              : 
     350              : /* ----------------------------------------------------------------
     351              :  *      heap-or-index-scan access to system catalogs
     352              :  *
     353              :  *      These functions support system catalog accesses that normally use
     354              :  *      an index but need to be capable of being switched to heap scans
     355              :  *      if the system indexes are unavailable.
     356              :  *
     357              :  *      The specified scan keys must be compatible with the named index.
     358              :  *      Generally this means that they must constrain either all columns
     359              :  *      of the index, or the first K columns of an N-column index.
     360              :  *
     361              :  *      These routines could work with non-system tables, actually,
     362              :  *      but they're only useful when there is a known index to use with
     363              :  *      the given scan keys; so in practice they're only good for
     364              :  *      predetermined types of scans of system catalogs.
     365              :  * ----------------------------------------------------------------
     366              :  */
     367              : 
     368              : /*
     369              :  * systable_beginscan --- set up for heap-or-index scan
     370              :  *
     371              :  *  rel: catalog to scan, already opened and suitably locked
     372              :  *  indexId: OID of index to conditionally use
     373              :  *  indexOK: if false, forces a heap scan (see notes below)
     374              :  *  snapshot: time qual to use (NULL for a recent catalog snapshot)
     375              :  *  nkeys, key: scan keys
     376              :  *
     377              :  * The attribute numbers in the scan key should be set for the heap case.
     378              :  * If we choose to index, we convert them to 1..n to reference the index
     379              :  * columns.  Note this means every scankey must reference a column of the
     380              :  * index!  This is checked (with an elog(ERROR)) in the normal, index-using
     381              :  * case, but won't be checked if the heapscan path is taken.
     382              :  *
     383              :  * The routine checks the normal cases for whether an indexscan is safe,
     384              :  * but caller can make additional checks and pass indexOK=false if needed.
     385              :  * In the standard case indexOK can simply be the constant true.
     386              :  */
     387              : SysScanDesc
     388      9783139 : systable_beginscan(Relation heapRelation,
     389              :                    Oid indexId,
     390              :                    bool indexOK,
     391              :                    Snapshot snapshot,
     392              :                    int nkeys, ScanKey key)
     393              : {
     394              :     SysScanDesc sysscan;
     395              :     Relation    irel;
     396              : 
     397              :     /*
     398              :      * If this backend promised that it won't access shared catalogs during
     399              :      * logical decoding, this is the right place to verify.
     400              :      */
     401              :     Assert(!HistoricSnapshotActive() ||
     402              :            accessSharedCatalogsInDecoding ||
     403              :            !heapRelation->rd_rel->relisshared);
     404              : 
     405      9783139 :     if (indexOK &&
     406      9633640 :         !IgnoreSystemIndexes &&
     407      9558343 :         !ReindexIsProcessingIndex(indexId))
     408      9549743 :         irel = index_open(indexId, AccessShareLock);
     409              :     else
     410       233396 :         irel = NULL;
     411              : 
     412      9783135 :     sysscan = palloc_object(SysScanDescData);
     413              : 
     414      9783135 :     sysscan->heap_rel = heapRelation;
     415      9783135 :     sysscan->irel = irel;
     416      9783135 :     sysscan->slot = table_slot_create(heapRelation, NULL);
     417              : 
     418      9783135 :     if (snapshot == NULL)
     419              :     {
     420      9133173 :         Oid         relid = RelationGetRelid(heapRelation);
     421              : 
     422      9133173 :         snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
     423      9133173 :         sysscan->snapshot = snapshot;
     424              :     }
     425              :     else
     426              :     {
     427              :         /* Caller is responsible for any snapshot. */
     428       649962 :         sysscan->snapshot = NULL;
     429              :     }
     430              : 
     431              :     /*
     432              :      * If CheckXidAlive is set then set a flag to indicate that system table
     433              :      * scan is in-progress.  See detailed comments in xact.c where these
     434              :      * variables are declared.
     435              :      */
     436      9783135 :     if (TransactionIdIsValid(CheckXidAlive))
     437         1044 :         bsysscan = true;
     438              : 
     439      9783135 :     if (irel)
     440              :     {
     441              :         int         i;
     442              :         ScanKey     idxkey;
     443              : 
     444      9549739 :         idxkey = palloc_array(ScanKeyData, nkeys);
     445              : 
     446              :         /* Convert attribute numbers to be index column numbers. */
     447     25145293 :         for (i = 0; i < nkeys; i++)
     448              :         {
     449              :             int         j;
     450              : 
     451     15595554 :             memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
     452              : 
     453     22691562 :             for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
     454              :             {
     455     22691562 :                 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
     456              :                 {
     457     15595554 :                     idxkey[i].sk_attno = j + 1;
     458     15595554 :                     break;
     459              :                 }
     460              :             }
     461     15595554 :             if (j == IndexRelationGetNumberOfAttributes(irel))
     462            0 :                 elog(ERROR, "column is not in index");
     463              :         }
     464              : 
     465      9549739 :         sysscan->iscan = index_beginscan(heapRelation, irel,
     466              :                                          snapshot, NULL, nkeys, 0,
     467              :                                          SO_NONE);
     468      9549739 :         index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
     469      9549739 :         sysscan->scan = NULL;
     470              : 
     471      9549739 :         pfree(idxkey);
     472              :     }
     473              :     else
     474              :     {
     475              :         /*
     476              :          * We disallow synchronized scans when forced to use a heapscan on a
     477              :          * catalog.  In most cases the desired rows are near the front, so
     478              :          * that the unpredictable start point of a syncscan is a serious
     479              :          * disadvantage; and there are no compensating advantages, because
     480              :          * it's unlikely that such scans will occur in parallel.
     481              :          */
     482       233396 :         sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
     483              :                                               nkeys, key,
     484              :                                               true, false);
     485       233396 :         sysscan->iscan = NULL;
     486              :     }
     487              : 
     488      9783135 :     return sysscan;
     489              : }
     490              : 
     491              : /*
     492              :  * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive.
     493              :  *
     494              :  * Error out, if CheckXidAlive is aborted. We can't directly use
     495              :  * TransactionIdDidAbort as after crash such transaction might not have been
     496              :  * marked as aborted.  See detailed comments in xact.c where the variable
     497              :  * is declared.
     498              :  */
     499              : static inline void
     500     20446447 : HandleConcurrentAbort(void)
     501              : {
     502     20446447 :     if (TransactionIdIsValid(CheckXidAlive) &&
     503         1699 :         !TransactionIdIsInProgress(CheckXidAlive) &&
     504            8 :         !TransactionIdDidCommit(CheckXidAlive))
     505            8 :         ereport(ERROR,
     506              :                 (errcode(ERRCODE_TRANSACTION_ROLLBACK),
     507              :                  errmsg("transaction aborted during system catalog scan")));
     508     20446439 : }
     509              : 
     510              : /*
     511              :  * systable_getnext --- get next tuple in a heap-or-index scan (forward only)
     512              :  *
     513              :  * Returns NULL if no more tuples are available.
     514              :  *
     515              :  * Note that the returned tuple is a reference to data in a disk buffer;
     516              :  * it must not be modified, and should be presumed inaccessible after
     517              :  * the next getnext() or endscan() call.
     518              :  *
     519              :  * XXX: It'd probably make sense to offer a slot-based interface, at least
     520              :  * optionally.
     521              :  */
     522              : HeapTuple
     523     20143488 : systable_getnext(SysScanDesc sysscan)
     524              : {
     525     20143488 :     HeapTuple   htup = NULL;
     526              : 
     527     20143488 :     if (sysscan->irel)
     528              :     {
     529     17886084 :         if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
     530              :         {
     531              :             bool        shouldFree;
     532              : 
     533     13878841 :             htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
     534              :             Assert(!shouldFree);
     535              : 
     536              :             /*
     537              :              * We currently don't need to support lossy index operators for
     538              :              * any system catalog scan.  It could be done here, using the scan
     539              :              * keys to drive the operator calls, if we arranged to save the
     540              :              * heap attnums during systable_beginscan(); this is practical
     541              :              * because we still wouldn't need to support indexes on
     542              :              * expressions.
     543              :              */
     544     13878841 :             if (sysscan->iscan->xs_recheck)
     545            0 :                 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
     546              :         }
     547              :     }
     548              :     else
     549              :     {
     550      2257404 :         if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
     551              :         {
     552              :             bool        shouldFree;
     553              : 
     554      2201318 :             htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
     555              :             Assert(!shouldFree);
     556              :         }
     557              :     }
     558              : 
     559              :     /*
     560              :      * Handle the concurrent abort while fetching the catalog tuple during
     561              :      * logical streaming of a transaction.  This runs even when htup is NULL.
     562              :      */
     563     20143485 :     HandleConcurrentAbort();
     564              : 
     565     20143477 :     return htup;
     566              : }
     567              : 
     568              : /*
     569              :  * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
     570              :  *
     571              :  * In particular, determine if this tuple would be visible to a catalog scan
     572              :  * that started now: returns true if the scan's current slot satisfies a fresh
     573              :  * catalog snapshot.  Non-MVCC scan snapshots aren't handled; no caller needs it.
     574              :  *
     575              :  * This is useful to test whether an object was deleted while we waited to
     576              :  * acquire lock on it.
     577              :  *
     578              :  * Note: we don't actually *need* the tuple to be passed in, but it's a good
     579              :  * crosscheck (Assert-only) that the caller is interested in the right tuple.
     580              :  */
     581              : bool
     582       162185 : systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
     583              : {
     584              :     Snapshot    freshsnap;
     585              :     bool        result;
     586              : 
     587              :     Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
     588              : 
     589       162185 :     freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
     590       162185 :     freshsnap = RegisterSnapshot(freshsnap);
     591              : 
     592       162185 :     result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
     593       162185 :                                             sysscan->slot,
     594              :                                             freshsnap);
     595       162185 :     UnregisterSnapshot(freshsnap);
     596              : 
     597              :     /*
     598              :      * Handle the concurrent abort while fetching the catalog tuple during
     599              :      * logical streaming of a transaction.
     600              :      */
     601       162185 :     HandleConcurrentAbort();
     602              : 
     603       162185 :     return result;
     604              : }
     605              : 
     606              : /*
     607              :  * systable_endscan --- close scan, release resources, and pfree the descriptor
     608              :  *
     609              :  * Note that it's still up to the caller to close the heap relation.
     610              :  */
     611              : void
     612      9782587 : systable_endscan(SysScanDesc sysscan)
     613              : {
     614      9782587 :     if (sysscan->slot)
     615              :     {
     616      9782587 :         ExecDropSingleTupleTableSlot(sysscan->slot);
     617      9782587 :         sysscan->slot = NULL;
     618              :     }
     619              : 
     620      9782587 :     if (sysscan->irel)
     621              :     {
     622      9549201 :         index_endscan(sysscan->iscan);
     623      9549201 :         index_close(sysscan->irel, AccessShareLock);
     624              :     }
     625              :     else
     626       233386 :         table_endscan(sysscan->scan);
     627              : 
     628      9782587 :     if (sysscan->snapshot)
     629      9132625 :         UnregisterSnapshot(sysscan->snapshot);
     630              : 
     631              :     /*
     632              :      * Reset the bsysscan flag at the end of the systable scan.  See detailed
     633              :      * comments in xact.c where these variables are declared.
     634              :      */
     635      9782587 :     if (TransactionIdIsValid(CheckXidAlive))
     636         1036 :         bsysscan = false;
     637              : 
     638      9782587 :     pfree(sysscan);
     639      9782587 : }
     640              : 
     641              : 
     642              : /*
     643              :  * systable_beginscan_ordered --- set up for ordered catalog scan
     644              :  *
     645              :  * These routines have essentially the same API as systable_beginscan etc,
     646              :  * except that they guarantee to return multiple matching tuples in
     647              :  * index order.  Also, for largely historical reasons, the index to use
     648              :  * is opened and locked by the caller, not here.
     649              :  *
     650              :  * Currently we do not support non-index-based scans here.  (In principle
     651              :  * we could do a heapscan and sort, but the uses are in places that
     652              :  * probably don't need to still work with corrupted catalog indexes.)
     653              :  * For the moment, therefore, these functions are merely the thinnest of
     654              :  * wrappers around index_beginscan/index_getnext_slot.  The main reason for
     655              :  * their existence is to centralize possible future support of lossy operators
     656              :  * in catalog scans.
     657              :  */
     658              : SysScanDesc
     659        37636 : systable_beginscan_ordered(Relation heapRelation,
     660              :                            Relation indexRelation,
     661              :                            Snapshot snapshot,
     662              :                            int nkeys, ScanKey key)
     663              : {
     664              :     SysScanDesc sysscan;
     665              :     int         i;
     666              :     ScanKey     idxkey;
     667              : 
     668              :     /* REINDEX can probably be a hard error here ... */
     669        37636 :     if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
     670            0 :         ereport(ERROR,
     671              :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     672              :                  errmsg("cannot access index \"%s\" while it is being reindexed",
     673              :                         RelationGetRelationName(indexRelation))));
     674              :     /* ... but we only throw a warning about violating IgnoreSystemIndexes */
     675        37636 :     if (IgnoreSystemIndexes)
     676            0 :         elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
     677              :              RelationGetRelationName(indexRelation));
     678              : 
     679        37636 :     sysscan = palloc_object(SysScanDescData);
     680              : 
     681        37636 :     sysscan->heap_rel = heapRelation;
     682        37636 :     sysscan->irel = indexRelation;
     683        37636 :     sysscan->slot = table_slot_create(heapRelation, NULL);
     684              : 
     685        37636 :     if (snapshot == NULL)
     686              :     {
     687         5289 :         Oid         relid = RelationGetRelid(heapRelation);
     688              : 
     689         5289 :         snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
     690         5289 :         sysscan->snapshot = snapshot;
     691              :     }
     692              :     else
     693              :     {
     694              :         /* Caller is responsible for any snapshot it passed in. */
     695        32347 :         sysscan->snapshot = NULL;
     696              :     }
     697              : 
     698        37636 :     idxkey = palloc_array(ScanKeyData, nkeys);
     699              : 
     700              :     /* Convert attribute numbers to be (1-based) index column numbers. */
     701        73294 :     for (i = 0; i < nkeys; i++)
     702              :     {
     703              :         int         j;
     704              : 
     705        35658 :         memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
     706              : 
     707        37699 :         for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
     708              :         {
     709        37699 :             if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
     710              :             {
     711        35658 :                 idxkey[i].sk_attno = j + 1;
     712        35658 :                 break;
     713              :             }
     714              :         }
     715        35658 :         if (j == IndexRelationGetNumberOfAttributes(indexRelation))
     716            0 :             elog(ERROR, "column is not in index");
     717              :     }
     718              : 
     719              :     /*
     720              :      * If CheckXidAlive is set then set a flag to indicate that system table
     721              :      * scan is in-progress.  See detailed comments in xact.c where these
     722              :      * variables are declared.
     723              :      */
     724        37636 :     if (TransactionIdIsValid(CheckXidAlive))
     725            0 :         bsysscan = true;
     726              : 
     727        37636 :     sysscan->iscan = index_beginscan(heapRelation, indexRelation,
     728              :                                      snapshot, NULL, nkeys, 0,
     729              :                                      SO_NONE);
     730        37636 :     index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
     731        37636 :     sysscan->scan = NULL;
     732              : 
     733        37636 :     pfree(idxkey);
     734              : 
     735        37636 :     return sysscan;
     736              : }
     737              : 
     738              : /*
     739              :  * systable_getnext_ordered --- get next tuple (or NULL) in an ordered catalog scan
     740              :  */
     741              : HeapTuple
     742       140780 : systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
     743              : {
     744       140780 :     HeapTuple   htup = NULL;
     745              : 
     746              :     Assert(sysscan->irel);
     747       140780 :     if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
     748       103996 :         htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
     749              : 
     750              :     /* See notes in systable_getnext */
     751       140777 :     if (htup && sysscan->iscan->xs_recheck)
     752            0 :         elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
     753              : 
     754              :     /*
     755              :      * Handle the concurrent abort while fetching the catalog tuple during
     756              :      * logical streaming of a transaction.
     757              :      */
     758       140777 :     HandleConcurrentAbort();
     759              : 
     760       140777 :     return htup;
     761              : }
     762              : 
     763              : /*
     764              :  * systable_endscan_ordered --- close scan, release resources (index stays open)
     765              :  */
     766              : void
     767        37625 : systable_endscan_ordered(SysScanDesc sysscan)
     768              : {
     769        37625 :     if (sysscan->slot)
     770              :     {
     771        37625 :         ExecDropSingleTupleTableSlot(sysscan->slot);
     772        37625 :         sysscan->slot = NULL;
     773              :     }
     774              : 
     775              :     Assert(sysscan->irel);
     776        37625 :     index_endscan(sysscan->iscan);
     777        37625 :     if (sysscan->snapshot)
     778         5281 :         UnregisterSnapshot(sysscan->snapshot);
     779              : 
     780              :     /*
     781              :      * Reset the bsysscan flag at the end of the systable scan.  See detailed
     782              :      * comments in xact.c where these variables are declared.
     783              :      */
     784        37625 :     if (TransactionIdIsValid(CheckXidAlive))
     785            0 :         bsysscan = false;
     786              : 
     787        37625 :     pfree(sysscan);
     788        37625 : }
     789              : 
     790              : /*
     791              :  * systable_inplace_update_begin --- update a row "in place" (overwrite it)
     792              :  *
     793              :  * Overwriting violates both MVCC and transactional safety, so the uses of
     794              :  * this function in Postgres are extremely limited.  This makes no effort to
     795              :  * support updating cache key columns or other indexed columns.  Nonetheless
     796              :  * we find some places to use it.  See README.tuplock section "Locking to
     797              :  * write inplace-updated tables" and later sections for expectations of
     798              :  * readers and writers of a table that gets inplace updates.  Standard flow:
     799              :  *
     800              :  * ... [any slow preparation not requiring oldtup] ...
     801              :  * systable_inplace_update_begin([...], &tup, &inplace_state);
     802              :  * if (!HeapTupleIsValid(tup))
     803              :  *  elog(ERROR, [...]);
     804              :  * ... [buffer is exclusive-locked; mutate "tup"] ...
     805              :  * if (dirty)
     806              :  *  systable_inplace_update_finish(inplace_state, tup);
     807              :  * else
     808              :  *  systable_inplace_update_cancel(inplace_state);
     809              :  *
     810              :  * The first several params duplicate the systable_beginscan() param list.
     811              :  * "oldtupcopy" is an output parameter, assigned NULL if the key ceases to
     812              :  * find a live tuple.  (In PROC_IN_VACUUM, that is a low-probability transient
     813              :  * condition.)  If "oldtupcopy" gets non-NULL, you must pass output parameter
     814              :  * "state" to systable_inplace_update_finish() or
     815              :  * systable_inplace_update_cancel().  If it gets NULL, "state" is left unset.
     816              :  */
     817              : void
     818       217417 : systable_inplace_update_begin(Relation relation,
     819              :                               Oid indexId,
     820              :                               bool indexOK,
     821              :                               Snapshot snapshot,
     822              :                               int nkeys, const ScanKeyData *key,
     823              :                               HeapTuple *oldtupcopy,
     824              :                               void **state)
     825              : {
     826       217417 :     int         retries = 0;
     827              :     SysScanDesc scan;
     828              :     HeapTuple   oldtup;
     829              :     BufferHeapTupleTableSlot *bslot;
     830              : 
     831              :     /*
     832              :      * For now, we don't allow parallel updates.  Unlike a regular update,
     833              :      * this should never create a combo CID, so it might be possible to relax
     834              :      * this restriction, but not without more thought and testing.  It's not
     835              :      * clear that it would be useful, anyway.
     836              :      */
     837       217417 :     if (IsInParallelMode())
     838            0 :         ereport(ERROR,
     839              :                 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
     840              :                  errmsg("cannot update tuples during a parallel operation")));
     841              : 
     842              :     /*
     843              :      * Accept a snapshot argument, for symmetry, but this function advances
     844              :      * its snapshot as needed to reach the tail of the updated tuple chain.
     845              :      */
     846              :     Assert(snapshot == NULL);
     847              : 
     848              :     Assert(IsInplaceUpdateRelation(relation) || !IsSystemRelation(relation));
     849              : 
     850              :     /* Loop until we get an exclusive-locked buffer of a non-updated tuple. */
     851              :     do
     852              :     {
     853              :         TupleTableSlot *slot;
     854              : 
     855       217441 :         CHECK_FOR_INTERRUPTS();
     856              : 
     857              :         /*
     858              :          * Processes issuing heap_update (e.g. GRANT) at maximum speed could
     859              :          * drive us to this error.  A hostile table owner has stronger ways to
     860              :          * damage their own table, so that's minor.
     861              :          */
     862       217441 :         if (retries++ > 10000)
     863            0 :             elog(ERROR, "giving up after too many tries to overwrite row");
     864              : 
     865       217441 :         INJECTION_POINT("inplace-before-pin", NULL);
     866       217441 :         scan = systable_beginscan(relation, indexId, indexOK, snapshot,
     867              :                                   nkeys, unconstify(ScanKeyData *, key));
     868       217441 :         oldtup = systable_getnext(scan);
     869       217441 :         if (!HeapTupleIsValid(oldtup))
     870              :         {
     871            0 :             systable_endscan(scan);
     872            0 :             *oldtupcopy = NULL;
     873            0 :             return;
     874              :         }
     875              : 
     876       217441 :         slot = scan->slot;
     877              :         Assert(TTS_IS_BUFFERTUPLE(slot));
     878       217441 :         bslot = (BufferHeapTupleTableSlot *) slot;
     879       217441 :     } while (!heap_inplace_lock(scan->heap_rel,
     880              :                                 bslot->base.tuple, bslot->buffer,
     881              :                                 (void (*) (void *)) systable_endscan, scan));
     882              : 
     883       217417 :     *oldtupcopy = heap_copytuple(oldtup);
     884       217417 :     *state = scan;
     885              : }
     886              : 
     887              : /*
     888              :  * systable_inplace_update_finish --- second phase of inplace update
     889              :  *
     890              :  * The tuple cannot change size, and therefore its header fields and null
     891              :  * bitmap (if any) don't change either.  Also ends the scan held in "state".
     892              :  */
     893              : void
     894       100019 : systable_inplace_update_finish(void *state, HeapTuple tuple)
     895              : {
     896       100019 :     SysScanDesc scan = (SysScanDesc) state;
     897       100019 :     Relation    relation = scan->heap_rel;
     898       100019 :     TupleTableSlot *slot = scan->slot;
     899       100019 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     900       100019 :     HeapTuple   oldtup = bslot->base.tuple;
     901       100019 :     Buffer      buffer = bslot->buffer;
     902              : 
     903       100019 :     heap_inplace_update_and_unlock(relation, oldtup, tuple, buffer);
     904       100019 :     systable_endscan(scan);
     905       100019 : }
     906              : 
     907              : /*
     908              :  * systable_inplace_update_cancel --- abandon inplace update
     909              :  *
     910              :  * This is an alternative to making a no-op update.  It releases the inplace lock and ends the scan held in "state".
     911              :  */
     912              : void
     913       117398 : systable_inplace_update_cancel(void *state)
     914              : {
     915       117398 :     SysScanDesc scan = (SysScanDesc) state;
     916       117398 :     Relation    relation = scan->heap_rel;
     917       117398 :     TupleTableSlot *slot = scan->slot;
     918       117398 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     919       117398 :     HeapTuple   oldtup = bslot->base.tuple;
     920       117398 :     Buffer      buffer = bslot->buffer;
     921              : 
     922       117398 :     heap_inplace_unlock(relation, oldtup, buffer);
     923       117398 :     systable_endscan(scan);
     924       117398 : }
        

Generated by: LCOV version 2.0-1