LCOV - code coverage report
Current view: top level - src/backend/access/index - genam.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 207 243 85.2 %
Date: 2025-01-18 03:14:54 Functions: 14 15 93.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * genam.c
       4             :  *    general index access method routines
       5             :  *
       6             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/index/genam.c
      12             :  *
      13             :  * NOTES
      14             :  *    many of the old access method routines have been turned into
      15             :  *    macros and moved to genam.h -cim 4/30/91
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/genam.h"
      23             : #include "access/heapam.h"
      24             : #include "access/relscan.h"
      25             : #include "access/tableam.h"
      26             : #include "access/transam.h"
      27             : #include "catalog/index.h"
      28             : #include "lib/stringinfo.h"
      29             : #include "miscadmin.h"
      30             : #include "storage/bufmgr.h"
      31             : #include "storage/procarray.h"
      32             : #include "utils/acl.h"
      33             : #include "utils/injection_point.h"
      34             : #include "utils/lsyscache.h"
      35             : #include "utils/rel.h"
      36             : #include "utils/rls.h"
      37             : #include "utils/ruleutils.h"
      38             : #include "utils/snapmgr.h"
      39             : 
      40             : 
      41             : /* ----------------------------------------------------------------
      42             :  *      general access method routines
      43             :  *
      44             :  *      All indexed access methods use an identical scan structure.
      45             :  *      We don't know how the various AMs do locking, however, so we don't
      46             :  *      do anything about that here.
      47             :  *
      48             :  *      The intent is that an AM implementor will define a beginscan routine
      49             :  *      that calls RelationGetIndexScan, to fill in the scan, and then does
      50             :  *      whatever kind of locking he wants.
      51             :  *
      52             :  *      At the end of a scan, the AM's endscan routine undoes the locking,
      53             :  *      but does *not* call IndexScanEnd --- the higher-level index_endscan
      54             :  *      routine does that.  (We can't do it in the AM because index_endscan
      55             :  *      still needs to touch the IndexScanDesc after calling the AM.)
      56             :  *
      57             :  *      Because of this, the AM does not have a choice whether to call
      58             :  *      RelationGetIndexScan or not; its beginscan routine must return an
      59             :  *      object made by RelationGetIndexScan.  This is kinda ugly but not
      60             :  *      worth cleaning up now.
      61             :  * ----------------------------------------------------------------
      62             :  */
      63             : 
      64             : /* ----------------
      65             :  *  RelationGetIndexScan -- Create and fill an IndexScanDesc.
      66             :  *
      67             :  *      This routine creates an index scan structure and sets up initial
      68             :  *      contents for it.
      69             :  *
      70             :  *      Parameters:
      71             :  *              indexRelation -- index relation for scan.
      72             :  *              nkeys -- count of scan keys (index qual conditions).
      73             :  *              norderbys -- count of index order-by operators.
      74             :  *
      75             :  *      Returns:
      76             :  *              An initialized IndexScanDesc.
      77             :  * ----------------
      78             :  */
      79             : IndexScanDesc
      80    12938812 : RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
      81             : {
      82             :     IndexScanDesc scan;
      83             : 
      84    12938812 :     scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
      85             : 
      86    12938812 :     scan->heapRelation = NULL;   /* may be set later */
      87    12938812 :     scan->xs_heapfetch = NULL;
      88    12938812 :     scan->indexRelation = indexRelation;
      89    12938812 :     scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
      90    12938812 :     scan->numberOfKeys = nkeys;
      91    12938812 :     scan->numberOfOrderBys = norderbys;
      92             : 
      93             :     /*
      94             :      * We allocate key workspace here, but it won't get filled until amrescan.
      95             :      */
      96    12938812 :     if (nkeys > 0)
      97    12926478 :         scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
      98             :     else
      99       12334 :         scan->keyData = NULL;
     100    12938812 :     if (norderbys > 0)
     101         192 :         scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
     102             :     else
     103    12938620 :         scan->orderByData = NULL;
     104             : 
     105    12938812 :     scan->xs_want_itup = false; /* may be set later */
     106             : 
     107             :     /*
     108             :      * During recovery we ignore killed tuples and don't bother to kill them
     109             :      * either. We do this because the xmin on the primary node could easily be
     110             :      * later than the xmin on the standby node, so that what the primary
     111             :      * thinks is killed is supposed to be visible on standby. So for correct
     112             :      * MVCC for queries during recovery we must ignore these hints and check
     113             :      * all tuples. Do *not* set ignore_killed_tuples to true when running in a
     114             :      * transaction that was started during recovery. xactStartedInRecovery
     115             :      * should not be altered by index AMs.
     116             :      */
     117    12938812 :     scan->kill_prior_tuple = false;
     118    12938812 :     scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
     119    12938812 :     scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
     120             : 
     121    12938812 :     scan->opaque = NULL;
     122             : 
     123    12938812 :     scan->xs_itup = NULL;
     124    12938812 :     scan->xs_itupdesc = NULL;
     125    12938812 :     scan->xs_hitup = NULL;
     126    12938812 :     scan->xs_hitupdesc = NULL;
     127             : 
     128    12938812 :     return scan;
     129             : }
     130             : 
     131             : /* ----------------
     132             :  *  IndexScanEnd -- End an index scan.
     133             :  *
     134             :  *      This routine just releases the storage acquired by
     135             :  *      RelationGetIndexScan().  Any AM-level resources are
     136             :  *      assumed to already have been released by the AM's
     137             :  *      endscan routine.
     138             :  *
     139             :  *  Returns:
     140             :  *      None.
     141             :  * ----------------
     142             :  */
     143             : void
     144    12937054 : IndexScanEnd(IndexScanDesc scan)
     145             : {
     146    12937054 :     if (scan->keyData != NULL)
     147    12924756 :         pfree(scan->keyData);
     148    12937054 :     if (scan->orderByData != NULL)
     149         186 :         pfree(scan->orderByData);
     150             : 
     151    12937054 :     pfree(scan);
     152    12937054 : }
     153             : 
     154             : /*
     155             :  * BuildIndexValueDescription
     156             :  *
     157             :  * Construct a string describing the contents of an index entry, in the
     158             :  * form "(key_name, ...)=(key_value, ...)".  This is currently used
     159             :  * for building unique-constraint, exclusion-constraint error messages, and
     160             :  * logical replication conflict error messages so only key columns of the index
     161             :  * are checked and printed.
     162             :  *
     163             :  * Note that if the user does not have permissions to view all of the
     164             :  * columns involved then a NULL is returned.  Returning a partial key seems
     165             :  * unlikely to be useful and we have no way to know which of the columns the
     166             :  * user provided (unlike in ExecBuildSlotValueDescription).
     167             :  *
     168             :  * The passed-in values/nulls arrays are the "raw" input to the index AM,
     169             :  * e.g. results of FormIndexDatum --- this is not necessarily what is stored
     170             :  * in the index, but it's what the user perceives to be stored.
     171             :  *
     172             :  * Note: if you change anything here, check whether
     173             :  * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
     174             :  * change.
     175             :  */
     176             : char *
     177         958 : BuildIndexValueDescription(Relation indexRelation,
     178             :                            const Datum *values, const bool *isnull)
     179             : {
     180             :     StringInfoData buf;
     181             :     Form_pg_index idxrec;
     182             :     int         indnkeyatts;
     183             :     int         i;
     184             :     int         keyno;
     185         958 :     Oid         indexrelid = RelationGetRelid(indexRelation);
     186             :     Oid         indrelid;
     187             :     AclResult   aclresult;
     188             : 
     189         958 :     indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
     190             : 
     191             :     /*
     192             :      * Check permissions- if the user does not have access to view all of the
     193             :      * key columns then return NULL to avoid leaking data.
     194             :      *
     195             :      * First check if RLS is enabled for the relation.  If so, return NULL to
     196             :      * avoid leaking data.
     197             :      *
     198             :      * Next we need to check table-level SELECT access and then, if there is
     199             :      * no access there, check column-level permissions.
     200             :      */
     201         958 :     idxrec = indexRelation->rd_index;
     202         958 :     indrelid = idxrec->indrelid;
     203             :     Assert(indexrelid == idxrec->indexrelid);
     204             : 
     205             :     /* RLS check- if RLS is enabled then we don't return anything. */
     206         958 :     if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
     207          12 :         return NULL;
     208             : 
     209             :     /* Table-level SELECT is enough, if the user has it */
     210         946 :     aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
     211         946 :     if (aclresult != ACLCHECK_OK)
     212             :     {
     213             :         /*
     214             :          * No table-level access, so step through the columns in the index and
     215             :          * make sure the user has SELECT rights on all of them.
     216             :          */
     217          24 :         for (keyno = 0; keyno < indnkeyatts; keyno++)
     218             :         {
     219          24 :             AttrNumber  attnum = idxrec->indkey.values[keyno];
     220             : 
     221             :             /*
     222             :              * Note that if attnum == InvalidAttrNumber, then this is an index
     223             :              * based on an expression and we return no detail rather than try
     224             :              * to figure out what column(s) the expression includes and if the
     225             :              * user has SELECT rights on them.
     226             :              */
     227          48 :             if (attnum == InvalidAttrNumber ||
     228          24 :                 pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
     229             :                                       ACL_SELECT) != ACLCHECK_OK)
     230             :             {
     231             :                 /* No access, so clean up and return */
     232          12 :                 return NULL;
     233             :             }
     234             :         }
     235             :     }
     236             : 
     237         934 :     initStringInfo(&buf);
     238         934 :     appendStringInfo(&buf, "(%s)=(",
     239             :                      pg_get_indexdef_columns(indexrelid, true));
     240             : 
     241        2178 :     for (i = 0; i < indnkeyatts; i++)
     242             :     {
     243             :         char       *val;
     244             : 
     245        1244 :         if (isnull[i])
     246          18 :             val = "null";
     247             :         else
     248             :         {
     249             :             Oid         foutoid;
     250             :             bool        typisvarlena;
     251             : 
     252             :             /*
     253             :              * The provided data is not necessarily of the type stored in the
     254             :              * index; rather it is of the index opclass's input type. So look
     255             :              * at rd_opcintype not the index tupdesc.
     256             :              *
     257             :              * Note: this is a bit shaky for opclasses that have pseudotype
     258             :              * input types such as ANYARRAY or RECORD.  Currently, the
     259             :              * typoutput functions associated with the pseudotypes will work
     260             :              * okay, but we might have to try harder in future.
     261             :              */
     262        1226 :             getTypeOutputInfo(indexRelation->rd_opcintype[i],
     263             :                               &foutoid, &typisvarlena);
     264        1226 :             val = OidOutputFunctionCall(foutoid, values[i]);
     265             :         }
     266             : 
     267        1244 :         if (i > 0)
     268         310 :             appendStringInfoString(&buf, ", ");
     269        1244 :         appendStringInfoString(&buf, val);
     270             :     }
     271             : 
     272         934 :     appendStringInfoChar(&buf, ')');
     273             : 
     274         934 :     return buf.data;
     275             : }
     276             : 
     277             : /*
     278             :  * Get the snapshotConflictHorizon from the table entries pointed to by the
     279             :  * index tuples being deleted using an AM-generic approach.
     280             :  *
     281             :  * This is a table_index_delete_tuples() shim used by index AMs that only need
     282             :  * to consult the tableam to get a snapshotConflictHorizon value, and only
     283             :  * expect to delete index tuples that are already known deletable (typically
     284             :  * due to having LP_DEAD bits set).  When a snapshotConflictHorizon value
     285             :  * isn't needed in index AM's deletion WAL record, it is safe for it to skip
     286             :  * calling here entirely.
     287             :  *
     288             :  * We assume that caller index AM uses the standard IndexTuple representation,
     289             :  * with table TIDs stored in the t_tid field.  We also expect (and assert)
     290             :  * that the line pointers on page for 'itemnos' offsets are already marked
     291             :  * LP_DEAD.
     292             :  */
     293             : TransactionId
     294           0 : index_compute_xid_horizon_for_tuples(Relation irel,
     295             :                                      Relation hrel,
     296             :                                      Buffer ibuf,
     297             :                                      OffsetNumber *itemnos,
     298             :                                      int nitems)
     299             : {
     300             :     TM_IndexDeleteOp delstate;
     301           0 :     TransactionId snapshotConflictHorizon = InvalidTransactionId;
     302           0 :     Page        ipage = BufferGetPage(ibuf);
     303             :     IndexTuple  itup;
     304             : 
     305             :     Assert(nitems > 0);
     306             : 
     307           0 :     delstate.irel = irel;
     308           0 :     delstate.iblknum = BufferGetBlockNumber(ibuf);
     309           0 :     delstate.bottomup = false;
     310           0 :     delstate.bottomupfreespace = 0;
     311           0 :     delstate.ndeltids = 0;
     312           0 :     delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete));
     313           0 :     delstate.status = palloc(nitems * sizeof(TM_IndexStatus));
     314             : 
     315             :     /* identify what the index tuples about to be deleted point to */
     316           0 :     for (int i = 0; i < nitems; i++)
     317             :     {
     318           0 :         OffsetNumber offnum = itemnos[i];
     319             :         ItemId      iitemid;
     320             : 
     321           0 :         iitemid = PageGetItemId(ipage, offnum);
     322           0 :         itup = (IndexTuple) PageGetItem(ipage, iitemid);
     323             : 
     324             :         Assert(ItemIdIsDead(iitemid));
     325             : 
     326           0 :         ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
     327           0 :         delstate.deltids[i].id = delstate.ndeltids;
     328           0 :         delstate.status[i].idxoffnum = offnum;
     329           0 :         delstate.status[i].knowndeletable = true;   /* LP_DEAD-marked */
     330           0 :         delstate.status[i].promising = false;   /* unused */
     331           0 :         delstate.status[i].freespace = 0;   /* unused */
     332             : 
     333           0 :         delstate.ndeltids++;
     334             :     }
     335             : 
     336             :     /* determine the actual xid horizon */
     337           0 :     snapshotConflictHorizon = table_index_delete_tuples(hrel, &delstate);
     338             : 
     339             :     /* assert tableam agrees that all items are deletable */
     340             :     Assert(delstate.ndeltids == nitems);
     341             : 
     342           0 :     pfree(delstate.deltids);
     343           0 :     pfree(delstate.status);
     344             : 
     345           0 :     return snapshotConflictHorizon;
     346             : }
     347             : 
     348             : 
     349             : /* ----------------------------------------------------------------
     350             :  *      heap-or-index-scan access to system catalogs
     351             :  *
     352             :  *      These functions support system catalog accesses that normally use
     353             :  *      an index but need to be capable of being switched to heap scans
     354             :  *      if the system indexes are unavailable.
     355             :  *
     356             :  *      The specified scan keys must be compatible with the named index.
     357             :  *      Generally this means that they must constrain either all columns
     358             :  *      of the index, or the first K columns of an N-column index.
     359             :  *
     360             :  *      These routines could work with non-system tables, actually,
     361             :  *      but they're only useful when there is a known index to use with
     362             :  *      the given scan keys; so in practice they're only good for
     363             :  *      predetermined types of scans of system catalogs.
     364             :  * ----------------------------------------------------------------
     365             :  */
     366             : 
     367             : /*
     368             :  * systable_beginscan --- set up for heap-or-index scan
     369             :  *
     370             :  *  rel: catalog to scan, already opened and suitably locked
     371             :  *  indexId: OID of index to conditionally use
     372             :  *  indexOK: if false, forces a heap scan (see notes below)
     373             :  *  snapshot: time qual to use (NULL for a recent catalog snapshot)
     374             :  *  nkeys, key: scan keys
     375             :  *
     376             :  * The attribute numbers in the scan key should be set for the heap case.
     377             :  * If we choose to index, we convert them to 1..n to reference the index
     378             :  * columns.  Note this means there must be one scankey qualification per
     379             :  * index column!  This is checked by the Asserts in the normal, index-using
     380             :  * case, but won't be checked if the heapscan path is taken.
     381             :  *
     382             :  * The routine checks the normal cases for whether an indexscan is safe,
     383             :  * but caller can make additional checks and pass indexOK=false if needed.
     384             :  * In standard case indexOK can simply be constant TRUE.
     385             :  */
     386             : SysScanDesc
     387    12813944 : systable_beginscan(Relation heapRelation,
     388             :                    Oid indexId,
     389             :                    bool indexOK,
     390             :                    Snapshot snapshot,
     391             :                    int nkeys, ScanKey key)
     392             : {
     393             :     SysScanDesc sysscan;
     394             :     Relation    irel;
     395             : 
     396    12813944 :     if (indexOK &&
     397    12595138 :         !IgnoreSystemIndexes &&
     398    12483718 :         !ReindexIsProcessingIndex(indexId))
     399    12473724 :         irel = index_open(indexId, AccessShareLock);
     400             :     else
     401      340220 :         irel = NULL;
     402             : 
     403    12813934 :     sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
     404             : 
     405    12813934 :     sysscan->heap_rel = heapRelation;
     406    12813934 :     sysscan->irel = irel;
     407    12813934 :     sysscan->slot = table_slot_create(heapRelation, NULL);
     408             : 
     409    12813934 :     if (snapshot == NULL)
     410             :     {
     411    11761222 :         Oid         relid = RelationGetRelid(heapRelation);
     412             : 
     413    11761222 :         snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
     414    11761222 :         sysscan->snapshot = snapshot;
     415             :     }
     416             :     else
     417             :     {
     418             :         /* Caller is responsible for any snapshot. */
     419     1052712 :         sysscan->snapshot = NULL;
     420             :     }
     421             : 
     422    12813934 :     if (irel)
     423             :     {
     424             :         int         i;
     425             :         ScanKey     idxkey;
     426             : 
     427    12473714 :         idxkey = palloc_array(ScanKeyData, nkeys);
     428             : 
     429             :         /* Convert attribute numbers to be index column numbers. */
     430    32816312 :         for (i = 0; i < nkeys; i++)
     431             :         {
     432             :             int         j;
     433             : 
     434    20342598 :             memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
     435             : 
     436    29693082 :             for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
     437             :             {
     438    29693082 :                 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
     439             :                 {
     440    20342598 :                     idxkey[i].sk_attno = j + 1;
     441    20342598 :                     break;
     442             :                 }
     443             :             }
     444    20342598 :             if (j == IndexRelationGetNumberOfAttributes(irel))
     445           0 :                 elog(ERROR, "column is not in index");
     446             :         }
     447             : 
     448    12473714 :         sysscan->iscan = index_beginscan(heapRelation, irel,
     449             :                                          snapshot, nkeys, 0);
     450    12473714 :         index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
     451    12473714 :         sysscan->scan = NULL;
     452             : 
     453    12473714 :         pfree(idxkey);
     454             :     }
     455             :     else
     456             :     {
     457             :         /*
     458             :          * We disallow synchronized scans when forced to use a heapscan on a
     459             :          * catalog.  In most cases the desired rows are near the front, so
     460             :          * that the unpredictable start point of a syncscan is a serious
     461             :          * disadvantage; and there are no compensating advantages, because
     462             :          * it's unlikely that such scans will occur in parallel.
     463             :          */
     464      340220 :         sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
     465             :                                               nkeys, key,
     466             :                                               true, false);
     467      340220 :         sysscan->iscan = NULL;
     468             :     }
     469             : 
     470             :     /*
     471             :      * If CheckXidAlive is set then set a flag to indicate that system table
     472             :      * scan is in-progress.  See detailed comments in xact.c where these
     473             :      * variables are declared.
     474             :      */
     475    12813934 :     if (TransactionIdIsValid(CheckXidAlive))
     476        1660 :         bsysscan = true;
     477             : 
     478    12813934 :     return sysscan;
     479             : }
     480             : 
     481             : /*
     482             :  * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive.
     483             :  *
     484             :  * Error out, if CheckXidAlive is aborted. We can't directly use
     485             :  * TransactionIdDidAbort as after crash such transaction might not have been
     486             :  * marked as aborted.  See detailed comments in xact.c where the variable
     487             :  * is declared.
     488             :  */
     489             : static inline void
     490    26923194 : HandleConcurrentAbort()
     491             : {
     492    26923194 :     if (TransactionIdIsValid(CheckXidAlive) &&
     493        2406 :         !TransactionIdIsInProgress(CheckXidAlive) &&
     494          16 :         !TransactionIdDidCommit(CheckXidAlive))
     495          16 :         ereport(ERROR,
     496             :                 (errcode(ERRCODE_TRANSACTION_ROLLBACK),
     497             :                  errmsg("transaction aborted during system catalog scan")));
     498    26923178 : }
     499             : 
     500             : /*
     501             :  * systable_getnext --- get next tuple in a heap-or-index scan
     502             :  *
     503             :  * Returns NULL if no more tuples available.
     504             :  *
     505             :  * Note that returned tuple is a reference to data in a disk buffer;
     506             :  * it must not be modified, and should be presumed inaccessible after
     507             :  * next getnext() or endscan() call.
     508             :  *
     509             :  * XXX: It'd probably make sense to offer a slot based interface, at least
     510             :  * optionally.
     511             :  */
     512             : HeapTuple
     513    26490476 : systable_getnext(SysScanDesc sysscan)
     514             : {
     515    26490476 :     HeapTuple   htup = NULL;
     516             : 
     517    26490476 :     if (sysscan->irel)
     518             :     {
     519    23482140 :         if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
     520             :         {
     521             :             bool        shouldFree;
     522             : 
     523    18002748 :             htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
     524             :             Assert(!shouldFree);
     525             : 
     526             :             /*
     527             :              * We currently don't need to support lossy index operators for
     528             :              * any system catalog scan.  It could be done here, using the scan
     529             :              * keys to drive the operator calls, if we arranged to save the
     530             :              * heap attnums during systable_beginscan(); this is practical
     531             :              * because we still wouldn't need to support indexes on
     532             :              * expressions.
     533             :              */
     534    18002748 :             if (sysscan->iscan->xs_recheck)
     535           0 :                 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
     536             :         }
     537             :     }
     538             :     else
     539             :     {
     540     3008336 :         if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
     541             :         {
     542             :             bool        shouldFree;
     543             : 
     544     2929052 :             htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
     545             :             Assert(!shouldFree);
     546             :         }
     547             :     }
     548             : 
     549             :     /*
     550             :      * Handle the concurrent abort while fetching the catalog tuple during
     551             :      * logical streaming of a transaction.
     552             :      */
     553    26490474 :     HandleConcurrentAbort();
     554             : 
     555    26490458 :     return htup;
     556             : }
     557             : 
     558             : /*
     559             :  * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
     560             :  *
     561             :  * In particular, determine if this tuple would be visible to a catalog scan
     562             :  * that started now.  We don't handle the case of a non-MVCC scan snapshot,
     563             :  * because no caller needs that yet.
     564             :  *
     565             :  * This is useful to test whether an object was deleted while we waited to
     566             :  * acquire lock on it.  Returns what table_tuple_satisfies_snapshot() reports.
     567             :  *
     568             :  * Note: we don't actually *need* the tuple to be passed in, but it's a
     569             :  * good crosscheck (Assert only) that the caller is interested in the right tuple.
     570             :  */
     571             : bool
     572      213944 : systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
     573             : {
     574             :     Snapshot    freshsnap;      /* catalog snapshot obtained below; deliberately not registered */
     575             :     bool        result;         /* visibility of the slot's tuple under freshsnap */
     576             : 
     577             :     Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));  /* tup must be the tuple held in the scan's slot */
     578             : 
     579             :     /*
     580             :      * Trust that table_tuple_satisfies_snapshot() and its subsidiaries
     581             :      * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves
     582             :      * acquire snapshots, so we need not register the snapshot.  Those
     583             :      * facilities are too low-level to have any business scanning tables.
     584             :      */
     585      213944 :     freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
     586             : 
     587      213944 :     result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
     588      213944 :                                             sysscan->slot,      /* the test reads the slot, not "tup" */
     589             :                                             freshsnap);
     590             : 
     591             :     /*
     592             :      * Handle the concurrent abort while fetching the catalog tuple during
     593             :      * logical streaming of a transaction.
     594             :      */
     595      213944 :     HandleConcurrentAbort();
     596             : 
     597      213944 :     return result;
     598             : }
     599             : 
     600             : /*
     601             :  * systable_endscan --- close scan, release resources
     602             :  *
     603             :  * Ends the index or table scan (closing the index if one was used), unregisters
     604             :  * sysscan->snapshot if set, and frees sysscan.  Closing the heap relation is still up to the caller.
     604             :  */
     605             : void
     606    12813310 : systable_endscan(SysScanDesc sysscan)
     607             : {
     608    12813310 :     if (sysscan->slot)          /* drop the tuple slot, if the scan has one */
     609             :     {
     610    12813310 :         ExecDropSingleTupleTableSlot(sysscan->slot);
     611    12813310 :         sysscan->slot = NULL;
     612             :     }
     613             : 
     614    12813310 :     if (sysscan->irel)          /* index-driven scan: end it and close the index */
     615             :     {
     616    12473104 :         index_endscan(sysscan->iscan);
     617    12473104 :         index_close(sysscan->irel, AccessShareLock);
     618             :     }
     619             :     else                        /* no index in use: end the table scan instead */
     620      340206 :         table_endscan(sysscan->scan);
     621             : 
     622    12813310 :     if (sysscan->snapshot)      /* only set when there is a snapshot to unregister */
     623    11760598 :         UnregisterSnapshot(sysscan->snapshot);
     624             : 
     625             :     /*
     626             :      * Reset the bsysscan flag at the end of the systable scan.  See detailed
     627             :      * comments in xact.c where these variables are declared.
     628             :      */
     629    12813310 :     if (TransactionIdIsValid(CheckXidAlive))
     630        1644 :         bsysscan = false;
     631             : 
     632    12813310 :     pfree(sysscan);             /* descriptor is freed; callers must not use it afterwards */
     633    12813310 : }
     634             : 
     635             : 
     636             : /*
     637             :  * systable_beginscan_ordered --- set up for ordered catalog scan
     638             :  *
     639             :  * These routines have essentially the same API as systable_beginscan etc,
     640             :  * except that they guarantee to return multiple matching tuples in
     641             :  * index order.  Also, for largely historical reasons, the index to use
     642             :  * is opened and locked by the caller, not here.
     643             :  *
     644             :  * Currently we do not support non-index-based scans here.  (In principle
     645             :  * we could do a heapscan and sort, but the uses are in places that
     646             :  * probably don't need to still work with corrupted catalog indexes.)
     647             :  * For the moment, therefore, these functions are merely the thinnest of
     648             :  * wrappers around index_beginscan/index_getnext_slot.  The main reason for
     649             :  * their existence is to centralize possible future support of lossy operators
     650             :  * in catalog scans.
     651             :  */
     652             : SysScanDesc
     653       54472 : systable_beginscan_ordered(Relation heapRelation,
     654             :                            Relation indexRelation,
     655             :                            Snapshot snapshot,
     656             :                            int nkeys, ScanKey key)
     657             : {
     658             :     SysScanDesc sysscan;        /* palloc'd descriptor returned to the caller */
     659             :     int         i;
     660             :     ScanKey     idxkey;         /* temporary copy of "key" with index column numbers */
     661             : 
     662             :     /* REINDEX can probably be a hard error here ... */
     663       54472 :     if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
     664           0 :         ereport(ERROR,
     665             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     666             :                  errmsg("cannot access index \"%s\" while it is being reindexed",
     667             :                         RelationGetRelationName(indexRelation))));
     668             :     /* ... but we only throw a warning about violating IgnoreSystemIndexes */
     669       54472 :     if (IgnoreSystemIndexes)
     670           0 :         elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
     671             :              RelationGetRelationName(indexRelation));
     672             : 
     673       54472 :     sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
     674             : 
     675       54472 :     sysscan->heap_rel = heapRelation;
     676       54472 :     sysscan->irel = indexRelation;
     677       54472 :     sysscan->slot = table_slot_create(heapRelation, NULL);
     678             : 
     679       54472 :     if (snapshot == NULL)
     680             :     {
     681        8248 :         Oid         relid = RelationGetRelid(heapRelation);
     682             : 
     683        8248 :         snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
     684        8248 :         sysscan->snapshot = snapshot;   /* remembered so systable_endscan_ordered() unregisters it */
     685             :     }
     686             :     else
     687             :     {
     688             :         /* Caller is responsible for any snapshot; NULL means endscan leaves it alone. */
     689       46224 :         sysscan->snapshot = NULL;
     690             :     }
     691             : 
     692       54472 :     idxkey = palloc_array(ScanKeyData, nkeys);
     693             : 
     694             :     /* Convert attribute numbers to be index column numbers (in a copy; "key" is not modified). */
     695      105786 :     for (i = 0; i < nkeys; i++)
     696             :     {
     697             :         int         j;
     698             : 
     699       51314 :         memcpy(&idxkey[i], &key[i], sizeof(ScanKeyData));
     700             : 
     701       54544 :         for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
     702             :         {
     703       54544 :             if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
     704             :             {
     705       51314 :                 idxkey[i].sk_attno = j + 1;     /* index column numbers are 1-based */
     706       51314 :                 break;
     707             :             }
     708             :         }
     709       51314 :         if (j == IndexRelationGetNumberOfAttributes(indexRelation))     /* loop ran off the end: no match */
     710           0 :             elog(ERROR, "column is not in index");
     711             :     }
     712             : 
     713       54472 :     sysscan->iscan = index_beginscan(heapRelation, indexRelation,
     714             :                                      snapshot, nkeys, 0);
     715       54472 :     index_rescan(sysscan->iscan, idxkey, nkeys, NULL, 0);
     716       54472 :     sysscan->scan = NULL;       /* ordered scans never set up a table scan */
     717             : 
     718       54472 :     pfree(idxkey);              /* converted keys are not referenced again in this function */
     719             : 
     720             :     /*
     721             :      * If CheckXidAlive is set then set a flag to indicate that system table
     722             :      * scan is in-progress.  See detailed comments in xact.c where these
     723             :      * variables are declared.
     724             :      */
     725       54472 :     if (TransactionIdIsValid(CheckXidAlive))
     726           2 :         bsysscan = true;
     727             : 
     728       54472 :     return sysscan;
     729             : }
     730             : 
     731             : /*
     732             :  * systable_getnext_ordered --- get next tuple in an ordered catalog scan; returns NULL when the index scan yields nothing more
     733             :  */
     734             : HeapTuple
     735      218782 : systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
     736             : {
     737      218782 :     HeapTuple   htup = NULL;    /* stays NULL if index_getnext_slot() finds no tuple */
     738             : 
     739             :     Assert(sysscan->irel);      /* ordered scans are always index-driven */
     740      218782 :     if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))   /* "direction" is passed straight through */
     741      165540 :         htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
     742             : 
     743             :     /* See notes in systable_getnext: lossy index conditions are not supported */
     744      218776 :     if (htup && sysscan->iscan->xs_recheck)
     745           0 :         elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
     746             : 
     747             :     /*
     748             :      * Handle the concurrent abort while fetching the catalog tuple during
     749             :      * logical streaming of a transaction.
     750             :      */
     751      218776 :     HandleConcurrentAbort();
     752             : 
     753      218776 :     return htup;
     754             : }
     755             : 
     756             : /*
     757             :  * systable_endscan_ordered --- close scan, release resources (the index and heap relation are not closed here)
     758             :  */
     759             : void
     760       54454 : systable_endscan_ordered(SysScanDesc sysscan)
     761             : {
     762       54454 :     if (sysscan->slot)          /* drop the tuple slot, if the scan has one */
     763             :     {
     764       54454 :         ExecDropSingleTupleTableSlot(sysscan->slot);
     765       54454 :         sysscan->slot = NULL;
     766             :     }
     767             : 
     768             :     Assert(sysscan->irel);      /* ordered scans are always index-driven */
     769       54454 :     index_endscan(sysscan->iscan);      /* no index_close(): the caller opened and locked the index */
     770       54454 :     if (sysscan->snapshot)      /* set only when systable_beginscan_ordered() registered the snapshot itself */
     771        8236 :         UnregisterSnapshot(sysscan->snapshot);
     772             : 
     773             :     /*
     774             :      * Reset the bsysscan flag at the end of the systable scan.  See detailed
     775             :      * comments in xact.c where these variables are declared.
     776             :      */
     777       54454 :     if (TransactionIdIsValid(CheckXidAlive))
     778           2 :         bsysscan = false;
     779             : 
     780       54454 :     pfree(sysscan);             /* descriptor is freed; callers must not use it afterwards */
     781       54454 : }
     782             : 
     783             : /*
     784             :  * systable_inplace_update_begin --- update a row "in place" (overwrite it)
     785             :  *
     786             :  * Overwriting violates both MVCC and transactional safety, so the uses of
     787             :  * this function in Postgres are extremely limited.  Nonetheless we find some
     788             :  * places to use it.  See README.tuplock section "Locking to write
     789             :  * inplace-updated tables" and later sections for expectations of readers and
     790             :  * writers of a table that gets inplace updates.  Standard flow:
     791             :  *
     792             :  * ... [any slow preparation not requiring oldtup] ...
     793             :  * systable_inplace_update_begin([...], &tup, &inplace_state);
     794             :  * if (!HeapTupleIsValid(tup))
     795             :  *  elog(ERROR, [...]);
     796             :  * ... [buffer is exclusive-locked; mutate "tup"] ...
     797             :  * if (dirty)
     798             :  *  systable_inplace_update_finish(inplace_state, tup);
     799             :  * else
     800             :  *  systable_inplace_update_cancel(inplace_state);
     801             :  *
     802             :  * The first several params duplicate the systable_beginscan() param list.
     803             :  * "oldtupcopy" is an output parameter, assigned NULL if the key ceases to
     804             :  * find a live tuple.  (In PROC_IN_VACUUM, that is a low-probability transient
     805             :  * condition.)  If "oldtupcopy" gets non-NULL, you must pass output parameter
     806             :  * "state" to systable_inplace_update_finish() or
     807             :  * systable_inplace_update_cancel().  "state" is not assigned when "oldtupcopy" gets NULL.
     808             :  */
     809             : void
     810      259648 : systable_inplace_update_begin(Relation relation,
     811             :                               Oid indexId,
     812             :                               bool indexOK,
     813             :                               Snapshot snapshot,
     814             :                               int nkeys, const ScanKeyData *key,
     815             :                               HeapTuple *oldtupcopy,
     816             :                               void **state)
     817             : {
     818      259648 :     int         retries = 0;    /* loop iterations attempted so far */
     819             :     SysScanDesc scan;           /* scan of the current iteration; handed back via *state on success */
     820             :     HeapTuple   oldtup;         /* tuple as fetched by the scan; the caller receives a copy */
     821             :     BufferHeapTupleTableSlot *bslot;    /* scan->slot viewed as a buffer slot, for its tuple and buffer */
     822             : 
     823             :     /*
     824             :      * For now, we don't allow parallel updates.  Unlike a regular update,
     825             :      * this should never create a combo CID, so it might be possible to relax
     826             :      * this restriction, but not without more thought and testing.  It's not
     827             :      * clear that it would be useful, anyway.
     828             :      */
     829      259648 :     if (IsInParallelMode())
     830           0 :         ereport(ERROR,
     831             :                 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
     832             :                  errmsg("cannot update tuples during a parallel operation")));
     833             : 
     834             :     /*
     835             :      * Accept a snapshot argument, for symmetry, but this function advances
     836             :      * its snapshot as needed to reach the tail of the updated tuple chain.
     837             :      */
     838             :     Assert(snapshot == NULL);
     839             : 
     840             :     Assert(IsInplaceUpdateRelation(relation) || !IsSystemRelation(relation));
     841             : 
     842             :     /* Loop for an exclusive-locked buffer of a non-updated tuple; each iteration begins a fresh scan. */
     843             :     do
     844             :     {
     845             :         TupleTableSlot *slot;
     846             : 
     847      259686 :         CHECK_FOR_INTERRUPTS();
     848             : 
     849             :         /*
     850             :          * Processes issuing heap_update (e.g. GRANT) at maximum speed could
     851             :          * drive us to this error.  A hostile table owner has stronger ways to
     852             :          * damage their own table, so that's minor.
     853             :          */
     854      259686 :         if (retries++ > 10000)
     855           0 :             elog(ERROR, "giving up after too many tries to overwrite row");
     856             : 
     857      259686 :         INJECTION_POINT("inplace-before-pin");
     858      259686 :         scan = systable_beginscan(relation, indexId, indexOK, snapshot,
     859      259686 :                                   nkeys, unconstify(ScanKeyData *, key));
     860      259684 :         oldtup = systable_getnext(scan);
     861      259684 :         if (!HeapTupleIsValid(oldtup))
     862             :         {
     863           0 :             systable_endscan(scan);     /* nothing found: release the scan before reporting NULL */
     864           0 :             *oldtupcopy = NULL;
     865           0 :             return;                     /* note: *state is left unassigned on this path */
     866             :         }
     867             : 
     868      259684 :         slot = scan->slot;
     869             :         Assert(TTS_IS_BUFFERTUPLE(slot));       /* justifies the cast to BufferHeapTupleTableSlot below */
     870      259684 :         bslot = (BufferHeapTupleTableSlot *) slot;
     871      259684 :     } while (!heap_inplace_lock(scan->heap_rel,
     872             :                                 bslot->base.tuple, bslot->buffer,
     873      259684 :                                 (void (*) (void *)) systable_endscan, scan));   /* NOTE(review): presumably heap_inplace_lock() uses this callback to end the scan when it returns false -- confirm in heapam.c */
     874             : 
     875      259646 :     *oldtupcopy = heap_copytuple(oldtup);       /* caller gets a copy, not the tuple held by the slot */
     876      259646 :     *state = scan;              /* scan stays open; finish/cancel end it via systable_endscan() */
     877             : }
     878             : 
     879             : /*
     880             :  * systable_inplace_update_finish --- second phase of inplace update
     881             :  *
     882             :  * The tuple cannot change size, and therefore its header fields and null
     883             :  * bitmap (if any) don't change either.  "state" is the value systable_inplace_update_begin() stored.
     884             :  */
     885             : void
     886      151216 : systable_inplace_update_finish(void *state, HeapTuple tuple)
     887             : {
     888      151216 :     SysScanDesc scan = (SysScanDesc) state;     /* the scan left open by systable_inplace_update_begin() */
     889      151216 :     Relation    relation = scan->heap_rel;
     890      151216 :     TupleTableSlot *slot = scan->slot;
     891      151216 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     892      151216 :     HeapTuple   oldtup = bslot->base.tuple;     /* the tuple still held by the scan's slot */
     893      151216 :     Buffer      buffer = bslot->buffer;         /* the buffer recorded in that slot */
     894             : 
     895      151216 :     heap_inplace_update_and_unlock(relation, oldtup, tuple, buffer);
     896      151216 :     systable_endscan(scan);     /* ends the scan and frees the descriptor; "state" is dead after this */
     897      151216 : }
     898             : 
     899             : /*
     900             :  * systable_inplace_update_cancel --- abandon inplace update
     901             :  *
     902             :  * This is an alternative to making a no-op update.  "state" is the value systable_inplace_update_begin() stored.
     903             :  */
     904             : void
     905      108430 : systable_inplace_update_cancel(void *state)
     906             : {
     907      108430 :     SysScanDesc scan = (SysScanDesc) state;     /* the scan left open by systable_inplace_update_begin() */
     908      108430 :     Relation    relation = scan->heap_rel;
     909      108430 :     TupleTableSlot *slot = scan->slot;
     910      108430 :     BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot;
     911      108430 :     HeapTuple   oldtup = bslot->base.tuple;     /* the tuple still held by the scan's slot */
     912      108430 :     Buffer      buffer = bslot->buffer;         /* the buffer recorded in that slot */
     913             : 
     914      108430 :     heap_inplace_unlock(relation, oldtup, buffer);      /* unlock without writing anything */
     915      108430 :     systable_endscan(scan);     /* ends the scan and frees the descriptor; "state" is dead after this */
     916      108430 : }

Generated by: LCOV version 1.14