LCOV - code coverage report
Current view: top level - src/backend/utils/time - snapmgr.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 492 554 88.8 %
Date: 2024-04-26 04:11:37 Functions: 50 50 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * snapmgr.c
       4             :  *      PostgreSQL snapshot manager
       5             :  *
       6             :  * We keep track of snapshots in two ways: those "registered" by resowner.c,
       7             :  * and the "active snapshot" stack.  All snapshots in either of them live in
       8             :  * persistent memory.  When a snapshot is no longer in any of these lists
       9             :  * (tracked by separate refcounts on each snapshot), its memory can be freed.
      10             :  *
      11             :  * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
      12             :  * regd_count and list it in RegisteredSnapshots, but this reference is not
      13             :  * tracked by a resource owner. We used to use the TopTransactionResourceOwner
      14             :  * to track this snapshot reference, but that introduces logical circularity
      15             :  * and thus makes it impossible to clean up in a sane fashion.  It's better to
      16             :  * handle this reference as an internally-tracked registration, so that this
      17             :  * module is entirely lower-level than ResourceOwners.
      18             :  *
      19             :  * Likewise, any snapshots that have been exported by pg_export_snapshot
      20             :  * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
      21             :  * tracked by any resource owner.
      22             :  *
      23             :  * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
      24             :  * is valid, but is not tracked by any resource owner.
      25             :  *
      26             :  * The same is true for historic snapshots used during logical decoding;
      27             :  * their lifetime is managed separately (as they live longer than one xact.c
      28             :  * transaction).
      29             :  *
      30             :  * These arrangements let us reset MyProc->xmin when there are no snapshots
      31             :  * referenced by this transaction, and advance it when the one with oldest
      32             :  * Xmin is no longer referenced.  For simplicity however, only registered
      33             :  * snapshots, not active snapshots, participate in tracking which one is oldest;
      34             :  * we don't try to change MyProc->xmin except when the active-snapshot
      35             :  * stack is empty.
      36             :  *
      37             :  *
      38             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      39             :  * Portions Copyright (c) 1994, Regents of the University of California
      40             :  *
      41             :  * IDENTIFICATION
      42             :  *    src/backend/utils/time/snapmgr.c
      43             :  *
      44             :  *-------------------------------------------------------------------------
      45             :  */
      46             : #include "postgres.h"
      47             : 
      48             : #include <sys/stat.h>
      49             : #include <unistd.h>
      50             : 
      51             : #include "access/subtrans.h"
      52             : #include "access/transam.h"
      53             : #include "access/xact.h"
      54             : #include "datatype/timestamp.h"
      55             : #include "lib/pairingheap.h"
      56             : #include "miscadmin.h"
      57             : #include "port/pg_lfind.h"
      58             : #include "storage/fd.h"
      59             : #include "storage/predicate.h"
      60             : #include "storage/proc.h"
      61             : #include "storage/procarray.h"
      62             : #include "utils/builtins.h"
      63             : #include "utils/memutils.h"
      64             : #include "utils/resowner.h"
      65             : #include "utils/snapmgr.h"
      66             : #include "utils/syscache.h"
      67             : 
      68             : 
      69             : /*
      70             :  * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
      71             :  * mode, and to the latest one taken in a read-committed transaction.
      72             :  * SecondarySnapshot is a snapshot that's always up-to-date as of the current
      73             :  * instant, even in transaction-snapshot mode.  It should only be used for
      74             :  * special-purpose code (say, RI checking).  CatalogSnapshot points to an
      75             :  * MVCC snapshot intended to be used for catalog scans; we must invalidate it
      76             :  * whenever a system catalog change occurs.
      77             :  *
      78             :  * These SnapshotData structs are static to simplify memory allocation
      79             :  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
      80             :  */
      81             : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};	/* handed to GetSnapshotData when setting CurrentSnapshot */
      82             : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};	/* handed to GetSnapshotData when setting SecondarySnapshot */
      83             : SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};	/* handed to GetSnapshotData when setting CatalogSnapshot */
      84             : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
      85             : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
      86             : 
      87             : /* Pointers to valid snapshots (NULL means none is currently set) */
      88             : static Snapshot CurrentSnapshot = NULL;
      89             : static Snapshot SecondarySnapshot = NULL;
      90             : static Snapshot CatalogSnapshot = NULL;	/* reset to NULL by InvalidateCatalogSnapshot */
      91             : static Snapshot HistoricSnapshot = NULL;	/* returned by the Get*Snapshot functions while HistoricSnapshotActive() */
      92             : 
      93             : /*
      94             :  * These are updated by GetSnapshotData.  We initialize them this way
      95             :  * for the convenience of TransactionIdIsInProgress: even in bootstrap
      96             :  * mode, we don't want it to say that BootstrapTransactionId is in progress.
      97             :  */
      98             : TransactionId TransactionXmin = FirstNormalTransactionId;
      99             : TransactionId RecentXmin = FirstNormalTransactionId;
     100             : 
     101             : /* (table, ctid) => (cmin, cmax) mapping during timetravel */
     102             : static HTAB *tuplecid_data = NULL;
     103             : 
     104             : /*
     105             :  * Elements of the active snapshot stack.
     106             :  *
     107             :  * Each element here accounts for exactly one active_count on SnapshotData.
     108             :  *
     109             :  * NB: the code assumes that elements in this list are in non-increasing
     110             :  * order of as_level; also, the list must be NULL-terminated.
     111             :  */
     112             : typedef struct ActiveSnapshotElt
     113             : {
     114             :     Snapshot    as_snap;	/* snapshot held by this stack entry */
     115             :     int         as_level;	/* ordering key, see NB above; presumably the xact nesting level -- TODO confirm */
     116             :     struct ActiveSnapshotElt *as_next;	/* next list element; NULL terminates the list */
     117             : } ActiveSnapshotElt;
     118             : 
     119             : /* Top of the stack of active snapshots (NULL when the stack is empty) */
     120             : static ActiveSnapshotElt *ActiveSnapshot = NULL;
     121             : 
     122             : /* Bottom of the stack of active snapshots; consulted by GetOldestSnapshot */
     123             : static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
     124             : 
     125             : /*
     126             :  * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
     127             :  * quickly find the one with lowest xmin, to advance our MyProc->xmin.
     128             :  */
     129             : static int  xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
     130             :                      void *arg);
     131             : 
     132             : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
     133             : 
     134             : /* has the first snapshot of the current transaction been set already? */
     135             : bool        FirstSnapshotSet = false;
     136             : 
     137             : /*
     138             :  * Remember the serializable transaction snapshot, if any.  We cannot trust
     139             :  * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
     140             :  * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
     141             :  */
     142             : static Snapshot FirstXactSnapshot = NULL;
     143             : 
     144             : /* Define pathname of exported-snapshot files */
     145             : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
     146             : 
     147             : /* Structure holding info about exported snapshot. */
     148             : typedef struct ExportedSnapshot
     149             : {
     150             :     char       *snapfile;	/* presumably the file's path under SNAPSHOT_EXPORT_DIR -- TODO confirm */
     151             :     Snapshot    snapshot;	/* the snapshot that was exported */
     152             : } ExportedSnapshot;
     153             : 
     154             : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
     155             : static List *exportedSnapshots = NIL;
     156             : 
     157             : /* Prototypes for local functions */
     158             : static Snapshot CopySnapshot(Snapshot snapshot);
     159             : static void UnregisterSnapshotNoOwner(Snapshot snapshot);
     160             : static void FreeSnapshot(Snapshot snapshot);
     161             : static void SnapshotResetXmin(void);
     162             : 
     163             : /* ResourceOwner callback and descriptor used to track snapshot references */
     164             : static void ResOwnerReleaseSnapshot(Datum res);
     165             : 
     166             : static const ResourceOwnerDesc snapshot_resowner_desc =
     167             : {
     168             :     .name = "snapshot reference",
     169             :     .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
     170             :     .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
     171             :     .ReleaseResource = ResOwnerReleaseSnapshot,
     172             :     .DebugPrint = NULL          /* the default message is fine */
     173             : };
     174             : 
     175             : /* Convenience wrappers over ResourceOwnerRemember/Forget; both pass the snapshot as a Datum together with snapshot_resowner_desc */
     176             : static inline void
     177    11223666 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
     178             : {
     179    11223666 :     ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
     180    11223666 : }
     181             : static inline void
     182    11168872 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)	/* forwards to ResourceOwnerForget with snapshot_resowner_desc */
     183             : {
     184    11168872 :     ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
     185    11168872 : }
     186             : 
     187             : /*
     188             :  * Snapshot fields to be serialized (presumably mirroring the like-named SnapshotData members -- TODO confirm).
     189             :  *
     190             :  * Only these fields need to be sent to the cooperating backend; the
     191             :  * remaining ones can (and must) be set by the receiver upon restore.
     192             :  */
     193             : typedef struct SerializedSnapshotData
     194             : {
     195             :     TransactionId xmin;
     196             :     TransactionId xmax;
     197             :     uint32      xcnt;
     198             :     int32       subxcnt;
     199             :     bool        suboverflowed;
     200             :     bool        takenDuringRecovery;
     201             :     CommandId   curcid;
     202             :     TimestampTz whenTaken;
     203             :     XLogRecPtr  lsn;
     204             : } SerializedSnapshotData;
     205             : 
     206             : /*
     207             :  * GetTransactionSnapshot
     208             :  *      Get the appropriate snapshot for a new query in a transaction.
     209             :  *
     210             :  * Note that the return value may point at static storage that will be modified
     211             :  * by future calls and by CommandCounterIncrement().  Callers should call
     212             :  * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
     213             :  * used for very long.
     214             :  */
     215             : Snapshot
     216     1609708 : GetTransactionSnapshot(void)
     217             : {
     218             :     /*
     219             :      * Return historic snapshot if doing logical decoding. We'll never need a
     220             :      * non-historic transaction snapshot in this (sub-)transaction, so there's
     221             :      * no need to be careful to set one up for later calls to
     222             :      * GetTransactionSnapshot().
     223             :      */
     224     1609708 :     if (HistoricSnapshotActive())
     225             :     {
     226             :         Assert(!FirstSnapshotSet);
     227           0 :         return HistoricSnapshot;
     228             :     }
     229             : 
     230             :     /* First call in transaction? */
     231     1609708 :     if (!FirstSnapshotSet)
     232             :     {
     233             :         /*
     234             :          * Don't allow catalog snapshot to be older than xact snapshot.  Must
     235             :          * do this first to allow the empty-heap Assert to succeed.
     236             :          */
     237      517494 :         InvalidateCatalogSnapshot();
     238             : 
     239             :         Assert(pairingheap_is_empty(&RegisteredSnapshots));
     240             :         Assert(FirstXactSnapshot == NULL);
     241             : 
     242      517494 :         if (IsInParallelMode())
     243           0 :             elog(ERROR,
     244             :                  "cannot take query snapshot during a parallel operation");
     245             : 
     246             :         /*
     247             :          * In transaction-snapshot mode, the first snapshot must live until
     248             :          * end of xact regardless of what the caller does with it, so we must
     249             :          * make a copy of it rather than returning CurrentSnapshotData
     250             :          * directly.  Furthermore, if we're running in serializable mode,
     251             :          * predicate.c needs to wrap the snapshot fetch in its own processing.
     252             :          */
     253      517494 :         if (IsolationUsesXactSnapshot())
     254             :         {
     255             :             /* First, create the snapshot in CurrentSnapshotData */
     256        5376 :             if (IsolationIsSerializable())
     257        3304 :                 CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
     258             :             else
     259        2072 :                 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     260             :             /* Make a saved copy */
     261        5376 :             CurrentSnapshot = CopySnapshot(CurrentSnapshot);
     262        5376 :             FirstXactSnapshot = CurrentSnapshot;
     263             :             /* Mark it as registered: bump regd_count and add it to RegisteredSnapshots (no resource owner involved) */
     264        5376 :             FirstXactSnapshot->regd_count++;
     265        5376 :             pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
     266             :         }
     267             :         else
     268      512118 :             CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     269             : 
     270      517494 :         FirstSnapshotSet = true;
     271      517494 :         return CurrentSnapshot;
     272             :     }
     273             : 
     274     1092214 :     if (IsolationUsesXactSnapshot())
     275      196494 :         return CurrentSnapshot;	/* transaction-snapshot mode: keep using the snapshot set up on the first call */
     276             : 
     277             :     /* Don't allow catalog snapshot to be older than xact snapshot. */
     278      895720 :     InvalidateCatalogSnapshot();
     279             : 
     280      895720 :     CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);	/* otherwise take a fresh snapshot on every call */
     281             : 
     282      895720 :     return CurrentSnapshot;
     283             : }
     284             : 
     285             : /*
     286             :  * GetLatestSnapshot
     287             :  *      Get a snapshot that is up-to-date as of the current instant,
     288             :  *      even if we are executing in transaction-snapshot mode.  Raises an error in parallel mode.
     289             :  */
     290             : Snapshot
     291      296506 : GetLatestSnapshot(void)
     292             : {
     293             :     /*
     294             :      * We might be able to relax this, but nothing that could otherwise work
     295             :      * needs it.
     296             :      */
     297      296506 :     if (IsInParallelMode())
     298           0 :         elog(ERROR,
     299             :              "cannot update SecondarySnapshot during a parallel operation");
     300             : 
     301             :     /*
     302             :      * So far there are no cases requiring support for GetLatestSnapshot()
     303             :      * during logical decoding, but it wouldn't be hard to add if required.
     304             :      */
     305             :     Assert(!HistoricSnapshotActive());
     306             : 
     307             :     /* If first call in transaction, go ahead and set the xact snapshot (and return that one instead) */
     308      296506 :     if (!FirstSnapshotSet)
     309         100 :         return GetTransactionSnapshot();
     310             : 
     311      296406 :     SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
     312             : 
     313      296406 :     return SecondarySnapshot;
     314             : }
     315             : 
     316             : /*
     317             :  * GetOldestSnapshot
     318             :  *
     319             :  *      Get the transaction's oldest known snapshot, as judged by the LSN.
     320             :  *      Will return NULL if there are no active or registered snapshots.
     321             :  */
     322             : Snapshot
     323       42774 : GetOldestSnapshot(void)
     324             : {
     325       42774 :     Snapshot    OldestRegisteredSnapshot = NULL;
     326       42774 :     XLogRecPtr  RegisteredLSN = InvalidXLogRecPtr;
     327             : 
     328       42774 :     if (!pairingheap_is_empty(&RegisteredSnapshots))
     329             :     {
     330       42504 :         OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
     331             :                                                          pairingheap_first(&RegisteredSnapshots));	/* first node of the registered-snapshot heap */
     332       42504 :         RegisteredLSN = OldestRegisteredSnapshot->lsn;
     333             :     }
     334             : 
     335       42774 :     if (OldestActiveSnapshot != NULL)
     336             :     {
     337       42764 :         XLogRecPtr  ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
     338             : 
     339       42764 :         if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)	/* nothing registered, or the active one has the lower LSN */
     340       42764 :             return OldestActiveSnapshot->as_snap;
     341             :     }
     342             : 
     343          10 :     return OldestRegisteredSnapshot;	/* still NULL if the heap was empty */
     344             : }
     345             : 
     346             : /*
     347             :  * GetCatalogSnapshot
     348             :  *      Get a snapshot that is sufficiently up-to-date for scan of the
     349             :  *      system catalog with the specified OID (relid).
     350             :  */
     351             : Snapshot
     352    10132136 : GetCatalogSnapshot(Oid relid)
     353             : {
     354             :     /*
     355             :      * Return historic snapshot while we're doing logical decoding, so we can
     356             :      * see the appropriate state of the catalog.
     357             :      *
     358             :      * This is the primary reason for needing to reset the system caches after
     359             :      * finishing decoding.
     360             :      */
     361    10132136 :     if (HistoricSnapshotActive())
     362       24162 :         return HistoricSnapshot;
     363             : 
     364    10107974 :     return GetNonHistoricCatalogSnapshot(relid);	/* normal case: use the cached catalog snapshot logic */
     365             : }
     366             : 
     367             : /*
     368             :  * GetNonHistoricCatalogSnapshot
     369             :  *      Get a snapshot that is sufficiently up-to-date for scan of the system
     370             :  *      catalog with the specified OID, even while historic snapshots are set
     371             :  *      up.
     372             :  */
     373             : Snapshot
     374    10110594 : GetNonHistoricCatalogSnapshot(Oid relid)
     375             : {
     376             :     /*
     377             :      * If the caller is trying to scan a relation that has no syscache, no
     378             :      * catcache invalidations will be sent when it is updated.  For a few key
     379             :      * relations, snapshot invalidations are sent instead.  If we're trying to
     380             :      * scan a relation for which neither catcache nor snapshot invalidations
     381             :      * are sent, we must refresh the snapshot every time.
     382             :      */
     383    10110594 :     if (CatalogSnapshot &&
     384     9038854 :         !RelationInvalidatesSnapshotsOnly(relid) &&
     385     7702334 :         !RelationHasSysCache(relid))
     386      428212 :         InvalidateCatalogSnapshot();	/* clears CatalogSnapshot, forcing the refresh below */
     387             : 
     388    10110594 :     if (CatalogSnapshot == NULL)
     389             :     {
     390             :         /* Get new snapshot. */
     391     1499952 :         CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
     392             : 
     393             :         /*
     394             :          * Make sure the catalog snapshot will be accounted for in decisions
     395             :          * about advancing PGPROC->xmin.  We could apply RegisterSnapshot, but
     396             :          * that would result in making a physical copy, which is overkill; and
     397             :          * it would also create a dependency on some resource owner, which we
     398             :          * do not want for reasons explained at the head of this file. Instead
     399             :          * just shove the CatalogSnapshot into the pairing heap manually. This
     400             :          * has to be reversed in InvalidateCatalogSnapshot, of course.
     401             :          *
     402             :          * NB: it had better be impossible for this to throw error, since the
     403             :          * CatalogSnapshot pointer is already valid.
     404             :          */
     405     1499952 :         pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
     406             :     }
     407             : 
     408    10110594 :     return CatalogSnapshot;
     409             : }
     410             : 
     411             : /*
     412             :  * InvalidateCatalogSnapshot
     413             :  *      Mark the current catalog snapshot, if any, as invalid
     414             :  *
     415             :  * We could change this API to allow the caller to provide more fine-grained
     416             :  * invalidation details, so that a change to relation A wouldn't prevent us
     417             :  * from using our cached snapshot to scan relation B, but so far there's no
     418             :  * evidence that the CPU cycles we would spend tracking such fine details would
     419             :  * be well-spent.
     420             :  */
     421             : void
     422    22583572 : InvalidateCatalogSnapshot(void)
     423             : {
     424    22583572 :     if (CatalogSnapshot)
     425             :     {
     426     1499952 :         pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);	/* reverses the pairingheap_add in GetNonHistoricCatalogSnapshot */
     427     1499952 :         CatalogSnapshot = NULL;
     428     1499952 :         SnapshotResetXmin();
     429             :     }
     430    22583572 : }
     431             : 
     432             : /*
     433             :  * InvalidateCatalogSnapshotConditionally
     434             :  *      Drop catalog snapshot if it's the only one we have
     435             :  *
     436             :  * This is called when we are about to wait for client input, so we don't
     437             :  * want to continue holding the catalog snapshot if it might mean that the
     438             :  * global xmin horizon can't advance.  However, if there are other snapshots
     439             :  * still active or registered, the catalog snapshot isn't likely to be the
     440             :  * oldest one, so we might as well keep it.
     441             :  */
     442             : void
     443      701398 : InvalidateCatalogSnapshotConditionally(void)
     444             : {
     445      701398 :     if (CatalogSnapshot &&
     446       90262 :         ActiveSnapshot == NULL &&	/* active-snapshot stack is empty */
     447       88630 :         pairingheap_is_singular(&RegisteredSnapshots))	/* catalog snapshot is the only registered one */
     448       16096 :         InvalidateCatalogSnapshot();
     449      701398 : }
     450             : 
     451             : /*
     452             :  * SnapshotSetCommandId
     453             :  *      Propagate CommandCounterIncrement into the static snapshots, if set
     454             :  */
     455             : void
     456     1008536 : SnapshotSetCommandId(CommandId curcid)
     457             : {
     458     1008536 :     if (!FirstSnapshotSet)
     459       17356 :         return;	/* no snapshot has been set in this transaction yet */
     460             : 
     461      991180 :     if (CurrentSnapshot)
     462      991180 :         CurrentSnapshot->curcid = curcid;
     463      991180 :     if (SecondarySnapshot)
     464      160436 :         SecondarySnapshot->curcid = curcid;
     465             :     /* Should we do the same with CatalogSnapshot? */
     466             : }
     467             : 
     468             : /*
     469             :  * SetTransactionSnapshot
     470             :  *      Set the transaction's snapshot from an imported MVCC snapshot (sourcesnap).
     471             :  *
     472             :  * Note that this is very closely tied to GetTransactionSnapshot --- it
     473             :  * must take care of all the same considerations as the first-snapshot case
     474             :  * in GetTransactionSnapshot.
     475             :  */
     476             : static void
     477        3016 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
     478             :                        int sourcepid, PGPROC *sourceproc)	/* sourceproc may be NULL; the source is then identified by sourcevxid */
     479             : {
     480             :     /* Caller should have checked this already */
     481             :     Assert(!FirstSnapshotSet);
     482             : 
     483             :     /* Better do this to ensure following Assert succeeds. */
     484        3016 :     InvalidateCatalogSnapshot();
     485             : 
     486             :     Assert(pairingheap_is_empty(&RegisteredSnapshots));
     487             :     Assert(FirstXactSnapshot == NULL);
     488             :     Assert(!HistoricSnapshotActive());
     489             : 
     490             :     /*
     491             :      * Even though we are not going to use the snapshot it computes, we must
     492             :      * call GetSnapshotData, for two reasons: (1) to be sure that
     493             :      * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
     494             :      * the state for GlobalVis*.
     495             :      */
     496        3016 :     CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     497             : 
     498             :     /*
     499             :      * Now copy appropriate fields from the source snapshot.
     500             :      */
     501        3016 :     CurrentSnapshot->xmin = sourcesnap->xmin;
     502        3016 :     CurrentSnapshot->xmax = sourcesnap->xmax;
     503        3016 :     CurrentSnapshot->xcnt = sourcesnap->xcnt;
     504             :     Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
     505        3016 :     if (sourcesnap->xcnt > 0)
     506         490 :         memcpy(CurrentSnapshot->xip, sourcesnap->xip,
     507         490 :                sourcesnap->xcnt * sizeof(TransactionId));
     508        3016 :     CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
     509             :     Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
     510        3016 :     if (sourcesnap->subxcnt > 0)
     511           4 :         memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
     512           4 :                sourcesnap->subxcnt * sizeof(TransactionId));
     513        3016 :     CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
     514        3016 :     CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
     515             :     /* NB: curcid should NOT be copied, it's a local matter */
     516             : 
     517        3016 :     CurrentSnapshot->snapXactCompletionCount = 0;
     518             : 
     519             :     /*
     520             :      * Now we have to fix what GetSnapshotData did with MyProc->xmin and
     521             :      * TransactionXmin.  There is a race condition: to make sure we are not
     522             :      * causing the global xmin to go backwards, we have to test that the
     523             :      * source transaction is still running, and that has to be done
     524             :      * atomically. So let procarray.c do it.
     525             :      *
     526             :      * Note: in serializable mode, predicate.c will do this a second time. It
     527             :      * doesn't seem worth contorting the logic here to avoid two calls,
     528             :      * especially since it's not clear that predicate.c *must* do this.
     529             :      */
     530        3016 :     if (sourceproc != NULL)	/* source PGPROC supplied by the caller: install xmin via it */
     531             :     {
     532        2984 :         if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
     533           0 :             ereport(ERROR,
     534             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     535             :                      errmsg("could not import the requested snapshot"),
     536             :                      errdetail("The source transaction is not running anymore.")));
     537             :     }
     538          32 :     else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))	/* no PGPROC: identify the source by its vxid */
     539           0 :         ereport(ERROR,
     540             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     541             :                  errmsg("could not import the requested snapshot"),
     542             :                  errdetail("The source process with PID %d is not running anymore.",
     543             :                            sourcepid)));
     544             : 
     545             :     /*
     546             :      * In transaction-snapshot mode, the first snapshot must live until end of
     547             :      * xact, so we must make a copy of it.  Furthermore, if we're running in
     548             :      * serializable mode, predicate.c needs to do its own processing.
     549             :      */
     550        3016 :     if (IsolationUsesXactSnapshot())
     551             :     {
     552         414 :         if (IsolationIsSerializable())
     553          26 :             SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
     554             :                                                sourcepid);
     555             :         /* Make a saved copy */
     556         414 :         CurrentSnapshot = CopySnapshot(CurrentSnapshot);
     557         414 :         FirstXactSnapshot = CurrentSnapshot;
     558             :         /* Mark it as registered: bump regd_count and add it to RegisteredSnapshots (no resource owner involved) */
     559         414 :         FirstXactSnapshot->regd_count++;
     560         414 :         pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
     561             :     }
     562             : 
     563        3016 :     FirstSnapshotSet = true;
     564        3016 : }
     565             : 
     566             : /*
     567             :  * CopySnapshot
     568             :  *      Copy the given snapshot.
     569             :  *
     570             :  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
     571             :  * to 0.  The returned snapshot has the copied flag set.
                     :  *
                     :  * The SnapshotData struct, the xip array and (when subxcnt > 0) room for
                     :  * the subxip array are carved out of one allocation, in that order.  The
                     :  * copy's snapXactCompletionCount is reset to 0, and its xip/subxip
                     :  * pointers are NULL whenever the corresponding array is not copied.
                     :  *
                     :  * snapshot: the snapshot to copy; must not be InvalidSnapshot (asserted).
                     :  * Returns the newly allocated copy.
     572             :  */
     573             : static Snapshot
     574    11666818 : CopySnapshot(Snapshot snapshot)
     575             : {
     576             :     Snapshot    newsnap;
     577             :     Size        subxipoff;
     578             :     Size        size;
     579             : 
     580             :     Assert(snapshot != InvalidSnapshot);
     581             : 
     582             :     /* We allocate any XID arrays needed in the same palloc block.
                     :      * subxipoff is the byte offset within that block at which the subxip
                     :      * array starts (struct plus xcnt top-level XIDs). */
     583    11666818 :     size = subxipoff = sizeof(SnapshotData) +
     584    11666818 :         snapshot->xcnt * sizeof(TransactionId);
     585    11666818 :     if (snapshot->subxcnt > 0)
     586      116234 :         size += snapshot->subxcnt * sizeof(TransactionId);
     587             : 
     588    11666818 :     newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
     589    11666818 :     memcpy(newsnap, snapshot, sizeof(SnapshotData));   /* struct only; array pointers are fixed up below */
     590             : 
     591    11666818 :     newsnap->regd_count = 0;
     592    11666818 :     newsnap->active_count = 0;
     593    11666818 :     newsnap->copied = true;
     594    11666818 :     newsnap->snapXactCompletionCount = 0;
     595             : 
     596             :     /* setup XID array */
     597    11666818 :     if (snapshot->xcnt > 0)
     598             :     {
     599     2366668 :         newsnap->xip = (TransactionId *) (newsnap + 1);    /* array begins right after the struct */
     600     2366668 :         memcpy(newsnap->xip, snapshot->xip,
     601     2366668 :                snapshot->xcnt * sizeof(TransactionId));
     602             :     }
     603             :     else
     604     9300150 :         newsnap->xip = NULL;
     605             : 
     606             :     /*
     607             :      * Setup subXID array. Don't bother to copy it if it had overflowed,
     608             :      * though, because it's not used anywhere in that case. Except if it's a
     609             :      * snapshot taken during recovery; all the top-level XIDs are in subxip as
     610             :      * well in that case, so we mustn't lose them.
                     :      *
                     :      * Note that the space for the subxip array was already included in the
                     :      * allocation above whenever subxcnt > 0, even in the case where the
                     :      * array is not copied and subxip is left NULL.
     611             :      */
     612    11666818 :     if (snapshot->subxcnt > 0 &&
     613      116234 :         (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
     614             :     {
     615      116234 :         newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
     616      116234 :         memcpy(newsnap->subxip, snapshot->subxip,
     617      116234 :                snapshot->subxcnt * sizeof(TransactionId));
     618             :     }
     619             :     else
     620    11550584 :         newsnap->subxip = NULL;
     621             : 
     622    11666818 :     return newsnap;
     623             : }
     624             : 
     625             : /*
     626             :  * FreeSnapshot
     627             :  *      Free the memory associated with a snapshot.
                     :  *
                     :  * The caller must have dropped every reference first: the assertions below
                     :  * require both refcounts to be zero and the copied flag to be set.  A single
                     :  * pfree is issued on the snapshot pointer; copies made by CopySnapshot keep
                     :  * their XID arrays inside that same allocation.
     628             :  */
     629             : static void
     630    11621182 : FreeSnapshot(Snapshot snapshot)
     631             : {
     632             :     Assert(snapshot->regd_count == 0);
     633             :     Assert(snapshot->active_count == 0);
     634             :     Assert(snapshot->copied);
     635             : 
     636    11621182 :     pfree(snapshot);
     637    11621182 : }
     638             : 
     639             : /*
     640             :  * PushActiveSnapshot
     641             :  *      Set the given snapshot as the current active snapshot
     642             :  *
     643             :  * If the passed snapshot is a statically-allocated one, or it is possibly
     644             :  * subject to a future command counter update, create a new long-lived copy
     645             :  * with active refcount=1.  Otherwise, only increment the refcount.
                     :  *
                     :  * This is a thin wrapper: the push itself is done by
                     :  * PushActiveSnapshotWithLevel, with GetCurrentTransactionNestLevel()
                     :  * supplied as the owning nesting level.
     646             :  */
     647             : void
     648     1731676 : PushActiveSnapshot(Snapshot snapshot)
     649             : {
     650     1731676 :     PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
     651     1731676 : }
     652             : 
     653             : /*
     654             :  * PushActiveSnapshotWithLevel
     655             :  *      Set the given snapshot as the current active snapshot
     656             :  *
     657             :  * Same as PushActiveSnapshot except that caller can specify the
     658             :  * transaction nesting level that "owns" the snapshot.  This level
     659             :  * must not be deeper than the current top of the snapshot stack.
                     :  *
                     :  * snapshot:   snapshot to push; must not be InvalidSnapshot (asserted).
                     :  * snap_level: nesting level stored in the new stack entry's as_level.
                     :  *
                     :  * NOTE(review): the assertion in the body requires snap_level to be >= the
                     :  * as_level of the current top entry whenever the stack is non-empty; the
                     :  * "must not be deeper" wording above should be read against that check --
                     :  * confirm the intended phrasing.
     660             :  */
     661             : void
     662     1986296 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
     663             : {
     664             :     ActiveSnapshotElt *newactive;
     665             : 
     666             :     Assert(snapshot != InvalidSnapshot);
     667             :     Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
     668             : 
     669     1986296 :     newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
     670             : 
     671             :     /*
                     :      * Push a private copy when the snapshot is CurrentSnapshot or
                     :      * SecondarySnapshot, or when it is not itself a copy; otherwise the
                     :      * stack entry shares the caller's snapshot.
                     :      *
     672             :      * Checking SecondarySnapshot is probably useless here, but it seems
     673             :      * better to be sure.
     674             :      */
     675     1986296 :     if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
     676      410292 :         !snapshot->copied)
     677     1576004 :         newactive->as_snap = CopySnapshot(snapshot);
     678             :     else
     679      410292 :         newactive->as_snap = snapshot;
     680             : 
     681     1986296 :     newactive->as_next = ActiveSnapshot;
     682     1986296 :     newactive->as_level = snap_level;
     683             : 
     684     1986296 :     newactive->as_snap->active_count++;    /* one reference per stack entry */
     685             : 
     686     1986296 :     ActiveSnapshot = newactive;
     687     1986296 :     if (OldestActiveSnapshot == NULL)
     688     1496910 :         OldestActiveSnapshot = ActiveSnapshot;    /* set only while unset; later pushes leave it alone */
     689     1986296 : }
     690             : 
     691             : /*
     692             :  * PushCopiedSnapshot
     693             :  *      As above, except forcibly copy the presented snapshot.
     694             :  *
     695             :  * This should be used when the ActiveSnapshot has to be modifiable, for
     696             :  * example if the caller intends to call UpdateActiveSnapshotCommandId.
     697             :  * The new snapshot will be released when popped from the stack.
                     :  *
                     :  * The copy is made here with CopySnapshot and then handed to
                     :  * PushActiveSnapshot, so the pushed snapshot starts with both refcounts at
                     :  * zero and the copied flag set.
     698             :  */
     699             : void
     700      112956 : PushCopiedSnapshot(Snapshot snapshot)
     701             : {
     702      112956 :     PushActiveSnapshot(CopySnapshot(snapshot));
     703      112956 : }
     704             : 
     705             : /*
     706             :  * UpdateActiveSnapshotCommandId
     707             :  *
     708             :  * Update the current CID of the active snapshot.  This can only be applied
     709             :  * to a snapshot that is not referenced elsewhere.
                     :  *
                     :  * "Not referenced elsewhere" is enforced by assertions: the topmost active
                     :  * snapshot must have active_count == 1 and regd_count == 0.  The new value
                     :  * comes from GetCurrentCommandId(false).  An ERROR is raised if we are in
                     :  * parallel mode and the stored curcid would actually change.
     710             :  */
     711             : void
     712       97534 : UpdateActiveSnapshotCommandId(void)
     713             : {
     714             :     CommandId   save_curcid,
     715             :                 curcid;
     716             : 
     717             :     Assert(ActiveSnapshot != NULL);
     718             :     Assert(ActiveSnapshot->as_snap->active_count == 1);
     719             :     Assert(ActiveSnapshot->as_snap->regd_count == 0);
     720             : 
     721             :     /*
     722             :      * Don't allow modification of the active snapshot during parallel
     723             :      * operation.  We share the snapshot to worker backends at the beginning
     724             :      * of parallel operation, so any change to the snapshot can lead to
     725             :      * inconsistencies.  We have other defenses against
     726             :      * CommandCounterIncrement, but there are a few places that call this
     727             :      * directly, so we put an additional guard here.
                     :      *
                     :      * A call that would store the same curcid again is let through even in
                     :      * parallel mode, since it changes nothing.
     728             :      */
     729       97534 :     save_curcid = ActiveSnapshot->as_snap->curcid;
     730       97534 :     curcid = GetCurrentCommandId(false);
     731       97534 :     if (IsInParallelMode() && save_curcid != curcid)
     732           0 :         elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
     733       97534 :     ActiveSnapshot->as_snap->curcid = curcid;
     734       97534 : }
     735             : 
     736             : /*
     737             :  * PopActiveSnapshot
     738             :  *
     739             :  * Remove the topmost snapshot from the active snapshot stack, decrementing the
     740             :  * reference count, and free it if this was the last reference.
                     :  *
                     :  * The stack must not be empty: ActiveSnapshot is dereferenced without a
                     :  * NULL check.  The snapshot is freed only when both its active and its
                     :  * registered counts have dropped to zero.  The stack element itself is
                     :  * always freed, and SnapshotResetXmin is called afterwards.
     741             :  */
     742             : void
     743     1934216 : PopActiveSnapshot(void)
     744             : {
     745             :     ActiveSnapshotElt *newstack;
     746             : 
     747     1934216 :     newstack = ActiveSnapshot->as_next;    /* save the link before the element is freed */
     748             : 
     749             :     Assert(ActiveSnapshot->as_snap->active_count > 0);
     750             : 
     751     1934216 :     ActiveSnapshot->as_snap->active_count--;
     752             : 
     753     1934216 :     if (ActiveSnapshot->as_snap->active_count == 0 &&
     754     1908748 :         ActiveSnapshot->as_snap->regd_count == 0)
     755     1409892 :         FreeSnapshot(ActiveSnapshot->as_snap);
     756             : 
     757     1934216 :     pfree(ActiveSnapshot);
     758     1934216 :     ActiveSnapshot = newstack;
     759     1934216 :     if (ActiveSnapshot == NULL)
     760     1459110 :         OldestActiveSnapshot = NULL;    /* stack is now empty */
     761             : 
     762     1934216 :     SnapshotResetXmin();
     763     1934216 : }
     764             : 
     765             : /*
     766             :  * GetActiveSnapshot
     767             :  *      Return the topmost snapshot in the Active stack.
                     :  *
                     :  * The stack must be non-empty (asserted).  The snapshot's reference counts
                     :  * are not touched; the returned pointer is the stack entry's own snapshot.
     768             :  */
     769             : Snapshot
     770      913898 : GetActiveSnapshot(void)
     771             : {
     772             :     Assert(ActiveSnapshot != NULL);
     773             : 
     774      913898 :     return ActiveSnapshot->as_snap;
     775             : }
     776             : 
     777             : /*
     778             :  * ActiveSnapshotSet
     779             :  *      Return whether there is at least one snapshot in the Active stack
                     :  *
                     :  * Returns true iff the ActiveSnapshot stack head is non-NULL.
     780             :  */
     781             : bool
     782      749182 : ActiveSnapshotSet(void)
     783             : {
     784      749182 :     return ActiveSnapshot != NULL;
     785             : }
     786             : 
     787             : /*
     788             :  * RegisterSnapshot
     789             :  *      Register a snapshot as being in use by the current resource owner
     790             :  *
     791             :  * If InvalidSnapshot is passed, it is not registered.
                     :  *
                     :  * Returns InvalidSnapshot in that case; otherwise returns whatever
                     :  * RegisterSnapshotOnOwner returns for CurrentResourceOwner, which may be a
                     :  * copy rather than the pointer passed in.  Callers should use the returned
                     :  * pointer from here on.
     792             :  */
     793             : Snapshot
     794    12448022 : RegisterSnapshot(Snapshot snapshot)
     795             : {
     796    12448022 :     if (snapshot == InvalidSnapshot)
     797     1224564 :         return InvalidSnapshot;
     798             : 
     799    11223458 :     return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
     800             : }
     801             : 
     802             : /*
     803             :  * RegisterSnapshotOnOwner
     804             :  *      As above, but use the specified resource owner
                     :  *
                     :  * snapshot: snapshot to register; InvalidSnapshot is returned unchanged.
                     :  * owner:    resource owner that is told about the new reference.
                     :  *
                     :  * Returns the registered snapshot: the argument itself if it already has
                     :  * the copied flag set, otherwise a fresh copy made with CopySnapshot.  The
                     :  * snapshot's regd_count is incremented, and it is added to the
                     :  * RegisteredSnapshots heap when this is its first registration.
     805             :  */
     806             : Snapshot
     807    11223666 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
     808             : {
     809             :     Snapshot    snap;
     810             : 
     811    11223666 :     if (snapshot == InvalidSnapshot)
     812           0 :         return InvalidSnapshot;
     813             : 
     814             :     /* Static snapshot?  Create a persistent copy */
     815    11223666 :     snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
     816             : 
     817             :     /* and tell resowner.c about it */
                     :     /* NOTE(review): Enlarge is called before the refcount bump, presumably so
                     :      * that Remember cannot fail afterwards -- confirm against resowner.c */
     818    11223666 :     ResourceOwnerEnlarge(owner);
     819    11223666 :     snap->regd_count++;
     820    11223666 :     ResourceOwnerRememberSnapshot(owner, snap);
     821             : 
     822    11223666 :     if (snap->regd_count == 1)
     823    10580480 :         pairingheap_add(&RegisteredSnapshots, &snap->ph_node);    /* first registration only */
     824             : 
     825    11223666 :     return snap;
     826             : }
     827             : 
     828             : /*
     829             :  * UnregisterSnapshot
     830             :  *
     831             :  * Decrement the reference count of a snapshot, remove the corresponding
     832             :  * reference from CurrentResourceOwner, and free the snapshot if no more
     833             :  * references remain.
                     :  *
                     :  * Passing NULL is allowed and does nothing.  Otherwise the work is done by
                     :  * UnregisterSnapshotFromOwner with CurrentResourceOwner.
     834             :  */
     835             : void
     836    12304044 : UnregisterSnapshot(Snapshot snapshot)
     837             : {
     838    12304044 :     if (snapshot == NULL)
     839     1171118 :         return;
     840             : 
     841    11132926 :     UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
     842             : }
     843             : 
     844             : /*
     845             :  * UnregisterSnapshotFromOwner
     846             :  *      As above, but use the specified resource owner
                     :  *
                     :  * snapshot: registered snapshot to release; NULL is a no-op.
                     :  * owner:    resource owner that is told to forget the reference.
                     :  *
                     :  * The owner forgets the reference first; then UnregisterSnapshotNoOwner
                     :  * drops the registration count and frees the snapshot if appropriate.
     847             :  */
     848             : void
     849    11168872 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
     850             : {
     851    11168872 :     if (snapshot == NULL)
     852           0 :         return;
     853             : 
     854    11168872 :     ResourceOwnerForgetSnapshot(owner, snapshot);
     855    11168872 :     UnregisterSnapshotNoOwner(snapshot);
     856             : }
     857             : /*
                     :  * UnregisterSnapshotNoOwner
                     :  *      Drop one registration reference without touching any resource owner
                     :  *
                     :  * Decrements the snapshot's regd_count and removes it from the
                     :  * RegisteredSnapshots heap when the count reaches zero.  If the snapshot
                     :  * then has no active references either, it is freed and SnapshotResetXmin
                     :  * is called.  The snapshot must currently be registered (asserted).
                     :  */
     858             : static void
     859    11223666 : UnregisterSnapshotNoOwner(Snapshot snapshot)
     860             : {
     861             :     Assert(snapshot->regd_count > 0);
     862             :     Assert(!pairingheap_is_empty(&RegisteredSnapshots));
     863             : 
     864    11223666 :     snapshot->regd_count--;
     865    11223666 :     if (snapshot->regd_count == 0)
     866    10580480 :         pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);    /* last registration gone */
     867             : 
     868    11223666 :     if (snapshot->regd_count == 0 && snapshot->active_count == 0)
     869             :     {
     870    10205626 :         FreeSnapshot(snapshot);
     871    10205626 :         SnapshotResetXmin();
     872             :     }
     873    11223666 : }
     874             : 
     875             : /*
     876             :  * Comparison function for RegisteredSnapshots heap.  Snapshots are ordered
     877             :  * by xmin, so that the snapshot with smallest xmin is at the top.
                     :  *
                     :  * a, b: heap nodes embedded (as ph_node) in the SnapshotData being compared.
                     :  * arg:  unused.
                     :  *
                     :  * Returns 1 when a's xmin precedes b's, -1 when it follows, 0 otherwise.
                     :  * The sign is the reverse of an ascending comparator, presumably because
                     :  * the pairing heap keeps the comparator's greatest element on top --
                     :  * confirm against pairingheap.c.
     878             :  */
     879             : static int
     880    10436050 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
     881             : {
     882    10436050 :     const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
     883    10436050 :     const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
     884             : 
     885    10436050 :     if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
     886       99622 :         return 1;
     887    10336428 :     else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
     888       17024 :         return -1;
     889             :     else
     890    10319404 :         return 0;
     891             : }
     892             : 
     893             : /*
     894             :  * SnapshotResetXmin
     895             :  *
     896             :  * If there are no more snapshots, we can reset our PGPROC->xmin to
     897             :  * InvalidTransactionId. Note we can do this without locking because we assume
     898             :  * that storing an Xid is atomic.
     899             :  *
     900             :  * Even if there are some remaining snapshots, we may be able to advance our
     901             :  * PGPROC->xmin to some degree.  This typically happens when a portal is
     902             :  * dropped.  For efficiency, we only consider recomputing PGPROC->xmin when
     903             :  * the active snapshot stack is empty; this allows us not to need to track
     904             :  * which active snapshot is oldest.
     905             :  *
     906             :  * Note: it's tempting to use GetOldestSnapshot() here so that we can include
     907             :  * active snapshots in the calculation.  However, that compares by LSN not
     908             :  * xmin so it's not entirely clear that it's the same thing.  Also, we'd be
     909             :  * critically dependent on the assumption that the bottommost active snapshot
     910             :  * stack entry has the oldest xmin.  (Current uses of GetOldestSnapshot() are
     911             :  * not actually critical, but this would be.)
                     :  *
                     :  * In summary, the function does one of three things: nothing, if any
                     :  * active snapshot exists; clear MyProc->xmin, if the RegisteredSnapshots
                     :  * heap is empty too; otherwise raise MyProc->xmin to the xmin of the heap's
                     :  * first entry, but only when that moves it forward.
     912             :  */
     913             : static void
     914    13694952 : SnapshotResetXmin(void)
     915             : {
     916             :     Snapshot    minSnapshot;
     917             : 
     918    13694952 :     if (ActiveSnapshot != NULL)
     919     9980174 :         return;    /* active snapshots remain; leave xmin alone */
     920             : 
     921     3714778 :     if (pairingheap_is_empty(&RegisteredSnapshots))
     922             :     {
     923     1262718 :         MyProc->xmin = InvalidTransactionId;
     924     1262718 :         return;
     925             :     }
     926             : 
     927     2452060 :     minSnapshot = pairingheap_container(SnapshotData, ph_node,
     928             :                                         pairingheap_first(&RegisteredSnapshots));
     929             : 
     930     2452060 :     if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
     931        7166 :         MyProc->xmin = minSnapshot->xmin;    /* only ever moved forward here */
     932             : }
     933             : 
     934             : /*
     935             :  * AtSubCommit_Snapshot
                     :  *      Hand this subtransaction's active snapshots over to its parent
                     :  *
                     :  * level: nesting level of the subtransaction that is committing.
                     :  *
                     :  * Walks the active stack from the top and sets as_level to level - 1 on
                     :  * each entry, stopping at the first entry whose as_level is below level.
                     :  * No snapshot is popped or freed here.
     936             :  */
     937             : void
     938        8992 : AtSubCommit_Snapshot(int level)
     939             : {
     940             :     ActiveSnapshotElt *active;
     941             : 
     942             :     /*
     943             :      * Relabel the active snapshots set in this subtransaction as though they
     944             :      * are owned by the parent subxact.
     945             :      */
     946        8992 :     for (active = ActiveSnapshot; active != NULL; active = active->as_next)
     947             :     {
     948        7230 :         if (active->as_level < level)
     949        7230 :             break;    /* reached entries owned by an outer level */
     950           0 :         active->as_level = level - 1;
     951             :     }
     952        8992 : }
     953             : 
     954             : /*
     955             :  * AtSubAbort_Snapshot
     956             :  *      Clean up snapshots after a subtransaction abort
                     :  *
                     :  * level: nesting level of the subtransaction that is aborting.
                     :  *
                     :  * Pops every active-stack entry whose as_level is >= level, freeing each
                     :  * snapshot whose active and registered counts both reach zero, and then
                     :  * calls SnapshotResetXmin once.
     957             :  */
     958             : void
     959        9150 : AtSubAbort_Snapshot(int level)
     960             : {
     961             :     /* Forget the active snapshots set by this subtransaction */
     962       14814 :     while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
     963             :     {
     964             :         ActiveSnapshotElt *next;
     965             : 
     966        5664 :         next = ActiveSnapshot->as_next;    /* save the link before the element is freed */
     967             : 
     968             :         /*
     969             :          * Decrement the snapshot's active count.  If it's still registered or
     970             :          * marked as active by an outer subtransaction, we can't free it yet.
     971             :          */
     972             :         Assert(ActiveSnapshot->as_snap->active_count >= 1);
     973        5664 :         ActiveSnapshot->as_snap->active_count -= 1;
     974             : 
     975        5664 :         if (ActiveSnapshot->as_snap->active_count == 0 &&
     976        5664 :             ActiveSnapshot->as_snap->regd_count == 0)
     977        5664 :             FreeSnapshot(ActiveSnapshot->as_snap);
     978             : 
     979             :         /* and free the stack element */
     980        5664 :         pfree(ActiveSnapshot);
     981             : 
     982        5664 :         ActiveSnapshot = next;
     983        5664 :         if (ActiveSnapshot == NULL)
     984         242 :             OldestActiveSnapshot = NULL;    /* stack is now empty */
     985             :     }
     986             : 
     987        9150 :     SnapshotResetXmin();
     988        9150 : }
     989             : 
     990             : /*
     991             :  * AtEOXact_Snapshot
     992             :  *      Snapshot manager's cleanup function for end of transaction
                     :  *
                     :  * isCommit:  when true, emit WARNINGs for registered or active snapshots
                     :  *            that are still present after the cleanup steps below.
                     :  * resetXmin: when true, call SnapshotResetXmin at the end; when false,
                     :  *            MyProc->xmin is asserted to be 0 already.
                     :  *
                     :  * Besides releasing the FirstXactSnapshot and exported-snapshot
                     :  * pseudo-registrations, this unlinks the exported snapshot files, drops the
                     :  * catalog snapshot, and resets all of this module's per-transaction state
                     :  * without freeing individual snapshots.
     993             :  */
     994             : void
     995      566962 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
     996             : {
     997             :     /*
     998             :      * In transaction-snapshot mode we must release our privately-managed
     999             :      * reference to the transaction snapshot.  We must remove it from
    1000             :      * RegisteredSnapshots to keep the check below happy.  But we don't bother
    1001             :      * to do FreeSnapshot, for two reasons: the memory will go away with
    1002             :      * TopTransactionContext anyway, and if someone has left the snapshot
    1003             :      * stacked as active, we don't want the code below to be chasing through a
    1004             :      * dangling pointer.
                     :      *
                     :      * Note that regd_count is left as it is; only the heap membership and
                     :      * the FirstXactSnapshot pointer are cleared.
    1005             :      */
    1006      566962 :     if (FirstXactSnapshot != NULL)
    1007             :     {
    1008             :         Assert(FirstXactSnapshot->regd_count > 0);
    1009             :         Assert(!pairingheap_is_empty(&RegisteredSnapshots));
    1010        5790 :         pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
    1011             :     }
    1012      566962 :     FirstXactSnapshot = NULL;
    1013             : 
    1014             :     /*
    1015             :      * If we exported any snapshots, clean them up.
    1016             :      */
    1017      566962 :     if (exportedSnapshots != NIL)
    1018             :     {
    1019             :         ListCell   *lc;
    1020             : 
    1021             :         /*
    1022             :          * Get rid of the files.  Unlink failure is only a WARNING because (1)
    1023             :          * it's too late to abort the transaction, and (2) leaving a leaked
    1024             :          * file around has little real consequence anyway.
    1025             :          *
    1026             :          * We also need to remove the snapshots from RegisteredSnapshots to
    1027             :          * prevent a warning below.
    1028             :          *
    1029             :          * As with the FirstXactSnapshot, we don't need to free resources of
    1030             :          * the snapshot itself as it will go away with the memory context.
    1031             :          */
    1032          32 :         foreach(lc, exportedSnapshots)
    1033             :         {
    1034          16 :             ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
    1035             : 
    1036          16 :             if (unlink(esnap->snapfile))
    1037           0 :                 elog(WARNING, "could not unlink file \"%s\": %m",
    1038             :                      esnap->snapfile);
    1039             : 
    1040          16 :             pairingheap_remove(&RegisteredSnapshots,
    1041          16 :                                &esnap->snapshot->ph_node);
    1042             :         }
    1043             : 
    1044          16 :         exportedSnapshots = NIL;    /* list cells are not freed individually here */
    1045             :     }
    1046             : 
    1047             :     /* Drop catalog snapshot if any */
    1048      566962 :     InvalidateCatalogSnapshot();
    1049             : 
    1050             :     /* On commit, complain about leftover snapshots */
    1051      566962 :     if (isCommit)
    1052             :     {
    1053             :         ActiveSnapshotElt *active;
    1054             : 
    1055      521776 :         if (!pairingheap_is_empty(&RegisteredSnapshots))
    1056           0 :             elog(WARNING, "registered snapshots seem to remain after cleanup");
    1057             : 
    1058             :         /* complain about unpopped active snapshots */
                     :         /* (the pointer printed is the stack element, not its as_snap) */
    1059      521776 :         for (active = ActiveSnapshot; active != NULL; active = active->as_next)
    1060           0 :             elog(WARNING, "snapshot %p still active", active);
    1061             :     }
    1062             : 
    1063             :     /*
    1064             :      * And reset our state.  We don't need to free the memory explicitly --
    1065             :      * it'll go away with TopTransactionContext.
    1066             :      */
    1067      566962 :     ActiveSnapshot = NULL;
    1068      566962 :     OldestActiveSnapshot = NULL;
    1069      566962 :     pairingheap_reset(&RegisteredSnapshots);
    1070             : 
    1071      566962 :     CurrentSnapshot = NULL;
    1072      566962 :     SecondarySnapshot = NULL;
    1073             : 
    1074      566962 :     FirstSnapshotSet = false;
    1075             : 
    1076             :     /*
    1077             :      * During normal commit processing, we call ProcArrayEndTransaction() to
    1078             :      * reset the MyProc->xmin. That call happens prior to the call to
    1079             :      * AtEOXact_Snapshot(), so we need not touch xmin here at all.
                     :      *
                     :      * Since the active stack and the registered heap were both just
                     :      * emptied, SnapshotResetXmin() here sets MyProc->xmin to
                     :      * InvalidTransactionId.
    1080             :      */
    1081      566962 :     if (resetXmin)
    1082       46008 :         SnapshotResetXmin();
    1083             : 
    1084             :     Assert(resetXmin || MyProc->xmin == 0);
    1085      566962 : }
    1086             : 
    1087             : 
    1088             : /*
    1089             :  * ExportSnapshot
    1090             :  *      Export the snapshot to a file so that other backends can import it.
    1091             :  *      Returns the token (the file name) that can be used to import this
    1092             :  *      snapshot.
    1093             :  */
    1094             : char *
    1095          16 : ExportSnapshot(Snapshot snapshot)
    1096             : {
    1097             :     TransactionId topXid;
    1098             :     TransactionId *children;
    1099             :     ExportedSnapshot *esnap;
    1100             :     int         nchildren;
    1101             :     int         addTopXid;
    1102             :     StringInfoData buf;
    1103             :     FILE       *f;
    1104             :     int         i;
    1105             :     MemoryContext oldcxt;
    1106             :     char        path[MAXPGPATH];
    1107             :     char        pathtmp[MAXPGPATH];
    1108             : 
    1109             :     /*
    1110             :      * It's tempting to call RequireTransactionBlock here, since it's not very
    1111             :      * useful to export a snapshot that will disappear immediately afterwards.
    1112             :      * However, we haven't got enough information to do that, since we don't
    1113             :      * know if we're at top level or not.  For example, we could be inside a
    1114             :      * plpgsql function that is going to fire off other transactions via
    1115             :      * dblink.  Rather than disallow perfectly legitimate usages, don't make a
    1116             :      * check.
    1117             :      *
    1118             :      * Also note that we don't make any restriction on the transaction's
    1119             :      * isolation level; however, importers must check the level if they are
    1120             :      * serializable.
    1121             :      */
    1122             : 
    1123             :     /*
    1124             :      * Get our transaction ID if there is one, to include in the snapshot.
    1125             :      */
    1126          16 :     topXid = GetTopTransactionIdIfAny();
    1127             : 
    1128             :     /*
    1129             :      * We cannot export a snapshot from a subtransaction because there's no
    1130             :      * easy way for importers to verify that the same subtransaction is still
    1131             :      * running.
    1132             :      */
    1133          16 :     if (IsSubTransaction())
    1134           0 :         ereport(ERROR,
    1135             :                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
    1136             :                  errmsg("cannot export a snapshot from a subtransaction")));
    1137             : 
    1138             :     /*
    1139             :      * We do however allow previous committed subtransactions to exist.
    1140             :      * Importers of the snapshot must see them as still running, so get their
    1141             :      * XIDs to add them to the snapshot.
    1142             :      */
    1143          16 :     nchildren = xactGetCommittedChildren(&children);
    1144             : 
    1145             :     /*
    1146             :      * Generate file path for the snapshot.  We start numbering of snapshots
    1147             :      * inside the transaction from 1.
    1148             :      */
    1149          16 :     snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
    1150          16 :              MyProc->vxid.procNumber, MyProc->vxid.lxid,
    1151          16 :              list_length(exportedSnapshots) + 1);
    1152             : 
    1153             :     /*
    1154             :      * Copy the snapshot into TopTransactionContext, add it to the
    1155             :      * exportedSnapshots list, and mark it pseudo-registered.  We do this to
    1156             :      * ensure that the snapshot's xmin is honored for the rest of the
    1157             :      * transaction.
    1158             :      */
    1159          16 :     snapshot = CopySnapshot(snapshot);
    1160             : 
    1161          16 :     oldcxt = MemoryContextSwitchTo(TopTransactionContext);
    1162          16 :     esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
    1163          16 :     esnap->snapfile = pstrdup(path);
    1164          16 :     esnap->snapshot = snapshot;
    1165          16 :     exportedSnapshots = lappend(exportedSnapshots, esnap);
    1166          16 :     MemoryContextSwitchTo(oldcxt);
    1167             : 
    1168          16 :     snapshot->regd_count++;
    1169          16 :     pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
    1170             : 
    1171             :     /*
    1172             :      * Fill buf with a text serialization of the snapshot, plus identification
    1173             :      * data about this transaction.  The format expected by ImportSnapshot is
    1174             :      * pretty rigid: each line must be fieldname:value.
    1175             :      */
    1176          16 :     initStringInfo(&buf);
    1177             : 
    1178          16 :     appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
    1179          16 :     appendStringInfo(&buf, "pid:%d\n", MyProcPid);
    1180          16 :     appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
    1181          16 :     appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
    1182          16 :     appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
    1183             : 
    1184          16 :     appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
    1185          16 :     appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
    1186             : 
    1187             :     /*
    1188             :      * We must include our own top transaction ID in the top-xid data, since
    1189             :      * by definition we will still be running when the importing transaction
    1190             :      * adopts the snapshot, but GetSnapshotData never includes our own XID in
    1191             :      * the snapshot.  (There must, therefore, be enough room to add it.)
    1192             :      *
    1193             :      * However, it could be that our topXid is after the xmax, in which case
    1194             :      * we shouldn't include it because xip[] members are expected to be before
    1195             :      * xmax.  (We need not make the same check for subxip[] members, see
    1196             :      * snapshot.h.)
    1197             :      */
    1198          16 :     addTopXid = (TransactionIdIsValid(topXid) &&
    1199          16 :                  TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
    1200          16 :     appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
    1201          16 :     for (i = 0; i < snapshot->xcnt; i++)
    1202           0 :         appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
    1203          16 :     if (addTopXid)
    1204           0 :         appendStringInfo(&buf, "xip:%u\n", topXid);
    1205             : 
    1206             :     /*
    1207             :      * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
    1208             :      * we have to cope with possible overflow.
    1209             :      */
    1210          32 :     if (snapshot->suboverflowed ||
    1211          16 :         snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
    1212           0 :         appendStringInfoString(&buf, "sof:1\n");
    1213             :     else
    1214             :     {
    1215          16 :         appendStringInfoString(&buf, "sof:0\n");
    1216          16 :         appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
    1217          16 :         for (i = 0; i < snapshot->subxcnt; i++)
    1218           0 :             appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
    1219          16 :         for (i = 0; i < nchildren; i++)
    1220           0 :             appendStringInfo(&buf, "sxp:%u\n", children[i]);
    1221             :     }
    1222          16 :     appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
    1223             : 
    1224             :     /*
    1225             :      * Now write the text representation into a file.  We first write to a
    1226             :      * ".tmp" filename, and rename to final filename if no error.  This
    1227             :      * ensures that no other backend can read an incomplete file
    1228             :      * (ImportSnapshot won't allow it because of its valid-characters check).
    1229             :      */
    1230          16 :     snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
    1231          16 :     if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
    1232           0 :         ereport(ERROR,
    1233             :                 (errcode_for_file_access(),
    1234             :                  errmsg("could not create file \"%s\": %m", pathtmp)));
    1235             : 
    1236          16 :     if (fwrite(buf.data, buf.len, 1, f) != 1)
    1237           0 :         ereport(ERROR,
    1238             :                 (errcode_for_file_access(),
    1239             :                  errmsg("could not write to file \"%s\": %m", pathtmp)));
    1240             : 
    1241             :     /* no fsync() since file need not survive a system crash */
    1242             : 
    1243          16 :     if (FreeFile(f))
    1244           0 :         ereport(ERROR,
    1245             :                 (errcode_for_file_access(),
    1246             :                  errmsg("could not write to file \"%s\": %m", pathtmp)));
    1247             : 
    1248             :     /*
    1249             :      * Now that we have written everything into a .tmp file, rename the file
    1250             :      * to remove the .tmp suffix.
    1251             :      */
    1252          16 :     if (rename(pathtmp, path) < 0)
    1253           0 :         ereport(ERROR,
    1254             :                 (errcode_for_file_access(),
    1255             :                  errmsg("could not rename file \"%s\" to \"%s\": %m",
    1256             :                         pathtmp, path)));
    1257             : 
    1258             :     /*
    1259             :      * The basename of the file is what we return from pg_export_snapshot().
    1260             :      * It's already in path in a textual format and we know that the path
    1261             :      * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
    1262             :      * and pstrdup it so as not to return the address of a local variable.
    1263             :      */
    1264          16 :     return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
    1265             : }
    1266             : 
    1267             : /*
    1268             :  * pg_export_snapshot
    1269             :  *      SQL-callable wrapper for ExportSnapshot.
    1270             :  */
    1271             : Datum
    1272          16 : pg_export_snapshot(PG_FUNCTION_ARGS)
    1273             : {
    1274             :     char       *snapshotName;
    1275             : 
    1276          16 :     snapshotName = ExportSnapshot(GetActiveSnapshot());
    1277          16 :     PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
    1278             : }
    1279             : 
    1280             : 
    1281             : /*
    1282             :  * Parsing subroutines for ImportSnapshot: parse a line with the given
    1283             :  * prefix followed by a value, and advance *s to the next line.  The
    1284             :  * filename is provided for use in error messages.
    1285             :  */
    1286             : static int
    1287         224 : parseIntFromText(const char *prefix, char **s, const char *filename)
    1288             : {
    1289         224 :     char       *ptr = *s;
    1290         224 :     int         prefixlen = strlen(prefix);
    1291             :     int         val;
    1292             : 
    1293         224 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1294           0 :         ereport(ERROR,
    1295             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1296             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1297         224 :     ptr += prefixlen;
    1298         224 :     if (sscanf(ptr, "%d", &val) != 1)
    1299           0 :         ereport(ERROR,
    1300             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1301             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1302         224 :     ptr = strchr(ptr, '\n');
    1303         224 :     if (!ptr)
    1304           0 :         ereport(ERROR,
    1305             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1306             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1307         224 :     *s = ptr + 1;
    1308         224 :     return val;
    1309             : }
    1310             : 
    1311             : static TransactionId
    1312          96 : parseXidFromText(const char *prefix, char **s, const char *filename)
    1313             : {
    1314          96 :     char       *ptr = *s;
    1315          96 :     int         prefixlen = strlen(prefix);
    1316             :     TransactionId val;
    1317             : 
    1318          96 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1319           0 :         ereport(ERROR,
    1320             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1321             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1322          96 :     ptr += prefixlen;
    1323          96 :     if (sscanf(ptr, "%u", &val) != 1)
    1324           0 :         ereport(ERROR,
    1325             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1326             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1327          96 :     ptr = strchr(ptr, '\n');
    1328          96 :     if (!ptr)
    1329           0 :         ereport(ERROR,
    1330             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1331             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1332          96 :     *s = ptr + 1;
    1333          96 :     return val;
    1334             : }
    1335             : 
    1336             : static void
    1337          32 : parseVxidFromText(const char *prefix, char **s, const char *filename,
    1338             :                   VirtualTransactionId *vxid)
    1339             : {
    1340          32 :     char       *ptr = *s;
    1341          32 :     int         prefixlen = strlen(prefix);
    1342             : 
    1343          32 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1344           0 :         ereport(ERROR,
    1345             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1346             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1347          32 :     ptr += prefixlen;
    1348          32 :     if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
    1349           0 :         ereport(ERROR,
    1350             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1351             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1352          32 :     ptr = strchr(ptr, '\n');
    1353          32 :     if (!ptr)
    1354           0 :         ereport(ERROR,
    1355             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1356             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1357          32 :     *s = ptr + 1;
    1358          32 : }
    1359             : 
    1360             : /*
    1361             :  * ImportSnapshot
    1362             :  *      Import a previously exported snapshot.  The argument should be a
    1363             :  *      filename in SNAPSHOT_EXPORT_DIR.  Load the snapshot from that file.
    1364             :  *      This is called by "SET TRANSACTION SNAPSHOT 'foo'".
    1365             :  */
    1366             : void
    1367          44 : ImportSnapshot(const char *idstr)
    1368             : {
    1369             :     char        path[MAXPGPATH];
    1370             :     FILE       *f;
    1371             :     struct stat stat_buf;
    1372             :     char       *filebuf;
    1373             :     int         xcnt;
    1374             :     int         i;
    1375             :     VirtualTransactionId src_vxid;
    1376             :     int         src_pid;
    1377             :     Oid         src_dbid;
    1378             :     int         src_isolevel;
    1379             :     bool        src_readonly;
    1380             :     SnapshotData snapshot;
    1381             : 
    1382             :     /*
    1383             :      * Must be at top level of a fresh transaction.  Note in particular that
    1384             :      * we check we haven't acquired an XID --- if we have, it's conceivable
    1385             :      * that the snapshot would show it as not running, making for very screwy
    1386             :      * behavior.
    1387             :      */
    1388          88 :     if (FirstSnapshotSet ||
    1389          88 :         GetTopTransactionIdIfAny() != InvalidTransactionId ||
    1390          44 :         IsSubTransaction())
    1391           0 :         ereport(ERROR,
    1392             :                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
    1393             :                  errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
    1394             : 
    1395             :     /*
    1396             :      * If we are in read committed mode then the next query would execute with
    1397             :      * a new snapshot thus making this function call quite useless.
    1398             :      */
    1399          44 :     if (!IsolationUsesXactSnapshot())
    1400           0 :         ereport(ERROR,
    1401             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1402             :                  errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
    1403             : 
    1404             :     /*
    1405             :      * Verify the identifier: only 0-9, A-F and hyphens are allowed.  We do
    1406             :      * this mainly to prevent reading arbitrary files.
    1407             :      */
    1408          44 :     if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
    1409           6 :         ereport(ERROR,
    1410             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1411             :                  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
    1412             : 
    1413             :     /* OK, read the file */
    1414          38 :     snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
    1415             : 
    1416          38 :     f = AllocateFile(path, PG_BINARY_R);
    1417          38 :     if (!f)
    1418             :     {
    1419             :         /*
    1420             :          * If file is missing while identifier has a correct format, avoid
    1421             :          * system errors.
    1422             :          */
    1423           6 :         if (errno == ENOENT)
    1424           6 :             ereport(ERROR,
    1425             :                     (errcode(ERRCODE_UNDEFINED_OBJECT),
    1426             :                      errmsg("snapshot \"%s\" does not exist", idstr)));
    1427             :         else
    1428           0 :             ereport(ERROR,
    1429             :                     (errcode_for_file_access(),
    1430             :                      errmsg("could not open file \"%s\" for reading: %m",
    1431             :                             path)));
    1432             :     }
    1433             : 
    1434             :     /* get the size of the file so that we know how much memory we need */
    1435          32 :     if (fstat(fileno(f), &stat_buf))
    1436           0 :         elog(ERROR, "could not stat file \"%s\": %m", path);
    1437             : 
    1438             :     /* and read the file into a palloc'd string */
    1439          32 :     filebuf = (char *) palloc(stat_buf.st_size + 1);
    1440          32 :     if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
    1441           0 :         elog(ERROR, "could not read file \"%s\": %m", path);
    1442             : 
    1443          32 :     filebuf[stat_buf.st_size] = '\0';
    1444             : 
    1445          32 :     FreeFile(f);
    1446             : 
    1447             :     /*
    1448             :      * Construct a snapshot struct by parsing the file content.
    1449             :      */
    1450          32 :     memset(&snapshot, 0, sizeof(snapshot));
    1451             : 
    1452          32 :     parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
    1453          32 :     src_pid = parseIntFromText("pid:", &filebuf, path);
    1454             :     /* we abuse parseXidFromText a bit here ... */
    1455          32 :     src_dbid = parseXidFromText("dbid:", &filebuf, path);
    1456          32 :     src_isolevel = parseIntFromText("iso:", &filebuf, path);
    1457          32 :     src_readonly = parseIntFromText("ro:", &filebuf, path);
    1458             : 
    1459          32 :     snapshot.snapshot_type = SNAPSHOT_MVCC;
    1460             : 
    1461          32 :     snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
    1462          32 :     snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
    1463             : 
    1464          32 :     snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
    1465             : 
    1466             :     /* sanity-check the xid count before palloc */
    1467          32 :     if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
    1468           0 :         ereport(ERROR,
    1469             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1470             :                  errmsg("invalid snapshot data in file \"%s\"", path)));
    1471             : 
    1472          32 :     snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
    1473          32 :     for (i = 0; i < xcnt; i++)
    1474           0 :         snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
    1475             : 
    1476          32 :     snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
    1477             : 
    1478          32 :     if (!snapshot.suboverflowed)
    1479             :     {
    1480          32 :         snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
    1481             : 
    1482             :         /* sanity-check the xid count before palloc */
    1483          32 :         if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
    1484           0 :             ereport(ERROR,
    1485             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1486             :                      errmsg("invalid snapshot data in file \"%s\"", path)));
    1487             : 
    1488          32 :         snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
    1489          32 :         for (i = 0; i < xcnt; i++)
    1490           0 :             snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
    1491             :     }
    1492             :     else
    1493             :     {
    1494           0 :         snapshot.subxcnt = 0;
    1495           0 :         snapshot.subxip = NULL;
    1496             :     }
    1497             : 
    1498          32 :     snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
    1499             : 
    1500             :     /*
    1501             :      * Do some additional sanity checking, just to protect ourselves.  We
    1502             :      * don't trouble to check the array elements, just the most critical
    1503             :      * fields.
    1504             :      */
    1505          32 :     if (!VirtualTransactionIdIsValid(src_vxid) ||
    1506          32 :         !OidIsValid(src_dbid) ||
    1507          32 :         !TransactionIdIsNormal(snapshot.xmin) ||
    1508          32 :         !TransactionIdIsNormal(snapshot.xmax))
    1509           0 :         ereport(ERROR,
    1510             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1511             :                  errmsg("invalid snapshot data in file \"%s\"", path)));
    1512             : 
    1513             :     /*
    1514             :      * If we're serializable, the source transaction must be too, otherwise
    1515             :      * predicate.c has problems (SxactGlobalXmin could go backwards).  Also, a
    1516             :      * non-read-only transaction can't adopt a snapshot from a read-only
    1517             :      * transaction, as predicate.c handles the cases very differently.
    1518             :      */
    1519          32 :     if (IsolationIsSerializable())
    1520             :     {
    1521           0 :         if (src_isolevel != XACT_SERIALIZABLE)
    1522           0 :             ereport(ERROR,
    1523             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1524             :                      errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
    1525           0 :         if (src_readonly && !XactReadOnly)
    1526           0 :             ereport(ERROR,
    1527             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1528             :                      errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
    1529             :     }
    1530             : 
    1531             :     /*
    1532             :      * We cannot import a snapshot that was taken in a different database,
    1533             :      * because vacuum calculates OldestXmin on a per-database basis; so the
    1534             :      * source transaction's xmin doesn't protect us from data loss.  This
    1535             :      * restriction could be removed if the source transaction were to mark its
    1536             :      * xmin as being globally applicable.  But that would require some
    1537             :      * additional syntax, since that has to be known when the snapshot is
    1538             :      * initially taken.  (See pgsql-hackers discussion of 2011-10-21.)
    1539             :      */
    1540          32 :     if (src_dbid != MyDatabaseId)
    1541           0 :         ereport(ERROR,
    1542             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1543             :                  errmsg("cannot import a snapshot from a different database")));
    1544             : 
    1545             :     /* OK, install the snapshot */
    1546          32 :     SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
    1547          32 : }
    1548             : 
    1549             : /*
    1550             :  * XactHasExportedSnapshots
    1551             :  *      Test whether current transaction has exported any snapshots.
    1552             :  */
    1553             : bool
    1554         848 : XactHasExportedSnapshots(void)
    1555             : {
    1556         848 :     return (exportedSnapshots != NIL);
    1557             : }
    1558             : 
    1559             : /*
    1560             :  * DeleteAllExportedSnapshotFiles
    1561             :  *      Clean up any files that have been left behind by a crashed backend
    1562             :  *      that had exported snapshots before it died.
    1563             :  *
    1564             :  * This should be called during database startup or crash recovery.
    1565             :  */
    1566             : void
    1567         392 : DeleteAllExportedSnapshotFiles(void)
    1568             : {
    1569             :     char        buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
    1570             :     DIR        *s_dir;
    1571             :     struct dirent *s_de;
    1572             : 
    1573             :     /*
    1574             :      * Problems in reading the directory, or unlinking files, are reported at
    1575             :      * LOG level.  Since we're running in the startup process, ERROR level
    1576             :      * would prevent database start, and it's not important enough for that.
    1577             :      */
    1578         392 :     s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
    1579             : 
    1580        1176 :     while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
    1581             :     {
    1582         784 :         if (strcmp(s_de->d_name, ".") == 0 ||
    1583         392 :             strcmp(s_de->d_name, "..") == 0)
    1584         784 :             continue;
    1585             : 
    1586           0 :         snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
    1587             : 
    1588           0 :         if (unlink(buf) != 0)
    1589           0 :             ereport(LOG,
    1590             :                     (errcode_for_file_access(),
    1591             :                      errmsg("could not remove file \"%s\": %m", buf)));
    1592             :     }
    1593             : 
    1594         392 :     FreeDir(s_dir);
    1595         392 : }
    1596             : 
    1597             : /*
    1598             :  * ThereAreNoPriorRegisteredSnapshots
    1599             :  *      Is the registered snapshot count less than or equal to one?
    1600             :  *
    1601             :  * Don't use this to settle important decisions.  While zero registrations and
    1602             :  * no ActiveSnapshot would confirm a certain idleness, the system makes no
    1603             :  * guarantees about the significance of one registered snapshot.
    1604             :  */
    1605             : bool
    1606          60 : ThereAreNoPriorRegisteredSnapshots(void)
    1607             : {
    1608          60 :     if (pairingheap_is_empty(&RegisteredSnapshots) ||
    1609           0 :         pairingheap_is_singular(&RegisteredSnapshots))
    1610          60 :         return true;
    1611             : 
    1612           0 :     return false;
    1613             : }
    1614             : 
    1615             : /*
    1616             :  * HaveRegisteredOrActiveSnapshot
    1617             :  *      Is there any registered or active snapshot?
    1618             :  *
    1619             :  * NB: Unless pushed or active, the cached catalog snapshot will not cause
    1620             :  * this function to return true. That allows this function to be used in
    1621             :  * checks enforcing a longer-lived snapshot.
    1622             :  */
    1623             : bool
    1624         342 : HaveRegisteredOrActiveSnapshot(void)
    1625             : {
    1626         342 :     if (ActiveSnapshot != NULL)
    1627           0 :         return true;
    1628             : 
    1629             :     /*
    1630             :      * The catalog snapshot is in RegisteredSnapshots when valid, but can be
    1631             :      * removed at any time due to invalidation processing. If explicitly
    1632             :      * registered more than one snapshot has to be in RegisteredSnapshots.
    1633             :      */
    1634         342 :     if (CatalogSnapshot != NULL &&
    1635           0 :         pairingheap_is_singular(&RegisteredSnapshots))
    1636           0 :         return false;
    1637             : 
    1638         342 :     return !pairingheap_is_empty(&RegisteredSnapshots);
    1639             : }
    1640             : 
    1641             : 
    1642             : /*
    1643             :  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
    1644             :  * access to behave just like it did at a certain point in the past.
    1645             :  *
    1646             :  * Needed for logical decoding.
    1647             :  */
    1648             : void
    1649        7898 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
    1650             : {
    1651             :     Assert(historic_snapshot != NULL);
    1652             : 
    1653             :     /* setup the timetravel snapshot */
    1654        7898 :     HistoricSnapshot = historic_snapshot;
    1655             : 
    1656             :     /* setup (cmin, cmax) lookup hash */
    1657        7898 :     tuplecid_data = tuplecids;
    1658        7898 : }
    1659             : 
    1660             : 
    1661             : /*
    1662             :  * Make catalog snapshots behave normally again.
    1663             :  */
    1664             : void
    1665        7896 : TeardownHistoricSnapshot(bool is_error)
    1666             : {
    1667        7896 :     HistoricSnapshot = NULL;
    1668        7896 :     tuplecid_data = NULL;
    1669        7896 : }
    1670             : 
    1671             : bool
    1672    14871032 : HistoricSnapshotActive(void)
    1673             : {
    1674    14871032 :     return HistoricSnapshot != NULL;
    1675             : }
    1676             : 
    1677             : HTAB *
    1678        1190 : HistoricSnapshotGetTupleCids(void)
    1679             : {
    1680             :     Assert(HistoricSnapshotActive());
    1681        1190 :     return tuplecid_data;
    1682             : }
    1683             : 
    1684             : /*
    1685             :  * EstimateSnapshotSpace
    1686             :  *      Returns the number of bytes needed to serialize the given snapshot.
    1687             :  *
    1688             :  * Only the fields held in SerializedSnapshotData are exported, followed by
    1689             :  * the xip array and, unless it is skipped as overflowed, the subxip array.
    1690             :  */
    1691             : Size
    1692        1862 : EstimateSnapshotSpace(Snapshot snapshot)
    1693             : {
    1694             :     Size        size;
    1695             : 
    1696             :     Assert(snapshot != InvalidSnapshot);
    1697             :     Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
    1698             : 
    1699             :     /* Fixed-size header plus the top-level XID array, overflow-checked. */
    1700        1862 :     size = add_size(sizeof(SerializedSnapshotData),
    1701        1862 :                     mul_size(snapshot->xcnt, sizeof(TransactionId)));
    1702        1862 :     if (snapshot->subxcnt > 0 &&
    1703           4 :         (!snapshot->suboverflowed || snapshot->takenDuringRecovery))    /* same keep-subxip rule as SerializeSnapshot */
    1704           4 :         size = add_size(size,
    1705           4 :                         mul_size(snapshot->subxcnt, sizeof(TransactionId)));
    1706             : 
    1707        1862 :     return size;
    1708             : }
    1709             : 
    1710             : /*
    1711             :  * SerializeSnapshot
    1712             :  *      Writes a SerializedSnapshotData header, then the xip array, then (if
    1713             :  *      kept) the subxip array, starting at start_address; no bounds check.
    1714             :  */
    1715             : void
    1716        1804 : SerializeSnapshot(Snapshot snapshot, char *start_address)
    1717             : {
    1718             :     SerializedSnapshotData serialized_snapshot;
    1719             : 
    1720             :     Assert(snapshot->subxcnt >= 0);
    1721             : 
    1722             :     /* Copy all required fields into a local, properly aligned header */
    1723        1804 :     serialized_snapshot.xmin = snapshot->xmin;
    1724        1804 :     serialized_snapshot.xmax = snapshot->xmax;
    1725        1804 :     serialized_snapshot.xcnt = snapshot->xcnt;
    1726        1804 :     serialized_snapshot.subxcnt = snapshot->subxcnt;
    1727        1804 :     serialized_snapshot.suboverflowed = snapshot->suboverflowed;
    1728        1804 :     serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
    1729        1804 :     serialized_snapshot.curcid = snapshot->curcid;
    1730        1804 :     serialized_snapshot.whenTaken = snapshot->whenTaken;
    1731        1804 :     serialized_snapshot.lsn = snapshot->lsn;
    1732             : 
    1733             :     /*
    1734             :      * Ignore the SubXID array if it has overflowed, unless the snapshot was
    1735             :      * taken during recovery - in that case, top-level XIDs are in subxip as
    1736             :      * well, and we mustn't lose them.
    1737             :      */
    1738        1804 :     if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
    1739           0 :         serialized_snapshot.subxcnt = 0;
    1740             : 
    1741             :     /* Copy struct to possibly-unaligned buffer */
    1742        1804 :     memcpy(start_address,
    1743             :            &serialized_snapshot, sizeof(SerializedSnapshotData));
    1744             : 
    1745             :     /* Copy XID array directly after the header */
    1746        1804 :     if (snapshot->xcnt > 0)
    1747         612 :         memcpy((TransactionId *) (start_address +
    1748             :                                   sizeof(SerializedSnapshotData)),
    1749         612 :                snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
    1750             : 
    1751             :     /*
    1752             :      * Copy SubXID array. Don't bother to copy it if it had overflowed,
    1753             :      * though, because it's not used anywhere in that case. Except if it's a
    1754             :      * snapshot taken during recovery; all the top-level XIDs are in subxip as
    1755             :      * well in that case, so we mustn't lose them.
    1756             :      */
    1757        1804 :     if (serialized_snapshot.subxcnt > 0)    /* already zeroed above when the array is to be skipped */
    1758             :     {
    1759           4 :         Size        subxipoff = sizeof(SerializedSnapshotData) +
    1760           4 :             snapshot->xcnt * sizeof(TransactionId);     /* offset just past the xip array */
    1761             : 
    1762           4 :         memcpy((TransactionId *) (start_address + subxipoff),
    1763           4 :                snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));    /* equals serialized_snapshot.subxcnt on this path */
    1764             :     }
    1765        1804 : }
    1766             : 
    1767             : /*
    1768             :  * RestoreSnapshot
    1769             :  *      Restore a serialized snapshot from the specified address.
    1770             :  *
    1771             :  * The copy is allocated in TopTransactionContext as a single chunk holding the
    1772             :  * struct and both XID arrays; refcounts start at 0 and copied is set to true.
    1773             :  */
    1774             : Snapshot
    1775        6624 : RestoreSnapshot(char *start_address)
    1776             : {
    1777             :     SerializedSnapshotData serialized_snapshot;
    1778             :     Size        size;
    1779             :     Snapshot    snapshot;
    1780             :     TransactionId *serialized_xids;
    1781             : 
    1782        6624 :     memcpy(&serialized_snapshot, start_address,
    1783             :            sizeof(SerializedSnapshotData));    /* copy out: the buffer may be unaligned */
    1784        6624 :     serialized_xids = (TransactionId *)
    1785             :         (start_address + sizeof(SerializedSnapshotData));    /* XID arrays follow the header */
    1786             : 
    1787             :     /* We allocate any XID arrays needed in the same palloc block. */
    1788        6624 :     size = sizeof(SnapshotData)
    1789        6624 :         + serialized_snapshot.xcnt * sizeof(TransactionId)
    1790        6624 :         + serialized_snapshot.subxcnt * sizeof(TransactionId);    /* plain arithmetic; no add_size/mul_size here */
    1791             : 
    1792             :     /* Allocate the snapshot, then copy all required fields */
    1793        6624 :     snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
    1794        6624 :     snapshot->snapshot_type = SNAPSHOT_MVCC;
    1795        6624 :     snapshot->xmin = serialized_snapshot.xmin;
    1796        6624 :     snapshot->xmax = serialized_snapshot.xmax;
    1797        6624 :     snapshot->xip = NULL;
    1798        6624 :     snapshot->xcnt = serialized_snapshot.xcnt;
    1799        6624 :     snapshot->subxip = NULL;
    1800        6624 :     snapshot->subxcnt = serialized_snapshot.subxcnt;
    1801        6624 :     snapshot->suboverflowed = serialized_snapshot.suboverflowed;
    1802        6624 :     snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
    1803        6624 :     snapshot->curcid = serialized_snapshot.curcid;
    1804        6624 :     snapshot->whenTaken = serialized_snapshot.whenTaken;
    1805        6624 :     snapshot->lsn = serialized_snapshot.lsn;
    1806        6624 :     snapshot->snapXactCompletionCount = 0;    /* not part of the serialized data; always reset */
    1807             : 
    1808             :     /* Copy XIDs, if present; xip stays NULL otherwise. */
    1809        6624 :     if (serialized_snapshot.xcnt > 0)
    1810             :     {
    1811        1602 :         snapshot->xip = (TransactionId *) (snapshot + 1);    /* first byte after the struct */
    1812        1602 :         memcpy(snapshot->xip, serialized_xids,
    1813        1602 :                serialized_snapshot.xcnt * sizeof(TransactionId));
    1814             :     }
    1815             : 
    1816             :     /* Copy SubXIDs, if present; subxip stays NULL otherwise. */
    1817        6624 :     if (serialized_snapshot.subxcnt > 0)
    1818             :     {
    1819          10 :         snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
    1820          10 :             serialized_snapshot.xcnt;    /* placed after the xcnt top-level slots */
    1821          10 :         memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
    1822          10 :                serialized_snapshot.subxcnt * sizeof(TransactionId));
    1823             :     }
    1824             : 
    1825             :     /* Zero both refcounts and set the copied flag so that the caller will set refcounts correctly. */
    1826        6624 :     snapshot->regd_count = 0;
    1827        6624 :     snapshot->active_count = 0;
    1828        6624 :     snapshot->copied = true;
    1829             : 
    1830        6624 :     return snapshot;
    1831             : }
    1832             : 
    1833             : /*
    1834             :  * Install a restored snapshot as the transaction snapshot.
    1835             :  *
    1836             :  * The second argument is of type void * so that snapmgr.h need not include
    1837             :  * the declaration for PGPROC; it is passed through to SetTransactionSnapshot.
    1838             :  */
    1839             : void
    1840        2984 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
    1841             : {
    1842        2984 :     SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);    /* NOTE(review): NULL/InvalidPid presumably mean "no other source identification" - confirm against SetTransactionSnapshot */
    1843        2984 : }
    1844             : 
    1845             : /*
    1846             :  * XidInMVCCSnapshot
    1847             :  *      Is the given XID still-in-progress according to the snapshot?
    1848             :  *
    1849             :  * Note: GetSnapshotData never stores either top xid or subxids of our own
    1850             :  * backend into a snapshot, so these xids will not be reported as "running"
    1851             :  * by this function.  This is OK for current uses, because we always check
    1852             :  * TransactionIdIsCurrentTransactionId first, except when it's known the
    1853             :  * XID could not be ours anyway.
    1854             :  */
    1855             : bool
    1856   114345366 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
    1857             : {
    1858             :     /*
    1859             :      * Make a quick range check to eliminate most XIDs without looking at the
    1860             :      * xip arrays.  Note that this is OK even if we convert a subxact XID to
    1861             :      * its parent below, because a subxact with XID < xmin has surely also got
    1862             :      * a parent with XID < xmin, while one with XID >= xmax must belong to a
    1863             :      * parent that was not yet committed at the time of this snapshot.
    1864             :      */
    1865             : 
    1866             :     /* Any xid < xmin is not in-progress */
    1867   114345366 :     if (TransactionIdPrecedes(xid, snapshot->xmin))
    1868   108725694 :         return false;
    1869             :     /* Any xid >= xmax is in-progress */
    1870     5619672 :     if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
    1871       17936 :         return true;
    1872             : 
    1873             :     /*
    1874             :      * Snapshot information is stored slightly differently in snapshots taken
    1875             :      * during recovery.
    1876             :      */
    1877     5601736 :     if (!snapshot->takenDuringRecovery)
    1878             :     {
    1879             :         /*
    1880             :          * If the snapshot contains full subxact data, the fastest way to
    1881             :          * check things is just to compare the given XID against both subxact
    1882             :          * XIDs and top-level XIDs.  If the snapshot overflowed, we have to
    1883             :          * use pg_subtrans to convert a subxact XID to its parent XID, but
    1884             :          * then we need only look at top-level XIDs not subxacts.
    1885             :          */
    1886     5601734 :         if (!snapshot->suboverflowed)
    1887             :         {
    1888             :             /* we have full data, so search subxip */
    1889     5601034 :             if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
    1890         420 :                 return true;
    1891             : 
    1892             :             /* not there, fall through to search xip[] */
    1893             :         }
    1894             :         else
    1895             :         {
    1896             :             /*
    1897             :              * Snapshot overflowed, so convert xid to top-level.  This is safe
    1898             :              * because we eliminated too-old XIDs above.
    1899             :              */
    1900         700 :             xid = SubTransGetTopmostTransaction(xid);
    1901             : 
    1902             :             /*
    1903             :              * If xid was indeed a subxact, we might now have an xid < xmin,
    1904             :              * so recheck to avoid an array scan.  No point in rechecking
    1905             :              * xmax.
    1906             :              */
    1907         700 :             if (TransactionIdPrecedes(xid, snapshot->xmin))
    1908           0 :                 return false;
    1909             :         }
    1910             : 
    1911     5601314 :         if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
    1912       14516 :             return true;
    1913             :     }
    1914             :     else
    1915             :     {
    1916             :         /*
    1917             :          * In recovery we store all xids in the subxip array because it is by
    1918             :          * far the bigger array, and we mostly don't know which xids are
    1919             :          * top-level and which are subxacts. The xip array is empty.
    1920             :          *
    1921             :          * We start by searching subtrans, if we overflowed.
    1922             :          */
    1923           2 :         if (snapshot->suboverflowed)
    1924             :         {
    1925             :             /*
    1926             :              * Snapshot overflowed, so convert xid to top-level.  This is safe
    1927             :              * because we eliminated too-old XIDs above.
    1928             :              */
    1929           0 :             xid = SubTransGetTopmostTransaction(xid);
    1930             : 
    1931             :             /*
    1932             :              * If xid was indeed a subxact, we might now have an xid < xmin,
    1933             :              * so recheck to avoid an array scan.  No point in rechecking
    1934             :              * xmax.
    1935             :              */
    1936           0 :             if (TransactionIdPrecedes(xid, snapshot->xmin))
    1937           0 :                 return false;
    1938             :         }
    1939             : 
    1940             :         /*
    1941             :          * We now have either a top-level xid higher than xmin or an
    1942             :          * indeterminate xid. We don't know whether it's top level or subxact
    1943             :          * but it doesn't matter. If it's present, the xid is still in progress.
    1944             :          */
    1945           2 :         if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
    1946           0 :             return true;
    1947             :     }
    1948             : 
    1949     5586800 :     return false;    /* within [xmin, xmax) but listed in no array: not in progress */
    1950             : }
    1951             : 
    1952             : /* ResourceOwner callbacks */
    1953             : 
    1954             : static void
    1955       54794 : ResOwnerReleaseSnapshot(Datum res)    /* res carries a Snapshot pointer */
    1956             : {
    1957       54794 :     UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));    /* unregister without a resource-owner argument */
    1958       54794 : }

Generated by: LCOV version 1.14