LCOV - code coverage report
Current view: top level - src/backend/utils/time - snapmgr.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 537 688 78.1 %
Date: 2019-11-21 13:06:38 Functions: 52 53 98.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * snapmgr.c
       4             :  *      PostgreSQL snapshot manager
       5             :  *
       6             :  * We keep track of snapshots in two ways: those "registered" by resowner.c,
       7             :  * and the "active snapshot" stack.  All snapshots in either of them live in
       8             :  * persistent memory.  When a snapshot is no longer in any of these lists
       9             :  * (tracked by separate refcounts on each snapshot), its memory can be freed.
      10             :  *
      11             :  * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
      12             :  * regd_count and list it in RegisteredSnapshots, but this reference is not
      13             :  * tracked by a resource owner. We used to use the TopTransactionResourceOwner
      14             :  * to track this snapshot reference, but that introduces logical circularity
      15             :  * and thus makes it impossible to clean up in a sane fashion.  It's better to
      16             :  * handle this reference as an internally-tracked registration, so that this
      17             :  * module is entirely lower-level than ResourceOwners.
      18             :  *
      19             :  * Likewise, any snapshots that have been exported by pg_export_snapshot
      20             :  * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
      21             :  * tracked by any resource owner.
      22             :  *
      23             :  * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
      24             :  * is valid, but is not tracked by any resource owner.
      25             :  *
      26             :  * The same is true for historic snapshots used during logical decoding;
      27             :  * their lifetime is managed separately (as they live longer than one xact.c
      28             :  * transaction).
      29             :  *
      30             :  * These arrangements let us reset MyPgXact->xmin when there are no snapshots
      31             :  * referenced by this transaction, and advance it when the one with oldest
      32             :  * Xmin is no longer referenced.  For simplicity however, only registered
      33             :  * snapshots, not active snapshots, participate in tracking which one is oldest;
      34             :  * we don't try to change MyPgXact->xmin except when the active-snapshot
      35             :  * stack is empty.
      36             :  *
      37             :  *
      38             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
      39             :  * Portions Copyright (c) 1994, Regents of the University of California
      40             :  *
      41             :  * IDENTIFICATION
      42             :  *    src/backend/utils/time/snapmgr.c
      43             :  *
      44             :  *-------------------------------------------------------------------------
      45             :  */
      46             : #include "postgres.h"
      47             : 
      48             : #include <sys/stat.h>
      49             : #include <unistd.h>
      50             : 
      51             : #include "access/subtrans.h"
      52             : #include "access/transam.h"
      53             : #include "access/xact.h"
      54             : #include "access/xlog.h"
      55             : #include "catalog/catalog.h"
      56             : #include "lib/pairingheap.h"
      57             : #include "miscadmin.h"
      58             : #include "storage/predicate.h"
      59             : #include "storage/proc.h"
      60             : #include "storage/procarray.h"
      61             : #include "storage/sinval.h"
      62             : #include "storage/sinvaladt.h"
      63             : #include "storage/spin.h"
      64             : #include "utils/builtins.h"
      65             : #include "utils/memutils.h"
      66             : #include "utils/rel.h"
      67             : #include "utils/resowner_private.h"
      68             : #include "utils/snapmgr.h"
      69             : #include "utils/syscache.h"
      70             : 
      71             : 
      72             : /*
      73             :  * GUC parameters
      74             :  */
      75             : int         old_snapshot_threshold; /* number of minutes, -1 disables */
      76             : 
      77             : /*
      78             :  * Structure for dealing with old_snapshot_threshold implementation.
      79             :  */
      80             : typedef struct OldSnapshotControlData
      81             : {
      82             :     /*
      83             :      * Variables for old snapshot handling are shared among processes and are
      84             :      * only allowed to move forward.
      85             :      */
      86             :     slock_t     mutex_current;  /* protect current_timestamp */
      87             :     TimestampTz current_timestamp;  /* latest snapshot timestamp */
      88             :     slock_t     mutex_latest_xmin;  /* protect latest_xmin and next_map_update */
      89             :     TransactionId latest_xmin;  /* latest snapshot xmin */
      90             :     TimestampTz next_map_update;    /* latest snapshot valid up to */
      91             :     slock_t     mutex_threshold;    /* protect threshold fields */
      92             :     TimestampTz threshold_timestamp;    /* earlier snapshot is old */
      93             :     TransactionId threshold_xid;    /* earlier xid may be gone */
      94             : 
      95             :     /*
      96             :      * Keep one xid per minute for old snapshot error handling.
      97             :      *
      98             :      * Use a circular buffer with a head offset, a count of entries currently
      99             :      * used, and a timestamp corresponding to the xid at the head offset.  A
     100             :      * count_used value of zero means that there are no times stored; a
     101             :      * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
     102             :      * is full and the head must be advanced to add new entries.  Use
     103             :      * timestamps aligned to minute boundaries, since that seems less
     104             :      * surprising than aligning based on the first usage timestamp.  The
     105             :      * latest bucket is effectively stored within latest_xmin.  The circular
     106             :      * buffer is updated when we get a new xmin value that doesn't fall into
     107             :      * the same interval.
     108             :      *
     109             :      * It is OK if the xid for a given time slot is from earlier than
     110             :      * calculated by adding the number of minutes corresponding to the
     111             :      * (possibly wrapped) distance from the head offset to the time of the
     112             :      * head entry, since that just results in the vacuuming of old tuples
     113             :      * being slightly less aggressive.  It would not be OK for it to be off in
     114             :      * the other direction, since it might result in vacuuming tuples that are
     115             :      * still expected to be there.
     116             :      *
     117             :      * Use of an SLRU was considered but not chosen because it is more
     118             :      * heavyweight than is needed for this, and would probably not be any less
     119             :      * code to implement.
     120             :      *
     121             :      * Persistence is not needed.
     122             :      */
     123             :     int         head_offset;    /* subscript of oldest tracked time */
     124             :     TimestampTz head_timestamp; /* time corresponding to head xid */
     125             :     int         count_used;     /* how many slots are in use */
     126             :     TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
     127             : } OldSnapshotControlData;
     128             : 
     129             : static volatile OldSnapshotControlData *oldSnapshotControl;
     130             : 
     131             : 
     132             : /*
     133             :  * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
     134             :  * mode, and to the latest one taken in a read-committed transaction.
     135             :  * SecondarySnapshot is a snapshot that's always up-to-date as of the current
     136             :  * instant, even in transaction-snapshot mode.  It should only be used for
     137             :  * special-purpose code (say, RI checking.)  CatalogSnapshot points to an
     138             :  * MVCC snapshot intended to be used for catalog scans; we must invalidate it
     139             :  * whenever a system catalog change occurs.
     140             :  *
     141             :  * These SnapshotData structs are static to simplify memory allocation
     142             :  * (see the hack in GetSnapshotData to avoid repeated malloc/free).
     143             :  */
     144             : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};
     145             : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};
     146             : SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};
     147             : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};
     148             : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};
     149             : 
     150             : /* Pointers to valid snapshots */
     151             : static Snapshot CurrentSnapshot = NULL;
     152             : static Snapshot SecondarySnapshot = NULL;
     153             : static Snapshot CatalogSnapshot = NULL;
     154             : static Snapshot HistoricSnapshot = NULL;
     155             : 
     156             : /*
     157             :  * These are updated by GetSnapshotData.  We initialize them this way
     158             :  * for the convenience of TransactionIdIsInProgress: even in bootstrap
     159             :  * mode, we don't want it to say that BootstrapTransactionId is in progress.
     160             :  *
     161             :  * RecentGlobalXmin and RecentGlobalDataXmin are initialized to
     162             :  * InvalidTransactionId, to ensure that no one tries to use a stale
     163             :  * value. Readers should ensure that they have been set to something else
     164             :  * before using them.
     165             :  */
     166             : TransactionId TransactionXmin = FirstNormalTransactionId;
     167             : TransactionId RecentXmin = FirstNormalTransactionId;
     168             : TransactionId RecentGlobalXmin = InvalidTransactionId;
     169             : TransactionId RecentGlobalDataXmin = InvalidTransactionId;
     170             : 
     171             : /* (table, ctid) => (cmin, cmax) mapping during timetravel */
     172             : static HTAB *tuplecid_data = NULL;
     173             : 
     174             : /*
     175             :  * Elements of the active snapshot stack.
     176             :  *
     177             :  * Each element here accounts for exactly one active_count on SnapshotData.
     178             :  *
     179             :  * NB: the code assumes that elements in this list are in non-increasing
     180             :  * order of as_level; also, the list must be NULL-terminated.
     181             :  */
     182             : typedef struct ActiveSnapshotElt
     183             : {
     184             :     Snapshot    as_snap;
     185             :     int         as_level;
     186             :     struct ActiveSnapshotElt *as_next;
     187             : } ActiveSnapshotElt;
     188             : 
     189             : /* Top of the stack of active snapshots */
     190             : static ActiveSnapshotElt *ActiveSnapshot = NULL;
     191             : 
     192             : /* Bottom of the stack of active snapshots */
     193             : static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
     194             : 
     195             : /*
     196             :  * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
     197             :  * quickly find the one with lowest xmin, to advance our MyPgXact->xmin.
     198             :  */
     199             : static int  xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
     200             :                      void *arg);
     201             : 
     202             : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
     203             : 
     204             : /* first GetTransactionSnapshot call in a transaction? */
     205             : bool        FirstSnapshotSet = false;
     206             : 
     207             : /*
     208             :  * Remember the serializable transaction snapshot, if any.  We cannot trust
     209             :  * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
     210             :  * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
     211             :  */
     212             : static Snapshot FirstXactSnapshot = NULL;
     213             : 
     214             : /* Define pathname of exported-snapshot files */
     215             : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
     216             : 
     217             : /* Structure holding info about exported snapshot. */
     218             : typedef struct ExportedSnapshot
     219             : {
     220             :     char       *snapfile;
     221             :     Snapshot    snapshot;
     222             : } ExportedSnapshot;
     223             : 
     224             : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
     225             : static List *exportedSnapshots = NIL;
     226             : 
     227             : /* Prototypes for local functions */
     228             : static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
     229             : static Snapshot CopySnapshot(Snapshot snapshot);
     230             : static void FreeSnapshot(Snapshot snapshot);
     231             : static void SnapshotResetXmin(void);
     232             : 
     233             : /*
     234             :  * Snapshot fields to be serialized.
     235             :  *
     236             :  * Only these fields need to be sent to the cooperating backend; the
     237             :  * remaining ones can (and must) be set by the receiver upon restore.
     238             :  */
     239             : typedef struct SerializedSnapshotData
     240             : {
     241             :     TransactionId xmin;
     242             :     TransactionId xmax;
     243             :     uint32      xcnt;
     244             :     int32       subxcnt;
     245             :     bool        suboverflowed;
     246             :     bool        takenDuringRecovery;
     247             :     CommandId   curcid;
     248             :     TimestampTz whenTaken;
     249             :     XLogRecPtr  lsn;
     250             : } SerializedSnapshotData;
     251             : 
     252             : Size
     253        3784 : SnapMgrShmemSize(void)
     254             : {
     255             :     Size        size;
     256             : 
     257        3784 :     size = offsetof(OldSnapshotControlData, xid_by_minute);
     258        3784 :     if (old_snapshot_threshold > 0)
     259           0 :         size = add_size(size, mul_size(sizeof(TransactionId),
     260           0 :                                        OLD_SNAPSHOT_TIME_MAP_ENTRIES));
     261             : 
     262        3784 :     return size;
     263             : }
     264             : 
     265             : /*
     266             :  * Initialize for managing old snapshot detection.
     267             :  */
     268             : void
     269        1890 : SnapMgrInit(void)
     270             : {
     271             :     bool        found;
     272             : 
     273             :     /*
     274             :      * Create or attach to the OldSnapshotControlData structure.
     275             :      */
     276        1890 :     oldSnapshotControl = (volatile OldSnapshotControlData *)
     277        1890 :         ShmemInitStruct("OldSnapshotControlData",
     278             :                         SnapMgrShmemSize(), &found);
     279             : 
     280        1890 :     if (!found)
     281             :     {
     282        1890 :         SpinLockInit(&oldSnapshotControl->mutex_current);
     283        1890 :         oldSnapshotControl->current_timestamp = 0;
     284        1890 :         SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
     285        1890 :         oldSnapshotControl->latest_xmin = InvalidTransactionId;
     286        1890 :         oldSnapshotControl->next_map_update = 0;
     287        1890 :         SpinLockInit(&oldSnapshotControl->mutex_threshold);
     288        1890 :         oldSnapshotControl->threshold_timestamp = 0;
     289        1890 :         oldSnapshotControl->threshold_xid = InvalidTransactionId;
     290        1890 :         oldSnapshotControl->head_offset = 0;
     291        1890 :         oldSnapshotControl->head_timestamp = 0;
     292        1890 :         oldSnapshotControl->count_used = 0;
     293             :     }
     294        1890 : }
     295             : 
     296             : /*
     297             :  * GetTransactionSnapshot
     298             :  *      Get the appropriate snapshot for a new query in a transaction.
     299             :  *
     300             :  * Note that the return value may point at static storage that will be modified
     301             :  * by future calls and by CommandCounterIncrement().  Callers should call
     302             :  * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
     303             :  * used very long.
     304             :  */
     305             : Snapshot
     306     1039812 : GetTransactionSnapshot(void)
     307             : {
     308             :     /*
     309             :      * Return historic snapshot if doing logical decoding. We'll never need a
     310             :      * non-historic transaction snapshot in this (sub-)transaction, so there's
     311             :      * no need to be careful to set one up for later calls to
     312             :      * GetTransactionSnapshot().
     313             :      */
     314     1039812 :     if (HistoricSnapshotActive())
     315             :     {
     316             :         Assert(!FirstSnapshotSet);
     317           0 :         return HistoricSnapshot;
     318             :     }
     319             : 
     320             :     /* First call in transaction? */
     321     1039812 :     if (!FirstSnapshotSet)
     322             :     {
     323             :         /*
     324             :          * Don't allow catalog snapshot to be older than xact snapshot.  Must
     325             :          * do this first to allow the empty-heap Assert to succeed.
     326             :          */
     327      436754 :         InvalidateCatalogSnapshot();
     328             : 
     329             :         Assert(pairingheap_is_empty(&RegisteredSnapshots));
     330             :         Assert(FirstXactSnapshot == NULL);
     331             : 
     332      436754 :         if (IsInParallelMode())
     333           0 :             elog(ERROR,
     334             :                  "cannot take query snapshot during a parallel operation");
     335             : 
     336             :         /*
     337             :          * In transaction-snapshot mode, the first snapshot must live until
     338             :          * end of xact regardless of what the caller does with it, so we must
     339             :          * make a copy of it rather than returning CurrentSnapshotData
     340             :          * directly.  Furthermore, if we're running in serializable mode,
     341             :          * predicate.c needs to wrap the snapshot fetch in its own processing.
     342             :          */
     343      436754 :         if (IsolationUsesXactSnapshot())
     344             :         {
     345             :             /* First, create the snapshot in CurrentSnapshotData */
     346        4398 :             if (IsolationIsSerializable())
     347        3094 :                 CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
     348             :             else
     349        1304 :                 CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     350             :             /* Make a saved copy */
     351        4398 :             CurrentSnapshot = CopySnapshot(CurrentSnapshot);
     352        4398 :             FirstXactSnapshot = CurrentSnapshot;
     353             :             /* Mark it as "registered" in FirstXactSnapshot */
     354        4398 :             FirstXactSnapshot->regd_count++;
     355        4398 :             pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
     356             :         }
     357             :         else
     358      432356 :             CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     359             : 
     360      436754 :         FirstSnapshotSet = true;
     361      436754 :         return CurrentSnapshot;
     362             :     }
     363             : 
     364      603058 :     if (IsolationUsesXactSnapshot())
     365      181758 :         return CurrentSnapshot;
     366             : 
     367             :     /* Don't allow catalog snapshot to be older than xact snapshot. */
     368      421300 :     InvalidateCatalogSnapshot();
     369             : 
     370      421300 :     CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     371             : 
     372      421300 :     return CurrentSnapshot;
     373             : }
     374             : 
     375             : /*
     376             :  * GetLatestSnapshot
     377             :  *      Get a snapshot that is up-to-date as of the current instant,
     378             :  *      even if we are executing in transaction-snapshot mode.
     379             :  */
     380             : Snapshot
     381        8726 : GetLatestSnapshot(void)
     382             : {
     383             :     /*
     384             :      * We might be able to relax this, but nothing that could otherwise work
     385             :      * needs it.
     386             :      */
     387        8726 :     if (IsInParallelMode())
     388           0 :         elog(ERROR,
     389             :              "cannot update SecondarySnapshot during a parallel operation");
     390             : 
     391             :     /*
     392             :      * So far there are no cases requiring support for GetLatestSnapshot()
     393             :      * during logical decoding, but it wouldn't be hard to add if required.
     394             :      */
     395             :     Assert(!HistoricSnapshotActive());
     396             : 
     397             :     /* If first call in transaction, go ahead and set the xact snapshot */
     398        8726 :     if (!FirstSnapshotSet)
     399         184 :         return GetTransactionSnapshot();
     400             : 
     401        8542 :     SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
     402             : 
     403        8542 :     return SecondarySnapshot;
     404             : }
     405             : 
     406             : /*
     407             :  * GetOldestSnapshot
     408             :  *
     409             :  *      Get the transaction's oldest known snapshot, as judged by the LSN.
     410             :  *      Will return NULL if there are no active or registered snapshots.
     411             :  */
     412             : Snapshot
     413       36622 : GetOldestSnapshot(void)
     414             : {
     415       36622 :     Snapshot    OldestRegisteredSnapshot = NULL;
     416       36622 :     XLogRecPtr  RegisteredLSN = InvalidXLogRecPtr;
     417             : 
     418       36622 :     if (!pairingheap_is_empty(&RegisteredSnapshots))
     419             :     {
     420       36442 :         OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
     421             :                                                          pairingheap_first(&RegisteredSnapshots));
     422       36442 :         RegisteredLSN = OldestRegisteredSnapshot->lsn;
     423             :     }
     424             : 
     425       36622 :     if (OldestActiveSnapshot != NULL)
     426             :     {
     427       36622 :         XLogRecPtr  ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
     428             : 
     429       36622 :         if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
     430       36622 :             return OldestActiveSnapshot->as_snap;
     431             :     }
     432             : 
     433           0 :     return OldestRegisteredSnapshot;
     434             : }
     435             : 
     436             : /*
     437             :  * GetCatalogSnapshot
     438             :  *      Get a snapshot that is sufficiently up-to-date for scan of the
     439             :  *      system catalog with the specified OID.
     440             :  */
     441             : Snapshot
     442     9681950 : GetCatalogSnapshot(Oid relid)
     443             : {
     444             :     /*
     445             :      * Return historic snapshot while we're doing logical decoding, so we can
     446             :      * see the appropriate state of the catalog.
     447             :      *
     448             :      * This is the primary reason for needing to reset the system caches after
     449             :      * finishing decoding.
     450             :      */
     451     9681950 :     if (HistoricSnapshotActive())
     452        9940 :         return HistoricSnapshot;
     453             : 
     454     9672010 :     return GetNonHistoricCatalogSnapshot(relid);
     455             : }
     456             : 
     457             : /*
     458             :  * GetNonHistoricCatalogSnapshot
     459             :  *      Get a snapshot that is sufficiently up-to-date for scan of the system
     460             :  *      catalog with the specified OID, even while historic snapshots are set
     461             :  *      up.
     462             :  */
     463             : Snapshot
     464     9673530 : GetNonHistoricCatalogSnapshot(Oid relid)
     465             : {
     466             :     /*
     467             :      * If the caller is trying to scan a relation that has no syscache, no
     468             :      * catcache invalidations will be sent when it is updated.  For a few key
     469             :      * relations, snapshot invalidations are sent instead.  If we're trying to
     470             :      * scan a relation for which neither catcache nor snapshot invalidations
     471             :      * are sent, we must refresh the snapshot every time.
     472             :      */
     473    18509436 :     if (CatalogSnapshot &&
     474    14224702 :         !RelationInvalidatesSnapshotsOnly(relid) &&
     475     5388796 :         !RelationHasSysCache(relid))
     476      222584 :         InvalidateCatalogSnapshot();
     477             : 
     478     9673530 :     if (CatalogSnapshot == NULL)
     479             :     {
     480             :         /* Get new snapshot. */
     481     1060208 :         CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
     482             : 
     483             :         /*
     484             :          * Make sure the catalog snapshot will be accounted for in decisions
     485             :          * about advancing PGXACT->xmin.  We could apply RegisterSnapshot, but
     486             :          * that would result in making a physical copy, which is overkill; and
     487             :          * it would also create a dependency on some resource owner, which we
     488             :          * do not want for reasons explained at the head of this file. Instead
     489             :          * just shove the CatalogSnapshot into the pairing heap manually. This
     490             :          * has to be reversed in InvalidateCatalogSnapshot, of course.
     491             :          *
     492             :          * NB: it had better be impossible for this to throw error, since the
     493             :          * CatalogSnapshot pointer is already valid.
     494             :          */
     495     1060208 :         pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
     496             :     }
     497             : 
     498     9673530 :     return CatalogSnapshot;
     499             : }
     500             : 
     501             : /*
     502             :  * InvalidateCatalogSnapshot
     503             :  *      Mark the current catalog snapshot, if any, as invalid
     504             :  *
     505             :  * We could change this API to allow the caller to provide more fine-grained
     506             :  * invalidation details, so that a change to relation A wouldn't prevent us
     507             :  * from using our cached snapshot to scan relation B, but so far there's no
     508             :  * evidence that the CPU cycles spent tracking such fine details would be
     509             :  * well spent.
     510             :  */
     511             : void
     512    17425674 : InvalidateCatalogSnapshot(void)
     513             : {
     514    17425674 :     if (CatalogSnapshot)
     515             :     {
     516     1060208 :         pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
     517     1060208 :         CatalogSnapshot = NULL;
     518     1060208 :         SnapshotResetXmin();
     519             :     }
     520    17425674 : }
     521             : 
     522             : /*
     523             :  * InvalidateCatalogSnapshotConditionally
     524             :  *      Drop catalog snapshot if it's the only one we have
     525             :  *
     526             :  * This is called when we are about to wait for client input, so we don't
     527             :  * want to continue holding the catalog snapshot if it might mean that the
     528             :  * global xmin horizon can't advance.  However, if there are other snapshots
     529             :  * still active or registered, the catalog snapshot isn't likely to be the
     530             :  * oldest one, so we might as well keep it.
     531             :  */
     532             : void
     533      547980 : InvalidateCatalogSnapshotConditionally(void)
     534             : {
     535      607700 :     if (CatalogSnapshot &&
     536      119038 :         ActiveSnapshot == NULL &&
     537      118636 :         pairingheap_is_singular(&RegisteredSnapshots))
     538        4902 :         InvalidateCatalogSnapshot();
     539      547980 : }
     540             : 
     541             : /*
     542             :  * SnapshotSetCommandId
     543             :  *      Propagate CommandCounterIncrement into the static snapshots, if set
     544             :  */
     545             : void
     546      589436 : SnapshotSetCommandId(CommandId curcid)
     547             : {
     548      589436 :     if (!FirstSnapshotSet)
     549       65176 :         return;
     550             : 
     551      524260 :     if (CurrentSnapshot)
     552      524260 :         CurrentSnapshot->curcid = curcid;
     553      524260 :     if (SecondarySnapshot)
     554        5004 :         SecondarySnapshot->curcid = curcid;
     555             :     /* Should we do the same with CatalogSnapshot? */
     556             : }
     557             : 
     558             : /*
     559             :  * SetTransactionSnapshot
     560             :  *      Set the transaction's snapshot from an imported MVCC snapshot.
     561             :  *
     562             :  * Note that this is very closely tied to GetTransactionSnapshot --- it
     563             :  * must take care of all the same considerations as the first-snapshot case
     564             :  * in GetTransactionSnapshot.
     565             :  */
     566             : static void
     567        1610 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
     568             :                        int sourcepid, PGPROC *sourceproc)
     569             : {
     570             :     /* Caller should have checked this already */
     571             :     Assert(!FirstSnapshotSet);
     572             : 
     573             :     /* Better do this to ensure following Assert succeeds. */
     574        1610 :     InvalidateCatalogSnapshot();
     575             : 
     576             :     Assert(pairingheap_is_empty(&RegisteredSnapshots));
     577             :     Assert(FirstXactSnapshot == NULL);
     578             :     Assert(!HistoricSnapshotActive());
     579             : 
     580             :     /*
     581             :      * Even though we are not going to use the snapshot it computes, we must
     582             :      * call GetSnapshotData, for two reasons: (1) to be sure that
     583             :      * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
     584             :      * RecentXmin and RecentGlobalXmin.  (We could alternatively include those
     585             :      * two variables in exported snapshot files, but it seems better to have
     586             :      * snapshot importers compute reasonably up-to-date values for them.)
     587             :      */
     588        1610 :     CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
     589             : 
     590             :     /*
     591             :      * Now copy appropriate fields from the source snapshot.
     592             :      */
     593        1610 :     CurrentSnapshot->xmin = sourcesnap->xmin;
     594        1610 :     CurrentSnapshot->xmax = sourcesnap->xmax;
     595        1610 :     CurrentSnapshot->xcnt = sourcesnap->xcnt;
     596             :     Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
     597        1610 :     memcpy(CurrentSnapshot->xip, sourcesnap->xip,
     598        1610 :            sourcesnap->xcnt * sizeof(TransactionId));
     599        1610 :     CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
     600             :     Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
     601        1610 :     memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
     602        1610 :            sourcesnap->subxcnt * sizeof(TransactionId));
     603        1610 :     CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
     604        1610 :     CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
     605             :     /* NB: curcid should NOT be copied, it's a local matter */
     606             : 
     607             :     /*
     608             :      * Now we have to fix what GetSnapshotData did with MyPgXact->xmin and
     609             :      * TransactionXmin.  There is a race condition: to make sure we are not
     610             :      * causing the global xmin to go backwards, we have to test that the
     611             :      * source transaction is still running, and that has to be done
     612             :      * atomically. So let procarray.c do it.
     613             :      *
     614             :      * Note: in serializable mode, predicate.c will do this a second time. It
     615             :      * doesn't seem worth contorting the logic here to avoid two calls,
     616             :      * especially since it's not clear that predicate.c *must* do this.
     617             :      */
     618        1610 :     if (sourceproc != NULL)
     619             :     {
     620        1594 :         if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
     621           0 :             ereport(ERROR,
     622             :                     (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     623             :                      errmsg("could not import the requested snapshot"),
     624             :                      errdetail("The source transaction is not running anymore.")));
     625             :     }
     626          16 :     else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
     627           0 :         ereport(ERROR,
     628             :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
     629             :                  errmsg("could not import the requested snapshot"),
     630             :                  errdetail("The source process with PID %d is not running anymore.",
     631             :                            sourcepid)));
     632             : 
     633             :     /*
     634             :      * In transaction-snapshot mode, the first snapshot must live until end of
     635             :      * xact, so we must make a copy of it.  Furthermore, if we're running in
     636             :      * serializable mode, predicate.c needs to do its own processing.
     637             :      */
     638        1610 :     if (IsolationUsesXactSnapshot())
     639             :     {
     640        1070 :         if (IsolationIsSerializable())
     641          10 :             SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
     642             :                                                sourcepid);
     643             :         /* Make a saved copy */
     644        1070 :         CurrentSnapshot = CopySnapshot(CurrentSnapshot);
     645        1070 :         FirstXactSnapshot = CurrentSnapshot;
     646             :         /* Mark it as "registered" in FirstXactSnapshot */
     647        1070 :         FirstXactSnapshot->regd_count++;
     648        1070 :         pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
     649             :     }
     650             : 
     651        1610 :     FirstSnapshotSet = true;
     652        1610 : }
     653             : 
     654             : /*
     655             :  * CopySnapshot
     656             :  *      Copy the given snapshot.
     657             :  *
     658             :  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
     659             :  * to 0.  The returned snapshot has the copied flag set.
     660             :  */
     661             : static Snapshot
     662     9815380 : CopySnapshot(Snapshot snapshot)
     663             : {
     664             :     Snapshot    newsnap;
     665             :     Size        subxipoff;
     666             :     Size        size;
     667             : 
     668             :     Assert(snapshot != InvalidSnapshot);
     669             : 
     670             :     /* We allocate any XID arrays needed in the same palloc block. */
     671     9815380 :     size = subxipoff = sizeof(SnapshotData) +
     672     9815380 :         snapshot->xcnt * sizeof(TransactionId);
     673     9815380 :     if (snapshot->subxcnt > 0)
     674       36104 :         size += snapshot->subxcnt * sizeof(TransactionId);
     675             : 
     676     9815380 :     newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
     677     9815380 :     memcpy(newsnap, snapshot, sizeof(SnapshotData));
     678             : 
     679     9815380 :     newsnap->regd_count = 0;
     680     9815380 :     newsnap->active_count = 0;
     681     9815380 :     newsnap->copied = true;
     682             : 
     683             :     /* setup XID array */
     684     9815380 :     if (snapshot->xcnt > 0)
     685             :     {
     686     1070710 :         newsnap->xip = (TransactionId *) (newsnap + 1);
     687     1070710 :         memcpy(newsnap->xip, snapshot->xip,
     688     1070710 :                snapshot->xcnt * sizeof(TransactionId));
     689             :     }
     690             :     else
     691     8744670 :         newsnap->xip = NULL;
     692             : 
     693             :     /*
     694             :      * Setup subXID array. Don't bother to copy it if it had overflowed,
     695             :      * though, because it's not used anywhere in that case. Except if it's a
     696             :      * snapshot taken during recovery; all the top-level XIDs are in subxip as
     697             :      * well in that case, so we mustn't lose them.
     698             :      */
     699     9851484 :     if (snapshot->subxcnt > 0 &&
     700       36104 :         (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
     701             :     {
     702       36104 :         newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
     703       36104 :         memcpy(newsnap->subxip, snapshot->subxip,
     704       36104 :                snapshot->subxcnt * sizeof(TransactionId));
     705             :     }
     706             :     else
     707     9779276 :         newsnap->subxip = NULL;
     708             : 
     709     9815380 :     return newsnap;
     710             : }
     711             : 
     712             : /*
     713             :  * FreeSnapshot
     714             :  *      Free the memory associated with a snapshot.
     715             :  */
     716             : static void
     717     9790614 : FreeSnapshot(Snapshot snapshot)
     718             : {
     719             :     Assert(snapshot->regd_count == 0);
     720             :     Assert(snapshot->active_count == 0);
     721             :     Assert(snapshot->copied);
     722             : 
     723     9790614 :     pfree(snapshot);
     724     9790614 : }
     725             : 
     726             : /*
     727             :  * PushActiveSnapshot
     728             :  *      Set the given snapshot as the current active snapshot
     729             :  *
     730             :  * If the passed snapshot is a statically-allocated one, or it is possibly
     731             :  * subject to a future command counter update, create a new long-lived copy
     732             :  * with active refcount=1.  Otherwise, only increment the refcount.
     733             :  */
     734             : void
     735     1134878 : PushActiveSnapshot(Snapshot snap)
     736             : {
     737             :     ActiveSnapshotElt *newactive;
     738             : 
     739             :     Assert(snap != InvalidSnapshot);
     740             : 
     741     1134878 :     newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
     742             : 
     743             :     /*
     744             :      * Checking SecondarySnapshot is probably useless here, but it seems
     745             :      * better to be sure.
     746             :      */
     747     1134878 :     if (snap == CurrentSnapshot || snap == SecondarySnapshot || !snap->copied)
     748      898234 :         newactive->as_snap = CopySnapshot(snap);
     749             :     else
     750      236644 :         newactive->as_snap = snap;
     751             : 
     752     1134878 :     newactive->as_next = ActiveSnapshot;
     753     1134878 :     newactive->as_level = GetCurrentTransactionNestLevel();
     754             : 
     755     1134878 :     newactive->as_snap->active_count++;
     756             : 
     757     1134878 :     ActiveSnapshot = newactive;
     758     1134878 :     if (OldestActiveSnapshot == NULL)
     759      908242 :         OldestActiveSnapshot = ActiveSnapshot;
     760     1134878 : }
     761             : 
     762             : /*
     763             :  * PushCopiedSnapshot
     764             :  *      As above, except forcibly copy the presented snapshot.
     765             :  *
     766             :  * This should be used when the ActiveSnapshot has to be modifiable, for
     767             :  * example if the caller intends to call UpdateActiveSnapshotCommandId.
     768             :  * The new snapshot will be released when popped from the stack.
     769             :  */
     770             : void
     771       77078 : PushCopiedSnapshot(Snapshot snapshot)
     772             : {
     773       77078 :     PushActiveSnapshot(CopySnapshot(snapshot));
     774       77078 : }
     775             : 
     776             : /*
     777             :  * UpdateActiveSnapshotCommandId
     778             :  *
     779             :  * Update the current CID of the active snapshot.  This can only be applied
     780             :  * to a snapshot that is not referenced elsewhere.
     781             :  */
     782             : void
     783       32346 : UpdateActiveSnapshotCommandId(void)
     784             : {
     785             :     CommandId   save_curcid,
     786             :                 curcid;
     787             : 
     788             :     Assert(ActiveSnapshot != NULL);
     789             :     Assert(ActiveSnapshot->as_snap->active_count == 1);
     790             :     Assert(ActiveSnapshot->as_snap->regd_count == 0);
     791             : 
     792             :     /*
     793             :      * Don't allow modification of the active snapshot during parallel
     794             :      * operation.  We share the snapshot to worker backends at the beginning
     795             :      * of parallel operation, so any change to the snapshot can lead to
     796             :      * inconsistencies.  We have other defenses against
     797             :      * CommandCounterIncrement, but there are a few places that call this
     798             :      * directly, so we put an additional guard here.
     799             :      */
     800       32346 :     save_curcid = ActiveSnapshot->as_snap->curcid;
     801       32346 :     curcid = GetCurrentCommandId(false);
     802       32346 :     if (IsInParallelMode() && save_curcid != curcid)
     803           0 :         elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
     804       32346 :     ActiveSnapshot->as_snap->curcid = curcid;
     805       32346 : }
     806             : 
     807             : /*
     808             :  * PopActiveSnapshot
     809             :  *
     810             :  * Remove the topmost snapshot from the active snapshot stack, decrementing the
     811             :  * reference count, and free it if this was the last reference.
     812             :  */
     813             : void
     814     1110560 : PopActiveSnapshot(void)
     815             : {
     816             :     ActiveSnapshotElt *newstack;
     817             : 
     818     1110560 :     newstack = ActiveSnapshot->as_next;
     819             : 
     820             :     Assert(ActiveSnapshot->as_snap->active_count > 0);
     821             : 
     822     1110560 :     ActiveSnapshot->as_snap->active_count--;
     823             : 
     824     2217402 :     if (ActiveSnapshot->as_snap->active_count == 0 &&
     825     1106842 :         ActiveSnapshot->as_snap->regd_count == 0)
     826      813826 :         FreeSnapshot(ActiveSnapshot->as_snap);
     827             : 
     828     1110560 :     pfree(ActiveSnapshot);
     829     1110560 :     ActiveSnapshot = newstack;
     830     1110560 :     if (ActiveSnapshot == NULL)
     831      891328 :         OldestActiveSnapshot = NULL;
     832             : 
     833     1110560 :     SnapshotResetXmin();
     834     1110560 : }
     835             : 
     836             : /*
     837             :  * GetActiveSnapshot
     838             :  *      Return the topmost snapshot in the Active stack.
     839             :  */
     840             : Snapshot
     841      862120 : GetActiveSnapshot(void)
     842             : {
     843             :     Assert(ActiveSnapshot != NULL);
     844             : 
     845      862120 :     return ActiveSnapshot->as_snap;
     846             : }
     847             : 
     848             : /*
     849             :  * ActiveSnapshotSet
     850             :  *      Return whether there is at least one snapshot in the Active stack
     851             :  */
     852             : bool
     853      330906 : ActiveSnapshotSet(void)
     854             : {
     855      330906 :     return ActiveSnapshot != NULL;
     856             : }
     857             : 
     858             : /*
     859             :  * RegisterSnapshot
     860             :  *      Register a snapshot as being in use by the current resource owner
     861             :  *
     862             :  * If InvalidSnapshot is passed, it is not registered.
     863             :  */
     864             : Snapshot
     865    10239764 : RegisterSnapshot(Snapshot snapshot)
     866             : {
     867    10239764 :     if (snapshot == InvalidSnapshot)
     868      698724 :         return InvalidSnapshot;
     869             : 
     870     9541040 :     return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
     871             : }
     872             : 
     873             : /*
     874             :  * RegisterSnapshotOnOwner
     875             :  *      As above, but use the specified resource owner
     876             :  */
     877             : Snapshot
     878     9541176 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
     879             : {
     880             :     Snapshot    snap;
     881             : 
     882     9541176 :     if (snapshot == InvalidSnapshot)
     883           0 :         return InvalidSnapshot;
     884             : 
     885             :     /* Static snapshot?  Create a persistent copy */
     886     9541176 :     snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
     887             : 
     888             :     /* and tell resowner.c about it */
     889     9541176 :     ResourceOwnerEnlargeSnapshots(owner);
     890     9541176 :     snap->regd_count++;
     891     9541176 :     ResourceOwnerRememberSnapshot(owner, snap);
     892             : 
     893     9541176 :     if (snap->regd_count == 1)
     894     9177278 :         pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
     895             : 
     896     9541176 :     return snap;
     897             : }
     898             : 
     899             : /*
     900             :  * UnregisterSnapshot
     901             :  *
     902             :  * Decrement the reference count of a snapshot, remove the corresponding
     903             :  * reference from CurrentResourceOwner, and free the snapshot if no more
     904             :  * references remain.
     905             :  */
     906             : void
     907    10202616 : UnregisterSnapshot(Snapshot snapshot)
     908             : {
     909    10202616 :     if (snapshot == NULL)
     910      672808 :         return;
     911             : 
     912     9529808 :     UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
     913             : }
     914             : 
     915             : /*
     916             :  * UnregisterSnapshotFromOwner
     917             :  *      As above, but use the specified resource owner
     918             :  */
     919             : void
     920     9541254 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
     921             : {
     922     9541254 :     if (snapshot == NULL)
     923          78 :         return;
     924             : 
     925             :     Assert(snapshot->regd_count > 0);
     926             :     Assert(!pairingheap_is_empty(&RegisteredSnapshots));
     927             : 
     928     9541176 :     ResourceOwnerForgetSnapshot(owner, snapshot);
     929             : 
     930     9541176 :     snapshot->regd_count--;
     931     9541176 :     if (snapshot->regd_count == 0)
     932     9177278 :         pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
     933             : 
     934     9541176 :     if (snapshot->regd_count == 0 && snapshot->active_count == 0)
     935             :     {
     936     8975660 :         FreeSnapshot(snapshot);
     937     8975660 :         SnapshotResetXmin();
     938             :     }
     939             : }
     940             : 
     941             : /*
     942             :  * Comparison function for RegisteredSnapshots heap.  Snapshots are ordered
     943             :  * by xmin, so that the snapshot with smallest xmin is at the top.
     944             :  */
     945             : static int
     946     9015448 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
     947             : {
     948     9015448 :     const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
     949     9015448 :     const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
     950             : 
     951     9015448 :     if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
     952       42532 :         return 1;
     953     8972916 :     else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
     954        5964 :         return -1;
     955             :     else
     956     8966952 :         return 0;
     957             : }
     958             : 
     959             : /*
     960             :  * Get current RecentGlobalXmin value, as a FullTransactionId.
     961             :  */
     962             : FullTransactionId
     963           0 : GetFullRecentGlobalXmin(void)
     964             : {
     965             :     FullTransactionId nextxid_full;
     966             :     uint32      nextxid_epoch;
     967             :     TransactionId nextxid_xid;
     968             :     uint32      epoch;
     969             : 
     970             :     Assert(TransactionIdIsNormal(RecentGlobalXmin));
     971             : 
     972             :     /*
     973             :      * Compute the epoch from the next XID's epoch. This relies on the fact
     974             :      * that RecentGlobalXmin must be within the 2 billion XID horizon from the
     975             :      * next XID.
     976             :      */
     977           0 :     nextxid_full = ReadNextFullTransactionId();
     978           0 :     nextxid_epoch = EpochFromFullTransactionId(nextxid_full);
     979           0 :     nextxid_xid = XidFromFullTransactionId(nextxid_full);
     980             : 
     981           0 :     if (RecentGlobalXmin > nextxid_xid)
     982           0 :         epoch = nextxid_epoch - 1;
     983             :     else
     984           0 :         epoch = nextxid_epoch;
     985             : 
     986           0 :     return FullTransactionIdFromEpochAndXid(epoch, RecentGlobalXmin);
     987             : }
     988             : 
     989             : /*
     990             :  * SnapshotResetXmin
     991             :  *
     992             :  * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid.
     993             :  * Note we can do this without locking because we assume that storing an Xid
     994             :  * is atomic.
     995             :  *
     996             :  * Even if there are some remaining snapshots, we may be able to advance our
     997             :  * PGXACT->xmin to some degree.  This typically happens when a portal is
     998             :  * dropped.  For efficiency, we only consider recomputing PGXACT->xmin when
     999             :  * the active snapshot stack is empty; this allows us not to need to track
    1000             :  * which active snapshot is oldest.
    1001             :  *
    1002             :  * Note: it's tempting to use GetOldestSnapshot() here so that we can include
    1003             :  * active snapshots in the calculation.  However, that compares by LSN not
    1004             :  * xmin so it's not entirely clear that it's the same thing.  Also, we'd be
    1005             :  * critically dependent on the assumption that the bottommost active snapshot
    1006             :  * stack entry has the oldest xmin.  (Current uses of GetOldestSnapshot() are
    1007             :  * not actually critical, but this would be.)
    1008             :  */
    1009             : static void
    1010    11168802 : SnapshotResetXmin(void)
    1011             : {
    1012             :     Snapshot    minSnapshot;
    1013             : 
    1014    11168802 :     if (ActiveSnapshot != NULL)
    1015     8814444 :         return;
    1016             : 
    1017     2354358 :     if (pairingheap_is_empty(&RegisteredSnapshots))
    1018             :     {
    1019      813672 :         MyPgXact->xmin = InvalidTransactionId;
    1020      813672 :         return;
    1021             :     }
    1022             : 
    1023     1540686 :     minSnapshot = pairingheap_container(SnapshotData, ph_node,
    1024             :                                         pairingheap_first(&RegisteredSnapshots));
    1025             : 
    1026     1540686 :     if (TransactionIdPrecedes(MyPgXact->xmin, minSnapshot->xmin))
    1027        3178 :         MyPgXact->xmin = minSnapshot->xmin;
    1028             : }
    1029             : 
    1030             : /*
    1031             :  * AtSubCommit_Snapshot
    1032             :  */
    1033             : void
    1034        4438 : AtSubCommit_Snapshot(int level)
    1035             : {
    1036             :     ActiveSnapshotElt *active;
    1037             : 
    1038             :     /*
    1039             :      * Relabel the active snapshots set in this subtransaction as though they
    1040             :      * are owned by the parent subxact.
    1041             :      */
    1042        4438 :     for (active = ActiveSnapshot; active != NULL; active = active->as_next)
    1043             :     {
    1044        3674 :         if (active->as_level < level)
    1045        3674 :             break;
    1046           0 :         active->as_level = level - 1;
    1047             :     }
    1048        4438 : }
    1049             : 
    1050             : /*
    1051             :  * AtSubAbort_Snapshot
    1052             :  *      Clean up snapshots after a subtransaction abort
    1053             :  */
    1054             : void
    1055        2922 : AtSubAbort_Snapshot(int level)
    1056             : {
    1057             :     /* Forget the active snapshots set by this subtransaction */
    1058        6972 :     while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
    1059             :     {
    1060             :         ActiveSnapshotElt *next;
    1061             : 
    1062        1128 :         next = ActiveSnapshot->as_next;
    1063             : 
    1064             :         /*
    1065             :          * Decrement the snapshot's active count.  If it's still registered or
    1066             :          * marked as active by an outer subtransaction, we can't free it yet.
    1067             :          */
    1068             :         Assert(ActiveSnapshot->as_snap->active_count >= 1);
    1069        1128 :         ActiveSnapshot->as_snap->active_count -= 1;
    1070             : 
    1071        2256 :         if (ActiveSnapshot->as_snap->active_count == 0 &&
    1072        1128 :             ActiveSnapshot->as_snap->regd_count == 0)
    1073        1128 :             FreeSnapshot(ActiveSnapshot->as_snap);
    1074             : 
    1075             :         /* and free the stack element */
    1076        1128 :         pfree(ActiveSnapshot);
    1077             : 
    1078        1128 :         ActiveSnapshot = next;
    1079        1128 :         if (ActiveSnapshot == NULL)
    1080         120 :             OldestActiveSnapshot = NULL;
    1081             :     }
    1082             : 
    1083        2922 :     SnapshotResetXmin();
    1084        2922 : }
    1085             : 
    1086             : /*
    1087             :  * AtEOXact_Snapshot
    1088             :  *      Snapshot manager's cleanup function for end of transaction
    1089             :  */
    1090             : void
    1091      458968 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
    1092             : {
    1093             :     /*
    1094             :      * In transaction-snapshot mode we must release our privately-managed
    1095             :      * reference to the transaction snapshot.  We must remove it from
    1096             :      * RegisteredSnapshots to keep the check below happy.  But we don't bother
    1097             :      * to do FreeSnapshot, for two reasons: the memory will go away with
    1098             :      * TopTransactionContext anyway, and if someone has left the snapshot
    1099             :      * stacked as active, we don't want the code below to be chasing through a
    1100             :      * dangling pointer.
    1101             :      */
    1102      458968 :     if (FirstXactSnapshot != NULL)
    1103             :     {
    1104             :         Assert(FirstXactSnapshot->regd_count > 0);
    1105             :         Assert(!pairingheap_is_empty(&RegisteredSnapshots));
    1106        5468 :         pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
    1107             :     }
    1108      458968 :     FirstXactSnapshot = NULL;
    1109             : 
    1110             :     /*
    1111             :      * If we exported any snapshots, clean them up.
    1112             :      */
    1113      458968 :     if (exportedSnapshots != NIL)
    1114             :     {
    1115             :         ListCell   *lc;
    1116             : 
    1117             :         /*
    1118             :          * Get rid of the files.  Unlink failure is only a WARNING because (1)
    1119             :          * it's too late to abort the transaction, and (2) leaving a leaked
    1120             :          * file around has little real consequence anyway.
    1121             :          *
    1122             :          * We also need to remove the snapshots from RegisteredSnapshots to
    1123             :          * prevent a warning below.
    1124             :          *
    1125             :          * As with the FirstXactSnapshot, we don't need to free resources of
    1126             :          * the snapshot itself as it will go away with the memory context.
    1127             :          */
    1128          16 :         foreach(lc, exportedSnapshots)
    1129             :         {
    1130           8 :             ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
    1131             : 
    1132           8 :             if (unlink(esnap->snapfile))
    1133           0 :                 elog(WARNING, "could not unlink file \"%s\": %m",
    1134             :                      esnap->snapfile);
    1135             : 
    1136           8 :             pairingheap_remove(&RegisteredSnapshots,
    1137           8 :                                &esnap->snapshot->ph_node);
    1138             :         }
    1139             : 
    1140           8 :         exportedSnapshots = NIL;
    1141             :     }
    1142             : 
    1143             :     /* Drop catalog snapshot if any */
    1144      458968 :     InvalidateCatalogSnapshot();
    1145             : 
    1146             :     /* On commit, complain about leftover snapshots */
    1147      458968 :     if (isCommit)
    1148             :     {
    1149             :         ActiveSnapshotElt *active;
    1150             : 
    1151      439582 :         if (!pairingheap_is_empty(&RegisteredSnapshots))
    1152           0 :             elog(WARNING, "registered snapshots seem to remain after cleanup");
    1153             : 
    1154             :         /* complain about unpopped active snapshots */
    1155      439582 :         for (active = ActiveSnapshot; active != NULL; active = active->as_next)
    1156           0 :             elog(WARNING, "snapshot %p still active", active);
    1157             :     }
    1158             : 
    1159             :     /*
    1160             :      * And reset our state.  We don't need to free the memory explicitly --
    1161             :      * it'll go away with TopTransactionContext.
    1162             :      */
    1163      458968 :     ActiveSnapshot = NULL;
    1164      458968 :     OldestActiveSnapshot = NULL;
    1165      458968 :     pairingheap_reset(&RegisteredSnapshots);
    1166             : 
    1167      458968 :     CurrentSnapshot = NULL;
    1168      458968 :     SecondarySnapshot = NULL;
    1169             : 
    1170      458968 :     FirstSnapshotSet = false;
    1171             : 
    1172             :     /*
    1173             :      * During normal commit processing, we call ProcArrayEndTransaction() to
    1174             :      * reset the MyPgXact->xmin. That call happens prior to the call to
    1175             :      * AtEOXact_Snapshot(), so we need not touch xmin here at all.
    1176             :      */
    1177      458968 :     if (resetXmin)
    1178       19452 :         SnapshotResetXmin();
    1179             : 
    1180             :     Assert(resetXmin || MyPgXact->xmin == 0);
    1181      458968 : }
    1182             : 
    1183             : 
    1184             : /*
    1185             :  * ExportSnapshot
    1186             :  *      Export the snapshot to a file so that other backends can import it.
    1187             :  *      Returns the token (the file name) that can be used to import this
    1188             :  *      snapshot.
                     :  *
                     :  * Side effects: the snapshot is copied with CopySnapshot(), the copy is
                     :  * appended to exportedSnapshots, its regd_count is incremented and it is
                     :  * added to RegisteredSnapshots.  The text form is written to a ".tmp" file
                     :  * which is then renamed to its final name under SNAPSHOT_EXPORT_DIR.
                     :  *
                     :  * The result is a pstrdup'd copy of the file's base name, i.e. the part of
                     :  * the path after SNAPSHOT_EXPORT_DIR and the slash.
                     :  *
                     :  * Raises ERROR when called inside a subtransaction, or when creating,
                     :  * writing, closing, or renaming the file fails.
    1189             :  */
    1190             : char *
    1191           8 : ExportSnapshot(Snapshot snapshot)
    1192             : {
    1193             :     TransactionId topXid;
    1194             :     TransactionId *children;
    1195             :     ExportedSnapshot *esnap;
    1196             :     int         nchildren;
    1197             :     int         addTopXid;
    1198             :     StringInfoData buf;
    1199             :     FILE       *f;
    1200             :     int         i;
    1201             :     MemoryContext oldcxt;
    1202             :     char        path[MAXPGPATH];
    1203             :     char        pathtmp[MAXPGPATH];
    1204             : 
    1205             :     /*
    1206             :      * It's tempting to call RequireTransactionBlock here, since it's not very
    1207             :      * useful to export a snapshot that will disappear immediately afterwards.
    1208             :      * However, we haven't got enough information to do that, since we don't
    1209             :      * know if we're at top level or not.  For example, we could be inside a
    1210             :      * plpgsql function that is going to fire off other transactions via
    1211             :      * dblink.  Rather than disallow perfectly legitimate usages, don't make a
    1212             :      * check.
    1213             :      *
    1214             :      * Also note that we don't make any restriction on the transaction's
    1215             :      * isolation level; however, importers must check the level if they are
    1216             :      * serializable.
    1217             :      */
    1218             : 
    1219             :     /*
    1220             :      * Get our transaction ID if there is one, to include in the snapshot.
    1221             :      */
    1222           8 :     topXid = GetTopTransactionIdIfAny();
    1223             : 
    1224             :     /*
    1225             :      * We cannot export a snapshot from a subtransaction because there's no
    1226             :      * easy way for importers to verify that the same subtransaction is still
    1227             :      * running.
    1228             :      */
    1229           8 :     if (IsSubTransaction())
    1230           0 :         ereport(ERROR,
    1231             :                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
    1232             :                  errmsg("cannot export a snapshot from a subtransaction")));
    1233             : 
    1234             :     /*
    1235             :      * We do however allow previously committed subtransactions to exist.
    1236             :      * Importers of the snapshot must see them as still running, so get their
    1237             :      * XIDs to add them to the snapshot.
    1238             :      */
    1239           8 :     nchildren = xactGetCommittedChildren(&children);
    1240             : 
    1241             :     /*
    1242             :      * Generate file path for the snapshot.  We start numbering of snapshots
    1243             :      * inside the transaction from 1.
                     :      *
                     :      * The base name is our backend ID and local transaction ID, each printed
                     :      * as zero-padded uppercase hex, followed by the decimal snapshot number,
                     :      * with hyphens in between.  That keeps the name within the character set
                     :      * ImportSnapshot accepts (0-9, A-F and hyphens).
    1244             :      */
    1245          24 :     snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
    1246          24 :              MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1);
    1247             : 
    1248             :     /*
    1249             :      * Copy the snapshot into TopTransactionContext, add it to the
    1250             :      * exportedSnapshots list, and mark it pseudo-registered.  We do this to
    1251             :      * ensure that the snapshot's xmin is honored for the rest of the
    1252             :      * transaction.
                     :      *
                     :      * From here on "snapshot" refers to the copy, not the caller's snapshot.
                     :      * The list entry records the final path (not the ".tmp" name).
    1253             :      */
    1254           8 :     snapshot = CopySnapshot(snapshot);
    1255             : 
    1256           8 :     oldcxt = MemoryContextSwitchTo(TopTransactionContext);
    1257           8 :     esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
    1258           8 :     esnap->snapfile = pstrdup(path);
    1259           8 :     esnap->snapshot = snapshot;
    1260           8 :     exportedSnapshots = lappend(exportedSnapshots, esnap);
    1261           8 :     MemoryContextSwitchTo(oldcxt);
    1262             : 
    1263           8 :     snapshot->regd_count++;
    1264           8 :     pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
    1265             : 
    1266             :     /*
    1267             :      * Fill buf with a text serialization of the snapshot, plus identification
    1268             :      * data about this transaction.  The format expected by ImportSnapshot is
    1269             :      * pretty rigid: each line must be fieldname:value.
                     :      *
                     :      * The lines are emitted in this order: vxid, pid, dbid, iso, ro, xmin,
                     :      * xmax, xcnt, the xip entries, sof, then (only when sof is 0) sxcnt and
                     :      * the sxp entries, and finally rec.
    1270             :      */
    1271           8 :     initStringInfo(&buf);
    1272             : 
    1273           8 :     appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid);
    1274           8 :     appendStringInfo(&buf, "pid:%d\n", MyProcPid);
    1275           8 :     appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
    1276           8 :     appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
    1277           8 :     appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
    1278             : 
    1279           8 :     appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
    1280           8 :     appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
    1281             : 
    1282             :     /*
    1283             :      * We must include our own top transaction ID in the top-xid data, since
    1284             :      * by definition we will still be running when the importing transaction
    1285             :      * adopts the snapshot, but GetSnapshotData never includes our own XID in
    1286             :      * the snapshot.  (There must, therefore, be enough room to add it.)
    1287             :      *
    1288             :      * However, it could be that our topXid is after the xmax, in which case
    1289             :      * we shouldn't include it because xip[] members are expected to be before
    1290             :      * xmax.  (We need not make the same check for subxip[] members, see
    1291             :      * snapshot.h.)
                     :      *
                     :      * addTopXid is 1 or 0 so that it can be added directly to the xcnt value
                     :      * written below.
    1292             :      */
    1293           8 :     addTopXid = (TransactionIdIsValid(topXid) &&
    1294           8 :                  TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
    1295           8 :     appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
    1296           8 :     for (i = 0; i < snapshot->xcnt; i++)
    1297           0 :         appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
    1298           8 :     if (addTopXid)
    1299           0 :         appendStringInfo(&buf, "xip:%u\n", topXid);
    1300             : 
    1301             :     /*
    1302             :      * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
    1303             :      * we have to cope with possible overflow.
                     :      *
                     :      * If the snapshot is already marked suboverflowed, or the combined count
                     :      * would exceed GetMaxSnapshotSubxidCount(), we write only "sof:1" and no
                     :      * subxid list at all.
    1304             :      */
    1305          16 :     if (snapshot->suboverflowed ||
    1306           8 :         snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
    1307           0 :         appendStringInfoString(&buf, "sof:1\n");
    1308             :     else
    1309             :     {
    1310           8 :         appendStringInfoString(&buf, "sof:0\n");
    1311           8 :         appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
    1312           8 :         for (i = 0; i < snapshot->subxcnt; i++)
    1313           0 :             appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
    1314           8 :         for (i = 0; i < nchildren; i++)
    1315           0 :             appendStringInfo(&buf, "sxp:%u\n", children[i]);
    1316             :     }
    1317           8 :     appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
    1318             : 
    1319             :     /*
    1320             :      * Now write the text representation into a file.  We first write to a
    1321             :      * ".tmp" filename, and rename to final filename if no error.  This
    1322             :      * ensures that no other backend can read an incomplete file
    1323             :      * (ImportSnapshot won't allow it because of its valid-characters check).
    1324             :      */
    1325           8 :     snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
    1326           8 :     if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
    1327           0 :         ereport(ERROR,
    1328             :                 (errcode_for_file_access(),
    1329             :                  errmsg("could not create file \"%s\": %m", pathtmp)));
    1330             : 
    1331           8 :     if (fwrite(buf.data, buf.len, 1, f) != 1)
    1332           0 :         ereport(ERROR,
    1333             :                 (errcode_for_file_access(),
    1334             :                  errmsg("could not write to file \"%s\": %m", pathtmp)));
    1335             : 
    1336             :     /* no fsync() since file need not survive a system crash */
    1337             : 
    1338           8 :     if (FreeFile(f))
    1339           0 :         ereport(ERROR,
    1340             :                 (errcode_for_file_access(),
    1341             :                  errmsg("could not write to file \"%s\": %m", pathtmp)));
    1342             : 
    1343             :     /*
    1344             :      * Now that we have written everything into a .tmp file, rename the file
    1345             :      * to remove the .tmp suffix.
    1346             :      */
    1347           8 :     if (rename(pathtmp, path) < 0)
    1348           0 :         ereport(ERROR,
    1349             :                 (errcode_for_file_access(),
    1350             :                  errmsg("could not rename file \"%s\" to \"%s\": %m",
    1351             :                         pathtmp, path)));
    1352             : 
    1353             :     /*
    1354             :      * The basename of the file is what we return from pg_export_snapshot().
    1355             :      * It's already in path in a textual format and we know that the path
    1356             :      * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
    1357             :      * and pstrdup it so as not to return the address of a local variable.
    1358             :      */
    1359           8 :     return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
    1360             : }
    1361             : 
    1362             : /*
    1363             :  * pg_export_snapshot
    1364             :  *      SQL-callable wrapper for ExportSnapshot.
                     :  *
                     :  * Exports the snapshot returned by GetActiveSnapshot() and returns the
                     :  * identifier produced by ExportSnapshot, converted to a text datum.
    1365             :  */
    1366             : Datum
    1367           8 : pg_export_snapshot(PG_FUNCTION_ARGS)
    1368             : {
    1369             :     char       *snapshotName;
    1370             : 
    1371           8 :     snapshotName = ExportSnapshot(GetActiveSnapshot());
    1372           8 :     PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
    1373             : }
    1374             : 
    1375             : 
    1376             : /*
    1377             :  * Parsing subroutines for ImportSnapshot: parse a line with the given
    1378             :  * prefix followed by a value, and advance *s to the next line.  The
    1379             :  * filename is provided for use in error messages.
                     :  *
                     :  * All three routines work the same way:
                     :  *   1. the text at *s must start with "prefix";
                     :  *   2. the value right after the prefix is read with sscanf;
                     :  *   3. a newline must follow somewhere after the prefix, and *s is set to
                     :  *      the character just past it.
                     :  * If any step fails, ERROR "invalid snapshot data in file ..." is raised.
                     :  *
                     :  * Only the leading value is scanned; whatever sits between the value and
                     :  * the newline is skipped without being validated.
                     :  *
                     :  * parseIntFromText returns an int read with "%d".
                     :  * parseXidFromText returns a TransactionId read with "%u".
                     :  * parseVxidFromText reads "%d/%u" into vxid->backendId and
                     :  * vxid->localTransactionId, and returns nothing.
    1380             :  */
    1381             : static int
    1382         112 : parseIntFromText(const char *prefix, char **s, const char *filename)
    1383             : {
    1384         112 :     char       *ptr = *s;
    1385         112 :     int         prefixlen = strlen(prefix);
    1386             :     int         val;
    1387             : 
    1388         112 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1389           0 :         ereport(ERROR,
    1390             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1391             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1392         112 :     ptr += prefixlen;
    1393         112 :     if (sscanf(ptr, "%d", &val) != 1)
    1394           0 :         ereport(ERROR,
    1395             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1396             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1397         112 :     ptr = strchr(ptr, '\n');
    1398         112 :     if (!ptr)
    1399           0 :         ereport(ERROR,
    1400             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1401             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1402         112 :     *s = ptr + 1;
    1403         112 :     return val;
    1404             : }
    1405             : 
    1406             : static TransactionId
    1407          48 : parseXidFromText(const char *prefix, char **s, const char *filename)
    1408             : {
    1409          48 :     char       *ptr = *s;
    1410          48 :     int         prefixlen = strlen(prefix);
    1411             :     TransactionId val;
    1412             : 
    1413          48 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1414           0 :         ereport(ERROR,
    1415             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1416             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1417          48 :     ptr += prefixlen;
    1418          48 :     if (sscanf(ptr, "%u", &val) != 1)
    1419           0 :         ereport(ERROR,
    1420             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1421             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1422          48 :     ptr = strchr(ptr, '\n');
    1423          48 :     if (!ptr)
    1424           0 :         ereport(ERROR,
    1425             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1426             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1427          48 :     *s = ptr + 1;
    1428          48 :     return val;
    1429             : }
    1430             : 
    1431             : static void
    1432          16 : parseVxidFromText(const char *prefix, char **s, const char *filename,
    1433             :                   VirtualTransactionId *vxid)
    1434             : {
    1435          16 :     char       *ptr = *s;
    1436          16 :     int         prefixlen = strlen(prefix);
    1437             : 
    1438          16 :     if (strncmp(ptr, prefix, prefixlen) != 0)
    1439           0 :         ereport(ERROR,
    1440             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1441             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1442          16 :     ptr += prefixlen;
    1443          16 :     if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
    1444           0 :         ereport(ERROR,
    1445             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1446             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1447          16 :     ptr = strchr(ptr, '\n');
    1448          16 :     if (!ptr)
    1449           0 :         ereport(ERROR,
    1450             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1451             :                  errmsg("invalid snapshot data in file \"%s\"", filename)));
    1452          16 :     *s = ptr + 1;
    1453          16 : }
    1454             : 
    1455             : /*
    1456             :  * ImportSnapshot
    1457             :  *      Import a previously exported snapshot.  The argument should be a
    1458             :  *      filename in SNAPSHOT_EXPORT_DIR.  Load the snapshot from that file.
    1459             :  *      This is called by "SET TRANSACTION SNAPSHOT 'foo'".
                     :  *
                     :  * The file is read in full and parsed as a fixed sequence of
                     :  * "fieldname:value" lines: vxid, pid, dbid, iso, ro, xmin, xmax, xcnt, the
                     :  * xip entries, sof, then (unless sof is nonzero) sxcnt and the sxp entries,
                     :  * and finally rec.  On success the resulting snapshot is handed to
                     :  * SetTransactionSnapshot() together with the source vxid and pid.
                     :  *
                     :  * Raises ERROR if a snapshot has already been set, an XID has been
                     :  * assigned, or we are in a subtransaction; if the isolation level does not
                     :  * use a transaction snapshot; if idstr contains characters other than 0-9,
                     :  * A-F and hyphens; if the file cannot be opened, stat'ed or read; if its
                     :  * contents fail parsing or the sanity checks; or if the source
                     :  * transaction's isolation level, read-only state or database is
                     :  * incompatible with ours.
    1460             :  */
    1461             : void
    1462          16 : ImportSnapshot(const char *idstr)
    1463             : {
    1464             :     char        path[MAXPGPATH];
    1465             :     FILE       *f;
    1466             :     struct stat stat_buf;
    1467             :     char       *filebuf;
    1468             :     int         xcnt;
    1469             :     int         i;
    1470             :     VirtualTransactionId src_vxid;
    1471             :     int         src_pid;
    1472             :     Oid         src_dbid;
    1473             :     int         src_isolevel;
    1474             :     bool        src_readonly;
    1475             :     SnapshotData snapshot;
    1476             : 
    1477             :     /*
    1478             :      * Must be at top level of a fresh transaction.  Note in particular that
    1479             :      * we check we haven't acquired an XID --- if we have, it's conceivable
    1480             :      * that the snapshot would show it as not running, making for very screwy
    1481             :      * behavior.
    1482             :      */
    1483          32 :     if (FirstSnapshotSet ||
    1484          32 :         GetTopTransactionIdIfAny() != InvalidTransactionId ||
    1485          16 :         IsSubTransaction())
    1486           0 :         ereport(ERROR,
    1487             :                 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
    1488             :                  errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
    1489             : 
    1490             :     /*
    1491             :      * If we are in read committed mode then the next query would execute with
    1492             :      * a new snapshot thus making this function call quite useless.
    1493             :      */
    1494          16 :     if (!IsolationUsesXactSnapshot())
    1495           0 :         ereport(ERROR,
    1496             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1497             :                  errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
    1498             : 
    1499             :     /*
    1500             :      * Verify the identifier: only 0-9, A-F and hyphens are allowed.  We do
    1501             :      * this mainly to prevent reading arbitrary files.
                     :      *
                     :      * Since neither '.' nor lowercase letters are allowed, a name ending in
                     :      * ".tmp" (a file still being written by ExportSnapshot) is rejected too.
    1502             :      */
    1503          16 :     if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
    1504           0 :         ereport(ERROR,
    1505             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1506             :                  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
    1507             : 
    1508             :     /* OK, read the file */
    1509          16 :     snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
    1510             : 
    1511          16 :     f = AllocateFile(path, PG_BINARY_R);
    1512          16 :     if (!f)
    1513           0 :         ereport(ERROR,
    1514             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1515             :                  errmsg("invalid snapshot identifier: \"%s\"", idstr)));
    1516             : 
    1517             :     /* get the size of the file so that we know how much memory we need */
    1518          16 :     if (fstat(fileno(f), &stat_buf))
    1519           0 :         elog(ERROR, "could not stat file \"%s\": %m", path);
    1520             : 
    1521             :     /* and read the file into a palloc'd string (one extra byte for the NUL) */
    1522          16 :     filebuf = (char *) palloc(stat_buf.st_size + 1);
    1523          16 :     if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
    1524           0 :         elog(ERROR, "could not read file \"%s\": %m", path);
    1525             : 
    1526          16 :     filebuf[stat_buf.st_size] = '\0';
    1527             : 
    1528          16 :     FreeFile(f);
    1529             : 
    1530             :     /*
    1531             :      * Construct a snapshot struct by parsing the file content.
                     :      *
                     :      * Each parse call receives &filebuf and moves filebuf past the line it
                     :      * consumed, so the fields must appear in exactly the order parsed here.
                     :      * As a consequence filebuf no longer points at the start of the palloc'd
                     :      * buffer afterwards.
    1532             :      */
    1533          16 :     memset(&snapshot, 0, sizeof(snapshot));
    1534             : 
    1535          16 :     parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
    1536          16 :     src_pid = parseIntFromText("pid:", &filebuf, path);
    1537             :     /* we abuse parseXidFromText a bit here ... */
    1538          16 :     src_dbid = parseXidFromText("dbid:", &filebuf, path);
    1539          16 :     src_isolevel = parseIntFromText("iso:", &filebuf, path);
    1540          16 :     src_readonly = parseIntFromText("ro:", &filebuf, path);
    1541             : 
    1542          16 :     snapshot.snapshot_type = SNAPSHOT_MVCC;
    1543             : 
    1544          16 :     snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
    1545          16 :     snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
    1546             : 
    1547          16 :     snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
    1548             : 
    1549             :     /* sanity-check the xid count before palloc */
    1550          16 :     if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
    1551           0 :         ereport(ERROR,
    1552             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1553             :                  errmsg("invalid snapshot data in file \"%s\"", path)));
    1554             : 
    1555          16 :     snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
    1556          16 :     for (i = 0; i < xcnt; i++)
    1557           0 :         snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
    1558             : 
    1559          16 :     snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
    1560             : 
    1561          16 :     if (!snapshot.suboverflowed)
    1562             :     {
    1563          16 :         snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
    1564             : 
    1565             :         /* sanity-check the xid count before palloc */
    1566          16 :         if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
    1567           0 :             ereport(ERROR,
    1568             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1569             :                      errmsg("invalid snapshot data in file \"%s\"", path)));
    1570             : 
    1571          16 :         snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
    1572          16 :         for (i = 0; i < xcnt; i++)
    1573           0 :             snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
    1574             :     }
    1575             :     else
    1576             :     {
    1577           0 :         snapshot.subxcnt = 0;
    1578           0 :         snapshot.subxip = NULL;
    1579             :     }
    1580             : 
    1581          16 :     snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
    1582             : 
    1583             :     /*
    1584             :      * Do some additional sanity checking, just to protect ourselves.  We
    1585             :      * don't trouble to check the array elements, just the most critical
    1586             :      * fields.
    1587             :      */
    1588          16 :     if (!VirtualTransactionIdIsValid(src_vxid) ||
    1589          16 :         !OidIsValid(src_dbid) ||
    1590          32 :         !TransactionIdIsNormal(snapshot.xmin) ||
    1591          16 :         !TransactionIdIsNormal(snapshot.xmax))
    1592           0 :         ereport(ERROR,
    1593             :                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
    1594             :                  errmsg("invalid snapshot data in file \"%s\"", path)));
    1595             : 
    1596             :     /*
    1597             :      * If we're serializable, the source transaction must be too, otherwise
    1598             :      * predicate.c has problems (SxactGlobalXmin could go backwards).  Also, a
    1599             :      * non-read-only transaction can't adopt a snapshot from a read-only
    1600             :      * transaction, as predicate.c handles the cases very differently.
    1601             :      */
    1602          16 :     if (IsolationIsSerializable())
    1603             :     {
    1604           0 :         if (src_isolevel != XACT_SERIALIZABLE)
    1605           0 :             ereport(ERROR,
    1606             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1607             :                      errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
    1608           0 :         if (src_readonly && !XactReadOnly)
    1609           0 :             ereport(ERROR,
    1610             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1611             :                      errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
    1612             :     }
    1613             : 
    1614             :     /*
    1615             :      * We cannot import a snapshot that was taken in a different database,
    1616             :      * because vacuum calculates OldestXmin on a per-database basis; so the
    1617             :      * source transaction's xmin doesn't protect us from data loss.  This
    1618             :      * restriction could be removed if the source transaction were to mark its
    1619             :      * xmin as being globally applicable.  But that would require some
    1620             :      * additional syntax, since that has to be known when the snapshot is
    1621             :      * initially taken.  (See pgsql-hackers discussion of 2011-10-21.)
    1622             :      */
    1623          16 :     if (src_dbid != MyDatabaseId)
    1624           0 :         ereport(ERROR,
    1625             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1626             :                  errmsg("cannot import a snapshot from a different database")));
    1627             : 
    1628             :     /* OK, install the snapshot */
    1629          16 :     SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
    1630          16 : }
    1631             : 
    1632             : /*
    1633             :  * XactHasExportedSnapshots
    1634             :  *      Test whether current transaction has exported any snapshots.
                     :  *
                     :  * Returns true exactly when the exportedSnapshots list is not NIL.
    1635             :  */
    1636             : bool
    1637          84 : XactHasExportedSnapshots(void)
    1638             : {
    1639          84 :     return (exportedSnapshots != NIL);
    1640             : }
    1641             : 
    1642             : /*
    1643             :  * DeleteAllExportedSnapshotFiles
    1644             :  *      Clean up any files that have been left behind by a crashed backend
    1645             :  *      that had exported snapshots before it died.
    1646             :  *
    1647             :  * This should be called during database startup or crash recovery.
                     :  *
                     :  * Every entry of SNAPSHOT_EXPORT_DIR other than "." and ".." is unlinked.
                     :  * A failed unlink is logged and the scan moves on to the next entry.
    1648             :  */
    1649             : void
    1650         116 : DeleteAllExportedSnapshotFiles(void)
    1651             : {
    1652             :     char        buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
    1653             :     DIR        *s_dir;
    1654             :     struct dirent *s_de;
    1655             : 
    1656             :     /*
    1657             :      * Problems in reading the directory, or unlinking files, are reported at
    1658             :      * LOG level.  Since we're running in the startup process, ERROR level
    1659             :      * would prevent database start, and it's not important enough for that.
                     :      *
                     :      * NOTE(review): the result of AllocateDir is not checked here; it is
                     :      * passed straight to ReadDirExtended, which presumably reports a failed
                     :      * open at the given level and returns NULL -- confirm in fd.c.
    1660             :      */
    1661         116 :     s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
    1662             : 
    1663         464 :     while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
    1664             :     {
    1665         348 :         if (strcmp(s_de->d_name, ".") == 0 ||
    1666         116 :             strcmp(s_de->d_name, "..") == 0)
    1667         232 :             continue;
    1668             : 
    1669           0 :         snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
    1670             : 
    1671           0 :         if (unlink(buf) != 0)
    1672           0 :             ereport(LOG,
    1673             :                     (errcode_for_file_access(),
    1674             :                      errmsg("could not remove file \"%s\": %m", buf)));
    1675             :     }
    1676             : 
    1677         116 :     FreeDir(s_dir);
    1678         116 : }
    1679             : 
    1680             : /*
    1681             :  * ThereAreNoPriorRegisteredSnapshots
    1682             :  *      Is the registered snapshot count less than or equal to one?
    1683             :  *
    1684             :  * Don't use this to settle important decisions.  While zero registrations and
    1685             :  * no ActiveSnapshot would confirm a certain idleness, the system makes no
    1686             :  * guarantees about the significance of one registered snapshot.
    1687             :  */
    1688             : bool
    1689          28 : ThereAreNoPriorRegisteredSnapshots(void)
    1690             : {
    1691          28 :     if (pairingheap_is_empty(&RegisteredSnapshots) ||
    1692           0 :         pairingheap_is_singular(&RegisteredSnapshots))
    1693          28 :         return true;
    1694             : 
    1695           0 :     return false;
    1696             : }
    1697             : 
    1698             : 
    1699             : /*
    1700             :  * Return a timestamp that is exactly on a minute boundary.
    1701             :  *
    1702             :  * If the argument is already aligned, return that value, otherwise move to
    1703             :  * the next minute boundary following the given time.
    1704             :  */
    1705             : static TimestampTz
    1706       23730 : AlignTimestampToMinuteBoundary(TimestampTz ts)
    1707             : {
    1708       23730 :     TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
    1709             : 
    1710       23730 :     return retval - (retval % USECS_PER_MINUTE);
    1711             : }
    1712             : 
    1713             : /*
    1714             :  * Get current timestamp for snapshots
    1715             :  *
    1716             :  * This is basically GetCurrentTimestamp(), but with a guarantee that
    1717             :  * the result never moves backward.
    1718             :  */
    1719             : TimestampTz
    1720       23920 : GetSnapshotCurrentTimestamp(void)
    1721             : {
    1722       23920 :     TimestampTz now = GetCurrentTimestamp();
    1723             : 
    1724             :     /*
    1725             :      * Don't let time move backward; if it hasn't advanced, use the old value.
    1726             :      */
    1727       23920 :     SpinLockAcquire(&oldSnapshotControl->mutex_current);
    1728       23920 :     if (now <= oldSnapshotControl->current_timestamp)
    1729           0 :         now = oldSnapshotControl->current_timestamp;
    1730             :     else
    1731       23920 :         oldSnapshotControl->current_timestamp = now;
    1732       23920 :     SpinLockRelease(&oldSnapshotControl->mutex_current);
    1733             : 
    1734       23920 :     return now;
    1735             : }
    1736             : 
    1737             : /*
    1738             :  * Get timestamp through which vacuum may have processed based on last stored
    1739             :  * value for threshold_timestamp.
    1740             :  *
    1741             :  * XXX: So far, we never trust that a 64-bit value can be read atomically; if
    1742             :  * that ever changes, we could get rid of the spinlock here.
    1743             :  */
    1744             : TimestampTz
    1745        6310 : GetOldSnapshotThresholdTimestamp(void)
    1746             : {
    1747             :     TimestampTz threshold_timestamp;
    1748             : 
    1749        6310 :     SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
    1750        6310 :     threshold_timestamp = oldSnapshotControl->threshold_timestamp;
    1751        6310 :     SpinLockRelease(&oldSnapshotControl->mutex_threshold);
    1752             : 
    1753        6310 :     return threshold_timestamp;
    1754             : }
    1755             : 
    1756             : static void
    1757         190 : SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
    1758             : {
    1759         190 :     SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
    1760         190 :     oldSnapshotControl->threshold_timestamp = ts;
    1761         190 :     oldSnapshotControl->threshold_xid = xlimit;
    1762         190 :     SpinLockRelease(&oldSnapshotControl->mutex_threshold);
    1763         190 : }
    1764             : 
    1765             : /*
    1766             :  * TransactionIdLimitedForOldSnapshots
    1767             :  *
    1768             :  * Apply old snapshot limit, if any.  This is intended to be called for page
    1769             :  * pruning and table vacuuming, to allow old_snapshot_threshold to override
    1770             :  * the normal global xmin value.  Actual testing for snapshot too old will be
    1771             :  * based on whether a snapshot timestamp is prior to the threshold timestamp
    1772             :  * set in this function.
    1773             :  */
    1774             : TransactionId
    1775     1214886 : TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
    1776             :                                     Relation relation)
    1777             : {
    1778     1214886 :     if (TransactionIdIsNormal(recentXmin)
    1779     1214886 :         && old_snapshot_threshold >= 0
    1780         190 :         && RelationAllowsEarlyPruning(relation))
    1781             :     {
    1782         190 :         TimestampTz ts = GetSnapshotCurrentTimestamp();
    1783         190 :         TransactionId xlimit = recentXmin;
    1784             :         TransactionId latest_xmin;
    1785             :         TimestampTz update_ts;
    1786         190 :         bool        same_ts_as_threshold = false;
    1787             : 
    1788         190 :         SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
    1789         190 :         latest_xmin = oldSnapshotControl->latest_xmin;
    1790         190 :         update_ts = oldSnapshotControl->next_map_update;
    1791         190 :         SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
    1792             : 
    1793             :         /*
    1794             :          * Zero threshold always overrides to latest xmin, if valid.  Without
    1795             :          * some heuristic it will find its own snapshot too old on, for
    1796             :          * example, a simple UPDATE -- which would make it useless for most
    1797             :          * testing, but there is no principled way to ensure that it doesn't
    1798             :          * fail in this way.  Use a five-second delay to try to get useful
    1799             :          * testing behavior, but this may need adjustment.
    1800             :          */
    1801         190 :         if (old_snapshot_threshold == 0)
    1802             :         {
    1803         190 :             if (TransactionIdPrecedes(latest_xmin, MyPgXact->xmin)
    1804           0 :                 && TransactionIdFollows(latest_xmin, xlimit))
    1805           0 :                 xlimit = latest_xmin;
    1806             : 
    1807         190 :             ts -= 5 * USECS_PER_SEC;
    1808         190 :             SetOldSnapshotThresholdTimestamp(ts, xlimit);
    1809             : 
    1810         190 :             return xlimit;
    1811             :         }
    1812             : 
    1813           0 :         ts = AlignTimestampToMinuteBoundary(ts)
    1814           0 :             - (old_snapshot_threshold * USECS_PER_MINUTE);
    1815             : 
    1816             :         /* Check for fast exit without LW locking. */
    1817           0 :         SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
    1818           0 :         if (ts == oldSnapshotControl->threshold_timestamp)
    1819             :         {
    1820           0 :             xlimit = oldSnapshotControl->threshold_xid;
    1821           0 :             same_ts_as_threshold = true;
    1822             :         }
    1823           0 :         SpinLockRelease(&oldSnapshotControl->mutex_threshold);
    1824             : 
    1825           0 :         if (!same_ts_as_threshold)
    1826             :         {
    1827           0 :             if (ts == update_ts)
    1828             :             {
    1829           0 :                 xlimit = latest_xmin;
    1830           0 :                 if (NormalTransactionIdFollows(xlimit, recentXmin))
    1831           0 :                     SetOldSnapshotThresholdTimestamp(ts, xlimit);
    1832             :             }
    1833             :             else
    1834             :             {
    1835           0 :                 LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
    1836             : 
    1837           0 :                 if (oldSnapshotControl->count_used > 0
    1838           0 :                     && ts >= oldSnapshotControl->head_timestamp)
    1839             :                 {
    1840             :                     int         offset;
    1841             : 
    1842           0 :                     offset = ((ts - oldSnapshotControl->head_timestamp)
    1843           0 :                               / USECS_PER_MINUTE);
    1844           0 :                     if (offset > oldSnapshotControl->count_used - 1)
    1845           0 :                         offset = oldSnapshotControl->count_used - 1;
    1846           0 :                     offset = (oldSnapshotControl->head_offset + offset)
    1847           0 :                         % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
    1848           0 :                     xlimit = oldSnapshotControl->xid_by_minute[offset];
    1849             : 
    1850           0 :                     if (NormalTransactionIdFollows(xlimit, recentXmin))
    1851           0 :                         SetOldSnapshotThresholdTimestamp(ts, xlimit);
    1852             :                 }
    1853             : 
    1854           0 :                 LWLockRelease(OldSnapshotTimeMapLock);
    1855             :             }
    1856             :         }
    1857             : 
    1858             :         /*
    1859             :          * Failsafe protection against vacuuming work of active transaction.
    1860             :          *
    1861             :          * This is not an assertion because we avoid the spinlock for
    1862             :          * performance, leaving open the possibility that xlimit could advance
    1863             :          * and be more current; but it seems prudent to apply this limit.  It
    1864             :          * might make pruning a tiny bit less aggressive than it could be, but
    1865             :          * protects against data loss bugs.
    1866             :          */
    1867           0 :         if (TransactionIdIsNormal(latest_xmin)
    1868           0 :             && TransactionIdPrecedes(latest_xmin, xlimit))
    1869           0 :             xlimit = latest_xmin;
    1870             : 
    1871           0 :         if (NormalTransactionIdFollows(xlimit, recentXmin))
    1872           0 :             return xlimit;
    1873             :     }
    1874             : 
    1875     1214696 :     return recentXmin;
    1876             : }
    1877             : 
    1878             : /*
    1879             :  * Take care of the circular buffer that maps time to xid.
    1880             :  */
    1881             : void
    1882       23730 : MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
    1883             : {
    1884             :     TimestampTz ts;
    1885             :     TransactionId latest_xmin;
    1886             :     TimestampTz update_ts;
    1887       23730 :     bool        map_update_required = false;
    1888             : 
    1889             :     /* Never call this function when old snapshot checking is disabled. */
    1890             :     Assert(old_snapshot_threshold >= 0);
    1891             : 
    1892       23730 :     ts = AlignTimestampToMinuteBoundary(whenTaken);
    1893             : 
    1894             :     /*
    1895             :      * Keep track of the latest xmin seen by any process. Update mapping with
    1896             :      * a new value when we have crossed a bucket boundary.
    1897             :      */
    1898       23730 :     SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
    1899       23730 :     latest_xmin = oldSnapshotControl->latest_xmin;
    1900       23730 :     update_ts = oldSnapshotControl->next_map_update;
    1901       23730 :     if (ts > update_ts)
    1902             :     {
    1903           4 :         oldSnapshotControl->next_map_update = ts;
    1904           4 :         map_update_required = true;
    1905             :     }
    1906       23730 :     if (TransactionIdFollows(xmin, latest_xmin))
    1907          84 :         oldSnapshotControl->latest_xmin = xmin;
    1908       23730 :     SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
    1909             : 
    1910             :     /* We only needed to update the most recent xmin value. */
    1911       23730 :     if (!map_update_required)
    1912       23726 :         return;
    1913             : 
    1914             :     /* No further tracking needed for 0 (used for testing). */
    1915           4 :     if (old_snapshot_threshold == 0)
    1916           4 :         return;
    1917             : 
    1918             :     /*
    1919             :      * We don't want to do something stupid with unusual values, but we don't
    1920             :      * want to litter the log with warnings or break otherwise normal
    1921             :      * processing for this feature; so if something seems unreasonable, just
    1922             :      * log at DEBUG level and return without doing anything.
    1923             :      */
    1924           0 :     if (whenTaken < 0)
    1925             :     {
    1926           0 :         elog(DEBUG1,
    1927             :              "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
    1928             :              (long) whenTaken);
    1929           0 :         return;
    1930             :     }
    1931           0 :     if (!TransactionIdIsNormal(xmin))
    1932             :     {
    1933           0 :         elog(DEBUG1,
    1934             :              "MaintainOldSnapshotTimeMapping called with xmin = %lu",
    1935             :              (unsigned long) xmin);
    1936           0 :         return;
    1937             :     }
    1938             : 
    1939           0 :     LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
    1940             : 
    1941             :     Assert(oldSnapshotControl->head_offset >= 0);
    1942             :     Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
    1943             :     Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
    1944             :     Assert(oldSnapshotControl->count_used >= 0);
    1945             :     Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
    1946             : 
    1947           0 :     if (oldSnapshotControl->count_used == 0)
    1948             :     {
    1949             :         /* set up first entry for empty mapping */
    1950           0 :         oldSnapshotControl->head_offset = 0;
    1951           0 :         oldSnapshotControl->head_timestamp = ts;
    1952           0 :         oldSnapshotControl->count_used = 1;
    1953           0 :         oldSnapshotControl->xid_by_minute[0] = xmin;
    1954             :     }
    1955           0 :     else if (ts < oldSnapshotControl->head_timestamp)
    1956             :     {
    1957             :         /* old ts; log it at DEBUG */
    1958           0 :         LWLockRelease(OldSnapshotTimeMapLock);
    1959           0 :         elog(DEBUG1,
    1960             :              "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
    1961             :              (long) whenTaken);
    1962           0 :         return;
    1963             :     }
    1964           0 :     else if (ts <= (oldSnapshotControl->head_timestamp +
    1965           0 :                     ((oldSnapshotControl->count_used - 1)
    1966           0 :                      * USECS_PER_MINUTE)))
    1967             :     {
    1968             :         /* existing mapping; advance xid if possible */
    1969           0 :         int         bucket = (oldSnapshotControl->head_offset
    1970           0 :                               + ((ts - oldSnapshotControl->head_timestamp)
    1971           0 :                                  / USECS_PER_MINUTE))
    1972           0 :         % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
    1973             : 
    1974           0 :         if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
    1975           0 :             oldSnapshotControl->xid_by_minute[bucket] = xmin;
    1976             :     }
    1977             :     else
    1978             :     {
    1979             :         /* We need a new bucket, but it might not be the very next one. */
    1980           0 :         int         advance = ((ts - oldSnapshotControl->head_timestamp)
    1981           0 :                                / USECS_PER_MINUTE);
    1982             : 
    1983           0 :         oldSnapshotControl->head_timestamp = ts;
    1984             : 
    1985           0 :         if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
    1986             :         {
    1987             :             /* Advance is so far that all old data is junk; start over. */
    1988           0 :             oldSnapshotControl->head_offset = 0;
    1989           0 :             oldSnapshotControl->count_used = 1;
    1990           0 :             oldSnapshotControl->xid_by_minute[0] = xmin;
    1991             :         }
    1992             :         else
    1993             :         {
    1994             :             /* Store the new value in one or more buckets. */
    1995             :             int         i;
    1996             : 
    1997           0 :             for (i = 0; i < advance; i++)
    1998             :             {
    1999           0 :                 if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
    2000             :                 {
    2001             :                     /* Map full and new value replaces old head. */
    2002           0 :                     int         old_head = oldSnapshotControl->head_offset;
    2003             : 
    2004           0 :                     if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
    2005           0 :                         oldSnapshotControl->head_offset = 0;
    2006             :                     else
    2007           0 :                         oldSnapshotControl->head_offset = old_head + 1;
    2008           0 :                     oldSnapshotControl->xid_by_minute[old_head] = xmin;
    2009             :                 }
    2010             :                 else
    2011             :                 {
    2012             :                     /* Extend map to unused entry. */
    2013           0 :                     int         new_tail = (oldSnapshotControl->head_offset
    2014           0 :                                             + oldSnapshotControl->count_used)
    2015           0 :                     % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
    2016             : 
    2017           0 :                     oldSnapshotControl->count_used++;
    2018           0 :                     oldSnapshotControl->xid_by_minute[new_tail] = xmin;
    2019             :                 }
    2020             :             }
    2021             :         }
    2022             :     }
    2023             : 
    2024           0 :     LWLockRelease(OldSnapshotTimeMapLock);
    2025             : }
    2026             : 
    2027             : 
    2028             : /*
    2029             :  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
    2030             :  * access to behave just like it did at a certain point in the past.
    2031             :  *
    2032             :  * Needed for logical decoding.
    2033             :  */
    2034             : void
    2035        2876 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
    2036             : {
    2037             :     Assert(historic_snapshot != NULL);
    2038             : 
    2039             :     /* setup the timetravel snapshot */
    2040        2876 :     HistoricSnapshot = historic_snapshot;
    2041             : 
    2042             :     /* setup (cmin, cmax) lookup hash */
    2043        2876 :     tuplecid_data = tuplecids;
    2044        2876 : }
    2045             : 
    2046             : 
    2047             : /*
    2048             :  * Make catalog snapshots behave normally again.
    2049             :  */
    2050             : void
    2051        2876 : TeardownHistoricSnapshot(bool is_error)
    2052             : {
    2053        2876 :     HistoricSnapshot = NULL;
    2054        2876 :     tuplecid_data = NULL;
    2055        2876 : }
    2056             : 
    2057             : bool
    2058    12103168 : HistoricSnapshotActive(void)
    2059             : {
    2060    12103168 :     return HistoricSnapshot != NULL;
    2061             : }
    2062             : 
    2063             : HTAB *
    2064         808 : HistoricSnapshotGetTupleCids(void)
    2065             : {
    2066             :     Assert(HistoricSnapshotActive());
    2067         808 :     return tuplecid_data;
    2068             : }
    2069             : 
    2070             : /*
    2071             :  * EstimateSnapshotSpace
    2072             :  *      Returns the size needed to store the given snapshot.
    2073             :  *
    2074             :  * We are exporting only required fields from the Snapshot, stored in
    2075             :  * SerializedSnapshotData.
    2076             :  */
    2077             : Size
    2078        1456 : EstimateSnapshotSpace(Snapshot snap)
    2079             : {
    2080             :     Size        size;
    2081             : 
    2082             :     Assert(snap != InvalidSnapshot);
    2083             :     Assert(snap->snapshot_type == SNAPSHOT_MVCC);
    2084             : 
    2085             :     /* We allocate any XID arrays needed in the same palloc block. */
    2086        1456 :     size = add_size(sizeof(SerializedSnapshotData),
    2087        1456 :                     mul_size(snap->xcnt, sizeof(TransactionId)));
    2088        1464 :     if (snap->subxcnt > 0 &&
    2089           8 :         (!snap->suboverflowed || snap->takenDuringRecovery))
    2090           8 :         size = add_size(size,
    2091           8 :                         mul_size(snap->subxcnt, sizeof(TransactionId)));
    2092             : 
    2093        1456 :     return size;
    2094             : }
    2095             : 
    2096             : /*
    2097             :  * SerializeSnapshot
    2098             :  *      Dumps the serialized snapshot (extracted from given snapshot) onto the
    2099             :  *      memory location at start_address.
    2100             :  */
    2101             : void
    2102        1424 : SerializeSnapshot(Snapshot snapshot, char *start_address)
    2103             : {
    2104             :     SerializedSnapshotData serialized_snapshot;
    2105             : 
    2106             :     Assert(snapshot->subxcnt >= 0);
    2107             : 
    2108             :     /* Copy all required fields */
    2109        1424 :     serialized_snapshot.xmin = snapshot->xmin;
    2110        1424 :     serialized_snapshot.xmax = snapshot->xmax;
    2111        1424 :     serialized_snapshot.xcnt = snapshot->xcnt;
    2112        1424 :     serialized_snapshot.subxcnt = snapshot->subxcnt;
    2113        1424 :     serialized_snapshot.suboverflowed = snapshot->suboverflowed;
    2114        1424 :     serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
    2115        1424 :     serialized_snapshot.curcid = snapshot->curcid;
    2116        1424 :     serialized_snapshot.whenTaken = snapshot->whenTaken;
    2117        1424 :     serialized_snapshot.lsn = snapshot->lsn;
    2118             : 
    2119             :     /*
    2120             :      * Ignore the SubXID array if it has overflowed, unless the snapshot was
    2121             :      * taken during recovery - in that case, top-level XIDs are in subxip as
    2122             :      * well, and we mustn't lose them.
    2123             :      */
    2124        1424 :     if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
    2125           0 :         serialized_snapshot.subxcnt = 0;
    2126             : 
    2127             :     /* Copy struct to possibly-unaligned buffer */
    2128        1424 :     memcpy(start_address,
    2129             :            &serialized_snapshot, sizeof(SerializedSnapshotData));
    2130             : 
    2131             :     /* Copy XID array */
    2132        1424 :     if (snapshot->xcnt > 0)
    2133         672 :         memcpy((TransactionId *) (start_address +
    2134             :                                   sizeof(SerializedSnapshotData)),
    2135         672 :                snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
    2136             : 
    2137             :     /*
    2138             :      * Copy SubXID array. Don't bother to copy it if it had overflowed,
    2139             :      * though, because it's not used anywhere in that case. Except if it's a
    2140             :      * snapshot taken during recovery; all the top-level XIDs are in subxip as
    2141             :      * well in that case, so we mustn't lose them.
    2142             :      */
    2143        1424 :     if (serialized_snapshot.subxcnt > 0)
    2144             :     {
    2145           6 :         Size        subxipoff = sizeof(SerializedSnapshotData) +
    2146           6 :         snapshot->xcnt * sizeof(TransactionId);
    2147             : 
    2148          12 :         memcpy((TransactionId *) (start_address + subxipoff),
    2149          12 :                snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
    2150             :     }
    2151        1424 : }
    2152             : 
    2153             : /*
    2154             :  * RestoreSnapshot
    2155             :  *      Restore a serialized snapshot from the specified address.
    2156             :  *
    2157             :  * The copy is palloc'd in TopTransactionContext and has initial refcounts set
    2158             :  * to 0.  The returned snapshot has the copied flag set.
    2159             :  */
    2160             : Snapshot
    2161        5414 : RestoreSnapshot(char *start_address)
    2162             : {
    2163             :     SerializedSnapshotData serialized_snapshot;
    2164             :     Size        size;
    2165             :     Snapshot    snapshot;
    2166             :     TransactionId *serialized_xids;
    2167             : 
    2168        5414 :     memcpy(&serialized_snapshot, start_address,
    2169             :            sizeof(SerializedSnapshotData));
    2170        5414 :     serialized_xids = (TransactionId *)
    2171             :         (start_address + sizeof(SerializedSnapshotData));
    2172             : 
    2173             :     /* We allocate any XID arrays needed in the same palloc block. */
    2174        5414 :     size = sizeof(SnapshotData)
    2175        5414 :         + serialized_snapshot.xcnt * sizeof(TransactionId)
    2176        5414 :         + serialized_snapshot.subxcnt * sizeof(TransactionId);
    2177             : 
    2178             :     /* Copy all required fields */
    2179        5414 :     snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
    2180        5414 :     snapshot->snapshot_type = SNAPSHOT_MVCC;
    2181        5414 :     snapshot->xmin = serialized_snapshot.xmin;
    2182        5414 :     snapshot->xmax = serialized_snapshot.xmax;
    2183        5414 :     snapshot->xip = NULL;
    2184        5414 :     snapshot->xcnt = serialized_snapshot.xcnt;
    2185        5414 :     snapshot->subxip = NULL;
    2186        5414 :     snapshot->subxcnt = serialized_snapshot.subxcnt;
    2187        5414 :     snapshot->suboverflowed = serialized_snapshot.suboverflowed;
    2188        5414 :     snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
    2189        5414 :     snapshot->curcid = serialized_snapshot.curcid;
    2190        5414 :     snapshot->whenTaken = serialized_snapshot.whenTaken;
    2191        5414 :     snapshot->lsn = serialized_snapshot.lsn;
    2192             : 
    2193             :     /* Copy XIDs, if present. */
    2194        5414 :     if (serialized_snapshot.xcnt > 0)
    2195             :     {
    2196         786 :         snapshot->xip = (TransactionId *) (snapshot + 1);
    2197         786 :         memcpy(snapshot->xip, serialized_xids,
    2198         786 :                serialized_snapshot.xcnt * sizeof(TransactionId));
    2199             :     }
    2200             : 
    2201             :     /* Copy SubXIDs, if present. */
    2202        5414 :     if (serialized_snapshot.subxcnt > 0)
    2203             :     {
    2204          52 :         snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
    2205          26 :             serialized_snapshot.xcnt;
    2206          26 :         memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
    2207          26 :                serialized_snapshot.subxcnt * sizeof(TransactionId));
    2208             :     }
    2209             : 
    2210             :     /* Set the copied flag so that the caller will set refcounts correctly. */
    2211        5414 :     snapshot->regd_count = 0;
    2212        5414 :     snapshot->active_count = 0;
    2213        5414 :     snapshot->copied = true;
    2214             : 
    2215        5414 :     return snapshot;
    2216             : }
    2217             : 
    2218             : /*
    2219             :  * Install a restored snapshot as the transaction snapshot.
    2220             :  *
    2221             :  * The second argument is of type void * so that snapmgr.h need not include
    2222             :  * the declaration for PGPROC.
    2223             :  */
    2224             : void
    2225        1594 : RestoreTransactionSnapshot(Snapshot snapshot, void *master_pgproc)
    2226             : {
    2227        1594 :     SetTransactionSnapshot(snapshot, NULL, InvalidPid, master_pgproc);
    2228        1594 : }
    2229             : 
    2230             : /*
    2231             :  * XidInMVCCSnapshot
    2232             :  *      Is the given XID still-in-progress according to the snapshot?
    2233             :  *
    2234             :  * Note: GetSnapshotData never stores either top xid or subxids of our own
    2235             :  * backend into a snapshot, so these xids will not be reported as "running"
    2236             :  * by this function.  This is OK for current uses, because we always check
    2237             :  * TransactionIdIsCurrentTransactionId first, except when it's known the
    2238             :  * XID could not be ours anyway.
    2239             :  */
    2240             : bool
    2241   177902750 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
    2242             : {
    2243             :     uint32      i;
    2244             : 
    2245             :     /*
    2246             :      * Make a quick range check to eliminate most XIDs without looking at the
    2247             :      * xip arrays.  Note that this is OK even if we convert a subxact XID to
    2248             :      * its parent below, because a subxact with XID < xmin has surely also got
    2249             :      * a parent with XID < xmin, while one with XID >= xmax must belong to a
    2250             :      * parent that was not yet committed at the time of this snapshot.
    2251             :      */
    2252             : 
    2253             :     /* Any xid < xmin is not in-progress */
    2254   177902750 :     if (TransactionIdPrecedes(xid, snapshot->xmin))
    2255   176513626 :         return false;
    2256             :     /* Any xid >= xmax is in-progress */
    2257     1389124 :     if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
    2258        9212 :         return true;
    2259             : 
    2260             :     /*
    2261             :      * Snapshot information is stored slightly differently in snapshots taken
    2262             :      * during recovery.
    2263             :      */
    2264     1379912 :     if (!snapshot->takenDuringRecovery)
    2265             :     {
    2266             :         /*
    2267             :          * If the snapshot contains full subxact data, the fastest way to
    2268             :          * check things is just to compare the given XID against both subxact
    2269             :          * XIDs and top-level XIDs.  If the snapshot overflowed, we have to
    2270             :          * use pg_subtrans to convert a subxact XID to its parent XID, but
    2271             :          * then we need only look at top-level XIDs not subxacts.
    2272             :          */
    2273     1379912 :         if (!snapshot->suboverflowed)
    2274             :         {
    2275             :             /* we have full data, so search subxip */
    2276             :             int32       j;
    2277             : 
    2278     1442220 :             for (j = 0; j < snapshot->subxcnt; j++)
    2279             :             {
    2280       63388 :                 if (TransactionIdEquals(xid, snapshot->subxip[j]))
    2281         424 :                     return true;
    2282             :             }
    2283             : 
    2284             :             /* not there, fall through to search xip[] */
    2285             :         }
    2286             :         else
    2287             :         {
    2288             :             /*
    2289             :              * Snapshot overflowed, so convert xid to top-level.  This is safe
    2290             :              * because we eliminated too-old XIDs above.
    2291             :              */
    2292         656 :             xid = SubTransGetTopmostTransaction(xid);
    2293             : 
    2294             :             /*
    2295             :              * If xid was indeed a subxact, we might now have an xid < xmin,
    2296             :              * so recheck to avoid an array scan.  No point in rechecking
    2297             :              * xmax.
    2298             :              */
    2299         656 :             if (TransactionIdPrecedes(xid, snapshot->xmin))
    2300           0 :                 return false;
    2301             :         }
    2302             : 
    2303     4293438 :         for (i = 0; i < snapshot->xcnt; i++)
    2304             :         {
    2305     2921152 :             if (TransactionIdEquals(xid, snapshot->xip[i]))
    2306        7202 :                 return true;
    2307             :         }
    2308             :     }
    2309             :     else
    2310             :     {
    2311             :         int32       j;
    2312             : 
    2313             :         /*
    2314             :          * In recovery we store all xids in the subxact array because it is by
    2315             :          * far the bigger array, and we mostly don't know which xids are
    2316             :          * top-level and which are subxacts. The xip array is empty.
    2317             :          *
    2318             :          * We start by searching subtrans, if we overflowed.
    2319             :          */
    2320           0 :         if (snapshot->suboverflowed)
    2321             :         {
    2322             :             /*
    2323             :              * Snapshot overflowed, so convert xid to top-level.  This is safe
    2324             :              * because we eliminated too-old XIDs above.
    2325             :              */
    2326           0 :             xid = SubTransGetTopmostTransaction(xid);
    2327             : 
    2328             :             /*
    2329             :              * If xid was indeed a subxact, we might now have an xid < xmin,
    2330             :              * so recheck to avoid an array scan.  No point in rechecking
    2331             :              * xmax.
    2332             :              */
    2333           0 :             if (TransactionIdPrecedes(xid, snapshot->xmin))
    2334           0 :                 return false;
    2335             :         }
    2336             : 
    2337             :         /*
    2338             :          * We now have either a top-level xid no lower than xmin or an
    2339             :          * indeterminate xid. We don't know whether it's top level or subxact
    2340             :          * but it doesn't matter. If it's present, the xid is still in progress.
    2341             :          */
    2342           0 :         for (j = 0; j < snapshot->subxcnt; j++)
    2343             :         {
    2344           0 :             if (TransactionIdEquals(xid, snapshot->subxip[j]))
    2345           0 :                 return true;
    2346             :         }
    2347             :     }
    2348             : 
    2349     1372286 :     return false;
    2350             : }

Generated by: LCOV version 1.13