Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * snapmgr.c
4 : * PostgreSQL snapshot manager
5 : *
6 : * The following functions return an MVCC snapshot that can be used in tuple
7 : * visibility checks:
8 : *
9 : * - GetTransactionSnapshot
10 : * - GetLatestSnapshot
11 : * - GetCatalogSnapshot
12 : * - GetNonHistoricCatalogSnapshot
13 : *
14 : * Each of these functions returns a reference to a statically allocated
15 : * snapshot. The statically allocated snapshot is subject to change on any
16 : * snapshot-related function call, and should not be used directly. Instead,
17 : * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived
18 : * copy and use that.
19 : *
20 : * We keep track of snapshots in two ways: those "registered" by resowner.c,
21 : * and the "active snapshot" stack. All snapshots in either of them live in
22 : * persistent memory. When a snapshot is no longer in any of these lists
23 : * (tracked by separate refcounts on each snapshot), its memory can be freed.
24 : *
25 : * In addition to the above-mentioned MVCC snapshots, there are some special
26 : * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots. They can
27 : * only be used in limited contexts and cannot be registered or pushed to the
28 : * active stack.
29 : *
30 : * ActiveSnapshot stack
31 : * --------------------
32 : *
33 : * Most visibility checks use the current "active snapshot" returned by
34 : * GetActiveSnapshot(). When running normal queries, the active snapshot is
35 : * set when query execution begins based on the transaction isolation level.
36 : *
37 : * The active snapshot is tracked in a stack so that the currently active one
38 : * is at the top of the stack. It mirrors the process call stack: whenever we
39 : * recurse or switch context to fetch rows from a different portal for
40 : * example, the appropriate snapshot is pushed to become the active snapshot,
41 : * and popped on return. Once upon a time, ActiveSnapshot was just a global
42 : * variable that was saved and restored similar to CurrentMemoryContext, but
43 : * nowadays it's managed as a separate data structure so that we can keep
44 : * track of which snapshots are in use and reset MyProc->xmin when there is no
45 : * active snapshot.
46 : *
47 : * However, there are a couple of exceptions where the active snapshot stack
48 : * does not strictly mirror the call stack:
49 : *
50 : * - VACUUM and a few other utility commands manage their own transactions,
51 : * which take their own snapshots. They are called with an active snapshot
52 : * set, like most utility commands, but they pop the active snapshot that
53 : * was pushed by the caller. PortalRunUtility knows about the possibility
54 : * that the snapshot it pushed is no longer active on return.
55 : *
56 : * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the
57 : * active snapshot stack is destroyed, and re-established later when
58 : * subsequent statements in the procedure are executed. There are many
59 : * limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such
60 : * limitation is that all the snapshots on the active snapshot stack are
61 : * known to portals that are being executed, which makes it safe to reset
62 : * the stack. See EnsurePortalSnapshotExists().
63 : *
64 : * Registered snapshots
65 : * --------------------
66 : *
67 : * In addition to snapshots pushed to the active snapshot stack, a snapshot
68 : * can be registered with a resource owner.
69 : *
70 : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
71 : * regd_count and list it in RegisteredSnapshots, but this reference is not
72 : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
73 : * to track this snapshot reference, but that introduces logical circularity
74 : * and thus makes it impossible to clean up in a sane fashion. It's better to
75 : * handle this reference as an internally-tracked registration, so that this
76 : * module is entirely lower-level than ResourceOwners.
77 : *
78 : * Likewise, any snapshots that have been exported by pg_export_snapshot
79 : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
80 : * tracked by any resource owner.
81 : *
82 : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
83 : * is valid, but is not tracked by any resource owner.
84 : *
85 : * The same is true for historic snapshots used during logical decoding:
86 : * their lifetime is managed separately (as they live longer than one xact.c
87 : * transaction).
88 : *
89 : * These arrangements let us reset MyProc->xmin when there are no snapshots
90 : * referenced by this transaction, and advance it when the one with oldest
91 : * Xmin is no longer referenced. For simplicity, however, only registered
92 : * snapshots (not active snapshots) participate in tracking which one is
93 : * oldest; we don't try to change MyProc->xmin except when the active-snapshot
94 : * stack is empty.
95 : *
96 : *
97 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
98 : * Portions Copyright (c) 1994, Regents of the University of California
99 : *
100 : * IDENTIFICATION
101 : * src/backend/utils/time/snapmgr.c
102 : *
103 : *-------------------------------------------------------------------------
104 : */
105 : #include "postgres.h"
106 :
107 : #include <sys/stat.h>
108 : #include <unistd.h>
109 :
110 : #include "access/subtrans.h"
111 : #include "access/transam.h"
112 : #include "access/xact.h"
113 : #include "datatype/timestamp.h"
114 : #include "lib/pairingheap.h"
115 : #include "miscadmin.h"
116 : #include "port/pg_lfind.h"
117 : #include "storage/fd.h"
118 : #include "storage/predicate.h"
119 : #include "storage/proc.h"
120 : #include "storage/procarray.h"
121 : #include "utils/builtins.h"
122 : #include "utils/memutils.h"
123 : #include "utils/resowner.h"
124 : #include "utils/snapmgr.h"
125 : #include "utils/syscache.h"
126 :
127 :
128 : /*
129 : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
130 : * mode, and to the latest one taken in a read-committed transaction.
131 : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
132 : * instant, even in transaction-snapshot mode. It should only be used for
133 : * special-purpose code (say, RI checking.) CatalogSnapshot points to an
134 : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
135 : * whenever a system catalog change occurs.
136 : *
137 : * These SnapshotData structs are static to simplify memory allocation
138 : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
139 : */
140 : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for CurrentSnapshot */
141 : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for SecondarySnapshot */
142 : static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for CatalogSnapshot */
143 : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF}; /* non-static: special snapshot, see file header */
144 : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY}; /* non-static: special snapshot, see file header */
145 : SnapshotData SnapshotToastData = {SNAPSHOT_TOAST}; /* non-static special snapshot */
146 :
147 : /* Pointers to valid snapshots; NULL means "none taken/valid yet" */
148 : static Snapshot CurrentSnapshot = NULL; /* set by GetTransactionSnapshot and SetTransactionSnapshot; may point at a copy */
149 : static Snapshot SecondarySnapshot = NULL; /* set by GetLatestSnapshot */
150 : static Snapshot CatalogSnapshot = NULL; /* set by GetNonHistoricCatalogSnapshot, cleared by InvalidateCatalogSnapshot */
151 : static Snapshot HistoricSnapshot = NULL; /* returned by GetCatalogSnapshot while HistoricSnapshotActive() */
152 :
153 : /*
154 : * These are updated by GetSnapshotData. We initialize them this way
155 : * for the convenience of TransactionIdIsInProgress: even in bootstrap
156 : * mode, we don't want it to say that BootstrapTransactionId is in progress.
157 : */
158 : TransactionId TransactionXmin = FirstNormalTransactionId;
159 : TransactionId RecentXmin = FirstNormalTransactionId;
160 :
161 : /* (table, ctid) => (cmin, cmax) mapping during timetravel; NULL when not set up */
162 : static HTAB *tuplecid_data = NULL;
163 :
164 : /*
165 : * Elements of the active snapshot stack.
166 : *
167 : * Each element here accounts for exactly one active_count on SnapshotData.
168 : *
169 : * NB: the code assumes that elements in this list are in non-increasing
170 : * order of as_level; also, the list must be NULL-terminated.
171 : */
172 : typedef struct ActiveSnapshotElt
173 : {
174 : Snapshot as_snap; /* snapshot held by this stack entry (one active_count) */
175 : int as_level; /* transaction nesting level that owns this entry */
176 : struct ActiveSnapshotElt *as_next; /* entry that was on top before this one was pushed, or NULL */
177 : } ActiveSnapshotElt;
178 :
179 : /* Top of the stack of active snapshots; NULL when the stack is empty */
180 : static ActiveSnapshotElt *ActiveSnapshot = NULL;
181 :
182 : /*
183 : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
184 : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
185 : */
186 : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
187 : void *arg); /* forward declaration: comparator for the heap below */
188 :
189 : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
190 :
191 : /* true once this transaction's first snapshot has been taken or imported */
192 : bool FirstSnapshotSet = false;
193 :
194 : /*
195 : * Remember the first snapshot of a transaction-snapshot-mode (including
196 : * serializable) transaction, if any. We cannot trust
197 : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
198 : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
199 : */
200 : static Snapshot FirstXactSnapshot = NULL;
200 :
201 : /* Define pathname of exported-snapshot files */
202 : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
203 :
204 : /* Structure holding info about exported snapshot. */
205 : typedef struct ExportedSnapshot
206 : {
207 : char *snapfile; /* presumably the file's path under SNAPSHOT_EXPORT_DIR -- TODO confirm */
208 : Snapshot snapshot; /* the exported snapshot itself */
209 : } ExportedSnapshot;
210 :
211 : /* Current xact's exported snapshots (a list of ExportedSnapshot structs); NIL when none */
212 : static List *exportedSnapshots = NIL;
213 :
214 : /* Prototypes for local functions */
215 : static Snapshot CopySnapshot(Snapshot snapshot); /* allocate a copy with both refcounts zeroed */
216 : static void UnregisterSnapshotNoOwner(Snapshot snapshot); /* drop one registration without touching a resource owner */
217 : static void FreeSnapshot(Snapshot snapshot); /* pfree a copied snapshot that has no references */
218 : static void SnapshotResetXmin(void);
219 :
220 : /* ResourceOwner callbacks to track snapshot references */
221 : static void ResOwnerReleaseSnapshot(Datum res);
222 :
223 : static const ResourceOwnerDesc snapshot_resowner_desc = /* kind descriptor passed to ResourceOwnerRemember/Forget */
224 : {
225 : .name = "snapshot reference",
226 : .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
227 : .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
228 : .ReleaseResource = ResOwnerReleaseSnapshot,
229 : .DebugPrint = NULL /* the default message is fine */
230 : };
231 :
232 : /* Convenience wrappers over ResourceOwnerRemember/Forget */
233 : static inline void
234 14361088 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
235 : {
236 14361088 : ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
237 14361088 : }
238 : static inline void
239 14302698 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
240 : {
241 14302698 : ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
242 14302698 : }
243 :
244 : /*
245 : * Snapshot fields to be serialized.
246 : *
247 : * Only these fields need to be sent to the cooperating backend; the
248 : * remaining ones can (and must) be set by the receiver upon restore.
249 : */
250 : typedef struct SerializedSnapshotData
251 : {
252 : TransactionId xmin; /* same name as the SnapshotData field */
253 : TransactionId xmax; /* same name as the SnapshotData field */
254 : uint32 xcnt; /* presumably the number of top-level XIDs that follow -- TODO confirm */
255 : int32 subxcnt; /* presumably the number of subtransaction XIDs that follow -- TODO confirm */
256 : bool suboverflowed; /* same name as the SnapshotData field */
257 : bool takenDuringRecovery; /* same name as the SnapshotData field */
258 : CommandId curcid; /* same name as the SnapshotData field */
259 : } SerializedSnapshotData;
260 :
261 : /*
262 : * GetTransactionSnapshot
263 : * Get the appropriate snapshot for a new query in a transaction.
264 : *
265 : * Note that the return value points at static storage that will be modified
266 : * by future calls and by CommandCounterIncrement(). Callers must call
267 : * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
268 : * any other non-trivial work that could invalidate it.
269 : */
270 : Snapshot
271 1835722 : GetTransactionSnapshot(void)
272 : {
273 : /*
274 : * This should not be called while doing logical decoding. Historic
275 : * snapshots are only usable for catalog access, not for general-purpose
276 : * queries.
277 : */
278 1835722 : if (HistoricSnapshotActive())
279 0 : elog(ERROR, "cannot take query snapshot during logical decoding");
280 :
281 : /* First call in transaction? */
282 1835722 : if (!FirstSnapshotSet)
283 : {
284 : /*
285 : * Don't allow catalog snapshot to be older than xact snapshot. Must
286 : * do this first to allow the empty-heap Assert to succeed.
287 : */
288 626988 : InvalidateCatalogSnapshot();
289 :
290 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
291 : Assert(FirstXactSnapshot == NULL);
292 :
293 626988 : if (IsInParallelMode())
294 0 : elog(ERROR,
295 : "cannot take query snapshot during a parallel operation");
296 :
297 : /*
298 : * In transaction-snapshot mode, the first snapshot must live until
299 : * end of xact regardless of what the caller does with it, so we must
300 : * make a copy of it rather than returning CurrentSnapshotData
301 : * directly. Furthermore, if we're running in serializable mode,
302 : * predicate.c needs to wrap the snapshot fetch in its own processing.
303 : */
304 626988 : if (IsolationUsesXactSnapshot())
305 : {
306 : /* First, create the snapshot in CurrentSnapshotData */
307 5408 : if (IsolationIsSerializable())
308 3280 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
309 : else
310 2128 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
311 : /* Make a saved copy */
312 5408 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
313 5408 : FirstXactSnapshot = CurrentSnapshot;
314 : /* Mark it as "registered" in FirstXactSnapshot */
315 5408 : FirstXactSnapshot->regd_count++;
316 5408 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
317 : }
318 : else
319 621580 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
320 :
321 626988 : FirstSnapshotSet = true;
322 626988 : return CurrentSnapshot;
323 : }
324 :
325 1208734 : if (IsolationUsesXactSnapshot())
326 228312 : return CurrentSnapshot;
327 :
328 : /* Don't allow catalog snapshot to be older than xact snapshot. */
329 980422 : InvalidateCatalogSnapshot();
330 :
331 980422 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
332 :
333 980422 : return CurrentSnapshot;
334 : }
335 :
336 : /*
337 : * GetLatestSnapshot
338 : * Get a snapshot that is up-to-date as of the current instant,
339 : * even if we are executing in transaction-snapshot mode.
340 : */
341 : Snapshot
342 152134 : GetLatestSnapshot(void)
343 : {
344 : /*
345 : * We might be able to relax this, but nothing that could otherwise work
346 : * needs it.
347 : */
348 152134 : if (IsInParallelMode())
349 0 : elog(ERROR,
350 : "cannot update SecondarySnapshot during a parallel operation");
351 :
352 : /*
353 : * So far there are no cases requiring support for GetLatestSnapshot()
354 : * during logical decoding, but it wouldn't be hard to add if required.
355 : */
356 : Assert(!HistoricSnapshotActive());
357 :
358 : /* If first call in transaction, go ahead and set the xact snapshot */
359 152134 : if (!FirstSnapshotSet)
360 100 : return GetTransactionSnapshot();
361 :
362 152034 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
363 :
364 152034 : return SecondarySnapshot;
365 : }
366 :
367 : /*
368 : * GetCatalogSnapshot
369 : * Get a snapshot that is sufficiently up-to-date for scan of the
370 : * system catalog with the specified OID.
371 : */
372 : Snapshot
373 12925554 : GetCatalogSnapshot(Oid relid)
374 : {
375 : /*
376 : * Return historic snapshot while we're doing logical decoding, so we can
377 : * see the appropriate state of the catalog.
378 : *
379 : * This is the primary reason for needing to reset the system caches after
380 : * finishing decoding.
381 : */
382 12925554 : if (HistoricSnapshotActive())
383 26556 : return HistoricSnapshot;
384 :
385 12898998 : return GetNonHistoricCatalogSnapshot(relid);
386 : }
387 :
388 : /*
389 : * GetNonHistoricCatalogSnapshot
390 : * Get a snapshot that is sufficiently up-to-date for scan of the system
391 : * catalog with the specified OID, even while historic snapshots are set
392 : * up.
393 : */
394 : Snapshot
395 12901704 : GetNonHistoricCatalogSnapshot(Oid relid)
396 : {
397 : /*
398 : * If the caller is trying to scan a relation that has no syscache, no
399 : * catcache invalidations will be sent when it is updated. For a few key
400 : * relations, snapshot invalidations are sent instead. If we're trying to
401 : * scan a relation for which neither catcache nor snapshot invalidations
402 : * are sent, we must refresh the snapshot every time.
403 : */
404 12901704 : if (CatalogSnapshot &&
405 11325654 : !RelationInvalidatesSnapshotsOnly(relid) &&
406 9824742 : !RelationHasSysCache(relid))
407 463756 : InvalidateCatalogSnapshot();
408 :
409 12901704 : if (CatalogSnapshot == NULL)
410 : {
411 : /* Get new snapshot. */
412 2039806 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
413 :
414 : /*
415 : * Make sure the catalog snapshot will be accounted for in decisions
416 : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
417 : * that would result in making a physical copy, which is overkill; and
418 : * it would also create a dependency on some resource owner, which we
419 : * do not want for reasons explained at the head of this file. Instead
420 : * just shove the CatalogSnapshot into the pairing heap manually. This
421 : * has to be reversed in InvalidateCatalogSnapshot, of course.
422 : *
423 : * NB: it had better be impossible for this to throw error, since the
424 : * CatalogSnapshot pointer is already valid.
425 : */
426 2039806 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
427 : }
428 :
429 12901704 : return CatalogSnapshot;
430 : }
431 :
432 : /*
433 : * InvalidateCatalogSnapshot
434 : * Mark the current catalog snapshot, if any, as invalid
435 : *
436 : * We could change this API to allow the caller to provide more fine-grained
437 : * invalidation details, so that a change to relation A wouldn't prevent us
438 : * from using our cached snapshot to scan relation B, but so far there's no
439 : * evidence that the CPU cycles we spent tracking such fine details would be
440 : * well-spent.
441 : */
442 : void
443 24857232 : InvalidateCatalogSnapshot(void)
444 : {
445 24857232 : if (CatalogSnapshot)
446 : {
447 2039806 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
448 2039806 : CatalogSnapshot = NULL;
449 2039806 : SnapshotResetXmin();
450 : }
451 24857232 : }
452 :
453 : /*
454 : * InvalidateCatalogSnapshotConditionally
455 : * Drop catalog snapshot if it's the only one we have
456 : *
457 : * This is called when we are about to wait for client input, so we don't
458 : * want to continue holding the catalog snapshot if it might mean that the
459 : * global xmin horizon can't advance. However, if there are other snapshots
460 : * still active or registered, the catalog snapshot isn't likely to be the
461 : * oldest one, so we might as well keep it.
462 : */
463 : void
464 781300 : InvalidateCatalogSnapshotConditionally(void)
465 : {
466 781300 : if (CatalogSnapshot &&
467 116742 : ActiveSnapshot == NULL &&
468 115082 : pairingheap_is_singular(&RegisteredSnapshots))
469 18228 : InvalidateCatalogSnapshot();
470 781300 : }
471 :
472 : /*
473 : * SnapshotSetCommandId
474 : * Propagate CommandCounterIncrement into the static snapshots, if set
475 : */
476 : void
477 1094250 : SnapshotSetCommandId(CommandId curcid)
478 : {
479 1094250 : if (!FirstSnapshotSet)
480 18732 : return;
481 :
482 1075518 : if (CurrentSnapshot)
483 1075518 : CurrentSnapshot->curcid = curcid;
484 1075518 : if (SecondarySnapshot)
485 157874 : SecondarySnapshot->curcid = curcid;
486 : /* Should we do the same with CatalogSnapshot? */
487 : }
488 :
489 : /*
490 : * SetTransactionSnapshot
491 : * Set the transaction's snapshot from an imported MVCC snapshot.
492 : *
493 : * Note that this is very closely tied to GetTransactionSnapshot --- it
494 : * must take care of all the same considerations as the first-snapshot case
495 : * in GetTransactionSnapshot.
496 : */
497 : static void
498 3160 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
499 : int sourcepid, PGPROC *sourceproc)
500 : {
501 : /* Caller should have checked this already */
502 : Assert(!FirstSnapshotSet);
503 :
504 : /* Better do this to ensure following Assert succeeds. */
505 3160 : InvalidateCatalogSnapshot();
506 :
507 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
508 : Assert(FirstXactSnapshot == NULL);
509 : Assert(!HistoricSnapshotActive());
510 :
511 : /*
512 : * Even though we are not going to use the snapshot it computes, we must
513 : * call GetSnapshotData, for two reasons: (1) to be sure that
514 : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
515 : * the state for GlobalVis*.
516 : */
517 3160 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
518 :
519 : /*
520 : * Now copy appropriate fields from the source snapshot.
521 : */
522 3160 : CurrentSnapshot->xmin = sourcesnap->xmin;
523 3160 : CurrentSnapshot->xmax = sourcesnap->xmax;
524 3160 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
525 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
526 3160 : if (sourcesnap->xcnt > 0)
527 546 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
528 546 : sourcesnap->xcnt * sizeof(TransactionId));
529 3160 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
530 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
531 3160 : if (sourcesnap->subxcnt > 0)
532 4 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
533 4 : sourcesnap->subxcnt * sizeof(TransactionId));
534 3160 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
535 3160 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
536 : /* NB: curcid should NOT be copied, it's a local matter */
537 :
538 3160 : CurrentSnapshot->snapXactCompletionCount = 0;
539 :
540 : /*
541 : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
542 : * TransactionXmin. There is a race condition: to make sure we are not
543 : * causing the global xmin to go backwards, we have to test that the
544 : * source transaction is still running, and that has to be done
545 : * atomically. So let procarray.c do it.
546 : *
547 : * Note: in serializable mode, predicate.c will do this a second time. It
548 : * doesn't seem worth contorting the logic here to avoid two calls,
549 : * especially since it's not clear that predicate.c *must* do this.
550 : */
551 3160 : if (sourceproc != NULL)
552 : {
553 3128 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
554 0 : ereport(ERROR,
555 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
556 : errmsg("could not import the requested snapshot"),
557 : errdetail("The source transaction is not running anymore.")));
558 : }
559 32 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
560 0 : ereport(ERROR,
561 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
562 : errmsg("could not import the requested snapshot"),
563 : errdetail("The source process with PID %d is not running anymore.",
564 : sourcepid)));
565 :
566 : /*
567 : * In transaction-snapshot mode, the first snapshot must live until end of
568 : * xact, so we must make a copy of it. Furthermore, if we're running in
569 : * serializable mode, predicate.c needs to do its own processing.
570 : */
571 3160 : if (IsolationUsesXactSnapshot())
572 : {
573 454 : if (IsolationIsSerializable())
574 26 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
575 : sourcepid);
576 : /* Make a saved copy */
577 454 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
578 454 : FirstXactSnapshot = CurrentSnapshot;
579 : /* Mark it as "registered" in FirstXactSnapshot */
580 454 : FirstXactSnapshot->regd_count++;
581 454 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
582 : }
583 :
584 3160 : FirstSnapshotSet = true;
585 3160 : }
586 :
587 : /*
588 : * CopySnapshot
589 : * Copy the given snapshot.
590 : *
591 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
592 : * to 0. The returned snapshot has the copied flag set.
593 : */
594 : static Snapshot
595 14920550 : CopySnapshot(Snapshot snapshot)
596 : {
597 : Snapshot newsnap;
598 : Size subxipoff;
599 : Size size;
600 :
601 : Assert(snapshot != InvalidSnapshot);
602 :
603 : /* We allocate any XID arrays needed in the same palloc block. */
604 14920550 : size = subxipoff = sizeof(SnapshotData) +
605 14920550 : snapshot->xcnt * sizeof(TransactionId);
606 14920550 : if (snapshot->subxcnt > 0)
607 105754 : size += snapshot->subxcnt * sizeof(TransactionId);
608 :
609 14920550 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
610 14920550 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
611 :
612 14920550 : newsnap->regd_count = 0;
613 14920550 : newsnap->active_count = 0;
614 14920550 : newsnap->copied = true;
615 14920550 : newsnap->snapXactCompletionCount = 0;
616 :
617 : /* setup XID array */
618 14920550 : if (snapshot->xcnt > 0)
619 : {
620 3184038 : newsnap->xip = (TransactionId *) (newsnap + 1);
621 3184038 : memcpy(newsnap->xip, snapshot->xip,
622 3184038 : snapshot->xcnt * sizeof(TransactionId));
623 : }
624 : else
625 11736512 : newsnap->xip = NULL;
626 :
627 : /*
628 : * Setup subXID array. Don't bother to copy it if it had overflowed,
629 : * though, because it's not used anywhere in that case. Except if it's a
630 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
631 : * well in that case, so we mustn't lose them.
632 : */
633 14920550 : if (snapshot->subxcnt > 0 &&
634 105754 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
635 : {
636 105754 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
637 105754 : memcpy(newsnap->subxip, snapshot->subxip,
638 105754 : snapshot->subxcnt * sizeof(TransactionId));
639 : }
640 : else
641 14814796 : newsnap->subxip = NULL;
642 :
643 14920550 : return newsnap;
644 : }
645 :
646 : /*
647 : * FreeSnapshot
648 : * Free the memory associated with a snapshot.
649 : */
650 : static void
651 14873388 : FreeSnapshot(Snapshot snapshot)
652 : {
653 : Assert(snapshot->regd_count == 0);
654 : Assert(snapshot->active_count == 0);
655 : Assert(snapshot->copied);
656 :
657 14873388 : pfree(snapshot);
658 14873388 : }
659 :
660 : /*
661 : * PushActiveSnapshot
662 : * Set the given snapshot as the current active snapshot
663 : *
664 : * If the passed snapshot is a statically-allocated one, or it is possibly
665 : * subject to a future command counter update, create a new long-lived copy
666 : * with active refcount=1. Otherwise, only increment the refcount.
667 : */
668 : void
669 2010736 : PushActiveSnapshot(Snapshot snapshot)
670 : {
671 2010736 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
672 2010736 : }
673 :
674 : /*
675 : * PushActiveSnapshotWithLevel
676 : * Set the given snapshot as the current active snapshot
677 : *
678 : * Same as PushActiveSnapshot except that caller can specify the
679 : * transaction nesting level that "owns" the snapshot. This level
680 : * must not be deeper than the current top of the snapshot stack.
681 : */
682 : void
683 2303222 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
684 : {
685 : ActiveSnapshotElt *newactive;
686 :
687 : Assert(snapshot != InvalidSnapshot);
688 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
689 :
690 2303222 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
691 :
692 : /*
693 : * Checking SecondarySnapshot is probably useless here, but it seems
694 : * better to be sure.
695 : */
696 2303222 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
697 476946 : !snapshot->copied)
698 1826276 : newactive->as_snap = CopySnapshot(snapshot);
699 : else
700 476946 : newactive->as_snap = snapshot;
701 :
702 2303222 : newactive->as_next = ActiveSnapshot;
703 2303222 : newactive->as_level = snap_level;
704 :
705 2303222 : newactive->as_snap->active_count++;
706 :
707 2303222 : ActiveSnapshot = newactive;
708 2303222 : }
709 :
710 : /*
711 : * PushCopiedSnapshot
712 : * As above, except forcibly copy the presented snapshot.
713 : *
714 : * This should be used when the ActiveSnapshot has to be modifiable, for
715 : * example if the caller intends to call UpdateActiveSnapshotCommandId.
716 : * The new snapshot will be released when popped from the stack.
717 : */
718 : void
719 115652 : PushCopiedSnapshot(Snapshot snapshot)
720 : {
721 115652 : PushActiveSnapshot(CopySnapshot(snapshot));
722 115652 : }
723 :
724 : /*
725 : * UpdateActiveSnapshotCommandId
726 : *
727 : * Update the current CID of the active snapshot. This can only be applied
728 : * to a snapshot that is not referenced elsewhere.
729 : */
730 : void
731 121400 : UpdateActiveSnapshotCommandId(void)
732 : {
733 : CommandId save_curcid,
734 : curcid;
735 :
736 : Assert(ActiveSnapshot != NULL);
737 : Assert(ActiveSnapshot->as_snap->active_count == 1);
738 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
739 :
740 : /*
741 : * Don't allow modification of the active snapshot during parallel
742 : * operation. We share the snapshot to worker backends at the beginning
743 : * of parallel operation, so any change to the snapshot can lead to
744 : * inconsistencies. We have other defenses against
745 : * CommandCounterIncrement, but there are a few places that call this
746 : * directly, so we put an additional guard here.
747 : */
748 121400 : save_curcid = ActiveSnapshot->as_snap->curcid;
749 121400 : curcid = GetCurrentCommandId(false);
750 121400 : if (IsInParallelMode() && save_curcid != curcid)
751 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
752 121400 : ActiveSnapshot->as_snap->curcid = curcid;
753 121400 : }
754 :
755 : /*
756 : * PopActiveSnapshot
757 : *
758 : * Remove the topmost snapshot from the active snapshot stack, decrementing the
759 : * reference count, and free it if this was the last reference.
760 : */
761 : void
762 2249524 : PopActiveSnapshot(void)
763 : {
764 : ActiveSnapshotElt *newstack;
765 :
766 2249524 : newstack = ActiveSnapshot->as_next;
767 :
768 : Assert(ActiveSnapshot->as_snap->active_count > 0);
769 :
770 2249524 : ActiveSnapshot->as_snap->active_count--;
771 :
772 2249524 : if (ActiveSnapshot->as_snap->active_count == 0 &&
773 2189800 : ActiveSnapshot->as_snap->regd_count == 0)
774 1633898 : FreeSnapshot(ActiveSnapshot->as_snap);
775 :
776 2249524 : pfree(ActiveSnapshot);
777 2249524 : ActiveSnapshot = newstack;
778 :
779 2249524 : SnapshotResetXmin();
780 2249524 : }
781 :
782 : /*
783 : * GetActiveSnapshot
784 : * Return the topmost snapshot in the Active stack.
785 : */
786 : Snapshot
787 1176176 : GetActiveSnapshot(void)
788 : {
789 : Assert(ActiveSnapshot != NULL);
790 :
791 1176176 : return ActiveSnapshot->as_snap;
792 : }
793 :
794 : /*
795 : * ActiveSnapshotSet
796 : * Return whether there is at least one snapshot in the Active stack
797 : */
798 : bool
799 977608 : ActiveSnapshotSet(void)
800 : {
801 977608 : return ActiveSnapshot != NULL;
802 : }
803 :
804 : /*
805 : * RegisterSnapshot
806 : * Register a snapshot as being in use by the current resource owner
807 : *
808 : * If InvalidSnapshot is passed, it is not registered.
809 : */
810 : Snapshot
811 15704016 : RegisterSnapshot(Snapshot snapshot)
812 : {
813 15704016 : if (snapshot == InvalidSnapshot)
814 1343148 : return InvalidSnapshot;
815 :
816 14360868 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
817 : }
818 :
819 : /*
820 : * RegisterSnapshotOnOwner
821 : * As above, but use the specified resource owner
822 : */
823 : Snapshot
824 14361088 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
825 : {
826 : Snapshot snap;
827 :
828 14361088 : if (snapshot == InvalidSnapshot)
829 0 : return InvalidSnapshot;
830 :
831 : /* Static snapshot? Create a persistent copy */
832 14361088 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
833 :
834 : /* and tell resowner.c about it */
835 14361088 : ResourceOwnerEnlarge(owner);
836 14361088 : snap->regd_count++;
837 14361088 : ResourceOwnerRememberSnapshot(owner, snap);
838 :
839 14361088 : if (snap->regd_count == 1)
840 13638388 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
841 :
842 14361088 : return snap;
843 : }
844 :
845 : /*
846 : * UnregisterSnapshot
847 : *
848 : * Decrement the reference count of a snapshot, remove the corresponding
849 : * reference from CurrentResourceOwner, and free the snapshot if no more
850 : * references remain.
851 : */
852 : void
853 15535216 : UnregisterSnapshot(Snapshot snapshot)
854 : {
855 15535216 : if (snapshot == NULL)
856 1286370 : return;
857 :
858 14248846 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
859 : }
860 :
861 : /*
862 : * UnregisterSnapshotFromOwner
863 : * As above, but use the specified resource owner
864 : */
865 : void
866 14302698 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
867 : {
868 14302698 : if (snapshot == NULL)
869 0 : return;
870 :
871 14302698 : ResourceOwnerForgetSnapshot(owner, snapshot);
872 14302698 : UnregisterSnapshotNoOwner(snapshot);
873 : }
874 :
875 : static void
876 14361088 : UnregisterSnapshotNoOwner(Snapshot snapshot)
877 : {
878 : Assert(snapshot->regd_count > 0);
879 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
880 :
881 14361088 : snapshot->regd_count--;
882 14361088 : if (snapshot->regd_count == 0)
883 13638388 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
884 :
885 14361088 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
886 : {
887 13233804 : FreeSnapshot(snapshot);
888 13233804 : SnapshotResetXmin();
889 : }
890 14361088 : }
891 :
892 : /*
893 : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
894 : * by xmin, so that the snapshot with smallest xmin is at the top.
895 : */
896 : static int
897 13626876 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
898 : {
899 13626876 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
900 13626876 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
901 :
902 13626876 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
903 107304 : return 1;
904 13519572 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
905 18198 : return -1;
906 : else
907 13501374 : return 0;
908 : }
909 :
910 : /*
911 : * SnapshotResetXmin
912 : *
913 : * If there are no more snapshots, we can reset our PGPROC->xmin to
914 : * InvalidTransactionId. Note we can do this without locking because we assume
915 : * that storing an Xid is atomic.
916 : *
917 : * Even if there are some remaining snapshots, we may be able to advance our
918 : * PGPROC->xmin to some degree. This typically happens when a portal is
919 : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
920 : * the active snapshot stack is empty; this allows us not to need to track
921 : * which active snapshot is oldest.
922 : */
923 : static void
924 17581468 : SnapshotResetXmin(void)
925 : {
926 : Snapshot minSnapshot;
927 :
928 17581468 : if (ActiveSnapshot != NULL)
929 12687504 : return;
930 :
931 4893964 : if (pairingheap_is_empty(&RegisteredSnapshots))
932 : {
933 1549320 : MyProc->xmin = TransactionXmin = InvalidTransactionId;
934 1549320 : return;
935 : }
936 :
937 3344644 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
938 : pairingheap_first(&RegisteredSnapshots));
939 :
940 3344644 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
941 7352 : MyProc->xmin = TransactionXmin = minSnapshot->xmin;
942 : }
943 :
944 : /*
945 : * AtSubCommit_Snapshot
946 : */
947 : void
948 10686 : AtSubCommit_Snapshot(int level)
949 : {
950 : ActiveSnapshotElt *active;
951 :
952 : /*
953 : * Relabel the active snapshots set in this subtransaction as though they
954 : * are owned by the parent subxact.
955 : */
956 10686 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
957 : {
958 9094 : if (active->as_level < level)
959 9094 : break;
960 0 : active->as_level = level - 1;
961 : }
962 10686 : }
963 :
964 : /*
965 : * AtSubAbort_Snapshot
966 : * Clean up snapshots after a subtransaction abort
967 : */
968 : void
969 9312 : AtSubAbort_Snapshot(int level)
970 : {
971 : /* Forget the active snapshots set by this subtransaction */
972 14998 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
973 : {
974 : ActiveSnapshotElt *next;
975 :
976 5686 : next = ActiveSnapshot->as_next;
977 :
978 : /*
979 : * Decrement the snapshot's active count. If it's still registered or
980 : * marked as active by an outer subtransaction, we can't free it yet.
981 : */
982 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
983 5686 : ActiveSnapshot->as_snap->active_count -= 1;
984 :
985 5686 : if (ActiveSnapshot->as_snap->active_count == 0 &&
986 5686 : ActiveSnapshot->as_snap->regd_count == 0)
987 5686 : FreeSnapshot(ActiveSnapshot->as_snap);
988 :
989 : /* and free the stack element */
990 5686 : pfree(ActiveSnapshot);
991 :
992 5686 : ActiveSnapshot = next;
993 : }
994 :
995 9312 : SnapshotResetXmin();
996 9312 : }
997 :
998 : /*
999 : * AtEOXact_Snapshot
1000 : * Snapshot manager's cleanup function for end of transaction
1001 : */
1002 : void
1003 820778 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1004 : {
1005 : /*
1006 : * In transaction-snapshot mode we must release our privately-managed
1007 : * reference to the transaction snapshot. We must remove it from
1008 : * RegisteredSnapshots to keep the check below happy. But we don't bother
1009 : * to do FreeSnapshot, for two reasons: the memory will go away with
1010 : * TopTransactionContext anyway, and if someone has left the snapshot
1011 : * stacked as active, we don't want the code below to be chasing through a
1012 : * dangling pointer.
1013 : */
1014 820778 : if (FirstXactSnapshot != NULL)
1015 : {
1016 : Assert(FirstXactSnapshot->regd_count > 0);
1017 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1018 5862 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1019 : }
1020 820778 : FirstXactSnapshot = NULL;
1021 :
1022 : /*
1023 : * If we exported any snapshots, clean them up.
1024 : */
1025 820778 : if (exportedSnapshots != NIL)
1026 : {
1027 : ListCell *lc;
1028 :
1029 : /*
1030 : * Get rid of the files. Unlink failure is only a WARNING because (1)
1031 : * it's too late to abort the transaction, and (2) leaving a leaked
1032 : * file around has little real consequence anyway.
1033 : *
1034 : * We also need to remove the snapshots from RegisteredSnapshots to
1035 : * prevent a warning below.
1036 : *
1037 : * As with the FirstXactSnapshot, we don't need to free resources of
1038 : * the snapshot itself as it will go away with the memory context.
1039 : */
1040 32 : foreach(lc, exportedSnapshots)
1041 : {
1042 16 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1043 :
1044 16 : if (unlink(esnap->snapfile))
1045 0 : elog(WARNING, "could not unlink file \"%s\": %m",
1046 : esnap->snapfile);
1047 :
1048 16 : pairingheap_remove(&RegisteredSnapshots,
1049 16 : &esnap->snapshot->ph_node);
1050 : }
1051 :
1052 16 : exportedSnapshots = NIL;
1053 : }
1054 :
1055 : /* Drop catalog snapshot if any */
1056 820778 : InvalidateCatalogSnapshot();
1057 :
1058 : /* On commit, complain about leftover snapshots */
1059 820778 : if (isCommit)
1060 : {
1061 : ActiveSnapshotElt *active;
1062 :
1063 772390 : if (!pairingheap_is_empty(&RegisteredSnapshots))
1064 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
1065 :
1066 : /* complain about unpopped active snapshots */
1067 772390 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1068 0 : elog(WARNING, "snapshot %p still active", active);
1069 : }
1070 :
1071 : /*
1072 : * And reset our state. We don't need to free the memory explicitly --
1073 : * it'll go away with TopTransactionContext.
1074 : */
1075 820778 : ActiveSnapshot = NULL;
1076 820778 : pairingheap_reset(&RegisteredSnapshots);
1077 :
1078 820778 : CurrentSnapshot = NULL;
1079 820778 : SecondarySnapshot = NULL;
1080 :
1081 820778 : FirstSnapshotSet = false;
1082 :
1083 : /*
1084 : * During normal commit processing, we call ProcArrayEndTransaction() to
1085 : * reset the MyProc->xmin. That call happens prior to the call to
1086 : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1087 : */
1088 820778 : if (resetXmin)
1089 49022 : SnapshotResetXmin();
1090 :
1091 : Assert(resetXmin || MyProc->xmin == 0);
1092 820778 : }
1093 :
1094 :
1095 : /*
1096 : * ExportSnapshot
1097 : * Export the snapshot to a file so that other backends can import it.
1098 : * Returns the token (the file name) that can be used to import this
1099 : * snapshot.
1100 : */
1101 : char *
1102 16 : ExportSnapshot(Snapshot snapshot)
1103 : {
1104 : TransactionId topXid;
1105 : TransactionId *children;
1106 : ExportedSnapshot *esnap;
1107 : int nchildren;
1108 : int addTopXid;
1109 : StringInfoData buf;
1110 : FILE *f;
1111 : int i;
1112 : MemoryContext oldcxt;
1113 : char path[MAXPGPATH];
1114 : char pathtmp[MAXPGPATH];
1115 :
1116 : /*
1117 : * It's tempting to call RequireTransactionBlock here, since it's not very
1118 : * useful to export a snapshot that will disappear immediately afterwards.
1119 : * However, we haven't got enough information to do that, since we don't
1120 : * know if we're at top level or not. For example, we could be inside a
1121 : * plpgsql function that is going to fire off other transactions via
1122 : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1123 : * check.
1124 : *
1125 : * Also note that we don't make any restriction on the transaction's
1126 : * isolation level; however, importers must check the level if they are
1127 : * serializable.
1128 : */
1129 :
1130 : /*
1131 : * Get our transaction ID if there is one, to include in the snapshot.
1132 : */
1133 16 : topXid = GetTopTransactionIdIfAny();
1134 :
1135 : /*
1136 : * We cannot export a snapshot from a subtransaction because there's no
1137 : * easy way for importers to verify that the same subtransaction is still
1138 : * running.
1139 : */
1140 16 : if (IsSubTransaction())
1141 0 : ereport(ERROR,
1142 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1143 : errmsg("cannot export a snapshot from a subtransaction")));
1144 :
1145 : /*
1146 : * We do however allow previous committed subtransactions to exist.
1147 : * Importers of the snapshot must see them as still running, so get their
1148 : * XIDs to add them to the snapshot.
1149 : */
1150 16 : nchildren = xactGetCommittedChildren(&children);
1151 :
1152 : /*
1153 : * Generate file path for the snapshot. We start numbering of snapshots
1154 : * inside the transaction from 1.
1155 : */
1156 16 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1157 16 : MyProc->vxid.procNumber, MyProc->vxid.lxid,
1158 16 : list_length(exportedSnapshots) + 1);
1159 :
1160 : /*
1161 : * Copy the snapshot into TopTransactionContext, add it to the
1162 : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1163 : * ensure that the snapshot's xmin is honored for the rest of the
1164 : * transaction.
1165 : */
1166 16 : snapshot = CopySnapshot(snapshot);
1167 :
1168 16 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1169 16 : esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1170 16 : esnap->snapfile = pstrdup(path);
1171 16 : esnap->snapshot = snapshot;
1172 16 : exportedSnapshots = lappend(exportedSnapshots, esnap);
1173 16 : MemoryContextSwitchTo(oldcxt);
1174 :
1175 16 : snapshot->regd_count++;
1176 16 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1177 :
1178 : /*
1179 : * Fill buf with a text serialization of the snapshot, plus identification
1180 : * data about this transaction. The format expected by ImportSnapshot is
1181 : * pretty rigid: each line must be fieldname:value.
1182 : */
1183 16 : initStringInfo(&buf);
1184 :
1185 16 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1186 16 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1187 16 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1188 16 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1189 16 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1190 :
1191 16 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1192 16 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1193 :
1194 : /*
1195 : * We must include our own top transaction ID in the top-xid data, since
1196 : * by definition we will still be running when the importing transaction
1197 : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1198 : * the snapshot. (There must, therefore, be enough room to add it.)
1199 : *
1200 : * However, it could be that our topXid is after the xmax, in which case
1201 : * we shouldn't include it because xip[] members are expected to be before
1202 : * xmax. (We need not make the same check for subxip[] members, see
1203 : * snapshot.h.)
1204 : */
1205 16 : addTopXid = (TransactionIdIsValid(topXid) &&
1206 16 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1207 16 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1208 16 : for (i = 0; i < snapshot->xcnt; i++)
1209 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1210 16 : if (addTopXid)
1211 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
1212 :
1213 : /*
1214 : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1215 : * we have to cope with possible overflow.
1216 : */
1217 32 : if (snapshot->suboverflowed ||
1218 16 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1219 0 : appendStringInfoString(&buf, "sof:1\n");
1220 : else
1221 : {
1222 16 : appendStringInfoString(&buf, "sof:0\n");
1223 16 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1224 16 : for (i = 0; i < snapshot->subxcnt; i++)
1225 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1226 16 : for (i = 0; i < nchildren; i++)
1227 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
1228 : }
1229 16 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1230 :
1231 : /*
1232 : * Now write the text representation into a file. We first write to a
1233 : * ".tmp" filename, and rename to final filename if no error. This
1234 : * ensures that no other backend can read an incomplete file
1235 : * (ImportSnapshot won't allow it because of its valid-characters check).
1236 : */
1237 16 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1238 16 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1239 0 : ereport(ERROR,
1240 : (errcode_for_file_access(),
1241 : errmsg("could not create file \"%s\": %m", pathtmp)));
1242 :
1243 16 : if (fwrite(buf.data, buf.len, 1, f) != 1)
1244 0 : ereport(ERROR,
1245 : (errcode_for_file_access(),
1246 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1247 :
1248 : /* no fsync() since file need not survive a system crash */
1249 :
1250 16 : if (FreeFile(f))
1251 0 : ereport(ERROR,
1252 : (errcode_for_file_access(),
1253 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1254 :
1255 : /*
1256 : * Now that we have written everything into a .tmp file, rename the file
1257 : * to remove the .tmp suffix.
1258 : */
1259 16 : if (rename(pathtmp, path) < 0)
1260 0 : ereport(ERROR,
1261 : (errcode_for_file_access(),
1262 : errmsg("could not rename file \"%s\" to \"%s\": %m",
1263 : pathtmp, path)));
1264 :
1265 : /*
1266 : * The basename of the file is what we return from pg_export_snapshot().
1267 : * It's already in path in a textual format and we know that the path
1268 : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1269 : * and pstrdup it so as not to return the address of a local variable.
1270 : */
1271 16 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1272 : }
1273 :
1274 : /*
1275 : * pg_export_snapshot
1276 : * SQL-callable wrapper for ExportSnapshot.
1277 : */
1278 : Datum
1279 16 : pg_export_snapshot(PG_FUNCTION_ARGS)
1280 : {
1281 : char *snapshotName;
1282 :
1283 16 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1284 16 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1285 : }
1286 :
1287 :
1288 : /*
1289 : * Parsing subroutines for ImportSnapshot: parse a line with the given
1290 : * prefix followed by a value, and advance *s to the next line. The
1291 : * filename is provided for use in error messages.
1292 : */
1293 : static int
1294 224 : parseIntFromText(const char *prefix, char **s, const char *filename)
1295 : {
1296 224 : char *ptr = *s;
1297 224 : int prefixlen = strlen(prefix);
1298 : int val;
1299 :
1300 224 : if (strncmp(ptr, prefix, prefixlen) != 0)
1301 0 : ereport(ERROR,
1302 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1303 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1304 224 : ptr += prefixlen;
1305 224 : if (sscanf(ptr, "%d", &val) != 1)
1306 0 : ereport(ERROR,
1307 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1308 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1309 224 : ptr = strchr(ptr, '\n');
1310 224 : if (!ptr)
1311 0 : ereport(ERROR,
1312 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1313 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1314 224 : *s = ptr + 1;
1315 224 : return val;
1316 : }
1317 :
1318 : static TransactionId
1319 96 : parseXidFromText(const char *prefix, char **s, const char *filename)
1320 : {
1321 96 : char *ptr = *s;
1322 96 : int prefixlen = strlen(prefix);
1323 : TransactionId val;
1324 :
1325 96 : if (strncmp(ptr, prefix, prefixlen) != 0)
1326 0 : ereport(ERROR,
1327 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1328 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1329 96 : ptr += prefixlen;
1330 96 : if (sscanf(ptr, "%u", &val) != 1)
1331 0 : ereport(ERROR,
1332 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1333 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1334 96 : ptr = strchr(ptr, '\n');
1335 96 : if (!ptr)
1336 0 : ereport(ERROR,
1337 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1338 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1339 96 : *s = ptr + 1;
1340 96 : return val;
1341 : }
1342 :
1343 : static void
1344 32 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1345 : VirtualTransactionId *vxid)
1346 : {
1347 32 : char *ptr = *s;
1348 32 : int prefixlen = strlen(prefix);
1349 :
1350 32 : if (strncmp(ptr, prefix, prefixlen) != 0)
1351 0 : ereport(ERROR,
1352 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1353 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1354 32 : ptr += prefixlen;
1355 32 : if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1356 0 : ereport(ERROR,
1357 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1358 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1359 32 : ptr = strchr(ptr, '\n');
1360 32 : if (!ptr)
1361 0 : ereport(ERROR,
1362 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1363 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1364 32 : *s = ptr + 1;
1365 32 : }
1366 :
1367 : /*
1368 : * ImportSnapshot
1369 : * Import a previously exported snapshot. The argument should be a
1370 : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1371 : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1372 : */
1373 : void
1374 44 : ImportSnapshot(const char *idstr)
1375 : {
1376 : char path[MAXPGPATH];
1377 : FILE *f;
1378 : struct stat stat_buf;
1379 : char *filebuf;
1380 : int xcnt;
1381 : int i;
1382 : VirtualTransactionId src_vxid;
1383 : int src_pid;
1384 : Oid src_dbid;
1385 : int src_isolevel;
1386 : bool src_readonly;
1387 : SnapshotData snapshot;
1388 :
1389 : /*
1390 : * Must be at top level of a fresh transaction. Note in particular that
1391 : * we check we haven't acquired an XID --- if we have, it's conceivable
1392 : * that the snapshot would show it as not running, making for very screwy
1393 : * behavior.
1394 : */
1395 88 : if (FirstSnapshotSet ||
1396 88 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
1397 44 : IsSubTransaction())
1398 0 : ereport(ERROR,
1399 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1400 : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1401 :
1402 : /*
1403 : * If we are in read committed mode then the next query would execute with
1404 : * a new snapshot thus making this function call quite useless.
1405 : */
1406 44 : if (!IsolationUsesXactSnapshot())
1407 0 : ereport(ERROR,
1408 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1409 : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1410 :
1411 : /*
1412 : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1413 : * this mainly to prevent reading arbitrary files.
1414 : */
1415 44 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1416 6 : ereport(ERROR,
1417 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1418 : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1419 :
1420 : /* OK, read the file */
1421 38 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1422 :
1423 38 : f = AllocateFile(path, PG_BINARY_R);
1424 38 : if (!f)
1425 : {
1426 : /*
1427 : * If file is missing while identifier has a correct format, avoid
1428 : * system errors.
1429 : */
1430 6 : if (errno == ENOENT)
1431 6 : ereport(ERROR,
1432 : (errcode(ERRCODE_UNDEFINED_OBJECT),
1433 : errmsg("snapshot \"%s\" does not exist", idstr)));
1434 : else
1435 0 : ereport(ERROR,
1436 : (errcode_for_file_access(),
1437 : errmsg("could not open file \"%s\" for reading: %m",
1438 : path)));
1439 : }
1440 :
1441 : /* get the size of the file so that we know how much memory we need */
1442 32 : if (fstat(fileno(f), &stat_buf))
1443 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
1444 :
1445 : /* and read the file into a palloc'd string */
1446 32 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1447 32 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1448 0 : elog(ERROR, "could not read file \"%s\": %m", path);
1449 :
1450 32 : filebuf[stat_buf.st_size] = '\0';
1451 :
1452 32 : FreeFile(f);
1453 :
1454 : /*
1455 : * Construct a snapshot struct by parsing the file content.
1456 : */
1457 32 : memset(&snapshot, 0, sizeof(snapshot));
1458 :
1459 32 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1460 32 : src_pid = parseIntFromText("pid:", &filebuf, path);
1461 : /* we abuse parseXidFromText a bit here ... */
1462 32 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
1463 32 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
1464 32 : src_readonly = parseIntFromText("ro:", &filebuf, path);
1465 :
1466 32 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1467 :
1468 32 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1469 32 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1470 :
1471 32 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1472 :
1473 : /* sanity-check the xid count before palloc */
1474 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1475 0 : ereport(ERROR,
1476 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1477 : errmsg("invalid snapshot data in file \"%s\"", path)));
1478 :
1479 32 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1480 32 : for (i = 0; i < xcnt; i++)
1481 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1482 :
1483 32 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1484 :
1485 32 : if (!snapshot.suboverflowed)
1486 : {
1487 32 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1488 :
1489 : /* sanity-check the xid count before palloc */
1490 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1491 0 : ereport(ERROR,
1492 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1493 : errmsg("invalid snapshot data in file \"%s\"", path)));
1494 :
1495 32 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1496 32 : for (i = 0; i < xcnt; i++)
1497 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1498 : }
1499 : else
1500 : {
1501 0 : snapshot.subxcnt = 0;
1502 0 : snapshot.subxip = NULL;
1503 : }
1504 :
1505 32 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1506 :
1507 : /*
1508 : * Do some additional sanity checking, just to protect ourselves. We
1509 : * don't trouble to check the array elements, just the most critical
1510 : * fields.
1511 : */
1512 32 : if (!VirtualTransactionIdIsValid(src_vxid) ||
1513 32 : !OidIsValid(src_dbid) ||
1514 32 : !TransactionIdIsNormal(snapshot.xmin) ||
1515 32 : !TransactionIdIsNormal(snapshot.xmax))
1516 0 : ereport(ERROR,
1517 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1518 : errmsg("invalid snapshot data in file \"%s\"", path)));
1519 :
1520 : /*
1521 : * If we're serializable, the source transaction must be too, otherwise
1522 : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1523 : * non-read-only transaction can't adopt a snapshot from a read-only
1524 : * transaction, as predicate.c handles the cases very differently.
1525 : */
1526 32 : if (IsolationIsSerializable())
1527 : {
1528 0 : if (src_isolevel != XACT_SERIALIZABLE)
1529 0 : ereport(ERROR,
1530 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1531 : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1532 0 : if (src_readonly && !XactReadOnly)
1533 0 : ereport(ERROR,
1534 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1535 : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1536 : }
1537 :
1538 : /*
1539 : * We cannot import a snapshot that was taken in a different database,
1540 : * because vacuum calculates OldestXmin on a per-database basis; so the
1541 : * source transaction's xmin doesn't protect us from data loss. This
1542 : * restriction could be removed if the source transaction were to mark its
1543 : * xmin as being globally applicable. But that would require some
1544 : * additional syntax, since that has to be known when the snapshot is
1545 : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1546 : */
1547 32 : if (src_dbid != MyDatabaseId)
1548 0 : ereport(ERROR,
1549 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1550 : errmsg("cannot import a snapshot from a different database")));
1551 :
1552 : /* OK, install the snapshot */
1553 32 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1554 32 : }
1555 :
1556 : /*
1557 : * XactHasExportedSnapshots
1558 : * Test whether current transaction has exported any snapshots.
1559 : */
1560 : bool
1561 662 : XactHasExportedSnapshots(void)
1562 : {
1563 662 : return (exportedSnapshots != NIL);
1564 : }
1565 :
1566 : /*
1567 : * DeleteAllExportedSnapshotFiles
1568 : * Clean up any files that have been left behind by a crashed backend
1569 : * that had exported snapshots before it died.
1570 : *
1571 : * This should be called during database startup or crash recovery.
1572 : */
1573 : void
1574 418 : DeleteAllExportedSnapshotFiles(void)
1575 : {
1576 : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1577 : DIR *s_dir;
1578 : struct dirent *s_de;
1579 :
1580 : /*
1581 : * Problems in reading the directory, or unlinking files, are reported at
1582 : * LOG level. Since we're running in the startup process, ERROR level
1583 : * would prevent database start, and it's not important enough for that.
1584 : */
1585 418 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1586 :
1587 1254 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1588 : {
1589 836 : if (strcmp(s_de->d_name, ".") == 0 ||
1590 418 : strcmp(s_de->d_name, "..") == 0)
1591 836 : continue;
1592 :
1593 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1594 :
1595 0 : if (unlink(buf) != 0)
1596 0 : ereport(LOG,
1597 : (errcode_for_file_access(),
1598 : errmsg("could not remove file \"%s\": %m", buf)));
1599 : }
1600 :
1601 418 : FreeDir(s_dir);
1602 418 : }
1603 :
1604 : /*
1605 : * ThereAreNoPriorRegisteredSnapshots
1606 : * Is the registered snapshot count less than or equal to one?
1607 : *
1608 : * Don't use this to settle important decisions. While zero registrations and
1609 : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1610 : * guarantees about the significance of one registered snapshot.
1611 : */
1612 : bool
1613 60 : ThereAreNoPriorRegisteredSnapshots(void)
1614 : {
1615 60 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
1616 0 : pairingheap_is_singular(&RegisteredSnapshots))
1617 60 : return true;
1618 :
1619 0 : return false;
1620 : }
1621 :
1622 : /*
1623 : * HaveRegisteredOrActiveSnapshot
1624 : * Is there any registered or active snapshot?
1625 : *
1626 : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1627 : * this function to return true. That allows this function to be used in
1628 : * checks enforcing a longer-lived snapshot.
1629 : */
1630 : bool
1631 47002 : HaveRegisteredOrActiveSnapshot(void)
1632 : {
1633 47002 : if (ActiveSnapshot != NULL)
1634 46520 : return true;
1635 :
1636 : /*
1637 : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1638 : * removed at any time due to invalidation processing. If explicitly
1639 : * registered more than one snapshot has to be in RegisteredSnapshots.
1640 : */
1641 482 : if (CatalogSnapshot != NULL &&
1642 100 : pairingheap_is_singular(&RegisteredSnapshots))
1643 0 : return false;
1644 :
1645 482 : return !pairingheap_is_empty(&RegisteredSnapshots);
1646 : }
1647 :
1648 :
1649 : /*
1650 : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1651 : * access to behave just like it did at a certain point in the past.
1652 : *
1653 : * Needed for logical decoding.
1654 : */
1655 : void
1656 8814 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1657 : {
1658 : Assert(historic_snapshot != NULL);
1659 :
1660 : /* setup the timetravel snapshot */
1661 8814 : HistoricSnapshot = historic_snapshot;
1662 :
1663 : /* setup (cmin, cmax) lookup hash */
1664 8814 : tuplecid_data = tuplecids;
1665 8814 : }
1666 :
1667 :
1668 : /*
1669 : * Make catalog snapshots behave normally again.
1670 : */
1671 : void
1672 8810 : TeardownHistoricSnapshot(bool is_error)
1673 : {
1674 8810 : HistoricSnapshot = NULL;
1675 8810 : tuplecid_data = NULL;
1676 8810 : }
1677 :
1678 : bool
1679 19083096 : HistoricSnapshotActive(void)
1680 : {
1681 19083096 : return HistoricSnapshot != NULL;
1682 : }
1683 :
1684 : HTAB *
1685 1444 : HistoricSnapshotGetTupleCids(void)
1686 : {
1687 : Assert(HistoricSnapshotActive());
1688 1444 : return tuplecid_data;
1689 : }
1690 :
1691 : /*
1692 : * EstimateSnapshotSpace
1693 : * Returns the size needed to store the given snapshot.
1694 : *
1695 : * We are exporting only required fields from the Snapshot, stored in
1696 : * SerializedSnapshotData.
1697 : */
1698 : Size
1699 2506 : EstimateSnapshotSpace(Snapshot snapshot)
1700 : {
1701 : Size size;
1702 :
1703 : Assert(snapshot != InvalidSnapshot);
1704 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1705 :
1706 : /* We allocate any XID arrays needed in the same palloc block. */
1707 2506 : size = add_size(sizeof(SerializedSnapshotData),
1708 2506 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
1709 2506 : if (snapshot->subxcnt > 0 &&
1710 4 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1711 4 : size = add_size(size,
1712 4 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1713 :
1714 2506 : return size;
1715 : }
1716 :
1717 : /*
1718 : * SerializeSnapshot
1719 : * Dumps the serialized snapshot (extracted from given snapshot) onto the
1720 : * memory location at start_address.
1721 : */
1722 : void
1723 2172 : SerializeSnapshot(Snapshot snapshot, char *start_address)
1724 : {
1725 : SerializedSnapshotData serialized_snapshot;
1726 :
1727 : Assert(snapshot->subxcnt >= 0);
1728 :
1729 : /* Copy all required fields */
1730 2172 : serialized_snapshot.xmin = snapshot->xmin;
1731 2172 : serialized_snapshot.xmax = snapshot->xmax;
1732 2172 : serialized_snapshot.xcnt = snapshot->xcnt;
1733 2172 : serialized_snapshot.subxcnt = snapshot->subxcnt;
1734 2172 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1735 2172 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1736 2172 : serialized_snapshot.curcid = snapshot->curcid;
1737 :
1738 : /*
1739 : * Ignore the SubXID array if it has overflowed, unless the snapshot was
1740 : * taken during recovery - in that case, top-level XIDs are in subxip as
1741 : * well, and we mustn't lose them.
1742 : */
1743 2172 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1744 0 : serialized_snapshot.subxcnt = 0;
1745 :
1746 : /* Copy struct to possibly-unaligned buffer */
1747 2172 : memcpy(start_address,
1748 : &serialized_snapshot, sizeof(SerializedSnapshotData));
1749 :
1750 : /* Copy XID array */
1751 2172 : if (snapshot->xcnt > 0)
1752 662 : memcpy((TransactionId *) (start_address +
1753 : sizeof(SerializedSnapshotData)),
1754 662 : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1755 :
1756 : /*
1757 : * Copy SubXID array. Don't bother to copy it if it had overflowed,
1758 : * though, because it's not used anywhere in that case. Except if it's a
1759 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
1760 : * well in that case, so we mustn't lose them.
1761 : */
1762 2172 : if (serialized_snapshot.subxcnt > 0)
1763 : {
1764 4 : Size subxipoff = sizeof(SerializedSnapshotData) +
1765 4 : snapshot->xcnt * sizeof(TransactionId);
1766 :
1767 4 : memcpy((TransactionId *) (start_address + subxipoff),
1768 4 : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1769 : }
1770 2172 : }
1771 :
1772 : /*
1773 : * RestoreSnapshot
1774 : * Restore a serialized snapshot from the specified address.
1775 : *
1776 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1777 : * to 0. The returned snapshot has the copied flag set.
1778 : */
1779 : Snapshot
1780 6766 : RestoreSnapshot(char *start_address)
1781 : {
1782 : SerializedSnapshotData serialized_snapshot;
1783 : Size size;
1784 : Snapshot snapshot;
1785 : TransactionId *serialized_xids;
1786 :
1787 6766 : memcpy(&serialized_snapshot, start_address,
1788 : sizeof(SerializedSnapshotData));
1789 6766 : serialized_xids = (TransactionId *)
1790 : (start_address + sizeof(SerializedSnapshotData));
1791 :
1792 : /* We allocate any XID arrays needed in the same palloc block. */
1793 6766 : size = sizeof(SnapshotData)
1794 6766 : + serialized_snapshot.xcnt * sizeof(TransactionId)
1795 6766 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
1796 :
1797 : /* Copy all required fields */
1798 6766 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1799 6766 : snapshot->snapshot_type = SNAPSHOT_MVCC;
1800 6766 : snapshot->xmin = serialized_snapshot.xmin;
1801 6766 : snapshot->xmax = serialized_snapshot.xmax;
1802 6766 : snapshot->xip = NULL;
1803 6766 : snapshot->xcnt = serialized_snapshot.xcnt;
1804 6766 : snapshot->subxip = NULL;
1805 6766 : snapshot->subxcnt = serialized_snapshot.subxcnt;
1806 6766 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1807 6766 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1808 6766 : snapshot->curcid = serialized_snapshot.curcid;
1809 6766 : snapshot->snapXactCompletionCount = 0;
1810 :
1811 : /* Copy XIDs, if present. */
1812 6766 : if (serialized_snapshot.xcnt > 0)
1813 : {
1814 1682 : snapshot->xip = (TransactionId *) (snapshot + 1);
1815 1682 : memcpy(snapshot->xip, serialized_xids,
1816 1682 : serialized_snapshot.xcnt * sizeof(TransactionId));
1817 : }
1818 :
1819 : /* Copy SubXIDs, if present. */
1820 6766 : if (serialized_snapshot.subxcnt > 0)
1821 : {
1822 10 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1823 10 : serialized_snapshot.xcnt;
1824 10 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1825 10 : serialized_snapshot.subxcnt * sizeof(TransactionId));
1826 : }
1827 :
1828 : /* Set the copied flag so that the caller will set refcounts correctly. */
1829 6766 : snapshot->regd_count = 0;
1830 6766 : snapshot->active_count = 0;
1831 6766 : snapshot->copied = true;
1832 :
1833 6766 : return snapshot;
1834 : }
1835 :
1836 : /*
1837 : * Install a restored snapshot as the transaction snapshot.
1838 : *
1839 : * The second argument is of type void * so that snapmgr.h need not include
1840 : * the declaration for PGPROC.
1841 : */
1842 : void
1843 3128 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
1844 : {
1845 3128 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1846 3128 : }
1847 :
1848 : /*
1849 : * XidInMVCCSnapshot
1850 : * Is the given XID still-in-progress according to the snapshot?
1851 : *
1852 : * Note: GetSnapshotData never stores either top xid or subxids of our own
1853 : * backend into a snapshot, so these xids will not be reported as "running"
1854 : * by this function. This is OK for current uses, because we always check
1855 : * TransactionIdIsCurrentTransactionId first, except when it's known the
1856 : * XID could not be ours anyway.
1857 : */
1858 : bool
1859 139316470 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1860 : {
1861 : /*
1862 : * Make a quick range check to eliminate most XIDs without looking at the
1863 : * xip arrays. Note that this is OK even if we convert a subxact XID to
1864 : * its parent below, because a subxact with XID < xmin has surely also got
1865 : * a parent with XID < xmin, while one with XID >= xmax must belong to a
1866 : * parent that was not yet committed at the time of this snapshot.
1867 : */
1868 :
1869 : /* Any xid < xmin is not in-progress */
1870 139316470 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1871 132678704 : return false;
1872 : /* Any xid >= xmax is in-progress */
1873 6637766 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1874 25438 : return true;
1875 :
1876 : /*
1877 : * Snapshot information is stored slightly differently in snapshots taken
1878 : * during recovery.
1879 : */
1880 6612328 : if (!snapshot->takenDuringRecovery)
1881 : {
1882 : /*
1883 : * If the snapshot contains full subxact data, the fastest way to
1884 : * check things is just to compare the given XID against both subxact
1885 : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1886 : * use pg_subtrans to convert a subxact XID to its parent XID, but
1887 : * then we need only look at top-level XIDs not subxacts.
1888 : */
1889 6612168 : if (!snapshot->suboverflowed)
1890 : {
1891 : /* we have full data, so search subxip */
1892 6611468 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1893 556 : return true;
1894 :
1895 : /* not there, fall through to search xip[] */
1896 : }
1897 : else
1898 : {
1899 : /*
1900 : * Snapshot overflowed, so convert xid to top-level. This is safe
1901 : * because we eliminated too-old XIDs above.
1902 : */
1903 700 : xid = SubTransGetTopmostTransaction(xid);
1904 :
1905 : /*
1906 : * If xid was indeed a subxact, we might now have an xid < xmin,
1907 : * so recheck to avoid an array scan. No point in rechecking
1908 : * xmax.
1909 : */
1910 700 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1911 0 : return false;
1912 : }
1913 :
1914 6611612 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1915 28638 : return true;
1916 : }
1917 : else
1918 : {
1919 : /*
1920 : * In recovery we store all xids in the subxip array because it is by
1921 : * far the bigger array, and we mostly don't know which xids are
1922 : * top-level and which are subxacts. The xip array is empty.
1923 : *
1924 : * We start by searching subtrans, if we overflowed.
1925 : */
1926 160 : if (snapshot->suboverflowed)
1927 : {
1928 : /*
1929 : * Snapshot overflowed, so convert xid to top-level. This is safe
1930 : * because we eliminated too-old XIDs above.
1931 : */
1932 8 : xid = SubTransGetTopmostTransaction(xid);
1933 :
1934 : /*
1935 : * If xid was indeed a subxact, we might now have an xid < xmin,
1936 : * so recheck to avoid an array scan. No point in rechecking
1937 : * xmax.
1938 : */
1939 8 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1940 0 : return false;
1941 : }
1942 :
1943 : /*
1944 : * We now have either a top-level xid higher than xmin or an
1945 : * indeterminate xid. We don't know whether it's top level or subxact
1946 : * but it doesn't matter. If it's present, the xid is visible.
1947 : */
1948 160 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1949 12 : return true;
1950 : }
1951 :
1952 6583122 : return false;
1953 : }
1954 :
1955 : /* ResourceOwner callbacks */
1956 :
1957 : static void
1958 58390 : ResOwnerReleaseSnapshot(Datum res)
1959 : {
1960 58390 : UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1961 58390 : }
|