Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * snapmgr.c
4 : * PostgreSQL snapshot manager
5 : *
6 : * We keep track of snapshots in two ways: those "registered" by resowner.c,
7 : * and the "active snapshot" stack. All snapshots in either of them live in
8 : * persistent memory. When a snapshot is no longer in any of these lists
9 : * (tracked by separate refcounts on each snapshot), its memory can be freed.
10 : *
11 : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12 : * regd_count and list it in RegisteredSnapshots, but this reference is not
13 : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14 : * to track this snapshot reference, but that introduces logical circularity
15 : * and thus makes it impossible to clean up in a sane fashion. It's better to
16 : * handle this reference as an internally-tracked registration, so that this
17 : * module is entirely lower-level than ResourceOwners.
18 : *
19 : * Likewise, any snapshots that have been exported by pg_export_snapshot
20 : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21 : * tracked by any resource owner.
22 : *
23 : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24 : * is valid, but is not tracked by any resource owner.
25 : *
26 : * The same is true for historic snapshots used during logical decoding;
27 : * their lifetime is managed separately (as they live longer than one xact.c
28 : * transaction).
29 : *
30 : * These arrangements let us reset MyProc->xmin when there are no snapshots
31 : * referenced by this transaction, and advance it when the one with oldest
32 : * Xmin is no longer referenced. For simplicity, however, only registered
33 : * snapshots, not active snapshots, participate in tracking which one is oldest;
34 : * we don't try to change MyProc->xmin except when the active-snapshot
35 : * stack is empty.
36 : *
37 : *
38 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
39 : * Portions Copyright (c) 1994, Regents of the University of California
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/time/snapmgr.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/stat.h>
49 : #include <unistd.h>
50 :
51 : #include "access/subtrans.h"
52 : #include "access/transam.h"
53 : #include "access/xact.h"
54 : #include "datatype/timestamp.h"
55 : #include "lib/pairingheap.h"
56 : #include "miscadmin.h"
57 : #include "port/pg_lfind.h"
58 : #include "storage/fd.h"
59 : #include "storage/predicate.h"
60 : #include "storage/proc.h"
61 : #include "storage/procarray.h"
62 : #include "utils/builtins.h"
63 : #include "utils/memutils.h"
64 : #include "utils/resowner.h"
65 : #include "utils/snapmgr.h"
66 : #include "utils/syscache.h"
67 :
68 :
69 : /*
70 : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
71 : * mode, and to the latest one taken in a read-committed transaction.
72 : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
73 : * instant, even in transaction-snapshot mode. It should only be used for
74 : * special-purpose code (say, RI checking). CatalogSnapshot points to an
75 : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
76 : * whenever a system catalog change occurs.
77 : *
78 : * These SnapshotData structs are static to simplify memory allocation
79 : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
80 : */
81 : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for CurrentSnapshot */
82 : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for SecondarySnapshot */
83 : static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC}; /* filled by GetSnapshotData for CatalogSnapshot */
84 : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF}; /* not static: has external linkage */
85 : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY}; /* not static: has external linkage */
86 : SnapshotData SnapshotToastData = {SNAPSHOT_TOAST}; /* not static: has external linkage */
87 :
88 : /* Pointers to valid snapshots; NULL means "none at the moment" */
89 : static Snapshot CurrentSnapshot = NULL; /* set by GetTransactionSnapshot / SetTransactionSnapshot */
90 : static Snapshot SecondarySnapshot = NULL; /* set by GetLatestSnapshot */
91 : static Snapshot CatalogSnapshot = NULL; /* set by GetNonHistoricCatalogSnapshot, cleared by InvalidateCatalogSnapshot */
92 : static Snapshot HistoricSnapshot = NULL; /* returned by GetCatalogSnapshot while HistoricSnapshotActive() */
93 :
94 : /*
95 : * These are updated by GetSnapshotData. We initialize them this way
96 : * for the convenience of TransactionIdIsInProgress: even in bootstrap
97 : * mode, we don't want it to say that BootstrapTransactionId is in progress.
98 : */
99 : TransactionId TransactionXmin = FirstNormalTransactionId; /* also rewritten by SnapshotResetXmin */
100 : TransactionId RecentXmin = FirstNormalTransactionId;
101 :
102 : /* (table, ctid) => (cmin, cmax) mapping during timetravel; NULL when no mapping is installed */
103 : static HTAB *tuplecid_data = NULL;
104 :
105 : /*
106 : * Elements of the active snapshot stack.
107 : *
108 : * Each element here accounts for exactly one active_count on SnapshotData.
109 : *
110 : * NB: the code assumes that elements in this list are in non-increasing
111 : * order of as_level; also, the list must be NULL-terminated.
112 : */
113 : typedef struct ActiveSnapshotElt
114 : {
115 : Snapshot as_snap; /* snapshot held by this stack entry */
116 : int as_level; /* transaction nesting level that owns this entry */
117 : struct ActiveSnapshotElt *as_next; /* entry pushed before this one, or NULL at the bottom */
118 : } ActiveSnapshotElt;
119 :
120 : /* Top of the stack of active snapshots; NULL when the stack is empty */
121 : static ActiveSnapshotElt *ActiveSnapshot = NULL;
122 :
123 : /*
124 : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
125 : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
126 : * xmin_cmp is the heap's comparison function.
127 : */
127 : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
128 : void *arg);
129 :
130 : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};
131 :
132 : /* Has this transaction's first snapshot been taken (or imported) yet? */
133 : bool FirstSnapshotSet = false;
134 :
135 : /*
136 : * Remember the serializable transaction snapshot, if any. We cannot trust
137 : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
138 : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
139 : */
140 : static Snapshot FirstXactSnapshot = NULL;
141 :
142 : /* Define pathname of exported-snapshot files */
143 : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
144 :
145 : /* Structure holding info about exported snapshot. */
146 : typedef struct ExportedSnapshot
147 : {
148 : char *snapfile; /* NOTE(review): presumably the exported file's path under SNAPSHOT_EXPORT_DIR - confirm */
149 : Snapshot snapshot; /* the snapshot that was exported */
150 : } ExportedSnapshot;
151 :
152 : /* Current xact's exported snapshots (a list of ExportedSnapshot structs); NIL when there are none */
153 : static List *exportedSnapshots = NIL;
154 :
155 : /* Prototypes for local functions */
156 : static Snapshot CopySnapshot(Snapshot snapshot);
157 : static void UnregisterSnapshotNoOwner(Snapshot snapshot);
158 : static void FreeSnapshot(Snapshot snapshot);
159 : static void SnapshotResetXmin(void);
160 :
161 : /* ResourceOwner callbacks to track snapshot references */
162 : static void ResOwnerReleaseSnapshot(Datum res);
163 :
164 : static const ResourceOwnerDesc snapshot_resowner_desc = /* passed to ResourceOwnerRemember/Forget for snapshot references */
165 : {
166 : .name = "snapshot reference",
167 : .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
168 : .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
169 : .ReleaseResource = ResOwnerReleaseSnapshot, /* callback declared just above */
170 : .DebugPrint = NULL /* the default message is fine */
171 : };
172 :
173 : /* Convenience wrappers over ResourceOwnerRemember/Forget */
174 : static inline void
175 13317280 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
176 : {
177 13317280 : ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
178 13317280 : }
179 : static inline void
180 13261920 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
181 : {
182 13261920 : ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
183 13261920 : }
184 :
185 : /*
186 : * Snapshot fields to be serialized.
187 : *
188 : * Only these fields need to be sent to the cooperating backend; the
189 : * remaining ones can (and must) be set by the receiver upon restore.
190 : * Each member has the same name as a snapshot field used in this file.
191 : */
190 : typedef struct SerializedSnapshotData
191 : {
192 : TransactionId xmin;
193 : TransactionId xmax;
194 : uint32 xcnt; /* NOTE(review): presumably the number of xip entries that follow - confirm against the serializer */
195 : int32 subxcnt; /* signed, unlike xcnt */
196 : bool suboverflowed;
197 : bool takenDuringRecovery;
198 : CommandId curcid;
199 : } SerializedSnapshotData;
201 :
202 : /*
203 : * GetTransactionSnapshot
204 : * Get the appropriate snapshot for a new query in a transaction.
205 : *
206 : * Note that the return value may point at static storage that will be modified
207 : * by future calls and by CommandCounterIncrement(). Callers should call
208 : * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
209 : * used very long.
210 : */
211 : Snapshot
212 1767502 : GetTransactionSnapshot(void)
213 : {
214 : /*
215 : * This should not be called while doing logical decoding. Historic
216 : * snapshots are only usable for catalog access, not for general-purpose
217 : * queries.
218 : */
219 1767502 : if (HistoricSnapshotActive())
220 0 : elog(ERROR, "cannot take query snapshot during logical decoding");
221 :
222 : /* First call in transaction? */
223 1767502 : if (!FirstSnapshotSet)
224 : {
225 : /*
226 : * Don't allow catalog snapshot to be older than xact snapshot. Must
227 : * do this first to allow the empty-heap Assert to succeed.
228 : */
229 605424 : InvalidateCatalogSnapshot();
230 :
231 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
232 : Assert(FirstXactSnapshot == NULL);
233 :
234 605424 : if (IsInParallelMode())
235 0 : elog(ERROR,
236 : "cannot take query snapshot during a parallel operation");
237 :
238 : /*
239 : * In transaction-snapshot mode, the first snapshot must live until
240 : * end of xact regardless of what the caller does with it, so we must
241 : * make a copy of it rather than returning CurrentSnapshotData
242 : * directly. Furthermore, if we're running in serializable mode,
243 : * predicate.c needs to wrap the snapshot fetch in its own processing.
244 : */
245 605424 : if (IsolationUsesXactSnapshot())
246 : {
247 : /* First, create the snapshot in CurrentSnapshotData */
248 5356 : if (IsolationIsSerializable())
249 3280 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
250 : else
251 2076 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
252 : /* Make a saved copy */
253 5356 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
254 5356 : FirstXactSnapshot = CurrentSnapshot;
255 : /* Mark it as "registered" in FirstXactSnapshot */
256 5356 : FirstXactSnapshot->regd_count++;
257 5356 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
258 : }
259 : else
260 600068 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
261 :
262 605424 : FirstSnapshotSet = true;
263 605424 : return CurrentSnapshot;
264 : }
265 :
266 1162078 : if (IsolationUsesXactSnapshot())
267 208646 : return CurrentSnapshot;
268 :
269 : /* Don't allow catalog snapshot to be older than xact snapshot. */
270 953432 : InvalidateCatalogSnapshot();
271 :
272 953432 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
273 :
274 953432 : return CurrentSnapshot;
275 : }
276 :
277 : /*
278 : * GetLatestSnapshot
279 : * Get a snapshot that is up-to-date as of the current instant,
280 : * even if we are executing in transaction-snapshot mode.
281 : */
282 : Snapshot
283 296318 : GetLatestSnapshot(void)
284 : {
285 : /*
286 : * We might be able to relax this, but nothing that could otherwise work
287 : * needs it.
288 : */
289 296318 : if (IsInParallelMode())
290 0 : elog(ERROR,
291 : "cannot update SecondarySnapshot during a parallel operation");
292 :
293 : /*
294 : * So far there are no cases requiring support for GetLatestSnapshot()
295 : * during logical decoding, but it wouldn't be hard to add if required.
296 : */
297 : Assert(!HistoricSnapshotActive());
298 :
299 : /* If first call in transaction, go ahead and set the xact snapshot */
300 296318 : if (!FirstSnapshotSet)
301 100 : return GetTransactionSnapshot();
302 :
303 296218 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
304 :
305 296218 : return SecondarySnapshot;
306 : }
307 :
308 : /*
309 : * GetCatalogSnapshot
310 : * Get a snapshot that is sufficiently up-to-date for scan of the
311 : * system catalog with the specified OID.
312 : */
313 : Snapshot
314 12175680 : GetCatalogSnapshot(Oid relid)
315 : {
316 : /*
317 : * Return historic snapshot while we're doing logical decoding, so we can
318 : * see the appropriate state of the catalog.
319 : *
320 : * This is the primary reason for needing to reset the system caches after
321 : * finishing decoding.
322 : */
323 12175680 : if (HistoricSnapshotActive())
324 26348 : return HistoricSnapshot;
325 :
326 12149332 : return GetNonHistoricCatalogSnapshot(relid);
327 : }
328 :
329 : /*
330 : * GetNonHistoricCatalogSnapshot
331 : * Get a snapshot that is sufficiently up-to-date for scan of the system
332 : * catalog with the specified OID, even while historic snapshots are set
333 : * up.
334 : */
335 : Snapshot
336 12152066 : GetNonHistoricCatalogSnapshot(Oid relid)
337 : {
338 : /*
339 : * If the caller is trying to scan a relation that has no syscache, no
340 : * catcache invalidations will be sent when it is updated. For a few key
341 : * relations, snapshot invalidations are sent instead. If we're trying to
342 : * scan a relation for which neither catcache nor snapshot invalidations
343 : * are sent, we must refresh the snapshot every time.
344 : */
345 12152066 : if (CatalogSnapshot &&
346 10632086 : !RelationInvalidatesSnapshotsOnly(relid) &&
347 9189346 : !RelationHasSysCache(relid))
348 444670 : InvalidateCatalogSnapshot();
349 :
350 12152066 : if (CatalogSnapshot == NULL)
351 : {
352 : /* Get new snapshot. */
353 1964650 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
354 :
355 : /*
356 : * Make sure the catalog snapshot will be accounted for in decisions
357 : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
358 : * that would result in making a physical copy, which is overkill; and
359 : * it would also create a dependency on some resource owner, which we
360 : * do not want for reasons explained at the head of this file. Instead
361 : * just shove the CatalogSnapshot into the pairing heap manually. This
362 : * has to be reversed in InvalidateCatalogSnapshot, of course.
363 : *
364 : * NB: it had better be impossible for this to throw error, since the
365 : * CatalogSnapshot pointer is already valid.
366 : */
367 1964650 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
368 : }
369 :
370 12152066 : return CatalogSnapshot;
371 : }
372 :
373 : /*
374 : * InvalidateCatalogSnapshot
375 : * Mark the current catalog snapshot, if any, as invalid
376 : *
377 : * We could change this API to allow the caller to provide more fine-grained
378 : * invalidation details, so that a change to relation A wouldn't prevent us
379 : * from using our cached snapshot to scan relation B, but so far there's no
380 : * evidence that the CPU cycles we spent tracking such fine details would be
381 : * well-spent.
382 : */
383 : void
384 24089750 : InvalidateCatalogSnapshot(void)
385 : {
386 24089750 : if (CatalogSnapshot)
387 : {
388 1964650 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
389 1964650 : CatalogSnapshot = NULL;
390 1964650 : SnapshotResetXmin();
391 : }
392 24089750 : }
393 :
394 : /*
395 : * InvalidateCatalogSnapshotConditionally
396 : * Drop catalog snapshot if it's the only one we have
397 : *
398 : * This is called when we are about to wait for client input, so we don't
399 : * want to continue holding the catalog snapshot if it might mean that the
400 : * global xmin horizon can't advance. However, if there are other snapshots
401 : * still active or registered, the catalog snapshot isn't likely to be the
402 : * oldest one, so we might as well keep it.
403 : */
404 : void
405 740502 : InvalidateCatalogSnapshotConditionally(void)
406 : {
407 740502 : if (CatalogSnapshot &&
408 98406 : ActiveSnapshot == NULL &&
409 96770 : pairingheap_is_singular(&RegisteredSnapshots))
410 16458 : InvalidateCatalogSnapshot();
411 740502 : }
412 :
413 : /*
414 : * SnapshotSetCommandId
415 : * Propagate CommandCounterIncrement into the static snapshots, if set
416 : */
417 : void
418 1071558 : SnapshotSetCommandId(CommandId curcid)
419 : {
420 1071558 : if (!FirstSnapshotSet)
421 18708 : return;
422 :
423 1052850 : if (CurrentSnapshot)
424 1052850 : CurrentSnapshot->curcid = curcid;
425 1052850 : if (SecondarySnapshot)
426 157586 : SecondarySnapshot->curcid = curcid;
427 : /* Should we do the same with CatalogSnapshot? */
428 : }
429 :
430 : /*
431 : * SetTransactionSnapshot
432 : * Set the transaction's snapshot from an imported MVCC snapshot.
433 : *
434 : * Note that this is very closely tied to GetTransactionSnapshot --- it
435 : * must take care of all the same considerations as the first-snapshot case
436 : * in GetTransactionSnapshot.
437 : */
438 : static void
439 3116 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
440 : int sourcepid, PGPROC *sourceproc)
441 : {
442 : /* Caller should have checked this already */
443 : Assert(!FirstSnapshotSet);
444 :
445 : /* Better do this to ensure following Assert succeeds. */
446 3116 : InvalidateCatalogSnapshot();
447 :
448 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
449 : Assert(FirstXactSnapshot == NULL);
450 : Assert(!HistoricSnapshotActive());
451 :
452 : /*
453 : * Even though we are not going to use the snapshot it computes, we must
454 : * call GetSnapshotData, for two reasons: (1) to be sure that
455 : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
456 : * the state for GlobalVis*.
457 : */
458 3116 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
459 :
460 : /*
461 : * Now copy appropriate fields from the source snapshot.
462 : */
463 3116 : CurrentSnapshot->xmin = sourcesnap->xmin;
464 3116 : CurrentSnapshot->xmax = sourcesnap->xmax;
465 3116 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
466 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
467 3116 : if (sourcesnap->xcnt > 0)
468 576 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
469 576 : sourcesnap->xcnt * sizeof(TransactionId));
470 3116 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
471 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
472 3116 : if (sourcesnap->subxcnt > 0)
473 0 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
474 0 : sourcesnap->subxcnt * sizeof(TransactionId));
475 3116 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
476 3116 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
477 : /* NB: curcid should NOT be copied, it's a local matter */
478 :
479 3116 : CurrentSnapshot->snapXactCompletionCount = 0;
480 :
481 : /*
482 : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
483 : * TransactionXmin. There is a race condition: to make sure we are not
484 : * causing the global xmin to go backwards, we have to test that the
485 : * source transaction is still running, and that has to be done
486 : * atomically. So let procarray.c do it.
487 : *
488 : * Note: in serializable mode, predicate.c will do this a second time. It
489 : * doesn't seem worth contorting the logic here to avoid two calls,
490 : * especially since it's not clear that predicate.c *must* do this.
491 : */
492 3116 : if (sourceproc != NULL)
493 : {
494 3084 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
495 0 : ereport(ERROR,
496 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
497 : errmsg("could not import the requested snapshot"),
498 : errdetail("The source transaction is not running anymore.")));
499 : }
500 32 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
501 0 : ereport(ERROR,
502 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
503 : errmsg("could not import the requested snapshot"),
504 : errdetail("The source process with PID %d is not running anymore.",
505 : sourcepid)));
506 :
507 : /*
508 : * In transaction-snapshot mode, the first snapshot must live until end of
509 : * xact, so we must make a copy of it. Furthermore, if we're running in
510 : * serializable mode, predicate.c needs to do its own processing.
511 : */
512 3116 : if (IsolationUsesXactSnapshot())
513 : {
514 442 : if (IsolationIsSerializable())
515 26 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
516 : sourcepid);
517 : /* Make a saved copy */
518 442 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
519 442 : FirstXactSnapshot = CurrentSnapshot;
520 : /* Mark it as "registered" in FirstXactSnapshot */
521 442 : FirstXactSnapshot->regd_count++;
522 442 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
523 : }
524 :
525 3116 : FirstSnapshotSet = true;
526 3116 : }
527 :
528 : /*
529 : * CopySnapshot
530 : * Copy the given snapshot.
531 : *
532 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
533 : * to 0. The returned snapshot has the copied flag set.
534 : */
535 : static Snapshot
536 13884758 : CopySnapshot(Snapshot snapshot)
537 : {
538 : Snapshot newsnap;
539 : Size subxipoff;
540 : Size size;
541 :
542 : Assert(snapshot != InvalidSnapshot);
543 :
544 : /* We allocate any XID arrays needed in the same palloc block. */
545 13884758 : size = subxipoff = sizeof(SnapshotData) +
546 13884758 : snapshot->xcnt * sizeof(TransactionId);
547 13884758 : if (snapshot->subxcnt > 0)
548 126844 : size += snapshot->subxcnt * sizeof(TransactionId);
549 :
550 13884758 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
551 13884758 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
552 :
553 13884758 : newsnap->regd_count = 0;
554 13884758 : newsnap->active_count = 0;
555 13884758 : newsnap->copied = true;
556 13884758 : newsnap->snapXactCompletionCount = 0;
557 :
558 : /* setup XID array */
559 13884758 : if (snapshot->xcnt > 0)
560 : {
561 2914640 : newsnap->xip = (TransactionId *) (newsnap + 1);
562 2914640 : memcpy(newsnap->xip, snapshot->xip,
563 2914640 : snapshot->xcnt * sizeof(TransactionId));
564 : }
565 : else
566 10970118 : newsnap->xip = NULL;
567 :
568 : /*
569 : * Setup subXID array. Don't bother to copy it if it had overflowed,
570 : * though, because it's not used anywhere in that case. Except if it's a
571 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
572 : * well in that case, so we mustn't lose them.
573 : */
574 13884758 : if (snapshot->subxcnt > 0 &&
575 126844 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
576 : {
577 126844 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
578 126844 : memcpy(newsnap->subxip, snapshot->subxip,
579 126844 : snapshot->subxcnt * sizeof(TransactionId));
580 : }
581 : else
582 13757914 : newsnap->subxip = NULL;
583 :
584 13884758 : return newsnap;
585 : }
586 :
587 : /*
588 : * FreeSnapshot
589 : * Free the memory associated with a snapshot.
590 : */
591 : static void
592 13838172 : FreeSnapshot(Snapshot snapshot)
593 : {
594 : Assert(snapshot->regd_count == 0);
595 : Assert(snapshot->active_count == 0);
596 : Assert(snapshot->copied);
597 :
598 13838172 : pfree(snapshot);
599 13838172 : }
600 :
601 : /*
602 : * PushActiveSnapshot
603 : * Set the given snapshot as the current active snapshot
604 : *
605 : * If the passed snapshot is a statically-allocated one, or it is possibly
606 : * subject to a future command counter update, create a new long-lived copy
607 : * with active refcount=1. Otherwise, only increment the refcount.
608 : */
609 : void
610 1922732 : PushActiveSnapshot(Snapshot snapshot)
611 : {
612 1922732 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
613 1922732 : }
614 :
615 : /*
616 : * PushActiveSnapshotWithLevel
617 : * Set the given snapshot as the current active snapshot
618 : *
619 : * Same as PushActiveSnapshot except that caller can specify the
620 : * transaction nesting level that "owns" the snapshot. This level
621 : * must not be deeper than the current top of the snapshot stack.
622 : */
623 : void
624 2192924 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
625 : {
626 : ActiveSnapshotElt *newactive;
627 :
628 : Assert(snapshot != InvalidSnapshot);
629 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
630 :
631 2192924 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
632 :
633 : /*
634 : * Checking SecondarySnapshot is probably useless here, but it seems
635 : * better to be sure.
636 : */
637 2192924 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
638 432692 : !snapshot->copied)
639 1760232 : newactive->as_snap = CopySnapshot(snapshot);
640 : else
641 432692 : newactive->as_snap = snapshot;
642 :
643 2192924 : newactive->as_next = ActiveSnapshot;
644 2192924 : newactive->as_level = snap_level;
645 :
646 2192924 : newactive->as_snap->active_count++;
647 :
648 2192924 : ActiveSnapshot = newactive;
649 2192924 : }
650 :
651 : /*
652 : * PushCopiedSnapshot
653 : * As above, except forcibly copy the presented snapshot.
654 : *
655 : * This should be used when the ActiveSnapshot has to be modifiable, for
656 : * example if the caller intends to call UpdateActiveSnapshotCommandId.
657 : * The new snapshot will be released when popped from the stack.
658 : */
659 : void
660 114084 : PushCopiedSnapshot(Snapshot snapshot)
661 : {
662 114084 : PushActiveSnapshot(CopySnapshot(snapshot));
663 114084 : }
664 :
665 : /*
666 : * UpdateActiveSnapshotCommandId
667 : *
668 : * Update the current CID of the active snapshot. This can only be applied
669 : * to a snapshot that is not referenced elsewhere.
670 : */
671 : void
672 115332 : UpdateActiveSnapshotCommandId(void)
673 : {
674 : CommandId save_curcid,
675 : curcid;
676 :
677 : Assert(ActiveSnapshot != NULL);
678 : Assert(ActiveSnapshot->as_snap->active_count == 1);
679 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
680 :
681 : /*
682 : * Don't allow modification of the active snapshot during parallel
683 : * operation. We share the snapshot to worker backends at the beginning
684 : * of parallel operation, so any change to the snapshot can lead to
685 : * inconsistencies. We have other defenses against
686 : * CommandCounterIncrement, but there are a few places that call this
687 : * directly, so we put an additional guard here.
688 : */
689 115332 : save_curcid = ActiveSnapshot->as_snap->curcid;
690 115332 : curcid = GetCurrentCommandId(false);
691 115332 : if (IsInParallelMode() && save_curcid != curcid)
692 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
693 115332 : ActiveSnapshot->as_snap->curcid = curcid;
694 115332 : }
695 :
696 : /*
697 : * PopActiveSnapshot
698 : *
699 : * Remove the topmost snapshot from the active snapshot stack, decrementing the
700 : * reference count, and free it if this was the last reference.
701 : */
702 : void
703 2139790 : PopActiveSnapshot(void)
704 : {
705 : ActiveSnapshotElt *newstack;
706 :
707 2139790 : newstack = ActiveSnapshot->as_next;
708 :
709 : Assert(ActiveSnapshot->as_snap->active_count > 0);
710 :
711 2139790 : ActiveSnapshot->as_snap->active_count--;
712 :
713 2139790 : if (ActiveSnapshot->as_snap->active_count == 0 &&
714 2106922 : ActiveSnapshot->as_snap->regd_count == 0)
715 1581764 : FreeSnapshot(ActiveSnapshot->as_snap);
716 :
717 2139790 : pfree(ActiveSnapshot);
718 2139790 : ActiveSnapshot = newstack;
719 :
720 2139790 : SnapshotResetXmin();
721 2139790 : }
722 :
723 : /*
724 : * GetActiveSnapshot
725 : * Return the topmost snapshot in the Active stack.
726 : */
727 : Snapshot
728 962414 : GetActiveSnapshot(void)
729 : {
730 : Assert(ActiveSnapshot != NULL);
731 :
732 962414 : return ActiveSnapshot->as_snap;
733 : }
734 :
735 : /*
736 : * ActiveSnapshotSet
737 : * Return whether there is at least one snapshot in the Active stack
738 : */
739 : bool
740 921848 : ActiveSnapshotSet(void)
741 : {
742 921848 : return ActiveSnapshot != NULL;
743 : }
744 :
745 : /*
746 : * RegisterSnapshot
747 : * Register a snapshot as being in use by the current resource owner
748 : *
749 : * If InvalidSnapshot is passed, it is not registered.
750 : */
751 : Snapshot
752 14597768 : RegisterSnapshot(Snapshot snapshot)
753 : {
754 14597768 : if (snapshot == InvalidSnapshot)
755 1280696 : return InvalidSnapshot;
756 :
757 13317072 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
758 : }
759 :
760 : /*
761 : * RegisterSnapshotOnOwner
762 : * As above, but use the specified resource owner
763 : */
764 : Snapshot
765 13317280 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
766 : {
767 : Snapshot snap;
768 :
769 13317280 : if (snapshot == InvalidSnapshot)
770 0 : return InvalidSnapshot;
771 :
772 : /* Static snapshot? Create a persistent copy */
773 13317280 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
774 :
775 : /* and tell resowner.c about it */
776 13317280 : ResourceOwnerEnlarge(owner);
777 13317280 : snap->regd_count++;
778 13317280 : ResourceOwnerRememberSnapshot(owner, snap);
779 :
780 13317280 : if (snap->regd_count == 1)
781 12639220 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
782 :
783 13317280 : return snap;
784 : }
785 :
786 : /*
787 : * UnregisterSnapshot
788 : *
789 : * Decrement the reference count of a snapshot, remove the corresponding
790 : * reference from CurrentResourceOwner, and free the snapshot if no more
791 : * references remain.
792 : */
793 : void
794 14448226 : UnregisterSnapshot(Snapshot snapshot)
795 : {
796 14448226 : if (snapshot == NULL)
797 1226798 : return;
798 :
799 13221428 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
800 : }
801 :
802 : /*
803 : * UnregisterSnapshotFromOwner
804 : * As above, but use the specified resource owner
805 : */
806 : void
807 13261920 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
808 : {
809 13261920 : if (snapshot == NULL)
810 0 : return;
811 :
812 13261920 : ResourceOwnerForgetSnapshot(owner, snapshot);
813 13261920 : UnregisterSnapshotNoOwner(snapshot);
814 : }
815 :
816 : static void
817 13317280 : UnregisterSnapshotNoOwner(Snapshot snapshot)
818 : {
819 : Assert(snapshot->regd_count > 0);
820 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
821 :
822 13317280 : snapshot->regd_count--;
823 13317280 : if (snapshot->regd_count == 0)
824 12639220 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
825 :
826 13317280 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
827 : {
828 12250734 : FreeSnapshot(snapshot);
829 12250734 : SnapshotResetXmin();
830 : }
831 13317280 : }
832 :
833 : /*
834 : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
835 : * by xmin, so that the snapshot with smallest xmin is at the top.
836 : */
837 : static int
838 12621360 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
839 : {
840 12621360 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
841 12621360 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
842 :
843 12621360 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
844 101996 : return 1;
845 12519364 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
846 17934 : return -1;
847 : else
848 12501430 : return 0;
849 : }
850 :
851 : /*
852 : * SnapshotResetXmin
853 : *
854 : * If there are no more snapshots, we can reset our PGPROC->xmin to
855 : * InvalidTransactionId. Note we can do this without locking because we assume
856 : * that storing an Xid is atomic.
857 : *
858 : * Even if there are some remaining snapshots, we may be able to advance our
859 : * PGPROC->xmin to some degree. This typically happens when a portal is
860 : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
861 : * the active snapshot stack is empty; this allows us not to need to track
862 : * which active snapshot is oldest.
863 : */
864 : static void
865 16412374 : SnapshotResetXmin(void)
866 : {
867 : Snapshot minSnapshot;
868 :
869 16412374 : if (ActiveSnapshot != NULL)
870 11803840 : return;
871 :
872 4608534 : if (pairingheap_is_empty(&RegisteredSnapshots))
873 : {
874 1505890 : MyProc->xmin = TransactionXmin = InvalidTransactionId;
875 1505890 : return;
876 : }
877 :
878 3102644 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
879 : pairingheap_first(&RegisteredSnapshots));
880 :
881 3102644 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
882 7606 : MyProc->xmin = TransactionXmin = minSnapshot->xmin;
883 : }
884 :
885 : /*
886 : * AtSubCommit_Snapshot
887 : */
888 : void
889 10784 : AtSubCommit_Snapshot(int level)
890 : {
891 : ActiveSnapshotElt *active;
892 :
893 : /*
894 : * Relabel the active snapshots set in this subtransaction as though they
895 : * are owned by the parent subxact.
896 : */
897 10784 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
898 : {
899 9076 : if (active->as_level < level)
900 9076 : break;
901 0 : active->as_level = level - 1;
902 : }
903 10784 : }
904 :
905 : /*
906 : * AtSubAbort_Snapshot
907 : * Clean up snapshots after a subtransaction abort
908 : */
909 : void
910 9286 : AtSubAbort_Snapshot(int level)
911 : {
912 : /* Forget the active snapshots set by this subtransaction */
913 14960 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
914 : {
915 : ActiveSnapshotElt *next;
916 :
917 5674 : next = ActiveSnapshot->as_next;
918 :
919 : /*
920 : * Decrement the snapshot's active count. If it's still registered or
921 : * marked as active by an outer subtransaction, we can't free it yet.
922 : */
923 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
924 5674 : ActiveSnapshot->as_snap->active_count -= 1;
925 :
926 5674 : if (ActiveSnapshot->as_snap->active_count == 0 &&
927 5674 : ActiveSnapshot->as_snap->regd_count == 0)
928 5674 : FreeSnapshot(ActiveSnapshot->as_snap);
929 :
930 : /* and free the stack element */
931 5674 : pfree(ActiveSnapshot);
932 :
933 5674 : ActiveSnapshot = next;
934 : }
935 :
936 9286 : SnapshotResetXmin();
937 9286 : }
938 :
939 : /*
940 : * AtEOXact_Snapshot
941 : * Snapshot manager's cleanup function for end of transaction
942 : */
943 : void
944 791456 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
945 : {
946 : /*
947 : * In transaction-snapshot mode we must release our privately-managed
948 : * reference to the transaction snapshot. We must remove it from
949 : * RegisteredSnapshots to keep the check below happy. But we don't bother
950 : * to do FreeSnapshot, for two reasons: the memory will go away with
951 : * TopTransactionContext anyway, and if someone has left the snapshot
952 : * stacked as active, we don't want the code below to be chasing through a
953 : * dangling pointer.
954 : */
955 791456 : if (FirstXactSnapshot != NULL)
956 : {
957 : Assert(FirstXactSnapshot->regd_count > 0);
958 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
959 5798 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
960 : }
961 791456 : FirstXactSnapshot = NULL;
962 :
963 : /*
964 : * If we exported any snapshots, clean them up.
965 : */
966 791456 : if (exportedSnapshots != NIL)
967 : {
968 : ListCell *lc;
969 :
970 : /*
971 : * Get rid of the files. Unlink failure is only a WARNING because (1)
972 : * it's too late to abort the transaction, and (2) leaving a leaked
973 : * file around has little real consequence anyway.
974 : *
975 : * We also need to remove the snapshots from RegisteredSnapshots to
976 : * prevent a warning below.
977 : *
978 : * As with the FirstXactSnapshot, we don't need to free resources of
979 : * the snapshot itself as it will go away with the memory context.
980 : */
981 32 : foreach(lc, exportedSnapshots)
982 : {
983 16 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
984 :
985 16 : if (unlink(esnap->snapfile))
986 0 : elog(WARNING, "could not unlink file \"%s\": %m",
987 : esnap->snapfile);
988 :
989 16 : pairingheap_remove(&RegisteredSnapshots,
990 16 : &esnap->snapshot->ph_node);
991 : }
992 :
993 16 : exportedSnapshots = NIL;
994 : }
995 :
996 : /* Drop catalog snapshot if any */
997 791456 : InvalidateCatalogSnapshot();
998 :
999 : /* On commit, complain about leftover snapshots */
1000 791456 : if (isCommit)
1001 : {
1002 : ActiveSnapshotElt *active;
1003 :
1004 744332 : if (!pairingheap_is_empty(&RegisteredSnapshots))
1005 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
1006 :
1007 : /* complain about unpopped active snapshots */
1008 744332 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1009 0 : elog(WARNING, "snapshot %p still active", active);
1010 : }
1011 :
1012 : /*
1013 : * And reset our state. We don't need to free the memory explicitly --
1014 : * it'll go away with TopTransactionContext.
1015 : */
1016 791456 : ActiveSnapshot = NULL;
1017 791456 : pairingheap_reset(&RegisteredSnapshots);
1018 :
1019 791456 : CurrentSnapshot = NULL;
1020 791456 : SecondarySnapshot = NULL;
1021 :
1022 791456 : FirstSnapshotSet = false;
1023 :
1024 : /*
1025 : * During normal commit processing, we call ProcArrayEndTransaction() to
1026 : * reset the MyProc->xmin. That call happens prior to the call to
1027 : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1028 : */
1029 791456 : if (resetXmin)
1030 47914 : SnapshotResetXmin();
1031 :
1032 : Assert(resetXmin || MyProc->xmin == 0);
1033 791456 : }
1034 :
1035 :
1036 : /*
1037 : * ExportSnapshot
1038 : * Export the snapshot to a file so that other backends can import it.
1039 : * Returns the token (the file name) that can be used to import this
1040 : * snapshot.
1041 : */
1042 : char *
1043 16 : ExportSnapshot(Snapshot snapshot)
1044 : {
1045 : TransactionId topXid;
1046 : TransactionId *children;
1047 : ExportedSnapshot *esnap;
1048 : int nchildren;
1049 : int addTopXid;
1050 : StringInfoData buf;
1051 : FILE *f;
1052 : int i;
1053 : MemoryContext oldcxt;
1054 : char path[MAXPGPATH];
1055 : char pathtmp[MAXPGPATH];
1056 :
1057 : /*
1058 : * It's tempting to call RequireTransactionBlock here, since it's not very
1059 : * useful to export a snapshot that will disappear immediately afterwards.
1060 : * However, we haven't got enough information to do that, since we don't
1061 : * know if we're at top level or not. For example, we could be inside a
1062 : * plpgsql function that is going to fire off other transactions via
1063 : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1064 : * check.
1065 : *
1066 : * Also note that we don't make any restriction on the transaction's
1067 : * isolation level; however, importers must check the level if they are
1068 : * serializable.
1069 : */
1070 :
1071 : /*
1072 : * Get our transaction ID if there is one, to include in the snapshot.
1073 : */
1074 16 : topXid = GetTopTransactionIdIfAny();
1075 :
1076 : /*
1077 : * We cannot export a snapshot from a subtransaction because there's no
1078 : * easy way for importers to verify that the same subtransaction is still
1079 : * running.
1080 : */
1081 16 : if (IsSubTransaction())
1082 0 : ereport(ERROR,
1083 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1084 : errmsg("cannot export a snapshot from a subtransaction")));
1085 :
1086 : /*
1087 : * We do however allow previous committed subtransactions to exist.
1088 : * Importers of the snapshot must see them as still running, so get their
1089 : * XIDs to add them to the snapshot.
1090 : */
1091 16 : nchildren = xactGetCommittedChildren(&children);
1092 :
1093 : /*
1094 : * Generate file path for the snapshot. We start numbering of snapshots
1095 : * inside the transaction from 1.
1096 : */
1097 16 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1098 16 : MyProc->vxid.procNumber, MyProc->vxid.lxid,
1099 16 : list_length(exportedSnapshots) + 1);
1100 :
1101 : /*
1102 : * Copy the snapshot into TopTransactionContext, add it to the
1103 : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1104 : * ensure that the snapshot's xmin is honored for the rest of the
1105 : * transaction.
1106 : */
1107 16 : snapshot = CopySnapshot(snapshot);
1108 :
1109 16 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1110 16 : esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1111 16 : esnap->snapfile = pstrdup(path);
1112 16 : esnap->snapshot = snapshot;
1113 16 : exportedSnapshots = lappend(exportedSnapshots, esnap);
1114 16 : MemoryContextSwitchTo(oldcxt);
1115 :
1116 16 : snapshot->regd_count++;
1117 16 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1118 :
1119 : /*
1120 : * Fill buf with a text serialization of the snapshot, plus identification
1121 : * data about this transaction. The format expected by ImportSnapshot is
1122 : * pretty rigid: each line must be fieldname:value.
1123 : */
1124 16 : initStringInfo(&buf);
1125 :
1126 16 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1127 16 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1128 16 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1129 16 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1130 16 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1131 :
1132 16 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1133 16 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1134 :
1135 : /*
1136 : * We must include our own top transaction ID in the top-xid data, since
1137 : * by definition we will still be running when the importing transaction
1138 : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1139 : * the snapshot. (There must, therefore, be enough room to add it.)
1140 : *
1141 : * However, it could be that our topXid is after the xmax, in which case
1142 : * we shouldn't include it because xip[] members are expected to be before
1143 : * xmax. (We need not make the same check for subxip[] members, see
1144 : * snapshot.h.)
1145 : */
1146 16 : addTopXid = (TransactionIdIsValid(topXid) &&
1147 16 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1148 16 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1149 16 : for (i = 0; i < snapshot->xcnt; i++)
1150 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1151 16 : if (addTopXid)
1152 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
1153 :
1154 : /*
1155 : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1156 : * we have to cope with possible overflow.
1157 : */
1158 32 : if (snapshot->suboverflowed ||
1159 16 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1160 0 : appendStringInfoString(&buf, "sof:1\n");
1161 : else
1162 : {
1163 16 : appendStringInfoString(&buf, "sof:0\n");
1164 16 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1165 16 : for (i = 0; i < snapshot->subxcnt; i++)
1166 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1167 16 : for (i = 0; i < nchildren; i++)
1168 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
1169 : }
1170 16 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1171 :
1172 : /*
1173 : * Now write the text representation into a file. We first write to a
1174 : * ".tmp" filename, and rename to final filename if no error. This
1175 : * ensures that no other backend can read an incomplete file
1176 : * (ImportSnapshot won't allow it because of its valid-characters check).
1177 : */
1178 16 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1179 16 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1180 0 : ereport(ERROR,
1181 : (errcode_for_file_access(),
1182 : errmsg("could not create file \"%s\": %m", pathtmp)));
1183 :
1184 16 : if (fwrite(buf.data, buf.len, 1, f) != 1)
1185 0 : ereport(ERROR,
1186 : (errcode_for_file_access(),
1187 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1188 :
1189 : /* no fsync() since file need not survive a system crash */
1190 :
1191 16 : if (FreeFile(f))
1192 0 : ereport(ERROR,
1193 : (errcode_for_file_access(),
1194 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1195 :
1196 : /*
1197 : * Now that we have written everything into a .tmp file, rename the file
1198 : * to remove the .tmp suffix.
1199 : */
1200 16 : if (rename(pathtmp, path) < 0)
1201 0 : ereport(ERROR,
1202 : (errcode_for_file_access(),
1203 : errmsg("could not rename file \"%s\" to \"%s\": %m",
1204 : pathtmp, path)));
1205 :
1206 : /*
1207 : * The basename of the file is what we return from pg_export_snapshot().
1208 : * It's already in path in a textual format and we know that the path
1209 : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1210 : * and pstrdup it so as not to return the address of a local variable.
1211 : */
1212 16 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1213 : }
1214 :
1215 : /*
1216 : * pg_export_snapshot
1217 : * SQL-callable wrapper for ExportSnapshot.
1218 : */
1219 : Datum
1220 16 : pg_export_snapshot(PG_FUNCTION_ARGS)
1221 : {
1222 : char *snapshotName;
1223 :
1224 16 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1225 16 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1226 : }
1227 :
1228 :
1229 : /*
1230 : * Parsing subroutines for ImportSnapshot: parse a line with the given
1231 : * prefix followed by a value, and advance *s to the next line. The
1232 : * filename is provided for use in error messages.
1233 : */
1234 : static int
1235 224 : parseIntFromText(const char *prefix, char **s, const char *filename)
1236 : {
1237 224 : char *ptr = *s;
1238 224 : int prefixlen = strlen(prefix);
1239 : int val;
1240 :
1241 224 : if (strncmp(ptr, prefix, prefixlen) != 0)
1242 0 : ereport(ERROR,
1243 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1244 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1245 224 : ptr += prefixlen;
1246 224 : if (sscanf(ptr, "%d", &val) != 1)
1247 0 : ereport(ERROR,
1248 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1249 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1250 224 : ptr = strchr(ptr, '\n');
1251 224 : if (!ptr)
1252 0 : ereport(ERROR,
1253 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1254 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1255 224 : *s = ptr + 1;
1256 224 : return val;
1257 : }
1258 :
1259 : static TransactionId
1260 96 : parseXidFromText(const char *prefix, char **s, const char *filename)
1261 : {
1262 96 : char *ptr = *s;
1263 96 : int prefixlen = strlen(prefix);
1264 : TransactionId val;
1265 :
1266 96 : if (strncmp(ptr, prefix, prefixlen) != 0)
1267 0 : ereport(ERROR,
1268 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1269 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1270 96 : ptr += prefixlen;
1271 96 : if (sscanf(ptr, "%u", &val) != 1)
1272 0 : ereport(ERROR,
1273 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1274 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1275 96 : ptr = strchr(ptr, '\n');
1276 96 : if (!ptr)
1277 0 : ereport(ERROR,
1278 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1279 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1280 96 : *s = ptr + 1;
1281 96 : return val;
1282 : }
1283 :
1284 : static void
1285 32 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1286 : VirtualTransactionId *vxid)
1287 : {
1288 32 : char *ptr = *s;
1289 32 : int prefixlen = strlen(prefix);
1290 :
1291 32 : if (strncmp(ptr, prefix, prefixlen) != 0)
1292 0 : ereport(ERROR,
1293 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1294 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1295 32 : ptr += prefixlen;
1296 32 : if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1297 0 : ereport(ERROR,
1298 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1299 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1300 32 : ptr = strchr(ptr, '\n');
1301 32 : if (!ptr)
1302 0 : ereport(ERROR,
1303 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1304 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1305 32 : *s = ptr + 1;
1306 32 : }
1307 :
1308 : /*
1309 : * ImportSnapshot
1310 : * Import a previously exported snapshot. The argument should be a
1311 : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1312 : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1313 : */
1314 : void
1315 44 : ImportSnapshot(const char *idstr)
1316 : {
1317 : char path[MAXPGPATH];
1318 : FILE *f;
1319 : struct stat stat_buf;
1320 : char *filebuf;
1321 : int xcnt;
1322 : int i;
1323 : VirtualTransactionId src_vxid;
1324 : int src_pid;
1325 : Oid src_dbid;
1326 : int src_isolevel;
1327 : bool src_readonly;
1328 : SnapshotData snapshot;
1329 :
1330 : /*
1331 : * Must be at top level of a fresh transaction. Note in particular that
1332 : * we check we haven't acquired an XID --- if we have, it's conceivable
1333 : * that the snapshot would show it as not running, making for very screwy
1334 : * behavior.
1335 : */
1336 88 : if (FirstSnapshotSet ||
1337 88 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
1338 44 : IsSubTransaction())
1339 0 : ereport(ERROR,
1340 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1341 : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1342 :
1343 : /*
1344 : * If we are in read committed mode then the next query would execute with
1345 : * a new snapshot thus making this function call quite useless.
1346 : */
1347 44 : if (!IsolationUsesXactSnapshot())
1348 0 : ereport(ERROR,
1349 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1350 : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1351 :
1352 : /*
1353 : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1354 : * this mainly to prevent reading arbitrary files.
1355 : */
1356 44 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1357 6 : ereport(ERROR,
1358 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1359 : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1360 :
1361 : /* OK, read the file */
1362 38 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1363 :
1364 38 : f = AllocateFile(path, PG_BINARY_R);
1365 38 : if (!f)
1366 : {
1367 : /*
1368 : * If file is missing while identifier has a correct format, avoid
1369 : * system errors.
1370 : */
1371 6 : if (errno == ENOENT)
1372 6 : ereport(ERROR,
1373 : (errcode(ERRCODE_UNDEFINED_OBJECT),
1374 : errmsg("snapshot \"%s\" does not exist", idstr)));
1375 : else
1376 0 : ereport(ERROR,
1377 : (errcode_for_file_access(),
1378 : errmsg("could not open file \"%s\" for reading: %m",
1379 : path)));
1380 : }
1381 :
1382 : /* get the size of the file so that we know how much memory we need */
1383 32 : if (fstat(fileno(f), &stat_buf))
1384 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
1385 :
1386 : /* and read the file into a palloc'd string */
1387 32 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1388 32 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1389 0 : elog(ERROR, "could not read file \"%s\": %m", path);
1390 :
1391 32 : filebuf[stat_buf.st_size] = '\0';
1392 :
1393 32 : FreeFile(f);
1394 :
1395 : /*
1396 : * Construct a snapshot struct by parsing the file content.
1397 : */
1398 32 : memset(&snapshot, 0, sizeof(snapshot));
1399 :
1400 32 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1401 32 : src_pid = parseIntFromText("pid:", &filebuf, path);
1402 : /* we abuse parseXidFromText a bit here ... */
1403 32 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
1404 32 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
1405 32 : src_readonly = parseIntFromText("ro:", &filebuf, path);
1406 :
1407 32 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1408 :
1409 32 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1410 32 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1411 :
1412 32 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1413 :
1414 : /* sanity-check the xid count before palloc */
1415 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1416 0 : ereport(ERROR,
1417 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1418 : errmsg("invalid snapshot data in file \"%s\"", path)));
1419 :
1420 32 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1421 32 : for (i = 0; i < xcnt; i++)
1422 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1423 :
1424 32 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1425 :
1426 32 : if (!snapshot.suboverflowed)
1427 : {
1428 32 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1429 :
1430 : /* sanity-check the xid count before palloc */
1431 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1432 0 : ereport(ERROR,
1433 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1434 : errmsg("invalid snapshot data in file \"%s\"", path)));
1435 :
1436 32 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1437 32 : for (i = 0; i < xcnt; i++)
1438 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1439 : }
1440 : else
1441 : {
1442 0 : snapshot.subxcnt = 0;
1443 0 : snapshot.subxip = NULL;
1444 : }
1445 :
1446 32 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1447 :
1448 : /*
1449 : * Do some additional sanity checking, just to protect ourselves. We
1450 : * don't trouble to check the array elements, just the most critical
1451 : * fields.
1452 : */
1453 32 : if (!VirtualTransactionIdIsValid(src_vxid) ||
1454 32 : !OidIsValid(src_dbid) ||
1455 32 : !TransactionIdIsNormal(snapshot.xmin) ||
1456 32 : !TransactionIdIsNormal(snapshot.xmax))
1457 0 : ereport(ERROR,
1458 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1459 : errmsg("invalid snapshot data in file \"%s\"", path)));
1460 :
1461 : /*
1462 : * If we're serializable, the source transaction must be too, otherwise
1463 : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1464 : * non-read-only transaction can't adopt a snapshot from a read-only
1465 : * transaction, as predicate.c handles the cases very differently.
1466 : */
1467 32 : if (IsolationIsSerializable())
1468 : {
1469 0 : if (src_isolevel != XACT_SERIALIZABLE)
1470 0 : ereport(ERROR,
1471 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1472 : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1473 0 : if (src_readonly && !XactReadOnly)
1474 0 : ereport(ERROR,
1475 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1476 : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1477 : }
1478 :
1479 : /*
1480 : * We cannot import a snapshot that was taken in a different database,
1481 : * because vacuum calculates OldestXmin on a per-database basis; so the
1482 : * source transaction's xmin doesn't protect us from data loss. This
1483 : * restriction could be removed if the source transaction were to mark its
1484 : * xmin as being globally applicable. But that would require some
1485 : * additional syntax, since that has to be known when the snapshot is
1486 : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1487 : */
1488 32 : if (src_dbid != MyDatabaseId)
1489 0 : ereport(ERROR,
1490 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1491 : errmsg("cannot import a snapshot from a different database")));
1492 :
1493 : /* OK, install the snapshot */
1494 32 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1495 32 : }
1496 :
1497 : /*
1498 : * XactHasExportedSnapshots
1499 : * Test whether current transaction has exported any snapshots.
1500 : */
1501 : bool
1502 818 : XactHasExportedSnapshots(void)
1503 : {
1504 818 : return (exportedSnapshots != NIL);
1505 : }
1506 :
1507 : /*
1508 : * DeleteAllExportedSnapshotFiles
1509 : * Clean up any files that have been left behind by a crashed backend
1510 : * that had exported snapshots before it died.
1511 : *
1512 : * This should be called during database startup or crash recovery.
1513 : */
1514 : void
1515 410 : DeleteAllExportedSnapshotFiles(void)
1516 : {
1517 : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1518 : DIR *s_dir;
1519 : struct dirent *s_de;
1520 :
1521 : /*
1522 : * Problems in reading the directory, or unlinking files, are reported at
1523 : * LOG level. Since we're running in the startup process, ERROR level
1524 : * would prevent database start, and it's not important enough for that.
1525 : */
1526 410 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1527 :
1528 1230 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1529 : {
1530 820 : if (strcmp(s_de->d_name, ".") == 0 ||
1531 410 : strcmp(s_de->d_name, "..") == 0)
1532 820 : continue;
1533 :
1534 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1535 :
1536 0 : if (unlink(buf) != 0)
1537 0 : ereport(LOG,
1538 : (errcode_for_file_access(),
1539 : errmsg("could not remove file \"%s\": %m", buf)));
1540 : }
1541 :
1542 410 : FreeDir(s_dir);
1543 410 : }
1544 :
1545 : /*
1546 : * ThereAreNoPriorRegisteredSnapshots
1547 : * Is the registered snapshot count less than or equal to one?
1548 : *
1549 : * Don't use this to settle important decisions. While zero registrations and
1550 : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1551 : * guarantees about the significance of one registered snapshot.
1552 : */
1553 : bool
1554 60 : ThereAreNoPriorRegisteredSnapshots(void)
1555 : {
1556 60 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
1557 0 : pairingheap_is_singular(&RegisteredSnapshots))
1558 60 : return true;
1559 :
1560 0 : return false;
1561 : }
1562 :
1563 : /*
1564 : * HaveRegisteredOrActiveSnapshot
1565 : * Is there any registered or active snapshot?
1566 : *
1567 : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1568 : * this function to return true. That allows this function to be used in
1569 : * checks enforcing a longer-lived snapshot.
1570 : */
1571 : bool
1572 45812 : HaveRegisteredOrActiveSnapshot(void)
1573 : {
1574 45812 : if (ActiveSnapshot != NULL)
1575 45426 : return true;
1576 :
1577 : /*
1578 : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1579 : * removed at any time due to invalidation processing. If explicitly
1580 : * registered more than one snapshot has to be in RegisteredSnapshots.
1581 : */
1582 386 : if (CatalogSnapshot != NULL &&
1583 14 : pairingheap_is_singular(&RegisteredSnapshots))
1584 0 : return false;
1585 :
1586 386 : return !pairingheap_is_empty(&RegisteredSnapshots);
1587 : }
1588 :
1589 :
1590 : /*
1591 : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1592 : * access to behave just like it did at a certain point in the past.
1593 : *
1594 : * Needed for logical decoding.
1595 : */
1596 : void
1597 8508 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1598 : {
1599 : Assert(historic_snapshot != NULL);
1600 :
1601 : /* setup the timetravel snapshot */
1602 8508 : HistoricSnapshot = historic_snapshot;
1603 :
1604 : /* setup (cmin, cmax) lookup hash */
1605 8508 : tuplecid_data = tuplecids;
1606 8508 : }
1607 :
1608 :
1609 : /*
1610 : * Make catalog snapshots behave normally again.
1611 : */
1612 : void
1613 8504 : TeardownHistoricSnapshot(bool is_error)
1614 : {
1615 8504 : HistoricSnapshot = NULL;
1616 8504 : tuplecid_data = NULL;
1617 8504 : }
1618 :
1619 : bool
1620 18071646 : HistoricSnapshotActive(void)
1621 : {
1622 18071646 : return HistoricSnapshot != NULL;
1623 : }
1624 :
1625 : HTAB *
1626 1440 : HistoricSnapshotGetTupleCids(void)
1627 : {
1628 : Assert(HistoricSnapshotActive());
1629 1440 : return tuplecid_data;
1630 : }
1631 :
1632 : /*
1633 : * EstimateSnapshotSpace
1634 : * Returns the size needed to store the given snapshot.
1635 : *
1636 : * We are exporting only required fields from the Snapshot, stored in
1637 : * SerializedSnapshotData.
1638 : */
1639 : Size
1640 1942 : EstimateSnapshotSpace(Snapshot snapshot)
1641 : {
1642 : Size size;
1643 :
1644 : Assert(snapshot != InvalidSnapshot);
1645 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1646 :
1647 : /* We allocate any XID arrays needed in the same palloc block. */
1648 1942 : size = add_size(sizeof(SerializedSnapshotData),
1649 1942 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
1650 1942 : if (snapshot->subxcnt > 0 &&
1651 0 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1652 0 : size = add_size(size,
1653 0 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1654 :
1655 1942 : return size;
1656 : }
1657 :
1658 : /*
1659 : * SerializeSnapshot
1660 : * Dumps the serialized snapshot (extracted from given snapshot) onto the
1661 : * memory location at start_address.
1662 : */
1663 : void
1664 1878 : SerializeSnapshot(Snapshot snapshot, char *start_address)
1665 : {
1666 : SerializedSnapshotData serialized_snapshot;
1667 :
1668 : Assert(snapshot->subxcnt >= 0);
1669 :
1670 : /* Copy all required fields */
1671 1878 : serialized_snapshot.xmin = snapshot->xmin;
1672 1878 : serialized_snapshot.xmax = snapshot->xmax;
1673 1878 : serialized_snapshot.xcnt = snapshot->xcnt;
1674 1878 : serialized_snapshot.subxcnt = snapshot->subxcnt;
1675 1878 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1676 1878 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1677 1878 : serialized_snapshot.curcid = snapshot->curcid;
1678 :
1679 : /*
1680 : * Ignore the SubXID array if it has overflowed, unless the snapshot was
1681 : * taken during recovery - in that case, top-level XIDs are in subxip as
1682 : * well, and we mustn't lose them.
1683 : */
1684 1878 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1685 0 : serialized_snapshot.subxcnt = 0;
1686 :
1687 : /* Copy struct to possibly-unaligned buffer */
1688 1878 : memcpy(start_address,
1689 : &serialized_snapshot, sizeof(SerializedSnapshotData));
1690 :
1691 : /* Copy XID array */
1692 1878 : if (snapshot->xcnt > 0)
1693 638 : memcpy((TransactionId *) (start_address +
1694 : sizeof(SerializedSnapshotData)),
1695 638 : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1696 :
1697 : /*
1698 : * Copy SubXID array. Don't bother to copy it if it had overflowed,
1699 : * though, because it's not used anywhere in that case. Except if it's a
1700 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
1701 : * well in that case, so we mustn't lose them.
1702 : */
1703 1878 : if (serialized_snapshot.subxcnt > 0)
1704 : {
1705 0 : Size subxipoff = sizeof(SerializedSnapshotData) +
1706 0 : snapshot->xcnt * sizeof(TransactionId);
1707 :
1708 0 : memcpy((TransactionId *) (start_address + subxipoff),
1709 0 : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1710 : }
1711 1878 : }
1712 :
1713 : /*
1714 : * RestoreSnapshot
1715 : * Restore a serialized snapshot from the specified address.
1716 : *
1717 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1718 : * to 0. The returned snapshot has the copied flag set.
1719 : */
1720 : Snapshot
1721 6726 : RestoreSnapshot(char *start_address)
1722 : {
1723 : SerializedSnapshotData serialized_snapshot;
1724 : Size size;
1725 : Snapshot snapshot;
1726 : TransactionId *serialized_xids;
1727 :
1728 6726 : memcpy(&serialized_snapshot, start_address,
1729 : sizeof(SerializedSnapshotData));
1730 6726 : serialized_xids = (TransactionId *)
1731 : (start_address + sizeof(SerializedSnapshotData));
1732 :
1733 : /* We allocate any XID arrays needed in the same palloc block. */
1734 6726 : size = sizeof(SnapshotData)
1735 6726 : + serialized_snapshot.xcnt * sizeof(TransactionId)
1736 6726 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
1737 :
1738 : /* Copy all required fields */
1739 6726 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1740 6726 : snapshot->snapshot_type = SNAPSHOT_MVCC;
1741 6726 : snapshot->xmin = serialized_snapshot.xmin;
1742 6726 : snapshot->xmax = serialized_snapshot.xmax;
1743 6726 : snapshot->xip = NULL;
1744 6726 : snapshot->xcnt = serialized_snapshot.xcnt;
1745 6726 : snapshot->subxip = NULL;
1746 6726 : snapshot->subxcnt = serialized_snapshot.subxcnt;
1747 6726 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1748 6726 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1749 6726 : snapshot->curcid = serialized_snapshot.curcid;
1750 6726 : snapshot->snapXactCompletionCount = 0;
1751 :
1752 : /* Copy XIDs, if present. */
1753 6726 : if (serialized_snapshot.xcnt > 0)
1754 : {
1755 1756 : snapshot->xip = (TransactionId *) (snapshot + 1);
1756 1756 : memcpy(snapshot->xip, serialized_xids,
1757 1756 : serialized_snapshot.xcnt * sizeof(TransactionId));
1758 : }
1759 :
1760 : /* Copy SubXIDs, if present. */
1761 6726 : if (serialized_snapshot.subxcnt > 0)
1762 : {
1763 0 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1764 0 : serialized_snapshot.xcnt;
1765 0 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1766 0 : serialized_snapshot.subxcnt * sizeof(TransactionId));
1767 : }
1768 :
1769 : /* Set the copied flag so that the caller will set refcounts correctly. */
1770 6726 : snapshot->regd_count = 0;
1771 6726 : snapshot->active_count = 0;
1772 6726 : snapshot->copied = true;
1773 :
1774 6726 : return snapshot;
1775 : }
1776 :
1777 : /*
1778 : * Install a restored snapshot as the transaction snapshot.
1779 : *
1780 : * The second argument is of type void * so that snapmgr.h need not include
1781 : * the declaration for PGPROC.
1782 : */
1783 : void
1784 3084 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
1785 : {
1786 3084 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1787 3084 : }
1788 :
1789 : /*
1790 : * XidInMVCCSnapshot
1791 : * Is the given XID still-in-progress according to the snapshot?
1792 : *
1793 : * Note: GetSnapshotData never stores either top xid or subxids of our own
1794 : * backend into a snapshot, so these xids will not be reported as "running"
1795 : * by this function. This is OK for current uses, because we always check
1796 : * TransactionIdIsCurrentTransactionId first, except when it's known the
1797 : * XID could not be ours anyway.
1798 : */
1799 : bool
1800 130801530 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1801 : {
1802 : /*
1803 : * Make a quick range check to eliminate most XIDs without looking at the
1804 : * xip arrays. Note that this is OK even if we convert a subxact XID to
1805 : * its parent below, because a subxact with XID < xmin has surely also got
1806 : * a parent with XID < xmin, while one with XID >= xmax must belong to a
1807 : * parent that was not yet committed at the time of this snapshot.
1808 : */
1809 :
1810 : /* Any xid < xmin is not in-progress */
1811 130801530 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1812 124845782 : return false;
1813 : /* Any xid >= xmax is in-progress */
1814 5955748 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1815 25706 : return true;
1816 :
1817 : /*
1818 : * Snapshot information is stored slightly differently in snapshots taken
1819 : * during recovery.
1820 : */
1821 5930042 : if (!snapshot->takenDuringRecovery)
1822 : {
1823 : /*
1824 : * If the snapshot contains full subxact data, the fastest way to
1825 : * check things is just to compare the given XID against both subxact
1826 : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1827 : * use pg_subtrans to convert a subxact XID to its parent XID, but
1828 : * then we need only look at top-level XIDs not subxacts.
1829 : */
1830 5929882 : if (!snapshot->suboverflowed)
1831 : {
1832 : /* we have full data, so search subxip */
1833 5929182 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1834 540 : return true;
1835 :
1836 : /* not there, fall through to search xip[] */
1837 : }
1838 : else
1839 : {
1840 : /*
1841 : * Snapshot overflowed, so convert xid to top-level. This is safe
1842 : * because we eliminated too-old XIDs above.
1843 : */
1844 700 : xid = SubTransGetTopmostTransaction(xid);
1845 :
1846 : /*
1847 : * If xid was indeed a subxact, we might now have an xid < xmin,
1848 : * so recheck to avoid an array scan. No point in rechecking
1849 : * xmax.
1850 : */
1851 700 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1852 0 : return false;
1853 : }
1854 :
1855 5929342 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1856 28744 : return true;
1857 : }
1858 : else
1859 : {
1860 : /*
1861 : * In recovery we store all xids in the subxip array because it is by
1862 : * far the bigger array, and we mostly don't know which xids are
1863 : * top-level and which are subxacts. The xip array is empty.
1864 : *
1865 : * We start by searching subtrans, if we overflowed.
1866 : */
1867 160 : if (snapshot->suboverflowed)
1868 : {
1869 : /*
1870 : * Snapshot overflowed, so convert xid to top-level. This is safe
1871 : * because we eliminated too-old XIDs above.
1872 : */
1873 8 : xid = SubTransGetTopmostTransaction(xid);
1874 :
1875 : /*
1876 : * If xid was indeed a subxact, we might now have an xid < xmin,
1877 : * so recheck to avoid an array scan. No point in rechecking
1878 : * xmax.
1879 : */
1880 8 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1881 0 : return false;
1882 : }
1883 :
1884 : /*
1885 : * We now have either a top-level xid higher than xmin or an
1886 : * indeterminate xid. We don't know whether it's top level or subxact
1887 : * but it doesn't matter. If it's present, the xid is visible.
1888 : */
1889 160 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1890 12 : return true;
1891 : }
1892 :
1893 5900746 : return false;
1894 : }
1895 :
1896 : /* ResourceOwner callbacks */
1897 :
1898 : static void
1899 55360 : ResOwnerReleaseSnapshot(Datum res)
1900 : {
1901 55360 : UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1902 55360 : }
|