Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * snapmgr.c
4 : * PostgreSQL snapshot manager
5 : *
6 : * We keep track of snapshots in two ways: those "registered" by resowner.c,
7 : * and the "active snapshot" stack. All snapshots in either of them live in
8 : * persistent memory. When a snapshot is no longer in any of these lists
9 : * (tracked by separate refcounts on each snapshot), its memory can be freed.
10 : *
11 : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
12 : * regd_count and list it in RegisteredSnapshots, but this reference is not
13 : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
14 : * to track this snapshot reference, but that introduces logical circularity
15 : * and thus makes it impossible to clean up in a sane fashion. It's better to
16 : * handle this reference as an internally-tracked registration, so that this
17 : * module is entirely lower-level than ResourceOwners.
18 : *
19 : * Likewise, any snapshots that have been exported by pg_export_snapshot
20 : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
21 : * tracked by any resource owner.
22 : *
23 : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
24 : * is valid, but is not tracked by any resource owner.
25 : *
26 : * The same is true for historic snapshots used during logical decoding;
27 : * their lifetime is managed separately (as they live longer than one xact.c
28 : * transaction).
29 : *
30 : * These arrangements let us reset MyProc->xmin when there are no snapshots
31 : * referenced by this transaction, and advance it when the one with oldest
32 : * Xmin is no longer referenced. For simplicity however, only registered
33 : * snapshots, not active snapshots, participate in tracking which one is oldest;
34 : * we don't try to change MyProc->xmin except when the active-snapshot
35 : * stack is empty.
36 : *
37 : *
38 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
39 : * Portions Copyright (c) 1994, Regents of the University of California
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/time/snapmgr.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/stat.h>
49 : #include <unistd.h>
50 :
51 : #include "access/subtrans.h"
52 : #include "access/transam.h"
53 : #include "access/xact.h"
54 : #include "datatype/timestamp.h"
55 : #include "lib/pairingheap.h"
56 : #include "miscadmin.h"
57 : #include "port/pg_lfind.h"
58 : #include "storage/fd.h"
59 : #include "storage/predicate.h"
60 : #include "storage/proc.h"
61 : #include "storage/procarray.h"
62 : #include "utils/builtins.h"
63 : #include "utils/memutils.h"
64 : #include "utils/resowner.h"
65 : #include "utils/snapmgr.h"
66 : #include "utils/syscache.h"
67 :
68 :
69 : /*
70 : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
71 : * mode, and to the latest one taken in a read-committed transaction.
72 : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
73 : * instant, even in transaction-snapshot mode. It should only be used for
74 : * special-purpose code (say, RI checking.) CatalogSnapshot points to an
75 : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
76 : * whenever a system catalog change occurs.
77 : *
78 : * These SnapshotData structs are static to simplify memory allocation
79 : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
80 : */
81 : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};	/* storage passed to GetSnapshotData for CurrentSnapshot */
82 : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};	/* storage passed to GetSnapshotData for SecondarySnapshot */
83 : SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};	/* storage for CatalogSnapshot; note: not static */
84 : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};	/* not static; not referenced in the visible part of this file */
85 : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};	/* not static; not referenced in the visible part of this file */
86 :
87 : /* Pointers to valid snapshots */
88 : static Snapshot CurrentSnapshot = NULL;	/* set by GetTransactionSnapshot and SetTransactionSnapshot */
89 : static Snapshot SecondarySnapshot = NULL;	/* set by GetLatestSnapshot */
90 : static Snapshot CatalogSnapshot = NULL;	/* set by GetNonHistoricCatalogSnapshot, cleared by InvalidateCatalogSnapshot */
91 : static Snapshot HistoricSnapshot = NULL;	/* handed out while HistoricSnapshotActive() is true */
92 :
93 : /*
94 : * These are updated by GetSnapshotData. We initialize them this way
95 : * for the convenience of TransactionIdIsInProgress: even in bootstrap
96 : * mode, we don't want it to say that BootstrapTransactionId is in progress.
97 : */
98 : TransactionId TransactionXmin = FirstNormalTransactionId;	/* not static; initial value explained in the comment above */
99 : TransactionId RecentXmin = FirstNormalTransactionId;	/* not static; same initial value as TransactionXmin */
100 :
101 : /* Hash table holding the (table, ctid) => (cmin, cmax) mapping during timetravel */
102 : static HTAB *tuplecid_data = NULL;
103 :
104 : /*
105 : * Elements of the active snapshot stack.
106 : *
107 : * Each element here accounts for exactly one active_count on SnapshotData.
108 : *
109 : * NB: the code assumes that elements in this list are in non-increasing
110 : * order of as_level; also, the list must be NULL-terminated.
111 : */
112 : typedef struct ActiveSnapshotElt
113 : {
114 : Snapshot as_snap;	/* snapshot this entry refers to; holds one active_count on it */
115 : int as_level;	/* transaction nesting level recorded when the entry was pushed */
116 : struct ActiveSnapshotElt *as_next;	/* entry that was on top before this one, or NULL */
117 : } ActiveSnapshotElt;
118 :
119 : /* Top of the stack of active snapshots */
120 : static ActiveSnapshotElt *ActiveSnapshot = NULL;
121 :
122 : /* Bottom of the stack of active snapshots; set by the first push, reset to NULL when the stack becomes empty */
123 : static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
124 :
125 : /*
126 : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
127 : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
128 : */
129 : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
130 : void *arg);
131 :
132 : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};	/* heap of registered snapshots, compared with xmin_cmp */
133 :
134 : /* Has this transaction's first snapshot been established (by GetTransactionSnapshot or SetTransactionSnapshot)? */
135 : bool FirstSnapshotSet = false;
136 :
137 : /*
138 : * Remember the serializable transaction snapshot, if any. We cannot trust
139 : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
140 : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
141 : */
142 : static Snapshot FirstXactSnapshot = NULL;	/* saved copy made in transaction-snapshot mode; NULL otherwise */
143 :
144 : /* Name of the directory used for exported-snapshot files */
145 : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
146 :
147 : /* Structure holding info about exported snapshot. */
148 : typedef struct ExportedSnapshot
149 : {
150 : char *snapfile;	/* presumably the name of the export file -- confirm against the export code */
151 : Snapshot snapshot;	/* the snapshot that was exported */
152 : } ExportedSnapshot;
153 :
154 : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
155 : static List *exportedSnapshots = NIL;
156 :
157 : /* Prototypes for local functions */
158 : static Snapshot CopySnapshot(Snapshot snapshot);	/* make a copy in TopTransactionContext with zeroed refcounts */
159 : static void UnregisterSnapshotNoOwner(Snapshot snapshot);	/* drop one registration without touching any resource owner */
160 : static void FreeSnapshot(Snapshot snapshot);	/* pfree a copied snapshot that has no references left */
161 : static void SnapshotResetXmin(void);
162 :
163 : /* ResourceOwner callback that releases a tracked snapshot reference */
164 : static void ResOwnerReleaseSnapshot(Datum res);
165 :
166 : static const ResourceOwnerDesc snapshot_resowner_desc =	/* resource kind passed to ResourceOwnerRemember/Forget for snapshots */
167 : {
168 : .name = "snapshot reference",
169 : .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
170 : .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
171 : .ReleaseResource = ResOwnerReleaseSnapshot,	/* callback declared just above */
172 : .DebugPrint = NULL /* the default message is fine */
173 : };
174 :
175 : /* Convenience wrappers over ResourceOwnerRemember/Forget */
176 : static inline void
177 12987258 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
178 : {
179 12987258 : ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
180 12987258 : }
181 : static inline void
182 12931674 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
183 : {
184 12931674 : ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
185 12931674 : }
186 :
187 : /*
188 : * Snapshot fields to be serialized.
189 : *
190 : * Only these fields need to be sent to the cooperating backend; the
191 : * remaining ones can (and must) be set by the receiver upon restore.
192 : */
193 : typedef struct SerializedSnapshotData
194 : {
195 : TransactionId xmin;	/* same-named SnapshotData field */
196 : TransactionId xmax;	/* same-named SnapshotData field */
197 : uint32 xcnt;	/* number of entries in the snapshot's xip array */
198 : int32 subxcnt;	/* number of entries in the snapshot's subxip array */
199 : bool suboverflowed;	/* same-named SnapshotData field */
200 : bool takenDuringRecovery;	/* same-named SnapshotData field */
201 : CommandId curcid;	/* same-named SnapshotData field */
202 : TimestampTz whenTaken;	/* NOTE(review): not used in the visible part of this file -- confirm meaning */
203 : XLogRecPtr lsn;	/* same-named SnapshotData field (compared by GetOldestSnapshot) */
204 : } SerializedSnapshotData;
205 :
206 : /*
207 : * GetTransactionSnapshot
208 : * Get the appropriate snapshot for a new query in a transaction.
209 : *
210 : * Note that the return value may point at static storage that will be modified
211 : * by future calls and by CommandCounterIncrement(). Callers should call
212 : * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
213 : * used very long.
214 : */
215 : Snapshot
216 1781114 : GetTransactionSnapshot(void)
217 : {
218 : /*
219 : * Return historic snapshot if doing logical decoding. We'll never need a
220 : * non-historic transaction snapshot in this (sub-)transaction, so there's
221 : * no need to be careful to set one up for later calls to
222 : * GetTransactionSnapshot().
223 : */
224 1781114 : if (HistoricSnapshotActive())
225 : {
226 : Assert(!FirstSnapshotSet);
227 0 : return HistoricSnapshot;
228 : }
229 :
230 : /* First call in transaction? */
231 1781114 : if (!FirstSnapshotSet)
232 : {
233 : /*
234 : * Don't allow catalog snapshot to be older than xact snapshot. Must
235 : * do this first to allow the empty-heap Assert to succeed.
236 : */
237 621948 : InvalidateCatalogSnapshot();
238 :
239 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
240 : Assert(FirstXactSnapshot == NULL);
241 :
242 621948 : if (IsInParallelMode())
243 0 : elog(ERROR,
244 : "cannot take query snapshot during a parallel operation");
245 :
246 : /*
247 : * In transaction-snapshot mode, the first snapshot must live until
248 : * end of xact regardless of what the caller does with it, so we must
249 : * make a copy of it rather than returning CurrentSnapshotData
250 : * directly. Furthermore, if we're running in serializable mode,
251 : * predicate.c needs to wrap the snapshot fetch in its own processing.
252 : */
253 621948 : if (IsolationUsesXactSnapshot())
254 : {
255 : /* First, create the snapshot in CurrentSnapshotData */
256 5342 : if (IsolationIsSerializable())
257 3280 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
258 : else
259 2062 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
260 : /* Make a saved copy */
261 5342 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
262 5342 : FirstXactSnapshot = CurrentSnapshot;
263 : /* Mark it as "registered" in FirstXactSnapshot */
264 5342 : FirstXactSnapshot->regd_count++;
265 5342 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
266 : }
267 : else
268 616606 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
269 :
270 621948 : FirstSnapshotSet = true;
271 621948 : return CurrentSnapshot;
272 : }
273 :
274 1159166 : if (IsolationUsesXactSnapshot())
275 207736 : return CurrentSnapshot;
276 :
277 : /* Don't allow catalog snapshot to be older than xact snapshot. */
278 951430 : InvalidateCatalogSnapshot();
279 :
280 951430 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
281 :
282 951430 : return CurrentSnapshot;
283 : }
284 :
285 : /*
286 : * GetLatestSnapshot
287 : * Get a snapshot that is up-to-date as of the current instant,
288 : * even if we are executing in transaction-snapshot mode.
289 : */
290 : Snapshot
291 296188 : GetLatestSnapshot(void)
292 : {
293 : /*
294 : * We might be able to relax this, but nothing that could otherwise work
295 : * needs it.
296 : */
297 296188 : if (IsInParallelMode())
298 0 : elog(ERROR,
299 : "cannot update SecondarySnapshot during a parallel operation");
300 :
301 : /*
302 : * So far there are no cases requiring support for GetLatestSnapshot()
303 : * during logical decoding, but it wouldn't be hard to add if required.
304 : */
305 : Assert(!HistoricSnapshotActive());
306 :
307 : /* If first call in transaction, go ahead and set the xact snapshot */
308 296188 : if (!FirstSnapshotSet)
309 100 : return GetTransactionSnapshot();
310 :
311 296088 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
312 :
313 296088 : return SecondarySnapshot;
314 : }
315 :
316 : /*
317 : * GetOldestSnapshot
318 : *
319 : * Get the transaction's oldest known snapshot, as judged by the LSN.
320 : * Will return NULL if there are no active or registered snapshots.
321 : */
322 : Snapshot
323 45094 : GetOldestSnapshot(void)
324 : {
325 45094 : Snapshot OldestRegisteredSnapshot = NULL;
326 45094 : XLogRecPtr RegisteredLSN = InvalidXLogRecPtr;
327 :
328 45094 : if (!pairingheap_is_empty(&RegisteredSnapshots))
329 : {
330 44832 : OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
331 : pairingheap_first(&RegisteredSnapshots));
332 44832 : RegisteredLSN = OldestRegisteredSnapshot->lsn;
333 : }
334 :
335 45094 : if (OldestActiveSnapshot != NULL)
336 : {
337 45078 : XLogRecPtr ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
338 :
339 45078 : if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
340 45078 : return OldestActiveSnapshot->as_snap;
341 : }
342 :
343 16 : return OldestRegisteredSnapshot;
344 : }
345 :
346 : /*
347 : * GetCatalogSnapshot
348 : * Get a snapshot that is sufficiently up-to-date for scan of the
349 : * system catalog with the specified OID.
350 : */
351 : Snapshot
352 11852644 : GetCatalogSnapshot(Oid relid)
353 : {
354 : /*
355 : * Return historic snapshot while we're doing logical decoding, so we can
356 : * see the appropriate state of the catalog.
357 : *
358 : * This is the primary reason for needing to reset the system caches after
359 : * finishing decoding.
360 : */
361 11852644 : if (HistoricSnapshotActive())
362 25876 : return HistoricSnapshot;
363 :
364 11826768 : return GetNonHistoricCatalogSnapshot(relid);
365 : }
366 :
367 : /*
368 : * GetNonHistoricCatalogSnapshot
369 : * Get a snapshot that is sufficiently up-to-date for scan of the system
370 : * catalog with the specified OID, even while historic snapshots are set
371 : * up.
372 : */
373 : Snapshot
374 11829466 : GetNonHistoricCatalogSnapshot(Oid relid)
375 : {
376 : /*
377 : * If the caller is trying to scan a relation that has no syscache, no
378 : * catcache invalidations will be sent when it is updated. For a few key
379 : * relations, snapshot invalidations are sent instead. If we're trying to
380 : * scan a relation for which neither catcache nor snapshot invalidations
381 : * are sent, we must refresh the snapshot every time.
382 : */
383 11829466 : if (CatalogSnapshot &&
384 10359420 : !RelationInvalidatesSnapshotsOnly(relid) &&
385 8931610 : !RelationHasSysCache(relid))
386 441630 : InvalidateCatalogSnapshot();
387 :
388 11829466 : if (CatalogSnapshot == NULL)
389 : {
390 : /* Get new snapshot. */
391 1911676 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
392 :
393 : /*
394 : * Make sure the catalog snapshot will be accounted for in decisions
395 : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
396 : * that would result in making a physical copy, which is overkill; and
397 : * it would also create a dependency on some resource owner, which we
398 : * do not want for reasons explained at the head of this file. Instead
399 : * just shove the CatalogSnapshot into the pairing heap manually. This
400 : * has to be reversed in InvalidateCatalogSnapshot, of course.
401 : *
402 : * NB: it had better be impossible for this to throw error, since the
403 : * CatalogSnapshot pointer is already valid.
404 : */
405 1911676 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
406 : }
407 :
408 11829466 : return CatalogSnapshot;
409 : }
410 :
411 : /*
412 : * InvalidateCatalogSnapshot
413 : * Mark the current catalog snapshot, if any, as invalid
414 : *
415 : * We could change this API to allow the caller to provide more fine-grained
416 : * invalidation details, so that a change to relation A wouldn't prevent us
417 : * from using our cached snapshot to scan relation B, but so far there's no
418 : * evidence that the CPU cycles we spent tracking such fine details would be
419 : * well-spent.
420 : */
421 : void
422 23942694 : InvalidateCatalogSnapshot(void)
423 : {
424 23942694 : if (CatalogSnapshot)
425 : {
426 1911676 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
427 1911676 : CatalogSnapshot = NULL;
428 1911676 : SnapshotResetXmin();
429 : }
430 23942694 : }
431 :
432 : /*
433 : * InvalidateCatalogSnapshotConditionally
434 : * Drop catalog snapshot if it's the only one we have
435 : *
436 : * This is called when we are about to wait for client input, so we don't
437 : * want to continue holding the catalog snapshot if it might mean that the
438 : * global xmin horizon can't advance. However, if there are other snapshots
439 : * still active or registered, the catalog snapshot isn't likely to be the
440 : * oldest one, so we might as well keep it.
441 : */
442 : void
443 735618 : InvalidateCatalogSnapshotConditionally(void)
444 : {
445 735618 : if (CatalogSnapshot &&
446 98092 : ActiveSnapshot == NULL &&
447 96466 : pairingheap_is_singular(&RegisteredSnapshots))
448 16382 : InvalidateCatalogSnapshot();
449 735618 : }
450 :
451 : /*
452 : * SnapshotSetCommandId
453 : * Propagate CommandCounterIncrement into the static snapshots, if set
454 : */
455 : void
456 1067056 : SnapshotSetCommandId(CommandId curcid)
457 : {
458 1067056 : if (!FirstSnapshotSet)
459 18662 : return;
460 :
461 1048394 : if (CurrentSnapshot)
462 1048394 : CurrentSnapshot->curcid = curcid;
463 1048394 : if (SecondarySnapshot)
464 157562 : SecondarySnapshot->curcid = curcid;
465 : /* Should we do the same with CatalogSnapshot? */
466 : }
467 :
468 : /*
469 : * SetTransactionSnapshot
470 : * Set the transaction's snapshot from an imported MVCC snapshot.
471 : *
472 : * Note that this is very closely tied to GetTransactionSnapshot --- it
473 : * must take care of all the same considerations as the first-snapshot case
474 : * in GetTransactionSnapshot.
475 : */
476 : static void
477 3088 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
478 : int sourcepid, PGPROC *sourceproc)
479 : {
480 : /* Caller should have checked this already */
481 : Assert(!FirstSnapshotSet);
482 :
483 : /* Better do this to ensure following Assert succeeds. */
484 3088 : InvalidateCatalogSnapshot();
485 :
486 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
487 : Assert(FirstXactSnapshot == NULL);
488 : Assert(!HistoricSnapshotActive());
489 :
490 : /*
491 : * Even though we are not going to use the snapshot it computes, we must
492 : * call GetSnapshotData, for two reasons: (1) to be sure that
493 : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
494 : * the state for GlobalVis*.
495 : */
496 3088 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
497 :
498 : /*
499 : * Now copy appropriate fields from the source snapshot.
500 : */
501 3088 : CurrentSnapshot->xmin = sourcesnap->xmin;
502 3088 : CurrentSnapshot->xmax = sourcesnap->xmax;
503 3088 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
504 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
505 3088 : if (sourcesnap->xcnt > 0)
506 620 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
507 620 : sourcesnap->xcnt * sizeof(TransactionId));
508 3088 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
509 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
510 3088 : if (sourcesnap->subxcnt > 0)
511 8 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
512 8 : sourcesnap->subxcnt * sizeof(TransactionId));
513 3088 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
514 3088 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
515 : /* NB: curcid should NOT be copied, it's a local matter */
516 :
517 3088 : CurrentSnapshot->snapXactCompletionCount = 0;
518 :
519 : /*
520 : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
521 : * TransactionXmin. There is a race condition: to make sure we are not
522 : * causing the global xmin to go backwards, we have to test that the
523 : * source transaction is still running, and that has to be done
524 : * atomically. So let procarray.c do it.
525 : *
526 : * Note: in serializable mode, predicate.c will do this a second time. It
527 : * doesn't seem worth contorting the logic here to avoid two calls,
528 : * especially since it's not clear that predicate.c *must* do this.
529 : */
530 3088 : if (sourceproc != NULL)
531 : {
532 3056 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
533 0 : ereport(ERROR,
534 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
535 : errmsg("could not import the requested snapshot"),
536 : errdetail("The source transaction is not running anymore.")));
537 : }
538 32 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
539 0 : ereport(ERROR,
540 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
541 : errmsg("could not import the requested snapshot"),
542 : errdetail("The source process with PID %d is not running anymore.",
543 : sourcepid)));
544 :
545 : /*
546 : * In transaction-snapshot mode, the first snapshot must live until end of
547 : * xact, so we must make a copy of it. Furthermore, if we're running in
548 : * serializable mode, predicate.c needs to do its own processing.
549 : */
550 3088 : if (IsolationUsesXactSnapshot())
551 : {
552 416 : if (IsolationIsSerializable())
553 26 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
554 : sourcepid);
555 : /* Make a saved copy */
556 416 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
557 416 : FirstXactSnapshot = CurrentSnapshot;
558 : /* Mark it as "registered" in FirstXactSnapshot */
559 416 : FirstXactSnapshot->regd_count++;
560 416 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
561 : }
562 :
563 3088 : FirstSnapshotSet = true;
564 3088 : }
565 :
566 : /*
567 : * CopySnapshot
568 : * Copy the given snapshot.
569 : *
570 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
571 : * to 0. The returned snapshot has the copied flag set.
572 : */
573 : static Snapshot
574 13536424 : CopySnapshot(Snapshot snapshot)
575 : {
576 : Snapshot newsnap;
577 : Size subxipoff;
578 : Size size;
579 :
580 : Assert(snapshot != InvalidSnapshot);
581 :
582 : /* We allocate any XID arrays needed in the same palloc block. */
583 13536424 : size = subxipoff = sizeof(SnapshotData) +
584 13536424 : snapshot->xcnt * sizeof(TransactionId);
585 13536424 : if (snapshot->subxcnt > 0)
586 153226 : size += snapshot->subxcnt * sizeof(TransactionId);
587 :
588 13536424 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
589 13536424 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
590 :
591 13536424 : newsnap->regd_count = 0;
592 13536424 : newsnap->active_count = 0;
593 13536424 : newsnap->copied = true;
594 13536424 : newsnap->snapXactCompletionCount = 0;
595 :
596 : /* setup XID array */
597 13536424 : if (snapshot->xcnt > 0)
598 : {
599 2809620 : newsnap->xip = (TransactionId *) (newsnap + 1);
600 2809620 : memcpy(newsnap->xip, snapshot->xip,
601 2809620 : snapshot->xcnt * sizeof(TransactionId));
602 : }
603 : else
604 10726804 : newsnap->xip = NULL;
605 :
606 : /*
607 : * Setup subXID array. Don't bother to copy it if it had overflowed,
608 : * though, because it's not used anywhere in that case. Except if it's a
609 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
610 : * well in that case, so we mustn't lose them.
611 : */
612 13536424 : if (snapshot->subxcnt > 0 &&
613 153226 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
614 : {
615 153226 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
616 153226 : memcpy(newsnap->subxip, snapshot->subxip,
617 153226 : snapshot->subxcnt * sizeof(TransactionId));
618 : }
619 : else
620 13383198 : newsnap->subxip = NULL;
621 :
622 13536424 : return newsnap;
623 : }
624 :
625 : /*
626 : * FreeSnapshot
627 : * Free the memory associated with a snapshot.
628 : */
629 : static void
630 13490014 : FreeSnapshot(Snapshot snapshot)
631 : {
632 : Assert(snapshot->regd_count == 0);
633 : Assert(snapshot->active_count == 0);
634 : Assert(snapshot->copied);
635 :
636 13490014 : pfree(snapshot);
637 13490014 : }
638 :
639 : /*
640 : * PushActiveSnapshot
641 : * Set the given snapshot as the current active snapshot
642 : *
643 : * If the passed snapshot is a statically-allocated one, or it is possibly
644 : * subject to a future command counter update, create a new long-lived copy
645 : * with active refcount=1. Otherwise, only increment the refcount.
646 : */
647 : void
648 1899656 : PushActiveSnapshot(Snapshot snapshot)
649 : {
650 1899656 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
651 1899656 : }
652 :
653 : /*
654 : * PushActiveSnapshotWithLevel
655 : * Set the given snapshot as the current active snapshot
656 : *
657 : * Same as PushActiveSnapshot except that caller can specify the
658 : * transaction nesting level that "owns" the snapshot. This level
659 : * must not be deeper than the current top of the snapshot stack.
660 : */
661 : void
662 2167874 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
663 : {
664 : ActiveSnapshotElt *newactive;
665 :
666 : Assert(snapshot != InvalidSnapshot);
667 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
668 :
669 2167874 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
670 :
671 : /*
672 : * Checking SecondarySnapshot is probably useless here, but it seems
673 : * better to be sure.
674 : */
675 2167874 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
676 429830 : !snapshot->copied)
677 1738044 : newactive->as_snap = CopySnapshot(snapshot);
678 : else
679 429830 : newactive->as_snap = snapshot;
680 :
681 2167874 : newactive->as_next = ActiveSnapshot;
682 2167874 : newactive->as_level = snap_level;
683 :
684 2167874 : newactive->as_snap->active_count++;
685 :
686 2167874 : ActiveSnapshot = newactive;
687 2167874 : if (OldestActiveSnapshot == NULL)
688 1625846 : OldestActiveSnapshot = ActiveSnapshot;
689 2167874 : }
690 :
691 : /*
692 : * PushCopiedSnapshot
693 : * As above, except forcibly copy the presented snapshot.
694 : *
695 : * This should be used when the ActiveSnapshot has to be modifiable, for
696 : * example if the caller intends to call UpdateActiveSnapshotCommandId.
697 : * The new snapshot will be released when popped from the stack.
698 : */
699 : void
700 113000 : PushCopiedSnapshot(Snapshot snapshot)
701 : {
702 113000 : PushActiveSnapshot(CopySnapshot(snapshot));
703 113000 : }
704 :
705 : /*
706 : * UpdateActiveSnapshotCommandId
707 : *
708 : * Update the current CID of the active snapshot. This can only be applied
709 : * to a snapshot that is not referenced elsewhere.
710 : */
711 : void
712 114926 : UpdateActiveSnapshotCommandId(void)
713 : {
714 : CommandId save_curcid,
715 : curcid;
716 :
717 : Assert(ActiveSnapshot != NULL);
718 : Assert(ActiveSnapshot->as_snap->active_count == 1);
719 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
720 :
721 : /*
722 : * Don't allow modification of the active snapshot during parallel
723 : * operation. We share the snapshot to worker backends at the beginning
724 : * of parallel operation, so any change to the snapshot can lead to
725 : * inconsistencies. We have other defenses against
726 : * CommandCounterIncrement, but there are a few places that call this
727 : * directly, so we put an additional guard here.
728 : */
729 114926 : save_curcid = ActiveSnapshot->as_snap->curcid;
730 114926 : curcid = GetCurrentCommandId(false);
731 114926 : if (IsInParallelMode() && save_curcid != curcid)
732 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
733 114926 : ActiveSnapshot->as_snap->curcid = curcid;
734 114926 : }
735 :
736 : /*
737 : * PopActiveSnapshot
738 : *
739 : * Remove the topmost snapshot from the active snapshot stack, decrementing the
740 : * reference count, and free it if this was the last reference.
741 : */
742 : void
743 2114890 : PopActiveSnapshot(void)
744 : {
745 : ActiveSnapshotElt *newstack;
746 :
747 2114890 : newstack = ActiveSnapshot->as_next;
748 :
749 : Assert(ActiveSnapshot->as_snap->active_count > 0);
750 :
751 2114890 : ActiveSnapshot->as_snap->active_count--;
752 :
753 2114890 : if (ActiveSnapshot->as_snap->active_count == 0 &&
754 2082052 : ActiveSnapshot->as_snap->regd_count == 0)
755 1559844 : FreeSnapshot(ActiveSnapshot->as_snap);
756 :
757 2114890 : pfree(ActiveSnapshot);
758 2114890 : ActiveSnapshot = newstack;
759 2114890 : if (ActiveSnapshot == NULL)
760 1586892 : OldestActiveSnapshot = NULL;
761 :
762 2114890 : SnapshotResetXmin();
763 2114890 : }
764 :
765 : /*
766 : * GetActiveSnapshot
767 : * Return the topmost snapshot in the Active stack.
768 : */
769 : Snapshot
770 957466 : GetActiveSnapshot(void)
771 : {
772 : Assert(ActiveSnapshot != NULL);
773 :
774 957466 : return ActiveSnapshot->as_snap;
775 : }
776 :
777 : /*
778 : * ActiveSnapshotSet
779 : * Return whether there is at least one snapshot in the Active stack
780 : */
781 : bool
782 900254 : ActiveSnapshotSet(void)
783 : {
784 900254 : return ActiveSnapshot != NULL;
785 : }
786 :
787 : /*
788 : * RegisterSnapshot
789 : * Register a snapshot as being in use by the current resource owner
790 : *
791 : * If InvalidSnapshot is passed, it is not registered.
792 : */
793 : Snapshot
794 14262958 : RegisterSnapshot(Snapshot snapshot)
795 : {
796 14262958 : if (snapshot == InvalidSnapshot)
797 1275908 : return InvalidSnapshot;
798 :
799 12987050 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
800 : }
801 :
802 : /*
803 : * RegisterSnapshotOnOwner
804 : * As above, but use the specified resource owner
805 : */
806 : Snapshot
807 12987258 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
808 : {
809 : Snapshot snap;
810 :
811 12987258 : if (snapshot == InvalidSnapshot)
812 0 : return InvalidSnapshot;
813 :
814 : /* Static snapshot? Create a persistent copy */
815 12987258 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
816 :
817 : /* and tell resowner.c about it */
818 12987258 : ResourceOwnerEnlarge(owner);
819 12987258 : snap->regd_count++;
820 12987258 : ResourceOwnerRememberSnapshot(owner, snap);
821 :
822 12987258 : if (snap->regd_count == 1)
823 12311828 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
824 :
825 12987258 : return snap;
826 : }
827 :
828 : /*
829 : * UnregisterSnapshot
830 : *
831 : * Decrement the reference count of a snapshot, remove the corresponding
832 : * reference from CurrentResourceOwner, and free the snapshot if no more
833 : * references remain.
834 : */
835 : void
836 14113620 : UnregisterSnapshot(Snapshot snapshot)
837 : {
838 14113620 : if (snapshot == NULL)
839 1221730 : return;
840 :
841 12891890 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
842 : }
843 :
844 : /*
845 : * UnregisterSnapshotFromOwner
846 : * As above, but use the specified resource owner
847 : */
848 : void
849 12931674 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
850 : {
851 12931674 : if (snapshot == NULL)
852 0 : return;
853 :
854 12931674 : ResourceOwnerForgetSnapshot(owner, snapshot);
855 12931674 : UnregisterSnapshotNoOwner(snapshot);
856 : }
857 :
858 : static void
859 12987258 : UnregisterSnapshotNoOwner(Snapshot snapshot)
860 : {
861 : Assert(snapshot->regd_count > 0);
862 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
863 :
864 12987258 : snapshot->regd_count--;
865 12987258 : if (snapshot->regd_count == 0)
866 12311828 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
867 :
868 12987258 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
869 : {
870 11924496 : FreeSnapshot(snapshot);
871 11924496 : SnapshotResetXmin();
872 : }
873 12987258 : }
874 :
875 : /*
876 : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
877 : * by xmin, so that the snapshot with smallest xmin is at the top.
878 : */
879 : static int
880 12295702 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
881 : {
882 12295702 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
883 12295702 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
884 :
885 12295702 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
886 104754 : return 1;
887 12190948 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
888 18304 : return -1;
889 : else
890 12172644 : return 0;
891 : }
892 :
893 : /*
894 : * SnapshotResetXmin
895 : *
896 : * If there are no more snapshots, we can reset our PGPROC->xmin to
897 : * InvalidTransactionId. Note we can do this without locking because we assume
898 : * that storing an Xid is atomic.
899 : *
900 : * Even if there are some remaining snapshots, we may be able to advance our
901 : * PGPROC->xmin to some degree. This typically happens when a portal is
902 : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
903 : * the active snapshot stack is empty; this allows us not to need to track
904 : * which active snapshot is oldest.
905 : *
906 : * Note: it's tempting to use GetOldestSnapshot() here so that we can include
907 : * active snapshots in the calculation. However, that compares by LSN not
908 : * xmin so it's not entirely clear that it's the same thing. Also, we'd be
909 : * critically dependent on the assumption that the bottommost active snapshot
910 : * stack entry has the oldest xmin. (Current uses of GetOldestSnapshot() are
911 : * not actually critical, but this would be.)
912 : */
913 : static void
914 16007692 : SnapshotResetXmin(void)
915 : {
916 : Snapshot minSnapshot;
917 :
918 16007692 : if (ActiveSnapshot != NULL)
919 11565554 : return;
920 :
921 4442138 : if (pairingheap_is_empty(&RegisteredSnapshots))
922 : {
923 1458210 : MyProc->xmin = InvalidTransactionId;
924 1458210 : return;
925 : }
926 :
927 2983928 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
928 : pairingheap_first(&RegisteredSnapshots));
929 :
930 2983928 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
931 7426 : MyProc->xmin = minSnapshot->xmin;
932 : }
933 :
934 : /*
935 : * AtSubCommit_Snapshot
936 : */
937 : void
938 10756 : AtSubCommit_Snapshot(int level)
939 : {
940 : ActiveSnapshotElt *active;
941 :
942 : /*
943 : * Relabel the active snapshots set in this subtransaction as though they
944 : * are owned by the parent subxact.
945 : */
946 10756 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
947 : {
948 9076 : if (active->as_level < level)
949 9076 : break;
950 0 : active->as_level = level - 1;
951 : }
952 10756 : }
953 :
954 : /*
955 : * AtSubAbort_Snapshot
956 : * Clean up snapshots after a subtransaction abort
957 : */
958 : void
959 9288 : AtSubAbort_Snapshot(int level)
960 : {
961 : /* Forget the active snapshots set by this subtransaction */
962 14962 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
963 : {
964 : ActiveSnapshotElt *next;
965 :
966 5674 : next = ActiveSnapshot->as_next;
967 :
968 : /*
969 : * Decrement the snapshot's active count. If it's still registered or
970 : * marked as active by an outer subtransaction, we can't free it yet.
971 : */
972 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
973 5674 : ActiveSnapshot->as_snap->active_count -= 1;
974 :
975 5674 : if (ActiveSnapshot->as_snap->active_count == 0 &&
976 5674 : ActiveSnapshot->as_snap->regd_count == 0)
977 5674 : FreeSnapshot(ActiveSnapshot->as_snap);
978 :
979 : /* and free the stack element */
980 5674 : pfree(ActiveSnapshot);
981 :
982 5674 : ActiveSnapshot = next;
983 5674 : if (ActiveSnapshot == NULL)
984 242 : OldestActiveSnapshot = NULL;
985 : }
986 :
987 9288 : SnapshotResetXmin();
988 9288 : }
989 :
990 : /*
991 : * AtEOXact_Snapshot
992 : * Snapshot manager's cleanup function for end of transaction
993 : */
994 : void
995 749246 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
996 : {
997 : /*
998 : * In transaction-snapshot mode we must release our privately-managed
999 : * reference to the transaction snapshot. We must remove it from
1000 : * RegisteredSnapshots to keep the check below happy. But we don't bother
1001 : * to do FreeSnapshot, for two reasons: the memory will go away with
1002 : * TopTransactionContext anyway, and if someone has left the snapshot
1003 : * stacked as active, we don't want the code below to be chasing through a
1004 : * dangling pointer.
1005 : */
1006 749246 : if (FirstXactSnapshot != NULL)
1007 : {
1008 : Assert(FirstXactSnapshot->regd_count > 0);
1009 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1010 5758 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1011 : }
1012 749246 : FirstXactSnapshot = NULL;
1013 :
1014 : /*
1015 : * If we exported any snapshots, clean them up.
1016 : */
1017 749246 : if (exportedSnapshots != NIL)
1018 : {
1019 : ListCell *lc;
1020 :
1021 : /*
1022 : * Get rid of the files. Unlink failure is only a WARNING because (1)
1023 : * it's too late to abort the transaction, and (2) leaving a leaked
1024 : * file around has little real consequence anyway.
1025 : *
1026 : * We also need to remove the snapshots from RegisteredSnapshots to
1027 : * prevent a warning below.
1028 : *
1029 : * As with the FirstXactSnapshot, we don't need to free resources of
1030 : * the snapshot itself as it will go away with the memory context.
1031 : */
1032 32 : foreach(lc, exportedSnapshots)
1033 : {
1034 16 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1035 :
1036 16 : if (unlink(esnap->snapfile))
1037 0 : elog(WARNING, "could not unlink file \"%s\": %m",
1038 : esnap->snapfile);
1039 :
1040 16 : pairingheap_remove(&RegisteredSnapshots,
1041 16 : &esnap->snapshot->ph_node);
1042 : }
1043 :
1044 16 : exportedSnapshots = NIL;
1045 : }
1046 :
1047 : /* Drop catalog snapshot if any */
1048 749246 : InvalidateCatalogSnapshot();
1049 :
1050 : /* On commit, complain about leftover snapshots */
1051 749246 : if (isCommit)
1052 : {
1053 : ActiveSnapshotElt *active;
1054 :
1055 702672 : if (!pairingheap_is_empty(&RegisteredSnapshots))
1056 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
1057 :
1058 : /* complain about unpopped active snapshots */
1059 702672 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1060 0 : elog(WARNING, "snapshot %p still active", active);
1061 : }
1062 :
1063 : /*
1064 : * And reset our state. We don't need to free the memory explicitly --
1065 : * it'll go away with TopTransactionContext.
1066 : */
1067 749246 : ActiveSnapshot = NULL;
1068 749246 : OldestActiveSnapshot = NULL;
1069 749246 : pairingheap_reset(&RegisteredSnapshots);
1070 :
1071 749246 : CurrentSnapshot = NULL;
1072 749246 : SecondarySnapshot = NULL;
1073 :
1074 749246 : FirstSnapshotSet = false;
1075 :
1076 : /*
1077 : * During normal commit processing, we call ProcArrayEndTransaction() to
1078 : * reset the MyProc->xmin. That call happens prior to the call to
1079 : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1080 : */
1081 749246 : if (resetXmin)
1082 47342 : SnapshotResetXmin();
1083 :
1084 : Assert(resetXmin || MyProc->xmin == 0);
1085 749246 : }
1086 :
1087 :
1088 : /*
1089 : * ExportSnapshot
1090 : * Export the snapshot to a file so that other backends can import it.
1091 : * Returns the token (the file name) that can be used to import this
1092 : * snapshot.
1093 : */
1094 : char *
1095 16 : ExportSnapshot(Snapshot snapshot)
1096 : {
1097 : TransactionId topXid;
1098 : TransactionId *children;
1099 : ExportedSnapshot *esnap;
1100 : int nchildren;
1101 : int addTopXid;
1102 : StringInfoData buf;
1103 : FILE *f;
1104 : int i;
1105 : MemoryContext oldcxt;
1106 : char path[MAXPGPATH];
1107 : char pathtmp[MAXPGPATH];
1108 :
1109 : /*
1110 : * It's tempting to call RequireTransactionBlock here, since it's not very
1111 : * useful to export a snapshot that will disappear immediately afterwards.
1112 : * However, we haven't got enough information to do that, since we don't
1113 : * know if we're at top level or not. For example, we could be inside a
1114 : * plpgsql function that is going to fire off other transactions via
1115 : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1116 : * check.
1117 : *
1118 : * Also note that we don't make any restriction on the transaction's
1119 : * isolation level; however, importers must check the level if they are
1120 : * serializable.
1121 : */
1122 :
1123 : /*
1124 : * Get our transaction ID if there is one, to include in the snapshot.
1125 : */
1126 16 : topXid = GetTopTransactionIdIfAny();
1127 :
1128 : /*
1129 : * We cannot export a snapshot from a subtransaction because there's no
1130 : * easy way for importers to verify that the same subtransaction is still
1131 : * running.
1132 : */
1133 16 : if (IsSubTransaction())
1134 0 : ereport(ERROR,
1135 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1136 : errmsg("cannot export a snapshot from a subtransaction")));
1137 :
1138 : /*
1139 : * We do however allow previous committed subtransactions to exist.
1140 : * Importers of the snapshot must see them as still running, so get their
1141 : * XIDs to add them to the snapshot.
1142 : */
1143 16 : nchildren = xactGetCommittedChildren(&children);
1144 :
1145 : /*
1146 : * Generate file path for the snapshot. We start numbering of snapshots
1147 : * inside the transaction from 1.
1148 : */
1149 16 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1150 16 : MyProc->vxid.procNumber, MyProc->vxid.lxid,
1151 16 : list_length(exportedSnapshots) + 1);
1152 :
1153 : /*
1154 : * Copy the snapshot into TopTransactionContext, add it to the
1155 : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1156 : * ensure that the snapshot's xmin is honored for the rest of the
1157 : * transaction.
1158 : */
1159 16 : snapshot = CopySnapshot(snapshot);
1160 :
1161 16 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1162 16 : esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1163 16 : esnap->snapfile = pstrdup(path);
1164 16 : esnap->snapshot = snapshot;
1165 16 : exportedSnapshots = lappend(exportedSnapshots, esnap);
1166 16 : MemoryContextSwitchTo(oldcxt);
1167 :
1168 16 : snapshot->regd_count++;
1169 16 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1170 :
1171 : /*
1172 : * Fill buf with a text serialization of the snapshot, plus identification
1173 : * data about this transaction. The format expected by ImportSnapshot is
1174 : * pretty rigid: each line must be fieldname:value.
1175 : */
1176 16 : initStringInfo(&buf);
1177 :
1178 16 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1179 16 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1180 16 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1181 16 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1182 16 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1183 :
1184 16 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1185 16 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1186 :
1187 : /*
1188 : * We must include our own top transaction ID in the top-xid data, since
1189 : * by definition we will still be running when the importing transaction
1190 : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1191 : * the snapshot. (There must, therefore, be enough room to add it.)
1192 : *
1193 : * However, it could be that our topXid is after the xmax, in which case
1194 : * we shouldn't include it because xip[] members are expected to be before
1195 : * xmax. (We need not make the same check for subxip[] members, see
1196 : * snapshot.h.)
1197 : */
1198 16 : addTopXid = (TransactionIdIsValid(topXid) &&
1199 16 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1200 16 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1201 16 : for (i = 0; i < snapshot->xcnt; i++)
1202 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1203 16 : if (addTopXid)
1204 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
1205 :
1206 : /*
1207 : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1208 : * we have to cope with possible overflow.
1209 : */
1210 32 : if (snapshot->suboverflowed ||
1211 16 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1212 0 : appendStringInfoString(&buf, "sof:1\n");
1213 : else
1214 : {
1215 16 : appendStringInfoString(&buf, "sof:0\n");
1216 16 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1217 16 : for (i = 0; i < snapshot->subxcnt; i++)
1218 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1219 16 : for (i = 0; i < nchildren; i++)
1220 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
1221 : }
1222 16 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1223 :
1224 : /*
1225 : * Now write the text representation into a file. We first write to a
1226 : * ".tmp" filename, and rename to final filename if no error. This
1227 : * ensures that no other backend can read an incomplete file
1228 : * (ImportSnapshot won't allow it because of its valid-characters check).
1229 : */
1230 16 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1231 16 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1232 0 : ereport(ERROR,
1233 : (errcode_for_file_access(),
1234 : errmsg("could not create file \"%s\": %m", pathtmp)));
1235 :
1236 16 : if (fwrite(buf.data, buf.len, 1, f) != 1)
1237 0 : ereport(ERROR,
1238 : (errcode_for_file_access(),
1239 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1240 :
1241 : /* no fsync() since file need not survive a system crash */
1242 :
1243 16 : if (FreeFile(f))
1244 0 : ereport(ERROR,
1245 : (errcode_for_file_access(),
1246 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1247 :
1248 : /*
1249 : * Now that we have written everything into a .tmp file, rename the file
1250 : * to remove the .tmp suffix.
1251 : */
1252 16 : if (rename(pathtmp, path) < 0)
1253 0 : ereport(ERROR,
1254 : (errcode_for_file_access(),
1255 : errmsg("could not rename file \"%s\" to \"%s\": %m",
1256 : pathtmp, path)));
1257 :
1258 : /*
1259 : * The basename of the file is what we return from pg_export_snapshot().
1260 : * It's already in path in a textual format and we know that the path
1261 : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1262 : * and pstrdup it so as not to return the address of a local variable.
1263 : */
1264 16 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1265 : }
1266 :
1267 : /*
1268 : * pg_export_snapshot
1269 : * SQL-callable wrapper for ExportSnapshot.
1270 : */
1271 : Datum
1272 16 : pg_export_snapshot(PG_FUNCTION_ARGS)
1273 : {
1274 : char *snapshotName;
1275 :
1276 16 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1277 16 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1278 : }
1279 :
1280 :
1281 : /*
1282 : * Parsing subroutines for ImportSnapshot: parse a line with the given
1283 : * prefix followed by a value, and advance *s to the next line. The
1284 : * filename is provided for use in error messages.
1285 : */
1286 : static int
1287 224 : parseIntFromText(const char *prefix, char **s, const char *filename)
1288 : {
1289 224 : char *ptr = *s;
1290 224 : int prefixlen = strlen(prefix);
1291 : int val;
1292 :
1293 224 : if (strncmp(ptr, prefix, prefixlen) != 0)
1294 0 : ereport(ERROR,
1295 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1296 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1297 224 : ptr += prefixlen;
1298 224 : if (sscanf(ptr, "%d", &val) != 1)
1299 0 : ereport(ERROR,
1300 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1301 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1302 224 : ptr = strchr(ptr, '\n');
1303 224 : if (!ptr)
1304 0 : ereport(ERROR,
1305 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1306 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1307 224 : *s = ptr + 1;
1308 224 : return val;
1309 : }
1310 :
1311 : static TransactionId
1312 96 : parseXidFromText(const char *prefix, char **s, const char *filename)
1313 : {
1314 96 : char *ptr = *s;
1315 96 : int prefixlen = strlen(prefix);
1316 : TransactionId val;
1317 :
1318 96 : if (strncmp(ptr, prefix, prefixlen) != 0)
1319 0 : ereport(ERROR,
1320 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1321 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1322 96 : ptr += prefixlen;
1323 96 : if (sscanf(ptr, "%u", &val) != 1)
1324 0 : ereport(ERROR,
1325 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1326 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1327 96 : ptr = strchr(ptr, '\n');
1328 96 : if (!ptr)
1329 0 : ereport(ERROR,
1330 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1331 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1332 96 : *s = ptr + 1;
1333 96 : return val;
1334 : }
1335 :
1336 : static void
1337 32 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1338 : VirtualTransactionId *vxid)
1339 : {
1340 32 : char *ptr = *s;
1341 32 : int prefixlen = strlen(prefix);
1342 :
1343 32 : if (strncmp(ptr, prefix, prefixlen) != 0)
1344 0 : ereport(ERROR,
1345 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1346 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1347 32 : ptr += prefixlen;
1348 32 : if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1349 0 : ereport(ERROR,
1350 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1351 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1352 32 : ptr = strchr(ptr, '\n');
1353 32 : if (!ptr)
1354 0 : ereport(ERROR,
1355 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1356 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1357 32 : *s = ptr + 1;
1358 32 : }
1359 :
1360 : /*
1361 : * ImportSnapshot
1362 : * Import a previously exported snapshot. The argument should be a
1363 : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1364 : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1365 : */
1366 : void
1367 44 : ImportSnapshot(const char *idstr)
1368 : {
1369 : char path[MAXPGPATH];
1370 : FILE *f;
1371 : struct stat stat_buf;
1372 : char *filebuf;
1373 : int xcnt;
1374 : int i;
1375 : VirtualTransactionId src_vxid;
1376 : int src_pid;
1377 : Oid src_dbid;
1378 : int src_isolevel;
1379 : bool src_readonly;
1380 : SnapshotData snapshot;
1381 :
1382 : /*
1383 : * Must be at top level of a fresh transaction. Note in particular that
1384 : * we check we haven't acquired an XID --- if we have, it's conceivable
1385 : * that the snapshot would show it as not running, making for very screwy
1386 : * behavior.
1387 : */
1388 88 : if (FirstSnapshotSet ||
1389 88 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
1390 44 : IsSubTransaction())
1391 0 : ereport(ERROR,
1392 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1393 : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1394 :
1395 : /*
1396 : * If we are in read committed mode then the next query would execute with
1397 : * a new snapshot thus making this function call quite useless.
1398 : */
1399 44 : if (!IsolationUsesXactSnapshot())
1400 0 : ereport(ERROR,
1401 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1402 : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1403 :
1404 : /*
1405 : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1406 : * this mainly to prevent reading arbitrary files.
1407 : */
1408 44 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1409 6 : ereport(ERROR,
1410 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1411 : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1412 :
1413 : /* OK, read the file */
1414 38 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1415 :
1416 38 : f = AllocateFile(path, PG_BINARY_R);
1417 38 : if (!f)
1418 : {
1419 : /*
1420 : * If file is missing while identifier has a correct format, avoid
1421 : * system errors.
1422 : */
1423 6 : if (errno == ENOENT)
1424 6 : ereport(ERROR,
1425 : (errcode(ERRCODE_UNDEFINED_OBJECT),
1426 : errmsg("snapshot \"%s\" does not exist", idstr)));
1427 : else
1428 0 : ereport(ERROR,
1429 : (errcode_for_file_access(),
1430 : errmsg("could not open file \"%s\" for reading: %m",
1431 : path)));
1432 : }
1433 :
1434 : /* get the size of the file so that we know how much memory we need */
1435 32 : if (fstat(fileno(f), &stat_buf))
1436 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
1437 :
1438 : /* and read the file into a palloc'd string */
1439 32 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1440 32 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1441 0 : elog(ERROR, "could not read file \"%s\": %m", path);
1442 :
1443 32 : filebuf[stat_buf.st_size] = '\0';
1444 :
1445 32 : FreeFile(f);
1446 :
1447 : /*
1448 : * Construct a snapshot struct by parsing the file content.
1449 : */
1450 32 : memset(&snapshot, 0, sizeof(snapshot));
1451 :
1452 32 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1453 32 : src_pid = parseIntFromText("pid:", &filebuf, path);
1454 : /* we abuse parseXidFromText a bit here ... */
1455 32 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
1456 32 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
1457 32 : src_readonly = parseIntFromText("ro:", &filebuf, path);
1458 :
1459 32 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1460 :
1461 32 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1462 32 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1463 :
1464 32 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1465 :
1466 : /* sanity-check the xid count before palloc */
1467 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1468 0 : ereport(ERROR,
1469 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1470 : errmsg("invalid snapshot data in file \"%s\"", path)));
1471 :
1472 32 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1473 32 : for (i = 0; i < xcnt; i++)
1474 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1475 :
1476 32 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1477 :
1478 32 : if (!snapshot.suboverflowed)
1479 : {
1480 32 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1481 :
1482 : /* sanity-check the xid count before palloc */
1483 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1484 0 : ereport(ERROR,
1485 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1486 : errmsg("invalid snapshot data in file \"%s\"", path)));
1487 :
1488 32 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1489 32 : for (i = 0; i < xcnt; i++)
1490 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1491 : }
1492 : else
1493 : {
1494 0 : snapshot.subxcnt = 0;
1495 0 : snapshot.subxip = NULL;
1496 : }
1497 :
1498 32 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1499 :
1500 : /*
1501 : * Do some additional sanity checking, just to protect ourselves. We
1502 : * don't trouble to check the array elements, just the most critical
1503 : * fields.
1504 : */
1505 32 : if (!VirtualTransactionIdIsValid(src_vxid) ||
1506 32 : !OidIsValid(src_dbid) ||
1507 32 : !TransactionIdIsNormal(snapshot.xmin) ||
1508 32 : !TransactionIdIsNormal(snapshot.xmax))
1509 0 : ereport(ERROR,
1510 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1511 : errmsg("invalid snapshot data in file \"%s\"", path)));
1512 :
1513 : /*
1514 : * If we're serializable, the source transaction must be too, otherwise
1515 : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1516 : * non-read-only transaction can't adopt a snapshot from a read-only
1517 : * transaction, as predicate.c handles the cases very differently.
1518 : */
1519 32 : if (IsolationIsSerializable())
1520 : {
1521 0 : if (src_isolevel != XACT_SERIALIZABLE)
1522 0 : ereport(ERROR,
1523 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1524 : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1525 0 : if (src_readonly && !XactReadOnly)
1526 0 : ereport(ERROR,
1527 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1528 : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1529 : }
1530 :
1531 : /*
1532 : * We cannot import a snapshot that was taken in a different database,
1533 : * because vacuum calculates OldestXmin on a per-database basis; so the
1534 : * source transaction's xmin doesn't protect us from data loss. This
1535 : * restriction could be removed if the source transaction were to mark its
1536 : * xmin as being globally applicable. But that would require some
1537 : * additional syntax, since that has to be known when the snapshot is
1538 : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1539 : */
1540 32 : if (src_dbid != MyDatabaseId)
1541 0 : ereport(ERROR,
1542 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1543 : errmsg("cannot import a snapshot from a different database")));
1544 :
1545 : /* OK, install the snapshot */
1546 32 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1547 32 : }
1548 :
1549 : /*
1550 : * XactHasExportedSnapshots
1551 : * Test whether current transaction has exported any snapshots.
1552 : */
1553 : bool
1554 796 : XactHasExportedSnapshots(void)
1555 : {
1556 796 : return (exportedSnapshots != NIL);
1557 : }
1558 :
1559 : /*
1560 : * DeleteAllExportedSnapshotFiles
1561 : * Clean up any files that have been left behind by a crashed backend
1562 : * that had exported snapshots before it died.
1563 : *
1564 : * This should be called during database startup or crash recovery.
1565 : */
1566 : void
1567 410 : DeleteAllExportedSnapshotFiles(void)
1568 : {
1569 : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1570 : DIR *s_dir;
1571 : struct dirent *s_de;
1572 :
1573 : /*
1574 : * Problems in reading the directory, or unlinking files, are reported at
1575 : * LOG level. Since we're running in the startup process, ERROR level
1576 : * would prevent database start, and it's not important enough for that.
1577 : */
1578 410 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1579 :
1580 1230 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1581 : {
1582 820 : if (strcmp(s_de->d_name, ".") == 0 ||
1583 410 : strcmp(s_de->d_name, "..") == 0)
1584 820 : continue;
1585 :
1586 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1587 :
1588 0 : if (unlink(buf) != 0)
1589 0 : ereport(LOG,
1590 : (errcode_for_file_access(),
1591 : errmsg("could not remove file \"%s\": %m", buf)));
1592 : }
1593 :
1594 410 : FreeDir(s_dir);
1595 410 : }
1596 :
1597 : /*
1598 : * ThereAreNoPriorRegisteredSnapshots
1599 : * Is the registered snapshot count less than or equal to one?
1600 : *
1601 : * Don't use this to settle important decisions. While zero registrations and
1602 : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1603 : * guarantees about the significance of one registered snapshot.
1604 : */
1605 : bool
1606 60 : ThereAreNoPriorRegisteredSnapshots(void)
1607 : {
1608 60 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
1609 0 : pairingheap_is_singular(&RegisteredSnapshots))
1610 60 : return true;
1611 :
1612 0 : return false;
1613 : }
1614 :
1615 : /*
1616 : * HaveRegisteredOrActiveSnapshot
1617 : * Is there any registered or active snapshot?
1618 : *
1619 : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1620 : * this function to return true. That allows this function to be used in
1621 : * checks enforcing a longer-lived snapshot.
1622 : */
1623 : bool
1624 344 : HaveRegisteredOrActiveSnapshot(void)
1625 : {
1626 344 : if (ActiveSnapshot != NULL)
1627 0 : return true;
1628 :
1629 : /*
1630 : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1631 : * removed at any time due to invalidation processing. If explicitly
1632 : * registered more than one snapshot has to be in RegisteredSnapshots.
1633 : */
1634 344 : if (CatalogSnapshot != NULL &&
1635 0 : pairingheap_is_singular(&RegisteredSnapshots))
1636 0 : return false;
1637 :
1638 344 : return !pairingheap_is_empty(&RegisteredSnapshots);
1639 : }
1640 :
1641 :
1642 : /*
1643 : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1644 : * access to behave just like it did at a certain point in the past.
1645 : *
1646 : * Needed for logical decoding.
1647 : */
1648 : void
1649 8292 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1650 : {
1651 : Assert(historic_snapshot != NULL);
1652 :
1653 : /* setup the timetravel snapshot */
1654 8292 : HistoricSnapshot = historic_snapshot;
1655 :
1656 : /* setup (cmin, cmax) lookup hash */
1657 8292 : tuplecid_data = tuplecids;
1658 8292 : }
1659 :
1660 :
1661 : /*
1662 : * Make catalog snapshots behave normally again.
1663 : */
1664 : void
1665 8290 : TeardownHistoricSnapshot(bool is_error)
1666 : {
1667 8290 : HistoricSnapshot = NULL;
1668 8290 : tuplecid_data = NULL;
1669 8290 : }
1670 :
1671 : bool
1672 17556356 : HistoricSnapshotActive(void)
1673 : {
1674 17556356 : return HistoricSnapshot != NULL;
1675 : }
1676 :
1677 : HTAB *
1678 1442 : HistoricSnapshotGetTupleCids(void)
1679 : {
1680 : Assert(HistoricSnapshotActive());
1681 1442 : return tuplecid_data;
1682 : }
1683 :
1684 : /*
1685 : * EstimateSnapshotSpace
1686 : * Returns the size needed to store the given snapshot.
1687 : *
1688 : * We are exporting only required fields from the Snapshot, stored in
1689 : * SerializedSnapshotData.
1690 : */
1691 : Size
1692 1924 : EstimateSnapshotSpace(Snapshot snapshot)
1693 : {
1694 : Size size;
1695 :
1696 : Assert(snapshot != InvalidSnapshot);
1697 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1698 :
1699 : /* We allocate any XID arrays needed in the same palloc block. */
1700 1924 : size = add_size(sizeof(SerializedSnapshotData),
1701 1924 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
1702 1924 : if (snapshot->subxcnt > 0 &&
1703 8 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1704 8 : size = add_size(size,
1705 8 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1706 :
1707 1924 : return size;
1708 : }
1709 :
1710 : /*
1711 : * SerializeSnapshot
1712 : * Dumps the serialized snapshot (extracted from given snapshot) onto the
1713 : * memory location at start_address.
1714 : */
1715 : void
1716 1866 : SerializeSnapshot(Snapshot snapshot, char *start_address)
1717 : {
1718 : SerializedSnapshotData serialized_snapshot;
1719 :
1720 : Assert(snapshot->subxcnt >= 0);
1721 :
1722 : /* Copy all required fields */
1723 1866 : serialized_snapshot.xmin = snapshot->xmin;
1724 1866 : serialized_snapshot.xmax = snapshot->xmax;
1725 1866 : serialized_snapshot.xcnt = snapshot->xcnt;
1726 1866 : serialized_snapshot.subxcnt = snapshot->subxcnt;
1727 1866 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1728 1866 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1729 1866 : serialized_snapshot.curcid = snapshot->curcid;
1730 1866 : serialized_snapshot.whenTaken = snapshot->whenTaken;
1731 1866 : serialized_snapshot.lsn = snapshot->lsn;
1732 :
1733 : /*
1734 : * Ignore the SubXID array if it has overflowed, unless the snapshot was
1735 : * taken during recovery - in that case, top-level XIDs are in subxip as
1736 : * well, and we mustn't lose them.
1737 : */
1738 1866 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1739 0 : serialized_snapshot.subxcnt = 0;
1740 :
1741 : /* Copy struct to possibly-unaligned buffer */
1742 1866 : memcpy(start_address,
1743 : &serialized_snapshot, sizeof(SerializedSnapshotData));
1744 :
1745 : /* Copy XID array */
1746 1866 : if (snapshot->xcnt > 0)
1747 714 : memcpy((TransactionId *) (start_address +
1748 : sizeof(SerializedSnapshotData)),
1749 714 : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1750 :
1751 : /*
1752 : * Copy SubXID array. Don't bother to copy it if it had overflowed,
1753 : * though, because it's not used anywhere in that case. Except if it's a
1754 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
1755 : * well in that case, so we mustn't lose them.
1756 : */
1757 1866 : if (serialized_snapshot.subxcnt > 0)
1758 : {
1759 8 : Size subxipoff = sizeof(SerializedSnapshotData) +
1760 8 : snapshot->xcnt * sizeof(TransactionId);
1761 :
1762 8 : memcpy((TransactionId *) (start_address + subxipoff),
1763 8 : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1764 : }
1765 1866 : }
1766 :
1767 : /*
1768 : * RestoreSnapshot
1769 : * Restore a serialized snapshot from the specified address.
1770 : *
1771 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1772 : * to 0. The returned snapshot has the copied flag set.
1773 : */
1774 : Snapshot
1775 6712 : RestoreSnapshot(char *start_address)
1776 : {
1777 : SerializedSnapshotData serialized_snapshot;
1778 : Size size;
1779 : Snapshot snapshot;
1780 : TransactionId *serialized_xids;
1781 :
1782 6712 : memcpy(&serialized_snapshot, start_address,
1783 : sizeof(SerializedSnapshotData));
1784 6712 : serialized_xids = (TransactionId *)
1785 : (start_address + sizeof(SerializedSnapshotData));
1786 :
1787 : /* We allocate any XID arrays needed in the same palloc block. */
1788 6712 : size = sizeof(SnapshotData)
1789 6712 : + serialized_snapshot.xcnt * sizeof(TransactionId)
1790 6712 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
1791 :
1792 : /* Copy all required fields */
1793 6712 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1794 6712 : snapshot->snapshot_type = SNAPSHOT_MVCC;
1795 6712 : snapshot->xmin = serialized_snapshot.xmin;
1796 6712 : snapshot->xmax = serialized_snapshot.xmax;
1797 6712 : snapshot->xip = NULL;
1798 6712 : snapshot->xcnt = serialized_snapshot.xcnt;
1799 6712 : snapshot->subxip = NULL;
1800 6712 : snapshot->subxcnt = serialized_snapshot.subxcnt;
1801 6712 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1802 6712 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1803 6712 : snapshot->curcid = serialized_snapshot.curcid;
1804 6712 : snapshot->whenTaken = serialized_snapshot.whenTaken;
1805 6712 : snapshot->lsn = serialized_snapshot.lsn;
1806 6712 : snapshot->snapXactCompletionCount = 0;
1807 :
1808 : /* Copy XIDs, if present. */
1809 6712 : if (serialized_snapshot.xcnt > 0)
1810 : {
1811 1976 : snapshot->xip = (TransactionId *) (snapshot + 1);
1812 1976 : memcpy(snapshot->xip, serialized_xids,
1813 1976 : serialized_snapshot.xcnt * sizeof(TransactionId));
1814 : }
1815 :
1816 : /* Copy SubXIDs, if present. */
1817 6712 : if (serialized_snapshot.subxcnt > 0)
1818 : {
1819 20 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1820 20 : serialized_snapshot.xcnt;
1821 20 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1822 20 : serialized_snapshot.subxcnt * sizeof(TransactionId));
1823 : }
1824 :
1825 : /* Set the copied flag so that the caller will set refcounts correctly. */
1826 6712 : snapshot->regd_count = 0;
1827 6712 : snapshot->active_count = 0;
1828 6712 : snapshot->copied = true;
1829 :
1830 6712 : return snapshot;
1831 : }
1832 :
1833 : /*
1834 : * Install a restored snapshot as the transaction snapshot.
1835 : *
1836 : * The second argument is of type void * so that snapmgr.h need not include
1837 : * the declaration for PGPROC.
1838 : */
1839 : void
1840 3056 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
1841 : {
1842 3056 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1843 3056 : }
1844 :
1845 : /*
1846 : * XidInMVCCSnapshot
1847 : * Is the given XID still-in-progress according to the snapshot?
1848 : *
1849 : * Note: GetSnapshotData never stores either top xid or subxids of our own
1850 : * backend into a snapshot, so these xids will not be reported as "running"
1851 : * by this function. This is OK for current uses, because we always check
1852 : * TransactionIdIsCurrentTransactionId first, except when it's known the
1853 : * XID could not be ours anyway.
1854 : */
1855 : bool
1856 126519074 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1857 : {
1858 : /*
1859 : * Make a quick range check to eliminate most XIDs without looking at the
1860 : * xip arrays. Note that this is OK even if we convert a subxact XID to
1861 : * its parent below, because a subxact with XID < xmin has surely also got
1862 : * a parent with XID < xmin, while one with XID >= xmax must belong to a
1863 : * parent that was not yet committed at the time of this snapshot.
1864 : */
1865 :
1866 : /* Any xid < xmin is not in-progress */
1867 126519074 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1868 120974198 : return false;
1869 : /* Any xid >= xmax is in-progress */
1870 5544876 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1871 22726 : return true;
1872 :
1873 : /*
1874 : * Snapshot information is stored slightly differently in snapshots taken
1875 : * during recovery.
1876 : */
1877 5522150 : if (!snapshot->takenDuringRecovery)
1878 : {
1879 : /*
1880 : * If the snapshot contains full subxact data, the fastest way to
1881 : * check things is just to compare the given XID against both subxact
1882 : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1883 : * use pg_subtrans to convert a subxact XID to its parent XID, but
1884 : * then we need only look at top-level XIDs not subxacts.
1885 : */
1886 5521994 : if (!snapshot->suboverflowed)
1887 : {
1888 : /* we have full data, so search subxip */
1889 5521294 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1890 552 : return true;
1891 :
1892 : /* not there, fall through to search xip[] */
1893 : }
1894 : else
1895 : {
1896 : /*
1897 : * Snapshot overflowed, so convert xid to top-level. This is safe
1898 : * because we eliminated too-old XIDs above.
1899 : */
1900 700 : xid = SubTransGetTopmostTransaction(xid);
1901 :
1902 : /*
1903 : * If xid was indeed a subxact, we might now have an xid < xmin,
1904 : * so recheck to avoid an array scan. No point in rechecking
1905 : * xmax.
1906 : */
1907 700 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1908 0 : return false;
1909 : }
1910 :
1911 5521442 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1912 13260 : return true;
1913 : }
1914 : else
1915 : {
1916 : /*
1917 : * In recovery we store all xids in the subxip array because it is by
1918 : * far the bigger array, and we mostly don't know which xids are
1919 : * top-level and which are subxacts. The xip array is empty.
1920 : *
1921 : * We start by searching subtrans, if we overflowed.
1922 : */
1923 156 : if (snapshot->suboverflowed)
1924 : {
1925 : /*
1926 : * Snapshot overflowed, so convert xid to top-level. This is safe
1927 : * because we eliminated too-old XIDs above.
1928 : */
1929 8 : xid = SubTransGetTopmostTransaction(xid);
1930 :
1931 : /*
1932 : * If xid was indeed a subxact, we might now have an xid < xmin,
1933 : * so recheck to avoid an array scan. No point in rechecking
1934 : * xmax.
1935 : */
1936 8 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1937 0 : return false;
1938 : }
1939 :
1940 : /*
1941 : * We now have either a top-level xid higher than xmin or an
1942 : * indeterminate xid. We don't know whether it's top level or subxact
1943 : * but it doesn't matter. If it's present, the xid is visible.
1944 : */
1945 156 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1946 8 : return true;
1947 : }
1948 :
1949 5508330 : return false;
1950 : }
1951 :
1952 : /* ResourceOwner callbacks */
1953 :
1954 : static void
1955 55584 : ResOwnerReleaseSnapshot(Datum res)
1956 : {
1957 55584 : UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1958 55584 : }
|