Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * snapmgr.c
4 : * PostgreSQL snapshot manager
5 : *
6 : * The following functions return an MVCC snapshot that can be used in tuple
7 : * visibility checks:
8 : *
9 : * - GetTransactionSnapshot
10 : * - GetLatestSnapshot
11 : * - GetCatalogSnapshot
12 : * - GetNonHistoricCatalogSnapshot
13 : *
14 : * Each of these functions returns a reference to a statically allocated
15 : * snapshot. The statically allocated snapshot is subject to change on any
16 : * snapshot-related function call, and should not be used directly. Instead,
17 : * call PushActiveSnapshot() or RegisterSnapshot() to create a longer-lived
18 : * copy and use that.
19 : *
20 : * We keep track of snapshots in two ways: those "registered" by resowner.c,
21 : * and the "active snapshot" stack. All snapshots in either of them live in
22 : * persistent memory. When a snapshot is no longer in any of these lists
23 : * (tracked by separate refcounts on each snapshot), its memory can be freed.
24 : *
25 : * In addition to the above-mentioned MVCC snapshots, there are some special
26 : * snapshots like SnapshotSelf, SnapshotAny, and "dirty" snapshots. They can
27 : * only be used in limited contexts and cannot be registered or pushed to the
28 : * active stack.
29 : *
30 : * ActiveSnapshot stack
31 : * --------------------
32 : *
33 : * Most visibility checks use the current "active snapshot" returned by
34 : * GetActiveSnapshot(). When running normal queries, the active snapshot is
35 : * set when query execution begins based on the transaction isolation level.
36 : *
37 : * The active snapshot is tracked in a stack so that the currently active one
38 : * is at the top of the stack. It mirrors the process call stack: whenever we
39 : * recurse or switch context to fetch rows from a different portal for
40 : * example, the appropriate snapshot is pushed to become the active snapshot,
41 : * and popped on return. Once upon a time, ActiveSnapshot was just a global
42 : * variable that was saved and restored similar to CurrentMemoryContext, but
43 : * nowadays it's managed as a separate data structure so that we can keep
44 : * track of which snapshots are in use and reset MyProc->xmin when there is no
45 : * active snapshot.
46 : *
47 : * However, there are a couple of exceptions where the active snapshot stack
48 : * does not strictly mirror the call stack:
49 : *
50 : * - VACUUM and a few other utility commands manage their own transactions,
51 : * which take their own snapshots. They are called with an active snapshot
52 : * set, like most utility commands, but they pop the active snapshot that
53 : * was pushed by the caller. PortalRunUtility knows about the possibility
54 : * that the snapshot it pushed is no longer active on return.
55 : *
56 : * - When COMMIT or ROLLBACK is executed within a procedure or DO-block, the
57 : * active snapshot stack is destroyed, and re-established later when
58 : * subsequent statements in the procedure are executed. There are many
59 : * limitations on when in-procedure COMMIT/ROLLBACK is allowed; one such
60 : * limitation is that all the snapshots on the active snapshot stack are
61 : * known to portals that are being executed, which makes it safe to reset
62 : * the stack. See EnsurePortalSnapshotExists().
63 : *
64 : * Registered snapshots
65 : * --------------------
66 : *
67 : * In addition to snapshots pushed to the active snapshot stack, a snapshot
68 : * can be registered with a resource owner.
69 : *
70 : * The FirstXactSnapshot, if any, is treated a bit specially: we increment its
71 : * regd_count and list it in RegisteredSnapshots, but this reference is not
72 : * tracked by a resource owner. We used to use the TopTransactionResourceOwner
73 : * to track this snapshot reference, but that introduces logical circularity
74 : * and thus makes it impossible to clean up in a sane fashion. It's better to
75 : * handle this reference as an internally-tracked registration, so that this
76 : * module is entirely lower-level than ResourceOwners.
77 : *
78 : * Likewise, any snapshots that have been exported by pg_export_snapshot
79 : * have regd_count = 1 and are listed in RegisteredSnapshots, but are not
80 : * tracked by any resource owner.
81 : *
82 : * Likewise, the CatalogSnapshot is listed in RegisteredSnapshots when it
83 : * is valid, but is not tracked by any resource owner.
84 : *
85 : * The same is true for historic snapshots used during logical decoding;
86 : * their lifetime is managed separately (as they live longer than one xact.c
87 : * transaction).
88 : *
89 : * These arrangements let us reset MyProc->xmin when there are no snapshots
90 : * referenced by this transaction, and advance it when the one with oldest
91 : * Xmin is no longer referenced. For simplicity, however, only registered
92 : * snapshots, not active snapshots, participate in tracking which one is oldest;
93 : * we don't try to change MyProc->xmin except when the active-snapshot
94 : * stack is empty.
95 : *
96 : *
97 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
98 : * Portions Copyright (c) 1994, Regents of the University of California
99 : *
100 : * IDENTIFICATION
101 : * src/backend/utils/time/snapmgr.c
102 : *
103 : *-------------------------------------------------------------------------
104 : */
105 : #include "postgres.h"
106 :
107 : #include <sys/stat.h>
108 : #include <unistd.h>
109 :
110 : #include "access/subtrans.h"
111 : #include "access/transam.h"
112 : #include "access/xact.h"
113 : #include "datatype/timestamp.h"
114 : #include "lib/pairingheap.h"
115 : #include "miscadmin.h"
116 : #include "port/pg_lfind.h"
117 : #include "storage/fd.h"
118 : #include "storage/predicate.h"
119 : #include "storage/proc.h"
120 : #include "storage/procarray.h"
121 : #include "utils/builtins.h"
122 : #include "utils/memutils.h"
123 : #include "utils/resowner.h"
124 : #include "utils/snapmgr.h"
125 : #include "utils/syscache.h"
126 :
127 :
128 : /*
129 : * CurrentSnapshot points to the only snapshot taken in transaction-snapshot
130 : * mode, and to the latest one taken in a read-committed transaction.
131 : * SecondarySnapshot is a snapshot that's always up-to-date as of the current
132 : * instant, even in transaction-snapshot mode. It should only be used for
133 : * special-purpose code (say, RI checking.) CatalogSnapshot points to an
134 : * MVCC snapshot intended to be used for catalog scans; we must invalidate it
135 : * whenever a system catalog change occurs.
136 : *
137 : * These SnapshotData structs are static to simplify memory allocation
138 : * (see the hack in GetSnapshotData to avoid repeated malloc/free).
139 : */
140 : static SnapshotData CurrentSnapshotData = {SNAPSHOT_MVCC};	/* filled via GetSnapshotData() by Get/SetTransactionSnapshot */
141 : static SnapshotData SecondarySnapshotData = {SNAPSHOT_MVCC};	/* filled via GetSnapshotData() by GetLatestSnapshot */
142 : static SnapshotData CatalogSnapshotData = {SNAPSHOT_MVCC};	/* filled via GetSnapshotData() by GetNonHistoricCatalogSnapshot */
143 : SnapshotData SnapshotSelfData = {SNAPSHOT_SELF};	/* non-static, so visible to other modules */
144 : SnapshotData SnapshotAnyData = {SNAPSHOT_ANY};	/* non-static, so visible to other modules */
145 : SnapshotData SnapshotToastData = {SNAPSHOT_TOAST};	/* non-static, so visible to other modules */
146 :
147 : /* Pointers to valid snapshots; each starts out NULL */
148 : static Snapshot CurrentSnapshot = NULL;	/* set by GetTransactionSnapshot / SetTransactionSnapshot */
149 : static Snapshot SecondarySnapshot = NULL;	/* set by GetLatestSnapshot */
150 : static Snapshot CatalogSnapshot = NULL;	/* set by GetNonHistoricCatalogSnapshot, cleared by InvalidateCatalogSnapshot */
151 : static Snapshot HistoricSnapshot = NULL;	/* returned while HistoricSnapshotActive(); NOTE(review): assigned outside the code shown here */
152 :
153 : /*
154 : * These are updated by GetSnapshotData. We initialize them this way
155 : * for the convenience of TransactionIdIsInProgress: even in bootstrap
156 : * mode, we don't want it to say that BootstrapTransactionId is in progress.
157 : */
158 : TransactionId TransactionXmin = FirstNormalTransactionId;	/* non-static, so visible to other modules */
159 : TransactionId RecentXmin = FirstNormalTransactionId;	/* non-static, so visible to other modules */
160 :
161 : /* (table, ctid) => (cmin, cmax) mapping during timetravel */
162 : static HTAB *tuplecid_data = NULL;	/* starts out NULL; NOTE(review): set and cleared outside the code shown here */
163 :
164 : /*
165 : * Elements of the active snapshot stack.
166 : *
167 : * Each element here accounts for exactly one active_count on SnapshotData.
168 : *
169 : * NB: the code assumes that elements in this list are in non-increasing
170 : * order of as_level; also, the list must be NULL-terminated.
171 : */
172 : typedef struct ActiveSnapshotElt
173 : {
174 : Snapshot as_snap;	/* snapshot held by this entry; its active_count is bumped on push */
175 : int as_level;	/* transaction nesting level recorded when the entry was pushed */
176 : struct ActiveSnapshotElt *as_next;	/* entry that was on top before this one, or NULL */
177 : } ActiveSnapshotElt;
178 :
179 : /* Top of the stack of active snapshots; NULL when the stack is empty */
180 : static ActiveSnapshotElt *ActiveSnapshot = NULL;
181 :
182 : /*
183 : * Currently registered Snapshots. Ordered in a heap by xmin, so that we can
184 : * quickly find the one with lowest xmin, to advance our MyProc->xmin.
185 : */
186 : static int xmin_cmp(const pairingheap_node *a, const pairingheap_node *b,
187 : void *arg);	/* ordering callback installed in RegisteredSnapshots just below */
188 :
189 : static pairingheap RegisteredSnapshots = {&xmin_cmp, NULL, NULL};	/* comparator first; NOTE(review): the NULLs are presumably the callback arg and an empty root -- confirm in lib/pairingheap.h */
190 :
191 : /* has the transaction's first snapshot been set (by GetTransactionSnapshot or SetTransactionSnapshot)? */
192 : bool FirstSnapshotSet = false;
193 :
194 : /*
195 : * Remember the serializable transaction snapshot, if any. We cannot trust
196 : * FirstSnapshotSet in combination with IsolationUsesXactSnapshot(), because
197 : * GUC may be reset before us, changing the value of IsolationUsesXactSnapshot.
198 : */
199 : static Snapshot FirstXactSnapshot = NULL;	/* assigned only on the IsolationUsesXactSnapshot() paths */
200 :
201 : /* Define pathname of exported-snapshot files */
202 : #define SNAPSHOT_EXPORT_DIR "pg_snapshots"
203 :
204 : /* Structure holding info about exported snapshot. */
205 : typedef struct ExportedSnapshot
206 : {
207 : char *snapfile;	/* NOTE(review): presumably the exported file's path under SNAPSHOT_EXPORT_DIR -- confirm where the list is built */
208 : Snapshot snapshot;	/* the snapshot that was exported */
209 : } ExportedSnapshot;
210 :
211 : /* Current xact's exported snapshots (a list of ExportedSnapshot structs) */
212 : static List *exportedSnapshots = NIL;	/* starts out as the empty list */
213 :
214 : /* Prototypes for local functions */
215 : static Snapshot CopySnapshot(Snapshot snapshot);	/* allocate a copy with both refcounts zeroed */
216 : static void UnregisterSnapshotNoOwner(Snapshot snapshot);	/* drop one regd_count; the resource owner is handled by the caller */
217 : static void FreeSnapshot(Snapshot snapshot);	/* pfree a copied snapshot that has no references left */
218 : static void SnapshotResetXmin(void);	/* invoked after a snapshot reference is dropped (pop, catalog invalidation) */
219 :
220 : /* ResourceOwner callbacks to track snapshot references */
221 : static void ResOwnerReleaseSnapshot(Datum res);
222 :
223 : static const ResourceOwnerDesc snapshot_resowner_desc =	/* resource kind passed to ResourceOwnerRemember/Forget by the wrappers below */
224 : {
225 : .name = "snapshot reference",
226 : .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
227 : .release_priority = RELEASE_PRIO_SNAPSHOT_REFS,
228 : .ReleaseResource = ResOwnerReleaseSnapshot,	/* the callback declared just above */
229 : .DebugPrint = NULL /* the default message is fine */
230 : };
231 :
232 : /* Convenience wrappers over ResourceOwnerRemember/Forget */
233 : static inline void
234 15649848 : ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snap)
235 : {
236 15649848 : ResourceOwnerRemember(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
237 15649848 : }
238 : static inline void
239 15590864 : ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snap)
240 : {
241 15590864 : ResourceOwnerForget(owner, PointerGetDatum(snap), &snapshot_resowner_desc);
242 15590864 : }
243 :
244 : /*
245 : * Snapshot fields to be serialized.
246 : *
247 : * Only these fields need to be sent to the cooperating backend; the
248 : * remaining ones can (and must) be set by the receiver upon restore.
249 : */
250 : typedef struct SerializedSnapshotData
251 : {	/* NOTE(review): each member appears to mirror the SnapshotData field of the same name -- confirm against the serialize/restore code */
252 : TransactionId xmin;
253 : TransactionId xmax;
254 : uint32 xcnt;	/* unsigned count, unlike subxcnt below */
255 : int32 subxcnt;	/* signed count */
256 : bool suboverflowed;
257 : bool takenDuringRecovery;
258 : CommandId curcid;
259 : } SerializedSnapshotData;
260 :
261 : /*
262 : * GetTransactionSnapshot
263 : * Get the appropriate snapshot for a new query in a transaction.
264 : *
265 : * Note that the return value points at static storage that will be modified
266 : * by future calls and by CommandCounterIncrement(). Callers must call
267 : * RegisterSnapshot or PushActiveSnapshot on the returned snap before doing
268 : * any other non-trivial work that could invalidate it.
269 : */
270 : Snapshot
271 1877058 : GetTransactionSnapshot(void)
272 : {
273 : /*
274 : * Return historic snapshot if doing logical decoding.
275 : *
276 : * Historic snapshots are only usable for catalog access, not for
277 : * general-purpose queries. The caller is responsible for ensuring that
278 : * the snapshot is used correctly! (PostgreSQL code never calls this
279 : * during logical decoding, but extensions can do it.)
280 : */
281 1877058 : if (HistoricSnapshotActive())
282 : {
283 : /*
284 : * We'll never need a non-historic transaction snapshot in this
285 : * (sub-)transaction, so there's no need to be careful to set one up
286 : * for later calls to GetTransactionSnapshot().
287 : */
288 : Assert(!FirstSnapshotSet);
289 0 : return HistoricSnapshot;
290 : }
291 :
292 : /* First call in transaction? */
293 1877058 : if (!FirstSnapshotSet)
294 : {
295 : /*
296 : * Don't allow catalog snapshot to be older than xact snapshot. Must
297 : * do this first to allow the empty-heap Assert to succeed.
298 : */
299 737258 : InvalidateCatalogSnapshot();
300 :
301 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
302 : Assert(FirstXactSnapshot == NULL);
303 :
304 737258 : if (IsInParallelMode())
305 0 : elog(ERROR,
306 : "cannot take query snapshot during a parallel operation");
307 :
308 : /*
309 : * In transaction-snapshot mode, the first snapshot must live until
310 : * end of xact regardless of what the caller does with it, so we must
311 : * make a copy of it rather than returning CurrentSnapshotData
312 : * directly. Furthermore, if we're running in serializable mode,
313 : * predicate.c needs to wrap the snapshot fetch in its own processing.
314 : */
315 737258 : if (IsolationUsesXactSnapshot())
316 : {
317 : /* First, create the snapshot in CurrentSnapshotData */
318 5452 : if (IsolationIsSerializable())
319 3280 : CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
320 : else
321 2172 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
322 : /* Make a saved copy */
323 5452 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
324 5452 : FirstXactSnapshot = CurrentSnapshot;
325 : /* Mark it as "registered" in FirstXactSnapshot */
326 5452 : FirstXactSnapshot->regd_count++;
327 5452 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
328 : }
329 : else
330 731806 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
331 :
332 737258 : FirstSnapshotSet = true;
333 737258 : return CurrentSnapshot;
334 : }
335 :
336 1139800 : if (IsolationUsesXactSnapshot())
337 139378 : return CurrentSnapshot;
338 :
339 : /* Don't allow catalog snapshot to be older than xact snapshot. */
340 1000422 : InvalidateCatalogSnapshot();
341 :
342 1000422 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
343 :
344 1000422 : return CurrentSnapshot;
345 : }
346 :
347 : /*
348 : * GetLatestSnapshot
349 : * Get a snapshot that is up-to-date as of the current instant,
350 : * even if we are executing in transaction-snapshot mode.
351 : */
352 : Snapshot
353 152600 : GetLatestSnapshot(void)
354 : {
355 : /*
356 : * We might be able to relax this, but nothing that could otherwise work
357 : * needs it.
358 : */
359 152600 : if (IsInParallelMode())
360 0 : elog(ERROR,
361 : "cannot update SecondarySnapshot during a parallel operation");
362 :
363 : /*
364 : * So far there are no cases requiring support for GetLatestSnapshot()
365 : * during logical decoding, but it wouldn't be hard to add if required.
366 : */
367 : Assert(!HistoricSnapshotActive());
368 :
369 : /* If first call in transaction, go ahead and set the xact snapshot */
370 152600 : if (!FirstSnapshotSet)
371 100 : return GetTransactionSnapshot();
372 :
373 152500 : SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
374 :
375 152500 : return SecondarySnapshot;
376 : }
377 :
378 : /*
379 : * GetCatalogSnapshot
380 : * Get a snapshot that is sufficiently up-to-date for scan of the
381 : * system catalog with the specified OID.
382 : */
383 : Snapshot
384 14383294 : GetCatalogSnapshot(Oid relid)
385 : {
386 : /*
387 : * Return historic snapshot while we're doing logical decoding, so we can
388 : * see the appropriate state of the catalog.
389 : *
390 : * This is the primary reason for needing to reset the system caches after
391 : * finishing decoding.
392 : */
393 14383294 : if (HistoricSnapshotActive())
394 32414 : return HistoricSnapshot;
395 :
396 14350880 : return GetNonHistoricCatalogSnapshot(relid);
397 : }
398 :
399 : /*
400 : * GetNonHistoricCatalogSnapshot
401 : * Get a snapshot that is sufficiently up-to-date for scan of the system
402 : * catalog with the specified OID, even while historic snapshots are set
403 : * up.
404 : */
405 : Snapshot
406 14354228 : GetNonHistoricCatalogSnapshot(Oid relid)
407 : {
408 : /*
409 : * If the caller is trying to scan a relation that has no syscache, no
410 : * catcache invalidations will be sent when it is updated. For a few key
411 : * relations, snapshot invalidations are sent instead. If we're trying to
412 : * scan a relation for which neither catcache nor snapshot invalidations
413 : * are sent, we must refresh the snapshot every time.
414 : */
415 14354228 : if (CatalogSnapshot &&
416 12514032 : !RelationInvalidatesSnapshotsOnly(relid) &&
417 10955326 : !RelationHasSysCache(relid))
418 486088 : InvalidateCatalogSnapshot();
419 :
420 14354228 : if (CatalogSnapshot == NULL)
421 : {
422 : /* Get new snapshot. */
423 2326284 : CatalogSnapshot = GetSnapshotData(&CatalogSnapshotData);
424 :
425 : /*
426 : * Make sure the catalog snapshot will be accounted for in decisions
427 : * about advancing PGPROC->xmin. We could apply RegisterSnapshot, but
428 : * that would result in making a physical copy, which is overkill; and
429 : * it would also create a dependency on some resource owner, which we
430 : * do not want for reasons explained at the head of this file. Instead
431 : * just shove the CatalogSnapshot into the pairing heap manually. This
432 : * has to be reversed in InvalidateCatalogSnapshot, of course.
433 : *
434 : * NB: it had better be impossible for this to throw error, since the
435 : * CatalogSnapshot pointer is already valid.
436 : */
437 2326284 : pairingheap_add(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
438 : }
439 :
440 14354228 : return CatalogSnapshot;
441 : }
442 :
443 : /*
444 : * InvalidateCatalogSnapshot
445 : * Mark the current catalog snapshot, if any, as invalid
446 : *
447 : * We could change this API to allow the caller to provide more fine-grained
448 : * invalidation details, so that a change to relation A wouldn't prevent us
449 : * from using our cached snapshot to scan relation B, but so far there's no
450 : * evidence that the CPU cycles we spent tracking such fine details would be
451 : * well-spent.
452 : */
453 : void
454 28251166 : InvalidateCatalogSnapshot(void)
455 : {
456 28251166 : if (CatalogSnapshot)
457 : {
458 2326284 : pairingheap_remove(&RegisteredSnapshots, &CatalogSnapshot->ph_node);
459 2326284 : CatalogSnapshot = NULL;
460 2326284 : SnapshotResetXmin();
461 : }
462 28251166 : }
463 :
464 : /*
465 : * InvalidateCatalogSnapshotConditionally
466 : * Drop catalog snapshot if it's the only one we have
467 : *
468 : * This is called when we are about to wait for client input, so we don't
469 : * want to continue holding the catalog snapshot if it might mean that the
470 : * global xmin horizon can't advance. However, if there are other snapshots
471 : * still active or registered, the catalog snapshot isn't likely to be the
472 : * oldest one, so we might as well keep it.
473 : */
474 : void
475 795904 : InvalidateCatalogSnapshotConditionally(void)
476 : {
477 795904 : if (CatalogSnapshot &&
478 106886 : ActiveSnapshot == NULL &&
479 105208 : pairingheap_is_singular(&RegisteredSnapshots))
480 18316 : InvalidateCatalogSnapshot();
481 795904 : }
482 :
483 : /*
484 : * SnapshotSetCommandId
485 : * Propagate CommandCounterIncrement into the static snapshots, if set
486 : */
487 : void
488 1148282 : SnapshotSetCommandId(CommandId curcid)
489 : {
490 1148282 : if (!FirstSnapshotSet)
491 20172 : return;
492 :
493 1128110 : if (CurrentSnapshot)
494 1128110 : CurrentSnapshot->curcid = curcid;
495 1128110 : if (SecondarySnapshot)
496 157924 : SecondarySnapshot->curcid = curcid;
497 : /* Should we do the same with CatalogSnapshot? */
498 : }
499 :
500 : /*
501 : * SetTransactionSnapshot
502 : * Set the transaction's snapshot from an imported MVCC snapshot.
503 : *
504 : * Note that this is very closely tied to GetTransactionSnapshot --- it
505 : * must take care of all the same considerations as the first-snapshot case
506 : * in GetTransactionSnapshot.
507 : */
508 : static void
509 3154 : SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
510 : int sourcepid, PGPROC *sourceproc)
511 : {
512 : /* Caller should have checked this already */
513 : Assert(!FirstSnapshotSet);
514 :
515 : /* Better do this to ensure following Assert succeeds. */
516 3154 : InvalidateCatalogSnapshot();
517 :
518 : Assert(pairingheap_is_empty(&RegisteredSnapshots));
519 : Assert(FirstXactSnapshot == NULL);
520 : Assert(!HistoricSnapshotActive());
521 :
522 : /*
523 : * Even though we are not going to use the snapshot it computes, we must
524 : * call GetSnapshotData, for two reasons: (1) to be sure that
525 : * CurrentSnapshotData's XID arrays have been allocated, and (2) to update
526 : * the state for GlobalVis*.
527 : */
528 3154 : CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
529 :
530 : /*
531 : * Now copy appropriate fields from the source snapshot.
532 : */
533 3154 : CurrentSnapshot->xmin = sourcesnap->xmin;
534 3154 : CurrentSnapshot->xmax = sourcesnap->xmax;
535 3154 : CurrentSnapshot->xcnt = sourcesnap->xcnt;
536 : Assert(sourcesnap->xcnt <= GetMaxSnapshotXidCount());
537 3154 : if (sourcesnap->xcnt > 0)
538 694 : memcpy(CurrentSnapshot->xip, sourcesnap->xip,
539 694 : sourcesnap->xcnt * sizeof(TransactionId));
540 3154 : CurrentSnapshot->subxcnt = sourcesnap->subxcnt;
541 : Assert(sourcesnap->subxcnt <= GetMaxSnapshotSubxidCount());
542 3154 : if (sourcesnap->subxcnt > 0)
543 0 : memcpy(CurrentSnapshot->subxip, sourcesnap->subxip,
544 0 : sourcesnap->subxcnt * sizeof(TransactionId));
545 3154 : CurrentSnapshot->suboverflowed = sourcesnap->suboverflowed;
546 3154 : CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
547 : /* NB: curcid should NOT be copied, it's a local matter */
548 :
549 3154 : CurrentSnapshot->snapXactCompletionCount = 0;
550 :
551 : /*
552 : * Now we have to fix what GetSnapshotData did with MyProc->xmin and
553 : * TransactionXmin. There is a race condition: to make sure we are not
554 : * causing the global xmin to go backwards, we have to test that the
555 : * source transaction is still running, and that has to be done
556 : * atomically. So let procarray.c do it.
557 : *
558 : * Note: in serializable mode, predicate.c will do this a second time. It
559 : * doesn't seem worth contorting the logic here to avoid two calls,
560 : * especially since it's not clear that predicate.c *must* do this.
561 : */
562 3154 : if (sourceproc != NULL)
563 : {
564 3122 : if (!ProcArrayInstallRestoredXmin(CurrentSnapshot->xmin, sourceproc))
565 0 : ereport(ERROR,
566 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
567 : errmsg("could not import the requested snapshot"),
568 : errdetail("The source transaction is not running anymore.")));
569 : }
570 32 : else if (!ProcArrayInstallImportedXmin(CurrentSnapshot->xmin, sourcevxid))
571 0 : ereport(ERROR,
572 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
573 : errmsg("could not import the requested snapshot"),
574 : errdetail("The source process with PID %d is not running anymore.",
575 : sourcepid)));
576 :
577 : /*
578 : * In transaction-snapshot mode, the first snapshot must live until end of
579 : * xact, so we must make a copy of it. Furthermore, if we're running in
580 : * serializable mode, predicate.c needs to do its own processing.
581 : */
582 3154 : if (IsolationUsesXactSnapshot())
583 : {
584 460 : if (IsolationIsSerializable())
585 26 : SetSerializableTransactionSnapshot(CurrentSnapshot, sourcevxid,
586 : sourcepid);
587 : /* Make a saved copy */
588 460 : CurrentSnapshot = CopySnapshot(CurrentSnapshot);
589 460 : FirstXactSnapshot = CurrentSnapshot;
590 : /* Mark it as "registered" in FirstXactSnapshot */
591 460 : FirstXactSnapshot->regd_count++;
592 460 : pairingheap_add(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
593 : }
594 :
595 3154 : FirstSnapshotSet = true;
596 3154 : }
597 :
598 : /*
599 : * CopySnapshot
600 : * Copy the given snapshot.
601 : *
602 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
603 : * to 0. The returned snapshot has the copied flag set.
604 : */
605 : static Snapshot
606 16420712 : CopySnapshot(Snapshot snapshot)
607 : {
608 : Snapshot newsnap;
609 : Size subxipoff;
610 : Size size;
611 :
612 : Assert(snapshot != InvalidSnapshot);
613 :
614 : /* We allocate any XID arrays needed in the same palloc block. */
615 16420712 : size = subxipoff = sizeof(SnapshotData) +
616 16420712 : snapshot->xcnt * sizeof(TransactionId);
617 16420712 : if (snapshot->subxcnt > 0)
618 127292 : size += snapshot->subxcnt * sizeof(TransactionId);
619 :
620 16420712 : newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
621 16420712 : memcpy(newsnap, snapshot, sizeof(SnapshotData));
622 :
623 16420712 : newsnap->regd_count = 0;
624 16420712 : newsnap->active_count = 0;
625 16420712 : newsnap->copied = true;
626 16420712 : newsnap->snapXactCompletionCount = 0;
627 :
628 : /* setup XID array */
629 16420712 : if (snapshot->xcnt > 0)
630 : {
631 4492998 : newsnap->xip = (TransactionId *) (newsnap + 1);
632 4492998 : memcpy(newsnap->xip, snapshot->xip,
633 4492998 : snapshot->xcnt * sizeof(TransactionId));
634 : }
635 : else
636 11927714 : newsnap->xip = NULL;
637 :
638 : /*
639 : * Setup subXID array. Don't bother to copy it if it had overflowed,
640 : * though, because it's not used anywhere in that case. Except if it's a
641 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
642 : * well in that case, so we mustn't lose them.
643 : */
644 16420712 : if (snapshot->subxcnt > 0 &&
645 127292 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
646 : {
647 127266 : newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
648 127266 : memcpy(newsnap->subxip, snapshot->subxip,
649 127266 : snapshot->subxcnt * sizeof(TransactionId));
650 : }
651 : else
652 16293446 : newsnap->subxip = NULL;
653 :
654 16420712 : return newsnap;
655 : }
656 :
657 : /*
658 : * FreeSnapshot
659 : * Free the memory associated with a snapshot.
660 : */
661 : static void
662 16372340 : FreeSnapshot(Snapshot snapshot)
663 : {
664 : Assert(snapshot->regd_count == 0);
665 : Assert(snapshot->active_count == 0);
666 : Assert(snapshot->copied);
667 :
668 16372340 : pfree(snapshot);
669 16372340 : }
670 :
671 : /*
672 : * PushActiveSnapshot
673 : * Set the given snapshot as the current active snapshot
674 : *
675 : * If the passed snapshot is a statically-allocated one, or it is possibly
676 : * subject to a future command counter update, create a new long-lived copy
677 : * with active refcount=1. Otherwise, only increment the refcount.
678 : */
679 : void
680 2036046 : PushActiveSnapshot(Snapshot snapshot)
681 : {
682 2036046 : PushActiveSnapshotWithLevel(snapshot, GetCurrentTransactionNestLevel());
683 2036046 : }
684 :
685 : /*
686 : * PushActiveSnapshotWithLevel
687 : * Set the given snapshot as the current active snapshot
688 : *
689 : * Same as PushActiveSnapshot except that caller can specify the
690 : * transaction nesting level that "owns" the snapshot. This level
691 : * must not be deeper than the current top of the snapshot stack.
692 : */
693 : void
694 2330814 : PushActiveSnapshotWithLevel(Snapshot snapshot, int snap_level)
695 : {
696 : ActiveSnapshotElt *newactive;
697 :
698 : Assert(snapshot != InvalidSnapshot);
699 : Assert(ActiveSnapshot == NULL || snap_level >= ActiveSnapshot->as_level);
700 :
701 2330814 : newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
702 :
703 : /*
704 : * Checking SecondarySnapshot is probably useless here, but it seems
705 : * better to be sure.
706 : */
707 2330814 : if (snapshot == CurrentSnapshot || snapshot == SecondarySnapshot ||
708 466462 : !snapshot->copied)
709 1864352 : newactive->as_snap = CopySnapshot(snapshot);
710 : else
711 466462 : newactive->as_snap = snapshot;
712 :
713 2330814 : newactive->as_next = ActiveSnapshot;
714 2330814 : newactive->as_level = snap_level;
715 :
716 2330814 : newactive->as_snap->active_count++;
717 :
718 2330814 : ActiveSnapshot = newactive;
719 2330814 : }
720 :
721 : /*
722 : * PushCopiedSnapshot
723 : * As above, except forcibly copy the presented snapshot.
724 : *
725 : * This should be used when the ActiveSnapshot has to be modifiable, for
726 : * example if the caller intends to call UpdateActiveSnapshotCommandId.
727 : * The new snapshot will be released when popped from the stack.
728 : */
729 : void
730 117470 : PushCopiedSnapshot(Snapshot snapshot)
731 : {
732 117470 : PushActiveSnapshot(CopySnapshot(snapshot));
733 117470 : }
734 :
735 : /*
736 : * UpdateActiveSnapshotCommandId
737 : *
738 : * Update the current CID of the active snapshot. This can only be applied
739 : * to a snapshot that is not referenced elsewhere.
740 : */
741 : void
742 122366 : UpdateActiveSnapshotCommandId(void)
743 : {
744 : CommandId save_curcid,
745 : curcid;
746 :
747 : Assert(ActiveSnapshot != NULL);
748 : Assert(ActiveSnapshot->as_snap->active_count == 1);
749 : Assert(ActiveSnapshot->as_snap->regd_count == 0);
750 :
751 : /*
752 : * Don't allow modification of the active snapshot during parallel
753 : * operation. We share the snapshot to worker backends at the beginning
754 : * of parallel operation, so any change to the snapshot can lead to
755 : * inconsistencies. We have other defenses against
756 : * CommandCounterIncrement, but there are a few places that call this
757 : * directly, so we put an additional guard here.
758 : */
759 122366 : save_curcid = ActiveSnapshot->as_snap->curcid;
760 122366 : curcid = GetCurrentCommandId(false);
761 122366 : if (IsInParallelMode() && save_curcid != curcid)
762 0 : elog(ERROR, "cannot modify commandid in active snapshot during a parallel operation");
763 122366 : ActiveSnapshot->as_snap->curcid = curcid;
764 122366 : }
765 :
766 : /*
767 : * PopActiveSnapshot
768 : *
769 : * Remove the topmost snapshot from the active snapshot stack, decrementing the
770 : * reference count, and free it if this was the last reference.
771 : */
772 : void
773 2275938 : PopActiveSnapshot(void)
774 : {
775 : ActiveSnapshotElt *newstack;
776 :
777 2275938 : newstack = ActiveSnapshot->as_next;
778 :
779 : Assert(ActiveSnapshot->as_snap->active_count > 0);
780 :
781 2275938 : ActiveSnapshot->as_snap->active_count--;
782 :
783 2275938 : if (ActiveSnapshot->as_snap->active_count == 0 &&
784 2239482 : ActiveSnapshot->as_snap->regd_count == 0)
785 1662732 : FreeSnapshot(ActiveSnapshot->as_snap);
786 :
787 2275938 : pfree(ActiveSnapshot);
788 2275938 : ActiveSnapshot = newstack;
789 :
790 2275938 : SnapshotResetXmin();
791 2275938 : }
792 :
793 : /*
794 : * GetActiveSnapshot
795 : * Return the topmost snapshot in the Active stack.
796 : */
797 : Snapshot
798 1088078 : GetActiveSnapshot(void)
799 : {
800 : Assert(ActiveSnapshot != NULL);
801 :
802 1088078 : return ActiveSnapshot->as_snap;
803 : }
804 :
805 : /*
806 : * ActiveSnapshotSet
807 : * Return whether there is at least one snapshot in the Active stack
808 : */
809 : bool
810 1077586 : ActiveSnapshotSet(void)
811 : {
812 1077586 : return ActiveSnapshot != NULL;
813 : }
814 :
815 : /*
816 : * RegisterSnapshot
817 : * Register a snapshot as being in use by the current resource owner
818 : *
819 : * If InvalidSnapshot is passed, it is not registered.
820 : */
821 : Snapshot
822 16832160 : RegisterSnapshot(Snapshot snapshot)
823 : {
824 16832160 : if (snapshot == InvalidSnapshot)
825 1182532 : return InvalidSnapshot;
826 :
827 15649628 : return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
828 : }
829 :
830 : /*
831 : * RegisterSnapshotOnOwner
832 : * As above, but use the specified resource owner
833 : */
834 : Snapshot
835 15649848 : RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
836 : {
837 : Snapshot snap;
838 :
839 15649848 : if (snapshot == InvalidSnapshot)
840 0 : return InvalidSnapshot;
841 :
842 : /* Static snapshot? Create a persistent copy */
843 15649848 : snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
844 :
845 : /* and tell resowner.c about it */
846 15649848 : ResourceOwnerEnlarge(owner);
847 15649848 : snap->regd_count++;
848 15649848 : ResourceOwnerRememberSnapshot(owner, snap);
849 :
850 15649848 : if (snap->regd_count == 1)
851 15019182 : pairingheap_add(&RegisteredSnapshots, &snap->ph_node);
852 :
853 15649848 : return snap;
854 : }
855 :
856 : /*
857 : * UnregisterSnapshot
858 : *
859 : * Decrement the reference count of a snapshot, remove the corresponding
860 : * reference from CurrentResourceOwner, and free the snapshot if no more
861 : * references remain.
862 : */
863 : void
864 16673370 : UnregisterSnapshot(Snapshot snapshot)
865 : {
866 16673370 : if (snapshot == NULL)
867 1125298 : return;
868 :
869 15548072 : UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
870 : }
871 :
872 : /*
873 : * UnregisterSnapshotFromOwner
874 : * As above, but use the specified resource owner
875 : */
876 : void
877 15590864 : UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
878 : {
879 15590864 : if (snapshot == NULL)
880 0 : return;
881 :
882 15590864 : ResourceOwnerForgetSnapshot(owner, snapshot);
883 15590864 : UnregisterSnapshotNoOwner(snapshot);
884 : }
885 :
886 : static void
887 15649848 : UnregisterSnapshotNoOwner(Snapshot snapshot)
888 : {
889 : Assert(snapshot->regd_count > 0);
890 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
891 :
892 15649848 : snapshot->regd_count--;
893 15649848 : if (snapshot->regd_count == 0)
894 15019182 : pairingheap_remove(&RegisteredSnapshots, &snapshot->ph_node);
895 :
896 15649848 : if (snapshot->regd_count == 0 && snapshot->active_count == 0)
897 : {
898 14703888 : FreeSnapshot(snapshot);
899 14703888 : SnapshotResetXmin();
900 : }
901 15649848 : }
902 :
903 : /*
904 : * Comparison function for RegisteredSnapshots heap. Snapshots are ordered
905 : * by xmin, so that the snapshot with smallest xmin is at the top.
906 : */
907 : static int
908 15011852 : xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
909 : {
910 15011852 : const SnapshotData *asnap = pairingheap_const_container(SnapshotData, ph_node, a);
911 15011852 : const SnapshotData *bsnap = pairingheap_const_container(SnapshotData, ph_node, b);
912 :
913 15011852 : if (TransactionIdPrecedes(asnap->xmin, bsnap->xmin))
914 77936 : return 1;
915 14933916 : else if (TransactionIdFollows(asnap->xmin, bsnap->xmin))
916 21410 : return -1;
917 : else
918 14912506 : return 0;
919 : }
920 :
921 : /*
922 : * SnapshotResetXmin
923 : *
924 : * If there are no more snapshots, we can reset our PGPROC->xmin to
925 : * InvalidTransactionId. Note we can do this without locking because we assume
926 : * that storing an Xid is atomic.
927 : *
928 : * Even if there are some remaining snapshots, we may be able to advance our
929 : * PGPROC->xmin to some degree. This typically happens when a portal is
930 : * dropped. For efficiency, we only consider recomputing PGPROC->xmin when
931 : * the active snapshot stack is empty; this allows us not to need to track
932 : * which active snapshot is oldest.
933 : */
934 : static void
935 19365952 : SnapshotResetXmin(void)
936 : {
937 : Snapshot minSnapshot;
938 :
939 19365952 : if (ActiveSnapshot != NULL)
940 13842670 : return;
941 :
942 5523282 : if (pairingheap_is_empty(&RegisteredSnapshots))
943 : {
944 1772128 : MyProc->xmin = TransactionXmin = InvalidTransactionId;
945 1772128 : return;
946 : }
947 :
948 3751154 : minSnapshot = pairingheap_container(SnapshotData, ph_node,
949 : pairingheap_first(&RegisteredSnapshots));
950 :
951 3751154 : if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
952 7358 : MyProc->xmin = TransactionXmin = minSnapshot->xmin;
953 : }
954 :
955 : /*
956 : * AtSubCommit_Snapshot
957 : */
958 : void
959 10660 : AtSubCommit_Snapshot(int level)
960 : {
961 : ActiveSnapshotElt *active;
962 :
963 : /*
964 : * Relabel the active snapshots set in this subtransaction as though they
965 : * are owned by the parent subxact.
966 : */
967 10660 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
968 : {
969 9094 : if (active->as_level < level)
970 9094 : break;
971 0 : active->as_level = level - 1;
972 : }
973 10660 : }
974 :
975 : /*
976 : * AtSubAbort_Snapshot
977 : * Clean up snapshots after a subtransaction abort
978 : */
979 : void
980 9382 : AtSubAbort_Snapshot(int level)
981 : {
982 : /* Forget the active snapshots set by this subtransaction */
983 15102 : while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
984 : {
985 : ActiveSnapshotElt *next;
986 :
987 5720 : next = ActiveSnapshot->as_next;
988 :
989 : /*
990 : * Decrement the snapshot's active count. If it's still registered or
991 : * marked as active by an outer subtransaction, we can't free it yet.
992 : */
993 : Assert(ActiveSnapshot->as_snap->active_count >= 1);
994 5720 : ActiveSnapshot->as_snap->active_count -= 1;
995 :
996 5720 : if (ActiveSnapshot->as_snap->active_count == 0 &&
997 5720 : ActiveSnapshot->as_snap->regd_count == 0)
998 5720 : FreeSnapshot(ActiveSnapshot->as_snap);
999 :
1000 : /* and free the stack element */
1001 5720 : pfree(ActiveSnapshot);
1002 :
1003 5720 : ActiveSnapshot = next;
1004 : }
1005 :
1006 9382 : SnapshotResetXmin();
1007 9382 : }
1008 :
1009 : /*
1010 : * AtEOXact_Snapshot
1011 : * Snapshot manager's cleanup function for end of transaction
1012 : */
1013 : void
1014 1026420 : AtEOXact_Snapshot(bool isCommit, bool resetXmin)
1015 : {
1016 : /*
1017 : * In transaction-snapshot mode we must release our privately-managed
1018 : * reference to the transaction snapshot. We must remove it from
1019 : * RegisteredSnapshots to keep the check below happy. But we don't bother
1020 : * to do FreeSnapshot, for two reasons: the memory will go away with
1021 : * TopTransactionContext anyway, and if someone has left the snapshot
1022 : * stacked as active, we don't want the code below to be chasing through a
1023 : * dangling pointer.
1024 : */
1025 1026420 : if (FirstXactSnapshot != NULL)
1026 : {
1027 : Assert(FirstXactSnapshot->regd_count > 0);
1028 : Assert(!pairingheap_is_empty(&RegisteredSnapshots));
1029 5912 : pairingheap_remove(&RegisteredSnapshots, &FirstXactSnapshot->ph_node);
1030 : }
1031 1026420 : FirstXactSnapshot = NULL;
1032 :
1033 : /*
1034 : * If we exported any snapshots, clean them up.
1035 : */
1036 1026420 : if (exportedSnapshots != NIL)
1037 : {
1038 : ListCell *lc;
1039 :
1040 : /*
1041 : * Get rid of the files. Unlink failure is only a WARNING because (1)
1042 : * it's too late to abort the transaction, and (2) leaving a leaked
1043 : * file around has little real consequence anyway.
1044 : *
1045 : * We also need to remove the snapshots from RegisteredSnapshots to
1046 : * prevent a warning below.
1047 : *
1048 : * As with the FirstXactSnapshot, we don't need to free resources of
1049 : * the snapshot itself as it will go away with the memory context.
1050 : */
1051 36 : foreach(lc, exportedSnapshots)
1052 : {
1053 18 : ExportedSnapshot *esnap = (ExportedSnapshot *) lfirst(lc);
1054 :
1055 18 : if (unlink(esnap->snapfile))
1056 0 : elog(WARNING, "could not unlink file \"%s\": %m",
1057 : esnap->snapfile);
1058 :
1059 18 : pairingheap_remove(&RegisteredSnapshots,
1060 18 : &esnap->snapshot->ph_node);
1061 : }
1062 :
1063 18 : exportedSnapshots = NIL;
1064 : }
1065 :
1066 : /* Drop catalog snapshot if any */
1067 1026420 : InvalidateCatalogSnapshot();
1068 :
1069 : /* On commit, complain about leftover snapshots */
1070 1026420 : if (isCommit)
1071 : {
1072 : ActiveSnapshotElt *active;
1073 :
1074 976568 : if (!pairingheap_is_empty(&RegisteredSnapshots))
1075 0 : elog(WARNING, "registered snapshots seem to remain after cleanup");
1076 :
1077 : /* complain about unpopped active snapshots */
1078 976568 : for (active = ActiveSnapshot; active != NULL; active = active->as_next)
1079 0 : elog(WARNING, "snapshot %p still active", active);
1080 : }
1081 :
1082 : /*
1083 : * And reset our state. We don't need to free the memory explicitly --
1084 : * it'll go away with TopTransactionContext.
1085 : */
1086 1026420 : ActiveSnapshot = NULL;
1087 1026420 : pairingheap_reset(&RegisteredSnapshots);
1088 :
1089 1026420 : CurrentSnapshot = NULL;
1090 1026420 : SecondarySnapshot = NULL;
1091 :
1092 1026420 : FirstSnapshotSet = false;
1093 :
1094 : /*
1095 : * During normal commit processing, we call ProcArrayEndTransaction() to
1096 : * reset the MyProc->xmin. That call happens prior to the call to
1097 : * AtEOXact_Snapshot(), so we need not touch xmin here at all.
1098 : */
1099 1026420 : if (resetXmin)
1100 50460 : SnapshotResetXmin();
1101 :
1102 : Assert(resetXmin || MyProc->xmin == 0);
1103 1026420 : }
1104 :
1105 :
1106 : /*
1107 : * ExportSnapshot
1108 : * Export the snapshot to a file so that other backends can import it.
1109 : * Returns the token (the file name) that can be used to import this
1110 : * snapshot.
1111 : */
1112 : char *
1113 18 : ExportSnapshot(Snapshot snapshot)
1114 : {
1115 : TransactionId topXid;
1116 : TransactionId *children;
1117 : ExportedSnapshot *esnap;
1118 : int nchildren;
1119 : int addTopXid;
1120 : StringInfoData buf;
1121 : FILE *f;
1122 : int i;
1123 : MemoryContext oldcxt;
1124 : char path[MAXPGPATH];
1125 : char pathtmp[MAXPGPATH];
1126 :
1127 : /*
1128 : * It's tempting to call RequireTransactionBlock here, since it's not very
1129 : * useful to export a snapshot that will disappear immediately afterwards.
1130 : * However, we haven't got enough information to do that, since we don't
1131 : * know if we're at top level or not. For example, we could be inside a
1132 : * plpgsql function that is going to fire off other transactions via
1133 : * dblink. Rather than disallow perfectly legitimate usages, don't make a
1134 : * check.
1135 : *
1136 : * Also note that we don't make any restriction on the transaction's
1137 : * isolation level; however, importers must check the level if they are
1138 : * serializable.
1139 : */
1140 :
1141 : /*
1142 : * Get our transaction ID if there is one, to include in the snapshot.
1143 : */
1144 18 : topXid = GetTopTransactionIdIfAny();
1145 :
1146 : /*
1147 : * We cannot export a snapshot from a subtransaction because there's no
1148 : * easy way for importers to verify that the same subtransaction is still
1149 : * running.
1150 : */
1151 18 : if (IsSubTransaction())
1152 0 : ereport(ERROR,
1153 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1154 : errmsg("cannot export a snapshot from a subtransaction")));
1155 :
1156 : /*
1157 : * We do however allow previous committed subtransactions to exist.
1158 : * Importers of the snapshot must see them as still running, so get their
1159 : * XIDs to add them to the snapshot.
1160 : */
1161 18 : nchildren = xactGetCommittedChildren(&children);
1162 :
1163 : /*
1164 : * Generate file path for the snapshot. We start numbering of snapshots
1165 : * inside the transaction from 1.
1166 : */
1167 18 : snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
1168 18 : MyProc->vxid.procNumber, MyProc->vxid.lxid,
1169 18 : list_length(exportedSnapshots) + 1);
1170 :
1171 : /*
1172 : * Copy the snapshot into TopTransactionContext, add it to the
1173 : * exportedSnapshots list, and mark it pseudo-registered. We do this to
1174 : * ensure that the snapshot's xmin is honored for the rest of the
1175 : * transaction.
1176 : */
1177 18 : snapshot = CopySnapshot(snapshot);
1178 :
1179 18 : oldcxt = MemoryContextSwitchTo(TopTransactionContext);
1180 18 : esnap = (ExportedSnapshot *) palloc(sizeof(ExportedSnapshot));
1181 18 : esnap->snapfile = pstrdup(path);
1182 18 : esnap->snapshot = snapshot;
1183 18 : exportedSnapshots = lappend(exportedSnapshots, esnap);
1184 18 : MemoryContextSwitchTo(oldcxt);
1185 :
1186 18 : snapshot->regd_count++;
1187 18 : pairingheap_add(&RegisteredSnapshots, &snapshot->ph_node);
1188 :
1189 : /*
1190 : * Fill buf with a text serialization of the snapshot, plus identification
1191 : * data about this transaction. The format expected by ImportSnapshot is
1192 : * pretty rigid: each line must be fieldname:value.
1193 : */
1194 18 : initStringInfo(&buf);
1195 :
1196 18 : appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->vxid.procNumber, MyProc->vxid.lxid);
1197 18 : appendStringInfo(&buf, "pid:%d\n", MyProcPid);
1198 18 : appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
1199 18 : appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
1200 18 : appendStringInfo(&buf, "ro:%d\n", XactReadOnly);
1201 :
1202 18 : appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
1203 18 : appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);
1204 :
1205 : /*
1206 : * We must include our own top transaction ID in the top-xid data, since
1207 : * by definition we will still be running when the importing transaction
1208 : * adopts the snapshot, but GetSnapshotData never includes our own XID in
1209 : * the snapshot. (There must, therefore, be enough room to add it.)
1210 : *
1211 : * However, it could be that our topXid is after the xmax, in which case
1212 : * we shouldn't include it because xip[] members are expected to be before
1213 : * xmax. (We need not make the same check for subxip[] members, see
1214 : * snapshot.h.)
1215 : */
1216 18 : addTopXid = (TransactionIdIsValid(topXid) &&
1217 18 : TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0;
1218 18 : appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
1219 18 : for (i = 0; i < snapshot->xcnt; i++)
1220 0 : appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
1221 18 : if (addTopXid)
1222 0 : appendStringInfo(&buf, "xip:%u\n", topXid);
1223 :
1224 : /*
1225 : * Similarly, we add our subcommitted child XIDs to the subxid data. Here,
1226 : * we have to cope with possible overflow.
1227 : */
1228 36 : if (snapshot->suboverflowed ||
1229 18 : snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
1230 0 : appendStringInfoString(&buf, "sof:1\n");
1231 : else
1232 : {
1233 18 : appendStringInfoString(&buf, "sof:0\n");
1234 18 : appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
1235 18 : for (i = 0; i < snapshot->subxcnt; i++)
1236 0 : appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
1237 18 : for (i = 0; i < nchildren; i++)
1238 0 : appendStringInfo(&buf, "sxp:%u\n", children[i]);
1239 : }
1240 18 : appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);
1241 :
1242 : /*
1243 : * Now write the text representation into a file. We first write to a
1244 : * ".tmp" filename, and rename to final filename if no error. This
1245 : * ensures that no other backend can read an incomplete file
1246 : * (ImportSnapshot won't allow it because of its valid-characters check).
1247 : */
1248 18 : snprintf(pathtmp, sizeof(pathtmp), "%s.tmp", path);
1249 18 : if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
1250 0 : ereport(ERROR,
1251 : (errcode_for_file_access(),
1252 : errmsg("could not create file \"%s\": %m", pathtmp)));
1253 :
1254 18 : if (fwrite(buf.data, buf.len, 1, f) != 1)
1255 0 : ereport(ERROR,
1256 : (errcode_for_file_access(),
1257 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1258 :
1259 : /* no fsync() since file need not survive a system crash */
1260 :
1261 18 : if (FreeFile(f))
1262 0 : ereport(ERROR,
1263 : (errcode_for_file_access(),
1264 : errmsg("could not write to file \"%s\": %m", pathtmp)));
1265 :
1266 : /*
1267 : * Now that we have written everything into a .tmp file, rename the file
1268 : * to remove the .tmp suffix.
1269 : */
1270 18 : if (rename(pathtmp, path) < 0)
1271 0 : ereport(ERROR,
1272 : (errcode_for_file_access(),
1273 : errmsg("could not rename file \"%s\" to \"%s\": %m",
1274 : pathtmp, path)));
1275 :
1276 : /*
1277 : * The basename of the file is what we return from pg_export_snapshot().
1278 : * It's already in path in a textual format and we know that the path
1279 : * starts with SNAPSHOT_EXPORT_DIR. Skip over the prefix and the slash
1280 : * and pstrdup it so as not to return the address of a local variable.
1281 : */
1282 18 : return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
1283 : }
1284 :
1285 : /*
1286 : * pg_export_snapshot
1287 : * SQL-callable wrapper for ExportSnapshot.
1288 : */
1289 : Datum
1290 16 : pg_export_snapshot(PG_FUNCTION_ARGS)
1291 : {
1292 : char *snapshotName;
1293 :
1294 16 : snapshotName = ExportSnapshot(GetActiveSnapshot());
1295 16 : PG_RETURN_TEXT_P(cstring_to_text(snapshotName));
1296 : }
1297 :
1298 :
1299 : /*
1300 : * Parsing subroutines for ImportSnapshot: parse a line with the given
1301 : * prefix followed by a value, and advance *s to the next line. The
1302 : * filename is provided for use in error messages.
1303 : */
1304 : static int
1305 224 : parseIntFromText(const char *prefix, char **s, const char *filename)
1306 : {
1307 224 : char *ptr = *s;
1308 224 : int prefixlen = strlen(prefix);
1309 : int val;
1310 :
1311 224 : if (strncmp(ptr, prefix, prefixlen) != 0)
1312 0 : ereport(ERROR,
1313 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1314 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1315 224 : ptr += prefixlen;
1316 224 : if (sscanf(ptr, "%d", &val) != 1)
1317 0 : ereport(ERROR,
1318 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1319 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1320 224 : ptr = strchr(ptr, '\n');
1321 224 : if (!ptr)
1322 0 : ereport(ERROR,
1323 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1324 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1325 224 : *s = ptr + 1;
1326 224 : return val;
1327 : }
1328 :
1329 : static TransactionId
1330 96 : parseXidFromText(const char *prefix, char **s, const char *filename)
1331 : {
1332 96 : char *ptr = *s;
1333 96 : int prefixlen = strlen(prefix);
1334 : TransactionId val;
1335 :
1336 96 : if (strncmp(ptr, prefix, prefixlen) != 0)
1337 0 : ereport(ERROR,
1338 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1339 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1340 96 : ptr += prefixlen;
1341 96 : if (sscanf(ptr, "%u", &val) != 1)
1342 0 : ereport(ERROR,
1343 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1344 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1345 96 : ptr = strchr(ptr, '\n');
1346 96 : if (!ptr)
1347 0 : ereport(ERROR,
1348 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1349 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1350 96 : *s = ptr + 1;
1351 96 : return val;
1352 : }
1353 :
1354 : static void
1355 32 : parseVxidFromText(const char *prefix, char **s, const char *filename,
1356 : VirtualTransactionId *vxid)
1357 : {
1358 32 : char *ptr = *s;
1359 32 : int prefixlen = strlen(prefix);
1360 :
1361 32 : if (strncmp(ptr, prefix, prefixlen) != 0)
1362 0 : ereport(ERROR,
1363 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1364 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1365 32 : ptr += prefixlen;
1366 32 : if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2)
1367 0 : ereport(ERROR,
1368 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1369 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1370 32 : ptr = strchr(ptr, '\n');
1371 32 : if (!ptr)
1372 0 : ereport(ERROR,
1373 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1374 : errmsg("invalid snapshot data in file \"%s\"", filename)));
1375 32 : *s = ptr + 1;
1376 32 : }
1377 :
1378 : /*
1379 : * ImportSnapshot
1380 : * Import a previously exported snapshot. The argument should be a
1381 : * filename in SNAPSHOT_EXPORT_DIR. Load the snapshot from that file.
1382 : * This is called by "SET TRANSACTION SNAPSHOT 'foo'".
1383 : */
1384 : void
1385 44 : ImportSnapshot(const char *idstr)
1386 : {
1387 : char path[MAXPGPATH];
1388 : FILE *f;
1389 : struct stat stat_buf;
1390 : char *filebuf;
1391 : int xcnt;
1392 : int i;
1393 : VirtualTransactionId src_vxid;
1394 : int src_pid;
1395 : Oid src_dbid;
1396 : int src_isolevel;
1397 : bool src_readonly;
1398 : SnapshotData snapshot;
1399 :
1400 : /*
1401 : * Must be at top level of a fresh transaction. Note in particular that
1402 : * we check we haven't acquired an XID --- if we have, it's conceivable
1403 : * that the snapshot would show it as not running, making for very screwy
1404 : * behavior.
1405 : */
1406 88 : if (FirstSnapshotSet ||
1407 88 : GetTopTransactionIdIfAny() != InvalidTransactionId ||
1408 44 : IsSubTransaction())
1409 0 : ereport(ERROR,
1410 : (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
1411 : errmsg("SET TRANSACTION SNAPSHOT must be called before any query")));
1412 :
1413 : /*
1414 : * If we are in read committed mode then the next query would execute with
1415 : * a new snapshot thus making this function call quite useless.
1416 : */
1417 44 : if (!IsolationUsesXactSnapshot())
1418 0 : ereport(ERROR,
1419 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1420 : errmsg("a snapshot-importing transaction must have isolation level SERIALIZABLE or REPEATABLE READ")));
1421 :
1422 : /*
1423 : * Verify the identifier: only 0-9, A-F and hyphens are allowed. We do
1424 : * this mainly to prevent reading arbitrary files.
1425 : */
1426 44 : if (strspn(idstr, "0123456789ABCDEF-") != strlen(idstr))
1427 6 : ereport(ERROR,
1428 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1429 : errmsg("invalid snapshot identifier: \"%s\"", idstr)));
1430 :
1431 : /* OK, read the file */
1432 38 : snprintf(path, MAXPGPATH, SNAPSHOT_EXPORT_DIR "/%s", idstr);
1433 :
1434 38 : f = AllocateFile(path, PG_BINARY_R);
1435 38 : if (!f)
1436 : {
1437 : /*
1438 : * If file is missing while identifier has a correct format, avoid
1439 : * system errors.
1440 : */
1441 6 : if (errno == ENOENT)
1442 6 : ereport(ERROR,
1443 : (errcode(ERRCODE_UNDEFINED_OBJECT),
1444 : errmsg("snapshot \"%s\" does not exist", idstr)));
1445 : else
1446 0 : ereport(ERROR,
1447 : (errcode_for_file_access(),
1448 : errmsg("could not open file \"%s\" for reading: %m",
1449 : path)));
1450 : }
1451 :
1452 : /* get the size of the file so that we know how much memory we need */
1453 32 : if (fstat(fileno(f), &stat_buf))
1454 0 : elog(ERROR, "could not stat file \"%s\": %m", path);
1455 :
1456 : /* and read the file into a palloc'd string */
1457 32 : filebuf = (char *) palloc(stat_buf.st_size + 1);
1458 32 : if (fread(filebuf, stat_buf.st_size, 1, f) != 1)
1459 0 : elog(ERROR, "could not read file \"%s\": %m", path);
1460 :
1461 32 : filebuf[stat_buf.st_size] = '\0';
1462 :
1463 32 : FreeFile(f);
1464 :
1465 : /*
1466 : * Construct a snapshot struct by parsing the file content.
1467 : */
1468 32 : memset(&snapshot, 0, sizeof(snapshot));
1469 :
1470 32 : parseVxidFromText("vxid:", &filebuf, path, &src_vxid);
1471 32 : src_pid = parseIntFromText("pid:", &filebuf, path);
1472 : /* we abuse parseXidFromText a bit here ... */
1473 32 : src_dbid = parseXidFromText("dbid:", &filebuf, path);
1474 32 : src_isolevel = parseIntFromText("iso:", &filebuf, path);
1475 32 : src_readonly = parseIntFromText("ro:", &filebuf, path);
1476 :
1477 32 : snapshot.snapshot_type = SNAPSHOT_MVCC;
1478 :
1479 32 : snapshot.xmin = parseXidFromText("xmin:", &filebuf, path);
1480 32 : snapshot.xmax = parseXidFromText("xmax:", &filebuf, path);
1481 :
1482 32 : snapshot.xcnt = xcnt = parseIntFromText("xcnt:", &filebuf, path);
1483 :
1484 : /* sanity-check the xid count before palloc */
1485 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotXidCount())
1486 0 : ereport(ERROR,
1487 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1488 : errmsg("invalid snapshot data in file \"%s\"", path)));
1489 :
1490 32 : snapshot.xip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1491 32 : for (i = 0; i < xcnt; i++)
1492 0 : snapshot.xip[i] = parseXidFromText("xip:", &filebuf, path);
1493 :
1494 32 : snapshot.suboverflowed = parseIntFromText("sof:", &filebuf, path);
1495 :
1496 32 : if (!snapshot.suboverflowed)
1497 : {
1498 32 : snapshot.subxcnt = xcnt = parseIntFromText("sxcnt:", &filebuf, path);
1499 :
1500 : /* sanity-check the xid count before palloc */
1501 32 : if (xcnt < 0 || xcnt > GetMaxSnapshotSubxidCount())
1502 0 : ereport(ERROR,
1503 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1504 : errmsg("invalid snapshot data in file \"%s\"", path)));
1505 :
1506 32 : snapshot.subxip = (TransactionId *) palloc(xcnt * sizeof(TransactionId));
1507 32 : for (i = 0; i < xcnt; i++)
1508 0 : snapshot.subxip[i] = parseXidFromText("sxp:", &filebuf, path);
1509 : }
1510 : else
1511 : {
1512 0 : snapshot.subxcnt = 0;
1513 0 : snapshot.subxip = NULL;
1514 : }
1515 :
1516 32 : snapshot.takenDuringRecovery = parseIntFromText("rec:", &filebuf, path);
1517 :
1518 : /*
1519 : * Do some additional sanity checking, just to protect ourselves. We
1520 : * don't trouble to check the array elements, just the most critical
1521 : * fields.
1522 : */
1523 32 : if (!VirtualTransactionIdIsValid(src_vxid) ||
1524 32 : !OidIsValid(src_dbid) ||
1525 32 : !TransactionIdIsNormal(snapshot.xmin) ||
1526 32 : !TransactionIdIsNormal(snapshot.xmax))
1527 0 : ereport(ERROR,
1528 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1529 : errmsg("invalid snapshot data in file \"%s\"", path)));
1530 :
1531 : /*
1532 : * If we're serializable, the source transaction must be too, otherwise
1533 : * predicate.c has problems (SxactGlobalXmin could go backwards). Also, a
1534 : * non-read-only transaction can't adopt a snapshot from a read-only
1535 : * transaction, as predicate.c handles the cases very differently.
1536 : */
1537 32 : if (IsolationIsSerializable())
1538 : {
1539 0 : if (src_isolevel != XACT_SERIALIZABLE)
1540 0 : ereport(ERROR,
1541 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1542 : errmsg("a serializable transaction cannot import a snapshot from a non-serializable transaction")));
1543 0 : if (src_readonly && !XactReadOnly)
1544 0 : ereport(ERROR,
1545 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1546 : errmsg("a non-read-only serializable transaction cannot import a snapshot from a read-only transaction")));
1547 : }
1548 :
1549 : /*
1550 : * We cannot import a snapshot that was taken in a different database,
1551 : * because vacuum calculates OldestXmin on a per-database basis; so the
1552 : * source transaction's xmin doesn't protect us from data loss. This
1553 : * restriction could be removed if the source transaction were to mark its
1554 : * xmin as being globally applicable. But that would require some
1555 : * additional syntax, since that has to be known when the snapshot is
1556 : * initially taken. (See pgsql-hackers discussion of 2011-10-21.)
1557 : */
1558 32 : if (src_dbid != MyDatabaseId)
1559 0 : ereport(ERROR,
1560 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1561 : errmsg("cannot import a snapshot from a different database")));
1562 :
1563 : /* OK, install the snapshot */
1564 32 : SetTransactionSnapshot(&snapshot, &src_vxid, src_pid, NULL);
1565 32 : }
1566 :
1567 : /*
1568 : * XactHasExportedSnapshots
1569 : * Test whether current transaction has exported any snapshots.
1570 : */
1571 : bool
1572 636 : XactHasExportedSnapshots(void)
1573 : {
1574 636 : return (exportedSnapshots != NIL);
1575 : }
1576 :
1577 : /*
1578 : * DeleteAllExportedSnapshotFiles
1579 : * Clean up any files that have been left behind by a crashed backend
1580 : * that had exported snapshots before it died.
1581 : *
1582 : * This should be called during database startup or crash recovery.
1583 : */
1584 : void
1585 428 : DeleteAllExportedSnapshotFiles(void)
1586 : {
1587 : char buf[MAXPGPATH + sizeof(SNAPSHOT_EXPORT_DIR)];
1588 : DIR *s_dir;
1589 : struct dirent *s_de;
1590 :
1591 : /*
1592 : * Problems in reading the directory, or unlinking files, are reported at
1593 : * LOG level. Since we're running in the startup process, ERROR level
1594 : * would prevent database start, and it's not important enough for that.
1595 : */
1596 428 : s_dir = AllocateDir(SNAPSHOT_EXPORT_DIR);
1597 :
1598 1284 : while ((s_de = ReadDirExtended(s_dir, SNAPSHOT_EXPORT_DIR, LOG)) != NULL)
1599 : {
1600 856 : if (strcmp(s_de->d_name, ".") == 0 ||
1601 428 : strcmp(s_de->d_name, "..") == 0)
1602 856 : continue;
1603 :
1604 0 : snprintf(buf, sizeof(buf), SNAPSHOT_EXPORT_DIR "/%s", s_de->d_name);
1605 :
1606 0 : if (unlink(buf) != 0)
1607 0 : ereport(LOG,
1608 : (errcode_for_file_access(),
1609 : errmsg("could not remove file \"%s\": %m", buf)));
1610 : }
1611 :
1612 428 : FreeDir(s_dir);
1613 428 : }
1614 :
1615 : /*
1616 : * ThereAreNoPriorRegisteredSnapshots
1617 : * Is the registered snapshot count less than or equal to one?
1618 : *
1619 : * Don't use this to settle important decisions. While zero registrations and
1620 : * no ActiveSnapshot would confirm a certain idleness, the system makes no
1621 : * guarantees about the significance of one registered snapshot.
1622 : */
1623 : bool
1624 60 : ThereAreNoPriorRegisteredSnapshots(void)
1625 : {
1626 60 : if (pairingheap_is_empty(&RegisteredSnapshots) ||
1627 0 : pairingheap_is_singular(&RegisteredSnapshots))
1628 60 : return true;
1629 :
1630 0 : return false;
1631 : }
1632 :
1633 : /*
1634 : * HaveRegisteredOrActiveSnapshot
1635 : * Is there any registered or active snapshot?
1636 : *
1637 : * NB: Unless pushed or active, the cached catalog snapshot will not cause
1638 : * this function to return true. That allows this function to be used in
1639 : * checks enforcing a longer-lived snapshot.
1640 : */
1641 : bool
1642 48766 : HaveRegisteredOrActiveSnapshot(void)
1643 : {
1644 48766 : if (ActiveSnapshot != NULL)
1645 48344 : return true;
1646 :
1647 : /*
1648 : * The catalog snapshot is in RegisteredSnapshots when valid, but can be
1649 : * removed at any time due to invalidation processing. If explicitly
1650 : * registered more than one snapshot has to be in RegisteredSnapshots.
1651 : */
1652 422 : if (CatalogSnapshot != NULL &&
1653 32 : pairingheap_is_singular(&RegisteredSnapshots))
1654 0 : return false;
1655 :
1656 422 : return !pairingheap_is_empty(&RegisteredSnapshots);
1657 : }
1658 :
1659 :
1660 : /*
1661 : * Setup a snapshot that replaces normal catalog snapshots that allows catalog
1662 : * access to behave just like it did at a certain point in the past.
1663 : *
1664 : * Needed for logical decoding.
1665 : */
1666 : void
1667 9282 : SetupHistoricSnapshot(Snapshot historic_snapshot, HTAB *tuplecids)
1668 : {
1669 : Assert(historic_snapshot != NULL);
1670 :
1671 : /* setup the timetravel snapshot */
1672 9282 : HistoricSnapshot = historic_snapshot;
1673 :
1674 : /* setup (cmin, cmax) lookup hash */
1675 9282 : tuplecid_data = tuplecids;
1676 9282 : }
1677 :
1678 :
1679 : /*
1680 : * Make catalog snapshots behave normally again.
1681 : */
1682 : void
1683 9220 : TeardownHistoricSnapshot(bool is_error)
1684 : {
1685 9220 : HistoricSnapshot = NULL;
1686 9220 : tuplecid_data = NULL;
1687 9220 : }
1688 :
1689 : bool
1690 20807982 : HistoricSnapshotActive(void)
1691 : {
1692 20807982 : return HistoricSnapshot != NULL;
1693 : }
1694 :
1695 : HTAB *
1696 1534 : HistoricSnapshotGetTupleCids(void)
1697 : {
1698 : Assert(HistoricSnapshotActive());
1699 1534 : return tuplecid_data;
1700 : }
1701 :
1702 : /*
1703 : * EstimateSnapshotSpace
1704 : * Returns the size needed to store the given snapshot.
1705 : *
1706 : * We are exporting only required fields from the Snapshot, stored in
1707 : * SerializedSnapshotData.
1708 : */
1709 : Size
1710 2506 : EstimateSnapshotSpace(Snapshot snapshot)
1711 : {
1712 : Size size;
1713 :
1714 : Assert(snapshot != InvalidSnapshot);
1715 : Assert(snapshot->snapshot_type == SNAPSHOT_MVCC);
1716 :
1717 : /* We allocate any XID arrays needed in the same palloc block. */
1718 2506 : size = add_size(sizeof(SerializedSnapshotData),
1719 2506 : mul_size(snapshot->xcnt, sizeof(TransactionId)));
1720 2506 : if (snapshot->subxcnt > 0 &&
1721 0 : (!snapshot->suboverflowed || snapshot->takenDuringRecovery))
1722 0 : size = add_size(size,
1723 0 : mul_size(snapshot->subxcnt, sizeof(TransactionId)));
1724 :
1725 2506 : return size;
1726 : }
1727 :
1728 : /*
1729 : * SerializeSnapshot
1730 : * Dumps the serialized snapshot (extracted from given snapshot) onto the
1731 : * memory location at start_address.
1732 : */
1733 : void
1734 2172 : SerializeSnapshot(Snapshot snapshot, char *start_address)
1735 : {
1736 : SerializedSnapshotData serialized_snapshot;
1737 :
1738 : Assert(snapshot->subxcnt >= 0);
1739 :
1740 : /* Copy all required fields */
1741 2172 : serialized_snapshot.xmin = snapshot->xmin;
1742 2172 : serialized_snapshot.xmax = snapshot->xmax;
1743 2172 : serialized_snapshot.xcnt = snapshot->xcnt;
1744 2172 : serialized_snapshot.subxcnt = snapshot->subxcnt;
1745 2172 : serialized_snapshot.suboverflowed = snapshot->suboverflowed;
1746 2172 : serialized_snapshot.takenDuringRecovery = snapshot->takenDuringRecovery;
1747 2172 : serialized_snapshot.curcid = snapshot->curcid;
1748 :
1749 : /*
1750 : * Ignore the SubXID array if it has overflowed, unless the snapshot was
1751 : * taken during recovery - in that case, top-level XIDs are in subxip as
1752 : * well, and we mustn't lose them.
1753 : */
1754 2172 : if (serialized_snapshot.suboverflowed && !snapshot->takenDuringRecovery)
1755 0 : serialized_snapshot.subxcnt = 0;
1756 :
1757 : /* Copy struct to possibly-unaligned buffer */
1758 2172 : memcpy(start_address,
1759 : &serialized_snapshot, sizeof(SerializedSnapshotData));
1760 :
1761 : /* Copy XID array */
1762 2172 : if (snapshot->xcnt > 0)
1763 878 : memcpy((TransactionId *) (start_address +
1764 : sizeof(SerializedSnapshotData)),
1765 878 : snapshot->xip, snapshot->xcnt * sizeof(TransactionId));
1766 :
1767 : /*
1768 : * Copy SubXID array. Don't bother to copy it if it had overflowed,
1769 : * though, because it's not used anywhere in that case. Except if it's a
1770 : * snapshot taken during recovery; all the top-level XIDs are in subxip as
1771 : * well in that case, so we mustn't lose them.
1772 : */
1773 2172 : if (serialized_snapshot.subxcnt > 0)
1774 : {
1775 0 : Size subxipoff = sizeof(SerializedSnapshotData) +
1776 0 : snapshot->xcnt * sizeof(TransactionId);
1777 :
1778 0 : memcpy((TransactionId *) (start_address + subxipoff),
1779 0 : snapshot->subxip, snapshot->subxcnt * sizeof(TransactionId));
1780 : }
1781 2172 : }
1782 :
1783 : /*
1784 : * RestoreSnapshot
1785 : * Restore a serialized snapshot from the specified address.
1786 : *
1787 : * The copy is palloc'd in TopTransactionContext and has initial refcounts set
1788 : * to 0. The returned snapshot has the copied flag set.
1789 : */
1790 : Snapshot
1791 6752 : RestoreSnapshot(char *start_address)
1792 : {
1793 : SerializedSnapshotData serialized_snapshot;
1794 : Size size;
1795 : Snapshot snapshot;
1796 : TransactionId *serialized_xids;
1797 :
1798 6752 : memcpy(&serialized_snapshot, start_address,
1799 : sizeof(SerializedSnapshotData));
1800 6752 : serialized_xids = (TransactionId *)
1801 : (start_address + sizeof(SerializedSnapshotData));
1802 :
1803 : /* We allocate any XID arrays needed in the same palloc block. */
1804 6752 : size = sizeof(SnapshotData)
1805 6752 : + serialized_snapshot.xcnt * sizeof(TransactionId)
1806 6752 : + serialized_snapshot.subxcnt * sizeof(TransactionId);
1807 :
1808 : /* Copy all required fields */
1809 6752 : snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
1810 6752 : snapshot->snapshot_type = SNAPSHOT_MVCC;
1811 6752 : snapshot->xmin = serialized_snapshot.xmin;
1812 6752 : snapshot->xmax = serialized_snapshot.xmax;
1813 6752 : snapshot->xip = NULL;
1814 6752 : snapshot->xcnt = serialized_snapshot.xcnt;
1815 6752 : snapshot->subxip = NULL;
1816 6752 : snapshot->subxcnt = serialized_snapshot.subxcnt;
1817 6752 : snapshot->suboverflowed = serialized_snapshot.suboverflowed;
1818 6752 : snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery;
1819 6752 : snapshot->curcid = serialized_snapshot.curcid;
1820 6752 : snapshot->snapXactCompletionCount = 0;
1821 :
1822 : /* Copy XIDs, if present. */
1823 6752 : if (serialized_snapshot.xcnt > 0)
1824 : {
1825 2130 : snapshot->xip = (TransactionId *) (snapshot + 1);
1826 2130 : memcpy(snapshot->xip, serialized_xids,
1827 2130 : serialized_snapshot.xcnt * sizeof(TransactionId));
1828 : }
1829 :
1830 : /* Copy SubXIDs, if present. */
1831 6752 : if (serialized_snapshot.subxcnt > 0)
1832 : {
1833 0 : snapshot->subxip = ((TransactionId *) (snapshot + 1)) +
1834 0 : serialized_snapshot.xcnt;
1835 0 : memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt,
1836 0 : serialized_snapshot.subxcnt * sizeof(TransactionId));
1837 : }
1838 :
1839 : /* Set the copied flag so that the caller will set refcounts correctly. */
1840 6752 : snapshot->regd_count = 0;
1841 6752 : snapshot->active_count = 0;
1842 6752 : snapshot->copied = true;
1843 :
1844 6752 : return snapshot;
1845 : }
1846 :
1847 : /*
1848 : * Install a restored snapshot as the transaction snapshot.
1849 : *
1850 : * The second argument is of type void * so that snapmgr.h need not include
1851 : * the declaration for PGPROC.
1852 : */
1853 : void
1854 3122 : RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc)
1855 : {
1856 3122 : SetTransactionSnapshot(snapshot, NULL, InvalidPid, source_pgproc);
1857 3122 : }
1858 :
1859 : /*
1860 : * XidInMVCCSnapshot
1861 : * Is the given XID still-in-progress according to the snapshot?
1862 : *
1863 : * Note: GetSnapshotData never stores either top xid or subxids of our own
1864 : * backend into a snapshot, so these xids will not be reported as "running"
1865 : * by this function. This is OK for current uses, because we always check
1866 : * TransactionIdIsCurrentTransactionId first, except when it's known the
1867 : * XID could not be ours anyway.
1868 : */
1869 : bool
1870 146394090 : XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
1871 : {
1872 : /*
1873 : * Make a quick range check to eliminate most XIDs without looking at the
1874 : * xip arrays. Note that this is OK even if we convert a subxact XID to
1875 : * its parent below, because a subxact with XID < xmin has surely also got
1876 : * a parent with XID < xmin, while one with XID >= xmax must belong to a
1877 : * parent that was not yet committed at the time of this snapshot.
1878 : */
1879 :
1880 : /* Any xid < xmin is not in-progress */
1881 146394090 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1882 137786014 : return false;
1883 : /* Any xid >= xmax is in-progress */
1884 8608076 : if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
1885 38988 : return true;
1886 :
1887 : /*
1888 : * Snapshot information is stored slightly differently in snapshots taken
1889 : * during recovery.
1890 : */
1891 8569088 : if (!snapshot->takenDuringRecovery)
1892 : {
1893 : /*
1894 : * If the snapshot contains full subxact data, the fastest way to
1895 : * check things is just to compare the given XID against both subxact
1896 : * XIDs and top-level XIDs. If the snapshot overflowed, we have to
1897 : * use pg_subtrans to convert a subxact XID to its parent XID, but
1898 : * then we need only look at top-level XIDs not subxacts.
1899 : */
1900 8568928 : if (!snapshot->suboverflowed)
1901 : {
1902 : /* we have full data, so search subxip */
1903 8568228 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1904 506 : return true;
1905 :
1906 : /* not there, fall through to search xip[] */
1907 : }
1908 : else
1909 : {
1910 : /*
1911 : * Snapshot overflowed, so convert xid to top-level. This is safe
1912 : * because we eliminated too-old XIDs above.
1913 : */
1914 700 : xid = SubTransGetTopmostTransaction(xid);
1915 :
1916 : /*
1917 : * If xid was indeed a subxact, we might now have an xid < xmin,
1918 : * so recheck to avoid an array scan. No point in rechecking
1919 : * xmax.
1920 : */
1921 700 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1922 0 : return false;
1923 : }
1924 :
1925 8568422 : if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
1926 38166 : return true;
1927 : }
1928 : else
1929 : {
1930 : /*
1931 : * In recovery we store all xids in the subxip array because it is by
1932 : * far the bigger array, and we mostly don't know which xids are
1933 : * top-level and which are subxacts. The xip array is empty.
1934 : *
1935 : * We start by searching subtrans, if we overflowed.
1936 : */
1937 160 : if (snapshot->suboverflowed)
1938 : {
1939 : /*
1940 : * Snapshot overflowed, so convert xid to top-level. This is safe
1941 : * because we eliminated too-old XIDs above.
1942 : */
1943 8 : xid = SubTransGetTopmostTransaction(xid);
1944 :
1945 : /*
1946 : * If xid was indeed a subxact, we might now have an xid < xmin,
1947 : * so recheck to avoid an array scan. No point in rechecking
1948 : * xmax.
1949 : */
1950 8 : if (TransactionIdPrecedes(xid, snapshot->xmin))
1951 0 : return false;
1952 : }
1953 :
1954 : /*
1955 : * We now have either a top-level xid higher than xmin or an
1956 : * indeterminate xid. We don't know whether it's top level or subxact
1957 : * but it doesn't matter. If it's present, the xid is visible.
1958 : */
1959 160 : if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
1960 12 : return true;
1961 : }
1962 :
1963 8530404 : return false;
1964 : }
1965 :
1966 : /* ResourceOwner callbacks */
1967 :
1968 : static void
1969 58984 : ResOwnerReleaseSnapshot(Datum res)
1970 : {
1971 58984 : UnregisterSnapshotNoOwner((Snapshot) DatumGetPointer(res));
1972 58984 : }
|