Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * predicate.c
4 : * POSTGRES predicate locking
5 : * to support full serializable transaction isolation
6 : *
7 : *
8 : * The approach taken is to implement Serializable Snapshot Isolation (SSI)
9 : * as initially described in this paper:
10 : *
11 : * Michael J. Cahill, Uwe Röhm, and Alan D. Fekete. 2008.
12 : * Serializable isolation for snapshot databases.
13 : * In SIGMOD '08: Proceedings of the 2008 ACM SIGMOD
14 : * international conference on Management of data,
15 : * pages 729-738, New York, NY, USA. ACM.
16 : * http://doi.acm.org/10.1145/1376616.1376690
17 : *
18 : * and further elaborated in Cahill's doctoral thesis:
19 : *
20 : * Michael James Cahill. 2009.
21 : * Serializable Isolation for Snapshot Databases.
22 : * Sydney Digital Theses.
23 : * University of Sydney, School of Information Technologies.
24 : * http://hdl.handle.net/2123/5353
25 : *
26 : *
27 : * Predicate locks for Serializable Snapshot Isolation (SSI) are SIREAD
28 : * locks, which are so different from normal locks that a distinct set of
29 : * structures is required to handle them. They are needed to detect
30 : * rw-conflicts when the read happens before the write. (When the write
31 : * occurs first, the reading transaction can check for a conflict by
32 : * examining the MVCC data.)
33 : *
34 : * (1) Besides tuples actually read, they must cover ranges of tuples
35 : * which would have been read based on the predicate. This will
36 : * require modelling the predicates through locks against database
37 : * objects such as pages, index ranges, or entire tables.
38 : *
39 : * (2) They must be kept in RAM for quick access. Because of this, it
40 : * isn't possible to always maintain tuple-level granularity -- when
41 : * the space allocated to store these approaches exhaustion, a
42 : * request for a lock may need to scan for situations where a single
43 : * transaction holds many fine-grained locks which can be coalesced
44 : * into a single coarser-grained lock.
45 : *
46 : * (3) They never block anything; they are more like flags than locks
47 : * in that regard; although they refer to database objects and are
48 : * used to identify rw-conflicts with normal write locks.
49 : *
50 : * (4) While they are associated with a transaction, they must survive
51 : * a successful COMMIT of that transaction, and remain until all
52 : * overlapping transactions complete. This even means that they
53 : * must survive termination of the transaction's process. If a
54 : * top level transaction is rolled back, however, it is immediately
55 : * flagged so that it can be ignored, and its SIREAD locks can be
56 : * released any time after that.
57 : *
58 : * (5) The only transactions which create SIREAD locks or check for
59 : * conflicts with them are serializable transactions.
60 : *
61 : * (6) When a write lock for a top level transaction is found to cover
62 : * an existing SIREAD lock for the same transaction, the SIREAD lock
63 : * can be deleted.
64 : *
65 : * (7) A write from a serializable transaction must ensure that an xact
66 : * record exists for the transaction, with the same lifespan (until
67 : * all concurrent transactions complete or the transaction is rolled
68 : * back) so that rw-dependencies to that transaction can be
69 : * detected.
70 : *
71 : * We use an optimization for read-only transactions. Under certain
72 : * circumstances, a read-only transaction's snapshot can be shown to
73 : * never have conflicts with other transactions. This is referred to
74 : * as a "safe" snapshot (and one known not to be is "unsafe").
75 : * However, it can't be determined whether a snapshot is safe until
76 : * all concurrent read/write transactions complete.
77 : *
78 : * Once a read-only transaction is known to have a safe snapshot, it
79 : * can release its predicate locks and exempt itself from further
80 : * predicate lock tracking. READ ONLY DEFERRABLE transactions run only
81 : * on safe snapshots, waiting as necessary for one to be available.
82 : *
83 : *
84 : * Lightweight locks to manage access to the predicate locking shared
85 : * memory objects must be taken in this order, and should be released in
86 : * reverse order:
87 : *
88 : * SerializableFinishedListLock
89 : * - Protects the list of transactions which have completed but which
90 : * may yet matter because they overlap still-active transactions.
91 : *
92 : * SerializablePredicateListLock
93 : * - Protects the linked list of locks held by a transaction. Note
94 : * that the locks themselves are also covered by the partition
95 : * locks of their respective lock targets; this lock only affects
96 : * the linked list connecting the locks related to a transaction.
97 : * - All transactions share this single lock (with no partitioning).
98 : * - There is never a need for a process other than the one running
99 : * an active transaction to walk the list of locks held by that
100 : * transaction, except parallel query workers sharing the leader's
101 : * transaction. In the parallel case, an extra per-sxact lock is
102 : * taken; see below.
103 : * - It is relatively infrequent that another process needs to
104 : * modify the list for a transaction, but it does happen for such
105 : * things as index page splits for pages with predicate locks and
106 : * freeing of predicate locked pages by a vacuum process. When
107 : * removing a lock in such cases, the lock itself contains the
108 : * pointers needed to remove it from the list. When adding a
109 : * lock in such cases, the lock can be added using the anchor in
110 : * the transaction structure. Neither requires walking the list.
111 : * - Cleaning up the list for a terminated transaction is sometimes
112 : * not done on a retail basis, in which case no lock is required.
113 : * - Due to the above, a process accessing its active transaction's
114 : * list always uses a shared lock, regardless of whether it is
115 : * walking or maintaining the list. This improves concurrency
116 : * for the common access patterns.
117 : * - A process which needs to alter the list of a transaction other
118 : * than its own active transaction must acquire an exclusive
119 : * lock.
120 : *
121 : * SERIALIZABLEXACT's member 'perXactPredicateListLock'
122 : * - Protects the linked list of predicate locks held by a transaction.
123 : * Only needed for parallel mode, where multiple backends share the
124 : * same SERIALIZABLEXACT object. Not needed if
125 : * SerializablePredicateListLock is held exclusively.
126 : *
127 : * PredicateLockHashPartitionLock(hashcode)
128 : * - The same lock protects a target, all locks on that target, and
129 : * the linked list of locks on the target.
130 : * - When more than one is needed, acquire in ascending address order.
131 : * - When all are needed (rare), acquire in ascending index order with
132 : * PredicateLockHashPartitionLockByIndex(index).
133 : *
134 : * SerializableXactHashLock
135 : * - Protects both PredXact and SerializableXidHash.
136 : *
137 : * SerialControlLock
138 : * - Protects SerialControlData members
139 : *
140 : * SLRU per-bank locks
141 : * - Protects SerialSlruCtl
142 : *
143 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
144 : * Portions Copyright (c) 1994, Regents of the University of California
145 : *
146 : *
147 : * IDENTIFICATION
148 : * src/backend/storage/lmgr/predicate.c
149 : *
150 : *-------------------------------------------------------------------------
151 : */
152 : /*
153 : * INTERFACE ROUTINES
154 : *
155 : * housekeeping for setting up shared memory predicate lock structures
156 : * PredicateLockShmemInit(void)
157 : * PredicateLockShmemSize(void)
158 : *
159 : * predicate lock reporting
160 : * GetPredicateLockStatusData(void)
161 : * PageIsPredicateLocked(Relation relation, BlockNumber blkno)
162 : *
163 : * predicate lock maintenance
164 : * GetSerializableTransactionSnapshot(Snapshot snapshot)
165 : * SetSerializableTransactionSnapshot(Snapshot snapshot,
166 : * VirtualTransactionId *sourcevxid)
167 : * RegisterPredicateLockingXid(void)
168 : * PredicateLockRelation(Relation relation, Snapshot snapshot)
169 : * PredicateLockPage(Relation relation, BlockNumber blkno,
170 : * Snapshot snapshot)
171 : * PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot,
172 : * TransactionId tuple_xid)
173 : * PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
174 : * BlockNumber newblkno)
175 : * PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
176 : * BlockNumber newblkno)
177 : * TransferPredicateLocksToHeapRelation(Relation relation)
178 : * ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
179 : *
180 : * conflict detection (may also trigger rollback)
181 : * CheckForSerializableConflictOut(Relation relation, TransactionId xid,
182 : * Snapshot snapshot)
183 : * CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid,
184 : * BlockNumber blkno)
185 : * CheckTableForSerializableConflictIn(Relation relation)
186 : *
187 : * final rollback checking
188 : * PreCommit_CheckForSerializationFailure(void)
189 : *
190 : * two-phase commit support
191 : * AtPrepare_PredicateLocks(void);
192 : * PostPrepare_PredicateLocks(TransactionId xid);
193 : * PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit);
194 : * predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,
195 : * void *recdata, uint32 len);
196 : */
197 :
198 : #include "postgres.h"
199 :
200 : #include "access/parallel.h"
201 : #include "access/slru.h"
202 : #include "access/transam.h"
203 : #include "access/twophase.h"
204 : #include "access/twophase_rmgr.h"
205 : #include "access/xact.h"
206 : #include "access/xlog.h"
207 : #include "miscadmin.h"
208 : #include "pgstat.h"
209 : #include "port/pg_lfind.h"
210 : #include "storage/predicate.h"
211 : #include "storage/predicate_internals.h"
212 : #include "storage/proc.h"
213 : #include "storage/procarray.h"
214 : #include "utils/guc_hooks.h"
215 : #include "utils/rel.h"
216 : #include "utils/snapmgr.h"
217 : #include "utils/wait_event.h"
218 :
219 : /* Uncomment the next line to test the graceful degradation code. */
220 : /* #define TEST_SUMMARIZE_SERIAL */
221 :
222 : /*
223 : * Test the most selective fields first, for performance.
224 : *
225 : * a is covered by b if all of the following hold:
226 : * 1) a.database = b.database
227 : * 2) a.relation = b.relation
228 : * 3) b.offset is invalid (b is page-granularity or higher)
229 : * 4) either of the following:
230 : * 4a) a.offset is valid (a is tuple-granularity) and a.page = b.page
231 : * or 4b) a.offset is invalid and b.page is invalid (a is
232 : * page-granularity and b is relation-granularity)
233 : */
234 : #define TargetTagIsCoveredBy(covered_target, covering_target) \
235 : ((GET_PREDICATELOCKTARGETTAG_RELATION(covered_target) == /* (2) */ \
236 : GET_PREDICATELOCKTARGETTAG_RELATION(covering_target)) \
237 : && (GET_PREDICATELOCKTARGETTAG_OFFSET(covering_target) == \
238 : InvalidOffsetNumber) /* (3) */ \
239 : && (((GET_PREDICATELOCKTARGETTAG_OFFSET(covered_target) != \
240 : InvalidOffsetNumber) /* (4a) */ \
241 : && (GET_PREDICATELOCKTARGETTAG_PAGE(covering_target) == \
242 : GET_PREDICATELOCKTARGETTAG_PAGE(covered_target))) \
243 : || ((GET_PREDICATELOCKTARGETTAG_PAGE(covering_target) == \
244 : InvalidBlockNumber) /* (4b) */ \
245 : && (GET_PREDICATELOCKTARGETTAG_PAGE(covered_target) \
246 : != InvalidBlockNumber))) \
247 : && (GET_PREDICATELOCKTARGETTAG_DB(covered_target) == /* (1) */ \
248 : GET_PREDICATELOCKTARGETTAG_DB(covering_target)))
249 :
250 : /*
251 : * The predicate locking target and lock shared hash tables are partitioned to
252 : * reduce contention. To determine which partition a given target belongs to,
253 : * compute the tag's hash code with PredicateLockTargetTagHashCode(), then
254 : * apply one of these macros.
255 : * NB: NUM_PREDICATELOCK_PARTITIONS must be a power of 2!
256 : */
257 : #define PredicateLockHashPartition(hashcode) \
258 : ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
259 : #define PredicateLockHashPartitionLock(hashcode) \
260 : (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + \
261 : PredicateLockHashPartition(hashcode)].lock)
262 : #define PredicateLockHashPartitionLockByIndex(i) \
263 : (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
264 :
265 : #define NPREDICATELOCKTARGETENTS() \
266 : mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
267 :
268 : #define SxactIsOnFinishedList(sxact) (!dlist_node_is_detached(&(sxact)->finishedLink))
269 :
270 : /*
271 : * Note that a sxact is marked "prepared" once it has passed
272 : * PreCommit_CheckForSerializationFailure, even if it isn't using
273 : * 2PC. This is the point at which it can no longer be aborted.
274 : *
275 : * The PREPARED flag remains set after commit, so SxactIsCommitted
276 : * implies SxactIsPrepared.
277 : */
278 : #define SxactIsCommitted(sxact) (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)
279 : #define SxactIsPrepared(sxact) (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)
280 : #define SxactIsRolledBack(sxact) (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)
281 : #define SxactIsDoomed(sxact) (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)
282 : #define SxactIsReadOnly(sxact) (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)
283 : #define SxactHasSummaryConflictIn(sxact) (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)
284 : #define SxactHasSummaryConflictOut(sxact) (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)
285 : /*
286 : * The following macro actually means that the specified transaction has a
287 : * conflict out *to a transaction which committed ahead of it*. It's hard
288 : * to get that into a name of a reasonable length.
289 : */
290 : #define SxactHasConflictOut(sxact) (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)
291 : #define SxactIsDeferrableWaiting(sxact) (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)
292 : #define SxactIsROSafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)
293 : #define SxactIsROUnsafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)
294 : #define SxactIsPartiallyReleased(sxact) (((sxact)->flags & SXACT_FLAG_PARTIALLY_RELEASED) != 0)
295 :
296 : /*
297 : * Compute the hash code associated with a PREDICATELOCKTARGETTAG.
298 : *
299 : * To avoid unnecessary recomputations of the hash code, we try to do this
300 : * just once per function, and then pass it around as needed. Aside from
301 : * passing the hashcode to hash_search_with_hash_value(), we can extract
302 : * the lock partition number from the hashcode.
303 : */
304 : #define PredicateLockTargetTagHashCode(predicatelocktargettag) \
305 : get_hash_value(PredicateLockTargetHash, predicatelocktargettag)
306 :
307 : /*
308 : * Given a predicate lock tag, and the hash for its target,
309 : * compute the lock hash.
310 : *
311 : * To make the hash code also depend on the transaction, we xor the address
312 : * of the tag's myXact struct into the hash code, left-shifted so that the
313 : * partition-number bits don't change. Since this is only a hash, we
314 : * don't care if we lose high-order bits of the address; use an
315 : * intermediate variable to suppress cast-pointer-to-int warnings.
316 : */
317 : #define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash) \
318 : ((targethash) ^ ((uint32) PointerGetDatum((predicatelocktag)->myXact)) \
319 : << LOG2_NUM_PREDICATELOCK_PARTITIONS)
320 :
321 :
322 : /*
323 : * The SLRU buffer area through which we access the old xids.
324 : */
325 : static SlruCtlData SerialSlruCtlData;
326 :
327 : #define SerialSlruCtl (&SerialSlruCtlData)
328 :
329 : #define SERIAL_PAGESIZE BLCKSZ
330 : #define SERIAL_ENTRYSIZE sizeof(SerCommitSeqNo)
331 : #define SERIAL_ENTRIESPERPAGE (SERIAL_PAGESIZE / SERIAL_ENTRYSIZE)
332 :
333 : /*
334 : * Set maximum pages based on the number needed to track all transactions.
335 : */
336 : #define SERIAL_MAX_PAGE (MaxTransactionId / SERIAL_ENTRIESPERPAGE)
337 :
338 : #define SerialNextPage(page) (((page) >= SERIAL_MAX_PAGE) ? 0 : (page) + 1)
339 :
340 : #define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
341 : (SerialSlruCtl->shared->page_buffer[slotno] + \
342 : ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
343 :
344 : #define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
345 :
346 : typedef struct SerialControlData
347 : {
348 : int64 headPage; /* newest initialized page */
349 : TransactionId headXid; /* newest valid Xid in the SLRU */
350 : TransactionId tailXid; /* oldest xmin we might be interested in */
351 : } SerialControlData;
352 :
353 : typedef struct SerialControlData *SerialControl;
354 :
355 : static SerialControl serialControl;
356 :
357 : /*
358 : * When the oldest committed transaction on the "finished" list is moved to
359 : * SLRU, its predicate locks will be moved to this "dummy" transaction,
360 : * collapsing duplicate targets. When a duplicate is found, the later
361 : * commitSeqNo is used.
362 : */
363 : static SERIALIZABLEXACT *OldCommittedSxact;
364 :
365 :
366 : /*
367 : * These configuration variables are used to set the predicate lock table size
368 : * and to control promotion of predicate locks to coarser granularity in an
369 : * attempt to degrade performance (mostly as false positive serialization
370 : * failure) gracefully in the face of memory pressure.
371 : */
372 : int max_predicate_locks_per_xact; /* in guc_tables.c */
373 : int max_predicate_locks_per_relation; /* in guc_tables.c */
374 : int max_predicate_locks_per_page; /* in guc_tables.c */
375 :
376 : /*
377 : * This provides a list of objects in order to track transactions
378 : * participating in predicate locking. Entries in the list are fixed size,
379 : * and reside in shared memory. The memory address of an entry must remain
380 : * fixed during its lifetime. The list will be protected from concurrent
381 : * update externally; no provision is made in this code to manage that. The
382 : * number of entries in the list, and the size allowed for each entry is
383 : * fixed upon creation.
384 : */
385 : static PredXactList PredXact;
386 :
387 : /*
388 : * This provides a pool of RWConflict data elements to use in conflict lists
389 : * between transactions.
390 : */
391 : static RWConflictPoolHeader RWConflictPool;
392 :
393 : /*
394 : * The predicate locking hash tables are in shared memory.
395 : * Each backend keeps pointers to them.
396 : */
397 : static HTAB *SerializableXidHash;
398 : static HTAB *PredicateLockTargetHash;
399 : static HTAB *PredicateLockHash;
400 : static dlist_head *FinishedSerializableTransactions;
401 :
402 : /*
403 : * Tag for a dummy entry in PredicateLockTargetHash. By temporarily removing
404 : * this entry, you can ensure that there's enough scratch space available for
405 : * inserting one entry in the hash table. This is an otherwise-invalid tag.
406 : */
407 : static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
408 : static uint32 ScratchTargetTagHash;
409 : static LWLock *ScratchPartitionLock;
410 :
411 : /*
412 : * The local hash table used to determine when to combine multiple fine-
413 : * grained locks into a single coarser-grained lock.
414 : */
415 : static HTAB *LocalPredicateLockHash = NULL;
416 :
417 : /*
418 : * Keep a pointer to the currently-running serializable transaction (if any)
419 : * for quick reference. Also, remember if we have written anything that could
420 : * cause a rw-conflict.
421 : */
422 : static SERIALIZABLEXACT *MySerializableXact = InvalidSerializableXact;
423 : static bool MyXactDidWrite = false;
424 :
425 : /*
426 : * The SXACT_FLAG_RO_UNSAFE optimization might lead us to release
427 : * MySerializableXact early. If that happens in a parallel query, the leader
428 : * needs to defer the destruction of the SERIALIZABLEXACT until end of
429 : * transaction, because the workers still have a reference to it. In that
430 : * case, the leader stores it here.
431 : */
432 : static SERIALIZABLEXACT *SavedSerializableXact = InvalidSerializableXact;
433 :
434 : /* local functions */
435 :
436 : static SERIALIZABLEXACT *CreatePredXact(void);
437 : static void ReleasePredXact(SERIALIZABLEXACT *sxact);
438 :
439 : static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer);
440 : static void SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
441 : static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact);
442 : static void ReleaseRWConflict(RWConflict conflict);
443 : static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact);
444 :
445 : static bool SerialPagePrecedesLogically(int64 page1, int64 page2);
446 : static int serial_errdetail_for_io_error(const void *opaque_data);
447 : static void SerialInit(void);
448 : static void SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo);
449 : static SerCommitSeqNo SerialGetMinConflictCommitSeqNo(TransactionId xid);
450 : static void SerialSetActiveSerXmin(TransactionId xid);
451 :
452 : static uint32 predicatelock_hash(const void *key, Size keysize);
453 : static void SummarizeOldestCommittedSxact(void);
454 : static Snapshot GetSafeSnapshot(Snapshot origSnapshot);
455 : static Snapshot GetSerializableTransactionSnapshotInt(Snapshot snapshot,
456 : VirtualTransactionId *sourcevxid,
457 : int sourcepid);
458 : static bool PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag);
459 : static bool GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
460 : PREDICATELOCKTARGETTAG *parent);
461 : static bool CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag);
462 : static void RemoveScratchTarget(bool lockheld);
463 : static void RestoreScratchTarget(bool lockheld);
464 : static void RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target,
465 : uint32 targettaghash);
466 : static void DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag);
467 : static int MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag);
468 : static bool CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag);
469 : static void DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag);
470 : static void CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
471 : uint32 targettaghash,
472 : SERIALIZABLEXACT *sxact);
473 : static void DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash);
474 : static bool TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
475 : PREDICATELOCKTARGETTAG newtargettag,
476 : bool removeOld);
477 : static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag);
478 : static void DropAllPredicateLocksFromTable(Relation relation,
479 : bool transfer);
480 : static void SetNewSxactGlobalXmin(void);
481 : static void ClearOldPredicateLocks(void);
482 : static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
483 : bool summarize);
484 : static bool XidIsConcurrent(TransactionId xid);
485 : static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag);
486 : static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
487 : static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
488 : SERIALIZABLEXACT *writer);
489 : static void CreateLocalPredicateLockHash(void);
490 : static void ReleasePredicateLocksLocal(void);
491 :
492 :
493 : /*------------------------------------------------------------------------*/
494 :
495 : /*
496 : * Does this relation participate in predicate locking? Temporary and system
497 : * relations are exempt.
498 : */
499 : static inline bool
500 160629 : PredicateLockingNeededForRelation(Relation relation)
501 : {
502 217899 : return !(relation->rd_id < FirstUnpinnedObjectId ||
503 57270 : RelationUsesLocalBuffers(relation));
504 : }
505 :
506 : /*
507 : * When a public interface method is called for a read, this is the test to
508 : * see if we should do a quick return.
509 : *
510 : * Note: this function has side-effects! If this transaction has been flagged
511 : * as RO-safe since the last call, we release all predicate locks and reset
512 : * MySerializableXact. That makes subsequent calls to return quickly.
513 : *
514 : * This is marked as 'inline' to eliminate the function call overhead in the
515 : * common case that serialization is not needed.
516 : */
517 : static inline bool
518 78370268 : SerializationNeededForRead(Relation relation, Snapshot snapshot)
519 : {
520 : /* Nothing to do if this is not a serializable transaction */
521 78370268 : if (MySerializableXact == InvalidSerializableXact)
522 78215572 : return false;
523 :
524 : /*
525 : * Don't acquire locks or conflict when scanning with a special snapshot.
526 : * This excludes things like CLUSTER and REINDEX. They use the wholesale
527 : * functions TransferPredicateLocksToHeapRelation() and
528 : * CheckTableForSerializableConflictIn() to participate in serialization,
529 : * but the scans involved don't need serialization.
530 : */
531 154696 : if (!IsMVCCSnapshot(snapshot))
532 2058 : return false;
533 :
534 : /*
535 : * Check if we have just become "RO-safe". If we have, immediately release
536 : * all locks as they're not needed anymore. This also resets
537 : * MySerializableXact, so that subsequent calls to this function can exit
538 : * quickly.
539 : *
540 : * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
541 : * commit without having conflicts out to an earlier snapshot, thus
542 : * ensuring that no conflicts are possible for this transaction.
543 : */
544 152638 : if (SxactIsROSafe(MySerializableXact))
545 : {
546 33 : ReleasePredicateLocks(false, true);
547 33 : return false;
548 : }
549 :
550 : /* Check if the relation doesn't participate in predicate locking */
551 152605 : if (!PredicateLockingNeededForRelation(relation))
552 100051 : return false;
553 :
554 52554 : return true; /* no excuse to skip predicate locking */
555 : }
556 :
557 : /*
558 : * Like SerializationNeededForRead(), but called on writes.
559 : * The logic is the same, but there is no snapshot and we can't be RO-safe.
560 : */
561 : static inline bool
562 20516785 : SerializationNeededForWrite(Relation relation)
563 : {
564 : /* Nothing to do if this is not a serializable transaction */
565 20516785 : if (MySerializableXact == InvalidSerializableXact)
566 20508849 : return false;
567 :
568 : /* Check if the relation doesn't participate in predicate locking */
569 7936 : if (!PredicateLockingNeededForRelation(relation))
570 3436 : return false;
571 :
572 4500 : return true; /* no excuse to skip predicate locking */
573 : }
574 :
575 :
576 : /*------------------------------------------------------------------------*/
577 :
578 : /*
579 : * These functions are a simple implementation of a list for this specific
580 : * type of struct. If there is ever a generalized shared memory list, we
581 : * should probably switch to that.
582 : */
583 : static SERIALIZABLEXACT *
584 2862 : CreatePredXact(void)
585 : {
586 : SERIALIZABLEXACT *sxact;
587 :
588 2862 : if (dlist_is_empty(&PredXact->availableList))
589 0 : return NULL;
590 :
591 2862 : sxact = dlist_container(SERIALIZABLEXACT, xactLink,
592 : dlist_pop_head_node(&PredXact->availableList));
593 2862 : dlist_push_tail(&PredXact->activeList, &sxact->xactLink);
594 2862 : return sxact;
595 : }
596 :
597 : static void
598 1682 : ReleasePredXact(SERIALIZABLEXACT *sxact)
599 : {
600 : Assert(ShmemAddrIsValid(sxact));
601 :
602 1682 : dlist_delete(&sxact->xactLink);
603 1682 : dlist_push_tail(&PredXact->availableList, &sxact->xactLink);
604 1682 : }
605 :
606 : /*------------------------------------------------------------------------*/
607 :
608 : /*
609 : * These functions manage primitive access to the RWConflict pool and lists.
610 : */
611 : static bool
612 1890 : RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
613 : {
614 : dlist_iter iter;
615 :
616 : Assert(reader != writer);
617 :
618 : /* Check the ends of the purported conflict first. */
619 1890 : if (SxactIsDoomed(reader)
620 1890 : || SxactIsDoomed(writer)
621 1890 : || dlist_is_empty(&reader->outConflicts)
622 569 : || dlist_is_empty(&writer->inConflicts))
623 1361 : return false;
624 :
625 : /*
626 : * A conflict is possible; walk the list to find out.
627 : *
628 : * The unconstify is needed as we have no const version of
629 : * dlist_foreach().
630 : */
631 545 : dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->outConflicts)
632 : {
633 529 : RWConflict conflict =
634 : dlist_container(RWConflictData, outLink, iter.cur);
635 :
636 529 : if (conflict->sxactIn == writer)
637 513 : return true;
638 : }
639 :
640 : /* No conflict found. */
641 16 : return false;
642 : }
643 :
644 : static void
645 792 : SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
646 : {
647 : RWConflict conflict;
648 :
649 : Assert(reader != writer);
650 : Assert(!RWConflictExists(reader, writer));
651 :
652 792 : if (dlist_is_empty(&RWConflictPool->availableList))
653 0 : ereport(ERROR,
654 : (errcode(ERRCODE_OUT_OF_MEMORY),
655 : errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
656 : errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
657 :
658 792 : conflict = dlist_head_element(RWConflictData, outLink, &RWConflictPool->availableList);
659 792 : dlist_delete(&conflict->outLink);
660 :
661 792 : conflict->sxactOut = reader;
662 792 : conflict->sxactIn = writer;
663 792 : dlist_push_tail(&reader->outConflicts, &conflict->outLink);
664 792 : dlist_push_tail(&writer->inConflicts, &conflict->inLink);
665 792 : }
666 :
667 : static void
668 135 : SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact,
669 : SERIALIZABLEXACT *activeXact)
670 : {
671 : RWConflict conflict;
672 :
673 : Assert(roXact != activeXact);
674 : Assert(SxactIsReadOnly(roXact));
675 : Assert(!SxactIsReadOnly(activeXact));
676 :
677 135 : if (dlist_is_empty(&RWConflictPool->availableList))
678 0 : ereport(ERROR,
679 : (errcode(ERRCODE_OUT_OF_MEMORY),
680 : errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
681 : errhint("You might need to run fewer transactions at a time or increase \"max_connections\".")));
682 :
683 135 : conflict = dlist_head_element(RWConflictData, outLink, &RWConflictPool->availableList);
684 135 : dlist_delete(&conflict->outLink);
685 :
686 135 : conflict->sxactOut = activeXact;
687 135 : conflict->sxactIn = roXact;
688 135 : dlist_push_tail(&activeXact->possibleUnsafeConflicts, &conflict->outLink);
689 135 : dlist_push_tail(&roXact->possibleUnsafeConflicts, &conflict->inLink);
690 135 : }
691 :
692 : static void
693 927 : ReleaseRWConflict(RWConflict conflict)
694 : {
695 927 : dlist_delete(&conflict->inLink);
696 927 : dlist_delete(&conflict->outLink);
697 927 : dlist_push_tail(&RWConflictPool->availableList, &conflict->outLink);
698 927 : }
699 :
700 : static void
701 3 : FlagSxactUnsafe(SERIALIZABLEXACT *sxact)
702 : {
703 : dlist_mutable_iter iter;
704 :
705 : Assert(SxactIsReadOnly(sxact));
706 : Assert(!SxactIsROSafe(sxact));
707 :
708 3 : sxact->flags |= SXACT_FLAG_RO_UNSAFE;
709 :
710 : /*
711 : * We know this isn't a safe snapshot, so we can stop looking for other
712 : * potential conflicts.
713 : */
714 6 : dlist_foreach_modify(iter, &sxact->possibleUnsafeConflicts)
715 : {
716 3 : RWConflict conflict =
717 3 : dlist_container(RWConflictData, inLink, iter.cur);
718 :
719 : Assert(!SxactIsReadOnly(conflict->sxactOut));
720 : Assert(sxact == conflict->sxactIn);
721 :
722 3 : ReleaseRWConflict(conflict);
723 : }
724 3 : }
725 :
726 : /*------------------------------------------------------------------------*/
727 :
728 : /*
729 : * Decide whether a Serial page number is "older" for truncation purposes.
730 : * Analogous to CLOGPagePrecedes().
731 : */
732 : static bool
733 0 : SerialPagePrecedesLogically(int64 page1, int64 page2)
734 : {
735 : TransactionId xid1;
736 : TransactionId xid2;
737 :
738 0 : xid1 = ((TransactionId) page1) * SERIAL_ENTRIESPERPAGE;
739 0 : xid1 += FirstNormalTransactionId + 1;
740 0 : xid2 = ((TransactionId) page2) * SERIAL_ENTRIESPERPAGE;
741 0 : xid2 += FirstNormalTransactionId + 1;
742 :
743 0 : return (TransactionIdPrecedes(xid1, xid2) &&
744 0 : TransactionIdPrecedes(xid1, xid2 + SERIAL_ENTRIESPERPAGE - 1));
745 : }
746 :
747 : static int
748 0 : serial_errdetail_for_io_error(const void *opaque_data)
749 : {
750 0 : TransactionId xid = *(const TransactionId *) opaque_data;
751 :
752 0 : return errdetail("Could not access serializable CSN of transaction %u.", xid);
753 : }
754 :
#ifdef USE_ASSERT_CHECKING
/*
 * SerialPagePrecedesLogicallyUnitTests
 *		Assertion-only self-test of SerialPagePrecedesLogically() at the
 *		extremes of the XID space.
 */
static void
SerialPagePrecedesLogicallyUnitTests(void)
{
    int         per_page = SERIAL_ENTRIESPERPAGE;
    int         offset = per_page / 2;
    int64       newestPage;
    int64       oldestPage;
    TransactionId newestXact;
    TransactionId oldestXact;

    /* GetNewTransactionId() has assigned the last XID it can safely use. */
    newestPage = 2 * SLRU_PAGES_PER_SEGMENT - 1;    /* nothing special */
    newestXact = newestPage * per_page + offset;
    Assert(newestXact / per_page == newestPage);

    /* The oldest XID is 2^31 behind the one just after newestXact. */
    oldestXact = newestXact + 1 - (1U << 31);
    oldestPage = oldestXact / per_page;

    /*
     * Scenario 1: head page = newestPage, target page = oldestPage.
     *
     * The SLRU headPage pertains to the last ~1000 XIDs assigned.
     * oldestXact finishes, ~2B XIDs having elapsed since it started.  Further
     * transactions cause us to summarize oldestXact to tailPage.  The
     * function must return false so SerialAdd() doesn't zero tailPage (which
     * may contain entries for other old, recently-finished XIDs) and half
     * the SLRU.  Reaching this requires burning ~2B XIDs in single-user mode,
     * a negligible possibility.
     */
    Assert(!SerialPagePrecedesLogically(newestPage, oldestPage));

    /*
     * Scenario 2: head page = oldestPage, target page = newestPage.
     *
     * The SLRU headPage pertains to oldestXact, and we're summarizing an XID
     * near newestXact.  (Assume few other XIDs used SERIALIZABLE, hence the
     * minimal headPage advancement.  Assume oldestXact was long-running and
     * only recently reached the SLRU.)  The function must return true to
     * make SerialAdd() create the target page.
     *
     * Today's implementation mishandles this case, but it doesn't matter
     * enough to fix.  Verify that the defect affects just one page by
     * asserting correct treatment of its prior page.  Reaching this case
     * requires burning ~2B XIDs in single-user mode, a negligible
     * possibility.  Moreover, if it does happen, the consequence would be
     * mild, namely a new transaction failing in SimpleLruReadPage().
     */
    Assert(SerialPagePrecedesLogically(oldestPage, newestPage - 1));
#if 0
    Assert(SerialPagePrecedesLogically(oldestPage, newestPage));
#endif
}
#endif
811 :
812 : /*
813 : * Initialize for the tracking of old serializable committed xids.
814 : */
815 : static void
816 1180 : SerialInit(void)
817 : {
818 : bool found;
819 :
820 : /*
821 : * Set up SLRU management of the pg_serial data.
822 : */
823 1180 : SerialSlruCtl->PagePrecedes = SerialPagePrecedesLogically;
824 1180 : SerialSlruCtl->errdetail_for_io_error = serial_errdetail_for_io_error;
825 1180 : SimpleLruInit(SerialSlruCtl, "serializable",
826 : serializable_buffers, 0, "pg_serial",
827 : LWTRANCHE_SERIAL_BUFFER, LWTRANCHE_SERIAL_SLRU,
828 : SYNC_HANDLER_NONE, false);
829 : #ifdef USE_ASSERT_CHECKING
830 : SerialPagePrecedesLogicallyUnitTests();
831 : #endif
832 : SlruPagePrecedesUnitTests(SerialSlruCtl, SERIAL_ENTRIESPERPAGE);
833 :
834 : /*
835 : * Create or attach to the SerialControl structure.
836 : */
837 1180 : serialControl = (SerialControl)
838 1180 : ShmemInitStruct("SerialControlData", sizeof(SerialControlData), &found);
839 :
840 : Assert(found == IsUnderPostmaster);
841 1180 : if (!found)
842 : {
843 : /*
844 : * Set control information to reflect empty SLRU.
845 : */
846 1180 : LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
847 1180 : serialControl->headPage = -1;
848 1180 : serialControl->headXid = InvalidTransactionId;
849 1180 : serialControl->tailXid = InvalidTransactionId;
850 1180 : LWLockRelease(SerialControlLock);
851 : }
852 1180 : }
853 :
854 : /*
855 : * GUC check_hook for serializable_buffers
856 : */
857 : bool
858 1222 : check_serial_buffers(int *newval, void **extra, GucSource source)
859 : {
860 1222 : return check_slru_buffers("serializable_buffers", newval);
861 : }
862 :
863 : /*
864 : * Record a committed read write serializable xid and the minimum
865 : * commitSeqNo of any transactions to which this xid had a rw-conflict out.
866 : * An invalid commitSeqNo means that there were no conflicts out from xid.
867 : */
868 : static void
869 0 : SerialAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
870 : {
871 : TransactionId tailXid;
872 : int64 targetPage;
873 : int slotno;
874 : int64 firstZeroPage;
875 : bool isNewPage;
876 : LWLock *lock;
877 :
878 : Assert(TransactionIdIsValid(xid));
879 :
880 0 : targetPage = SerialPage(xid);
881 0 : lock = SimpleLruGetBankLock(SerialSlruCtl, targetPage);
882 :
883 : /*
884 : * In this routine, we must hold both SerialControlLock and the SLRU bank
885 : * lock simultaneously while making the SLRU data catch up with the new
886 : * state that we determine.
887 : */
888 0 : LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
889 :
890 : /*
891 : * If 'xid' is older than the global xmin (== tailXid), there's no need to
892 : * store it, after all. This can happen if the oldest transaction holding
893 : * back the global xmin just finished, making 'xid' uninteresting, but
894 : * ClearOldPredicateLocks() has not yet run.
895 : */
896 0 : tailXid = serialControl->tailXid;
897 0 : if (!TransactionIdIsValid(tailXid) || TransactionIdPrecedes(xid, tailXid))
898 : {
899 0 : LWLockRelease(SerialControlLock);
900 0 : return;
901 : }
902 :
903 : /*
904 : * If the SLRU is currently unused, zero out the whole active region from
905 : * tailXid to headXid before taking it into use. Otherwise zero out only
906 : * any new pages that enter the tailXid-headXid range as we advance
907 : * headXid.
908 : */
909 0 : if (serialControl->headPage < 0)
910 : {
911 0 : firstZeroPage = SerialPage(tailXid);
912 0 : isNewPage = true;
913 : }
914 : else
915 : {
916 0 : firstZeroPage = SerialNextPage(serialControl->headPage);
917 0 : isNewPage = SerialPagePrecedesLogically(serialControl->headPage,
918 : targetPage);
919 : }
920 :
921 0 : if (!TransactionIdIsValid(serialControl->headXid)
922 0 : || TransactionIdFollows(xid, serialControl->headXid))
923 0 : serialControl->headXid = xid;
924 0 : if (isNewPage)
925 0 : serialControl->headPage = targetPage;
926 :
927 0 : if (isNewPage)
928 : {
929 : /* Initialize intervening pages; might involve trading locks */
930 : for (;;)
931 : {
932 0 : lock = SimpleLruGetBankLock(SerialSlruCtl, firstZeroPage);
933 0 : LWLockAcquire(lock, LW_EXCLUSIVE);
934 0 : slotno = SimpleLruZeroPage(SerialSlruCtl, firstZeroPage);
935 0 : if (firstZeroPage == targetPage)
936 0 : break;
937 0 : firstZeroPage = SerialNextPage(firstZeroPage);
938 0 : LWLockRelease(lock);
939 : }
940 : }
941 : else
942 : {
943 0 : LWLockAcquire(lock, LW_EXCLUSIVE);
944 0 : slotno = SimpleLruReadPage(SerialSlruCtl, targetPage, true, &xid);
945 : }
946 :
947 0 : SerialValue(slotno, xid) = minConflictCommitSeqNo;
948 0 : SerialSlruCtl->shared->page_dirty[slotno] = true;
949 :
950 0 : LWLockRelease(lock);
951 0 : LWLockRelease(SerialControlLock);
952 : }
953 :
954 : /*
955 : * Get the minimum commitSeqNo for any conflict out for the given xid. For
956 : * a transaction which exists but has no conflict out, InvalidSerCommitSeqNo
957 : * will be returned.
958 : */
959 : static SerCommitSeqNo
960 20 : SerialGetMinConflictCommitSeqNo(TransactionId xid)
961 : {
962 : TransactionId headXid;
963 : TransactionId tailXid;
964 : SerCommitSeqNo val;
965 : int slotno;
966 :
967 : Assert(TransactionIdIsValid(xid));
968 :
969 20 : LWLockAcquire(SerialControlLock, LW_SHARED);
970 20 : headXid = serialControl->headXid;
971 20 : tailXid = serialControl->tailXid;
972 20 : LWLockRelease(SerialControlLock);
973 :
974 20 : if (!TransactionIdIsValid(headXid))
975 20 : return 0;
976 :
977 : Assert(TransactionIdIsValid(tailXid));
978 :
979 0 : if (TransactionIdPrecedes(xid, tailXid)
980 0 : || TransactionIdFollows(xid, headXid))
981 0 : return 0;
982 :
983 : /*
984 : * The following function must be called without holding SLRU bank lock,
985 : * but will return with that lock held, which must then be released.
986 : */
987 0 : slotno = SimpleLruReadPage_ReadOnly(SerialSlruCtl,
988 0 : SerialPage(xid), &xid);
989 0 : val = SerialValue(slotno, xid);
990 0 : LWLockRelease(SimpleLruGetBankLock(SerialSlruCtl, SerialPage(xid)));
991 0 : return val;
992 : }
993 :
994 : /*
995 : * Call this whenever there is a new xmin for active serializable
996 : * transactions. We don't need to keep information on transactions which
997 : * precede that. InvalidTransactionId means none active, so everything in
998 : * the SLRU can be discarded.
999 : */
1000 : static void
1001 1758 : SerialSetActiveSerXmin(TransactionId xid)
1002 : {
1003 1758 : LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
1004 :
1005 : /*
1006 : * When no sxacts are active, nothing overlaps, set the xid values to
1007 : * invalid to show that there are no valid entries. Don't clear headPage,
1008 : * though. A new xmin might still land on that page, and we don't want to
1009 : * repeatedly zero out the same page.
1010 : */
1011 1758 : if (!TransactionIdIsValid(xid))
1012 : {
1013 870 : serialControl->tailXid = InvalidTransactionId;
1014 870 : serialControl->headXid = InvalidTransactionId;
1015 870 : LWLockRelease(SerialControlLock);
1016 870 : return;
1017 : }
1018 :
1019 : /*
1020 : * When we're recovering prepared transactions, the global xmin might move
1021 : * backwards depending on the order they're recovered. Normally that's not
1022 : * OK, but during recovery no serializable transactions will commit, so
1023 : * the SLRU is empty and we can get away with it.
1024 : */
1025 888 : if (RecoveryInProgress())
1026 : {
1027 : Assert(serialControl->headPage < 0);
1028 0 : if (!TransactionIdIsValid(serialControl->tailXid)
1029 0 : || TransactionIdPrecedes(xid, serialControl->tailXid))
1030 : {
1031 0 : serialControl->tailXid = xid;
1032 : }
1033 0 : LWLockRelease(SerialControlLock);
1034 0 : return;
1035 : }
1036 :
1037 : Assert(!TransactionIdIsValid(serialControl->tailXid)
1038 : || TransactionIdFollows(xid, serialControl->tailXid));
1039 :
1040 888 : serialControl->tailXid = xid;
1041 :
1042 888 : LWLockRelease(SerialControlLock);
1043 : }
1044 :
1045 : /*
1046 : * Perform a checkpoint --- either during shutdown, or on-the-fly
1047 : *
1048 : * We don't have any data that needs to survive a restart, but this is a
1049 : * convenient place to truncate the SLRU.
1050 : */
1051 : void
1052 1837 : CheckPointPredicate(void)
1053 : {
1054 : int64 truncateCutoffPage;
1055 :
1056 1837 : LWLockAcquire(SerialControlLock, LW_EXCLUSIVE);
1057 :
1058 : /* Exit quickly if the SLRU is currently not in use. */
1059 1837 : if (serialControl->headPage < 0)
1060 : {
1061 1837 : LWLockRelease(SerialControlLock);
1062 1837 : return;
1063 : }
1064 :
1065 0 : if (TransactionIdIsValid(serialControl->tailXid))
1066 : {
1067 : int64 tailPage;
1068 :
1069 0 : tailPage = SerialPage(serialControl->tailXid);
1070 :
1071 : /*
1072 : * It is possible for the tailXid to be ahead of the headXid. This
1073 : * occurs if we checkpoint while there are in-progress serializable
1074 : * transaction(s) advancing the tail but we are yet to summarize the
1075 : * transactions. In this case, we cutoff up to the headPage and the
1076 : * next summary will advance the headXid.
1077 : */
1078 0 : if (SerialPagePrecedesLogically(tailPage, serialControl->headPage))
1079 : {
1080 : /* We can truncate the SLRU up to the page containing tailXid */
1081 0 : truncateCutoffPage = tailPage;
1082 : }
1083 : else
1084 0 : truncateCutoffPage = serialControl->headPage;
1085 : }
1086 : else
1087 : {
1088 : /*----------
1089 : * The SLRU is no longer needed. Truncate to head before we set head
1090 : * invalid.
1091 : *
1092 : * XXX: It's possible that the SLRU is not needed again until XID
1093 : * wrap-around has happened, so that the segment containing headPage
1094 : * that we leave behind will appear to be new again. In that case it
1095 : * won't be removed until XID horizon advances enough to make it
1096 : * current again.
1097 : *
1098 : * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
1099 : * Consider this scenario, starting from a system with no in-progress
1100 : * transactions and VACUUM FREEZE having maximized oldestXact:
1101 : * - Start a SERIALIZABLE transaction.
1102 : * - Start, finish, and summarize a SERIALIZABLE transaction, creating
1103 : * one SLRU page.
1104 : * - Consume XIDs to reach xidStopLimit.
1105 : * - Finish all transactions. Due to the long-running SERIALIZABLE
1106 : * transaction, earlier checkpoints did not touch headPage. The
1107 : * next checkpoint will change it, but that checkpoint happens after
1108 : * the end of the scenario.
1109 : * - VACUUM to advance XID limits.
1110 : * - Consume ~2M XIDs, crossing the former xidWrapLimit.
1111 : * - Start, finish, and summarize a SERIALIZABLE transaction.
1112 : * SerialAdd() declines to create the targetPage, because headPage
1113 : * is not regarded as in the past relative to that targetPage. The
1114 : * transaction instigating the summarize fails in
1115 : * SimpleLruReadPage().
1116 : */
1117 0 : truncateCutoffPage = serialControl->headPage;
1118 0 : serialControl->headPage = -1;
1119 : }
1120 :
1121 0 : LWLockRelease(SerialControlLock);
1122 :
1123 : /*
1124 : * Truncate away pages that are no longer required. Note that no
1125 : * additional locking is required, because this is only called as part of
1126 : * a checkpoint, and the validity limits have already been determined.
1127 : */
1128 0 : SimpleLruTruncate(SerialSlruCtl, truncateCutoffPage);
1129 :
1130 : /*
1131 : * Write dirty SLRU pages to disk
1132 : *
1133 : * This is not actually necessary from a correctness point of view. We do
1134 : * it merely as a debugging aid.
1135 : *
1136 : * We're doing this after the truncation to avoid writing pages right
1137 : * before deleting the file in which they sit, which would be completely
1138 : * pointless.
1139 : */
1140 0 : SimpleLruWriteAll(SerialSlruCtl, true);
1141 : }
1142 :
1143 : /*------------------------------------------------------------------------*/
1144 :
1145 : /*
1146 : * PredicateLockShmemInit -- Initialize the predicate locking data structures.
1147 : *
1148 : * This is called from CreateSharedMemoryAndSemaphores(), which see for
1149 : * more comments. In the normal postmaster case, the shared hash tables
1150 : * are created here. Backends inherit the pointers
1151 : * to the shared tables via fork(). In the EXEC_BACKEND case, each
1152 : * backend re-executes this code to obtain pointers to the already existing
1153 : * shared hash tables.
1154 : */
1155 : void
1156 1180 : PredicateLockShmemInit(void)
1157 : {
1158 : HASHCTL info;
1159 : int64 max_table_size;
1160 : Size requestSize;
1161 : bool found;
1162 :
1163 : #ifndef EXEC_BACKEND
1164 : Assert(!IsUnderPostmaster);
1165 : #endif
1166 :
1167 : /*
1168 : * Compute size of predicate lock target hashtable. Note these
1169 : * calculations must agree with PredicateLockShmemSize!
1170 : */
1171 1180 : max_table_size = NPREDICATELOCKTARGETENTS();
1172 :
1173 : /*
1174 : * Allocate hash table for PREDICATELOCKTARGET structs. This stores
1175 : * per-predicate-lock-target information.
1176 : */
1177 1180 : info.keysize = sizeof(PREDICATELOCKTARGETTAG);
1178 1180 : info.entrysize = sizeof(PREDICATELOCKTARGET);
1179 1180 : info.num_partitions = NUM_PREDICATELOCK_PARTITIONS;
1180 :
1181 1180 : PredicateLockTargetHash = ShmemInitHash("PREDICATELOCKTARGET hash",
1182 : max_table_size,
1183 : max_table_size,
1184 : &info,
1185 : HASH_ELEM | HASH_BLOBS |
1186 : HASH_PARTITION | HASH_FIXED_SIZE);
1187 :
1188 : /*
1189 : * Reserve a dummy entry in the hash table; we use it to make sure there's
1190 : * always one entry available when we need to split or combine a page,
1191 : * because running out of space there could mean aborting a
1192 : * non-serializable transaction.
1193 : */
1194 1180 : if (!IsUnderPostmaster)
1195 : {
1196 1180 : (void) hash_search(PredicateLockTargetHash, &ScratchTargetTag,
1197 : HASH_ENTER, &found);
1198 : Assert(!found);
1199 : }
1200 :
1201 : /* Pre-calculate the hash and partition lock of the scratch entry */
1202 1180 : ScratchTargetTagHash = PredicateLockTargetTagHashCode(&ScratchTargetTag);
1203 1180 : ScratchPartitionLock = PredicateLockHashPartitionLock(ScratchTargetTagHash);
1204 :
1205 : /*
1206 : * Allocate hash table for PREDICATELOCK structs. This stores per
1207 : * xact-lock-of-a-target information.
1208 : */
1209 1180 : info.keysize = sizeof(PREDICATELOCKTAG);
1210 1180 : info.entrysize = sizeof(PREDICATELOCK);
1211 1180 : info.hash = predicatelock_hash;
1212 1180 : info.num_partitions = NUM_PREDICATELOCK_PARTITIONS;
1213 :
1214 : /* Assume an average of 2 xacts per target */
1215 1180 : max_table_size *= 2;
1216 :
1217 1180 : PredicateLockHash = ShmemInitHash("PREDICATELOCK hash",
1218 : max_table_size,
1219 : max_table_size,
1220 : &info,
1221 : HASH_ELEM | HASH_FUNCTION |
1222 : HASH_PARTITION | HASH_FIXED_SIZE);
1223 :
1224 : /*
1225 : * Compute size for serializable transaction hashtable. Note these
1226 : * calculations must agree with PredicateLockShmemSize!
1227 : */
1228 1180 : max_table_size = (MaxBackends + max_prepared_xacts);
1229 :
1230 : /*
1231 : * Allocate a list to hold information on transactions participating in
1232 : * predicate locking.
1233 : *
1234 : * Assume an average of 10 predicate locking transactions per backend.
1235 : * This allows aggressive cleanup while detail is present before data must
1236 : * be summarized for storage in SLRU and the "dummy" transaction.
1237 : */
1238 1180 : max_table_size *= 10;
1239 :
1240 1180 : requestSize = add_size(PredXactListDataSize,
1241 : (mul_size((Size) max_table_size,
1242 : sizeof(SERIALIZABLEXACT))));
1243 :
1244 1180 : PredXact = ShmemInitStruct("PredXactList",
1245 : requestSize,
1246 : &found);
1247 : Assert(found == IsUnderPostmaster);
1248 1180 : if (!found)
1249 : {
1250 : int i;
1251 :
1252 : /* clean everything, both the header and the element */
1253 1180 : memset(PredXact, 0, requestSize);
1254 :
1255 1180 : dlist_init(&PredXact->availableList);
1256 1180 : dlist_init(&PredXact->activeList);
1257 1180 : PredXact->SxactGlobalXmin = InvalidTransactionId;
1258 1180 : PredXact->SxactGlobalXminCount = 0;
1259 1180 : PredXact->WritableSxactCount = 0;
1260 1180 : PredXact->LastSxactCommitSeqNo = FirstNormalSerCommitSeqNo - 1;
1261 1180 : PredXact->CanPartialClearThrough = 0;
1262 1180 : PredXact->HavePartialClearedThrough = 0;
1263 1180 : PredXact->element
1264 1180 : = (SERIALIZABLEXACT *) ((char *) PredXact + PredXactListDataSize);
1265 : /* Add all elements to available list, clean. */
1266 1104740 : for (i = 0; i < max_table_size; i++)
1267 : {
1268 1103560 : LWLockInitialize(&PredXact->element[i].perXactPredicateListLock,
1269 : LWTRANCHE_PER_XACT_PREDICATE_LIST);
1270 1103560 : dlist_push_tail(&PredXact->availableList, &PredXact->element[i].xactLink);
1271 : }
1272 1180 : PredXact->OldCommittedSxact = CreatePredXact();
1273 1180 : SetInvalidVirtualTransactionId(PredXact->OldCommittedSxact->vxid);
1274 1180 : PredXact->OldCommittedSxact->prepareSeqNo = 0;
1275 1180 : PredXact->OldCommittedSxact->commitSeqNo = 0;
1276 1180 : PredXact->OldCommittedSxact->SeqNo.lastCommitBeforeSnapshot = 0;
1277 1180 : dlist_init(&PredXact->OldCommittedSxact->outConflicts);
1278 1180 : dlist_init(&PredXact->OldCommittedSxact->inConflicts);
1279 1180 : dlist_init(&PredXact->OldCommittedSxact->predicateLocks);
1280 1180 : dlist_node_init(&PredXact->OldCommittedSxact->finishedLink);
1281 1180 : dlist_init(&PredXact->OldCommittedSxact->possibleUnsafeConflicts);
1282 1180 : PredXact->OldCommittedSxact->topXid = InvalidTransactionId;
1283 1180 : PredXact->OldCommittedSxact->finishedBefore = InvalidTransactionId;
1284 1180 : PredXact->OldCommittedSxact->xmin = InvalidTransactionId;
1285 1180 : PredXact->OldCommittedSxact->flags = SXACT_FLAG_COMMITTED;
1286 1180 : PredXact->OldCommittedSxact->pid = 0;
1287 1180 : PredXact->OldCommittedSxact->pgprocno = INVALID_PROC_NUMBER;
1288 : }
1289 : /* This never changes, so let's keep a local copy. */
1290 1180 : OldCommittedSxact = PredXact->OldCommittedSxact;
1291 :
1292 : /*
1293 : * Allocate hash table for SERIALIZABLEXID structs. This stores per-xid
1294 : * information for serializable transactions which have accessed data.
1295 : */
1296 1180 : info.keysize = sizeof(SERIALIZABLEXIDTAG);
1297 1180 : info.entrysize = sizeof(SERIALIZABLEXID);
1298 :
1299 1180 : SerializableXidHash = ShmemInitHash("SERIALIZABLEXID hash",
1300 : max_table_size,
1301 : max_table_size,
1302 : &info,
1303 : HASH_ELEM | HASH_BLOBS |
1304 : HASH_FIXED_SIZE);
1305 :
1306 : /*
1307 : * Allocate space for tracking rw-conflicts in lists attached to the
1308 : * transactions.
1309 : *
1310 : * Assume an average of 5 conflicts per transaction. Calculations suggest
1311 : * that this will prevent resource exhaustion in even the most pessimal
1312 : * loads up to max_connections = 200 with all 200 connections pounding the
1313 : * database with serializable transactions. Beyond that, there may be
1314 : * occasional transactions canceled when trying to flag conflicts. That's
1315 : * probably OK.
1316 : */
1317 1180 : max_table_size *= 5;
1318 :
1319 1180 : requestSize = RWConflictPoolHeaderDataSize +
1320 1180 : mul_size((Size) max_table_size,
1321 : RWConflictDataSize);
1322 :
1323 1180 : RWConflictPool = ShmemInitStruct("RWConflictPool",
1324 : requestSize,
1325 : &found);
1326 : Assert(found == IsUnderPostmaster);
1327 1180 : if (!found)
1328 : {
1329 : int i;
1330 :
1331 : /* clean everything, including the elements */
1332 1180 : memset(RWConflictPool, 0, requestSize);
1333 :
1334 1180 : dlist_init(&RWConflictPool->availableList);
1335 1180 : RWConflictPool->element = (RWConflict) ((char *) RWConflictPool +
1336 : RWConflictPoolHeaderDataSize);
1337 : /* Add all elements to available list, clean. */
1338 5518980 : for (i = 0; i < max_table_size; i++)
1339 : {
1340 5517800 : dlist_push_tail(&RWConflictPool->availableList,
1341 5517800 : &RWConflictPool->element[i].outLink);
1342 : }
1343 : }
1344 :
1345 : /*
1346 : * Create or attach to the header for the list of finished serializable
1347 : * transactions.
1348 : */
1349 1180 : FinishedSerializableTransactions = (dlist_head *)
1350 1180 : ShmemInitStruct("FinishedSerializableTransactions",
1351 : sizeof(dlist_head),
1352 : &found);
1353 : Assert(found == IsUnderPostmaster);
1354 1180 : if (!found)
1355 1180 : dlist_init(FinishedSerializableTransactions);
1356 :
1357 : /*
1358 : * Initialize the SLRU storage for old committed serializable
1359 : * transactions.
1360 : */
1361 1180 : SerialInit();
1362 1180 : }
1363 :
1364 : /*
1365 : * Estimate shared-memory space used for predicate lock table
1366 : */
1367 : Size
1368 2207 : PredicateLockShmemSize(void)
1369 : {
1370 2207 : Size size = 0;
1371 : long max_table_size;
1372 :
1373 : /* predicate lock target hash table */
1374 2207 : max_table_size = NPREDICATELOCKTARGETENTS();
1375 2207 : size = add_size(size, hash_estimate_size(max_table_size,
1376 : sizeof(PREDICATELOCKTARGET)));
1377 :
1378 : /* predicate lock hash table */
1379 2207 : max_table_size *= 2;
1380 2207 : size = add_size(size, hash_estimate_size(max_table_size,
1381 : sizeof(PREDICATELOCK)));
1382 :
1383 : /*
1384 : * Since NPREDICATELOCKTARGETENTS is only an estimate, add 10% safety
1385 : * margin.
1386 : */
1387 2207 : size = add_size(size, size / 10);
1388 :
1389 : /* transaction list */
1390 2207 : max_table_size = MaxBackends + max_prepared_xacts;
1391 2207 : max_table_size *= 10;
1392 2207 : size = add_size(size, PredXactListDataSize);
1393 2207 : size = add_size(size, mul_size((Size) max_table_size,
1394 : sizeof(SERIALIZABLEXACT)));
1395 :
1396 : /* transaction xid table */
1397 2207 : size = add_size(size, hash_estimate_size(max_table_size,
1398 : sizeof(SERIALIZABLEXID)));
1399 :
1400 : /* rw-conflict pool */
1401 2207 : max_table_size *= 5;
1402 2207 : size = add_size(size, RWConflictPoolHeaderDataSize);
1403 2207 : size = add_size(size, mul_size((Size) max_table_size,
1404 : RWConflictDataSize));
1405 :
1406 : /* Head for list of finished serializable transactions. */
1407 2207 : size = add_size(size, sizeof(dlist_head));
1408 :
1409 : /* Shared memory structures for SLRU tracking of old committed xids. */
1410 2207 : size = add_size(size, sizeof(SerialControlData));
1411 2207 : size = add_size(size, SimpleLruShmemSize(serializable_buffers, 0));
1412 :
1413 2207 : return size;
1414 : }
1415 :
1416 :
1417 : /*
1418 : * Compute the hash code associated with a PREDICATELOCKTAG.
1419 : *
1420 : * Because we want to use just one set of partition locks for both the
1421 : * PREDICATELOCKTARGET and PREDICATELOCK hash tables, we have to make sure
1422 : * that PREDICATELOCKs fall into the same partition number as their
1423 : * associated PREDICATELOCKTARGETs. dynahash.c expects the partition number
1424 : * to be the low-order bits of the hash code, and therefore a
1425 : * PREDICATELOCKTAG's hash code must have the same low-order bits as the
1426 : * associated PREDICATELOCKTARGETTAG's hash code. We achieve this with this
1427 : * specialized hash function.
1428 : */
1429 : static uint32
1430 0 : predicatelock_hash(const void *key, Size keysize)
1431 : {
1432 0 : const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1433 : uint32 targethash;
1434 :
1435 : Assert(keysize == sizeof(PREDICATELOCKTAG));
1436 :
1437 : /* Look into the associated target object, and compute its hash code */
1438 0 : targethash = PredicateLockTargetTagHashCode(&predicatelocktag->myTarget->tag);
1439 :
1440 0 : return PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash);
1441 : }
1442 :
1443 :
1444 : /*
1445 : * GetPredicateLockStatusData
1446 : * Return a table containing the internal state of the predicate
1447 : * lock manager for use in pg_lock_status.
1448 : *
1449 : * Like GetLockStatusData, this function tries to hold the partition LWLocks
1450 : * for as short a time as possible by returning two arrays that simply
1451 : * contain the PREDICATELOCKTARGETTAG and SERIALIZABLEXACT for each lock
1452 : * table entry. Multiple copies of the same PREDICATELOCKTARGETTAG and
1453 : * SERIALIZABLEXACT will likely appear.
1454 : */
1455 : PredicateLockData *
1456 302 : GetPredicateLockStatusData(void)
1457 : {
1458 : PredicateLockData *data;
1459 : int i;
1460 : int els,
1461 : el;
1462 : HASH_SEQ_STATUS seqstat;
1463 : PREDICATELOCK *predlock;
1464 :
1465 302 : data = palloc_object(PredicateLockData);
1466 :
1467 : /*
1468 : * To ensure consistency, take simultaneous locks on all partition locks
1469 : * in ascending order, then SerializableXactHashLock.
1470 : */
1471 5134 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1472 4832 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
1473 302 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
1474 :
1475 : /* Get number of locks and allocate appropriately-sized arrays. */
1476 302 : els = hash_get_num_entries(PredicateLockHash);
1477 302 : data->nelements = els;
1478 302 : data->locktags = palloc_array(PREDICATELOCKTARGETTAG, els);
1479 302 : data->xacts = palloc_array(SERIALIZABLEXACT, els);
1480 :
1481 :
1482 : /* Scan through PredicateLockHash and copy contents */
1483 302 : hash_seq_init(&seqstat, PredicateLockHash);
1484 :
1485 302 : el = 0;
1486 :
1487 307 : while ((predlock = (PREDICATELOCK *) hash_seq_search(&seqstat)))
1488 : {
1489 5 : data->locktags[el] = predlock->tag.myTarget->tag;
1490 5 : data->xacts[el] = *predlock->tag.myXact;
1491 5 : el++;
1492 : }
1493 :
1494 : Assert(el == els);
1495 :
1496 : /* Release locks in reverse order */
1497 302 : LWLockRelease(SerializableXactHashLock);
1498 5134 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1499 4832 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
1500 :
1501 302 : return data;
1502 : }
1503 :
1504 : /*
1505 : * Free up shared memory structures by pushing the oldest sxact (the one at
1506 : * the front of the SummarizeOldestCommittedSxact queue) into summary form.
1507 : * Each call will free exactly one SERIALIZABLEXACT structure and may also
1508 : * free one or more of these structures: SERIALIZABLEXID, PREDICATELOCK,
1509 : * PREDICATELOCKTARGET, RWConflictData.
1510 : */
1511 : static void
1512 0 : SummarizeOldestCommittedSxact(void)
1513 : {
1514 : SERIALIZABLEXACT *sxact;
1515 :
1516 0 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
1517 :
1518 : /*
1519 : * This function is only called if there are no sxact slots available.
1520 : * Some of them must belong to old, already-finished transactions, so
1521 : * there should be something in FinishedSerializableTransactions list that
1522 : * we can summarize. However, there's a race condition: while we were not
1523 : * holding any locks, a transaction might have ended and cleaned up all
1524 : * the finished sxact entries already, freeing up their sxact slots. In
1525 : * that case, we have nothing to do here. The caller will find one of the
1526 : * slots released by the other backend when it retries.
1527 : */
1528 0 : if (dlist_is_empty(FinishedSerializableTransactions))
1529 : {
1530 0 : LWLockRelease(SerializableFinishedListLock);
1531 0 : return;
1532 : }
1533 :
1534 : /*
1535 : * Grab the first sxact off the finished list -- this will be the earliest
1536 : * commit. Remove it from the list.
1537 : */
1538 0 : sxact = dlist_head_element(SERIALIZABLEXACT, finishedLink,
1539 : FinishedSerializableTransactions);
1540 0 : dlist_delete_thoroughly(&sxact->finishedLink);
1541 :
1542 : /* Add to SLRU summary information. */
1543 0 : if (TransactionIdIsValid(sxact->topXid) && !SxactIsReadOnly(sxact))
1544 0 : SerialAdd(sxact->topXid, SxactHasConflictOut(sxact)
1545 : ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1546 :
1547 : /* Summarize and release the detail. */
1548 0 : ReleaseOneSerializableXact(sxact, false, true);
1549 :
1550 0 : LWLockRelease(SerializableFinishedListLock);
1551 : }
1552 :
1553 : /*
1554 : * GetSafeSnapshot
1555 : * Obtain and register a snapshot for a READ ONLY DEFERRABLE
1556 : * transaction. Ensures that the snapshot is "safe", i.e. a
1557 : * read-only transaction running on it can execute serializably
1558 : * without further checks. This requires waiting for concurrent
1559 : * transactions to complete, and retrying with a new snapshot if
1560 : * one of them could possibly create a conflict.
1561 : *
1562 : * As with GetSerializableTransactionSnapshot (which this is a subroutine
1563 : * for), the passed-in Snapshot pointer should reference a static data
1564 : * area that can safely be passed to GetSnapshotData.
1565 : */
1566 : static Snapshot
1567 5 : GetSafeSnapshot(Snapshot origSnapshot)
1568 : {
1569 : Snapshot snapshot;
1570 :
1571 : Assert(XactReadOnly && XactDeferrable);
1572 :
1573 : while (true)
1574 : {
1575 : /*
1576 : * GetSerializableTransactionSnapshotInt is going to call
1577 : * GetSnapshotData, so we need to provide it the static snapshot area
1578 : * our caller passed to us. The pointer returned is actually the same
1579 : * one passed to it, but we avoid assuming that here.
1580 : */
1581 6 : snapshot = GetSerializableTransactionSnapshotInt(origSnapshot,
1582 : NULL, InvalidPid);
1583 :
1584 6 : if (MySerializableXact == InvalidSerializableXact)
1585 3 : return snapshot; /* no concurrent r/w xacts; it's safe */
1586 :
1587 3 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1588 :
1589 : /*
1590 : * Wait for concurrent transactions to finish. Stop early if one of
1591 : * them marked us as conflicted.
1592 : */
1593 3 : MySerializableXact->flags |= SXACT_FLAG_DEFERRABLE_WAITING;
1594 7 : while (!(dlist_is_empty(&MySerializableXact->possibleUnsafeConflicts) ||
1595 4 : SxactIsROUnsafe(MySerializableXact)))
1596 : {
1597 4 : LWLockRelease(SerializableXactHashLock);
1598 4 : ProcWaitForSignal(WAIT_EVENT_SAFE_SNAPSHOT);
1599 4 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1600 : }
1601 3 : MySerializableXact->flags &= ~SXACT_FLAG_DEFERRABLE_WAITING;
1602 :
1603 3 : if (!SxactIsROUnsafe(MySerializableXact))
1604 : {
1605 2 : LWLockRelease(SerializableXactHashLock);
1606 2 : break; /* success */
1607 : }
1608 :
1609 1 : LWLockRelease(SerializableXactHashLock);
1610 :
1611 : /* else, need to retry... */
1612 1 : ereport(DEBUG2,
1613 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1614 : errmsg_internal("deferrable snapshot was unsafe; trying a new one")));
1615 1 : ReleasePredicateLocks(false, false);
1616 : }
1617 :
1618 : /*
1619 : * Now we have a safe snapshot, so we don't need to do any further checks.
1620 : */
1621 : Assert(SxactIsROSafe(MySerializableXact));
1622 2 : ReleasePredicateLocks(false, true);
1623 :
1624 2 : return snapshot;
1625 : }
1626 :
1627 : /*
1628 : * GetSafeSnapshotBlockingPids
1629 : * If the specified process is currently blocked in GetSafeSnapshot,
1630 : * write the process IDs of all processes that it is blocked by
1631 : * into the caller-supplied buffer output[]. The list is truncated at
1632 : * output_size, and the number of PIDs written into the buffer is
1633 : * returned. Returns zero if the given PID is not currently blocked
1634 : * in GetSafeSnapshot.
1635 : */
1636 : int
1637 372 : GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
1638 : {
1639 372 : int num_written = 0;
1640 : dlist_iter iter;
1641 372 : SERIALIZABLEXACT *blocking_sxact = NULL;
1642 :
1643 372 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
1644 :
1645 : /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1646 853 : dlist_foreach(iter, &PredXact->activeList)
1647 : {
1648 542 : SERIALIZABLEXACT *sxact =
1649 542 : dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1650 :
1651 542 : if (sxact->pid == blocked_pid)
1652 : {
1653 61 : blocking_sxact = sxact;
1654 61 : break;
1655 : }
1656 : }
1657 :
1658 : /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1659 372 : if (blocking_sxact != NULL && SxactIsDeferrableWaiting(blocking_sxact))
1660 : {
1661 : /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1662 2 : dlist_foreach(iter, &blocking_sxact->possibleUnsafeConflicts)
1663 : {
1664 2 : RWConflict possibleUnsafeConflict =
1665 2 : dlist_container(RWConflictData, inLink, iter.cur);
1666 :
1667 2 : output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1668 :
1669 2 : if (num_written >= output_size)
1670 2 : break;
1671 : }
1672 : }
1673 :
1674 372 : LWLockRelease(SerializableXactHashLock);
1675 :
1676 372 : return num_written;
1677 : }
1678 :
1679 : /*
1680 : * Acquire a snapshot that can be used for the current transaction.
1681 : *
1682 : * Make sure we have a SERIALIZABLEXACT reference in MySerializableXact.
1683 : * It should be current for this process and be contained in PredXact.
1684 : *
1685 : * The passed-in Snapshot pointer should reference a static data area that
1686 : * can safely be passed to GetSnapshotData. The return value is actually
1687 : * always this same pointer; no new snapshot data structure is allocated
1688 : * within this function.
1689 : */
1690 : Snapshot
1691 1681 : GetSerializableTransactionSnapshot(Snapshot snapshot)
1692 : {
1693 : Assert(IsolationIsSerializable());
1694 :
1695 : /*
1696 : * Can't use serializable mode while recovery is still active, as it is,
1697 : * for example, on a hot standby. We could get here despite the check in
1698 : * check_transaction_isolation() if default_transaction_isolation is set
1699 : * to serializable, so phrase the hint accordingly.
1700 : */
1701 1681 : if (RecoveryInProgress())
1702 0 : ereport(ERROR,
1703 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1704 : errmsg("cannot use serializable mode in a hot standby"),
1705 : errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1706 : errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1707 :
1708 : /*
1709 : * A special optimization is available for SERIALIZABLE READ ONLY
1710 : * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1711 : * thereby avoid all SSI overhead once it's running.
1712 : */
1713 1681 : if (XactReadOnly && XactDeferrable)
1714 5 : return GetSafeSnapshot(snapshot);
1715 :
1716 1676 : return GetSerializableTransactionSnapshotInt(snapshot,
1717 : NULL, InvalidPid);
1718 : }
1719 :
1720 : /*
1721 : * Import a snapshot to be used for the current transaction.
1722 : *
1723 : * This is nearly the same as GetSerializableTransactionSnapshot, except that
1724 : * we don't take a new snapshot, but rather use the data we're handed.
1725 : *
1726 : * The caller must have verified that the snapshot came from a serializable
1727 : * transaction; and if we're read-write, the source transaction must not be
1728 : * read-only.
1729 : */
1730 : void
1731 13 : SetSerializableTransactionSnapshot(Snapshot snapshot,
1732 : VirtualTransactionId *sourcevxid,
1733 : int sourcepid)
1734 : {
1735 : Assert(IsolationIsSerializable());
1736 :
1737 : /*
1738 : * If this is called by parallel.c in a parallel worker, we don't want to
1739 : * create a SERIALIZABLEXACT just yet because the leader's
1740 : * SERIALIZABLEXACT will be installed with AttachSerializableXact(). We
1741 : * also don't want to reject SERIALIZABLE READ ONLY DEFERRABLE in this
1742 : * case, because the leader has already determined that the snapshot it
1743 : * has passed us is safe. So there is nothing for us to do.
1744 : */
1745 13 : if (IsParallelWorker())
1746 13 : return;
1747 :
1748 : /*
1749 : * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1750 : * import snapshots, since there's no way to wait for a safe snapshot when
1751 : * we're using the snap we're told to. (XXX instead of throwing an error,
1752 : * we could just ignore the XactDeferrable flag?)
1753 : */
1754 0 : if (XactReadOnly && XactDeferrable)
1755 0 : ereport(ERROR,
1756 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1757 : errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1758 :
1759 0 : (void) GetSerializableTransactionSnapshotInt(snapshot, sourcevxid,
1760 : sourcepid);
1761 : }
1762 :
1763 : /*
1764 : * Guts of GetSerializableTransactionSnapshot
1765 : *
1766 : * If sourcevxid is valid, this is actually an import operation and we should
1767 : * skip calling GetSnapshotData, because the snapshot contents are already
1768 : * loaded up. HOWEVER: to avoid race conditions, we must check that the
1769 : * source xact is still running after we acquire SerializableXactHashLock.
1770 : * We do that by calling ProcArrayInstallImportedXmin.
1771 : */
1772 : static Snapshot
1773 1682 : GetSerializableTransactionSnapshotInt(Snapshot snapshot,
1774 : VirtualTransactionId *sourcevxid,
1775 : int sourcepid)
1776 : {
1777 : PGPROC *proc;
1778 : VirtualTransactionId vxid;
1779 : SERIALIZABLEXACT *sxact,
1780 : *othersxact;
1781 :
1782 : /* We only do this for serializable transactions. Once. */
1783 : Assert(MySerializableXact == InvalidSerializableXact);
1784 :
1785 : Assert(!RecoveryInProgress());
1786 :
1787 : /*
1788 : * Since all parts of a serializable transaction must use the same
1789 : * snapshot, it is too late to establish one after a parallel operation
1790 : * has begun.
1791 : */
1792 1682 : if (IsInParallelMode())
1793 0 : elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1794 :
1795 1682 : proc = MyProc;
1796 : Assert(proc != NULL);
1797 1682 : GET_VXID_FROM_PGPROC(vxid, *proc);
1798 :
1799 : /*
1800 : * First we get the sxact structure, which may involve looping and access
1801 : * to the "finished" list to free a structure for use.
1802 : *
1803 : * We must hold SerializableXactHashLock when taking/checking the snapshot
1804 : * to avoid race conditions, for much the same reasons that
1805 : * GetSnapshotData takes the ProcArrayLock. Since we might have to
1806 : * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1807 : * this means we have to create the sxact first, which is a bit annoying
1808 : * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1809 : * the sxact). Consider refactoring to avoid this.
1810 : */
1811 : #ifdef TEST_SUMMARIZE_SERIAL
1812 : SummarizeOldestCommittedSxact();
1813 : #endif
1814 1682 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1815 : do
1816 : {
1817 1682 : sxact = CreatePredXact();
1818 : /* If null, push out committed sxact to SLRU summary & retry. */
1819 1682 : if (!sxact)
1820 : {
1821 0 : LWLockRelease(SerializableXactHashLock);
1822 0 : SummarizeOldestCommittedSxact();
1823 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1824 : }
1825 1682 : } while (!sxact);
1826 :
1827 : /* Get the snapshot, or check that it's safe to use */
1828 1682 : if (!sourcevxid)
1829 1682 : snapshot = GetSnapshotData(snapshot);
1830 0 : else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1831 : {
1832 0 : ReleasePredXact(sxact);
1833 0 : LWLockRelease(SerializableXactHashLock);
1834 0 : ereport(ERROR,
1835 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1836 : errmsg("could not import the requested snapshot"),
1837 : errdetail("The source process with PID %d is not running anymore.",
1838 : sourcepid)));
1839 : }
1840 :
1841 : /*
1842 : * If there are no serializable transactions which are not read-only, we
1843 : * can "opt out" of predicate locking and conflict checking for a
1844 : * read-only transaction.
1845 : *
1846 : * The reason this is safe is that a read-only transaction can only become
1847 : * part of a dangerous structure if it overlaps a writable transaction
1848 : * which in turn overlaps a writable transaction which committed before
1849 : * the read-only transaction started. A new writable transaction can
1850 : * overlap this one, but it can't meet the other condition of overlapping
1851 : * a transaction which committed before this one started.
1852 : */
1853 1682 : if (XactReadOnly && PredXact->WritableSxactCount == 0)
1854 : {
1855 111 : ReleasePredXact(sxact);
1856 111 : LWLockRelease(SerializableXactHashLock);
1857 111 : return snapshot;
1858 : }
1859 :
1860 : /* Initialize the structure. */
1861 1571 : sxact->vxid = vxid;
1862 1571 : sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1863 1571 : sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1864 1571 : sxact->commitSeqNo = InvalidSerCommitSeqNo;
1865 1571 : dlist_init(&(sxact->outConflicts));
1866 1571 : dlist_init(&(sxact->inConflicts));
1867 1571 : dlist_init(&(sxact->possibleUnsafeConflicts));
1868 1571 : sxact->topXid = GetTopTransactionIdIfAny();
1869 1571 : sxact->finishedBefore = InvalidTransactionId;
1870 1571 : sxact->xmin = snapshot->xmin;
1871 1571 : sxact->pid = MyProcPid;
1872 1571 : sxact->pgprocno = MyProcNumber;
1873 1571 : dlist_init(&sxact->predicateLocks);
1874 1571 : dlist_node_init(&sxact->finishedLink);
1875 1571 : sxact->flags = 0;
1876 1571 : if (XactReadOnly)
1877 : {
1878 : dlist_iter iter;
1879 :
1880 108 : sxact->flags |= SXACT_FLAG_READ_ONLY;
1881 :
1882 : /*
1883 : * Register all concurrent r/w transactions as possible conflicts; if
1884 : * all of them commit without any outgoing conflicts to earlier
1885 : * transactions then this snapshot can be deemed safe (and we can run
1886 : * without tracking predicate locks).
1887 : */
1888 473 : dlist_foreach(iter, &PredXact->activeList)
1889 : {
1890 365 : othersxact = dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
1891 :
1892 365 : if (!SxactIsCommitted(othersxact)
1893 244 : && !SxactIsDoomed(othersxact)
1894 244 : && !SxactIsReadOnly(othersxact))
1895 : {
1896 135 : SetPossibleUnsafeConflict(sxact, othersxact);
1897 : }
1898 : }
1899 :
1900 : /*
1901 : * If we didn't find any possibly unsafe conflicts because every
1902 : * uncommitted writable transaction turned out to be doomed, then we
1903 : * can "opt out" immediately. See comments above the earlier check
1904 : * for PredXact->WritableSxactCount == 0.
1905 : */
1906 108 : if (dlist_is_empty(&sxact->possibleUnsafeConflicts))
1907 : {
1908 0 : ReleasePredXact(sxact);
1909 0 : LWLockRelease(SerializableXactHashLock);
1910 0 : return snapshot;
1911 : }
1912 : }
1913 : else
1914 : {
1915 1463 : ++(PredXact->WritableSxactCount);
1916 : Assert(PredXact->WritableSxactCount <=
1917 : (MaxBackends + max_prepared_xacts));
1918 : }
1919 :
1920 : /* Maintain serializable global xmin info. */
1921 1571 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
1922 : {
1923 : Assert(PredXact->SxactGlobalXminCount == 0);
1924 870 : PredXact->SxactGlobalXmin = snapshot->xmin;
1925 870 : PredXact->SxactGlobalXminCount = 1;
1926 870 : SerialSetActiveSerXmin(snapshot->xmin);
1927 : }
1928 701 : else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1929 : {
1930 : Assert(PredXact->SxactGlobalXminCount > 0);
1931 662 : PredXact->SxactGlobalXminCount++;
1932 : }
1933 : else
1934 : {
1935 : Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin));
1936 : }
1937 :
1938 1571 : MySerializableXact = sxact;
1939 1571 : MyXactDidWrite = false; /* haven't written anything yet */
1940 :
1941 1571 : LWLockRelease(SerializableXactHashLock);
1942 :
1943 1571 : CreateLocalPredicateLockHash();
1944 :
1945 1571 : return snapshot;
1946 : }
1947 :
1948 : static void
1949 1584 : CreateLocalPredicateLockHash(void)
1950 : {
1951 : HASHCTL hash_ctl;
1952 :
1953 : /* Initialize the backend-local hash table of parent locks */
1954 : Assert(LocalPredicateLockHash == NULL);
1955 1584 : hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1956 1584 : hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1957 1584 : LocalPredicateLockHash = hash_create("Local predicate lock",
1958 : max_predicate_locks_per_xact,
1959 : &hash_ctl,
1960 : HASH_ELEM | HASH_BLOBS);
1961 1584 : }
1962 :
1963 : /*
1964 : * Register the top level XID in SerializableXidHash.
1965 : * Also store it for easy reference in MySerializableXact.
1966 : */
1967 : void
1968 159577 : RegisterPredicateLockingXid(TransactionId xid)
1969 : {
1970 : SERIALIZABLEXIDTAG sxidtag;
1971 : SERIALIZABLEXID *sxid;
1972 : bool found;
1973 :
1974 : /*
1975 : * If we're not tracking predicate lock data for this transaction, we
1976 : * should ignore the request and return quickly.
1977 : */
1978 159577 : if (MySerializableXact == InvalidSerializableXact)
1979 158279 : return;
1980 :
1981 : /* We should have a valid XID and be at the top level. */
1982 : Assert(TransactionIdIsValid(xid));
1983 :
1984 1298 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1985 :
1986 : /* This should only be done once per transaction. */
1987 : Assert(MySerializableXact->topXid == InvalidTransactionId);
1988 :
1989 1298 : MySerializableXact->topXid = xid;
1990 :
1991 1298 : sxidtag.xid = xid;
1992 1298 : sxid = (SERIALIZABLEXID *) hash_search(SerializableXidHash,
1993 : &sxidtag,
1994 : HASH_ENTER, &found);
1995 : Assert(!found);
1996 :
1997 : /* Initialize the structure. */
1998 1298 : sxid->myXact = MySerializableXact;
1999 1298 : LWLockRelease(SerializableXactHashLock);
2000 : }
2001 :
2002 :
2003 : /*
2004 : * Check whether there are any predicate locks held by any transaction
2005 : * for the page at the given block number.
2006 : *
2007 : * Note that the transaction may be completed but not yet subject to
2008 : * cleanup due to overlapping serializable transactions. This must
2009 : * return valid information regardless of transaction isolation level.
2010 : *
2011 : * Also note that this doesn't check for a conflicting relation lock,
2012 : * just a lock specifically on the given page.
2013 : *
2014 : * One use is to support proper behavior during GiST index vacuum.
2015 : */
2016 : bool
2017 0 : PageIsPredicateLocked(Relation relation, BlockNumber blkno)
2018 : {
2019 : PREDICATELOCKTARGETTAG targettag;
2020 : uint32 targettaghash;
2021 : LWLock *partitionLock;
2022 : PREDICATELOCKTARGET *target;
2023 :
2024 0 : SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
2025 : relation->rd_locator.dbOid,
2026 : relation->rd_id,
2027 : blkno);
2028 :
2029 0 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
2030 0 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
2031 0 : LWLockAcquire(partitionLock, LW_SHARED);
2032 : target = (PREDICATELOCKTARGET *)
2033 0 : hash_search_with_hash_value(PredicateLockTargetHash,
2034 : &targettag, targettaghash,
2035 : HASH_FIND, NULL);
2036 0 : LWLockRelease(partitionLock);
2037 :
2038 0 : return (target != NULL);
2039 : }
2040 :
2041 :
2042 : /*
2043 : * Check whether a particular lock is held by this transaction.
2044 : *
2045 : * Important note: this function may return false even if the lock is
2046 : * being held, because it uses the local lock table which is not
2047 : * updated if another transaction modifies our lock list (e.g. to
2048 : * split an index page). It can also return true when a coarser
2049 : * granularity lock that covers this target is being held. Be careful
2050 : * to only use this function in circumstances where such errors are
2051 : * acceptable!
2052 : */
2053 : static bool
2054 77307 : PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
2055 : {
2056 : LOCALPREDICATELOCK *lock;
2057 :
2058 : /* check local hash table */
2059 77307 : lock = (LOCALPREDICATELOCK *) hash_search(LocalPredicateLockHash,
2060 : targettag,
2061 : HASH_FIND, NULL);
2062 :
2063 77307 : if (!lock)
2064 30203 : return false;
2065 :
2066 : /*
2067 : * Found entry in the table, but still need to check whether it's actually
2068 : * held -- it could just be a parent of some held lock.
2069 : */
2070 47104 : return lock->held;
2071 : }
2072 :
2073 : /*
2074 : * Return the parent lock tag in the lock hierarchy: the next coarser
2075 : * lock that covers the provided tag.
2076 : *
2077 : * Returns true and sets *parent to the parent tag if one exists,
2078 : * returns false if none exists.
2079 : */
2080 : static bool
2081 45367 : GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
2082 : PREDICATELOCKTARGETTAG *parent)
2083 : {
2084 45367 : switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2085 : {
2086 9902 : case PREDLOCKTAG_RELATION:
2087 : /* relation locks have no parent lock */
2088 9902 : return false;
2089 :
2090 8490 : case PREDLOCKTAG_PAGE:
2091 : /* parent lock is relation lock */
2092 8490 : SET_PREDICATELOCKTARGETTAG_RELATION(*parent,
2093 : GET_PREDICATELOCKTARGETTAG_DB(*tag),
2094 : GET_PREDICATELOCKTARGETTAG_RELATION(*tag));
2095 :
2096 8490 : return true;
2097 :
2098 26975 : case PREDLOCKTAG_TUPLE:
2099 : /* parent lock is page lock */
2100 26975 : SET_PREDICATELOCKTARGETTAG_PAGE(*parent,
2101 : GET_PREDICATELOCKTARGETTAG_DB(*tag),
2102 : GET_PREDICATELOCKTARGETTAG_RELATION(*tag),
2103 : GET_PREDICATELOCKTARGETTAG_PAGE(*tag));
2104 26975 : return true;
2105 : }
2106 :
2107 : /* not reachable */
2108 : Assert(false);
2109 0 : return false;
2110 : }
2111 :
2112 : /*
2113 : * Check whether the lock we are considering is already covered by a
2114 : * coarser lock for our transaction.
2115 : *
2116 : * Like PredicateLockExists, this function might return a false
2117 : * negative, but it will never return a false positive.
2118 : */
2119 : static bool
2120 26078 : CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
2121 : {
2122 : PREDICATELOCKTARGETTAG targettag,
2123 : parenttag;
2124 :
2125 26078 : targettag = *newtargettag;
2126 :
2127 : /* check parents iteratively until no more */
2128 31512 : while (GetParentPredicateLockTag(&targettag, &parenttag))
2129 : {
2130 27255 : targettag = parenttag;
2131 27255 : if (PredicateLockExists(&targettag))
2132 21821 : return true;
2133 : }
2134 :
2135 : /* no more parents to check; lock is not covered */
2136 4257 : return false;
2137 : }
2138 :
2139 : /*
2140 : * Remove the dummy entry from the predicate lock target hash, to free up some
2141 : * scratch space. The caller must be holding SerializablePredicateListLock,
2142 : * and must restore the entry with RestoreScratchTarget() before releasing the
2143 : * lock.
2144 : *
2145 : * If lockheld is true, the caller is already holding the partition lock
2146 : * of the partition containing the scratch entry.
2147 : */
2148 : static void
2149 49 : RemoveScratchTarget(bool lockheld)
2150 : {
2151 : bool found;
2152 :
2153 : Assert(LWLockHeldByMe(SerializablePredicateListLock));
2154 :
2155 49 : if (!lockheld)
2156 0 : LWLockAcquire(ScratchPartitionLock, LW_EXCLUSIVE);
2157 49 : hash_search_with_hash_value(PredicateLockTargetHash,
2158 : &ScratchTargetTag,
2159 : ScratchTargetTagHash,
2160 : HASH_REMOVE, &found);
2161 : Assert(found);
2162 49 : if (!lockheld)
2163 0 : LWLockRelease(ScratchPartitionLock);
2164 49 : }
2165 :
2166 : /*
2167 : * Re-insert the dummy entry in predicate lock target hash.
2168 : */
2169 : static void
2170 49 : RestoreScratchTarget(bool lockheld)
2171 : {
2172 : bool found;
2173 :
2174 : Assert(LWLockHeldByMe(SerializablePredicateListLock));
2175 :
2176 49 : if (!lockheld)
2177 0 : LWLockAcquire(ScratchPartitionLock, LW_EXCLUSIVE);
2178 49 : hash_search_with_hash_value(PredicateLockTargetHash,
2179 : &ScratchTargetTag,
2180 : ScratchTargetTagHash,
2181 : HASH_ENTER, &found);
2182 : Assert(!found);
2183 49 : if (!lockheld)
2184 0 : LWLockRelease(ScratchPartitionLock);
2185 49 : }
2186 :
2187 : /*
2188 : * Check whether the list of related predicate locks is empty for a
2189 : * predicate lock target, and remove the target if it is.
2190 : */
2191 : static void
2192 4251 : RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
2193 : {
2194 : PREDICATELOCKTARGET *rmtarget PG_USED_FOR_ASSERTS_ONLY;
2195 :
2196 : Assert(LWLockHeldByMe(SerializablePredicateListLock));
2197 :
2198 : /* Can't remove it until no locks at this target. */
2199 4251 : if (!dlist_is_empty(&target->predicateLocks))
2200 973 : return;
2201 :
2202 : /* Actually remove the target. */
2203 3278 : rmtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2204 3278 : &target->tag,
2205 : targettaghash,
2206 : HASH_REMOVE, NULL);
2207 : Assert(rmtarget == target);
2208 : }
2209 :
2210 : /*
2211 : * Delete child target locks owned by this process.
2212 : * This implementation is assuming that the usage of each target tag field
2213 : * is uniform. No need to make this hard if we don't have to.
2214 : *
2215 : * We acquire an LWLock in the case of parallel mode, because worker
2216 : * backends have access to the leader's SERIALIZABLEXACT. Otherwise,
2217 : * we aren't acquiring LWLocks for the predicate lock or lock
2218 : * target structures associated with this transaction unless we're going
2219 : * to modify them, because no other process is permitted to modify our
2220 : * locks.
2221 : */
2222 : static void
2223 2374 : DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
2224 : {
2225 : SERIALIZABLEXACT *sxact;
2226 : PREDICATELOCK *predlock;
2227 : dlist_mutable_iter iter;
2228 :
2229 2374 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
2230 2374 : sxact = MySerializableXact;
2231 2374 : if (IsInParallelMode())
2232 11 : LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2233 :
2234 7837 : dlist_foreach_modify(iter, &sxact->predicateLocks)
2235 : {
2236 : PREDICATELOCKTAG oldlocktag;
2237 : PREDICATELOCKTARGET *oldtarget;
2238 : PREDICATELOCKTARGETTAG oldtargettag;
2239 :
2240 5463 : predlock = dlist_container(PREDICATELOCK, xactLink, iter.cur);
2241 :
2242 5463 : oldlocktag = predlock->tag;
2243 : Assert(oldlocktag.myXact == sxact);
2244 5463 : oldtarget = oldlocktag.myTarget;
2245 5463 : oldtargettag = oldtarget->tag;
2246 :
2247 5463 : if (TargetTagIsCoveredBy(oldtargettag, *newtargettag))
2248 : {
2249 : uint32 oldtargettaghash;
2250 : LWLock *partitionLock;
2251 : PREDICATELOCK *rmpredlock PG_USED_FOR_ASSERTS_ONLY;
2252 :
2253 999 : oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
2254 999 : partitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
2255 :
2256 999 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
2257 :
2258 999 : dlist_delete(&predlock->xactLink);
2259 999 : dlist_delete(&predlock->targetLink);
2260 999 : rmpredlock = hash_search_with_hash_value
2261 : (PredicateLockHash,
2262 : &oldlocktag,
2263 999 : PredicateLockHashCodeFromTargetHashCode(&oldlocktag,
2264 : oldtargettaghash),
2265 : HASH_REMOVE, NULL);
2266 : Assert(rmpredlock == predlock);
2267 :
2268 999 : RemoveTargetIfNoLongerUsed(oldtarget, oldtargettaghash);
2269 :
2270 999 : LWLockRelease(partitionLock);
2271 :
2272 999 : DecrementParentLocks(&oldtargettag);
2273 : }
2274 : }
2275 2374 : if (IsInParallelMode())
2276 11 : LWLockRelease(&sxact->perXactPredicateListLock);
2277 2374 : LWLockRelease(SerializablePredicateListLock);
2278 2374 : }
2279 :
2280 : /*
2281 : * Returns the promotion limit for a given predicate lock target. This is the
2282 : * max number of descendant locks allowed before promoting to the specified
2283 : * tag. Note that the limit includes non-direct descendants (e.g., both tuples
2284 : * and pages for a relation lock).
2285 : *
2286 : * Currently the default limit is 2 for a page lock, and half of the value of
2287 : * max_pred_locks_per_transaction - 1 for a relation lock, to match behavior
2288 : * of earlier releases when upgrading.
2289 : *
2290 : * TODO SSI: We should probably add additional GUCs to allow a maximum ratio
2291 : * of page and tuple locks based on the pages in a relation, and the maximum
2292 : * ratio of tuple locks to tuples in a page. This would provide more
2293 : * generally "balanced" allocation of locks to where they are most useful,
2294 : * while still allowing the absolute numbers to prevent one relation from
2295 : * tying up all predicate lock resources.
2296 : */
2297 : static int
2298 5434 : MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag)
2299 : {
2300 5434 : switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2301 : {
2302 3551 : case PREDLOCKTAG_RELATION:
2303 3551 : return max_predicate_locks_per_relation < 0
2304 : ? (max_predicate_locks_per_xact
2305 3551 : / (-max_predicate_locks_per_relation)) - 1
2306 3551 : : max_predicate_locks_per_relation;
2307 :
2308 1883 : case PREDLOCKTAG_PAGE:
2309 1883 : return max_predicate_locks_per_page;
2310 :
2311 0 : case PREDLOCKTAG_TUPLE:
2312 :
2313 : /*
2314 : * not reachable: nothing is finer-granularity than a tuple, so we
2315 : * should never try to promote to it.
2316 : */
2317 : Assert(false);
2318 0 : return 0;
2319 : }
2320 :
2321 : /* not reachable */
2322 : Assert(false);
2323 0 : return 0;
2324 : }
2325 :
2326 : /*
2327 : * For all ancestors of a newly-acquired predicate lock, increment
2328 : * their child count in the parent hash table. If any of them have
2329 : * more descendants than their promotion threshold, acquire the
2330 : * coarsest such lock.
2331 : *
2332 : * Returns true if a parent lock was acquired and false otherwise.
2333 : */
2334 : static bool
2335 4257 : CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag)
2336 : {
2337 : PREDICATELOCKTARGETTAG targettag,
2338 : nexttag,
2339 : promotiontag;
2340 : LOCALPREDICATELOCK *parentlock;
2341 : bool found,
2342 : promote;
2343 :
2344 4257 : promote = false;
2345 :
2346 4257 : targettag = *reqtag;
2347 :
2348 : /* check parents iteratively */
2349 13948 : while (GetParentPredicateLockTag(&targettag, &nexttag))
2350 : {
2351 5434 : targettag = nexttag;
2352 5434 : parentlock = (LOCALPREDICATELOCK *) hash_search(LocalPredicateLockHash,
2353 : &targettag,
2354 : HASH_ENTER,
2355 : &found);
2356 5434 : if (!found)
2357 : {
2358 3373 : parentlock->held = false;
2359 3373 : parentlock->childLocks = 1;
2360 : }
2361 : else
2362 2061 : parentlock->childLocks++;
2363 :
2364 5434 : if (parentlock->childLocks >
2365 5434 : MaxPredicateChildLocks(&targettag))
2366 : {
2367 : /*
2368 : * We should promote to this parent lock. Continue to check its
2369 : * ancestors, however, both to get their child counts right and to
2370 : * check whether we should just go ahead and promote to one of
2371 : * them.
2372 : */
2373 333 : promotiontag = targettag;
2374 333 : promote = true;
2375 : }
2376 : }
2377 :
2378 4257 : if (promote)
2379 : {
2380 : /* acquire coarsest ancestor eligible for promotion */
2381 333 : PredicateLockAcquire(&promotiontag);
2382 333 : return true;
2383 : }
2384 : else
2385 3924 : return false;
2386 : }
2387 :
2388 : /*
2389 : * When releasing a lock, decrement the child count on all ancestor
2390 : * locks.
2391 : *
2392 : * This is called only when releasing a lock via
2393 : * DeleteChildTargetLocks (i.e. when a lock becomes redundant because
2394 : * we've acquired its parent, possibly due to promotion) or when a new
2395 : * MVCC write lock makes the predicate lock unnecessary. There's no
2396 : * point in calling it when locks are released at transaction end, as
2397 : * this information is no longer needed.
2398 : */
2399 : static void
2400 1388 : DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag)
2401 : {
2402 : PREDICATELOCKTARGETTAG parenttag,
2403 : nexttag;
2404 :
2405 1388 : parenttag = *targettag;
2406 :
2407 4164 : while (GetParentPredicateLockTag(&parenttag, &nexttag))
2408 : {
2409 : uint32 targettaghash;
2410 : LOCALPREDICATELOCK *parentlock,
2411 : *rmlock PG_USED_FOR_ASSERTS_ONLY;
2412 :
2413 2776 : parenttag = nexttag;
2414 2776 : targettaghash = PredicateLockTargetTagHashCode(&parenttag);
2415 : parentlock = (LOCALPREDICATELOCK *)
2416 2776 : hash_search_with_hash_value(LocalPredicateLockHash,
2417 : &parenttag, targettaghash,
2418 : HASH_FIND, NULL);
2419 :
2420 : /*
2421 : * There's a small chance the parent lock doesn't exist in the lock
2422 : * table. This can happen if we prematurely removed it because an
2423 : * index split caused the child refcount to be off.
2424 : */
2425 2776 : if (parentlock == NULL)
2426 0 : continue;
2427 :
2428 2776 : parentlock->childLocks--;
2429 :
2430 : /*
2431 : * Under similar circumstances the parent lock's refcount might be
2432 : * zero. This only happens if we're holding that lock (otherwise we
2433 : * would have removed the entry).
2434 : */
2435 2776 : if (parentlock->childLocks < 0)
2436 : {
2437 : Assert(parentlock->held);
2438 0 : parentlock->childLocks = 0;
2439 : }
2440 :
2441 2776 : if ((parentlock->childLocks == 0) && (!parentlock->held))
2442 : {
2443 : rmlock = (LOCALPREDICATELOCK *)
2444 766 : hash_search_with_hash_value(LocalPredicateLockHash,
2445 : &parenttag, targettaghash,
2446 : HASH_REMOVE, NULL);
2447 : Assert(rmlock == parentlock);
2448 : }
2449 : }
2450 1388 : }
2451 :
2452 : /*
2453 : * Indicate that a predicate lock on the given target is held by the
2454 : * specified transaction. Has no effect if the lock is already held.
2455 : *
2456 : * This updates the lock table and the sxact's lock list, and creates
2457 : * the lock target if necessary, but does *not* do anything related to
2458 : * granularity promotion or the local lock table. See
2459 : * PredicateLockAcquire for that.
2460 : */
2461 : static void
2462 4257 : CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
2463 : uint32 targettaghash,
2464 : SERIALIZABLEXACT *sxact)
2465 : {
2466 : PREDICATELOCKTARGET *target;
2467 : PREDICATELOCKTAG locktag;
2468 : PREDICATELOCK *lock;
2469 : LWLock *partitionLock;
2470 : bool found;
2471 :
2472 4257 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
2473 :
2474 4257 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
2475 4257 : if (IsInParallelMode())
2476 16 : LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
2477 4257 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
2478 :
2479 : /* Make sure that the target is represented. */
2480 : target = (PREDICATELOCKTARGET *)
2481 4257 : hash_search_with_hash_value(PredicateLockTargetHash,
2482 : targettag, targettaghash,
2483 : HASH_ENTER_NULL, &found);
2484 4257 : if (!target)
2485 0 : ereport(ERROR,
2486 : (errcode(ERRCODE_OUT_OF_MEMORY),
2487 : errmsg("out of shared memory"),
2488 : errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2489 4257 : if (!found)
2490 3278 : dlist_init(&target->predicateLocks);
2491 :
2492 : /* We've got the sxact and target, make sure they're joined. */
2493 4257 : locktag.myTarget = target;
2494 4257 : locktag.myXact = sxact;
2495 : lock = (PREDICATELOCK *)
2496 4257 : hash_search_with_hash_value(PredicateLockHash, &locktag,
2497 4257 : PredicateLockHashCodeFromTargetHashCode(&locktag, targettaghash),
2498 : HASH_ENTER_NULL, &found);
2499 4257 : if (!lock)
2500 0 : ereport(ERROR,
2501 : (errcode(ERRCODE_OUT_OF_MEMORY),
2502 : errmsg("out of shared memory"),
2503 : errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
2504 :
2505 4257 : if (!found)
2506 : {
2507 4251 : dlist_push_tail(&target->predicateLocks, &lock->targetLink);
2508 4251 : dlist_push_tail(&sxact->predicateLocks, &lock->xactLink);
2509 4251 : lock->commitSeqNo = InvalidSerCommitSeqNo;
2510 : }
2511 :
2512 4257 : LWLockRelease(partitionLock);
2513 4257 : if (IsInParallelMode())
2514 16 : LWLockRelease(&sxact->perXactPredicateListLock);
2515 4257 : LWLockRelease(SerializablePredicateListLock);
2516 4257 : }
2517 :
2518 : /*
2519 : * Acquire a predicate lock on the specified target for the current
2520 : * connection if not already held. This updates the local lock table
2521 : * and uses it to implement granularity promotion. It will consolidate
2522 : * multiple locks into a coarser lock if warranted, and will release
2523 : * any finer-grained locks covered by the new one.
2524 : */
2525 : static void
2526 26281 : PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
2527 : {
2528 : uint32 targettaghash;
2529 : bool found;
2530 : LOCALPREDICATELOCK *locallock;
2531 :
2532 : /* Do we have the lock already, or a covering lock? */
2533 26281 : if (PredicateLockExists(targettag))
2534 22024 : return;
2535 :
2536 26078 : if (CoarserLockCovers(targettag))
2537 21821 : return;
2538 :
2539 : /* the same hash and LW lock apply to the lock target and the local lock. */
2540 4257 : targettaghash = PredicateLockTargetTagHashCode(targettag);
2541 :
2542 : /* Acquire lock in local table */
2543 : locallock = (LOCALPREDICATELOCK *)
2544 4257 : hash_search_with_hash_value(LocalPredicateLockHash,
2545 : targettag, targettaghash,
2546 : HASH_ENTER, &found);
2547 4257 : locallock->held = true;
2548 4257 : if (!found)
2549 3924 : locallock->childLocks = 0;
2550 :
2551 : /* Actually create the lock */
2552 4257 : CreatePredicateLock(targettag, targettaghash, MySerializableXact);
2553 :
2554 : /*
2555 : * Lock has been acquired. Check whether it should be promoted to a
2556 : * coarser granularity, or whether there are finer-granularity locks to
2557 : * clean up.
2558 : */
2559 4257 : if (CheckAndPromotePredicateLockRequest(targettag))
2560 : {
2561 : /*
2562 : * Lock request was promoted to a coarser-granularity lock, and that
2563 : * lock was acquired. It will delete this lock and any of its
2564 : * children, so we're done.
2565 : */
2566 : }
2567 : else
2568 : {
2569 : /* Clean up any finer-granularity locks */
2570 3924 : if (GET_PREDICATELOCKTARGETTAG_TYPE(*targettag) != PREDLOCKTAG_TUPLE)
2571 2374 : DeleteChildTargetLocks(targettag);
2572 : }
2573 : }
2574 :
2575 :
2576 : /*
2577 : * PredicateLockRelation
2578 : *
2579 : * Gets a predicate lock at the relation level.
2580 : * Skip if not in full serializable transaction isolation level.
2581 : * Skip if this is a temporary table.
2582 : * Clear any finer-grained predicate locks this session has on the relation.
2583 : */
2584 : void
2585 752383 : PredicateLockRelation(Relation relation, Snapshot snapshot)
2586 : {
2587 : PREDICATELOCKTARGETTAG tag;
2588 :
2589 752383 : if (!SerializationNeededForRead(relation, snapshot))
2590 751658 : return;
2591 :
2592 725 : SET_PREDICATELOCKTARGETTAG_RELATION(tag,
2593 : relation->rd_locator.dbOid,
2594 : relation->rd_id);
2595 725 : PredicateLockAcquire(&tag);
2596 : }
2597 :
2598 : /*
2599 : * PredicateLockPage
2600 : *
2601 : * Gets a predicate lock at the page level.
2602 : * Skip if not in full serializable transaction isolation level.
2603 : * Skip if this is a temporary table.
2604 : * Skip if a coarser predicate lock already covers this page.
2605 : * Clear any finer-grained predicate locks this session has on the relation.
2606 : */
2607 : void
2608 12297291 : PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
2609 : {
2610 : PREDICATELOCKTARGETTAG tag;
2611 :
2612 12297291 : if (!SerializationNeededForRead(relation, snapshot))
2613 12295839 : return;
2614 :
2615 1452 : SET_PREDICATELOCKTARGETTAG_PAGE(tag,
2616 : relation->rd_locator.dbOid,
2617 : relation->rd_id,
2618 : blkno);
2619 1452 : PredicateLockAcquire(&tag);
2620 : }
2621 :
2622 : /*
2623 : * PredicateLockTID
2624 : *
2625 : * Gets a predicate lock at the tuple level.
2626 : * Skip if not in full serializable transaction isolation level.
2627 : * Skip if this is a temporary table.
2628 : */
2629 : void
2630 21397923 : PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot,
2631 : TransactionId tuple_xid)
2632 : {
2633 : PREDICATELOCKTARGETTAG tag;
2634 :
2635 21397923 : if (!SerializationNeededForRead(relation, snapshot))
2636 21374152 : return;
2637 :
2638 : /*
2639 : * Return if this xact wrote it.
2640 : */
2641 23773 : if (relation->rd_index == NULL)
2642 : {
2643 : /* If we wrote it; we already have a write lock. */
2644 23773 : if (TransactionIdIsCurrentTransactionId(tuple_xid))
2645 2 : return;
2646 : }
2647 :
2648 : /*
2649 : * Do quick-but-not-definitive test for a relation lock first. This will
2650 : * never cause a return when the relation is *not* locked, but will
2651 : * occasionally let the check continue when there really *is* a relation
2652 : * level lock.
2653 : */
2654 23771 : SET_PREDICATELOCKTARGETTAG_RELATION(tag,
2655 : relation->rd_locator.dbOid,
2656 : relation->rd_id);
2657 23771 : if (PredicateLockExists(&tag))
2658 0 : return;
2659 :
2660 23771 : SET_PREDICATELOCKTARGETTAG_TUPLE(tag,
2661 : relation->rd_locator.dbOid,
2662 : relation->rd_id,
2663 : ItemPointerGetBlockNumber(tid),
2664 : ItemPointerGetOffsetNumber(tid));
2665 23771 : PredicateLockAcquire(&tag);
2666 : }
2667 :
2668 :
2669 : /*
2670 : * DeleteLockTarget
2671 : *
2672 : * Remove a predicate lock target along with any locks held for it.
2673 : *
2674 : * Caller must hold SerializablePredicateListLock and the
2675 : * appropriate hash partition lock for the target.
2676 : */
2677 : static void
2678 0 : DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
2679 : {
2680 : dlist_mutable_iter iter;
2681 :
2682 : Assert(LWLockHeldByMeInMode(SerializablePredicateListLock,
2683 : LW_EXCLUSIVE));
2684 : Assert(LWLockHeldByMe(PredicateLockHashPartitionLock(targettaghash)));
2685 :
2686 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2687 :
2688 0 : dlist_foreach_modify(iter, &target->predicateLocks)
2689 : {
2690 0 : PREDICATELOCK *predlock =
2691 0 : dlist_container(PREDICATELOCK, targetLink, iter.cur);
2692 : bool found;
2693 :
2694 0 : dlist_delete(&(predlock->xactLink));
2695 0 : dlist_delete(&(predlock->targetLink));
2696 :
2697 0 : hash_search_with_hash_value
2698 : (PredicateLockHash,
2699 0 : &predlock->tag,
2700 0 : PredicateLockHashCodeFromTargetHashCode(&predlock->tag,
2701 : targettaghash),
2702 : HASH_REMOVE, &found);
2703 : Assert(found);
2704 : }
2705 0 : LWLockRelease(SerializableXactHashLock);
2706 :
2707 : /* Remove the target itself, if possible. */
2708 0 : RemoveTargetIfNoLongerUsed(target, targettaghash);
2709 0 : }
2710 :
2711 :
2712 : /*
2713 : * TransferPredicateLocksToNewTarget
2714 : *
2715 : * Move or copy all the predicate locks for a lock target, for use by
2716 : * index page splits/combines and other things that create or replace
2717 : * lock targets. If 'removeOld' is true, the old locks and the target
2718 : * will be removed.
2719 : *
2720 : * Returns true on success, or false if we ran out of shared memory to
2721 : * allocate the new target or locks. Guaranteed to always succeed if
2722 : * removeOld is set (by using the scratch entry in PredicateLockTargetHash
2723 : * for scratch space).
2724 : *
2725 : * Warning: the "removeOld" option should be used only with care,
2726 : * because this function does not (indeed, can not) update other
2727 : * backends' LocalPredicateLockHash. If we are only adding new
2728 : * entries, this is not a problem: the local lock table is used only
2729 : * as a hint, so missing entries for locks that are held are
2730 : * OK. Having entries for locks that are no longer held, as can happen
2731 : * when using "removeOld", is not in general OK. We can only use it
2732 : * safely when replacing a lock with a coarser-granularity lock that
2733 : * covers it, or if we are absolutely certain that no one will need to
2734 : * refer to that lock in the future.
2735 : *
2736 : * Caller must hold SerializablePredicateListLock exclusively.
2737 : */
2738 : static bool
2739 1 : TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
2740 : PREDICATELOCKTARGETTAG newtargettag,
2741 : bool removeOld)
2742 : {
2743 : uint32 oldtargettaghash;
2744 : LWLock *oldpartitionLock;
2745 : PREDICATELOCKTARGET *oldtarget;
2746 : uint32 newtargettaghash;
2747 : LWLock *newpartitionLock;
2748 : bool found;
2749 1 : bool outOfShmem = false;
2750 :
2751 : Assert(LWLockHeldByMeInMode(SerializablePredicateListLock,
2752 : LW_EXCLUSIVE));
2753 :
2754 1 : oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
2755 1 : newtargettaghash = PredicateLockTargetTagHashCode(&newtargettag);
2756 1 : oldpartitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
2757 1 : newpartitionLock = PredicateLockHashPartitionLock(newtargettaghash);
2758 :
2759 1 : if (removeOld)
2760 : {
2761 : /*
2762 : * Remove the dummy entry to give us scratch space, so we know we'll
2763 : * be able to create the new lock target.
2764 : */
2765 0 : RemoveScratchTarget(false);
2766 : }
2767 :
2768 : /*
2769 : * We must get the partition locks in ascending sequence to avoid
2770 : * deadlocks. If old and new partitions are the same, we must request the
2771 : * lock only once.
2772 : */
2773 1 : if (oldpartitionLock < newpartitionLock)
2774 : {
2775 1 : LWLockAcquire(oldpartitionLock,
2776 1 : (removeOld ? LW_EXCLUSIVE : LW_SHARED));
2777 1 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2778 : }
2779 0 : else if (oldpartitionLock > newpartitionLock)
2780 : {
2781 0 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2782 0 : LWLockAcquire(oldpartitionLock,
2783 0 : (removeOld ? LW_EXCLUSIVE : LW_SHARED));
2784 : }
2785 : else
2786 0 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2787 :
2788 : /*
2789 : * Look for the old target. If not found, that's OK; no predicate locks
2790 : * are affected, so we can just clean up and return. If it does exist,
2791 : * walk its list of predicate locks and move or copy them to the new
2792 : * target.
2793 : */
2794 1 : oldtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2795 : &oldtargettag,
2796 : oldtargettaghash,
2797 : HASH_FIND, NULL);
2798 :
2799 1 : if (oldtarget)
2800 : {
2801 : PREDICATELOCKTARGET *newtarget;
2802 : PREDICATELOCKTAG newpredlocktag;
2803 : dlist_mutable_iter iter;
2804 :
2805 0 : newtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2806 : &newtargettag,
2807 : newtargettaghash,
2808 : HASH_ENTER_NULL, &found);
2809 :
2810 0 : if (!newtarget)
2811 : {
2812 : /* Failed to allocate due to insufficient shmem */
2813 0 : outOfShmem = true;
2814 0 : goto exit;
2815 : }
2816 :
2817 : /* If we created a new entry, initialize it */
2818 0 : if (!found)
2819 0 : dlist_init(&newtarget->predicateLocks);
2820 :
2821 0 : newpredlocktag.myTarget = newtarget;
2822 :
2823 : /*
2824 : * Loop through all the locks on the old target, replacing them with
2825 : * locks on the new target.
2826 : */
2827 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2828 :
2829 0 : dlist_foreach_modify(iter, &oldtarget->predicateLocks)
2830 : {
2831 0 : PREDICATELOCK *oldpredlock =
2832 0 : dlist_container(PREDICATELOCK, targetLink, iter.cur);
2833 : PREDICATELOCK *newpredlock;
2834 0 : SerCommitSeqNo oldCommitSeqNo = oldpredlock->commitSeqNo;
2835 :
2836 0 : newpredlocktag.myXact = oldpredlock->tag.myXact;
2837 :
2838 0 : if (removeOld)
2839 : {
2840 0 : dlist_delete(&(oldpredlock->xactLink));
2841 0 : dlist_delete(&(oldpredlock->targetLink));
2842 :
2843 0 : hash_search_with_hash_value
2844 : (PredicateLockHash,
2845 0 : &oldpredlock->tag,
2846 0 : PredicateLockHashCodeFromTargetHashCode(&oldpredlock->tag,
2847 : oldtargettaghash),
2848 : HASH_REMOVE, &found);
2849 : Assert(found);
2850 : }
2851 :
2852 : newpredlock = (PREDICATELOCK *)
2853 0 : hash_search_with_hash_value(PredicateLockHash,
2854 : &newpredlocktag,
2855 0 : PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
2856 : newtargettaghash),
2857 : HASH_ENTER_NULL,
2858 : &found);
2859 0 : if (!newpredlock)
2860 : {
2861 : /* Out of shared memory. Undo what we've done so far. */
2862 0 : LWLockRelease(SerializableXactHashLock);
2863 0 : DeleteLockTarget(newtarget, newtargettaghash);
2864 0 : outOfShmem = true;
2865 0 : goto exit;
2866 : }
2867 0 : if (!found)
2868 : {
2869 0 : dlist_push_tail(&(newtarget->predicateLocks),
2870 : &(newpredlock->targetLink));
2871 0 : dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
2872 : &(newpredlock->xactLink));
2873 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
2874 : }
2875 : else
2876 : {
2877 0 : if (newpredlock->commitSeqNo < oldCommitSeqNo)
2878 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
2879 : }
2880 :
2881 : Assert(newpredlock->commitSeqNo != 0);
2882 : Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2883 : || (newpredlock->tag.myXact == OldCommittedSxact));
2884 : }
2885 0 : LWLockRelease(SerializableXactHashLock);
2886 :
2887 0 : if (removeOld)
2888 : {
2889 : Assert(dlist_is_empty(&oldtarget->predicateLocks));
2890 0 : RemoveTargetIfNoLongerUsed(oldtarget, oldtargettaghash);
2891 : }
2892 : }
2893 :
2894 :
2895 1 : exit:
2896 : /* Release partition locks in reverse order of acquisition. */
2897 1 : if (oldpartitionLock < newpartitionLock)
2898 : {
2899 1 : LWLockRelease(newpartitionLock);
2900 1 : LWLockRelease(oldpartitionLock);
2901 : }
2902 0 : else if (oldpartitionLock > newpartitionLock)
2903 : {
2904 0 : LWLockRelease(oldpartitionLock);
2905 0 : LWLockRelease(newpartitionLock);
2906 : }
2907 : else
2908 0 : LWLockRelease(newpartitionLock);
2909 :
2910 1 : if (removeOld)
2911 : {
2912 : /* We shouldn't run out of memory if we're moving locks */
2913 : Assert(!outOfShmem);
2914 :
2915 : /* Put the scratch entry back */
2916 0 : RestoreScratchTarget(false);
2917 : }
2918 :
2919 1 : return !outOfShmem;
2920 : }
2921 :
2922 : /*
2923 : * Drop all predicate locks of any granularity from the specified relation,
2924 : * which can be a heap relation or an index relation. If 'transfer' is true,
2925 : * acquire a relation lock on the heap for any transactions with any lock(s)
2926 : * on the specified relation.
2927 : *
2928 : * This requires grabbing a lot of LW locks and scanning the entire lock
2929 : * target table for matches. That makes this more expensive than most
2930 : * predicate lock management functions, but it will only be called for DDL
2931 : * type commands that are expensive anyway, and there are fast returns when
2932 : * no serializable transactions are active or the relation is temporary.
2933 : *
2934 : * We don't use the TransferPredicateLocksToNewTarget function because it
2935 : * acquires its own locks on the partitions of the two targets involved,
2936 : * and we'll already be holding all partition locks.
2937 : *
2938 : * We can't throw an error from here, because the call could be from a
2939 : * transaction which is not serializable.
2940 : *
2941 : * NOTE: This is currently only called with transfer set to true, but that may
2942 : * change. If we decide to clean up the locks from a table on commit of a
2943 : * transaction which executed DROP TABLE, the false condition will be useful.
2944 : */
2945 : static void
2946 21852 : DropAllPredicateLocksFromTable(Relation relation, bool transfer)
2947 : {
2948 : HASH_SEQ_STATUS seqstat;
2949 : PREDICATELOCKTARGET *oldtarget;
2950 : PREDICATELOCKTARGET *heaptarget;
2951 : Oid dbId;
2952 : Oid relId;
2953 : Oid heapId;
2954 : int i;
2955 : bool isIndex;
2956 : bool found;
2957 : uint32 heaptargettaghash;
2958 :
2959 : /*
2960 : * Bail out quickly if there are no serializable transactions running.
2961 : * It's safe to check this without taking locks because the caller is
2962 : * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2963 : * would matter here can be acquired while that is held.
2964 : */
2965 21852 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
2966 21803 : return;
2967 :
2968 71 : if (!PredicateLockingNeededForRelation(relation))
2969 22 : return;
2970 :
2971 49 : dbId = relation->rd_locator.dbOid;
2972 49 : relId = relation->rd_id;
2973 49 : if (relation->rd_index == NULL)
2974 : {
2975 0 : isIndex = false;
2976 0 : heapId = relId;
2977 : }
2978 : else
2979 : {
2980 49 : isIndex = true;
2981 49 : heapId = relation->rd_index->indrelid;
2982 : }
2983 : Assert(heapId != InvalidOid);
2984 : Assert(transfer || !isIndex); /* index OID only makes sense with
2985 : * transfer */
2986 :
2987 : /* Retrieve first time needed, then keep. */
2988 49 : heaptargettaghash = 0;
2989 49 : heaptarget = NULL;
2990 :
2991 : /* Acquire locks on all lock partitions */
2992 49 : LWLockAcquire(SerializablePredicateListLock, LW_EXCLUSIVE);
2993 833 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2994 784 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
2995 49 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2996 :
2997 : /*
2998 : * Remove the dummy entry to give us scratch space, so we know we'll be
2999 : * able to create the new lock target.
3000 : */
3001 49 : if (transfer)
3002 49 : RemoveScratchTarget(true);
3003 :
3004 : /* Scan through target map */
3005 49 : hash_seq_init(&seqstat, PredicateLockTargetHash);
3006 :
3007 93 : while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
3008 : {
3009 : dlist_mutable_iter iter;
3010 :
3011 : /*
3012 : * Check whether this is a target which needs attention.
3013 : */
3014 44 : if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != relId)
3015 44 : continue; /* wrong relation id */
3016 0 : if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
3017 0 : continue; /* wrong database id */
3018 0 : if (transfer && !isIndex
3019 0 : && GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == PREDLOCKTAG_RELATION)
3020 0 : continue; /* already the right lock */
3021 :
3022 : /*
3023 : * If we made it here, we have work to do. We make sure the heap
3024 : * relation lock exists, then we walk the list of predicate locks for
3025 : * the old target we found, moving all locks to the heap relation lock
3026 : * -- unless they already hold that.
3027 : */
3028 :
3029 : /*
3030 : * First make sure we have the heap relation target. We only need to
3031 : * do this once.
3032 : */
3033 0 : if (transfer && heaptarget == NULL)
3034 : {
3035 : PREDICATELOCKTARGETTAG heaptargettag;
3036 :
3037 0 : SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId);
3038 0 : heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag);
3039 0 : heaptarget = hash_search_with_hash_value(PredicateLockTargetHash,
3040 : &heaptargettag,
3041 : heaptargettaghash,
3042 : HASH_ENTER, &found);
3043 0 : if (!found)
3044 0 : dlist_init(&heaptarget->predicateLocks);
3045 : }
3046 :
3047 : /*
3048 : * Loop through all the locks on the old target, replacing them with
3049 : * locks on the new target.
3050 : */
3051 0 : dlist_foreach_modify(iter, &oldtarget->predicateLocks)
3052 : {
3053 0 : PREDICATELOCK *oldpredlock =
3054 0 : dlist_container(PREDICATELOCK, targetLink, iter.cur);
3055 : PREDICATELOCK *newpredlock;
3056 : SerCommitSeqNo oldCommitSeqNo;
3057 : SERIALIZABLEXACT *oldXact;
3058 :
3059 : /*
3060 : * Remove the old lock first. This avoids the chance of running
3061 : * out of lock structure entries for the hash table.
3062 : */
3063 0 : oldCommitSeqNo = oldpredlock->commitSeqNo;
3064 0 : oldXact = oldpredlock->tag.myXact;
3065 :
3066 0 : dlist_delete(&(oldpredlock->xactLink));
3067 :
3068 : /*
3069 : * No need for retail delete from oldtarget list, we're removing
3070 : * the whole target anyway.
3071 : */
3072 0 : hash_search(PredicateLockHash,
3073 0 : &oldpredlock->tag,
3074 : HASH_REMOVE, &found);
3075 : Assert(found);
3076 :
3077 0 : if (transfer)
3078 : {
3079 : PREDICATELOCKTAG newpredlocktag;
3080 :
3081 0 : newpredlocktag.myTarget = heaptarget;
3082 0 : newpredlocktag.myXact = oldXact;
3083 : newpredlock = (PREDICATELOCK *)
3084 0 : hash_search_with_hash_value(PredicateLockHash,
3085 : &newpredlocktag,
3086 0 : PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
3087 : heaptargettaghash),
3088 : HASH_ENTER,
3089 : &found);
3090 0 : if (!found)
3091 : {
3092 0 : dlist_push_tail(&(heaptarget->predicateLocks),
3093 : &(newpredlock->targetLink));
3094 0 : dlist_push_tail(&(newpredlocktag.myXact->predicateLocks),
3095 : &(newpredlock->xactLink));
3096 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
3097 : }
3098 : else
3099 : {
3100 0 : if (newpredlock->commitSeqNo < oldCommitSeqNo)
3101 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
3102 : }
3103 :
3104 : Assert(newpredlock->commitSeqNo != 0);
3105 : Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3106 : || (newpredlock->tag.myXact == OldCommittedSxact));
3107 : }
3108 : }
3109 :
3110 0 : hash_search(PredicateLockTargetHash, &oldtarget->tag, HASH_REMOVE,
3111 : &found);
3112 : Assert(found);
3113 : }
3114 :
3115 : /* Put the scratch entry back */
3116 49 : if (transfer)
3117 49 : RestoreScratchTarget(true);
3118 :
3119 : /* Release locks in reverse order */
3120 49 : LWLockRelease(SerializableXactHashLock);
3121 833 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3122 784 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
3123 49 : LWLockRelease(SerializablePredicateListLock);
3124 : }
3125 :
3126 : /*
3127 : * TransferPredicateLocksToHeapRelation
3128 : * For all transactions, transfer all predicate locks for the given
3129 : * relation to a single relation lock on the heap.
3130 : */
3131 : void
3132 21852 : TransferPredicateLocksToHeapRelation(Relation relation)
3133 : {
3134 21852 : DropAllPredicateLocksFromTable(relation, true);
3135 21852 : }
3136 :
3137 :
3138 : /*
3139 : * PredicateLockPageSplit
3140 : *
3141 : * Copies any predicate locks for the old page to the new page.
3142 : * Skip if this is a temporary table or toast table.
3143 : *
3144 : * NOTE: A page split (or overflow) affects all serializable transactions,
3145 : * even if it occurs in the context of another transaction isolation level.
3146 : *
3147 : * NOTE: This currently leaves the local copy of the locks without
3148 : * information on the new lock which is in shared memory. This could cause
3149 : * problems if enough page splits occur on locked pages without the processes
3150 : * which hold the locks getting in and noticing.
3151 : */
3152 : void
3153 36590 : PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
3154 : BlockNumber newblkno)
3155 : {
3156 : PREDICATELOCKTARGETTAG oldtargettag;
3157 : PREDICATELOCKTARGETTAG newtargettag;
3158 : bool success;
3159 :
3160 : /*
3161 : * Bail out quickly if there are no serializable transactions running.
3162 : *
3163 : * It's safe to do this check without taking any additional locks. Even if
3164 : * a serializable transaction starts concurrently, we know it can't take
3165 : * any SIREAD locks on the page being split because the caller is holding
3166 : * the associated buffer page lock. Memory reordering isn't an issue; the
3167 : * memory barrier in the LWLock acquisition guarantees that this read
3168 : * occurs while the buffer page lock is held.
3169 : */
3170 36590 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
3171 36589 : return;
3172 :
3173 17 : if (!PredicateLockingNeededForRelation(relation))
3174 16 : return;
3175 :
3176 : Assert(oldblkno != newblkno);
3177 : Assert(BlockNumberIsValid(oldblkno));
3178 : Assert(BlockNumberIsValid(newblkno));
3179 :
3180 1 : SET_PREDICATELOCKTARGETTAG_PAGE(oldtargettag,
3181 : relation->rd_locator.dbOid,
3182 : relation->rd_id,
3183 : oldblkno);
3184 1 : SET_PREDICATELOCKTARGETTAG_PAGE(newtargettag,
3185 : relation->rd_locator.dbOid,
3186 : relation->rd_id,
3187 : newblkno);
3188 :
3189 1 : LWLockAcquire(SerializablePredicateListLock, LW_EXCLUSIVE);
3190 :
3191 : /*
3192 : * Try copying the locks over to the new page's tag, creating it if
3193 : * necessary.
3194 : */
3195 1 : success = TransferPredicateLocksToNewTarget(oldtargettag,
3196 : newtargettag,
3197 : false);
3198 :
3199 1 : if (!success)
3200 : {
3201 : /*
3202 : * No more predicate lock entries are available. Failure isn't an
3203 : * option here, so promote the page lock to a relation lock.
3204 : */
3205 :
3206 : /* Get the parent relation lock's lock tag */
3207 0 : success = GetParentPredicateLockTag(&oldtargettag,
3208 : &newtargettag);
3209 : Assert(success);
3210 :
3211 : /*
3212 : * Move the locks to the parent. This shouldn't fail.
3213 : *
3214 : * Note that here we are removing locks held by other backends,
3215 : * leading to a possible inconsistency in their local lock hash table.
3216 : * This is OK because we're replacing it with a lock that covers the
3217 : * old one.
3218 : */
3219 0 : success = TransferPredicateLocksToNewTarget(oldtargettag,
3220 : newtargettag,
3221 : true);
3222 : Assert(success);
3223 : }
3224 :
3225 1 : LWLockRelease(SerializablePredicateListLock);
3226 : }
3227 :
3228 : /*
3229 : * PredicateLockPageCombine
3230 : *
3231 : * Combines predicate locks for two existing pages.
3232 : * Skip if this is a temporary table or toast table.
3233 : *
3234 : * NOTE: A page combine affects all serializable transactions, even if it
3235 : * occurs in the context of another transaction isolation level.
3236 : */
3237 : void
3238 3417 : PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
3239 : BlockNumber newblkno)
3240 : {
3241 : /*
3242 : * Page combines differ from page splits in that we ought to be able to
3243 : * remove the locks on the old page after transferring them to the new
3244 : * page, instead of duplicating them. However, because we can't edit other
3245 : * backends' local lock tables, removing the old lock would leave them
3246 : * with an entry in their LocalPredicateLockHash for a lock they're not
3247 : * holding, which isn't acceptable. So we wind up having to do the same
3248 : * work as a page split, acquiring a lock on the new page and keeping the
3249 : * old page locked too. That can lead to some false positives, but should
3250 : * be rare in practice.
3251 : */
3252 3417 : PredicateLockPageSplit(relation, oldblkno, newblkno);
3253 3417 : }
3254 :
3255 : /*
3256 : * Walk the list of in-progress serializable transactions and find the new
3257 : * xmin.
3258 : */
3259 : static void
3260 888 : SetNewSxactGlobalXmin(void)
3261 : {
3262 : dlist_iter iter;
3263 :
3264 : Assert(LWLockHeldByMe(SerializableXactHashLock));
3265 :
3266 888 : PredXact->SxactGlobalXmin = InvalidTransactionId;
3267 888 : PredXact->SxactGlobalXminCount = 0;
3268 :
3269 3355 : dlist_foreach(iter, &PredXact->activeList)
3270 : {
3271 2467 : SERIALIZABLEXACT *sxact =
3272 2467 : dlist_container(SERIALIZABLEXACT, xactLink, iter.cur);
3273 :
3274 2467 : if (!SxactIsRolledBack(sxact)
3275 2167 : && !SxactIsCommitted(sxact)
3276 19 : && sxact != OldCommittedSxact)
3277 : {
3278 : Assert(sxact->xmin != InvalidTransactionId);
3279 19 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)
3280 1 : || TransactionIdPrecedes(sxact->xmin,
3281 1 : PredXact->SxactGlobalXmin))
3282 : {
3283 18 : PredXact->SxactGlobalXmin = sxact->xmin;
3284 18 : PredXact->SxactGlobalXminCount = 1;
3285 : }
3286 1 : else if (TransactionIdEquals(sxact->xmin,
3287 : PredXact->SxactGlobalXmin))
3288 1 : PredXact->SxactGlobalXminCount++;
3289 : }
3290 : }
3291 :
3292 888 : SerialSetActiveSerXmin(PredXact->SxactGlobalXmin);
3293 888 : }
3294 :
3295 : /*
3296 : * ReleasePredicateLocks
3297 : *
3298 : * Releases predicate locks based on completion of the current transaction,
3299 : * whether committed or rolled back. It can also be called for a read only
3300 : * transaction when it becomes impossible for the transaction to become
3301 : * part of a dangerous structure.
3302 : *
3303 : * We do nothing unless this is a serializable transaction.
3304 : *
3305 : * This method must ensure that shared memory hash tables are cleaned
3306 : * up in some relatively timely fashion.
3307 : *
3308 : * If this transaction is committing and is holding any predicate locks,
3309 : * it must be added to a list of completed serializable transactions still
3310 : * holding locks.
3311 : *
3312 : * If isReadOnlySafe is true, then predicate locks are being released before
3313 : * the end of the transaction because MySerializableXact has been determined
3314 : * to be RO_SAFE. In non-parallel mode we can release it completely, but it
3315 : * in parallel mode we partially release the SERIALIZABLEXACT and keep it
3316 : * around until the end of the transaction, allowing each backend to clear its
3317 : * MySerializableXact variable and benefit from the optimization in its own
3318 : * time.
3319 : */
/*
 * Arguments, as used by the code below:
 *   isCommit       - true on commit. Forced to false if the SERIALIZABLEXACT
 *                    has already been partially released.
 *   isReadOnlySafe - true for an early, mid-transaction release of an RO_SAFE
 *                    transaction; must not be combined with isCommit.
 */
3320 : void
3321 617950 : ReleasePredicateLocks(bool isCommit, bool isReadOnlySafe)
3322 : {
3323 617950 : bool partiallyReleasing = false;
3324 : bool needToClear;
3325 : SERIALIZABLEXACT *roXact;
3326 : dlist_mutable_iter iter;
3327 :
3328 : /*
3329 : * We can't trust XactReadOnly here, because a transaction which started
3330 : * as READ WRITE can show as READ ONLY later, e.g., within
3331 : * subtransactions. We want to flag a transaction as READ ONLY if it
3332 : * commits without writing so that de facto READ ONLY transactions get the
3333 : * benefit of some RO optimizations, so we will use this local variable to
3334 : * get some cleanup logic right which is based on whether the transaction
3335 : * was declared READ ONLY at the top level.
3336 : */
3337 : bool topLevelIsDeclaredReadOnly;
3338 :
3339 : /* We can't be both committing and releasing early due to RO_SAFE. */
3340 : Assert(!(isCommit && isReadOnlySafe));
3341 :
3342 : /* Are we at the end of a transaction, that is, a commit or abort? */
3343 617950 : if (!isReadOnlySafe)
3344 : {
3345 : /*
3346 : * Parallel workers mustn't release predicate locks at the end of
3347 : * their transaction. The leader will do that at the end of its
3348 : * transaction. Workers only reset their backend-local state.
3349 : */
3350 617915 : if (IsParallelWorker())
3351 : {
3352 6012 : ReleasePredicateLocksLocal();
3353 616375 : return;
3354 : }
3355 :
3356 : /*
3357 : * By the time the leader in a parallel query reaches end of
3358 : * transaction, it has waited for all workers to exit.
3359 : */
3360 : Assert(!ParallelContextActive());
3361 :
3362 : /*
3363 : * If the leader in a parallel query earlier stashed a partially
3364 : * released SERIALIZABLEXACT for final clean-up at end of transaction
3365 : * (because workers might still have been accessing it), then it's
3366 : * time to restore it.
3367 : */
3368 611903 : if (SavedSerializableXact != InvalidSerializableXact)
3369 : {
3370 : Assert(MySerializableXact == InvalidSerializableXact);
3371 1 : MySerializableXact = SavedSerializableXact;
3372 1 : SavedSerializableXact = InvalidSerializableXact;
3373 : Assert(SxactIsPartiallyReleased(MySerializableXact));
3374 : }
3375 : }
3376 :
/* Nothing to do if this backend has no SERIALIZABLEXACT attached. */
3377 611938 : if (MySerializableXact == InvalidSerializableXact)
3378 : {
3379 : Assert(LocalPredicateLockHash == NULL);
3380 610362 : return;
3381 : }
3382 :
3383 1576 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
3384 :
3385 : /*
3386 : * If the transaction is committing, but it has been partially released
3387 : * already, then treat this as a roll back. It was marked as rolled back.
3388 : */
3389 1576 : if (isCommit && SxactIsPartiallyReleased(MySerializableXact))
3390 2 : isCommit = false;
3391 :
3392 : /*
3393 : * If we're called in the middle of a transaction because we discovered
3394 : * that the SXACT_FLAG_RO_SAFE flag was set, then we'll partially release
3395 : * it (that is, release the predicate locks and conflicts, but not the
3396 : * SERIALIZABLEXACT itself) if we're the first backend to have noticed.
3397 : */
3398 1576 : if (isReadOnlySafe && IsInParallelMode())
3399 : {
3400 : /*
3401 : * The leader needs to stash a pointer to it, so that it can
3402 : * completely release it at end-of-transaction.
3403 : */
3404 5 : if (!IsParallelWorker())
3405 1 : SavedSerializableXact = MySerializableXact;
3406 :
3407 : /*
3408 : * The first backend to reach this condition will partially release
3409 : * the SERIALIZABLEXACT. All others will just clear their
3410 : * backend-local state so that they stop doing SSI checks for the rest
3411 : * of the transaction.
3412 : */
3413 5 : if (SxactIsPartiallyReleased(MySerializableXact))
3414 : {
3415 3 : LWLockRelease(SerializableXactHashLock);
3416 3 : ReleasePredicateLocksLocal();
3417 1 : return;
3418 : }
3419 : else
3420 : {
3421 2 : MySerializableXact->flags |= SXACT_FLAG_PARTIALLY_RELEASED;
3422 2 : partiallyReleasing = true;
3423 : /* ... and proceed to perform the partial release below. */
3424 : }
3425 : }
3426 : Assert(!isCommit || SxactIsPrepared(MySerializableXact));
3427 : Assert(!isCommit || !SxactIsDoomed(MySerializableXact));
3428 : Assert(!SxactIsCommitted(MySerializableXact));
3429 : Assert(SxactIsPartiallyReleased(MySerializableXact)
3430 : || !SxactIsRolledBack(MySerializableXact));
3431 :
3432 : /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3433 : Assert(MySerializableXact->pid == 0 || IsolationIsSerializable());
3434 :
3435 : /* We'd better not already be on the cleanup list. */
3436 : Assert(!SxactIsOnFinishedList(MySerializableXact));
3437 :
/* Sample the READ_ONLY flag now; the commit path below may set it. */
3438 1573 : topLevelIsDeclaredReadOnly = SxactIsReadOnly(MySerializableXact);
3439 :
3440 : /*
3441 : * We don't hold XidGenLock here, assuming that TransactionId is
3442 : * atomic!
3443 : *
3444 : * If this value is changing, we don't care that much whether we get the
3445 : * old or new value -- it is just used to determine how far
3446 : * SxactGlobalXmin must advance before this transaction can be fully
3447 : * cleaned up. The worst that could happen is we wait for one more
3448 : * transaction to complete before freeing some RAM; correctness of visible
3449 : * behavior is not affected.
3450 : */
3451 1573 : MySerializableXact->finishedBefore = XidFromFullTransactionId(TransamVariables->nextXid);
3452 :
3453 : /*
3454 : * On commit, flag the sxact committed and assign its commitSeqNo. Otherwise
3455 : * it's a rollback or an RO_SAFE early release, and we can clear our locks immediately.
3456 : */
3457 1573 : if (isCommit)
3458 : {
3459 1242 : MySerializableXact->flags |= SXACT_FLAG_COMMITTED;
3460 1242 : MySerializableXact->commitSeqNo = ++(PredXact->LastSxactCommitSeqNo);
3461 : /* Recognize implicit read-only transaction (commit without write). */
3462 1242 : if (!MyXactDidWrite)
3463 237 : MySerializableXact->flags |= SXACT_FLAG_READ_ONLY;
3464 : }
3465 : else
3466 : {
3467 : /*
3468 : * The DOOMED flag indicates that we intend to roll back this
3469 : * transaction and so it should not cause serialization failures for
3470 : * other transactions that conflict with it. Note that this flag might
3471 : * already be set, if another backend marked this transaction for
3472 : * abort.
3473 : *
3474 : * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3475 : * has been called, and so the SerializableXact is eligible for
3476 : * cleanup. This means it should not be considered when calculating
3477 : * SxactGlobalXmin.
3478 : */
3479 331 : MySerializableXact->flags |= SXACT_FLAG_DOOMED;
3480 331 : MySerializableXact->flags |= SXACT_FLAG_ROLLED_BACK;
3481 :
3482 : /*
3483 : * If the transaction was previously prepared, but is now failing due
3484 : * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3485 : * prepare, clear the prepared flag. This simplifies conflict
3486 : * checking.
3487 : */
3488 331 : MySerializableXact->flags &= ~SXACT_FLAG_PREPARED;
3489 : }
3490 :
3491 1573 : if (!topLevelIsDeclaredReadOnly)
3492 : {
3493 : Assert(PredXact->WritableSxactCount > 0);
3494 1463 : if (--(PredXact->WritableSxactCount) == 0)
3495 : {
3496 : /*
3497 : * Release predicate locks and rw-conflicts in for all committed
3498 : * transactions. There are no longer any transactions which might
3499 : * conflict with the locks and no chance for new transactions to
3500 : * overlap. Similarly, existing conflicts in can't cause pivots,
3501 : * and any conflicts in which could have completed a dangerous
3502 : * structure would already have caused a rollback, so any
3503 : * remaining ones must be benign.
3504 : */
3505 880 : PredXact->CanPartialClearThrough = PredXact->LastSxactCommitSeqNo;
3506 : }
3507 : }
3508 : else
3509 : {
3510 : /*
3511 : * Read-only transactions: clear the list of transactions that might
3512 : * make us unsafe. Note that we use 'inLink' for the iteration as
3513 : * opposed to 'outLink' for the r/w xacts.
3514 : */
3515 152 : dlist_foreach_modify(iter, &MySerializableXact->possibleUnsafeConflicts)
3516 : {
3517 42 : RWConflict possibleUnsafeConflict =
3518 42 : dlist_container(RWConflictData, inLink, iter.cur);
3519 :
3520 : Assert(!SxactIsReadOnly(possibleUnsafeConflict->sxactOut));
3521 : Assert(MySerializableXact == possibleUnsafeConflict->sxactIn);
3522 :
3523 42 : ReleaseRWConflict(possibleUnsafeConflict);
3524 : }
3525 : }
3526 :
3527 : /* Check for conflict out to old committed transactions. */
3528 1573 : if (isCommit
3529 1242 : && !SxactIsReadOnly(MySerializableXact)
3530 1005 : && SxactHasSummaryConflictOut(MySerializableXact))
3531 : {
3532 : /*
3533 : * we don't know which old committed transaction we conflicted with,
3534 : * so be conservative and use FirstNormalSerCommitSeqNo here
3535 : */
3536 0 : MySerializableXact->SeqNo.earliestOutConflictCommit =
3537 : FirstNormalSerCommitSeqNo;
3538 0 : MySerializableXact->flags |= SXACT_FLAG_CONFLICT_OUT;
3539 : }
3540 :
3541 : /*
3542 : * Release all outConflicts to committed transactions. If we're rolling
3543 : * back clear them all. Set SXACT_FLAG_CONFLICT_OUT if any point to
3544 : * previously committed transactions.
3545 : */
3546 2260 : dlist_foreach_modify(iter, &MySerializableXact->outConflicts)
3547 : {
3548 687 : RWConflict conflict =
3549 : dlist_container(RWConflictData, outLink, iter.cur);
3550 :
/* Track the smallest prepareSeqNo among committed conflict-out targets. */
3551 687 : if (isCommit
3552 455 : && !SxactIsReadOnly(MySerializableXact)
3553 347 : && SxactIsCommitted(conflict->sxactIn))
3554 : {
3555 96 : if ((MySerializableXact->flags & SXACT_FLAG_CONFLICT_OUT) == 0
3556 0 : || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3557 96 : MySerializableXact->SeqNo.earliestOutConflictCommit = conflict->sxactIn->prepareSeqNo;
3558 96 : MySerializableXact->flags |= SXACT_FLAG_CONFLICT_OUT;
3559 : }
3560 :
3561 687 : if (!isCommit
3562 455 : || SxactIsCommitted(conflict->sxactIn)
3563 337 : || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3564 350 : ReleaseRWConflict(conflict);
3565 : }
3566 :
3567 : /*
3568 : * Release all inConflicts from committed and read-only transactions. If
3569 : * we're rolling back, clear them all.
3570 : */
3571 2351 : dlist_foreach_modify(iter, &MySerializableXact->inConflicts)
3572 : {
3573 778 : RWConflict conflict =
3574 778 : dlist_container(RWConflictData, inLink, iter.cur);
3575 :
3576 778 : if (!isCommit
3577 601 : || SxactIsCommitted(conflict->sxactOut)
3578 416 : || SxactIsReadOnly(conflict->sxactOut))
3579 442 : ReleaseRWConflict(conflict);
3580 : }
3581 :
3582 1573 : if (!topLevelIsDeclaredReadOnly)
3583 : {
3584 : /*
3585 : * Remove ourselves from the list of possible conflicts for concurrent
3586 : * READ ONLY transactions, flagging them as unsafe if we have a
3587 : * conflict out. If any are waiting DEFERRABLE transactions, wake them
3588 : * up if they are known safe or known unsafe.
3589 : */
3590 1556 : dlist_foreach_modify(iter, &MySerializableXact->possibleUnsafeConflicts)
3591 : {
3592 93 : RWConflict possibleUnsafeConflict =
3593 : dlist_container(RWConflictData, outLink, iter.cur);
3594 :
3595 93 : roXact = possibleUnsafeConflict->sxactIn;
3596 : Assert(MySerializableXact == possibleUnsafeConflict->sxactOut);
3597 : Assert(SxactIsReadOnly(roXact));
3598 :
3599 : /* Mark conflicted if necessary. */
3600 93 : if (isCommit
3601 91 : && MyXactDidWrite
3602 86 : && SxactHasConflictOut(MySerializableXact)
3603 13 : && (MySerializableXact->SeqNo.earliestOutConflictCommit
3604 13 : <= roXact->SeqNo.lastCommitBeforeSnapshot))
3605 : {
3606 : /*
3607 : * This releases possibleUnsafeConflict (as well as all other
3608 : * possible conflicts for roXact)
3609 : */
3610 3 : FlagSxactUnsafe(roXact);
3611 : }
3612 : else
3613 : {
3614 90 : ReleaseRWConflict(possibleUnsafeConflict);
3615 :
3616 : /*
3617 : * If we were the last possible conflict, flag it safe. The
3618 : * transaction can now safely release its predicate locks (but
3619 : * that transaction's backend has to do that itself).
3620 : */
3621 90 : if (dlist_is_empty(&roXact->possibleUnsafeConflicts))
3622 67 : roXact->flags |= SXACT_FLAG_RO_SAFE;
3623 : }
3624 :
3625 : /*
3626 : * Wake up the process for a waiting DEFERRABLE transaction if we
3627 : * now know it's either safe or conflicted.
3628 : */
3629 93 : if (SxactIsDeferrableWaiting(roXact) &&
3630 4 : (SxactIsROUnsafe(roXact) || SxactIsROSafe(roXact)))
3631 3 : ProcSendSignal(roXact->pgprocno);
3632 : }
3633 : }
3634 :
3635 : /*
3636 : * Check whether it's time to clean up old transactions. This can only be
3637 : * done when the last serializable transaction with the oldest xmin among
3638 : * serializable transactions completes. We then find the "new oldest"
3639 : * xmin and purge any transactions which finished before this transaction
3640 : * was launched.
3641 : *
3642 : * For parallel queries in read-only transactions, it might run twice. We
3643 : * only release the reference on the first call.
3644 : */
3645 1573 : needToClear = false;
3646 1573 : if ((partiallyReleasing ||
3647 1571 : !SxactIsPartiallyReleased(MySerializableXact)) &&
3648 1571 : TransactionIdEquals(MySerializableXact->xmin,
3649 : PredXact->SxactGlobalXmin))
3650 : {
3651 : Assert(PredXact->SxactGlobalXminCount > 0);
3652 1551 : if (--(PredXact->SxactGlobalXminCount) == 0)
3653 : {
3654 888 : SetNewSxactGlobalXmin();
3655 888 : needToClear = true;
3656 : }
3657 : }
3658 :
/*
 * Drop SerializableXactHashLock before taking SerializableFinishedListLock;
 * the finished-list update and ReleaseOneSerializableXact() below run under
 * the latter.
 */
3659 1573 : LWLockRelease(SerializableXactHashLock);
3660 :
3661 1573 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
3662 :
3663 : /* Committed: queue on the finished list; ClearOldPredicateLocks() releases it later. */
3664 1573 : if (isCommit)
3665 1242 : dlist_push_tail(FinishedSerializableTransactions,
3666 1242 : &MySerializableXact->finishedLink);
3667 :
3668 : /*
3669 : * If we're releasing a RO_SAFE transaction in parallel mode, we'll only
3670 : * partially release it. That's necessary because other backends may have
3671 : * a reference to it. The leader will release the SERIALIZABLEXACT itself
3672 : * at the end of the transaction after workers have stopped running.
3673 : */
3674 1573 : if (!isCommit)
3675 331 : ReleaseOneSerializableXact(MySerializableXact,
3676 331 : isReadOnlySafe && IsInParallelMode(),
3677 331 : false);
3678 :
3679 1573 : LWLockRelease(SerializableFinishedListLock);
3680 :
/* Run only after both locks are released; ClearOldPredicateLocks() takes its own. */
3681 1573 : if (needToClear)
3682 888 : ClearOldPredicateLocks();
3683 :
/* Finally, reset this backend's local SSI state. */
3684 1573 : ReleasePredicateLocksLocal();
3685 : }
3686 :
/*
 * ReleasePredicateLocksLocal
 *		Reset this backend's local predicate-locking state: clear
 *		MySerializableXact and MyXactDidWrite, and destroy
 *		LocalPredicateLockHash if one exists.  No LWLocks are taken and
 *		the SERIALIZABLEXACT itself is not modified here.
 */
3687 : static void
3688 7588 : ReleasePredicateLocksLocal(void)
3689 : {
3690 7588 : MySerializableXact = InvalidSerializableXact;
3691 7588 : MyXactDidWrite = false;
3692 :
3693 : /* Delete per-transaction lock table, if this backend has one */
3694 7588 : if (LocalPredicateLockHash != NULL)
3695 : {
3696 1572 : hash_destroy(LocalPredicateLockHash);
3697 1572 : LocalPredicateLockHash = NULL;
3698 : }
3699 7588 : }
3700 :
3701 : /*
3702 : * Clear old predicate locks, belonging to committed transactions that are no
3703 : * longer interesting to any in-progress transaction.
 *
 * Two passes are made: first over FinishedSerializableTransactions, then over
 * the predicate locks attached to the dummy OldCommittedSxact.
 * SerializableFinishedListLock is held exclusively for the whole call.
3704 : */
3705 : static void
3706 888 : ClearOldPredicateLocks(void)
3707 : {
3708 : dlist_mutable_iter iter;
3709 :
3710 : /*
3711 : * Loop through finished transactions. They are in commit order, so we can
3712 : * stop as soon as we find one that's still interesting.
 *
 * SerializableXactHashLock is held in shared mode while examining each
 * entry, but is dropped around every ReleaseOneSerializableXact() call
 * because that function acquires it in exclusive mode itself.
3713 : */
3714 888 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
3715 888 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3716 2138 : dlist_foreach_modify(iter, FinishedSerializableTransactions)
3717 : {
3718 1259 : SERIALIZABLEXACT *finishedSxact =
3719 1259 : dlist_container(SERIALIZABLEXACT, finishedLink, iter.cur);
3720 :
3721 1259 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)
3722 28 : || TransactionIdPrecedesOrEquals(finishedSxact->finishedBefore,
3723 28 : PredXact->SxactGlobalXmin))
3724 : {
3725 : /*
3726 : * This transaction committed before any in-progress transaction
3727 : * took its snapshot. It's no longer interesting.
3728 : */
3729 1242 : LWLockRelease(SerializableXactHashLock);
3730 1242 : dlist_delete_thoroughly(&finishedSxact->finishedLink);
3731 1242 : ReleaseOneSerializableXact(finishedSxact, false, false);
3732 1242 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3733 : }
3734 17 : else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3735 17 : && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3736 : {
3737 : /*
3738 : * Any active transactions that took their snapshot before this
3739 : * transaction committed are read-only, so we can clear part of
3740 : * its state.
3741 : */
3742 8 : LWLockRelease(SerializableXactHashLock);
3743 :
3744 8 : if (SxactIsReadOnly(finishedSxact))
3745 : {
3746 : /* A read-only transaction can be removed entirely */
3747 0 : dlist_delete_thoroughly(&(finishedSxact->finishedLink));
3748 0 : ReleaseOneSerializableXact(finishedSxact, false, false);
3749 : }
3750 : else
3751 : {
3752 : /*
3753 : * A read-write transaction can only be partially cleared. We
3754 : * need to keep the SERIALIZABLEXACT but can release the
3755 : * SIREAD locks and conflicts in.
3756 : */
3757 8 : ReleaseOneSerializableXact(finishedSxact, true, false);
3758 : }
3759 :
/*
 * NOTE(review): in the read-only branch above, finishedSxact has already
 * been handed to ReleaseOneSerializableXact(..., false, ...), which calls
 * ReleasePredXact() on it; this read relies on commitSeqNo still being
 * intact afterwards.  Also, this store happens while only
 * SerializableFinishedListLock is held -- confirm both are intended.
 */
3760 8 : PredXact->HavePartialClearedThrough = finishedSxact->commitSeqNo;
3761 8 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3762 : }
3763 : else
3764 : {
3765 : /* Still interesting. */
3766 : break;
3767 : }
3768 : }
3769 888 : LWLockRelease(SerializableXactHashLock);
3770 :
3771 : /*
3772 : * Loop through predicate locks on dummy transaction for summarized data.
3773 : */
3774 888 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
3775 888 : dlist_foreach_modify(iter, &OldCommittedSxact->predicateLocks)
3776 : {
3777 0 : PREDICATELOCK *predlock =
3778 0 : dlist_container(PREDICATELOCK, xactLink, iter.cur);
3779 : bool canDoPartialCleanup;
3780 :
/* Read CanPartialClearThrough under SerializableXactHashLock, held only briefly. */
3781 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3782 : Assert(predlock->commitSeqNo != 0);
3783 : Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3784 0 : canDoPartialCleanup = (predlock->commitSeqNo <= PredXact->CanPartialClearThrough);
3785 0 : LWLockRelease(SerializableXactHashLock);
3786 :
3787 : /*
3788 : * If this lock originally belonged to an old enough transaction, we
3789 : * can release it.
3790 : */
3791 0 : if (canDoPartialCleanup)
3792 : {
3793 : PREDICATELOCKTAG tag;
3794 : PREDICATELOCKTARGET *target;
3795 : PREDICATELOCKTARGETTAG targettag;
3796 : uint32 targettaghash;
3797 : LWLock *partitionLock;
3798 :
/* Copy the tags out of the entry before it is removed from the hash table. */
3799 0 : tag = predlock->tag;
3800 0 : target = tag.myTarget;
3801 0 : targettag = target->tag;
3802 0 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
3803 0 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
3804 :
3805 0 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
3806 :
/* Unlink from both the target's list and OldCommittedSxact's list. */
3807 0 : dlist_delete(&(predlock->targetLink));
3808 0 : dlist_delete(&(predlock->xactLink));
3809 :
3810 0 : hash_search_with_hash_value(PredicateLockHash, &tag,
3811 0 : PredicateLockHashCodeFromTargetHashCode(&tag,
3812 : targettaghash),
3813 : HASH_REMOVE, NULL);
3814 0 : RemoveTargetIfNoLongerUsed(target, targettaghash);
3815 :
3816 0 : LWLockRelease(partitionLock);
3817 : }
3818 : }
3819 :
3820 888 : LWLockRelease(SerializablePredicateListLock);
3821 888 : LWLockRelease(SerializableFinishedListLock);
3822 888 : }
3823 :
3824 : /*
3825 : * This is the normal way to delete anything from any of the predicate
3826 : * locking hash tables. Given a transaction which we know can be deleted:
3827 : * delete all predicate locks held by that transaction and any predicate
3828 : * lock targets which are now unreferenced by a lock; delete all conflicts
3829 : * for the transaction; delete all xid values for the transaction; then
3830 : * delete the transaction.
3831 : *
3832 : * When the partial flag is set, we can release all predicate locks and
3833 : * in-conflict information -- we've established that there are no longer
3834 : * any overlapping read write transactions for which this transaction could
3835 : * matter -- but keep the transaction entry itself and any outConflicts.
3836 : *
3837 : * When the summarize flag is set, we've run short of room for sxact data
3838 : * and must summarize to the SLRU. Predicate locks are transferred to a
3839 : * dummy "old" transaction, with duplicate locks on a single target
3840 : * collapsing to a single lock with the "latest" commitSeqNo from among
3841 : * the conflicting locks.
 *
 * Preconditions (asserted below): sxact is non-NULL and already flagged
 * rolled back or committed; unless partial is set it is no longer on the
 * finished list; and the caller holds SerializableFinishedListLock.
 *
 * This function acquires SerializablePredicateListLock (shared), the
 * relevant predicate lock partition locks (exclusive) and then
 * SerializableXactHashLock (exclusive) itself, so the caller must not
 * already hold SerializableXactHashLock.
3842 : */
3843 : static void
3844 1581 : ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
3845 : bool summarize)
3846 : {
3847 : SERIALIZABLEXIDTAG sxidtag;
3848 : dlist_mutable_iter iter;
3849 :
3850 : Assert(sxact != NULL);
3851 : Assert(SxactIsRolledBack(sxact) || SxactIsCommitted(sxact));
3852 : Assert(partial || !SxactIsOnFinishedList(sxact));
3853 : Assert(LWLockHeldByMe(SerializableFinishedListLock));
3854 :
3855 : /*
3856 : * First release all the predicate locks held by this xact (or transfer
3857 : * them to OldCommittedSxact if summarize is true). In parallel mode the
3858 : * sxact's own perXactPredicateListLock is taken exclusively as well.
3859 : */
3860 1581 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
3861 1581 : if (IsInParallelMode())
3862 3 : LWLockAcquire(&sxact->perXactPredicateListLock, LW_EXCLUSIVE);
3863 4444 : dlist_foreach_modify(iter, &sxact->predicateLocks)
3864 : {
3865 2863 : PREDICATELOCK *predlock =
3866 2863 : dlist_container(PREDICATELOCK, xactLink, iter.cur);
3867 : PREDICATELOCKTAG tag;
3868 : PREDICATELOCKTARGET *target;
3869 : PREDICATELOCKTARGETTAG targettag;
3870 : uint32 targettaghash;
3871 : LWLock *partitionLock;
3872 :
/* Copy the tags out of the entry before it is removed from the hash table. */
3873 2863 : tag = predlock->tag;
3874 2863 : target = tag.myTarget;
3875 2863 : targettag = target->tag;
3876 2863 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
3877 2863 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
3878 :
3879 2863 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
3880 :
/* Only targetLink is unlinked here; the xactLink list head is re-initialized after the loop. */
3881 2863 : dlist_delete(&predlock->targetLink);
3882 :
3883 2863 : hash_search_with_hash_value(PredicateLockHash, &tag,
3884 2863 : PredicateLockHashCodeFromTargetHashCode(&tag,
3885 : targettaghash),
3886 : HASH_REMOVE, NULL);
3887 2863 : if (summarize)
3888 : {
3889 : bool found;
3890 :
3891 : /* Fold into dummy transaction list: same target, owner OldCommittedSxact. */
3892 0 : tag.myXact = OldCommittedSxact;
3893 0 : predlock = hash_search_with_hash_value(PredicateLockHash, &tag,
3894 0 : PredicateLockHashCodeFromTargetHashCode(&tag,
3895 : targettaghash),
3896 : HASH_ENTER_NULL, &found);
3897 0 : if (!predlock)
3898 0 : ereport(ERROR,
3899 : (errcode(ERRCODE_OUT_OF_MEMORY),
3900 : errmsg("out of shared memory"),
3901 : errhint("You might need to increase \"%s\".", "max_pred_locks_per_transaction")));
3902 0 : if (found)
3903 : {
/* A summary lock already exists on this target: keep the larger commitSeqNo. */
3904 : Assert(predlock->commitSeqNo != 0);
3905 : Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3906 0 : if (predlock->commitSeqNo < sxact->commitSeqNo)
3907 0 : predlock->commitSeqNo = sxact->commitSeqNo;
3908 : }
3909 : else
3910 : {
/* New summary lock: link it to the target and to OldCommittedSxact. */
3911 0 : dlist_push_tail(&target->predicateLocks,
3912 : &predlock->targetLink);
3913 0 : dlist_push_tail(&OldCommittedSxact->predicateLocks,
3914 : &predlock->xactLink);
3915 0 : predlock->commitSeqNo = sxact->commitSeqNo;
3916 : }
3917 : }
3918 : else
3919 2863 : RemoveTargetIfNoLongerUsed(target, targettaghash);
3920 :
3921 2863 : LWLockRelease(partitionLock);
3922 : }
3923 :
3924 : /*
3925 : * Rather than retail removal, just re-init the head after we've run
3926 : * through the list.
3927 : */
3928 1581 : dlist_init(&sxact->predicateLocks);
3929 :
3930 1581 : if (IsInParallelMode())
3931 3 : LWLockRelease(&sxact->perXactPredicateListLock);
3932 1581 : LWLockRelease(SerializablePredicateListLock);
3933 :
/* Capture topXid for the SerializableXidHash removal at the end. */
3934 1581 : sxidtag.xid = sxact->topXid;
3935 1581 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
3936 :
3937 : /* Release all outConflicts (unless 'partial' is true) */
3938 1581 : if (!partial)
3939 : {
3940 1571 : dlist_foreach_modify(iter, &sxact->outConflicts)
3941 : {
3942 0 : RWConflict conflict =
3943 : dlist_container(RWConflictData, outLink, iter.cur);
3944 :
/* When summarizing, leave a summary flag on the other transaction. */
3945 0 : if (summarize)
3946 0 : conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3947 0 : ReleaseRWConflict(conflict);
3948 : }
3949 : }
3950 :
3951 : /* Release all inConflicts, whether or not 'partial' is set. */
3952 1581 : dlist_foreach_modify(iter, &sxact->inConflicts)
3953 : {
3954 0 : RWConflict conflict =
3955 0 : dlist_container(RWConflictData, inLink, iter.cur);
3956 :
3957 0 : if (summarize)
3958 0 : conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3959 0 : ReleaseRWConflict(conflict);
3960 : }
3961 :
3962 : /* Finally, get rid of the xid and the record of the transaction itself. */
3963 1581 : if (!partial)
3964 : {
/* An sxact with no top-level xid assigned has no SerializableXidHash entry to remove. */
3965 1571 : if (sxidtag.xid != InvalidTransactionId)
3966 1298 : hash_search(SerializableXidHash, &sxidtag, HASH_REMOVE, NULL);
3967 1571 : ReleasePredXact(sxact);
3968 : }
3969 :
3970 1581 : LWLockRelease(SerializableXactHashLock);
3971 1581 : }
3971 :
3972 : /*
3973 : * Tests whether the given top level transaction is concurrent with
3974 : * (overlaps) our current transaction.
3975 : *
3976 : * We need to identify the top level transaction for SSI, anyway, so pass
3977 : * that to this function to save the overhead of checking the snapshot's
3978 : * subxip array.
 *
 * xid must be valid and must not be our own top-level xid (both asserted).
 * Returns true if xid is at or beyond the transaction snapshot's xmax, or
 * is found in the snapshot's xip array; returns false if it precedes the
 * snapshot's xmin or is in range but absent from xip.
3979 : */
3980 : static bool
3981 536 : XidIsConcurrent(TransactionId xid)
3982 : {
3983 : Snapshot snap;
3984 :
3985 : Assert(TransactionIdIsValid(xid));
3986 : Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));
3987 :
3988 536 : snap = GetTransactionSnapshot();
3989 :
/* Below the snapshot's xmin: not concurrent. */
3990 536 : if (TransactionIdPrecedes(xid, snap->xmin))
3991 0 : return false;
3992 :
/* At or above the snapshot's xmax: concurrent. */
3993 536 : if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3994 524 : return true;
3995 :
/* In between: concurrent only if listed in the snapshot's xip array. */
3996 12 : return pg_lfind32(xid, snap->xip, snap->xcnt);
3997 : }
3998 :
/*
 * CheckForSerializableConflictOutNeeded
 *		Report whether conflict-out checking applies to a read of
 *		'relation' under 'snapshot'.
 *
 * Returns false when SerializationNeededForRead() says no.  Otherwise
 * returns true, unless our own SERIALIZABLEXACT has already been flagged
 * doomed, in which case a serialization failure is raised instead.
 */
3999 : bool
4000 43922343 : CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
4001 : {
4002 43922343 : if (!SerializationNeededForRead(relation, snapshot))
4003 43896305 : return false;
4004 :
4005 : /* Check if someone else has already decided that we need to die */
4006 26038 : if (SxactIsDoomed(MySerializableXact))
4007 : {
4008 0 : ereport(ERROR,
4009 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4010 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4011 : errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4012 : errhint("The transaction might succeed if retried.")));
4013 : }
4014 :
4015 26038 : return true;
4016 : }
4017 :
4018 : /*
4019 : * CheckForSerializableConflictOut
4020 : * A table AM is reading a tuple that has been modified. If it determines
4021 : * that the tuple version it is reading is not visible to us, it should
4022 : * pass in the top level xid of the transaction that created it.
4023 : * Otherwise, if it determines that it is visible to us but it has been
4024 : * deleted or there is a newer version available due to an update, it
4025 : * should pass in the top level xid of the modifying transaction.
4026 : *
4027 : * This function will check for overlap with our own transaction. If the given
4028 : * xid is also serializable and the transactions overlap (i.e., they cannot see
4029 : * each other's writes), then we have a conflict out.
 *
 * Returns nothing.  Returns early without recording anything when no check
 * is needed; raises a serialization failure via ereport(ERROR) in the cases
 * marked below.  SerializableXactHashLock is held exclusively from the xid
 * lookup onward, and every return or explicit ereport(ERROR) below releases
 * it first, except the two errors raised in the summarized-xid branch.
4030 : */
4031 : void
4032 566 : CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
4033 : {
4034 : SERIALIZABLEXIDTAG sxidtag;
4035 : SERIALIZABLEXID *sxid;
4036 : SERIALIZABLEXACT *sxact;
4037 :
4038 566 : if (!SerializationNeededForRead(relation, snapshot))
4039 199 : return;
4040 :
4041 : /* Check if someone else has already decided that we need to die */
4042 566 : if (SxactIsDoomed(MySerializableXact))
4043 : {
4044 0 : ereport(ERROR,
4045 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4046 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4047 : errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
4048 : errhint("The transaction might succeed if retried.")));
4049 : }
4050 : Assert(TransactionIdIsValid(xid));
4051 :
/* A tuple written by our own top-level transaction cannot be a conflict. */
4052 566 : if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
4053 0 : return;
4054 :
4055 : /*
4056 : * Find sxact or summarized info for the top level xid.
4057 : */
4058 566 : sxidtag.xid = xid;
4059 566 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4060 : sxid = (SERIALIZABLEXID *)
4061 566 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
4062 566 : if (!sxid)
4063 : {
4064 : /*
4065 : * Transaction not found in "normal" SSI structures. Check whether it
4066 : * got pushed out to SLRU storage for "old committed" transactions.
4067 : */
4068 : SerCommitSeqNo conflictCommitSeqNo;
4069 :
4070 20 : conflictCommitSeqNo = SerialGetMinConflictCommitSeqNo(xid);
/* A result of 0 is treated as "nothing to check" and falls through to the return below. */
4071 20 : if (conflictCommitSeqNo != 0)
4072 : {
4073 0 : if (conflictCommitSeqNo != InvalidSerCommitSeqNo
4074 0 : && (!SxactIsReadOnly(MySerializableXact)
4075 0 : || conflictCommitSeqNo
4076 0 : <= MySerializableXact->SeqNo.lastCommitBeforeSnapshot))
4077 0 : ereport(ERROR,
4078 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4079 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4080 : errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4081 : errhint("The transaction might succeed if retried.")));
4082 :
/* We already have a conflict in, so this conflict out would make us a pivot. */
4083 0 : if (SxactHasSummaryConflictIn(MySerializableXact)
4084 0 : || !dlist_is_empty(&MySerializableXact->inConflicts))
4085 0 : ereport(ERROR,
4086 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4087 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4088 : errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4089 : errhint("The transaction might succeed if retried.")));
4090 :
4091 0 : MySerializableXact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4092 : }
4093 :
4094 : /* It's not serializable or otherwise not important. */
4095 20 : LWLockRelease(SerializableXactHashLock);
4096 20 : return;
4097 : }
4098 546 : sxact = sxid->myXact;
4099 : Assert(TransactionIdEquals(sxact->topXid, xid));
4100 546 : if (sxact == MySerializableXact || SxactIsDoomed(sxact))
4101 : {
4102 : /* Can't conflict with ourself or a transaction that will roll back. */
4103 4 : LWLockRelease(SerializableXactHashLock);
4104 4 : return;
4105 : }
4106 :
4107 : /*
4108 : * We have a conflict out to a transaction which has a conflict out to a
4109 : * summarized transaction. That summarized transaction must have
4110 : * committed first, and we can't tell when it committed in relation to our
4111 : * snapshot acquisition, so something needs to be canceled.
 *
 * If the other transaction has not prepared yet, it is the one marked
 * doomed; otherwise we raise the error ourselves.
4112 : */
4113 542 : if (SxactHasSummaryConflictOut(sxact))
4114 : {
4115 0 : if (!SxactIsPrepared(sxact))
4116 : {
4117 0 : sxact->flags |= SXACT_FLAG_DOOMED;
4118 0 : LWLockRelease(SerializableXactHashLock);
4119 0 : return;
4120 : }
4121 : else
4122 : {
4123 0 : LWLockRelease(SerializableXactHashLock);
4124 0 : ereport(ERROR,
4125 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4126 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4127 : errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4128 : errhint("The transaction might succeed if retried.")));
4129 : }
4130 : }
4131 :
4132 : /*
4133 : * If this is a read-only transaction and the writing transaction has
4134 : * committed, and it doesn't have a rw-conflict to a transaction which
4135 : * committed before it, no conflict.
 *
 * NOTE(review): the SxactHasSummaryConflictOut() test below looks
 * redundant, since the branch above already returns or raises an error
 * whenever that flag is set -- confirm before relying on it.
4136 : */
4137 542 : if (SxactIsReadOnly(MySerializableXact)
4138 119 : && SxactIsCommitted(sxact)
4139 8 : && !SxactHasSummaryConflictOut(sxact)
4140 8 : && (!SxactHasConflictOut(sxact)
4141 2 : || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4142 : {
4143 : /* Read-only transaction will appear to run first. No conflict. */
4144 6 : LWLockRelease(SerializableXactHashLock);
4145 6 : return;
4146 : }
4147 :
4148 536 : if (!XidIsConcurrent(xid))
4149 : {
4150 : /* This write was already in our snapshot; no conflict. */
4151 0 : LWLockRelease(SerializableXactHashLock);
4152 0 : return;
4153 : }
4154 :
4155 536 : if (RWConflictExists(MySerializableXact, sxact))
4156 : {
4157 : /* We don't want duplicate conflict records in the list. */
4158 169 : LWLockRelease(SerializableXactHashLock);
4159 169 : return;
4160 : }
4161 :
4162 : /*
4163 : * Flag the conflict. But first, if this conflict creates a dangerous
4164 : * structure, ereport an error.
4165 : */
4166 367 : FlagRWConflict(MySerializableXact, sxact);
4167 354 : LWLockRelease(SerializableXactHashLock);
4168 : }
4169 :
4170 : /*
4171 : * Check one lock target for rw-conflicts in to MySerializableXact, flagging each
4172 : * one found (which may raise a serialization failure). A subroutine of CheckForSerializableConflictIn().
4173 : */
4174 : static void
4175 7567 : CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
4176 : {
4177 : uint32 targettaghash;
4178 : LWLock *partitionLock;
4179 : PREDICATELOCKTARGET *target;
4180 7567 : PREDICATELOCK *mypredlock = NULL;
4181 : PREDICATELOCKTAG mypredlocktag;
4182 : dlist_mutable_iter iter;
4183 :
4184 : Assert(MySerializableXact != InvalidSerializableXact);
4185 :
4186 : /*
4187 : * The same hash and LW lock apply to the lock target and the lock itself.
4188 : */
4189 7567 : targettaghash = PredicateLockTargetTagHashCode(targettag);
4190 7567 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
4191 7567 : LWLockAcquire(partitionLock, LW_SHARED);
4192 : target = (PREDICATELOCKTARGET *)
4193 7567 : hash_search_with_hash_value(PredicateLockTargetHash,
4194 : targettag, targettaghash,
4195 : HASH_FIND, NULL);
4196 7567 : if (!target)
4197 : {
4198 : /* Nothing has this target locked; we're done here. */
4199 5680 : LWLockRelease(partitionLock);
4200 5680 : return;
4201 : }
4202 :
4203 : /*
4204 : * Each lock for an overlapping transaction represents a conflict: a
4205 : * rw-dependency in to this transaction.
4206 : */
4207 1887 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4208 :
4209 4254 : dlist_foreach_modify(iter, &target->predicateLocks)
4210 : {
4211 2434 : PREDICATELOCK *predlock =
4212 2434 : dlist_container(PREDICATELOCK, targetLink, iter.cur);
4213 2434 : SERIALIZABLEXACT *sxact = predlock->tag.myXact;
4214 :
4215 2434 : if (sxact == MySerializableXact)
4216 : {
4217 : /*
4218 : * If we're getting a write lock on a tuple, we don't need a
4219 : * predicate (SIREAD) lock on the same tuple. We can safely remove
4220 : * our SIREAD lock, but we'll defer doing so until after the loop
4221 : * because that requires upgrading to an exclusive partition lock.
4222 : *
4223 : * We can't use this optimization within a subtransaction because
4224 : * the subtransaction could roll back, and we would be left
4225 : * without any lock at the top level.
4226 : */
4227 1576 : if (!IsSubTransaction()
4228 1576 : && GET_PREDICATELOCKTARGETTAG_OFFSET(*targettag))
4229 : {
4230 396 : mypredlock = predlock;
4231 396 : mypredlocktag = predlock->tag;
4232 : }
4233 : }
4234 858 : else if (!SxactIsDoomed(sxact)
4235 858 : && (!SxactIsCommitted(sxact)
4236 85 : || TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
4237 : sxact->finishedBefore))
4238 849 : && !RWConflictExists(sxact, MySerializableXact))
4239 : {
4240 505 : LWLockRelease(SerializableXactHashLock);
4241 505 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4242 :
4243 : /*
4244 : * Re-check after getting exclusive lock: the shared lock was dropped
4245 : * first, so the other transaction may have flagged a conflict meanwhile.
4246 : */
4247 505 : if (!SxactIsDoomed(sxact)
4248 505 : && (!SxactIsCommitted(sxact)
4249 75 : || TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
4250 : sxact->finishedBefore))
4251 505 : && !RWConflictExists(sxact, MySerializableXact))
4252 : {
4253 505 : FlagRWConflict(sxact, MySerializableXact);
4254 : }
4255 :
4256 438 : LWLockRelease(SerializableXactHashLock);
4257 438 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4258 : }
4259 : }
4260 1820 : LWLockRelease(SerializableXactHashLock);
4261 1820 : LWLockRelease(partitionLock);
4262 :
4263 : /*
4264 : * If we found one of our own SIREAD locks to remove, remove it now.
4265 : *
4266 : * At this point our transaction already has a RowExclusiveLock on the
4267 : * relation, so we are OK to drop the predicate lock on the tuple, if
4268 : * found, without fearing that another write against the tuple will occur
4269 : * before the MVCC information makes it to the buffer.
4270 : */
4271 1820 : if (mypredlock != NULL)
4272 : {
4273 : uint32 predlockhashcode;
4274 : PREDICATELOCK *rmpredlock;
4275 :
4276 389 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
4277 389 : if (IsInParallelMode())
4278 0 : LWLockAcquire(&MySerializableXact->perXactPredicateListLock, LW_EXCLUSIVE);
4279 389 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
4280 389 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4281 :
4282 : /*
4283 : * Remove the predicate lock from shared memory, if it wasn't removed
4284 : * while the locks were released. One way that could happen is from
4285 : * autovacuum cleaning up an index.
4286 : */
4287 389 : predlockhashcode = PredicateLockHashCodeFromTargetHashCode
4288 : (&mypredlocktag, targettaghash);
4289 : rmpredlock = (PREDICATELOCK *)
4290 389 : hash_search_with_hash_value(PredicateLockHash,
4291 : &mypredlocktag,
4292 : predlockhashcode,
4293 : HASH_FIND, NULL);
4294 389 : if (rmpredlock != NULL)
4295 : {
4296 : Assert(rmpredlock == mypredlock);
4297 :
4298 389 : dlist_delete(&(mypredlock->targetLink));
4299 389 : dlist_delete(&(mypredlock->xactLink));
4300 :
4301 : rmpredlock = (PREDICATELOCK *)
4302 389 : hash_search_with_hash_value(PredicateLockHash,
4303 : &mypredlocktag,
4304 : predlockhashcode,
4305 : HASH_REMOVE, NULL);
4306 : Assert(rmpredlock == mypredlock);
4307 :
4308 389 : RemoveTargetIfNoLongerUsed(target, targettaghash);
4309 : }
4310 :
4311 389 : LWLockRelease(SerializableXactHashLock);
4312 389 : LWLockRelease(partitionLock);
4313 389 : if (IsInParallelMode())
4314 0 : LWLockRelease(&MySerializableXact->perXactPredicateListLock);
4315 389 : LWLockRelease(SerializablePredicateListLock);
4316 :
4317 389 : if (rmpredlock != NULL)
4318 : {
4319 : /*
4320 : * Remove entry in local lock table if it exists. It's OK if it
4321 : * doesn't exist; that means the lock was transferred to a new
4322 : * target by a different backend.
4323 : */
4324 389 : hash_search_with_hash_value(LocalPredicateLockHash,
4325 : targettag, targettaghash,
4326 : HASH_REMOVE, NULL);
4327 :
4328 389 : DecrementParentLocks(targettag);
4329 : }
4330 : }
4331 : }
4332 :
4333 : /*
4334 : * CheckForSerializableConflictIn
4335 : * We are writing the given tuple. If that indicates a rw-conflict
4336 : * in from another serializable transaction, take appropriate action.
4337 : *
4338 : * Skip checking for any granularity for which a parameter is missing (NULL tid: no tuple check; InvalidBlockNumber blkno: no page check).
4339 : *
4340 : * A tuple update or delete is in conflict if another transaction holds a
4341 : * predicate lock against the relation or page in which the tuple exists,
4342 : * or against the tuple itself.
4343 : */
4344 : void
4345 20516618 : CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
4346 : {
4347 : PREDICATELOCKTARGETTAG targettag;
4348 :
4349 20516618 : if (!SerializationNeededForWrite(relation))
4350 20512138 : return;
4351 :
4352 : /* Check if someone else has already decided that we need to die */
4353 4480 : if (SxactIsDoomed(MySerializableXact))
4354 1 : ereport(ERROR,
4355 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4356 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4357 : errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4358 : errhint("The transaction might succeed if retried.")));
4359 :
4360 : /*
4361 : * We're doing a write which might cause rw-conflicts now or later.
4362 : * Memorize that fact.
4363 : */
4364 4479 : MyXactDidWrite = true;
4365 :
4366 : /*
4367 : * It is important that we check for locks from the finest granularity to
4368 : * the coarsest granularity, so that granularity promotion doesn't cause
4369 : * us to miss a lock. The new (coarser) lock will be acquired before the
4370 : * old (finer) locks are released.
4371 : *
4372 : * It is not possible to take and hold a lock across the checks for all
4373 : * granularities because each target could be in a separate partition.
4374 : */
4375 4479 : if (tid != NULL)
4376 : {
4377 652 : SET_PREDICATELOCKTARGETTAG_TUPLE(targettag,
4378 : relation->rd_locator.dbOid,
4379 : relation->rd_id,
4380 : ItemPointerGetBlockNumber(tid),
4381 : ItemPointerGetOffsetNumber(tid));
4382 652 : CheckTargetForConflictsIn(&targettag);
4383 : }
4384 :
4385 4456 : if (blkno != InvalidBlockNumber)
4386 : {
4387 2489 : SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
4388 : relation->rd_locator.dbOid,
4389 : relation->rd_id,
4390 : blkno);
4391 2489 : CheckTargetForConflictsIn(&targettag);
4392 : }
4393 :
4394 4426 : SET_PREDICATELOCKTARGETTAG_RELATION(targettag,
4395 : relation->rd_locator.dbOid,
4396 : relation->rd_id);
4397 4426 : CheckTargetForConflictsIn(&targettag);
4398 : }
4399 :
4400 : /*
4401 : * CheckTableForSerializableConflictIn
4402 : * The entire table is going through a DDL-style logical mass delete
4403 : * like TRUNCATE or DROP TABLE. If that causes a rw-conflict in from
4404 : * another serializable transaction, take appropriate action.
4405 : *
4406 : * While these operations do not operate entirely within the bounds of
4407 : * snapshot isolation, they can occur inside a serializable transaction, and
4408 : * will logically occur after any reads which saw rows which were destroyed
4409 : * by these operations, so we do what we can to serialize properly under
4410 : * SSI.
4411 : *
4412 : * The relation passed in must be a heap relation. Any predicate lock of any
4413 : * granularity on the heap will cause a rw-conflict in to this transaction.
4414 : * Predicate locks on indexes do not matter because they only exist to guard
4415 : * against conflicting inserts into the index, and this is a mass *delete*.
4416 : * When a table is truncated or dropped, the index will also be truncated
4417 : * or dropped, and we'll deal with locks on the index when that happens.
4418 : *
4419 : * Dropping or truncating a table also needs to drop any existing predicate
4420 : * locks on heap tuples or pages, because they're about to go away. This
4421 : * conflict check should be done before altering the predicate locks because
4422 : * the transaction could be rolled back because of a conflict, in which case
4423 : * the lock changes are not needed. (At the moment, we don't actually bother
4424 : * to drop the existing locks on a dropped or truncated table. That might
4425 : * lead to some false positives, but it doesn't seem worth the trouble.)
4426 : */
4427 : void
4428 34175 : CheckTableForSerializableConflictIn(Relation relation)
4429 : {
4430 : HASH_SEQ_STATUS seqstat;
4431 : PREDICATELOCKTARGET *target;
4432 : Oid dbId;
4433 : Oid heapId;
4434 : int i;
4435 :
4436 : /*
4437 : * Bail out quickly if there are no serializable transactions running.
4438 : * It's safe to check this without taking locks because the caller is
4439 : * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4440 : * would matter here can be acquired while that is held.
4441 : */
4442 34175 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
4443 34155 : return;
4444 :
4445 167 : if (!SerializationNeededForWrite(relation))
4446 147 : return;
4447 :
4448 : /*
4449 : * We're doing a write which might cause rw-conflicts now or later.
4450 : * Memorize that fact.
4451 : */
4452 20 : MyXactDidWrite = true;
4453 :
4454 : Assert(relation->rd_index == NULL); /* not an index relation */
4455 :
4456 20 : dbId = relation->rd_locator.dbOid;
4457 20 : heapId = relation->rd_id;
4458 :
4459 20 : LWLockAcquire(SerializablePredicateListLock, LW_EXCLUSIVE);
4460 340 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4461 320 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
4462 20 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4463 :
4464 : /* Scan through every entry of the predicate lock target hash table */
4465 20 : hash_seq_init(&seqstat, PredicateLockTargetHash);
4466 :
4467 70 : while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4468 : {
4469 : dlist_mutable_iter iter;
4470 :
4471 : /*
4472 : * Check whether this is a target which needs attention.
4473 : */
4474 50 : if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId)
4475 41 : continue; /* wrong relation id */
4476 9 : if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4477 0 : continue; /* wrong database id */
4478 :
4479 : /*
4480 : * Loop through locks for this target and flag conflicts.
4481 : */
4482 18 : dlist_foreach_modify(iter, &target->predicateLocks)
4483 : {
4484 9 : PREDICATELOCK *predlock =
4485 9 : dlist_container(PREDICATELOCK, targetLink, iter.cur);
4486 :
4487 9 : if (predlock->tag.myXact != MySerializableXact
4488 0 : && !RWConflictExists(predlock->tag.myXact, MySerializableXact))
4489 : {
4490 0 : FlagRWConflict(predlock->tag.myXact, MySerializableXact);
4491 : }
4492 : }
4493 : }
4494 :
4495 : /* Release locks in reverse order */
4496 20 : LWLockRelease(SerializableXactHashLock);
4497 340 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4498 320 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
4499 20 : LWLockRelease(SerializablePredicateListLock);
4500 : }
4501 :
4502 :
4503 : /*
4504 : * Flag a rw-dependency from reader to writer. This may ereport(ERROR)
4505 : * rather than return; see OnConflict_CheckForSerializationFailure().
4506 : * The caller is responsible for ensuring that we have a LW lock on
4507 : * the transaction hash table.
4508 : */
4509 : static void
4510 872 : FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
4511 : {
4512 : Assert(reader != writer);
4513 :
4514 : /* First, see if this conflict causes failure. */
4515 872 : OnConflict_CheckForSerializationFailure(reader, writer);
4516 :
4517 : /* Actually do the conflict flagging (summary flag if one side is OldCommittedSxact). */
4518 792 : if (reader == OldCommittedSxact)
4519 0 : writer->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
4520 792 : else if (writer == OldCommittedSxact)
4521 0 : reader->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4522 : else
4523 792 : SetRWConflict(reader, writer);
4524 792 : }
4525 :
4526 : /*----------------------------------------------------------------------------
4527 : * We are about to add a RW-edge to the dependency graph - check that we don't
4528 : * introduce a dangerous structure by doing so, and abort one of the
4529 : * transactions if so.
4530 : *
4531 : * A serialization failure can only occur if there is a dangerous structure
4532 : * in the dependency graph:
4533 : *
4534 : * Tin ------> Tpivot ------> Tout
4535 : * rw rw
4536 : *
4537 : * Furthermore, Tout must commit first.
4538 : *
4539 : * One more optimization is that if Tin is declared READ ONLY (or commits
4540 : * without writing), we can only have a problem if Tout committed before Tin
4541 : * acquired its snapshot.
4542 : *----------------------------------------------------------------------------
4543 : */
4544 : static void
4545 872 : OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
4546 : SERIALIZABLEXACT *writer)
4547 : {
4548 : bool failure;
4549 :
4550 : Assert(LWLockHeldByMe(SerializableXactHashLock));
4551 :
4552 872 : failure = false;
4553 :
4554 : /*------------------------------------------------------------------------
4555 : * Check for already-committed writer with rw-conflict out flagged
4556 : * (conflict-flag on W means that T2 committed before W):
4557 : *
4558 : * R ------> W ------> T2
4559 : * rw rw
4560 : *
4561 : * That is a dangerous structure, so we must abort. (Since the writer
4562 : * has already committed, we must be the reader)
4563 : *------------------------------------------------------------------------
4564 : */
4565 872 : if (SxactIsCommitted(writer)
4566 18 : && (SxactHasConflictOut(writer) || SxactHasSummaryConflictOut(writer)))
4567 2 : failure = true;
4568 :
4569 : /*------------------------------------------------------------------------
4570 : * Check whether the writer has become a pivot with an out-conflict
4571 : * committed transaction (T2), and T2 committed first:
4572 : *
4573 : * R ------> W ------> T2
4574 : * rw rw
4575 : *
4576 : * Because T2 must've committed first, there is no anomaly if:
4577 : * - the reader committed before T2
4578 : * - the writer committed before T2
4579 : * - the reader is a READ ONLY transaction and the reader was concurrent
4580 : * with T2 (= reader acquired its snapshot before T2 committed)
4581 : *
4582 : * We also handle the case that T2 is prepared but not yet committed
4583 : * here. In that case T2 has already checked for conflicts, so if it
4584 : * commits first, making the above conflict real, it's too late for it
4585 : * to abort.
4586 : *------------------------------------------------------------------------
4587 : */
4588 872 : if (!failure && SxactHasSummaryConflictOut(writer))
4589 0 : failure = true;
4590 872 : else if (!failure)
4591 : {
4592 : dlist_iter iter;
4593 :
4594 1087 : dlist_foreach(iter, &writer->outConflicts)
4595 : {
4596 292 : RWConflict conflict =
4597 : dlist_container(RWConflictData, outLink, iter.cur);
4598 292 : SERIALIZABLEXACT *t2 = conflict->sxactIn;
4599 :
4600 292 : if (SxactIsPrepared(t2)
4601 81 : && (!SxactIsCommitted(reader)
4602 64 : || t2->prepareSeqNo <= reader->commitSeqNo)
4603 81 : && (!SxactIsCommitted(writer)
4604 0 : || t2->prepareSeqNo <= writer->commitSeqNo)
4605 81 : && (!SxactIsReadOnly(reader)
4606 12 : || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4607 : {
4608 75 : failure = true;
4609 75 : break;
4610 : }
4611 : }
4612 : }
4613 :
4614 : /*------------------------------------------------------------------------
4615 : * Check whether the reader has become a pivot with a writer
4616 : * that's committed (or prepared):
4617 : *
4618 : * T0 ------> R ------> W
4619 : * rw rw
4620 : *
4621 : * Because W must've committed first for an anomaly to occur, there is no
4622 : * anomaly if:
4623 : * - T0 committed before the writer
4624 : * - T0 is READ ONLY, and overlaps the writer
4625 : *------------------------------------------------------------------------
4626 : */
4627 872 : if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4628 : {
4629 18 : if (SxactHasSummaryConflictIn(reader))
4630 : {
4631 0 : failure = true;
4632 : }
4633 : else
4634 : {
4635 : dlist_iter iter;
4636 :
4637 : /*
4638 : * The unconstify is needed as we have no const version of
4639 : * dlist_foreach().
4640 : */
4641 18 : dlist_foreach(iter, &unconstify(SERIALIZABLEXACT *, reader)->inConflicts)
4642 : {
4643 11 : const RWConflict conflict =
4644 11 : dlist_container(RWConflictData, inLink, iter.cur);
4645 11 : const SERIALIZABLEXACT *t0 = conflict->sxactOut;
4646 :
4647 11 : if (!SxactIsDoomed(t0)
4648 11 : && (!SxactIsCommitted(t0)
4649 11 : || t0->commitSeqNo >= writer->prepareSeqNo)
4650 11 : && (!SxactIsReadOnly(t0)
4651 0 : || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4652 : {
4653 11 : failure = true;
4654 11 : break;
4655 : }
4656 : }
4657 : }
4658 : }
4659 :
4660 872 : if (failure)
4661 : {
4662 : /*
4663 : * We have to kill a transaction to avoid a possible anomaly from
4664 : * occurring. If the writer is us, we can just ereport() to cause a
4665 : * transaction abort. Otherwise we flag the writer for termination,
4666 : * causing it to abort when it tries to commit. However, if the writer
4667 : * has already prepared, we can't abort it anymore, so we have to kill
4668 : * the reader instead. The hash lock is released before any ereport().
4669 : */
4670 88 : if (MySerializableXact == writer)
4671 : {
4672 67 : LWLockRelease(SerializableXactHashLock);
4673 67 : ereport(ERROR,
4674 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4675 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4676 : errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4677 : errhint("The transaction might succeed if retried.")));
4678 : }
4679 21 : else if (SxactIsPrepared(writer))
4680 : {
4681 13 : LWLockRelease(SerializableXactHashLock);
4682 :
4683 : /* if we're not the writer, we have to be the reader */
4684 : Assert(MySerializableXact == reader);
4685 13 : ereport(ERROR,
4686 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4687 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4688 : errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4689 : errhint("The transaction might succeed if retried.")));
4690 : }
4691 8 : writer->flags |= SXACT_FLAG_DOOMED;
4692 : }
4693 792 : }
4694 :
4695 : /*
4696 : * PreCommit_CheckForSerializationFailure
4697 : * Check for dangerous structures in a serializable transaction
4698 : * at commit; if we survive, assign our prepareSeqNo and set SXACT_FLAG_PREPARED.
4699 : *
4700 : * We're checking for a dangerous structure as each conflict is recorded.
4701 : * The only way we could have a problem at commit is if this is the "out"
4702 : * side of a pivot, and neither the "in" side nor the pivot has yet
4703 : * committed.
4704 : *
4705 : * If a dangerous structure is found, the pivot (the near conflict) is
4706 : * marked for death, because rolling back another transaction might mean
4707 : * that we fail without ever making progress. This transaction is
4708 : * committing writes, so letting it commit ensures progress. If we
4709 : * canceled the far conflict, it might immediately fail again on retry.
4710 : */
4711 : void
4712 581494 : PreCommit_CheckForSerializationFailure(void)
4713 : {
4714 : dlist_iter near_iter;
4715 :
4716 581494 : if (MySerializableXact == InvalidSerializableXact)
4717 580088 : return;
4718 :
4719 : Assert(IsolationIsSerializable());
4720 :
4721 1406 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4722 :
4723 : /*
4724 : * Check if someone else has already decided that we need to die. Since
4725 : * we set our own DOOMED flag when partially releasing, ignore in that
4726 : * case.
4727 : */
4728 1406 : if (SxactIsDoomed(MySerializableXact) &&
4729 156 : !SxactIsPartiallyReleased(MySerializableXact))
4730 : {
4731 155 : LWLockRelease(SerializableXactHashLock);
4732 155 : ereport(ERROR,
4733 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4734 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4735 : errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4736 : errhint("The transaction might succeed if retried.")));
4737 : }
4738 :
4739 1857 : dlist_foreach(near_iter, &MySerializableXact->inConflicts)
4740 : {
4741 606 : RWConflict nearConflict =
4742 606 : dlist_container(RWConflictData, inLink, near_iter.cur);
4743 :
4744 606 : if (!SxactIsCommitted(nearConflict->sxactOut)
4745 421 : && !SxactIsDoomed(nearConflict->sxactOut))
4746 : {
4747 : dlist_iter far_iter;
4748 :
4749 451 : dlist_foreach(far_iter, &nearConflict->sxactOut->inConflicts)
4750 : {
4751 182 : RWConflict farConflict =
4752 182 : dlist_container(RWConflictData, inLink, far_iter.cur);
4753 :
4754 182 : if (farConflict->sxactOut == MySerializableXact
4755 42 : || (!SxactIsCommitted(farConflict->sxactOut)
4756 24 : && !SxactIsReadOnly(farConflict->sxactOut)
4757 12 : && !SxactIsDoomed(farConflict->sxactOut)))
4758 : {
4759 : /*
4760 : * Normally, we kill the pivot transaction to make sure we
4761 : * make progress if the failing transaction is retried.
4762 : * However, we can't kill it if it's already prepared, so
4763 : * in that case we raise the failure in ourselves instead.
4764 : */
4765 152 : if (SxactIsPrepared(nearConflict->sxactOut))
4766 : {
4767 0 : LWLockRelease(SerializableXactHashLock);
4768 0 : ereport(ERROR,
4769 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4770 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4771 : errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4772 : errhint("The transaction might succeed if retried.")));
4773 : }
4774 152 : nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4775 152 : break;
4776 : }
4777 : }
4778 : }
4779 : }
4780 :
4781 1251 : MySerializableXact->prepareSeqNo = ++(PredXact->LastSxactCommitSeqNo);
4782 1251 : MySerializableXact->flags |= SXACT_FLAG_PREPARED;
4783 :
4784 1251 : LWLockRelease(SerializableXactHashLock);
4785 : }
4786 :
4787 : /*------------------------------------------------------------------------*/
4788 :
4789 : /*
4790 : * Two-phase commit support
4791 : */
4792 :
4793 : /*
4794 : * AtPrepare_Locks
4795 : * Do the preparatory work for a PREPARE: make 2PC state file
4796 : * records for all predicate locks currently held.
4797 : */
4798 : void
4799 318 : AtPrepare_PredicateLocks(void)
4800 : {
4801 : SERIALIZABLEXACT *sxact;
4802 : TwoPhasePredicateRecord record;
4803 : TwoPhasePredicateXactRecord *xactRecord;
4804 : TwoPhasePredicateLockRecord *lockRecord;
4805 : dlist_iter iter;
4806 :
4807 318 : sxact = MySerializableXact;
4808 318 : xactRecord = &(record.data.xactRecord);
4809 318 : lockRecord = &(record.data.lockRecord);
4810 :
4811 318 : if (MySerializableXact == InvalidSerializableXact)
4812 306 : return;
4813 :
4814 : /* Generate an xact record for our SERIALIZABLEXACT */
4815 12 : record.type = TWOPHASEPREDICATERECORD_XACT;
4816 12 : xactRecord->xmin = MySerializableXact->xmin;
4817 12 : xactRecord->flags = MySerializableXact->flags;
4818 :
4819 : /*
4820 : * Note that we don't include the list of conflicts in our out in the
4821 : * statefile, because new conflicts can be added even after the
4822 : * transaction prepares. We'll just make a conservative assumption during
4823 : * recovery instead.
4824 : */
4825 :
4826 12 : RegisterTwoPhaseRecord(TWOPHASE_RM_PREDICATELOCK_ID, 0,
4827 : &record, sizeof(record));
4828 :
4829 : /*
4830 : * Generate a lock record for each lock.
4831 : *
4832 : * To do this, we need to walk the predicate lock list in our sxact rather
4833 : * than using the local predicate lock table because the latter is not
4834 : * guaranteed to be accurate.
4835 : */
4836 12 : LWLockAcquire(SerializablePredicateListLock, LW_SHARED);
4837 :
4838 : /*
4839 : * No need to take sxact->perXactPredicateListLock in parallel mode
4840 : * because there cannot be any parallel workers running while we are
4841 : * preparing a transaction.
4842 : */
4843 : Assert(!IsParallelWorker() && !ParallelContextActive());
4844 :
4845 22 : dlist_foreach(iter, &sxact->predicateLocks)
4846 : {
4847 10 : PREDICATELOCK *predlock =
4848 10 : dlist_container(PREDICATELOCK, xactLink, iter.cur);
4849 :
4850 10 : record.type = TWOPHASEPREDICATERECORD_LOCK;
4851 10 : lockRecord->target = predlock->tag.myTarget->tag;
4852 :
4853 10 : RegisterTwoPhaseRecord(TWOPHASE_RM_PREDICATELOCK_ID, 0,
4854 : &record, sizeof(record));
4855 : }
4856 :
4857 12 : LWLockRelease(SerializablePredicateListLock);
4858 : }
4859 :
4860 : /*
4861 : * PostPrepare_PredicateLocks
4862 : * Clean up after successful PREPARE. Unlike the non-predicate
4863 : * lock manager, we do not need to transfer locks to a dummy
4864 : * PGPROC because our SERIALIZABLEXACT will stay around
4865 : * anyway. We only need to clean up our local state (fxid is unused).
4866 : */
4867 : void
4868 318 : PostPrepare_PredicateLocks(FullTransactionId fxid)
4869 : {
4870 318 : if (MySerializableXact == InvalidSerializableXact)
4871 306 : return;
4872 :
4873 : Assert(SxactIsPrepared(MySerializableXact));
4874 :
4875 12 : MySerializableXact->pid = 0;
4876 12 : MySerializableXact->pgprocno = INVALID_PROC_NUMBER;
4877 :
4878 12 : hash_destroy(LocalPredicateLockHash);
4879 12 : LocalPredicateLockHash = NULL;
4880 :
4881 12 : MySerializableXact = InvalidSerializableXact;
4882 12 : MyXactDidWrite = false;
4883 : }
4884 :
4885 : /*
4886 : * PredicateLockTwoPhaseFinish
4887 : * Release a prepared transaction's predicate locks once it
4888 : * commits or aborts. Does nothing if fxid's xid has no SERIALIZABLEXID entry.
4889 : */
4890 : void
4891 325 : PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
4892 : {
4893 : SERIALIZABLEXID *sxid;
4894 : SERIALIZABLEXIDTAG sxidtag;
4895 :
4896 325 : sxidtag.xid = XidFromFullTransactionId(fxid);
4897 :
4898 325 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4899 : sxid = (SERIALIZABLEXID *)
4900 325 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
4901 325 : LWLockRelease(SerializableXactHashLock);
4902 :
4903 : /* xid will not be found if it wasn't a serializable transaction */
4904 325 : if (sxid == NULL)
4905 313 : return;
4906 :
4907 : /* Release its locks, adopting its SERIALIZABLEXACT as MySerializableXact for the call */
4908 12 : MySerializableXact = sxid->myXact;
4909 12 : MyXactDidWrite = true; /* conservatively assume that we wrote
4910 : * something */
4911 12 : ReleasePredicateLocks(isCommit, false);
4912 : }
4913 :
4914 : /*
4915 : * Recover one 2PC state record of a prepared transaction: rebuild its SERIALIZABLEXACT (XACT record) or re-acquire one predicate lock (LOCK record).
4916 : */
4917 : void
4918 0 : predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,
4919 : void *recdata, uint32 len)
4920 : {
4921 : TwoPhasePredicateRecord *record;
4922 0 : TransactionId xid = XidFromFullTransactionId(fxid);
4923 :
4924 : Assert(len == sizeof(TwoPhasePredicateRecord));
4925 :
4926 0 : record = (TwoPhasePredicateRecord *) recdata;
4927 :
4928 : Assert((record->type == TWOPHASEPREDICATERECORD_XACT) ||
4929 : (record->type == TWOPHASEPREDICATERECORD_LOCK));
4930 :
4931 0 : if (record->type == TWOPHASEPREDICATERECORD_XACT)
4932 : {
4933 : /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4934 : TwoPhasePredicateXactRecord *xactRecord;
4935 : SERIALIZABLEXACT *sxact;
4936 : SERIALIZABLEXID *sxid;
4937 : SERIALIZABLEXIDTAG sxidtag;
4938 : bool found;
4939 :
4940 0 : xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4941 :
4942 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4943 0 : sxact = CreatePredXact();
4944 0 : if (!sxact)
4945 0 : ereport(ERROR,
4946 : (errcode(ERRCODE_OUT_OF_MEMORY),
4947 : errmsg("out of shared memory")));
4948 :
4949 : /* vxid for a prepared xact is INVALID_PROC_NUMBER/xid; no pid */
4950 0 : sxact->vxid.procNumber = INVALID_PROC_NUMBER;
4951 0 : sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4952 0 : sxact->pid = 0;
4953 0 : sxact->pgprocno = INVALID_PROC_NUMBER;
4954 :
4955 : /* a prepared xact hasn't committed yet */
4956 0 : sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4957 0 : sxact->commitSeqNo = InvalidSerCommitSeqNo;
4958 0 : sxact->finishedBefore = InvalidTransactionId;
4959 :
4960 0 : sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4961 :
4962 : /*
4963 : * Don't need to track this; no transactions running at the time the
4964 : * recovered xact started are still active, except possibly other
4965 : * prepared xacts and we don't care whether those are RO_SAFE or not.
4966 : */
4967 0 : dlist_init(&(sxact->possibleUnsafeConflicts));
4968 :
4969 0 : dlist_init(&(sxact->predicateLocks));
4970 0 : dlist_node_init(&sxact->finishedLink);
4971 :
4972 0 : sxact->topXid = xid;
4973 0 : sxact->xmin = xactRecord->xmin;
4974 0 : sxact->flags = xactRecord->flags;
4975 : Assert(SxactIsPrepared(sxact));
4976 0 : if (!SxactIsReadOnly(sxact))
4977 : {
4978 0 : ++(PredXact->WritableSxactCount);
4979 : Assert(PredXact->WritableSxactCount <=
4980 : (MaxBackends + max_prepared_xacts));
4981 : }
4982 :
4983 : /*
4984 : * We don't know whether the transaction had any conflicts or not, so
4985 : * we'll conservatively assume that it had both a conflict in and a
4986 : * conflict out, and represent that with the summary conflict flags.
4987 : */
4988 0 : dlist_init(&(sxact->outConflicts));
4989 0 : dlist_init(&(sxact->inConflicts));
4990 0 : sxact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
4991 0 : sxact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4992 :
4993 : /* Register the transaction's xid */
4994 0 : sxidtag.xid = xid;
4995 0 : sxid = (SERIALIZABLEXID *) hash_search(SerializableXidHash,
4996 : &sxidtag,
4997 : HASH_ENTER, &found);
4998 : Assert(sxid != NULL);
4999 : Assert(!found);
5000 0 : sxid->myXact = sxact;
5001 :
5002 : /*
5003 : * Update global xmin. Note that this is a special case compared to
5004 : * registering a normal transaction, because the global xmin might go
5005 : * backwards. That's OK, because until recovery is over we're not
5006 : * going to complete any transactions or create any non-prepared
5007 : * transactions, so there's no danger of throwing anything away too early.
5008 : */
5009 0 : if ((!TransactionIdIsValid(PredXact->SxactGlobalXmin)) ||
5010 0 : (TransactionIdFollows(PredXact->SxactGlobalXmin, sxact->xmin)))
5011 : {
5012 0 : PredXact->SxactGlobalXmin = sxact->xmin;
5013 0 : PredXact->SxactGlobalXminCount = 1;
5014 0 : SerialSetActiveSerXmin(sxact->xmin);
5015 : }
5016 0 : else if (TransactionIdEquals(sxact->xmin, PredXact->SxactGlobalXmin))
5017 : {
5018 : Assert(PredXact->SxactGlobalXminCount > 0);
5019 0 : PredXact->SxactGlobalXminCount++;
5020 : }
5021 :
5022 0 : LWLockRelease(SerializableXactHashLock);
5023 : }
5024 0 : else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
5025 : {
5026 : /* Lock record. Recreate the PREDICATELOCK */
5027 : TwoPhasePredicateLockRecord *lockRecord;
5028 : SERIALIZABLEXID *sxid;
5029 : SERIALIZABLEXACT *sxact;
5030 : SERIALIZABLEXIDTAG sxidtag;
5031 : uint32 targettaghash;
5032 :
5033 0 : lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
5034 0 : targettaghash = PredicateLockTargetTagHashCode(&lockRecord->target);
5035 :
5036 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
5037 0 : sxidtag.xid = xid;
5038 : sxid = (SERIALIZABLEXID *)
5039 0 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
5040 0 : LWLockRelease(SerializableXactHashLock);
5041 :
5042 : Assert(sxid != NULL);
5043 0 : sxact = sxid->myXact;
5044 : Assert(sxact != InvalidSerializableXact);
5045 :
5046 0 : CreatePredicateLock(&lockRecord->target, targettaghash, sxact);
5047 : }
5048 0 : }
5049 :
5050 : /*
5051 : * Prepare to share the current SERIALIZABLEXACT with parallel workers.
5052 : * Return a handle object that can be used by AttachSerializableXact() in a
5053 : * parallel worker. The handle is simply the MySerializableXact pointer.
5054 : */
5055 : SerializableXactHandle
5056 676 : ShareSerializableXact(void)
5057 : {
5058 676 : return MySerializableXact;
5059 : }
5060 :
5061 : /*
5062 : * Allow parallel workers to import the leader's SERIALIZABLEXACT. The local predicate lock hash is created only when the handle is not InvalidSerializableXact.
5063 : */
5064 : void
5065 2004 : AttachSerializableXact(SerializableXactHandle handle)
5066 : {
5067 :
5068 : Assert(MySerializableXact == InvalidSerializableXact);
5069 :
5070 2004 : MySerializableXact = (SERIALIZABLEXACT *) handle;
5071 2004 : if (MySerializableXact != InvalidSerializableXact)
5072 13 : CreateLocalPredicateLockHash();
5073 2004 : }
|