Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frills. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. The variable is not reset when the
15 : * lock is acquired with LWLockAcquire; it keeps the value it was set to when
16 : * the lock was last released, and it can be updated
17 : * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : * appropriate value for a free lock. The meaning of the variable is up to
21 : * the caller; the lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the new value is zero
52 : * (we get that back atomically), we know we can, and have to, wake up any
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a multi-phase attempt at locking:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem above: nobody can release too
74 : * quickly, before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
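
/*
 * Illustrative sketch (not part of this file, compiled out): the
 * compare-and-exchange based acquisition and atomic-decrement release
 * described above, reduced to a single 32-bit word using C11 atomics.
 * SKETCH_VAL_EXCLUSIVE and SKETCH_LOCK_MASK are simplified stand-ins for
 * LW_VAL_EXCLUSIVE / LW_LOCK_MASK; the real code below additionally
 * maintains the wait queue and the flag bits.
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_VAL_EXCLUSIVE ((uint32_t) 1 << 24)	/* assumed sentinel */
#define SKETCH_LOCK_MASK (SKETCH_VAL_EXCLUSIVE | (SKETCH_VAL_EXCLUSIVE - 1))

/* Returns true if the lock was acquired, false if the caller must wait. */
static bool
sketch_attempt_lock(_Atomic uint32_t *state, bool exclusive)
{
	uint32_t	old_state = atomic_load(state);

	while (true)
	{
		uint32_t	desired = old_state;
		bool		lock_free;

		if (exclusive)
		{
			lock_free = (old_state & SKETCH_LOCK_MASK) == 0;
			if (lock_free)
				desired += SKETCH_VAL_EXCLUSIVE;
		}
		else
		{
			lock_free = (old_state & SKETCH_VAL_EXCLUSIVE) == 0;
			if (lock_free)
				desired += 1;	/* one more shared holder */
		}

		/* on failure old_state is refreshed with the current value; retry */
		if (atomic_compare_exchange_weak(state, &old_state, desired))
			return lock_free;
	}
}

static void
sketch_release(_Atomic uint32_t *state, bool exclusive)
{
	uint32_t	sub = exclusive ? SKETCH_VAL_EXCLUSIVE : 1;
	uint32_t	prev = atomic_fetch_sub(state, sub);	/* returns old value */

	if (prev - sub == 0)
	{
		/* lock is now free: this is where waiters would be woken up */
	}
}
#endif
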
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "storage/proc.h"
84 : #include "storage/proclist.h"
85 : #include "storage/procnumber.h"
86 : #include "storage/spin.h"
87 : #include "utils/memutils.h"
88 :
89 : #ifdef LWLOCK_STATS
90 : #include "utils/hsearch.h"
91 : #endif
92 :
93 :
94 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
95 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 30)
96 : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
97 : #define LW_FLAG_BITS 3
98 : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
99 :
100 : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
101 : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
102 : #define LW_VAL_SHARED 1
103 :
104 : /* already (power of 2)-1, i.e. suitable for a mask */
105 : #define LW_SHARED_MASK MAX_BACKENDS
106 : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
107 :
108 :
109 : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
110 : "MAX_BACKENDS + 1 needs to be a power of 2");
111 :
112 : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
113 : "MAX_BACKENDS and LW_FLAG_MASK overlap");
114 :
115 : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
116 : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
117 :
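/*
 * Illustrative sketch (compiled out): how a state word decomposes under the
 * definitions above.  The three flag bits occupy the top of the word, the
 * exclusive sentinel sits just above the shared-holder count, and the
 * shared-holder count occupies the low bits covered by LW_SHARED_MASK.
 */
#if 0
static void
sketch_decode_state(uint32 state,
					bool *has_waiters, bool *release_ok, bool *waitlist_locked,
					bool *held_exclusive, uint32 *shared_holders)
{
	*has_waiters = (state & LW_FLAG_HAS_WAITERS) != 0;
	*release_ok = (state & LW_FLAG_RELEASE_OK) != 0;
	*waitlist_locked = (state & LW_FLAG_LOCKED) != 0;
	*held_exclusive = (state & LW_VAL_EXCLUSIVE) != 0;
	*shared_holders = state & LW_SHARED_MASK;
}
#endif
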
118 : /*
119 : * There are three sorts of LWLock "tranches":
120 : *
121 : * 1. The individually-named locks defined in lwlocklist.h each have their
122 : * own tranche. We absorb the names of these tranches from there into
123 : * BuiltinTrancheNames here.
124 : *
125 : * 2. There are some predefined tranches for built-in groups of locks.
126 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
127 : * appear in BuiltinTrancheNames[] below.
128 : *
129 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
130 : * or LWLockRegisterTranche. The names of these that are known in the current
131 : * process appear in LWLockTrancheNames[].
132 : *
133 : * All these names are user-visible as wait event names, so choose with care
134 : * ... and do not forget to update the documentation's list of wait events.
135 : */
136 : static const char *const BuiltinTrancheNames[] = {
137 : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
138 : #include "storage/lwlocklist.h"
139 : #undef PG_LWLOCK
140 : [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
141 : [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
142 : [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
143 : [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
144 : [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
145 : [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
146 : [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
147 : [LWTRANCHE_WAL_INSERT] = "WALInsert",
148 : [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
149 : [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
150 : [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
151 : [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
152 : [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
153 : [LWTRANCHE_LOCK_MANAGER] = "LockManager",
154 : [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
155 : [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
156 : [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
157 : [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
158 : [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
159 : [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
160 : [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
161 : [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
162 : [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
163 : [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
164 : [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
165 : [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
166 : [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
167 : [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
168 : [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
169 : [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
170 : [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
171 : [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
172 : [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
173 : [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
174 : [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
175 : [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
176 : [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
177 : [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
178 : [LWTRANCHE_XACT_SLRU] = "XactSLRU",
179 : [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
180 : [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
181 : };
182 :
183 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
184 : LWTRANCHE_FIRST_USER_DEFINED,
185 : "missing entries in BuiltinTrancheNames[]");
186 :
187 : /*
188 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
189 : * stores the names of all dynamically-created tranches known to the current
190 : * process. Any unused entries in the array will contain NULL.
191 : */
192 : static const char **LWLockTrancheNames = NULL;
193 : static int LWLockTrancheNamesAllocated = 0;
194 :
195 : /*
196 : * This points to the main array of LWLocks in shared memory. Backends inherit
197 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
198 : * where we have special measures to pass it down).
199 : */
200 : LWLockPadded *MainLWLockArray = NULL;
201 :
202 : /*
203 : * We use this structure to keep track of locked LWLocks for release
204 : * during error recovery. Normally, only a few will be held at once, but
205 : * occasionally the number can be much higher; for example, the pg_buffercache
206 : * extension locks all buffer partitions simultaneously.
207 : */
208 : #define MAX_SIMUL_LWLOCKS 200
209 :
210 : /* struct representing the LWLocks we're holding */
211 : typedef struct LWLockHandle
212 : {
213 : LWLock *lock;
214 : LWLockMode mode;
215 : } LWLockHandle;
216 :
217 : static int num_held_lwlocks = 0;
218 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
219 :
220 : /* struct representing the LWLock tranche request for named tranche */
221 : typedef struct NamedLWLockTrancheRequest
222 : {
223 : char tranche_name[NAMEDATALEN];
224 : int num_lwlocks;
225 : } NamedLWLockTrancheRequest;
226 :
227 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
228 : static int NamedLWLockTrancheRequestsAllocated = 0;
229 :
230 : /*
231 : * NamedLWLockTrancheRequests is both the valid length of the request array,
232 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
233 : * This variable and NamedLWLockTrancheArray are non-static so that
234 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
235 : */
236 : int NamedLWLockTrancheRequests = 0;
237 :
238 : /* points to data in shared memory: */
239 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
240 :
241 : static void InitializeLWLocks(void);
242 : static inline void LWLockReportWaitStart(LWLock *lock);
243 : static inline void LWLockReportWaitEnd(void);
244 : static const char *GetLWTrancheName(uint16 trancheId);
245 :
246 : #define T_NAME(lock) \
247 : GetLWTrancheName((lock)->tranche)
248 :
249 : #ifdef LWLOCK_STATS
250 : typedef struct lwlock_stats_key
251 : {
252 : int tranche;
253 : void *instance;
254 : } lwlock_stats_key;
255 :
256 : typedef struct lwlock_stats
257 : {
258 : lwlock_stats_key key;
259 : int sh_acquire_count;
260 : int ex_acquire_count;
261 : int block_count;
262 : int dequeue_self_count;
263 : int spin_delay_count;
264 : } lwlock_stats;
265 :
266 : static HTAB *lwlock_stats_htab;
267 : static lwlock_stats lwlock_stats_dummy;
268 : #endif
269 :
270 : #ifdef LOCK_DEBUG
271 : bool Trace_lwlocks = false;
272 :
273 : inline static void
274 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
275 : {
276 : /* hide statement & context here, otherwise the log is just too verbose */
277 : if (Trace_lwlocks)
278 : {
279 : uint32 state = pg_atomic_read_u32(&lock->state);
280 :
281 : ereport(LOG,
282 : (errhidestmt(true),
283 : errhidecontext(true),
284 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
285 : MyProcPid,
286 : where, T_NAME(lock), lock,
287 : (state & LW_VAL_EXCLUSIVE) != 0,
288 : state & LW_SHARED_MASK,
289 : (state & LW_FLAG_HAS_WAITERS) != 0,
290 : pg_atomic_read_u32(&lock->nwaiters),
291 : (state & LW_FLAG_RELEASE_OK) != 0)));
292 : }
293 : }
294 :
295 : inline static void
296 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
297 : {
298 : /* hide statement & context here, otherwise the log is just too verbose */
299 : if (Trace_lwlocks)
300 : {
301 : ereport(LOG,
302 : (errhidestmt(true),
303 : errhidecontext(true),
304 : errmsg_internal("%s(%s %p): %s", where,
305 : T_NAME(lock), lock, msg)));
306 : }
307 : }
308 :
309 : #else /* not LOCK_DEBUG */
310 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
311 : #define LOG_LWDEBUG(a,b,c) ((void)0)
312 : #endif /* LOCK_DEBUG */
313 :
314 : #ifdef LWLOCK_STATS
315 :
316 : static void init_lwlock_stats(void);
317 : static void print_lwlock_stats(int code, Datum arg);
318 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
319 :
320 : static void
321 : init_lwlock_stats(void)
322 : {
323 : HASHCTL ctl;
324 : static MemoryContext lwlock_stats_cxt = NULL;
325 : static bool exit_registered = false;
326 :
327 : if (lwlock_stats_cxt != NULL)
328 : MemoryContextDelete(lwlock_stats_cxt);
329 :
330 : /*
331 : * The LWLock stats will be updated within a critical section, which
332 : * requires allocating new hash entries. Allocations within a critical
333 : * section are normally not allowed because running out of memory would
334 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
335 : * turned on in production, so that's an acceptable risk. The hash entries
336 : * are small, so the risk of running out of memory is minimal in practice.
337 : */
338 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
339 : "LWLock stats",
340 : ALLOCSET_DEFAULT_SIZES);
341 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
342 :
343 : ctl.keysize = sizeof(lwlock_stats_key);
344 : ctl.entrysize = sizeof(lwlock_stats);
345 : ctl.hcxt = lwlock_stats_cxt;
346 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
347 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
348 : if (!exit_registered)
349 : {
350 : on_shmem_exit(print_lwlock_stats, 0);
351 : exit_registered = true;
352 : }
353 : }
354 :
355 : static void
356 : print_lwlock_stats(int code, Datum arg)
357 : {
358 : HASH_SEQ_STATUS scan;
359 : lwlock_stats *lwstats;
360 :
361 : hash_seq_init(&scan, lwlock_stats_htab);
362 :
363 : /* Grab an LWLock to keep different backends from mixing reports */
364 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
365 :
366 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
367 : {
368 : fprintf(stderr,
369 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
370 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
371 : lwstats->key.instance, lwstats->sh_acquire_count,
372 : lwstats->ex_acquire_count, lwstats->block_count,
373 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
374 : }
375 :
376 : LWLockRelease(&MainLWLockArray[0].lock);
377 : }
378 :
379 : static lwlock_stats *
380 : get_lwlock_stats_entry(LWLock *lock)
381 : {
382 : lwlock_stats_key key;
383 : lwlock_stats *lwstats;
384 : bool found;
385 :
386 : /*
387 : * During shared memory initialization, the hash table doesn't exist yet.
388 : * Stats of that phase aren't very interesting, so just collect operations
389 : * on all locks in a single dummy entry.
390 : */
391 : if (lwlock_stats_htab == NULL)
392 : return &lwlock_stats_dummy;
393 :
394 : /* Fetch or create the entry. */
395 : MemSet(&key, 0, sizeof(key));
396 : key.tranche = lock->tranche;
397 : key.instance = lock;
398 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
399 : if (!found)
400 : {
401 : lwstats->sh_acquire_count = 0;
402 : lwstats->ex_acquire_count = 0;
403 : lwstats->block_count = 0;
404 : lwstats->dequeue_self_count = 0;
405 : lwstats->spin_delay_count = 0;
406 : }
407 : return lwstats;
408 : }
409 : #endif /* LWLOCK_STATS */
410 :
411 :
412 : /*
413 : * Compute number of LWLocks required by named tranches. These will be
414 : * allocated in the main array.
415 : */
416 : static int
417 7858 : NumLWLocksForNamedTranches(void)
418 : {
419 7858 : int numLocks = 0;
420 : int i;
421 :
422 7914 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
423 56 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
424 :
425 7858 : return numLocks;
426 : }
427 :
428 : /*
429 : * Compute shmem space needed for LWLocks and named tranches.
430 : */
431 : Size
432 5826 : LWLockShmemSize(void)
433 : {
434 : Size size;
435 : int i;
436 5826 : int numLocks = NUM_FIXED_LWLOCKS;
437 :
438 : /* Calculate total number of locks needed in the main array. */
439 5826 : numLocks += NumLWLocksForNamedTranches();
440 :
441 : /* Space for the LWLock array. */
442 5826 : size = mul_size(numLocks, sizeof(LWLockPadded));
443 :
444 : /* Space for dynamic allocation counter, plus room for alignment. */
445 5826 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
446 :
447 : /* space for named tranches. */
448 5826 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
449 :
450 : /* space for name of each tranche. */
451 5868 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
452 42 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
453 :
454 5826 : return size;
455 : }
456 :
457 : /*
458 : * Allocate shmem space for the main LWLock array and all tranches and
459 : * initialize it. We also register extension LWLock tranches here.
460 : */
461 : void
462 2032 : CreateLWLocks(void)
463 : {
464 2032 : if (!IsUnderPostmaster)
465 : {
466 2032 : Size spaceLocks = LWLockShmemSize();
467 : int *LWLockCounter;
468 : char *ptr;
469 :
470 : /* Allocate space */
471 2032 : ptr = (char *) ShmemAlloc(spaceLocks);
472 :
473 : /* Leave room for dynamic allocation of tranches */
474 2032 : ptr += sizeof(int);
475 :
476 : /* Ensure desired alignment of LWLock array */
477 2032 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
478 :
479 2032 : MainLWLockArray = (LWLockPadded *) ptr;
480 :
481 : /*
482 : * Initialize the dynamic-allocation counter for tranches, which is
483 : * stored just before the first LWLock.
484 : */
485 2032 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
486 2032 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
487 :
488 : /* Initialize all LWLocks */
489 2032 : InitializeLWLocks();
490 : }
491 :
492 : /* Register named extension LWLock tranches in the current process. */
493 2046 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
494 14 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
495 14 : NamedLWLockTrancheArray[i].trancheName);
496 2032 : }
497 :
498 : /*
499 : * Initialize LWLocks that are fixed and those belonging to named tranches.
500 : */
501 : static void
502 2032 : InitializeLWLocks(void)
503 : {
504 2032 : int numNamedLocks = NumLWLocksForNamedTranches();
505 : int id;
506 : int i;
507 : int j;
508 : LWLockPadded *lock;
509 :
510 : /* Initialize all individual LWLocks in main array */
511 111760 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
512 109728 : LWLockInitialize(&lock->lock, id);
513 :
514 : /* Initialize buffer mapping LWLocks in main array */
515 2032 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
516 262128 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
517 260096 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
518 :
519 : /* Initialize lmgrs' LWLocks in main array */
520 2032 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
521 34544 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
522 32512 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
523 :
524 : /* Initialize predicate lmgrs' LWLocks in main array */
525 2032 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
526 34544 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
527 32512 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
528 :
529 : /*
530 : * Copy the info about any named tranches into shared memory (so that
531 : * other processes can see it), and initialize the requested LWLocks.
532 : */
533 2032 : if (NamedLWLockTrancheRequests > 0)
534 : {
535 : char *trancheNames;
536 :
537 14 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
538 14 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
539 :
540 14 : trancheNames = (char *) NamedLWLockTrancheArray +
541 14 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
542 14 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
543 :
544 28 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
545 : {
546 : NamedLWLockTrancheRequest *request;
547 : NamedLWLockTranche *tranche;
548 : char *name;
549 :
550 14 : request = &NamedLWLockTrancheRequestArray[i];
551 14 : tranche = &NamedLWLockTrancheArray[i];
552 :
553 14 : name = trancheNames;
554 14 : trancheNames += strlen(request->tranche_name) + 1;
555 14 : strcpy(name, request->tranche_name);
556 14 : tranche->trancheId = LWLockNewTrancheId();
557 14 : tranche->trancheName = name;
558 :
559 28 : for (j = 0; j < request->num_lwlocks; j++, lock++)
560 14 : LWLockInitialize(&lock->lock, tranche->trancheId);
561 : }
562 : }
563 2032 : }
564 :
565 : /*
566 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
567 : */
568 : void
569 42318 : InitLWLockAccess(void)
570 : {
571 : #ifdef LWLOCK_STATS
572 : init_lwlock_stats();
573 : #endif
574 42318 : }
575 :
576 : /*
577 : * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
578 : * to the specified tranche.
579 : *
580 : * The caller can access the requested number of LWLocks starting from the
581 : * base address returned by this function. This can only be used for
582 : * tranches that were requested via the RequestNamedLWLockTranche() API.
583 : */
584 : LWLockPadded *
585 14 : GetNamedLWLockTranche(const char *tranche_name)
586 : {
587 : int lock_pos;
588 : int i;
589 :
590 : /*
591 : * Obtain the position of the base address of the LWLocks belonging to the
592 : * requested tranche_name in MainLWLockArray. LWLocks for named tranches
593 : * are placed in MainLWLockArray after the fixed locks.
594 : */
595 14 : lock_pos = NUM_FIXED_LWLOCKS;
596 14 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
597 : {
598 14 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
599 : tranche_name) == 0)
600 14 : return &MainLWLockArray[lock_pos];
601 :
602 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
603 : }
604 :
605 0 : elog(ERROR, "requested tranche is not registered");
606 :
607 : /* just to keep compiler quiet */
608 : return NULL;
609 : }
610 :
611 : /*
612 : * Allocate a new tranche ID.
613 : */
614 : int
615 34 : LWLockNewTrancheId(void)
616 : {
617 : int result;
618 : int *LWLockCounter;
619 :
620 34 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
621 : /* We use the ShmemLock spinlock to protect LWLockCounter */
622 34 : SpinLockAcquire(ShmemLock);
623 34 : result = (*LWLockCounter)++;
624 34 : SpinLockRelease(ShmemLock);
625 :
626 34 : return result;
627 : }
628 :
629 : /*
630 : * Register a dynamic tranche name in the lookup table of the current process.
631 : *
632 : * This routine will save a pointer to the tranche name passed as an argument,
633 : * so the name should be allocated in a backend-lifetime context
634 : * (shared memory, TopMemoryContext, static constant, or similar).
635 : *
636 : * The tranche name will be user-visible as a wait event name, so try to
637 : * use a name that fits the style for those.
638 : */
639 : void
640 40 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
641 : {
642 : /* This should only be called for user-defined tranches. */
643 40 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
644 0 : return;
645 :
646 : /* Convert to array index. */
647 40 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
648 :
649 : /* If necessary, create or enlarge array. */
650 40 : if (tranche_id >= LWLockTrancheNamesAllocated)
651 : {
652 : int newalloc;
653 :
654 34 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
655 :
656 34 : if (LWLockTrancheNames == NULL)
657 34 : LWLockTrancheNames = (const char **)
658 34 : MemoryContextAllocZero(TopMemoryContext,
659 : newalloc * sizeof(char *));
660 : else
661 0 : LWLockTrancheNames =
662 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
663 34 : LWLockTrancheNamesAllocated = newalloc;
664 : }
665 :
666 40 : LWLockTrancheNames[tranche_id] = tranche_name;
667 : }
668 :
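/*
 * Hypothetical usage sketch (compiled out): an extension that keeps an LWLock
 * in its own shared memory area and registers a tranche for it at runtime,
 * instead of going through RequestNamedLWLockTranche.  The struct and the
 * names ("my_ext_shared_state", "my_extension") are made up for illustration.
 */
#if 0
typedef struct my_ext_shared_state
{
	LWLock		lock;
	int			counter;
} my_ext_shared_state;

static void
my_ext_init_shared_state(my_ext_shared_state *state)
{
	int			tranche_id = LWLockNewTrancheId();

	/* make the tranche name known in this process for wait-event reporting */
	LWLockRegisterTranche(tranche_id, "my_extension");
	LWLockInitialize(&state->lock, tranche_id);
}
#endif
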
669 : /*
670 : * RequestNamedLWLockTranche
671 : * Request that extra LWLocks be allocated during postmaster
672 : * startup.
673 : *
674 : * This may only be called via the shmem_request_hook of a library that is
675 : * loaded into the postmaster via shared_preload_libraries. Calls from
676 : * elsewhere will fail.
677 : *
678 : * The tranche name will be user-visible as a wait event name, so try to
679 : * use a name that fits the style for those.
680 : */
681 : void
682 14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
683 : {
684 : NamedLWLockTrancheRequest *request;
685 :
686 14 : if (!process_shmem_requests_in_progress)
687 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
688 :
689 14 : if (NamedLWLockTrancheRequestArray == NULL)
690 : {
691 14 : NamedLWLockTrancheRequestsAllocated = 16;
692 14 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
693 14 : MemoryContextAlloc(TopMemoryContext,
694 : NamedLWLockTrancheRequestsAllocated
695 : * sizeof(NamedLWLockTrancheRequest));
696 : }
697 :
698 14 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
699 : {
700 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
701 :
702 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
703 0 : repalloc(NamedLWLockTrancheRequestArray,
704 : i * sizeof(NamedLWLockTrancheRequest));
705 0 : NamedLWLockTrancheRequestsAllocated = i;
706 : }
707 :
708 14 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
709 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
710 14 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
711 14 : request->num_lwlocks = num_lwlocks;
712 14 : NamedLWLockTrancheRequests++;
713 14 : }
714 :
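/*
 * Hypothetical usage sketch (compiled out): the named-tranche path from an
 * extension loaded via shared_preload_libraries.  The tranche name
 * "my_extension" and the lock count are made up for illustration.
 */
#if 0
static void
my_ext_shmem_request(void)
{
	/* only legal from within the shmem_request_hook */
	RequestNamedLWLockTranche("my_extension", 4);
}

static void
my_ext_shmem_startup(void)
{
	/* the four requested locks live consecutively in the main LWLock array */
	LWLockPadded *locks = GetNamedLWLockTranche("my_extension");

	LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
	/* ... set up the extension's shared state here ... */
	LWLockRelease(&locks[0].lock);
}
#endif
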
715 : /*
716 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
717 : */
718 : void
719 23438908 : LWLockInitialize(LWLock *lock, int tranche_id)
720 : {
721 23438908 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
722 : #ifdef LOCK_DEBUG
723 : pg_atomic_init_u32(&lock->nwaiters, 0);
724 : #endif
725 23438908 : lock->tranche = tranche_id;
726 23438908 : proclist_init(&lock->waiters);
727 23438908 : }
728 :
729 : /*
730 : * Report start of wait event for light-weight locks.
731 : *
732 : * This function is used by all the lightweight lock calls that need to
733 : * wait to acquire the lock. It distinguishes the wait event based on
734 : * the tranche and lock id.
735 : */
736 : static inline void
737 4424112 : LWLockReportWaitStart(LWLock *lock)
738 : {
739 4424112 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
740 4424112 : }
741 :
742 : /*
743 : * Report end of wait event for light-weight locks.
744 : */
745 : static inline void
746 4424112 : LWLockReportWaitEnd(void)
747 : {
748 4424112 : pgstat_report_wait_end();
749 4424112 : }
750 :
751 : /*
752 : * Return the name of an LWLock tranche.
753 : */
754 : static const char *
755 58 : GetLWTrancheName(uint16 trancheId)
756 : {
757 : /* Built-in tranche or individual LWLock? */
758 58 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
759 58 : return BuiltinTrancheNames[trancheId];
760 :
761 : /*
762 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
763 : * it's possible that the tranche has never been registered in the current
764 : * process, in which case give up and return "extension".
765 : */
766 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
767 :
768 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
769 0 : LWLockTrancheNames[trancheId] == NULL)
770 0 : return "extension";
771 :
772 0 : return LWLockTrancheNames[trancheId];
773 : }
774 :
775 : /*
776 : * Return an identifier for an LWLock based on the wait class and event.
777 : */
778 : const char *
779 58 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
780 : {
781 : Assert(classId == PG_WAIT_LWLOCK);
782 : /* The event IDs are just tranche numbers. */
783 58 : return GetLWTrancheName(eventId);
784 : }
785 :
786 : /*
787 : * Internal function that tries to atomically acquire the lwlock in the passed
788 : * in mode.
789 : *
790 : * This function will not block waiting for a lock to become free - that's the
791 : * caller's job.
792 : *
793 : * Returns true if the lock isn't free and we need to wait.
794 : */
795 : static bool
796 712671378 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
797 : {
798 : uint32 old_state;
799 :
800 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
801 :
802 : /*
803 : * Read once outside the loop; later iterations will get the newer value
804 : * via compare & exchange.
805 : */
806 712671378 : old_state = pg_atomic_read_u32(&lock->state);
807 :
808 : /* loop until we've determined whether we could acquire the lock or not */
809 : while (true)
810 524722 : {
811 : uint32 desired_state;
812 : bool lock_free;
813 :
814 713196100 : desired_state = old_state;
815 :
816 713196100 : if (mode == LW_EXCLUSIVE)
817 : {
818 448145268 : lock_free = (old_state & LW_LOCK_MASK) == 0;
819 448145268 : if (lock_free)
820 444385480 : desired_state += LW_VAL_EXCLUSIVE;
821 : }
822 : else
823 : {
824 265050832 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
825 265050832 : if (lock_free)
826 259762118 : desired_state += LW_VAL_SHARED;
827 : }
828 :
829 : /*
830 : * Attempt to swap in the state we are expecting. If we didn't see
831 : * Attempt to swap in the state we are expecting. If we didn't see the
832 : * lock as free, that's just the old value. If we saw it as free,
833 : * we'll attempt to mark it acquired. The reason that we always swap
834 : * in the value is that this doubles as a memory barrier. We could try
835 : * to be smarter and only swap in values if we saw the lock as free,
836 : * but benchmarks haven't shown that to be beneficial so far.
837 : * Retry if the value changed since we last looked at it.
838 : */
839 713196100 : if (pg_atomic_compare_exchange_u32(&lock->state,
840 : &old_state, desired_state))
841 : {
842 712671378 : if (lock_free)
843 : {
844 : /* Great! Got the lock. */
845 : #ifdef LOCK_DEBUG
846 : if (mode == LW_EXCLUSIVE)
847 : lock->owner = MyProc;
848 : #endif
849 703790038 : return false;
850 : }
851 : else
852 8881340 : return true; /* somebody else has the lock */
853 : }
854 : }
855 : pg_unreachable();
856 : }
857 :
858 : /*
859 : * Lock the LWLock's wait list against concurrent activity.
860 : *
861 : * NB: even though the wait list is locked, non-conflicting lock operations
862 : * may still happen concurrently.
863 : *
864 : * Time spent holding the mutex should be short!
865 : */
866 : static void
867 15185748 : LWLockWaitListLock(LWLock *lock)
868 : {
869 : uint32 old_state;
870 : #ifdef LWLOCK_STATS
871 : lwlock_stats *lwstats;
872 : uint32 delays = 0;
873 :
874 : lwstats = get_lwlock_stats_entry(lock);
875 : #endif
876 :
877 : while (true)
878 : {
879 : /* always try once to acquire lock directly */
880 15185748 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
881 15185748 : if (!(old_state & LW_FLAG_LOCKED))
882 14993540 : break; /* got lock */
883 :
884 : /* and then spin without atomic operations until lock is released */
885 : {
886 : SpinDelayStatus delayStatus;
887 :
888 192208 : init_local_spin_delay(&delayStatus);
889 :
890 639478 : while (old_state & LW_FLAG_LOCKED)
891 : {
892 447270 : perform_spin_delay(&delayStatus);
893 447270 : old_state = pg_atomic_read_u32(&lock->state);
894 : }
895 : #ifdef LWLOCK_STATS
896 : delays += delayStatus.delays;
897 : #endif
898 192208 : finish_spin_delay(&delayStatus);
899 : }
900 :
901 : /*
902 : * Retry. The lock might, of course, already have been re-acquired by
903 : * the time we attempt to get it again.
904 : */
905 : }
906 :
907 : #ifdef LWLOCK_STATS
908 : lwstats->spin_delay_count += delays;
909 : #endif
910 14993540 : }
911 :
912 : /*
913 : * Unlock the LWLock's wait list.
914 : *
915 : * Note that it can be more efficient to manipulate flags and release the
916 : * locks in a single atomic operation.
917 : */
918 : static void
919 9794082 : LWLockWaitListUnlock(LWLock *lock)
920 : {
921 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
922 :
923 9794082 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
924 :
925 : Assert(old_state & LW_FLAG_LOCKED);
926 9794082 : }
927 :
928 : /*
929 : * Wake up all the lockers that currently have a chance to acquire the lock.
930 : */
931 : static void
932 5199458 : LWLockWakeup(LWLock *lock)
933 : {
934 : bool new_release_ok;
935 5199458 : bool wokeup_somebody = false;
936 : proclist_head wakeup;
937 : proclist_mutable_iter iter;
938 :
939 5199458 : proclist_init(&wakeup);
940 :
941 5199458 : new_release_ok = true;
942 :
943 : /* lock wait list while collecting backends to wake up */
944 5199458 : LWLockWaitListLock(lock);
945 :
946 7919134 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
947 : {
948 4481156 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
949 :
950 4481156 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
951 2828 : continue;
952 :
953 4478328 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
954 4478328 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
955 :
956 4478328 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
957 : {
958 : /*
959 : * Prevent additional wakeups until the retrying backend gets to run.
960 : * Backends that are just waiting for the lock to become free don't retry
961 : * automatically.
962 : */
963 4370744 : new_release_ok = false;
964 :
965 : /*
966 : * Don't wake up (further) exclusive lockers.
967 : */
968 4370744 : wokeup_somebody = true;
969 : }
970 :
971 : /*
972 : * Signal that the process isn't on the wait list anymore. This allows
973 : * LWLockDequeueSelf() to remove itself from the waitlist with a
974 : * proclist_delete(), rather than having to check if it has been
975 : * removed from the list.
976 : */
977 : Assert(waiter->lwWaiting == LW_WS_WAITING);
978 4478328 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
979 :
980 : /*
981 : * Once we've woken up an exclusive locker, there's no point in waking
982 : * up anybody else.
983 : */
984 4478328 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
985 1761480 : break;
986 : }
987 :
988 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
989 :
990 : /* unset required flags, and release lock, in one fell swoop */
991 : {
992 : uint32 old_state;
993 : uint32 desired_state;
994 :
995 5199458 : old_state = pg_atomic_read_u32(&lock->state);
996 : while (true)
997 : {
998 5256820 : desired_state = old_state;
999 :
1000 : /* compute desired flags */
1001 :
1002 5256820 : if (new_release_ok)
1003 885444 : desired_state |= LW_FLAG_RELEASE_OK;
1004 : else
1005 4371376 : desired_state &= ~LW_FLAG_RELEASE_OK;
1006 :
1007 5256820 : if (proclist_is_empty(&wakeup))
1008 840444 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1009 :
1010 5256820 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1011 :
1012 5256820 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1013 : desired_state))
1014 5199458 : break;
1015 : }
1016 : }
1017 :
1018 : /* Awaken any waiters I removed from the queue. */
1019 9677786 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1020 : {
1021 4478328 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1022 :
1023 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1024 4478328 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1025 :
1026 : /*
1027 : * Guarantee that lwWaiting being unset only becomes visible once the
1028 : * unlink from the list has completed. Otherwise the target backend
1029 : * could be woken up for some other reason and enqueue itself for a new
1030 : * lock - if that happens before the list unlink happens, the list would
1031 : * end up being corrupted.
1032 : *
1033 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1034 : * another lock.
1035 : */
1036 4478328 : pg_write_barrier();
1037 4478328 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1038 4478328 : PGSemaphoreUnlock(waiter->sem);
1039 : }
1040 5199458 : }
1041 :
1042 : /*
1043 : * Add ourselves to the end of the queue.
1044 : *
1045 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1046 : */
1047 : static void
1048 4660644 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1049 : {
1050 : /*
1051 : * If we don't have a PGPROC structure, there's no way to wait. This
1052 : * should never occur, since MyProc should only be null during shared
1053 : * memory initialization.
1054 : */
1055 4660644 : if (MyProc == NULL)
1056 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1057 :
1058 4660644 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1059 0 : elog(PANIC, "queueing for lock while waiting on another one");
1060 :
1061 4660644 : LWLockWaitListLock(lock);
1062 :
1063 : /* setting the flag is protected by the spinlock */
1064 4660644 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1065 :
1066 4660644 : MyProc->lwWaiting = LW_WS_WAITING;
1067 4660644 : MyProc->lwWaitMode = mode;
1068 :
1069 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1070 4660644 : if (mode == LW_WAIT_UNTIL_FREE)
1071 115296 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1072 : else
1073 4545348 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1074 :
1075 : /* Can release the mutex now */
1076 4660644 : LWLockWaitListUnlock(lock);
1077 :
1078 : #ifdef LOCK_DEBUG
1079 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1080 : #endif
1081 4660644 : }
1082 :
1083 : /*
1084 : * Remove ourselves from the waitlist.
1085 : *
1086 : * This is used if we queued ourselves because we thought we needed to sleep
1087 : * but, after further checking, we discovered that we don't actually need to
1088 : * do so.
1089 : */
1090 : static void
1091 236532 : LWLockDequeueSelf(LWLock *lock)
1092 : {
1093 : bool on_waitlist;
1094 :
1095 : #ifdef LWLOCK_STATS
1096 : lwlock_stats *lwstats;
1097 :
1098 : lwstats = get_lwlock_stats_entry(lock);
1099 :
1100 : lwstats->dequeue_self_count++;
1101 : #endif
1102 :
1103 236532 : LWLockWaitListLock(lock);
1104 :
1105 : /*
1106 : * Remove ourselves from the waitlist, unless we've already been removed.
1107 : * The removal happens with the wait list lock held, so there's no race in
1108 : * this check.
1109 : */
1110 236532 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1111 236532 : if (on_waitlist)
1112 179396 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1113 :
1114 236532 : if (proclist_is_empty(&lock->waiters) &&
1115 220940 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1116 : {
1117 220792 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1118 : }
1119 :
1120 : /* XXX: combine with fetch_and above? */
1121 236532 : LWLockWaitListUnlock(lock);
1122 :
1123 : /* clear waiting state again, nice for debugging */
1124 236532 : if (on_waitlist)
1125 179396 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1126 : else
1127 : {
1128 57136 : int extraWaits = 0;
1129 :
1130 : /*
1131 : * Somebody else dequeued us and has woken us up, or will shortly. Deal with the
1132 : * superfluous absorption of a wakeup.
1133 : */
1134 :
1135 : /*
1136 : * Reset RELEASE_OK flag if somebody woke us before we removed
1137 : * ourselves - they'll have set it to false.
1138 : */
1139 57136 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1140 :
1141 : /*
1142 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1143 : * get reset at some inconvenient point later. Most of the time this
1144 : * will immediately return.
1145 : */
1146 : for (;;)
1147 : {
1148 57136 : PGSemaphoreLock(MyProc->sem);
1149 57136 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1150 57136 : break;
1151 0 : extraWaits++;
1152 : }
1153 :
1154 : /*
1155 : * Fix the process wait semaphore's count for any absorbed wakeups.
1156 : */
1157 57136 : while (extraWaits-- > 0)
1158 0 : PGSemaphoreUnlock(MyProc->sem);
1159 : }
1160 :
1161 : #ifdef LOCK_DEBUG
1162 : {
1163 : /* not waiting anymore */
1164 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1165 :
1166 : Assert(nwaiters < MAX_BACKENDS);
1167 : }
1168 : #endif
1169 236532 : }
1170 :
1171 : /*
1172 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1173 : *
1174 : * If the lock is not available, sleep until it is. Returns true if the lock
1175 : * was available immediately, false if we had to sleep.
1176 : *
1177 : * Side effect: cancel/die interrupts are held off until lock release.
1178 : */
1179 : bool
1180 699251824 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1181 : {
1182 699251824 : PGPROC *proc = MyProc;
1183 699251824 : bool result = true;
1184 699251824 : int extraWaits = 0;
1185 : #ifdef LWLOCK_STATS
1186 : lwlock_stats *lwstats;
1187 :
1188 : lwstats = get_lwlock_stats_entry(lock);
1189 : #endif
1190 :
1191 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1192 :
1193 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1194 :
1195 : #ifdef LWLOCK_STATS
1196 : /* Count lock acquisition attempts */
1197 : if (mode == LW_EXCLUSIVE)
1198 : lwstats->ex_acquire_count++;
1199 : else
1200 : lwstats->sh_acquire_count++;
1201 : #endif /* LWLOCK_STATS */
1202 :
1203 : /*
1204 : * We can't wait if we haven't got a PGPROC. This should only occur
1205 : * during bootstrap or shared memory initialization. Put an Assert here
1206 : * to catch unsafe coding practices.
1207 : */
1208 : Assert(!(proc == NULL && IsUnderPostmaster));
1209 :
1210 : /* Ensure we will have room to remember the lock */
1211 699251824 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1212 0 : elog(ERROR, "too many LWLocks taken");
1213 :
1214 : /*
1215 : * Lock out cancel/die interrupts until we exit the code section protected
1216 : * by the LWLock. This ensures that interrupts will not interfere with
1217 : * manipulations of data structures in shared memory.
1218 : */
1219 699251824 : HOLD_INTERRUPTS();
1220 :
1221 : /*
1222 : * Loop here to try to acquire lock after each time we are signaled by
1223 : * LWLockRelease.
1224 : *
1225 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1226 : * lock, rather than retrying and possibly having to go back to sleep. But
1227 : * in practice that is no good because it means a process swap for every
1228 : * lock acquisition when two or more processes are contending for the same
1229 : * lock. Since LWLocks are normally used to protect not-very-long
1230 : * sections of computation, a process needs to be able to acquire and
1231 : * release the same lock many times during a single CPU time slice, even
1232 : * in the presence of contention. The efficiency of being able to do that
1233 : * outweighs the inefficiency of sometimes wasting a process dispatch
1234 : * cycle because the lock is not free when a released waiter finally gets
1235 : * to run. See pgsql-hackers archives for 29-Dec-01.
1236 : */
1237 : for (;;)
1238 4314550 : {
1239 : bool mustwait;
1240 :
1241 : /*
1242 : * Try to grab the lock the first time; we're not in the waitqueue
1243 : * yet/anymore.
1244 : */
1245 703566374 : mustwait = LWLockAttemptLock(lock, mode);
1246 :
1247 703566374 : if (!mustwait)
1248 : {
1249 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1250 699021026 : break; /* got the lock */
1251 : }
1252 :
1253 : /*
1254 : * Ok, at this point we couldn't grab the lock on the first try. We
1255 : * cannot simply queue ourselves to the end of the list and wait to be
1256 : * woken up because by now the lock could long since have been released.
1257 : * Instead add us to the queue and try to grab the lock again. If we
1258 : * succeed we need to revert the queuing and be happy, otherwise we
1259 : * recheck the lock. If we still couldn't grab it, we know that the
1260 : * other locker will see our queue entries when releasing since they
1261 : * existed before we checked for the lock.
1262 : */
1263 :
1264 : /* add to the queue */
1265 4545348 : LWLockQueueSelf(lock, mode);
1266 :
1267 : /* we're now guaranteed to be woken up if necessary */
1268 4545348 : mustwait = LWLockAttemptLock(lock, mode);
1269 :
1270 : /* ok, grabbed the lock the second time round, need to undo queueing */
1271 4545348 : if (!mustwait)
1272 : {
1273 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1274 :
1275 230798 : LWLockDequeueSelf(lock);
1276 230798 : break;
1277 : }
1278 :
1279 : /*
1280 : * Wait until awakened.
1281 : *
1282 : * It is possible that we get awakened for a reason other than being
1283 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1284 : * we've gotten the LWLock, re-increment the sema by the number of
1285 : * additional signals received.
1286 : */
1287 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1288 :
1289 : #ifdef LWLOCK_STATS
1290 : lwstats->block_count++;
1291 : #endif
1292 :
1293 4314550 : LWLockReportWaitStart(lock);
1294 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1295 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1296 :
1297 : for (;;)
1298 : {
1299 4314550 : PGSemaphoreLock(proc->sem);
1300 4314550 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1301 4314550 : break;
1302 0 : extraWaits++;
1303 : }
1304 :
1305 : /* Retrying, allow LWLockRelease to release waiters again. */
1306 4314550 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1307 :
1308 : #ifdef LOCK_DEBUG
1309 : {
1310 : /* not waiting anymore */
1311 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1312 :
1313 : Assert(nwaiters < MAX_BACKENDS);
1314 : }
1315 : #endif
1316 :
1317 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1318 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1319 4314550 : LWLockReportWaitEnd();
1320 :
1321 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1322 :
1323 : /* Now loop back and try to acquire lock again. */
1324 4314550 : result = false;
1325 : }
1326 :
1327 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1328 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1329 :
1330 : /* Add lock to list of locks held by this backend */
1331 699251824 : held_lwlocks[num_held_lwlocks].lock = lock;
1332 699251824 : held_lwlocks[num_held_lwlocks++].mode = mode;
1333 :
1334 : /*
1335 : * Fix the process wait semaphore's count for any absorbed wakeups.
1336 : */
1337 699251824 : while (extraWaits-- > 0)
1338 0 : PGSemaphoreUnlock(proc->sem);
1339 :
1340 699251824 : return result;
1341 : }
1342 :
1343 : /*
1344 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1345 : *
1346 : * If the lock is not available, return false with no side-effects.
1347 : *
1348 : * If successful, cancel/die interrupts are held off until lock release.
1349 : */
1350 : bool
1351 4298104 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1352 : {
1353 : bool mustwait;
1354 :
1355 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1356 :
1357 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1358 :
1359 : /* Ensure we will have room to remember the lock */
1360 4298104 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1361 0 : elog(ERROR, "too many LWLocks taken");
1362 :
1363 : /*
1364 : * Lock out cancel/die interrupts until we exit the code section protected
1365 : * by the LWLock. This ensures that interrupts will not interfere with
1366 : * manipulations of data structures in shared memory.
1367 : */
1368 4298104 : HOLD_INTERRUPTS();
1369 :
1370 : /* Check for the lock */
1371 4298104 : mustwait = LWLockAttemptLock(lock, mode);
1372 :
1373 4298104 : if (mustwait)
1374 : {
1375 : /* Failed to get lock, so release interrupt holdoff */
1376 1720 : RESUME_INTERRUPTS();
1377 :
1378 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1379 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1380 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1381 : }
1382 : else
1383 : {
1384 : /* Add lock to list of locks held by this backend */
1385 4296384 : held_lwlocks[num_held_lwlocks].lock = lock;
1386 4296384 : held_lwlocks[num_held_lwlocks++].mode = mode;
1387 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1388 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1389 : }
1390 4298104 : return !mustwait;
1391 : }
1392 :
1393 : /*
1394 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1395 : *
1396 : * The semantics of this function are a bit funky. If the lock is currently
1397 : * free, it is acquired in the given mode, and the function returns true. If
1398 : * the lock isn't immediately free, the function waits until it is released
1399 : * and returns false, but does not acquire the lock.
1400 : *
1401 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1402 : * holding WALWriteLock, it can flush the commit records of many other
1403 : * backends as a side-effect. Those other backends need to wait until the
1404 : * flush finishes, but don't need to acquire the lock anymore. They can just
1405 : * wake up, observe that their records have already been flushed, and return.
1406 : */
1407 : bool
1408 251558 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1409 : {
1410 251558 : PGPROC *proc = MyProc;
1411 : bool mustwait;
1412 251558 : int extraWaits = 0;
1413 : #ifdef LWLOCK_STATS
1414 : lwlock_stats *lwstats;
1415 :
1416 : lwstats = get_lwlock_stats_entry(lock);
1417 : #endif
1418 :
1419 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1420 :
1421 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1422 :
1423 : /* Ensure we will have room to remember the lock */
1424 251558 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1425 0 : elog(ERROR, "too many LWLocks taken");
1426 :
1427 : /*
1428 : * Lock out cancel/die interrupts until we exit the code section protected
1429 : * by the LWLock. This ensures that interrupts will not interfere with
1430 : * manipulations of data structures in shared memory.
1431 : */
1432 251558 : HOLD_INTERRUPTS();
1433 :
1434 : /*
1435 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1436 : * protocol as LWLockAcquire(). Check its comments for details.
1437 : */
1438 251558 : mustwait = LWLockAttemptLock(lock, mode);
1439 :
1440 251558 : if (mustwait)
1441 : {
1442 9994 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1443 :
1444 9994 : mustwait = LWLockAttemptLock(lock, mode);
1445 :
1446 9994 : if (mustwait)
1447 : {
1448 : /*
1449 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1450 : * bogus wakeups.
1451 : */
1452 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1453 :
1454 : #ifdef LWLOCK_STATS
1455 : lwstats->block_count++;
1456 : #endif
1457 :
1458 9728 : LWLockReportWaitStart(lock);
1459 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1460 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1461 :
1462 : for (;;)
1463 : {
1464 9728 : PGSemaphoreLock(proc->sem);
1465 9728 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1466 9728 : break;
1467 0 : extraWaits++;
1468 : }
1469 :
1470 : #ifdef LOCK_DEBUG
1471 : {
1472 : /* not waiting anymore */
1473 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1474 :
1475 : Assert(nwaiters < MAX_BACKENDS);
1476 : }
1477 : #endif
1478 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1479 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1480 9728 : LWLockReportWaitEnd();
1481 :
1482 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1483 : }
1484 : else
1485 : {
1486 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1487 :
1488 : /*
1489 : * Got lock in the second attempt, undo queueing. We need to treat
1490 : * this as having successfully acquired the lock, otherwise we'd
1491 : * not necessarily wake up people we've prevented from acquiring
1492 : * the lock.
1493 : */
1494 266 : LWLockDequeueSelf(lock);
1495 : }
1496 : }
1497 :
1498 : /*
1499 : * Fix the process wait semaphore's count for any absorbed wakeups.
1500 : */
1501 251558 : while (extraWaits-- > 0)
1502 0 : PGSemaphoreUnlock(proc->sem);
1503 :
1504 251558 : if (mustwait)
1505 : {
1506 : /* Failed to get lock, so release interrupt holdoff */
1507 9728 : RESUME_INTERRUPTS();
1508 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1509 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1510 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1511 : }
1512 : else
1513 : {
1514 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1515 : /* Add lock to list of locks held by this backend */
1516 241830 : held_lwlocks[num_held_lwlocks].lock = lock;
1517 241830 : held_lwlocks[num_held_lwlocks++].mode = mode;
1518 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1519 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1520 : }
1521 :
1522 251558 : return !mustwait;
1523 : }
1524 :
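/*
 * Hypothetical usage sketch (compiled out) of the acquire-or-wait pattern
 * described above: either we get the lock and do the work ourselves, or we
 * merely waited for the current holder and can then check whether its work
 * already covered our request.  "group_flush" and "flushed_up_to" are made-up
 * names, loosely modeled on the WALWriteLock usage mentioned above.
 */
#if 0
static void
group_flush(LWLock *flush_lock, pg_atomic_uint64 *flushed_up_to, uint64 target)
{
	for (;;)
	{
		/* somebody else may already have flushed far enough on our behalf */
		if (pg_atomic_read_u64(flushed_up_to) >= target)
			return;

		if (LWLockAcquireOrWait(flush_lock, LW_EXCLUSIVE))
		{
			/* got the lock: do the flush work up to (at least) 'target' */
			pg_atomic_write_u64(flushed_up_to, target);
			LWLockRelease(flush_lock);
			return;
		}

		/*
		 * Didn't get the lock, but we slept until the holder released it;
		 * loop back and re-check whether its work covered our target.
		 */
	}
}
#endif
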
1525 : /*
1526 : * Does the lwlock in its current state need to wait for the variable value to
1527 : * change?
1528 : *
1529 : * If we don't need to wait, and it's because the value of the variable has
1530 : * changed, store the current value in newval.
1531 : *
1532 : * *result is set to true if the lock was free, and false otherwise.
1533 : */
1534 : static bool
1535 6627634 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1536 : uint64 *newval, bool *result)
1537 : {
1538 : bool mustwait;
1539 : uint64 value;
1540 :
1541 : /*
1542 : * Test first to see if the slot is free right now.
1543 : *
1544 : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1545 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1546 : * this, so we don't need a memory barrier here as far as the current
1547 : * usage is concerned. But that might not be safe in general.
1548 : */
1549 6627634 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1550 :
1551 6627634 : if (!mustwait)
1552 : {
1553 4685650 : *result = true;
1554 4685650 : return false;
1555 : }
1556 :
1557 1941984 : *result = false;
1558 :
1559 : /*
1560 : * Reading this value atomically is safe even on platforms where uint64
1561 : * cannot be read without observing a torn value.
1562 : */
1563 1941984 : value = pg_atomic_read_u64(valptr);
1564 :
1565 1941984 : if (value != oldval)
1566 : {
1567 1736848 : mustwait = false;
1568 1736848 : *newval = value;
1569 : }
1570 : else
1571 : {
1572 205136 : mustwait = true;
1573 : }
1574 :
1575 1941984 : return mustwait;
1576 : }
1577 :
1578 : /*
1579 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1580 : *
1581 : * If the lock is held and *valptr equals oldval, waits until the lock is
1582 : * either freed, or the lock holder updates *valptr by calling
1583 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1584 : * waiting), returns true. If the lock is still held, but *valptr no longer
1585 : * matches oldval, returns false and sets *newval to the current value in
1586 : * *valptr.
1587 : *
1588 : * Note: this function ignores shared lock holders; if the lock is held
1589 : * in shared mode, returns 'true'.
1590 : *
1591 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1592 : * hence the caller of this function may want to rely on an explicit barrier or
1593 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1594 : */
1595 : bool
1596 6422498 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1597 : uint64 *newval)
1598 : {
1599 6422498 : PGPROC *proc = MyProc;
1600 6422498 : int extraWaits = 0;
1601 6422498 : bool result = false;
1602 : #ifdef LWLOCK_STATS
1603 : lwlock_stats *lwstats;
1604 :
1605 : lwstats = get_lwlock_stats_entry(lock);
1606 : #endif
1607 :
1608 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1609 :
1610 : /*
1611 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1612 : * cleanup mechanism to remove us from the wait queue if we got
1613 : * interrupted.
1614 : */
1615 6422498 : HOLD_INTERRUPTS();
1616 :
1617 : /*
1618 : * Loop here to check the lock's status after each time we are signaled.
1619 : */
1620 : for (;;)
1621 99834 : {
1622 : bool mustwait;
1623 :
1624 6522332 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1625 : &result);
1626 :
1627 6522332 : if (!mustwait)
1628 6417030 : break; /* the lock was free or value didn't match */
1629 :
1630 : /*
1631 :          * Add myself to the wait queue. Note that this is racy; somebody else
1632 :          * could wake up before we're finished queuing. NB: We're using nearly
1633 :          * the same twice-in-a-row lock acquisition protocol as
1634 :          * LWLockAcquire(). Check its comments for details. The only
1635 :          * difference is that we also have to check the variable's value when
1636 :          * checking the state of the lock.
1637 : */
1638 105302 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1639 :
1640 : /*
1641 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1642 : * lock is released.
1643 : */
1644 105302 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1645 :
1646 : /*
1647 :          * We're now guaranteed to be woken up if necessary. Recheck the lock
1648 :          * and the variable's state.
1649 : */
1650 105302 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1651 : &result);
1652 :
1653 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1654 105302 : if (!mustwait)
1655 : {
1656 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1657 :
1658 5468 : LWLockDequeueSelf(lock);
1659 5468 : break;
1660 : }
1661 :
1662 : /*
1663 : * Wait until awakened.
1664 : *
1665 : * It is possible that we get awakened for a reason other than being
1666 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1667 : * we've gotten the LWLock, re-increment the sema by the number of
1668 : * additional signals received.
1669 : */
1670 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1671 :
1672 : #ifdef LWLOCK_STATS
1673 : lwstats->block_count++;
1674 : #endif
1675 :
1676 99834 : LWLockReportWaitStart(lock);
1677 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1678 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1679 :
1680 : for (;;)
1681 : {
1682 99834 : PGSemaphoreLock(proc->sem);
1683 99834 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1684 99834 : break;
1685 0 : extraWaits++;
1686 : }
1687 :
1688 : #ifdef LOCK_DEBUG
1689 : {
1690 : /* not waiting anymore */
1691 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1692 :
1693 : Assert(nwaiters < MAX_BACKENDS);
1694 : }
1695 : #endif
1696 :
1697 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1698 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1699 99834 : LWLockReportWaitEnd();
1700 :
1701 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1702 :
1703 : /* Now loop back and check the status of the lock again. */
1704 : }
1705 :
1706 : /*
1707 : * Fix the process wait semaphore's count for any absorbed wakeups.
1708 : */
1709 6422498 : while (extraWaits-- > 0)
1710 0 : PGSemaphoreUnlock(proc->sem);
1711 :
1712 : /*
1713 : * Now okay to allow cancel/die interrupts.
1714 : */
1715 6422498 : RESUME_INTERRUPTS();
1716 :
1717 6422498 : return result;
1718 : }
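/*
 * Editor's sketch (not part of lwlock.c): how a caller might use
 * LWLockWaitForVar() to wait until the exclusive holder of 'lock' has either
 * finished entirely (lock released) or advanced a progress variable past
 * 'upto'.  The function name, variable name, and semantics below are invented
 * for illustration; the real in-tree caller is WaitXLogInsertionsToFinish().
 * Assumes the declarations already included by this file (storage/lwlock.h,
 * port/atomics.h).
 */
#if 0							/* illustration only, not compiled */
static uint64
wait_for_progress(LWLock *lock, pg_atomic_uint64 *insertingAt, uint64 upto)
{
	uint64		seen = 0;

	for (;;)
	{
		uint64		newval;

		/*
		 * Returns true as soon as the lock is free, i.e. the holder has
		 * finished; returns false and sets 'newval' if the lock is still
		 * held but the variable no longer matches 'seen'.
		 */
		if (LWLockWaitForVar(lock, insertingAt, seen, &newval))
			return upto;		/* holder finished its whole operation */

		if (newval >= upto)
			return newval;		/* holder progressed far enough for us */

		seen = newval;			/* keep waiting for further progress */
	}
}
#endif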
1719 :
1720 :
1721 : /*
1722 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1723 : *
1724 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1725 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1726 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1727 : * lock is guaranteed to see the new value, and act accordingly.
1728 : *
1729 : * The caller must be holding the lock in exclusive mode.
1730 : */
1731 : void
1732 4896906 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1733 : {
1734 : proclist_head wakeup;
1735 : proclist_mutable_iter iter;
1736 :
1737 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1738 :
1739 : /*
1740 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1741 : * that the variable is updated before waking up waiters.
1742 : */
1743 4896906 : pg_atomic_exchange_u64(valptr, val);
1744 :
1745 4896906 : proclist_init(&wakeup);
1746 :
1747 4896906 : LWLockWaitListLock(lock);
1748 :
1749 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1750 :
1751 : /*
1752 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1753 : * up. They are always in the front of the queue.
1754 : */
1755 4900618 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1756 : {
1757 93306 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1758 :
1759 93306 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1760 89594 : break;
1761 :
1762 3712 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1763 3712 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1764 :
1765 : /* see LWLockWakeup() */
1766 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1767 3712 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1768 : }
1769 :
1770 : /* We are done updating shared state of the lock itself. */
1771 4896906 : LWLockWaitListUnlock(lock);
1772 :
1773 : /*
1774 : * Awaken any waiters I removed from the queue.
1775 : */
1776 4900618 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1777 : {
1778 3712 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1779 :
1780 3712 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1781 : /* check comment in LWLockWakeup() about this barrier */
1782 3712 : pg_write_barrier();
1783 3712 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1784 3712 : PGSemaphoreUnlock(waiter->sem);
1785 : }
1786 4896906 : }
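/*
 * Editor's sketch (not part of lwlock.c): a holder of the exclusive lock can
 * publish intermediate progress to concurrent LWLockWaitForVar() callers
 * without releasing the lock.  The names 'have_more_work', 'do_one_step',
 * 'progress', and 'current_position' are invented for illustration.
 */
#if 0							/* illustration only, not compiled */
	LWLockAcquire(lock, LW_EXCLUSIVE);
	while (have_more_work)
	{
		do_one_step();
		/* wake LW_WAIT_UNTIL_FREE waiters whose 'oldval' is now stale */
		LWLockUpdateVar(lock, &progress, current_position);
	}
	LWLockRelease(lock);
#endif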
1787 :
1788 :
1789 : /*
1790 : * Stop treating lock as held by current backend.
1791 : *
1792 : * This is the code that can be shared between actually releasing a lock
1793 : * (LWLockRelease()) and just not tracking ownership of the lock anymore
1794 : * without releasing the lock (LWLockDisown()).
1795 : *
1796 : * Returns the mode in which the lock was held by the current backend.
1797 : *
1798 : * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
1799 :  * to the caller.
1800 : *
1801 : * NB: This will leave lock->owner pointing to the current backend (if
1802 : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1803 : * debug cases of missing wakeups during lock release.
1804 : */
1805 : static inline LWLockMode
1806 703790038 : LWLockDisownInternal(LWLock *lock)
1807 : {
1808 : LWLockMode mode;
1809 : int i;
1810 :
1811 : /*
1812 : * Remove lock from list of locks held. Usually, but not always, it will
1813 : * be the latest-acquired lock; so search array backwards.
1814 : */
1815 782831686 : for (i = num_held_lwlocks; --i >= 0;)
1816 782831686 : if (lock == held_lwlocks[i].lock)
1817 703790038 : break;
1818 :
1819 703790038 : if (i < 0)
1820 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1821 :
1822 703790038 : mode = held_lwlocks[i].mode;
1823 :
1824 703790038 : num_held_lwlocks--;
1825 782831686 : for (; i < num_held_lwlocks; i++)
1826 79041648 : held_lwlocks[i] = held_lwlocks[i + 1];
1827 :
1828 703790038 : return mode;
1829 : }
1830 :
1831 : /*
1832 : * Helper function to release lock, shared between LWLockRelease() and
1833 :  * LWLockReleaseDisowned().
1834 : */
1835 : static void
1836 703790038 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
1837 : {
1838 : uint32 oldstate;
1839 : bool check_waiters;
1840 :
1841 : /*
1842 :      * Release my hold on the lock; after that it can immediately be acquired
1843 :      * by others, even if we still have to wake up other waiters.
1844 : */
1845 703790038 : if (mode == LW_EXCLUSIVE)
1846 444177476 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1847 : else
1848 259612562 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1849 :
1850 : /* nobody else can have that kind of lock */
1851 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1852 :
1853 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1854 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1855 :
1856 : /*
1857 :      * We're still waiting for backends to get scheduled; don't wake them up
1858 :      * again.
1859 : */
1860 703790038 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1861 5216046 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1862 5216046 : (oldstate & LW_LOCK_MASK) == 0)
1863 5199458 : check_waiters = true;
1864 : else
1865 698590580 : check_waiters = false;
1866 :
1867 : /*
1868 : * As waking up waiters requires the spinlock to be acquired, only do so
1869 : * if necessary.
1870 : */
1871 703790038 : if (check_waiters)
1872 : {
1873 : /* XXX: remove before commit? */
1874 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1875 5199458 : LWLockWakeup(lock);
1876 : }
1877 703790038 : }
1878 :
1879 :
1880 : /*
1881 : * Stop treating lock as held by current backend.
1882 : *
1883 :  * After calling this function it's the caller's responsibility to ensure that
1884 :  * the lock gets released (via LWLockReleaseDisowned()), even in case of an
1885 :  * error. This is only desirable if the lock is going to be released in a
1886 : * different process than the process that acquired it.
1887 : */
1888 : void
1889 0 : LWLockDisown(LWLock *lock)
1890 : {
1891 0 : LWLockDisownInternal(lock);
1892 :
1893 0 : RESUME_INTERRUPTS();
1894 0 : }
1895 :
1896 : /*
1897 : * LWLockRelease - release a previously acquired lock
1898 : */
1899 : void
1900 703790038 : LWLockRelease(LWLock *lock)
1901 : {
1902 : LWLockMode mode;
1903 :
1904 703790038 : mode = LWLockDisownInternal(lock);
1905 :
1906 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1907 :
1908 703790038 : LWLockReleaseInternal(lock, mode);
1909 :
1910 : /*
1911 : * Now okay to allow cancel/die interrupts.
1912 : */
1913 703790038 : RESUME_INTERRUPTS();
1914 703790038 : }
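/*
 * Editor's sketch (not part of lwlock.c): the common usage pattern is a short
 * critical section bracketed by LWLockAcquire() and LWLockRelease().  The
 * lock name 'SomeSharedLock' and the protected structure are invented for
 * illustration.
 */
#if 0							/* illustration only, not compiled */
	LWLockAcquire(SomeSharedLock, LW_SHARED);
	value = shared_struct->field;		/* read-only access under LW_SHARED */
	LWLockRelease(SomeSharedLock);

	LWLockAcquire(SomeSharedLock, LW_EXCLUSIVE);
	shared_struct->field = value + 1;	/* modification requires LW_EXCLUSIVE */
	LWLockRelease(SomeSharedLock);
#endif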
1915 :
1916 : /*
1917 : * Release lock previously disowned with LWLockDisown().
1918 : */
1919 : void
1920 0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
1921 : {
1922 0 : LWLockReleaseInternal(lock, mode);
1923 0 : }
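/*
 * Editor's sketch (not part of lwlock.c): disowning is meant for handing a
 * held lock from the acquiring process to another process that releases it
 * later.  Because the disowning backend stops tracking the lock, the caller
 * must carry the mode along itself; the 'handoff' structure below is invented
 * for illustration.
 */
#if 0							/* illustration only, not compiled */
	/* In the acquiring process: */
	LWLockAcquire(lock, LW_EXCLUSIVE);
	LWLockDisown(lock);					/* no longer tracked by this backend */
	handoff->lock = lock;
	handoff->mode = LW_EXCLUSIVE;

	/* Later, in the process that completes the work: */
	LWLockReleaseDisowned(handoff->lock, handoff->mode);
#endif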
1924 :
1925 : /*
1926 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1927 : */
1928 : void
1929 28483938 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1930 : {
1931 : /*
1932 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1933 : * that the variable is updated before releasing the lock.
1934 : */
1935 28483938 : pg_atomic_exchange_u64(valptr, val);
1936 :
1937 28483938 : LWLockRelease(lock);
1938 28483938 : }
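/*
 * Editor's sketch (not part of lwlock.c): when the variable has a defined
 * "free" value (assumed here to be 0), the holder can reset it and release
 * the lock in one call, so later LWLockWaitForVar() callers see the lock as
 * free with the variable back at its idle value.  'progress' is an invented
 * name.
 */
#if 0							/* illustration only, not compiled */
	LWLockAcquire(lock, LW_EXCLUSIVE);
	/* ... do the protected work, possibly calling LWLockUpdateVar() ... */
	LWLockReleaseClearVar(lock, &progress, 0);	/* reset variable and release */
#endif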
1939 :
1940 :
1941 : /*
1942 : * LWLockReleaseAll - release all currently-held locks
1943 : *
1944 : * Used to clean up after ereport(ERROR). An important difference between this
1945 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1946 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1947 : * has been set to an appropriate level earlier in error recovery. We could
1948 : * decrement it below zero if we allow it to drop for each released lock!
1949 : */
1950 : void
1951 107942 : LWLockReleaseAll(void)
1952 : {
1953 108330 : while (num_held_lwlocks > 0)
1954 : {
1955 388 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1956 :
1957 388 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1958 : }
1959 107942 : }
1960 :
1961 :
1962 : /*
1963 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1964 : *
1965 : * This is meant as debug support only.
1966 : */
1967 : bool
1968 0 : LWLockHeldByMe(LWLock *lock)
1969 : {
1970 : int i;
1971 :
1972 0 : for (i = 0; i < num_held_lwlocks; i++)
1973 : {
1974 0 : if (held_lwlocks[i].lock == lock)
1975 0 : return true;
1976 : }
1977 0 : return false;
1978 : }
1979 :
1980 : /*
1981 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1982 : *
1983 : * This is meant as debug support only.
1984 : */
1985 : bool
1986 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1987 : {
1988 : char *held_lock_addr;
1989 : char *begin;
1990 : char *end;
1991 : int i;
1992 :
1993 0 : begin = (char *) lock;
1994 0 : end = begin + nlocks * stride;
1995 0 : for (i = 0; i < num_held_lwlocks; i++)
1996 : {
1997 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
1998 0 : if (held_lock_addr >= begin &&
1999 0 : held_lock_addr < end &&
2000 0 : (held_lock_addr - begin) % stride == 0)
2001 0 : return true;
2002 : }
2003 0 : return false;
2004 : }
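/*
 * Editor's sketch (not part of lwlock.c): the stride parameter lets the check
 * cover an array of structs that each embed an LWLock.  The struct, array,
 * and helper below are invented for illustration.
 */
#if 0							/* illustration only, not compiled */
typedef struct MyShmemSlot
{
	LWLock		lock;			/* embedded lock, first member of each slot */
	int			payload;
} MyShmemSlot;

static void
assert_no_slot_lock_held(MyShmemSlot *slots, int nslots)
{
	/* stride is the size of the containing struct, not of the LWLock */
	Assert(!LWLockAnyHeldByMe(&slots[0].lock, nslots, sizeof(MyShmemSlot)));
}
#endif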
2005 :
2006 : /*
2007 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
2008 : *
2009 : * This is meant as debug support only.
2010 : */
2011 : bool
2012 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
2013 : {
2014 : int i;
2015 :
2016 0 : for (i = 0; i < num_held_lwlocks; i++)
2017 : {
2018 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
2019 0 : return true;
2020 : }
2021 0 : return false;
2022 : }
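/*
 * Editor's sketch (not part of lwlock.c): these helpers are intended for
 * assertions that document a function's locking requirements, e.g. at the top
 * of code that modifies state protected by a hypothetical 'SomeSharedLock'.
 */
#if 0							/* illustration only, not compiled */
	Assert(LWLockHeldByMeInMode(SomeSharedLock, LW_EXCLUSIVE));
#endif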