Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frammishes. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. Acquiring the lock with LWLockAcquire
15 : * does not reset the variable; it keeps the value it was set to when the
16 : * lock was last released, and it can be updated without releasing the lock
17 : * by calling LWLockUpdateVar. LWLockWaitForVar waits for the variable to be
18 : * updated, or until the lock is free. When releasing the lock with
19 : * LWLockReleaseClearVar() the value can be set to an appropriate value for a
20 : * free lock. The meaning of the variable is up to the caller; the
21 : * lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the new value is
52 : * zero (we get that atomically), we know we can/have to release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a multi-phase locking protocol:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem above: nobody can release too
74 : * quickly before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
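
/*
 * A condensed sketch of the multi-phase protocol above. It mirrors the
 * actual control flow of LWLockAcquire() further down in this file, which
 * wraps the same loop with interrupt holdoff, statistics, and bookkeeping:
 *
 *     for (;;)
 *     {
 *         if (!LWLockAttemptLock(lock, mode))
 *             break;                            -- Phase 1: got it atomically
 *         LWLockQueueSelf(lock, mode);          -- Phase 2: enqueue ourselves
 *         if (!LWLockAttemptLock(lock, mode))
 *         {
 *             LWLockDequeueSelf(lock);          -- Phase 3: got it, undo queue
 *             break;
 *         }
 *         PGSemaphoreLock(MyProc->sem);         -- Phase 4: sleep, then retry
 *     }
 */
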
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "postmaster/postmaster.h"
84 : #include "storage/proc.h"
85 : #include "storage/proclist.h"
86 : #include "storage/spin.h"
87 : #include "utils/memutils.h"
88 :
89 : #ifdef LWLOCK_STATS
90 : #include "utils/hsearch.h"
91 : #endif
92 :
93 :
94 : /* We use the ShmemLock spinlock to protect LWLockCounter */
95 : extern slock_t *ShmemLock;
96 :
97 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
98 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
99 : #define LW_FLAG_LOCKED ((uint32) 1 << 28)
100 :
101 : #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
102 : #define LW_VAL_SHARED 1
103 :
104 : #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
105 : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
106 : #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
107 :
108 : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
109 : "MAX_BACKENDS too big for lwlock.c");
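
/*
 * For orientation, the resulting layout of the 32-bit state word, given the
 * definitions above (the macros, not these bit numbers, are authoritative):
 *
 *     bit 30     LW_FLAG_HAS_WAITERS  - wait queue is (possibly) non-empty
 *     bit 29     LW_FLAG_RELEASE_OK   - releasing may wake up waiters
 *     bit 28     LW_FLAG_LOCKED       - wait list is locked
 *     bit 24     LW_VAL_EXCLUSIVE     - sentinel for an exclusive holder
 *     bits 0-23  shared holder count  - incremented by LW_VAL_SHARED
 *
 * Hence (state & LW_LOCK_MASK) == 0 means "no holders at all", and
 * (state & LW_VAL_EXCLUSIVE) != 0 means "held exclusively".
 */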
110 :
111 : /*
112 : * There are three sorts of LWLock "tranches":
113 : *
114 : * 1. The individually-named locks defined in lwlocknames.h each have their
115 : * own tranche. We absorb the names of these tranches from there into
116 : * BuiltinTrancheNames here.
117 : *
118 : * 2. There are some predefined tranches for built-in groups of locks.
119 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
120 : * appear in BuiltinTrancheNames[] below.
121 : *
122 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
123 : * or LWLockRegisterTranche. The names of these that are known in the current
124 : * process appear in LWLockTrancheNames[].
125 : *
126 : * All these names are user-visible as wait event names, so choose with care
127 : * ... and do not forget to update the documentation's list of wait events.
128 : */
129 : static const char *const BuiltinTrancheNames[] = {
130 : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname) "Lock",
131 : #include "storage/lwlocklist.h"
132 : #undef PG_LWLOCK
133 : [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
134 : [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
135 : [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
136 : [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
137 : [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
138 : [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
139 : [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
140 : [LWTRANCHE_WAL_INSERT] = "WALInsert",
141 : [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
142 : [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
143 : [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
144 : [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
145 : [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
146 : [LWTRANCHE_LOCK_MANAGER] = "LockManager",
147 : [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
148 : [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
149 : [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
150 : [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
151 : [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
152 : [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
153 : [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
154 : [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
155 : [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
156 : [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
157 : [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
158 : [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
159 : [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
160 : [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
161 : [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
162 : [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
163 : [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
164 : [LWTRANCHE_COMMITTS_SLRU] = "CommitTSSLRU",
165 : [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
166 : [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
167 : [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
168 : [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
169 : [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
170 : [LWTRANCHE_XACT_SLRU] = "XactSLRU",
171 : [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
172 : };
173 :
174 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
175 : LWTRANCHE_FIRST_USER_DEFINED,
176 : "missing entries in BuiltinTrancheNames[]");
177 :
178 : /*
179 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
180 : * stores the names of all dynamically-created tranches known to the current
181 : * process. Any unused entries in the array will contain NULL.
182 : */
183 : static const char **LWLockTrancheNames = NULL;
184 : static int LWLockTrancheNamesAllocated = 0;
185 :
186 : /*
187 : * This points to the main array of LWLocks in shared memory. Backends inherit
188 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
189 : * where we have special measures to pass it down).
190 : */
191 : LWLockPadded *MainLWLockArray = NULL;
192 :
193 : /*
194 : * We use this structure to keep track of locked LWLocks for release
195 : * during error recovery. Normally, only a few will be held at once, but
196 : * occasionally the number can be much higher; for example, the pg_buffercache
197 : * extension locks all buffer partitions simultaneously.
198 : */
199 : #define MAX_SIMUL_LWLOCKS 200
200 :
201 : /* struct representing the LWLocks we're holding */
202 : typedef struct LWLockHandle
203 : {
204 : LWLock *lock;
205 : LWLockMode mode;
206 : } LWLockHandle;
207 :
208 : static int num_held_lwlocks = 0;
209 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
210 :
211 : /* struct representing the LWLock tranche request for named tranche */
212 : typedef struct NamedLWLockTrancheRequest
213 : {
214 : char tranche_name[NAMEDATALEN];
215 : int num_lwlocks;
216 : } NamedLWLockTrancheRequest;
217 :
218 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
219 : static int NamedLWLockTrancheRequestsAllocated = 0;
220 :
221 : /*
222 : * NamedLWLockTrancheRequests is both the valid length of the request array,
223 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
224 : * This variable and NamedLWLockTrancheArray are non-static so that
225 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
226 : */
227 : int NamedLWLockTrancheRequests = 0;
228 :
229 : /* points to data in shared memory: */
230 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
231 :
232 : static void InitializeLWLocks(void);
233 : static inline void LWLockReportWaitStart(LWLock *lock);
234 : static inline void LWLockReportWaitEnd(void);
235 : static const char *GetLWTrancheName(uint16 trancheId);
236 :
237 : #define T_NAME(lock) \
238 : GetLWTrancheName((lock)->tranche)
239 :
240 : #ifdef LWLOCK_STATS
241 : typedef struct lwlock_stats_key
242 : {
243 : int tranche;
244 : void *instance;
245 : } lwlock_stats_key;
246 :
247 : typedef struct lwlock_stats
248 : {
249 : lwlock_stats_key key;
250 : int sh_acquire_count;
251 : int ex_acquire_count;
252 : int block_count;
253 : int dequeue_self_count;
254 : int spin_delay_count;
255 : } lwlock_stats;
256 :
257 : static HTAB *lwlock_stats_htab;
258 : static lwlock_stats lwlock_stats_dummy;
259 : #endif
260 :
261 : #ifdef LOCK_DEBUG
262 : bool Trace_lwlocks = false;
263 :
264 : inline static void
265 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
266 : {
267 : /* hide statement & context here, otherwise the log is just too verbose */
268 : if (Trace_lwlocks)
269 : {
270 : uint32 state = pg_atomic_read_u32(&lock->state);
271 :
272 : ereport(LOG,
273 : (errhidestmt(true),
274 : errhidecontext(true),
275 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
276 : MyProcPid,
277 : where, T_NAME(lock), lock,
278 : (state & LW_VAL_EXCLUSIVE) != 0,
279 : state & LW_SHARED_MASK,
280 : (state & LW_FLAG_HAS_WAITERS) != 0,
281 : pg_atomic_read_u32(&lock->nwaiters),
282 : (state & LW_FLAG_RELEASE_OK) != 0)));
283 : }
284 : }
285 :
286 : inline static void
287 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
288 : {
289 : /* hide statement & context here, otherwise the log is just too verbose */
290 : if (Trace_lwlocks)
291 : {
292 : ereport(LOG,
293 : (errhidestmt(true),
294 : errhidecontext(true),
295 : errmsg_internal("%s(%s %p): %s", where,
296 : T_NAME(lock), lock, msg)));
297 : }
298 : }
299 :
300 : #else /* not LOCK_DEBUG */
301 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
302 : #define LOG_LWDEBUG(a,b,c) ((void)0)
303 : #endif /* LOCK_DEBUG */
304 :
305 : #ifdef LWLOCK_STATS
306 :
307 : static void init_lwlock_stats(void);
308 : static void print_lwlock_stats(int code, Datum arg);
309 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
310 :
311 : static void
312 : init_lwlock_stats(void)
313 : {
314 : HASHCTL ctl;
315 : static MemoryContext lwlock_stats_cxt = NULL;
316 : static bool exit_registered = false;
317 :
318 : if (lwlock_stats_cxt != NULL)
319 : MemoryContextDelete(lwlock_stats_cxt);
320 :
321 : /*
322 : * The LWLock stats will be updated within a critical section, which
323 : * requires allocating new hash entries. Allocations within a critical
324 : * section are normally not allowed because running out of memory would
325 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
326 : * turned on in production, so that's an acceptable risk. The hash entries
327 : * are small, so the risk of running out of memory is minimal in practice.
328 : */
329 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
330 : "LWLock stats",
331 : ALLOCSET_DEFAULT_SIZES);
332 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
333 :
334 : ctl.keysize = sizeof(lwlock_stats_key);
335 : ctl.entrysize = sizeof(lwlock_stats);
336 : ctl.hcxt = lwlock_stats_cxt;
337 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
338 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
339 : if (!exit_registered)
340 : {
341 : on_shmem_exit(print_lwlock_stats, 0);
342 : exit_registered = true;
343 : }
344 : }
345 :
346 : static void
347 : print_lwlock_stats(int code, Datum arg)
348 : {
349 : HASH_SEQ_STATUS scan;
350 : lwlock_stats *lwstats;
351 :
352 : hash_seq_init(&scan, lwlock_stats_htab);
353 :
354 : /* Grab an LWLock to keep different backends from mixing reports */
355 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
356 :
357 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
358 : {
359 : fprintf(stderr,
360 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
361 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
362 : lwstats->key.instance, lwstats->sh_acquire_count,
363 : lwstats->ex_acquire_count, lwstats->block_count,
364 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
365 : }
366 :
367 : LWLockRelease(&MainLWLockArray[0].lock);
368 : }
369 :
370 : static lwlock_stats *
371 : get_lwlock_stats_entry(LWLock *lock)
372 : {
373 : lwlock_stats_key key;
374 : lwlock_stats *lwstats;
375 : bool found;
376 :
377 : /*
378 : * During shared memory initialization, the hash table doesn't exist yet.
379 : * Stats of that phase aren't very interesting, so just collect operations
380 : * on all locks in a single dummy entry.
381 : */
382 : if (lwlock_stats_htab == NULL)
383 : return &lwlock_stats_dummy;
384 :
385 : /* Fetch or create the entry. */
386 : MemSet(&key, 0, sizeof(key));
387 : key.tranche = lock->tranche;
388 : key.instance = lock;
389 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
390 : if (!found)
391 : {
392 : lwstats->sh_acquire_count = 0;
393 : lwstats->ex_acquire_count = 0;
394 : lwstats->block_count = 0;
395 : lwstats->dequeue_self_count = 0;
396 : lwstats->spin_delay_count = 0;
397 : }
398 : return lwstats;
399 : }
400 : #endif /* LWLOCK_STATS */
401 :
402 :
403 : /*
404 : * Compute number of LWLocks required by named tranches. These will be
405 : * allocated in the main array.
406 : */
407 : static int
408 6834 : NumLWLocksForNamedTranches(void)
409 : {
410 6834 : int numLocks = 0;
411 : int i;
412 :
413 6882 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
414 48 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
415 :
416 6834 : return numLocks;
417 : }
418 :
419 : /*
420 : * Compute shmem space needed for LWLocks and named tranches.
421 : */
422 : Size
423 5066 : LWLockShmemSize(void)
424 : {
425 : Size size;
426 : int i;
427 5066 : int numLocks = NUM_FIXED_LWLOCKS;
428 :
429 : /* Calculate total number of locks needed in the main array. */
430 5066 : numLocks += NumLWLocksForNamedTranches();
431 :
432 : /* Space for the LWLock array. */
433 5066 : size = mul_size(numLocks, sizeof(LWLockPadded));
434 :
435 : /* Space for dynamic allocation counter, plus room for alignment. */
436 5066 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
437 :
438 : /* space for named tranches. */
439 5066 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
440 :
441 : /* space for name of each tranche. */
442 5102 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
443 36 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
444 :
445 5066 : return size;
446 : }
447 :
448 : /*
449 : * Allocate shmem space for the main LWLock array and all tranches and
450 : * initialize it. We also register extension LWLock tranches here.
451 : */
452 : void
453 1768 : CreateLWLocks(void)
454 : {
455 1768 : if (!IsUnderPostmaster)
456 : {
457 1768 : Size spaceLocks = LWLockShmemSize();
458 : int *LWLockCounter;
459 : char *ptr;
460 :
461 : /* Allocate space */
462 1768 : ptr = (char *) ShmemAlloc(spaceLocks);
463 :
464 : /* Leave room for dynamic allocation of tranches */
465 1768 : ptr += sizeof(int);
466 :
467 : /* Ensure desired alignment of LWLock array */
468 1768 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
469 :
470 1768 : MainLWLockArray = (LWLockPadded *) ptr;
471 :
472 : /*
473 : * Initialize the dynamic-allocation counter for tranches, which is
474 : * stored just before the first LWLock.
475 : */
476 1768 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
477 1768 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
478 :
479 : /* Initialize all LWLocks */
480 1768 : InitializeLWLocks();
481 : }
482 :
483 : /* Register named extension LWLock tranches in the current process. */
484 1780 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
485 12 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
486 12 : NamedLWLockTrancheArray[i].trancheName);
487 1768 : }
488 :
489 : /*
490 : * Initialize LWLocks that are fixed and those belonging to named tranches.
491 : */
492 : static void
493 1768 : InitializeLWLocks(void)
494 : {
495 1768 : int numNamedLocks = NumLWLocksForNamedTranches();
496 : int id;
497 : int i;
498 : int j;
499 : LWLockPadded *lock;
500 :
501 : /* Initialize all individual LWLocks in main array */
502 95472 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
503 93704 : LWLockInitialize(&lock->lock, id);
504 :
505 : /* Initialize buffer mapping LWLocks in main array */
506 1768 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
507 228072 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
508 226304 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
509 :
510 : /* Initialize lmgrs' LWLocks in main array */
511 1768 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
512 30056 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
513 28288 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
514 :
515 : /* Initialize predicate lmgrs' LWLocks in main array */
516 1768 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
517 30056 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
518 28288 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
519 :
520 : /*
521 : * Copy the info about any named tranches into shared memory (so that
522 : * other processes can see it), and initialize the requested LWLocks.
523 : */
524 1768 : if (NamedLWLockTrancheRequests > 0)
525 : {
526 : char *trancheNames;
527 :
528 12 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
529 12 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
530 :
531 12 : trancheNames = (char *) NamedLWLockTrancheArray +
532 12 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
533 12 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
534 :
535 24 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
536 : {
537 : NamedLWLockTrancheRequest *request;
538 : NamedLWLockTranche *tranche;
539 : char *name;
540 :
541 12 : request = &NamedLWLockTrancheRequestArray[i];
542 12 : tranche = &NamedLWLockTrancheArray[i];
543 :
544 12 : name = trancheNames;
545 12 : trancheNames += strlen(request->tranche_name) + 1;
546 12 : strcpy(name, request->tranche_name);
547 12 : tranche->trancheId = LWLockNewTrancheId();
548 12 : tranche->trancheName = name;
549 :
550 24 : for (j = 0; j < request->num_lwlocks; j++, lock++)
551 12 : LWLockInitialize(&lock->lock, tranche->trancheId);
552 : }
553 : }
554 1768 : }
555 :
556 : /*
557 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
558 : */
559 : void
560 29874 : InitLWLockAccess(void)
561 : {
562 : #ifdef LWLOCK_STATS
563 : init_lwlock_stats();
564 : #endif
565 29874 : }
566 :
567 : /*
568 : * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
569 : * to the specified tranche.
570 : *
571 : * The caller needs to retrieve the requested number of LWLocks starting
572 : * from the base lock address returned by this API. This can be used for
573 : * tranches that were requested via the RequestNamedLWLockTranche() API.
574 : */
575 : LWLockPadded *
576 12 : GetNamedLWLockTranche(const char *tranche_name)
577 : {
578 : int lock_pos;
579 : int i;
580 :
581 : /*
582 : * Obtain the position of the base address of the LWLocks belonging to the
583 : * requested tranche_name in MainLWLockArray. LWLocks for named tranches
584 : * are placed in MainLWLockArray after the fixed locks.
585 : */
586 12 : lock_pos = NUM_FIXED_LWLOCKS;
587 12 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
588 : {
589 12 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
590 : tranche_name) == 0)
591 12 : return &MainLWLockArray[lock_pos];
592 :
593 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
594 : }
595 :
596 0 : elog(ERROR, "requested tranche is not registered");
597 :
598 : /* just to keep compiler quiet */
599 : return NULL;
600 : }
601 :
602 : /*
603 : * Allocate a new tranche ID.
604 : */
605 : int
606 28 : LWLockNewTrancheId(void)
607 : {
608 : int result;
609 : int *LWLockCounter;
610 :
611 28 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
612 28 : SpinLockAcquire(ShmemLock);
613 28 : result = (*LWLockCounter)++;
614 28 : SpinLockRelease(ShmemLock);
615 :
616 28 : return result;
617 : }
618 :
619 : /*
620 : * Register a dynamic tranche name in the lookup table of the current process.
621 : *
622 : * This routine will save a pointer to the tranche name passed as an argument,
623 : * so the name should be allocated in a backend-lifetime context
624 : * (shared memory, TopMemoryContext, static constant, or similar).
625 : *
626 : * The tranche name will be user-visible as a wait event name, so try to
627 : * use a name that fits the style for those.
628 : */
629 : void
630 34 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
631 : {
632 : /* This should only be called for user-defined tranches. */
633 34 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
634 0 : return;
635 :
636 : /* Convert to array index. */
637 34 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
638 :
639 : /* If necessary, create or enlarge array. */
640 34 : if (tranche_id >= LWLockTrancheNamesAllocated)
641 : {
642 : int newalloc;
643 :
644 30 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
645 :
646 30 : if (LWLockTrancheNames == NULL)
647 30 : LWLockTrancheNames = (const char **)
648 30 : MemoryContextAllocZero(TopMemoryContext,
649 : newalloc * sizeof(char *));
650 : else
651 0 : LWLockTrancheNames =
652 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
653 30 : LWLockTrancheNamesAllocated = newalloc;
654 : }
655 :
656 34 : LWLockTrancheNames[tranche_id] = tranche_name;
657 : }
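
/*
 * A minimal usage sketch for LWLockNewTrancheId() and
 * LWLockRegisterTranche(), e.g. for a lock an extension places in shared
 * memory ("MyExtensionDSM" and my_struct are illustrative names, not part
 * of this file):
 *
 *     int  tranche_id = LWLockNewTrancheId();
 *
 *     LWLockRegisterTranche(tranche_id, "MyExtensionDSM");
 *     LWLockInitialize(&my_struct->lock, tranche_id);
 *
 * Note that each process that uses the lock must itself call
 * LWLockRegisterTranche() with the same ID and name; otherwise
 * GetLWTrancheName() below reports the wait event as just "extension".
 */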
658 :
659 : /*
660 : * RequestNamedLWLockTranche
661 : * Request that extra LWLocks be allocated during postmaster
662 : * startup.
663 : *
664 : * This may only be called via the shmem_request_hook of a library that is
665 : * loaded into the postmaster via shared_preload_libraries. Calls from
666 : * elsewhere will fail.
667 : *
668 : * The tranche name will be user-visible as a wait event name, so try to
669 : * use a name that fits the style for those.
670 : */
671 : void
672 12 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
673 : {
674 : NamedLWLockTrancheRequest *request;
675 :
676 12 : if (!process_shmem_requests_in_progress)
677 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
678 :
679 12 : if (NamedLWLockTrancheRequestArray == NULL)
680 : {
681 12 : NamedLWLockTrancheRequestsAllocated = 16;
682 12 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
683 12 : MemoryContextAlloc(TopMemoryContext,
684 : NamedLWLockTrancheRequestsAllocated
685 : * sizeof(NamedLWLockTrancheRequest));
686 : }
687 :
688 12 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
689 : {
690 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
691 :
692 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
693 0 : repalloc(NamedLWLockTrancheRequestArray,
694 : i * sizeof(NamedLWLockTrancheRequest));
695 0 : NamedLWLockTrancheRequestsAllocated = i;
696 : }
697 :
698 12 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
699 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
700 12 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
701 12 : request->num_lwlocks = num_lwlocks;
702 12 : NamedLWLockTrancheRequests++;
703 12 : }
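
/*
 * A sketch of the named-tranche path from an extension's point of view
 * (hook wiring abbreviated; "my_ext" is an illustrative name):
 *
 *     static void
 *     my_shmem_request(void)
 *     {
 *         RequestNamedLWLockTranche("my_ext", 1);
 *     }
 *
 *     ...and once shared memory has been initialized...
 *
 *     LWLockPadded *locks = GetNamedLWLockTranche("my_ext");
 *
 *     LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
 *     ...
 *     LWLockRelease(&locks[0].lock);
 */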
704 :
705 : /*
706 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
707 : */
708 : void
709 19427340 : LWLockInitialize(LWLock *lock, int tranche_id)
710 : {
711 19427340 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
712 : #ifdef LOCK_DEBUG
713 : pg_atomic_init_u32(&lock->nwaiters, 0);
714 : #endif
715 19427340 : lock->tranche = tranche_id;
716 19427340 : proclist_init(&lock->waiters);
717 19427340 : }
718 :
719 : /*
720 : * Report start of wait event for light-weight locks.
721 : *
722 : * This function is used by all the light-weight lock calls that need to
723 : * wait to acquire the lock. It distinguishes wait events based on the
724 : * lock's tranche.
725 : */
726 : static inline void
727 40120 : LWLockReportWaitStart(LWLock *lock)
728 : {
729 40120 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
730 40120 : }
731 :
732 : /*
733 : * Report end of wait event for light-weight locks.
734 : */
735 : static inline void
736 40120 : LWLockReportWaitEnd(void)
737 : {
738 40120 : pgstat_report_wait_end();
739 40120 : }
740 :
741 : /*
742 : * Return the name of an LWLock tranche.
743 : */
744 : static const char *
745 0 : GetLWTrancheName(uint16 trancheId)
746 : {
747 : /* Built-in tranche or individual LWLock? */
748 0 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
749 0 : return BuiltinTrancheNames[trancheId];
750 :
751 : /*
752 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
753 : * it's possible that the tranche has never been registered in the current
754 : * process, in which case give up and return "extension".
755 : */
756 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
757 :
758 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
759 0 : LWLockTrancheNames[trancheId] == NULL)
760 0 : return "extension";
761 :
762 0 : return LWLockTrancheNames[trancheId];
763 : }
764 :
765 : /*
766 : * Return an identifier for an LWLock based on the wait class and event.
767 : */
768 : const char *
769 0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
770 : {
771 : Assert(classId == PG_WAIT_LWLOCK);
772 : /* The event IDs are just tranche numbers. */
773 0 : return GetLWTrancheName(eventId);
774 : }
775 :
776 : /*
777 : * Internal function that tries to atomically acquire the lwlock in the passed
778 : * in mode.
779 : *
780 : * This function will not block waiting for a lock to become free - that's the
781 : * caller's job.
782 : *
783 : * Returns true if the lock isn't free and we need to wait.
784 : */
785 : static bool
786 384614574 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
787 : {
788 : uint32 old_state;
789 :
790 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
791 :
792 : /*
793 : * Read once outside the loop, later iterations will get the newer value
794 : * via compare & exchange.
795 : */
796 384614574 : old_state = pg_atomic_read_u32(&lock->state);
797 :
798 : /* loop until we've determined whether we could acquire the lock or not */
799 : while (true)
800 87642 : {
801 : uint32 desired_state;
802 : bool lock_free;
803 :
804 384702216 : desired_state = old_state;
805 :
806 384702216 : if (mode == LW_EXCLUSIVE)
807 : {
808 170272086 : lock_free = (old_state & LW_LOCK_MASK) == 0;
809 170272086 : if (lock_free)
810 170180678 : desired_state += LW_VAL_EXCLUSIVE;
811 : }
812 : else
813 : {
814 214430130 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
815 214430130 : if (lock_free)
816 214411624 : desired_state += LW_VAL_SHARED;
817 : }
818 :
819 : /*
820 : * Attempt to swap in the state we are expecting. If we didn't see the
821 : * lock as free, that's just the old value, so the swap is a no-op. If
822 : * we saw it as free, we'll attempt to mark it acquired. The reason we
823 : * always swap in a value is that the swap doubles as a memory barrier.
824 : * We could try to be smarter and only swap if we saw the lock as free,
825 : * but benchmarks haven't shown that to be beneficial so far.
826 : *
827 : * Retry if the value changed since we last looked at it.
828 : */
829 384702216 : if (pg_atomic_compare_exchange_u32(&lock->state,
830 : &old_state, desired_state))
831 : {
832 384614574 : if (lock_free)
833 : {
834 : /* Great! Got the lock. */
835 : #ifdef LOCK_DEBUG
836 : if (mode == LW_EXCLUSIVE)
837 : lock->owner = MyProc;
838 : #endif
839 384515374 : return false;
840 : }
841 : else
842 99200 : return true; /* somebody else has the lock */
843 : }
844 : }
845 : pg_unreachable();
846 : }
847 :
848 : /*
849 : * Lock the LWLock's wait list against concurrent activity.
850 : *
851 : * NB: even though the wait list is locked, non-conflicting lock operations
852 : * may still happen concurrently.
853 : *
854 : * Time spent holding mutex should be short!
855 : */
856 : static void
857 1611780 : LWLockWaitListLock(LWLock *lock)
858 : {
859 : uint32 old_state;
860 : #ifdef LWLOCK_STATS
861 : lwlock_stats *lwstats;
862 : uint32 delays = 0;
863 :
864 : lwstats = get_lwlock_stats_entry(lock);
865 : #endif
866 :
867 : while (true)
868 : {
869 : /* always try once to acquire lock directly */
870 1611780 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
871 1611780 : if (!(old_state & LW_FLAG_LOCKED))
872 1599398 : break; /* got lock */
873 :
874 : /* and then spin without atomic operations until lock is released */
875 : {
876 : SpinDelayStatus delayStatus;
877 :
878 12382 : init_local_spin_delay(&delayStatus);
879 :
880 38972 : while (old_state & LW_FLAG_LOCKED)
881 : {
882 26590 : perform_spin_delay(&delayStatus);
883 26590 : old_state = pg_atomic_read_u32(&lock->state);
884 : }
885 : #ifdef LWLOCK_STATS
886 : delays += delayStatus.delays;
887 : #endif
888 12382 : finish_spin_delay(&delayStatus);
889 : }
890 :
891 : /*
892 : * Retry. The lock might well have been re-acquired by the time we
893 : * attempt to get it again.
894 : */
895 : }
896 :
897 : #ifdef LWLOCK_STATS
898 : lwstats->spin_delay_count += delays;
899 : #endif
900 1599398 : }
901 :
902 : /*
903 : * Unlock the LWLock's wait list.
904 : *
905 : * Note that it can be more efficient to manipulate flags and release the
906 : * lock in a single atomic operation.
907 : */
908 : static void
909 1522402 : LWLockWaitListUnlock(LWLock *lock)
910 : {
911 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
912 :
913 1522402 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
914 :
915 : Assert(old_state & LW_FLAG_LOCKED);
916 1522402 : }
917 :
918 : /*
919 : * Wake up all the lockers that currently have a chance to acquire the lock.
920 : */
921 : static void
922 76996 : LWLockWakeup(LWLock *lock)
923 : {
924 : bool new_release_ok;
925 76996 : bool wokeup_somebody = false;
926 : proclist_head wakeup;
927 : proclist_mutable_iter iter;
928 :
929 76996 : proclist_init(&wakeup);
930 :
931 76996 : new_release_ok = true;
932 :
933 : /* lock wait list while collecting backends to wake up */
934 76996 : LWLockWaitListLock(lock);
935 :
936 93062 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
937 : {
938 45964 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
939 :
940 45964 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
941 162 : continue;
942 :
943 45802 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
944 45802 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
945 :
946 45802 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
947 : {
948 : /*
949 : * Prevent additional wakeups until the retryer gets to run. Backends
950 : * that are just waiting for the lock to become free don't retry
951 : * automatically.
952 : */
953 38088 : new_release_ok = false;
954 :
955 : /*
956 : * Don't wakeup (further) exclusive locks.
957 : */
958 38088 : wokeup_somebody = true;
959 : }
960 :
961 : /*
962 : * Signal that the process isn't on the wait list anymore. This allows
963 : * LWLockDequeueSelf() to remove itself from the waitlist with a
964 : * proclist_delete(), rather than having to check if it has been
965 : * removed from the list.
966 : */
967 : Assert(waiter->lwWaiting == LW_WS_WAITING);
968 45802 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
969 :
970 : /*
971 : * Once we've woken up an exclusive lock, there's no point in waking
972 : * up anybody else.
973 : */
974 45802 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
975 29898 : break;
976 : }
977 :
978 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
979 :
980 : /* unset required flags, and release lock, in one fell swoop */
981 : {
982 : uint32 old_state;
983 : uint32 desired_state;
984 :
985 76996 : old_state = pg_atomic_read_u32(&lock->state);
986 : while (true)
987 : {
988 77472 : desired_state = old_state;
989 :
990 : /* compute desired flags */
991 :
992 77472 : if (new_release_ok)
993 39828 : desired_state |= LW_FLAG_RELEASE_OK;
994 : else
995 37644 : desired_state &= ~LW_FLAG_RELEASE_OK;
996 :
997 77472 : if (proclist_is_empty(&wakeup))
998 35278 : desired_state &= ~LW_FLAG_HAS_WAITERS;
999 :
1000 77472 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1001 :
1002 77472 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1003 : desired_state))
1004 76996 : break;
1005 : }
1006 : }
1007 :
1008 : /* Awaken any waiters I removed from the queue. */
1009 122798 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1010 : {
1011 45802 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1012 :
1013 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1014 45802 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1015 :
1016 : /*
1017 : * Guarantee that lwWaiting being unset only becomes visible once the
1018 : * unlink from the list has completed. Otherwise the target backend
1019 : * could be woken up for another reason and enqueue for a new lock - if
1020 : * that happens before the list unlink happens, the list would end up
1021 : * being corrupted.
1022 : *
1023 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1024 : * another lock.
1025 : */
1026 45802 : pg_write_barrier();
1027 45802 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1028 45802 : PGSemaphoreUnlock(waiter->sem);
1029 : }
1030 76996 : }
1031 :
1032 : /*
1033 : * Add ourselves to the end of the queue.
1034 : *
1035 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1036 : */
1037 : static void
1038 67176 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1039 : {
1040 : /*
1041 : * If we don't have a PGPROC structure, there's no way to wait. This
1042 : * should never occur, since MyProc should only be null during shared
1043 : * memory initialization.
1044 : */
1045 67176 : if (MyProc == NULL)
1046 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1047 :
1048 67176 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1049 0 : elog(PANIC, "queueing for lock while waiting on another one");
1050 :
1051 67176 : LWLockWaitListLock(lock);
1052 :
1053 : /* setting the flag is protected by the spinlock */
1054 67176 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1055 :
1056 67176 : MyProc->lwWaiting = LW_WS_WAITING;
1057 67176 : MyProc->lwWaitMode = mode;
1058 :
1059 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1060 67176 : if (mode == LW_WAIT_UNTIL_FREE)
1061 9096 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1062 : else
1063 58080 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1064 :
1065 : /* Can release the mutex now */
1066 67176 : LWLockWaitListUnlock(lock);
1067 :
1068 : #ifdef LOCK_DEBUG
1069 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1070 : #endif
1071 67176 : }
1072 :
1073 : /*
1074 : * Remove ourselves from the waitlist.
1075 : *
1076 : * This is used if we queued ourselves because we thought we needed to sleep
1077 : * but, after further checking, we discovered that we don't actually need to
1078 : * do so.
1079 : */
1080 : static void
1081 27056 : LWLockDequeueSelf(LWLock *lock)
1082 : {
1083 : bool on_waitlist;
1084 :
1085 : #ifdef LWLOCK_STATS
1086 : lwlock_stats *lwstats;
1087 :
1088 : lwstats = get_lwlock_stats_entry(lock);
1089 :
1090 : lwstats->dequeue_self_count++;
1091 : #endif
1092 :
1093 27056 : LWLockWaitListLock(lock);
1094 :
1095 : /*
1096 : * Remove ourselves from the waitlist, unless we've already been removed.
1097 : * The removal happens with the wait list lock held, so there's no race in
1098 : * this check.
1099 : */
1100 27056 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1101 27056 : if (on_waitlist)
1102 21200 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1103 :
1104 27056 : if (proclist_is_empty(&lock->waiters) &&
1105 26792 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1106 : {
1107 26740 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1108 : }
1109 :
1110 : /* XXX: combine with fetch_and above? */
1111 27056 : LWLockWaitListUnlock(lock);
1112 :
1113 : /* clear waiting state again, nice for debugging */
1114 27056 : if (on_waitlist)
1115 21200 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1116 : else
1117 : {
1118 5856 : int extraWaits = 0;
1119 :
1120 : /*
1121 : * Somebody else dequeued us and has or will wake us up. Deal with the
1122 : * superfluous absorption of a wakeup.
1123 : */
1124 :
1125 : /*
1126 : * Reset RELEASE_OK flag if somebody woke us before we removed
1127 : * ourselves - they'll have set it to false.
1128 : */
1129 5856 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1130 :
1131 : /*
1132 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1133 : * get reset at some inconvenient point later. Most of the time this
1134 : * will immediately return.
1135 : */
1136 : for (;;)
1137 : {
1138 5856 : PGSemaphoreLock(MyProc->sem);
1139 5856 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1140 5856 : break;
1141 0 : extraWaits++;
1142 : }
1143 :
1144 : /*
1145 : * Fix the process wait semaphore's count for any absorbed wakeups.
1146 : */
1147 5856 : while (extraWaits-- > 0)
1148 0 : PGSemaphoreUnlock(MyProc->sem);
1149 : }
1150 :
1151 : #ifdef LOCK_DEBUG
1152 : {
1153 : /* not waiting anymore */
1154 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1155 :
1156 : Assert(nwaiters < MAX_BACKENDS);
1157 : }
1158 : #endif
1159 27056 : }
1160 :
1161 : /*
1162 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1163 : *
1164 : * If the lock is not available, sleep until it is. Returns true if the lock
1165 : * was available immediately, false if we had to sleep.
1166 : *
1167 : * Side effect: cancel/die interrupts are held off until lock release.
1168 : */
1169 : bool
1170 380896240 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1171 : {
1172 380896240 : PGPROC *proc = MyProc;
1173 380896240 : bool result = true;
1174 380896240 : int extraWaits = 0;
1175 : #ifdef LWLOCK_STATS
1176 : lwlock_stats *lwstats;
1177 :
1178 : lwstats = get_lwlock_stats_entry(lock);
1179 : #endif
1180 :
1181 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1182 :
1183 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1184 :
1185 : #ifdef LWLOCK_STATS
1186 : /* Count lock acquisition attempts */
1187 : if (mode == LW_EXCLUSIVE)
1188 : lwstats->ex_acquire_count++;
1189 : else
1190 : lwstats->sh_acquire_count++;
1191 : #endif /* LWLOCK_STATS */
1192 :
1193 : /*
1194 : * We can't wait if we haven't got a PGPROC. This should only occur
1195 : * during bootstrap or shared memory initialization. Put an Assert here
1196 : * to catch unsafe coding practices.
1197 : */
1198 : Assert(!(proc == NULL && IsUnderPostmaster));
1199 :
1200 : /* Ensure we will have room to remember the lock */
1201 380896240 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1202 0 : elog(ERROR, "too many LWLocks taken");
1203 :
1204 : /*
1205 : * Lock out cancel/die interrupts until we exit the code section protected
1206 : * by the LWLock. This ensures that interrupts will not interfere with
1207 : * manipulations of data structures in shared memory.
1208 : */
1209 380896240 : HOLD_INTERRUPTS();
1210 :
1211 : /*
1212 : * Loop here to try to acquire lock after each time we are signaled by
1213 : * LWLockRelease.
1214 : *
1215 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1216 : * lock, rather than retrying and possibly having to go back to sleep. But
1217 : * in practice that is no good because it means a process swap for every
1218 : * lock acquisition when two or more processes are contending for the same
1219 : * lock. Since LWLocks are normally used to protect not-very-long
1220 : * sections of computation, a process needs to be able to acquire and
1221 : * release the same lock many times during a single CPU time slice, even
1222 : * in the presence of contention. The efficiency of being able to do that
1223 : * outweighs the inefficiency of sometimes wasting a process dispatch
1224 : * cycle because the lock is not free when a released waiter finally gets
1225 : * to run. See pgsql-hackers archives for 29-Dec-01.
1226 : */
1227 : for (;;)
1228 32510 : {
1229 : bool mustwait;
1230 :
1231 : /*
1232 : * Try to grab the lock the first time; we're not in the waitqueue
1233 : * yet/anymore.
1234 : */
1235 380928750 : mustwait = LWLockAttemptLock(lock, mode);
1236 :
1237 380928750 : if (!mustwait)
1238 : {
1239 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1240 380870670 : break; /* got the lock */
1241 : }
1242 :
1243 : /*
1244 : * Ok, at this point we couldn't grab the lock on the first try. We
1245 : * cannot simply queue ourselves to the end of the list and wait to be
1246 : * woken up because by now the lock could long have been released.
1247 : * Instead add us to the queue and try to grab the lock again. If we
1248 : * succeed we need to revert the queuing and be happy, otherwise we
1249 : * recheck the lock. If we still couldn't grab it, we know that the
1250 : * other locker will see our queue entries when releasing since they
1251 : * existed before we checked for the lock.
1252 : */
1253 :
1254 : /* add to the queue */
1255 58080 : LWLockQueueSelf(lock, mode);
1256 :
1257 : /* we're now guaranteed to be woken up if necessary */
1258 58080 : mustwait = LWLockAttemptLock(lock, mode);
1259 :
1260 : /* ok, grabbed the lock the second time round, need to undo queueing */
1261 58080 : if (!mustwait)
1262 : {
1263 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1264 :
1265 25570 : LWLockDequeueSelf(lock);
1266 25570 : break;
1267 : }
1268 :
1269 : /*
1270 : * Wait until awakened.
1271 : *
1272 : * It is possible that we get awakened for a reason other than being
1273 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1274 : * we've gotten the LWLock, re-increment the sema by the number of
1275 : * additional signals received.
1276 : */
1277 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1278 :
1279 : #ifdef LWLOCK_STATS
1280 : lwstats->block_count++;
1281 : #endif
1282 :
1283 32510 : LWLockReportWaitStart(lock);
1284 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1285 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1286 :
1287 : for (;;)
1288 : {
1289 32510 : PGSemaphoreLock(proc->sem);
1290 32510 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1291 32510 : break;
1292 0 : extraWaits++;
1293 : }
1294 :
1295 : /* Retrying, allow LWLockRelease to release waiters again. */
1296 32510 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1297 :
1298 : #ifdef LOCK_DEBUG
1299 : {
1300 : /* not waiting anymore */
1301 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1302 :
1303 : Assert(nwaiters < MAX_BACKENDS);
1304 : }
1305 : #endif
1306 :
1307 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1308 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1309 32510 : LWLockReportWaitEnd();
1310 :
1311 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1312 :
1313 : /* Now loop back and try to acquire lock again. */
1314 32510 : result = false;
1315 : }
1316 :
1317 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1318 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1319 :
1320 : /* Add lock to list of locks held by this backend */
1321 380896240 : held_lwlocks[num_held_lwlocks].lock = lock;
1322 380896240 : held_lwlocks[num_held_lwlocks++].mode = mode;
1323 :
1324 : /*
1325 : * Fix the process wait semaphore's count for any absorbed wakeups.
1326 : */
1327 380896240 : while (extraWaits-- > 0)
1328 0 : PGSemaphoreUnlock(proc->sem);
1329 :
1330 380896240 : return result;
1331 : }
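
/*
 * The canonical usage pattern for LWLockAcquire() and LWLockRelease(), as a
 * sketch (the protected structure is whatever the lock was assigned to):
 *
 *     LWLockAcquire(lock, LW_EXCLUSIVE);
 *     ...read and/or modify the shared data structure...
 *     LWLockRelease(lock);
 *
 * Cancel/die interrupts stay held off in between, and an error raised while
 * the lock is held is safe: the held_lwlocks[] bookkeeping above lets error
 * recovery release everything we still hold.
 */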
1332 :
1333 : /*
1334 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1335 : *
1336 : * If the lock is not available, return false with no side-effects.
1337 : *
1338 : * If successful, cancel/die interrupts are held off until lock release.
1339 : */
1340 : bool
1341 3397884 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1342 : {
1343 : bool mustwait;
1344 :
1345 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1346 :
1347 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1348 :
1349 : /* Ensure we will have room to remember the lock */
1350 3397884 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1351 0 : elog(ERROR, "too many LWLocks taken");
1352 :
1353 : /*
1354 : * Lock out cancel/die interrupts until we exit the code section protected
1355 : * by the LWLock. This ensures that interrupts will not interfere with
1356 : * manipulations of data structures in shared memory.
1357 : */
1358 3397884 : HOLD_INTERRUPTS();
1359 :
1360 : /* Check for the lock */
1361 3397884 : mustwait = LWLockAttemptLock(lock, mode);
1362 :
1363 3397884 : if (mustwait)
1364 : {
1365 : /* Failed to get lock, so release interrupt holdoff */
1366 1602 : RESUME_INTERRUPTS();
1367 :
1368 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1369 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1370 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1371 : }
1372 : else
1373 : {
1374 : /* Add lock to list of locks held by this backend */
1375 3396282 : held_lwlocks[num_held_lwlocks].lock = lock;
1376 3396282 : held_lwlocks[num_held_lwlocks++].mode = mode;
1377 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1378 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1379 : }
1380 3397884 : return !mustwait;
1381 : }
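
/*
 * A sketch of the typical pattern around LWLockConditionalAcquire(): do
 * optional work only if the lock is free right now (the fallback branch is
 * illustrative):
 *
 *     if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
 *     {
 *         ...do the optional work...
 *         LWLockRelease(lock);
 *     }
 *     else
 *     {
 *         ...skip or retry later - no lock is held on this path...
 *     }
 */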
1382 :
1383 : /*
1384 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1385 : *
1386 : * The semantics of this function are a bit funky. If the lock is currently
1387 : * free, it is acquired in the given mode, and the function returns true. If
1388 : * the lock isn't immediately free, the function waits until it is released
1389 : * and returns false, but does not acquire the lock.
1390 : *
1391 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1392 : * holding WALWriteLock, it can flush the commit records of many other
1393 : * backends as a side-effect. Those other backends need to wait until the
1394 : * flush finishes, but don't need to acquire the lock anymore. They can just
1395 : * wake up, observe that their records have already been flushed, and return.
1396 : */
1397 : bool
1398 226338 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1399 : {
1400 226338 : PGPROC *proc = MyProc;
1401 : bool mustwait;
1402 226338 : int extraWaits = 0;
1403 : #ifdef LWLOCK_STATS
1404 : lwlock_stats *lwstats;
1405 :
1406 : lwstats = get_lwlock_stats_entry(lock);
1407 : #endif
1408 :
1409 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1410 :
1411 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1412 :
1413 : /* Ensure we will have room to remember the lock */
1414 226338 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1415 0 : elog(ERROR, "too many LWLocks taken");
1416 :
1417 : /*
1418 : * Lock out cancel/die interrupts until we exit the code section protected
1419 : * by the LWLock. This ensures that interrupts will not interfere with
1420 : * manipulations of data structures in shared memory.
1421 : */
1422 226338 : HOLD_INTERRUPTS();
1423 :
1424 : /*
1425 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1426 : * protocol as LWLockAcquire(). Check its comments for details.
1427 : */
1428 226338 : mustwait = LWLockAttemptLock(lock, mode);
1429 :
1430 226338 : if (mustwait)
1431 : {
1432 3522 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1433 :
1434 3522 : mustwait = LWLockAttemptLock(lock, mode);
1435 :
1436 3522 : if (mustwait)
1437 : {
1438 : /*
1439 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1440 : * bogus wakeups.
1441 : */
1442 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1443 :
1444 : #ifdef LWLOCK_STATS
1445 : lwstats->block_count++;
1446 : #endif
1447 :
1448 3486 : LWLockReportWaitStart(lock);
1449 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1450 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1451 :
1452 : for (;;)
1453 : {
1454 3486 : PGSemaphoreLock(proc->sem);
1455 3486 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1456 3486 : break;
1457 0 : extraWaits++;
1458 : }
1459 :
1460 : #ifdef LOCK_DEBUG
1461 : {
1462 : /* not waiting anymore */
1463 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1464 :
1465 : Assert(nwaiters < MAX_BACKENDS);
1466 : }
1467 : #endif
1468 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1469 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1470 3486 : LWLockReportWaitEnd();
1471 :
1472 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1473 : }
1474 : else
1475 : {
1476 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1477 :
1478 : /*
1479 : * Got lock in the second attempt, undo queueing. We need to treat
1480 : * this as having successfully acquired the lock, otherwise we'd
1481 : * not necessarily wake up people we've prevented from acquiring
1482 : * the lock.
1483 : */
1484 36 : LWLockDequeueSelf(lock);
1485 : }
1486 : }
1487 :
1488 : /*
1489 : * Fix the process wait semaphore's count for any absorbed wakeups.
1490 : */
1491 226338 : while (extraWaits-- > 0)
1492 0 : PGSemaphoreUnlock(proc->sem);
1493 :
1494 226338 : if (mustwait)
1495 : {
1496 : /* Failed to get lock, so release interrupt holdoff */
1497 3486 : RESUME_INTERRUPTS();
1498 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1499 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1500 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1501 : }
1502 : else
1503 : {
1504 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1505 : /* Add lock to list of locks held by this backend */
1506 222852 : held_lwlocks[num_held_lwlocks].lock = lock;
1507 222852 : held_lwlocks[num_held_lwlocks++].mode = mode;
1508 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1509 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1510 : }
1511 :
1512 226338 : return !mustwait;
1513 : }
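
/*
 * A simplified sketch of the WAL-flush pattern described above (modeled on
 * XLogFlush(); details omitted):
 *
 *     if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
 *     {
 *         ...we got the lock: write/flush WAL up to our request...
 *         LWLockRelease(WALWriteLock);
 *     }
 *     else
 *     {
 *         ...somebody else flushed meanwhile: recheck whether our records
 *         are already on disk, and only repeat the wait if they aren't...
 *     }
 */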
1514 :
1515 : /*
1516 : * Does the lwlock in its current state need to wait for the variable value to
1517 : * change?
1518 : *
1519 : * If we don't need to wait, and it's because the value of the variable has
1520 : * changed, store the current value in newval.
1521 : *
1522 : * *result is set to true if the lock was free, and false otherwise.
1523 : */
1524 : static bool
1525 2645132 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1526 : uint64 *newval, bool *result)
1527 : {
1528 : bool mustwait;
1529 : uint64 value;
1530 :
1531 : /*
1532 : * Test first to see if it the slot is free right now.
1533 : *
1534 : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1535 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1536 : * this, so we don't need a memory barrier here as far as the current
1537 : * usage is concerned. But that might not be safe in general.
1538 : */
1539 2645132 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1540 :
1541 2645132 : if (!mustwait)
1542 : {
1543 1906396 : *result = true;
1544 1906396 : return false;
1545 : }
1546 :
1547 738736 : *result = false;
1548 :
1549 : /*
1550 : * Reading this value atomically is safe even on platforms where uint64
1551 : * cannot be read without observing a torn value.
1552 : */
1553 738736 : value = pg_atomic_read_u64(valptr);
1554 :
1555 738736 : if (value != oldval)
1556 : {
1557 729038 : mustwait = false;
1558 729038 : *newval = value;
1559 : }
1560 : else
1561 : {
1562 9698 : mustwait = true;
1563 : }
1564 :
1565 738736 : return mustwait;
1566 : }
1567 :
1568 : /*
1569 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1570 : *
1571 : * If the lock is held and *valptr equals oldval, waits until the lock is
1572 : * either freed, or the lock holder updates *valptr by calling
1573 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1574 : * waiting), returns true. If the lock is still held, but *valptr no longer
1575 : * matches oldval, returns false and sets *newval to the current value in
1576 : * *valptr.
1577 : *
1578 : * Note: this function ignores shared lock holders; if the lock is held
1579 : * in shared mode, returns 'true'.
1580 : *
1581 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1582 : * hence the caller of this function may want to rely on an explicit barrier or
1583 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1584 : */
1585 : bool
1586 2635434 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1587 : uint64 *newval)
1588 : {
1589 2635434 : PGPROC *proc = MyProc;
1590 2635434 : int extraWaits = 0;
1591 2635434 : bool result = false;
1592 : #ifdef LWLOCK_STATS
1593 : lwlock_stats *lwstats;
1594 :
1595 : lwstats = get_lwlock_stats_entry(lock);
1596 : #endif
1597 :
1598 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1599 :
1600 : /*
1601 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1602 : * cleanup mechanism to remove us from the wait queue if we got
1603 : * interrupted.
1604 : */
1605 2635434 : HOLD_INTERRUPTS();
1606 :
1607 : /*
1608 : * Loop here to check the lock's status after each time we are signaled.
1609 : */
1610 : for (;;)
1611 4124 : {
1612 : bool mustwait;
1613 :
1614 2639558 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1615 : &result);
1616 :
1617 2639558 : if (!mustwait)
1618 2633984 : break; /* the lock was free or value didn't match */
1619 :
1620 : /*
1621 : * Add myself to the wait queue. Note that this is racy: somebody else
1622 : * could release the lock and wake waiters before we've finished
1623 : * queuing. NB: We're using nearly the same twice-in-a-row lock
1624 : * acquisition protocol as LWLockAcquire(). Check its comments for
1625 : * details. The only difference is that we also have to check the
1626 : * variable's value when checking the state of the lock.
1627 : */
1628 5574 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1629 :
1630 : /*
1631 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1632 : * lock is released.
1633 : */
1634 5574 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1635 :
1636 : /*
1637 : * We're now guaranteed to be woken up if necessary. Recheck the
1638 : * lock's and the variable's state.
1639 : */
1640 5574 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1641 : &result);
1642 :
1643 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1644 5574 : if (!mustwait)
1645 : {
1646 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1647 :
1648 1450 : LWLockDequeueSelf(lock);
1649 1450 : break;
1650 : }
1651 :
1652 : /*
1653 : * Wait until awakened.
1654 : *
1655 : * It is possible that we get awakened for a reason other than being
1656 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1657 : * we're done waiting, re-increment the sema by the number of
1658 : * additional signals received.
1659 : */
1660 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1661 :
1662 : #ifdef LWLOCK_STATS
1663 : lwstats->block_count++;
1664 : #endif
1665 :
1666 4124 : LWLockReportWaitStart(lock);
1667 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1668 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1669 :
1670 : for (;;)
1671 : {
1672 4124 : PGSemaphoreLock(proc->sem);
1673 4124 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1674 4124 : break;
1675 0 : extraWaits++;
1676 : }
1677 :
1678 : #ifdef LOCK_DEBUG
1679 : {
1680 : /* not waiting anymore */
1681 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1682 :
1683 : Assert(nwaiters < MAX_BACKENDS);
1684 : }
1685 : #endif
1686 :
1687 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1688 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1689 4124 : LWLockReportWaitEnd();
1690 :
1691 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1692 :
1693 : /* Now loop back and check the status of the lock again. */
1694 : }
1695 :
1696 : /*
1697 : * Fix the process wait semaphore's count for any absorbed wakeups.
1698 : */
1699 2635434 : while (extraWaits-- > 0)
1700 0 : PGSemaphoreUnlock(proc->sem);
1701 :
1702 : /*
1703 : * Now okay to allow cancel/die interrupts.
1704 : */
1705 2635434 : RESUME_INTERRUPTS();
1706 :
1707 2635434 : return result;
1708 : }
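
/*
 * A minimal wait-loop sketch for the contract above (hypothetical names;
 * the real caller is WaitXLogInsertionsToFinish() in xlog.c):
 *
 *	uint64		oldval = last_seen;
 *	uint64		newval;
 *
 *	while (!LWLockWaitForVar(lock, &shared->progress, oldval, &newval))
 *	{
 *		// Lock is still held, but the variable advanced past 'oldval';
 *		// adopt the new value and keep waiting for the holder.
 *		oldval = newval;
 *	}
 *	// Here the lock was observed free (or held only in shared mode).
 */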
1709 :
1710 :
1711 : /*
1712 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1713 : *
1714 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1715 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1716 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1717 : * lock is guaranteed to see the new value and act accordingly.
1718 : *
1719 : * The caller must be holding the lock in exclusive mode.
1720 : */
1721 : void
1722 1428170 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1723 : {
1724 : proclist_head wakeup;
1725 : proclist_mutable_iter iter;
1726 :
1727 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1728 :
1729 : /*
1730 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1731 : * that the variable is updated before waking up waiters.
1732 : */
1733 1428170 : pg_atomic_exchange_u64(valptr, val);
1734 :
1735 1428170 : proclist_init(&wakeup);
1736 :
1737 1428170 : LWLockWaitListLock(lock);
1738 :
1739 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1740 :
1741 : /*
1742 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1743 : * up. They are always at the front of the queue.
1744 : */
1745 1428332 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1746 : {
1747 226 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1748 :
1749 226 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1750 64 : break;
1751 :
1752 162 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1753 162 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1754 :
1755 : /* see LWLockWakeup() */
1756 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1757 162 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1758 : }
1759 :
1760 : /* We are done updating shared state of the lock itself. */
1761 1428170 : LWLockWaitListUnlock(lock);
1762 :
1763 : /*
1764 : * Awaken any waiters I removed from the queue.
1765 : */
1766 1428332 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1767 : {
1768 162 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1769 :
1770 162 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1771 : /* check comment in LWLockWakeup() about this barrier */
1772 162 : pg_write_barrier();
1773 162 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1774 162 : PGSemaphoreUnlock(waiter->sem);
1775 : }
1776 1428170 : }
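
/*
 * Holder-side sketch of the variable protocol (hypothetical names): the
 * exclusive holder publishes intermediate progress with LWLockUpdateVar()
 * and finally resets the variable while releasing, via
 * LWLockReleaseClearVar() below:
 *
 *	LWLockAcquire(lock, LW_EXCLUSIVE);
 *	...
 *	LWLockUpdateVar(lock, &shared->progress, midpoint);
 *	...
 *	LWLockReleaseClearVar(lock, &shared->progress, 0);
 */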
1777 :
1778 :
1779 : /*
1780 : * LWLockRelease - release a previously acquired lock
1781 : */
1782 : void
1783 384515374 : LWLockRelease(LWLock *lock)
1784 : {
1785 : LWLockMode mode;
1786 : uint32 oldstate;
1787 : bool check_waiters;
1788 : int i;
1789 :
1790 : /*
1791 : * Remove lock from list of locks held. Usually, but not always, it will
1792 : * be the latest-acquired lock; so search the array backwards.
1793 : */
1794 421317602 : for (i = num_held_lwlocks; --i >= 0;)
1795 421317602 : if (lock == held_lwlocks[i].lock)
1796 384515374 : break;
1797 :
1798 384515374 : if (i < 0)
1799 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1800 :
1801 384515374 : mode = held_lwlocks[i].mode;
1802 :
1803 384515374 : num_held_lwlocks--;
1804 421317602 : for (; i < num_held_lwlocks; i++)
1805 36802228 : held_lwlocks[i] = held_lwlocks[i + 1];
1806 :
1807 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1808 :
1809 : /*
1810 : * Release my hold on the lock; after that it can immediately be
1811 : * acquired by others, even if we still have to wake up other waiters.
1812 : */
1813 384515374 : if (mode == LW_EXCLUSIVE)
1814 170159558 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1815 : else
1816 214355816 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1817 :
1818 : /* nobody else can have that kind of lock */
1819 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1820 :
1821 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1822 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1823 :
1824 : /*
1825 : * If we're still waiting for previously-awakened backends to get
1826 : * scheduled, don't wake anyone up again.
1827 : */
1828 384515374 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1829 108174 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1830 108174 : (oldstate & LW_LOCK_MASK) == 0)
1831 76996 : check_waiters = true;
1832 : else
1833 384438378 : check_waiters = false;
1834 :
1835 : /*
1836 : * As waking up waiters requires the spinlock to be acquired, only do so
1837 : * if necessary.
1838 : */
1839 384515374 : if (check_waiters)
1840 : {
1841 : /* XXX: remove before commit? */
1842 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1843 76996 : LWLockWakeup(lock);
1844 : }
1845 :
1846 : /*
1847 : * Now okay to allow cancel/die interrupts.
1848 : */
1849 384515374 : RESUME_INTERRUPTS();
1850 384515374 : }
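
/*
 * Typical pairing, sketched with a hypothetical lock:
 *
 *	LWLockAcquire(MyHypotheticalLock, LW_SHARED);
 *	... read the protected structure ...
 *	LWLockRelease(MyHypotheticalLock);
 */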
1851 :
1852 : /*
1853 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1854 : */
1855 : void
1856 25662146 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1857 : {
1858 : /*
1859 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1860 : * that the variable is updated before releasing the lock.
1861 : */
1862 25662146 : pg_atomic_exchange_u64(valptr, val);
1863 :
1864 25662146 : LWLockRelease(lock);
1865 25662146 : }
1866 :
1867 :
1868 : /*
1869 : * LWLockReleaseAll - release all currently-held locks
1870 : *
1871 : * Used to clean up after ereport(ERROR). An important difference between this
1872 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1873 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1874 : * has been set to an appropriate level earlier in error recovery. We could
1875 : * decrement it below zero if we allow it to drop for each released lock!
1876 : */
1877 : void
1878 88398 : LWLockReleaseAll(void)
1879 : {
1880 88782 : while (num_held_lwlocks > 0)
1881 : {
1882 384 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1883 :
1884 384 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1885 : }
1886 88398 : }
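
/*
 * The holdoff-count bookkeeping above, sketched per iteration: the
 * HOLD_INTERRUPTS() here pairs with the RESUME_INTERRUPTS() performed
 * inside LWLockRelease(), so InterruptHoldoffCount ends up unchanged:
 *
 *	HOLD_INTERRUPTS();		// holdoff count: +1
 *	LWLockRelease(lock);	// holdoff count: -1 (RESUME_INTERRUPTS)
 */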
1887 :
1888 :
1889 : /*
1890 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1891 : *
1892 : * This is meant as debug support only.
1893 : */
1894 : bool
1895 0 : LWLockHeldByMe(LWLock *lock)
1896 : {
1897 : int i;
1898 :
1899 0 : for (i = 0; i < num_held_lwlocks; i++)
1900 : {
1901 0 : if (held_lwlocks[i].lock == lock)
1902 0 : return true;
1903 : }
1904 0 : return false;
1905 : }
1906 :
1907 : /*
1908 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1909 : *
1910 : * This is meant as debug support only.
1911 : */
1912 : bool
1913 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1914 : {
1915 : char *held_lock_addr;
1916 : char *begin;
1917 : char *end;
1918 : int i;
1919 :
1920 0 : begin = (char *) lock;
1921 0 : end = begin + nlocks * stride;
1922 0 : for (i = 0; i < num_held_lwlocks; i++)
1923 : {
1924 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
1925 0 : if (held_lock_addr >= begin &&
1926 0 : held_lock_addr < end &&
1927 0 : (held_lock_addr - begin) % stride == 0)
1928 0 : return true;
1929 : }
1930 0 : return false;
1931 : }
1932 :
1933 : /*
1934 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1935 : *
1936 : * This is meant as debug support only.
1937 : */
1938 : bool
1939 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1940 : {
1941 : int i;
1942 :
1943 0 : for (i = 0; i < num_held_lwlocks; i++)
1944 : {
1945 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
1946 0 : return true;
1947 : }
1948 0 : return false;
1949 : }