Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frammishes. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. The variable is not modified
15 : * when the lock is acquired with LWLockAcquire, i.e. it keeps the
16 : * value it had when the lock was last released, and can be updated
17 : * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : * appropriate value for a free lock. The meaning of the variable is up to
21 : * the caller, the lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the
52 : * new value is zero (we get that atomically), we know we can/have to release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a two phased attempt at locking:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem from above as nobody can release too
74 : * quick, before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "postmaster/postmaster.h"
84 : #include "storage/proc.h"
85 : #include "storage/proclist.h"
86 : #include "storage/spin.h"
87 : #include "utils/memutils.h"
88 :
89 : #ifdef LWLOCK_STATS
90 : #include "utils/hsearch.h"
91 : #endif
92 :
93 :
94 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
95 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
96 : #define LW_FLAG_LOCKED ((uint32) 1 << 28)
97 :
98 : #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
99 : #define LW_VAL_SHARED 1
100 :
101 : #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
102 : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
103 : #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
104 :
105 : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
106 : "MAX_BACKENDS too big for lwlock.c");
107 :
108 : /*
109 : * There are three sorts of LWLock "tranches":
110 : *
111 : * 1. The individually-named locks defined in lwlocklist.h each have their
112 : * own tranche. We absorb the names of these tranches from there into
113 : * BuiltinTrancheNames here.
114 : *
115 : * 2. There are some predefined tranches for built-in groups of locks.
116 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
117 : * appear in BuiltinTrancheNames[] below.
118 : *
119 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
120 : * or LWLockRegisterTranche. The names of these that are known in the current
121 : * process appear in LWLockTrancheNames[].
122 : *
123 : * All these names are user-visible as wait event names, so choose with care
124 : * ... and do not forget to update the documentation's list of wait events.
125 : */
126 : static const char *const BuiltinTrancheNames[] = {
127 : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
128 : #include "storage/lwlocklist.h"
129 : #undef PG_LWLOCK
130 : [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
131 : [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
132 : [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
133 : [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
134 : [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
135 : [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
136 : [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
137 : [LWTRANCHE_WAL_INSERT] = "WALInsert",
138 : [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
139 : [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
140 : [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
141 : [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
142 : [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
143 : [LWTRANCHE_LOCK_MANAGER] = "LockManager",
144 : [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
145 : [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
146 : [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
147 : [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
148 : [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
149 : [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
150 : [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
151 : [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
152 : [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
153 : [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
154 : [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
155 : [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
156 : [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
157 : [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
158 : [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
159 : [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
160 : [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
161 : [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
162 : [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
163 : [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
164 : [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
165 : [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
166 : [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
167 : [LWTRANCHE_XACT_SLRU] = "XactSLRU",
168 : [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
169 : };
170 :
171 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
172 : LWTRANCHE_FIRST_USER_DEFINED,
173 : "missing entries in BuiltinTrancheNames[]");
174 :
175 : /*
176 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
177 : * stores the names of all dynamically-created tranches known to the current
178 : * process. Any unused entries in the array will contain NULL.
179 : */
180 : static const char **LWLockTrancheNames = NULL;
181 : static int LWLockTrancheNamesAllocated = 0;
182 :
183 : /*
184 : * This points to the main array of LWLocks in shared memory. Backends inherit
185 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
186 : * where we have special measures to pass it down).
187 : */
188 : LWLockPadded *MainLWLockArray = NULL;
189 :
190 : /*
191 : * We use this structure to keep track of locked LWLocks for release
192 : * during error recovery. Normally, only a few will be held at once, but
193 : * occasionally the number can be much higher; for example, the pg_buffercache
194 : * extension locks all buffer partitions simultaneously.
195 : */
196 : #define MAX_SIMUL_LWLOCKS 200
197 :
198 : /* struct representing the LWLocks we're holding */
199 : typedef struct LWLockHandle
200 : {
201 : LWLock *lock;
202 : LWLockMode mode;
203 : } LWLockHandle;
204 :
205 : static int num_held_lwlocks = 0;
206 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
207 :
208 : /* struct representing the LWLock tranche request for named tranche */
209 : typedef struct NamedLWLockTrancheRequest
210 : {
211 : char tranche_name[NAMEDATALEN];
212 : int num_lwlocks;
213 : } NamedLWLockTrancheRequest;
214 :
215 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
216 : static int NamedLWLockTrancheRequestsAllocated = 0;
217 :
218 : /*
219 : * NamedLWLockTrancheRequests is both the valid length of the request array,
220 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
221 : * This variable and NamedLWLockTrancheArray are non-static so that
222 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
223 : */
224 : int NamedLWLockTrancheRequests = 0;
225 :
226 : /* points to data in shared memory: */
227 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
228 :
229 : static void InitializeLWLocks(void);
230 : static inline void LWLockReportWaitStart(LWLock *lock);
231 : static inline void LWLockReportWaitEnd(void);
232 : static const char *GetLWTrancheName(uint16 trancheId);
233 :
234 : #define T_NAME(lock) \
235 : GetLWTrancheName((lock)->tranche)
236 :
237 : #ifdef LWLOCK_STATS
238 : typedef struct lwlock_stats_key
239 : {
240 : int tranche;
241 : void *instance;
242 : } lwlock_stats_key;
243 :
244 : typedef struct lwlock_stats
245 : {
246 : lwlock_stats_key key;
247 : int sh_acquire_count;
248 : int ex_acquire_count;
249 : int block_count;
250 : int dequeue_self_count;
251 : int spin_delay_count;
252 : } lwlock_stats;
253 :
254 : static HTAB *lwlock_stats_htab;
255 : static lwlock_stats lwlock_stats_dummy;
256 : #endif
257 :
258 : #ifdef LOCK_DEBUG
259 : bool Trace_lwlocks = false;
260 :
261 : inline static void
262 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
263 : {
264 : /* hide statement & context here, otherwise the log is just too verbose */
265 : if (Trace_lwlocks)
266 : {
267 : uint32 state = pg_atomic_read_u32(&lock->state);
268 :
269 : ereport(LOG,
270 : (errhidestmt(true),
271 : errhidecontext(true),
272 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
273 : MyProcPid,
274 : where, T_NAME(lock), lock,
275 : (state & LW_VAL_EXCLUSIVE) != 0,
276 : state & LW_SHARED_MASK,
277 : (state & LW_FLAG_HAS_WAITERS) != 0,
278 : pg_atomic_read_u32(&lock->nwaiters),
279 : (state & LW_FLAG_RELEASE_OK) != 0)));
280 : }
281 : }
282 :
283 : inline static void
284 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
285 : {
286 : /* hide statement & context here, otherwise the log is just too verbose */
287 : if (Trace_lwlocks)
288 : {
289 : ereport(LOG,
290 : (errhidestmt(true),
291 : errhidecontext(true),
292 : errmsg_internal("%s(%s %p): %s", where,
293 : T_NAME(lock), lock, msg)));
294 : }
295 : }
296 :
297 : #else /* not LOCK_DEBUG */
298 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
299 : #define LOG_LWDEBUG(a,b,c) ((void)0)
300 : #endif /* LOCK_DEBUG */
301 :
302 : #ifdef LWLOCK_STATS
303 :
304 : static void init_lwlock_stats(void);
305 : static void print_lwlock_stats(int code, Datum arg);
306 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
307 :
308 : static void
309 : init_lwlock_stats(void)
310 : {
311 : HASHCTL ctl;
312 : static MemoryContext lwlock_stats_cxt = NULL;
313 : static bool exit_registered = false;
314 :
315 : if (lwlock_stats_cxt != NULL)
316 : MemoryContextDelete(lwlock_stats_cxt);
317 :
318 : /*
319 : * The LWLock stats will be updated within a critical section, which
320 : * requires allocating new hash entries. Allocations within a critical
321 : * section are normally not allowed because running out of memory would
322 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
323 : * turned on in production, so that's an acceptable risk. The hash entries
324 : * are small, so the risk of running out of memory is minimal in practice.
325 : */
326 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
327 : "LWLock stats",
328 : ALLOCSET_DEFAULT_SIZES);
329 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
330 :
331 : ctl.keysize = sizeof(lwlock_stats_key);
332 : ctl.entrysize = sizeof(lwlock_stats);
333 : ctl.hcxt = lwlock_stats_cxt;
334 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
335 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
336 : if (!exit_registered)
337 : {
338 : on_shmem_exit(print_lwlock_stats, 0);
339 : exit_registered = true;
340 : }
341 : }
342 :
343 : static void
344 : print_lwlock_stats(int code, Datum arg)
345 : {
346 : HASH_SEQ_STATUS scan;
347 : lwlock_stats *lwstats;
348 :
349 : hash_seq_init(&scan, lwlock_stats_htab);
350 :
351 : /* Grab an LWLock to keep different backends from mixing reports */
352 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
353 :
354 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
355 : {
356 : fprintf(stderr,
357 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
358 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
359 : lwstats->key.instance, lwstats->sh_acquire_count,
360 : lwstats->ex_acquire_count, lwstats->block_count,
361 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
362 : }
363 :
364 : LWLockRelease(&MainLWLockArray[0].lock);
365 : }
366 :
367 : static lwlock_stats *
368 : get_lwlock_stats_entry(LWLock *lock)
369 : {
370 : lwlock_stats_key key;
371 : lwlock_stats *lwstats;
372 : bool found;
373 :
374 : /*
375 : * During shared memory initialization, the hash table doesn't exist yet.
376 : * Stats of that phase aren't very interesting, so just collect operations
377 : * on all locks in a single dummy entry.
378 : */
379 : if (lwlock_stats_htab == NULL)
380 : return &lwlock_stats_dummy;
381 :
382 : /* Fetch or create the entry. */
383 : MemSet(&key, 0, sizeof(key));
384 : key.tranche = lock->tranche;
385 : key.instance = lock;
386 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
387 : if (!found)
388 : {
389 : lwstats->sh_acquire_count = 0;
390 : lwstats->ex_acquire_count = 0;
391 : lwstats->block_count = 0;
392 : lwstats->dequeue_self_count = 0;
393 : lwstats->spin_delay_count = 0;
394 : }
395 : return lwstats;
396 : }
397 : #endif /* LWLOCK_STATS */
398 :
399 :
400 : /*
401 : * Compute number of LWLocks required by named tranches. These will be
402 : * allocated in the main array.
403 : */
404 : static int
405 7402 : NumLWLocksForNamedTranches(void)
406 : {
407 7402 : int numLocks = 0;
408 : int i;
409 :
410 7458 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
411 56 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
412 :
413 7402 : return numLocks;
414 : }
415 :
416 : /*
417 : * Compute shmem space needed for LWLocks and named tranches.
418 : */
419 : Size
420 5484 : LWLockShmemSize(void)
421 : {
422 : Size size;
423 : int i;
424 5484 : int numLocks = NUM_FIXED_LWLOCKS;
425 :
426 : /* Calculate total number of locks needed in the main array. */
427 5484 : numLocks += NumLWLocksForNamedTranches();
428 :
429 : /* Space for the LWLock array. */
430 5484 : size = mul_size(numLocks, sizeof(LWLockPadded));
431 :
432 : /* Space for dynamic allocation counter, plus room for alignment. */
433 5484 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
434 :
435 : /* space for named tranches. */
436 5484 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
437 :
438 : /* space for name of each tranche. */
439 5526 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
440 42 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
441 :
442 5484 : return size;
443 : }
444 :
445 : /*
446 : * Allocate shmem space for the main LWLock array and all tranches and
447 : * initialize it. We also register extension LWLock tranches here.
448 : */
449 : void
450 1918 : CreateLWLocks(void)
451 : {
452 1918 : if (!IsUnderPostmaster)
453 : {
454 1918 : Size spaceLocks = LWLockShmemSize();
455 : int *LWLockCounter;
456 : char *ptr;
457 :
458 : /* Allocate space */
459 1918 : ptr = (char *) ShmemAlloc(spaceLocks);
460 :
461 : /* Leave room for dynamic allocation of tranches */
462 1918 : ptr += sizeof(int);
463 :
464 : /* Ensure desired alignment of LWLock array */
465 1918 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
466 :
467 1918 : MainLWLockArray = (LWLockPadded *) ptr;
468 :
469 : /*
470 : * Initialize the dynamic-allocation counter for tranches, which is
471 : * stored just before the first LWLock.
472 : */
473 1918 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
474 1918 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
475 :
476 : /* Initialize all LWLocks */
477 1918 : InitializeLWLocks();
478 : }
479 :
480 : /* Register named extension LWLock tranches in the current process. */
481 1932 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
482 14 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
483 14 : NamedLWLockTrancheArray[i].trancheName);
484 1918 : }
485 :
486 : /*
487 : * Initialize LWLocks that are fixed and those belonging to named tranches.
488 : */
489 : static void
490 1918 : InitializeLWLocks(void)
491 : {
492 1918 : int numNamedLocks = NumLWLocksForNamedTranches();
493 : int id;
494 : int i;
495 : int j;
496 : LWLockPadded *lock;
497 :
498 : /* Initialize all individual LWLocks in main array */
499 103572 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
500 101654 : LWLockInitialize(&lock->lock, id);
501 :
502 : /* Initialize buffer mapping LWLocks in main array */
503 1918 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
504 247422 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
505 245504 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
506 :
507 : /* Initialize lmgrs' LWLocks in main array */
508 1918 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
509 32606 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
510 30688 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
511 :
512 : /* Initialize predicate lmgrs' LWLocks in main array */
513 1918 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
514 32606 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
515 30688 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
516 :
517 : /*
518 : * Copy the info about any named tranches into shared memory (so that
519 : * other processes can see it), and initialize the requested LWLocks.
520 : */
521 1918 : if (NamedLWLockTrancheRequests > 0)
522 : {
523 : char *trancheNames;
524 :
525 14 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
526 14 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
527 :
528 14 : trancheNames = (char *) NamedLWLockTrancheArray +
529 14 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
530 14 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
531 :
532 28 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
533 : {
534 : NamedLWLockTrancheRequest *request;
535 : NamedLWLockTranche *tranche;
536 : char *name;
537 :
538 14 : request = &NamedLWLockTrancheRequestArray[i];
539 14 : tranche = &NamedLWLockTrancheArray[i];
540 :
541 14 : name = trancheNames;
542 14 : trancheNames += strlen(request->tranche_name) + 1;
543 14 : strcpy(name, request->tranche_name);
544 14 : tranche->trancheId = LWLockNewTrancheId();
545 14 : tranche->trancheName = name;
546 :
547 28 : for (j = 0; j < request->num_lwlocks; j++, lock++)
548 14 : LWLockInitialize(&lock->lock, tranche->trancheId);
549 : }
550 : }
551 1918 : }
552 :
/*
 * InitLWLockAccess - set up the backend-local state needed to hold LWLocks
 *
 * The only work done here is (re)initializing the statistics hash table,
 * and only when the file is built with LWLOCK_STATS.
 */
void
InitLWLockAccess(void)
{
#if defined(LWLOCK_STATS)
	init_lwlock_stats();
#endif
}
563 :
564 : /*
565 : * GetNamedLWLockTranche - returns the base address of LWLock from the
566 : * specified tranche.
567 : *
568 : * Caller needs to retrieve the requested number of LWLocks starting from
569 : * the base lock address returned by this API. This can be used for
570 : * tranches that are requested by using RequestNamedLWLockTranche() API.
571 : */
572 : LWLockPadded *
573 14 : GetNamedLWLockTranche(const char *tranche_name)
574 : {
575 : int lock_pos;
576 : int i;
577 :
578 : /*
579 : * Obtain the position of base address of LWLock belonging to requested
580 : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
581 : * in MainLWLockArray after fixed locks.
582 : */
583 14 : lock_pos = NUM_FIXED_LWLOCKS;
584 14 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
585 : {
586 14 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
587 : tranche_name) == 0)
588 14 : return &MainLWLockArray[lock_pos];
589 :
590 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
591 : }
592 :
593 0 : elog(ERROR, "requested tranche is not registered");
594 :
595 : /* just to keep compiler quiet */
596 : return NULL;
597 : }
598 :
599 : /*
600 : * Allocate a new tranche ID.
601 : */
602 : int
603 34 : LWLockNewTrancheId(void)
604 : {
605 : int result;
606 : int *LWLockCounter;
607 :
608 34 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
609 : /* We use the ShmemLock spinlock to protect LWLockCounter */
610 34 : SpinLockAcquire(ShmemLock);
611 34 : result = (*LWLockCounter)++;
612 34 : SpinLockRelease(ShmemLock);
613 :
614 34 : return result;
615 : }
616 :
617 : /*
618 : * Register a dynamic tranche name in the lookup table of the current process.
619 : *
620 : * This routine will save a pointer to the tranche name passed as an argument,
621 : * so the name should be allocated in a backend-lifetime context
622 : * (shared memory, TopMemoryContext, static constant, or similar).
623 : *
624 : * The tranche name will be user-visible as a wait event name, so try to
625 : * use a name that fits the style for those.
626 : */
627 : void
628 40 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
629 : {
630 : /* This should only be called for user-defined tranches. */
631 40 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
632 0 : return;
633 :
634 : /* Convert to array index. */
635 40 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
636 :
637 : /* If necessary, create or enlarge array. */
638 40 : if (tranche_id >= LWLockTrancheNamesAllocated)
639 : {
640 : int newalloc;
641 :
642 34 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
643 :
644 34 : if (LWLockTrancheNames == NULL)
645 34 : LWLockTrancheNames = (const char **)
646 34 : MemoryContextAllocZero(TopMemoryContext,
647 : newalloc * sizeof(char *));
648 : else
649 0 : LWLockTrancheNames =
650 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
651 34 : LWLockTrancheNamesAllocated = newalloc;
652 : }
653 :
654 40 : LWLockTrancheNames[tranche_id] = tranche_name;
655 : }
656 :
657 : /*
658 : * RequestNamedLWLockTranche
659 : * Request that extra LWLocks be allocated during postmaster
660 : * startup.
661 : *
662 : * This may only be called via the shmem_request_hook of a library that is
663 : * loaded into the postmaster via shared_preload_libraries. Calls from
664 : * elsewhere will fail.
665 : *
666 : * The tranche name will be user-visible as a wait event name, so try to
667 : * use a name that fits the style for those.
668 : */
669 : void
670 14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
671 : {
672 : NamedLWLockTrancheRequest *request;
673 :
674 14 : if (!process_shmem_requests_in_progress)
675 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
676 :
677 14 : if (NamedLWLockTrancheRequestArray == NULL)
678 : {
679 14 : NamedLWLockTrancheRequestsAllocated = 16;
680 14 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
681 14 : MemoryContextAlloc(TopMemoryContext,
682 : NamedLWLockTrancheRequestsAllocated
683 : * sizeof(NamedLWLockTrancheRequest));
684 : }
685 :
686 14 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
687 : {
688 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
689 :
690 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
691 0 : repalloc(NamedLWLockTrancheRequestArray,
692 : i * sizeof(NamedLWLockTrancheRequest));
693 0 : NamedLWLockTrancheRequestsAllocated = i;
694 : }
695 :
696 14 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
697 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
698 14 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
699 14 : request->num_lwlocks = num_lwlocks;
700 14 : NamedLWLockTrancheRequests++;
701 14 : }
702 :
703 : /*
704 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
705 : */
706 : void
707 21555854 : LWLockInitialize(LWLock *lock, int tranche_id)
708 : {
709 21555854 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
710 : #ifdef LOCK_DEBUG
711 : pg_atomic_init_u32(&lock->nwaiters, 0);
712 : #endif
713 21555854 : lock->tranche = tranche_id;
714 21555854 : proclist_init(&lock->waiters);
715 21555854 : }
716 :
717 : /*
718 : * Report start of wait event for light-weight locks.
719 : *
720 : * This function will be used by all the light-weight lock calls which
721 : * needs to wait to acquire the lock. This function distinguishes wait
722 : * event based on tranche and lock id.
723 : */
724 : static inline void
725 5699622 : LWLockReportWaitStart(LWLock *lock)
726 : {
727 5699622 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
728 5699622 : }
729 :
/*
 * Announce that a wait on a light-weight lock is over; the counterpart of
 * LWLockReportWaitStart().
 */
static inline void
LWLockReportWaitEnd(void)
{
	pgstat_report_wait_end();
}
738 :
739 : /*
740 : * Return the name of an LWLock tranche.
741 : */
742 : static const char *
743 6 : GetLWTrancheName(uint16 trancheId)
744 : {
745 : /* Built-in tranche or individual LWLock? */
746 6 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
747 6 : return BuiltinTrancheNames[trancheId];
748 :
749 : /*
750 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
751 : * it's possible that the tranche has never been registered in the current
752 : * process, in which case give up and return "extension".
753 : */
754 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
755 :
756 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
757 0 : LWLockTrancheNames[trancheId] == NULL)
758 0 : return "extension";
759 :
760 0 : return LWLockTrancheNames[trancheId];
761 : }
762 :
763 : /*
764 : * Return an identifier for an LWLock based on the wait class and event.
765 : */
766 : const char *
767 6 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
768 : {
769 : Assert(classId == PG_WAIT_LWLOCK);
770 : /* The event IDs are just tranche numbers. */
771 6 : return GetLWTrancheName(eventId);
772 : }
773 :
774 : /*
775 : * Internal function that tries to atomically acquire the lwlock in the passed
776 : * in mode.
777 : *
778 : * This function will not block waiting for a lock to become free - that's the
779 : * caller's job.
780 : *
781 : * Returns true if the lock isn't free and we need to wait.
782 : */
783 : static bool
784 682509802 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
785 : {
786 : uint32 old_state;
787 :
788 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
789 :
790 : /*
791 : * Read once outside the loop, later iterations will get the newer value
792 : * via compare & exchange.
793 : */
794 682509802 : old_state = pg_atomic_read_u32(&lock->state);
795 :
796 : /* loop until we've determined whether we could acquire the lock or not */
797 : while (true)
798 562482 : {
799 : uint32 desired_state;
800 : bool lock_free;
801 :
802 683072284 : desired_state = old_state;
803 :
804 683072284 : if (mode == LW_EXCLUSIVE)
805 : {
806 426061428 : lock_free = (old_state & LW_LOCK_MASK) == 0;
807 426061428 : if (lock_free)
808 421997738 : desired_state += LW_VAL_EXCLUSIVE;
809 : }
810 : else
811 : {
812 257010856 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
813 257010856 : if (lock_free)
814 249422248 : desired_state += LW_VAL_SHARED;
815 : }
816 :
817 : /*
818 : * Attempt to swap in the state we are expecting. If we didn't see
819 : * lock to be free, that's just the old value. If we saw it as free,
820 : * we'll attempt to mark it acquired. The reason that we always swap
821 : * in the value is that this doubles as a memory barrier. We could try
822 : * to be smarter and only swap in values if we saw the lock as free,
823 : * but benchmark haven't shown it as beneficial so far.
824 : *
825 : * Retry if the value changed since we last looked at it.
826 : */
827 683072284 : if (pg_atomic_compare_exchange_u32(&lock->state,
828 : &old_state, desired_state))
829 : {
830 682509802 : if (lock_free)
831 : {
832 : /* Great! Got the lock. */
833 : #ifdef LOCK_DEBUG
834 : if (mode == LW_EXCLUSIVE)
835 : lock->owner = MyProc;
836 : #endif
837 671035534 : return false;
838 : }
839 : else
840 11474268 : return true; /* somebody else has the lock */
841 : }
842 : }
843 : pg_unreachable();
844 : }
845 :
846 : /*
847 : * Lock the LWLock's wait list against concurrent activity, by setting
848 : * LW_FLAG_LOCKED in lock->state (spinning until we are the one to set it).
849 : * NB: even though the wait list is locked, non-conflicting lock operations
850 : * may still happen concurrently.
851 : *
852 : * Time spent holding mutex should be short!
853 : */
854 : static void
855 17706690 : LWLockWaitListLock(LWLock *lock)
856 : {
857 : uint32 old_state;
858 : #ifdef LWLOCK_STATS
859 : lwlock_stats *lwstats;
860 : uint32 delays = 0;
861 :
862 : lwstats = get_lwlock_stats_entry(lock);
863 : #endif
864 :
865 : while (true)
866 : {
867 : /* always try once to acquire lock directly */
868 17706690 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
869 17706690 : if (!(old_state & LW_FLAG_LOCKED))
870 17517834 : break; /* got lock: the flag was clear before our fetch-or */
871 :
872 : /* and then spin, only reading the state (no read-modify-write), until lock is released */
873 : {
874 : SpinDelayStatus delayStatus;
875 :
876 188856 : init_local_spin_delay(&delayStatus);
877 :
878 588398 : while (old_state & LW_FLAG_LOCKED)
879 : {
880 399542 : perform_spin_delay(&delayStatus);
881 399542 : old_state = pg_atomic_read_u32(&lock->state);
882 : }
883 : #ifdef LWLOCK_STATS
884 : delays += delayStatus.delays;
885 : #endif
886 188856 : finish_spin_delay(&delayStatus);
887 : }
888 :
889 : /*
890 : * Retry. The lock might obviously already be re-acquired by the time
891 : * we're attempting to get it again.
892 : */
893 : }
894 :
895 : #ifdef LWLOCK_STATS
896 : lwstats->spin_delay_count += delays;
897 : #endif
898 17517834 : }
899 :
900 : /*
901 : * Unlock the LWLock's wait list, by clearing LW_FLAG_LOCKED in lock->state.
902 : *
903 : * Note that it can be more efficient to manipulate flags and release the
904 : * lock in a single atomic operation, as LWLockWakeup() does.
905 : */
906 : static void
907 10986488 : LWLockWaitListUnlock(LWLock *lock)
908 : {
909 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
910 :
911 10986488 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
912 :
913 : Assert(old_state & LW_FLAG_LOCKED);
914 10986488 : }
915 :
916 : /*
917 : * Wakeup all the lockers that currently have a chance to acquire the lock.
918 : */
919 : static void
920 6531346 : LWLockWakeup(LWLock *lock)
921 : {
922 : bool new_release_ok;
923 6531346 : bool wokeup_somebody = false;
924 : proclist_head wakeup;
925 : proclist_mutable_iter iter;
926 :
927 6531346 : proclist_init(&wakeup);
928 :
929 6531346 : new_release_ok = true;
930 :
931 : /* lock wait list while collecting backends to wake up */
932 6531346 : LWLockWaitListLock(lock);
933 :
934 10375766 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
935 : {
936 5751672 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
937 : /* once a lock-acquiring waiter has been woken, leave exclusive waiters queued */
938 5751672 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
939 3344 : continue;
940 :
941 5748328 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
942 5748328 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
943 :
944 5748328 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
945 : {
946 : /*
947 : * Prevent additional wakeups until retryer gets to run. Backends
948 : * that are just waiting for the lock to become free don't retry
949 : * automatically.
950 : */
951 5669282 : new_release_ok = false;
952 :
953 : /*
954 : * Don't wakeup (further) exclusive lockers.
955 : */
956 5669282 : wokeup_somebody = true;
957 : }
958 :
959 : /*
960 : * Signal that the process isn't on the wait list anymore. This allows
961 : * LWLockDequeueSelf() to remove itself from the waitlist with a
962 : * proclist_delete(), rather than having to check if it has been
963 : * removed from the list.
964 : */
965 : Assert(waiter->lwWaiting == LW_WS_WAITING);
966 5748328 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
967 :
968 : /*
969 : * Once we've woken up an exclusive locker, there's no point in waking
970 : * up anybody else.
971 : */
972 5748328 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
973 1907252 : break;
974 : }
975 :
976 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
977 :
978 : /* unset required flags, and release the wait list lock, in one fell swoop */
979 : {
980 : uint32 old_state;
981 : uint32 desired_state;
982 :
983 6531346 : old_state = pg_atomic_read_u32(&lock->state);
984 : while (true)
985 : {
986 6592162 : desired_state = old_state;
987 :
988 : /* compute desired flags */
989 :
990 6592162 : if (new_release_ok)
991 921586 : desired_state |= LW_FLAG_RELEASE_OK;
992 : else
993 5670576 : desired_state &= ~LW_FLAG_RELEASE_OK;
994 :
995 6592162 : if (proclist_is_empty(&wakeup))
996 890968 : desired_state &= ~LW_FLAG_HAS_WAITERS;
997 :
998 6592162 : desired_state &= ~LW_FLAG_LOCKED; /* release wait list lock */
999 :
1000 6592162 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1001 : desired_state))
1002 6531346 : break;
1003 : }
1004 : }
1005 :
1006 : /* Awaken any waiters I removed from the queue. */
1007 12279674 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1008 : {
1009 5748328 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1010 :
1011 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1012 5748328 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1013 :
1014 : /*
1015 : * Guarantee that lwWaiting being unset only becomes visible once the
1016 : * unlink from the list has completed. Otherwise the target backend
1017 : * could be woken up for another reason and enqueue for a new lock - if
1018 : * that happens before the list unlink happens, the list would end up
1019 : * being corrupted.
1020 : *
1021 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1022 : * another lock.
1023 : */
1024 5748328 : pg_write_barrier();
1025 5748328 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1026 5748328 : PGSemaphoreUnlock(waiter->sem);
1027 : }
1028 6531346 : }
1029 :
1030 : /*
1031 : * Add ourselves to the lock's wait queue: at the head for LW_WAIT_UNTIL_FREE
1032 : * waiters, at the tail otherwise. Also sets LW_FLAG_HAS_WAITERS.
1033 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1034 : */
1035 : static void
1036 5917908 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1037 : {
1038 : /*
1039 : * If we don't have a PGPROC structure, there's no way to wait. This
1040 : * should never occur, since MyProc should only be null during shared
1041 : * memory initialization.
1042 : */
1043 5917908 : if (MyProc == NULL)
1044 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1045 :
1046 5917908 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1047 0 : elog(PANIC, "queueing for lock while waiting on another one");
1048 :
1049 5917908 : LWLockWaitListLock(lock);
1050 :
1051 : /* setting the flag is protected by the wait list lock taken above */
1052 5917908 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1053 :
1054 5917908 : MyProc->lwWaiting = LW_WS_WAITING;
1055 5917908 : MyProc->lwWaitMode = mode;
1056 :
1057 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1058 5917908 : if (mode == LW_WAIT_UNTIL_FREE)
1059 87278 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1060 : else
1061 5830630 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1062 :
1063 : /* Can release the mutex now */
1064 5917908 : LWLockWaitListUnlock(lock);
1065 :
1066 : #ifdef LOCK_DEBUG
1067 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1068 : #endif
1069 5917908 : }
1070 :
1071 : /*
1072 : * Remove ourselves from the waitlist.
1073 : *
1074 : * This is used if we queued ourselves because we thought we needed to sleep
1075 : * but, after further checking, we discovered that we don't actually need to
1076 : * do so. If a waker has already unlinked us, we instead wait for its wakeup.
1077 : */
1078 : static void
1079 218286 : LWLockDequeueSelf(LWLock *lock)
1080 : {
1081 : bool on_waitlist;
1082 :
1083 : #ifdef LWLOCK_STATS
1084 : lwlock_stats *lwstats;
1085 :
1086 : lwstats = get_lwlock_stats_entry(lock);
1087 :
1088 : lwstats->dequeue_self_count++;
1089 : #endif
1090 :
1091 218286 : LWLockWaitListLock(lock);
1092 :
1093 : /*
1094 : * Remove ourselves from the waitlist, unless we've already been removed.
1095 : * The removal happens with the wait list lock held, so there's no race in
1096 : * this check.
1097 : */
1098 218286 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1099 218286 : if (on_waitlist)
1100 166236 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1101 :
1102 218286 : if (proclist_is_empty(&lock->waiters) &&
1103 202198 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1104 : {
1105 201978 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1106 : }
1107 :
1108 : /* XXX: combine with fetch_and above? */
1109 218286 : LWLockWaitListUnlock(lock);
1110 :
1111 : /* we unlinked ourselves: clear waiting state again, nice for debugging */
1112 218286 : if (on_waitlist)
1113 166236 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1114 : else
1115 : {
1116 52050 : int extraWaits = 0;
1117 :
1118 : /*
1119 : * Somebody else dequeued us and has or will wake us up. Deal with the
1120 : * superfluous absorption of a wakeup.
1121 : */
1122 :
1123 : /*
1124 : * Reset RELEASE_OK flag if somebody woke us before we removed
1125 : * ourselves - they'll have set it to false.
1126 : */
1127 52050 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1128 :
1129 : /*
1130 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1131 : * get reset at some inconvenient point later. Most of the time this
1132 : * will immediately return.
1133 : */
1134 : for (;;)
1135 : {
1136 52050 : PGSemaphoreLock(MyProc->sem);
1137 52050 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1138 52050 : break;
1139 0 : extraWaits++;
1140 : }
1141 :
1142 : /*
1143 : * Fix the process wait semaphore's count for any absorbed wakeups.
1144 : */
1145 52050 : while (extraWaits-- > 0)
1146 0 : PGSemaphoreUnlock(MyProc->sem);
1147 : }
1148 :
1149 : #ifdef LOCK_DEBUG
1150 : {
1151 : /* not waiting anymore */
1152 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1153 :
1154 : Assert(nwaiters < MAX_BACKENDS);
1155 : }
1156 : #endif
1157 218286 : }
1158 :
1159 : /*
1160 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1161 : *
1162 : * If the lock is not available, sleep until it is. Returns true if the lock
1163 : * was available immediately, false if we had to sleep. 'mode' must be
1164 : * LW_SHARED or LW_EXCLUSIVE; holding too many LWLocks at once is an ERROR.
1165 : * Side effect: cancel/die interrupts are held off until lock release.
1166 : */
1167 : bool
1168 666600242 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1169 : {
1170 666600242 : PGPROC *proc = MyProc;
1171 666600242 : bool result = true;
1172 666600242 : int extraWaits = 0;
1173 : #ifdef LWLOCK_STATS
1174 : lwlock_stats *lwstats;
1175 :
1176 : lwstats = get_lwlock_stats_entry(lock);
1177 : #endif
1178 :
1179 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1180 :
1181 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1182 :
1183 : #ifdef LWLOCK_STATS
1184 : /* Count lock acquisition attempts */
1185 : if (mode == LW_EXCLUSIVE)
1186 : lwstats->ex_acquire_count++;
1187 : else
1188 : lwstats->sh_acquire_count++;
1189 : #endif /* LWLOCK_STATS */
1190 :
1191 : /*
1192 : * We can't wait if we haven't got a PGPROC. This should only occur
1193 : * during bootstrap or shared memory initialization. Put an Assert here
1194 : * to catch unsafe coding practices.
1195 : */
1196 : Assert(!(proc == NULL && IsUnderPostmaster));
1197 :
1198 : /* Ensure we will have room to remember the lock */
1199 666600242 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1200 0 : elog(ERROR, "too many LWLocks taken");
1201 :
1202 : /*
1203 : * Lock out cancel/die interrupts until we exit the code section protected
1204 : * by the LWLock. This ensures that interrupts will not interfere with
1205 : * manipulations of data structures in shared memory.
1206 : */
1207 666600242 : HOLD_INTERRUPTS();
1208 :
1209 : /*
1210 : * Loop here to try to acquire lock after each time we are signaled by
1211 : * LWLockRelease.
1212 : *
1213 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1214 : * lock, rather than retrying and possibly having to go back to sleep. But
1215 : * in practice that is no good because it means a process swap for every
1216 : * lock acquisition when two or more processes are contending for the same
1217 : * lock. Since LWLocks are normally used to protect not-very-long
1218 : * sections of computation, a process needs to be able to acquire and
1219 : * release the same lock many times during a single CPU time slice, even
1220 : * in the presence of contention. The efficiency of being able to do that
1221 : * outweighs the inefficiency of sometimes wasting a process dispatch
1222 : * cycle because the lock is not free when a released waiter finally gets
1223 : * to run. See pgsql-hackers archives for 29-Dec-01.
1224 : */
1225 : for (;;)
1226 5618928 : {
1227 : bool mustwait;
1228 :
1229 : /*
1230 : * Try to grab the lock the first time, we're not in the waitqueue
1231 : * yet/anymore.
1232 : */
1233 672219170 : mustwait = LWLockAttemptLock(lock, mode);
1234 :
1235 672219170 : if (!mustwait)
1236 : {
1237 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1238 666388540 : break; /* got the lock */
1239 : }
1240 :
1241 : /*
1242 : * Ok, at this point we couldn't grab the lock on the first try. We
1243 : * cannot simply queue ourselves to the end of the list and wait to be
1244 : * woken up because by now the lock could long have been released.
1245 : * Instead add us to the queue and try to grab the lock again. If we
1246 : * succeed we need to revert the queuing and be happy, otherwise we
1247 : * recheck the lock. If we still couldn't grab it, we know that the
1248 : * other locker will see our queue entries when releasing since they
1249 : * existed before we checked for the lock.
1250 : */
1251 :
1252 : /* add to the queue */
1253 5830630 : LWLockQueueSelf(lock, mode);
1254 :
1255 : /* we're now guaranteed to be woken up if necessary */
1256 5830630 : mustwait = LWLockAttemptLock(lock, mode);
1257 :
1258 : /* ok, grabbed the lock the second time round, need to undo queueing */
1259 5830630 : if (!mustwait)
1260 : {
1261 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1262 :
1263 211702 : LWLockDequeueSelf(lock);
1264 211702 : break;
1265 : }
1266 :
1267 : /*
1268 : * Wait until awakened.
1269 : *
1270 : * It is possible that we get awakened for a reason other than being
1271 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1272 : * we've gotten the LWLock, re-increment the sema by the number of
1273 : * additional signals received.
1274 : */
1275 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1276 :
1277 : #ifdef LWLOCK_STATS
1278 : lwstats->block_count++;
1279 : #endif
1280 :
1281 5618928 : LWLockReportWaitStart(lock);
1282 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1283 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1284 :
1285 : for (;;)
1286 : {
1287 5618928 : PGSemaphoreLock(proc->sem);
1288 5618928 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1289 5618928 : break;
1290 0 : extraWaits++;
1291 : }
1292 :
1293 : /* Retrying, allow LWLockRelease to release waiters again. */
1294 5618928 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1295 :
1296 : #ifdef LOCK_DEBUG
1297 : {
1298 : /* not waiting anymore */
1299 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1300 :
1301 : Assert(nwaiters < MAX_BACKENDS);
1302 : }
1303 : #endif
1304 :
1305 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1306 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1307 5618928 : LWLockReportWaitEnd();
1308 :
1309 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1310 :
1311 : /* Now loop back and try to acquire lock again; remember that we slept. */
1312 5618928 : result = false;
1313 : }
1314 :
1315 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1316 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1317 :
1318 : /* Add lock to list of locks held by this backend */
1319 666600242 : held_lwlocks[num_held_lwlocks].lock = lock;
1320 666600242 : held_lwlocks[num_held_lwlocks++].mode = mode;
1321 :
1322 : /*
1323 : * Fix the process wait semaphore's count for any absorbed wakeups.
1324 : */
1325 666600242 : while (extraWaits-- > 0)
1326 0 : PGSemaphoreUnlock(proc->sem);
1327 :
1328 666600242 : return result;
1329 : }
1330 :
1331 : /*
1332 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1333 : *
1334 : * If the lock is not available, return false with no side-effects; we never
1335 : * queue ourselves or sleep here.
1336 : * If successful, return true; cancel/die interrupts are held off until lock release.
1337 : */
1338 : bool
1339 4201344 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1340 : {
1341 : bool mustwait;
1342 :
1343 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1344 :
1345 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1346 :
1347 : /* Ensure we will have room to remember the lock */
1348 4201344 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1349 0 : elog(ERROR, "too many LWLocks taken");
1350 :
1351 : /*
1352 : * Lock out cancel/die interrupts until we exit the code section protected
1353 : * by the LWLock. This ensures that interrupts will not interfere with
1354 : * manipulations of data structures in shared memory.
1355 : */
1356 4201344 : HOLD_INTERRUPTS();
1357 :
1358 : /* Check for the lock */
1359 4201344 : mustwait = LWLockAttemptLock(lock, mode);
1360 :
1361 4201344 : if (mustwait)
1362 : {
1363 : /* Failed to get lock, so release interrupt holdoff */
1364 2064 : RESUME_INTERRUPTS();
1365 :
1366 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1367 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1368 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1369 : }
1370 : else
1371 : {
1372 : /* Add lock to list of locks held by this backend */
1373 4199280 : held_lwlocks[num_held_lwlocks].lock = lock;
1374 4199280 : held_lwlocks[num_held_lwlocks++].mode = mode;
1375 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1376 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1377 : }
1378 4201344 : return !mustwait;
1379 : }
1380 :
1381 : /*
1382 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1383 : *
1384 : * The semantics of this function are a bit funky. If the lock is currently
1385 : * free, it is acquired in the given mode, and the function returns true. If
1386 : * the lock isn't immediately free, the function waits until it is released
1387 : * and returns false, but does not acquire the lock.
1388 : *
1389 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1390 : * holding WALWriteLock, it can flush the commit records of many other
1391 : * backends as a side-effect. Those other backends need to wait until the
1392 : * flush finishes, but don't need to acquire the lock anymore. They can just
1393 : * wake up, observe that their records have already been flushed, and return.
1394 : */
1395 : bool
1396 247182 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1397 : {
1398 247182 : PGPROC *proc = MyProc;
1399 : bool mustwait;
1400 247182 : int extraWaits = 0;
1401 : #ifdef LWLOCK_STATS
1402 : lwlock_stats *lwstats;
1403 :
1404 : lwstats = get_lwlock_stats_entry(lock);
1405 : #endif
1406 :
1407 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1408 :
1409 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1410 :
1411 : /* Ensure we will have room to remember the lock */
1412 247182 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1413 0 : elog(ERROR, "too many LWLocks taken");
1414 :
1415 : /*
1416 : * Lock out cancel/die interrupts until we exit the code section protected
1417 : * by the LWLock. This ensures that interrupts will not interfere with
1418 : * manipulations of data structures in shared memory.
1419 : */
1420 247182 : HOLD_INTERRUPTS();
1421 :
1422 : /*
1423 : * NB: We're using nearly the same twice-in-a-row lock acquisition protocol as LWLockAcquire(). Check its
1424 : * comments for details. Unlike there, we queue as LW_WAIT_UNTIL_FREE and don't retry after being woken.
1425 : */
1426 247182 : mustwait = LWLockAttemptLock(lock, mode);
1427 :
1428 247182 : if (mustwait)
1429 : {
1430 11476 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1431 :
1432 11476 : mustwait = LWLockAttemptLock(lock, mode);
1433 :
1434 11476 : if (mustwait)
1435 : {
1436 : /*
1437 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1438 : * bogus wakeups.
1439 : */
1440 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1441 :
1442 : #ifdef LWLOCK_STATS
1443 : lwstats->block_count++;
1444 : #endif
1445 :
1446 11170 : LWLockReportWaitStart(lock);
1447 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1448 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1449 :
1450 : for (;;)
1451 : {
1452 11170 : PGSemaphoreLock(proc->sem);
1453 11170 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1454 11170 : break;
1455 0 : extraWaits++;
1456 : }
1457 :
1458 : #ifdef LOCK_DEBUG
1459 : {
1460 : /* not waiting anymore */
1461 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1462 :
1463 : Assert(nwaiters < MAX_BACKENDS);
1464 : }
1465 : #endif
1466 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1467 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1468 11170 : LWLockReportWaitEnd();
1469 :
1470 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1471 : }
1472 : else
1473 : {
1474 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1475 :
1476 : /*
1477 : * Got lock in the second attempt, undo queueing. We need to treat
1478 : * this as having successfully acquired the lock, otherwise we'd
1479 : * not necessarily wake up people we've prevented from acquiring
1480 : * the lock.
1481 : */
1482 306 : LWLockDequeueSelf(lock);
1483 : }
1484 : }
1485 :
1486 : /*
1487 : * Fix the process wait semaphore's count for any absorbed wakeups.
1488 : */
1489 247182 : while (extraWaits-- > 0)
1490 0 : PGSemaphoreUnlock(proc->sem);
1491 :
1492 247182 : if (mustwait)
1493 : {
1494 : /* Failed to get lock, so release interrupt holdoff */
1495 11170 : RESUME_INTERRUPTS();
1496 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1497 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1498 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1499 : }
1500 : else
1501 : {
1502 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1503 : /* Add lock to list of locks held by this backend */
1504 236012 : held_lwlocks[num_held_lwlocks].lock = lock;
1505 236012 : held_lwlocks[num_held_lwlocks++].mode = mode;
1506 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1507 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1508 : }
1509 :
1510 247182 : return !mustwait;
1511 : }
1512 :
1513 : /*
1514 : * Does the lwlock in its current state need to wait for the variable value to
1515 : * change?
1516 : *
1517 : * If we don't need to wait, and it's because the value of the variable has
1518 : * changed, store the current value in newval.
1519 : *
1520 : * *result is set to true if the lock was free (i.e. not held exclusively), and false otherwise.
1521 : */
1522 : static bool
1523 6222690 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1524 : uint64 *newval, bool *result)
1525 : {
1526 : bool mustwait;
1527 : uint64 value;
1528 :
1529 : /*
1530 : * Test first to see if the slot is free right now.
1531 : *
1532 : * XXX: the only caller of this routine, WaitXLogInsertionsToFinish()
1533 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1534 : * this, so we don't need a memory barrier here as far as the current
1535 : * usage is concerned. But that might not be safe in general.
1536 : */
1537 6222690 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1538 :
1539 6222690 : if (!mustwait)
1540 : {
1541 4491336 : *result = true;
1542 4491336 : return false;
1543 : }
1544 :
1545 1731354 : *result = false;
1546 :
1547 : /*
1548 : * Reading this value atomically is safe even on platforms where uint64
1549 : * cannot be read without observing a torn value.
1550 : */
1551 1731354 : value = pg_atomic_read_u64(valptr);
1552 :
1553 1731354 : if (value != oldval)
1554 : {
1555 1586028 : mustwait = false;
1556 1586028 : *newval = value;
1557 : }
1558 : else
1559 : {
1560 145326 : mustwait = true;
1561 : }
1562 :
1563 1731354 : return mustwait;
1564 : }
1565 :
1566 : /*
1567 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1568 : *
1569 : * If the lock is held and *valptr equals oldval, waits until the lock is
1570 : * either freed, or the lock holder updates *valptr by calling
1571 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1572 : * waiting), returns true. If the lock is still held, but *valptr no longer
1573 : * matches oldval, returns false and sets *newval to the current value in
1574 : * *valptr.
1575 : *
1576 : * Note: this function ignores shared lock holders; if the lock is held
1577 : * in shared mode, returns 'true'.
1578 : *
1579 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1580 : * hence the caller of this function may want to rely on an explicit barrier or
1581 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1582 : */
1583 : bool
1584 6077364 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1585 : uint64 *newval)
1586 : {
1587 6077364 : PGPROC *proc = MyProc;
1588 6077364 : int extraWaits = 0;
1589 6077364 : bool result = false;
1590 : #ifdef LWLOCK_STATS
1591 : lwlock_stats *lwstats;
1592 :
1593 : lwstats = get_lwlock_stats_entry(lock);
1594 : #endif
1595 :
1596 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1597 :
1598 : /*
1599 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1600 : * cleanup mechanism to remove us from the wait queue if we got
1601 : * interrupted.
1602 : */
1603 6077364 : HOLD_INTERRUPTS();
1604 :
1605 : /*
1606 : * Loop here to check the lock's status after each time we are signaled.
1607 : */
1608 : for (;;)
1609 69524 : {
1610 : bool mustwait;
1611 :
1612 6146888 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1613 : &result);
1614 :
1615 6146888 : if (!mustwait)
1616 6071086 : break; /* the lock was free or value didn't match */
1617 :
1618 : /*
1619 : * Add myself to wait queue. Note that this is racy, somebody else
1620 : * could wakeup before we're finished queuing. NB: We're using nearly
1621 : * the same twice-in-a-row lock acquisition protocol as
1622 : * LWLockAcquire(). Check its comments for details. The only
1623 : * difference is that we also have to check the variable's values when
1624 : * checking the state of the lock.
1625 : */
1626 75802 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1627 :
1628 : /*
1629 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1630 : * lock is released.
1631 : */
1632 75802 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1633 :
1634 : /*
1635 : * We're now guaranteed to be woken up if necessary. Recheck the lock
1636 : * and variables state.
1637 : */
1638 75802 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1639 : &result);
1640 :
1641 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1642 75802 : if (!mustwait)
1643 : {
1644 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1645 :
1646 6278 : LWLockDequeueSelf(lock);
1647 6278 : break;
1648 : }
1649 :
1650 : /*
1651 : * Wait until awakened.
1652 : *
1653 : * It is possible that we get awakened for a reason other than being
1654 : * signaled by LWLockRelease or LWLockUpdateVar. If so, loop back and wait
1655 : * again. Once we're done waiting, re-increment the sema by the number of
1656 : * additional signals received.
1657 : */
1658 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1659 :
1660 : #ifdef LWLOCK_STATS
1661 : lwstats->block_count++;
1662 : #endif
1663 :
1664 69524 : LWLockReportWaitStart(lock);
1665 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1666 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1667 :
1668 : for (;;)
1669 : {
1670 69524 : PGSemaphoreLock(proc->sem);
1671 69524 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1672 69524 : break;
1673 0 : extraWaits++;
1674 : }
1675 :
1676 : #ifdef LOCK_DEBUG
1677 : {
1678 : /* not waiting anymore */
1679 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1680 :
1681 : Assert(nwaiters < MAX_BACKENDS);
1682 : }
1683 : #endif
1684 :
1685 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1686 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1687 69524 : LWLockReportWaitEnd();
1688 :
1689 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1690 :
1691 : /* Now loop back and check the status of the lock again. */
1692 : }
1693 :
1694 : /*
1695 : * Fix the process wait semaphore's count for any absorbed wakeups.
1696 : */
1697 6077364 : while (extraWaits-- > 0)
1698 0 : PGSemaphoreUnlock(proc->sem);
1699 :
1700 : /*
1701 : * Now okay to allow cancel/die interrupts.
1702 : */
1703 6077364 : RESUME_INTERRUPTS();
1704 :
1705 6077364 : return result;
1706 : }
1707 :
1708 :
1709 : /*
1710 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1711 : *
1712 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1713 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1714 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1715 : * lock is guaranteed to see the new value, and act accordingly.
1716 : *
1717 : * The caller must be holding the lock in exclusive mode.
1718 : */
1719 : void
1720 4850294 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1721 : {
1722 : proclist_head wakeup;
1723 : proclist_mutable_iter iter;
1724 :
1725 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1726 :
1727 : /*
1728 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1729 : * that the variable is updated before waking up waiters.
1730 : */
1731 4850294 : pg_atomic_exchange_u64(valptr, val);
1732 :
1733 4850294 : proclist_init(&wakeup);
1734 :
1735 4850294 : LWLockWaitListLock(lock);
1736 :
1737 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1738 :
1739 : /*
1740 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1741 : * up. They are always in the front of the queue, so stop at the first waiter of another mode.
1742 : */
1743 4853952 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1744 : {
1745 93698 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1746 :
1747 93698 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1748 90040 : break;
1749 :
1750 3658 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1751 3658 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1752 :
1753 : /* see LWLockWakeup() */
1754 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1755 3658 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1756 : }
1757 :
1758 : /* We are done updating shared state of the lock itself. */
1759 4850294 : LWLockWaitListUnlock(lock);
1760 :
1761 : /*
1762 : * Awaken any waiters I removed from the queue.
1763 : */
1764 4853952 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1765 : {
1766 3658 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1767 :
1768 3658 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1769 : /* check comment in LWLockWakeup() about this barrier */
1770 3658 : pg_write_barrier();
1771 3658 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1772 3658 : PGSemaphoreUnlock(waiter->sem);
1773 : }
1774 4850294 : }
1775 :
1776 :
1777 : /*
1778 : * LWLockRelease - release a previously acquired lock; elog(ERROR) if we don't hold it
1779 : */
1780 : void
1781 671035534 : LWLockRelease(LWLock *lock)
1782 : {
1783 : LWLockMode mode;
1784 : uint32 oldstate;
1785 : bool check_waiters;
1786 : int i;
1787 :
1788 : /*
1789 : * Remove lock from list of locks held. Usually, but not always, it will
1790 : * be the latest-acquired lock; so search array backwards.
1791 : */
1792 745896398 : for (i = num_held_lwlocks; --i >= 0;)
1793 745896398 : if (lock == held_lwlocks[i].lock)
1794 671035534 : break;
1795 :
1796 671035534 : if (i < 0)
1797 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1798 :
1799 671035534 : mode = held_lwlocks[i].mode;
1800 :
1801 671035534 : num_held_lwlocks--;
1802 745896398 : for (; i < num_held_lwlocks; i++)
1803 74860864 : held_lwlocks[i] = held_lwlocks[i + 1];
1804 :
1805 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1806 :
1807 : /*
1808 : * Release my hold on lock, after that it can immediately be acquired by
1809 : * others, even if we still have to wakeup other waiters.
1810 : */
1811 671035534 : if (mode == LW_EXCLUSIVE)
1812 421792632 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1813 : else
1814 249242902 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1815 :
1816 : /* despite its name, oldstate is the state after our release (sub_fetch); nobody can hold it exclusively now */
1817 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1818 :
1819 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1820 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1821 :
1822 : /*
1823 : * Wake up waiters only if there are any, the lock is now entirely free, and RELEASE_OK is set. Without
1824 : * RELEASE_OK we're still waiting for woken backends to get scheduled; don't wake them up again.
1825 : */
1826 671035534 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1827 6564912 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1828 6564912 : (oldstate & LW_LOCK_MASK) == 0)
1829 6531346 : check_waiters = true;
1830 : else
1831 664504188 : check_waiters = false;
1832 :
1833 : /*
1834 : * As waking up waiters requires the spinlock to be acquired, only do so
1835 : * if necessary.
1836 : */
1837 671035534 : if (check_waiters)
1838 : {
1839 : /* XXX: remove before commit? */
1840 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1841 6531346 : LWLockWakeup(lock);
1842 : }
1843 :
1844 : /*
1845 : * Now okay to allow cancel/die interrupts.
1846 : */
1847 671035534 : RESUME_INTERRUPTS();
1848 671035534 : }
1849 :
1850 : /*
1851 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1852 : */
1853 : void
1854 27774242 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1855 : {
1856 : /*
1857 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1858 : * that the variable is updated before releasing the lock.
1859 : */
1860 27774242 : pg_atomic_exchange_u64(valptr, val);
1861 :
1862 27774242 : LWLockRelease(lock);
1863 27774242 : }
1864 :
1865 :
1866 : /*
1867 : * LWLockReleaseAll - release all currently-held locks
1868 : *
1869 : * Used to clean up after ereport(ERROR). An important difference between this
1870 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1871 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1872 : * has been set to an appropriate level earlier in error recovery. We could
1873 : * decrement it below zero if we allow it to drop for each released lock!
1874 : */
1875 : void
1876 95680 : LWLockReleaseAll(void)
1877 : {
1878 96066 : while (num_held_lwlocks > 0)
1879 : {
1880 386 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1881 :
1882 386 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1883 : }
1884 95680 : }
1885 :
1886 :
1887 : /*
1888 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1889 : *
1890 : * This is meant as debug support only.
1891 : */
1892 : bool
1893 0 : LWLockHeldByMe(LWLock *lock)
1894 : {
1895 : int i;
1896 :
1897 0 : for (i = 0; i < num_held_lwlocks; i++)
1898 : {
1899 0 : if (held_lwlocks[i].lock == lock)
1900 0 : return true;
1901 : }
1902 0 : return false;
1903 : }
1904 :
1905 : /*
1906 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1907 : *
1908 : * This is meant as debug support only.
1909 : */
1910 : bool
1911 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1912 : {
1913 : char *held_lock_addr;
1914 : char *begin;
1915 : char *end;
1916 : int i;
1917 :
1918 0 : begin = (char *) lock;
1919 0 : end = begin + nlocks * stride;
1920 0 : for (i = 0; i < num_held_lwlocks; i++)
1921 : {
1922 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
1923 0 : if (held_lock_addr >= begin &&
1924 0 : held_lock_addr < end &&
1925 0 : (held_lock_addr - begin) % stride == 0)
1926 0 : return true;
1927 : }
1928 0 : return false;
1929 : }
1930 :
1931 : /*
1932 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1933 : *
1934 : * This is meant as debug support only.
1935 : */
1936 : bool
1937 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1938 : {
1939 : int i;
1940 :
1941 0 : for (i = 0; i < num_held_lwlocks; i++)
1942 : {
1943 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
1944 0 : return true;
1945 : }
1946 0 : return false;
1947 : }
|