Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frammishes. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. Acquiring the lock with LWLockAcquire
15 : * does not modify the variable: it keeps whatever value it was set to when
16 : * the lock was last released, and it can be updated without releasing the
17 : * lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : * appropriate value for a free lock. The meaning of the variable is up to
21 : * the caller; the lightweight lock code just assigns and compares it. (See the usage sketch that follows this header comment.)
22 : *
23 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the
52 : * new value is zero (we get that atomically), we know we can/have to release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a two-phased attempt at locking:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem above: nobody can release the lock too
74 : * quickly for us to notice, since after Phase 2 we are already queued.
75 : * -------------------------------------------------------------------------
76 : */
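
/*
 * A minimal usage sketch of the facilities described above (the lock,
 * variable, and function names here are hypothetical; the LWLock calls are
 * the ones provided by this module):
 */

/* Holder side: publish progress without releasing the lock. */
static void
example_advance(LWLock *lock, pg_atomic_uint64 *progress, uint64 newpos)
{
	LWLockAcquire(lock, LW_EXCLUSIVE);
	/* ... perform part of the protected work ... */
	LWLockUpdateVar(lock, progress, newpos);	/* wakes LWLockWaitForVar() waiters */
	/* ... perform the rest of the protected work ... */
	LWLockReleaseClearVar(lock, progress, 0);	/* release, resetting the variable */
}

/* Waiter side: track published progress until the lock is observed free. */
static uint64
example_wait_for_progress(LWLock *lock, pg_atomic_uint64 *progress, uint64 seen)
{
	uint64		newval;

	while (!LWLockWaitForVar(lock, progress, seen, &newval))
		seen = newval;			/* lock still held, but the variable advanced */

	return seen;
}
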
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "storage/proc.h"
84 : #include "storage/proclist.h"
85 : #include "storage/procnumber.h"
86 : #include "storage/spin.h"
87 : #include "utils/memutils.h"
88 :
89 : #ifdef LWLOCK_STATS
90 : #include "utils/hsearch.h"
91 : #endif
92 :
93 :
94 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
95 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 30)
96 : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
97 : #define LW_FLAG_BITS 3
98 : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
99 :
100 : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
101 : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
102 : #define LW_VAL_SHARED 1
103 :
104 : /* already (power of 2)-1, i.e. suitable for a mask */
105 : #define LW_SHARED_MASK MAX_BACKENDS
106 : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
107 :
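/*
 * For orientation, and assuming MAX_BACKENDS is 2^18 - 1 as its current
 * definition elsewhere implies, the 32-bit state word is laid out roughly
 * like this:
 *
 *	 bit  31		LW_FLAG_HAS_WAITERS
 *	 bit  30		LW_FLAG_RELEASE_OK
 *	 bit  29		LW_FLAG_LOCKED
 *	 bits 19-28		currently unused
 *	 bit  18		set by LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
 *	 bits 0-17		count of shared holders (LW_SHARED_MASK)
 */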
108 :
109 : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
110 : "MAX_BACKENDS + 1 needs to be a power of 2");
111 :
112 : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
113 : "MAX_BACKENDS and LW_FLAG_MASK overlap");
114 :
115 : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
116 : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
117 :
118 : /*
119 : * There are three sorts of LWLock "tranches":
120 : *
121 : * 1. The individually-named locks defined in lwlocklist.h each have their
122 : * own tranche. We absorb the names of these tranches from there into
123 : * BuiltinTrancheNames here.
124 : *
125 : * 2. There are some predefined tranches for built-in groups of locks.
126 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
127 : * appear in BuiltinTrancheNames[] below.
128 : *
129 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
130 : * or LWLockRegisterTranche. The names of these that are known in the current
131 : * process appear in LWLockTrancheNames[].
132 : *
133 : * All these names are user-visible as wait event names, so choose with care
134 : * ... and do not forget to update the documentation's list of wait events.
135 : */
136 : static const char *const BuiltinTrancheNames[] = {
137 : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
138 : #include "storage/lwlocklist.h"
139 : #undef PG_LWLOCK
140 : [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
141 : [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
142 : [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
143 : [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
144 : [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
145 : [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
146 : [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
147 : [LWTRANCHE_WAL_INSERT] = "WALInsert",
148 : [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
149 : [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
150 : [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
151 : [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
152 : [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
153 : [LWTRANCHE_LOCK_MANAGER] = "LockManager",
154 : [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
155 : [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
156 : [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
157 : [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
158 : [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
159 : [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
160 : [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
161 : [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
162 : [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
163 : [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
164 : [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
165 : [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
166 : [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
167 : [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
168 : [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
169 : [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
170 : [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
171 : [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
172 : [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
173 : [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
174 : [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
175 : [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
176 : [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
177 : [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
178 : [LWTRANCHE_XACT_SLRU] = "XactSLRU",
179 : [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
180 : [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
181 : [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
182 : [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
183 : };
184 :
185 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
186 : LWTRANCHE_FIRST_USER_DEFINED,
187 : "missing entries in BuiltinTrancheNames[]");
188 :
189 : /*
190 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
191 : * stores the names of all dynamically-created tranches known to the current
192 : * process. Any unused entries in the array will contain NULL.
193 : */
194 : static const char **LWLockTrancheNames = NULL;
195 : static int LWLockTrancheNamesAllocated = 0;
196 :
197 : /*
198 : * This points to the main array of LWLocks in shared memory. Backends inherit
199 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
200 : * where we have special measures to pass it down).
201 : */
202 : LWLockPadded *MainLWLockArray = NULL;
203 :
204 : /*
205 : * We use this structure to keep track of locked LWLocks for release
206 : * during error recovery. Normally, only a few will be held at once, but
207 : * occasionally the number can be much higher; for example, the pg_buffercache
208 : * extension locks all buffer partitions simultaneously.
209 : */
210 : #define MAX_SIMUL_LWLOCKS 200
211 :
212 : /* struct representing the LWLocks we're holding */
213 : typedef struct LWLockHandle
214 : {
215 : LWLock *lock;
216 : LWLockMode mode;
217 : } LWLockHandle;
218 :
219 : static int num_held_lwlocks = 0;
220 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
221 :
222 : /* struct representing the LWLock tranche request for named tranche */
223 : typedef struct NamedLWLockTrancheRequest
224 : {
225 : char tranche_name[NAMEDATALEN];
226 : int num_lwlocks;
227 : } NamedLWLockTrancheRequest;
228 :
229 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
230 : static int NamedLWLockTrancheRequestsAllocated = 0;
231 :
232 : /*
233 : * NamedLWLockTrancheRequests is both the valid length of the request array,
234 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
235 : * This variable and NamedLWLockTrancheArray are non-static so that
236 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
237 : */
238 : int NamedLWLockTrancheRequests = 0;
239 :
240 : /* points to data in shared memory: */
241 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
242 :
243 : static void InitializeLWLocks(void);
244 : static inline void LWLockReportWaitStart(LWLock *lock);
245 : static inline void LWLockReportWaitEnd(void);
246 : static const char *GetLWTrancheName(uint16 trancheId);
247 :
248 : #define T_NAME(lock) \
249 : GetLWTrancheName((lock)->tranche)
250 :
251 : #ifdef LWLOCK_STATS
252 : typedef struct lwlock_stats_key
253 : {
254 : int tranche;
255 : void *instance;
256 : } lwlock_stats_key;
257 :
258 : typedef struct lwlock_stats
259 : {
260 : lwlock_stats_key key;
261 : int sh_acquire_count;
262 : int ex_acquire_count;
263 : int block_count;
264 : int dequeue_self_count;
265 : int spin_delay_count;
266 : } lwlock_stats;
267 :
268 : static HTAB *lwlock_stats_htab;
269 : static lwlock_stats lwlock_stats_dummy;
270 : #endif
271 :
272 : #ifdef LOCK_DEBUG
273 : bool Trace_lwlocks = false;
274 :
275 : inline static void
276 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
277 : {
278 : /* hide statement & context here, otherwise the log is just too verbose */
279 : if (Trace_lwlocks)
280 : {
281 : uint32 state = pg_atomic_read_u32(&lock->state);
282 :
283 : ereport(LOG,
284 : (errhidestmt(true),
285 : errhidecontext(true),
286 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
287 : MyProcPid,
288 : where, T_NAME(lock), lock,
289 : (state & LW_VAL_EXCLUSIVE) != 0,
290 : state & LW_SHARED_MASK,
291 : (state & LW_FLAG_HAS_WAITERS) != 0,
292 : pg_atomic_read_u32(&lock->nwaiters),
293 : (state & LW_FLAG_RELEASE_OK) != 0)));
294 : }
295 : }
296 :
297 : inline static void
298 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
299 : {
300 : /* hide statement & context here, otherwise the log is just too verbose */
301 : if (Trace_lwlocks)
302 : {
303 : ereport(LOG,
304 : (errhidestmt(true),
305 : errhidecontext(true),
306 : errmsg_internal("%s(%s %p): %s", where,
307 : T_NAME(lock), lock, msg)));
308 : }
309 : }
310 :
311 : #else /* not LOCK_DEBUG */
312 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
313 : #define LOG_LWDEBUG(a,b,c) ((void)0)
314 : #endif /* LOCK_DEBUG */
315 :
316 : #ifdef LWLOCK_STATS
317 :
318 : static void init_lwlock_stats(void);
319 : static void print_lwlock_stats(int code, Datum arg);
320 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
321 :
322 : static void
323 : init_lwlock_stats(void)
324 : {
325 : HASHCTL ctl;
326 : static MemoryContext lwlock_stats_cxt = NULL;
327 : static bool exit_registered = false;
328 :
329 : if (lwlock_stats_cxt != NULL)
330 : MemoryContextDelete(lwlock_stats_cxt);
331 :
332 : /*
333 : * The LWLock stats will be updated within a critical section, which
334 : * requires allocating new hash entries. Allocations within a critical
335 : * section are normally not allowed because running out of memory would
336 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
337 : * turned on in production, so that's an acceptable risk. The hash entries
338 : * are small, so the risk of running out of memory is minimal in practice.
339 : */
340 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
341 : "LWLock stats",
342 : ALLOCSET_DEFAULT_SIZES);
343 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
344 :
345 : ctl.keysize = sizeof(lwlock_stats_key);
346 : ctl.entrysize = sizeof(lwlock_stats);
347 : ctl.hcxt = lwlock_stats_cxt;
348 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
349 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
350 : if (!exit_registered)
351 : {
352 : on_shmem_exit(print_lwlock_stats, 0);
353 : exit_registered = true;
354 : }
355 : }
356 :
357 : static void
358 : print_lwlock_stats(int code, Datum arg)
359 : {
360 : HASH_SEQ_STATUS scan;
361 : lwlock_stats *lwstats;
362 :
363 : hash_seq_init(&scan, lwlock_stats_htab);
364 :
365 : /* Grab an LWLock to keep different backends from mixing reports */
366 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
367 :
368 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
369 : {
370 : fprintf(stderr,
371 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
372 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
373 : lwstats->key.instance, lwstats->sh_acquire_count,
374 : lwstats->ex_acquire_count, lwstats->block_count,
375 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
376 : }
377 :
378 : LWLockRelease(&MainLWLockArray[0].lock);
379 : }
380 :
381 : static lwlock_stats *
382 : get_lwlock_stats_entry(LWLock *lock)
383 : {
384 : lwlock_stats_key key;
385 : lwlock_stats *lwstats;
386 : bool found;
387 :
388 : /*
389 : * During shared memory initialization, the hash table doesn't exist yet.
390 : * Stats of that phase aren't very interesting, so just collect operations
391 : * on all locks in a single dummy entry.
392 : */
393 : if (lwlock_stats_htab == NULL)
394 : return &lwlock_stats_dummy;
395 :
396 : /* Fetch or create the entry. */
397 : MemSet(&key, 0, sizeof(key));
398 : key.tranche = lock->tranche;
399 : key.instance = lock;
400 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
401 : if (!found)
402 : {
403 : lwstats->sh_acquire_count = 0;
404 : lwstats->ex_acquire_count = 0;
405 : lwstats->block_count = 0;
406 : lwstats->dequeue_self_count = 0;
407 : lwstats->spin_delay_count = 0;
408 : }
409 : return lwstats;
410 : }
411 : #endif /* LWLOCK_STATS */
412 :
413 :
414 : /*
415 : * Compute number of LWLocks required by named tranches. These will be
416 : * allocated in the main array.
417 : */
418 : static int
419 8106 : NumLWLocksForNamedTranches(void)
420 : {
421 8106 : int numLocks = 0;
422 : int i;
423 :
424 8162 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
425 56 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
426 :
427 8106 : return numLocks;
428 : }
429 :
430 : /*
431 : * Compute shmem space needed for LWLocks and named tranches.
432 : */
433 : Size
434 6006 : LWLockShmemSize(void)
435 : {
436 : Size size;
437 : int i;
438 6006 : int numLocks = NUM_FIXED_LWLOCKS;
439 :
440 : /* Calculate total number of locks needed in the main array. */
441 6006 : numLocks += NumLWLocksForNamedTranches();
442 :
443 : /* Space for the LWLock array. */
444 6006 : size = mul_size(numLocks, sizeof(LWLockPadded));
445 :
446 : /* Space for dynamic allocation counter, plus room for alignment. */
447 6006 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
448 :
449 : /* space for named tranches. */
450 6006 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
451 :
452 : /* space for name of each tranche. */
453 6048 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
454 42 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
455 :
456 6006 : return size;
457 : }
458 :
459 : /*
460 : * Allocate shmem space for the main LWLock array and all tranches and
461 : * initialize it. We also register extension LWLock tranches here.
462 : */
463 : void
464 2100 : CreateLWLocks(void)
465 : {
466 2100 : if (!IsUnderPostmaster)
467 : {
468 2100 : Size spaceLocks = LWLockShmemSize();
469 : int *LWLockCounter;
470 : char *ptr;
471 :
472 : /* Allocate space */
473 2100 : ptr = (char *) ShmemAlloc(spaceLocks);
474 :
475 : /* Leave room for dynamic allocation of tranches */
476 2100 : ptr += sizeof(int);
477 :
478 : /* Ensure desired alignment of LWLock array */
479 2100 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
480 :
481 2100 : MainLWLockArray = (LWLockPadded *) ptr;
482 :
483 : /*
484 : * Initialize the dynamic-allocation counter for tranches, which is
485 : * stored just before the first LWLock.
486 : */
487 2100 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
488 2100 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
489 :
490 : /* Initialize all LWLocks */
491 2100 : InitializeLWLocks();
492 : }
493 :
494 : /* Register named extension LWLock tranches in the current process. */
495 2114 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
496 14 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
497 14 : NamedLWLockTrancheArray[i].trancheName);
498 2100 : }
499 :
500 : /*
501 : * Initialize LWLocks that are fixed and those belonging to named tranches.
502 : */
503 : static void
504 2100 : InitializeLWLocks(void)
505 : {
506 2100 : int numNamedLocks = NumLWLocksForNamedTranches();
507 : int id;
508 : int i;
509 : int j;
510 : LWLockPadded *lock;
511 :
512 : /* Initialize all individual LWLocks in main array */
513 115500 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
514 113400 : LWLockInitialize(&lock->lock, id);
515 :
516 : /* Initialize buffer mapping LWLocks in main array */
517 2100 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
518 270900 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
519 268800 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
520 :
521 : /* Initialize lmgrs' LWLocks in main array */
522 2100 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
523 35700 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
524 33600 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
525 :
526 : /* Initialize predicate lmgrs' LWLocks in main array */
527 2100 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
528 35700 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
529 33600 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
530 :
531 : /*
532 : * Copy the info about any named tranches into shared memory (so that
533 : * other processes can see it), and initialize the requested LWLocks.
534 : */
535 2100 : if (NamedLWLockTrancheRequests > 0)
536 : {
537 : char *trancheNames;
538 :
539 14 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
540 14 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
541 :
542 14 : trancheNames = (char *) NamedLWLockTrancheArray +
543 14 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
544 14 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
545 :
546 28 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
547 : {
548 : NamedLWLockTrancheRequest *request;
549 : NamedLWLockTranche *tranche;
550 : char *name;
551 :
552 14 : request = &NamedLWLockTrancheRequestArray[i];
553 14 : tranche = &NamedLWLockTrancheArray[i];
554 :
555 14 : name = trancheNames;
556 14 : trancheNames += strlen(request->tranche_name) + 1;
557 14 : strcpy(name, request->tranche_name);
558 14 : tranche->trancheId = LWLockNewTrancheId();
559 14 : tranche->trancheName = name;
560 :
561 28 : for (j = 0; j < request->num_lwlocks; j++, lock++)
562 14 : LWLockInitialize(&lock->lock, tranche->trancheId);
563 : }
564 : }
565 2100 : }
566 :
567 : /*
568 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
569 : */
570 : void
571 43174 : InitLWLockAccess(void)
572 : {
573 : #ifdef LWLOCK_STATS
574 : init_lwlock_stats();
575 : #endif
576 43174 : }
577 :
578 : /*
579 : * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
580 : * to the specified tranche.
581 : *
582 : * The caller can access the requested number of LWLocks starting from the
583 : * base lock address returned by this API. This works only for tranches
584 : * that were requested via the RequestNamedLWLockTranche() API.
585 : */
586 : LWLockPadded *
587 14 : GetNamedLWLockTranche(const char *tranche_name)
588 : {
589 : int lock_pos;
590 : int i;
591 :
592 : /*
593 : * Obtain the position of base address of LWLock belonging to requested
594 : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
595 : * in MainLWLockArray after fixed locks.
596 : */
597 14 : lock_pos = NUM_FIXED_LWLOCKS;
598 14 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
599 : {
600 14 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
601 : tranche_name) == 0)
602 14 : return &MainLWLockArray[lock_pos];
603 :
604 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
605 : }
606 :
607 0 : elog(ERROR, "requested tranche is not registered");
608 :
609 : /* just to keep compiler quiet */
610 : return NULL;
611 : }
612 :
613 : /*
614 : * Allocate a new tranche ID.
615 : */
616 : int
617 34 : LWLockNewTrancheId(void)
618 : {
619 : int result;
620 : int *LWLockCounter;
621 :
622 34 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
623 : /* We use the ShmemLock spinlock to protect LWLockCounter */
624 34 : SpinLockAcquire(ShmemLock);
625 34 : result = (*LWLockCounter)++;
626 34 : SpinLockRelease(ShmemLock);
627 :
628 34 : return result;
629 : }
630 :
631 : /*
632 : * Register a dynamic tranche name in the lookup table of the current process.
633 : *
634 : * This routine will save a pointer to the tranche name passed as an argument,
635 : * so the name should be allocated in a backend-lifetime context
636 : * (shared memory, TopMemoryContext, static constant, or similar).
637 : *
638 : * The tranche name will be user-visible as a wait event name, so try to
639 : * use a name that fits the style for those.
640 : */
641 : void
642 40 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
643 : {
644 : /* This should only be called for user-defined tranches. */
645 40 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
646 0 : return;
647 :
648 : /* Convert to array index. */
649 40 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
650 :
651 : /* If necessary, create or enlarge array. */
652 40 : if (tranche_id >= LWLockTrancheNamesAllocated)
653 : {
654 : int newalloc;
655 :
656 34 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
657 :
658 34 : if (LWLockTrancheNames == NULL)
659 34 : LWLockTrancheNames = (const char **)
660 34 : MemoryContextAllocZero(TopMemoryContext,
661 : newalloc * sizeof(char *));
662 : else
663 0 : LWLockTrancheNames =
664 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
665 34 : LWLockTrancheNamesAllocated = newalloc;
666 : }
667 :
668 40 : LWLockTrancheNames[tranche_id] = tranche_name;
669 : }
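
/*
 * A minimal sketch of how an extension might use the above, assuming it
 * keeps both the LWLock and the tranche ID in its own shared memory (the
 * function, parameter, and tranche names are hypothetical):
 */
static void
example_init_dynamic_lock(LWLock *lock_in_shmem, int *tranche_id_in_shmem)
{
	/* Allocate the tranche ID once, in whichever backend sets up the state. */
	*tranche_id_in_shmem = LWLockNewTrancheId();

	/*
	 * Make the name known in this process; every backend that uses the lock
	 * should do the same. The string must have backend lifetime, e.g. a
	 * string constant.
	 */
	LWLockRegisterTranche(*tranche_id_in_shmem, "example_extension");

	/* Put the lock itself into a usable, unlocked state. */
	LWLockInitialize(lock_in_shmem, *tranche_id_in_shmem);
}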
670 :
671 : /*
672 : * RequestNamedLWLockTranche
673 : * Request that extra LWLocks be allocated during postmaster
674 : * startup.
675 : *
676 : * This may only be called via the shmem_request_hook of a library that is
677 : * loaded into the postmaster via shared_preload_libraries. Calls from
678 : * elsewhere will fail.
679 : *
680 : * The tranche name will be user-visible as a wait event name, so try to
681 : * use a name that fits the style for those.
682 : */
683 : void
684 14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
685 : {
686 : NamedLWLockTrancheRequest *request;
687 :
688 14 : if (!process_shmem_requests_in_progress)
689 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
690 :
691 14 : if (NamedLWLockTrancheRequestArray == NULL)
692 : {
693 14 : NamedLWLockTrancheRequestsAllocated = 16;
694 14 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
695 14 : MemoryContextAlloc(TopMemoryContext,
696 : NamedLWLockTrancheRequestsAllocated
697 : * sizeof(NamedLWLockTrancheRequest));
698 : }
699 :
700 14 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
701 : {
702 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
703 :
704 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
705 0 : repalloc(NamedLWLockTrancheRequestArray,
706 : i * sizeof(NamedLWLockTrancheRequest));
707 0 : NamedLWLockTrancheRequestsAllocated = i;
708 : }
709 :
710 14 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
711 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
712 14 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
713 14 : request->num_lwlocks = num_lwlocks;
714 14 : NamedLWLockTrancheRequests++;
715 14 : }
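
/*
 * A minimal sketch of the named-tranche path from an extension's point of
 * view (the extension name, lock count, and function names are
 * hypothetical): the request runs from its shmem_request_hook, the lookup
 * runs once shared memory exists.
 */
static LWLockPadded *example_locks;

/* Called from the extension's shmem_request_hook. */
static void
example_shmem_request(void)
{
	RequestNamedLWLockTranche("example_extension", 4);
}

/* Called from the extension's shmem_startup_hook. */
static void
example_shmem_startup(void)
{
	/* Base address of the 4 LWLocks requested above: example_locks[0..3].lock */
	example_locks = GetNamedLWLockTranche("example_extension");
}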
716 :
717 : /*
718 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
719 : */
720 : void
721 24771240 : LWLockInitialize(LWLock *lock, int tranche_id)
722 : {
723 24771240 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
724 : #ifdef LOCK_DEBUG
725 : pg_atomic_init_u32(&lock->nwaiters, 0);
726 : #endif
727 24771240 : lock->tranche = tranche_id;
728 24771240 : proclist_init(&lock->waiters);
729 24771240 : }
730 :
731 : /*
732 : * Report start of wait event for light-weight locks.
733 : *
734 : * This function will be used by all the light-weight lock calls which
735 : * needs to wait to acquire the lock. This function distinguishes wait
736 : * event based on tranche and lock id.
737 : */
738 : static inline void
739 4492510 : LWLockReportWaitStart(LWLock *lock)
740 : {
741 4492510 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
742 4492510 : }
743 :
744 : /*
745 : * Report end of wait event for light-weight locks.
746 : */
747 : static inline void
748 4492510 : LWLockReportWaitEnd(void)
749 : {
750 4492510 : pgstat_report_wait_end();
751 4492510 : }
752 :
753 : /*
754 : * Return the name of an LWLock tranche.
755 : */
756 : static const char *
757 52 : GetLWTrancheName(uint16 trancheId)
758 : {
759 : /* Built-in tranche or individual LWLock? */
760 52 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
761 52 : return BuiltinTrancheNames[trancheId];
762 :
763 : /*
764 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
765 : * it's possible that the tranche has never been registered in the current
766 : * process, in which case give up and return "extension".
767 : */
768 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
769 :
770 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
771 0 : LWLockTrancheNames[trancheId] == NULL)
772 0 : return "extension";
773 :
774 0 : return LWLockTrancheNames[trancheId];
775 : }
776 :
777 : /*
778 : * Return an identifier for an LWLock based on the wait class and event.
779 : */
780 : const char *
781 52 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
782 : {
783 : Assert(classId == PG_WAIT_LWLOCK);
784 : /* The event IDs are just tranche numbers. */
785 52 : return GetLWTrancheName(eventId);
786 : }
787 :
788 : /*
789 : * Internal function that tries to atomically acquire the lwlock in the
790 : * passed-in mode.
791 : *
792 : * This function will not block waiting for a lock to become free - that's the
793 : * caller's job.
794 : *
795 : * Returns true if the lock isn't free and we need to wait.
796 : */
797 : static bool
798 726922502 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
799 : {
800 : uint32 old_state;
801 :
802 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
803 :
804 : /*
805 : * Read once outside the loop, later iterations will get the newer value
806 : * via compare & exchange.
807 : */
808 726922502 : old_state = pg_atomic_read_u32(&lock->state);
809 :
810 : /* loop until we've determined whether we could acquire the lock or not */
811 : while (true)
812 429162 : {
813 : uint32 desired_state;
814 : bool lock_free;
815 :
816 727351664 : desired_state = old_state;
817 :
818 727351664 : if (mode == LW_EXCLUSIVE)
819 : {
820 447559460 : lock_free = (old_state & LW_LOCK_MASK) == 0;
821 447559460 : if (lock_free)
822 444967084 : desired_state += LW_VAL_EXCLUSIVE;
823 : }
824 : else
825 : {
826 279792204 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
827 279792204 : if (lock_free)
828 273258788 : desired_state += LW_VAL_SHARED;
829 : }
830 :
831 : /*
832 : * Attempt to swap in the state we are expecting. If we didn't see the
833 : * lock as free, that's just the old value. If we saw it as free,
834 : * we'll attempt to mark it acquired. The reason that we always swap
835 : * in the value is that this doubles as a memory barrier. We could try
836 : * to be smarter and only swap in values if we saw the lock as free,
837 : * but benchmarks haven't shown that to be beneficial so far.
838 : *
839 : * Retry if the value changed since we last looked at it.
840 : */
841 727351664 : if (pg_atomic_compare_exchange_u32(&lock->state,
842 : &old_state, desired_state))
843 : {
844 726922502 : if (lock_free)
845 : {
846 : /* Great! Got the lock. */
847 : #ifdef LOCK_DEBUG
848 : if (mode == LW_EXCLUSIVE)
849 : lock->owner = MyProc;
850 : #endif
851 717917186 : return false;
852 : }
853 : else
854 9005316 : return true; /* somebody else has the lock */
855 : }
856 : }
857 : pg_unreachable();
858 : }
859 :
860 : /*
861 : * Lock the LWLock's wait list against concurrent activity.
862 : *
863 : * NB: even though the wait list is locked, non-conflicting lock operations
864 : * may still happen concurrently.
865 : *
866 : * Time spent holding the mutex should be short!
867 : */
868 : static void
869 14821206 : LWLockWaitListLock(LWLock *lock)
870 : {
871 : uint32 old_state;
872 : #ifdef LWLOCK_STATS
873 : lwlock_stats *lwstats;
874 : uint32 delays = 0;
875 :
876 : lwstats = get_lwlock_stats_entry(lock);
877 : #endif
878 :
879 : while (true)
880 : {
881 : /* always try once to acquire lock directly */
882 14821206 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
883 14821206 : if (!(old_state & LW_FLAG_LOCKED))
884 14717038 : break; /* got lock */
885 :
886 : /* and then spin without atomic operations until lock is released */
887 : {
888 : SpinDelayStatus delayStatus;
889 :
890 104168 : init_local_spin_delay(&delayStatus);
891 :
892 335284 : while (old_state & LW_FLAG_LOCKED)
893 : {
894 231116 : perform_spin_delay(&delayStatus);
895 231116 : old_state = pg_atomic_read_u32(&lock->state);
896 : }
897 : #ifdef LWLOCK_STATS
898 : delays += delayStatus.delays;
899 : #endif
900 104168 : finish_spin_delay(&delayStatus);
901 : }
902 :
903 : /*
904 : * Retry. The lock might well already have been re-acquired by the time
905 : * we're attempting to get it again.
906 : */
907 : }
908 :
909 : #ifdef LWLOCK_STATS
910 : lwstats->spin_delay_count += delays;
911 : #endif
912 14717038 : }
913 :
914 : /*
915 : * Unlock the LWLock's wait list.
916 : *
917 : * Note that it can be more efficient to manipulate flags and release the
918 : * locks in a single atomic operation.
919 : */
920 : static void
921 9828202 : LWLockWaitListUnlock(LWLock *lock)
922 : {
923 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
924 :
925 9828202 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
926 :
927 : Assert(old_state & LW_FLAG_LOCKED);
928 9828202 : }
929 :
930 : /*
931 : * Wake up all the lockers that currently have a chance to acquire the lock.
932 : */
933 : static void
934 4888836 : LWLockWakeup(LWLock *lock)
935 : {
936 : bool new_release_ok;
937 4888836 : bool wokeup_somebody = false;
938 : proclist_head wakeup;
939 : proclist_mutable_iter iter;
940 :
941 4888836 : proclist_init(&wakeup);
942 :
943 4888836 : new_release_ok = true;
944 :
945 : /* lock wait list while collecting backends to wake up */
946 4888836 : LWLockWaitListLock(lock);
947 :
948 8185596 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
949 : {
950 4524744 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
951 :
952 4524744 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
953 4350 : continue;
954 :
955 4520394 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
956 4520394 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
957 :
958 4520394 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
959 : {
960 : /*
961 : * Prevent additional wakeups until the retryer gets to run. Backends
962 : * that are just waiting for the lock to become free don't retry
963 : * automatically.
964 : */
965 4454988 : new_release_ok = false;
966 :
967 : /*
968 : * Don't wakeup (further) exclusive locks.
969 : */
970 4454988 : wokeup_somebody = true;
971 : }
972 :
973 : /*
974 : * Signal that the process isn't on the wait list anymore. This allows
975 : * LWLockDequeueSelf() to remove itself from the waitlist with a
976 : * proclist_delete(), rather than having to check if it has been
977 : * removed from the list.
978 : */
979 : Assert(waiter->lwWaiting == LW_WS_WAITING);
980 4520394 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
981 :
982 : /*
983 : * Once we've woken up an exclusive locker, there's no point in waking
984 : * up anybody else.
985 : */
986 4520394 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
987 1227984 : break;
988 : }
989 :
990 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
991 :
992 : /* unset required flags, and release lock, in one fell swoop */
993 : {
994 : uint32 old_state;
995 : uint32 desired_state;
996 :
997 4888836 : old_state = pg_atomic_read_u32(&lock->state);
998 : while (true)
999 : {
1000 4929460 : desired_state = old_state;
1001 :
1002 : /* compute desired flags */
1003 :
1004 4929460 : if (new_release_ok)
1005 485392 : desired_state |= LW_FLAG_RELEASE_OK;
1006 : else
1007 4444068 : desired_state &= ~LW_FLAG_RELEASE_OK;
1008 :
1009 4929460 : if (proclist_is_empty(&wakeup))
1010 448786 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1011 :
1012 4929460 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1013 :
1014 4929460 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1015 : desired_state))
1016 4888836 : break;
1017 : }
1018 : }
1019 :
1020 : /* Awaken any waiters I removed from the queue. */
1021 9409230 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1022 : {
1023 4520394 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1024 :
1025 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1026 4520394 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1027 :
1028 : /*
1029 : * Guarantee that lwWaiting being unset only becomes visible once the
1030 : * unlink from the list has completed. Otherwise the target backend
1031 : * could be woken up for other reason and enqueue for a new lock - if
1032 : * that happens before the list unlink happens, the list would end up
1033 : * being corrupted.
1034 : *
1035 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1036 : * another lock.
1037 : */
1038 4520394 : pg_write_barrier();
1039 4520394 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1040 4520394 : PGSemaphoreUnlock(waiter->sem);
1041 : }
1042 4888836 : }
1043 :
1044 : /*
1045 : * Add ourselves to the end of the queue.
1046 : *
1047 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1048 : */
1049 : static void
1050 4631630 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1051 : {
1052 : /*
1053 : * If we don't have a PGPROC structure, there's no way to wait. This
1054 : * should never occur, since MyProc should only be null during shared
1055 : * memory initialization.
1056 : */
1057 4631630 : if (MyProc == NULL)
1058 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1059 :
1060 4631630 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1061 0 : elog(PANIC, "queueing for lock while waiting on another one");
1062 :
1063 4631630 : LWLockWaitListLock(lock);
1064 :
1065 : /* setting the flag is protected by the spinlock */
1066 4631630 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1067 :
1068 4631630 : MyProc->lwWaiting = LW_WS_WAITING;
1069 4631630 : MyProc->lwWaitMode = mode;
1070 :
1071 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1072 4631630 : if (mode == LW_WAIT_UNTIL_FREE)
1073 72640 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1074 : else
1075 4558990 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1076 :
1077 : /* Can release the mutex now */
1078 4631630 : LWLockWaitListUnlock(lock);
1079 :
1080 : #ifdef LOCK_DEBUG
1081 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1082 : #endif
1083 4631630 : }
1084 :
1085 : /*
1086 : * Remove ourselves from the waitlist.
1087 : *
1088 : * This is used if we queued ourselves because we thought we needed to sleep
1089 : * but, after further checking, we discovered that we don't actually need to
1090 : * do so.
1091 : */
1092 : static void
1093 139120 : LWLockDequeueSelf(LWLock *lock)
1094 : {
1095 : bool on_waitlist;
1096 :
1097 : #ifdef LWLOCK_STATS
1098 : lwlock_stats *lwstats;
1099 :
1100 : lwstats = get_lwlock_stats_entry(lock);
1101 :
1102 : lwstats->dequeue_self_count++;
1103 : #endif
1104 :
1105 139120 : LWLockWaitListLock(lock);
1106 :
1107 : /*
1108 : * Remove ourselves from the waitlist, unless we've already been removed.
1109 : * The removal happens with the wait list lock held, so there's no race in
1110 : * this check.
1111 : */
1112 139120 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1113 139120 : if (on_waitlist)
1114 108542 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1115 :
1116 139120 : if (proclist_is_empty(&lock->waiters) &&
1117 135290 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1118 : {
1119 135270 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1120 : }
1121 :
1122 : /* XXX: combine with fetch_and above? */
1123 139120 : LWLockWaitListUnlock(lock);
1124 :
1125 : /* clear waiting state again, nice for debugging */
1126 139120 : if (on_waitlist)
1127 108542 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1128 : else
1129 : {
1130 30578 : int extraWaits = 0;
1131 :
1132 : /*
1133 : * Somebody else dequeued us and has or will wake us up. Deal with the
1134 : * superfluous absorption of a wakeup.
1135 : */
1136 :
1137 : /*
1138 : * Reset RELEASE_OK flag if somebody woke us before we removed
1139 : * ourselves - they'll have set it to false.
1140 : */
1141 30578 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1142 :
1143 : /*
1144 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1145 : * get reset at some inconvenient point later. Most of the time this
1146 : * will immediately return.
1147 : */
1148 : for (;;)
1149 : {
1150 30578 : PGSemaphoreLock(MyProc->sem);
1151 30578 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1152 30578 : break;
1153 0 : extraWaits++;
1154 : }
1155 :
1156 : /*
1157 : * Fix the process wait semaphore's count for any absorbed wakeups.
1158 : */
1159 30578 : while (extraWaits-- > 0)
1160 0 : PGSemaphoreUnlock(MyProc->sem);
1161 : }
1162 :
1163 : #ifdef LOCK_DEBUG
1164 : {
1165 : /* not waiting anymore */
1166 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1167 :
1168 : Assert(nwaiters < MAX_BACKENDS);
1169 : }
1170 : #endif
1171 139120 : }
1172 :
1173 : /*
1174 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1175 : *
1176 : * If the lock is not available, sleep until it is. Returns true if the lock
1177 : * was available immediately, false if we had to sleep.
1178 : *
1179 : * Side effect: cancel/die interrupts are held off until lock release.
1180 : */
1181 : bool
1182 712910410 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1183 : {
1184 712910410 : PGPROC *proc = MyProc;
1185 712910410 : bool result = true;
1186 712910410 : int extraWaits = 0;
1187 : #ifdef LWLOCK_STATS
1188 : lwlock_stats *lwstats;
1189 :
1190 : lwstats = get_lwlock_stats_entry(lock);
1191 : #endif
1192 :
1193 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1194 :
1195 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1196 :
1197 : #ifdef LWLOCK_STATS
1198 : /* Count lock acquisition attempts */
1199 : if (mode == LW_EXCLUSIVE)
1200 : lwstats->ex_acquire_count++;
1201 : else
1202 : lwstats->sh_acquire_count++;
1203 : #endif /* LWLOCK_STATS */
1204 :
1205 : /*
1206 : * We can't wait if we haven't got a PGPROC. This should only occur
1207 : * during bootstrap or shared memory initialization. Put an Assert here
1208 : * to catch unsafe coding practices.
1209 : */
1210 : Assert(!(proc == NULL && IsUnderPostmaster));
1211 :
1212 : /* Ensure we will have room to remember the lock */
1213 712910410 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1214 0 : elog(ERROR, "too many LWLocks taken");
1215 :
1216 : /*
1217 : * Lock out cancel/die interrupts until we exit the code section protected
1218 : * by the LWLock. This ensures that interrupts will not interfere with
1219 : * manipulations of data structures in shared memory.
1220 : */
1221 712910410 : HOLD_INTERRUPTS();
1222 :
1223 : /*
1224 : * Loop here to try to acquire lock after each time we are signaled by
1225 : * LWLockRelease.
1226 : *
1227 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1228 : * lock, rather than retrying and possibly having to go back to sleep. But
1229 : * in practice that is no good because it means a process swap for every
1230 : * lock acquisition when two or more processes are contending for the same
1231 : * lock. Since LWLocks are normally used to protect not-very-long
1232 : * sections of computation, a process needs to be able to acquire and
1233 : * release the same lock many times during a single CPU time slice, even
1234 : * in the presence of contention. The efficiency of being able to do that
1235 : * outweighs the inefficiency of sometimes wasting a process dispatch
1236 : * cycle because the lock is not free when a released waiter finally gets
1237 : * to run. See pgsql-hackers archives for 29-Dec-01.
1238 : */
1239 : for (;;)
1240 4425686 : {
1241 : bool mustwait;
1242 :
1243 : /*
1244 : * Try to grab the lock the first time, we're not in the waitqueue
1245 : * yet/anymore.
1246 : */
1247 717336096 : mustwait = LWLockAttemptLock(lock, mode);
1248 :
1249 717336096 : if (!mustwait)
1250 : {
1251 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1252 712777106 : break; /* got the lock */
1253 : }
1254 :
1255 : /*
1256 : * Ok, at this point we couldn't grab the lock on the first try. We
1257 : * cannot simply queue ourselves to the end of the list and wait to be
1258 : * woken up because by now the lock could long have been released.
1259 : * Instead add us to the queue and try to grab the lock again. If we
1260 : * succeed we need to revert the queuing and be happy, otherwise we
1261 : * recheck the lock. If we still couldn't grab it, we know that the
1262 : * other locker will see our queue entries when releasing since they
1263 : * existed before we checked for the lock.
1264 : */
1265 :
1266 : /* add to the queue */
1267 4558990 : LWLockQueueSelf(lock, mode);
1268 :
1269 : /* we're now guaranteed to be woken up if necessary */
1270 4558990 : mustwait = LWLockAttemptLock(lock, mode);
1271 :
1272 : /* ok, grabbed the lock the second time round, need to undo queueing */
1273 4558990 : if (!mustwait)
1274 : {
1275 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1276 :
1277 133304 : LWLockDequeueSelf(lock);
1278 133304 : break;
1279 : }
1280 :
1281 : /*
1282 : * Wait until awakened.
1283 : *
1284 : * It is possible that we get awakened for a reason other than being
1285 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1286 : * we've gotten the LWLock, re-increment the sema by the number of
1287 : * additional signals received.
1288 : */
1289 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1290 :
1291 : #ifdef LWLOCK_STATS
1292 : lwstats->block_count++;
1293 : #endif
1294 :
1295 4425686 : LWLockReportWaitStart(lock);
1296 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1297 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1298 :
1299 : for (;;)
1300 : {
1301 4425686 : PGSemaphoreLock(proc->sem);
1302 4425686 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1303 4425686 : break;
1304 0 : extraWaits++;
1305 : }
1306 :
1307 : /* Retrying, allow LWLockRelease to release waiters again. */
1308 4425686 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1309 :
1310 : #ifdef LOCK_DEBUG
1311 : {
1312 : /* not waiting anymore */
1313 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1314 :
1315 : Assert(nwaiters < MAX_BACKENDS);
1316 : }
1317 : #endif
1318 :
1319 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1320 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1321 4425686 : LWLockReportWaitEnd();
1322 :
1323 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1324 :
1325 : /* Now loop back and try to acquire lock again. */
1326 4425686 : result = false;
1327 : }
1328 :
1329 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1330 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1331 :
1332 : /* Add lock to list of locks held by this backend */
1333 712910410 : held_lwlocks[num_held_lwlocks].lock = lock;
1334 712910410 : held_lwlocks[num_held_lwlocks++].mode = mode;
1335 :
1336 : /*
1337 : * Fix the process wait semaphore's count for any absorbed wakeups.
1338 : */
1339 712910410 : while (extraWaits-- > 0)
1340 0 : PGSemaphoreUnlock(proc->sem);
1341 :
1342 712910410 : return result;
1343 : }
1344 :
1345 : /*
1346 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1347 : *
1348 : * If the lock is not available, return false with no side-effects.
1349 : *
1350 : * If successful, cancel/die interrupts are held off until lock release.
1351 : */
1352 : bool
1353 4749886 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1354 : {
1355 : bool mustwait;
1356 :
1357 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1358 :
1359 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1360 :
1361 : /* Ensure we will have room to remember the lock */
1362 4749886 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1363 0 : elog(ERROR, "too many LWLocks taken");
1364 :
1365 : /*
1366 : * Lock out cancel/die interrupts until we exit the code section protected
1367 : * by the LWLock. This ensures that interrupts will not interfere with
1368 : * manipulations of data structures in shared memory.
1369 : */
1370 4749886 : HOLD_INTERRUPTS();
1371 :
1372 : /* Check for the lock */
1373 4749886 : mustwait = LWLockAttemptLock(lock, mode);
1374 :
1375 4749886 : if (mustwait)
1376 : {
1377 : /* Failed to get lock, so release interrupt holdoff */
1378 1346 : RESUME_INTERRUPTS();
1379 :
1380 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1381 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1382 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1383 : }
1384 : else
1385 : {
1386 : /* Add lock to list of locks held by this backend */
1387 4748540 : held_lwlocks[num_held_lwlocks].lock = lock;
1388 4748540 : held_lwlocks[num_held_lwlocks++].mode = mode;
1389 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1390 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1391 : }
1392 4749886 : return !mustwait;
1393 : }
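
/*
 * A minimal sketch of the usual calling pattern for the above (names
 * hypothetical): attempt the lock, and skip or defer the work instead of
 * sleeping when it is busy.
 */
static bool
example_try_update(LWLock *lock)
{
	if (!LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
		return false;			/* lock busy; caller may retry later */

	/* ... short critical section touching the protected structure ... */

	LWLockRelease(lock);
	return true;
}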
1394 :
1395 : /*
1396 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1397 : *
1398 : * The semantics of this function are a bit funky. If the lock is currently
1399 : * free, it is acquired in the given mode, and the function returns true. If
1400 : * the lock isn't immediately free, the function waits until it is released
1401 : * and returns false, but does not acquire the lock.
1402 : *
1403 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1404 : * holding WALWriteLock, it can flush the commit records of many other
1405 : * backends as a side-effect. Those other backends need to wait until the
1406 : * flush finishes, but don't need to acquire the lock anymore. They can just
1407 : * wake up, observe that their records have already been flushed, and return.
1408 : */
1409 : bool
1410 267796 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1411 : {
1412 267796 : PGPROC *proc = MyProc;
1413 : bool mustwait;
1414 267796 : int extraWaits = 0;
1415 : #ifdef LWLOCK_STATS
1416 : lwlock_stats *lwstats;
1417 :
1418 : lwstats = get_lwlock_stats_entry(lock);
1419 : #endif
1420 :
1421 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1422 :
1423 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1424 :
1425 : /* Ensure we will have room to remember the lock */
1426 267796 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1427 0 : elog(ERROR, "too many LWLocks taken");
1428 :
1429 : /*
1430 : * Lock out cancel/die interrupts until we exit the code section protected
1431 : * by the LWLock. This ensures that interrupts will not interfere with
1432 : * manipulations of data structures in shared memory.
1433 : */
1434 267796 : HOLD_INTERRUPTS();
1435 :
1436 : /*
1437 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1438 : * protocol as LWLockAcquire(). Check its comments for details.
1439 : */
1440 267796 : mustwait = LWLockAttemptLock(lock, mode);
1441 :
1442 267796 : if (mustwait)
1443 : {
1444 9734 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1445 :
1446 9734 : mustwait = LWLockAttemptLock(lock, mode);
1447 :
1448 9734 : if (mustwait)
1449 : {
1450 : /*
1451 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1452 : * bogus wakeups.
1453 : */
1454 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1455 :
1456 : #ifdef LWLOCK_STATS
1457 : lwstats->block_count++;
1458 : #endif
1459 :
1460 9560 : LWLockReportWaitStart(lock);
1461 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1462 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1463 :
1464 : for (;;)
1465 : {
1466 9560 : PGSemaphoreLock(proc->sem);
1467 9560 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1468 9560 : break;
1469 0 : extraWaits++;
1470 : }
1471 :
1472 : #ifdef LOCK_DEBUG
1473 : {
1474 : /* not waiting anymore */
1475 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1476 :
1477 : Assert(nwaiters < MAX_BACKENDS);
1478 : }
1479 : #endif
1480 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1481 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1482 9560 : LWLockReportWaitEnd();
1483 :
1484 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1485 : }
1486 : else
1487 : {
1488 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1489 :
1490 : /*
1491 : * Got lock in the second attempt, undo queueing. We need to treat
1492 : * this as having successfully acquired the lock, otherwise we'd
1493 : * not necessarily wake up people we've prevented from acquiring
1494 : * the lock.
1495 : */
1496 174 : LWLockDequeueSelf(lock);
1497 : }
1498 : }
1499 :
1500 : /*
1501 : * Fix the process wait semaphore's count for any absorbed wakeups.
1502 : */
1503 267796 : while (extraWaits-- > 0)
1504 0 : PGSemaphoreUnlock(proc->sem);
1505 :
1506 267796 : if (mustwait)
1507 : {
1508 : /* Failed to get lock, so release interrupt holdoff */
1509 9560 : RESUME_INTERRUPTS();
1510 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1511 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1512 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1513 : }
1514 : else
1515 : {
1516 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1517 : /* Add lock to list of locks held by this backend */
1518 258236 : held_lwlocks[num_held_lwlocks].lock = lock;
1519 258236 : held_lwlocks[num_held_lwlocks++].mode = mode;
1520 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1521 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1522 : }
1523 :
1524 267796 : return !mustwait;
1525 : }
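
/*
 * A minimal sketch of the group-flush pattern described above, with
 * hypothetical helpers standing in for the real WAL machinery:
 */
extern bool already_flushed_upto(uint64 target);	/* hypothetical */
extern void do_flush_upto(uint64 target);			/* hypothetical */

static void
example_flush_upto(LWLock *lock, uint64 target)
{
	while (!already_flushed_upto(target))
	{
		if (LWLockAcquireOrWait(lock, LW_EXCLUSIVE))
		{
			/* Got the lock: flush for ourselves and anyone queued behind us. */
			do_flush_upto(target);
			LWLockRelease(lock);
			break;
		}

		/*
		 * Did not get the lock, but we slept until the concurrent holder
		 * released it; loop to recheck whether its flush already covered us.
		 */
	}
}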
1526 :
1527 : /*
1528 : * Does the lwlock in its current state need to wait for the variable value to
1529 : * change?
1530 : *
1531 : * If we don't need to wait, and it's because the value of the variable has
1532 : * changed, store the current value in newval.
1533 : *
1534 : * *result is set to true if the lock was free, and false otherwise.
1535 : */
1536 : static bool
1537 6248204 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1538 : uint64 *newval, bool *result)
1539 : {
1540 : bool mustwait;
1541 : uint64 value;
1542 :
1543 : /*
1544 : * Test first to see if the lock is free right now.
1545 : *
1546 : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1547 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1548 : * this, so we don't need a memory barrier here as far as the current
1549 : * usage is concerned. But that might not be safe in general.
1550 : */
1551 6248204 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1552 :
1553 6248204 : if (!mustwait)
1554 : {
1555 4539668 : *result = true;
1556 4539668 : return false;
1557 : }
1558 :
1559 1708536 : *result = false;
1560 :
1561 : /*
1562 : * Reading this value atomically is safe even on platforms where uint64
1563 : * cannot be read without observing a torn value.
1564 : */
1565 1708536 : value = pg_atomic_read_u64(valptr);
1566 :
1567 1708536 : if (value != oldval)
1568 : {
1569 1588366 : mustwait = false;
1570 1588366 : *newval = value;
1571 : }
1572 : else
1573 : {
1574 120170 : mustwait = true;
1575 : }
1576 :
1577 1708536 : return mustwait;
1578 : }
1579 :
1580 : /*
1581 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1582 : *
1583 : * If the lock is held and *valptr equals oldval, waits until the lock is
1584 : * either freed, or the lock holder updates *valptr by calling
1585 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1586 : * waiting), returns true. If the lock is still held, but *valptr no longer
1587 : * matches oldval, returns false and sets *newval to the current value in
1588 : * *valptr.
1589 : *
1590 : * Note: this function ignores shared lock holders; if the lock is held
1591 : * in shared mode, returns 'true'.
1592 : *
1593 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1594 : * hence the caller of this function may want to rely on an explicit barrier or
1595 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1596 : */
1597 : bool
1598 6128034 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1599 : uint64 *newval)
1600 : {
1601 6128034 : PGPROC *proc = MyProc;
1602 6128034 : int extraWaits = 0;
1603 6128034 : bool result = false;
1604 : #ifdef LWLOCK_STATS
1605 : lwlock_stats *lwstats;
1606 :
1607 : lwstats = get_lwlock_stats_entry(lock);
1608 : #endif
1609 :
1610 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1611 :
1612 : /*
1613 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1614 : * cleanup mechanism to remove us from the wait queue if we got
1615 : * interrupted.
1616 : */
1617 6128034 : HOLD_INTERRUPTS();
1618 :
1619 : /*
1620 : * Loop here to check the lock's status after each time we are signaled.
1621 : */
1622 : for (;;)
1623 57264 : {
1624 : bool mustwait;
1625 :
1626 6185298 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1627 : &result);
1628 :
1629 6185298 : if (!mustwait)
1630 6122392 : break; /* the lock was free or value didn't match */
1631 :
1632 : /*
1633 : * Add myself to the wait queue. Note that this is racy: somebody else
1634 : * could wake up before we're finished queuing. NB: We're using nearly
1635 : * the same twice-in-a-row lock acquisition protocol as
1636 : * LWLockAcquire(). Check its comments for details. The only
1637 : * difference is that we also have to check the variable's value when
1638 : * checking the state of the lock.
1639 : */
1640 62906 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1641 :
1642 : /*
1643 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1644 : * lock is released.
1645 : */
1646 62906 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1647 :
1648 : /*
1649 : * We're now guaranteed to be woken up if necessary. Recheck the
1650 : * state of the lock and of the variable.
1651 : */
1652 62906 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1653 : &result);
1654 :
1655 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1656 62906 : if (!mustwait)
1657 : {
1658 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1659 :
1660 5642 : LWLockDequeueSelf(lock);
1661 5642 : break;
1662 : }
1663 :
1664 : /*
1665 : * Wait until awakened.
1666 : *
1667 : * It is possible that we get awakened for a reason other than being
1668 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1669 : * we're done waiting, re-increment the sema by the number of
1670 : * additional signals received.
1671 : */
1672 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1673 :
1674 : #ifdef LWLOCK_STATS
1675 : lwstats->block_count++;
1676 : #endif
1677 :
1678 57264 : LWLockReportWaitStart(lock);
1679 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1680 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1681 :
1682 : for (;;)
1683 : {
1684 57264 : PGSemaphoreLock(proc->sem);
1685 57264 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1686 57264 : break;
1687 0 : extraWaits++;
1688 : }
1689 :
1690 : #ifdef LOCK_DEBUG
1691 : {
1692 : /* not waiting anymore */
1693 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1694 :
1695 : Assert(nwaiters < MAX_BACKENDS);
1696 : }
1697 : #endif
1698 :
1699 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1700 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1701 57264 : LWLockReportWaitEnd();
1702 :
1703 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1704 :
1705 : /* Now loop back and check the status of the lock again. */
1706 : }
1707 :
1708 : /*
1709 : * Fix the process wait semaphore's count for any absorbed wakeups.
1710 : */
1711 6128034 : while (extraWaits-- > 0)
1712 0 : PGSemaphoreUnlock(proc->sem);
1713 :
1714 : /*
1715 : * Now okay to allow cancel/die interrupts.
1716 : */
1717 6128034 : RESUME_INTERRUPTS();
1718 :
1719 6128034 : return result;
1720 : }
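/*
 * A hedged sketch of the waiter-side protocol for LWLockWaitForVar(),
 * loosely modelled on WaitXLogInsertionsToFinish().  progress_lock and
 * progress_pos are illustrative names; in real callers the variable lives
 * next to the lock in shared memory, and (per the note above) the caller
 * is expected to provide any memory barrier it needs before calling.
 */
#include "postgres.h"
#include "port/atomics.h"
#include "storage/lwlock.h"

extern LWLock *progress_lock;			/* hypothetical shared lock */
extern pg_atomic_uint64 *progress_pos;	/* variable protected by it */

static uint64
wait_for_progress_sketch(uint64 upto)
{
	uint64		seen = 0;

	while (seen < upto)
	{
		uint64		newval;

		/*
		 * Returns true once the lock is free (the holder is done);
		 * returns false, filling in newval, as soon as the holder moves
		 * the variable away from the value we last saw.
		 */
		if (LWLockWaitForVar(progress_lock, progress_pos, seen, &newval))
			break;

		seen = newval;			/* holder advanced but still holds the lock */
	}
	return seen;
}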
1721 :
1722 :
1723 : /*
1724 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1725 : *
1726 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1727 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1728 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1729 : * lock is guaranteed to see the new value, and act accordingly.
1730 : *
1731 : * The caller must be holding the lock in exclusive mode.
1732 : */
1733 : void
1734 5057452 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1735 : {
1736 : proclist_head wakeup;
1737 : proclist_mutable_iter iter;
1738 :
1739 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1740 :
1741 : /*
1742 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1743 : * that the variable is updated before waking up waiters.
1744 : */
1745 5057452 : pg_atomic_exchange_u64(valptr, val);
1746 :
1747 5057452 : proclist_init(&wakeup);
1748 :
1749 5057452 : LWLockWaitListLock(lock);
1750 :
1751 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1752 :
1753 : /*
1754 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1755 : * up. They are always in the front of the queue.
1756 : */
1757 5060568 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1758 : {
1759 99460 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1760 :
1761 99460 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1762 96344 : break;
1763 :
1764 3116 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1765 3116 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1766 :
1767 : /* see LWLockWakeup() */
1768 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1769 3116 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1770 : }
1771 :
1772 : /* We are done updating shared state of the lock itself. */
1773 5057452 : LWLockWaitListUnlock(lock);
1774 :
1775 : /*
1776 : * Awaken any waiters I removed from the queue.
1777 : */
1778 5060568 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1779 : {
1780 3116 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1781 :
1782 3116 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1783 : /* check comment in LWLockWakeup() about this barrier */
1784 3116 : pg_write_barrier();
1785 3116 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1786 3116 : PGSemaphoreUnlock(waiter->sem);
1787 : }
1788 5057452 : }
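/*
 * The matching lock-holder side of the sketch above: acquire the lock
 * exclusively, publish intermediate progress with LWLockUpdateVar(), and
 * reset the variable while releasing via LWLockReleaseClearVar().  The
 * names and the chunk size are illustrative assumptions.
 */
#include "postgres.h"
#include "port/atomics.h"
#include "storage/lwlock.h"

extern LWLock *progress_lock;			/* hypothetical shared lock */
extern pg_atomic_uint64 *progress_pos;	/* variable protected by it */

static void
publish_progress_sketch(uint64 start, uint64 end)
{
	uint64		pos;

	LWLockAcquire(progress_lock, LW_EXCLUSIVE);

	for (pos = start; pos < end; pos += 8192)
	{
		/* ... do one chunk of work covering [pos, pos + 8192) ... */

		/*
		 * Publish how far we have gotten; this wakes any
		 * LWLockWaitForVar() waiters whose oldval no longer matches.
		 */
		LWLockUpdateVar(progress_lock, progress_pos, pos);
	}

	/* Reset the variable to "no work in progress" and release the lock. */
	LWLockReleaseClearVar(progress_lock, progress_pos, 0);
}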
1789 :
1790 :
1791 : /*
1792 : * Stop treating lock as held by current backend.
1793 : *
1794 : * This is the code that can be shared between actually releasing a lock
1795 : * (LWLockRelease()) and just not tracking ownership of the lock anymore
1796 : * without releasing the lock (LWLockDisown()).
1797 : *
1798 : * Returns the mode in which the lock was held by the current backend.
1799 : *
1800 : * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
1801 : * to the caller.
1802 : *
1803 : * NB: This will leave lock->owner pointing to the current backend (if
1804 : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1805 : * debug cases of missing wakeups during lock release.
1806 : */
1807 : static inline LWLockMode
1808 717917186 : LWLockDisownInternal(LWLock *lock)
1809 : {
1810 : LWLockMode mode;
1811 : int i;
1812 :
1813 : /*
1814 : * Remove lock from list of locks held. Usually, but not always, it will
1815 : * be the latest-acquired lock; so search array backwards.
1816 : */
1817 798904032 : for (i = num_held_lwlocks; --i >= 0;)
1818 798904032 : if (lock == held_lwlocks[i].lock)
1819 717917186 : break;
1820 :
1821 717917186 : if (i < 0)
1822 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1823 :
1824 717917186 : mode = held_lwlocks[i].mode;
1825 :
1826 717917186 : num_held_lwlocks--;
1827 798904032 : for (; i < num_held_lwlocks; i++)
1828 80986846 : held_lwlocks[i] = held_lwlocks[i + 1];
1829 :
1830 717917186 : return mode;
1831 : }
1832 :
1833 : /*
1834 : * Helper function to release lock, shared between LWLockRelease() and
1835 : * LWLockReleaseDisowned().
1836 : */
1837 : static void
1838 717917186 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
1839 : {
1840 : uint32 oldstate;
1841 : bool check_waiters;
1842 :
1843 : /*
1844 : * Release my hold on lock, after that it can immediately be acquired by
1845 : * others, even if we still have to wakeup other waiters.
1846 : */
1847 717917186 : if (mode == LW_EXCLUSIVE)
1848 444823856 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1849 : else
1850 273093330 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1851 :
1852 : /* nobody else can have that kind of lock */
1853 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1854 :
1855 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1856 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1857 :
1858 : /*
1859 : * If we're still waiting for previously-woken backends to get scheduled,
1860 : * don't wake them up again.
1861 : */
1862 717917186 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1863 4920326 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1864 4920326 : (oldstate & LW_LOCK_MASK) == 0)
1865 4888836 : check_waiters = true;
1866 : else
1867 713028350 : check_waiters = false;
1868 :
1869 : /*
1870 : * As waking up waiters requires the spinlock to be acquired, only do so
1871 : * if necessary.
1872 : */
1873 717917186 : if (check_waiters)
1874 : {
1875 : /* XXX: remove before commit? */
1876 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1877 4888836 : LWLockWakeup(lock);
1878 : }
1879 717917186 : }
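/*
 * The wakeup decision above, restated as a standalone predicate purely for
 * readability.  This is only an illustrative rephrasing of the
 * check_waiters computation in LWLockReleaseInternal(); the flag and mask
 * constants are the ones defined earlier in this file.
 */
static inline bool
release_should_wake_waiters_sketch(uint32 oldstate)
{
	/*
	 * Wake waiters only if (a) somebody is actually waiting, (b) no
	 * earlier wakeup round is still pending (RELEASE_OK is set), and
	 * (c) our release left the lock entirely free, i.e. neither an
	 * exclusive holder nor any shared holders remain.
	 */
	return (oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
		(LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
		(oldstate & LW_LOCK_MASK) == 0;
}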
1880 :
1881 :
1882 : /*
1883 : * Stop treating lock as held by current backend.
1884 : *
1885 : * After calling this function it is the caller's responsibility to ensure that
1886 : * the lock gets released (via LWLockReleaseDisowned()), even in case of an
1887 : * error. This is only desirable if the lock is going to be released in a
1888 : * different process than the process that acquired it.
1889 : */
1890 : void
1891 0 : LWLockDisown(LWLock *lock)
1892 : {
1893 0 : LWLockDisownInternal(lock);
1894 :
1895 0 : RESUME_INTERRUPTS();
1896 0 : }
1897 :
1898 : /*
1899 : * LWLockRelease - release a previously acquired lock
1900 : */
1901 : void
1902 717917186 : LWLockRelease(LWLock *lock)
1903 : {
1904 : LWLockMode mode;
1905 :
1906 717917186 : mode = LWLockDisownInternal(lock);
1907 :
1908 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1909 :
1910 717917186 : LWLockReleaseInternal(lock, mode);
1911 :
1912 : /*
1913 : * Now okay to allow cancel/die interrupts.
1914 : */
1915 717917186 : RESUME_INTERRUPTS();
1916 717917186 : }
1917 :
1918 : /*
1919 : * Release lock previously disowned with LWLockDisown().
1920 : */
1921 : void
1922 0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
1923 : {
1924 0 : LWLockReleaseInternal(lock, mode);
1925 0 : }
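/*
 * A hedged sketch of the disown protocol described above: one process
 * acquires the lock and hands responsibility off, another later releases
 * it with LWLockReleaseDisowned() using the original mode.  HandoffState
 * and the function names are illustrative; a real caller would keep this
 * state in shared memory and guarantee release even on error.
 */
#include "postgres.h"
#include "storage/lwlock.h"

typedef struct HandoffState
{
	LWLock	   *lock;			/* the disowned lock */
	LWLockMode	mode;			/* mode it was acquired in */
} HandoffState;

/* Runs in the process that acquires the lock. */
static void
start_handoff_sketch(HandoffState *handoff, LWLock *lock)
{
	LWLockAcquire(lock, LW_EXCLUSIVE);

	/* ... set up work that another process will finish ... */

	handoff->lock = lock;
	handoff->mode = LW_EXCLUSIVE;

	/* Stop tracking the lock locally; releasing it is now someone else's job. */
	LWLockDisown(lock);
}

/* Runs in the process that finishes the work. */
static void
finish_handoff_sketch(HandoffState *handoff)
{
	/* ... finish the work protected by the lock ... */

	LWLockReleaseDisowned(handoff->lock, handoff->mode);
}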
1926 :
1927 : /*
1928 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1929 : */
1930 : void
1931 29123018 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1932 : {
1933 : /*
1934 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1935 : * that the variable is updated before releasing the lock.
1936 : */
1937 29123018 : pg_atomic_exchange_u64(valptr, val);
1938 :
1939 29123018 : LWLockRelease(lock);
1940 29123018 : }
1941 :
1942 :
1943 : /*
1944 : * LWLockReleaseAll - release all currently-held locks
1945 : *
1946 : * Used to clean up after ereport(ERROR). An important difference between this
1947 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1948 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1949 : * has been set to an appropriate level earlier in error recovery. We could
1950 : * decrement it below zero if we allow it to drop for each released lock!
1951 : */
1952 : void
1953 109554 : LWLockReleaseAll(void)
1954 : {
1955 109940 : while (num_held_lwlocks > 0)
1956 : {
1957 386 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1958 :
1959 386 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1960 : }
1961 109554 : }
1962 :
1963 :
1964 : /*
1965 : * ForEachLWLockHeldByMe - run a callback for each held lock
1966 : *
1967 : * This is meant as debug support only.
1968 : */
1969 : void
1970 0 : ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
1971 : void *context)
1972 : {
1973 : int i;
1974 :
1975 0 : for (i = 0; i < num_held_lwlocks; i++)
1976 0 : callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
1977 0 : }
1978 :
1979 : /*
1980 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1981 : *
1982 : * This is meant as debug support only.
1983 : */
1984 : bool
1985 0 : LWLockHeldByMe(LWLock *lock)
1986 : {
1987 : int i;
1988 :
1989 0 : for (i = 0; i < num_held_lwlocks; i++)
1990 : {
1991 0 : if (held_lwlocks[i].lock == lock)
1992 0 : return true;
1993 : }
1994 0 : return false;
1995 : }
1996 :
1997 : /*
1998 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1999 : *
2000 : * This is meant as debug support only.
2001 : */
2002 : bool
2003 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
2004 : {
2005 : char *held_lock_addr;
2006 : char *begin;
2007 : char *end;
2008 : int i;
2009 :
2010 0 : begin = (char *) lock;
2011 0 : end = begin + nlocks * stride;
2012 0 : for (i = 0; i < num_held_lwlocks; i++)
2013 : {
2014 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
2015 0 : if (held_lock_addr >= begin &&
2016 0 : held_lock_addr < end &&
2017 0 : (held_lock_addr - begin) % stride == 0)
2018 0 : return true;
2019 : }
2020 0 : return false;
2021 : }
2022 :
2023 : /*
2024 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
2025 : *
2026 : * This is meant as debug support only.
2027 : */
2028 : bool
2029 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
2030 : {
2031 : int i;
2032 :
2033 0 : for (i = 0; i < num_held_lwlocks; i++)
2034 : {
2035 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
2036 0 : return true;
2037 : }
2038 0 : return false;
2039 : }
|