Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frammishes. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. Acquiring the lock with
15 : * LWLockAcquire does not modify the variable, i.e. it keeps the value it
16 : * was set to when the lock was last released, and it can be updated
17 : * without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : * appropriate value for a free lock. The meaning of the variable is up to
21 : * the caller, the lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the new value is
52 : * zero (we get that atomically), we know we can, and have to, release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible share lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a multi-phase attempt at locking:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem above: nobody can release the lock
74 : * too quickly, before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "storage/proc.h"
84 : #include "storage/proclist.h"
85 : #include "storage/procnumber.h"
86 : #include "storage/spin.h"
87 : #include "utils/memutils.h"
88 :
89 : #ifdef LWLOCK_STATS
90 : #include "utils/hsearch.h"
91 : #endif
92 :
93 :
94 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 31)
95 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 30)
96 : #define LW_FLAG_LOCKED ((uint32) 1 << 29)
97 : #define LW_FLAG_BITS 3
98 : #define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
99 :
100 : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
101 : #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
102 : #define LW_VAL_SHARED 1
103 :
104 : /* already (power of 2)-1, i.e. suitable for a mask */
105 : #define LW_SHARED_MASK MAX_BACKENDS
106 : #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
107 :
108 :
109 : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
110 : "MAX_BACKENDS + 1 needs to be a power of 2");
111 :
112 : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
113 : "MAX_BACKENDS and LW_FLAG_MASK overlap");
114 :
115 : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
116 : "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
117 :
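/*
 * A minimal sketch (not part of the real implementation; the function name is
 * hypothetical) of how the lockcount scheme described in the file header works
 * for a shared acquisition: conflict only with the exclusive sentinel, add
 * LW_VAL_SHARED, and let the compare-and-exchange double as the retry loop.
 * The real code, which also handles the flag bits and exclusive mode, is
 * LWLockAttemptLock() below.
 */
#ifdef NOT_USED
static bool
sketch_try_lock_shared(pg_atomic_uint32 *lockcount)
{
	uint32		old_state = pg_atomic_read_u32(lockcount);

	while (true)
	{
		uint32		desired_state;

		/* a would-be sharer conflicts only with the exclusive sentinel */
		if (old_state & LW_VAL_EXCLUSIVE)
			return false;

		desired_state = old_state + LW_VAL_SHARED;	/* one more share locker */

		/* on failure, old_state is refreshed with the current value */
		if (pg_atomic_compare_exchange_u32(lockcount, &old_state, desired_state))
			return true;
	}
}
#endif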
118 : /*
119 : * There are three sorts of LWLock "tranches":
120 : *
121 : * 1. The individually-named locks defined in lwlocklist.h each have their
122 : * own tranche. We absorb the names of these tranches from there into
123 : * BuiltinTrancheNames here.
124 : *
125 : * 2. There are some predefined tranches for built-in groups of locks defined
126 : * in lwlocklist.h. We absorb the names of these tranches, too.
127 : *
128 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
129 : * or LWLockRegisterTranche. The names of these that are known in the current
130 : * process appear in LWLockTrancheNames[].
131 : *
132 : * All these names are user-visible as wait event names, so choose with care
133 : * ... and do not forget to update the documentation's list of wait events.
134 : */
135 : static const char *const BuiltinTrancheNames[] = {
136 : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
137 : #define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
138 : #include "storage/lwlocklist.h"
139 : #undef PG_LWLOCK
140 : #undef PG_LWLOCKTRANCHE
141 : };
142 :
143 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
144 : LWTRANCHE_FIRST_USER_DEFINED,
145 : "missing entries in BuiltinTrancheNames[]");
146 :
147 : /*
148 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
149 : * stores the names of all dynamically-created tranches known to the current
150 : * process. Any unused entries in the array will contain NULL.
151 : */
152 : static const char **LWLockTrancheNames = NULL;
153 : static int LWLockTrancheNamesAllocated = 0;
154 :
155 : /*
156 : * This points to the main array of LWLocks in shared memory. Backends inherit
157 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
158 : * where we have special measures to pass it down).
159 : */
160 : LWLockPadded *MainLWLockArray = NULL;
161 :
162 : /*
163 : * We use this structure to keep track of locked LWLocks for release
164 : * during error recovery. Normally, only a few will be held at once, but
165 : * occasionally the number can be much higher; for example, the pg_buffercache
166 : * extension locks all buffer partitions simultaneously.
167 : */
168 : #define MAX_SIMUL_LWLOCKS 200
169 :
170 : /* struct representing the LWLocks we're holding */
171 : typedef struct LWLockHandle
172 : {
173 : LWLock *lock;
174 : LWLockMode mode;
175 : } LWLockHandle;
176 :
177 : static int num_held_lwlocks = 0;
178 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
179 :
180 : /* struct representing the LWLock tranche request for named tranche */
181 : typedef struct NamedLWLockTrancheRequest
182 : {
183 : char tranche_name[NAMEDATALEN];
184 : int num_lwlocks;
185 : } NamedLWLockTrancheRequest;
186 :
187 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
188 : static int NamedLWLockTrancheRequestsAllocated = 0;
189 :
190 : /*
191 : * NamedLWLockTrancheRequests is both the valid length of the request array,
192 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
193 : * This variable and NamedLWLockTrancheArray are non-static so that
194 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
195 : */
196 : int NamedLWLockTrancheRequests = 0;
197 :
198 : /* points to data in shared memory: */
199 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
200 :
201 : static void InitializeLWLocks(void);
202 : static inline void LWLockReportWaitStart(LWLock *lock);
203 : static inline void LWLockReportWaitEnd(void);
204 : static const char *GetLWTrancheName(uint16 trancheId);
205 :
206 : #define T_NAME(lock) \
207 : GetLWTrancheName((lock)->tranche)
208 :
209 : #ifdef LWLOCK_STATS
210 : typedef struct lwlock_stats_key
211 : {
212 : int tranche;
213 : void *instance;
214 : } lwlock_stats_key;
215 :
216 : typedef struct lwlock_stats
217 : {
218 : lwlock_stats_key key;
219 : int sh_acquire_count;
220 : int ex_acquire_count;
221 : int block_count;
222 : int dequeue_self_count;
223 : int spin_delay_count;
224 : } lwlock_stats;
225 :
226 : static HTAB *lwlock_stats_htab;
227 : static lwlock_stats lwlock_stats_dummy;
228 : #endif
229 :
230 : #ifdef LOCK_DEBUG
231 : bool Trace_lwlocks = false;
232 :
233 : inline static void
234 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
235 : {
236 : /* hide statement & context here, otherwise the log is just too verbose */
237 : if (Trace_lwlocks)
238 : {
239 : uint32 state = pg_atomic_read_u32(&lock->state);
240 :
241 : ereport(LOG,
242 : (errhidestmt(true),
243 : errhidecontext(true),
244 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
245 : MyProcPid,
246 : where, T_NAME(lock), lock,
247 : (state & LW_VAL_EXCLUSIVE) != 0,
248 : state & LW_SHARED_MASK,
249 : (state & LW_FLAG_HAS_WAITERS) != 0,
250 : pg_atomic_read_u32(&lock->nwaiters),
251 : (state & LW_FLAG_RELEASE_OK) != 0)));
252 : }
253 : }
254 :
255 : inline static void
256 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
257 : {
258 : /* hide statement & context here, otherwise the log is just too verbose */
259 : if (Trace_lwlocks)
260 : {
261 : ereport(LOG,
262 : (errhidestmt(true),
263 : errhidecontext(true),
264 : errmsg_internal("%s(%s %p): %s", where,
265 : T_NAME(lock), lock, msg)));
266 : }
267 : }
268 :
269 : #else /* not LOCK_DEBUG */
270 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
271 : #define LOG_LWDEBUG(a,b,c) ((void)0)
272 : #endif /* LOCK_DEBUG */
273 :
274 : #ifdef LWLOCK_STATS
275 :
276 : static void init_lwlock_stats(void);
277 : static void print_lwlock_stats(int code, Datum arg);
278 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
279 :
280 : static void
281 : init_lwlock_stats(void)
282 : {
283 : HASHCTL ctl;
284 : static MemoryContext lwlock_stats_cxt = NULL;
285 : static bool exit_registered = false;
286 :
287 : if (lwlock_stats_cxt != NULL)
288 : MemoryContextDelete(lwlock_stats_cxt);
289 :
290 : /*
291 : * The LWLock stats will be updated within a critical section, which
292 : * requires allocating new hash entries. Allocations within a critical
293 : * section are normally not allowed because running out of memory would
294 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
295 : * turned on in production, so that's an acceptable risk. The hash entries
296 : * are small, so the risk of running out of memory is minimal in practice.
297 : */
298 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
299 : "LWLock stats",
300 : ALLOCSET_DEFAULT_SIZES);
301 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
302 :
303 : ctl.keysize = sizeof(lwlock_stats_key);
304 : ctl.entrysize = sizeof(lwlock_stats);
305 : ctl.hcxt = lwlock_stats_cxt;
306 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
307 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
308 : if (!exit_registered)
309 : {
310 : on_shmem_exit(print_lwlock_stats, 0);
311 : exit_registered = true;
312 : }
313 : }
314 :
315 : static void
316 : print_lwlock_stats(int code, Datum arg)
317 : {
318 : HASH_SEQ_STATUS scan;
319 : lwlock_stats *lwstats;
320 :
321 : hash_seq_init(&scan, lwlock_stats_htab);
322 :
323 : /* Grab an LWLock to keep different backends from mixing reports */
324 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
325 :
326 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
327 : {
328 : fprintf(stderr,
329 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
330 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
331 : lwstats->key.instance, lwstats->sh_acquire_count,
332 : lwstats->ex_acquire_count, lwstats->block_count,
333 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
334 : }
335 :
336 : LWLockRelease(&MainLWLockArray[0].lock);
337 : }
338 :
339 : static lwlock_stats *
340 : get_lwlock_stats_entry(LWLock *lock)
341 : {
342 : lwlock_stats_key key;
343 : lwlock_stats *lwstats;
344 : bool found;
345 :
346 : /*
347 : * During shared memory initialization, the hash table doesn't exist yet.
348 : * Stats of that phase aren't very interesting, so just collect operations
349 : * on all locks in a single dummy entry.
350 : */
351 : if (lwlock_stats_htab == NULL)
352 : return &lwlock_stats_dummy;
353 :
354 : /* Fetch or create the entry. */
355 : MemSet(&key, 0, sizeof(key));
356 : key.tranche = lock->tranche;
357 : key.instance = lock;
358 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
359 : if (!found)
360 : {
361 : lwstats->sh_acquire_count = 0;
362 : lwstats->ex_acquire_count = 0;
363 : lwstats->block_count = 0;
364 : lwstats->dequeue_self_count = 0;
365 : lwstats->spin_delay_count = 0;
366 : }
367 : return lwstats;
368 : }
369 : #endif /* LWLOCK_STATS */
370 :
371 :
372 : /*
373 : * Compute number of LWLocks required by named tranches. These will be
374 : * allocated in the main array.
375 : */
376 : static int
377 8302 : NumLWLocksForNamedTranches(void)
378 : {
379 8302 : int numLocks = 0;
380 : int i;
381 :
382 8358 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
383 56 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
384 :
385 8302 : return numLocks;
386 : }
387 :
388 : /*
389 : * Compute shmem space needed for LWLocks and named tranches.
390 : */
391 : Size
392 6150 : LWLockShmemSize(void)
393 : {
394 : Size size;
395 : int i;
396 6150 : int numLocks = NUM_FIXED_LWLOCKS;
397 :
398 : /* Calculate total number of locks needed in the main array. */
399 6150 : numLocks += NumLWLocksForNamedTranches();
400 :
401 : /* Space for the LWLock array. */
402 6150 : size = mul_size(numLocks, sizeof(LWLockPadded));
403 :
404 : /* Space for dynamic allocation counter, plus room for alignment. */
405 6150 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
406 :
407 : /* space for named tranches. */
408 6150 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
409 :
410 : /* space for name of each tranche. */
411 6192 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
412 42 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
413 :
414 6150 : return size;
415 : }
416 :
417 : /*
418 : * Allocate shmem space for the main LWLock array and all tranches and
419 : * initialize it. We also register extension LWLock tranches here.
420 : */
421 : void
422 2152 : CreateLWLocks(void)
423 : {
424 2152 : if (!IsUnderPostmaster)
425 : {
426 2152 : Size spaceLocks = LWLockShmemSize();
427 : int *LWLockCounter;
428 : char *ptr;
429 :
430 : /* Allocate space */
431 2152 : ptr = (char *) ShmemAlloc(spaceLocks);
432 :
433 : /* Leave room for dynamic allocation of tranches */
434 2152 : ptr += sizeof(int);
435 :
436 : /* Ensure desired alignment of LWLock array */
437 2152 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
438 :
439 2152 : MainLWLockArray = (LWLockPadded *) ptr;
440 :
441 : /*
442 : * Initialize the dynamic-allocation counter for tranches, which is
443 : * stored just before the first LWLock.
444 : */
445 2152 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
446 2152 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
447 :
448 : /* Initialize all LWLocks */
449 2152 : InitializeLWLocks();
450 : }
451 :
452 : /* Register named extension LWLock tranches in the current process. */
453 2166 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
454 14 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
455 14 : NamedLWLockTrancheArray[i].trancheName);
456 2152 : }
457 :
458 : /*
459 : * Initialize LWLocks that are fixed and those belonging to named tranches.
460 : */
461 : static void
462 2152 : InitializeLWLocks(void)
463 : {
464 2152 : int numNamedLocks = NumLWLocksForNamedTranches();
465 : int id;
466 : int i;
467 : int j;
468 : LWLockPadded *lock;
469 :
470 : /* Initialize all individual LWLocks in main array */
471 118360 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
472 116208 : LWLockInitialize(&lock->lock, id);
473 :
474 : /* Initialize buffer mapping LWLocks in main array */
475 2152 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
476 277608 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
477 275456 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
478 :
479 : /* Initialize lmgrs' LWLocks in main array */
480 2152 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
481 36584 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
482 34432 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
483 :
484 : /* Initialize predicate lmgrs' LWLocks in main array */
485 2152 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
486 36584 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
487 34432 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
488 :
489 : /*
490 : * Copy the info about any named tranches into shared memory (so that
491 : * other processes can see it), and initialize the requested LWLocks.
492 : */
493 2152 : if (NamedLWLockTrancheRequests > 0)
494 : {
495 : char *trancheNames;
496 :
497 14 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
498 14 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
499 :
500 14 : trancheNames = (char *) NamedLWLockTrancheArray +
501 14 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
502 14 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
503 :
504 28 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
505 : {
506 : NamedLWLockTrancheRequest *request;
507 : NamedLWLockTranche *tranche;
508 : char *name;
509 :
510 14 : request = &NamedLWLockTrancheRequestArray[i];
511 14 : tranche = &NamedLWLockTrancheArray[i];
512 :
513 14 : name = trancheNames;
514 14 : trancheNames += strlen(request->tranche_name) + 1;
515 14 : strcpy(name, request->tranche_name);
516 14 : tranche->trancheId = LWLockNewTrancheId();
517 14 : tranche->trancheName = name;
518 :
519 28 : for (j = 0; j < request->num_lwlocks; j++, lock++)
520 14 : LWLockInitialize(&lock->lock, tranche->trancheId);
521 : }
522 : }
523 2152 : }
524 :
525 : /*
526 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
527 : */
528 : void
529 42536 : InitLWLockAccess(void)
530 : {
531 : #ifdef LWLOCK_STATS
532 : init_lwlock_stats();
533 : #endif
534 42536 : }
535 :
536 : /*
537 : * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
538 : * to the specified tranche.
539 : *
540 : * The caller can access the requested number of LWLocks starting from the
541 : * base address returned by this function. This works only for tranches
542 : * that were requested via the RequestNamedLWLockTranche() API.
543 : */
544 : LWLockPadded *
545 14 : GetNamedLWLockTranche(const char *tranche_name)
546 : {
547 : int lock_pos;
548 : int i;
549 :
550 : /*
551 : * Obtain the position of base address of LWLock belonging to requested
552 : * tranche_name in MainLWLockArray. LWLocks for named tranches are placed
553 : * in MainLWLockArray after fixed locks.
554 : */
555 14 : lock_pos = NUM_FIXED_LWLOCKS;
556 14 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
557 : {
558 14 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
559 : tranche_name) == 0)
560 14 : return &MainLWLockArray[lock_pos];
561 :
562 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
563 : }
564 :
565 0 : elog(ERROR, "requested tranche is not registered");
566 :
567 : /* just to keep compiler quiet */
568 : return NULL;
569 : }
570 :
571 : /*
572 : * Allocate a new tranche ID.
573 : */
574 : int
575 40 : LWLockNewTrancheId(void)
576 : {
577 : int result;
578 : int *LWLockCounter;
579 :
580 40 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
581 : /* We use the ShmemLock spinlock to protect LWLockCounter */
582 40 : SpinLockAcquire(ShmemLock);
583 40 : result = (*LWLockCounter)++;
584 40 : SpinLockRelease(ShmemLock);
585 :
586 40 : return result;
587 : }
588 :
589 : /*
590 : * Register a dynamic tranche name in the lookup table of the current process.
591 : *
592 : * This routine will save a pointer to the tranche name passed as an argument,
593 : * so the name should be allocated in a backend-lifetime context
594 : * (shared memory, TopMemoryContext, static constant, or similar).
595 : *
596 : * The tranche name will be user-visible as a wait event name, so try to
597 : * use a name that fits the style for those.
598 : */
599 : void
600 56 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
601 : {
602 : /* This should only be called for user-defined tranches. */
603 56 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
604 0 : return;
605 :
606 : /* Convert to array index. */
607 56 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
608 :
609 : /* If necessary, create or enlarge array. */
610 56 : if (tranche_id >= LWLockTrancheNamesAllocated)
611 : {
612 : int newalloc;
613 :
614 34 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
615 :
616 34 : if (LWLockTrancheNames == NULL)
617 34 : LWLockTrancheNames = (const char **)
618 34 : MemoryContextAllocZero(TopMemoryContext,
619 : newalloc * sizeof(char *));
620 : else
621 0 : LWLockTrancheNames =
622 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
623 34 : LWLockTrancheNamesAllocated = newalloc;
624 : }
625 :
626 56 : LWLockTrancheNames[tranche_id] = tranche_name;
627 : }
628 :
629 : /*
630 : * RequestNamedLWLockTranche
631 : * Request that extra LWLocks be allocated during postmaster
632 : * startup.
633 : *
634 : * This may only be called via the shmem_request_hook of a library that is
635 : * loaded into the postmaster via shared_preload_libraries. Calls from
636 : * elsewhere will fail.
637 : *
638 : * The tranche name will be user-visible as a wait event name, so try to
639 : * use a name that fits the style for those.
640 : */
641 : void
642 14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
643 : {
644 : NamedLWLockTrancheRequest *request;
645 :
646 14 : if (!process_shmem_requests_in_progress)
647 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
648 :
649 14 : if (NamedLWLockTrancheRequestArray == NULL)
650 : {
651 14 : NamedLWLockTrancheRequestsAllocated = 16;
652 14 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
653 14 : MemoryContextAlloc(TopMemoryContext,
654 : NamedLWLockTrancheRequestsAllocated
655 : * sizeof(NamedLWLockTrancheRequest));
656 : }
657 :
658 14 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
659 : {
660 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
661 :
662 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
663 0 : repalloc(NamedLWLockTrancheRequestArray,
664 : i * sizeof(NamedLWLockTrancheRequest));
665 0 : NamedLWLockTrancheRequestsAllocated = i;
666 : }
667 :
668 14 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
669 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
670 14 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
671 14 : request->num_lwlocks = num_lwlocks;
672 14 : NamedLWLockTrancheRequests++;
673 14 : }
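/*
 * A minimal sketch of how an extension typically uses this API (the extension
 * name, variable names, and hook functions are hypothetical): the tranche is
 * requested from a shmem_request_hook, and the locks are fetched later, e.g.
 * from a shmem_startup_hook, once shared memory has been created.
 */
#ifdef NOT_USED
static LWLockPadded *my_ext_locks;

static void
my_ext_shmem_request(void)
{
	/* ask the postmaster to allocate four LWLocks in the main array */
	RequestNamedLWLockTranche("my_extension", 4);
}

static void
my_ext_shmem_startup(void)
{
	/* base address of the four locks requested above */
	my_ext_locks = GetNamedLWLockTranche("my_extension");

	LWLockAcquire(&my_ext_locks[0].lock, LW_EXCLUSIVE);
	/* ... touch the extension's shared state ... */
	LWLockRelease(&my_ext_locks[0].lock);
}
#endif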
674 :
675 : /*
676 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
677 : */
678 : void
679 24638636 : LWLockInitialize(LWLock *lock, int tranche_id)
680 : {
681 24638636 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
682 : #ifdef LOCK_DEBUG
683 : pg_atomic_init_u32(&lock->nwaiters, 0);
684 : #endif
685 24638636 : lock->tranche = tranche_id;
686 24638636 : proclist_init(&lock->waiters);
687 24638636 : }
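/*
 * A minimal sketch (names hypothetical) of the other way extensions obtain
 * LWLocks: place them in extension-managed shared memory, allocate a tranche
 * id once, initialize the locks, and register the tranche name in every
 * process that will report waits on them.
 */
#ifdef NOT_USED
typedef struct MyExtSharedState
{
	int			tranche_id;
	LWLock		lock;
} MyExtSharedState;

static void
my_ext_state_init(MyExtSharedState *state, bool found)
{
	if (!found)
	{
		/* first attach: pick a tranche id and set up the lock */
		state->tranche_id = LWLockNewTrancheId();
		LWLockInitialize(&state->lock, state->tranche_id);
	}

	/* every process: make the tranche name known locally */
	LWLockRegisterTranche(state->tranche_id, "my_extension_state");
}
#endif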
688 :
689 : /*
690 : * Report start of wait event for light-weight locks.
691 : *
692 : * This function is used by all the lightweight lock calls that need to
693 : * wait to acquire the lock. It distinguishes the wait event based on
694 : * the tranche and lock id.
695 : */
696 : static inline void
697 9370620 : LWLockReportWaitStart(LWLock *lock)
698 : {
699 9370620 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
700 9370620 : }
701 :
702 : /*
703 : * Report end of wait event for light-weight locks.
704 : */
705 : static inline void
706 9370620 : LWLockReportWaitEnd(void)
707 : {
708 9370620 : pgstat_report_wait_end();
709 9370620 : }
710 :
711 : /*
712 : * Return the name of an LWLock tranche.
713 : */
714 : static const char *
715 50 : GetLWTrancheName(uint16 trancheId)
716 : {
717 : /* Built-in tranche or individual LWLock? */
718 50 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
719 50 : return BuiltinTrancheNames[trancheId];
720 :
721 : /*
722 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
723 : * it's possible that the tranche has never been registered in the current
724 : * process, in which case give up and return "extension".
725 : */
726 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
727 :
728 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
729 0 : LWLockTrancheNames[trancheId] == NULL)
730 0 : return "extension";
731 :
732 0 : return LWLockTrancheNames[trancheId];
733 : }
734 :
735 : /*
736 : * Return an identifier for an LWLock based on the wait class and event.
737 : */
738 : const char *
739 50 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
740 : {
741 : Assert(classId == PG_WAIT_LWLOCK);
742 : /* The event IDs are just tranche numbers. */
743 50 : return GetLWTrancheName(eventId);
744 : }
745 :
746 : /*
747 : * Internal function that tries to atomically acquire the lwlock in the
748 : * passed-in mode.
749 : *
750 : * This function will not block waiting for a lock to become free - that's the
751 : * caller's job.
752 : *
753 : * Returns true if the lock isn't free and we need to wait.
754 : */
755 : static bool
756 754754028 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
757 : {
758 : uint32 old_state;
759 :
760 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
761 :
762 : /*
763 : * Read once outside the loop, later iterations will get the newer value
764 : * via compare & exchange.
765 : */
766 754754028 : old_state = pg_atomic_read_u32(&lock->state);
767 :
768 : /* loop until we've determined whether we could acquire the lock or not */
769 : while (true)
770 597782 : {
771 : uint32 desired_state;
772 : bool lock_free;
773 :
774 755351810 : desired_state = old_state;
775 :
776 755351810 : if (mode == LW_EXCLUSIVE)
777 : {
778 458824284 : lock_free = (old_state & LW_LOCK_MASK) == 0;
779 458824284 : if (lock_free)
780 454325238 : desired_state += LW_VAL_EXCLUSIVE;
781 : }
782 : else
783 : {
784 296527526 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
785 296527526 : if (lock_free)
786 282224276 : desired_state += LW_VAL_SHARED;
787 : }
788 :
789 : /*
790 : * Attempt to swap in the state we are expecting. If we didn't see
791 : * lock to be free, that's just the old value. If we saw it as free,
792 : * we'll attempt to mark it acquired. The reason that we always swap
793 : * in the value is that this doubles as a memory barrier. We could try
794 : * to be smarter and only swap in values if we saw the lock as free,
795 : * but benchmark haven't shown it as beneficial so far.
796 : *
797 : * Retry if the value changed since we last looked at it.
798 : */
799 755351810 : if (pg_atomic_compare_exchange_u32(&lock->state,
800 : &old_state, desired_state))
801 : {
802 754754028 : if (lock_free)
803 : {
804 : /* Great! Got the lock. */
805 : #ifdef LOCK_DEBUG
806 : if (mode == LW_EXCLUSIVE)
807 : lock->owner = MyProc;
808 : #endif
809 736125844 : return false;
810 : }
811 : else
812 18628184 : return true; /* somebody else has the lock */
813 : }
814 : }
815 : pg_unreachable();
816 : }
817 :
818 : /*
819 : * Lock the LWLock's wait list against concurrent activity.
820 : *
821 : * NB: even though the wait list is locked, non-conflicting lock operations
822 : * may still happen concurrently.
823 : *
824 : * Time spent holding mutex should be short!
825 : */
826 : static void
827 24223802 : LWLockWaitListLock(LWLock *lock)
828 : {
829 : uint32 old_state;
830 : #ifdef LWLOCK_STATS
831 : lwlock_stats *lwstats;
832 : uint32 delays = 0;
833 :
834 : lwstats = get_lwlock_stats_entry(lock);
835 : #endif
836 :
837 : while (true)
838 : {
839 : /* always try once to acquire lock directly */
840 24358274 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
841 24358274 : if (!(old_state & LW_FLAG_LOCKED))
842 24223802 : break; /* got lock */
843 :
844 : /* and then spin without atomic operations until lock is released */
845 : {
846 : SpinDelayStatus delayStatus;
847 :
848 134472 : init_local_spin_delay(&delayStatus);
849 :
850 373362 : while (old_state & LW_FLAG_LOCKED)
851 : {
852 238890 : perform_spin_delay(&delayStatus);
853 238890 : old_state = pg_atomic_read_u32(&lock->state);
854 : }
855 : #ifdef LWLOCK_STATS
856 : delays += delayStatus.delays;
857 : #endif
858 134472 : finish_spin_delay(&delayStatus);
859 : }
860 :
861 : /*
862 : * Retry. The lock might well have been re-acquired by the time we
863 : * attempt to get it again.
864 : */
865 : }
866 :
867 : #ifdef LWLOCK_STATS
868 : lwstats->spin_delay_count += delays;
869 : #endif
870 24223802 : }
871 :
872 : /*
873 : * Unlock the LWLock's wait list.
874 : *
875 : * Note that it can be more efficient to manipulate flags and release the
876 : * locks in a single atomic operation.
877 : */
878 : static void
879 14642108 : LWLockWaitListUnlock(LWLock *lock)
880 : {
881 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
882 :
883 14642108 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
884 :
885 : Assert(old_state & LW_FLAG_LOCKED);
886 14642108 : }
887 :
888 : /*
889 : * Wake up all the lockers that currently have a chance to acquire the lock.
890 : */
891 : static void
892 9581694 : LWLockWakeup(LWLock *lock)
893 : {
894 : bool new_release_ok;
895 9581694 : bool wokeup_somebody = false;
896 : proclist_head wakeup;
897 : proclist_mutable_iter iter;
898 :
899 9581694 : proclist_init(&wakeup);
900 :
901 9581694 : new_release_ok = true;
902 :
903 : /* lock wait list while collecting backends to wake up */
904 9581694 : LWLockWaitListLock(lock);
905 :
906 16862682 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
907 : {
908 9452550 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
909 :
910 9452550 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
911 44970 : continue;
912 :
913 9407580 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
914 9407580 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
915 :
916 9407580 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
917 : {
918 : /*
919 : * Prevent additional wakeups until retryer gets to run. Backends
920 : * that are just waiting for the lock to become free don't retry
921 : * automatically.
922 : */
923 9266698 : new_release_ok = false;
924 :
925 : /*
926 : * Don't wakeup (further) exclusive locks.
927 : */
928 9266698 : wokeup_somebody = true;
929 : }
930 :
931 : /*
932 : * Signal that the process isn't on the wait list anymore. This allows
933 : * LWLockDequeueSelf() to remove itself from the waitlist with a
934 : * proclist_delete(), rather than having to check whether it has
935 : * already been removed from the list.
936 : */
937 : Assert(waiter->lwWaiting == LW_WS_WAITING);
938 9407580 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
939 :
940 : /*
941 : * Once we've woken up an exclusive waiter, there's no point in waking
942 : * up anybody else.
943 : */
944 9407580 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
945 2171562 : break;
946 : }
947 :
948 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
949 :
950 : /* unset required flags, and release lock, in one fell swoop */
951 : {
952 : uint32 old_state;
953 : uint32 desired_state;
954 :
955 9581694 : old_state = pg_atomic_read_u32(&lock->state);
956 : while (true)
957 : {
958 9661376 : desired_state = old_state;
959 :
960 : /* compute desired flags */
961 :
962 9661376 : if (new_release_ok)
963 710324 : desired_state |= LW_FLAG_RELEASE_OK;
964 : else
965 8951052 : desired_state &= ~LW_FLAG_RELEASE_OK;
966 :
967 9661376 : if (proclist_is_empty(&wakeup))
968 613600 : desired_state &= ~LW_FLAG_HAS_WAITERS;
969 :
970 9661376 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
971 :
972 9661376 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
973 : desired_state))
974 9581694 : break;
975 : }
976 : }
977 :
978 : /* Awaken any waiters I removed from the queue. */
979 18989274 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
980 : {
981 9407580 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
982 :
983 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
984 9407580 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
985 :
986 : /*
987 : * Guarantee that lwWaiting being unset only becomes visible once the
988 : * unlink from the list has completed. Otherwise the target backend
989 : * could be woken up for some other reason and enqueue for a new lock -
990 : * if that happens before the list unlink happens, the list would end
991 : * up being corrupted.
992 : *
993 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
994 : * another lock.
995 : */
996 9407580 : pg_write_barrier();
997 9407580 : waiter->lwWaiting = LW_WS_NOT_WAITING;
998 9407580 : PGSemaphoreUnlock(waiter->sem);
999 : }
1000 9581694 : }
1001 :
1002 : /*
1003 : * Add ourselves to the end of the queue.
1004 : *
1005 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1006 : */
1007 : static void
1008 9523190 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1009 : {
1010 : /*
1011 : * If we don't have a PGPROC structure, there's no way to wait. This
1012 : * should never occur, since MyProc should only be null during shared
1013 : * memory initialization.
1014 : */
1015 9523190 : if (MyProc == NULL)
1016 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1017 :
1018 9523190 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1019 0 : elog(PANIC, "queueing for lock while waiting on another one");
1020 :
1021 9523190 : LWLockWaitListLock(lock);
1022 :
1023 : /* setting the flag is protected by the spinlock */
1024 9523190 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1025 :
1026 9523190 : MyProc->lwWaiting = LW_WS_WAITING;
1027 9523190 : MyProc->lwWaitMode = mode;
1028 :
1029 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1030 9523190 : if (mode == LW_WAIT_UNTIL_FREE)
1031 144108 : proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
1032 : else
1033 9379082 : proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
1034 :
1035 : /* Can release the mutex now */
1036 9523190 : LWLockWaitListUnlock(lock);
1037 :
1038 : #ifdef LOCK_DEBUG
1039 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1040 : #endif
1041 9523190 : }
1042 :
1043 : /*
1044 : * Remove ourselves from the waitlist.
1045 : *
1046 : * This is used if we queued ourselves because we thought we needed to sleep
1047 : * but, after further checking, we discovered that we don't actually need to
1048 : * do so.
1049 : */
1050 : static void
1051 152570 : LWLockDequeueSelf(LWLock *lock)
1052 : {
1053 : bool on_waitlist;
1054 :
1055 : #ifdef LWLOCK_STATS
1056 : lwlock_stats *lwstats;
1057 :
1058 : lwstats = get_lwlock_stats_entry(lock);
1059 :
1060 : lwstats->dequeue_self_count++;
1061 : #endif
1062 :
1063 152570 : LWLockWaitListLock(lock);
1064 :
1065 : /*
1066 : * Remove ourselves from the waitlist, unless we've already been removed.
1067 : * The removal happens with the wait list lock held, so there's no race in
1068 : * this check.
1069 : */
1070 152570 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1071 152570 : if (on_waitlist)
1072 115360 : proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
1073 :
1074 152570 : if (proclist_is_empty(&lock->waiters) &&
1075 145912 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1076 : {
1077 145678 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1078 : }
1079 :
1080 : /* XXX: combine with fetch_and above? */
1081 152570 : LWLockWaitListUnlock(lock);
1082 :
1083 : /* clear waiting state again, nice for debugging */
1084 152570 : if (on_waitlist)
1085 115360 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1086 : else
1087 : {
1088 37210 : int extraWaits = 0;
1089 :
1090 : /*
1091 : * Somebody else dequeued us and has or will wake us up. Deal with the
1092 : * superfluous absorption of a wakeup.
1093 : */
1094 :
1095 : /*
1096 : * Reset RELEASE_OK flag if somebody woke us before we removed
1097 : * ourselves - they'll have set it to false.
1098 : */
1099 37210 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1100 :
1101 : /*
1102 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1103 : * get reset at some inconvenient point later. Most of the time this
1104 : * will immediately return.
1105 : */
1106 : for (;;)
1107 : {
1108 37210 : PGSemaphoreLock(MyProc->sem);
1109 37210 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1110 37210 : break;
1111 0 : extraWaits++;
1112 : }
1113 :
1114 : /*
1115 : * Fix the process wait semaphore's count for any absorbed wakeups.
1116 : */
1117 37210 : while (extraWaits-- > 0)
1118 0 : PGSemaphoreUnlock(MyProc->sem);
1119 : }
1120 :
1121 : #ifdef LOCK_DEBUG
1122 : {
1123 : /* not waiting anymore */
1124 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1125 :
1126 : Assert(nwaiters < MAX_BACKENDS);
1127 : }
1128 : #endif
1129 152570 : }
1130 :
1131 : /*
1132 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1133 : *
1134 : * If the lock is not available, sleep until it is. Returns true if the lock
1135 : * was available immediately, false if we had to sleep.
1136 : *
1137 : * Side effect: cancel/die interrupts are held off until lock release.
1138 : */
1139 : bool
1140 731051696 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1141 : {
1142 731051696 : PGPROC *proc = MyProc;
1143 731051696 : bool result = true;
1144 731051696 : int extraWaits = 0;
1145 : #ifdef LWLOCK_STATS
1146 : lwlock_stats *lwstats;
1147 :
1148 : lwstats = get_lwlock_stats_entry(lock);
1149 : #endif
1150 :
1151 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1152 :
1153 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1154 :
1155 : #ifdef LWLOCK_STATS
1156 : /* Count lock acquisition attempts */
1157 : if (mode == LW_EXCLUSIVE)
1158 : lwstats->ex_acquire_count++;
1159 : else
1160 : lwstats->sh_acquire_count++;
1161 : #endif /* LWLOCK_STATS */
1162 :
1163 : /*
1164 : * We can't wait if we haven't got a PGPROC. This should only occur
1165 : * during bootstrap or shared memory initialization. Put an Assert here
1166 : * to catch unsafe coding practices.
1167 : */
1168 : Assert(!(proc == NULL && IsUnderPostmaster));
1169 :
1170 : /* Ensure we will have room to remember the lock */
1171 731051696 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1172 0 : elog(ERROR, "too many LWLocks taken");
1173 :
1174 : /*
1175 : * Lock out cancel/die interrupts until we exit the code section protected
1176 : * by the LWLock. This ensures that interrupts will not interfere with
1177 : * manipulations of data structures in shared memory.
1178 : */
1179 731051696 : HOLD_INTERRUPTS();
1180 :
1181 : /*
1182 : * Loop here to try to acquire lock after each time we are signaled by
1183 : * LWLockRelease.
1184 : *
1185 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1186 : * lock, rather than retrying and possibly having to go back to sleep. But
1187 : * in practice that is no good because it means a process swap for every
1188 : * lock acquisition when two or more processes are contending for the same
1189 : * lock. Since LWLocks are normally used to protect not-very-long
1190 : * sections of computation, a process needs to be able to acquire and
1191 : * release the same lock many times during a single CPU time slice, even
1192 : * in the presence of contention. The efficiency of being able to do that
1193 : * outweighs the inefficiency of sometimes wasting a process dispatch
1194 : * cycle because the lock is not free when a released waiter finally gets
1195 : * to run. See pgsql-hackers archives for 29-Dec-01.
1196 : */
1197 : for (;;)
1198 9228934 : {
1199 : bool mustwait;
1200 :
1201 : /*
1202 : * Try to grab the lock the first time, we're not in the waitqueue
1203 : * yet/anymore.
1204 : */
1205 740280630 : mustwait = LWLockAttemptLock(lock, mode);
1206 :
1207 740280630 : if (!mustwait)
1208 : {
1209 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1210 730901548 : break; /* got the lock */
1211 : }
1212 :
1213 : /*
1214 : * Ok, at this point we couldn't grab the lock on the first try. We
1215 : * cannot simply queue ourselves to the end of the list and wait to be
1216 : * woken up because by now the lock could long have been released.
1217 : * Instead add us to the queue and try to grab the lock again. If we
1218 : * succeed we need to revert the queuing and be happy, otherwise we
1219 : * recheck the lock. If we still couldn't grab it, we know that the
1220 : * other locker will see our queue entries when releasing since they
1221 : * existed before we checked for the lock.
1222 : */
1223 :
1224 : /* add to the queue */
1225 9379082 : LWLockQueueSelf(lock, mode);
1226 :
1227 : /* we're now guaranteed to be woken up if necessary */
1228 9379082 : mustwait = LWLockAttemptLock(lock, mode);
1229 :
1230 : /* ok, grabbed the lock the second time round, need to undo queueing */
1231 9379082 : if (!mustwait)
1232 : {
1233 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1234 :
1235 150148 : LWLockDequeueSelf(lock);
1236 150148 : break;
1237 : }
1238 :
1239 : /*
1240 : * Wait until awakened.
1241 : *
1242 : * It is possible that we get awakened for a reason other than being
1243 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1244 : * we've gotten the LWLock, re-increment the sema by the number of
1245 : * additional signals received.
1246 : */
1247 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1248 :
1249 : #ifdef LWLOCK_STATS
1250 : lwstats->block_count++;
1251 : #endif
1252 :
1253 9228934 : LWLockReportWaitStart(lock);
1254 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1255 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1256 :
1257 : for (;;)
1258 : {
1259 9228934 : PGSemaphoreLock(proc->sem);
1260 9228934 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1261 9228934 : break;
1262 0 : extraWaits++;
1263 : }
1264 :
1265 : /* Retrying, allow LWLockRelease to release waiters again. */
1266 9228934 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1267 :
1268 : #ifdef LOCK_DEBUG
1269 : {
1270 : /* not waiting anymore */
1271 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1272 :
1273 : Assert(nwaiters < MAX_BACKENDS);
1274 : }
1275 : #endif
1276 :
1277 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1278 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1279 9228934 : LWLockReportWaitEnd();
1280 :
1281 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1282 :
1283 : /* Now loop back and try to acquire lock again. */
1284 9228934 : result = false;
1285 : }
1286 :
1287 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1288 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1289 :
1290 : /* Add lock to list of locks held by this backend */
1291 731051696 : held_lwlocks[num_held_lwlocks].lock = lock;
1292 731051696 : held_lwlocks[num_held_lwlocks++].mode = mode;
1293 :
1294 : /*
1295 : * Fix the process wait semaphore's count for any absorbed wakeups.
1296 : */
1297 731051696 : while (extraWaits-- > 0)
1298 0 : PGSemaphoreUnlock(proc->sem);
1299 :
1300 731051696 : return result;
1301 : }
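/*
 * A minimal usage sketch (the lock and counter are hypothetical): acquisition
 * and release bracket a short shared-memory access, and cancel/die interrupts
 * stay held off for the duration, as described above.
 */
#ifdef NOT_USED
static int
read_shared_counter(LWLock *lock, const int *shared_counter)
{
	int			value;

	LWLockAcquire(lock, LW_SHARED);
	value = *shared_counter;
	LWLockRelease(lock);

	return value;
}
#endif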
1302 :
1303 : /*
1304 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1305 : *
1306 : * If the lock is not available, return false with no side-effects.
1307 : *
1308 : * If successful, cancel/die interrupts are held off until lock release.
1309 : */
1310 : bool
1311 4817868 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1312 : {
1313 : bool mustwait;
1314 :
1315 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1316 :
1317 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1318 :
1319 : /* Ensure we will have room to remember the lock */
1320 4817868 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1321 0 : elog(ERROR, "too many LWLocks taken");
1322 :
1323 : /*
1324 : * Lock out cancel/die interrupts until we exit the code section protected
1325 : * by the LWLock. This ensures that interrupts will not interfere with
1326 : * manipulations of data structures in shared memory.
1327 : */
1328 4817868 : HOLD_INTERRUPTS();
1329 :
1330 : /* Check for the lock */
1331 4817868 : mustwait = LWLockAttemptLock(lock, mode);
1332 :
1333 4817868 : if (mustwait)
1334 : {
1335 : /* Failed to get lock, so release interrupt holdoff */
1336 4080 : RESUME_INTERRUPTS();
1337 :
1338 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1339 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1340 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1341 : }
1342 : else
1343 : {
1344 : /* Add lock to list of locks held by this backend */
1345 4813788 : held_lwlocks[num_held_lwlocks].lock = lock;
1346 4813788 : held_lwlocks[num_held_lwlocks++].mode = mode;
1347 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1348 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1349 : }
1350 4817868 : return !mustwait;
1351 : }
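/*
 * A minimal usage sketch (the lock and work are hypothetical): the conditional
 * form suits opportunistic work that can simply be skipped when the lock is
 * busy, since a failed attempt has no side effects.
 */
#ifdef NOT_USED
static void
maybe_do_maintenance(LWLock *lock)
{
	if (!LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
		return;					/* somebody else holds it; try again later */

	/* ... perform the optional maintenance on the shared structure ... */

	LWLockRelease(lock);
}
#endif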
1352 :
1353 : /*
1354 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1355 : *
1356 : * The semantics of this function are a bit funky. If the lock is currently
1357 : * free, it is acquired in the given mode, and the function returns true. If
1358 : * the lock isn't immediately free, the function waits until it is released
1359 : * and returns false, but does not acquire the lock.
1360 : *
1361 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1362 : * holding WALWriteLock, it can flush the commit records of many other
1363 : * backends as a side-effect. Those other backends need to wait until the
1364 : * flush finishes, but don't need to acquire the lock anymore. They can just
1365 : * wake up, observe that their records have already been flushed, and return.
1366 : */
1367 : bool
1368 268340 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1369 : {
1370 268340 : PGPROC *proc = MyProc;
1371 : bool mustwait;
1372 268340 : int extraWaits = 0;
1373 : #ifdef LWLOCK_STATS
1374 : lwlock_stats *lwstats;
1375 :
1376 : lwstats = get_lwlock_stats_entry(lock);
1377 : #endif
1378 :
1379 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1380 :
1381 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1382 :
1383 : /* Ensure we will have room to remember the lock */
1384 268340 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1385 0 : elog(ERROR, "too many LWLocks taken");
1386 :
1387 : /*
1388 : * Lock out cancel/die interrupts until we exit the code section protected
1389 : * by the LWLock. This ensures that interrupts will not interfere with
1390 : * manipulations of data structures in shared memory.
1391 : */
1392 268340 : HOLD_INTERRUPTS();
1393 :
1394 : /*
1395 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1396 : * protocol as LWLockAcquire(). Check its comments for details.
1397 : */
1398 268340 : mustwait = LWLockAttemptLock(lock, mode);
1399 :
1400 268340 : if (mustwait)
1401 : {
1402 8108 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1403 :
1404 8108 : mustwait = LWLockAttemptLock(lock, mode);
1405 :
1406 8108 : if (mustwait)
1407 : {
1408 : /*
1409 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1410 : * bogus wakeups.
1411 : */
1412 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1413 :
1414 : #ifdef LWLOCK_STATS
1415 : lwstats->block_count++;
1416 : #endif
1417 :
1418 7980 : LWLockReportWaitStart(lock);
1419 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1420 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1421 :
1422 : for (;;)
1423 : {
1424 7980 : PGSemaphoreLock(proc->sem);
1425 7980 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1426 7980 : break;
1427 0 : extraWaits++;
1428 : }
1429 :
1430 : #ifdef LOCK_DEBUG
1431 : {
1432 : /* not waiting anymore */
1433 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1434 :
1435 : Assert(nwaiters < MAX_BACKENDS);
1436 : }
1437 : #endif
1438 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1439 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1440 7980 : LWLockReportWaitEnd();
1441 :
1442 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1443 : }
1444 : else
1445 : {
1446 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1447 :
1448 : /*
1449 : * Got lock in the second attempt, undo queueing. We need to treat
1450 : * this as having successfully acquired the lock, otherwise we'd
1451 : * not necessarily wake up people we've prevented from acquiring
1452 : * the lock.
1453 : */
1454 128 : LWLockDequeueSelf(lock);
1455 : }
1456 : }
1457 :
1458 : /*
1459 : * Fix the process wait semaphore's count for any absorbed wakeups.
1460 : */
1461 268340 : while (extraWaits-- > 0)
1462 0 : PGSemaphoreUnlock(proc->sem);
1463 :
1464 268340 : if (mustwait)
1465 : {
1466 : /* Failed to get lock, so release interrupt holdoff */
1467 7980 : RESUME_INTERRUPTS();
1468 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1469 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1470 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1471 : }
1472 : else
1473 : {
1474 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1475 : /* Add lock to list of locks held by this backend */
1476 260360 : held_lwlocks[num_held_lwlocks].lock = lock;
1477 260360 : held_lwlocks[num_held_lwlocks++].mode = mode;
1478 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1479 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1480 : }
1481 :
1482 268340 : return !mustwait;
1483 : }
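/*
 * A minimal sketch (the helper functions are hypothetical) of the WALWriteLock
 * pattern described above: either we get the lock and do the flush ourselves,
 * or we merely waited for the current holder and can recheck whether our
 * records were already covered by its flush.
 */
#ifdef NOT_USED
extern void perform_flush_up_to(uint64 target);	/* hypothetical */
extern uint64 flushed_up_to(void);	/* hypothetical */

static void
ensure_flushed(uint64 target)
{
	while (flushed_up_to() < target)
	{
		if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
		{
			/* got the lock: flush on behalf of ourselves and others */
			perform_flush_up_to(target);
			LWLockRelease(WALWriteLock);
		}
		/* else: the previous holder flushed; the loop rechecks our target */
	}
}
#endif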
1484 :
1485 : /*
1486 : * Given the lwlock's current state, do we need to wait for the variable's
1487 : * value to change?
1488 : *
1489 : * If we don't need to wait, and it's because the value of the variable has
1490 : * changed, store the current value in newval.
1491 : *
1492 : * *result is set to true if the lock was free, and false otherwise.
1493 : */
1494 : static bool
1495 6815336 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1496 : uint64 *newval, bool *result)
1497 : {
1498 : bool mustwait;
1499 : uint64 value;
1500 :
1501 : /*
1502 : * Test first to see if the slot is free right now.
1503 : *
1504 : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1505 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1506 : * this, so we don't need a memory barrier here as far as the current
1507 : * usage is concerned. But that might not be safe in general.
1508 : */
1509 6815336 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1510 :
1511 6815336 : if (!mustwait)
1512 : {
1513 4729528 : *result = true;
1514 4729528 : return false;
1515 : }
1516 :
1517 2085808 : *result = false;
1518 :
1519 : /*
1520 : * Reading this value atomically is safe even on platforms where uint64
1521 : * cannot be read without observing a torn value.
1522 : */
1523 2085808 : value = pg_atomic_read_u64(valptr);
1524 :
1525 2085808 : if (value != oldval)
1526 : {
1527 1816102 : mustwait = false;
1528 1816102 : *newval = value;
1529 : }
1530 : else
1531 : {
1532 269706 : mustwait = true;
1533 : }
1534 :
1535 2085808 : return mustwait;
1536 : }
1537 :
1538 : /*
1539 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1540 : *
1541 : * If the lock is held and *valptr equals oldval, waits until the lock is
1542 : * either freed, or the lock holder updates *valptr by calling
1543 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1544 : * waiting), returns true. If the lock is still held, but *valptr no longer
1545 : * matches oldval, returns false and sets *newval to the current value in
1546 : * *valptr.
1547 : *
1548 : * Note: this function ignores shared lock holders; if the lock is held
1549 : * in shared mode, returns 'true'.
1550 : *
1551 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1552 : * hence the caller of this function may want to rely on an explicit barrier or
1553 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1554 : */
1555 : bool
1556 6545630 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1557 : uint64 *newval)
1558 : {
1559 6545630 : PGPROC *proc = MyProc;
1560 6545630 : int extraWaits = 0;
1561 6545630 : bool result = false;
1562 : #ifdef LWLOCK_STATS
1563 : lwlock_stats *lwstats;
1564 :
1565 : lwstats = get_lwlock_stats_entry(lock);
1566 : #endif
1567 :
1568 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1569 :
1570 : /*
1571 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1572 : * cleanup mechanism to remove us from the wait queue if we got
1573 : * interrupted.
1574 : */
1575 6545630 : HOLD_INTERRUPTS();
1576 :
1577 : /*
1578 : * Loop here to check the lock's status after each time we are signaled.
1579 : */
1580 : for (;;)
1581 133706 : {
1582 : bool mustwait;
1583 :
1584 6679336 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1585 : &result);
1586 :
1587 6679336 : if (!mustwait)
1588 6543336 : break; /* the lock was free or value didn't match */
1589 :
1590 : /*
1591 : * Add myself to the wait queue. Note that this is racy: somebody else
1592 : * could wake up before we're finished queuing. NB: We're using nearly
1593 : * the same twice-in-a-row lock acquisition protocol as
1594 : * LWLockAcquire(). Check its comments for details. The only
1595 : * difference is that we also have to check the variable's values when
1596 : * checking the state of the lock.
1597 : */
1598 136000 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1599 :
1600 : /*
1601 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1602 : * lock is released.
1603 : */
1604 136000 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1605 :
1606 : /*
1607 : * We're now guaranteed to be woken up if necessary. Recheck the lock
1608 : * and variables state.
1609 : */
1610 136000 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1611 : &result);
1612 :
1613 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1614 136000 : if (!mustwait)
1615 : {
1616 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1617 :
1618 2294 : LWLockDequeueSelf(lock);
1619 2294 : break;
1620 : }
1621 :
1622 : /*
1623 : * Wait until awakened.
1624 : *
1625 : * It is possible that we get awakened for a reason other than being
1626 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1627 : * we've gotten the LWLock, re-increment the sema by the number of
1628 : * additional signals received.
1629 : */
1630 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1631 :
1632 : #ifdef LWLOCK_STATS
1633 : lwstats->block_count++;
1634 : #endif
1635 :
1636 133706 : LWLockReportWaitStart(lock);
1637 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1638 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1639 :
1640 : for (;;)
1641 : {
1642 133706 : PGSemaphoreLock(proc->sem);
1643 133706 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1644 133706 : break;
1645 0 : extraWaits++;
1646 : }
1647 :
1648 : #ifdef LOCK_DEBUG
1649 : {
1650 : /* not waiting anymore */
1651 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1652 :
1653 : Assert(nwaiters < MAX_BACKENDS);
1654 : }
1655 : #endif
1656 :
1657 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1658 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1659 133706 : LWLockReportWaitEnd();
1660 :
1661 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1662 :
1663 : /* Now loop back and check the status of the lock again. */
1664 : }
1665 :
1666 : /*
1667 : * Fix the process wait semaphore's count for any absorbed wakeups.
1668 : */
1669 6545630 : while (extraWaits-- > 0)
1670 0 : PGSemaphoreUnlock(proc->sem);
1671 :
1672 : /*
1673 : * Now okay to allow cancel/die interrupts.
1674 : */
1675 6545630 : RESUME_INTERRUPTS();
1676 :
1677 6545630 : return result;
1678 : }
1679 :
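/*
 * A minimal waiter-side usage sketch of the contract above.  The lock, the
 * pg_atomic_uint64 variable, and the 'target' threshold are hypothetical;
 * real callers embed this pattern in their own data structures.
 */
static uint64
example_wait_until_past(LWLock *lock, pg_atomic_uint64 *var, uint64 target)
{
	uint64		seen = pg_atomic_read_u64(var);

	while (seen < target)
	{
		uint64		newval;

		if (LWLockWaitForVar(lock, var, seen, &newval))
			break;				/* lock is (or became) free; stop waiting */

		/* holder advanced the variable while still holding the lock */
		seen = newval;
	}

	return seen;
}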
1680 :
1681 : /*
1682 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1683 : *
1684 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1685 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1686 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1687 : * lock is guaranteed to see the new value, and act accordingly.
1688 : *
1689 : * The caller must be holding the lock in exclusive mode.
1690 : */
1691 : void
1692 4966348 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1693 : {
1694 : proclist_head wakeup;
1695 : proclist_mutable_iter iter;
1696 :
1697 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1698 :
1699 : /*
1700 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1701 : * that the variable is updated before waking up waiters.
1702 : */
1703 4966348 : pg_atomic_exchange_u64(valptr, val);
1704 :
1705 4966348 : proclist_init(&wakeup);
1706 :
1707 4966348 : LWLockWaitListLock(lock);
1708 :
1709 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1710 :
1711 : /*
1712 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1713 : * up. They are always at the front of the queue.
1714 : */
1715 4967736 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1716 : {
1717 128542 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1718 :
1719 128542 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1720 127154 : break;
1721 :
1722 1388 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1723 1388 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1724 :
1725 : /* see LWLockWakeup() */
1726 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1727 1388 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1728 : }
1729 :
1730 : /* We are done updating shared state of the lock itself. */
1731 4966348 : LWLockWaitListUnlock(lock);
1732 :
1733 : /*
1734 : * Awaken any waiters I removed from the queue.
1735 : */
1736 4967736 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1737 : {
1738 1388 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1739 :
1740 1388 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1741 : /* check comment in LWLockWakeup() about this barrier */
1742 1388 : pg_write_barrier();
1743 1388 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1744 1388 : PGSemaphoreUnlock(waiter->sem);
1745 : }
1746 4966348 : }
1747 :
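/*
 * A minimal holder-side sketch, the counterpart to the waiter sketch after
 * LWLockWaitForVar().  Progress must be published with LWLockUpdateVar()
 * while the lock is still held in exclusive mode so that concurrent
 * LWLockWaitForVar() callers are guaranteed to see it.  All names below are
 * hypothetical.
 */
static void
example_publish_progress(LWLock *lock, pg_atomic_uint64 *var,
						 uint64 start, uint64 end)
{
	uint64		pos;

	LWLockAcquire(lock, LW_EXCLUSIVE);

	for (pos = start; pos < end; pos++)
	{
		/* ... perform one unit of work covering 'pos' ... */

		/* wake any LW_WAIT_UNTIL_FREE waiters blocked on an older value */
		LWLockUpdateVar(lock, var, pos + 1);
	}

	/* the variable keeps its last value across a plain release */
	LWLockRelease(lock);
}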
1748 :
1749 : /*
1750 : * Stop treating lock as held by current backend.
1751 : *
1752 : * This is the code that can be shared between actually releasing a lock
1753 : * (LWLockRelease()) and just not tracking ownership of the lock anymore
1754 : * without releasing the lock (LWLockDisown()).
1755 : *
1756 : * Returns the mode in which the lock was held by the current backend.
1757 : *
1758 : * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
1759 : * to the caller.
1760 : *
1761 : * NB: This will leave lock->owner pointing to the current backend (if
1762 : * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
1763 : * debug cases of missing wakeups during lock release.
1764 : */
1765 : static inline LWLockMode
1766 736125844 : LWLockDisownInternal(LWLock *lock)
1767 : {
1768 : LWLockMode mode;
1769 : int i;
1770 :
1771 : /*
1772 : * Remove lock from list of locks held. Usually, but not always, it will
1773 : * be the latest-acquired lock; so search array backwards.
1774 : */
1775 818361198 : for (i = num_held_lwlocks; --i >= 0;)
1776 818361198 : if (lock == held_lwlocks[i].lock)
1777 736125844 : break;
1778 :
1779 736125844 : if (i < 0)
1780 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1781 :
1782 736125844 : mode = held_lwlocks[i].mode;
1783 :
1784 736125844 : num_held_lwlocks--;
1785 818361198 : for (; i < num_held_lwlocks; i++)
1786 82235354 : held_lwlocks[i] = held_lwlocks[i + 1];
1787 :
1788 736125844 : return mode;
1789 : }
1790 :
1791 : /*
1792 : * Helper function to release lock, shared between LWLockRelease() and
1793 : * LWLockReleaseDisowned().
1794 : */
1795 : static void
1796 736125844 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
1797 : {
1798 : uint32 oldstate;
1799 : bool check_waiters;
1800 :
1801 : /*
1802 : * Release my hold on the lock; after that it can immediately be acquired
1803 : * by others, even if we still have to wake up other waiters.
1804 : */
1805 736125844 : if (mode == LW_EXCLUSIVE)
1806 454138168 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1807 : else
1808 281987676 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1809 :
1810 : /* nobody else can have that kind of lock */
1811 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1812 :
1813 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1814 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1815 :
1816 : /*
1817 : * We're still waiting for backends to get scheduled; don't wake them up
1818 : * again.
1819 : */
1820 736125844 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1821 9618136 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1822 9618136 : (oldstate & LW_LOCK_MASK) == 0)
1823 9581694 : check_waiters = true;
1824 : else
1825 726544150 : check_waiters = false;
1826 :
1827 : /*
1828 : * As waking up waiters requires the spinlock to be acquired, only do so
1829 : * if necessary.
1830 : */
1831 736125844 : if (check_waiters)
1832 : {
1833 : /* XXX: remove before commit? */
1834 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1835 9581694 : LWLockWakeup(lock);
1836 : }
1837 736125844 : }
1838 :
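/*
 * The wakeup condition above, restated as a hypothetical standalone
 * predicate: waiters are only considered when the releasing backend saw both
 * flag bits set and observed the lock becoming entirely free.
 */
static bool
example_release_must_check_waiters(uint32 oldstate)
{
	return (oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
		(LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
		(oldstate & LW_LOCK_MASK) == 0;
}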
1839 :
1840 : /*
1841 : * Stop treating lock as held by current backend.
1842 : *
1843 : * After calling this function it's the caller's responsibility to ensure that
1844 : * the lock gets released (via LWLockReleaseDisowned()), even in case of an
1845 : * error. This is only desirable if the lock is going to be released by a
1846 : * different process than the one that acquired it.
1847 : */
1848 : void
1849 0 : LWLockDisown(LWLock *lock)
1850 : {
1851 0 : LWLockDisownInternal(lock);
1852 :
1853 0 : RESUME_INTERRUPTS();
1854 0 : }
1855 :
1856 : /*
1857 : * LWLockRelease - release a previously acquired lock
1858 : */
1859 : void
1860 736125844 : LWLockRelease(LWLock *lock)
1861 : {
1862 : LWLockMode mode;
1863 :
1864 736125844 : mode = LWLockDisownInternal(lock);
1865 :
1866 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1867 :
1868 736125844 : LWLockReleaseInternal(lock, mode);
1869 :
1870 : /*
1871 : * Now okay to allow cancel/die interrupts.
1872 : */
1873 736125844 : RESUME_INTERRUPTS();
1874 736125844 : }
1875 :
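/*
 * A minimal sketch of the basic acquire/release pattern: readers take the
 * lock in shared mode, writers in exclusive mode.  The protected counter is
 * hypothetical.
 */
static int
example_read_counter(LWLock *lock, const int *protected_counter)
{
	int			value;

	LWLockAcquire(lock, LW_SHARED);
	value = *protected_counter;
	LWLockRelease(lock);

	return value;
}

static void
example_bump_counter(LWLock *lock, int *protected_counter)
{
	LWLockAcquire(lock, LW_EXCLUSIVE);
	(*protected_counter)++;
	LWLockRelease(lock);
}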
1876 : /*
1877 : * Release lock previously disowned with LWLockDisown().
1878 : */
1879 : void
1880 0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
1881 : {
1882 0 : LWLockReleaseInternal(lock, mode);
1883 0 : }
1884 :
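/*
 * A hypothetical sketch of handing a held lock off so that another process
 * can release it: the acquiring backend stops tracking the lock with
 * LWLockDisown(), and whoever completes the work passes the original mode to
 * LWLockReleaseDisowned().  The shared 'ExampleHandoff' struct is invented
 * for illustration.
 */
typedef struct ExampleHandoff
{
	LWLock	   *lock;			/* lock held on behalf of the work item */
	LWLockMode	mode;			/* mode it was acquired in */
} ExampleHandoff;

static void
example_start_handoff(ExampleHandoff *handoff, LWLock *lock)
{
	LWLockAcquire(lock, LW_EXCLUSIVE);

	handoff->lock = lock;
	handoff->mode = LW_EXCLUSIVE;

	/* stop tracking locally; the lock itself stays held */
	LWLockDisown(lock);
}

static void
example_finish_handoff(ExampleHandoff *handoff)
{
	LWLockReleaseDisowned(handoff->lock, handoff->mode);
}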
1885 : /*
1886 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1887 : */
1888 : void
1889 29315968 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1890 : {
1891 : /*
1892 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1893 : * that the variable is updated before releasing the lock.
1894 : */
1895 29315968 : pg_atomic_exchange_u64(valptr, val);
1896 :
1897 29315968 : LWLockRelease(lock);
1898 29315968 : }
1899 :
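/*
 * A minimal sketch pairing the variable-wait machinery with
 * LWLockReleaseClearVar(): the variable is reset to a caller-defined "lock
 * is free" value (0 here, chosen arbitrarily) while releasing.
 */
static void
example_finish_and_clear(LWLock *lock, pg_atomic_uint64 *var)
{
	Assert(LWLockHeldByMeInMode(lock, LW_EXCLUSIVE));

	/* reset the variable to its "free" value and release in one step */
	LWLockReleaseClearVar(lock, var, 0);
}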
1900 :
1901 : /*
1902 : * LWLockReleaseAll - release all currently-held locks
1903 : *
1904 : * Used to clean up after ereport(ERROR). An important difference between this
1905 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1906 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1907 : * has been set to an appropriate level earlier in error recovery. We could
1908 : * decrement it below zero if we allowed it to drop for each released lock!
1909 : */
1910 : void
1911 109618 : LWLockReleaseAll(void)
1912 : {
1913 110014 : while (num_held_lwlocks > 0)
1914 : {
1915 396 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1916 :
1917 396 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1918 : }
1919 109618 : }
1920 :
1921 :
1922 : /*
1923 : * ForEachLWLockHeldByMe - run a callback for each held lock
1924 : *
1925 : * This is meant as debug support only.
1926 : */
1927 : void
1928 0 : ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
1929 : void *context)
1930 : {
1931 : int i;
1932 :
1933 0 : for (i = 0; i < num_held_lwlocks; i++)
1934 0 : callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
1935 0 : }
1936 :
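/*
 * A hypothetical debugging callback for ForEachLWLockHeldByMe(); the log
 * message format is arbitrary.  Invoke it as
 * ForEachLWLockHeldByMe(example_report_held_lwlock, NULL).
 */
static void
example_report_held_lwlock(LWLock *lock, LWLockMode mode, void *context)
{
	(void) context;				/* unused here */

	elog(LOG, "still holding %s in %s mode",
		 T_NAME(lock),
		 mode == LW_EXCLUSIVE ? "exclusive" : "shared");
}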
1937 : /*
1938 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1939 : *
1940 : * This is meant as debug support only.
1941 : */
1942 : bool
1943 0 : LWLockHeldByMe(LWLock *lock)
1944 : {
1945 : int i;
1946 :
1947 0 : for (i = 0; i < num_held_lwlocks; i++)
1948 : {
1949 0 : if (held_lwlocks[i].lock == lock)
1950 0 : return true;
1951 : }
1952 0 : return false;
1953 : }
1954 :
1955 : /*
1956 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1957 : *
1958 : * This is meant as debug support only.
1959 : */
1960 : bool
1961 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1962 : {
1963 : char *held_lock_addr;
1964 : char *begin;
1965 : char *end;
1966 : int i;
1967 :
1968 0 : begin = (char *) lock;
1969 0 : end = begin + nlocks * stride;
1970 0 : for (i = 0; i < num_held_lwlocks; i++)
1971 : {
1972 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
1973 0 : if (held_lock_addr >= begin &&
1974 0 : held_lock_addr < end &&
1975 0 : (held_lock_addr - begin) % stride == 0)
1976 0 : return true;
1977 : }
1978 0 : return false;
1979 : }
1980 :
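/*
 * A hypothetical sketch of the intended use: locks embedded at a fixed
 * offset within an array of structs.  The 'ExamplePartition' layout is
 * invented for illustration.
 */
typedef struct ExamplePartition
{
	LWLock		lock;			/* embedded lock, first member here */
	int			nentries;
} ExamplePartition;

static bool
example_holding_any_partition_lock(ExamplePartition *parts, int nparts)
{
	/* stride is the byte distance between consecutive embedded locks */
	return LWLockAnyHeldByMe(&parts[0].lock, nparts, sizeof(ExamplePartition));
}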
1981 : /*
1982 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1983 : *
1984 : * This is meant as debug support only.
1985 : */
1986 : bool
1987 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1988 : {
1989 : int i;
1990 :
1991 0 : for (i = 0; i < num_held_lwlocks; i++)
1992 : {
1993 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
1994 0 : return true;
1995 : }
1996 0 : return false;
1997 : }
|