Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * lwlock.c
4 : * Lightweight lock manager
5 : *
6 : * Lightweight locks are intended primarily to provide mutual exclusion of
7 : * access to shared-memory data structures. Therefore, they offer both
8 : * exclusive and shared lock modes (to support read/write and read-only
9 : * access to a shared object). There are few other frills. User-level
10 : * locking should be done with the full lock manager --- which depends on
11 : * LWLocks to protect its shared state.
12 : *
13 : * In addition to exclusive and shared modes, lightweight locks can be used to
14 : * wait until a variable changes value. Acquiring the lock with
15 : * LWLockAcquire does not reset the variable: it keeps the value it had
16 : * when the lock was last released, and it can be updated without
17 : * releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
18 : * waits for the variable to be updated, or until the lock is free. When
19 : * releasing the lock with LWLockReleaseClearVar() the value can be set to an
20 : * appropriate value for a free lock. The meaning of the variable is up to
21 : * the caller, the lightweight lock code just assigns and compares it.
22 : *
23 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/storage/lmgr/lwlock.c
28 : *
29 : * NOTES:
30 : *
31 : * This used to be a pretty straightforward reader-writer lock
32 : * implementation, in which the internal state was protected by a
33 : * spinlock. Unfortunately the overhead of taking the spinlock proved to be
34 : * too high for workloads/locks that were taken in shared mode very
35 : * frequently. Often we were spinning in the (obviously exclusive) spinlock,
36 : * while trying to acquire a shared lock that was actually free.
37 : *
38 : * Thus a new implementation was devised that provides wait-free shared lock
39 : * acquisition for locks that aren't exclusively locked.
40 : *
41 : * The basic idea is to have a single atomic variable 'lockcount' instead of
42 : * the formerly separate shared and exclusive counters and to use atomic
43 : * operations to acquire the lock. That's fairly easy to do for plain
44 : * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
45 : * in the OS.
46 : *
47 : * For lock acquisition we use an atomic compare-and-exchange on the lockcount
48 : * variable. For an exclusive lock we swap in a sentinel value
49 : * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
50 : *
51 : * To release the lock we use an atomic decrement. If the
52 : * new value is zero (we get that atomically), we know we can/have to release
53 : * waiters.
54 : *
55 : * Obviously it is important that the sentinel value for exclusive locks
56 : * doesn't conflict with the maximum number of possible shared lockers -
57 : * luckily MAX_BACKENDS makes that easily possible.
58 : *
59 : *
60 : * The attentive reader might have noticed that naively doing the above has a
61 : * glaring race condition: We try to lock using the atomic operations and
62 : * notice that we have to wait. Unfortunately by the time we have finished
63 : * queuing, the former locker very well might have already finished its
64 : * work. That's problematic because we're now stuck waiting inside the OS.
65 : *
66 : * To mitigate those races we use a multi-phase locking protocol:
67 : * Phase 1: Try to do it atomically, if we succeed, nice
68 : * Phase 2: Add ourselves to the waitqueue of the lock
69 : * Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
70 : * the queue
71 : * Phase 4: Sleep till wake-up, goto Phase 1
72 : *
73 : * This protects us against the problem from above as nobody can release
74 : * too quickly before we're queued, since after Phase 2 we're already queued.
75 : * -------------------------------------------------------------------------
76 : */
77 : #include "postgres.h"
78 :
79 : #include "miscadmin.h"
80 : #include "pg_trace.h"
81 : #include "pgstat.h"
82 : #include "port/pg_bitutils.h"
83 : #include "postmaster/postmaster.h"
84 : #include "replication/slot.h"
85 : #include "storage/ipc.h"
86 : #include "storage/predicate.h"
87 : #include "storage/proc.h"
88 : #include "storage/proclist.h"
89 : #include "storage/spin.h"
90 : #include "utils/memutils.h"
91 :
92 : #ifdef LWLOCK_STATS
93 : #include "utils/hsearch.h"
94 : #endif
95 :
96 :
97 : /* We use the ShmemLock spinlock to protect LWLockCounter */
98 : extern slock_t *ShmemLock;
99 :
100 : #define LW_FLAG_HAS_WAITERS ((uint32) 1 << 30)
101 : #define LW_FLAG_RELEASE_OK ((uint32) 1 << 29)
102 : #define LW_FLAG_LOCKED ((uint32) 1 << 28)
103 :
104 : #define LW_VAL_EXCLUSIVE ((uint32) 1 << 24)
105 : #define LW_VAL_SHARED 1
106 :
107 : #define LW_LOCK_MASK ((uint32) ((1 << 25)-1))
108 : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
109 : #define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
110 :
111 : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
112 : "MAX_BACKENDS too big for lwlock.c");
113 :
114 : /*
115 : * There are three sorts of LWLock "tranches":
116 : *
117 : * 1. The individually-named locks defined in lwlocknames.h each have their
118 : * own tranche. The names of these tranches appear in IndividualLWLockNames[]
119 : * in lwlocknames.c.
120 : *
121 : * 2. There are some predefined tranches for built-in groups of locks.
122 : * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
123 : * appear in BuiltinTrancheNames[] below.
124 : *
125 : * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
126 : * or LWLockRegisterTranche. The names of these that are known in the current
127 : * process appear in LWLockTrancheNames[].
128 : *
129 : * All these names are user-visible as wait event names, so choose with care
130 : * ... and do not forget to update the documentation's list of wait events.
131 : */
132 : extern const char *const IndividualLWLockNames[]; /* in lwlocknames.c */
133 :
134 : static const char *const BuiltinTrancheNames[] = {
135 : /* LWTRANCHE_XACT_BUFFER: */
136 : "XactBuffer",
137 : /* LWTRANCHE_COMMITTS_BUFFER: */
138 : "CommitTsBuffer",
139 : /* LWTRANCHE_SUBTRANS_BUFFER: */
140 : "SubtransBuffer",
141 : /* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */
142 : "MultiXactOffsetBuffer",
143 : /* LWTRANCHE_MULTIXACTMEMBER_BUFFER: */
144 : "MultiXactMemberBuffer",
145 : /* LWTRANCHE_NOTIFY_BUFFER: */
146 : "NotifyBuffer",
147 : /* LWTRANCHE_SERIAL_BUFFER: */
148 : "SerialBuffer",
149 : /* LWTRANCHE_WAL_INSERT: */
150 : "WALInsert",
151 : /* LWTRANCHE_BUFFER_CONTENT: */
152 : "BufferContent",
153 : /* LWTRANCHE_REPLICATION_ORIGIN_STATE: */
154 : "ReplicationOriginState",
155 : /* LWTRANCHE_REPLICATION_SLOT_IO: */
156 : "ReplicationSlotIO",
157 : /* LWTRANCHE_LOCK_FASTPATH: */
158 : "LockFastPath",
159 : /* LWTRANCHE_BUFFER_MAPPING: */
160 : "BufferMapping",
161 : /* LWTRANCHE_LOCK_MANAGER: */
162 : "LockManager",
163 : /* LWTRANCHE_PREDICATE_LOCK_MANAGER: */
164 : "PredicateLockManager",
165 : /* LWTRANCHE_PARALLEL_HASH_JOIN: */
166 : "ParallelHashJoin",
167 : /* LWTRANCHE_PARALLEL_QUERY_DSA: */
168 : "ParallelQueryDSA",
169 : /* LWTRANCHE_PER_SESSION_DSA: */
170 : "PerSessionDSA",
171 : /* LWTRANCHE_PER_SESSION_RECORD_TYPE: */
172 : "PerSessionRecordType",
173 : /* LWTRANCHE_PER_SESSION_RECORD_TYPMOD: */
174 : "PerSessionRecordTypmod",
175 : /* LWTRANCHE_SHARED_TUPLESTORE: */
176 : "SharedTupleStore",
177 : /* LWTRANCHE_SHARED_TIDBITMAP: */
178 : "SharedTidBitmap",
179 : /* LWTRANCHE_PARALLEL_APPEND: */
180 : "ParallelAppend",
181 : /* LWTRANCHE_PER_XACT_PREDICATE_LIST: */
182 : "PerXactPredicateList",
183 : /* LWTRANCHE_PGSTATS_DSA: */
184 : "PgStatsDSA",
185 : /* LWTRANCHE_PGSTATS_HASH: */
186 : "PgStatsHash",
187 : /* LWTRANCHE_PGSTATS_DATA: */
188 : "PgStatsData",
189 : /* LWTRANCHE_LAUNCHER_DSA: */
190 : "LogicalRepLauncherDSA",
191 : /* LWTRANCHE_LAUNCHER_HASH: */
192 : "LogicalRepLauncherHash",
193 : };
194 :
195 : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
196 : LWTRANCHE_FIRST_USER_DEFINED - NUM_INDIVIDUAL_LWLOCKS,
197 : "missing entries in BuiltinTrancheNames[]");
198 :
199 : /*
200 : * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
201 : * stores the names of all dynamically-created tranches known to the current
202 : * process. Any unused entries in the array will contain NULL.
203 : */
204 : static const char **LWLockTrancheNames = NULL;
205 : static int LWLockTrancheNamesAllocated = 0;
206 :
207 : /*
208 : * This points to the main array of LWLocks in shared memory. Backends inherit
209 : * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
210 : * where we have special measures to pass it down).
211 : */
212 : LWLockPadded *MainLWLockArray = NULL;
213 :
214 : /*
215 : * We use this structure to keep track of locked LWLocks for release
216 : * during error recovery. Normally, only a few will be held at once, but
217 : * occasionally the number can be much higher; for example, the pg_buffercache
218 : * extension locks all buffer partitions simultaneously.
219 : */
220 : #define MAX_SIMUL_LWLOCKS 200
221 :
222 : /* struct representing the LWLocks we're holding */
223 : typedef struct LWLockHandle
224 : {
225 : LWLock *lock;
226 : LWLockMode mode;
227 : } LWLockHandle;
228 :
229 : static int num_held_lwlocks = 0;
230 : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
231 :
232 : /* struct representing the LWLock tranche request for named tranche */
233 : typedef struct NamedLWLockTrancheRequest
234 : {
235 : char tranche_name[NAMEDATALEN];
236 : int num_lwlocks;
237 : } NamedLWLockTrancheRequest;
238 :
239 : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
240 : static int NamedLWLockTrancheRequestsAllocated = 0;
241 :
242 : /*
243 : * NamedLWLockTrancheRequests is both the valid length of the request array,
244 : * and the length of the shared-memory NamedLWLockTrancheArray later on.
245 : * This variable and NamedLWLockTrancheArray are non-static so that
246 : * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
247 : */
248 : int NamedLWLockTrancheRequests = 0;
249 :
250 : /* points to data in shared memory: */
251 : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
252 :
253 : static void InitializeLWLocks(void);
254 : static inline void LWLockReportWaitStart(LWLock *lock);
255 : static inline void LWLockReportWaitEnd(void);
256 : static const char *GetLWTrancheName(uint16 trancheId);
257 :
258 : #define T_NAME(lock) \
259 : GetLWTrancheName((lock)->tranche)
260 :
261 : #ifdef LWLOCK_STATS
262 : typedef struct lwlock_stats_key
263 : {
264 : int tranche;
265 : void *instance;
266 : } lwlock_stats_key;
267 :
268 : typedef struct lwlock_stats
269 : {
270 : lwlock_stats_key key;
271 : int sh_acquire_count;
272 : int ex_acquire_count;
273 : int block_count;
274 : int dequeue_self_count;
275 : int spin_delay_count;
276 : } lwlock_stats;
277 :
278 : static HTAB *lwlock_stats_htab;
279 : static lwlock_stats lwlock_stats_dummy;
280 : #endif
281 :
282 : #ifdef LOCK_DEBUG
283 : bool Trace_lwlocks = false;
284 :
285 : inline static void
286 : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
287 : {
288 : /* hide statement & context here, otherwise the log is just too verbose */
289 : if (Trace_lwlocks)
290 : {
291 : uint32 state = pg_atomic_read_u32(&lock->state);
292 :
293 : ereport(LOG,
294 : (errhidestmt(true),
295 : errhidecontext(true),
296 : errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
297 : MyProcPid,
298 : where, T_NAME(lock), lock,
299 : (state & LW_VAL_EXCLUSIVE) != 0,
300 : state & LW_SHARED_MASK,
301 : (state & LW_FLAG_HAS_WAITERS) != 0,
302 : pg_atomic_read_u32(&lock->nwaiters),
303 : (state & LW_FLAG_RELEASE_OK) != 0)));
304 : }
305 : }
306 :
307 : inline static void
308 : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
309 : {
310 : /* hide statement & context here, otherwise the log is just too verbose */
311 : if (Trace_lwlocks)
312 : {
313 : ereport(LOG,
314 : (errhidestmt(true),
315 : errhidecontext(true),
316 : errmsg_internal("%s(%s %p): %s", where,
317 : T_NAME(lock), lock, msg)));
318 : }
319 : }
320 :
321 : #else /* not LOCK_DEBUG */
322 : #define PRINT_LWDEBUG(a,b,c) ((void)0)
323 : #define LOG_LWDEBUG(a,b,c) ((void)0)
324 : #endif /* LOCK_DEBUG */
325 :
326 : #ifdef LWLOCK_STATS
327 :
328 : static void init_lwlock_stats(void);
329 : static void print_lwlock_stats(int code, Datum arg);
330 : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
331 :
332 : static void
333 : init_lwlock_stats(void)
334 : {
335 : HASHCTL ctl;
336 : static MemoryContext lwlock_stats_cxt = NULL;
337 : static bool exit_registered = false;
338 :
339 : if (lwlock_stats_cxt != NULL)
340 : MemoryContextDelete(lwlock_stats_cxt);
341 :
342 : /*
343 : * The LWLock stats will be updated within a critical section, which
344 : * requires allocating new hash entries. Allocations within a critical
345 : * section are normally not allowed because running out of memory would
346 : * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
347 : * turned on in production, so that's an acceptable risk. The hash entries
348 : * are small, so the risk of running out of memory is minimal in practice.
349 : */
350 : lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
351 : "LWLock stats",
352 : ALLOCSET_DEFAULT_SIZES);
353 : MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
354 :
355 : ctl.keysize = sizeof(lwlock_stats_key);
356 : ctl.entrysize = sizeof(lwlock_stats);
357 : ctl.hcxt = lwlock_stats_cxt;
358 : lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
359 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
360 : if (!exit_registered)
361 : {
362 : on_shmem_exit(print_lwlock_stats, 0);
363 : exit_registered = true;
364 : }
365 : }
366 :
367 : static void
368 : print_lwlock_stats(int code, Datum arg)
369 : {
370 : HASH_SEQ_STATUS scan;
371 : lwlock_stats *lwstats;
372 :
373 : hash_seq_init(&scan, lwlock_stats_htab);
374 :
375 : /* Grab an LWLock to keep different backends from mixing reports */
376 : LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
377 :
378 : while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
379 : {
380 : fprintf(stderr,
381 : "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
382 : MyProcPid, GetLWTrancheName(lwstats->key.tranche),
383 : lwstats->key.instance, lwstats->sh_acquire_count,
384 : lwstats->ex_acquire_count, lwstats->block_count,
385 : lwstats->spin_delay_count, lwstats->dequeue_self_count);
386 : }
387 :
388 : LWLockRelease(&MainLWLockArray[0].lock);
389 : }
390 :
391 : static lwlock_stats *
392 : get_lwlock_stats_entry(LWLock *lock)
393 : {
394 : lwlock_stats_key key;
395 : lwlock_stats *lwstats;
396 : bool found;
397 :
398 : /*
399 : * During shared memory initialization, the hash table doesn't exist yet.
400 : * Stats of that phase aren't very interesting, so just collect operations
401 : * on all locks in a single dummy entry.
402 : */
403 : if (lwlock_stats_htab == NULL)
404 : return &lwlock_stats_dummy;
405 :
406 : /* Fetch or create the entry. */
407 : MemSet(&key, 0, sizeof(key));
408 : key.tranche = lock->tranche;
409 : key.instance = lock;
410 : lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
411 : if (!found)
412 : {
413 : lwstats->sh_acquire_count = 0;
414 : lwstats->ex_acquire_count = 0;
415 : lwstats->block_count = 0;
416 : lwstats->dequeue_self_count = 0;
417 : lwstats->spin_delay_count = 0;
418 : }
419 : return lwstats;
420 : }
421 : #endif /* LWLOCK_STATS */
422 :
423 :
424 : /*
425 : * Compute number of LWLocks required by named tranches. These will be
426 : * allocated in the main array.
427 : */
428 : static int
429 6058 : NumLWLocksForNamedTranches(void)
430 : {
431 6058 : int numLocks = 0;
432 : int i;
433 :
434 6082 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
435 24 : numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
436 :
437 6058 : return numLocks;
438 : }
439 :
440 : /*
441 : * Compute shmem space needed for LWLocks and named tranches.
442 : */
443 : Size
444 4496 : LWLockShmemSize(void)
445 : {
446 : Size size;
447 : int i;
448 4496 : int numLocks = NUM_FIXED_LWLOCKS;
449 :
450 : /* Calculate total number of locks needed in the main array. */
451 4496 : numLocks += NumLWLocksForNamedTranches();
452 :
453 : /* Space for the LWLock array. */
454 4496 : size = mul_size(numLocks, sizeof(LWLockPadded));
455 :
456 : /* Space for dynamic allocation counter, plus room for alignment. */
457 4496 : size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
458 :
459 : /* space for named tranches. */
460 4496 : size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
461 :
462 : /* space for name of each tranche. */
463 4514 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
464 18 : size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
465 :
466 4496 : return size;
467 : }
468 :
469 : /*
470 : * Allocate shmem space for the main LWLock array and all tranches and
471 : * initialize it. We also register extension LWLock tranches here.
472 : */
473 : void
474 1562 : CreateLWLocks(void)
475 : {
476 1562 : if (!IsUnderPostmaster)
477 : {
478 1562 : Size spaceLocks = LWLockShmemSize();
479 : int *LWLockCounter;
480 : char *ptr;
481 :
482 : /* Allocate space */
483 1562 : ptr = (char *) ShmemAlloc(spaceLocks);
484 :
485 : /* Leave room for dynamic allocation of tranches */
486 1562 : ptr += sizeof(int);
487 :
488 : /* Ensure desired alignment of LWLock array */
489 1562 : ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
490 :
491 1562 : MainLWLockArray = (LWLockPadded *) ptr;
492 :
493 : /*
494 : * Initialize the dynamic-allocation counter for tranches, which is
495 : * stored just before the first LWLock.
496 : */
497 1562 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
498 1562 : *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
499 :
500 : /* Initialize all LWLocks */
501 1562 : InitializeLWLocks();
502 : }
503 :
504 : /* Register named extension LWLock tranches in the current process. */
505 1568 : for (int i = 0; i < NamedLWLockTrancheRequests; i++)
506 6 : LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
507 6 : NamedLWLockTrancheArray[i].trancheName);
508 1562 : }
509 :
510 : /*
511 : * Initialize LWLocks that are fixed and those belonging to named tranches.
512 : */
513 : static void
514 1562 : InitializeLWLocks(void)
515 : {
516 1562 : int numNamedLocks = NumLWLocksForNamedTranches();
517 : int id;
518 : int i;
519 : int j;
520 : LWLockPadded *lock;
521 :
522 : /* Initialize all individual LWLocks in main array */
523 78100 : for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
524 76538 : LWLockInitialize(&lock->lock, id);
525 :
526 : /* Initialize buffer mapping LWLocks in main array */
527 1562 : lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
528 201498 : for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
529 199936 : LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
530 :
531 : /* Initialize lmgrs' LWLocks in main array */
532 1562 : lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
533 26554 : for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
534 24992 : LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
535 :
536 : /* Initialize predicate lmgrs' LWLocks in main array */
537 1562 : lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
538 26554 : for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
539 24992 : LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
540 :
541 : /*
542 : * Copy the info about any named tranches into shared memory (so that
543 : * other processes can see it), and initialize the requested LWLocks.
544 : */
545 1562 : if (NamedLWLockTrancheRequests > 0)
546 : {
547 : char *trancheNames;
548 :
549 6 : NamedLWLockTrancheArray = (NamedLWLockTranche *)
550 6 : &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
551 :
552 6 : trancheNames = (char *) NamedLWLockTrancheArray +
553 6 : (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
554 6 : lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
555 :
556 12 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
557 : {
558 : NamedLWLockTrancheRequest *request;
559 : NamedLWLockTranche *tranche;
560 : char *name;
561 :
562 6 : request = &NamedLWLockTrancheRequestArray[i];
563 6 : tranche = &NamedLWLockTrancheArray[i];
564 :
565 6 : name = trancheNames;
566 6 : trancheNames += strlen(request->tranche_name) + 1;
567 6 : strcpy(name, request->tranche_name);
568 6 : tranche->trancheId = LWLockNewTrancheId();
569 6 : tranche->trancheName = name;
570 :
571 12 : for (j = 0; j < request->num_lwlocks; j++, lock++)
572 6 : LWLockInitialize(&lock->lock, tranche->trancheId);
573 : }
574 : }
575 1562 : }
576 :
577 : /*
578 : * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
579 : */
580 : void
581 27836 : InitLWLockAccess(void)
582 : {
583 : #ifdef LWLOCK_STATS
584 : init_lwlock_stats();
585 : #endif
586 27836 : }
587 :
588 : /*
589 : * GetNamedLWLockTranche - returns the base address of the LWLocks in the
590 : * specified tranche.
591 : *
592 : * The caller can access the requested number of LWLocks starting from the
593 : * base address returned by this function. This only works for tranches
594 : * that were requested via RequestNamedLWLockTranche().
595 : */
596 : LWLockPadded *
597 6 : GetNamedLWLockTranche(const char *tranche_name)
598 : {
599 : int lock_pos;
600 : int i;
601 :
602 : /*
603 : * Obtain the position of the base address of the LWLocks belonging to the
604 : * requested tranche_name in MainLWLockArray. LWLocks for named tranches
605 : * are placed in MainLWLockArray after the fixed locks.
606 : */
607 6 : lock_pos = NUM_FIXED_LWLOCKS;
608 6 : for (i = 0; i < NamedLWLockTrancheRequests; i++)
609 : {
610 6 : if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
611 : tranche_name) == 0)
612 6 : return &MainLWLockArray[lock_pos];
613 :
614 0 : lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
615 : }
616 :
617 0 : elog(ERROR, "requested tranche is not registered");
618 :
619 : /* just to keep compiler quiet */
620 : return NULL;
621 : }
622 :
623 : /*
624 : * Allocate a new tranche ID.
625 : */
626 : int
627 16 : LWLockNewTrancheId(void)
628 : {
629 : int result;
630 : int *LWLockCounter;
631 :
632 16 : LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
633 16 : SpinLockAcquire(ShmemLock);
634 16 : result = (*LWLockCounter)++;
635 16 : SpinLockRelease(ShmemLock);
636 :
637 16 : return result;
638 : }
639 :
640 : /*
641 : * Register a dynamic tranche name in the lookup table of the current process.
642 : *
643 : * This routine will save a pointer to the tranche name passed as an argument,
644 : * so the name should be allocated in a backend-lifetime context
645 : * (shared memory, TopMemoryContext, static constant, or similar).
646 : *
647 : * The tranche name will be user-visible as a wait event name, so try to
648 : * use a name that fits the style for those.
649 : */
650 : void
651 20 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
652 : {
653 : /* This should only be called for user-defined tranches. */
654 20 : if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
655 0 : return;
656 :
657 : /* Convert to array index. */
658 20 : tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
659 :
660 : /* If necessary, create or enlarge array. */
661 20 : if (tranche_id >= LWLockTrancheNamesAllocated)
662 : {
663 : int newalloc;
664 :
665 18 : newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
666 :
667 18 : if (LWLockTrancheNames == NULL)
668 18 : LWLockTrancheNames = (const char **)
669 18 : MemoryContextAllocZero(TopMemoryContext,
670 : newalloc * sizeof(char *));
671 : else
672 0 : LWLockTrancheNames =
673 0 : repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
674 18 : LWLockTrancheNamesAllocated = newalloc;
675 : }
676 :
677 20 : LWLockTrancheNames[tranche_id] = tranche_name;
678 : }
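
/*
 * Illustrative sketch only: the dynamic-tranche route for an extension that
 * keeps an LWLock in memory it manages itself (for example a DSM segment).
 * MySharedState, my_init_shared_state() and my_attach_shared_state() are
 * hypothetical names; the calls are the API defined in this file.
 */
#ifdef NOT_USED
typedef struct MySharedState
{
	int			tranche_id;
	LWLock		lock;
} MySharedState;

/* run once, in the process that creates the shared state */
static void
my_init_shared_state(MySharedState *state)
{
	state->tranche_id = LWLockNewTrancheId();
	LWLockInitialize(&state->lock, state->tranche_id);
}

/* run in every process that will use the lock, to name its wait events */
static void
my_attach_shared_state(MySharedState *state)
{
	LWLockRegisterTranche(state->tranche_id, "my_ext_dynamic");
}
#endif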
679 :
680 : /*
681 : * RequestNamedLWLockTranche
682 : * Request that extra LWLocks be allocated during postmaster
683 : * startup.
684 : *
685 : * This may only be called via the shmem_request_hook of a library that is
686 : * loaded into the postmaster via shared_preload_libraries. Calls from
687 : * elsewhere will fail.
688 : *
689 : * The tranche name will be user-visible as a wait event name, so try to
690 : * use a name that fits the style for those.
691 : */
692 : void
693 6 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
694 : {
695 : NamedLWLockTrancheRequest *request;
696 :
697 6 : if (!process_shmem_requests_in_progress)
698 0 : elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
699 :
700 6 : if (NamedLWLockTrancheRequestArray == NULL)
701 : {
702 6 : NamedLWLockTrancheRequestsAllocated = 16;
703 6 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
704 6 : MemoryContextAlloc(TopMemoryContext,
705 : NamedLWLockTrancheRequestsAllocated
706 : * sizeof(NamedLWLockTrancheRequest));
707 : }
708 :
709 6 : if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
710 : {
711 0 : int i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
712 :
713 0 : NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
714 0 : repalloc(NamedLWLockTrancheRequestArray,
715 : i * sizeof(NamedLWLockTrancheRequest));
716 0 : NamedLWLockTrancheRequestsAllocated = i;
717 : }
718 :
719 6 : request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
720 : Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
721 6 : strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
722 6 : request->num_lwlocks = num_lwlocks;
723 6 : NamedLWLockTrancheRequests++;
724 6 : }
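
/*
 * Illustrative sketch only: how an extension loaded via
 * shared_preload_libraries might use the named-tranche API. The tranche name
 * "my_ext" and the function names are hypothetical; a real extension would
 * install my_shmem_request() as shmem_request_hook from _PG_init().
 */
#ifdef NOT_USED
static void
my_shmem_request(void)
{
	/* ask for 4 LWLocks in a tranche named "my_ext" */
	RequestNamedLWLockTranche("my_ext", 4);
}

static void
my_use_locks(void)
{
	/* after shared memory is set up, fetch the base of the tranche */
	LWLockPadded *locks = GetNamedLWLockTranche("my_ext");

	LWLockAcquire(&locks[0].lock, LW_EXCLUSIVE);
	/* ... touch the shared state protected by this lock ... */
	LWLockRelease(&locks[0].lock);
}
#endif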
725 :
726 : /*
727 : * LWLockInitialize - initialize a new lwlock; it's initially unlocked
728 : */
729 : void
730 17424632 : LWLockInitialize(LWLock *lock, int tranche_id)
731 : {
732 17424632 : pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
733 : #ifdef LOCK_DEBUG
734 : pg_atomic_init_u32(&lock->nwaiters, 0);
735 : #endif
736 17424632 : lock->tranche = tranche_id;
737 17424632 : proclist_init(&lock->waiters);
738 17424632 : }
739 :
740 : /*
741 : * Report start of wait event for light-weight locks.
742 : *
743 : * This function will be used by all the light-weight lock calls which
744 : * need to wait to acquire the lock. This function distinguishes the wait
745 : * event based on tranche and lock id.
746 : */
747 : static inline void
748 31618 : LWLockReportWaitStart(LWLock *lock)
749 : {
750 31618 : pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
751 31618 : }
752 :
753 : /*
754 : * Report end of wait event for light-weight locks.
755 : */
756 : static inline void
757 31618 : LWLockReportWaitEnd(void)
758 : {
759 31618 : pgstat_report_wait_end();
760 31618 : }
761 :
762 : /*
763 : * Return the name of an LWLock tranche.
764 : */
765 : static const char *
766 0 : GetLWTrancheName(uint16 trancheId)
767 : {
768 : /* Individual LWLock? */
769 0 : if (trancheId < NUM_INDIVIDUAL_LWLOCKS)
770 0 : return IndividualLWLockNames[trancheId];
771 :
772 : /* Built-in tranche? */
773 0 : if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
774 0 : return BuiltinTrancheNames[trancheId - NUM_INDIVIDUAL_LWLOCKS];
775 :
776 : /*
777 : * It's an extension tranche, so look in LWLockTrancheNames[]. However,
778 : * it's possible that the tranche has never been registered in the current
779 : * process, in which case give up and return "extension".
780 : */
781 0 : trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
782 :
783 0 : if (trancheId >= LWLockTrancheNamesAllocated ||
784 0 : LWLockTrancheNames[trancheId] == NULL)
785 0 : return "extension";
786 :
787 0 : return LWLockTrancheNames[trancheId];
788 : }
789 :
790 : /*
791 : * Return an identifier for an LWLock based on the wait class and event.
792 : */
793 : const char *
794 0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
795 : {
796 : Assert(classId == PG_WAIT_LWLOCK);
797 : /* The event IDs are just tranche numbers. */
798 0 : return GetLWTrancheName(eventId);
799 : }
800 :
801 : /*
802 : * Internal function that tries to atomically acquire the lwlock in the passed
803 : * in mode.
804 : *
805 : * This function will not block waiting for a lock to become free - that's the
806 : * caller's job.
807 : *
808 : * Returns true if the lock isn't free and we need to wait.
809 : */
810 : static bool
811 359945910 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
812 : {
813 : uint32 old_state;
814 :
815 : Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
816 :
817 : /*
818 : * Read once outside the loop, later iterations will get the newer value
819 : * via compare & exchange.
820 : */
821 359945910 : old_state = pg_atomic_read_u32(&lock->state);
822 :
823 : /* loop until we've determined whether we could acquire the lock or not */
824 : while (true)
825 73368 : {
826 : uint32 desired_state;
827 : bool lock_free;
828 :
829 360019278 : desired_state = old_state;
830 :
831 360019278 : if (mode == LW_EXCLUSIVE)
832 : {
833 157238196 : lock_free = (old_state & LW_LOCK_MASK) == 0;
834 157238196 : if (lock_free)
835 157164900 : desired_state += LW_VAL_EXCLUSIVE;
836 : }
837 : else
838 : {
839 202781082 : lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
840 202781082 : if (lock_free)
841 202765884 : desired_state += LW_VAL_SHARED;
842 : }
843 :
844 : /*
845 : * Attempt to swap in the state we are expecting. If we didn't see
846 : * lock to be free, that's just the old value. If we saw it as free,
847 : * we'll attempt to mark it acquired. The reason that we always swap
848 : * in the value is that this doubles as a memory barrier. We could try
849 : * to be smarter and only swap in values if we saw the lock as free,
850 : * but benchmark haven't shown it as beneficial so far.
851 : *
852 : * Retry if the value changed since we last looked at it.
853 : */
854 360019278 : if (pg_atomic_compare_exchange_u32(&lock->state,
855 : &old_state, desired_state))
856 : {
857 359945910 : if (lock_free)
858 : {
859 : /* Great! Got the lock. */
860 : #ifdef LOCK_DEBUG
861 : if (mode == LW_EXCLUSIVE)
862 : lock->owner = MyProc;
863 : #endif
864 359873078 : return false;
865 : }
866 : else
867 72832 : return true; /* somebody else has the lock */
868 : }
869 : }
870 : pg_unreachable();
871 : }
872 :
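/*
 * Illustrative sketch only: the generic compare-and-exchange retry loop used
 * by LWLockAttemptLock() above, reduced to a simple "set a flag bit if it is
 * not already set" operation on an arbitrary pg_atomic_uint32. The function
 * name and the flag parameter are hypothetical.
 */
#ifdef NOT_USED
static bool
example_try_set_flag(pg_atomic_uint32 *word, uint32 flag)
{
	uint32		old_state = pg_atomic_read_u32(word);

	while (true)
	{
		uint32		desired_state = old_state | flag;

		if (old_state & flag)
			return false;		/* somebody else already set it */

		/* on failure, old_state is refreshed with the current value */
		if (pg_atomic_compare_exchange_u32(word, &old_state, desired_state))
			return true;
	}
}
#endif
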
873 : /*
874 : * Lock the LWLock's wait list against concurrent activity.
875 : *
876 : * NB: even though the wait list is locked, non-conflicting lock operations
877 : * may still happen concurrently.
878 : *
879 : * Time spent holding mutex should be short!
880 : */
881 : static void
882 1377612 : LWLockWaitListLock(LWLock *lock)
883 : {
884 : uint32 old_state;
885 : #ifdef LWLOCK_STATS
886 : lwlock_stats *lwstats;
887 : uint32 delays = 0;
888 :
889 : lwstats = get_lwlock_stats_entry(lock);
890 : #endif
891 :
892 : while (true)
893 : {
894 : /* always try once to acquire lock directly */
895 1377612 : old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
896 1377612 : if (!(old_state & LW_FLAG_LOCKED))
897 1372996 : break; /* got lock */
898 :
899 : /* and then spin without atomic operations until lock is released */
900 : {
901 : SpinDelayStatus delayStatus;
902 :
903 4616 : init_local_spin_delay(&delayStatus);
904 :
905 17544 : while (old_state & LW_FLAG_LOCKED)
906 : {
907 12928 : perform_spin_delay(&delayStatus);
908 12928 : old_state = pg_atomic_read_u32(&lock->state);
909 : }
910 : #ifdef LWLOCK_STATS
911 : delays += delayStatus.delays;
912 : #endif
913 4616 : finish_spin_delay(&delayStatus);
914 : }
915 :
916 : /*
917 : * Retry. The lock might obviously already be re-acquired by the time
918 : * we're attempting to get it again.
919 : */
920 : }
921 :
922 : #ifdef LWLOCK_STATS
923 : lwstats->spin_delay_count += delays;
924 : #endif
925 1372996 : }
926 :
927 : /*
928 : * Unlock the LWLock's wait list.
929 : *
930 : * Note that it can be more efficient to manipulate flags and release the
931 : * locks in a single atomic operation.
932 : */
933 : static void
934 1316064 : LWLockWaitListUnlock(LWLock *lock)
935 : {
936 : uint32 old_state PG_USED_FOR_ASSERTS_ONLY;
937 :
938 1316064 : old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
939 :
940 : Assert(old_state & LW_FLAG_LOCKED);
941 1316064 : }
942 :
943 : /*
944 : * Wake up all the lockers that currently have a chance to acquire the lock.
945 : */
946 : static void
947 56932 : LWLockWakeup(LWLock *lock)
948 : {
949 : bool new_release_ok;
950 56932 : bool wokeup_somebody = false;
951 : proclist_head wakeup;
952 : proclist_mutable_iter iter;
953 :
954 56932 : proclist_init(&wakeup);
955 :
956 56932 : new_release_ok = true;
957 :
958 : /* lock wait list while collecting backends to wake up */
959 56932 : LWLockWaitListLock(lock);
960 :
961 69498 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
962 : {
963 33638 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
964 :
965 33638 : if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
966 182 : continue;
967 :
968 33456 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
969 33456 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
970 :
971 33456 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
972 : {
973 : /*
974 : * Prevent additional wakeups until retryer gets to run. Backends
975 : * that are just waiting for the lock to become free don't retry
976 : * automatically.
977 : */
978 27786 : new_release_ok = false;
979 :
980 : /*
981 : * Don't wakeup (further) exclusive locks.
982 : */
983 27786 : wokeup_somebody = true;
984 : }
985 :
986 : /*
987 : * Signal that the process isn't on the wait list anymore. This allows
988 : * LWLockDequeueSelf() to remove itself from the waitlist with a
989 : * proclist_delete(), rather than having to check if it has been
990 : * removed from the list.
991 : */
992 : Assert(waiter->lwWaiting == LW_WS_WAITING);
993 33456 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
994 :
995 : /*
996 : * Once we've woken up an exclusive waiter, there's no point in waking
997 : * up anybody else.
998 : */
999 33456 : if (waiter->lwWaitMode == LW_EXCLUSIVE)
1000 21072 : break;
1001 : }
1002 :
1003 : Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
1004 :
1005 : /* unset required flags, and release lock, in one fell swoop */
1006 : {
1007 : uint32 old_state;
1008 : uint32 desired_state;
1009 :
1010 56932 : old_state = pg_atomic_read_u32(&lock->state);
1011 : while (true)
1012 : {
1013 57166 : desired_state = old_state;
1014 :
1015 : /* compute desired flags */
1016 :
1017 57166 : if (new_release_ok)
1018 29618 : desired_state |= LW_FLAG_RELEASE_OK;
1019 : else
1020 27548 : desired_state &= ~LW_FLAG_RELEASE_OK;
1021 :
1022 57166 : if (proclist_is_empty(&wakeup))
1023 26422 : desired_state &= ~LW_FLAG_HAS_WAITERS;
1024 :
1025 57166 : desired_state &= ~LW_FLAG_LOCKED; /* release lock */
1026 :
1027 57166 : if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
1028 : desired_state))
1029 56932 : break;
1030 : }
1031 : }
1032 :
1033 : /* Awaken any waiters I removed from the queue. */
1034 90388 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1035 : {
1036 33456 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1037 :
1038 : LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
1039 33456 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1040 :
1041 : /*
1042 : * Guarantee that lwWaiting being unset only becomes visible once the
1043 : * unlink from the list has completed. Otherwise the target backend
1044 : * could be woken up for another reason and enqueue for a new lock - if
1045 : * that happens before the list unlink happens, the list would end up
1046 : * being corrupted.
1047 : *
1048 : * The barrier pairs with the LWLockWaitListLock() when enqueuing for
1049 : * another lock.
1050 : */
1051 33456 : pg_write_barrier();
1052 33456 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1053 33456 : PGSemaphoreUnlock(waiter->sem);
1054 : }
1055 56932 : }
1056 :
1057 : /*
1058 : * Add ourselves to the end of the queue.
1059 : *
1060 : * NB: Mode can be LW_WAIT_UNTIL_FREE here!
1061 : */
1062 : static void
1063 48916 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
1064 : {
1065 : /*
1066 : * If we don't have a PGPROC structure, there's no way to wait. This
1067 : * should never occur, since MyProc should only be null during shared
1068 : * memory initialization.
1069 : */
1070 48916 : if (MyProc == NULL)
1071 0 : elog(PANIC, "cannot wait without a PGPROC structure");
1072 :
1073 48916 : if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
1074 0 : elog(PANIC, "queueing for lock while waiting on another one");
1075 :
1076 48916 : LWLockWaitListLock(lock);
1077 :
1078 : /* setting the flag is protected by the spinlock */
1079 48916 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
1080 :
1081 48916 : MyProc->lwWaiting = LW_WS_WAITING;
1082 48916 : MyProc->lwWaitMode = mode;
1083 :
1084 : /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
1085 48916 : if (mode == LW_WAIT_UNTIL_FREE)
1086 7168 : proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1087 : else
1088 41748 : proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1089 :
1090 : /* Can release the mutex now */
1091 48916 : LWLockWaitListUnlock(lock);
1092 :
1093 : #ifdef LOCK_DEBUG
1094 : pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
1095 : #endif
1096 48916 : }
1097 :
1098 : /*
1099 : * Remove ourselves from the waitlist.
1100 : *
1101 : * This is used if we queued ourselves because we thought we needed to sleep
1102 : * but, after further checking, we discovered that we don't actually need to
1103 : * do so.
1104 : */
1105 : static void
1106 17298 : LWLockDequeueSelf(LWLock *lock)
1107 : {
1108 : bool on_waitlist;
1109 :
1110 : #ifdef LWLOCK_STATS
1111 : lwlock_stats *lwstats;
1112 :
1113 : lwstats = get_lwlock_stats_entry(lock);
1114 :
1115 : lwstats->dequeue_self_count++;
1116 : #endif
1117 :
1118 17298 : LWLockWaitListLock(lock);
1119 :
1120 : /*
1121 : * Remove ourselves from the waitlist, unless we've already been removed.
1122 : * The removal happens with the wait list lock held, so there's no race in
1123 : * this check.
1124 : */
1125 17298 : on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
1126 17298 : if (on_waitlist)
1127 15226 : proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
1128 :
1129 17298 : if (proclist_is_empty(&lock->waiters) &&
1130 17098 : (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
1131 : {
1132 17090 : pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
1133 : }
1134 :
1135 : /* XXX: combine with fetch_and above? */
1136 17298 : LWLockWaitListUnlock(lock);
1137 :
1138 : /* clear waiting state again, nice for debugging */
1139 17298 : if (on_waitlist)
1140 15226 : MyProc->lwWaiting = LW_WS_NOT_WAITING;
1141 : else
1142 : {
1143 2072 : int extraWaits = 0;
1144 :
1145 : /*
1146 : * Somebody else dequeued us and has or will wake us up. Deal with the
1147 : * superfluous absorption of a wakeup.
1148 : */
1149 :
1150 : /*
1151 : * Reset RELEASE_OK flag if somebody woke us before we removed
1152 : * ourselves - they'll have set it to false.
1153 : */
1154 2072 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1155 :
1156 : /*
1157 : * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
1158 : * get reset at some inconvenient point later. Most of the time this
1159 : * will immediately return.
1160 : */
1161 : for (;;)
1162 : {
1163 2072 : PGSemaphoreLock(MyProc->sem);
1164 2072 : if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
1165 2072 : break;
1166 0 : extraWaits++;
1167 : }
1168 :
1169 : /*
1170 : * Fix the process wait semaphore's count for any absorbed wakeups.
1171 : */
1172 2072 : while (extraWaits-- > 0)
1173 0 : PGSemaphoreUnlock(MyProc->sem);
1174 : }
1175 :
1176 : #ifdef LOCK_DEBUG
1177 : {
1178 : /* not waiting anymore */
1179 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1180 :
1181 : Assert(nwaiters < MAX_BACKENDS);
1182 : }
1183 : #endif
1184 17298 : }
1185 :
1186 : /*
1187 : * LWLockAcquire - acquire a lightweight lock in the specified mode
1188 : *
1189 : * If the lock is not available, sleep until it is. Returns true if the lock
1190 : * was available immediately, false if we had to sleep.
1191 : *
1192 : * Side effect: cancel/die interrupts are held off until lock release.
1193 : */
1194 : bool
1195 356423598 : LWLockAcquire(LWLock *lock, LWLockMode mode)
1196 : {
1197 356423598 : PGPROC *proc = MyProc;
1198 356423598 : bool result = true;
1199 356423598 : int extraWaits = 0;
1200 : #ifdef LWLOCK_STATS
1201 : lwlock_stats *lwstats;
1202 :
1203 : lwstats = get_lwlock_stats_entry(lock);
1204 : #endif
1205 :
1206 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1207 :
1208 : PRINT_LWDEBUG("LWLockAcquire", lock, mode);
1209 :
1210 : #ifdef LWLOCK_STATS
1211 : /* Count lock acquisition attempts */
1212 : if (mode == LW_EXCLUSIVE)
1213 : lwstats->ex_acquire_count++;
1214 : else
1215 : lwstats->sh_acquire_count++;
1216 : #endif /* LWLOCK_STATS */
1217 :
1218 : /*
1219 : * We can't wait if we haven't got a PGPROC. This should only occur
1220 : * during bootstrap or shared memory initialization. Put an Assert here
1221 : * to catch unsafe coding practices.
1222 : */
1223 : Assert(!(proc == NULL && IsUnderPostmaster));
1224 :
1225 : /* Ensure we will have room to remember the lock */
1226 356423598 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1227 0 : elog(ERROR, "too many LWLocks taken");
1228 :
1229 : /*
1230 : * Lock out cancel/die interrupts until we exit the code section protected
1231 : * by the LWLock. This ensures that interrupts will not interfere with
1232 : * manipulations of data structures in shared memory.
1233 : */
1234 356423598 : HOLD_INTERRUPTS();
1235 :
1236 : /*
1237 : * Loop here to try to acquire lock after each time we are signaled by
1238 : * LWLockRelease.
1239 : *
1240 : * NOTE: it might seem better to have LWLockRelease actually grant us the
1241 : * lock, rather than retrying and possibly having to go back to sleep. But
1242 : * in practice that is no good because it means a process swap for every
1243 : * lock acquisition when two or more processes are contending for the same
1244 : * lock. Since LWLocks are normally used to protect not-very-long
1245 : * sections of computation, a process needs to be able to acquire and
1246 : * release the same lock many times during a single CPU time slice, even
1247 : * in the presence of contention. The efficiency of being able to do that
1248 : * outweighs the inefficiency of sometimes wasting a process dispatch
1249 : * cycle because the lock is not free when a released waiter finally gets
1250 : * to run. See pgsql-hackers archives for 29-Dec-01.
1251 : */
1252 : for (;;)
1253 26034 : {
1254 : bool mustwait;
1255 :
1256 : /*
1257 : * Try to grab the lock the first time, we're not in the waitqueue
1258 : * yet/anymore.
1259 : */
1260 356449632 : mustwait = LWLockAttemptLock(lock, mode);
1261 :
1262 356449632 : if (!mustwait)
1263 : {
1264 : LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
1265 356407884 : break; /* got the lock */
1266 : }
1267 :
1268 : /*
1269 : * Ok, at this point we couldn't grab the lock on the first try. We
1270 : * cannot simply queue ourselves to the end of the list and wait to be
1271 : * woken up because by now the lock could long since have been released. Instead
1272 : * Instead add us to the queue and try to grab the lock again. If we
1273 : * succeed we need to revert the queuing and be happy, otherwise we
1274 : * recheck the lock. If we still couldn't grab it, we know that the
1275 : * other locker will see our queue entries when releasing since they
1276 : * existed before we checked for the lock.
1277 : */
1278 :
1279 : /* add to the queue */
1280 41748 : LWLockQueueSelf(lock, mode);
1281 :
1282 : /* we're now guaranteed to be woken up if necessary */
1283 41748 : mustwait = LWLockAttemptLock(lock, mode);
1284 :
1285 : /* ok, grabbed the lock the second time round, need to undo queueing */
1286 41748 : if (!mustwait)
1287 : {
1288 : LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
1289 :
1290 15714 : LWLockDequeueSelf(lock);
1291 15714 : break;
1292 : }
1293 :
1294 : /*
1295 : * Wait until awakened.
1296 : *
1297 : * It is possible that we get awakened for a reason other than being
1298 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1299 : * we've gotten the LWLock, re-increment the sema by the number of
1300 : * additional signals received.
1301 : */
1302 : LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
1303 :
1304 : #ifdef LWLOCK_STATS
1305 : lwstats->block_count++;
1306 : #endif
1307 :
1308 26034 : LWLockReportWaitStart(lock);
1309 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1310 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1311 :
1312 : for (;;)
1313 : {
1314 26034 : PGSemaphoreLock(proc->sem);
1315 26034 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1316 26034 : break;
1317 0 : extraWaits++;
1318 : }
1319 :
1320 : /* Retrying, allow LWLockRelease to release waiters again. */
1321 26034 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1322 :
1323 : #ifdef LOCK_DEBUG
1324 : {
1325 : /* not waiting anymore */
1326 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1327 :
1328 : Assert(nwaiters < MAX_BACKENDS);
1329 : }
1330 : #endif
1331 :
1332 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1333 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1334 26034 : LWLockReportWaitEnd();
1335 :
1336 : LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
1337 :
1338 : /* Now loop back and try to acquire lock again. */
1339 26034 : result = false;
1340 : }
1341 :
1342 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
1343 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
1344 :
1345 : /* Add lock to list of locks held by this backend */
1346 356423598 : held_lwlocks[num_held_lwlocks].lock = lock;
1347 356423598 : held_lwlocks[num_held_lwlocks++].mode = mode;
1348 :
1349 : /*
1350 : * Fix the process wait semaphore's count for any absorbed wakeups.
1351 : */
1352 356423598 : while (extraWaits-- > 0)
1353 0 : PGSemaphoreUnlock(proc->sem);
1354 :
1355 356423598 : return result;
1356 : }
1357 :
1358 : /*
1359 : * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
1360 : *
1361 : * If the lock is not available, return false with no side-effects.
1362 : *
1363 : * If successful, cancel/die interrupts are held off until lock release.
1364 : */
1365 : bool
1366 3258300 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
1367 : {
1368 : bool mustwait;
1369 :
1370 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1371 :
1372 : PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
1373 :
1374 : /* Ensure we will have room to remember the lock */
1375 3258300 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1376 0 : elog(ERROR, "too many LWLocks taken");
1377 :
1378 : /*
1379 : * Lock out cancel/die interrupts until we exit the code section protected
1380 : * by the LWLock. This ensures that interrupts will not interfere with
1381 : * manipulations of data structures in shared memory.
1382 : */
1383 3258300 : HOLD_INTERRUPTS();
1384 :
1385 : /* Check for the lock */
1386 3258300 : mustwait = LWLockAttemptLock(lock, mode);
1387 :
1388 3258300 : if (mustwait)
1389 : {
1390 : /* Failed to get lock, so release interrupt holdoff */
1391 1396 : RESUME_INTERRUPTS();
1392 :
1393 : LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
1394 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
1395 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
1396 : }
1397 : else
1398 : {
1399 : /* Add lock to list of locks held by this backend */
1400 3256904 : held_lwlocks[num_held_lwlocks].lock = lock;
1401 3256904 : held_lwlocks[num_held_lwlocks++].mode = mode;
1402 : if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
1403 : TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
1404 : }
1405 3258300 : return !mustwait;
1406 : }
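
/*
 * Illustrative caller pattern only: using LWLockConditionalAcquire() to do
 * optional work without blocking. The lock pointer and the helper name are
 * generic; nothing here is specific to any particular caller.
 */
#ifdef NOT_USED
static void
example_try_optional_work(LWLock *lock)
{
	if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
	{
		/* got the lock without waiting; interrupts stay held until release */
		/* ... do the protected work ... */
		LWLockRelease(lock);
	}
	else
	{
		/* lock busy: skip the optional work or fall back to LWLockAcquire() */
	}
}
#endif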
1407 :
1408 : /*
1409 : * LWLockAcquireOrWait - Acquire lock, or wait until it's free
1410 : *
1411 : * The semantics of this function are a bit funky. If the lock is currently
1412 : * free, it is acquired in the given mode, and the function returns true. If
1413 : * the lock isn't immediately free, the function waits until it is released
1414 : * and returns false, but does not acquire the lock.
1415 : *
1416 : * This is currently used for WALWriteLock: when a backend flushes the WAL,
1417 : * holding WALWriteLock, it can flush the commit records of many other
1418 : * backends as a side-effect. Those other backends need to wait until the
1419 : * flush finishes, but don't need to acquire the lock anymore. They can just
1420 : * wake up, observe that their records have already been flushed, and return.
1421 : */
1422 : bool
1423 194392 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
1424 : {
1425 194392 : PGPROC *proc = MyProc;
1426 : bool mustwait;
1427 194392 : int extraWaits = 0;
1428 : #ifdef LWLOCK_STATS
1429 : lwlock_stats *lwstats;
1430 :
1431 : lwstats = get_lwlock_stats_entry(lock);
1432 : #endif
1433 :
1434 : Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
1435 :
1436 : PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
1437 :
1438 : /* Ensure we will have room to remember the lock */
1439 194392 : if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
1440 0 : elog(ERROR, "too many LWLocks taken");
1441 :
1442 : /*
1443 : * Lock out cancel/die interrupts until we exit the code section protected
1444 : * by the LWLock. This ensures that interrupts will not interfere with
1445 : * manipulations of data structures in shared memory.
1446 : */
1447 194392 : HOLD_INTERRUPTS();
1448 :
1449 : /*
1450 : * NB: We're using nearly the same twice-in-a-row lock acquisition
1451 : * protocol as LWLockAcquire(). Check its comments for details.
1452 : */
1453 194392 : mustwait = LWLockAttemptLock(lock, mode);
1454 :
1455 194392 : if (mustwait)
1456 : {
1457 1838 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1458 :
1459 1838 : mustwait = LWLockAttemptLock(lock, mode);
1460 :
1461 1838 : if (mustwait)
1462 : {
1463 : /*
1464 : * Wait until awakened. Like in LWLockAcquire, be prepared for
1465 : * bogus wakeups.
1466 : */
1467 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
1468 :
1469 : #ifdef LWLOCK_STATS
1470 : lwstats->block_count++;
1471 : #endif
1472 :
1473 1816 : LWLockReportWaitStart(lock);
1474 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1475 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
1476 :
1477 : for (;;)
1478 : {
1479 1816 : PGSemaphoreLock(proc->sem);
1480 1816 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1481 1816 : break;
1482 0 : extraWaits++;
1483 : }
1484 :
1485 : #ifdef LOCK_DEBUG
1486 : {
1487 : /* not waiting anymore */
1488 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1489 :
1490 : Assert(nwaiters < MAX_BACKENDS);
1491 : }
1492 : #endif
1493 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1494 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
1495 1816 : LWLockReportWaitEnd();
1496 :
1497 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
1498 : }
1499 : else
1500 : {
1501 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
1502 :
1503 : /*
1504 : * Got lock in the second attempt, undo queueing. We need to treat
1505 : * this as having successfully acquired the lock, otherwise we'd
1506 : * not necessarily wake up people we've prevented from acquiring
1507 : * the lock.
1508 : */
1509 22 : LWLockDequeueSelf(lock);
1510 : }
1511 : }
1512 :
1513 : /*
1514 : * Fix the process wait semaphore's count for any absorbed wakeups.
1515 : */
1516 194392 : while (extraWaits-- > 0)
1517 0 : PGSemaphoreUnlock(proc->sem);
1518 :
1519 194392 : if (mustwait)
1520 : {
1521 : /* Failed to get lock, so release interrupt holdoff */
1522 1816 : RESUME_INTERRUPTS();
1523 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
1524 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
1525 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
1526 : }
1527 : else
1528 : {
1529 : LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
1530 : /* Add lock to list of locks held by this backend */
1531 192576 : held_lwlocks[num_held_lwlocks].lock = lock;
1532 192576 : held_lwlocks[num_held_lwlocks++].mode = mode;
1533 : if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
1534 : TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
1535 : }
1536 :
1537 194392 : return !mustwait;
1538 : }
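
/*
 * Illustrative caller pattern only, modeled on the WALWriteLock usage
 * described above. my_work_already_done() and my_do_work() are hypothetical
 * helpers standing in for "check whether our records were flushed" and
 * "flush up to the requested point".
 */
#ifdef NOT_USED
static void
example_acquire_or_wait(LWLock *lock)
{
	for (;;)
	{
		if (my_work_already_done())
			return;

		if (LWLockAcquireOrWait(lock, LW_EXCLUSIVE))
		{
			/* we got the lock: do the work ourselves, then release */
			my_do_work();
			LWLockRelease(lock);
			return;
		}

		/*
		 * Somebody else held the lock and has now released it; their work
		 * may already cover ours, so loop back and re-check.
		 */
	}
}
#endif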
1539 :
1540 : /*
1541 : * Does the lwlock in its current state need to wait for the variable value to
1542 : * change?
1543 : *
1544 : * If we don't need to wait, and it's because the value of the variable has
1545 : * changed, store the current value in newval.
1546 : *
1547 : * *result is set to true if the lock was free, and false otherwise.
1548 : */
1549 : static bool
1550 8595212 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1551 : uint64 *newval, bool *result)
1552 : {
1553 : bool mustwait;
1554 : uint64 value;
1555 :
1556 : /*
1557 : * Test first to see if the lock is free right now.
1558 : *
1559 : * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
1560 : * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
1561 : * this, so we don't need a memory barrier here as far as the current
1562 : * usage is concerned. But that might not be safe in general.
1563 : */
1564 8595212 : mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1565 :
1566 8595212 : if (!mustwait)
1567 : {
1568 2747096 : *result = true;
1569 2747096 : return false;
1570 : }
1571 :
1572 5848116 : *result = false;
1573 :
1574 : /*
1575 : * Reading this value atomically is safe even on platforms where uint64
1576 : * cannot be read without observing a torn value.
1577 : */
1578 5848116 : value = pg_atomic_read_u64(valptr);
1579 :
1580 5848116 : if (value != oldval)
1581 : {
1582 5839018 : mustwait = false;
1583 5839018 : *newval = value;
1584 : }
1585 : else
1586 : {
1587 9098 : mustwait = true;
1588 : }
1589 :
1590 5848116 : return mustwait;
1591 : }
1592 :
1593 : /*
1594 : * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
1595 : *
1596 : * If the lock is held and *valptr equals oldval, waits until the lock is
1597 : * either freed, or the lock holder updates *valptr by calling
1598 : * LWLockUpdateVar. If the lock is free on exit (immediately or after
1599 : * waiting), returns true. If the lock is still held, but *valptr no longer
1600 : * matches oldval, returns false and sets *newval to the current value in
1601 : * *valptr.
1602 : *
1603 : * Note: this function ignores shared lock holders; if the lock is held
1604 : * in shared mode, returns 'true'.
1605 : *
1606 : * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
1607 : * hence the caller of this function may want to rely on an explicit barrier or
1608 : * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
1609 : */
1610 : bool
1611 8586114 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
1612 : uint64 *newval)
1613 : {
1614 8586114 : PGPROC *proc = MyProc;
1615 8586114 : int extraWaits = 0;
1616 8586114 : bool result = false;
1617 : #ifdef LWLOCK_STATS
1618 : lwlock_stats *lwstats;
1619 :
1620 : lwstats = get_lwlock_stats_entry(lock);
1621 : #endif
1622 :
1623 : PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
1624 :
1625 : /*
1626 : * Lock out cancel/die interrupts while we sleep on the lock. There is no
1627 : * cleanup mechanism to remove us from the wait queue if we got
1628 : * interrupted.
1629 : */
1630 8586114 : HOLD_INTERRUPTS();
1631 :
1632 : /*
1633 : * Loop here to check the lock's status after each time we are signaled.
1634 : */
1635 : for (;;)
1636 3768 : {
1637 : bool mustwait;
1638 :
1639 8589882 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1640 : &result);
1641 :
1642 8589882 : if (!mustwait)
1643 8584552 : break; /* the lock was free or value didn't match */
1644 :
1645 : /*
1646 : * Add myself to the wait queue. Note that this is racy: somebody else
1647 : * could wake up before we're finished queuing. NB: We're using nearly
1648 : * the same twice-in-a-row lock acquisition protocol as
1649 : * LWLockAcquire(). Check its comments for details. The only
1650 : * difference is that we also have to check the variable's values when
1651 : * checking the state of the lock.
1652 : */
1653 5330 : LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
1654 :
1655 : /*
1656 : * Set RELEASE_OK flag, to make sure we get woken up as soon as the
1657 : * lock is released.
1658 : */
1659 5330 : pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
1660 :
1661 : /*
1662 : * We're now guaranteed to be woken up if necessary. Recheck the lock
1663 : * and variable's state.
1664 : */
1665 5330 : mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1666 : &result);
1667 :
1668 : /* Ok, no conflict after we queued ourselves. Undo queueing. */
1669 5330 : if (!mustwait)
1670 : {
1671 : LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
1672 :
1673 1562 : LWLockDequeueSelf(lock);
1674 1562 : break;
1675 : }
1676 :
1677 : /*
1678 : * Wait until awakened.
1679 : *
1680 : * It is possible that we get awakened for a reason other than being
1681 : * signaled by LWLockRelease. If so, loop back and wait again. Once
1682 : * we're done waiting, re-increment the sema by the number of
1683 : * additional signals received.
1684 : */
1685 : LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
1686 :
1687 : #ifdef LWLOCK_STATS
1688 : lwstats->block_count++;
1689 : #endif
1690 :
1691 3768 : LWLockReportWaitStart(lock);
1692 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
1693 : TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
1694 :
1695 : for (;;)
1696 : {
1697 3768 : PGSemaphoreLock(proc->sem);
1698 3768 : if (proc->lwWaiting == LW_WS_NOT_WAITING)
1699 3768 : break;
1700 0 : extraWaits++;
1701 : }
1702 :
1703 : #ifdef LOCK_DEBUG
1704 : {
1705 : /* not waiting anymore */
1706 : uint32 nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
1707 :
1708 : Assert(nwaiters < MAX_BACKENDS);
1709 : }
1710 : #endif
1711 :
1712 : if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
1713 : TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
1714 3768 : LWLockReportWaitEnd();
1715 :
1716 : LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
1717 :
1718 : /* Now loop back and check the status of the lock again. */
1719 : }
1720 :
1721 : /*
1722 : * Fix the process wait semaphore's count for any absorbed wakeups.
1723 : */
1724 8586114 : while (extraWaits-- > 0)
1725 0 : PGSemaphoreUnlock(proc->sem);
1726 :
1727 : /*
1728 : * Now okay to allow cancel/die interrupts.
1729 : */
1730 8586114 : RESUME_INTERRUPTS();
1731 :
1732 8586114 : return result;
1733 : }
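/*
 * Editor's sketch (not part of the original source, guarded by the
 * hypothetical LWLOCK_USAGE_SKETCH macro so it is never compiled): one
 * possible caller of LWLockWaitForVar(), in the spirit of
 * WaitXLogInsertionsToFinish().  MyProgressLock, its fields and
 * wait_for_progress() are illustrative assumptions, not PostgreSQL APIs;
 * in real code the struct would live in shared memory and its lock would
 * be set up with LWLockInitialize().
 */
#ifdef LWLOCK_USAGE_SKETCH
typedef struct MyProgressLock
{
	LWLock		lock;
	pg_atomic_uint64 progress;	/* how far the current lock holder has gotten */
} MyProgressLock;

static uint64
wait_for_progress(MyProgressLock *plock, uint64 seen)
{
	uint64		newval;

	/*
	 * Keep waiting until the lock is free (or only held in shared mode).
	 * Each time the variable advances while the lock is still held,
	 * remember the new value and wait again.
	 */
	while (!LWLockWaitForVar(&plock->lock, &plock->progress, seen, &newval))
		seen = newval;
	return seen;
}
#endif							/* LWLOCK_USAGE_SKETCH */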
1734 :
1735 :
1736 : /*
1737 : * LWLockUpdateVar - Update a variable and wake up waiters atomically
1738 : *
1739 : * Sets *valptr to 'val', and wakes up all processes waiting for us with
1740 : * LWLockWaitForVar(). It first sets the value atomically and then wakes up
1741 : * waiting processes so that any process calling LWLockWaitForVar() on the same
1742 : * lock is guaranteed to see the new value, and act accordingly.
1743 : *
1744 : * The caller must be holding the lock in exclusive mode.
1745 : */
1746 : void
1747 1249850 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1748 : {
1749 : proclist_head wakeup;
1750 : proclist_mutable_iter iter;
1751 :
1752 : PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
1753 :
1754 : /*
1755 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1756 : * that the variable is updated before waking up waiters.
1757 : */
1758 1249850 : pg_atomic_exchange_u64(valptr, val);
1759 :
1760 1249850 : proclist_init(&wakeup);
1761 :
1762 1249850 : LWLockWaitListLock(lock);
1763 :
1764 : Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
1765 :
1766 : /*
1767 : * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
1768 : * up. They are always in the front of the queue.
1769 : */
1770 1250098 : proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
1771 : {
1772 374 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1773 :
1774 374 : if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
1775 126 : break;
1776 :
1777 248 : proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
1778 248 : proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
1779 :
1780 : /* see LWLockWakeup() */
1781 : Assert(waiter->lwWaiting == LW_WS_WAITING);
1782 248 : waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
1783 : }
1784 :
1785 : /* We are done updating shared state of the lock itself. */
1786 1249850 : LWLockWaitListUnlock(lock);
1787 :
1788 : /*
1789 : * Awaken any waiters I removed from the queue.
1790 : */
1791 1250098 : proclist_foreach_modify(iter, &wakeup, lwWaitLink)
1792 : {
1793 248 : PGPROC *waiter = GetPGProcByNumber(iter.cur);
1794 :
1795 248 : proclist_delete(&wakeup, iter.cur, lwWaitLink);
1796 : /* check comment in LWLockWakeup() about this barrier */
1797 248 : pg_write_barrier();
1798 248 : waiter->lwWaiting = LW_WS_NOT_WAITING;
1799 248 : PGSemaphoreUnlock(waiter->sem);
1800 : }
1801 1249850 : }
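/*
 * Editor's sketch (not part of the original source): the holder's side of
 * the protocol above.  A backend holding the lock exclusively publishes its
 * progress so that backends blocked in LWLockWaitForVar() can continue;
 * advance_progress() and MyProgressLock (from the sketch after
 * LWLockWaitForVar()) are illustrative assumptions.
 */
#ifdef LWLOCK_USAGE_SKETCH
static void
advance_progress(MyProgressLock *plock, uint64 newprogress)
{
	/* the contract above requires holding the lock in exclusive mode */
	Assert(LWLockHeldByMeInMode(&plock->lock, LW_EXCLUSIVE));

	/* store the new value and wake any LW_WAIT_UNTIL_FREE waiters */
	LWLockUpdateVar(&plock->lock, &plock->progress, newprogress);
}
#endif							/* LWLOCK_USAGE_SKETCH */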
1802 :
1803 :
1804 : /*
1805 : * LWLockRelease - release a previously acquired lock
1806 : */
1807 : void
1808 359873078 : LWLockRelease(LWLock *lock)
1809 : {
1810 : LWLockMode mode;
1811 : uint32 oldstate;
1812 : bool check_waiters;
1813 : int i;
1814 :
1815 : /*
1816 : * Remove lock from list of locks held. Usually, but not always, it will
1817 : * be the latest-acquired lock; so search array backwards.
1818 : */
1819 397300666 : for (i = num_held_lwlocks; --i >= 0;)
1820 397300666 : if (lock == held_lwlocks[i].lock)
1821 359873078 : break;
1822 :
1823 359873078 : if (i < 0)
1824 0 : elog(ERROR, "lock %s is not held", T_NAME(lock));
1825 :
1826 359873078 : mode = held_lwlocks[i].mode;
1827 :
1828 359873078 : num_held_lwlocks--;
1829 397300666 : for (; i < num_held_lwlocks; i++)
1830 37427588 : held_lwlocks[i] = held_lwlocks[i + 1];
1831 :
1832 : PRINT_LWDEBUG("LWLockRelease", lock, mode);
1833 :
1834 : /*
1835 : * Release my hold on the lock; after that it can immediately be acquired
1836 : * by others, even if we still have to wake up other waiters.
1837 : */
1838 359873078 : if (mode == LW_EXCLUSIVE)
1839 157153000 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
1840 : else
1841 202720078 : oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
1842 :
1843 : /* nobody else can have that kind of lock */
1844 : Assert(!(oldstate & LW_VAL_EXCLUSIVE));
1845 :
1846 : if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
1847 : TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
1848 :
1849 : /*
1850 : * If we're still waiting for previously-woken backends to get scheduled,
1851 : * don't wake them up again.
1852 : */
1853 359873078 : if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
1854 89800 : (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
1855 89800 : (oldstate & LW_LOCK_MASK) == 0)
1856 56932 : check_waiters = true;
1857 : else
1858 359816146 : check_waiters = false;
1859 :
1860 : /*
1861 : * As waking up waiters requires the spinlock to be acquired, only do so
1862 : * if necessary.
1863 : */
1864 359873078 : if (check_waiters)
1865 : {
1866 : /* XXX: remove before commit? */
1867 : LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
1868 56932 : LWLockWakeup(lock);
1869 : }
1870 :
1871 : /*
1872 : * Now okay to allow cancel/die interrupts.
1873 : */
1874 359873078 : RESUME_INTERRUPTS();
1875 359873078 : }
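/*
 * Editor's sketch (not part of the original source): locks need not be
 * released in LIFO order, which is why LWLockRelease() searches
 * held_lwlocks[] backwards instead of assuming the top entry matches.
 * The function and its parameters are illustrative assumptions.
 */
#ifdef LWLOCK_USAGE_SKETCH
static void
copy_between_areas(LWLock *srclock, LWLock *dstlock, int *src, int *dst)
{
	LWLockAcquire(srclock, LW_SHARED);
	LWLockAcquire(dstlock, LW_EXCLUSIVE);

	*dst = *src;

	/* releasing in acquisition order, i.e. not LIFO, is perfectly legal */
	LWLockRelease(srclock);
	LWLockRelease(dstlock);
}
#endif							/* LWLOCK_USAGE_SKETCH */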
1876 :
1877 : /*
1878 : * LWLockReleaseClearVar - release a previously acquired lock, reset variable
1879 : */
1880 : void
1881 24718748 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
1882 : {
1883 : /*
1884 : * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
1885 : * that the variable is updated before releasing the lock.
1886 : */
1887 24718748 : pg_atomic_exchange_u64(valptr, val);
1888 :
1889 24718748 : LWLockRelease(lock);
1890 24718748 : }
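/*
 * Editor's sketch (not part of the original source): releasing the lock
 * from the sketches above while resetting the variable, so that later
 * LWLockWaitForVar() callers see a free lock immediately.  Treating 0 as
 * "nothing in progress" is an illustrative assumption.
 */
#ifdef LWLOCK_USAGE_SKETCH
static void
finish_progress(MyProgressLock *plock)
{
	LWLockReleaseClearVar(&plock->lock, &plock->progress, 0);
}
#endif							/* LWLOCK_USAGE_SKETCH */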
1891 :
1892 :
1893 : /*
1894 : * LWLockReleaseAll - release all currently-held locks
1895 : *
1896 : * Used to clean up after ereport(ERROR). An important difference between this
1897 : * function and retail LWLockRelease calls is that InterruptHoldoffCount is
1898 : * unchanged by this operation. This is necessary since InterruptHoldoffCount
1899 : * has been set to an appropriate level earlier in error recovery. We could
1900 : * decrement it below zero if we allow it to drop for each released lock!
1901 : */
1902 : void
1903 82122 : LWLockReleaseAll(void)
1904 : {
1905 82504 : while (num_held_lwlocks > 0)
1906 : {
1907 382 : HOLD_INTERRUPTS(); /* match the upcoming RESUME_INTERRUPTS */
1908 :
1909 382 : LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
1910 : }
1911 82122 : }
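/*
 * Editor's sketch (not part of the original source): LWLockReleaseAll() is
 * meant for error-recovery paths that run after ereport(ERROR).  The cleanup
 * routine below is an illustrative assumption; note that
 * InterruptHoldoffCount is deliberately left alone, as described above.
 */
#ifdef LWLOCK_USAGE_SKETCH
static void
cleanup_after_error(void)
{
	/* drop any LWLocks the failed operation was still holding */
	LWLockReleaseAll();

	/* ... further subsystem-specific cleanup would follow here ... */
}
#endif							/* LWLOCK_USAGE_SKETCH */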
1912 :
1913 :
1914 : /*
1915 : * LWLockHeldByMe - test whether my process holds a lock in any mode
1916 : *
1917 : * This is meant as debug support only.
1918 : */
1919 : bool
1920 0 : LWLockHeldByMe(LWLock *lock)
1921 : {
1922 : int i;
1923 :
1924 0 : for (i = 0; i < num_held_lwlocks; i++)
1925 : {
1926 0 : if (held_lwlocks[i].lock == lock)
1927 0 : return true;
1928 : }
1929 0 : return false;
1930 : }
1931 :
1932 : /*
1933 : * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
1934 : *
1935 : * This is meant as debug support only.
1936 : */
1937 : bool
1938 0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
1939 : {
1940 : char *held_lock_addr;
1941 : char *begin;
1942 : char *end;
1943 : int i;
1944 :
1945 0 : begin = (char *) lock;
1946 0 : end = begin + nlocks * stride;
1947 0 : for (i = 0; i < num_held_lwlocks; i++)
1948 : {
1949 0 : held_lock_addr = (char *) held_lwlocks[i].lock;
1950 0 : if (held_lock_addr >= begin &&
1951 0 : held_lock_addr < end &&
1952 0 : (held_lock_addr - begin) % stride == 0)
1953 0 : return true;
1954 : }
1955 0 : return false;
1956 : }
1957 :
1958 : /*
1959 : * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
1960 : *
1961 : * This is meant as debug support only.
1962 : */
1963 : bool
1964 0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
1965 : {
1966 : int i;
1967 :
1968 0 : for (i = 0; i < num_held_lwlocks; i++)
1969 : {
1970 0 : if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
1971 0 : return true;
1972 : }
1973 0 : return false;
1974 : }
|