LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - lwlock.c (source / functions)
Test: PostgreSQL 18devel
Date: 2025-04-24 13:15:39
Coverage:            Hit    Total    Coverage
  Lines:             348      408      85.3 %
  Functions:          31       37      83.8 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * lwlock.c
       4             :  *    Lightweight lock manager
       5             :  *
       6             :  * Lightweight locks are intended primarily to provide mutual exclusion of
       7             :  * access to shared-memory data structures.  Therefore, they offer both
       8             :  * exclusive and shared lock modes (to support read/write and read-only
       9             :  * access to a shared object).  There are few other frammishes.  User-level
      10             :  * locking should be done with the full lock manager --- which depends on
      11             :  * LWLocks to protect its shared state.
      12             :  *
      13             :  * In addition to exclusive and shared modes, lightweight locks can be used to
       14             :  * wait until a variable changes value.  The variable is not reset when the
       15             :  * lock is acquired with LWLockAcquire; i.e. it retains the value it was set
       16             :  * to when the lock was last released, and can be updated without releasing
       17             :  * the lock by calling LWLockUpdateVar.  LWLockWaitForVar
      18             :  * waits for the variable to be updated, or until the lock is free.  When
      19             :  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
      20             :  * appropriate value for a free lock.  The meaning of the variable is up to
      21             :  * the caller, the lightweight lock code just assigns and compares it.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/storage/lmgr/lwlock.c
      28             :  *
      29             :  * NOTES:
      30             :  *
       31             :  * This used to be a pretty straightforward reader-writer lock
      32             :  * implementation, in which the internal state was protected by a
      33             :  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
      34             :  * too high for workloads/locks that were taken in shared mode very
      35             :  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
      36             :  * while trying to acquire a shared lock that was actually free.
      37             :  *
      38             :  * Thus a new implementation was devised that provides wait-free shared lock
      39             :  * acquisition for locks that aren't exclusively locked.
      40             :  *
      41             :  * The basic idea is to have a single atomic variable 'lockcount' instead of
      42             :  * the formerly separate shared and exclusive counters and to use atomic
      43             :  * operations to acquire the lock. That's fairly easy to do for plain
      44             :  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
      45             :  * in the OS.
      46             :  *
      47             :  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
       48             :  * variable. For an exclusive lock we swap in a sentinel value
       49             :  * (LW_VAL_EXCLUSIVE); for shared locks we count the number of holders.
      50             :  *
       51             :  * To release the lock we use an atomic decrement. If the new value is zero
       52             :  * (which we learn atomically), we know we can, and indeed must, wake up
       53             :  * any waiters.
      54             :  *
       55             :  * Obviously it is important that the sentinel value for exclusive locks
       56             :  * doesn't conflict with the maximum number of possible shared lockers -
       57             :  * luckily MAX_BACKENDS makes that easy.
      58             :  *
      59             :  *
      60             :  * The attentive reader might have noticed that naively doing the above has a
      61             :  * glaring race condition: We try to lock using the atomic operations and
      62             :  * notice that we have to wait. Unfortunately by the time we have finished
      63             :  * queuing, the former locker very well might have already finished its
      64             :  * work. That's problematic because we're now stuck waiting inside the OS.
       65             :  *
       66             :  * To mitigate those races we use a multi-phase attempt at locking:
      67             :  *   Phase 1: Try to do it atomically, if we succeed, nice
      68             :  *   Phase 2: Add ourselves to the waitqueue of the lock
      69             :  *   Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
      70             :  *            the queue
      71             :  *   Phase 4: Sleep till wake-up, goto Phase 1
      72             :  *
       73             :  * This protects us against the problem above: nobody can release too
       74             :  * quickly, before we're queued, since after Phase 2 we're already queued.
      75             :  * -------------------------------------------------------------------------
      76             :  */
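
The locking protocol described above condenses into a fairly small standalone sketch. The following is illustrative only: it uses C11 atomics rather than PostgreSQL's pg_atomic wrappers, enqueue_self(), dequeue_self() and sleep_until_woken() are hypothetical stand-ins for the wait-queue and semaphore machinery implemented later in this file, and the constants mirror LW_VAL_EXCLUSIVE / LW_SHARED_MASK in spirit only.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define VAL_EXCLUSIVE (1u << 18)        /* sentinel; above any shared count */
    #define SHARED_MASK   (VAL_EXCLUSIVE - 1)

    extern void enqueue_self(atomic_uint *state);
    extern void dequeue_self(atomic_uint *state);
    extern void sleep_until_woken(void);

    /* One atomic acquisition attempt; returns true if the caller must wait. */
    static bool
    attempt_lock(atomic_uint *state, bool exclusive)
    {
        uint32_t    old = atomic_load(state);

        for (;;)
        {
            bool        free = exclusive ?
                (old & (VAL_EXCLUSIVE | SHARED_MASK)) == 0 :
                (old & VAL_EXCLUSIVE) == 0;
            uint32_t    desired = free ? old + (exclusive ? VAL_EXCLUSIVE : 1) : old;

            /* On CAS failure, 'old' is refreshed and we simply loop. */
            if (atomic_compare_exchange_weak(state, &old, desired))
                return !free;
        }
    }

    void
    acquire(atomic_uint *state, bool exclusive)
    {
        for (;;)
        {
            if (!attempt_lock(state, exclusive))
                return;                 /* Phase 1: got it atomically */
            enqueue_self(state);        /* Phase 2: queue before re-checking */
            if (!attempt_lock(state, exclusive))
            {
                dequeue_self(state);    /* Phase 3: lock freed meanwhile */
                return;
            }
            sleep_until_woken();        /* Phase 4: sleep, then start over */
        }
    }
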
      77             : #include "postgres.h"
      78             : 
      79             : #include "miscadmin.h"
      80             : #include "pg_trace.h"
      81             : #include "pgstat.h"
      82             : #include "port/pg_bitutils.h"
      83             : #include "storage/proc.h"
      84             : #include "storage/proclist.h"
      85             : #include "storage/procnumber.h"
      86             : #include "storage/spin.h"
      87             : #include "utils/memutils.h"
      88             : 
      89             : #ifdef LWLOCK_STATS
      90             : #include "utils/hsearch.h"
      91             : #endif
      92             : 
      93             : 
      94             : #define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 31)
      95             : #define LW_FLAG_RELEASE_OK          ((uint32) 1 << 30)
      96             : #define LW_FLAG_LOCKED              ((uint32) 1 << 29)
      97             : #define LW_FLAG_BITS                3
      98             : #define LW_FLAG_MASK                (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
      99             : 
     100             : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
     101             : #define LW_VAL_EXCLUSIVE            (MAX_BACKENDS + 1)
     102             : #define LW_VAL_SHARED               1
     103             : 
     104             : /* already (power of 2)-1, i.e. suitable for a mask */
     105             : #define LW_SHARED_MASK              MAX_BACKENDS
     106             : #define LW_LOCK_MASK                (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
     107             : 
     108             : 
     109             : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
     110             :                  "MAX_BACKENDS + 1 needs to be a power of 2");
     111             : 
     112             : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
     113             :                  "MAX_BACKENDS and LW_FLAG_MASK overlap");
     114             : 
     115             : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
     116             :                  "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
     117             : 
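For concreteness: assuming MAX_BACKENDS is 0x3FFFF (2^18 - 1), as in current PostgreSQL sources, the 32-bit state word decomposes as

    bit  31       LW_FLAG_HAS_WAITERS
    bit  30       LW_FLAG_RELEASE_OK
    bit  29       LW_FLAG_LOCKED
    bits 19-28    (unused)
    bit  18       LW_VAL_EXCLUSIVE (0x40000)
    bits 0-17     shared-holder count (LW_SHARED_MASK = 0x3FFFF)

so a single atomic read yields the flags, the exclusive sentinel, and the shared-holder count all at once, which is what the static assertions above guarantee.
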
     118             : /*
     119             :  * There are three sorts of LWLock "tranches":
     120             :  *
     121             :  * 1. The individually-named locks defined in lwlocklist.h each have their
     122             :  * own tranche.  We absorb the names of these tranches from there into
     123             :  * BuiltinTrancheNames here.
     124             :  *
     125             :  * 2. There are some predefined tranches for built-in groups of locks.
     126             :  * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
     127             :  * appear in BuiltinTrancheNames[] below.
     128             :  *
     129             :  * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
     130             :  * or LWLockRegisterTranche.  The names of these that are known in the current
     131             :  * process appear in LWLockTrancheNames[].
     132             :  *
     133             :  * All these names are user-visible as wait event names, so choose with care
     134             :  * ... and do not forget to update the documentation's list of wait events.
     135             :  */
     136             : static const char *const BuiltinTrancheNames[] = {
     137             : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
     138             : #include "storage/lwlocklist.h"
     139             : #undef PG_LWLOCK
     140             :     [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
     141             :     [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
     142             :     [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
     143             :     [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
     144             :     [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
     145             :     [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
     146             :     [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
     147             :     [LWTRANCHE_WAL_INSERT] = "WALInsert",
     148             :     [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
     149             :     [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
     150             :     [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
     151             :     [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
     152             :     [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
     153             :     [LWTRANCHE_LOCK_MANAGER] = "LockManager",
     154             :     [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
     155             :     [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
     156             :     [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
     157             :     [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
     158             :     [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
     159             :     [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
     160             :     [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
     161             :     [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
     162             :     [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
     163             :     [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
     164             :     [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
     165             :     [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
     166             :     [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
     167             :     [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
     168             :     [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
     169             :     [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
     170             :     [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
     171             :     [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
     172             :     [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
     173             :     [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
     174             :     [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
     175             :     [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
     176             :     [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
     177             :     [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
     178             :     [LWTRANCHE_XACT_SLRU] = "XactSLRU",
     179             :     [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
     180             :     [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
     181             :     [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
     182             :     [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
     183             : };
     184             : 
     185             : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
     186             :                  LWTRANCHE_FIRST_USER_DEFINED,
     187             :                  "missing entries in BuiltinTrancheNames[]");
     188             : 
     189             : /*
     190             :  * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
     191             :  * stores the names of all dynamically-created tranches known to the current
     192             :  * process.  Any unused entries in the array will contain NULL.
     193             :  */
     194             : static const char **LWLockTrancheNames = NULL;
     195             : static int  LWLockTrancheNamesAllocated = 0;
     196             : 
     197             : /*
     198             :  * This points to the main array of LWLocks in shared memory.  Backends inherit
     199             :  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
     200             :  * where we have special measures to pass it down).
     201             :  */
     202             : LWLockPadded *MainLWLockArray = NULL;
     203             : 
     204             : /*
     205             :  * We use this structure to keep track of locked LWLocks for release
     206             :  * during error recovery.  Normally, only a few will be held at once, but
     207             :  * occasionally the number can be much higher; for example, the pg_buffercache
     208             :  * extension locks all buffer partitions simultaneously.
     209             :  */
     210             : #define MAX_SIMUL_LWLOCKS   200
     211             : 
     212             : /* struct representing the LWLocks we're holding */
     213             : typedef struct LWLockHandle
     214             : {
     215             :     LWLock     *lock;
     216             :     LWLockMode  mode;
     217             : } LWLockHandle;
     218             : 
     219             : static int  num_held_lwlocks = 0;
     220             : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
     221             : 
     222             : /* struct representing the LWLock tranche request for named tranche */
     223             : typedef struct NamedLWLockTrancheRequest
     224             : {
     225             :     char        tranche_name[NAMEDATALEN];
     226             :     int         num_lwlocks;
     227             : } NamedLWLockTrancheRequest;
     228             : 
     229             : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
     230             : static int  NamedLWLockTrancheRequestsAllocated = 0;
     231             : 
     232             : /*
     233             :  * NamedLWLockTrancheRequests is both the valid length of the request array,
     234             :  * and the length of the shared-memory NamedLWLockTrancheArray later on.
     235             :  * This variable and NamedLWLockTrancheArray are non-static so that
     236             :  * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
     237             :  */
     238             : int         NamedLWLockTrancheRequests = 0;
     239             : 
     240             : /* points to data in shared memory: */
     241             : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
     242             : 
     243             : static void InitializeLWLocks(void);
     244             : static inline void LWLockReportWaitStart(LWLock *lock);
     245             : static inline void LWLockReportWaitEnd(void);
     246             : static const char *GetLWTrancheName(uint16 trancheId);
     247             : 
     248             : #define T_NAME(lock) \
     249             :     GetLWTrancheName((lock)->tranche)
     250             : 
     251             : #ifdef LWLOCK_STATS
     252             : typedef struct lwlock_stats_key
     253             : {
     254             :     int         tranche;
     255             :     void       *instance;
     256             : }           lwlock_stats_key;
     257             : 
     258             : typedef struct lwlock_stats
     259             : {
     260             :     lwlock_stats_key key;
     261             :     int         sh_acquire_count;
     262             :     int         ex_acquire_count;
     263             :     int         block_count;
     264             :     int         dequeue_self_count;
     265             :     int         spin_delay_count;
     266             : }           lwlock_stats;
     267             : 
     268             : static HTAB *lwlock_stats_htab;
     269             : static lwlock_stats lwlock_stats_dummy;
     270             : #endif
     271             : 
     272             : #ifdef LOCK_DEBUG
     273             : bool        Trace_lwlocks = false;
     274             : 
     275             : inline static void
     276             : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
     277             : {
     278             :     /* hide statement & context here, otherwise the log is just too verbose */
     279             :     if (Trace_lwlocks)
     280             :     {
     281             :         uint32      state = pg_atomic_read_u32(&lock->state);
     282             : 
     283             :         ereport(LOG,
     284             :                 (errhidestmt(true),
     285             :                  errhidecontext(true),
     286             :                  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
     287             :                                  MyProcPid,
     288             :                                  where, T_NAME(lock), lock,
     289             :                                  (state & LW_VAL_EXCLUSIVE) != 0,
     290             :                                  state & LW_SHARED_MASK,
     291             :                                  (state & LW_FLAG_HAS_WAITERS) != 0,
     292             :                                  pg_atomic_read_u32(&lock->nwaiters),
     293             :                                  (state & LW_FLAG_RELEASE_OK) != 0)));
     294             :     }
     295             : }
     296             : 
     297             : inline static void
     298             : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
     299             : {
     300             :     /* hide statement & context here, otherwise the log is just too verbose */
     301             :     if (Trace_lwlocks)
     302             :     {
     303             :         ereport(LOG,
     304             :                 (errhidestmt(true),
     305             :                  errhidecontext(true),
     306             :                  errmsg_internal("%s(%s %p): %s", where,
     307             :                                  T_NAME(lock), lock, msg)));
     308             :     }
     309             : }
     310             : 
     311             : #else                           /* not LOCK_DEBUG */
     312             : #define PRINT_LWDEBUG(a,b,c) ((void)0)
     313             : #define LOG_LWDEBUG(a,b,c) ((void)0)
     314             : #endif                          /* LOCK_DEBUG */
     315             : 
     316             : #ifdef LWLOCK_STATS
     317             : 
     318             : static void init_lwlock_stats(void);
     319             : static void print_lwlock_stats(int code, Datum arg);
     320             : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
     321             : 
     322             : static void
     323             : init_lwlock_stats(void)
     324             : {
     325             :     HASHCTL     ctl;
     326             :     static MemoryContext lwlock_stats_cxt = NULL;
     327             :     static bool exit_registered = false;
     328             : 
     329             :     if (lwlock_stats_cxt != NULL)
     330             :         MemoryContextDelete(lwlock_stats_cxt);
     331             : 
     332             :     /*
     333             :      * The LWLock stats will be updated within a critical section, which
     334             :      * requires allocating new hash entries. Allocations within a critical
     335             :      * section are normally not allowed because running out of memory would
     336             :      * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
     337             :      * turned on in production, so that's an acceptable risk. The hash entries
     338             :      * are small, so the risk of running out of memory is minimal in practice.
     339             :      */
     340             :     lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
     341             :                                              "LWLock stats",
     342             :                                              ALLOCSET_DEFAULT_SIZES);
     343             :     MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
     344             : 
     345             :     ctl.keysize = sizeof(lwlock_stats_key);
     346             :     ctl.entrysize = sizeof(lwlock_stats);
     347             :     ctl.hcxt = lwlock_stats_cxt;
     348             :     lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
     349             :                                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     350             :     if (!exit_registered)
     351             :     {
     352             :         on_shmem_exit(print_lwlock_stats, 0);
     353             :         exit_registered = true;
     354             :     }
     355             : }
     356             : 
     357             : static void
     358             : print_lwlock_stats(int code, Datum arg)
     359             : {
     360             :     HASH_SEQ_STATUS scan;
     361             :     lwlock_stats *lwstats;
     362             : 
     363             :     hash_seq_init(&scan, lwlock_stats_htab);
     364             : 
     365             :     /* Grab an LWLock to keep different backends from mixing reports */
     366             :     LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
     367             : 
     368             :     while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
     369             :     {
     370             :         fprintf(stderr,
     371             :                 "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
     372             :                 MyProcPid, GetLWTrancheName(lwstats->key.tranche),
     373             :                 lwstats->key.instance, lwstats->sh_acquire_count,
     374             :                 lwstats->ex_acquire_count, lwstats->block_count,
     375             :                 lwstats->spin_delay_count, lwstats->dequeue_self_count);
     376             :     }
     377             : 
     378             :     LWLockRelease(&MainLWLockArray[0].lock);
     379             : }
     380             : 
     381             : static lwlock_stats *
     382             : get_lwlock_stats_entry(LWLock *lock)
     383             : {
     384             :     lwlock_stats_key key;
     385             :     lwlock_stats *lwstats;
     386             :     bool        found;
     387             : 
     388             :     /*
     389             :      * During shared memory initialization, the hash table doesn't exist yet.
     390             :      * Stats of that phase aren't very interesting, so just collect operations
     391             :      * on all locks in a single dummy entry.
     392             :      */
     393             :     if (lwlock_stats_htab == NULL)
     394             :         return &lwlock_stats_dummy;
     395             : 
     396             :     /* Fetch or create the entry. */
     397             :     MemSet(&key, 0, sizeof(key));
     398             :     key.tranche = lock->tranche;
     399             :     key.instance = lock;
     400             :     lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
     401             :     if (!found)
     402             :     {
     403             :         lwstats->sh_acquire_count = 0;
     404             :         lwstats->ex_acquire_count = 0;
     405             :         lwstats->block_count = 0;
     406             :         lwstats->dequeue_self_count = 0;
     407             :         lwstats->spin_delay_count = 0;
     408             :     }
     409             :     return lwstats;
     410             : }
     411             : #endif                          /* LWLOCK_STATS */
     412             : 
     413             : 
     414             : /*
     415             :  * Compute number of LWLocks required by named tranches.  These will be
     416             :  * allocated in the main array.
     417             :  */
     418             : static int
     419        8106 : NumLWLocksForNamedTranches(void)
     420             : {
     421        8106 :     int         numLocks = 0;
     422             :     int         i;
     423             : 
     424        8162 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     425          56 :         numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     426             : 
     427        8106 :     return numLocks;
     428             : }
     429             : 
     430             : /*
     431             :  * Compute shmem space needed for LWLocks and named tranches.
     432             :  */
     433             : Size
     434        6006 : LWLockShmemSize(void)
     435             : {
     436             :     Size        size;
     437             :     int         i;
     438        6006 :     int         numLocks = NUM_FIXED_LWLOCKS;
     439             : 
     440             :     /* Calculate total number of locks needed in the main array. */
     441        6006 :     numLocks += NumLWLocksForNamedTranches();
     442             : 
     443             :     /* Space for the LWLock array. */
     444        6006 :     size = mul_size(numLocks, sizeof(LWLockPadded));
     445             : 
     446             :     /* Space for dynamic allocation counter, plus room for alignment. */
     447        6006 :     size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
     448             : 
     449             :     /* space for named tranches. */
     450        6006 :     size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
     451             : 
     452             :     /* space for name of each tranche. */
     453        6048 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     454          42 :         size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
     455             : 
     456        6006 :     return size;
     457             : }
     458             : 
     459             : /*
     460             :  * Allocate shmem space for the main LWLock array and all tranches and
     461             :  * initialize it.  We also register extension LWLock tranches here.
     462             :  */
     463             : void
     464        2100 : CreateLWLocks(void)
     465             : {
     466        2100 :     if (!IsUnderPostmaster)
     467             :     {
     468        2100 :         Size        spaceLocks = LWLockShmemSize();
     469             :         int        *LWLockCounter;
     470             :         char       *ptr;
     471             : 
     472             :         /* Allocate space */
     473        2100 :         ptr = (char *) ShmemAlloc(spaceLocks);
     474             : 
     475             :         /* Leave room for dynamic allocation of tranches */
     476        2100 :         ptr += sizeof(int);
     477             : 
     478             :         /* Ensure desired alignment of LWLock array */
     479        2100 :         ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
     480             : 
     481        2100 :         MainLWLockArray = (LWLockPadded *) ptr;
     482             : 
     483             :         /*
     484             :          * Initialize the dynamic-allocation counter for tranches, which is
     485             :          * stored just before the first LWLock.
     486             :          */
     487        2100 :         LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     488        2100 :         *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
     489             : 
     490             :         /* Initialize all LWLocks */
     491        2100 :         InitializeLWLocks();
     492             :     }
     493             : 
     494             :     /* Register named extension LWLock tranches in the current process. */
     495        2114 :     for (int i = 0; i < NamedLWLockTrancheRequests; i++)
     496          14 :         LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
     497          14 :                               NamedLWLockTrancheArray[i].trancheName);
     498        2100 : }
     499             : 
     500             : /*
     501             :  * Initialize LWLocks that are fixed and those belonging to named tranches.
     502             :  */
     503             : static void
     504        2100 : InitializeLWLocks(void)
     505             : {
     506        2100 :     int         numNamedLocks = NumLWLocksForNamedTranches();
     507             :     int         id;
     508             :     int         i;
     509             :     int         j;
     510             :     LWLockPadded *lock;
     511             : 
     512             :     /* Initialize all individual LWLocks in main array */
     513      115500 :     for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
     514      113400 :         LWLockInitialize(&lock->lock, id);
     515             : 
     516             :     /* Initialize buffer mapping LWLocks in main array */
     517        2100 :     lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
     518      270900 :     for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
     519      268800 :         LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
     520             : 
     521             :     /* Initialize lmgrs' LWLocks in main array */
     522        2100 :     lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
     523       35700 :     for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
     524       33600 :         LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
     525             : 
     526             :     /* Initialize predicate lmgrs' LWLocks in main array */
     527        2100 :     lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
     528       35700 :     for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
     529       33600 :         LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
     530             : 
     531             :     /*
     532             :      * Copy the info about any named tranches into shared memory (so that
     533             :      * other processes can see it), and initialize the requested LWLocks.
     534             :      */
     535        2100 :     if (NamedLWLockTrancheRequests > 0)
     536             :     {
     537             :         char       *trancheNames;
     538             : 
     539          14 :         NamedLWLockTrancheArray = (NamedLWLockTranche *)
     540          14 :             &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
     541             : 
     542          14 :         trancheNames = (char *) NamedLWLockTrancheArray +
     543          14 :             (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
     544          14 :         lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
     545             : 
     546          28 :         for (i = 0; i < NamedLWLockTrancheRequests; i++)
     547             :         {
     548             :             NamedLWLockTrancheRequest *request;
     549             :             NamedLWLockTranche *tranche;
     550             :             char       *name;
     551             : 
     552          14 :             request = &NamedLWLockTrancheRequestArray[i];
     553          14 :             tranche = &NamedLWLockTrancheArray[i];
     554             : 
     555          14 :             name = trancheNames;
     556          14 :             trancheNames += strlen(request->tranche_name) + 1;
     557          14 :             strcpy(name, request->tranche_name);
     558          14 :             tranche->trancheId = LWLockNewTrancheId();
     559          14 :             tranche->trancheName = name;
     560             : 
     561          28 :             for (j = 0; j < request->num_lwlocks; j++, lock++)
     562          14 :                 LWLockInitialize(&lock->lock, tranche->trancheId);
     563             :         }
     564             :     }
     565        2100 : }
     566             : 
     567             : /*
     568             :  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
     569             :  */
     570             : void
     571       43174 : InitLWLockAccess(void)
     572             : {
     573             : #ifdef LWLOCK_STATS
     574             :     init_lwlock_stats();
     575             : #endif
     576       43174 : }
     577             : 
     578             : /*
      579             :  * GetNamedLWLockTranche - returns the base address of the LWLock array
      580             :  *      belonging to the specified tranche.
      581             :  *
      582             :  * The caller can access the requested number of LWLocks starting from the
      583             :  * base address returned by this function.  This works only for tranches
      584             :  * that were requested via RequestNamedLWLockTranche().
     585             :  */
     586             : LWLockPadded *
     587          14 : GetNamedLWLockTranche(const char *tranche_name)
     588             : {
     589             :     int         lock_pos;
     590             :     int         i;
     591             : 
     592             :     /*
      593             :      * Obtain the position of the base address of the LWLocks belonging to
      594             :      * the requested tranche_name in MainLWLockArray.  LWLocks for named
      595             :      * tranches are placed after the fixed locks in MainLWLockArray.
     596             :      */
     597          14 :     lock_pos = NUM_FIXED_LWLOCKS;
     598          14 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     599             :     {
     600          14 :         if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
     601             :                    tranche_name) == 0)
     602          14 :             return &MainLWLockArray[lock_pos];
     603             : 
     604           0 :         lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     605             :     }
     606             : 
     607           0 :     elog(ERROR, "requested tranche is not registered");
     608             : 
     609             :     /* just to keep compiler quiet */
     610             :     return NULL;
     611             : }
     612             : 
     613             : /*
     614             :  * Allocate a new tranche ID.
     615             :  */
     616             : int
     617          34 : LWLockNewTrancheId(void)
     618             : {
     619             :     int         result;
     620             :     int        *LWLockCounter;
     621             : 
     622          34 :     LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     623             :     /* We use the ShmemLock spinlock to protect LWLockCounter */
     624          34 :     SpinLockAcquire(ShmemLock);
     625          34 :     result = (*LWLockCounter)++;
     626          34 :     SpinLockRelease(ShmemLock);
     627             : 
     628          34 :     return result;
     629             : }
     630             : 
     631             : /*
     632             :  * Register a dynamic tranche name in the lookup table of the current process.
     633             :  *
     634             :  * This routine will save a pointer to the tranche name passed as an argument,
     635             :  * so the name should be allocated in a backend-lifetime context
     636             :  * (shared memory, TopMemoryContext, static constant, or similar).
     637             :  *
     638             :  * The tranche name will be user-visible as a wait event name, so try to
     639             :  * use a name that fits the style for those.
     640             :  */
     641             : void
     642          40 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
     643             : {
     644             :     /* This should only be called for user-defined tranches. */
     645          40 :     if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
     646           0 :         return;
     647             : 
     648             :     /* Convert to array index. */
     649          40 :     tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
     650             : 
     651             :     /* If necessary, create or enlarge array. */
     652          40 :     if (tranche_id >= LWLockTrancheNamesAllocated)
     653             :     {
     654             :         int         newalloc;
     655             : 
     656          34 :         newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
     657             : 
     658          34 :         if (LWLockTrancheNames == NULL)
     659          34 :             LWLockTrancheNames = (const char **)
     660          34 :                 MemoryContextAllocZero(TopMemoryContext,
     661             :                                        newalloc * sizeof(char *));
     662             :         else
     663           0 :             LWLockTrancheNames =
     664           0 :                 repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
     665          34 :         LWLockTrancheNamesAllocated = newalloc;
     666             :     }
     667             : 
     668          40 :     LWLockTrancheNames[tranche_id] = tranche_name;
     669             : }
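
For locks living in extension-managed shared memory, LWLockNewTrancheId(), LWLockRegisterTranche() and LWLockInitialize() combine as in the following sketch. MyExtState and my_ext_init_state() are hypothetical names; the pattern assumes the structure has already been carved out of shared memory, with the tranche ID stored there so every backend can register the same name locally.

    #include "postgres.h"
    #include "storage/lwlock.h"

    typedef struct MyExtState
    {
        int         tranche_id;     /* stored in shmem so all backends see it */
        LWLock      lock;
    } MyExtState;

    static void
    my_ext_init_state(MyExtState *state, bool first_time)
    {
        if (first_time)
        {
            /* One process allocates the ID and initializes the lock. */
            state->tranche_id = LWLockNewTrancheId();
            LWLockInitialize(&state->lock, state->tranche_id);
        }

        /* The name lookup table is per-process, so every backend registers. */
        LWLockRegisterTranche(state->tranche_id, "MyExtState");
    }
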
     670             : 
     671             : /*
     672             :  * RequestNamedLWLockTranche
     673             :  *      Request that extra LWLocks be allocated during postmaster
     674             :  *      startup.
     675             :  *
     676             :  * This may only be called via the shmem_request_hook of a library that is
     677             :  * loaded into the postmaster via shared_preload_libraries.  Calls from
     678             :  * elsewhere will fail.
     679             :  *
     680             :  * The tranche name will be user-visible as a wait event name, so try to
     681             :  * use a name that fits the style for those.
     682             :  */
     683             : void
     684          14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
     685             : {
     686             :     NamedLWLockTrancheRequest *request;
     687             : 
     688          14 :     if (!process_shmem_requests_in_progress)
     689           0 :         elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
     690             : 
     691          14 :     if (NamedLWLockTrancheRequestArray == NULL)
     692             :     {
     693          14 :         NamedLWLockTrancheRequestsAllocated = 16;
     694          14 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     695          14 :             MemoryContextAlloc(TopMemoryContext,
     696             :                                NamedLWLockTrancheRequestsAllocated
     697             :                                * sizeof(NamedLWLockTrancheRequest));
     698             :     }
     699             : 
     700          14 :     if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
     701             :     {
     702           0 :         int         i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
     703             : 
     704           0 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     705           0 :             repalloc(NamedLWLockTrancheRequestArray,
     706             :                      i * sizeof(NamedLWLockTrancheRequest));
     707           0 :         NamedLWLockTrancheRequestsAllocated = i;
     708             :     }
     709             : 
     710          14 :     request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
     711             :     Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
     712          14 :     strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
     713          14 :     request->num_lwlocks = num_lwlocks;
     714          14 :     NamedLWLockTrancheRequests++;
     715          14 : }
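
Putting RequestNamedLWLockTranche() together with GetNamedLWLockTranche() from earlier, a shared_preload_libraries extension might wire things up roughly as follows. This is a sketch: the hook chaining follows the usual extension pattern, and "my_ext" and the function names are placeholders.

    #include "postgres.h"
    #include "fmgr.h"
    #include "miscadmin.h"
    #include "storage/ipc.h"
    #include "storage/lwlock.h"

    PG_MODULE_MAGIC;

    static shmem_request_hook_type prev_shmem_request_hook = NULL;
    static LWLock *my_ext_lock = NULL;

    static void
    my_ext_shmem_request(void)
    {
        if (prev_shmem_request_hook)
            prev_shmem_request_hook();

        /* Reserve one LWLock in a tranche named "my_ext". */
        RequestNamedLWLockTranche("my_ext", 1);
    }

    void
    _PG_init(void)
    {
        if (!process_shared_preload_libraries_in_progress)
            return;

        prev_shmem_request_hook = shmem_request_hook;
        shmem_request_hook = my_ext_shmem_request;
    }

    /* Called once shared memory exists, e.g. from a shmem_startup_hook. */
    static void
    my_ext_attach(void)
    {
        my_ext_lock = &(GetNamedLWLockTranche("my_ext")->lock);
    }
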
     716             : 
     717             : /*
     718             :  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
     719             :  */
     720             : void
     721    24771240 : LWLockInitialize(LWLock *lock, int tranche_id)
     722             : {
     723    24771240 :     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
     724             : #ifdef LOCK_DEBUG
     725             :     pg_atomic_init_u32(&lock->nwaiters, 0);
     726             : #endif
     727    24771240 :     lock->tranche = tranche_id;
     728    24771240 :     proclist_init(&lock->waiters);
     729    24771240 : }
     730             : 
     731             : /*
     732             :  * Report start of wait event for light-weight locks.
     733             :  *
      734             :  * This function is used by all the lightweight lock calls that need
      735             :  * to wait to acquire the lock.  It distinguishes the wait event based
      736             :  * on the tranche and lock id.
     737             :  */
     738             : static inline void
     739     4492510 : LWLockReportWaitStart(LWLock *lock)
     740             : {
     741     4492510 :     pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
     742     4492510 : }
     743             : 
     744             : /*
     745             :  * Report end of wait event for light-weight locks.
     746             :  */
     747             : static inline void
     748     4492510 : LWLockReportWaitEnd(void)
     749             : {
     750     4492510 :     pgstat_report_wait_end();
     751     4492510 : }
     752             : 
     753             : /*
     754             :  * Return the name of an LWLock tranche.
     755             :  */
     756             : static const char *
     757          52 : GetLWTrancheName(uint16 trancheId)
     758             : {
     759             :     /* Built-in tranche or individual LWLock? */
     760          52 :     if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
     761          52 :         return BuiltinTrancheNames[trancheId];
     762             : 
     763             :     /*
     764             :      * It's an extension tranche, so look in LWLockTrancheNames[].  However,
     765             :      * it's possible that the tranche has never been registered in the current
     766             :      * process, in which case give up and return "extension".
     767             :      */
     768           0 :     trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
     769             : 
     770           0 :     if (trancheId >= LWLockTrancheNamesAllocated ||
     771           0 :         LWLockTrancheNames[trancheId] == NULL)
     772           0 :         return "extension";
     773             : 
     774           0 :     return LWLockTrancheNames[trancheId];
     775             : }
     776             : 
     777             : /*
     778             :  * Return an identifier for an LWLock based on the wait class and event.
     779             :  */
     780             : const char *
     781          52 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
     782             : {
     783             :     Assert(classId == PG_WAIT_LWLOCK);
     784             :     /* The event IDs are just tranche numbers. */
     785          52 :     return GetLWTrancheName(eventId);
     786             : }
     787             : 
     788             : /*
      789             :  * Internal function that tries to atomically acquire the lwlock in the
      790             :  * passed-in mode.
     791             :  *
     792             :  * This function will not block waiting for a lock to become free - that's the
     793             :  * caller's job.
     794             :  *
     795             :  * Returns true if the lock isn't free and we need to wait.
     796             :  */
     797             : static bool
     798   726922502 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     799             : {
     800             :     uint32      old_state;
     801             : 
     802             :     Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
     803             : 
     804             :     /*
     805             :      * Read once outside the loop, later iterations will get the newer value
     806             :      * via compare & exchange.
     807             :      */
     808   726922502 :     old_state = pg_atomic_read_u32(&lock->state);
     809             : 
     810             :     /* loop until we've determined whether we could acquire the lock or not */
     811             :     while (true)
     812      429162 :     {
     813             :         uint32      desired_state;
     814             :         bool        lock_free;
     815             : 
     816   727351664 :         desired_state = old_state;
     817             : 
     818   727351664 :         if (mode == LW_EXCLUSIVE)
     819             :         {
     820   447559460 :             lock_free = (old_state & LW_LOCK_MASK) == 0;
     821   447559460 :             if (lock_free)
     822   444967084 :                 desired_state += LW_VAL_EXCLUSIVE;
     823             :         }
     824             :         else
     825             :         {
     826   279792204 :             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
     827   279792204 :             if (lock_free)
     828   273258788 :                 desired_state += LW_VAL_SHARED;
     829             :         }
     830             : 
     831             :         /*
      832             :          * Attempt to swap in the state we are expecting. If we didn't see
      833             :          * the lock as free, that's just the old value. If we saw it as free,
      834             :          * we'll attempt to mark it acquired. The reason that we always swap
      835             :          * in the value is that this doubles as a memory barrier. We could try
      836             :          * to be smarter and only swap in values if we saw the lock as free,
      837             :          * but benchmarks haven't shown that to be beneficial so far.
     838             :          *
     839             :          * Retry if the value changed since we last looked at it.
     840             :          */
     841   727351664 :         if (pg_atomic_compare_exchange_u32(&lock->state,
     842             :                                            &old_state, desired_state))
     843             :         {
     844   726922502 :             if (lock_free)
     845             :             {
     846             :                 /* Great! Got the lock. */
     847             : #ifdef LOCK_DEBUG
     848             :                 if (mode == LW_EXCLUSIVE)
     849             :                     lock->owner = MyProc;
     850             : #endif
     851   717917186 :                 return false;
     852             :             }
     853             :             else
     854     9005316 :                 return true;    /* somebody else has the lock */
     855             :         }
     856             :     }
     857             :     pg_unreachable();
     858             : }
     859             : 
     860             : /*
     861             :  * Lock the LWLock's wait list against concurrent activity.
     862             :  *
     863             :  * NB: even though the wait list is locked, non-conflicting lock operations
     864             :  * may still happen concurrently.
     865             :  *
      866             :  * Time spent holding the mutex should be short!
     867             :  */
     868             : static void
     869    14821206 : LWLockWaitListLock(LWLock *lock)
     870             : {
     871             :     uint32      old_state;
     872             : #ifdef LWLOCK_STATS
     873             :     lwlock_stats *lwstats;
     874             :     uint32      delays = 0;
     875             : 
     876             :     lwstats = get_lwlock_stats_entry(lock);
     877             : #endif
     878             : 
     879             :     while (true)
     880             :     {
     881             :         /* always try once to acquire lock directly */
     882    14821206 :         old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
     883    14821206 :         if (!(old_state & LW_FLAG_LOCKED))
     884    14717038 :             break;              /* got lock */
     885             : 
     886             :         /* and then spin without atomic operations until lock is released */
     887             :         {
     888             :             SpinDelayStatus delayStatus;
     889             : 
     890      104168 :             init_local_spin_delay(&delayStatus);
     891             : 
     892      335284 :             while (old_state & LW_FLAG_LOCKED)
     893             :             {
     894      231116 :                 perform_spin_delay(&delayStatus);
     895      231116 :                 old_state = pg_atomic_read_u32(&lock->state);
     896             :             }
     897             : #ifdef LWLOCK_STATS
     898             :             delays += delayStatus.delays;
     899             : #endif
     900      104168 :             finish_spin_delay(&delayStatus);
     901             :         }
     902             : 
     903             :         /*
     904             :          * Retry. The lock might obviously already be re-acquired by the time
      905             :          * Retry. The lock may well have been re-acquired by the time we
      906             :          * attempt to get it again.
     907             :     }
     908             : 
     909             : #ifdef LWLOCK_STATS
     910             :     lwstats->spin_delay_count += delays;
     911             : #endif
     912    14717038 : }
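
In isolation, the pattern above (embedding a spinlock in one bit of the atomic state word) looks roughly like this. A minimal sketch in C11 atomics, with the adaptive perform_spin_delay() backoff reduced to a bare busy-wait:

    #include <stdatomic.h>
    #include <stdint.h>

    #define FLAG_LOCKED (1u << 29)

    static void
    waitlist_lock(atomic_uint *state)
    {
        for (;;)
        {
            /* One atomic attempt: set the bit and inspect the prior value. */
            uint32_t    old = atomic_fetch_or(state, FLAG_LOCKED);

            if (!(old & FLAG_LOCKED))
                return;             /* bit was clear before: we now hold it */

            /*
             * Spin on plain reads, avoiding further atomic write traffic,
             * until the current holder clears the bit; then try the
             * fetch-or again.
             */
            while (atomic_load_explicit(state, memory_order_relaxed) & FLAG_LOCKED)
                ;                   /* a real implementation backs off here */
        }
    }

    static void
    waitlist_unlock(atomic_uint *state)
    {
        atomic_fetch_and(state, ~FLAG_LOCKED);
    }
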
     913             : 
     914             : /*
     915             :  * Unlock the LWLock's wait list.
     916             :  *
     917             :  * Note that it can be more efficient to manipulate flags and release the
     918             :  * locks in a single atomic operation.
     919             :  */
     920             : static void
     921     9828202 : LWLockWaitListUnlock(LWLock *lock)
     922             : {
     923             :     uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
     924             : 
     925     9828202 :     old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
     926             : 
     927             :     Assert(old_state & LW_FLAG_LOCKED);
     928     9828202 : }
     929             : 
     930             : /*
      931             :  * Wake up all the lockers that currently have a chance to acquire the lock.
     932             :  */
     933             : static void
     934     4888836 : LWLockWakeup(LWLock *lock)
     935             : {
     936             :     bool        new_release_ok;
     937     4888836 :     bool        wokeup_somebody = false;
     938             :     proclist_head wakeup;
     939             :     proclist_mutable_iter iter;
     940             : 
     941     4888836 :     proclist_init(&wakeup);
     942             : 
     943     4888836 :     new_release_ok = true;
     944             : 
     945             :     /* lock wait list while collecting backends to wake up */
     946     4888836 :     LWLockWaitListLock(lock);
     947             : 
     948     8185596 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
     949             :     {
     950     4524744 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     951             : 
     952     4524744 :         if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
     953        4350 :             continue;
     954             : 
     955     4520394 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
     956     4520394 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
     957             : 
     958     4520394 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
     959             :         {
     960             :             /*
     961             :              * Prevent additional wakeups until retryer gets to run. Backends
     962             :              * that are just waiting for the lock to become free don't retry
     963             :              * automatically.
     964             :              */
     965     4454988 :             new_release_ok = false;
     966             : 
     967             :             /*
     968             :              * Don't wakeup (further) exclusive locks.
     969             :              */
     970     4454988 :             wokeup_somebody = true;
     971             :         }
     972             : 
     973             :         /*
     974             :          * Signal that the process isn't on the wait list anymore. This allows
      975             :          * LWLockDequeueSelf() to remove itself from the waitlist with a
     976             :          * proclist_delete(), rather than having to check if it has been
     977             :          * removed from the list.
     978             :          */
     979             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
     980     4520394 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
     981             : 
     982             :         /*
      983             :          * Once we've woken up an exclusive waiter, there's no point in waking
     984             :          * up anybody else.
     985             :          */
     986     4520394 :         if (waiter->lwWaitMode == LW_EXCLUSIVE)
     987     1227984 :             break;
     988             :     }
     989             : 
     990             :     Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
     991             : 
     992             :     /* unset required flags, and release lock, in one fell swoop */
     993             :     {
     994             :         uint32      old_state;
     995             :         uint32      desired_state;
     996             : 
     997     4888836 :         old_state = pg_atomic_read_u32(&lock->state);
     998             :         while (true)
     999             :         {
    1000     4929460 :             desired_state = old_state;
    1001             : 
    1002             :             /* compute desired flags */
    1003             : 
    1004     4929460 :             if (new_release_ok)
    1005      485392 :                 desired_state |= LW_FLAG_RELEASE_OK;
    1006             :             else
    1007     4444068 :                 desired_state &= ~LW_FLAG_RELEASE_OK;
    1008             : 
    1009     4929460 :             if (proclist_is_empty(&wakeup))
    1010      448786 :                 desired_state &= ~LW_FLAG_HAS_WAITERS;
    1011             : 
    1012     4929460 :             desired_state &= ~LW_FLAG_LOCKED;   /* release lock */
    1013             : 
    1014     4929460 :             if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
    1015             :                                                desired_state))
    1016     4888836 :                 break;
    1017             :         }
    1018             :     }
    1019             : 
    1020             :     /* Awaken any waiters I removed from the queue. */
    1021     9409230 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1022             :     {
    1023     4520394 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1024             : 
    1025             :         LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
    1026     4520394 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1027             : 
    1028             :         /*
    1029             :          * Guarantee that lwWaiting being unset only becomes visible once the
    1030             :          * unlink from the list has completed. Otherwise the target backend
    1031             :          * could be woken up for some other reason and enqueue itself for a
    1032             :          * new lock - if that happens before the list unlink happens, the
    1033             :          * list would end up being corrupted.
    1034             :          *
    1035             :          * The barrier pairs with the LWLockWaitListLock() when enqueuing for
    1036             :          * another lock.
    1037             :          */
    1038     4520394 :         pg_write_barrier();
    1039     4520394 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1040     4520394 :         PGSemaphoreUnlock(waiter->sem);
    1041             :     }
    1042     4888836 : }
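
The "one fell swoop" block above is the standard compare-and-exchange retry loop over the lock's atomic state word. A minimal sketch of that pattern using PostgreSQL's atomics API on a hypothetical state word (clear_flags and its arguments are illustrative, not part of lwlock.c):

    /* Sketch: atomically clear flag bits in a 32-bit atomic state word. */
    static void
    clear_flags(pg_atomic_uint32 *state, uint32 flags_to_clear)
    {
        uint32      old_state = pg_atomic_read_u32(state);

        while (true)
        {
            uint32      desired_state = old_state & ~flags_to_clear;

            /* on failure, old_state is refreshed to the current value */
            if (pg_atomic_compare_exchange_u32(state, &old_state,
                                               desired_state))
                break;
        }
    }

Because a failed compare-exchange reloads old_state, the loop converges without re-reading the atomic explicitly, exactly as in the block above.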
    1043             : 
    1044             : /*
    1045             :  * Add ourselves to the end of the queue.
    1046             :  *
    1047             :  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
    1048             :  */
    1049             : static void
    1050     4631630 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
    1051             : {
    1052             :     /*
    1053             :      * If we don't have a PGPROC structure, there's no way to wait. This
    1054             :      * should never occur, since MyProc should only be null during shared
    1055             :      * memory initialization.
    1056             :      */
    1057     4631630 :     if (MyProc == NULL)
    1058           0 :         elog(PANIC, "cannot wait without a PGPROC structure");
    1059             : 
    1060     4631630 :     if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
    1061           0 :         elog(PANIC, "queueing for lock while waiting on another one");
    1062             : 
    1063     4631630 :     LWLockWaitListLock(lock);
    1064             : 
    1065             :     /* setting the flag is protected by the spinlock */
    1066     4631630 :     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
    1067             : 
    1068     4631630 :     MyProc->lwWaiting = LW_WS_WAITING;
    1069     4631630 :     MyProc->lwWaitMode = mode;
    1070             : 
    1071             :     /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
    1072     4631630 :     if (mode == LW_WAIT_UNTIL_FREE)
    1073       72640 :         proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
    1074             :     else
    1075     4558990 :         proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
    1076             : 
    1077             :     /* Can release the mutex now */
    1078     4631630 :     LWLockWaitListUnlock(lock);
    1079             : 
    1080             : #ifdef LOCK_DEBUG
    1081             :     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
    1082             : #endif
    1083     4631630 : }
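
The waiters list is manipulated through the proclist API (storage/proclist.h), which links PGPROC entries by backend number through a named link member. A rough fragment, assuming lwlock.c's usual includes (list and iter are local placeholders; in lwlock.c the link member is always lwWaitLink):

    proclist_head list;
    proclist_mutable_iter iter;

    proclist_init(&list);                                /* empty list */
    proclist_push_tail(&list, MyProcNumber, lwWaitLink); /* enqueue ourselves */

    /* deletion-safe iteration, as used by LWLockWakeup() */
    proclist_foreach_modify(iter, &list, lwWaitLink)
        proclist_delete(&list, iter.cur, lwWaitLink);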
    1084             : 
    1085             : /*
    1086             :  * Remove ourselves from the waitlist.
    1087             :  *
    1088             :  * This is used if we queued ourselves because we thought we needed to sleep
    1089             :  * but, after further checking, we discovered that we don't actually need to
    1090             :  * do so.
    1091             :  */
    1092             : static void
    1093      139120 : LWLockDequeueSelf(LWLock *lock)
    1094             : {
    1095             :     bool        on_waitlist;
    1096             : 
    1097             : #ifdef LWLOCK_STATS
    1098             :     lwlock_stats *lwstats;
    1099             : 
    1100             :     lwstats = get_lwlock_stats_entry(lock);
    1101             : 
    1102             :     lwstats->dequeue_self_count++;
    1103             : #endif
    1104             : 
    1105      139120 :     LWLockWaitListLock(lock);
    1106             : 
    1107             :     /*
    1108             :      * Remove ourselves from the waitlist, unless we've already been removed.
    1109             :      * The removal happens with the wait list lock held, so there's no race in
    1110             :      * this check.
    1111             :      */
    1112      139120 :     on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
    1113      139120 :     if (on_waitlist)
    1114      108542 :         proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
    1115             : 
    1116      139120 :     if (proclist_is_empty(&lock->waiters) &&
    1117      135290 :         (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
    1118             :     {
    1119      135270 :         pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
    1120             :     }
    1121             : 
    1122             :     /* XXX: combine with fetch_and above? */
    1123      139120 :     LWLockWaitListUnlock(lock);
    1124             : 
    1125             :     /* clear waiting state again, nice for debugging */
    1126      139120 :     if (on_waitlist)
    1127      108542 :         MyProc->lwWaiting = LW_WS_NOT_WAITING;
    1128             :     else
    1129             :     {
    1130       30578 :         int         extraWaits = 0;
    1131             : 
    1132             :         /*
    1133             :          * Somebody else dequeued us and has woken us up, or will do so
    1134             :          * shortly. Absorb the resulting superfluous wakeup.
    1135             :          */
    1136             : 
    1137             :         /*
    1138             :          * Reset RELEASE_OK flag if somebody woke us before we removed
    1139             :          * ourselves - they'll have set it to false.
    1140             :          */
    1141       30578 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1142             : 
    1143             :         /*
    1144             :          * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
    1145             :          * get reset at some inconvenient point later. Most of the time this
    1146             :          * will immediately return.
    1147             :          */
    1148             :         for (;;)
    1149             :         {
    1150       30578 :             PGSemaphoreLock(MyProc->sem);
    1151       30578 :             if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
    1152       30578 :                 break;
    1153           0 :             extraWaits++;
    1154             :         }
    1155             : 
    1156             :         /*
    1157             :          * Fix the process wait semaphore's count for any absorbed wakeups.
    1158             :          */
    1159       30578 :         while (extraWaits-- > 0)
    1160           0 :             PGSemaphoreUnlock(MyProc->sem);
    1161             :     }
    1162             : 
    1163             : #ifdef LOCK_DEBUG
    1164             :     {
    1165             :         /* not waiting anymore */
    1166             :         uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1167             : 
    1168             :         Assert(nwaiters < MAX_BACKENDS);
    1169             :     }
    1170             : #endif
    1171      139120 : }
    1172             : 
    1173             : /*
    1174             :  * LWLockAcquire - acquire a lightweight lock in the specified mode
    1175             :  *
    1176             :  * If the lock is not available, sleep until it is.  Returns true if the lock
    1177             :  * was available immediately, false if we had to sleep.
    1178             :  *
    1179             :  * Side effect: cancel/die interrupts are held off until lock release.
    1180             :  */
    1181             : bool
    1182   712910410 : LWLockAcquire(LWLock *lock, LWLockMode mode)
    1183             : {
    1184   712910410 :     PGPROC     *proc = MyProc;
    1185   712910410 :     bool        result = true;
    1186   712910410 :     int         extraWaits = 0;
    1187             : #ifdef LWLOCK_STATS
    1188             :     lwlock_stats *lwstats;
    1189             : 
    1190             :     lwstats = get_lwlock_stats_entry(lock);
    1191             : #endif
    1192             : 
    1193             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1194             : 
    1195             :     PRINT_LWDEBUG("LWLockAcquire", lock, mode);
    1196             : 
    1197             : #ifdef LWLOCK_STATS
    1198             :     /* Count lock acquisition attempts */
    1199             :     if (mode == LW_EXCLUSIVE)
    1200             :         lwstats->ex_acquire_count++;
    1201             :     else
    1202             :         lwstats->sh_acquire_count++;
    1203             : #endif                          /* LWLOCK_STATS */
    1204             : 
    1205             :     /*
    1206             :      * We can't wait if we haven't got a PGPROC.  This should only occur
    1207             :      * during bootstrap or shared memory initialization.  Put an Assert here
    1208             :      * to catch unsafe coding practices.
    1209             :      */
    1210             :     Assert(!(proc == NULL && IsUnderPostmaster));
    1211             : 
    1212             :     /* Ensure we will have room to remember the lock */
    1213   712910410 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1214           0 :         elog(ERROR, "too many LWLocks taken");
    1215             : 
    1216             :     /*
    1217             :      * Lock out cancel/die interrupts until we exit the code section protected
    1218             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1219             :      * manipulations of data structures in shared memory.
    1220             :      */
    1221   712910410 :     HOLD_INTERRUPTS();
    1222             : 
    1223             :     /*
    1224             :      * Loop here to try to acquire lock after each time we are signaled by
    1225             :      * LWLockRelease.
    1226             :      *
    1227             :      * NOTE: it might seem better to have LWLockRelease actually grant us the
    1228             :      * lock, rather than retrying and possibly having to go back to sleep. But
    1229             :      * in practice that is no good because it means a process swap for every
    1230             :      * lock acquisition when two or more processes are contending for the same
    1231             :      * lock.  Since LWLocks are normally used to protect not-very-long
    1232             :      * sections of computation, a process needs to be able to acquire and
    1233             :      * release the same lock many times during a single CPU time slice, even
    1234             :      * in the presence of contention.  The efficiency of being able to do that
    1235             :      * outweighs the inefficiency of sometimes wasting a process dispatch
    1236             :      * cycle because the lock is not free when a released waiter finally gets
    1237             :      * to run.  See pgsql-hackers archives for 29-Dec-01.
    1238             :      */
    1239             :     for (;;)
    1240     4425686 :     {
    1241             :         bool        mustwait;
    1242             : 
    1243             :         /*
    1244             :          * Try to grab the lock the first time; we're not in the wait queue
    1245             :          * yet/anymore.
    1246             :          */
    1247   717336096 :         mustwait = LWLockAttemptLock(lock, mode);
    1248             : 
    1249   717336096 :         if (!mustwait)
    1250             :         {
    1251             :             LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
    1252   712777106 :             break;              /* got the lock */
    1253             :         }
    1254             : 
    1255             :         /*
    1256             :          * Ok, at this point we couldn't grab the lock on the first try. We
    1257             :          * cannot simply queue ourselves at the end of the list and wait to
    1258             :          * be woken up, because by now the lock could long since have been
    1259             :          * released. Instead, add ourselves to the queue and try to grab the
    1260             :          * lock again. If we succeed we need to undo the queueing; otherwise we
    1261             :          * recheck the lock. If we still couldn't grab it, we know that the
    1262             :          * other locker will see our queue entries when releasing since they
    1263             :          * existed before we checked for the lock.
    1264             :          */
    1265             : 
    1266             :         /* add to the queue */
    1267     4558990 :         LWLockQueueSelf(lock, mode);
    1268             : 
    1269             :         /* we're now guaranteed to be woken up if necessary */
    1270     4558990 :         mustwait = LWLockAttemptLock(lock, mode);
    1271             : 
    1272             :         /* ok, grabbed the lock the second time round, need to undo queueing */
    1273     4558990 :         if (!mustwait)
    1274             :         {
    1275             :             LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
    1276             : 
    1277      133304 :             LWLockDequeueSelf(lock);
    1278      133304 :             break;
    1279             :         }
    1280             : 
    1281             :         /*
    1282             :          * Wait until awakened.
    1283             :          *
    1284             :          * It is possible that we get awakened for a reason other than being
    1285             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1286             :          * we've gotten the LWLock, re-increment the sema by the number of
    1287             :          * additional signals received.
    1288             :          */
    1289             :         LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
    1290             : 
    1291             : #ifdef LWLOCK_STATS
    1292             :         lwstats->block_count++;
    1293             : #endif
    1294             : 
    1295     4425686 :         LWLockReportWaitStart(lock);
    1296             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1297             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1298             : 
    1299             :         for (;;)
    1300             :         {
    1301     4425686 :             PGSemaphoreLock(proc->sem);
    1302     4425686 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1303     4425686 :                 break;
    1304           0 :             extraWaits++;
    1305             :         }
    1306             : 
    1307             :         /* Retrying, allow LWLockRelease to release waiters again. */
    1308     4425686 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1309             : 
    1310             : #ifdef LOCK_DEBUG
    1311             :         {
    1312             :             /* not waiting anymore */
    1313             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1314             : 
    1315             :             Assert(nwaiters < MAX_BACKENDS);
    1316             :         }
    1317             : #endif
    1318             : 
    1319             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1320             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1321     4425686 :         LWLockReportWaitEnd();
    1322             : 
    1323             :         LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
    1324             : 
    1325             :         /* Now loop back and try to acquire lock again. */
    1326     4425686 :         result = false;
    1327             :     }
    1328             : 
    1329             :     if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
    1330             :         TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
    1331             : 
    1332             :     /* Add lock to list of locks held by this backend */
    1333   712910410 :     held_lwlocks[num_held_lwlocks].lock = lock;
    1334   712910410 :     held_lwlocks[num_held_lwlocks++].mode = mode;
    1335             : 
    1336             :     /*
    1337             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1338             :      */
    1339   712910410 :     while (extraWaits-- > 0)
    1340           0 :         PGSemaphoreUnlock(proc->sem);
    1341             : 
    1342   712910410 :     return result;
    1343             : }
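
For reference, the canonical caller pattern is a short critical section bracketed by acquire and release. A hedged fragment, where MyCounterLock and MyCounter stand in for a real lock and the shared structure it protects (both names are illustrative):

    LWLockAcquire(MyCounterLock, LW_EXCLUSIVE); /* sleeps until granted */
    MyCounter->value++;                         /* brief shared-memory update */
    LWLockRelease(MyCounterLock);               /* re-enables cancel/die interrupts */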
    1344             : 
    1345             : /*
    1346             :  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
    1347             :  *
    1348             :  * If the lock is not available, return false with no side-effects.
    1349             :  *
    1350             :  * If successful, cancel/die interrupts are held off until lock release.
    1351             :  */
    1352             : bool
    1353     4749886 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    1354             : {
    1355             :     bool        mustwait;
    1356             : 
    1357             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1358             : 
    1359             :     PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
    1360             : 
    1361             :     /* Ensure we will have room to remember the lock */
    1362     4749886 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1363           0 :         elog(ERROR, "too many LWLocks taken");
    1364             : 
    1365             :     /*
    1366             :      * Lock out cancel/die interrupts until we exit the code section protected
    1367             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1368             :      * manipulations of data structures in shared memory.
    1369             :      */
    1370     4749886 :     HOLD_INTERRUPTS();
    1371             : 
    1372             :     /* Check for the lock */
    1373     4749886 :     mustwait = LWLockAttemptLock(lock, mode);
    1374             : 
    1375     4749886 :     if (mustwait)
    1376             :     {
    1377             :         /* Failed to get lock, so release interrupt holdoff */
    1378        1346 :         RESUME_INTERRUPTS();
    1379             : 
    1380             :         LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
    1381             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
    1382             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
    1383             :     }
    1384             :     else
    1385             :     {
    1386             :         /* Add lock to list of locks held by this backend */
    1387     4748540 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1388     4748540 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1389             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
    1390             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
    1391             :     }
    1392     4749886 :     return !mustwait;
    1393             : }
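
A fragment showing the non-blocking pattern this enables (MyLock again illustrative); callers with useful fallback work can avoid sleeping entirely:

    if (LWLockConditionalAcquire(MyLock, LW_EXCLUSIVE))
    {
        /* got it without blocking: do the protected work */
        LWLockRelease(MyLock);
    }
    else
    {
        /* lock busy: skip or defer the work rather than sleep */
    }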
    1394             : 
    1395             : /*
    1396             :  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
    1397             :  *
    1398             :  * The semantics of this function are a bit funky.  If the lock is currently
    1399             :  * free, it is acquired in the given mode, and the function returns true.  If
    1400             :  * the lock isn't immediately free, the function waits until it is released
    1401             :  * and returns false, but does not acquire the lock.
    1402             :  *
    1403             :  * This is currently used for WALWriteLock: when a backend flushes the WAL,
    1404             :  * holding WALWriteLock, it can flush the commit records of many other
    1405             :  * backends as a side-effect.  Those other backends need to wait until the
    1406             :  * flush finishes, but don't need to acquire the lock anymore.  They can just
    1407             :  * wake up, observe that their records have already been flushed, and return.
    1408             :  */
    1409             : bool
    1410      267796 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
    1411             : {
    1412      267796 :     PGPROC     *proc = MyProc;
    1413             :     bool        mustwait;
    1414      267796 :     int         extraWaits = 0;
    1415             : #ifdef LWLOCK_STATS
    1416             :     lwlock_stats *lwstats;
    1417             : 
    1418             :     lwstats = get_lwlock_stats_entry(lock);
    1419             : #endif
    1420             : 
    1421             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1422             : 
    1423             :     PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
    1424             : 
    1425             :     /* Ensure we will have room to remember the lock */
    1426      267796 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1427           0 :         elog(ERROR, "too many LWLocks taken");
    1428             : 
    1429             :     /*
    1430             :      * Lock out cancel/die interrupts until we exit the code section protected
    1431             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1432             :      * manipulations of data structures in shared memory.
    1433             :      */
    1434      267796 :     HOLD_INTERRUPTS();
    1435             : 
    1436             :     /*
    1437             :      * NB: We're using nearly the same twice-in-a-row lock acquisition
    1438             :      * protocol as LWLockAcquire(). Check its comments for details.
    1439             :      */
    1440      267796 :     mustwait = LWLockAttemptLock(lock, mode);
    1441             : 
    1442      267796 :     if (mustwait)
    1443             :     {
    1444        9734 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1445             : 
    1446        9734 :         mustwait = LWLockAttemptLock(lock, mode);
    1447             : 
    1448        9734 :         if (mustwait)
    1449             :         {
    1450             :             /*
    1451             :              * Wait until awakened.  Like in LWLockAcquire, be prepared for
    1452             :              * bogus wakeups.
    1453             :              */
    1454             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
    1455             : 
    1456             : #ifdef LWLOCK_STATS
    1457             :             lwstats->block_count++;
    1458             : #endif
    1459             : 
    1460        9560 :             LWLockReportWaitStart(lock);
    1461             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1462             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1463             : 
    1464             :             for (;;)
    1465             :             {
    1466        9560 :                 PGSemaphoreLock(proc->sem);
    1467        9560 :                 if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1468        9560 :                     break;
    1469           0 :                 extraWaits++;
    1470             :             }
    1471             : 
    1472             : #ifdef LOCK_DEBUG
    1473             :             {
    1474             :                 /* not waiting anymore */
    1475             :                 uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1476             : 
    1477             :                 Assert(nwaiters < MAX_BACKENDS);
    1478             :             }
    1479             : #endif
    1480             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1481             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1482        9560 :             LWLockReportWaitEnd();
    1483             : 
    1484             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
    1485             :         }
    1486             :         else
    1487             :         {
    1488             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
    1489             : 
    1490             :             /*
    1491             :              * Got the lock on the second attempt; undo queueing. We need to treat
    1492             :              * this as having successfully acquired the lock, otherwise we'd
    1493             :              * not necessarily wake up people we've prevented from acquiring
    1494             :              * the lock.
    1495             :              */
    1496         174 :             LWLockDequeueSelf(lock);
    1497             :         }
    1498             :     }
    1499             : 
    1500             :     /*
    1501             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1502             :      */
    1503      267796 :     while (extraWaits-- > 0)
    1504           0 :         PGSemaphoreUnlock(proc->sem);
    1505             : 
    1506      267796 :     if (mustwait)
    1507             :     {
    1508             :         /* Failed to get lock, so release interrupt holdoff */
    1509        9560 :         RESUME_INTERRUPTS();
    1510             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
    1511             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
    1512             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
    1513             :     }
    1514             :     else
    1515             :     {
    1516             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
    1517             :         /* Add lock to list of locks held by this backend */
    1518      258236 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1519      258236 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1520             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
    1521             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
    1522             :     }
    1523             : 
    1524      267796 :     return !mustwait;
    1525             : }
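
A condensed sketch of the WALWriteLock pattern described above; the flush-request bookkeeping is elided and only the control flow is shown:

    if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
    {
        /* we hold the lock: flush WAL, likely covering other backends too */
        LWLockRelease(WALWriteLock);
    }
    else
    {
        /*
         * Somebody else held the lock and has now released it; their flush
         * may already cover our records.  Recheck before flushing ourselves.
         */
    }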
    1526             : 
    1527             : /*
    1528             :  * Does the lwlock in its current state need to wait for the variable value to
    1529             :  * change?
    1530             :  *
    1531             :  * If we don't need to wait, and it's because the value of the variable has
    1532             :  * changed, store the current value in newval.
    1533             :  *
    1534             :  * *result is set to true if the lock was free, and false otherwise.
    1535             :  */
    1536             : static bool
    1537     6248204 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1538             :                        uint64 *newval, bool *result)
    1539             : {
    1540             :     bool        mustwait;
    1541             :     uint64      value;
    1542             : 
    1543             :     /*
    1544             :      * Test first to see if the slot is free right now.
    1545             :      *
    1546             :      * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
    1547             :      * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
    1548             :      * this, so we don't need a memory barrier here as far as the current
    1549             :      * usage is concerned.  But that might not be safe in general.
    1550             :      */
    1551     6248204 :     mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
    1552             : 
    1553     6248204 :     if (!mustwait)
    1554             :     {
    1555     4539668 :         *result = true;
    1556     4539668 :         return false;
    1557             :     }
    1558             : 
    1559     1708536 :     *result = false;
    1560             : 
    1561             :     /*
    1562             :      * pg_atomic_read_u64() guarantees an untorn read even on platforms where
    1563             :      * a plain uint64 load could observe a torn value.
    1564             :      */
    1565     1708536 :     value = pg_atomic_read_u64(valptr);
    1566             : 
    1567     1708536 :     if (value != oldval)
    1568             :     {
    1569     1588366 :         mustwait = false;
    1570     1588366 :         *newval = value;
    1571             :     }
    1572             :     else
    1573             :     {
    1574      120170 :         mustwait = true;
    1575             :     }
    1576             : 
    1577     1708536 :     return mustwait;
    1578             : }
    1579             : 
    1580             : /*
    1581             :  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
    1582             :  *
    1583             :  * If the lock is held and *valptr equals oldval, waits until the lock is
    1584             :  * either freed, or the lock holder updates *valptr by calling
    1585             :  * LWLockUpdateVar.  If the lock is free on exit (immediately or after
    1586             :  * waiting), returns true.  If the lock is still held, but *valptr no longer
    1587             :  * matches oldval, returns false and sets *newval to the current value in
    1588             :  * *valptr.
    1589             :  *
    1590             :  * Note: this function ignores shared lock holders; if the lock is held
    1591             :  * in shared mode, returns 'true'.
    1592             :  *
    1593             :  * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
    1594             :  * hence the caller of this function may want to rely on an explicit barrier or
    1595             :  * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
    1596             :  */
    1597             : bool
    1598     6128034 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1599             :                  uint64 *newval)
    1600             : {
    1601     6128034 :     PGPROC     *proc = MyProc;
    1602     6128034 :     int         extraWaits = 0;
    1603     6128034 :     bool        result = false;
    1604             : #ifdef LWLOCK_STATS
    1605             :     lwlock_stats *lwstats;
    1606             : 
    1607             :     lwstats = get_lwlock_stats_entry(lock);
    1608             : #endif
    1609             : 
    1610             :     PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
    1611             : 
    1612             :     /*
    1613             :      * Lock out cancel/die interrupts while we sleep on the lock.  There is no
    1614             :      * cleanup mechanism to remove us from the wait queue if we got
    1615             :      * interrupted.
    1616             :      */
    1617     6128034 :     HOLD_INTERRUPTS();
    1618             : 
    1619             :     /*
    1620             :      * Loop here to check the lock's status after each time we are signaled.
    1621             :      */
    1622             :     for (;;)
    1623       57264 :     {
    1624             :         bool        mustwait;
    1625             : 
    1626     6185298 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1627             :                                           &result);
    1628             : 
    1629     6185298 :         if (!mustwait)
    1630     6122392 :             break;              /* the lock was free or value didn't match */
    1631             : 
    1632             :         /*
    1633             :          * Add myself to the wait queue. Note that this is racy: somebody else
    1634             :          * could wake us up before we're finished queueing. NB: We're using nearly
    1635             :          * the same twice-in-a-row lock acquisition protocol as
    1636             :          * LWLockAcquire(). Check its comments for details. The only
    1637             :          * difference is that we also have to check the variable's values when
    1638             :          * checking the state of the lock.
    1639             :          */
    1640       62906 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1641             : 
    1642             :         /*
    1643             :          * Set RELEASE_OK flag, to make sure we get woken up as soon as the
    1644             :          * lock is released.
    1645             :          */
    1646       62906 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1647             : 
    1648             :         /*
    1649             :          * We're now guaranteed to be woken up if necessary. Recheck the lock
    1650             :          * and variables state.
    1651             :          */
    1652       62906 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1653             :                                           &result);
    1654             : 
    1655             :         /* Ok, no conflict after we queued ourselves. Undo queueing. */
    1656       62906 :         if (!mustwait)
    1657             :         {
    1658             :             LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
    1659             : 
    1660        5642 :             LWLockDequeueSelf(lock);
    1661        5642 :             break;
    1662             :         }
    1663             : 
    1664             :         /*
    1665             :          * Wait until awakened.
    1666             :          *
    1667             :          * It is possible that we get awakened for a reason other than being
    1668             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1669             :          * we've gotten the LWLock, re-increment the sema by the number of
    1670             :          * additional signals received.
    1671             :          */
    1672             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
    1673             : 
    1674             : #ifdef LWLOCK_STATS
    1675             :         lwstats->block_count++;
    1676             : #endif
    1677             : 
    1678       57264 :         LWLockReportWaitStart(lock);
    1679             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1680             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
    1681             : 
    1682             :         for (;;)
    1683             :         {
    1684       57264 :             PGSemaphoreLock(proc->sem);
    1685       57264 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1686       57264 :                 break;
    1687           0 :             extraWaits++;
    1688             :         }
    1689             : 
    1690             : #ifdef LOCK_DEBUG
    1691             :         {
    1692             :             /* not waiting anymore */
    1693             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1694             : 
    1695             :             Assert(nwaiters < MAX_BACKENDS);
    1696             :         }
    1697             : #endif
    1698             : 
    1699             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1700             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
    1701       57264 :         LWLockReportWaitEnd();
    1702             : 
    1703             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
    1704             : 
    1705             :         /* Now loop back and check the status of the lock again. */
    1706             :     }
    1707             : 
    1708             :     /*
    1709             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1710             :      */
    1711     6128034 :     while (extraWaits-- > 0)
    1712           0 :         PGSemaphoreUnlock(proc->sem);
    1713             : 
    1714             :     /*
    1715             :      * Now okay to allow cancel/die interrupts.
    1716             :      */
    1717     6128034 :     RESUME_INTERRUPTS();
    1718             : 
    1719     6128034 :     return result;
    1720             : }
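
A sketch of the waiter side of the variable protocol, loosely modeled on WaitXLogInsertionsToFinish() in xlog.c; lock, insertingAt, and upto are illustrative placeholders:

    uint64      oldval = 0;     /* value observed when the lock was last free */
    uint64      newval;

    while (!LWLockWaitForVar(lock, &insertingAt, oldval, &newval))
    {
        /* lock still held, but the variable advanced past oldval */
        if (newval >= upto)
            break;              /* the holder has progressed far enough */
        oldval = newval;        /* otherwise wait for further progress */
    }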
    1721             : 
    1722             : 
    1723             : /*
    1724             :  * LWLockUpdateVar - Update a variable and wake up waiters atomically
    1725             :  *
    1726             :  * Sets *valptr to 'val', and wakes up all processes waiting for us with
    1727             :  * LWLockWaitForVar().  It first sets the value atomically and then wakes up
    1728             :  * waiting processes so that any process calling LWLockWaitForVar() on the same
    1729             :  * lock is guaranteed to see the new value, and act accordingly.
    1730             :  *
    1731             :  * The caller must be holding the lock in exclusive mode.
    1732             :  */
    1733             : void
    1734     5057452 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1735             : {
    1736             :     proclist_head wakeup;
    1737             :     proclist_mutable_iter iter;
    1738             : 
    1739             :     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
    1740             : 
    1741             :     /*
    1742             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1743             :      * that the variable is updated before waking up waiters.
    1744             :      */
    1745     5057452 :     pg_atomic_exchange_u64(valptr, val);
    1746             : 
    1747     5057452 :     proclist_init(&wakeup);
    1748             : 
    1749     5057452 :     LWLockWaitListLock(lock);
    1750             : 
    1751             :     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
    1752             : 
    1753             :     /*
    1754             :      * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
    1755             :      * up. They are always in the front of the queue.
    1756             :      */
    1757     5060568 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1758             :     {
    1759       99460 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1760             : 
    1761       99460 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
    1762       96344 :             break;
    1763             : 
    1764        3116 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1765        3116 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
    1766             : 
    1767             :         /* see LWLockWakeup() */
    1768             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
    1769        3116 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
    1770             :     }
    1771             : 
    1772             :     /* We are done updating shared state of the lock itself. */
    1773     5057452 :     LWLockWaitListUnlock(lock);
    1774             : 
    1775             :     /*
    1776             :      * Awaken any waiters I removed from the queue.
    1777             :      */
    1778     5060568 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1779             :     {
    1780        3116 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1781             : 
    1782        3116 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1783             :         /* check comment in LWLockWakeup() about this barrier */
    1784        3116 :         pg_write_barrier();
    1785        3116 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1786        3116 :         PGSemaphoreUnlock(waiter->sem);
    1787             :     }
    1788     5057452 : }
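
The holder's side is correspondingly simple: publish progress without releasing the lock. Continuing the illustrative names from the previous fragment:

    /* exclusive holder advertises how far it has progressed */
    LWLockUpdateVar(lock, &insertingAt, currentBytePos);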
    1789             : 
    1790             : 
    1791             : /*
    1792             :  * Stop treating lock as held by current backend.
    1793             :  *
    1794             :  * This is the code that can be shared between actually releasing a lock
    1795             :  * (LWLockRelease()) and just not tracking ownership of the lock anymore
    1796             :  * without releasing the lock (LWLockDisown()).
    1797             :  *
    1798             :  * Returns the mode in which the lock was held by the current backend.
    1799             :  *
    1800             :  * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
    1801             :  * to the caller.
    1802             :  *
    1803             :  * NB: This will leave lock->owner pointing to the current backend (if
    1804             :  * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
    1805             :  * debug cases of missing wakeups during lock release.
    1806             :  */
    1807             : static inline LWLockMode
    1808   717917186 : LWLockDisownInternal(LWLock *lock)
    1809             : {
    1810             :     LWLockMode  mode;
    1811             :     int         i;
    1812             : 
    1813             :     /*
    1814             :      * Remove lock from list of locks held.  Usually, but not always, it will
    1815             :      * be the latest-acquired lock; so search array backwards.
    1816             :      */
    1817   798904032 :     for (i = num_held_lwlocks; --i >= 0;)
    1818   798904032 :         if (lock == held_lwlocks[i].lock)
    1819   717917186 :             break;
    1820             : 
    1821   717917186 :     if (i < 0)
    1822           0 :         elog(ERROR, "lock %s is not held", T_NAME(lock));
    1823             : 
    1824   717917186 :     mode = held_lwlocks[i].mode;
    1825             : 
    1826   717917186 :     num_held_lwlocks--;
    1827   798904032 :     for (; i < num_held_lwlocks; i++)
    1828    80986846 :         held_lwlocks[i] = held_lwlocks[i + 1];
    1829             : 
    1830   717917186 :     return mode;
    1831             : }
    1832             : 
    1833             : /*
    1834             :  * Helper function to release lock, shared between LWLockRelease() and
    1835             :  * LWLockReleaseDisowned().
    1836             :  */
    1837             : static void
    1838   717917186 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
    1839             : {
    1840             :     uint32      oldstate;
    1841             :     bool        check_waiters;
    1842             : 
    1843             :     /*
    1844             :      * Release my hold on lock, after that it can immediately be acquired by
    1845             :      * others, even if we still have to wakeup other waiters.
    1846             :      */
    1847   717917186 :     if (mode == LW_EXCLUSIVE)
    1848   444823856 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    1849             :     else
    1850   273093330 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
    1851             : 
    1852             :     /* nobody else can have that kind of lock */
    1853             :     Assert(!(oldstate & LW_VAL_EXCLUSIVE));
    1854             : 
    1855             :     if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
    1856             :         TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
    1857             : 
    1858             :     /*
    1859             :      * We're still waiting for backends to get scheduled; don't wake them up
    1860             :      * again.
    1861             :      */
    1862   717917186 :     if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
    1863     4920326 :         (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
    1864     4920326 :         (oldstate & LW_LOCK_MASK) == 0)
    1865     4888836 :         check_waiters = true;
    1866             :     else
    1867   713028350 :         check_waiters = false;
    1868             : 
    1869             :     /*
    1870             :      * As waking up waiters requires the spinlock to be acquired, only do so
    1871             :      * if necessary.
    1872             :      */
    1873   717917186 :     if (check_waiters)
    1874             :     {
    1875             :         /* XXX: remove before commit? */
    1876             :         LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
    1877     4888836 :         LWLockWakeup(lock);
    1878             :     }
    1879   717917186 : }
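
The check above reads more plainly with the flag tests separated: the slow wakeup path is taken only when waiters exist, wakeups are currently permitted, and the lock has become fully free. A behavior-preserving restatement, using the flag constants defined earlier in lwlock.c:

    check_waiters =
        (oldstate & LW_FLAG_HAS_WAITERS) != 0 &&  /* somebody is queued */
        (oldstate & LW_FLAG_RELEASE_OK) != 0 &&   /* wakeups not suppressed */
        (oldstate & LW_LOCK_MASK) == 0;           /* no holders remain */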
    1880             : 
    1881             : 
    1882             : /*
    1883             :  * Stop treating lock as held by current backend.
    1884             :  *
    1885             :  * After calling this function it's the caller's responsibility to ensure that
    1886             :  * the lock gets released (via LWLockReleaseDisowned()), even in case of an
    1887             :  * error. This is only desirable if the lock is going to be released in a
    1888             :  * different process than the process that acquired it.
    1889             :  */
    1890             : void
    1891           0 : LWLockDisown(LWLock *lock)
    1892             : {
    1893           0 :     LWLockDisownInternal(lock);
    1894             : 
    1895           0 :     RESUME_INTERRUPTS();
    1896           0 : }
    1897             : 
    1898             : /*
    1899             :  * LWLockRelease - release a previously acquired lock
    1900             :  */
    1901             : void
    1902   717917186 : LWLockRelease(LWLock *lock)
    1903             : {
    1904             :     LWLockMode  mode;
    1905             : 
    1906   717917186 :     mode = LWLockDisownInternal(lock);
    1907             : 
    1908             :     PRINT_LWDEBUG("LWLockRelease", lock, mode);
    1909             : 
    1910   717917186 :     LWLockReleaseInternal(lock, mode);
    1911             : 
    1912             :     /*
    1913             :      * Now okay to allow cancel/die interrupts.
    1914             :      */
    1915   717917186 :     RESUME_INTERRUPTS();
    1916   717917186 : }
    1917             : 
    1918             : /*
    1919             :  * Release lock previously disowned with LWLockDisown().
    1920             :  */
    1921             : void
    1922           0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
    1923             : {
    1924           0 :     LWLockReleaseInternal(lock, mode);
    1925           0 : }
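
Together, disown and release-disowned let one process release a lock another process acquired. A hedged sketch of the handoff (the transport of 'lock' between the processes is elided):

    /* process A: acquire, then stop tracking ownership locally */
    LWLockAcquire(lock, LW_EXCLUSIVE);
    LWLockDisown(lock);
    /* ... hand the lock over to process B, e.g. via shared memory ... */

    /* process B: release on A's behalf, passing the mode A used */
    LWLockReleaseDisowned(lock, LW_EXCLUSIVE);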
    1926             : 
    1927             : /*
    1928             :  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
    1929             :  */
    1930             : void
    1931    29123018 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1932             : {
    1933             :     /*
    1934             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1935             :      * that the variable is updated before releasing the lock.
    1936             :      */
    1937    29123018 :     pg_atomic_exchange_u64(valptr, val);
    1938             : 
    1939    29123018 :     LWLockRelease(lock);
    1940    29123018 : }
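
This is the matching teardown for a variable-bearing lock: reset the variable to its free-lock value as part of release. Continuing the earlier illustrative names, with 0 as an example sentinel:

    /* done: reset the progress variable and release in one call */
    LWLockReleaseClearVar(lock, &insertingAt, 0);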
    1941             : 
    1942             : 
    1943             : /*
    1944             :  * LWLockReleaseAll - release all currently-held locks
    1945             :  *
    1946             :  * Used to clean up after ereport(ERROR). An important difference between this
    1947             :  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
    1948             :  * unchanged by this operation.  This is necessary since InterruptHoldoffCount
    1949             :  * has been set to an appropriate level earlier in error recovery. We could
    1950             :  * decrement it below zero if we allow it to drop for each released lock!
    1951             :  */
    1952             : void
    1953      109554 : LWLockReleaseAll(void)
    1954             : {
    1955      109940 :     while (num_held_lwlocks > 0)
    1956             :     {
    1957         386 :         HOLD_INTERRUPTS();      /* match the upcoming RESUME_INTERRUPTS */
    1958             : 
    1959         386 :         LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
    1960             :     }
    1961      109554 : }
    1962             : 
    1963             : 
    1964             : /*
    1965             :  * ForEachLWLockHeldByMe - run a callback for each held lock
    1966             :  *
    1967             :  * This is meant as debug support only.
    1968             :  */
    1969             : void
    1970           0 : ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
    1971             :                       void *context)
    1972             : {
    1973             :     int         i;
    1974             : 
    1975           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1976           0 :         callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
    1977           0 : }
    1978             : 
    1979             : /*
    1980             :  * LWLockHeldByMe - test whether my process holds a lock in any mode
    1981             :  *
    1982             :  * This is meant as debug support only.
    1983             :  */
    1984             : bool
    1985           0 : LWLockHeldByMe(LWLock *lock)
    1986             : {
    1987             :     int         i;
    1988             : 
    1989           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1990             :     {
    1991           0 :         if (held_lwlocks[i].lock == lock)
    1992           0 :             return true;
    1993             :     }
    1994           0 :     return false;
    1995             : }
    1996             : 
    1997             : /*
    1998             :  * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
    1999             :  *
    2000             :  * This is meant as debug support only.
    2001             :  */
    2002             : bool
    2003           0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
    2004             : {
    2005             :     char       *held_lock_addr;
    2006             :     char       *begin;
    2007             :     char       *end;
    2008             :     int         i;
    2009             : 
    2010           0 :     begin = (char *) lock;
    2011           0 :     end = begin + nlocks * stride;
    2012           0 :     for (i = 0; i < num_held_lwlocks; i++)
    2013             :     {
    2014           0 :         held_lock_addr = (char *) held_lwlocks[i].lock;
    2015           0 :         if (held_lock_addr >= begin &&
    2016           0 :             held_lock_addr < end &&
    2017           0 :             (held_lock_addr - begin) % stride == 0)
    2018           0 :             return true;
    2019             :     }
    2020           0 :     return false;
    2021             : }
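
The stride parameter exists so this can check locks embedded in an array of fixed-size structs. A fragment with a hypothetical shared-memory entry type (MyShmemEntry, entries, and nentries are illustrative):

    typedef struct MyShmemEntry
    {
        LWLock      lock;       /* one lock per entry */
        int         payload;
    } MyShmemEntry;

    /* true if this backend holds any of the nentries embedded locks */
    if (LWLockAnyHeldByMe(&entries[0].lock, nentries, sizeof(MyShmemEntry)))
        elog(ERROR, "unexpectedly still holding an entry lock");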
    2022             : 
    2023             : /*
    2024             :  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
    2025             :  *
    2026             :  * This is meant as debug support only.
    2027             :  */
    2028             : bool
    2029           0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
    2030             : {
    2031             :     int         i;
    2032             : 
    2033           0 :     for (i = 0; i < num_held_lwlocks; i++)
    2034             :     {
    2035           0 :         if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
    2036           0 :             return true;
    2037             :     }
    2038           0 :     return false;
    2039             : }

Generated by: LCOV version 1.14