LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - lwlock.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 348 404 86.1 %
Date: 2025-04-01 16:15:31 Functions: 31 36 86.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * lwlock.c
       4             :  *    Lightweight lock manager
       5             :  *
       6             :  * Lightweight locks are intended primarily to provide mutual exclusion of
       7             :  * access to shared-memory data structures.  Therefore, they offer both
       8             :  * exclusive and shared lock modes (to support read/write and read-only
       9             :  * access to a shared object).  There are few other frammishes.  User-level
      10             :  * locking should be done with the full lock manager --- which depends on
      11             :  * LWLocks to protect its shared state.
      12             :  *
      13             :  * In addition to exclusive and shared modes, lightweight locks can be used to
      14             :  * wait until a variable changes value.  The variable is not reset when the
      15             :  * lock is acquired with LWLockAcquire, i.e. it keeps the value it had when
      16             :  * the lock was last released, and can be updated
      17             :  * without releasing the lock by calling LWLockUpdateVar.  LWLockWaitForVar
      18             :  * waits for the variable to be updated, or until the lock is free.  When
      19             :  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
      20             :  * appropriate value for a free lock.  The meaning of the variable is up to
      21             :  * the caller, the lightweight lock code just assigns and compares it.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/storage/lmgr/lwlock.c
      28             :  *
      29             :  * NOTES:
      30             :  *
      31             :  * This used to be a pretty straightforward reader-writer lock
      32             :  * implementation, in which the internal state was protected by a
      33             :  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
      34             :  * too high for workloads/locks that were taken in shared mode very
      35             :  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
      36             :  * while trying to acquire a shared lock that was actually free.
      37             :  *
      38             :  * Thus a new implementation was devised that provides wait-free shared lock
      39             :  * acquisition for locks that aren't exclusively locked.
      40             :  *
      41             :  * The basic idea is to have a single atomic variable 'lockcount' instead of
      42             :  * the formerly separate shared and exclusive counters and to use atomic
      43             :  * operations to acquire the lock. That's fairly easy to do for plain
      44             :  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
      45             :  * in the OS.
      46             :  *
      47             :  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
      48             :  * variable. For exclusive lock we swap in a sentinel value
      49             :  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
      50             :  *
      51             :  * To release the lock we use an atomic decrement.  If the
      52             :  * new value is zero (we get that atomically), we know we can/have to release
      53             :  * waiters.
      54             :  *
      55             :  * Obviously it is important that the sentinel value for exclusive locks
      56             :  * doesn't conflict with the maximum number of possible share lockers -
      57             :  * luckily MAX_BACKENDS makes that easily possible.
      58             :  *
      59             :  *
      60             :  * The attentive reader might have noticed that naively doing the above has a
      61             :  * glaring race condition: We try to lock using the atomic operations and
      62             :  * notice that we have to wait. Unfortunately by the time we have finished
      63             :  * queuing, the former locker very well might have already finished its
      64             :  * work. That's problematic because we're now stuck waiting inside the OS.
      65             :  *
      66             :  * To mitigate those races we use a two-phased attempt at locking:
      67             :  *   Phase 1: Try to do it atomically, if we succeed, nice
      68             :  *   Phase 2: Add ourselves to the waitqueue of the lock
      69             :  *   Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
      70             :  *            the queue
      71             :  *   Phase 4: Sleep till wake-up, goto Phase 1
      72             :  *
      73             :  * This protects us against the problem from above as nobody can release too
      74             :  * quickly, before we're queued, since after Phase 2 we're already queued.
      75             :  * -------------------------------------------------------------------------
      76             :  */
      77             : #include "postgres.h"
      78             : 
      79             : #include "miscadmin.h"
      80             : #include "pg_trace.h"
      81             : #include "pgstat.h"
      82             : #include "port/pg_bitutils.h"
      83             : #include "storage/proc.h"
      84             : #include "storage/proclist.h"
      85             : #include "storage/procnumber.h"
      86             : #include "storage/spin.h"
      87             : #include "utils/memutils.h"
      88             : 
      89             : #ifdef LWLOCK_STATS
      90             : #include "utils/hsearch.h"
      91             : #endif
      92             : 
      93             : /* Flag bits occupy the top LW_FLAG_BITS bits of the 32-bit lock state word. */
      94             : #define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 31)
      95             : #define LW_FLAG_RELEASE_OK          ((uint32) 1 << 30)
      96             : #define LW_FLAG_LOCKED              ((uint32) 1 << 29)
      97             : #define LW_FLAG_BITS                3
      98             : #define LW_FLAG_MASK                ((((uint32) 1 << LW_FLAG_BITS) - 1) << (32 - LW_FLAG_BITS))    /* uint32 math: 7 << 29 overflows a signed int */
      99             : 
     100             : /* exclusive-holder sentinel; assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
     101             : #define LW_VAL_EXCLUSIVE            (MAX_BACKENDS + 1)
     102             : #define LW_VAL_SHARED               1   /* presumably one unit per shared holder (see file header) -- confirm */
     103             : 
     104             : /* MAX_BACKENDS is already (power of 2)-1, i.e. suitable for a mask */
     105             : #define LW_SHARED_MASK              MAX_BACKENDS
     106             : #define LW_LOCK_MASK                (MAX_BACKENDS | LW_VAL_EXCLUSIVE)   /* shared-count bits plus the exclusive bit */
     107             : 
     108             : /* Compile-time checks: the count bits must form a clean mask and must not collide with the flag bits. */
     109             : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
     110             :                  "MAX_BACKENDS + 1 needs to be a power of 2");
     111             : 
     112             : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
     113             :                  "MAX_BACKENDS and LW_FLAG_MASK overlap");
     114             : 
     115             : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
     116             :                  "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
     117             : 
     118             : /*
     119             :  * There are three sorts of LWLock "tranches":
     120             :  *
     121             :  * 1. The individually-named locks defined in lwlocklist.h each have their
     122             :  * own tranche.  The PG_LWLOCK macro below turns each entry of that file into
     123             :  * an "[id] = name" initializer of BuiltinTrancheNames.
     124             :  *
     125             :  * 2. There are some predefined tranches for built-in groups of locks.
     126             :  * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
     127             :  * appear in BuiltinTrancheNames[] below.
     128             :  *
     129             :  * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
     130             :  * or LWLockRegisterTranche.  The names of these that are known in the current
     131             :  * process appear in LWLockTrancheNames[].
     132             :  *
     133             :  * All these names are user-visible as wait event names, so choose with care
     134             :  * ... and do not forget to update the documentation's list of wait events.
     135             :  */
     136             : static const char *const BuiltinTrancheNames[] = {
     137             : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
     138             : #include "storage/lwlocklist.h"
     139             : #undef PG_LWLOCK
     140             :     [LWTRANCHE_XACT_BUFFER] = "XactBuffer",
     141             :     [LWTRANCHE_COMMITTS_BUFFER] = "CommitTsBuffer",
     142             :     [LWTRANCHE_SUBTRANS_BUFFER] = "SubtransBuffer",
     143             :     [LWTRANCHE_MULTIXACTOFFSET_BUFFER] = "MultiXactOffsetBuffer",
     144             :     [LWTRANCHE_MULTIXACTMEMBER_BUFFER] = "MultiXactMemberBuffer",
     145             :     [LWTRANCHE_NOTIFY_BUFFER] = "NotifyBuffer",
     146             :     [LWTRANCHE_SERIAL_BUFFER] = "SerialBuffer",
     147             :     [LWTRANCHE_WAL_INSERT] = "WALInsert",
     148             :     [LWTRANCHE_BUFFER_CONTENT] = "BufferContent",
     149             :     [LWTRANCHE_REPLICATION_ORIGIN_STATE] = "ReplicationOriginState",
     150             :     [LWTRANCHE_REPLICATION_SLOT_IO] = "ReplicationSlotIO",
     151             :     [LWTRANCHE_LOCK_FASTPATH] = "LockFastPath",
     152             :     [LWTRANCHE_BUFFER_MAPPING] = "BufferMapping",
     153             :     [LWTRANCHE_LOCK_MANAGER] = "LockManager",
     154             :     [LWTRANCHE_PREDICATE_LOCK_MANAGER] = "PredicateLockManager",
     155             :     [LWTRANCHE_PARALLEL_HASH_JOIN] = "ParallelHashJoin",
     156             :     [LWTRANCHE_PARALLEL_BTREE_SCAN] = "ParallelBtreeScan",
     157             :     [LWTRANCHE_PARALLEL_QUERY_DSA] = "ParallelQueryDSA",
     158             :     [LWTRANCHE_PER_SESSION_DSA] = "PerSessionDSA",
     159             :     [LWTRANCHE_PER_SESSION_RECORD_TYPE] = "PerSessionRecordType",
     160             :     [LWTRANCHE_PER_SESSION_RECORD_TYPMOD] = "PerSessionRecordTypmod",
     161             :     [LWTRANCHE_SHARED_TUPLESTORE] = "SharedTupleStore",
     162             :     [LWTRANCHE_SHARED_TIDBITMAP] = "SharedTidBitmap",
     163             :     [LWTRANCHE_PARALLEL_APPEND] = "ParallelAppend",
     164             :     [LWTRANCHE_PER_XACT_PREDICATE_LIST] = "PerXactPredicateList",
     165             :     [LWTRANCHE_PGSTATS_DSA] = "PgStatsDSA",
     166             :     [LWTRANCHE_PGSTATS_HASH] = "PgStatsHash",
     167             :     [LWTRANCHE_PGSTATS_DATA] = "PgStatsData",
     168             :     [LWTRANCHE_LAUNCHER_DSA] = "LogicalRepLauncherDSA",
     169             :     [LWTRANCHE_LAUNCHER_HASH] = "LogicalRepLauncherHash",
     170             :     [LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
     171             :     [LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
     172             :     [LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
     173             :     [LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
     174             :     [LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
     175             :     [LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
     176             :     [LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
     177             :     [LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
     178             :     [LWTRANCHE_XACT_SLRU] = "XactSLRU",
     179             :     [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
     180             :     [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
     181             : };
     182             : /* The array is sized by its highest designated index, so this only catches a missing last entry; a gap in the middle is left NULL. */
     183             : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
     184             :                  LWTRANCHE_FIRST_USER_DEFINED,
     185             :                  "missing entries in BuiltinTrancheNames[]");
     186             : 
     187             : /*
     188             :  * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
     189             :  * stores the names of all dynamically-created tranches known to the current
     190             :  * process.  Any unused entries in the array will contain NULL.
     191             :  */
     192             : static const char **LWLockTrancheNames = NULL;
     193             : static int  LWLockTrancheNamesAllocated = 0;    /* presumably the allocated length of LWLockTrancheNames -- confirm */
     194             : 
     195             : /*
     196             :  * This points to the main array of LWLocks in shared memory.  Backends inherit
     197             :  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
     198             :  * where we have special measures to pass it down).
     199             :  */
     200             : LWLockPadded *MainLWLockArray = NULL;   /* assigned in CreateLWLocks() */
     201             : 
     202             : /*
     203             :  * We use this structure to keep track of locked LWLocks for release
     204             :  * during error recovery.  Normally, only a few will be held at once, but
     205             :  * occasionally the number can be much higher; for example, the pg_buffercache
     206             :  * extension locks all buffer partitions simultaneously.
     207             :  */
     208             : #define MAX_SIMUL_LWLOCKS   200 /* capacity of held_lwlocks[] */
     209             : 
     210             : /* struct representing the LWLocks we're holding */
     211             : typedef struct LWLockHandle
     212             : {
     213             :     LWLock     *lock;           /* the held lock */
     214             :     LWLockMode  mode;           /* presumably the mode it was acquired in -- confirm */
     215             : } LWLockHandle;
     216             : 
     217             : static int  num_held_lwlocks = 0;   /* presumably the number of valid entries in held_lwlocks[] -- confirm */
     218             : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
     219             : 
     220             : /* struct representing the LWLock tranche request for named tranche */
     221             : typedef struct NamedLWLockTrancheRequest
     222             : {
     223             :     char        tranche_name[NAMEDATALEN];  /* NUL-terminated; copied into shared memory by InitializeLWLocks() */
     224             :     int         num_lwlocks;    /* number of locks requested for this tranche */
     225             : } NamedLWLockTrancheRequest;
     226             : 
     227             : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
     228             : static int  NamedLWLockTrancheRequestsAllocated = 0;    /* presumably the allocated length of the request array -- confirm */
     229             : 
     230             : /*
     231             :  * NamedLWLockTrancheRequests is both the valid length of the request array,
     232             :  * and the length of the shared-memory NamedLWLockTrancheArray later on.
     233             :  * This variable and NamedLWLockTrancheArray are non-static so that
     234             :  * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
     235             :  */
     236             : int         NamedLWLockTrancheRequests = 0;
     237             : 
     238             : /* points to data in shared memory, placed right after the LWLocks of MainLWLockArray: */
     239             : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
     240             : /* Forward declarations of file-local helpers. */
     241             : static void InitializeLWLocks(void);
     242             : static inline void LWLockReportWaitStart(LWLock *lock);
     243             : static inline void LWLockReportWaitEnd(void);
     244             : static const char *GetLWTrancheName(uint16 trancheId);
     245             : /* Name of the tranche that 'lock' belongs to, as returned by GetLWTrancheName(). */
     246             : #define T_NAME(lock) \
     247             :     GetLWTrancheName((lock)->tranche)
     248             : 
     249             : #ifdef LWLOCK_STATS
     250             : typedef struct lwlock_stats_key
     251             : {
     252             :     int         tranche;        /* lock->tranche */
     253             :     void       *instance;       /* address of the LWLock itself */
     254             : }           lwlock_stats_key;
     255             : 
     256             : typedef struct lwlock_stats
     257             : {
     258             :     lwlock_stats_key key;       /* hash key */
     259             :     int         sh_acquire_count;   /* reported as "shacq" */
     260             :     int         ex_acquire_count;   /* reported as "exacq" */
     261             :     int         block_count;    /* reported as "blk" */
     262             :     int         dequeue_self_count; /* reported as "dequeue self" */
     263             :     int         spin_delay_count;   /* reported as "spindelay" */
     264             : }           lwlock_stats;
     265             : 
     266             : static HTAB *lwlock_stats_htab; /* NULL until init_lwlock_stats() has run */
     267             : static lwlock_stats lwlock_stats_dummy; /* catch-all entry used while the table does not exist */
     268             : #endif
     269             : 
     270             : #ifdef LOCK_DEBUG
     271             : bool        Trace_lwlocks = false;  /* when true, the two helpers below emit LOG messages */
     272             : /* Log the state word of 'lock', decoded into its fields, tagged with 'where'; 'mode' is unused. */
     273             : inline static void
     274             : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
     275             : {
     276             :     /* hide statement & context here, otherwise the log is just too verbose */
     277             :     if (Trace_lwlocks)
     278             :     {
     279             :         uint32      state = pg_atomic_read_u32(&lock->state);
     280             : 
     281             :         ereport(LOG,
     282             :                 (errhidestmt(true),
     283             :                  errhidecontext(true),
     284             :                  errmsg_internal("%d: %s(%s %p): excl %d shared %u haswaiters %d waiters %u rOK %d",
     285             :                                  MyProcPid,
     286             :                                  where, T_NAME(lock), lock,
     287             :                                  (state & LW_VAL_EXCLUSIVE) != 0,   /* int 0/1, hence %d */
     288             :                                  state & LW_SHARED_MASK,
     289             :                                  (state & LW_FLAG_HAS_WAITERS) != 0,    /* int 0/1, hence %d */
     290             :                                  pg_atomic_read_u32(&lock->nwaiters),
     291             :                                  (state & LW_FLAG_RELEASE_OK) != 0)));
     292             :     }
     293             : }
     294             : /* Log the free-form message 'msg' about 'lock', tagged with 'where'. */
     295             : inline static void
     296             : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
     297             : {
     298             :     /* hide statement & context here, otherwise the log is just too verbose */
     299             :     if (Trace_lwlocks)
     300             :     {
     301             :         ereport(LOG,
     302             :                 (errhidestmt(true),
     303             :                  errhidecontext(true),
     304             :                  errmsg_internal("%s(%s %p): %s", where,
     305             :                                  T_NAME(lock), lock, msg)));
     306             :     }
     307             : }
     308             : /* Without LOCK_DEBUG both helpers expand to no-ops. */
     309             : #else                           /* not LOCK_DEBUG */
     310             : #define PRINT_LWDEBUG(a,b,c) ((void)0)
     311             : #define LOG_LWDEBUG(a,b,c) ((void)0)
     312             : #endif                          /* LOCK_DEBUG */
     313             : 
     314             : #ifdef LWLOCK_STATS
     315             : 
     316             : static void init_lwlock_stats(void);
     317             : static void print_lwlock_stats(int code, Datum arg);
     318             : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
     319             : /* (Re)create this process's LWLock stats hash table; on the first call also register print_lwlock_stats() via on_shmem_exit(). */
     320             : static void
     321             : init_lwlock_stats(void)
     322             : {
     323             :     HASHCTL     ctl;
     324             :     static MemoryContext lwlock_stats_cxt = NULL;
     325             :     static bool exit_registered = false;
     326             :     /* A repeated call deletes the previous context before a fresh one is created below. */
     327             :     if (lwlock_stats_cxt != NULL)
     328             :         MemoryContextDelete(lwlock_stats_cxt);
     329             : 
     330             :     /*
     331             :      * The LWLock stats will be updated within a critical section, which
     332             :      * requires allocating new hash entries. Allocations within a critical
     333             :      * section are normally not allowed because running out of memory would
     334             :      * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
     335             :      * turned on in production, so that's an acceptable risk. The hash entries
     336             :      * are small, so the risk of running out of memory is minimal in practice.
     337             :      */
     338             :     lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
     339             :                                              "LWLock stats",
     340             :                                              ALLOCSET_DEFAULT_SIZES);
     341             :     MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
     342             :     /* Entries are lwlock_stats structs keyed by lwlock_stats_key; the table is handed lwlock_stats_cxt as its context. */
     343             :     ctl.keysize = sizeof(lwlock_stats_key);
     344             :     ctl.entrysize = sizeof(lwlock_stats);
     345             :     ctl.hcxt = lwlock_stats_cxt;
     346             :     lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
     347             :                                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     348             :     if (!exit_registered)       /* register the exit callback only once */
     349             :     {
     350             :         on_shmem_exit(print_lwlock_stats, 0);
     351             :         exit_registered = true;
     352             :     }
     353             : }
     354             : /* Exit callback (registered by init_lwlock_stats): write one stderr line per entry of the stats table. */
     355             : static void
     356             : print_lwlock_stats(int code, Datum arg) /* neither argument is used */
     357             : {
     358             :     HASH_SEQ_STATUS scan;
     359             :     lwlock_stats *lwstats;
     360             : 
     361             :     hash_seq_init(&scan, lwlock_stats_htab);
     362             : 
     363             :     /* Grab an LWLock to keep different backends from mixing reports */
     364             :     LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
     365             : 
     366             :     while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
     367             :     {
     368             :         fprintf(stderr,         /* the counters are plain int, so they are printed with %d */
     369             :                 "PID %d lwlock %s %p: shacq %d exacq %d blk %d spindelay %d dequeue self %d\n",
     370             :                 MyProcPid, GetLWTrancheName(lwstats->key.tranche),
     371             :                 lwstats->key.instance, lwstats->sh_acquire_count,
     372             :                 lwstats->ex_acquire_count, lwstats->block_count,
     373             :                 lwstats->spin_delay_count, lwstats->dequeue_self_count);
     374             :     }
     375             : 
     376             :     LWLockRelease(&MainLWLockArray[0].lock);
     377             : }
     378             : /* Return the stats entry for 'lock', creating it with zeroed counters on first use. */
     379             : static lwlock_stats *
     380             : get_lwlock_stats_entry(LWLock *lock)
     381             : {
     382             :     lwlock_stats_key key;
     383             :     lwlock_stats *lwstats;
     384             :     bool        found;
     385             : 
     386             :     /*
     387             :      * During shared memory initialization, the hash table doesn't exist yet.
     388             :      * Stats of that phase aren't very interesting, so just collect operations
     389             :      * on all locks in a single dummy entry.
     390             :      */
     391             :     if (lwlock_stats_htab == NULL)
     392             :         return &lwlock_stats_dummy;
     393             : 
     394             :     /* Fetch or create the entry. */
     395             :     MemSet(&key, 0, sizeof(key));   /* zero the whole key first; presumably so padding bytes compare equal -- confirm */
     396             :     key.tranche = lock->tranche;
     397             :     key.instance = lock;
     398             :     lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
     399             :     if (!found)                 /* new entry: start every counter at zero */
     400             :     {
     401             :         lwstats->sh_acquire_count = 0;
     402             :         lwstats->ex_acquire_count = 0;
     403             :         lwstats->block_count = 0;
     404             :         lwstats->dequeue_self_count = 0;
     405             :         lwstats->spin_delay_count = 0;
     406             :     }
     407             :     return lwstats;
     408             : }
     409             : #endif                          /* LWLOCK_STATS */
     410             : 
     411             : 
     412             : /*
     413             :  * Return the sum of num_lwlocks over all NamedLWLockTrancheRequests entries
     414             :  * of the request array (0 if there are none).
     415             :  */
     416             : static int
     417        7858 : NumLWLocksForNamedTranches(void)
     418             : {
     419        7858 :     int         numLocks = 0;
     420             :     int         i;
     421             : 
     422        7914 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     423          56 :         numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     424             : 
     425        7858 :     return numLocks;
     426             : }
     427             : 
     428             : /*
     429             :  * Compute shmem space needed for the LWLock array, the tranche counter, and the named tranches with their names.
     430             :  */
     431             : Size
     432        5826 : LWLockShmemSize(void)
     433             : {
     434             :     Size        size;
     435             :     int         i;
     436        5826 :     int         numLocks = NUM_FIXED_LWLOCKS;
     437             : 
     438             :     /* Fixed locks plus every lock requested for a named tranche. */
     439        5826 :     numLocks += NumLWLocksForNamedTranches();
     440             : 
     441             :     /* Space for the LWLock array. */
     442        5826 :     size = mul_size(numLocks, sizeof(LWLockPadded));
     443             : 
     444             :     /* Space for the int counter, plus a full LWLOCK_PADDED_SIZE of slack for alignment. */
     445        5826 :     size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
     446             : 
     447             :     /* Space for one NamedLWLockTranche per request. */
     448        5826 :     size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
     449             : 
     450             :     /* Space for the name of each tranche, including its terminating NUL. */
     451        5868 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     452          42 :         size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
     453             : 
     454        5826 :     return size;
     455             : }
     456             : 
     457             : /*
     458             :  * When not running under the postmaster, allocate and initialize the main
     459             :  * LWLock array.  In every case, register the named tranches in this process.
     460             :  */
     461             : void
     462        2032 : CreateLWLocks(void)
     463             : {
     464        2032 :     if (!IsUnderPostmaster)
     465             :     {
     466        2032 :         Size        spaceLocks = LWLockShmemSize();
     467             :         int        *LWLockCounter;
     468             :         char       *ptr;
     469             : 
     470             :         /* Allocate space */
     471        2032 :         ptr = (char *) ShmemAlloc(spaceLocks);
     472             : 
     473             :         /* Leave room for the int tranche counter */
     474        2032 :         ptr += sizeof(int);
     475             : 
     476             :         /* Advance to the next LWLOCK_PADDED_SIZE boundary (a full unit if already aligned) */
     477        2032 :         ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
     478             : 
     479        2032 :         MainLWLockArray = (LWLockPadded *) ptr;
     480             : 
     481             :         /*
     482             :          * Initialize the dynamic-allocation counter for tranches, which is
     483             :          * stored just before the first LWLock.
     484             :          */
     485        2032 :         LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     486        2032 :         *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
     487             : 
     488             :         /* Initialize all LWLocks */
     489        2032 :         InitializeLWLocks();
     490             :     }
     491             : 
     492             :     /* Register named extension LWLock tranches in the current process. */
     493        2046 :     for (int i = 0; i < NamedLWLockTrancheRequests; i++)
     494          14 :         LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
     495          14 :                               NamedLWLockTrancheArray[i].trancheName);
     496        2032 : }
     497             : 
     498             : /*
     499             :  * Initialize LWLocks that are fixed and those belonging to named tranches.
     500             :  */
     501             : static void
     502        2032 : InitializeLWLocks(void)
     503             : {
     504        2032 :     int         numNamedLocks = NumLWLocksForNamedTranches();
     505             :     int         id;
     506             :     int         i;
     507             :     int         j;
     508             :     LWLockPadded *lock;
     509             : 
     510             :     /* Initialize all individual LWLocks in main array */
     511      111760 :     for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
     512      109728 :         LWLockInitialize(&lock->lock, id);
     513             : 
     514             :     /* Initialize buffer mapping LWLocks in main array */
     515        2032 :     lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
     516      262128 :     for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
     517      260096 :         LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
     518             : 
     519             :     /* Initialize lmgrs' LWLocks in main array */
     520        2032 :     lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
     521       34544 :     for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
     522       32512 :         LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
     523             : 
     524             :     /* Initialize predicate lmgrs' LWLocks in main array */
     525        2032 :     lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
     526       34544 :     for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
     527       32512 :         LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
     528             : 
     529             :     /*
     530             :      * Copy the info about any named tranches into shared memory (so that
     531             :      * other processes can see it), and initialize the requested LWLocks.
     532             :      */
     533        2032 :     if (NamedLWLockTrancheRequests > 0)
     534             :     {
     535             :         char       *trancheNames;
     536             : 
     537          14 :         NamedLWLockTrancheArray = (NamedLWLockTranche *)
     538          14 :             &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
     539             : 
     540          14 :         trancheNames = (char *) NamedLWLockTrancheArray +
     541          14 :             (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
     542          14 :         lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
     543             : 
     544          28 :         for (i = 0; i < NamedLWLockTrancheRequests; i++)
     545             :         {
     546             :             NamedLWLockTrancheRequest *request;
     547             :             NamedLWLockTranche *tranche;
     548             :             char       *name;
     549             : 
     550          14 :             request = &NamedLWLockTrancheRequestArray[i];
     551          14 :             tranche = &NamedLWLockTrancheArray[i];
     552             : 
     553          14 :             name = trancheNames;
     554          14 :             trancheNames += strlen(request->tranche_name) + 1;
     555          14 :             strcpy(name, request->tranche_name);
     556          14 :             tranche->trancheId = LWLockNewTrancheId();
     557          14 :             tranche->trancheName = name;
     558             : 
     559          28 :             for (j = 0; j < request->num_lwlocks; j++, lock++)
     560          14 :                 LWLockInitialize(&lock->lock, tranche->trancheId);
     561             :         }
     562             :     }
     563        2032 : }
     564             : 
     565             : /*
     566             :  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
     567             :  */
     568             : void
     569       42318 : InitLWLockAccess(void)
     570             : {
     571             : #ifdef LWLOCK_STATS
     572             :     init_lwlock_stats();
     573             : #endif
     574       42318 : }
     575             : 
     576             : /*
     577             :  * GetNamedLWLockTranche - returns the base address of LWLock from the
     578             :  *      specified tranche.
     579             :  *
     580             :  * Caller needs to retrieve the requested number of LWLocks starting from
     581             :  * the base lock address returned by this API.  This can be used for
     582             :  * tranches that are requested by using RequestNamedLWLockTranche() API.
     583             :  */
     584             : LWLockPadded *
     585          14 : GetNamedLWLockTranche(const char *tranche_name)
     586             : {
     587             :     int         lock_pos;
     588             :     int         i;
     589             : 
     590             :     /*
     591             :      * Obtain the position of base address of LWLock belonging to requested
     592             :      * tranche_name in MainLWLockArray.  LWLocks for named tranches are placed
     593             :      * in MainLWLockArray after fixed locks.
     594             :      */
     595          14 :     lock_pos = NUM_FIXED_LWLOCKS;
     596          14 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     597             :     {
     598          14 :         if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
     599             :                    tranche_name) == 0)
     600          14 :             return &MainLWLockArray[lock_pos];
     601             : 
     602           0 :         lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     603             :     }
     604             : 
     605           0 :     elog(ERROR, "requested tranche is not registered");
     606             : 
     607             :     /* just to keep compiler quiet */
     608             :     return NULL;
     609             : }
     610             : 
     611             : /*
     612             :  * Allocate a new tranche ID.
     613             :  */
     614             : int
     615          34 : LWLockNewTrancheId(void)
     616             : {
     617             :     int         result;
     618             :     int        *LWLockCounter;
     619             : 
     620          34 :     LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     621             :     /* We use the ShmemLock spinlock to protect LWLockCounter */
     622          34 :     SpinLockAcquire(ShmemLock);
     623          34 :     result = (*LWLockCounter)++;
     624          34 :     SpinLockRelease(ShmemLock);
     625             : 
     626          34 :     return result;
     627             : }
     628             : 
     629             : /*
     630             :  * Register a dynamic tranche name in the lookup table of the current process.
     631             :  *
     632             :  * This routine will save a pointer to the tranche name passed as an argument,
     633             :  * so the name should be allocated in a backend-lifetime context
     634             :  * (shared memory, TopMemoryContext, static constant, or similar).
     635             :  *
     636             :  * The tranche name will be user-visible as a wait event name, so try to
     637             :  * use a name that fits the style for those.
     638             :  */
     639             : void
     640          40 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
     641             : {
     642             :     /* This should only be called for user-defined tranches. */
     643          40 :     if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
     644           0 :         return;
     645             : 
     646             :     /* Convert to array index. */
     647          40 :     tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
     648             : 
     649             :     /* If necessary, create or enlarge array. */
     650          40 :     if (tranche_id >= LWLockTrancheNamesAllocated)
     651             :     {
     652             :         int         newalloc;
     653             : 
     654          34 :         newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
     655             : 
     656          34 :         if (LWLockTrancheNames == NULL)
     657          34 :             LWLockTrancheNames = (const char **)
     658          34 :                 MemoryContextAllocZero(TopMemoryContext,
     659             :                                        newalloc * sizeof(char *));
     660             :         else
     661           0 :             LWLockTrancheNames =
     662           0 :                 repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
     663          34 :         LWLockTrancheNamesAllocated = newalloc;
     664             :     }
     665             : 
     666          40 :     LWLockTrancheNames[tranche_id] = tranche_name;
     667             : }
     668             : 
     669             : /*
     670             :  * RequestNamedLWLockTranche
     671             :  *      Request that extra LWLocks be allocated during postmaster
     672             :  *      startup.
     673             :  *
     674             :  * This may only be called via the shmem_request_hook of a library that is
     675             :  * loaded into the postmaster via shared_preload_libraries.  Calls from
     676             :  * elsewhere will fail.
     677             :  *
     678             :  * The tranche name will be user-visible as a wait event name, so try to
     679             :  * use a name that fits the style for those.
     680             :  */
     681             : void
     682          14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
     683             : {
     684             :     NamedLWLockTrancheRequest *request;
     685             : 
     686          14 :     if (!process_shmem_requests_in_progress)
     687           0 :         elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
     688             : 
     689          14 :     if (NamedLWLockTrancheRequestArray == NULL)
     690             :     {
     691          14 :         NamedLWLockTrancheRequestsAllocated = 16;
     692          14 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     693          14 :             MemoryContextAlloc(TopMemoryContext,
     694             :                                NamedLWLockTrancheRequestsAllocated
     695             :                                * sizeof(NamedLWLockTrancheRequest));
     696             :     }
     697             : 
     698          14 :     if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
     699             :     {
     700           0 :         int         i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
     701             : 
     702           0 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     703           0 :             repalloc(NamedLWLockTrancheRequestArray,
     704             :                      i * sizeof(NamedLWLockTrancheRequest));
     705           0 :         NamedLWLockTrancheRequestsAllocated = i;
     706             :     }
     707             : 
     708          14 :     request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
     709             :     Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
     710          14 :     strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
     711          14 :     request->num_lwlocks = num_lwlocks;
     712          14 :     NamedLWLockTrancheRequests++;
     713          14 : }
     714             : 
     715             : /*
     716             :  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
     717             :  */
     718             : void
     719    23438908 : LWLockInitialize(LWLock *lock, int tranche_id)
     720             : {
     721    23438908 :     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
     722             : #ifdef LOCK_DEBUG
     723             :     pg_atomic_init_u32(&lock->nwaiters, 0);
     724             : #endif
     725    23438908 :     lock->tranche = tranche_id;
     726    23438908 :     proclist_init(&lock->waiters);
     727    23438908 : }
     728             : 
     729             : /*
     730             :  * Report start of wait event for light-weight locks.
     731             :  *
     732             :  * This function will be used by all the light-weight lock calls which
     733             :  * need to wait to acquire the lock.  This function distinguishes wait
     734             :  * event based on tranche and lock id.
     735             :  */
     736             : static inline void
     737     4424112 : LWLockReportWaitStart(LWLock *lock)
     738             : {
     739     4424112 :     pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
     740     4424112 : }
     741             : 
     742             : /*
     743             :  * Report end of wait event for light-weight locks.
     744             :  */
     745             : static inline void
     746     4424112 : LWLockReportWaitEnd(void)
     747             : {
     748     4424112 :     pgstat_report_wait_end();
     749     4424112 : }
     750             : 
     751             : /*
     752             :  * Return the name of an LWLock tranche.
     753             :  */
     754             : static const char *
     755          58 : GetLWTrancheName(uint16 trancheId)
     756             : {
     757             :     /* Built-in tranche or individual LWLock? */
     758          58 :     if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
     759          58 :         return BuiltinTrancheNames[trancheId];
     760             : 
     761             :     /*
     762             :      * It's an extension tranche, so look in LWLockTrancheNames[].  However,
     763             :      * it's possible that the tranche has never been registered in the current
     764             :      * process, in which case give up and return "extension".
     765             :      */
     766           0 :     trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
     767             : 
     768           0 :     if (trancheId >= LWLockTrancheNamesAllocated ||
     769           0 :         LWLockTrancheNames[trancheId] == NULL)
     770           0 :         return "extension";
     771             : 
     772           0 :     return LWLockTrancheNames[trancheId];
     773             : }
     774             : 
     775             : /*
     776             :  * Return an identifier for an LWLock based on the wait class and event.
     777             :  */
     778             : const char *
     779          58 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
     780             : {
     781             :     Assert(classId == PG_WAIT_LWLOCK);
     782             :     /* The event IDs are just tranche numbers. */
     783          58 :     return GetLWTrancheName(eventId);
     784             : }
     785             : 
     786             : /*
     787             :  * Internal function that tries to atomically acquire the lwlock in the passed
     788             :  * in mode.
     789             :  *
     790             :  * This function will not block waiting for a lock to become free - that's the
     791             :  * caller's job.
     792             :  *
     793             :  * Returns true if the lock isn't free and we need to wait.
     794             :  */
     795             : static bool
     796   712671378 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     797             : {
     798             :     uint32      old_state;
     799             : 
     800             :     Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
     801             : 
     802             :     /*
     803             :      * Read once outside the loop, later iterations will get the newer value
     804             :      * via compare & exchange.
     805             :      */
     806   712671378 :     old_state = pg_atomic_read_u32(&lock->state);
     807             : 
     808             :     /* loop until we've determined whether we could acquire the lock or not */
     809             :     while (true)
     810      524722 :     {
     811             :         uint32      desired_state;
     812             :         bool        lock_free;
     813             : 
     814   713196100 :         desired_state = old_state;
     815             : 
     816   713196100 :         if (mode == LW_EXCLUSIVE)
     817             :         {
     818   448145268 :             lock_free = (old_state & LW_LOCK_MASK) == 0;
     819   448145268 :             if (lock_free)
     820   444385480 :                 desired_state += LW_VAL_EXCLUSIVE;
     821             :         }
     822             :         else
     823             :         {
     824   265050832 :             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
     825   265050832 :             if (lock_free)
     826   259762118 :                 desired_state += LW_VAL_SHARED;
     827             :         }
     828             : 
     829             :         /*
     830             :          * Attempt to swap in the state we are expecting. If we didn't see
     831             :          * lock to be free, that's just the old value. If we saw it as free,
     832             :          * we'll attempt to mark it acquired. The reason that we always swap
     833             :          * in the value is that this doubles as a memory barrier. We could try
     834             :          * to be smarter and only swap in values if we saw the lock as free,
     835             :          * but benchmarks haven't shown it to be beneficial so far.
     836             :          *
     837             :          * Retry if the value changed since we last looked at it.
     838             :          */
     839   713196100 :         if (pg_atomic_compare_exchange_u32(&lock->state,
     840             :                                            &old_state, desired_state))
     841             :         {
     842   712671378 :             if (lock_free)
     843             :             {
     844             :                 /* Great! Got the lock. */
     845             : #ifdef LOCK_DEBUG
     846             :                 if (mode == LW_EXCLUSIVE)
     847             :                     lock->owner = MyProc;
     848             : #endif
     849   703790038 :                 return false;
     850             :             }
     851             :             else
     852     8881340 :                 return true;    /* somebody else has the lock */
     853             :         }
     854             :     }
     855             :     pg_unreachable();
     856             : }
     857             : 
     858             : /*
     859             :  * Lock the LWLock's wait list against concurrent activity.
     860             :  *
     861             :  * NB: even though the wait list is locked, non-conflicting lock operations
     862             :  * may still happen concurrently.
     863             :  *
     864             :  * Time spent holding mutex should be short!
     865             :  */
     866             : static void
     867    15185748 : LWLockWaitListLock(LWLock *lock)
     868             : {
     869             :     uint32      old_state;
     870             : #ifdef LWLOCK_STATS
     871             :     lwlock_stats *lwstats;
     872             :     uint32      delays = 0;
     873             : 
     874             :     lwstats = get_lwlock_stats_entry(lock);
     875             : #endif
     876             : 
     877             :     while (true)
     878             :     {
     879             :         /* always try once to acquire lock directly */
     880    15185748 :         old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
     881    15185748 :         if (!(old_state & LW_FLAG_LOCKED))
     882    14993540 :             break;              /* got lock */
     883             : 
     884             :         /* and then spin without atomic operations until lock is released */
     885             :         {
     886             :             SpinDelayStatus delayStatus;
     887             : 
     888      192208 :             init_local_spin_delay(&delayStatus);
     889             : 
     890      639478 :             while (old_state & LW_FLAG_LOCKED)
     891             :             {
     892      447270 :                 perform_spin_delay(&delayStatus);
     893      447270 :                 old_state = pg_atomic_read_u32(&lock->state);
     894             :             }
     895             : #ifdef LWLOCK_STATS
     896             :             delays += delayStatus.delays;
     897             : #endif
     898      192208 :             finish_spin_delay(&delayStatus);
     899             :         }
     900             : 
     901             :         /*
     902             :          * Retry. The lock might obviously already be re-acquired by the time
     903             :          * we're attempting to get it again.
     904             :          */
     905             :     }
     906             : 
     907             : #ifdef LWLOCK_STATS
     908             :     lwstats->spin_delay_count += delays;
     909             : #endif
     910    14993540 : }
     911             : 
     912             : /*
     913             :  * Unlock the LWLock's wait list.
     914             :  *
     915             :  * Note that it can be more efficient to manipulate flags and release the
     916             :  * locks in a single atomic operation.
     917             :  */
     918             : static void
     919     9794082 : LWLockWaitListUnlock(LWLock *lock)
     920             : {
     921             :     uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
     922             : 
     923     9794082 :     old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
     924             : 
     925             :     Assert(old_state & LW_FLAG_LOCKED);
     926     9794082 : }
     927             : 
     928             : /*
     929             :  * Wake up all the lockers that currently have a chance to acquire the lock.
     930             :  */
     931             : static void
     932     5199458 : LWLockWakeup(LWLock *lock)
     933             : {
     934             :     bool        new_release_ok;
     935     5199458 :     bool        wokeup_somebody = false;
     936             :     proclist_head wakeup;
     937             :     proclist_mutable_iter iter;
     938             : 
     939     5199458 :     proclist_init(&wakeup);
     940             : 
     941     5199458 :     new_release_ok = true;
     942             : 
     943             :     /* lock wait list while collecting backends to wake up */
     944     5199458 :     LWLockWaitListLock(lock);
     945             : 
     946     7919134 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
     947             :     {
     948     4481156 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     949             : 
     950     4481156 :         if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
     951        2828 :             continue;
     952             : 
     953     4478328 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
     954     4478328 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
     955             : 
     956     4478328 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
     957             :         {
     958             :             /*
     959             :              * Prevent additional wakeups until retryer gets to run. Backends
     960             :              * that are just waiting for the lock to become free don't retry
     961             :              * automatically.
     962             :              */
     963     4370744 :             new_release_ok = false;
     964             : 
     965             :             /*
     966             :              * Don't wakeup (further) exclusive locks.
     967             :              */
     968     4370744 :             wokeup_somebody = true;
     969             :         }
     970             : 
     971             :         /*
     972             :          * Signal that the process isn't on the wait list anymore. This allows
     973             :          * LWLockDequeueSelf() to remove itself from the waitlist with a
     974             :          * proclist_delete(), rather than having to check if it has been
     975             :          * removed from the list.
     976             :          */
     977             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
     978     4478328 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
     979             : 
     980             :         /*
     981             :          * Once we've woken up an exclusive lock, there's no point in waking
     982             :          * up anybody else.
     983             :          */
     984     4478328 :         if (waiter->lwWaitMode == LW_EXCLUSIVE)
     985     1761480 :             break;
     986             :     }
     987             : 
     988             :     Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
     989             : 
     990             :     /* unset required flags, and release lock, in one fell swoop */
     991             :     {
     992             :         uint32      old_state;
     993             :         uint32      desired_state;
     994             : 
     995     5199458 :         old_state = pg_atomic_read_u32(&lock->state);
     996             :         while (true)
     997             :         {
     998     5256820 :             desired_state = old_state;
     999             : 
    1000             :             /* compute desired flags */
    1001             : 
    1002     5256820 :             if (new_release_ok)
    1003      885444 :                 desired_state |= LW_FLAG_RELEASE_OK;
    1004             :             else
    1005     4371376 :                 desired_state &= ~LW_FLAG_RELEASE_OK;
    1006             : 
    1007     5256820 :             if (proclist_is_empty(&wakeup))
    1008      840444 :                 desired_state &= ~LW_FLAG_HAS_WAITERS;
    1009             : 
    1010     5256820 :             desired_state &= ~LW_FLAG_LOCKED;   /* release lock */
    1011             : 
    1012     5256820 :             if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
    1013             :                                                desired_state))
    1014     5199458 :                 break;
    1015             :         }
    1016             :     }
    1017             : 
    1018             :     /* Awaken any waiters I removed from the queue. */
    1019     9677786 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1020             :     {
    1021     4478328 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1022             : 
    1023             :         LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
    1024     4478328 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1025             : 
    1026             :         /*
    1027             :          * Guarantee that lwWaiting being unset only becomes visible once the
    1028             :          * unlink from the list has completed. Otherwise the target backend
    1029             :          * could be woken up for another reason and enqueue for a new lock - if
    1030             :          * that happens before the list unlink happens, the list would end up
    1031             :          * being corrupted.
    1032             :          *
    1033             :          * The barrier pairs with the LWLockWaitListLock() when enqueuing for
    1034             :          * another lock.
    1035             :          */
    1036     4478328 :         pg_write_barrier();
    1037     4478328 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1038     4478328 :         PGSemaphoreUnlock(waiter->sem);
    1039             :     }
    1040     5199458 : }
    1041             : 
    1042             : /*
    1043             :  * Add ourselves to the end of the queue.
    1044             :  *
    1045             :  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
    1046             :  */
    1047             : static void
    1048     4660644 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
    1049             : {
    1050             :     /*
    1051             :      * If we don't have a PGPROC structure, there's no way to wait. This
    1052             :      * should never occur, since MyProc should only be null during shared
    1053             :      * memory initialization.
    1054             :      */
    1055     4660644 :     if (MyProc == NULL)
    1056           0 :         elog(PANIC, "cannot wait without a PGPROC structure");
    1057             : 
    1058     4660644 :     if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
    1059           0 :         elog(PANIC, "queueing for lock while waiting on another one");
    1060             : 
    1061     4660644 :     LWLockWaitListLock(lock);
    1062             : 
    1063             :     /* setting the flag is protected by the spinlock */
    1064     4660644 :     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
    1065             : 
    1066     4660644 :     MyProc->lwWaiting = LW_WS_WAITING;
    1067     4660644 :     MyProc->lwWaitMode = mode;
    1068             : 
    1069             :     /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
    1070     4660644 :     if (mode == LW_WAIT_UNTIL_FREE)
    1071      115296 :         proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
    1072             :     else
    1073     4545348 :         proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
    1074             : 
    1075             :     /* Can release the mutex now */
    1076     4660644 :     LWLockWaitListUnlock(lock);
    1077             : 
    1078             : #ifdef LOCK_DEBUG
    1079             :     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
    1080             : #endif
    1081     4660644 : }
    1082             : 
    1083             : /*
    1084             :  * Remove ourselves from the waitlist.
    1085             :  *
    1086             :  * This is used if we queued ourselves because we thought we needed to sleep
    1087             :  * but, after further checking, we discovered that we don't actually need to
    1088             :  * do so.
    1089             :  */
    1090             : static void
    1091      236532 : LWLockDequeueSelf(LWLock *lock)
    1092             : {
    1093             :     bool        on_waitlist;
    1094             : 
    1095             : #ifdef LWLOCK_STATS
    1096             :     lwlock_stats *lwstats;
    1097             : 
    1098             :     lwstats = get_lwlock_stats_entry(lock);
    1099             : 
    1100             :     lwstats->dequeue_self_count++;
    1101             : #endif
    1102             : 
    1103      236532 :     LWLockWaitListLock(lock);
    1104             : 
    1105             :     /*
    1106             :      * Remove ourselves from the waitlist, unless we've already been removed.
    1107             :      * The removal happens with the wait list lock held, so there's no race in
    1108             :      * this check.
    1109             :      */
    1110      236532 :     on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
    1111      236532 :     if (on_waitlist)
    1112      179396 :         proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
    1113             : 
    1114      236532 :     if (proclist_is_empty(&lock->waiters) &&
    1115      220940 :         (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
    1116             :     {
    1117      220792 :         pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
    1118             :     }
    1119             : 
    1120             :     /* XXX: combine with fetch_and above? */
    1121      236532 :     LWLockWaitListUnlock(lock);
    1122             : 
    1123             :     /* clear waiting state again, nice for debugging */
    1124      236532 :     if (on_waitlist)
    1125      179396 :         MyProc->lwWaiting = LW_WS_NOT_WAITING;
    1126             :     else
    1127             :     {
    1128       57136 :         int         extraWaits = 0;
    1129             : 
    1130             :         /*
    1131             :          * Somebody else dequeued us and has or will wake us up. Deal with the
    1132             :          * superfluous absorption of a wakeup.
    1133             :          */
    1134             : 
    1135             :         /*
    1136             :          * Reset RELEASE_OK flag if somebody woke us before we removed
    1137             :          * ourselves - they'll have set it to false.
    1138             :          */
    1139       57136 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1140             : 
    1141             :         /*
    1142             :          * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
    1143             :          * get reset at some inconvenient point later. Most of the time this
    1144             :          * will immediately return.
    1145             :          */
    1146             :         for (;;)
    1147             :         {
    1148       57136 :             PGSemaphoreLock(MyProc->sem);
    1149       57136 :             if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
    1150       57136 :                 break;
    1151           0 :             extraWaits++;
    1152             :         }
    1153             : 
    1154             :         /*
    1155             :          * Fix the process wait semaphore's count for any absorbed wakeups.
    1156             :          */
    1157       57136 :         while (extraWaits-- > 0)
    1158           0 :             PGSemaphoreUnlock(MyProc->sem);
    1159             :     }
    1160             : 
    1161             : #ifdef LOCK_DEBUG
    1162             :     {
    1163             :         /* not waiting anymore */
    1164             :         uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1165             : 
    1166             :         Assert(nwaiters < MAX_BACKENDS);
    1167             :     }
    1168             : #endif
    1169      236532 : }
    1170             : 
    1171             : /*
    1172             :  * LWLockAcquire - acquire a lightweight lock in the specified mode
    1173             :  *
    1174             :  * If the lock is not available, sleep until it is.  Returns true if the lock
    1175             :  * was available immediately, false if we had to sleep.
    1176             :  *
    1177             :  * Side effect: cancel/die interrupts are held off until lock release.
    1178             :  */
    1179             : bool
    1180   699251824 : LWLockAcquire(LWLock *lock, LWLockMode mode)
    1181             : {
    1182   699251824 :     PGPROC     *proc = MyProc;
    1183   699251824 :     bool        result = true;
    1184   699251824 :     int         extraWaits = 0;
    1185             : #ifdef LWLOCK_STATS
    1186             :     lwlock_stats *lwstats;
    1187             : 
    1188             :     lwstats = get_lwlock_stats_entry(lock);
    1189             : #endif
    1190             : 
    1191             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1192             : 
    1193             :     PRINT_LWDEBUG("LWLockAcquire", lock, mode);
    1194             : 
    1195             : #ifdef LWLOCK_STATS
    1196             :     /* Count lock acquisition attempts */
    1197             :     if (mode == LW_EXCLUSIVE)
    1198             :         lwstats->ex_acquire_count++;
    1199             :     else
    1200             :         lwstats->sh_acquire_count++;
    1201             : #endif                          /* LWLOCK_STATS */
    1202             : 
    1203             :     /*
    1204             :      * We can't wait if we haven't got a PGPROC.  This should only occur
    1205             :      * during bootstrap or shared memory initialization.  Put an Assert here
    1206             :      * to catch unsafe coding practices.
    1207             :      */
    1208             :     Assert(!(proc == NULL && IsUnderPostmaster));
    1209             : 
    1210             :     /* Ensure we will have room to remember the lock */
    1211   699251824 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1212           0 :         elog(ERROR, "too many LWLocks taken");
    1213             : 
    1214             :     /*
    1215             :      * Lock out cancel/die interrupts until we exit the code section protected
    1216             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1217             :      * manipulations of data structures in shared memory.
    1218             :      */
    1219   699251824 :     HOLD_INTERRUPTS();
    1220             : 
    1221             :     /*
    1222             :      * Loop here to try to acquire lock after each time we are signaled by
    1223             :      * LWLockRelease.
    1224             :      *
    1225             :      * NOTE: it might seem better to have LWLockRelease actually grant us the
    1226             :      * lock, rather than retrying and possibly having to go back to sleep. But
    1227             :      * in practice that is no good because it means a process swap for every
    1228             :      * lock acquisition when two or more processes are contending for the same
    1229             :      * lock.  Since LWLocks are normally used to protect not-very-long
    1230             :      * sections of computation, a process needs to be able to acquire and
    1231             :      * release the same lock many times during a single CPU time slice, even
    1232             :      * in the presence of contention.  The efficiency of being able to do that
    1233             :      * outweighs the inefficiency of sometimes wasting a process dispatch
    1234             :      * cycle because the lock is not free when a released waiter finally gets
    1235             :      * to run.  See pgsql-hackers archives for 29-Dec-01.
    1236             :      */
    1237             :     for (;;)
    1238     4314550 :     {
    1239             :         bool        mustwait;
    1240             : 
    1241             :         /*
    1242             :          * Try to grab the lock the first time, we're not in the waitqueue
    1243             :          * yet/anymore.
    1244             :          */
    1245   703566374 :         mustwait = LWLockAttemptLock(lock, mode);
    1246             : 
    1247   703566374 :         if (!mustwait)
    1248             :         {
    1249             :             LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
    1250   699021026 :             break;              /* got the lock */
    1251             :         }
    1252             : 
    1253             :         /*
    1254             :          * Ok, at this point we couldn't grab the lock on the first try. We
    1255             :          * cannot simply queue ourselves to the end of the list and wait to be
    1256             :          * woken up because by now the lock could long have been released.
    1257             :          * Instead add us to the queue and try to grab the lock again. If we
    1258             :          * succeed we need to revert the queuing and be happy, otherwise we
    1259             :          * recheck the lock. If we still couldn't grab it, we know that the
    1260             :          * other locker will see our queue entries when releasing since they
    1261             :          * existed before we checked for the lock.
    1262             :          */
    1263             : 
    1264             :         /* add to the queue */
    1265     4545348 :         LWLockQueueSelf(lock, mode);
    1266             : 
    1267             :         /* we're now guaranteed to be woken up if necessary */
    1268     4545348 :         mustwait = LWLockAttemptLock(lock, mode);
    1269             : 
    1270             :         /* ok, grabbed the lock the second time round, need to undo queueing */
    1271     4545348 :         if (!mustwait)
    1272             :         {
    1273             :             LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
    1274             : 
    1275      230798 :             LWLockDequeueSelf(lock);
    1276      230798 :             break;
    1277             :         }
    1278             : 
    1279             :         /*
    1280             :          * Wait until awakened.
    1281             :          *
    1282             :          * It is possible that we get awakened for a reason other than being
    1283             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1284             :          * we've gotten the LWLock, re-increment the sema by the number of
    1285             :          * additional signals received.
    1286             :          */
    1287             :         LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
    1288             : 
    1289             : #ifdef LWLOCK_STATS
    1290             :         lwstats->block_count++;
    1291             : #endif
    1292             : 
    1293     4314550 :         LWLockReportWaitStart(lock);
    1294             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1295             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1296             : 
    1297             :         for (;;)
    1298             :         {
    1299     4314550 :             PGSemaphoreLock(proc->sem);
    1300     4314550 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1301     4314550 :                 break;
    1302           0 :             extraWaits++;
    1303             :         }
    1304             : 
    1305             :         /* Retrying, allow LWLockRelease to release waiters again. */
    1306     4314550 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1307             : 
    1308             : #ifdef LOCK_DEBUG
    1309             :         {
    1310             :             /* not waiting anymore */
    1311             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1312             : 
    1313             :             Assert(nwaiters < MAX_BACKENDS);
    1314             :         }
    1315             : #endif
    1316             : 
    1317             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1318             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1319     4314550 :         LWLockReportWaitEnd();
    1320             : 
    1321             :         LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
    1322             : 
    1323             :         /* Now loop back and try to acquire lock again. */
    1324     4314550 :         result = false;
    1325             :     }
    1326             : 
    1327             :     if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
    1328             :         TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
    1329             : 
    1330             :     /* Add lock to list of locks held by this backend */
    1331   699251824 :     held_lwlocks[num_held_lwlocks].lock = lock;
    1332   699251824 :     held_lwlocks[num_held_lwlocks++].mode = mode;
    1333             : 
    1334             :     /*
    1335             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1336             :      */
    1337   699251824 :     while (extraWaits-- > 0)
    1338           0 :         PGSemaphoreUnlock(proc->sem);
    1339             : 
    1340   699251824 :     return result;
    1341             : }
    1342             : 
    1343             : /*
    1344             :  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
    1345             :  *
    1346             :  * If the lock is not available, return false with no side-effects.
    1347             :  *
    1348             :  * If successful, cancel/die interrupts are held off until lock release.
    1349             :  */
    1350             : bool
    1351     4298104 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    1352             : {
    1353             :     bool        mustwait;
    1354             : 
    1355             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1356             : 
    1357             :     PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
    1358             : 
    1359             :     /* Ensure we will have room to remember the lock */
    1360     4298104 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1361           0 :         elog(ERROR, "too many LWLocks taken");
    1362             : 
    1363             :     /*
    1364             :      * Lock out cancel/die interrupts until we exit the code section protected
    1365             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1366             :      * manipulations of data structures in shared memory.
    1367             :      */
    1368     4298104 :     HOLD_INTERRUPTS();
    1369             : 
    1370             :     /* Check for the lock */
    1371     4298104 :     mustwait = LWLockAttemptLock(lock, mode);
    1372             : 
    1373     4298104 :     if (mustwait)
    1374             :     {
    1375             :         /* Failed to get lock, so release interrupt holdoff */
    1376        1720 :         RESUME_INTERRUPTS();
    1377             : 
    1378             :         LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
    1379             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
    1380             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
    1381             :     }
    1382             :     else
    1383             :     {
    1384             :         /* Add lock to list of locks held by this backend */
    1385     4296384 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1386     4296384 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1387             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
    1388             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
    1389             :     }
    1390     4298104 :     return !mustwait;
    1391             : }
    1392             : 
    1393             : /*
    1394             :  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
    1395             :  *
    1396             :  * The semantics of this function are a bit funky.  If the lock is currently
    1397             :  * free, it is acquired in the given mode, and the function returns true.  If
    1398             :  * the lock isn't immediately free, the function waits until it is released
    1399             :  * and returns false, but does not acquire the lock.
    1400             :  *
    1401             :  * This is currently used for WALWriteLock: when a backend flushes the WAL,
    1402             :  * holding WALWriteLock, it can flush the commit records of many other
    1403             :  * backends as a side-effect.  Those other backends need to wait until the
    1404             :  * flush finishes, but don't need to acquire the lock anymore.  They can just
    1405             :  * wake up, observe that their records have already been flushed, and return.
    1406             :  */
    1407             : bool
    1408      251558 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
    1409             : {
    1410      251558 :     PGPROC     *proc = MyProc;
    1411             :     bool        mustwait;
    1412      251558 :     int         extraWaits = 0;
    1413             : #ifdef LWLOCK_STATS
    1414             :     lwlock_stats *lwstats;
    1415             : 
    1416             :     lwstats = get_lwlock_stats_entry(lock);
    1417             : #endif
    1418             : 
    1419             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1420             : 
    1421             :     PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
    1422             : 
    1423             :     /* Ensure we will have room to remember the lock */
    1424      251558 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1425           0 :         elog(ERROR, "too many LWLocks taken");
    1426             : 
    1427             :     /*
    1428             :      * Lock out cancel/die interrupts until we exit the code section protected
    1429             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1430             :      * manipulations of data structures in shared memory.
    1431             :      */
    1432      251558 :     HOLD_INTERRUPTS();
    1433             : 
    1434             :     /*
    1435             :      * NB: We're using nearly the same twice-in-a-row lock acquisition
    1436             :      * protocol as LWLockAcquire(). Check its comments for details.
    1437             :      */
    1438      251558 :     mustwait = LWLockAttemptLock(lock, mode);
    1439             : 
    1440      251558 :     if (mustwait)
    1441             :     {
    1442        9994 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1443             : 
    1444        9994 :         mustwait = LWLockAttemptLock(lock, mode);
    1445             : 
    1446        9994 :         if (mustwait)
    1447             :         {
    1448             :             /*
    1449             :              * Wait until awakened.  Like in LWLockAcquire, be prepared for
    1450             :              * bogus wakeups.
    1451             :              */
    1452             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
    1453             : 
    1454             : #ifdef LWLOCK_STATS
    1455             :             lwstats->block_count++;
    1456             : #endif
    1457             : 
    1458        9728 :             LWLockReportWaitStart(lock);
    1459             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1460             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1461             : 
    1462             :             for (;;)
    1463             :             {
    1464        9728 :                 PGSemaphoreLock(proc->sem);
    1465        9728 :                 if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1466        9728 :                     break;
    1467           0 :                 extraWaits++;
    1468             :             }
    1469             : 
    1470             : #ifdef LOCK_DEBUG
    1471             :             {
    1472             :                 /* not waiting anymore */
    1473             :                 uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1474             : 
    1475             :                 Assert(nwaiters < MAX_BACKENDS);
    1476             :             }
    1477             : #endif
    1478             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1479             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1480        9728 :             LWLockReportWaitEnd();
    1481             : 
    1482             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
    1483             :         }
    1484             :         else
    1485             :         {
    1486             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
    1487             : 
    1488             :             /*
    1489             :              * Got lock in the second attempt, undo queueing. We need to treat
    1490             :              * this as having successfully acquired the lock, otherwise we'd
    1491             :              * not necessarily wake up people we've prevented from acquiring
    1492             :              * the lock.
    1493             :              */
    1494         266 :             LWLockDequeueSelf(lock);
    1495             :         }
    1496             :     }
    1497             : 
    1498             :     /*
    1499             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1500             :      */
    1501      251558 :     while (extraWaits-- > 0)
    1502           0 :         PGSemaphoreUnlock(proc->sem);
    1503             : 
    1504      251558 :     if (mustwait)
    1505             :     {
    1506             :         /* Failed to get lock, so release interrupt holdoff */
    1507        9728 :         RESUME_INTERRUPTS();
    1508             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
    1509             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
    1510             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
    1511             :     }
    1512             :     else
    1513             :     {
    1514             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
    1515             :         /* Add lock to list of locks held by this backend */
    1516      241830 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1517      241830 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1518             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
    1519             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
    1520             :     }
    1521             : 
    1522      251558 :     return !mustwait;
    1523             : }
    1524             : 
    1525             : /*
    1526             :  * Does the lwlock in its current state need to wait for the variable value to
    1527             :  * change?
    1528             :  *
    1529             :  * If we don't need to wait, and it's because the value of the variable has
    1530             :  * changed, store the current value in newval.
    1531             :  *
    1532             :  * *result is set to true if the lock was free, and false otherwise.
    1533             :  */
    1534             : static bool
    1535     6627634 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1536             :                        uint64 *newval, bool *result)
    1537             : {
    1538             :     bool        mustwait;
    1539             :     uint64      value;
    1540             : 
    1541             :     /*
    1542             :      * Test first to see if the slot is free right now.
    1543             :      *
    1544             :      * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
    1545             :      * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
    1546             :      * this, so we don't need a memory barrier here as far as the current
    1547             :      * usage is concerned.  But that might not be safe in general.
    1548             :      */
    1549     6627634 :     mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
    1550             : 
    1551     6627634 :     if (!mustwait)
    1552             :     {
    1553     4685650 :         *result = true;
    1554     4685650 :         return false;
    1555             :     }
    1556             : 
    1557     1941984 :     *result = false;
    1558             : 
    1559             :     /*
    1560             :      * Reading this value atomically is safe even on platforms where uint64
    1561             :      * cannot be read without observing a torn value.
    1562             :      */
    1563     1941984 :     value = pg_atomic_read_u64(valptr);
    1564             : 
    1565     1941984 :     if (value != oldval)
    1566             :     {
    1567     1736848 :         mustwait = false;
    1568     1736848 :         *newval = value;
    1569             :     }
    1570             :     else
    1571             :     {
    1572      205136 :         mustwait = true;
    1573             :     }
    1574             : 
    1575     1941984 :     return mustwait;
    1576             : }
    1577             : 
    1578             : /*
    1579             :  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
    1580             :  *
    1581             :  * If the lock is held and *valptr equals oldval, waits until the lock is
    1582             :  * either freed, or the lock holder updates *valptr by calling
    1583             :  * LWLockUpdateVar.  If the lock is free on exit (immediately or after
    1584             :  * waiting), returns true.  If the lock is still held, but *valptr no longer
    1585             :  * matches oldval, returns false and sets *newval to the current value in
    1586             :  * *valptr.
    1587             :  *
    1588             :  * Note: this function ignores shared lock holders; if the lock is held
    1589             :  * in shared mode, returns 'true'.
    1590             :  *
    1591             :  * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
    1592             :  * hence the caller of this function may want to rely on an explicit barrier or
    1593             :  * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
    1594             :  */
    1595             : bool
    1596     6422498 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1597             :                  uint64 *newval)
    1598             : {
    1599     6422498 :     PGPROC     *proc = MyProc;
    1600     6422498 :     int         extraWaits = 0;
    1601     6422498 :     bool        result = false;
    1602             : #ifdef LWLOCK_STATS
    1603             :     lwlock_stats *lwstats;
    1604             : 
    1605             :     lwstats = get_lwlock_stats_entry(lock);
    1606             : #endif
    1607             : 
    1608             :     PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
    1609             : 
    1610             :     /*
    1611             :      * Lock out cancel/die interrupts while we sleep on the lock.  There is no
    1612             :      * cleanup mechanism to remove us from the wait queue if we got
    1613             :      * interrupted.
    1614             :      */
    1615     6422498 :     HOLD_INTERRUPTS();
    1616             : 
    1617             :     /*
    1618             :      * Loop here to check the lock's status after each time we are signaled.
    1619             :      */
    1620             :     for (;;)
    1621       99834 :     {
    1622             :         bool        mustwait;
    1623             : 
    1624     6522332 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1625             :                                           &result);
    1626             : 
    1627     6522332 :         if (!mustwait)
    1628     6417030 :             break;              /* the lock was free or value didn't match */
    1629             : 
    1630             :         /*
    1631             :          * Add myself to wait queue. Note that this is racy, somebody else
    1632             :          * could wakeup before we're finished queuing. NB: We're using nearly
    1633             :          * the same twice-in-a-row lock acquisition protocol as
    1634             :          * LWLockAcquire(). Check its comments for details. The only
    1635             :          * difference is that we also have to check the variable's values when
    1636             :          * checking the state of the lock.
    1637             :          */
    1638      105302 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1639             : 
    1640             :         /*
    1641             :          * Set RELEASE_OK flag, to make sure we get woken up as soon as the
    1642             :          * lock is released.
    1643             :          */
    1644      105302 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1645             : 
    1646             :         /*
    1647             :          * We're now guaranteed to be woken up if necessary. Recheck the lock
    1648             :          * and variables state.
    1649             :          */
    1650      105302 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1651             :                                           &result);
    1652             : 
    1653             :         /* Ok, no conflict after we queued ourselves. Undo queueing. */
    1654      105302 :         if (!mustwait)
    1655             :         {
    1656             :             LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
    1657             : 
    1658        5468 :             LWLockDequeueSelf(lock);
    1659        5468 :             break;
    1660             :         }
    1661             : 
    1662             :         /*
    1663             :          * Wait until awakened.
    1664             :          *
    1665             :          * It is possible that we get awakened for a reason other than being
    1666             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1667             :          * we're done waiting, re-increment the sema by the number of
    1668             :          * additional signals received.
    1669             :          */
    1670             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
    1671             : 
    1672             : #ifdef LWLOCK_STATS
    1673             :         lwstats->block_count++;
    1674             : #endif
    1675             : 
    1676       99834 :         LWLockReportWaitStart(lock);
    1677             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1678             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
    1679             : 
    1680             :         for (;;)
    1681             :         {
    1682       99834 :             PGSemaphoreLock(proc->sem);
    1683       99834 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1684       99834 :                 break;
    1685           0 :             extraWaits++;
    1686             :         }
    1687             : 
    1688             : #ifdef LOCK_DEBUG
    1689             :         {
    1690             :             /* not waiting anymore */
    1691             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1692             : 
    1693             :             Assert(nwaiters < MAX_BACKENDS);
    1694             :         }
    1695             : #endif
    1696             : 
    1697             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1698             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
    1699       99834 :         LWLockReportWaitEnd();
    1700             : 
    1701             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
    1702             : 
    1703             :         /* Now loop back and check the status of the lock again. */
    1704             :     }
    1705             : 
    1706             :     /*
    1707             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1708             :      */
    1709     6422498 :     while (extraWaits-- > 0)
    1710           0 :         PGSemaphoreUnlock(proc->sem);
    1711             : 
    1712             :     /*
    1713             :      * Now okay to allow cancel/die interrupts.
    1714             :      */
    1715     6422498 :     RESUME_INTERRUPTS();
    1716             : 
    1717     6422498 :     return result;
    1718             : }
    1719             : 
    1720             : 
    1721             : /*
    1722             :  * LWLockUpdateVar - Update a variable and wake up waiters atomically
    1723             :  *
    1724             :  * Sets *valptr to 'val', and wakes up all processes waiting for us with
    1725             :  * LWLockWaitForVar().  It first sets the value atomically and then wakes up
    1726             :  * waiting processes so that any process calling LWLockWaitForVar() on the same
    1727             :  * lock is guaranteed to see the new value, and act accordingly.
    1728             :  *
    1729             :  * The caller must be holding the lock in exclusive mode.
    1730             :  */
    1731             : void
    1732     4896906 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1733             : {
    1734             :     proclist_head wakeup;
    1735             :     proclist_mutable_iter iter;
    1736             : 
    1737             :     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
    1738             : 
    1739             :     /*
    1740             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1741             :      * that the variable is updated before waking up waiters.
    1742             :      */
    1743     4896906 :     pg_atomic_exchange_u64(valptr, val);
    1744             : 
    1745     4896906 :     proclist_init(&wakeup);
    1746             : 
    1747     4896906 :     LWLockWaitListLock(lock);
    1748             : 
    1749             :     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
    1750             : 
    1751             :     /*
    1752             :      * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
    1753             :      * up. They are always in the front of the queue.
    1754             :      */
    1755     4900618 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1756             :     {
    1757       93306 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1758             : 
    1759       93306 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
    1760       89594 :             break;
    1761             : 
    1762        3712 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1763        3712 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
    1764             : 
    1765             :         /* see LWLockWakeup() */
    1766             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
    1767        3712 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
    1768             :     }
    1769             : 
    1770             :     /* We are done updating shared state of the lock itself. */
    1771     4896906 :     LWLockWaitListUnlock(lock);
    1772             : 
    1773             :     /*
    1774             :      * Awaken any waiters I removed from the queue.
    1775             :      */
    1776     4900618 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1777             :     {
    1778        3712 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1779             : 
    1780        3712 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1781             :         /* check comment in LWLockWakeup() about this barrier */
    1782        3712 :         pg_write_barrier();
    1783        3712 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1784        3712 :         PGSemaphoreUnlock(waiter->sem);
    1785             :     }
    1786     4896906 : }
    1787             : 
    1788             : 
    1789             : /*
    1790             :  * Stop treating lock as held by current backend.
    1791             :  *
    1792             :  * This is the code that can be shared between actually releasing a lock
    1793             :  * (LWLockRelease()) and just not tracking ownership of the lock anymore
    1794             :  * without releasing the lock (LWLockDisown()).
    1795             :  *
    1796             :  * Returns the mode in which the lock was held by the current backend.
    1797             :  *
    1798             :  * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
    1799             :  * to the caller.
    1800             :  *
    1801             :  * NB: This will leave lock->owner pointing to the current backend (if
    1802             :  * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
    1803             :  * debug cases of missing wakeups during lock release.
    1804             :  */
    1805             : static inline LWLockMode
    1806   703790038 : LWLockDisownInternal(LWLock *lock)
    1807             : {
    1808             :     LWLockMode  mode;
    1809             :     int         i;
    1810             : 
    1811             :     /*
    1812             :      * Remove lock from list of locks held.  Usually, but not always, it will
    1813             :      * be the latest-acquired lock; so search array backwards.
    1814             :      */
    1815   782831686 :     for (i = num_held_lwlocks; --i >= 0;)
    1816   782831686 :         if (lock == held_lwlocks[i].lock)
    1817   703790038 :             break;
    1818             : 
    1819   703790038 :     if (i < 0)
    1820           0 :         elog(ERROR, "lock %s is not held", T_NAME(lock));
    1821             : 
    1822   703790038 :     mode = held_lwlocks[i].mode;
    1823             : 
    1824   703790038 :     num_held_lwlocks--;
    1825   782831686 :     for (; i < num_held_lwlocks; i++)
    1826    79041648 :         held_lwlocks[i] = held_lwlocks[i + 1];
    1827             : 
    1828   703790038 :     return mode;
    1829             : }
    1830             : 
    1831             : /*
    1832             :  * Helper function to release lock, shared between LWLockRelease() and
    1833             :  * LWLockReleaseDisowned().
    1834             :  */
    1835             : static void
    1836   703790038 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
    1837             : {
    1838             :     uint32      oldstate;
    1839             :     bool        check_waiters;
    1840             : 
    1841             :     /*
    1842             :      * Release my hold on lock; after that it can immediately be acquired by
    1843             :      * others, even if we still have to wake up other waiters.
    1844             :      */
    1845   703790038 :     if (mode == LW_EXCLUSIVE)
    1846   444177476 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    1847             :     else
    1848   259612562 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
    1849             : 
    1850             :     /* nobody else can have that kind of lock */
    1851             :     Assert(!(oldstate & LW_VAL_EXCLUSIVE));
    1852             : 
    1853             :     if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
    1854             :         TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
    1855             : 
    1856             :     /*
    1857             :      * We're still waiting for backends to get scheduled, don't wake them up
    1858             :      * again.
    1859             :      */
    1860   703790038 :     if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
    1861     5216046 :         (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
    1862     5216046 :         (oldstate & LW_LOCK_MASK) == 0)
    1863     5199458 :         check_waiters = true;
    1864             :     else
    1865   698590580 :         check_waiters = false;
    1866             : 
    1867             :     /*
    1868             :      * As waking up waiters requires the spinlock to be acquired, only do so
    1869             :      * if necessary.
    1870             :      */
    1871   703790038 :     if (check_waiters)
    1872             :     {
    1873             :         /* XXX: remove before commit? */
    1874             :         LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
    1875     5199458 :         LWLockWakeup(lock);
    1876             :     }
    1877   703790038 : }
    1878             : 
    1879             : 
    1880             : /*
    1881             :  * Stop treating lock as held by current backend.
    1882             :  *
    1883             :  * After calling this function it's the caller's responsibility to ensure that
    1884             :  * the lock gets released (via LWLockReleaseDisowned()), even in case of an
    1885             :  * error. This is only desirable if the lock is going to be released in a
    1886             :  * different process than the process that acquired it.
    1887             :  */
    1888             : void
    1889           0 : LWLockDisown(LWLock *lock)
    1890             : {
    1891           0 :     LWLockDisownInternal(lock);
    1892             : 
    1893           0 :     RESUME_INTERRUPTS();
    1894           0 : }
    1895             : 
    1896             : /*
    1897             :  * LWLockRelease - release a previously acquired lock
    1898             :  */
    1899             : void
    1900   703790038 : LWLockRelease(LWLock *lock)
    1901             : {
    1902             :     LWLockMode  mode;
    1903             : 
    1904   703790038 :     mode = LWLockDisownInternal(lock);
    1905             : 
    1906             :     PRINT_LWDEBUG("LWLockRelease", lock, mode);
    1907             : 
    1908   703790038 :     LWLockReleaseInternal(lock, mode);
    1909             : 
    1910             :     /*
    1911             :      * Now okay to allow cancel/die interrupts.
    1912             :      */
    1913   703790038 :     RESUME_INTERRUPTS();
    1914   703790038 : }
    1915             : 
    1916             : /*
    1917             :  * Release lock previously disowned with LWLockDisown().
    1918             :  */
    1919             : void
    1920           0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
    1921             : {
    1922           0 :     LWLockReleaseInternal(lock, mode);
    1923           0 : }
    1924             : 
    1925             : /*
    1926             :  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
    1927             :  */
    1928             : void
    1929    28483938 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1930             : {
    1931             :     /*
    1932             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1933             :      * that the variable is updated before releasing the lock.
    1934             :      */
    1935    28483938 :     pg_atomic_exchange_u64(valptr, val);
    1936             : 
    1937    28483938 :     LWLockRelease(lock);
    1938    28483938 : }
    1939             : 
    1940             : 
    1941             : /*
    1942             :  * LWLockReleaseAll - release all currently-held locks
    1943             :  *
    1944             :  * Used to clean up after ereport(ERROR). An important difference between this
    1945             :  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
    1946             :  * unchanged by this operation.  This is necessary since InterruptHoldoffCount
    1947             :  * has been set to an appropriate level earlier in error recovery. We could
    1948             :  * decrement it below zero if we allow it to drop for each released lock!
    1949             :  */
    1950             : void
    1951      107942 : LWLockReleaseAll(void)
    1952             : {
    1953      108330 :     while (num_held_lwlocks > 0)
    1954             :     {
    1955         388 :         HOLD_INTERRUPTS();      /* match the upcoming RESUME_INTERRUPTS */
    1956             : 
    1957         388 :         LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
    1958             :     }
    1959      107942 : }
    1960             : 
    1961             : 
    1962             : /*
    1963             :  * LWLockHeldByMe - test whether my process holds a lock in any mode
    1964             :  *
    1965             :  * This is meant as debug support only.
    1966             :  */
    1967             : bool
    1968           0 : LWLockHeldByMe(LWLock *lock)
    1969             : {
    1970             :     int         i;
    1971             : 
    1972           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1973             :     {
    1974           0 :         if (held_lwlocks[i].lock == lock)
    1975           0 :             return true;
    1976             :     }
    1977           0 :     return false;
    1978             : }
    1979             : 
    1980             : /*
    1981             :  * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
    1982             :  *
    1983             :  * This is meant as debug support only.
    1984             :  */
    1985             : bool
    1986           0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
    1987             : {
    1988             :     char       *held_lock_addr;
    1989             :     char       *begin;
    1990             :     char       *end;
    1991             :     int         i;
    1992             : 
    1993           0 :     begin = (char *) lock;
    1994           0 :     end = begin + nlocks * stride;
    1995           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1996             :     {
    1997           0 :         held_lock_addr = (char *) held_lwlocks[i].lock;
    1998           0 :         if (held_lock_addr >= begin &&
    1999           0 :             held_lock_addr < end &&
    2000           0 :             (held_lock_addr - begin) % stride == 0)
    2001           0 :             return true;
    2002             :     }
    2003           0 :     return false;
    2004             : }
    2005             : 
    2006             : /*
    2007             :  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
    2008             :  *
    2009             :  * This is meant as debug support only.
    2010             :  */
    2011             : bool
    2012           0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
    2013             : {
    2014             :     int         i;
    2015             : 
    2016           0 :     for (i = 0; i < num_held_lwlocks; i++)
    2017             :     {
    2018           0 :         if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
    2019           0 :             return true;
    2020             :     }
    2021           0 :     return false;
    2022             : }

Generated by: LCOV version 1.14