LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - lwlock.c
Test: PostgreSQL 15devel        Lines: 342 of 390 hit (87.7 %)
Date: 2021-12-05 02:08:31       Functions: 27 of 31 hit (87.1 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * lwlock.c
       4             :  *    Lightweight lock manager
       5             :  *
       6             :  * Lightweight locks are intended primarily to provide mutual exclusion of
       7             :  * access to shared-memory data structures.  Therefore, they offer both
       8             :  * exclusive and shared lock modes (to support read/write and read-only
       9             :  * access to a shared object).  There are few other frammishes.  User-level
      10             :  * locking should be done with the full lock manager --- which depends on
      11             :  * LWLocks to protect its shared state.
      12             :  *
      13             :  * In addition to exclusive and shared modes, lightweight locks can be used to
      14             :  * wait until a variable changes value.  The variable is not reset when the
      15             :  * lock is acquired with LWLockAcquire; i.e., it keeps the value it was last
      16             :  * set to before the lock was released, and it can be updated
      17             :  * without releasing the lock by calling LWLockUpdateVar.  LWLockWaitForVar
      18             :  * waits for the variable to be updated, or until the lock is free.  When
      19             :  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
      20             :  * appropriate value for a free lock.  The meaning of the variable is up to
      21             :  * the caller, the lightweight lock code just assigns and compares it.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/storage/lmgr/lwlock.c
      28             :  *
      29             :  * NOTES:
      30             :  *
      31             :  * This used to be a pretty straightforward reader-writer lock
      32             :  * implementation, in which the internal state was protected by a
      33             :  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
      34             :  * too high for workloads/locks that were taken in shared mode very
      35             :  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
      36             :  * while trying to acquire a shared lock that was actually free.
      37             :  *
      38             :  * Thus a new implementation was devised that provides wait-free shared lock
      39             :  * acquisition for locks that aren't exclusively locked.
      40             :  *
      41             :  * The basic idea is to have a single atomic variable 'lockcount' instead of
      42             :  * the formerly separate shared and exclusive counters and to use atomic
      43             :  * operations to acquire the lock. That's fairly easy to do for plain
      44             :  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
      45             :  * in the OS.
      46             :  *
      47             :  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
      48             :  * variable. For exclusive lock we swap in a sentinel value
      49             :  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
      50             :  *
      51             :  * To release the lock we use an atomic decrement. If the new value is zero
      52             :  * (we learn that atomically), we know we can, and must, wake up any
      53             :  * waiters.
      54             :  *
      55             :  * Obviously it is important that the sentinel value for exclusive locks
      56             :  * doesn't conflict with the maximum number of possible share lockers -
      57             :  * luckily MAX_BACKENDS makes that easily possible.
      58             :  *
      59             :  *
      60             :  * The attentive reader might have noticed that naively doing the above has a
      61             :  * glaring race condition: We try to lock using the atomic operations and
      62             :  * notice that we have to wait. Unfortunately by the time we have finished
      63             :  * queuing, the former locker very well might have already finished its
      64             :  * work. That's problematic because we're now stuck waiting inside the OS.
      65             :  *
      66             :  * To mitigate those races we use a multi-phase attempt at locking:
      67             :  *   Phase 1: Try to do it atomically; if we succeed, nice
      68             :  *   Phase 2: Add ourselves to the waitqueue of the lock
      69             :  *   Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
      70             :  *            the queue
      71             :  *   Phase 4: Sleep till wake-up, goto Phase 1
      72             :  *
      73             :  * This protects us against the problem above: nobody can release too
      74             :  * quickly, before we're queued, since after Phase 2 we're already queued.
      75             :  * -------------------------------------------------------------------------
      76             :  */
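
Condensed into pseudo-C, the staged protocol described above looks roughly
like the sketch below.  The helpers try_atomic_acquire(), queue_self(),
unqueue_self(), and sleep_until_woken() are illustrative placeholders, not
functions in this file; the real steps live in LWLockAttemptLock(),
LWLockQueueSelf(), and their callers (the dequeue and semaphore-sleep code
falls outside this report excerpt).

    /* Hedged sketch of the staged acquire protocol; helper names invented. */
    static void
    sketch_lock_acquire(LWLock *lock, LWLockMode mode)
    {
        for (;;)
        {
            /* Phase 1: try to grab the lock atomically */
            if (try_atomic_acquire(lock, mode))
                return;             /* got it */

            /* Phase 2: add ourselves to the lock's wait queue */
            queue_self(lock, mode);

            /* Phase 3: retry; the holder may have released it meanwhile */
            if (try_atomic_acquire(lock, mode))
            {
                unqueue_self(lock); /* no wakeup needed anymore */
                return;
            }

            /* Phase 4: sleep until a releaser wakes us, then start over */
            sleep_until_woken();
        }
    }
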
      77             : #include "postgres.h"
      78             : 
      79             : #include "miscadmin.h"
      80             : #include "pg_trace.h"
      81             : #include "pgstat.h"
      82             : #include "port/pg_bitutils.h"
      83             : #include "postmaster/postmaster.h"
      84             : #include "replication/slot.h"
      85             : #include "storage/ipc.h"
      86             : #include "storage/predicate.h"
      87             : #include "storage/proc.h"
      88             : #include "storage/proclist.h"
      89             : #include "storage/spin.h"
      90             : #include "utils/memutils.h"
      91             : 
      92             : #ifdef LWLOCK_STATS
      93             : #include "utils/hsearch.h"
      94             : #endif
      95             : 
      96             : 
      97             : /* We use the ShmemLock spinlock to protect LWLockCounter */
      98             : extern slock_t *ShmemLock;
      99             : 
     100             : #define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 30)
     101             : #define LW_FLAG_RELEASE_OK          ((uint32) 1 << 29)
     102             : #define LW_FLAG_LOCKED              ((uint32) 1 << 28)
     103             : 
     104             : #define LW_VAL_EXCLUSIVE            ((uint32) 1 << 24)
     105             : #define LW_VAL_SHARED               1
     106             : 
     107             : #define LW_LOCK_MASK                ((uint32) ((1 << 25)-1))
     108             : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
     109             : #define LW_SHARED_MASK              ((uint32) ((1 << 24)-1))
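
Taken together, these constants carve up the 32-bit state word as follows
(a reading aid derived from the definitions above, not text from the file):

    /*
     * bit  31     unused
     * bit  30     LW_FLAG_HAS_WAITERS - wait queue is nonempty
     * bit  29     LW_FLAG_RELEASE_OK  - releaser should wake up waiters
     * bit  28     LW_FLAG_LOCKED      - "mutex" bit protecting the wait list
     * bits 25-27  unused
     * bit  24     LW_VAL_EXCLUSIVE    - an exclusive holder is present
     * bits 0-23   count of shared holders (each adds LW_VAL_SHARED == 1)
     *
     * LW_LOCK_MASK covers bits 0-24, i.e. the whole lock-holder portion.
     * Since MAX_BACKENDS is 2^23-1, even that many shared holders sum to
     * less than LW_VAL_EXCLUSIVE (2^24), so the two encodings cannot collide.
     */
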
     110             : 
     111             : /*
     112             :  * There are three sorts of LWLock "tranches":
     113             :  *
     114             :  * 1. The individually-named locks defined in lwlocknames.h each have their
     115             :  * own tranche.  The names of these tranches appear in IndividualLWLockNames[]
     116             :  * in lwlocknames.c.
     117             :  *
     118             :  * 2. There are some predefined tranches for built-in groups of locks.
     119             :  * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
     120             :  * appear in BuiltinTrancheNames[] below.
     121             :  *
     122             :  * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
     123             :  * or LWLockRegisterTranche.  The names of these that are known in the current
     124             :  * process appear in LWLockTrancheNames[].
     125             :  *
     126             :  * All these names are user-visible as wait event names, so choose with care
     127             :  * ... and do not forget to update the documentation's list of wait events.
     128             :  */
     129             : extern const char *const IndividualLWLockNames[];   /* in lwlocknames.c */
     130             : 
     131             : static const char *const BuiltinTrancheNames[] = {
     132             :     /* LWTRANCHE_XACT_BUFFER: */
     133             :     "XactBuffer",
     134             :     /* LWTRANCHE_COMMITTS_BUFFER: */
     135             :     "CommitTSBuffer",
     136             :     /* LWTRANCHE_SUBTRANS_BUFFER: */
     137             :     "SubtransBuffer",
     138             :     /* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */
     139             :     "MultiXactOffsetBuffer",
     140             :     /* LWTRANCHE_MULTIXACTMEMBER_BUFFER: */
     141             :     "MultiXactMemberBuffer",
     142             :     /* LWTRANCHE_NOTIFY_BUFFER: */
     143             :     "NotifyBuffer",
     144             :     /* LWTRANCHE_SERIAL_BUFFER: */
     145             :     "SerialBuffer",
     146             :     /* LWTRANCHE_WAL_INSERT: */
     147             :     "WALInsert",
     148             :     /* LWTRANCHE_BUFFER_CONTENT: */
     149             :     "BufferContent",
     150             :     /* LWTRANCHE_REPLICATION_ORIGIN_STATE: */
     151             :     "ReplicationOriginState",
     152             :     /* LWTRANCHE_REPLICATION_SLOT_IO: */
     153             :     "ReplicationSlotIO",
     154             :     /* LWTRANCHE_LOCK_FASTPATH: */
     155             :     "LockFastPath",
     156             :     /* LWTRANCHE_BUFFER_MAPPING: */
     157             :     "BufferMapping",
     158             :     /* LWTRANCHE_LOCK_MANAGER: */
     159             :     "LockManager",
     160             :     /* LWTRANCHE_PREDICATE_LOCK_MANAGER: */
     161             :     "PredicateLockManager",
     162             :     /* LWTRANCHE_PARALLEL_HASH_JOIN: */
     163             :     "ParallelHashJoin",
     164             :     /* LWTRANCHE_PARALLEL_QUERY_DSA: */
     165             :     "ParallelQueryDSA",
     166             :     /* LWTRANCHE_PER_SESSION_DSA: */
     167             :     "PerSessionDSA",
     168             :     /* LWTRANCHE_PER_SESSION_RECORD_TYPE: */
     169             :     "PerSessionRecordType",
     170             :     /* LWTRANCHE_PER_SESSION_RECORD_TYPMOD: */
     171             :     "PerSessionRecordTypmod",
     172             :     /* LWTRANCHE_SHARED_TUPLESTORE: */
     173             :     "SharedTupleStore",
     174             :     /* LWTRANCHE_SHARED_TIDBITMAP: */
     175             :     "SharedTidBitmap",
     176             :     /* LWTRANCHE_PARALLEL_APPEND: */
     177             :     "ParallelAppend",
     178             :     /* LWTRANCHE_PER_XACT_PREDICATE_LIST: */
     179             :     "PerXactPredicateList"
     180             : };
     181             : 
     182             : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
     183             :                  LWTRANCHE_FIRST_USER_DEFINED - NUM_INDIVIDUAL_LWLOCKS,
     184             :                  "missing entries in BuiltinTrancheNames[]");
     185             : 
     186             : /*
     187             :  * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
     188             :  * stores the names of all dynamically-created tranches known to the current
     189             :  * process.  Any unused entries in the array will contain NULL.
     190             :  */
     191             : static const char **LWLockTrancheNames = NULL;
     192             : static int  LWLockTrancheNamesAllocated = 0;
     193             : 
     194             : /*
     195             :  * This points to the main array of LWLocks in shared memory.  Backends inherit
     196             :  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
     197             :  * where we have special measures to pass it down).
     198             :  */
     199             : LWLockPadded *MainLWLockArray = NULL;
     200             : 
     201             : /*
     202             :  * We use this structure to keep track of locked LWLocks for release
     203             :  * during error recovery.  Normally, only a few will be held at once, but
     204             :  * occasionally the number can be much higher; for example, the pg_buffercache
     205             :  * extension locks all buffer partitions simultaneously.
     206             :  */
     207             : #define MAX_SIMUL_LWLOCKS   200
     208             : 
     209             : /* struct representing the LWLocks we're holding */
     210             : typedef struct LWLockHandle
     211             : {
     212             :     LWLock     *lock;
     213             :     LWLockMode  mode;
     214             : } LWLockHandle;
     215             : 
     216             : static int  num_held_lwlocks = 0;
     217             : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
     218             : 
      219             : /* struct representing an LWLock tranche request for a named tranche */
     220             : typedef struct NamedLWLockTrancheRequest
     221             : {
     222             :     char        tranche_name[NAMEDATALEN];
     223             :     int         num_lwlocks;
     224             : } NamedLWLockTrancheRequest;
     225             : 
     226             : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
     227             : static int  NamedLWLockTrancheRequestsAllocated = 0;
     228             : 
     229             : /*
     230             :  * NamedLWLockTrancheRequests is both the valid length of the request array,
     231             :  * and the length of the shared-memory NamedLWLockTrancheArray later on.
     232             :  * This variable and NamedLWLockTrancheArray are non-static so that
     233             :  * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
     234             :  */
     235             : int         NamedLWLockTrancheRequests = 0;
     236             : 
     237             : /* points to data in shared memory: */
     238             : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
     239             : 
     240             : static bool lock_named_request_allowed = true;
     241             : 
     242             : static void InitializeLWLocks(void);
     243             : static inline void LWLockReportWaitStart(LWLock *lock);
     244             : static inline void LWLockReportWaitEnd(void);
     245             : static const char *GetLWTrancheName(uint16 trancheId);
     246             : 
     247             : #define T_NAME(lock) \
     248             :     GetLWTrancheName((lock)->tranche)
     249             : 
     250             : #ifdef LWLOCK_STATS
     251             : typedef struct lwlock_stats_key
     252             : {
     253             :     int         tranche;
     254             :     void       *instance;
     255             : }           lwlock_stats_key;
     256             : 
     257             : typedef struct lwlock_stats
     258             : {
     259             :     lwlock_stats_key key;
     260             :     int         sh_acquire_count;
     261             :     int         ex_acquire_count;
     262             :     int         block_count;
     263             :     int         dequeue_self_count;
     264             :     int         spin_delay_count;
     265             : }           lwlock_stats;
     266             : 
     267             : static HTAB *lwlock_stats_htab;
     268             : static lwlock_stats lwlock_stats_dummy;
     269             : #endif
     270             : 
     271             : #ifdef LOCK_DEBUG
     272             : bool        Trace_lwlocks = false;
     273             : 
     274             : inline static void
     275             : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
     276             : {
     277             :     /* hide statement & context here, otherwise the log is just too verbose */
     278             :     if (Trace_lwlocks)
     279             :     {
     280             :         uint32      state = pg_atomic_read_u32(&lock->state);
     281             : 
     282             :         ereport(LOG,
     283             :                 (errhidestmt(true),
     284             :                  errhidecontext(true),
     285             :                  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
     286             :                                  MyProcPid,
     287             :                                  where, T_NAME(lock), lock,
     288             :                                  (state & LW_VAL_EXCLUSIVE) != 0,
     289             :                                  state & LW_SHARED_MASK,
     290             :                                  (state & LW_FLAG_HAS_WAITERS) != 0,
     291             :                                  pg_atomic_read_u32(&lock->nwaiters),
     292             :                                  (state & LW_FLAG_RELEASE_OK) != 0)));
     293             :     }
     294             : }
     295             : 
     296             : inline static void
     297             : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
     298             : {
     299             :     /* hide statement & context here, otherwise the log is just too verbose */
     300             :     if (Trace_lwlocks)
     301             :     {
     302             :         ereport(LOG,
     303             :                 (errhidestmt(true),
     304             :                  errhidecontext(true),
     305             :                  errmsg_internal("%s(%s %p): %s", where,
     306             :                                  T_NAME(lock), lock, msg)));
     307             :     }
     308             : }
     309             : 
     310             : #else                           /* not LOCK_DEBUG */
     311             : #define PRINT_LWDEBUG(a,b,c) ((void)0)
     312             : #define LOG_LWDEBUG(a,b,c) ((void)0)
     313             : #endif                          /* LOCK_DEBUG */
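
(Note that Trace_lwlocks is only compiled in when LOCK_DEBUG is defined; in
such builds it is, if I read the GUC wiring correctly, exposed as the
developer setting trace_lwlocks, so the PRINT_LWDEBUG/LOG_LWDEBUG output can
be toggled per session.)
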
     314             : 
     315             : #ifdef LWLOCK_STATS
     316             : 
     317             : static void init_lwlock_stats(void);
     318             : static void print_lwlock_stats(int code, Datum arg);
     319             : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
     320             : 
     321             : static void
     322             : init_lwlock_stats(void)
     323             : {
     324             :     HASHCTL     ctl;
     325             :     static MemoryContext lwlock_stats_cxt = NULL;
     326             :     static bool exit_registered = false;
     327             : 
     328             :     if (lwlock_stats_cxt != NULL)
     329             :         MemoryContextDelete(lwlock_stats_cxt);
     330             : 
     331             :     /*
     332             :      * The LWLock stats will be updated within a critical section, which
     333             :      * requires allocating new hash entries. Allocations within a critical
     334             :      * section are normally not allowed because running out of memory would
     335             :      * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
     336             :      * turned on in production, so that's an acceptable risk. The hash entries
     337             :      * are small, so the risk of running out of memory is minimal in practice.
     338             :      */
     339             :     lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
     340             :                                              "LWLock stats",
     341             :                                              ALLOCSET_DEFAULT_SIZES);
     342             :     MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
     343             : 
     344             :     ctl.keysize = sizeof(lwlock_stats_key);
     345             :     ctl.entrysize = sizeof(lwlock_stats);
     346             :     ctl.hcxt = lwlock_stats_cxt;
     347             :     lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
     348             :                                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     349             :     if (!exit_registered)
     350             :     {
     351             :         on_shmem_exit(print_lwlock_stats, 0);
     352             :         exit_registered = true;
     353             :     }
     354             : }
     355             : 
     356             : static void
     357             : print_lwlock_stats(int code, Datum arg)
     358             : {
     359             :     HASH_SEQ_STATUS scan;
     360             :     lwlock_stats *lwstats;
     361             : 
     362             :     hash_seq_init(&scan, lwlock_stats_htab);
     363             : 
     364             :     /* Grab an LWLock to keep different backends from mixing reports */
     365             :     LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
     366             : 
     367             :     while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
     368             :     {
     369             :         fprintf(stderr,
     370             :                 "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
     371             :                 MyProcPid, GetLWTrancheName(lwstats->key.tranche),
     372             :                 lwstats->key.instance, lwstats->sh_acquire_count,
     373             :                 lwstats->ex_acquire_count, lwstats->block_count,
     374             :                 lwstats->spin_delay_count, lwstats->dequeue_self_count);
     375             :     }
     376             : 
     377             :     LWLockRelease(&MainLWLockArray[0].lock);
     378             : }
     379             : 
     380             : static lwlock_stats *
     381             : get_lwlock_stats_entry(LWLock *lock)
     382             : {
     383             :     lwlock_stats_key key;
     384             :     lwlock_stats *lwstats;
     385             :     bool        found;
     386             : 
     387             :     /*
     388             :      * During shared memory initialization, the hash table doesn't exist yet.
     389             :      * Stats of that phase aren't very interesting, so just collect operations
     390             :      * on all locks in a single dummy entry.
     391             :      */
     392             :     if (lwlock_stats_htab == NULL)
     393             :         return &lwlock_stats_dummy;
     394             : 
     395             :     /* Fetch or create the entry. */
     396             :     MemSet(&key, 0, sizeof(key));
     397             :     key.tranche = lock->tranche;
     398             :     key.instance = lock;
     399             :     lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
     400             :     if (!found)
     401             :     {
     402             :         lwstats->sh_acquire_count = 0;
     403             :         lwstats->ex_acquire_count = 0;
     404             :         lwstats->block_count = 0;
     405             :         lwstats->dequeue_self_count = 0;
     406             :         lwstats->spin_delay_count = 0;
     407             :     }
     408             :     return lwstats;
     409             : }
     410             : #endif                          /* LWLOCK_STATS */
     411             : 
     412             : 
     413             : /*
     414             :  * Compute number of LWLocks required by named tranches.  These will be
     415             :  * allocated in the main array.
     416             :  */
     417             : static int
     418        9630 : NumLWLocksForNamedTranches(void)
     419             : {
     420        9630 :     int         numLocks = 0;
     421             :     int         i;
     422             : 
     423        9638 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     424           8 :         numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     425             : 
     426        9630 :     return numLocks;
     427             : }
     428             : 
     429             : /*
     430             :  * Compute shmem space needed for LWLocks and named tranches.
     431             :  */
     432             : Size
     433        6736 : LWLockShmemSize(void)
     434             : {
     435             :     Size        size;
     436             :     int         i;
     437        6736 :     int         numLocks = NUM_FIXED_LWLOCKS;
     438             : 
     439             :     /* Calculate total number of locks needed in the main array. */
     440        6736 :     numLocks += NumLWLocksForNamedTranches();
     441             : 
     442             :     /* Space for the LWLock array. */
     443        6736 :     size = mul_size(numLocks, sizeof(LWLockPadded));
     444             : 
     445             :     /* Space for dynamic allocation counter, plus room for alignment. */
     446        6736 :     size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
     447             : 
      448             :     /* Space for named tranches. */
     449        6736 :     size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
     450             : 
      451             :     /* Space for the name of each tranche. */
     452        6742 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     453           6 :         size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
     454             : 
     455             :     /* Disallow adding any more named tranches. */
     456        6736 :     lock_named_request_allowed = false;
     457             : 
     458        6736 :     return size;
     459             : }
     460             : 
     461             : /*
     462             :  * Allocate shmem space for the main LWLock array and all tranches and
     463             :  * initialize it.  We also register extension LWLock tranches here.
     464             :  */
     465             : void
     466        2894 : CreateLWLocks(void)
     467             : {
     468             :     StaticAssertStmt(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
     469             :                      "MAX_BACKENDS too big for lwlock.c");
     470             : 
     471             :     StaticAssertStmt(sizeof(LWLock) <= LWLOCK_PADDED_SIZE,
     472             :                      "Miscalculated LWLock padding");
     473             : 
     474        2894 :     if (!IsUnderPostmaster)
     475             :     {
     476        2894 :         Size        spaceLocks = LWLockShmemSize();
     477             :         int        *LWLockCounter;
     478             :         char       *ptr;
     479             : 
     480             :         /* Allocate space */
     481        2894 :         ptr = (char *) ShmemAlloc(spaceLocks);
     482             : 
     483             :         /* Leave room for dynamic allocation of tranches */
     484        2894 :         ptr += sizeof(int);
     485             : 
     486             :         /* Ensure desired alignment of LWLock array */
     487        2894 :         ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
     488             : 
     489        2894 :         MainLWLockArray = (LWLockPadded *) ptr;
     490             : 
     491             :         /*
     492             :          * Initialize the dynamic-allocation counter for tranches, which is
     493             :          * stored just before the first LWLock.
     494             :          */
     495        2894 :         LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     496        2894 :         *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
     497             : 
     498             :         /* Initialize all LWLocks */
     499        2894 :         InitializeLWLocks();
     500             :     }
     501             : 
     502             :     /* Register named extension LWLock tranches in the current process. */
     503        2896 :     for (int i = 0; i < NamedLWLockTrancheRequests; i++)
     504           2 :         LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
     505           2 :                               NamedLWLockTrancheArray[i].trancheName);
     506        2894 : }
     507             : 
     508             : /*
     509             :  * Initialize LWLocks that are fixed and those belonging to named tranches.
     510             :  */
     511             : static void
     512        2894 : InitializeLWLocks(void)
     513             : {
     514        2894 :     int         numNamedLocks = NumLWLocksForNamedTranches();
     515             :     int         id;
     516             :     int         i;
     517             :     int         j;
     518             :     LWLockPadded *lock;
     519             : 
     520             :     /* Initialize all individual LWLocks in main array */
     521      141806 :     for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
     522      138912 :         LWLockInitialize(&lock->lock, id);
     523             : 
     524             :     /* Initialize buffer mapping LWLocks in main array */
     525        2894 :     lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
     526      373326 :     for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
     527      370432 :         LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
     528             : 
     529             :     /* Initialize lmgrs' LWLocks in main array */
     530        2894 :     lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
     531       49198 :     for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
     532       46304 :         LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
     533             : 
     534             :     /* Initialize predicate lmgrs' LWLocks in main array */
     535        2894 :     lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
     536       49198 :     for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
     537       46304 :         LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
     538             : 
     539             :     /*
     540             :      * Copy the info about any named tranches into shared memory (so that
     541             :      * other processes can see it), and initialize the requested LWLocks.
     542             :      */
     543        2894 :     if (NamedLWLockTrancheRequests > 0)
     544             :     {
     545             :         char       *trancheNames;
     546             : 
     547           2 :         NamedLWLockTrancheArray = (NamedLWLockTranche *)
     548           2 :             &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
     549             : 
     550           2 :         trancheNames = (char *) NamedLWLockTrancheArray +
     551           2 :             (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
     552           2 :         lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
     553             : 
     554           4 :         for (i = 0; i < NamedLWLockTrancheRequests; i++)
     555             :         {
     556             :             NamedLWLockTrancheRequest *request;
     557             :             NamedLWLockTranche *tranche;
     558             :             char       *name;
     559             : 
     560           2 :             request = &NamedLWLockTrancheRequestArray[i];
     561           2 :             tranche = &NamedLWLockTrancheArray[i];
     562             : 
     563           2 :             name = trancheNames;
     564           2 :             trancheNames += strlen(request->tranche_name) + 1;
     565           2 :             strcpy(name, request->tranche_name);
     566           2 :             tranche->trancheId = LWLockNewTrancheId();
     567           2 :             tranche->trancheName = name;
     568             : 
     569           4 :             for (j = 0; j < request->num_lwlocks; j++, lock++)
     570           2 :                 LWLockInitialize(&lock->lock, tranche->trancheId);
     571             :         }
     572             :     }
     573        2894 : }
     574             : 
     575             : /*
     576             :  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
     577             :  */
     578             : void
     579       15808 : InitLWLockAccess(void)
     580             : {
     581             : #ifdef LWLOCK_STATS
     582             :     init_lwlock_stats();
     583             : #endif
     584       15808 : }
     585             : 
     586             : /*
      587             :  * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
      588             :  *      to the specified tranche.
      589             :  *
      590             :  * The caller retrieves the requested number of LWLocks starting from the
      591             :  * base lock address returned by this function.  This can be used for
      592             :  * tranches that were requested via the RequestNamedLWLockTranche() API.
     593             :  */
     594             : LWLockPadded *
     595           2 : GetNamedLWLockTranche(const char *tranche_name)
     596             : {
     597             :     int         lock_pos;
     598             :     int         i;
     599             : 
     600             :     /*
      601             :      * Obtain the position of the base address of the LWLocks belonging to
      602             :      * the requested tranche_name in MainLWLockArray.  LWLocks for named
      603             :      * tranches are placed in MainLWLockArray after the fixed locks.
     604             :      */
     605           2 :     lock_pos = NUM_FIXED_LWLOCKS;
     606           2 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     607             :     {
     608           2 :         if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
     609             :                    tranche_name) == 0)
     610           2 :             return &MainLWLockArray[lock_pos];
     611             : 
     612           0 :         lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     613             :     }
     614             : 
     615           0 :     elog(ERROR, "requested tranche is not registered");
     616             : 
     617             :     /* just to keep compiler quiet */
     618             :     return NULL;
     619             : }
     620             : 
     621             : /*
     622             :  * Allocate a new tranche ID.
     623             :  */
     624             : int
     625           2 : LWLockNewTrancheId(void)
     626             : {
     627             :     int         result;
     628             :     int        *LWLockCounter;
     629             : 
     630           2 :     LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     631           2 :     SpinLockAcquire(ShmemLock);
     632           2 :     result = (*LWLockCounter)++;
     633           2 :     SpinLockRelease(ShmemLock);
     634             : 
     635           2 :     return result;
     636             : }
     637             : 
     638             : /*
     639             :  * Register a dynamic tranche name in the lookup table of the current process.
     640             :  *
     641             :  * This routine will save a pointer to the tranche name passed as an argument,
     642             :  * so the name should be allocated in a backend-lifetime context
     643             :  * (shared memory, TopMemoryContext, static constant, or similar).
     644             :  *
     645             :  * The tranche name will be user-visible as a wait event name, so try to
     646             :  * use a name that fits the style for those.
     647             :  */
     648             : void
     649           2 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
     650             : {
     651             :     /* This should only be called for user-defined tranches. */
     652           2 :     if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
     653           0 :         return;
     654             : 
     655             :     /* Convert to array index. */
     656           2 :     tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
     657             : 
     658             :     /* If necessary, create or enlarge array. */
     659           2 :     if (tranche_id >= LWLockTrancheNamesAllocated)
     660             :     {
     661             :         int         newalloc;
     662             : 
     663           2 :         newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
     664             : 
     665           2 :         if (LWLockTrancheNames == NULL)
     666           2 :             LWLockTrancheNames = (const char **)
     667           2 :                 MemoryContextAllocZero(TopMemoryContext,
     668             :                                        newalloc * sizeof(char *));
     669             :         else
     670             :         {
     671           0 :             LWLockTrancheNames = (const char **)
     672           0 :                 repalloc(LWLockTrancheNames, newalloc * sizeof(char *));
     673           0 :             memset(LWLockTrancheNames + LWLockTrancheNamesAllocated,
     674             :                    0,
     675           0 :                    (newalloc - LWLockTrancheNamesAllocated) * sizeof(char *));
     676             :         }
     677           2 :         LWLockTrancheNamesAllocated = newalloc;
     678             :     }
     679             : 
     680           2 :     LWLockTrancheNames[tranche_id] = tranche_name;
     681             : }
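
For illustration, an extension that keeps a lock in its own shared memory
(rather than in the main array) would typically pair this with
LWLockNewTrancheId() and LWLockInitialize().  A minimal sketch, in which
MySharedState, my_shmem_startup(), and the "my_extension" name are
hypothetical:

    #include "postgres.h"
    #include "storage/lwlock.h"
    #include "storage/shmem.h"

    typedef struct MySharedState
    {
        LWLock      lock;           /* protects the fields below */
        int         counter;
    } MySharedState;

    static void
    my_shmem_startup(void)
    {
        MySharedState *state;
        bool        found;

        state = ShmemInitStruct("my_extension", sizeof(MySharedState), &found);
        if (!found)
        {
            /* first process in: allocate a tranche ID and set up the lock */
            LWLockInitialize(&state->lock, LWLockNewTrancheId());
            state->counter = 0;
        }

        /* every process registers the name locally to see it in wait events */
        LWLockRegisterTranche(state->lock.tranche, "my_extension");
    }
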
     682             : 
     683             : /*
     684             :  * RequestNamedLWLockTranche
     685             :  *      Request that extra LWLocks be allocated during postmaster
     686             :  *      startup.
     687             :  *
     688             :  * This is only useful for extensions if called from the _PG_init hook
     689             :  * of a library that is loaded into the postmaster via
     690             :  * shared_preload_libraries.  Once shared memory has been allocated, calls
     691             :  * will be ignored.  (We could raise an error, but it seems better to make
     692             :  * it a no-op, so that libraries containing such calls can be reloaded if
     693             :  * needed.)
     694             :  *
     695             :  * The tranche name will be user-visible as a wait event name, so try to
     696             :  * use a name that fits the style for those.
     697             :  */
     698             : void
     699           2 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
     700             : {
     701             :     NamedLWLockTrancheRequest *request;
     702             : 
     703           2 :     if (IsUnderPostmaster || !lock_named_request_allowed)
     704           0 :         return;                 /* too late */
     705             : 
     706           2 :     if (NamedLWLockTrancheRequestArray == NULL)
     707             :     {
     708           2 :         NamedLWLockTrancheRequestsAllocated = 16;
     709           2 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     710           2 :             MemoryContextAlloc(TopMemoryContext,
     711             :                                NamedLWLockTrancheRequestsAllocated
     712             :                                * sizeof(NamedLWLockTrancheRequest));
     713             :     }
     714             : 
     715           2 :     if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
     716             :     {
     717           0 :         int         i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
     718             : 
     719           0 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     720           0 :             repalloc(NamedLWLockTrancheRequestArray,
     721             :                      i * sizeof(NamedLWLockTrancheRequest));
     722           0 :         NamedLWLockTrancheRequestsAllocated = i;
     723             :     }
     724             : 
     725           2 :     request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
     726             :     Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
     727           2 :     strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
     728           2 :     request->num_lwlocks = num_lwlocks;
     729           2 :     NamedLWLockTrancheRequests++;
     730             : }
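
End to end, the named-tranche path then looks something like the following
hedged sketch; the extension-side names (my_locks, my_counter_update, the
"my_extension" tranche) are hypothetical:

    /* In _PG_init() of a module loaded via shared_preload_libraries: */
    void
    _PG_init(void)
    {
        RequestNamedLWLockTranche("my_extension", 4);
    }

    /* Once shared memory is up, any backend can look the locks up: */
    static LWLockPadded *my_locks = NULL;

    static void
    my_counter_update(void)
    {
        if (my_locks == NULL)
            my_locks = GetNamedLWLockTranche("my_extension");

        LWLockAcquire(&my_locks[0].lock, LW_EXCLUSIVE);
        /* ... touch whatever shared state this lock protects ... */
        LWLockRelease(&my_locks[0].lock);
    }
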
     731             : 
     732             : /*
     733             :  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
     734             :  */
     735             : void
     736    35985828 : LWLockInitialize(LWLock *lock, int tranche_id)
     737             : {
     738    35985828 :     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
     739             : #ifdef LOCK_DEBUG
     740             :     pg_atomic_init_u32(&lock->nwaiters, 0);
     741             : #endif
     742    35985828 :     lock->tranche = tranche_id;
     743    35985828 :     proclist_init(&lock->waiters);
     744    35985828 : }
     745             : 
     746             : /*
     747             :  * Report start of wait event for light-weight locks.
     748             :  *
      749             :  * This function is used by all the lightweight lock calls that need
      750             :  * to wait to acquire the lock.  It distinguishes the wait
      751             :  * event based on the tranche and lock id.
     752             :  */
     753             : static inline void
     754       16286 : LWLockReportWaitStart(LWLock *lock)
     755             : {
     756       16286 :     pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
     757       16286 : }
     758             : 
     759             : /*
     760             :  * Report end of wait event for light-weight locks.
     761             :  */
     762             : static inline void
     763       16286 : LWLockReportWaitEnd(void)
     764             : {
     765       16286 :     pgstat_report_wait_end();
     766       16286 : }
     767             : 
     768             : /*
     769             :  * Return the name of an LWLock tranche.
     770             :  */
     771             : static const char *
     772           0 : GetLWTrancheName(uint16 trancheId)
     773             : {
     774             :     /* Individual LWLock? */
     775           0 :     if (trancheId < NUM_INDIVIDUAL_LWLOCKS)
     776           0 :         return IndividualLWLockNames[trancheId];
     777             : 
     778             :     /* Built-in tranche? */
     779           0 :     if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
     780           0 :         return BuiltinTrancheNames[trancheId - NUM_INDIVIDUAL_LWLOCKS];
     781             : 
     782             :     /*
     783             :      * It's an extension tranche, so look in LWLockTrancheNames[].  However,
     784             :      * it's possible that the tranche has never been registered in the current
     785             :      * process, in which case give up and return "extension".
     786             :      */
     787           0 :     trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
     788             : 
     789           0 :     if (trancheId >= LWLockTrancheNamesAllocated ||
     790           0 :         LWLockTrancheNames[trancheId] == NULL)
     791           0 :         return "extension";
     792             : 
     793           0 :     return LWLockTrancheNames[trancheId];
     794             : }
     795             : 
     796             : /*
     797             :  * Return an identifier for an LWLock based on the wait class and event.
     798             :  */
     799             : const char *
     800           0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
     801             : {
     802             :     Assert(classId == PG_WAIT_LWLOCK);
     803             :     /* The event IDs are just tranche numbers. */
     804           0 :     return GetLWTrancheName(eventId);
     805             : }
     806             : 
     807             : /*
     808             :  * Internal function that tries to atomically acquire the lwlock in the passed
     809             :  * in mode.
     810             :  *
     811             :  * This function will not block waiting for a lock to become free - that's the
      812             :  * caller's job.
     813             :  *
     814             :  * Returns true if the lock isn't free and we need to wait.
     815             :  */
     816             : static bool
     817   406217580 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     818             : {
     819             :     uint32      old_state;
     820             : 
     821             :     AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);
     822             : 
     823             :     /*
      824             :      * Read once outside the loop; later iterations will get the newer value
      825             :      * via compare & exchange.
     826             :      */
     827   406217580 :     old_state = pg_atomic_read_u32(&lock->state);
     828             : 
     829             :     /* loop until we've determined whether we could acquire the lock or not */
     830             :     while (true)
     831       88830 :     {
     832             :         uint32      desired_state;
     833             :         bool        lock_free;
     834             : 
     835   406306410 :         desired_state = old_state;
     836             : 
     837   406306410 :         if (mode == LW_EXCLUSIVE)
     838             :         {
     839   193265486 :             lock_free = (old_state & LW_LOCK_MASK) == 0;
     840   193265486 :             if (lock_free)
     841   193217824 :                 desired_state += LW_VAL_EXCLUSIVE;
     842             :         }
     843             :         else
     844             :         {
     845   213040924 :             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
     846   213040924 :             if (lock_free)
     847   213025906 :                 desired_state += LW_VAL_SHARED;
     848             :         }
     849             : 
     850             :         /*
      851             :          * Attempt to swap in the state we are expecting. If we didn't see the
      852             :          * lock as free, that's just the old value. If we saw it as free,
      853             :          * we'll attempt to mark it acquired. The reason that we always swap
      854             :          * in the value is that this doubles as a memory barrier. We could try
      855             :          * to be smarter and only swap in values if we saw the lock as free,
      856             :          * but benchmarks haven't shown that to be beneficial so far.
     857             :          *
     858             :          * Retry if the value changed since we last looked at it.
     859             :          */
     860   406306410 :         if (pg_atomic_compare_exchange_u32(&lock->state,
     861             :                                            &old_state, desired_state))
     862             :         {
     863   406217580 :             if (lock_free)
     864             :             {
     865             :                 /* Great! Got the lock. */
     866             : #ifdef LOCK_DEBUG
     867             :                 if (mode == LW_EXCLUSIVE)
     868             :                     lock->owner = MyProc;
     869             : #endif
     870   406170424 :                 return false;
     871             :             }
     872             :             else
     873       47156 :                 return true;    /* somebody else has the lock */
     874             :         }
     875             :     }
     876             :     pg_unreachable();
     877             : }
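
For symmetry, the release side that the NOTES describe (a single atomic
subtract that also reads the flags, then a waiter check) lives in
LWLockRelease(), which falls outside this report excerpt.  Roughly, and
with details such as variable clearing and held-lock bookkeeping omitted:

    /* Hedged sketch of the release fast path; see LWLockRelease() itself. */
    static void
    sketch_lock_release(LWLock *lock, LWLockMode mode)
    {
        uint32      oldstate;

        /* one atomic op both releases the lock and fetches the flag bits */
        oldstate = pg_atomic_sub_fetch_u32(&lock->state,
                                           mode == LW_EXCLUSIVE ?
                                           LW_VAL_EXCLUSIVE : LW_VAL_SHARED);

        /*
         * Wake waiters only if some exist, wakeups are permitted, and the
         * lock is now fully free.
         */
        if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
            (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
            (oldstate & LW_LOCK_MASK) == 0)
            LWLockWakeup(lock);
    }
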
     878             : 
     879             : /*
     880             :  * Lock the LWLock's wait list against concurrent activity.
     881             :  *
     882             :  * NB: even though the wait list is locked, non-conflicting lock operations
     883             :  * may still happen concurrently.
     884             :  *
      885             :  * Time spent holding the mutex should be short!
     886             :  */
     887             : static void
     888    33659488 : LWLockWaitListLock(LWLock *lock)
     889             : {
     890             :     uint32      old_state;
     891             : #ifdef LWLOCK_STATS
     892             :     lwlock_stats *lwstats;
     893             :     uint32      delays = 0;
     894             : 
     895             :     lwstats = get_lwlock_stats_entry(lock);
     896             : #endif
     897             : 
     898             :     while (true)
     899             :     {
     900             :         /* always try once to acquire lock directly */
     901    33659488 :         old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
     902    33659488 :         if (!(old_state & LW_FLAG_LOCKED))
     903    33651180 :             break;              /* got lock */
     904             : 
     905             :         /* and then spin without atomic operations until lock is released */
     906             :         {
     907             :             SpinDelayStatus delayStatus;
     908             : 
     909        8308 :             init_local_spin_delay(&delayStatus);
     910             : 
     911       74100 :             while (old_state & LW_FLAG_LOCKED)
     912             :             {
     913       65792 :                 perform_spin_delay(&delayStatus);
     914       65792 :                 old_state = pg_atomic_read_u32(&lock->state);
     915             :             }
     916             : #ifdef LWLOCK_STATS
     917             :             delays += delayStatus.delays;
     918             : #endif
     919        8308 :             finish_spin_delay(&delayStatus);
     920             :         }
     921             : 
     922             :         /*
      923             :          * Retry. Obviously, the lock might already have been re-acquired by
      924             :          * the time we attempt to get it again.
     925             :          */
     926             :     }
     927             : 
     928             : #ifdef LWLOCK_STATS
     929             :     lwstats->spin_delay_count += delays;
     930             : #endif
     931    33651180 : }
     932             : 
     933             : /*
     934             :  * Unlock the LWLock's wait list.
     935             :  *
     936             :  * Note that it can be more efficient to manipulate flags and release the
     937             :  * locks in a single atomic operation.
     938             :  */
     939             : static void
     940    33615818 : LWLockWaitListUnlock(LWLock *lock)
     941             : {
     942             :     uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
     943             : 
     944    33615818 :     old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
     945             : 
     946             :     Assert(old_state & LW_FLAG_LOCKED);
     947    33615818 : }
     948             : 
     949             : /*
      950             :  * Wake up all the lockers that currently have a chance to acquire the lock.
     951             :  */
     952             : static void
     953       35362 : LWLockWakeup(LWLock *lock)
     954             : {
     955             :     bool        new_release_ok;
     956       35362 :     bool        wokeup_somebody = false;
     957             :     proclist_head wakeup;
     958             :     proclist_mutable_iter iter;
     959             : 
     960       35362 :     proclist_init(&wakeup);
     961             : 
     962       35362 :     new_release_ok = true;
     963             : 
     964             :     /* lock wait list while collecting backends to wake up */
     965       35362 :     LWLockWaitListLock(lock);
     966             : 
     967       44274 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
     968             :     {
     969       20462 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     970             : 
     971       20462 :         if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
     972         144 :             continue;
     973             : 
     974       20318 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
     975       20318 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
     976             : 
     977       20318 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
     978             :         {
     979             :             /*
      980             :              * Prevent additional wakeups until the retryer gets to run. Backends
     981             :              * that are just waiting for the lock to become free don't retry
     982             :              * automatically.
     983             :              */
     984       18432 :             new_release_ok = false;
     985             : 
     986             :             /*
     987             :              * Don't wakeup (further) exclusive locks.
     988             :              */
     989       18432 :             wokeup_somebody = true;
     990             :         }
     991             : 
     992             :         /*
      993             :          * Once we've woken up an exclusive locker, there's no point in waking
     994             :          * up anybody else.
     995             :          */
     996       20318 :         if (waiter->lwWaitMode == LW_EXCLUSIVE)
     997       11550 :             break;
     998             :     }
     999             : 
    1000             :     Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
    1001             : 
    1002             :     /* unset required flags, and release lock, in one fell swoop */
    1003             :     {
    1004             :         uint32      old_state;
    1005             :         uint32      desired_state;
    1006             : 
    1007       35362 :         old_state = pg_atomic_read_u32(&lock->state);
    1008             :         while (true)
    1009             :         {
    1010       35836 :             desired_state = old_state;
    1011             : 
    1012             :             /* compute desired flags */
    1013             : 
    1014       35836 :             if (new_release_ok)
    1015       17644 :                 desired_state |= LW_FLAG_RELEASE_OK;
    1016             :             else
    1017       18192 :                 desired_state &= ~LW_FLAG_RELEASE_OK;
    1018             : 
    1019       35836 :             if (proclist_is_empty(&wakeup))
    1020       16184 :                 desired_state &= ~LW_FLAG_HAS_WAITERS;
    1021             : 
    1022       35836 :             desired_state &= ~LW_FLAG_LOCKED;   /* release lock */
    1023             : 
    1024       35836 :             if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
    1025             :                                                desired_state))
    1026       35362 :                 break;
    1027             :         }
    1028             :     }
    1029             : 
    1030             :     /* Awaken any waiters I removed from the queue. */
    1031       55680 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1032             :     {
    1033       20318 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1034             : 
    1035             :         LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
    1036       20318 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1037             : 
    1038             :         /*
    1039             :          * Guarantee that lwWaiting being unset only becomes visible once the
     1040             :          * unlink from the wait list has completed. Otherwise the target
     1041             :          * backend could be woken up for some other reason and enqueue itself
     1042             :          * for a new lock - if that happens before the list unlink happens,
     1043             :          * the list would end up being corrupted.
    1044             :          *
    1045             :          * The barrier pairs with the LWLockWaitListLock() when enqueuing for
    1046             :          * another lock.
    1047             :          */
    1048       20318 :         pg_write_barrier();
    1049       20318 :         waiter->lwWaiting = false;
    1050       20318 :         PGSemaphoreUnlock(waiter->sem);
    1051             :     }
    1052       35362 : }
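
A minimal sketch of the compare-and-swap retry idiom used in the flag-update block above, shown in isolation: read the state once, compute the desired value, and loop until the CAS succeeds. This assumes PostgreSQL's port/atomics.h; FLAG_A and FLAG_B are illustrative placeholders, not flags defined in this file.

    #include "port/atomics.h"

    static void
    set_and_clear_flags(pg_atomic_uint32 *state)
    {
        uint32      old_state = pg_atomic_read_u32(state);

        while (true)
        {
            uint32      desired = (old_state | FLAG_A) & ~FLAG_B;

            /* on failure, old_state is refreshed with the current value */
            if (pg_atomic_compare_exchange_u32(state, &old_state, desired))
                break;
        }
    }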
    1053             : 
    1054             : /*
    1055             :  * Add ourselves to the end of the queue.
    1056             :  *
    1057             :  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
    1058             :  */
    1059             : static void
    1060       33018 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
    1061             : {
    1062             :     /*
    1063             :      * If we don't have a PGPROC structure, there's no way to wait. This
    1064             :      * should never occur, since MyProc should only be null during shared
    1065             :      * memory initialization.
    1066             :      */
    1067       33018 :     if (MyProc == NULL)
    1068           0 :         elog(PANIC, "cannot wait without a PGPROC structure");
    1069             : 
    1070       33018 :     if (MyProc->lwWaiting)
    1071           0 :         elog(PANIC, "queueing for lock while waiting on another one");
    1072             : 
    1073       33018 :     LWLockWaitListLock(lock);
    1074             : 
    1075             :     /* setting the flag is protected by the spinlock */
    1076       33018 :     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
    1077             : 
    1078       33018 :     MyProc->lwWaiting = true;
    1079       33018 :     MyProc->lwWaitMode = mode;
    1080             : 
    1081             :     /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
    1082       33018 :     if (mode == LW_WAIT_UNTIL_FREE)
    1083        2470 :         proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
    1084             :     else
    1085       30548 :         proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
    1086             : 
    1087             :     /* Can release the mutex now */
    1088       33018 :     LWLockWaitListUnlock(lock);
    1089             : 
    1090             : #ifdef LOCK_DEBUG
    1091             :     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
    1092             : #endif
    1093             : 
    1094       33018 : }
    1095             : 
    1096             : /*
    1097             :  * Remove ourselves from the waitlist.
    1098             :  *
    1099             :  * This is used if we queued ourselves because we thought we needed to sleep
    1100             :  * but, after further checking, we discovered that we don't actually need to
    1101             :  * do so.
    1102             :  */
    1103             : static void
    1104       16732 : LWLockDequeueSelf(LWLock *lock)
    1105             : {
    1106       16732 :     bool        found = false;
    1107             :     proclist_mutable_iter iter;
    1108             : 
    1109             : #ifdef LWLOCK_STATS
    1110             :     lwlock_stats *lwstats;
    1111             : 
    1112             :     lwstats = get_lwlock_stats_entry(lock);
    1113             : 
    1114             :     lwstats->dequeue_self_count++;
    1115             : #endif
    1116             : 
    1117       16732 :     LWLockWaitListLock(lock);
    1118             : 
    1119             :     /*
     1120             :      * We can't simply remove ourselves from the list; we have to iterate
     1121             :      * over all entries, because somebody else may already have dequeued us.
    1122             :      */
    1123       16928 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1124             :     {
    1125       12878 :         if (iter.cur == MyProc->pgprocno)
    1126             :         {
    1127       12682 :             found = true;
    1128       12682 :             proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1129       12682 :             break;
    1130             :         }
    1131             :     }
    1132             : 
    1133       16732 :     if (proclist_is_empty(&lock->waiters) &&
    1134       16544 :         (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
    1135             :     {
    1136       16506 :         pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
    1137             :     }
    1138             : 
    1139             :     /* XXX: combine with fetch_and above? */
    1140       16732 :     LWLockWaitListUnlock(lock);
    1141             : 
    1142             :     /* clear waiting state again, nice for debugging */
    1143       16732 :     if (found)
    1144       12682 :         MyProc->lwWaiting = false;
    1145             :     else
    1146             :     {
    1147        4050 :         int         extraWaits = 0;
    1148             : 
    1149             :         /*
     1150             :          * Somebody else dequeued us and has woken us up, or will do so
     1151             :          * shortly. Deal with the superfluous wakeup we are about to absorb.
    1152             :          */
    1153             : 
    1154             :         /*
    1155             :          * Reset RELEASE_OK flag if somebody woke us before we removed
    1156             :          * ourselves - they'll have set it to false.
    1157             :          */
    1158        4050 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1159             : 
    1160             :         /*
    1161             :          * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
    1162             :          * get reset at some inconvenient point later. Most of the time this
    1163             :          * will immediately return.
    1164             :          */
    1165             :         for (;;)
    1166             :         {
    1167        4050 :             PGSemaphoreLock(MyProc->sem);
    1168        4050 :             if (!MyProc->lwWaiting)
    1169        4050 :                 break;
    1170           0 :             extraWaits++;
    1171             :         }
    1172             : 
    1173             :         /*
    1174             :          * Fix the process wait semaphore's count for any absorbed wakeups.
    1175             :          */
    1176        4050 :         while (extraWaits-- > 0)
    1177           0 :             PGSemaphoreUnlock(MyProc->sem);
    1178             :     }
    1179             : 
    1180             : #ifdef LOCK_DEBUG
    1181             :     {
    1182             :         /* not waiting anymore */
    1183             :         uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1184             : 
    1185             :         Assert(nwaiters < MAX_BACKENDS);
    1186             :     }
    1187             : #endif
    1188       16732 : }
    1189             : 
    1190             : /*
    1191             :  * LWLockAcquire - acquire a lightweight lock in the specified mode
    1192             :  *
    1193             :  * If the lock is not available, sleep until it is.  Returns true if the lock
    1194             :  * was available immediately, false if we had to sleep.
    1195             :  *
    1196             :  * Side effect: cancel/die interrupts are held off until lock release.
    1197             :  */
    1198             : bool
    1199   403033190 : LWLockAcquire(LWLock *lock, LWLockMode mode)
    1200             : {
    1201   403033190 :     PGPROC     *proc = MyProc;
    1202   403033190 :     bool        result = true;
    1203   403033190 :     int         extraWaits = 0;
    1204             : #ifdef LWLOCK_STATS
    1205             :     lwlock_stats *lwstats;
    1206             : 
    1207             :     lwstats = get_lwlock_stats_entry(lock);
    1208             : #endif
    1209             : 
    1210             :     AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1211             : 
    1212             :     PRINT_LWDEBUG("LWLockAcquire", lock, mode);
    1213             : 
    1214             : #ifdef LWLOCK_STATS
    1215             :     /* Count lock acquisition attempts */
    1216             :     if (mode == LW_EXCLUSIVE)
    1217             :         lwstats->ex_acquire_count++;
    1218             :     else
    1219             :         lwstats->sh_acquire_count++;
    1220             : #endif                          /* LWLOCK_STATS */
    1221             : 
    1222             :     /*
    1223             :      * We can't wait if we haven't got a PGPROC.  This should only occur
    1224             :      * during bootstrap or shared memory initialization.  Put an Assert here
    1225             :      * to catch unsafe coding practices.
    1226             :      */
    1227             :     Assert(!(proc == NULL && IsUnderPostmaster));
    1228             : 
    1229             :     /* Ensure we will have room to remember the lock */
    1230   403033190 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1231           0 :         elog(ERROR, "too many LWLocks taken");
    1232             : 
    1233             :     /*
    1234             :      * Lock out cancel/die interrupts until we exit the code section protected
    1235             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1236             :      * manipulations of data structures in shared memory.
    1237             :      */
    1238   403033190 :     HOLD_INTERRUPTS();
    1239             : 
    1240             :     /*
     1241             :      * Loop here to try to acquire the lock after each time we are signaled
     1242             :      * by LWLockRelease.
    1243             :      *
    1244             :      * NOTE: it might seem better to have LWLockRelease actually grant us the
    1245             :      * lock, rather than retrying and possibly having to go back to sleep. But
    1246             :      * in practice that is no good because it means a process swap for every
    1247             :      * lock acquisition when two or more processes are contending for the same
    1248             :      * lock.  Since LWLocks are normally used to protect not-very-long
    1249             :      * sections of computation, a process needs to be able to acquire and
    1250             :      * release the same lock many times during a single CPU time slice, even
    1251             :      * in the presence of contention.  The efficiency of being able to do that
    1252             :      * outweighs the inefficiency of sometimes wasting a process dispatch
    1253             :      * cycle because the lock is not free when a released waiter finally gets
    1254             :      * to run.  See pgsql-hackers archives for 29-Dec-01.
    1255             :      */
    1256             :     for (;;)
    1257       14420 :     {
    1258             :         bool        mustwait;
    1259             : 
    1260             :         /*
     1261             :          * Try to grab the lock the first time; we're not in the wait queue
     1262             :          * yet (or not anymore).
    1263             :          */
    1264   403047610 :         mustwait = LWLockAttemptLock(lock, mode);
    1265             : 
    1266   403047610 :         if (!mustwait)
    1267             :         {
    1268             :             LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
    1269   403017062 :             break;              /* got the lock */
    1270             :         }
    1271             : 
    1272             :         /*
     1273             :          * Ok, at this point we couldn't grab the lock on the first try. We
     1274             :          * cannot simply queue ourselves to the end of the list and wait to
     1275             :          * be woken up, because by now the lock could long since have been
     1276             :          * released. Instead, add ourselves to the queue and then try to grab
     1277             :          * the lock again. If we succeed, we need to undo the queueing;
     1278             :          * otherwise we sleep. If we still couldn't grab the lock, we know
     1279             :          * the other locker will see our queue entry when releasing, since
     1280             :          * it existed before we rechecked the lock.
    1281             :          */
    1282             : 
    1283             :         /* add to the queue */
    1284       30548 :         LWLockQueueSelf(lock, mode);
    1285             : 
    1286             :         /* we're now guaranteed to be woken up if necessary */
    1287       30548 :         mustwait = LWLockAttemptLock(lock, mode);
    1288             : 
    1289             :         /* ok, grabbed the lock the second time round, need to undo queueing */
    1290       30548 :         if (!mustwait)
    1291             :         {
    1292             :             LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
    1293             : 
    1294       16128 :             LWLockDequeueSelf(lock);
    1295       16128 :             break;
    1296             :         }
    1297             : 
    1298             :         /*
    1299             :          * Wait until awakened.
    1300             :          *
    1301             :          * It is possible that we get awakened for a reason other than being
    1302             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1303             :          * we've gotten the LWLock, re-increment the sema by the number of
    1304             :          * additional signals received.
    1305             :          */
    1306             :         LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
    1307             : 
    1308             : #ifdef LWLOCK_STATS
    1309             :         lwstats->block_count++;
    1310             : #endif
    1311             : 
    1312       14420 :         LWLockReportWaitStart(lock);
    1313             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1314             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1315             : 
    1316             :         for (;;)
    1317             :         {
    1318       14420 :             PGSemaphoreLock(proc->sem);
    1319       14420 :             if (!proc->lwWaiting)
    1320       14420 :                 break;
    1321           0 :             extraWaits++;
    1322             :         }
    1323             : 
    1324             :         /* Retrying, allow LWLockRelease to release waiters again. */
    1325       14420 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1326             : 
    1327             : #ifdef LOCK_DEBUG
    1328             :         {
    1329             :             /* not waiting anymore */
    1330             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1331             : 
    1332             :             Assert(nwaiters < MAX_BACKENDS);
    1333             :         }
    1334             : #endif
    1335             : 
    1336             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1337             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1338       14420 :         LWLockReportWaitEnd();
    1339             : 
    1340             :         LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
    1341             : 
    1342             :         /* Now loop back and try to acquire lock again. */
    1343       14420 :         result = false;
    1344             :     }
    1345             : 
    1346             :     if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
    1347             :         TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
    1348             : 
    1349             :     /* Add lock to list of locks held by this backend */
    1350   403033190 :     held_lwlocks[num_held_lwlocks].lock = lock;
    1351   403033190 :     held_lwlocks[num_held_lwlocks++].mode = mode;
    1352             : 
    1353             :     /*
    1354             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1355             :      */
    1356   403033190 :     while (extraWaits-- > 0)
    1357           0 :         PGSemaphoreUnlock(proc->sem);
    1358             : 
    1359   403033190 :     return result;
    1360             : }
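
A hedged usage sketch (not code from this file): callers bracket a short shared-memory critical section with LWLockAcquire/LWLockRelease, taking LW_SHARED for read-only access and LW_EXCLUSIVE for updates. counter_lock and shared_counter are hypothetical names for objects living in shared memory.

    static uint64
    bump_shared_counter(LWLock *counter_lock, uint64 *shared_counter)
    {
        uint64      result;

        LWLockAcquire(counter_lock, LW_EXCLUSIVE); /* may sleep; holds off interrupts */
        result = ++(*shared_counter);
        LWLockRelease(counter_lock);               /* re-enables cancel/die interrupts */

        return result;
    }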
    1361             : 
    1362             : /*
    1363             :  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
    1364             :  *
    1365             :  * If the lock is not available, return false with no side-effects.
    1366             :  *
    1367             :  * If successful, cancel/die interrupts are held off until lock release.
    1368             :  */
    1369             : bool
    1370     2692482 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    1371             : {
    1372             :     bool        mustwait;
    1373             : 
    1374             :     AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1375             : 
    1376             :     PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
    1377             : 
    1378             :     /* Ensure we will have room to remember the lock */
    1379     2692482 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1380           0 :         elog(ERROR, "too many LWLocks taken");
    1381             : 
    1382             :     /*
    1383             :      * Lock out cancel/die interrupts until we exit the code section protected
    1384             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1385             :      * manipulations of data structures in shared memory.
    1386             :      */
    1387     2692482 :     HOLD_INTERRUPTS();
    1388             : 
    1389             :     /* Check for the lock */
    1390     2692482 :     mustwait = LWLockAttemptLock(lock, mode);
    1391             : 
    1392     2692482 :     if (mustwait)
    1393             :     {
    1394             :         /* Failed to get lock, so release interrupt holdoff */
    1395         902 :         RESUME_INTERRUPTS();
    1396             : 
    1397             :         LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
    1398             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
    1399             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
    1400             :     }
    1401             :     else
    1402             :     {
    1403             :         /* Add lock to list of locks held by this backend */
    1404     2691580 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1405     2691580 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1406             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
    1407             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
    1408             :     }
    1409     2692482 :     return !mustwait;
    1410             : }
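
A sketch of the conditional variant in use: attempt the lock, and fall back to other work instead of sleeping when it is busy. update_shared_state() and do_deferred_work() are illustrative placeholders.

    if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
    {
        /* got the lock immediately; interrupts are now held off */
        update_shared_state();
        LWLockRelease(lock);
    }
    else
    {
        /* lock busy; no side effects, so we are free to do something else */
        do_deferred_work();
    }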
    1411             : 
    1412             : /*
    1413             :  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
    1414             :  *
    1415             :  * The semantics of this function are a bit funky.  If the lock is currently
    1416             :  * free, it is acquired in the given mode, and the function returns true.  If
    1417             :  * the lock isn't immediately free, the function waits until it is released
    1418             :  * and returns false, but does not acquire the lock.
    1419             :  *
    1420             :  * This is currently used for WALWriteLock: when a backend flushes the WAL,
    1421             :  * holding WALWriteLock, it can flush the commit records of many other
    1422             :  * backends as a side-effect.  Those other backends need to wait until the
    1423             :  * flush finishes, but don't need to acquire the lock anymore.  They can just
    1424             :  * wake up, observe that their records have already been flushed, and return.
    1425             :  */
    1426             : bool
    1427      446292 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
    1428             : {
    1429      446292 :     PGPROC     *proc = MyProc;
    1430             :     bool        mustwait;
    1431      446292 :     int         extraWaits = 0;
    1432             : #ifdef LWLOCK_STATS
    1433             :     lwlock_stats *lwstats;
    1434             : 
    1435             :     lwstats = get_lwlock_stats_entry(lock);
    1436             : #endif
    1437             : 
    1438             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1439             : 
    1440             :     PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
    1441             : 
    1442             :     /* Ensure we will have room to remember the lock */
    1443      446292 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1444           0 :         elog(ERROR, "too many LWLocks taken");
    1445             : 
    1446             :     /*
    1447             :      * Lock out cancel/die interrupts until we exit the code section protected
    1448             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1449             :      * manipulations of data structures in shared memory.
    1450             :      */
    1451      446292 :     HOLD_INTERRUPTS();
    1452             : 
    1453             :     /*
    1454             :      * NB: We're using nearly the same twice-in-a-row lock acquisition
    1455             :      * protocol as LWLockAcquire(). Check its comments for details.
    1456             :      */
    1457      446292 :     mustwait = LWLockAttemptLock(lock, mode);
    1458             : 
    1459      446292 :     if (mustwait)
    1460             :     {
    1461         648 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1462             : 
    1463         648 :         mustwait = LWLockAttemptLock(lock, mode);
    1464             : 
    1465         648 :         if (mustwait)
    1466             :         {
    1467             :             /*
    1468             :              * Wait until awakened.  Like in LWLockAcquire, be prepared for
    1469             :              * bogus wakeups.
    1470             :              */
    1471             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
    1472             : 
    1473             : #ifdef LWLOCK_STATS
    1474             :             lwstats->block_count++;
    1475             : #endif
    1476             : 
    1477         638 :             LWLockReportWaitStart(lock);
    1478             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1479             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1480             : 
    1481             :             for (;;)
    1482             :             {
    1483         638 :                 PGSemaphoreLock(proc->sem);
    1484         638 :                 if (!proc->lwWaiting)
    1485         638 :                     break;
    1486           0 :                 extraWaits++;
    1487             :             }
    1488             : 
    1489             : #ifdef LOCK_DEBUG
    1490             :             {
    1491             :                 /* not waiting anymore */
    1492             :                 uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1493             : 
    1494             :                 Assert(nwaiters < MAX_BACKENDS);
    1495             :             }
    1496             : #endif
    1497             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1498             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1499         638 :             LWLockReportWaitEnd();
    1500             : 
    1501             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
    1502             :         }
    1503             :         else
    1504             :         {
    1505             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
    1506             : 
    1507             :             /*
    1508             :              * Got lock in the second attempt, undo queueing. We need to treat
    1509             :              * this as having successfully acquired the lock, otherwise we'd
    1510             :              * not necessarily wake up people we've prevented from acquiring
    1511             :              * the lock.
    1512             :              */
    1513          10 :             LWLockDequeueSelf(lock);
    1514             :         }
    1515             :     }
    1516             : 
    1517             :     /*
    1518             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1519             :      */
    1520      446292 :     while (extraWaits-- > 0)
    1521           0 :         PGSemaphoreUnlock(proc->sem);
    1522             : 
    1523      446292 :     if (mustwait)
    1524             :     {
    1525             :         /* Failed to get lock, so release interrupt holdoff */
    1526         638 :         RESUME_INTERRUPTS();
    1527             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
    1528             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
    1529             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
    1530             :     }
    1531             :     else
    1532             :     {
    1533             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
    1534             :         /* Add lock to list of locks held by this backend */
    1535      445654 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1536      445654 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1537             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
    1538             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
    1539             :     }
    1540             : 
    1541      446292 :     return !mustwait;
    1542             : }
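
A hedged sketch of the flush-or-piggyback pattern described above, loosely modeled on the WALWriteLock usage; target_lsn, record_is_flushed() and flush_up_to() are illustrative placeholders.

    for (;;)
    {
        if (record_is_flushed(target_lsn))
            break;              /* somebody already flushed far enough */

        if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
        {
            /* the lock was free, so we perform the flush ourselves */
            flush_up_to(target_lsn);
            LWLockRelease(WALWriteLock);
            break;
        }

        /*
         * The lock was busy: we slept until the holder released it, but we
         * did not acquire it.  Loop back and recheck whether the holder's
         * flush already covered our record.
         */
    }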
    1543             : 
    1544             : /*
     1545             :  * Does the lwlock, in its current state, require us to wait for the variable
     1546             :  * value to change?
     1547             :  *
     1548             :  * If we don't need to wait, and it's because the value of the variable has
     1549             :  * changed, store the current value in *newval.
    1550             :  *
    1551             :  * *result is set to true if the lock was free, and false otherwise.
    1552             :  */
    1553             : static bool
    1554     7913240 : LWLockConflictsWithVar(LWLock *lock,
    1555             :                        uint64 *valptr, uint64 oldval, uint64 *newval,
    1556             :                        bool *result)
    1557             : {
    1558             :     bool        mustwait;
    1559             :     uint64      value;
    1560             : 
    1561             :     /*
     1562             :      * Test first to see if the lock is free right now.
    1563             :      *
    1564             :      * XXX: the caller uses a spinlock before this, so we don't need a memory
    1565             :      * barrier here as far as the current usage is concerned.  But that might
    1566             :      * not be safe in general.
    1567             :      */
    1568     7913240 :     mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
    1569             : 
    1570     7913240 :     if (!mustwait)
    1571             :     {
    1572     3949466 :         *result = true;
    1573     3949466 :         return false;
    1574             :     }
    1575             : 
    1576     3963774 :     *result = false;
    1577             : 
    1578             :     /*
    1579             :      * Read value using the lwlock's wait list lock, as we can't generally
    1580             :      * rely on atomic 64 bit reads/stores.  TODO: On platforms with a way to
    1581             :      * do atomic 64 bit reads/writes the spinlock should be optimized away.
    1582             :      */
    1583     3963774 :     LWLockWaitListLock(lock);
    1584     3963774 :     value = *valptr;
    1585     3963774 :     LWLockWaitListUnlock(lock);
    1586             : 
    1587     3963774 :     if (value != oldval)
    1588             :     {
    1589     3960724 :         mustwait = false;
    1590     3960724 :         *newval = value;
    1591             :     }
    1592             :     else
    1593             :     {
    1594        3050 :         mustwait = true;
    1595             :     }
    1596             : 
    1597     3963774 :     return mustwait;
    1598             : }
    1599             : 
    1600             : /*
    1601             :  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
    1602             :  *
    1603             :  * If the lock is held and *valptr equals oldval, waits until the lock is
    1604             :  * either freed, or the lock holder updates *valptr by calling
    1605             :  * LWLockUpdateVar.  If the lock is free on exit (immediately or after
    1606             :  * waiting), returns true.  If the lock is still held, but *valptr no longer
    1607             :  * matches oldval, returns false and sets *newval to the current value in
    1608             :  * *valptr.
    1609             :  *
    1610             :  * Note: this function ignores shared lock holders; if the lock is held
    1611             :  * in shared mode, returns 'true'.
    1612             :  */
    1613             : bool
    1614     7910190 : LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
    1615             : {
    1616     7910190 :     PGPROC     *proc = MyProc;
    1617     7910190 :     int         extraWaits = 0;
    1618     7910190 :     bool        result = false;
    1619             : #ifdef LWLOCK_STATS
    1620             :     lwlock_stats *lwstats;
    1621             : 
    1622             :     lwstats = get_lwlock_stats_entry(lock);
    1623             : #endif
    1624             : 
    1625             :     PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
    1626             : 
    1627             :     /*
    1628             :      * Lock out cancel/die interrupts while we sleep on the lock.  There is no
    1629             :      * cleanup mechanism to remove us from the wait queue if we got
    1630             :      * interrupted.
    1631             :      */
    1632     7910190 :     HOLD_INTERRUPTS();
    1633             : 
    1634             :     /*
    1635             :      * Loop here to check the lock's status after each time we are signaled.
    1636             :      */
    1637             :     for (;;)
    1638        1228 :     {
    1639             :         bool        mustwait;
    1640             : 
    1641     7911418 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1642             :                                           &result);
    1643             : 
    1644     7911418 :         if (!mustwait)
    1645     7909596 :             break;              /* the lock was free or value didn't match */
    1646             : 
    1647             :         /*
     1648             :          * Add myself to the wait queue. Note that this is racy: somebody
     1649             :          * else could wake waiters up before we're finished queueing. NB:
     1650             :          * We're using nearly the same twice-in-a-row lock acquisition
     1651             :          * protocol as LWLockAcquire(). Check its comments for details. The
     1652             :          * only difference is that we also have to check the variable's
     1653             :          * value when checking the state of the lock.
    1654             :          */
    1655        1822 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1656             : 
    1657             :         /*
    1658             :          * Set RELEASE_OK flag, to make sure we get woken up as soon as the
    1659             :          * lock is released.
    1660             :          */
    1661        1822 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1662             : 
    1663             :         /*
    1664             :          * We're now guaranteed to be woken up if necessary. Recheck the lock
    1665             :          * and variables state.
    1666             :          */
    1667        1822 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1668             :                                           &result);
    1669             : 
    1670             :         /* Ok, no conflict after we queued ourselves. Undo queueing. */
    1671        1822 :         if (!mustwait)
    1672             :         {
    1673             :             LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
    1674             : 
    1675         594 :             LWLockDequeueSelf(lock);
    1676         594 :             break;
    1677             :         }
    1678             : 
    1679             :         /*
    1680             :          * Wait until awakened.
    1681             :          *
    1682             :          * It is possible that we get awakened for a reason other than being
    1683             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1684             :          * we've gotten the LWLock, re-increment the sema by the number of
    1685             :          * additional signals received.
    1686             :          */
    1687             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
    1688             : 
    1689             : #ifdef LWLOCK_STATS
    1690             :         lwstats->block_count++;
    1691             : #endif
    1692             : 
    1693        1228 :         LWLockReportWaitStart(lock);
    1694             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1695             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
    1696             : 
    1697             :         for (;;)
    1698             :         {
    1699        1228 :             PGSemaphoreLock(proc->sem);
    1700        1228 :             if (!proc->lwWaiting)
    1701        1228 :                 break;
    1702           0 :             extraWaits++;
    1703             :         }
    1704             : 
    1705             : #ifdef LOCK_DEBUG
    1706             :         {
    1707             :             /* not waiting anymore */
    1708             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1709             : 
    1710             :             Assert(nwaiters < MAX_BACKENDS);
    1711             :         }
    1712             : #endif
    1713             : 
    1714             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1715             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
    1716        1228 :         LWLockReportWaitEnd();
    1717             : 
    1718             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
    1719             : 
    1720             :         /* Now loop back and check the status of the lock again. */
    1721             :     }
    1722             : 
    1723             :     /*
    1724             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1725             :      */
    1726     7910190 :     while (extraWaits-- > 0)
    1727           0 :         PGSemaphoreUnlock(proc->sem);
    1728             : 
    1729             :     /*
    1730             :      * Now okay to allow cancel/die interrupts.
    1731             :      */
    1732     7910190 :     RESUME_INTERRUPTS();
    1733             : 
    1734     7910190 :     return result;
    1735             : }
    1736             : 
    1737             : 
    1738             : /*
    1739             :  * LWLockUpdateVar - Update a variable and wake up waiters atomically
    1740             :  *
    1741             :  * Sets *valptr to 'val', and wakes up all processes waiting for us with
    1742             :  * LWLockWaitForVar().  Setting the value and waking up the processes happen
    1743             :  * atomically so that any process calling LWLockWaitForVar() on the same lock
    1744             :  * is guaranteed to see the new value, and act accordingly.
    1745             :  *
    1746             :  * The caller must be holding the lock in exclusive mode.
    1747             :  */
    1748             : void
    1749      940562 : LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val)
    1750             : {
    1751             :     proclist_head wakeup;
    1752             :     proclist_mutable_iter iter;
    1753             : 
    1754             :     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
    1755             : 
    1756      940562 :     proclist_init(&wakeup);
    1757             : 
    1758      940562 :     LWLockWaitListLock(lock);
    1759             : 
    1760             :     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
    1761             : 
    1762             :     /* Update the lock's value */
    1763      940562 :     *valptr = val;
    1764             : 
    1765             :     /*
    1766             :      * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
    1767             :      * up. They are always in the front of the queue.
    1768             :      */
    1769      940566 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1770             :     {
    1771           6 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1772             : 
    1773           6 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
    1774           2 :             break;
    1775             : 
    1776           4 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1777           4 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
    1778             :     }
    1779             : 
    1780             :     /* We are done updating shared state of the lock itself. */
    1781      940562 :     LWLockWaitListUnlock(lock);
    1782             : 
    1783             :     /*
    1784             :      * Awaken any waiters I removed from the queue.
    1785             :      */
    1786      940566 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1787             :     {
    1788           4 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1789             : 
    1790           4 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1791             :         /* check comment in LWLockWakeup() about this barrier */
    1792           4 :         pg_write_barrier();
    1793           4 :         waiter->lwWaiting = false;
    1794           4 :         PGSemaphoreUnlock(waiter->sem);
    1795             :     }
    1796      940562 : }
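
A sketch of the full variable-wait protocol formed by LWLockUpdateVar, LWLockWaitForVar and LWLockReleaseClearVar. Everything other than those calls is illustrative; the shape loosely follows the WAL-insertion progress usage.

    /* Holder side: publish progress while still holding the lock. */
    static void
    produce(LWLock *lock, uint64 *progress_var)
    {
        LWLockAcquire(lock, LW_EXCLUSIVE);

        while (more_work_to_do())
            LWLockUpdateVar(lock, progress_var, do_next_chunk());

        /* reset the variable and release, atomically as seen by waiters */
        LWLockReleaseClearVar(lock, progress_var, 0);
    }

    /*
     * Waiter side: sleeps until either the lock is free (returns true) or
     * the variable no longer equals the last value we saw (returns false,
     * storing the current value in newval).
     */
    static bool
    progress_reached(LWLock *lock, uint64 *progress_var, uint64 seen,
                     uint64 target)
    {
        uint64      newval;

        if (LWLockWaitForVar(lock, progress_var, seen, &newval))
            return true;            /* lock free: all progress published */

        return newval >= target;    /* lock still held; check how far it got */
    }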
    1797             : 
    1798             : 
    1799             : /*
    1800             :  * LWLockRelease - release a previously acquired lock
    1801             :  */
    1802             : void
    1803   406170424 : LWLockRelease(LWLock *lock)
    1804             : {
    1805             :     LWLockMode  mode;
    1806             :     uint32      oldstate;
    1807             :     bool        check_waiters;
    1808             :     int         i;
    1809             : 
    1810             :     /*
    1811             :      * Remove lock from list of locks held.  Usually, but not always, it will
     1812             :      * be the latest-acquired lock; so search the array backwards.
    1813             :      */
    1814   407651462 :     for (i = num_held_lwlocks; --i >= 0;)
    1815   407651462 :         if (lock == held_lwlocks[i].lock)
    1816   406170424 :             break;
    1817             : 
    1818   406170424 :     if (i < 0)
    1819           0 :         elog(ERROR, "lock %s is not held", T_NAME(lock));
    1820             : 
    1821   406170424 :     mode = held_lwlocks[i].mode;
    1822             : 
    1823   406170424 :     num_held_lwlocks--;
    1824   407651462 :     for (; i < num_held_lwlocks; i++)
    1825     1481038 :         held_lwlocks[i] = held_lwlocks[i + 1];
    1826             : 
    1827             :     PRINT_LWDEBUG("LWLockRelease", lock, mode);
    1828             : 
    1829             :     /*
    1830             :      * Release my hold on lock, after that it can immediately be acquired by
    1831             :      * others, even if we still have to wakeup other waiters.
    1832             :      */
    1833   406170424 :     if (mode == LW_EXCLUSIVE)
    1834   193201416 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    1835             :     else
    1836   212969008 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
    1837             : 
    1838             :     /* nobody else can have that kind of lock */
    1839             :     Assert(!(oldstate & LW_VAL_EXCLUSIVE));
    1840             : 
    1841             :     if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
    1842             :         TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
    1843             : 
    1844             :     /*
     1845             :      * We're still waiting for backends to get scheduled; don't wake them up
     1846             :      * again.
    1847             :      */
    1848   406170424 :     if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
    1849       69804 :         (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
    1850       69804 :         (oldstate & LW_LOCK_MASK) == 0)
    1851       35362 :         check_waiters = true;
    1852             :     else
    1853   406135062 :         check_waiters = false;
    1854             : 
    1855             :     /*
    1856             :      * As waking up waiters requires the spinlock to be acquired, only do so
    1857             :      * if necessary.
    1858             :      */
    1859   406170424 :     if (check_waiters)
    1860             :     {
    1861             :         /* XXX: remove before commit? */
    1862             :         LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
    1863       35362 :         LWLockWakeup(lock);
    1864             :     }
    1865             : 
    1866             :     /*
    1867             :      * Now okay to allow cancel/die interrupts.
    1868             :      */
    1869   406170424 :     RESUME_INTERRUPTS();
    1870   406170424 : }
    1871             : 
    1872             : /*
    1873             :  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
    1874             :  */
    1875             : void
    1876    28661732 : LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
    1877             : {
    1878    28661732 :     LWLockWaitListLock(lock);
    1879             : 
    1880             :     /*
     1881             :      * Set the variable's value before releasing the lock. That prevents a
     1882             :      * race condition wherein a new locker acquires the lock, but hasn't yet
     1883             :      * set the variable's value.
    1884             :      */
    1885    28661732 :     *valptr = val;
    1886    28661732 :     LWLockWaitListUnlock(lock);
    1887             : 
    1888    28661732 :     LWLockRelease(lock);
    1889    28661732 : }
    1890             : 
    1891             : 
    1892             : /*
    1893             :  * LWLockReleaseAll - release all currently-held locks
    1894             :  *
    1895             :  * Used to clean up after ereport(ERROR). An important difference between this
    1896             :  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
    1897             :  * unchanged by this operation.  This is necessary since InterruptHoldoffCount
    1898             :  * has been set to an appropriate level earlier in error recovery. We could
     1899             :  * decrement it below zero if we allowed it to drop for each released lock!
    1900             :  */
    1901             : void
    1902       51202 : LWLockReleaseAll(void)
    1903             : {
    1904       51570 :     while (num_held_lwlocks > 0)
    1905             :     {
    1906         368 :         HOLD_INTERRUPTS();      /* match the upcoming RESUME_INTERRUPTS */
    1907             : 
    1908         368 :         LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
    1909             :     }
    1910       51202 : }
    1911             : 
    1912             : 
    1913             : /*
    1914             :  * LWLockHeldByMe - test whether my process holds a lock in any mode
    1915             :  *
    1916             :  * This is meant as debug support only.
    1917             :  */
    1918             : bool
    1919           0 : LWLockHeldByMe(LWLock *l)
    1920             : {
    1921             :     int         i;
    1922             : 
    1923           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1924             :     {
    1925           0 :         if (held_lwlocks[i].lock == l)
    1926           0 :             return true;
    1927             :     }
    1928           0 :     return false;
    1929             : }
    1930             : 
    1931             : /*
    1932             :  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
    1933             :  *
    1934             :  * This is meant as debug support only.
    1935             :  */
    1936             : bool
    1937           0 : LWLockHeldByMeInMode(LWLock *l, LWLockMode mode)
    1938             : {
    1939             :     int         i;
    1940             : 
    1941           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1942             :     {
    1943           0 :         if (held_lwlocks[i].lock == l && held_lwlocks[i].mode == mode)
    1944           0 :             return true;
    1945             :     }
    1946           0 :     return false;
    1947             : }
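
Both predicates are intended for assertions; a typical (hedged) usage sketch:

    /* before touching state protected by 'lock' in exclusive mode */
    Assert(LWLockHeldByMeInMode(lock, LW_EXCLUSIVE));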

Generated by: LCOV version 1.14