LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - lwlock.c (source / functions)
Test: PostgreSQL 19devel              Lines:       340 hit / 403 total    84.4 %
Date: 2025-09-10 22:18:18             Functions:    30 hit /  36 total    83.3 %

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * lwlock.c
       4             :  *    Lightweight lock manager
       5             :  *
       6             :  * Lightweight locks are intended primarily to provide mutual exclusion of
       7             :  * access to shared-memory data structures.  Therefore, they offer both
       8             :  * exclusive and shared lock modes (to support read/write and read-only
       9             :  * access to a shared object).  There are few other frills.  User-level
      10             :  * locking should be done with the full lock manager --- which depends on
      11             :  * LWLocks to protect its shared state.
      12             :  *
      13             :  * In addition to exclusive and shared modes, lightweight locks can be used to
      14             :  * wait until a variable changes value.  Acquiring the lock with
      15             :  * LWLockAcquire does not modify the variable, i.e. it keeps whatever
      16             :  * value it was last set to, and the holder can update it without
      17             :  * releasing the lock by calling LWLockUpdateVar.  LWLockWaitForVar
      18             :  * waits for the variable to be updated, or until the lock is free.  When
      19             :  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
      20             :  * appropriate value for a free lock.  The meaning of the variable is up to
      21             :  * the caller, the lightweight lock code just assigns and compares it.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/storage/lmgr/lwlock.c
      28             :  *
      29             :  * NOTES:
      30             :  *
      31             :  * This used to be a pretty straightforward reader-writer lock
      32             :  * implementation, in which the internal state was protected by a
      33             :  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
      34             :  * too high for workloads/locks that were taken in shared mode very
      35             :  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
      36             :  * while trying to acquire a shared lock that was actually free.
      37             :  *
      38             :  * Thus a new implementation was devised that provides wait-free shared lock
      39             :  * acquisition for locks that aren't exclusively locked.
      40             :  *
      41             :  * The basic idea is to have a single atomic variable 'lockcount' instead of
      42             :  * the formerly separate shared and exclusive counters and to use atomic
      43             :  * operations to acquire the lock. That's fairly easy to do for plain
      44             :  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
      45             :  * in the OS.
      46             :  *
      47             :  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
      48             :  * variable. For exclusive lock we swap in a sentinel value
      49             :  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
      50             :  *
      51             :  * To release the lock we use an atomic decrement.  If the
      52             :  * new value is zero (we get that atomically), we know we can/have to release
      53             :  * waiters.
      54             :  *
      55             :  * Obviously it is important that the sentinel value for exclusive locks
      56             :  * doesn't conflict with the maximum number of possible share lockers -
      57             :  * luckily MAX_BACKENDS makes that easily possible.
      58             :  *
      59             :  *
      60             :  * The attentive reader might have noticed that naively doing the above has a
      61             :  * glaring race condition: We try to lock using the atomic operations and
      62             :  * notice that we have to wait. Unfortunately by the time we have finished
      63             :  * queuing, the former locker very well might have already finished its
      64             :  * work. That's problematic because we're now stuck waiting inside the OS.
      65             :  *
      66             :  * To mitigate those races we use a phased attempt at locking:
      67             :  *   Phase 1: Try to acquire the lock atomically; if we succeed, we're done
      68             :  *   Phase 2: Add ourselves to the waitqueue of the lock
      69             :  *   Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
      70             :  *            the queue
      71             :  *   Phase 4: Sleep till wake-up, goto Phase 1
      72             :  *
      73             :  * This protects us against the problem above: nobody can release too
      74             :  * quickly before we're queued, since after Phase 2 we're already queued.
      75             :  * -------------------------------------------------------------------------
      76             :  */
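
/*
 * As a sketch only (the real implementation is LWLockAcquire(), which is
 * not part of this excerpt), the phased protocol above maps onto the helper
 * functions defined later in this file roughly like this:
 *
 *    for (;;)
 *    {
 *        if (!LWLockAttemptLock(lock, mode))
 *            break;                        -- Phase 1: acquired atomically
 *        LWLockQueueSelf(lock, mode);      -- Phase 2: enqueue ourselves
 *        if (!LWLockAttemptLock(lock, mode))
 *        {
 *            LWLockDequeueSelf(lock);      -- Phase 3: acquired after all
 *            break;
 *        }
 *        -- Phase 4: sleep on our semaphore until woken, then retry
 *    }
 */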
      77             : #include "postgres.h"
      78             : 
      79             : #include "miscadmin.h"
      80             : #include "pg_trace.h"
      81             : #include "pgstat.h"
      82             : #include "port/pg_bitutils.h"
      83             : #include "storage/proc.h"
      84             : #include "storage/proclist.h"
      85             : #include "storage/procnumber.h"
      86             : #include "storage/spin.h"
      87             : #include "utils/memutils.h"
      88             : 
      89             : #ifdef LWLOCK_STATS
      90             : #include "utils/hsearch.h"
      91             : #endif
      92             : 
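
/*
 * A minimal usage sketch of the wait-for-variable protocol described in the
 * header comment, assuming a hypothetical caller that owns "example_lock"
 * and a protected "example_pos" in shared memory; LWLockAcquire,
 * LWLockUpdateVar, LWLockReleaseClearVar and LWLockWaitForVar are the real
 * lwlock.h APIs.  Guarded so it is illustration only, not built.
 */
#ifdef LWLOCK_USAGE_EXAMPLE
static void
example_writer(LWLock *example_lock, pg_atomic_uint64 *example_pos,
               uint64 newpos)
{
    LWLockAcquire(example_lock, LW_EXCLUSIVE);

    /* publish progress while still holding the lock */
    LWLockUpdateVar(example_lock, example_pos, newpos);

    /* on release, set the variable to a value appropriate for a free lock */
    LWLockReleaseClearVar(example_lock, example_pos, 0);
}

static void
example_waiter(LWLock *example_lock, pg_atomic_uint64 *example_pos,
               uint64 oldpos)
{
    uint64      seenpos;

    /* returns once the variable differs from oldpos, or the lock is free */
    (void) LWLockWaitForVar(example_lock, example_pos, oldpos, &seenpos);
}
#endif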
      93             : 
      94             : #define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 31)
      95             : #define LW_FLAG_RELEASE_OK          ((uint32) 1 << 30)
      96             : #define LW_FLAG_LOCKED              ((uint32) 1 << 29)
      97             : #define LW_FLAG_BITS                3
      98             : #define LW_FLAG_MASK                (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
      99             : 
     100             : /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
     101             : #define LW_VAL_EXCLUSIVE            (MAX_BACKENDS + 1)
     102             : #define LW_VAL_SHARED               1
     103             : 
     104             : /* already (power of 2)-1, i.e. suitable for a mask */
     105             : #define LW_SHARED_MASK              MAX_BACKENDS
     106             : #define LW_LOCK_MASK                (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
     107             : 
     108             : 
     109             : StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
     110             :                  "MAX_BACKENDS + 1 needs to be a power of 2");
     111             : 
     112             : StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
     113             :                  "MAX_BACKENDS and LW_FLAG_MASK overlap");
     114             : 
     115             : StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
     116             :                  "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
     117             : 
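/*
 * Putting the above together, the layout of lock->state is (assuming the
 * current definition MAX_BACKENDS = 0x3FFFF, i.e. 2^18 - 1):
 *
 *   bit 31      LW_FLAG_HAS_WAITERS
 *   bit 30      LW_FLAG_RELEASE_OK
 *   bit 29      LW_FLAG_LOCKED        (wait-list mutex)
 *   bit 18      LW_VAL_EXCLUSIVE      (0x40000, exclusive-holder sentinel)
 *   bits 0-17   shared-holder count   (0 .. MAX_BACKENDS)
 *
 * For example, a state of 0x40000 is exclusively locked and 0x00003 has
 * three shared holders; the flag bits may be set on top of either.
 */
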
     118             : /*
     119             :  * There are three sorts of LWLock "tranches":
     120             :  *
     121             :  * 1. The individually-named locks defined in lwlocklist.h each have their
     122             :  * own tranche.  We absorb the names of these tranches from there into
     123             :  * BuiltinTrancheNames here.
     124             :  *
     125             :  * 2. There are some predefined tranches for built-in groups of locks defined
     126             :  * in lwlocklist.h.  We absorb the names of these tranches, too.
     127             :  *
     128             :  * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
     129             :  * or LWLockNewTrancheId.  These names are stored in shared memory and can be
     130             :  * accessed via LWLockTrancheNames.
     131             :  *
     132             :  * All these names are user-visible as wait event names, so choose with care
     133             :  * ... and do not forget to update the documentation's list of wait events.
     134             :  */
     135             : static const char *const BuiltinTrancheNames[] = {
     136             : #define PG_LWLOCK(id, lockname) [id] = CppAsString(lockname),
     137             : #define PG_LWLOCKTRANCHE(id, lockname) [LWTRANCHE_##id] = CppAsString(lockname),
     138             : #include "storage/lwlocklist.h"
     139             : #undef PG_LWLOCK
     140             : #undef PG_LWLOCKTRANCHE
     141             : };
     142             : 
     143             : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
     144             :                  LWTRANCHE_FIRST_USER_DEFINED,
     145             :                  "missing entries in BuiltinTrancheNames[]");
     146             : 
     147             : /*
     148             :  * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
     149             :  * points to the shared memory locations of the names of all
     150             :  * dynamically-created tranches.  Backends inherit the pointer by fork from the
     151             :  * postmaster (except in the EXEC_BACKEND case, where we have special measures
     152             :  * to pass it down).
     153             :  */
     154             : char      **LWLockTrancheNames = NULL;
     155             : 
     156             : /*
     157             :  * This points to the main array of LWLocks in shared memory.  Backends inherit
     158             :  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
     159             :  * where we have special measures to pass it down).
     160             :  */
     161             : LWLockPadded *MainLWLockArray = NULL;
     162             : 
     163             : /*
     164             :  * We use this structure to keep track of locked LWLocks for release
     165             :  * during error recovery.  Normally, only a few will be held at once, but
     166             :  * occasionally the number can be much higher.
     167             :  */
     168             : #define MAX_SIMUL_LWLOCKS   200
     169             : 
     170             : /* struct representing the LWLocks we're holding */
     171             : typedef struct LWLockHandle
     172             : {
     173             :     LWLock     *lock;
     174             :     LWLockMode  mode;
     175             : } LWLockHandle;
     176             : 
     177             : static int  num_held_lwlocks = 0;
     178             : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
     179             : 
     180             : /* struct representing the LWLock tranche request for named tranche */
     181             : typedef struct NamedLWLockTrancheRequest
     182             : {
     183             :     char        tranche_name[NAMEDATALEN];
     184             :     int         num_lwlocks;
     185             : } NamedLWLockTrancheRequest;
     186             : 
     187             : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
     188             : 
     189             : /*
     190             :  * NamedLWLockTrancheRequests is the valid length of the request array.  This
     191             :  * variable is non-static so that postmaster.c can copy it to child processes
     192             :  * in EXEC_BACKEND builds.
     193             :  */
     194             : int         NamedLWLockTrancheRequests = 0;
     195             : 
     196             : /* shared memory counter of registered tranches */
     197             : int        *LWLockCounter = NULL;
     198             : 
     199             : /* backend-local counter of registered tranches */
     200             : static int  LocalLWLockCounter;
     201             : 
     202             : #define MAX_NAMED_TRANCHES 256
     203             : 
     204             : static void InitializeLWLocks(void);
     205             : static inline void LWLockReportWaitStart(LWLock *lock);
     206             : static inline void LWLockReportWaitEnd(void);
     207             : static const char *GetLWTrancheName(uint16 trancheId);
     208             : 
     209             : #define T_NAME(lock) \
     210             :     GetLWTrancheName((lock)->tranche)
     211             : 
     212             : #ifdef LWLOCK_STATS
     213             : typedef struct lwlock_stats_key
     214             : {
     215             :     int         tranche;
     216             :     void       *instance;
     217             : }           lwlock_stats_key;
     218             : 
     219             : typedef struct lwlock_stats
     220             : {
     221             :     lwlock_stats_key key;
     222             :     int         sh_acquire_count;
     223             :     int         ex_acquire_count;
     224             :     int         block_count;
     225             :     int         dequeue_self_count;
     226             :     int         spin_delay_count;
     227             : }           lwlock_stats;
     228             : 
     229             : static HTAB *lwlock_stats_htab;
     230             : static lwlock_stats lwlock_stats_dummy;
     231             : #endif
     232             : 
     233             : #ifdef LOCK_DEBUG
     234             : bool        Trace_lwlocks = false;
     235             : 
     236             : inline static void
     237             : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
     238             : {
     239             :     /* hide statement & context here, otherwise the log is just too verbose */
     240             :     if (Trace_lwlocks)
     241             :     {
     242             :         uint32      state = pg_atomic_read_u32(&lock->state);
     243             : 
     244             :         ereport(LOG,
     245             :                 (errhidestmt(true),
     246             :                  errhidecontext(true),
     247             :                  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
     248             :                                  MyProcPid,
     249             :                                  where, T_NAME(lock), lock,
     250             :                                  (state & LW_VAL_EXCLUSIVE) != 0,
     251             :                                  state & LW_SHARED_MASK,
     252             :                                  (state & LW_FLAG_HAS_WAITERS) != 0,
     253             :                                  pg_atomic_read_u32(&lock->nwaiters),
     254             :                                  (state & LW_FLAG_RELEASE_OK) != 0)));
     255             :     }
     256             : }
     257             : 
     258             : inline static void
     259             : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
     260             : {
     261             :     /* hide statement & context here, otherwise the log is just too verbose */
     262             :     if (Trace_lwlocks)
     263             :     {
     264             :         ereport(LOG,
     265             :                 (errhidestmt(true),
     266             :                  errhidecontext(true),
     267             :                  errmsg_internal("%s(%s %p): %s", where,
     268             :                                  T_NAME(lock), lock, msg)));
     269             :     }
     270             : }
     271             : 
     272             : #else                           /* not LOCK_DEBUG */
     273             : #define PRINT_LWDEBUG(a,b,c) ((void)0)
     274             : #define LOG_LWDEBUG(a,b,c) ((void)0)
     275             : #endif                          /* LOCK_DEBUG */
     276             : 
     277             : #ifdef LWLOCK_STATS
     278             : 
     279             : static void init_lwlock_stats(void);
     280             : static void print_lwlock_stats(int code, Datum arg);
     281             : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
     282             : 
     283             : static void
     284             : init_lwlock_stats(void)
     285             : {
     286             :     HASHCTL     ctl;
     287             :     static MemoryContext lwlock_stats_cxt = NULL;
     288             :     static bool exit_registered = false;
     289             : 
     290             :     if (lwlock_stats_cxt != NULL)
     291             :         MemoryContextDelete(lwlock_stats_cxt);
     292             : 
     293             :     /*
     294             :      * The LWLock stats will be updated within a critical section, which
     295             :      * requires allocating new hash entries. Allocations within a critical
     296             :      * section are normally not allowed because running out of memory would
     297             :      * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
     298             :      * turned on in production, so that's an acceptable risk. The hash entries
     299             :      * are small, so the risk of running out of memory is minimal in practice.
     300             :      */
     301             :     lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
     302             :                                              "LWLock stats",
     303             :                                              ALLOCSET_DEFAULT_SIZES);
     304             :     MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
     305             : 
     306             :     ctl.keysize = sizeof(lwlock_stats_key);
     307             :     ctl.entrysize = sizeof(lwlock_stats);
     308             :     ctl.hcxt = lwlock_stats_cxt;
     309             :     lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
     310             :                                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     311             :     if (!exit_registered)
     312             :     {
     313             :         on_shmem_exit(print_lwlock_stats, 0);
     314             :         exit_registered = true;
     315             :     }
     316             : }
     317             : 
     318             : static void
     319             : print_lwlock_stats(int code, Datum arg)
     320             : {
     321             :     HASH_SEQ_STATUS scan;
     322             :     lwlock_stats *lwstats;
     323             : 
     324             :     hash_seq_init(&scan, lwlock_stats_htab);
     325             : 
     326             :     /* Grab an LWLock to keep different backends from mixing reports */
     327             :     LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
     328             : 
     329             :     while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
     330             :     {
     331             :         fprintf(stderr,
     332             :                 "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
     333             :                 MyProcPid, GetLWTrancheName(lwstats->key.tranche),
     334             :                 lwstats->key.instance, lwstats->sh_acquire_count,
     335             :                 lwstats->ex_acquire_count, lwstats->block_count,
     336             :                 lwstats->spin_delay_count, lwstats->dequeue_self_count);
     337             :     }
     338             : 
     339             :     LWLockRelease(&MainLWLockArray[0].lock);
     340             : }
     341             : 
     342             : static lwlock_stats *
     343             : get_lwlock_stats_entry(LWLock *lock)
     344             : {
     345             :     lwlock_stats_key key;
     346             :     lwlock_stats *lwstats;
     347             :     bool        found;
     348             : 
     349             :     /*
     350             :      * During shared memory initialization, the hash table doesn't exist yet.
     351             :      * Stats of that phase aren't very interesting, so just collect operations
     352             :      * on all locks in a single dummy entry.
     353             :      */
     354             :     if (lwlock_stats_htab == NULL)
     355             :         return &lwlock_stats_dummy;
     356             : 
     357             :     /* Fetch or create the entry. */
     358             :     MemSet(&key, 0, sizeof(key));
     359             :     key.tranche = lock->tranche;
     360             :     key.instance = lock;
     361             :     lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
     362             :     if (!found)
     363             :     {
     364             :         lwstats->sh_acquire_count = 0;
     365             :         lwstats->ex_acquire_count = 0;
     366             :         lwstats->block_count = 0;
     367             :         lwstats->dequeue_self_count = 0;
     368             :         lwstats->spin_delay_count = 0;
     369             :     }
     370             :     return lwstats;
     371             : }
     372             : #endif                          /* LWLOCK_STATS */
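
/*
 * LWLOCK_STATS is a compile-time option; one common way to enable it for a
 * source build (an assumption about the build environment, not something
 * this file dictates) is to append the define to CFLAGS via COPT:
 *
 *     make COPT='-DLWLOCK_STATS'
 *
 * Each backend then prints its per-lock counts to stderr at shmem exit via
 * print_lwlock_stats() above.
 */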
     373             : 
     374             : 
     375             : /*
     376             :  * Compute number of LWLocks required by named tranches.  These will be
     377             :  * allocated in the main array.
     378             :  */
     379             : static int
     380        6222 : NumLWLocksForNamedTranches(void)
     381             : {
     382        6222 :     int         numLocks = 0;
     383             :     int         i;
     384             : 
     385        6264 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     386          42 :         numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     387             : 
     388        6222 :     return numLocks;
     389             : }
     390             : 
     391             : /*
     392             :  * Compute shmem space needed for LWLocks and named tranches.
     393             :  */
     394             : Size
     395        6222 : LWLockShmemSize(void)
     396             : {
     397             :     Size        size;
     398        6222 :     int         numLocks = NUM_FIXED_LWLOCKS;
     399             : 
     400             :     /* Calculate total number of locks needed in the main array. */
     401        6222 :     numLocks += NumLWLocksForNamedTranches();
     402             : 
     403             :     /* Space for dynamic allocation counter. */
     404        6222 :     size = MAXALIGN(sizeof(int));
     405             : 
     406             :     /* Space for named tranches. */
     407        6222 :     size = add_size(size, mul_size(MAX_NAMED_TRANCHES, sizeof(char *)));
     408        6222 :     size = add_size(size, mul_size(MAX_NAMED_TRANCHES, NAMEDATALEN));
     409             : 
     410             :     /* Space for the LWLock array, plus room for cache line alignment. */
     411        6222 :     size = add_size(size, LWLOCK_PADDED_SIZE);
     412        6222 :     size = add_size(size, mul_size(numLocks, sizeof(LWLockPadded)));
     413             : 
     414        6222 :     return size;
     415             : }
     416             : 
     417             : /*
     418             :  * Allocate shmem space for the main LWLock array and all tranches and
     419             :  * initialize it.
     420             :  */
     421             : void
     422        2174 : CreateLWLocks(void)
     423             : {
     424        2174 :     if (!IsUnderPostmaster)
     425             :     {
     426        2174 :         Size        spaceLocks = LWLockShmemSize();
     427             :         char       *ptr;
     428             : 
     429             :         /* Allocate space */
     430        2174 :         ptr = (char *) ShmemAlloc(spaceLocks);
     431             : 
     432             :         /* Initialize the dynamic-allocation counter for tranches */
     433        2174 :         LWLockCounter = (int *) ptr;
     434        2174 :         *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
     435        2174 :         ptr += MAXALIGN(sizeof(int));
     436             : 
     437             :         /* Initialize tranche names */
     438        2174 :         LWLockTrancheNames = (char **) ptr;
     439        2174 :         ptr += MAX_NAMED_TRANCHES * sizeof(char *);
     440      558718 :         for (int i = 0; i < MAX_NAMED_TRANCHES; i++)
     441             :         {
     442      556544 :             LWLockTrancheNames[i] = ptr;
     443      556544 :             ptr += NAMEDATALEN;
     444             :         }
     445             : 
     446             :         /* Ensure desired alignment of LWLock array */
     447        2174 :         ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
     448        2174 :         MainLWLockArray = (LWLockPadded *) ptr;
     449             : 
     450             :         /* Initialize all LWLocks */
     451        2174 :         InitializeLWLocks();
     452             :     }
     453        2174 : }
     454             : 
     455             : /*
     456             :  * Initialize LWLocks that are fixed and those belonging to named tranches.
     457             :  */
     458             : static void
     459        2174 : InitializeLWLocks(void)
     460             : {
     461             :     int         id;
     462             :     int         i;
     463             :     int         j;
     464             :     LWLockPadded *lock;
     465             : 
     466             :     /* Initialize all individual LWLocks in main array */
     467      119570 :     for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
     468      117396 :         LWLockInitialize(&lock->lock, id);
     469             : 
     470             :     /* Initialize buffer mapping LWLocks in main array */
     471        2174 :     lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
     472      280446 :     for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
     473      278272 :         LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
     474             : 
     475             :     /* Initialize lmgrs' LWLocks in main array */
     476        2174 :     lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
     477       36958 :     for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
     478       34784 :         LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
     479             : 
     480             :     /* Initialize predicate lmgrs' LWLocks in main array */
     481        2174 :     lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
     482       36958 :     for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
     483       34784 :         LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
     484             : 
     485             :     /*
     486             :      * Copy the info about any named tranches into shared memory (so that
     487             :      * other processes can see it), and initialize the requested LWLocks.
     488             :      */
     489        2174 :     if (NamedLWLockTrancheRequests > 0)
     490             :     {
     491          14 :         lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
     492             : 
     493          28 :         for (i = 0; i < NamedLWLockTrancheRequests; i++)
     494             :         {
     495             :             NamedLWLockTrancheRequest *request;
     496             :             int         tranche;
     497             : 
     498          14 :             request = &NamedLWLockTrancheRequestArray[i];
     499          14 :             tranche = LWLockNewTrancheId(request->tranche_name);
     500             : 
     501          28 :             for (j = 0; j < request->num_lwlocks; j++, lock++)
     502          14 :                 LWLockInitialize(&lock->lock, tranche);
     503             :         }
     504             :     }
     505        2174 : }
     506             : 
     507             : /*
     508             :  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
     509             :  */
     510             : void
     511       44518 : InitLWLockAccess(void)
     512             : {
     513             : #ifdef LWLOCK_STATS
     514             :     init_lwlock_stats();
     515             : #endif
     516       44518 : }
     517             : 
     518             : /*
     519             :  * GetNamedLWLockTranche - returns the base address of LWLock from the
     520             :  * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
     521             :  *      to the specified tranche.
     522             :  *
     523             :  * The caller is expected to use the requested number of LWLocks starting
     524             :  * from the base lock address returned here.  This works only for tranches
     525             :  * that were requested via the RequestNamedLWLockTranche() API.
     526             : LWLockPadded *
     527          14 : GetNamedLWLockTranche(const char *tranche_name)
     528             : {
     529             :     int         lock_pos;
     530             :     int         i;
     531             : 
     532             :     /*
     533             :      * Find the position of the base address of the LWLocks belonging to the
     534             :      * requested tranche_name in MainLWLockArray.  LWLocks for named tranches
     535             :      * are placed in MainLWLockArray after the fixed locks.
     536             :      */
     537          14 :     lock_pos = NUM_FIXED_LWLOCKS;
     538          14 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     539             :     {
     540          14 :         if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
     541             :                    tranche_name) == 0)
     542          14 :             return &MainLWLockArray[lock_pos];
     543             : 
     544           0 :         lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     545             :     }
     546             : 
     547           0 :     elog(ERROR, "requested tranche is not registered");
     548             : 
     549             :     /* just to keep compiler quiet */
     550             :     return NULL;
     551             : }
     552             : 
     553             : /*
     554             :  * Allocate a new tranche ID with the provided name.
     555             :  */
     556             : int
     557          38 : LWLockNewTrancheId(const char *name)
     558             : {
     559             :     int         result;
     560             : 
     561          38 :     if (!name)
     562           0 :         ereport(ERROR,
     563             :                 (errcode(ERRCODE_INVALID_NAME),
     564             :                  errmsg("tranche name cannot be NULL")));
     565             : 
     566          38 :     if (strlen(name) >= NAMEDATALEN)
     567           0 :         ereport(ERROR,
     568             :                 (errcode(ERRCODE_NAME_TOO_LONG),
     569             :                  errmsg("tranche name too long"),
     570             :                  errdetail("LWLock tranche names must be no longer than %d bytes.",
     571             :                            NAMEDATALEN - 1)));
     572             : 
     573             :     /*
     574             :      * We use the ShmemLock spinlock to protect LWLockCounter and
     575             :      * LWLockTrancheNames.
     576             :      */
     577          38 :     SpinLockAcquire(ShmemLock);
     578             : 
     579          38 :     if (*LWLockCounter - LWTRANCHE_FIRST_USER_DEFINED >= MAX_NAMED_TRANCHES)
     580             :     {
     581           0 :         SpinLockRelease(ShmemLock);
     582           0 :         ereport(ERROR,
     583             :                 (errmsg("maximum number of tranches already registered"),
     584             :                  errdetail("No more than %d tranches may be registered.",
     585             :                            MAX_NAMED_TRANCHES)));
     586             :     }
     587             : 
     588          38 :     result = (*LWLockCounter)++;
     589          38 :     LocalLWLockCounter = *LWLockCounter;
     590          38 :     strlcpy(LWLockTrancheNames[result - LWTRANCHE_FIRST_USER_DEFINED], name, NAMEDATALEN);
     591             : 
     592          38 :     SpinLockRelease(ShmemLock);
     593             : 
     594          38 :     return result;
     595             : }
     596             : 
     597             : /*
     598             :  * RequestNamedLWLockTranche
     599             :  *      Request that extra LWLocks be allocated during postmaster
     600             :  *      startup.
     601             :  *
     602             :  * This may only be called via the shmem_request_hook of a library that is
     603             :  * loaded into the postmaster via shared_preload_libraries.  Calls from
     604             :  * elsewhere will fail.
     605             :  *
     606             :  * The tranche name will be user-visible as a wait event name, so try to
     607             :  * use a name that fits the style for those.
     608             :  */
     609             : void
     610          14 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
     611             : {
     612             :     NamedLWLockTrancheRequest *request;
     613             :     static int  NamedLWLockTrancheRequestsAllocated;
     614             : 
     615          14 :     if (!process_shmem_requests_in_progress)
     616           0 :         elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
     617             : 
     618          14 :     if (!tranche_name)
     619           0 :         ereport(ERROR,
     620             :                 (errcode(ERRCODE_INVALID_NAME),
     621             :                  errmsg("tranche name cannot be NULL")));
     622             : 
     623          14 :     if (strlen(tranche_name) >= NAMEDATALEN)
     624           0 :         ereport(ERROR,
     625             :                 (errcode(ERRCODE_NAME_TOO_LONG),
     626             :                  errmsg("tranche name too long"),
     627             :                  errdetail("LWLock tranche names must be no longer than %d bytes.",
     628             :                            NAMEDATALEN - 1)));
     629             : 
     630          14 :     if (NamedLWLockTrancheRequestArray == NULL)
     631             :     {
     632          14 :         NamedLWLockTrancheRequestsAllocated = 16;
     633          14 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     634          14 :             MemoryContextAlloc(TopMemoryContext,
     635             :                                NamedLWLockTrancheRequestsAllocated
     636             :                                * sizeof(NamedLWLockTrancheRequest));
     637             :     }
     638             : 
     639          14 :     if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
     640             :     {
     641           0 :         int         i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
     642             : 
     643           0 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     644           0 :             repalloc(NamedLWLockTrancheRequestArray,
     645             :                      i * sizeof(NamedLWLockTrancheRequest));
     646           0 :         NamedLWLockTrancheRequestsAllocated = i;
     647             :     }
     648             : 
     649          14 :     request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
     650          14 :     strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
     651          14 :     request->num_lwlocks = num_lwlocks;
     652          14 :     NamedLWLockTrancheRequests++;
     653          14 : }
     654             : 
     655             : /*
     656             :  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
     657             :  */
     658             : void
     659    24636708 : LWLockInitialize(LWLock *lock, int tranche_id)
     660             : {
     661             :     /* verify the tranche_id is valid */
     662    24636708 :     (void) GetLWTrancheName(tranche_id);
     663             : 
     664    24636708 :     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
     665             : #ifdef LOCK_DEBUG
     666             :     pg_atomic_init_u32(&lock->nwaiters, 0);
     667             : #endif
     668    24636708 :     lock->tranche = tranche_id;
     669    24636708 :     proclist_init(&lock->waiters);
     670    24636708 : }
     671             : 
     672             : /*
     673             :  * Report start of wait event for light-weight locks.
     674             :  *
     675             :  * This function is used by all the lightweight lock calls that need to
     676             :  * wait to acquire the lock.  It distinguishes the wait event based on the
     677             :  * lock's tranche.
     678             :  */
     679             : static inline void
     680     3172830 : LWLockReportWaitStart(LWLock *lock)
     681             : {
     682     3172830 :     pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
     683     3172830 : }
     684             : 
     685             : /*
     686             :  * Report end of wait event for light-weight locks.
     687             :  */
     688             : static inline void
     689     3172830 : LWLockReportWaitEnd(void)
     690             : {
     691     3172830 :     pgstat_report_wait_end();
     692     3172830 : }
     693             : 
     694             : /*
     695             :  * Return the name of an LWLock tranche.
     696             :  */
     697             : static const char *
     698    24636802 : GetLWTrancheName(uint16 trancheId)
     699             : {
     700             :     /* Built-in tranche or individual LWLock? */
     701    24636802 :     if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
     702    24636066 :         return BuiltinTrancheNames[trancheId];
     703             : 
     704             :     /*
     705             :      * We only ever add new entries to LWLockTrancheNames, so most lookups can
     706             :      * avoid taking the spinlock as long as the backend-local counter
     707             :      * (LocalLWLockCounter) is greater than the requested tranche ID.  Else,
     708             :      * we need to first update the backend-local counter with ShmemLock held
     709             :      * before attempting the lookup again.  In practice, the latter case is
     710             :      * probably rare.
     711             :      */
     712         736 :     if (trancheId >= LocalLWLockCounter)
     713             :     {
     714           0 :         SpinLockAcquire(ShmemLock);
     715           0 :         LocalLWLockCounter = *LWLockCounter;
     716           0 :         SpinLockRelease(ShmemLock);
     717             : 
     718           0 :         if (trancheId >= LocalLWLockCounter)
     719           0 :             elog(ERROR, "tranche %d is not registered", trancheId);
     720             :     }
     721             : 
     722             :     /*
     723             :      * It's an extension tranche, so look in LWLockTrancheNames.
     724             :      */
     725         736 :     trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
     726             : 
     727         736 :     return LWLockTrancheNames[trancheId];
     728             : }
     729             : 
     730             : /*
     731             :  * Return an identifier for an LWLock based on the wait class and event.
     732             :  */
     733             : const char *
     734          94 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
     735             : {
     736             :     Assert(classId == PG_WAIT_LWLOCK);
     737             :     /* The event IDs are just tranche numbers. */
     738          94 :     return GetLWTrancheName(eventId);
     739             : }
     740             : 
     741             : /*
     742             :  * Internal function that tries to atomically acquire the lwlock in the
     743             :  * passed-in mode.
     744             :  *
     745             :  * This function will not block waiting for a lock to become free - that's the
     746             :  * caller's job.
     747             :  *
     748             :  * Returns true if the lock isn't free and we need to wait.
     749             :  */
     750             : static bool
     751   757368250 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     752             : {
     753             :     uint32      old_state;
     754             : 
     755             :     Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
     756             : 
     757             :     /*
     758             :      * Read once outside the loop, later iterations will get the newer value
     759             :      * via compare & exchange.
     760             :      */
     761   757368250 :     old_state = pg_atomic_read_u32(&lock->state);
     762             : 
     763             :     /* loop until we've determined whether we could acquire the lock or not */
     764             :     while (true)
     765     1049430 :     {
     766             :         uint32      desired_state;
     767             :         bool        lock_free;
     768             : 
     769   758417680 :         desired_state = old_state;
     770             : 
     771   758417680 :         if (mode == LW_EXCLUSIVE)
     772             :         {
     773   464309204 :             lock_free = (old_state & LW_LOCK_MASK) == 0;
     774   464309204 :             if (lock_free)
     775   461802494 :                 desired_state += LW_VAL_EXCLUSIVE;
     776             :         }
     777             :         else
     778             :         {
     779   294108476 :             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
     780   294108476 :             if (lock_free)
     781   289950938 :                 desired_state += LW_VAL_SHARED;
     782             :         }
     783             : 
     784             :         /*
     785             :          * Attempt to swap in the state we are expecting. If we didn't see
     786             :          * the lock as free, that's just the old value. If we saw it as free,
     787             :          * we'll attempt to mark it acquired. The reason that we always swap
     788             :          * in the value is that this doubles as a memory barrier. We could try
     789             :          * to be smarter and only swap in values if we saw the lock as free,
     790             :          * but benchmarks haven't shown that to be beneficial so far.
     791             :          *
     792             :          * Retry if the value changed since we last looked at it.
     793             :          */
     794   758417680 :         if (pg_atomic_compare_exchange_u32(&lock->state,
     795             :                                            &old_state, desired_state))
     796             :         {
     797   757368250 :             if (lock_free)
     798             :             {
     799             :                 /* Great! Got the lock. */
     800             : #ifdef LOCK_DEBUG
     801             :                 if (mode == LW_EXCLUSIVE)
     802             :                     lock->owner = MyProc;
     803             : #endif
     804   751036336 :                 return false;
     805             :             }
     806             :             else
     807     6331914 :                 return true;    /* somebody else has the lock */
     808             :         }
     809             :     }
     810             :     pg_unreachable();
     811             : }
     812             : 
     813             : /*
     814             :  * Lock the LWLock's wait list against concurrent activity.
     815             :  *
     816             :  * NB: even though the wait list is locked, non-conflicting lock operations
     817             :  * may still happen concurrently.
     818             :  *
     819             :  * Time spent holding mutex should be short!
     820             :  */
     821             : static void
     822    13258780 : LWLockWaitListLock(LWLock *lock)
     823             : {
     824             :     uint32      old_state;
     825             : #ifdef LWLOCK_STATS
     826             :     lwlock_stats *lwstats;
     827             :     uint32      delays = 0;
     828             : 
     829             :     lwstats = get_lwlock_stats_entry(lock);
     830             : #endif
     831             : 
     832             :     while (true)
     833             :     {
     834             :         /* always try once to acquire lock directly */
     835    13559194 :         old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
     836    13559194 :         if (!(old_state & LW_FLAG_LOCKED))
     837    13258780 :             break;              /* got lock */
     838             : 
     839             :         /* and then spin without atomic operations until lock is released */
     840             :         {
     841             :             SpinDelayStatus delayStatus;
     842             : 
     843      300414 :             init_local_spin_delay(&delayStatus);
     844             : 
     845      841142 :             while (old_state & LW_FLAG_LOCKED)
     846             :             {
     847      540728 :                 perform_spin_delay(&delayStatus);
     848      540728 :                 old_state = pg_atomic_read_u32(&lock->state);
     849             :             }
     850             : #ifdef LWLOCK_STATS
     851             :             delays += delayStatus.delays;
     852             : #endif
     853      300414 :             finish_spin_delay(&delayStatus);
     854             :         }
     855             : 
     856             :         /*
     857             :          * Retry. The lock may well have been re-acquired by the time we
     858             :          * attempt to get it again.
     859             :          */
     860             :     }
     861             : 
     862             : #ifdef LWLOCK_STATS
     863             :     lwstats->spin_delay_count += delays;
     864             : #endif
     865    13258780 : }
     866             : 
     867             : /*
     868             :  * Unlock the LWLock's wait list.
     869             :  *
     870             :  * Note that it can be more efficient to manipulate flags and release the
     871             :  * locks in a single atomic operation.
     872             :  */
     873             : static void
     874     9181302 : LWLockWaitListUnlock(LWLock *lock)
     875             : {
     876             :     uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
     877             : 
     878     9181302 :     old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
     879             : 
     880             :     Assert(old_state & LW_FLAG_LOCKED);
     881     9181302 : }
     882             : 
     883             : /*
     884             :  * Wake up all the lockers that currently have a chance to acquire the lock.
     885             :  */
     886             : static void
     887     4077478 : LWLockWakeup(LWLock *lock)
     888             : {
     889             :     bool        new_release_ok;
     890     4077478 :     bool        wokeup_somebody = false;
     891             :     proclist_head wakeup;
     892             :     proclist_mutable_iter iter;
     893             : 
     894     4077478 :     proclist_init(&wakeup);
     895             : 
     896     4077478 :     new_release_ok = true;
     897             : 
     898             :     /* lock wait list while collecting backends to wake up */
     899     4077478 :     LWLockWaitListLock(lock);
     900             : 
     901     6270994 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
     902             :     {
     903     3275506 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     904             : 
     905     3275506 :         if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
     906       19656 :             continue;
     907             : 
     908     3255850 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
     909     3255850 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
     910             : 
     911     3255850 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
     912             :         {
     913             :             /*
     914             :              * Prevent additional wakeups until retryer gets to run. Backends
     915             :              * that are just waiting for the lock to become free don't retry
     916             :              * automatically.
     917             :              */
     918     3068690 :             new_release_ok = false;
     919             : 
     920             :             /*
     921             :              * Don't wake up (further) exclusive lockers.
     922             :              */
     923     3068690 :             wokeup_somebody = true;
     924             :         }
     925             : 
     926             :         /*
     927             :          * Signal that the process isn't on the wait list anymore. This allows
     928             :          * LWLockDequeueSelf() to remove itself from the waitlist with a
     929             :          * proclist_delete(), rather than having to check if it has been
     930             :          * removed from the list.
     931             :          */
     932             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
     933     3255850 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
     934             : 
     935             :         /*
     936             :          * Once we've woken up an exclusive waiter, there's no point in waking
     937             :          * up anybody else.
     938             :          */
     939     3255850 :         if (waiter->lwWaitMode == LW_EXCLUSIVE)
     940     1081990 :             break;
     941             :     }
     942             : 
     943             :     Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
     944             : 
     945             :     /* unset required flags, and release lock, in one fell swoop */
     946             :     {
     947             :         uint32      old_state;
     948             :         uint32      desired_state;
     949             : 
     950     4077478 :         old_state = pg_atomic_read_u32(&lock->state);
     951             :         while (true)
     952             :         {
     953     4243928 :             desired_state = old_state;
     954             : 
     955             :             /* compute desired flags */
     956             : 
     957     4243928 :             if (new_release_ok)
     958     1369390 :                 desired_state |= LW_FLAG_RELEASE_OK;
     959             :             else
     960     2874538 :                 desired_state &= ~LW_FLAG_RELEASE_OK;
     961             : 
     962     4243928 :             if (proclist_is_empty(&wakeup))
     963     1286380 :                 desired_state &= ~LW_FLAG_HAS_WAITERS;
     964             : 
     965     4243928 :             desired_state &= ~LW_FLAG_LOCKED;   /* release lock */
     966             : 
     967     4243928 :             if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
     968             :                                                desired_state))
     969     4077478 :                 break;
     970             :         }
     971             :     }
     972             : 
     973             :     /* Awaken any waiters I removed from the queue. */
     974     7333328 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
     975             :     {
     976     3255850 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     977             : 
     978             :         LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
     979     3255850 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
     980             : 
     981             :         /*
     982             :          * Guarantee that lwWaiting being unset only becomes visible once the
     983             :          * unlink from the list has completed. Otherwise the target backend
     984             :          * could be woken up for another reason and enqueue for a new lock - if
     985             :          * that happens before the list unlink happens, the list would end up
     986             :          * being corrupted.
     987             :          *
     988             :          * The barrier pairs with the LWLockWaitListLock() when enqueuing for
     989             :          * another lock.
     990             :          */
     991     3255850 :         pg_write_barrier();
     992     3255850 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
     993     3255850 :         PGSemaphoreUnlock(waiter->sem);
     994             :     }
     995     4077478 : }
     996             : 
     997             : /*
     998             :  * Add ourselves to the end of the queue.
     999             :  *
    1000             :  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
    1001             :  */
    1002             : static void
    1003     3505912 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
    1004             : {
    1005             :     /*
    1006             :      * If we don't have a PGPROC structure, there's no way to wait. This
    1007             :      * should never occur, since MyProc should only be null during shared
    1008             :      * memory initialization.
    1009             :      */
    1010     3505912 :     if (MyProc == NULL)
    1011           0 :         elog(PANIC, "cannot wait without a PGPROC structure");
    1012             : 
    1013     3505912 :     if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
    1014           0 :         elog(PANIC, "queueing for lock while waiting on another one");
    1015             : 
    1016     3505912 :     LWLockWaitListLock(lock);
    1017             : 
    1018             :     /* setting the flag is protected by the spinlock */
    1019     3505912 :     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
    1020             : 
    1021     3505912 :     MyProc->lwWaiting = LW_WS_WAITING;
    1022     3505912 :     MyProc->lwWaitMode = mode;
    1023             : 
    1024             :     /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
    1025     3505912 :     if (mode == LW_WAIT_UNTIL_FREE)
    1026      194334 :         proclist_push_head(&lock->waiters, MyProcNumber, lwWaitLink);
    1027             :     else
    1028     3311578 :         proclist_push_tail(&lock->waiters, MyProcNumber, lwWaitLink);
    1029             : 
    1030             :     /* Can release the mutex now */
    1031     3505912 :     LWLockWaitListUnlock(lock);
    1032             : 
    1033             : #ifdef LOCK_DEBUG
    1034             :     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
    1035             : #endif
    1036     3505912 : }
    1037             : 
    1038             : /*
    1039             :  * Remove ourselves from the waitlist.
    1040             :  *
    1041             :  * This is used if we queued ourselves because we thought we needed to sleep
    1042             :  * but, after further checking, we discovered that we don't actually need to
    1043             :  * do so.
    1044             :  */
    1045             : static void
    1046      333082 : LWLockDequeueSelf(LWLock *lock)
    1047             : {
    1048             :     bool        on_waitlist;
    1049             : 
    1050             : #ifdef LWLOCK_STATS
    1051             :     lwlock_stats *lwstats;
    1052             : 
    1053             :     lwstats = get_lwlock_stats_entry(lock);
    1054             : 
    1055             :     lwstats->dequeue_self_count++;
    1056             : #endif
    1057             : 
    1058      333082 :     LWLockWaitListLock(lock);
    1059             : 
    1060             :     /*
    1061             :      * Remove ourselves from the waitlist, unless we've already been removed.
    1062             :      * The removal happens with the wait list lock held, so there's no race in
    1063             :      * this check.
    1064             :      */
    1065      333082 :     on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
    1066      333082 :     if (on_waitlist)
    1067      252434 :         proclist_delete(&lock->waiters, MyProcNumber, lwWaitLink);
    1068             : 
    1069      333082 :     if (proclist_is_empty(&lock->waiters) &&
    1070      315844 :         (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
    1071             :     {
    1072      315348 :         pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
    1073             :     }
    1074             : 
    1075             :     /* XXX: combine with fetch_and above? */
    1076      333082 :     LWLockWaitListUnlock(lock);
    1077             : 
    1078             :     /* clear waiting state again, nice for debugging */
    1079      333082 :     if (on_waitlist)
    1080      252434 :         MyProc->lwWaiting = LW_WS_NOT_WAITING;
    1081             :     else
    1082             :     {
    1083       80648 :         int         extraWaits = 0;
    1084             : 
    1085             :         /*
    1086             :          * Somebody else dequeued us and either has woken us up already or
    1087             :          * will do so shortly. Absorb the superfluous wakeup.
    1088             :          */
    1089             : 
    1090             :         /*
    1091             :          * Reset RELEASE_OK flag if somebody woke us before we removed
    1092             :          * ourselves - they'll have set it to false.
    1093             :          */
    1094       80648 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1095             : 
    1096             :         /*
    1097             :          * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
    1098             :          * get reset at some inconvenient point later. Most of the time this
    1099             :          * will immediately return.
    1100             :          */
    1101             :         for (;;)
    1102             :         {
    1103       80648 :             PGSemaphoreLock(MyProc->sem);
    1104       80648 :             if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
    1105       80648 :                 break;
    1106           0 :             extraWaits++;
    1107             :         }
    1108             : 
    1109             :         /*
    1110             :          * Fix the process wait semaphore's count for any absorbed wakeups.
    1111             :          */
    1112       80648 :         while (extraWaits-- > 0)
    1113           0 :             PGSemaphoreUnlock(MyProc->sem);
    1114             :     }
    1115             : 
    1116             : #ifdef LOCK_DEBUG
    1117             :     {
    1118             :         /* not waiting anymore */
    1119             :         uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1120             : 
    1121             :         Assert(nwaiters < MAX_BACKENDS);
    1122             :     }
    1123             : #endif
    1124      333082 : }
    1125             : 
    1126             : /*
    1127             :  * LWLockAcquire - acquire a lightweight lock in the specified mode
    1128             :  *
    1129             :  * If the lock is not available, sleep until it is.  Returns true if the lock
    1130             :  * was available immediately, false if we had to sleep.
    1131             :  *
    1132             :  * Side effect: cancel/die interrupts are held off until lock release.
    1133             :  */
    1134             : bool
    1135   745964966 : LWLockAcquire(LWLock *lock, LWLockMode mode)
    1136             : {
    1137   745964966 :     PGPROC     *proc = MyProc;
    1138   745964966 :     bool        result = true;
    1139   745964966 :     int         extraWaits = 0;
    1140             : #ifdef LWLOCK_STATS
    1141             :     lwlock_stats *lwstats;
    1142             : 
    1143             :     lwstats = get_lwlock_stats_entry(lock);
    1144             : #endif
    1145             : 
    1146             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1147             : 
    1148             :     PRINT_LWDEBUG("LWLockAcquire", lock, mode);
    1149             : 
    1150             : #ifdef LWLOCK_STATS
    1151             :     /* Count lock acquisition attempts */
    1152             :     if (mode == LW_EXCLUSIVE)
    1153             :         lwstats->ex_acquire_count++;
    1154             :     else
    1155             :         lwstats->sh_acquire_count++;
    1156             : #endif                          /* LWLOCK_STATS */
    1157             : 
    1158             :     /*
    1159             :      * We can't wait if we haven't got a PGPROC.  This should only occur
    1160             :      * during bootstrap or shared memory initialization.  Put an Assert here
    1161             :      * to catch unsafe coding practices.
    1162             :      */
    1163             :     Assert(!(proc == NULL && IsUnderPostmaster));
    1164             : 
    1165             :     /* Ensure we will have room to remember the lock */
    1166   745964966 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1167           0 :         elog(ERROR, "too many LWLocks taken");
    1168             : 
    1169             :     /*
    1170             :      * Lock out cancel/die interrupts until we exit the code section protected
    1171             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1172             :      * manipulations of data structures in shared memory.
    1173             :      */
    1174   745964966 :     HOLD_INTERRUPTS();
    1175             : 
    1176             :     /*
    1177             :      * Loop here to try to acquire lock after each time we are signaled by
    1178             :      * LWLockRelease.
    1179             :      *
    1180             :      * NOTE: it might seem better to have LWLockRelease actually grant us the
    1181             :      * lock, rather than retrying and possibly having to go back to sleep. But
    1182             :      * in practice that is no good because it means a process swap for every
    1183             :      * lock acquisition when two or more processes are contending for the same
    1184             :      * lock.  Since LWLocks are normally used to protect not-very-long
    1185             :      * sections of computation, a process needs to be able to acquire and
    1186             :      * release the same lock many times during a single CPU time slice, even
    1187             :      * in the presence of contention.  The efficiency of being able to do that
    1188             :      * outweighs the inefficiency of sometimes wasting a process dispatch
    1189             :      * cycle because the lock is not free when a released waiter finally gets
    1190             :      * to run.  See pgsql-hackers archives for 29-Dec-01.
    1191             :      */
    1192             :     for (;;)
    1193     2984024 :     {
    1194             :         bool        mustwait;
    1195             : 
    1196             :         /*
    1197             :          * Try to grab the lock the first time; we're not in the wait queue
    1198             :          * yet (or not anymore).
    1199             :          */
    1200   748948990 :         mustwait = LWLockAttemptLock(lock, mode);
    1201             : 
    1202   748948990 :         if (!mustwait)
    1203             :         {
    1204             :             LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
    1205   745637412 :             break;              /* got the lock */
    1206             :         }
    1207             : 
    1208             :         /*
    1209             :          * Ok, at this point we couldn't grab the lock on the first try. We
    1210             :          * cannot simply queue ourselves to the end of the list and wait to be
    1211             :          * woken up, because by now the lock could long since have been
    1212             :          * released. Instead, add ourselves to the queue and try to grab the
    1213             :          * lock again. If we succeed, we need to revert the queueing;
    1214             :          * otherwise we recheck the lock. If we still can't grab it, we know
    1215             :          * that the other locker will see our queue entries when releasing,
    1216             :          * since they existed before we checked for the lock.
    1217             :          */
    1218             : 
    1219             :         /* add to the queue */
    1220     3311578 :         LWLockQueueSelf(lock, mode);
    1221             : 
    1222             :         /* we're now guaranteed to be woken up if necessary */
    1223     3311578 :         mustwait = LWLockAttemptLock(lock, mode);
    1224             : 
    1225             :         /* ok, grabbed the lock the second time round, need to undo queueing */
    1226     3311578 :         if (!mustwait)
    1227             :         {
    1228             :             LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
    1229             : 
    1230      327554 :             LWLockDequeueSelf(lock);
    1231      327554 :             break;
    1232             :         }
    1233             : 
    1234             :         /*
    1235             :          * Wait until awakened.
    1236             :          *
    1237             :          * It is possible that we get awakened for a reason other than being
    1238             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1239             :          * we've gotten the LWLock, re-increment the sema by the number of
    1240             :          * additional signals received.
    1241             :          */
    1242             :         LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
    1243             : 
    1244             : #ifdef LWLOCK_STATS
    1245             :         lwstats->block_count++;
    1246             : #endif
    1247             : 
    1248     2984024 :         LWLockReportWaitStart(lock);
    1249             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1250             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1251             : 
    1252             :         for (;;)
    1253             :         {
    1254     2984024 :             PGSemaphoreLock(proc->sem);
    1255     2984024 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1256     2984024 :                 break;
    1257           0 :             extraWaits++;
    1258             :         }
    1259             : 
    1260             :         /* Retrying, allow LWLockRelease to release waiters again. */
    1261     2984024 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1262             : 
    1263             : #ifdef LOCK_DEBUG
    1264             :         {
    1265             :             /* not waiting anymore */
    1266             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1267             : 
    1268             :             Assert(nwaiters < MAX_BACKENDS);
    1269             :         }
    1270             : #endif
    1271             : 
    1272             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1273             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1274     2984024 :         LWLockReportWaitEnd();
    1275             : 
    1276             :         LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
    1277             : 
    1278             :         /* Now loop back and try to acquire lock again. */
    1279     2984024 :         result = false;
    1280             :     }
    1281             : 
    1282             :     if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
    1283             :         TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
    1284             : 
    1285             :     /* Add lock to list of locks held by this backend */
    1286   745964966 :     held_lwlocks[num_held_lwlocks].lock = lock;
    1287   745964966 :     held_lwlocks[num_held_lwlocks++].mode = mode;
    1288             : 
    1289             :     /*
    1290             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1291             :      */
    1292   745964966 :     while (extraWaits-- > 0)
    1293           0 :         PGSemaphoreUnlock(proc->sem);
    1294             : 
    1295   745964966 :     return result;
    1296             : }
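
A minimal usage sketch may help make the contract above concrete. Everything in it is hypothetical (SharedCounter and its functions are not part of lwlock.c); it assumes a normal backend and a lock initialized during shared-memory setup.

    #include "postgres.h"
    #include "storage/lwlock.h"

    /* Hypothetical shared-memory struct with an embedded LWLock. */
    typedef struct SharedCounter
    {
        LWLock      lock;           /* protects 'value' */
        uint64      value;
    } SharedCounter;

    /* Run once during shared-memory setup. */
    static void
    counter_init(SharedCounter *c, int tranche_id)
    {
        LWLockInitialize(&c->lock, tranche_id);
        c->value = 0;
    }

    static uint64
    counter_bump(SharedCounter *c)
    {
        uint64      result;

        /* Sleeps if contended; holds off cancel/die interrupts. */
        LWLockAcquire(&c->lock, LW_EXCLUSIVE);
        result = ++c->value;
        LWLockRelease(&c->lock);    /* wakes waiters, resumes interrupts */

        return result;
    }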
    1297             : 
    1298             : /*
    1299             :  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
    1300             :  *
    1301             :  * If the lock is not available, return false with no side-effects.
    1302             :  *
    1303             :  * If successful, cancel/die interrupts are held off until lock release.
    1304             :  */
    1305             : bool
    1306     4826502 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    1307             : {
    1308             :     bool        mustwait;
    1309             : 
    1310             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1311             : 
    1312             :     PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
    1313             : 
    1314             :     /* Ensure we will have room to remember the lock */
    1315     4826502 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1316           0 :         elog(ERROR, "too many LWLocks taken");
    1317             : 
    1318             :     /*
    1319             :      * Lock out cancel/die interrupts until we exit the code section protected
    1320             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1321             :      * manipulations of data structures in shared memory.
    1322             :      */
    1323     4826502 :     HOLD_INTERRUPTS();
    1324             : 
    1325             :     /* Check for the lock */
    1326     4826502 :     mustwait = LWLockAttemptLock(lock, mode);
    1327             : 
    1328     4826502 :     if (mustwait)
    1329             :     {
    1330             :         /* Failed to get lock, so release interrupt holdoff */
    1331       12070 :         RESUME_INTERRUPTS();
    1332             : 
    1333             :         LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
    1334             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
    1335             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
    1336             :     }
    1337             :     else
    1338             :     {
    1339             :         /* Add lock to list of locks held by this backend */
    1340     4814432 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1341     4814432 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1342             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
    1343             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
    1344             :     }
    1345     4826502 :     return !mustwait;
    1346             : }
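
A hedged sketch of the resulting try-lock pattern, reusing the hypothetical SharedCounter from the sketch above: bump the counter if the lock is free, otherwise let the caller do something else.

    static bool
    counter_try_bump(SharedCounter *c)
    {
        /* Returns false immediately instead of sleeping. */
        if (!LWLockConditionalAcquire(&c->lock, LW_EXCLUSIVE))
            return false;       /* busy; caller can retry or move on */

        c->value++;
        LWLockRelease(&c->lock);
        return true;
    }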
    1347             : 
    1348             : /*
    1349             :  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
    1350             :  *
    1351             :  * The semantics of this function are a bit funky.  If the lock is currently
    1352             :  * free, it is acquired in the given mode, and the function returns true.  If
    1353             :  * the lock isn't immediately free, the function waits until it is released
    1354             :  * and returns false, but does not acquire the lock.
    1355             :  *
    1356             :  * This is currently used for WALWriteLock: when a backend flushes the WAL,
    1357             :  * holding WALWriteLock, it can flush the commit records of many other
    1358             :  * backends as a side-effect.  Those other backends need to wait until the
    1359             :  * flush finishes, but don't need to acquire the lock anymore.  They can just
    1360             :  * wake up, observe that their records have already been flushed, and return.
    1361             :  */
    1362             : bool
    1363      268926 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
    1364             : {
    1365      268926 :     PGPROC     *proc = MyProc;
    1366             :     bool        mustwait;
    1367      268926 :     int         extraWaits = 0;
    1368             : #ifdef LWLOCK_STATS
    1369             :     lwlock_stats *lwstats;
    1370             : 
    1371             :     lwstats = get_lwlock_stats_entry(lock);
    1372             : #endif
    1373             : 
    1374             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1375             : 
    1376             :     PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
    1377             : 
    1378             :     /* Ensure we will have room to remember the lock */
    1379      268926 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1380           0 :         elog(ERROR, "too many LWLocks taken");
    1381             : 
    1382             :     /*
    1383             :      * Lock out cancel/die interrupts until we exit the code section protected
    1384             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1385             :      * manipulations of data structures in shared memory.
    1386             :      */
    1387      268926 :     HOLD_INTERRUPTS();
    1388             : 
    1389             :     /*
    1390             :      * NB: We're using nearly the same twice-in-a-row lock acquisition
    1391             :      * protocol as LWLockAcquire(). Check its comments for details.
    1392             :      */
    1393      268926 :     mustwait = LWLockAttemptLock(lock, mode);
    1394             : 
    1395      268926 :     if (mustwait)
    1396             :     {
    1397       12254 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1398             : 
    1399       12254 :         mustwait = LWLockAttemptLock(lock, mode);
    1400             : 
    1401       12254 :         if (mustwait)
    1402             :         {
    1403             :             /*
    1404             :              * Wait until awakened.  Like in LWLockAcquire, be prepared for
    1405             :              * bogus wakeups.
    1406             :              */
    1407             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
    1408             : 
    1409             : #ifdef LWLOCK_STATS
    1410             :             lwstats->block_count++;
    1411             : #endif
    1412             : 
    1413       11988 :             LWLockReportWaitStart(lock);
    1414             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1415             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1416             : 
    1417             :             for (;;)
    1418             :             {
    1419       11988 :                 PGSemaphoreLock(proc->sem);
    1420       11988 :                 if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1421       11988 :                     break;
    1422           0 :                 extraWaits++;
    1423             :             }
    1424             : 
    1425             : #ifdef LOCK_DEBUG
    1426             :             {
    1427             :                 /* not waiting anymore */
    1428             :                 uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1429             : 
    1430             :                 Assert(nwaiters < MAX_BACKENDS);
    1431             :             }
    1432             : #endif
    1433             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1434             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1435       11988 :             LWLockReportWaitEnd();
    1436             : 
    1437             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
    1438             :         }
    1439             :         else
    1440             :         {
    1441             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
    1442             : 
    1443             :             /*
    1444             :              * Got the lock on the second attempt; undo queueing. We need to
    1445             :              * treat this as having successfully acquired the lock, or we'd
    1446             :              * not necessarily wake up people we've prevented from acquiring
    1447             :              * the lock.
    1448             :              */
    1449         266 :             LWLockDequeueSelf(lock);
    1450             :         }
    1451             :     }
    1452             : 
    1453             :     /*
    1454             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1455             :      */
    1456      268926 :     while (extraWaits-- > 0)
    1457           0 :         PGSemaphoreUnlock(proc->sem);
    1458             : 
    1459      268926 :     if (mustwait)
    1460             :     {
    1461             :         /* Failed to get lock, so release interrupt holdoff */
    1462       11988 :         RESUME_INTERRUPTS();
    1463             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
    1464             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
    1465             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
    1466             :     }
    1467             :     else
    1468             :     {
    1469             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
    1470             :         /* Add lock to list of locks held by this backend */
    1471      256938 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1472      256938 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1473             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
    1474             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
    1475             :     }
    1476             : 
    1477      268926 :     return !mustwait;
    1478             : }
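
A hedged sketch of the group-flush pattern the header comment describes. The helpers flushed_up_to() and flush_wal() are hypothetical stand-ins; the real logic lives in XLogFlush().

    #include "access/xlogdefs.h"

    static bool flushed_up_to(XLogRecPtr upto);     /* hypothetical */
    static void flush_wal(XLogRecPtr upto);         /* hypothetical */

    static void
    ensure_flushed(XLogRecPtr upto)
    {
        while (!flushed_up_to(upto))
        {
            if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
            {
                /* Got the lock: flush our records, and likely others'. */
                flush_wal(upto);
                LWLockRelease(WALWriteLock);
                return;
            }

            /*
             * The lock was busy, so we slept until the holder released
             * it.  The holder may have flushed our records as a
             * side-effect, so simply recheck.
             */
        }
    }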
    1479             : 
    1480             : /*
    1481             :  * Given the lwlock's current state, does the caller need to wait for the
    1482             :  * variable's value to change?
    1483             :  *
    1484             :  * If we don't need to wait, and it's because the value of the variable has
    1485             :  * changed, store the current value in newval.
    1486             :  *
    1487             :  * *result is set to true if the lock was free, and false otherwise.
    1488             :  */
    1489             : static bool
    1490     7596340 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1491             :                        uint64 *newval, bool *result)
    1492             : {
    1493             :     bool        mustwait;
    1494             :     uint64      value;
    1495             : 
    1496             :     /*
    1497             :      * Test first to see if the slot is free right now.
    1498             :      *
    1499             :      * XXX: the unique caller of this routine, WaitXLogInsertionsToFinish()
    1500             :      * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
    1501             :      * this, so we don't need a memory barrier here as far as the current
    1502             :      * usage is concerned.  But that might not be safe in general.
    1503             :      */
    1504     7596340 :     mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
    1505             : 
    1506     7596340 :     if (!mustwait)
    1507             :     {
    1508     5081660 :         *result = true;
    1509     5081660 :         return false;
    1510             :     }
    1511             : 
    1512     2514680 :     *result = false;
    1513             : 
    1514             :     /*
    1515             :      * Reading this value atomically is safe even on platforms where uint64
    1516             :      * cannot be read without observing a torn value.
    1517             :      */
    1518     2514680 :     value = pg_atomic_read_u64(valptr);
    1519             : 
    1520     2514680 :     if (value != oldval)
    1521             :     {
    1522     2155782 :         mustwait = false;
    1523     2155782 :         *newval = value;
    1524             :     }
    1525             :     else
    1526             :     {
    1527      358898 :         mustwait = true;
    1528             :     }
    1529             : 
    1530     2514680 :     return mustwait;
    1531             : }
    1532             : 
    1533             : /*
    1534             :  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
    1535             :  *
    1536             :  * If the lock is held and *valptr equals oldval, waits until the lock is
    1537             :  * either freed, or the lock holder updates *valptr by calling
    1538             :  * LWLockUpdateVar.  If the lock is free on exit (immediately or after
    1539             :  * waiting), returns true.  If the lock is still held, but *valptr no longer
    1540             :  * matches oldval, returns false and sets *newval to the current value in
    1541             :  * *valptr.
    1542             :  *
    1543             :  * Note: this function ignores shared lock holders; if the lock is held
    1544             :  * in shared mode, returns 'true'.
    1545             :  *
    1546             :  * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
    1547             :  * hence the caller of this function may want to rely on an explicit barrier or
    1548             :  * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
    1549             :  */
    1550             : bool
    1551     7237442 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1552             :                  uint64 *newval)
    1553             : {
    1554     7237442 :     PGPROC     *proc = MyProc;
    1555     7237442 :     int         extraWaits = 0;
    1556     7237442 :     bool        result = false;
    1557             : #ifdef LWLOCK_STATS
    1558             :     lwlock_stats *lwstats;
    1559             : 
    1560             :     lwstats = get_lwlock_stats_entry(lock);
    1561             : #endif
    1562             : 
    1563             :     PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
    1564             : 
    1565             :     /*
    1566             :      * Lock out cancel/die interrupts while we sleep on the lock.  There is no
    1567             :      * cleanup mechanism to remove us from the wait queue if we got
    1568             :      * interrupted.
    1569             :      */
    1570     7237442 :     HOLD_INTERRUPTS();
    1571             : 
    1572             :     /*
    1573             :      * Loop here to check the lock's status after each time we are signaled.
    1574             :      */
    1575             :     for (;;)
    1576      176818 :     {
    1577             :         bool        mustwait;
    1578             : 
    1579     7414260 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1580             :                                           &result);
    1581             : 
    1582     7414260 :         if (!mustwait)
    1583     7232180 :             break;              /* the lock was free or value didn't match */
    1584             : 
    1585             :         /*
    1586             :          * Add myself to the wait queue. Note that this is racy: somebody
    1587             :          * else could wake us up before we're finished queueing. NB: We're
    1588             :          * using nearly the same twice-in-a-row lock acquisition protocol as
    1589             :          * LWLockAcquire(). Check its comments for details. The only
    1590             :          * difference is that we also have to check the variable's value when
    1591             :          * checking the state of the lock.
    1592             :          */
    1593      182080 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1594             : 
    1595             :         /*
    1596             :          * Set RELEASE_OK flag, to make sure we get woken up as soon as the
    1597             :          * lock is released.
    1598             :          */
    1599      182080 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1600             : 
    1601             :         /*
    1602             :          * We're now guaranteed to be woken up if necessary. Recheck the lock
    1603             :          * and the variable's state.
    1604             :          */
    1605      182080 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1606             :                                           &result);
    1607             : 
    1608             :         /* Ok, no conflict after we queued ourselves. Undo queueing. */
    1609      182080 :         if (!mustwait)
    1610             :         {
    1611             :             LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
    1612             : 
    1613        5262 :             LWLockDequeueSelf(lock);
    1614        5262 :             break;
    1615             :         }
    1616             : 
    1617             :         /*
    1618             :          * Wait until awakened.
    1619             :          *
    1620             :          * It is possible that we get awakened for a reason other than being
    1621             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1622             :          * we've gotten the LWLock, re-increment the sema by the number of
    1623             :          * additional signals received.
    1624             :          */
    1625             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
    1626             : 
    1627             : #ifdef LWLOCK_STATS
    1628             :         lwstats->block_count++;
    1629             : #endif
    1630             : 
    1631      176818 :         LWLockReportWaitStart(lock);
    1632             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1633             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
    1634             : 
    1635             :         for (;;)
    1636             :         {
    1637      176818 :             PGSemaphoreLock(proc->sem);
    1638      176818 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1639      176818 :                 break;
    1640           0 :             extraWaits++;
    1641             :         }
    1642             : 
    1643             : #ifdef LOCK_DEBUG
    1644             :         {
    1645             :             /* not waiting anymore */
    1646             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1647             : 
    1648             :             Assert(nwaiters < MAX_BACKENDS);
    1649             :         }
    1650             : #endif
    1651             : 
    1652             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1653             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
    1654      176818 :         LWLockReportWaitEnd();
    1655             : 
    1656             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
    1657             : 
    1658             :         /* Now loop back and check the status of the lock again. */
    1659             :     }
    1660             : 
    1661             :     /*
    1662             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1663             :      */
    1664     7237442 :     while (extraWaits-- > 0)
    1665           0 :         PGSemaphoreUnlock(proc->sem);
    1666             : 
    1667             :     /*
    1668             :      * Now okay to allow cancel/die interrupts.
    1669             :      */
    1670     7237442 :     RESUME_INTERRUPTS();
    1671             : 
    1672     7237442 :     return result;
    1673             : }
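
A hedged waiter-side sketch of this protocol (the real caller is WaitXLogInsertionsToFinish()). The 'progress' variable and 'target' threshold are hypothetical: wait until the variable reaches the target, or until the exclusive holder finishes entirely.

    static void
    wait_for_progress(LWLock *lock, pg_atomic_uint64 *progress, uint64 target)
    {
        uint64      seen = 0;

        while (pg_atomic_read_u64(progress) < target)
        {
            uint64      newval;

            /* Returns true once the lock is free: the holder is done. */
            if (LWLockWaitForVar(lock, progress, seen, &newval))
                break;

            /* Lock still held, but the variable advanced; re-evaluate. */
            seen = newval;
        }
    }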
    1674             : 
    1675             : 
    1676             : /*
    1677             :  * LWLockUpdateVar - Update a variable and wake up waiters atomically
    1678             :  *
    1679             :  * Sets *valptr to 'val', and wakes up all processes waiting for us with
    1680             :  * LWLockWaitForVar().  It first sets the value atomically and then wakes up
    1681             :  * waiting processes so that any process calling LWLockWaitForVar() on the same
    1682             :  * lock is guaranteed to see the new value, and act accordingly.
    1683             :  *
    1684             :  * The caller must be holding the lock in exclusive mode.
    1685             :  */
    1686             : void
    1687     5342308 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1688             : {
    1689             :     proclist_head wakeup;
    1690             :     proclist_mutable_iter iter;
    1691             : 
    1692             :     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
    1693             : 
    1694             :     /*
    1695             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1696             :      * that the variable is updated before waking up waiters.
    1697             :      */
    1698     5342308 :     pg_atomic_exchange_u64(valptr, val);
    1699             : 
    1700     5342308 :     proclist_init(&wakeup);
    1701             : 
    1702     5342308 :     LWLockWaitListLock(lock);
    1703             : 
    1704             :     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
    1705             : 
    1706             :     /*
    1707             :      * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
    1708             :      * up. They are always in the front of the queue.
    1709             :      */
    1710     5345646 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1711             :     {
    1712      129832 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1713             : 
    1714      129832 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
    1715      126494 :             break;
    1716             : 
    1717        3338 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1718        3338 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
    1719             : 
    1720             :         /* see LWLockWakeup() */
    1721             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
    1722        3338 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
    1723             :     }
    1724             : 
    1725             :     /* We are done updating shared state of the lock itself. */
    1726     5342308 :     LWLockWaitListUnlock(lock);
    1727             : 
    1728             :     /*
    1729             :      * Awaken any waiters I removed from the queue.
    1730             :      */
    1731     5345646 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1732             :     {
    1733        3338 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1734             : 
    1735        3338 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1736             :         /* check comment in LWLockWakeup() about this barrier */
    1737        3338 :         pg_write_barrier();
    1738        3338 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1739        3338 :         PGSemaphoreUnlock(waiter->sem);
    1740             :     }
    1741     5342308 : }
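
The matching holder-side sketch (again hypothetical): advance the variable as stages of the work complete, waking variable waiters without giving up the lock, then reset it when releasing (see LWLockReleaseClearVar() below).

    static void
    do_staged_work(LWLock *lock, pg_atomic_uint64 *progress)
    {
        LWLockAcquire(lock, LW_EXCLUSIVE);

        /* ... complete stage one ... */
        LWLockUpdateVar(lock, progress, 100);   /* wakes variable waiters */

        /* ... complete stage two ... */
        LWLockUpdateVar(lock, progress, 200);

        /* Reset the variable to its "free" value and release in one step. */
        LWLockReleaseClearVar(lock, progress, 0);
    }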
    1742             : 
    1743             : 
    1744             : /*
    1745             :  * Stop treating lock as held by current backend.
    1746             :  *
    1747             :  * This is the code that can be shared between actually releasing a lock
    1748             :  * (LWLockRelease()) and just not tracking ownership of the lock anymore
    1749             :  * without releasing the lock (LWLockDisown()).
    1750             :  *
    1751             :  * Returns the mode in which the lock was held by the current backend.
    1752             :  *
    1753             :  * NB: This does not call RESUME_INTERRUPTS(), but leaves that responsibility
    1754             :  * to the caller.
    1755             :  *
    1756             :  * NB: This will leave lock->owner pointing to the current backend (if
    1757             :  * LOCK_DEBUG is set). This is somewhat intentional, as it makes it easier to
    1758             :  * debug cases of missing wakeups during lock release.
    1759             :  */
    1760             : static inline LWLockMode
    1761   751036336 : LWLockDisownInternal(LWLock *lock)
    1762             : {
    1763             :     LWLockMode  mode;
    1764             :     int         i;
    1765             : 
    1766             :     /*
    1767             :      * Remove lock from list of locks held.  Usually, but not always, it will
    1768             :      * be the latest-acquired lock; so search array backwards.
    1769             :      */
    1770   834120548 :     for (i = num_held_lwlocks; --i >= 0;)
    1771   834120548 :         if (lock == held_lwlocks[i].lock)
    1772   751036336 :             break;
    1773             : 
    1774   751036336 :     if (i < 0)
    1775           0 :         elog(ERROR, "lock %s is not held", T_NAME(lock));
    1776             : 
    1777   751036336 :     mode = held_lwlocks[i].mode;
    1778             : 
    1779   751036336 :     num_held_lwlocks--;
    1780   834120548 :     for (; i < num_held_lwlocks; i++)
    1781    83084212 :         held_lwlocks[i] = held_lwlocks[i + 1];
    1782             : 
    1783   751036336 :     return mode;
    1784             : }
    1785             : 
    1786             : /*
    1787             :  * Helper function to release lock, shared between LWLockRelease() and
    1788             :  * LWLockReleaseDisowned().
    1789             :  */
    1790             : static void
    1791   751036336 : LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
    1792             : {
    1793             :     uint32      oldstate;
    1794             :     bool        check_waiters;
    1795             : 
    1796             :     /*
    1797             :      * Release my hold on lock, after that it can immediately be acquired by
    1798             :      * others, even if we still have to wakeup other waiters.
    1799             :      */
    1800   751036336 :     if (mode == LW_EXCLUSIVE)
    1801   461431018 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    1802             :     else
    1803   289605318 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
    1804             : 
    1805             :     /* nobody else can have that kind of lock */
    1806             :     Assert(!(oldstate & LW_VAL_EXCLUSIVE));
    1807             : 
    1808             :     if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
    1809             :         TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
    1810             : 
    1811             :     /*
    1812             :      * We're still waiting for backends to get scheduled; don't wake them up
    1813             :      * again.
    1814             :      */
    1815   751036336 :     if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
    1816     4127172 :         (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
    1817     4127172 :         (oldstate & LW_LOCK_MASK) == 0)
    1818     4077478 :         check_waiters = true;
    1819             :     else
    1820   746958858 :         check_waiters = false;
    1821             : 
    1822             :     /*
    1823             :      * As waking up waiters requires the spinlock to be acquired, only do so
    1824             :      * if necessary.
    1825             :      */
    1826   751036336 :     if (check_waiters)
    1827             :     {
    1828             :         /* XXX: remove before commit? */
    1829             :         LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
    1830     4077478 :         LWLockWakeup(lock);
    1831             :     }
    1832   751036336 : }
    1833             : 
    1834             : 
    1835             : /*
    1836             :  * Stop treating lock as held by current backend.
    1837             :  *
    1838             :  * After calling this function it's the caller's responsibility to ensure
    1839             :  * that the lock gets released (via LWLockReleaseDisowned()), even in case
    1840             :  * of an error. This is only desirable if the lock is going to be released
    1841             :  * in a different process than the process that acquired it.
    1842             :  */
    1843             : void
    1844           0 : LWLockDisown(LWLock *lock)
    1845             : {
    1846           0 :     LWLockDisownInternal(lock);
    1847             : 
    1848           0 :     RESUME_INTERRUPTS();
    1849           0 : }
    1850             : 
    1851             : /*
    1852             :  * LWLockRelease - release a previously acquired lock
    1853             :  */
    1854             : void
    1855   751036336 : LWLockRelease(LWLock *lock)
    1856             : {
    1857             :     LWLockMode  mode;
    1858             : 
    1859   751036336 :     mode = LWLockDisownInternal(lock);
    1860             : 
    1861             :     PRINT_LWDEBUG("LWLockRelease", lock, mode);
    1862             : 
    1863   751036336 :     LWLockReleaseInternal(lock, mode);
    1864             : 
    1865             :     /*
    1866             :      * Now okay to allow cancel/die interrupts.
    1867             :      */
    1868   751036336 :     RESUME_INTERRUPTS();
    1869   751036336 : }
    1870             : 
    1871             : /*
    1872             :  * Release lock previously disowned with LWLockDisown().
    1873             :  */
    1874             : void
    1875           0 : LWLockReleaseDisowned(LWLock *lock, LWLockMode mode)
    1876             : {
    1877           0 :     LWLockReleaseInternal(lock, mode);
    1878           0 : }
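
A hedged sketch of the handoff these two entry points enable: acquire in one process, release in another. The functions and the handoff mechanism are hypothetical.

    /* Runs in the acquiring process. */
    static void
    start_async_work(LWLock *lock)
    {
        LWLockAcquire(lock, LW_EXCLUSIVE);

        /* Stop tracking the lock locally; interrupts are re-enabled. */
        LWLockDisown(lock);

        /* ... hand off to another process, e.g. via a shared queue ... */
    }

    /* Runs in the process that completes the work. */
    static void
    finish_async_work(LWLock *lock)
    {
        /* Must pass the mode the acquiring process used. */
        LWLockReleaseDisowned(lock, LW_EXCLUSIVE);
    }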
    1879             : 
    1880             : /*
    1881             :  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
    1882             :  */
    1883             : void
    1884    29188408 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1885             : {
    1886             :     /*
    1887             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1888             :      * that the variable is updated before releasing the lock.
    1889             :      */
    1890    29188408 :     pg_atomic_exchange_u64(valptr, val);
    1891             : 
    1892    29188408 :     LWLockRelease(lock);
    1893    29188408 : }
    1894             : 
    1895             : 
    1896             : /*
    1897             :  * LWLockReleaseAll - release all currently-held locks
    1898             :  *
    1899             :  * Used to clean up after ereport(ERROR). An important difference between this
    1900             :  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
    1901             :  * unchanged by this operation.  This is necessary since InterruptHoldoffCount
    1902             :  * has been set to an appropriate level earlier in error recovery. We could
    1903             :  * decrement it below zero if we allow it to drop for each released lock!
    1904             :  */
    1905             : void
    1906      112364 : LWLockReleaseAll(void)
    1907             : {
    1908      112760 :     while (num_held_lwlocks > 0)
    1909             :     {
    1910         396 :         HOLD_INTERRUPTS();      /* match the upcoming RESUME_INTERRUPTS */
    1911             : 
    1912         396 :         LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
    1913             :     }
    1914      112364 : }
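
A hedged sketch of where this fits: auxiliary processes call it from their sigsetjmp() error-recovery blocks (the checkpointer's error handling is similar); the worker function here is hypothetical.

    static void
    worker_error_recovery(void)
    {
        /*
         * Control arrives here after ereport(ERROR) longjmps out of
         * arbitrary code; drop any LWLocks still held.  Note that
         * InterruptHoldoffCount is deliberately left alone.
         */
        LWLockReleaseAll();

        /* ... other per-subsystem cleanup ... */
    }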
    1915             : 
    1916             : 
    1917             : /*
    1918             :  * ForEachLWLockHeldByMe - run a callback for each held lock
    1919             :  *
    1920             :  * This is meant as debug support only.
    1921             :  */
    1922             : void
    1923           0 : ForEachLWLockHeldByMe(void (*callback) (LWLock *, LWLockMode, void *),
    1924             :                       void *context)
    1925             : {
    1926             :     int         i;
    1927             : 
    1928           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1929           0 :         callback(held_lwlocks[i].lock, held_lwlocks[i].mode, context);
    1930           0 : }
    1931             : 
    1932             : /*
    1933             :  * LWLockHeldByMe - test whether my process holds a lock in any mode
    1934             :  *
    1935             :  * This is meant as debug support only.
    1936             :  */
    1937             : bool
    1938           0 : LWLockHeldByMe(LWLock *lock)
    1939             : {
    1940             :     int         i;
    1941             : 
    1942           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1943             :     {
    1944           0 :         if (held_lwlocks[i].lock == lock)
    1945           0 :             return true;
    1946             :     }
    1947           0 :     return false;
    1948             : }
    1949             : 
    1950             : /*
    1951             :  * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
    1952             :  *
    1953             :  * This is meant as debug support only.
    1954             :  */
    1955             : bool
    1956           0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
    1957             : {
    1958             :     char       *held_lock_addr;
    1959             :     char       *begin;
    1960             :     char       *end;
    1961             :     int         i;
    1962             : 
    1963           0 :     begin = (char *) lock;
    1964           0 :     end = begin + nlocks * stride;
    1965           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1966             :     {
    1967           0 :         held_lock_addr = (char *) held_lwlocks[i].lock;
    1968           0 :         if (held_lock_addr >= begin &&
    1969           0 :             held_lock_addr < end &&
    1970           0 :             (held_lock_addr - begin) % stride == 0)
    1971           0 :             return true;
    1972             :     }
    1973           0 :     return false;
    1974             : }
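
A hedged sketch of the stride parameter: checking an array of fixed-size structs, each embedding an LWLock. PaddedLock is hypothetical, and the padding assumes sizeof(LWLock) < 64.

    typedef struct PaddedLock
    {
        LWLock      lock;
        char        pad[64 - sizeof(LWLock)];   /* illustrative padding */
    } PaddedLock;

    static bool
    holds_any_padded(PaddedLock *array, int n)
    {
        /* stride = byte distance between consecutive embedded LWLocks */
        return LWLockAnyHeldByMe(&array[0].lock, n, sizeof(PaddedLock));
    }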
    1975             : 
    1976             : /*
    1977             :  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
    1978             :  *
    1979             :  * This is meant as debug support only.
    1980             :  */
    1981             : bool
    1982           0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
    1983             : {
    1984             :     int         i;
    1985             : 
    1986           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1987             :     {
    1988           0 :         if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
    1989           0 :             return true;
    1990             :     }
    1991           0 :     return false;
    1992             : }
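
These predicates are typically used in assertions. A hedged sketch, reusing the hypothetical SharedCounter from earlier: a function that requires its caller to already hold the lock.

    static void
    counter_set(SharedCounter *c, uint64 v)
    {
        /* Caller must already hold the lock exclusively. */
        Assert(LWLockHeldByMeInMode(&c->lock, LW_EXCLUSIVE));
        c->value = v;
    }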

Generated by: LCOV version 1.16