LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - lwlock.c (source / functions)
Test: PostgreSQL 17devel        Date: 2023-12-05 09:10:49
                 Hit    Total   Coverage
Lines:           337      393     85.8 %
Functions:        27       32     84.4 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * lwlock.c
       4             :  *    Lightweight lock manager
       5             :  *
       6             :  * Lightweight locks are intended primarily to provide mutual exclusion of
       7             :  * access to shared-memory data structures.  Therefore, they offer both
       8             :  * exclusive and shared lock modes (to support read/write and read-only
       9             :  * access to a shared object).  There are few other frills.  User-level
      10             :  * locking should be done with the full lock manager --- which depends on
      11             :  * LWLocks to protect its shared state.
      12             :  *
      13             :  * In addition to exclusive and shared modes, lightweight locks can be used to
      14             :  * wait until a variable changes value.  Acquiring the lock with
      15             :  * LWLockAcquire does not modify the variable, i.e. it keeps whatever value
      16             :  * it was set to when the lock was last released, and it can be updated
      17             :  * without releasing the lock by calling LWLockUpdateVar.  LWLockWaitForVar
      18             :  * waits for the variable to be updated, or until the lock is free.  When
      19             :  * releasing the lock with LWLockReleaseClearVar() the value can be set to an
      20             :  * appropriate value for a free lock.  The meaning of the variable is up to
      21             :  * the caller, the lightweight lock code just assigns and compares it.
      22             :  *
      23             :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      24             :  * Portions Copyright (c) 1994, Regents of the University of California
      25             :  *
      26             :  * IDENTIFICATION
      27             :  *    src/backend/storage/lmgr/lwlock.c
      28             :  *
      29             :  * NOTES:
      30             :  *
      31             :  * This used to be a pretty straightforward reader-writer lock
      32             :  * implementation, in which the internal state was protected by a
      33             :  * spinlock. Unfortunately the overhead of taking the spinlock proved to be
      34             :  * too high for workloads/locks that were taken in shared mode very
      35             :  * frequently. Often we were spinning in the (obviously exclusive) spinlock,
      36             :  * while trying to acquire a shared lock that was actually free.
      37             :  *
      38             :  * Thus a new implementation was devised that provides wait-free shared lock
      39             :  * acquisition for locks that aren't exclusively locked.
      40             :  *
      41             :  * The basic idea is to have a single atomic variable 'lockcount' instead of
      42             :  * the formerly separate shared and exclusive counters and to use atomic
      43             :  * operations to acquire the lock. That's fairly easy to do for plain
      44             :  * rw-spinlocks, but a lot harder for something like LWLocks that want to wait
      45             :  * in the OS.
      46             :  *
      47             :  * For lock acquisition we use an atomic compare-and-exchange on the lockcount
      48             :  * variable. For exclusive lock we swap in a sentinel value
      49             :  * (LW_VAL_EXCLUSIVE), for shared locks we count the number of holders.
      50             :  *
      51             :  * To release the lock we use an atomic decrement. If the
      52             :  * new value is zero (we get that atomically), we know we can/have to release
      53             :  * waiters.
      54             :  *
      55             :  * Obviously it is important that the sentinel value for exclusive locks
      56             :  * doesn't conflict with the maximum number of possible share lockers -
      57             :  * luckily MAX_BACKENDS makes that easily possible.
      58             :  *
      59             :  *
      60             :  * The attentive reader might have noticed that naively doing the above has a
      61             :  * glaring race condition: We try to lock using the atomic operations and
      62             :  * notice that we have to wait. Unfortunately by the time we have finished
      63             :  * queuing, the former locker very well might have already finished its
      64             :  * work. That's problematic because we're now stuck waiting inside the OS.
      65             :  *
      66             :  * To mitigate those races we use a multi-phase attempt at locking:
      67             :  *   Phase 1: Try to do it atomically, if we succeed, nice
      68             :  *   Phase 2: Add ourselves to the waitqueue of the lock
      69             :  *   Phase 3: Try to grab the lock again, if we succeed, remove ourselves from
      70             :  *            the queue
      71             :  *   Phase 4: Sleep till wake-up, goto Phase 1
      72             :  *
      73             :  * This protects us against the problem above: nobody can release too
      74             :  * quickly before we're queued, since after Phase 2 we're already queued.
      75             :  * -------------------------------------------------------------------------
      76             :  */
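
The phased protocol above is easier to see stripped of the surrounding machinery. The following is a minimal standalone sketch using C11 atomics instead of PostgreSQL's pg_atomic API; all names are illustrative, and the fallback merely spins where lwlock.c enqueues itself and sleeps on a per-process semaphore:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define VAL_EXCLUSIVE ((uint32_t) 1 << 24)

    static _Atomic uint32_t lock_state;

    /* Phase 1: single wait-free attempt; true on success */
    static bool
    try_acquire_shared(void)
    {
        uint32_t old = atomic_load(&lock_state);

        while (!(old & VAL_EXCLUSIVE))
        {
            /* CAS in old+1; on failure 'old' is refreshed, so just retry */
            if (atomic_compare_exchange_weak(&lock_state, &old, old + 1))
                return true;
        }
        return false;           /* held exclusively: caller must wait */
    }

    static void
    acquire_shared(void)
    {
        while (!try_acquire_shared())
        {
            /* Phase 2: enqueue self on the lock's wait list (elided).
             * Phase 3: retry the atomic attempt; if it now succeeds,
             *          dequeue self and return.
             * Phase 4: sleep until woken, then start over.
             * This sketch spins instead of sleeping. */
        }
    }

    int
    main(void)
    {
        acquire_shared();
        atomic_fetch_sub(&lock_state, 1);   /* shared release */
        return 0;
    }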
      77             : #include "postgres.h"
      78             : 
      79             : #include "miscadmin.h"
      80             : #include "pg_trace.h"
      81             : #include "pgstat.h"
      82             : #include "port/pg_bitutils.h"
      83             : #include "postmaster/postmaster.h"
      84             : #include "replication/slot.h"
      85             : #include "storage/ipc.h"
      86             : #include "storage/predicate.h"
      87             : #include "storage/proc.h"
      88             : #include "storage/proclist.h"
      89             : #include "storage/spin.h"
      90             : #include "utils/memutils.h"
      91             : 
      92             : #ifdef LWLOCK_STATS
      93             : #include "utils/hsearch.h"
      94             : #endif
      95             : 
      96             : 
      97             : /* We use the ShmemLock spinlock to protect LWLockCounter */
      98             : extern slock_t *ShmemLock;
      99             : 
     100             : #define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 30)
     101             : #define LW_FLAG_RELEASE_OK          ((uint32) 1 << 29)
     102             : #define LW_FLAG_LOCKED              ((uint32) 1 << 28)
     103             : 
     104             : #define LW_VAL_EXCLUSIVE            ((uint32) 1 << 24)
     105             : #define LW_VAL_SHARED               1
     106             : 
     107             : #define LW_LOCK_MASK                ((uint32) ((1 << 25)-1))
     108             : /* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
     109             : #define LW_SHARED_MASK              ((uint32) ((1 << 24)-1))
     110             : 
     111             : StaticAssertDecl(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
     112             :                  "MAX_BACKENDS too big for lwlock.c");
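
A quick sanity check on the bit layout: the state word keeps the shared-holder count in the low 24 bits, the exclusive sentinel at bit 24, and the flag bits above that. A standalone snippet (not part of lwlock.c) that decodes a state value the same way the macros above slice it:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        /* two shared holders, waiters present: LW_FLAG_HAS_WAITERS | 2 */
        uint32_t state = ((uint32_t) 1 << 30) | 2;

        printf("exclusive:   %d\n", (state & ((uint32_t) 1 << 24)) != 0); /* 0 */
        printf("shared:      %u\n", state & (((uint32_t) 1 << 24) - 1));  /* 2 */
        printf("has waiters: %d\n", (state & ((uint32_t) 1 << 30)) != 0); /* 1 */
        return 0;
    }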
     113             : 
     114             : /*
     115             :  * There are three sorts of LWLock "tranches":
     116             :  *
     117             :  * 1. The individually-named locks defined in lwlocknames.h each have their
     118             :  * own tranche.  The names of these tranches appear in IndividualLWLockNames[]
     119             :  * in lwlocknames.c.
     120             :  *
     121             :  * 2. There are some predefined tranches for built-in groups of locks.
     122             :  * These are listed in enum BuiltinTrancheIds in lwlock.h, and their names
     123             :  * appear in BuiltinTrancheNames[] below.
     124             :  *
     125             :  * 3. Extensions can create new tranches, via either RequestNamedLWLockTranche
     126             :  * or LWLockRegisterTranche.  The names of these that are known in the current
     127             :  * process appear in LWLockTrancheNames[].
     128             :  *
     129             :  * All these names are user-visible as wait event names, so choose with care
     130             :  * ... and do not forget to update the documentation's list of wait events.
     131             :  */
     132             : extern const char *const IndividualLWLockNames[];   /* in lwlocknames.c */
     133             : 
     134             : static const char *const BuiltinTrancheNames[] = {
     135             :     /* LWTRANCHE_XACT_BUFFER: */
     136             :     "XactBuffer",
     137             :     /* LWTRANCHE_COMMITTS_BUFFER: */
     138             :     "CommitTsBuffer",
     139             :     /* LWTRANCHE_SUBTRANS_BUFFER: */
     140             :     "SubtransBuffer",
     141             :     /* LWTRANCHE_MULTIXACTOFFSET_BUFFER: */
     142             :     "MultiXactOffsetBuffer",
     143             :     /* LWTRANCHE_MULTIXACTMEMBER_BUFFER: */
     144             :     "MultiXactMemberBuffer",
     145             :     /* LWTRANCHE_NOTIFY_BUFFER: */
     146             :     "NotifyBuffer",
     147             :     /* LWTRANCHE_SERIAL_BUFFER: */
     148             :     "SerialBuffer",
     149             :     /* LWTRANCHE_WAL_INSERT: */
     150             :     "WALInsert",
     151             :     /* LWTRANCHE_BUFFER_CONTENT: */
     152             :     "BufferContent",
     153             :     /* LWTRANCHE_REPLICATION_ORIGIN_STATE: */
     154             :     "ReplicationOriginState",
     155             :     /* LWTRANCHE_REPLICATION_SLOT_IO: */
     156             :     "ReplicationSlotIO",
     157             :     /* LWTRANCHE_LOCK_FASTPATH: */
     158             :     "LockFastPath",
     159             :     /* LWTRANCHE_BUFFER_MAPPING: */
     160             :     "BufferMapping",
     161             :     /* LWTRANCHE_LOCK_MANAGER: */
     162             :     "LockManager",
     163             :     /* LWTRANCHE_PREDICATE_LOCK_MANAGER: */
     164             :     "PredicateLockManager",
     165             :     /* LWTRANCHE_PARALLEL_HASH_JOIN: */
     166             :     "ParallelHashJoin",
     167             :     /* LWTRANCHE_PARALLEL_QUERY_DSA: */
     168             :     "ParallelQueryDSA",
     169             :     /* LWTRANCHE_PER_SESSION_DSA: */
     170             :     "PerSessionDSA",
     171             :     /* LWTRANCHE_PER_SESSION_RECORD_TYPE: */
     172             :     "PerSessionRecordType",
     173             :     /* LWTRANCHE_PER_SESSION_RECORD_TYPMOD: */
     174             :     "PerSessionRecordTypmod",
     175             :     /* LWTRANCHE_SHARED_TUPLESTORE: */
     176             :     "SharedTupleStore",
     177             :     /* LWTRANCHE_SHARED_TIDBITMAP: */
     178             :     "SharedTidBitmap",
     179             :     /* LWTRANCHE_PARALLEL_APPEND: */
     180             :     "ParallelAppend",
     181             :     /* LWTRANCHE_PER_XACT_PREDICATE_LIST: */
     182             :     "PerXactPredicateList",
     183             :     /* LWTRANCHE_PGSTATS_DSA: */
     184             :     "PgStatsDSA",
     185             :     /* LWTRANCHE_PGSTATS_HASH: */
     186             :     "PgStatsHash",
     187             :     /* LWTRANCHE_PGSTATS_DATA: */
     188             :     "PgStatsData",
     189             :     /* LWTRANCHE_LAUNCHER_DSA: */
     190             :     "LogicalRepLauncherDSA",
     191             :     /* LWTRANCHE_LAUNCHER_HASH: */
     192             :     "LogicalRepLauncherHash",
     193             : };
     194             : 
     195             : StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
     196             :                  LWTRANCHE_FIRST_USER_DEFINED - NUM_INDIVIDUAL_LWLOCKS,
     197             :                  "missing entries in BuiltinTrancheNames[]");
     198             : 
     199             : /*
     200             :  * This is indexed by tranche ID minus LWTRANCHE_FIRST_USER_DEFINED, and
     201             :  * stores the names of all dynamically-created tranches known to the current
     202             :  * process.  Any unused entries in the array will contain NULL.
     203             :  */
     204             : static const char **LWLockTrancheNames = NULL;
     205             : static int  LWLockTrancheNamesAllocated = 0;
     206             : 
     207             : /*
     208             :  * This points to the main array of LWLocks in shared memory.  Backends inherit
     209             :  * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
     210             :  * where we have special measures to pass it down).
     211             :  */
     212             : LWLockPadded *MainLWLockArray = NULL;
     213             : 
     214             : /*
     215             :  * We use this structure to keep track of locked LWLocks for release
     216             :  * during error recovery.  Normally, only a few will be held at once, but
     217             :  * occasionally the number can be much higher; for example, the pg_buffercache
     218             :  * extension locks all buffer partitions simultaneously.
     219             :  */
     220             : #define MAX_SIMUL_LWLOCKS   200
     221             : 
     222             : /* struct representing the LWLocks we're holding */
     223             : typedef struct LWLockHandle
     224             : {
     225             :     LWLock     *lock;
     226             :     LWLockMode  mode;
     227             : } LWLockHandle;
     228             : 
     229             : static int  num_held_lwlocks = 0;
     230             : static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
     231             : 
     232             : /* struct representing the LWLock tranche request for named tranche */
     233             : typedef struct NamedLWLockTrancheRequest
     234             : {
     235             :     char        tranche_name[NAMEDATALEN];
     236             :     int         num_lwlocks;
     237             : } NamedLWLockTrancheRequest;
     238             : 
     239             : static NamedLWLockTrancheRequest *NamedLWLockTrancheRequestArray = NULL;
     240             : static int  NamedLWLockTrancheRequestsAllocated = 0;
     241             : 
     242             : /*
     243             :  * NamedLWLockTrancheRequests is both the valid length of the request array,
     244             :  * and the length of the shared-memory NamedLWLockTrancheArray later on.
     245             :  * This variable and NamedLWLockTrancheArray are non-static so that
     246             :  * postmaster.c can copy them to child processes in EXEC_BACKEND builds.
     247             :  */
     248             : int         NamedLWLockTrancheRequests = 0;
     249             : 
     250             : /* points to data in shared memory: */
     251             : NamedLWLockTranche *NamedLWLockTrancheArray = NULL;
     252             : 
     253             : static void InitializeLWLocks(void);
     254             : static inline void LWLockReportWaitStart(LWLock *lock);
     255             : static inline void LWLockReportWaitEnd(void);
     256             : static const char *GetLWTrancheName(uint16 trancheId);
     257             : 
     258             : #define T_NAME(lock) \
     259             :     GetLWTrancheName((lock)->tranche)
     260             : 
     261             : #ifdef LWLOCK_STATS
     262             : typedef struct lwlock_stats_key
     263             : {
     264             :     int         tranche;
     265             :     void       *instance;
     266             : }           lwlock_stats_key;
     267             : 
     268             : typedef struct lwlock_stats
     269             : {
     270             :     lwlock_stats_key key;
     271             :     int         sh_acquire_count;
     272             :     int         ex_acquire_count;
     273             :     int         block_count;
     274             :     int         dequeue_self_count;
     275             :     int         spin_delay_count;
     276             : }           lwlock_stats;
     277             : 
     278             : static HTAB *lwlock_stats_htab;
     279             : static lwlock_stats lwlock_stats_dummy;
     280             : #endif
     281             : 
     282             : #ifdef LOCK_DEBUG
     283             : bool        Trace_lwlocks = false;
     284             : 
     285             : inline static void
     286             : PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
     287             : {
     288             :     /* hide statement & context here, otherwise the log is just too verbose */
     289             :     if (Trace_lwlocks)
     290             :     {
     291             :         uint32      state = pg_atomic_read_u32(&lock->state);
     292             : 
     293             :         ereport(LOG,
     294             :                 (errhidestmt(true),
     295             :                  errhidecontext(true),
     296             :                  errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
     297             :                                  MyProcPid,
     298             :                                  where, T_NAME(lock), lock,
     299             :                                  (state & LW_VAL_EXCLUSIVE) != 0,
     300             :                                  state & LW_SHARED_MASK,
     301             :                                  (state & LW_FLAG_HAS_WAITERS) != 0,
     302             :                                  pg_atomic_read_u32(&lock->nwaiters),
     303             :                                  (state & LW_FLAG_RELEASE_OK) != 0)));
     304             :     }
     305             : }
     306             : 
     307             : inline static void
     308             : LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
     309             : {
     310             :     /* hide statement & context here, otherwise the log is just too verbose */
     311             :     if (Trace_lwlocks)
     312             :     {
     313             :         ereport(LOG,
     314             :                 (errhidestmt(true),
     315             :                  errhidecontext(true),
     316             :                  errmsg_internal("%s(%s %p): %s", where,
     317             :                                  T_NAME(lock), lock, msg)));
     318             :     }
     319             : }
     320             : 
     321             : #else                           /* not LOCK_DEBUG */
     322             : #define PRINT_LWDEBUG(a,b,c) ((void)0)
     323             : #define LOG_LWDEBUG(a,b,c) ((void)0)
     324             : #endif                          /* LOCK_DEBUG */
     325             : 
     326             : #ifdef LWLOCK_STATS
     327             : 
     328             : static void init_lwlock_stats(void);
     329             : static void print_lwlock_stats(int code, Datum arg);
     330             : static lwlock_stats * get_lwlock_stats_entry(LWLock *lock);
     331             : 
     332             : static void
     333             : init_lwlock_stats(void)
     334             : {
     335             :     HASHCTL     ctl;
     336             :     static MemoryContext lwlock_stats_cxt = NULL;
     337             :     static bool exit_registered = false;
     338             : 
     339             :     if (lwlock_stats_cxt != NULL)
     340             :         MemoryContextDelete(lwlock_stats_cxt);
     341             : 
     342             :     /*
     343             :      * The LWLock stats will be updated within a critical section, which
     344             :      * requires allocating new hash entries. Allocations within a critical
     345             :      * section are normally not allowed because running out of memory would
     346             :      * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
     347             :      * turned on in production, so that's an acceptable risk. The hash entries
     348             :      * are small, so the risk of running out of memory is minimal in practice.
     349             :      */
     350             :     lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
     351             :                                              "LWLock stats",
     352             :                                              ALLOCSET_DEFAULT_SIZES);
     353             :     MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
     354             : 
     355             :     ctl.keysize = sizeof(lwlock_stats_key);
     356             :     ctl.entrysize = sizeof(lwlock_stats);
     357             :     ctl.hcxt = lwlock_stats_cxt;
     358             :     lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
     359             :                                     HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
     360             :     if (!exit_registered)
     361             :     {
     362             :         on_shmem_exit(print_lwlock_stats, 0);
     363             :         exit_registered = true;
     364             :     }
     365             : }
     366             : 
     367             : static void
     368             : print_lwlock_stats(int code, Datum arg)
     369             : {
     370             :     HASH_SEQ_STATUS scan;
     371             :     lwlock_stats *lwstats;
     372             : 
     373             :     hash_seq_init(&scan, lwlock_stats_htab);
     374             : 
     375             :     /* Grab an LWLock to keep different backends from mixing reports */
     376             :     LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
     377             : 
     378             :     while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
     379             :     {
     380             :         fprintf(stderr,
     381             :                 "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
     382             :                 MyProcPid, GetLWTrancheName(lwstats->key.tranche),
     383             :                 lwstats->key.instance, lwstats->sh_acquire_count,
     384             :                 lwstats->ex_acquire_count, lwstats->block_count,
     385             :                 lwstats->spin_delay_count, lwstats->dequeue_self_count);
     386             :     }
     387             : 
     388             :     LWLockRelease(&MainLWLockArray[0].lock);
     389             : }
     390             : 
     391             : static lwlock_stats *
     392             : get_lwlock_stats_entry(LWLock *lock)
     393             : {
     394             :     lwlock_stats_key key;
     395             :     lwlock_stats *lwstats;
     396             :     bool        found;
     397             : 
     398             :     /*
     399             :      * During shared memory initialization, the hash table doesn't exist yet.
     400             :      * Stats of that phase aren't very interesting, so just collect operations
     401             :      * on all locks in a single dummy entry.
     402             :      */
     403             :     if (lwlock_stats_htab == NULL)
     404             :         return &lwlock_stats_dummy;
     405             : 
     406             :     /* Fetch or create the entry. */
     407             :     MemSet(&key, 0, sizeof(key));
     408             :     key.tranche = lock->tranche;
     409             :     key.instance = lock;
     410             :     lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
     411             :     if (!found)
     412             :     {
     413             :         lwstats->sh_acquire_count = 0;
     414             :         lwstats->ex_acquire_count = 0;
     415             :         lwstats->block_count = 0;
     416             :         lwstats->dequeue_self_count = 0;
     417             :         lwstats->spin_delay_count = 0;
     418             :     }
     419             :     return lwstats;
     420             : }
     421             : #endif                          /* LWLOCK_STATS */
     422             : 
     423             : 
     424             : /*
     425             :  * Compute number of LWLocks required by named tranches.  These will be
     426             :  * allocated in the main array.
     427             :  */
     428             : static int
     429        6058 : NumLWLocksForNamedTranches(void)
     430             : {
     431        6058 :     int         numLocks = 0;
     432             :     int         i;
     433             : 
     434        6082 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     435          24 :         numLocks += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     436             : 
     437        6058 :     return numLocks;
     438             : }
     439             : 
     440             : /*
     441             :  * Compute shmem space needed for LWLocks and named tranches.
     442             :  */
     443             : Size
     444        4496 : LWLockShmemSize(void)
     445             : {
     446             :     Size        size;
     447             :     int         i;
     448        4496 :     int         numLocks = NUM_FIXED_LWLOCKS;
     449             : 
     450             :     /* Calculate total number of locks needed in the main array. */
     451        4496 :     numLocks += NumLWLocksForNamedTranches();
     452             : 
     453             :     /* Space for the LWLock array. */
     454        4496 :     size = mul_size(numLocks, sizeof(LWLockPadded));
     455             : 
     456             :     /* Space for dynamic allocation counter, plus room for alignment. */
     457        4496 :     size = add_size(size, sizeof(int) + LWLOCK_PADDED_SIZE);
     458             : 
     459             :     /* space for named tranches. */
     460        4496 :     size = add_size(size, mul_size(NamedLWLockTrancheRequests, sizeof(NamedLWLockTranche)));
     461             : 
     462             :     /* space for name of each tranche. */
     463        4514 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     464          18 :         size = add_size(size, strlen(NamedLWLockTrancheRequestArray[i].tranche_name) + 1);
     465             : 
     466        4496 :     return size;
     467             : }
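
For a rough sense of scale: the loop counts recorded in InitializeLWLocks() below imply this build has 49 individual locks plus 128 buffer-mapping, 16 lock-manager, and 16 predicate-lock partitions, i.e. NUM_FIXED_LWLOCKS = 209. Assuming the common LWLOCK_PADDED_SIZE of 128 bytes (an inference, not stated in this file) and no named tranche requests, the function returns 209 * 128 + sizeof(int) + 128 = 26884 bytes, about 26 kB of shared memory.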
     468             : 
     469             : /*
     470             :  * Allocate shmem space for the main LWLock array and all tranches and
     471             :  * initialize it.  We also register extension LWLock tranches here.
     472             :  */
     473             : void
     474        1562 : CreateLWLocks(void)
     475             : {
     476        1562 :     if (!IsUnderPostmaster)
     477             :     {
     478        1562 :         Size        spaceLocks = LWLockShmemSize();
     479             :         int        *LWLockCounter;
     480             :         char       *ptr;
     481             : 
     482             :         /* Allocate space */
     483        1562 :         ptr = (char *) ShmemAlloc(spaceLocks);
     484             : 
     485             :         /* Leave room for dynamic allocation of tranches */
     486        1562 :         ptr += sizeof(int);
     487             : 
     488             :         /* Ensure desired alignment of LWLock array */
     489        1562 :         ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
     490             : 
     491        1562 :         MainLWLockArray = (LWLockPadded *) ptr;
     492             : 
     493             :         /*
     494             :          * Initialize the dynamic-allocation counter for tranches, which is
     495             :          * stored just before the first LWLock.
     496             :          */
     497        1562 :         LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     498        1562 :         *LWLockCounter = LWTRANCHE_FIRST_USER_DEFINED;
     499             : 
     500             :         /* Initialize all LWLocks */
     501        1562 :         InitializeLWLocks();
     502             :     }
     503             : 
     504             :     /* Register named extension LWLock tranches in the current process. */
     505        1568 :     for (int i = 0; i < NamedLWLockTrancheRequests; i++)
     506           6 :         LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
     507           6 :                               NamedLWLockTrancheArray[i].trancheName);
     508        1562 : }
     509             : 
     510             : /*
     511             :  * Initialize LWLocks that are fixed and those belonging to named tranches.
     512             :  */
     513             : static void
     514        1562 : InitializeLWLocks(void)
     515             : {
     516        1562 :     int         numNamedLocks = NumLWLocksForNamedTranches();
     517             :     int         id;
     518             :     int         i;
     519             :     int         j;
     520             :     LWLockPadded *lock;
     521             : 
     522             :     /* Initialize all individual LWLocks in main array */
     523       78100 :     for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
     524       76538 :         LWLockInitialize(&lock->lock, id);
     525             : 
     526             :     /* Initialize buffer mapping LWLocks in main array */
     527        1562 :     lock = MainLWLockArray + BUFFER_MAPPING_LWLOCK_OFFSET;
     528      201498 :     for (id = 0; id < NUM_BUFFER_PARTITIONS; id++, lock++)
     529      199936 :         LWLockInitialize(&lock->lock, LWTRANCHE_BUFFER_MAPPING);
     530             : 
     531             :     /* Initialize lmgrs' LWLocks in main array */
     532        1562 :     lock = MainLWLockArray + LOCK_MANAGER_LWLOCK_OFFSET;
     533       26554 :     for (id = 0; id < NUM_LOCK_PARTITIONS; id++, lock++)
     534       24992 :         LWLockInitialize(&lock->lock, LWTRANCHE_LOCK_MANAGER);
     535             : 
     536             :     /* Initialize predicate lmgrs' LWLocks in main array */
     537        1562 :     lock = MainLWLockArray + PREDICATELOCK_MANAGER_LWLOCK_OFFSET;
     538       26554 :     for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++)
     539       24992 :         LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER);
     540             : 
     541             :     /*
     542             :      * Copy the info about any named tranches into shared memory (so that
     543             :      * other processes can see it), and initialize the requested LWLocks.
     544             :      */
     545        1562 :     if (NamedLWLockTrancheRequests > 0)
     546             :     {
     547             :         char       *trancheNames;
     548             : 
     549           6 :         NamedLWLockTrancheArray = (NamedLWLockTranche *)
     550           6 :             &MainLWLockArray[NUM_FIXED_LWLOCKS + numNamedLocks];
     551             : 
     552           6 :         trancheNames = (char *) NamedLWLockTrancheArray +
     553           6 :             (NamedLWLockTrancheRequests * sizeof(NamedLWLockTranche));
     554           6 :         lock = &MainLWLockArray[NUM_FIXED_LWLOCKS];
     555             : 
     556          12 :         for (i = 0; i < NamedLWLockTrancheRequests; i++)
     557             :         {
     558             :             NamedLWLockTrancheRequest *request;
     559             :             NamedLWLockTranche *tranche;
     560             :             char       *name;
     561             : 
     562           6 :             request = &NamedLWLockTrancheRequestArray[i];
     563           6 :             tranche = &NamedLWLockTrancheArray[i];
     564             : 
     565           6 :             name = trancheNames;
     566           6 :             trancheNames += strlen(request->tranche_name) + 1;
     567           6 :             strcpy(name, request->tranche_name);
     568           6 :             tranche->trancheId = LWLockNewTrancheId();
     569           6 :             tranche->trancheName = name;
     570             : 
     571          12 :             for (j = 0; j < request->num_lwlocks; j++, lock++)
     572           6 :                 LWLockInitialize(&lock->lock, tranche->trancheId);
     573             :         }
     574             :     }
     575        1562 : }
     576             : 
     577             : /*
     578             :  * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
     579             :  */
     580             : void
     581       27836 : InitLWLockAccess(void)
     582             : {
     583             : #ifdef LWLOCK_STATS
     584             :     init_lwlock_stats();
     585             : #endif
     586       27836 : }
     587             : 
     588             : /*
     589             :  * GetNamedLWLockTranche - returns the base address of the LWLocks belonging
     590             :  *      to the specified tranche.
     591             :  *
     592             :  * The caller can access the requested number of LWLocks starting from the
     593             :  * base address returned by this function.  This works only for tranches
     594             :  * that were requested via the RequestNamedLWLockTranche() API.
     595             :  */
     596             : LWLockPadded *
     597           6 : GetNamedLWLockTranche(const char *tranche_name)
     598             : {
     599             :     int         lock_pos;
     600             :     int         i;
     601             : 
     602             :     /*
     603             :      * Obtain the position of the base address of the LWLocks belonging to
     604             :      * the requested tranche_name in MainLWLockArray.  LWLocks for named
     605             :      * tranches are placed in MainLWLockArray after the fixed locks.
     606             :      */
     607           6 :     lock_pos = NUM_FIXED_LWLOCKS;
     608           6 :     for (i = 0; i < NamedLWLockTrancheRequests; i++)
     609             :     {
     610           6 :         if (strcmp(NamedLWLockTrancheRequestArray[i].tranche_name,
     611             :                    tranche_name) == 0)
     612           6 :             return &MainLWLockArray[lock_pos];
     613             : 
     614           0 :         lock_pos += NamedLWLockTrancheRequestArray[i].num_lwlocks;
     615             :     }
     616             : 
     617           0 :     elog(ERROR, "requested tranche is not registered");
     618             : 
     619             :     /* just to keep compiler quiet */
     620             :     return NULL;
     621             : }
     622             : 
     623             : /*
     624             :  * Allocate a new tranche ID.
     625             :  */
     626             : int
     627          16 : LWLockNewTrancheId(void)
     628             : {
     629             :     int         result;
     630             :     int        *LWLockCounter;
     631             : 
     632          16 :     LWLockCounter = (int *) ((char *) MainLWLockArray - sizeof(int));
     633          16 :     SpinLockAcquire(ShmemLock);
     634          16 :     result = (*LWLockCounter)++;
     635          16 :     SpinLockRelease(ShmemLock);
     636             : 
     637          16 :     return result;
     638             : }
     639             : 
     640             : /*
     641             :  * Register a dynamic tranche name in the lookup table of the current process.
     642             :  *
     643             :  * This routine will save a pointer to the tranche name passed as an argument,
     644             :  * so the name should be allocated in a backend-lifetime context
     645             :  * (shared memory, TopMemoryContext, static constant, or similar).
     646             :  *
     647             :  * The tranche name will be user-visible as a wait event name, so try to
     648             :  * use a name that fits the style for those.
     649             :  */
     650             : void
     651          20 : LWLockRegisterTranche(int tranche_id, const char *tranche_name)
     652             : {
     653             :     /* This should only be called for user-defined tranches. */
     654          20 :     if (tranche_id < LWTRANCHE_FIRST_USER_DEFINED)
     655           0 :         return;
     656             : 
     657             :     /* Convert to array index. */
     658          20 :     tranche_id -= LWTRANCHE_FIRST_USER_DEFINED;
     659             : 
     660             :     /* If necessary, create or enlarge array. */
     661          20 :     if (tranche_id >= LWLockTrancheNamesAllocated)
     662             :     {
     663             :         int         newalloc;
     664             : 
     665          18 :         newalloc = pg_nextpower2_32(Max(8, tranche_id + 1));
     666             : 
     667          18 :         if (LWLockTrancheNames == NULL)
     668          18 :             LWLockTrancheNames = (const char **)
     669          18 :                 MemoryContextAllocZero(TopMemoryContext,
     670             :                                        newalloc * sizeof(char *));
     671             :         else
     672           0 :             LWLockTrancheNames =
     673           0 :                 repalloc0_array(LWLockTrancheNames, const char *, LWLockTrancheNamesAllocated, newalloc);
     674          18 :         LWLockTrancheNamesAllocated = newalloc;
     675             :     }
     676             : 
     677          20 :     LWLockTrancheNames[tranche_id] = tranche_name;
     678             : }
     679             : 
     680             : /*
     681             :  * RequestNamedLWLockTranche
     682             :  *      Request that extra LWLocks be allocated during postmaster
     683             :  *      startup.
     684             :  *
     685             :  * This may only be called via the shmem_request_hook of a library that is
     686             :  * loaded into the postmaster via shared_preload_libraries.  Calls from
     687             :  * elsewhere will fail.
     688             :  *
     689             :  * The tranche name will be user-visible as a wait event name, so try to
     690             :  * use a name that fits the style for those.
     691             :  */
     692             : void
     693           6 : RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
     694             : {
     695             :     NamedLWLockTrancheRequest *request;
     696             : 
     697           6 :     if (!process_shmem_requests_in_progress)
     698           0 :         elog(FATAL, "cannot request additional LWLocks outside shmem_request_hook");
     699             : 
     700           6 :     if (NamedLWLockTrancheRequestArray == NULL)
     701             :     {
     702           6 :         NamedLWLockTrancheRequestsAllocated = 16;
     703           6 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     704           6 :             MemoryContextAlloc(TopMemoryContext,
     705             :                                NamedLWLockTrancheRequestsAllocated
     706             :                                * sizeof(NamedLWLockTrancheRequest));
     707             :     }
     708             : 
     709           6 :     if (NamedLWLockTrancheRequests >= NamedLWLockTrancheRequestsAllocated)
     710             :     {
     711           0 :         int         i = pg_nextpower2_32(NamedLWLockTrancheRequests + 1);
     712             : 
     713           0 :         NamedLWLockTrancheRequestArray = (NamedLWLockTrancheRequest *)
     714           0 :             repalloc(NamedLWLockTrancheRequestArray,
     715             :                      i * sizeof(NamedLWLockTrancheRequest));
     716           0 :         NamedLWLockTrancheRequestsAllocated = i;
     717             :     }
     718             : 
     719           6 :     request = &NamedLWLockTrancheRequestArray[NamedLWLockTrancheRequests];
     720             :     Assert(strlen(tranche_name) + 1 <= NAMEDATALEN);
     721           6 :     strlcpy(request->tranche_name, tranche_name, NAMEDATALEN);
     722           6 :     request->num_lwlocks = num_lwlocks;
     723           6 :     NamedLWLockTrancheRequests++;
     724           6 : }
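
For reference, the canonical extension usage pairs this function with GetNamedLWLockTranche(): request the locks from a shmem_request_hook, then fetch them from a shmem_startup_hook once shared memory exists. A hedged sketch; "my_ext" and my_lock are illustrative, and chaining to previously installed hooks is omitted:

    #include "postgres.h"
    #include "storage/lwlock.h"

    static LWLock *my_lock = NULL;

    /* installed as shmem_request_hook by a shared_preload_libraries library */
    static void
    my_shmem_request(void)
    {
        RequestNamedLWLockTranche("my_ext", 1);
    }

    /* installed as shmem_startup_hook; runs after CreateLWLocks() */
    static void
    my_shmem_startup(void)
    {
        my_lock = &GetNamedLWLockTranche("my_ext")[0].lock;
    }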
     725             : 
     726             : /*
     727             :  * LWLockInitialize - initialize a new lwlock; it's initially unlocked
     728             :  */
     729             : void
     730    17424632 : LWLockInitialize(LWLock *lock, int tranche_id)
     731             : {
     732    17424632 :     pg_atomic_init_u32(&lock->state, LW_FLAG_RELEASE_OK);
     733             : #ifdef LOCK_DEBUG
     734             :     pg_atomic_init_u32(&lock->nwaiters, 0);
     735             : #endif
     736    17424632 :     lock->tranche = tranche_id;
     737    17424632 :     proclist_init(&lock->waiters);
     738    17424632 : }
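
Locks that live outside the main array (for example, in a DSM segment) use the dynamic path instead: allocate a tranche ID, initialize the lock with it, and register a name in every process that will touch the lock. A hedged fragment; shared->lock and the tranche name are hypothetical:

    /* in the process that creates the shared structure */
    int     tranche_id = LWLockNewTrancheId();

    LWLockInitialize(&shared->lock, tranche_id);

    /* in every process that uses the lock, so wait events report a name */
    LWLockRegisterTranche(tranche_id, "my_ext_dsm");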
     739             : 
     740             : /*
     741             :  * Report start of wait event for light-weight locks.
     742             :  *
     743             :  * This function will be used by all the light-weight lock calls which
     744             :  * This function is used by all the lightweight lock calls that need to
     745             :  * wait to acquire the lock.  It distinguishes the wait event based on the
     746             :  * tranche and lock id.
     747             : static inline void
     748       31618 : LWLockReportWaitStart(LWLock *lock)
     749             : {
     750       31618 :     pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
     751       31618 : }
     752             : 
     753             : /*
     754             :  * Report end of wait event for light-weight locks.
     755             :  */
     756             : static inline void
     757       31618 : LWLockReportWaitEnd(void)
     758             : {
     759       31618 :     pgstat_report_wait_end();
     760       31618 : }
     761             : 
     762             : /*
     763             :  * Return the name of an LWLock tranche.
     764             :  */
     765             : static const char *
     766           0 : GetLWTrancheName(uint16 trancheId)
     767             : {
     768             :     /* Individual LWLock? */
     769           0 :     if (trancheId < NUM_INDIVIDUAL_LWLOCKS)
     770           0 :         return IndividualLWLockNames[trancheId];
     771             : 
     772             :     /* Built-in tranche? */
     773           0 :     if (trancheId < LWTRANCHE_FIRST_USER_DEFINED)
     774           0 :         return BuiltinTrancheNames[trancheId - NUM_INDIVIDUAL_LWLOCKS];
     775             : 
     776             :     /*
     777             :      * It's an extension tranche, so look in LWLockTrancheNames[].  However,
     778             :      * it's possible that the tranche has never been registered in the current
     779             :      * process, in which case give up and return "extension".
     780             :      */
     781           0 :     trancheId -= LWTRANCHE_FIRST_USER_DEFINED;
     782             : 
     783           0 :     if (trancheId >= LWLockTrancheNamesAllocated ||
     784           0 :         LWLockTrancheNames[trancheId] == NULL)
     785           0 :         return "extension";
     786             : 
     787           0 :     return LWLockTrancheNames[trancheId];
     788             : }
     789             : 
     790             : /*
     791             :  * Return an identifier for an LWLock based on the wait class and event.
     792             :  */
     793             : const char *
     794           0 : GetLWLockIdentifier(uint32 classId, uint16 eventId)
     795             : {
     796             :     Assert(classId == PG_WAIT_LWLOCK);
     797             :     /* The event IDs are just tranche numbers. */
     798           0 :     return GetLWTrancheName(eventId);
     799             : }
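
The encoding round-trips here: LWLockReportWaitStart() above ORs PG_WAIT_LWLOCK with the tranche, and this function maps the event ID back to a name. A standalone sketch of that composition, assuming the class occupies the top byte and the event ID the low 16 bits (the real masks live in the wait-event headers, so treat these values as illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t class_lwlock = 0x01000000; /* stand-in for PG_WAIT_LWLOCK */
        uint16_t tranche = 42;              /* hypothetical tranche ID */
        uint32_t wait_event_info = class_lwlock | tranche;

        printf("class: 0x%08X\n", wait_event_info & 0xFF000000); /* 0x01000000 */
        printf("event: %u\n", wait_event_info & 0x0000FFFF);     /* 42 */
        return 0;
    }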
     800             : 
     801             : /*
     802             :  * Internal function that tries to atomically acquire the lwlock in the
     803             :  * passed-in mode.
     804             :  *
     805             :  * This function will not block waiting for a lock to become free - that's the
     806             :  * caller's job.
     807             :  *
     808             :  * Returns true if the lock isn't free and we need to wait.
     809             :  */
     810             : static bool
     811   359945910 : LWLockAttemptLock(LWLock *lock, LWLockMode mode)
     812             : {
     813             :     uint32      old_state;
     814             : 
     815             :     Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
     816             : 
     817             :     /*
     818             :      * Read once outside the loop; later iterations will get the newer value
     819             :      * via compare & exchange.
     820             :      */
     821   359945910 :     old_state = pg_atomic_read_u32(&lock->state);
     822             : 
     823             :     /* loop until we've determined whether we could acquire the lock or not */
     824             :     while (true)
     825       73368 :     {
     826             :         uint32      desired_state;
     827             :         bool        lock_free;
     828             : 
     829   360019278 :         desired_state = old_state;
     830             : 
     831   360019278 :         if (mode == LW_EXCLUSIVE)
     832             :         {
     833   157238196 :             lock_free = (old_state & LW_LOCK_MASK) == 0;
     834   157238196 :             if (lock_free)
     835   157164900 :                 desired_state += LW_VAL_EXCLUSIVE;
     836             :         }
     837             :         else
     838             :         {
     839   202781082 :             lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
     840   202781082 :             if (lock_free)
     841   202765884 :                 desired_state += LW_VAL_SHARED;
     842             :         }
     843             : 
     844             :         /*
     845             :          * Attempt to swap in the state we are expecting. If we didn't see
     846             :          * the lock as free, that's just the old value. If we saw it as free,
     847             :          * we'll attempt to mark it acquired. The reason that we always swap
     848             :          * in the value is that this doubles as a memory barrier. We could try
     849             :          * to be smarter and only swap in values if we saw the lock as free,
     850             :          * but benchmarks haven't shown that to be beneficial so far.
     851             :          *
     852             :          * Retry if the value changed since we last looked at it.
     853             :          */
     854   360019278 :         if (pg_atomic_compare_exchange_u32(&lock->state,
     855             :                                            &old_state, desired_state))
     856             :         {
     857   359945910 :             if (lock_free)
     858             :             {
     859             :                 /* Great! Got the lock. */
     860             : #ifdef LOCK_DEBUG
     861             :                 if (mode == LW_EXCLUSIVE)
     862             :                     lock->owner = MyProc;
     863             : #endif
     864   359873078 :                 return false;
     865             :             }
     866             :             else
     867       72832 :                 return true;    /* somebody else has the lock */
     868             :         }
     869             :     }
     870             :     pg_unreachable();
     871             : }
     872             : 
     873             : /*
     874             :  * Lock the LWLock's wait list against concurrent activity.
     875             :  *
     876             :  * NB: even though the wait list is locked, non-conflicting lock operations
     877             :  * may still happen concurrently.
     878             :  *
     879             :  * Time spent holding the mutex should be short!
     880             :  */
     881             : static void
     882     1377612 : LWLockWaitListLock(LWLock *lock)
     883             : {
     884             :     uint32      old_state;
     885             : #ifdef LWLOCK_STATS
     886             :     lwlock_stats *lwstats;
     887             :     uint32      delays = 0;
     888             : 
     889             :     lwstats = get_lwlock_stats_entry(lock);
     890             : #endif
     891             : 
     892             :     while (true)
     893             :     {
     894             :         /* always try once to acquire lock directly */
     895     1377612 :         old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
     896     1377612 :         if (!(old_state & LW_FLAG_LOCKED))
     897     1372996 :             break;              /* got lock */
     898             : 
     899             :         /* and then spin without atomic operations until lock is released */
     900             :         {
     901             :             SpinDelayStatus delayStatus;
     902             : 
     903        4616 :             init_local_spin_delay(&delayStatus);
     904             : 
     905       17544 :             while (old_state & LW_FLAG_LOCKED)
     906             :             {
     907       12928 :                 perform_spin_delay(&delayStatus);
     908       12928 :                 old_state = pg_atomic_read_u32(&lock->state);
     909             :             }
     910             : #ifdef LWLOCK_STATS
     911             :             delays += delayStatus.delays;
     912             : #endif
     913        4616 :             finish_spin_delay(&delayStatus);
     914             :         }
     915             : 
     916             :         /*
     917             :          * Retry. The lock might well have been re-acquired by the time we
     918             :          * attempt to get it again.
     919             :          */
     920             :     }
     921             : 
     922             : #ifdef LWLOCK_STATS
     923             :     lwstats->spin_delay_count += delays;
     924             : #endif
     925     1372996 : }
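
The loop above is the classic test-and-test-and-set pattern: one atomic read-modify-write to try for the flag, then plain reads while somebody else holds it, so contended waiters don't keep bouncing the cache line between CPUs. A minimal standalone rendering with C11 atomics; the empty busy-wait is illustrative, where lwlock.c calls perform_spin_delay():

    #include <stdatomic.h>
    #include <stdint.h>

    #define FLAG_LOCKED ((uint32_t) 1 << 28)

    static _Atomic uint32_t state;

    static void
    waitlist_lock(void)
    {
        for (;;)
        {
            /* one atomic RMW: try to grab the flag */
            uint32_t old = atomic_fetch_or(&state, FLAG_LOCKED);

            if (!(old & FLAG_LOCKED))
                return;         /* flag was clear: we now hold the lock */

            /* spin on plain reads until the flag clears, then retry */
            while (atomic_load_explicit(&state, memory_order_relaxed) & FLAG_LOCKED)
                ;               /* perform_spin_delay() in lwlock.c */
        }
    }

    int
    main(void)
    {
        waitlist_lock();
        atomic_fetch_and(&state, ~FLAG_LOCKED);     /* unlock */
        return 0;
    }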
     926             : 
     927             : /*
     928             :  * Unlock the LWLock's wait list.
     929             :  *
     930             :  * Note that it can be more efficient to manipulate flags and release the
     931             :  * lock in a single atomic operation, as LWLockWakeup() does.
     932             :  */
     933             : static void
     934     1316064 : LWLockWaitListUnlock(LWLock *lock)
     935             : {
     936             :     uint32      old_state PG_USED_FOR_ASSERTS_ONLY;
     937             : 
     938     1316064 :     old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);
     939             : 
     940             :     Assert(old_state & LW_FLAG_LOCKED);
     941     1316064 : }
     942             : 
     943             : /*
     944             :  * Wake up all the lockers that currently have a chance to acquire the lock.
     945             :  */
     946             : static void
     947       56932 : LWLockWakeup(LWLock *lock)
     948             : {
     949             :     bool        new_release_ok;
     950       56932 :     bool        wokeup_somebody = false;
     951             :     proclist_head wakeup;
     952             :     proclist_mutable_iter iter;
     953             : 
     954       56932 :     proclist_init(&wakeup);
     955             : 
     956       56932 :     new_release_ok = true;
     957             : 
     958             :     /* lock wait list while collecting backends to wake up */
     959       56932 :     LWLockWaitListLock(lock);
     960             : 
     961       69498 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
     962             :     {
     963       33638 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
     964             : 
     965       33638 :         if (wokeup_somebody && waiter->lwWaitMode == LW_EXCLUSIVE)
     966         182 :             continue;
     967             : 
     968       33456 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
     969       33456 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
     970             : 
     971       33456 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
     972             :         {
     973             :             /*
     974             :              * Prevent additional wakeups until retryer gets to run. Backends
     975             :              * that are just waiting for the lock to become free don't retry
     976             :              * automatically.
     977             :              */
     978       27786 :             new_release_ok = false;
     979             : 
     980             :             /*
     981             :              * Don't wake up (further) exclusive lockers.
     982             :              */
     983       27786 :             wokeup_somebody = true;
     984             :         }
     985             : 
     986             :         /*
     987             :          * Signal that the process isn't on the wait list anymore. This allows
     988             :          * LWLockDequeueSelf() to remove itself from the wait list with a
     989             :          * proclist_delete(), rather than having to check if it has been
     990             :          * removed from the list.
     991             :          */
     992             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
     993       33456 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
     994             : 
     995             :         /*
     996             :          * Once we've woken up an exclusive locker, there's no point in waking
     997             :          * up anybody else.
     998             :          */
     999       33456 :         if (waiter->lwWaitMode == LW_EXCLUSIVE)
    1000       21072 :             break;
    1001             :     }
    1002             : 
    1003             :     Assert(proclist_is_empty(&wakeup) || pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS);
    1004             : 
    1005             :     /* unset required flags, and release lock, in one fell swoop */
    1006             :     {
    1007             :         uint32      old_state;
    1008             :         uint32      desired_state;
    1009             : 
    1010       56932 :         old_state = pg_atomic_read_u32(&lock->state);
    1011             :         while (true)
    1012             :         {
    1013       57166 :             desired_state = old_state;
    1014             : 
    1015             :             /* compute desired flags */
    1016             : 
    1017       57166 :             if (new_release_ok)
    1018       29618 :                 desired_state |= LW_FLAG_RELEASE_OK;
    1019             :             else
    1020       27548 :                 desired_state &= ~LW_FLAG_RELEASE_OK;
    1021             : 
    1022       57166 :             if (proclist_is_empty(&wakeup))
    1023       26422 :                 desired_state &= ~LW_FLAG_HAS_WAITERS;
    1024             : 
    1025       57166 :             desired_state &= ~LW_FLAG_LOCKED;   /* release lock */
    1026             : 
    1027       57166 :             if (pg_atomic_compare_exchange_u32(&lock->state, &old_state,
    1028             :                                                desired_state))
    1029       56932 :                 break;
    1030             :         }
    1031             :     }
    1032             : 
    1033             :     /* Awaken any waiters I removed from the queue. */
    1034       90388 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1035             :     {
    1036       33456 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1037             : 
    1038             :         LOG_LWDEBUG("LWLockRelease", lock, "release waiter");
    1039       33456 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1040             : 
    1041             :         /*
    1042             :          * Guarantee that lwWaiting being unset only becomes visible once the
     1043             :          * unlink from the wait list has completed. Otherwise the target
     1044             :          * backend could be woken up for another reason and enqueue itself
     1045             :          * for a new lock - if that happens before the list unlink happens,
     1046             :          * the list would end up being corrupted.
    1047             :          *
    1048             :          * The barrier pairs with the LWLockWaitListLock() when enqueuing for
    1049             :          * another lock.
    1050             :          */
    1051       33456 :         pg_write_barrier();
    1052       33456 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1053       33456 :         PGSemaphoreUnlock(waiter->sem);
    1054             :     }
    1055       56932 : }
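
The flag-manipulating block at the end of LWLockWakeup() is a standard
compare-and-swap retry loop: read the state word once, compute the desired
flag bits, and reissue the CAS until no concurrent update interferes. A
minimal standalone sketch of the same pattern in C11 atomics (the FLAG_*
values are illustrative stand-ins, not PostgreSQL's LW_FLAG_* definitions):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define FLAG_LOCKED       (1U << 0)
    #define FLAG_HAS_WAITERS  (1U << 1)
    #define FLAG_RELEASE_OK   (1U << 2)

    /* Adjust several flags and release the wait-list lock in one CAS. */
    static void
    release_waitlist_lock(_Atomic uint32_t *state, bool release_ok,
                          bool queue_empty)
    {
        uint32_t old_state = atomic_load(state);
        uint32_t desired;

        do
        {
            desired = old_state;
            if (release_ok)
                desired |= FLAG_RELEASE_OK;
            else
                desired &= ~FLAG_RELEASE_OK;
            if (queue_empty)
                desired &= ~FLAG_HAS_WAITERS;
            desired &= ~FLAG_LOCKED;    /* release the wait-list lock */
            /* On failure, old_state is refreshed with the current value. */
        } while (!atomic_compare_exchange_weak(state, &old_state, desired));
    }
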
    1056             : 
    1057             : /*
    1058             :  * Add ourselves to the end of the queue.
    1059             :  *
    1060             :  * NB: Mode can be LW_WAIT_UNTIL_FREE here!
    1061             :  */
    1062             : static void
    1063       48916 : LWLockQueueSelf(LWLock *lock, LWLockMode mode)
    1064             : {
    1065             :     /*
    1066             :      * If we don't have a PGPROC structure, there's no way to wait. This
    1067             :      * should never occur, since MyProc should only be null during shared
    1068             :      * memory initialization.
    1069             :      */
    1070       48916 :     if (MyProc == NULL)
    1071           0 :         elog(PANIC, "cannot wait without a PGPROC structure");
    1072             : 
    1073       48916 :     if (MyProc->lwWaiting != LW_WS_NOT_WAITING)
    1074           0 :         elog(PANIC, "queueing for lock while waiting on another one");
    1075             : 
    1076       48916 :     LWLockWaitListLock(lock);
    1077             : 
    1078             :     /* setting the flag is protected by the spinlock */
    1079       48916 :     pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS);
    1080             : 
    1081       48916 :     MyProc->lwWaiting = LW_WS_WAITING;
    1082       48916 :     MyProc->lwWaitMode = mode;
    1083             : 
    1084             :     /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */
    1085       48916 :     if (mode == LW_WAIT_UNTIL_FREE)
    1086        7168 :         proclist_push_head(&lock->waiters, MyProc->pgprocno, lwWaitLink);
    1087             :     else
    1088       41748 :         proclist_push_tail(&lock->waiters, MyProc->pgprocno, lwWaitLink);
    1089             : 
    1090             :     /* Can release the mutex now */
    1091       48916 :     LWLockWaitListUnlock(lock);
    1092             : 
    1093             : #ifdef LOCK_DEBUG
    1094             :     pg_atomic_fetch_add_u32(&lock->nwaiters, 1);
    1095             : #endif
    1096       48916 : }
    1097             : 
    1098             : /*
    1099             :  * Remove ourselves from the waitlist.
    1100             :  *
    1101             :  * This is used if we queued ourselves because we thought we needed to sleep
    1102             :  * but, after further checking, we discovered that we don't actually need to
    1103             :  * do so.
    1104             :  */
    1105             : static void
    1106       17298 : LWLockDequeueSelf(LWLock *lock)
    1107             : {
    1108             :     bool        on_waitlist;
    1109             : 
    1110             : #ifdef LWLOCK_STATS
    1111             :     lwlock_stats *lwstats;
    1112             : 
    1113             :     lwstats = get_lwlock_stats_entry(lock);
    1114             : 
    1115             :     lwstats->dequeue_self_count++;
    1116             : #endif
    1117             : 
    1118       17298 :     LWLockWaitListLock(lock);
    1119             : 
    1120             :     /*
    1121             :      * Remove ourselves from the waitlist, unless we've already been removed.
    1122             :      * The removal happens with the wait list lock held, so there's no race in
    1123             :      * this check.
    1124             :      */
    1125       17298 :     on_waitlist = MyProc->lwWaiting == LW_WS_WAITING;
    1126       17298 :     if (on_waitlist)
    1127       15226 :         proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink);
    1128             : 
    1129       17298 :     if (proclist_is_empty(&lock->waiters) &&
    1130       17098 :         (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0)
    1131             :     {
    1132       17090 :         pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_HAS_WAITERS);
    1133             :     }
    1134             : 
    1135             :     /* XXX: combine with fetch_and above? */
    1136       17298 :     LWLockWaitListUnlock(lock);
    1137             : 
    1138             :     /* clear waiting state again, nice for debugging */
    1139       17298 :     if (on_waitlist)
    1140       15226 :         MyProc->lwWaiting = LW_WS_NOT_WAITING;
    1141             :     else
    1142             :     {
    1143        2072 :         int         extraWaits = 0;
    1144             : 
    1145             :         /*
     1146             :          * Somebody else dequeued us and has woken us up, or will do so
     1147             :          * shortly. Deal with absorbing that now-superfluous wakeup.
    1148             :          */
    1149             : 
    1150             :         /*
    1151             :          * Reset RELEASE_OK flag if somebody woke us before we removed
    1152             :          * ourselves - they'll have set it to false.
    1153             :          */
    1154        2072 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1155             : 
    1156             :         /*
    1157             :          * Now wait for the scheduled wakeup, otherwise our ->lwWaiting would
    1158             :          * get reset at some inconvenient point later. Most of the time this
    1159             :          * will immediately return.
    1160             :          */
    1161             :         for (;;)
    1162             :         {
    1163        2072 :             PGSemaphoreLock(MyProc->sem);
    1164        2072 :             if (MyProc->lwWaiting == LW_WS_NOT_WAITING)
    1165        2072 :                 break;
    1166           0 :             extraWaits++;
    1167             :         }
    1168             : 
    1169             :         /*
    1170             :          * Fix the process wait semaphore's count for any absorbed wakeups.
    1171             :          */
    1172        2072 :         while (extraWaits-- > 0)
    1173           0 :             PGSemaphoreUnlock(MyProc->sem);
    1174             :     }
    1175             : 
    1176             : #ifdef LOCK_DEBUG
    1177             :     {
    1178             :         /* not waiting anymore */
    1179             :         uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1180             : 
    1181             :         Assert(nwaiters < MAX_BACKENDS);
    1182             :     }
    1183             : #endif
    1184       17298 : }
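
The else-branch above must absorb the wakeup that a concurrent releaser has
issued (or is about to issue) for us, while repaying any semaphore posts
that belong to a different wait. A hedged standalone sketch of that pattern
with POSIX semaphores, where the not_waiting flag stands in for
MyProc->lwWaiting (error/EINTR handling omitted):

    #include <semaphore.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    /* Consume posts until *not_waiting is set, then repay any extras. */
    static void
    absorb_scheduled_wakeup(sem_t *sem, _Atomic bool *not_waiting)
    {
        int extra_waits = 0;

        for (;;)
        {
            sem_wait(sem);              /* consume one post */
            if (atomic_load(not_waiting))
                break;                  /* this post was the one meant for us */
            extra_waits++;              /* meant for something else; tally it */
        }

        /* Give back the posts we consumed by mistake. */
        while (extra_waits-- > 0)
            sem_post(sem);
    }
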
    1185             : 
    1186             : /*
    1187             :  * LWLockAcquire - acquire a lightweight lock in the specified mode
    1188             :  *
    1189             :  * If the lock is not available, sleep until it is.  Returns true if the lock
    1190             :  * was available immediately, false if we had to sleep.
    1191             :  *
    1192             :  * Side effect: cancel/die interrupts are held off until lock release.
    1193             :  */
    1194             : bool
    1195   356423598 : LWLockAcquire(LWLock *lock, LWLockMode mode)
    1196             : {
    1197   356423598 :     PGPROC     *proc = MyProc;
    1198   356423598 :     bool        result = true;
    1199   356423598 :     int         extraWaits = 0;
    1200             : #ifdef LWLOCK_STATS
    1201             :     lwlock_stats *lwstats;
    1202             : 
    1203             :     lwstats = get_lwlock_stats_entry(lock);
    1204             : #endif
    1205             : 
    1206             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1207             : 
    1208             :     PRINT_LWDEBUG("LWLockAcquire", lock, mode);
    1209             : 
    1210             : #ifdef LWLOCK_STATS
    1211             :     /* Count lock acquisition attempts */
    1212             :     if (mode == LW_EXCLUSIVE)
    1213             :         lwstats->ex_acquire_count++;
    1214             :     else
    1215             :         lwstats->sh_acquire_count++;
    1216             : #endif                          /* LWLOCK_STATS */
    1217             : 
    1218             :     /*
    1219             :      * We can't wait if we haven't got a PGPROC.  This should only occur
    1220             :      * during bootstrap or shared memory initialization.  Put an Assert here
    1221             :      * to catch unsafe coding practices.
    1222             :      */
    1223             :     Assert(!(proc == NULL && IsUnderPostmaster));
    1224             : 
    1225             :     /* Ensure we will have room to remember the lock */
    1226   356423598 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1227           0 :         elog(ERROR, "too many LWLocks taken");
    1228             : 
    1229             :     /*
    1230             :      * Lock out cancel/die interrupts until we exit the code section protected
    1231             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1232             :      * manipulations of data structures in shared memory.
    1233             :      */
    1234   356423598 :     HOLD_INTERRUPTS();
    1235             : 
    1236             :     /*
    1237             :      * Loop here to try to acquire lock after each time we are signaled by
    1238             :      * LWLockRelease.
    1239             :      *
    1240             :      * NOTE: it might seem better to have LWLockRelease actually grant us the
    1241             :      * lock, rather than retrying and possibly having to go back to sleep. But
    1242             :      * in practice that is no good because it means a process swap for every
    1243             :      * lock acquisition when two or more processes are contending for the same
    1244             :      * lock.  Since LWLocks are normally used to protect not-very-long
    1245             :      * sections of computation, a process needs to be able to acquire and
    1246             :      * release the same lock many times during a single CPU time slice, even
    1247             :      * in the presence of contention.  The efficiency of being able to do that
    1248             :      * outweighs the inefficiency of sometimes wasting a process dispatch
    1249             :      * cycle because the lock is not free when a released waiter finally gets
    1250             :      * to run.  See pgsql-hackers archives for 29-Dec-01.
    1251             :      */
    1252             :     for (;;)
    1253       26034 :     {
    1254             :         bool        mustwait;
    1255             : 
    1256             :         /*
     1257             :          * Try to grab the lock the first time; we're not in the wait queue
     1258             :          * yet/anymore.
    1259             :          */
    1260   356449632 :         mustwait = LWLockAttemptLock(lock, mode);
    1261             : 
    1262   356449632 :         if (!mustwait)
    1263             :         {
    1264             :             LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
    1265   356407884 :             break;              /* got the lock */
    1266             :         }
    1267             : 
    1268             :         /*
     1269             :          * Ok, at this point we couldn't grab the lock on the first try. We
     1270             :          * cannot simply queue ourselves to the end of the list and wait to
     1271             :          * be woken up, because by now the lock could long since have been
     1272             :          * released. Instead add ourselves to the queue and try to grab the
     1273             :          * lock again. If we succeed, we need to revert the queueing;
     1274             :          * otherwise we sleep and recheck. If we still couldn't grab the
     1275             :          * lock, we know the other locker will see our queue entry when
     1276             :          * releasing, since it existed before we checked for the lock.
    1277             :          */
    1278             : 
    1279             :         /* add to the queue */
    1280       41748 :         LWLockQueueSelf(lock, mode);
    1281             : 
    1282             :         /* we're now guaranteed to be woken up if necessary */
    1283       41748 :         mustwait = LWLockAttemptLock(lock, mode);
    1284             : 
    1285             :         /* ok, grabbed the lock the second time round, need to undo queueing */
    1286       41748 :         if (!mustwait)
    1287             :         {
    1288             :             LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
    1289             : 
    1290       15714 :             LWLockDequeueSelf(lock);
    1291       15714 :             break;
    1292             :         }
    1293             : 
    1294             :         /*
    1295             :          * Wait until awakened.
    1296             :          *
    1297             :          * It is possible that we get awakened for a reason other than being
    1298             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1299             :          * we've gotten the LWLock, re-increment the sema by the number of
    1300             :          * additional signals received.
    1301             :          */
    1302             :         LOG_LWDEBUG("LWLockAcquire", lock, "waiting");
    1303             : 
    1304             : #ifdef LWLOCK_STATS
    1305             :         lwstats->block_count++;
    1306             : #endif
    1307             : 
    1308       26034 :         LWLockReportWaitStart(lock);
    1309             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1310             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1311             : 
    1312             :         for (;;)
    1313             :         {
    1314       26034 :             PGSemaphoreLock(proc->sem);
    1315       26034 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1316       26034 :                 break;
    1317           0 :             extraWaits++;
    1318             :         }
    1319             : 
    1320             :         /* Retrying, allow LWLockRelease to release waiters again. */
    1321       26034 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1322             : 
    1323             : #ifdef LOCK_DEBUG
    1324             :         {
    1325             :             /* not waiting anymore */
    1326             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1327             : 
    1328             :             Assert(nwaiters < MAX_BACKENDS);
    1329             :         }
    1330             : #endif
    1331             : 
    1332             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1333             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1334       26034 :         LWLockReportWaitEnd();
    1335             : 
    1336             :         LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
    1337             : 
    1338             :         /* Now loop back and try to acquire lock again. */
    1339       26034 :         result = false;
    1340             :     }
    1341             : 
    1342             :     if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_ENABLED())
    1343             :         TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
    1344             : 
    1345             :     /* Add lock to list of locks held by this backend */
    1346   356423598 :     held_lwlocks[num_held_lwlocks].lock = lock;
    1347   356423598 :     held_lwlocks[num_held_lwlocks++].mode = mode;
    1348             : 
    1349             :     /*
    1350             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1351             :      */
    1352   356423598 :     while (extraWaits-- > 0)
    1353           0 :         PGSemaphoreUnlock(proc->sem);
    1354             : 
    1355   356423598 :     return result;
    1356             : }
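
Taken together, the usual calling pattern is simply acquire, touch shared
state, release, with cancel/die interrupts held off for the whole critical
section. A minimal usage sketch (a fragment assuming storage/lwlock.h;
MyCounterLock and shared_counter are hypothetical, not part of lwlock.c):

    /* Writer: exclusive mode blocks all other lockers. */
    LWLockAcquire(MyCounterLock, LW_EXCLUSIVE);
    shared_counter->value++;            /* safe: we are the only holder */
    LWLockRelease(MyCounterLock);       /* also resumes interrupts */

    /* Readers: shared mode, may proceed concurrently with one another. */
    LWLockAcquire(MyCounterLock, LW_SHARED);
    uint64      snapshot = shared_counter->value;
    LWLockRelease(MyCounterLock);
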
    1357             : 
    1358             : /*
    1359             :  * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
    1360             :  *
    1361             :  * If the lock is not available, return false with no side-effects.
    1362             :  *
    1363             :  * If successful, cancel/die interrupts are held off until lock release.
    1364             :  */
    1365             : bool
    1366     3258300 : LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    1367             : {
    1368             :     bool        mustwait;
    1369             : 
    1370             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1371             : 
    1372             :     PRINT_LWDEBUG("LWLockConditionalAcquire", lock, mode);
    1373             : 
    1374             :     /* Ensure we will have room to remember the lock */
    1375     3258300 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1376           0 :         elog(ERROR, "too many LWLocks taken");
    1377             : 
    1378             :     /*
    1379             :      * Lock out cancel/die interrupts until we exit the code section protected
    1380             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1381             :      * manipulations of data structures in shared memory.
    1382             :      */
    1383     3258300 :     HOLD_INTERRUPTS();
    1384             : 
    1385             :     /* Check for the lock */
    1386     3258300 :     mustwait = LWLockAttemptLock(lock, mode);
    1387             : 
    1388     3258300 :     if (mustwait)
    1389             :     {
    1390             :         /* Failed to get lock, so release interrupt holdoff */
    1391        1396 :         RESUME_INTERRUPTS();
    1392             : 
    1393             :         LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
    1394             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL_ENABLED())
    1395             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
    1396             :     }
    1397             :     else
    1398             :     {
    1399             :         /* Add lock to list of locks held by this backend */
    1400     3256904 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1401     3256904 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1402             :         if (TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_ENABLED())
    1403             :             TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
    1404             :     }
    1405     3258300 :     return !mustwait;
    1406             : }
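
This variant suits opportunistic work: perform the protected operation only
if the lock happens to be free, and otherwise fall back without ever
sleeping. A sketch reusing the hypothetical lock from the previous example:

    if (LWLockConditionalAcquire(MyCounterLock, LW_EXCLUSIVE))
    {
        shared_counter->value++;        /* got the lock without blocking */
        LWLockRelease(MyCounterLock);
    }
    else
    {
        /* Lock busy: no side effects, interrupts not held off; try later. */
    }
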
    1407             : 
    1408             : /*
    1409             :  * LWLockAcquireOrWait - Acquire lock, or wait until it's free
    1410             :  *
    1411             :  * The semantics of this function are a bit funky.  If the lock is currently
    1412             :  * free, it is acquired in the given mode, and the function returns true.  If
    1413             :  * the lock isn't immediately free, the function waits until it is released
    1414             :  * and returns false, but does not acquire the lock.
    1415             :  *
    1416             :  * This is currently used for WALWriteLock: when a backend flushes the WAL,
    1417             :  * holding WALWriteLock, it can flush the commit records of many other
    1418             :  * backends as a side-effect.  Those other backends need to wait until the
    1419             :  * flush finishes, but don't need to acquire the lock anymore.  They can just
    1420             :  * wake up, observe that their records have already been flushed, and return.
    1421             :  */
    1422             : bool
    1423      194392 : LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
    1424             : {
    1425      194392 :     PGPROC     *proc = MyProc;
    1426             :     bool        mustwait;
    1427      194392 :     int         extraWaits = 0;
    1428             : #ifdef LWLOCK_STATS
    1429             :     lwlock_stats *lwstats;
    1430             : 
    1431             :     lwstats = get_lwlock_stats_entry(lock);
    1432             : #endif
    1433             : 
    1434             :     Assert(mode == LW_SHARED || mode == LW_EXCLUSIVE);
    1435             : 
    1436             :     PRINT_LWDEBUG("LWLockAcquireOrWait", lock, mode);
    1437             : 
    1438             :     /* Ensure we will have room to remember the lock */
    1439      194392 :     if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
    1440           0 :         elog(ERROR, "too many LWLocks taken");
    1441             : 
    1442             :     /*
    1443             :      * Lock out cancel/die interrupts until we exit the code section protected
    1444             :      * by the LWLock.  This ensures that interrupts will not interfere with
    1445             :      * manipulations of data structures in shared memory.
    1446             :      */
    1447      194392 :     HOLD_INTERRUPTS();
    1448             : 
    1449             :     /*
    1450             :      * NB: We're using nearly the same twice-in-a-row lock acquisition
    1451             :      * protocol as LWLockAcquire(). Check its comments for details.
    1452             :      */
    1453      194392 :     mustwait = LWLockAttemptLock(lock, mode);
    1454             : 
    1455      194392 :     if (mustwait)
    1456             :     {
    1457        1838 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1458             : 
    1459        1838 :         mustwait = LWLockAttemptLock(lock, mode);
    1460             : 
    1461        1838 :         if (mustwait)
    1462             :         {
    1463             :             /*
    1464             :              * Wait until awakened.  Like in LWLockAcquire, be prepared for
    1465             :              * bogus wakeups.
    1466             :              */
    1467             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "waiting");
    1468             : 
    1469             : #ifdef LWLOCK_STATS
    1470             :             lwstats->block_count++;
    1471             : #endif
    1472             : 
    1473        1816 :             LWLockReportWaitStart(lock);
    1474             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1475             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
    1476             : 
    1477             :             for (;;)
    1478             :             {
    1479        1816 :                 PGSemaphoreLock(proc->sem);
    1480        1816 :                 if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1481        1816 :                     break;
    1482           0 :                 extraWaits++;
    1483             :             }
    1484             : 
    1485             : #ifdef LOCK_DEBUG
    1486             :             {
    1487             :                 /* not waiting anymore */
    1488             :                 uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1489             : 
    1490             :                 Assert(nwaiters < MAX_BACKENDS);
    1491             :             }
    1492             : #endif
    1493             :             if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1494             :                 TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
    1495        1816 :             LWLockReportWaitEnd();
    1496             : 
    1497             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
    1498             :         }
    1499             :         else
    1500             :         {
    1501             :             LOG_LWDEBUG("LWLockAcquireOrWait", lock, "acquired, undoing queue");
    1502             : 
    1503             :             /*
     1504             :              * Got the lock on the second attempt; undo queueing. We need to treat
    1505             :              * this as having successfully acquired the lock, otherwise we'd
    1506             :              * not necessarily wake up people we've prevented from acquiring
    1507             :              * the lock.
    1508             :              */
    1509          22 :             LWLockDequeueSelf(lock);
    1510             :         }
    1511             :     }
    1512             : 
    1513             :     /*
    1514             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1515             :      */
    1516      194392 :     while (extraWaits-- > 0)
    1517           0 :         PGSemaphoreUnlock(proc->sem);
    1518             : 
    1519      194392 :     if (mustwait)
    1520             :     {
    1521             :         /* Failed to get lock, so release interrupt holdoff */
    1522        1816 :         RESUME_INTERRUPTS();
    1523             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
    1524             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL_ENABLED())
    1525             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
    1526             :     }
    1527             :     else
    1528             :     {
    1529             :         LOG_LWDEBUG("LWLockAcquireOrWait", lock, "succeeded");
    1530             :         /* Add lock to list of locks held by this backend */
    1531      192576 :         held_lwlocks[num_held_lwlocks].lock = lock;
    1532      192576 :         held_lwlocks[num_held_lwlocks++].mode = mode;
    1533             :         if (TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_ENABLED())
    1534             :             TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
    1535             :     }
    1536             : 
    1537      194392 :     return !mustwait;
    1538             : }
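
The WALWriteLock usage the comment describes boils down to "flush, or wait
for someone else's flush and recheck". A hedged sketch; FlushWALToDisk(),
GetFlushedUpTo(), and flush_target are hypothetical stand-ins for the real
xlog machinery:

    if (LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
    {
        /* Lock was free: our flush covers other backends' records too. */
        FlushWALToDisk(flush_target);
        LWLockRelease(WALWriteLock);
    }
    else
    {
        /*
         * We waited for the holder to finish rather than acquiring the lock.
         * Their flush may already cover us, so recheck before retrying.
         */
        if (GetFlushedUpTo() >= flush_target)
            return;                     /* nothing left to do */
    }
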
    1539             : 
    1540             : /*
     1541             :  * Does the lwlock, in its current state, require the caller to wait for the
     1542             :  * variable's value to change?
    1543             :  *
    1544             :  * If we don't need to wait, and it's because the value of the variable has
    1545             :  * changed, store the current value in newval.
    1546             :  *
    1547             :  * *result is set to true if the lock was free, and false otherwise.
    1548             :  */
    1549             : static bool
    1550     8595212 : LWLockConflictsWithVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1551             :                        uint64 *newval, bool *result)
    1552             : {
    1553             :     bool        mustwait;
    1554             :     uint64      value;
    1555             : 
    1556             :     /*
     1557             :      * Test first to see if the slot is free right now.
     1558             :      *
     1559             :      * XXX: the only caller of this routine, WaitXLogInsertionsToFinish()
    1560             :      * via LWLockWaitForVar(), uses an implied barrier with a spinlock before
    1561             :      * this, so we don't need a memory barrier here as far as the current
    1562             :      * usage is concerned.  But that might not be safe in general.
    1563             :      */
    1564     8595212 :     mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
    1565             : 
    1566     8595212 :     if (!mustwait)
    1567             :     {
    1568     2747096 :         *result = true;
    1569     2747096 :         return false;
    1570             :     }
    1571             : 
    1572     5848116 :     *result = false;
    1573             : 
    1574             :     /*
    1575             :      * Reading this value atomically is safe even on platforms where uint64
    1576             :      * cannot be read without observing a torn value.
    1577             :      */
    1578     5848116 :     value = pg_atomic_read_u64(valptr);
    1579             : 
    1580     5848116 :     if (value != oldval)
    1581             :     {
    1582     5839018 :         mustwait = false;
    1583     5839018 :         *newval = value;
    1584             :     }
    1585             :     else
    1586             :     {
    1587        9098 :         mustwait = true;
    1588             :     }
    1589             : 
    1590     5848116 :     return mustwait;
    1591             : }
    1592             : 
    1593             : /*
    1594             :  * LWLockWaitForVar - Wait until lock is free, or a variable is updated.
    1595             :  *
    1596             :  * If the lock is held and *valptr equals oldval, waits until the lock is
    1597             :  * either freed, or the lock holder updates *valptr by calling
    1598             :  * LWLockUpdateVar.  If the lock is free on exit (immediately or after
    1599             :  * waiting), returns true.  If the lock is still held, but *valptr no longer
    1600             :  * matches oldval, returns false and sets *newval to the current value in
    1601             :  * *valptr.
    1602             :  *
    1603             :  * Note: this function ignores shared lock holders; if the lock is held
    1604             :  * in shared mode, returns 'true'.
    1605             :  *
    1606             :  * Be aware that LWLockConflictsWithVar() does not include a memory barrier,
    1607             :  * hence the caller of this function may want to rely on an explicit barrier or
    1608             :  * an implied barrier via spinlock or LWLock to avoid memory ordering issues.
    1609             :  */
    1610             : bool
    1611     8586114 : LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval,
    1612             :                  uint64 *newval)
    1613             : {
    1614     8586114 :     PGPROC     *proc = MyProc;
    1615     8586114 :     int         extraWaits = 0;
    1616     8586114 :     bool        result = false;
    1617             : #ifdef LWLOCK_STATS
    1618             :     lwlock_stats *lwstats;
    1619             : 
    1620             :     lwstats = get_lwlock_stats_entry(lock);
    1621             : #endif
    1622             : 
    1623             :     PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
    1624             : 
    1625             :     /*
    1626             :      * Lock out cancel/die interrupts while we sleep on the lock.  There is no
    1627             :      * cleanup mechanism to remove us from the wait queue if we got
    1628             :      * interrupted.
    1629             :      */
    1630     8586114 :     HOLD_INTERRUPTS();
    1631             : 
    1632             :     /*
    1633             :      * Loop here to check the lock's status after each time we are signaled.
    1634             :      */
    1635             :     for (;;)
    1636        3768 :     {
    1637             :         bool        mustwait;
    1638             : 
    1639     8589882 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1640             :                                           &result);
    1641             : 
    1642     8589882 :         if (!mustwait)
    1643     8584552 :             break;              /* the lock was free or value didn't match */
    1644             : 
    1645             :         /*
     1646             :          * Add myself to the wait queue. Note that this is racy: somebody else
     1647             :          * could wake us up before we've finished queuing. NB: We're using nearly
    1648             :          * the same twice-in-a-row lock acquisition protocol as
    1649             :          * LWLockAcquire(). Check its comments for details. The only
    1650             :          * difference is that we also have to check the variable's values when
    1651             :          * checking the state of the lock.
    1652             :          */
    1653        5330 :         LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
    1654             : 
    1655             :         /*
    1656             :          * Set RELEASE_OK flag, to make sure we get woken up as soon as the
    1657             :          * lock is released.
    1658             :          */
    1659        5330 :         pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
    1660             : 
    1661             :         /*
    1662             :          * We're now guaranteed to be woken up if necessary. Recheck the lock
     1663             :          * We're now guaranteed to be woken up if necessary. Recheck the
     1664             :          * lock's and the variable's state.
    1665        5330 :         mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
    1666             :                                           &result);
    1667             : 
    1668             :         /* Ok, no conflict after we queued ourselves. Undo queueing. */
    1669        5330 :         if (!mustwait)
    1670             :         {
    1671             :             LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
    1672             : 
    1673        1562 :             LWLockDequeueSelf(lock);
    1674        1562 :             break;
    1675             :         }
    1676             : 
    1677             :         /*
    1678             :          * Wait until awakened.
    1679             :          *
    1680             :          * It is possible that we get awakened for a reason other than being
    1681             :          * signaled by LWLockRelease.  If so, loop back and wait again.  Once
    1682             :          * we've gotten the LWLock, re-increment the sema by the number of
    1683             :          * additional signals received.
    1684             :          */
    1685             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "waiting");
    1686             : 
    1687             : #ifdef LWLOCK_STATS
    1688             :         lwstats->block_count++;
    1689             : #endif
    1690             : 
    1691        3768 :         LWLockReportWaitStart(lock);
    1692             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
    1693             :             TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
    1694             : 
    1695             :         for (;;)
    1696             :         {
    1697        3768 :             PGSemaphoreLock(proc->sem);
    1698        3768 :             if (proc->lwWaiting == LW_WS_NOT_WAITING)
    1699        3768 :                 break;
    1700           0 :             extraWaits++;
    1701             :         }
    1702             : 
    1703             : #ifdef LOCK_DEBUG
    1704             :         {
    1705             :             /* not waiting anymore */
    1706             :             uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);
    1707             : 
    1708             :             Assert(nwaiters < MAX_BACKENDS);
    1709             :         }
    1710             : #endif
    1711             : 
    1712             :         if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
    1713             :             TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
    1714        3768 :         LWLockReportWaitEnd();
    1715             : 
    1716             :         LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
    1717             : 
    1718             :         /* Now loop back and check the status of the lock again. */
    1719             :     }
    1720             : 
    1721             :     /*
    1722             :      * Fix the process wait semaphore's count for any absorbed wakeups.
    1723             :      */
    1724     8586114 :     while (extraWaits-- > 0)
    1725           0 :         PGSemaphoreUnlock(proc->sem);
    1726             : 
    1727             :     /*
    1728             :      * Now okay to allow cancel/die interrupts.
    1729             :      */
    1730     8586114 :     RESUME_INTERRUPTS();
    1731             : 
    1732     8586114 :     return result;
    1733             : }
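
On the waiter side, this function is typically driven in a loop: keep
waiting until either the lock is free or the protected variable has advanced
past the point we care about. A hedged sketch, where insertingAt, start_pos,
and target_pos are hypothetical stand-ins for the xlog insertion-position
usage mentioned above:

    /* Assumes a pg_atomic_uint64 insertingAt living in shared memory. */
    uint64      seen = start_pos;
    uint64      newval;

    for (;;)
    {
        if (LWLockWaitForVar(lock, &insertingAt, seen, &newval))
            break;              /* lock is free: the holder is done */
        if (newval >= target_pos)
            break;              /* still held, but advanced far enough */
        seen = newval;          /* wait for further progress from here */
    }
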
    1734             : 
    1735             : 
    1736             : /*
    1737             :  * LWLockUpdateVar - Update a variable and wake up waiters atomically
    1738             :  *
    1739             :  * Sets *valptr to 'val', and wakes up all processes waiting for us with
    1740             :  * LWLockWaitForVar().  It first sets the value atomically and then wakes up
    1741             :  * waiting processes so that any process calling LWLockWaitForVar() on the same
    1742             :  * lock is guaranteed to see the new value, and act accordingly.
    1743             :  *
    1744             :  * The caller must be holding the lock in exclusive mode.
    1745             :  */
    1746             : void
    1747     1249850 : LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1748             : {
    1749             :     proclist_head wakeup;
    1750             :     proclist_mutable_iter iter;
    1751             : 
    1752             :     PRINT_LWDEBUG("LWLockUpdateVar", lock, LW_EXCLUSIVE);
    1753             : 
    1754             :     /*
    1755             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1756             :      * that the variable is updated before waking up waiters.
    1757             :      */
    1758     1249850 :     pg_atomic_exchange_u64(valptr, val);
    1759             : 
    1760     1249850 :     proclist_init(&wakeup);
    1761             : 
    1762     1249850 :     LWLockWaitListLock(lock);
    1763             : 
    1764             :     Assert(pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE);
    1765             : 
    1766             :     /*
    1767             :      * See if there are any LW_WAIT_UNTIL_FREE waiters that need to be woken
     1768             :      * up. They are always at the front of the queue.
    1769             :      */
    1770     1250098 :     proclist_foreach_modify(iter, &lock->waiters, lwWaitLink)
    1771             :     {
    1772         374 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1773             : 
    1774         374 :         if (waiter->lwWaitMode != LW_WAIT_UNTIL_FREE)
    1775         126 :             break;
    1776             : 
    1777         248 :         proclist_delete(&lock->waiters, iter.cur, lwWaitLink);
    1778         248 :         proclist_push_tail(&wakeup, iter.cur, lwWaitLink);
    1779             : 
    1780             :         /* see LWLockWakeup() */
    1781             :         Assert(waiter->lwWaiting == LW_WS_WAITING);
    1782         248 :         waiter->lwWaiting = LW_WS_PENDING_WAKEUP;
    1783             :     }
    1784             : 
    1785             :     /* We are done updating shared state of the lock itself. */
    1786     1249850 :     LWLockWaitListUnlock(lock);
    1787             : 
    1788             :     /*
    1789             :      * Awaken any waiters I removed from the queue.
    1790             :      */
    1791     1250098 :     proclist_foreach_modify(iter, &wakeup, lwWaitLink)
    1792             :     {
    1793         248 :         PGPROC     *waiter = GetPGProcByNumber(iter.cur);
    1794             : 
    1795         248 :         proclist_delete(&wakeup, iter.cur, lwWaitLink);
    1796             :         /* check comment in LWLockWakeup() about this barrier */
    1797         248 :         pg_write_barrier();
    1798         248 :         waiter->lwWaiting = LW_WS_NOT_WAITING;
    1799         248 :         PGSemaphoreUnlock(waiter->sem);
    1800             :     }
    1801     1249850 : }
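
On the holder side, the variable is advanced with LWLockUpdateVar() as work
progresses and reset when the lock is released. A minimal sketch pairing it
with LWLockReleaseClearVar(); HaveMoreWorkToDo(), DoSomeWork(),
CurrentPosition(), and the zero "free" value are all hypothetical:

    LWLockAcquire(lock, LW_EXCLUSIVE);

    while (HaveMoreWorkToDo())
    {
        DoSomeWork();
        /* Publish progress; wakes any LWLockWaitForVar() waiters. */
        LWLockUpdateVar(lock, &insertingAt, CurrentPosition());
    }

    /* Release, resetting the variable to its "lock is free" value. */
    LWLockReleaseClearVar(lock, &insertingAt, 0);
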
    1802             : 
    1803             : 
    1804             : /*
    1805             :  * LWLockRelease - release a previously acquired lock
    1806             :  */
    1807             : void
    1808   359873078 : LWLockRelease(LWLock *lock)
    1809             : {
    1810             :     LWLockMode  mode;
    1811             :     uint32      oldstate;
    1812             :     bool        check_waiters;
    1813             :     int         i;
    1814             : 
    1815             :     /*
    1816             :      * Remove lock from list of locks held.  Usually, but not always, it will
    1817             :      * be the latest-acquired lock; so search array backwards.
    1818             :      */
    1819   397300666 :     for (i = num_held_lwlocks; --i >= 0;)
    1820   397300666 :         if (lock == held_lwlocks[i].lock)
    1821   359873078 :             break;
    1822             : 
    1823   359873078 :     if (i < 0)
    1824           0 :         elog(ERROR, "lock %s is not held", T_NAME(lock));
    1825             : 
    1826   359873078 :     mode = held_lwlocks[i].mode;
    1827             : 
    1828   359873078 :     num_held_lwlocks--;
    1829   397300666 :     for (; i < num_held_lwlocks; i++)
    1830    37427588 :         held_lwlocks[i] = held_lwlocks[i + 1];
    1831             : 
    1832             :     PRINT_LWDEBUG("LWLockRelease", lock, mode);
    1833             : 
    1834             :     /*
    1835             :      * Release my hold on lock, after that it can immediately be acquired by
    1836             :      * others, even if we still have to wakeup other waiters.
    1837             :      */
    1838   359873078 :     if (mode == LW_EXCLUSIVE)
    1839   157153000 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    1840             :     else
    1841   202720078 :         oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
    1842             : 
    1843             :     /* nobody else can have that kind of lock */
    1844             :     Assert(!(oldstate & LW_VAL_EXCLUSIVE));
    1845             : 
    1846             :     if (TRACE_POSTGRESQL_LWLOCK_RELEASE_ENABLED())
    1847             :         TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
    1848             : 
    1849             :     /*
     1850             :      * If we're still waiting for backends to get scheduled, don't wake them
     1851             :      * up again.
    1852             :      */
    1853   359873078 :     if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
    1854       89800 :         (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
    1855       89800 :         (oldstate & LW_LOCK_MASK) == 0)
    1856       56932 :         check_waiters = true;
    1857             :     else
    1858   359816146 :         check_waiters = false;
    1859             : 
    1860             :     /*
    1861             :      * As waking up waiters requires the spinlock to be acquired, only do so
    1862             :      * if necessary.
    1863             :      */
    1864   359873078 :     if (check_waiters)
    1865             :     {
    1866             :         /* XXX: remove before commit? */
    1867             :         LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
    1868       56932 :         LWLockWakeup(lock);
    1869             :     }
    1870             : 
    1871             :     /*
    1872             :      * Now okay to allow cancel/die interrupts.
    1873             :      */
    1874   359873078 :     RESUME_INTERRUPTS();
    1875   359873078 : }
    1876             : 
    1877             : /*
    1878             :  * LWLockReleaseClearVar - release a previously acquired lock, reset variable
    1879             :  */
    1880             : void
    1881    24718748 : LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
    1882             : {
    1883             :     /*
    1884             :      * Note that pg_atomic_exchange_u64 is a full barrier, so we're guaranteed
    1885             :      * that the variable is updated before releasing the lock.
    1886             :      */
    1887    24718748 :     pg_atomic_exchange_u64(valptr, val);
    1888             : 
    1889    24718748 :     LWLockRelease(lock);
    1890    24718748 : }
    1891             : 
    1892             : 
    1893             : /*
    1894             :  * LWLockReleaseAll - release all currently-held locks
    1895             :  *
    1896             :  * Used to clean up after ereport(ERROR). An important difference between this
    1897             :  * function and retail LWLockRelease calls is that InterruptHoldoffCount is
    1898             :  * unchanged by this operation.  This is necessary since InterruptHoldoffCount
    1899             :  * has been set to an appropriate level earlier in error recovery. We could
    1900             :  * decrement it below zero if we allow it to drop for each released lock!
    1901             :  */
    1902             : void
    1903       82122 : LWLockReleaseAll(void)
    1904             : {
    1905       82504 :     while (num_held_lwlocks > 0)
    1906             :     {
    1907         382 :         HOLD_INTERRUPTS();      /* match the upcoming RESUME_INTERRUPTS */
    1908             : 
    1909         382 :         LWLockRelease(held_lwlocks[num_held_lwlocks - 1].lock);
    1910             :     }
    1911       82122 : }
    1912             : 
    1913             : 
    1914             : /*
    1915             :  * LWLockHeldByMe - test whether my process holds a lock in any mode
    1916             :  *
    1917             :  * This is meant as debug support only.
    1918             :  */
    1919             : bool
    1920           0 : LWLockHeldByMe(LWLock *lock)
    1921             : {
    1922             :     int         i;
    1923             : 
    1924           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1925             :     {
    1926           0 :         if (held_lwlocks[i].lock == lock)
    1927           0 :             return true;
    1928             :     }
    1929           0 :     return false;
    1930             : }
    1931             : 
    1932             : /*
     1933             :  * LWLockAnyHeldByMe - test whether my process holds any of an array of locks
    1934             :  *
    1935             :  * This is meant as debug support only.
    1936             :  */
    1937             : bool
    1938           0 : LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride)
    1939             : {
    1940             :     char       *held_lock_addr;
    1941             :     char       *begin;
    1942             :     char       *end;
    1943             :     int         i;
    1944             : 
    1945           0 :     begin = (char *) lock;
    1946           0 :     end = begin + nlocks * stride;
    1947           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1948             :     {
    1949           0 :         held_lock_addr = (char *) held_lwlocks[i].lock;
    1950           0 :         if (held_lock_addr >= begin &&
    1951           0 :             held_lock_addr < end &&
    1952           0 :             (held_lock_addr - begin) % stride == 0)
    1953           0 :             return true;
    1954             :     }
    1955           0 :     return false;
    1956             : }
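
The nlocks/stride interface lets a caller ask about every lock embedded in
an array of equal-sized structs with a single call. A hedged sketch with a
hypothetical per-partition struct (real LWLocks must live in shared memory
and be initialized with LWLockInitialize()):

    typedef struct PartitionEntry
    {
        LWLock      lock;
        int         payload;
    } PartitionEntry;

    static PartitionEntry entries[16];

    /* True iff this backend holds any of entries[i].lock, in any mode. */
    bool        any_held = LWLockAnyHeldByMe(&entries[0].lock, 16,
                                             sizeof(PartitionEntry));
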
    1957             : 
    1958             : /*
    1959             :  * LWLockHeldByMeInMode - test whether my process holds a lock in given mode
    1960             :  *
    1961             :  * This is meant as debug support only.
    1962             :  */
    1963             : bool
    1964           0 : LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode)
    1965             : {
    1966             :     int         i;
    1967             : 
    1968           0 :     for (i = 0; i < num_held_lwlocks; i++)
    1969             :     {
    1970           0 :         if (held_lwlocks[i].lock == lock && held_lwlocks[i].mode == mode)
    1971           0 :             return true;
    1972             :     }
    1973           0 :     return false;
    1974             : }

Generated by: LCOV version 1.14