LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - s_lock.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 84.2 % 38 32
Test Date: 2026-02-28 08:14:42 Functions: 83.3 % 6 5
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * s_lock.c
       4              :  *     Implementation of spinlocks.
       5              :  *
       6              :  * When waiting for a contended spinlock we loop tightly for a while, then
       7              :  * delay using pg_usleep() and try again.  Preferably, "a while" should be a
       8              :  * small multiple of the maximum time we expect a spinlock to be held.  100
       9              :  * iterations seems about right as an initial guess.  However, on a
      10              :  * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario
      11              :  * it's usually better to spin a bit longer than to call the kernel, so we try
      12              :  * to adapt the spin loop count depending on whether we seem to be in a
      13              :  * uniprocessor or multiprocessor.
      14              :  *
      15              :  * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
      16              :  * be wrong; there are platforms where that can result in a "stuck
      17              :  * spinlock" failure.  This has been seen particularly on Alphas; it seems
      18              :  * that the first TAS after returning from kernel space will always fail
      19              :  * on that hardware.
      20              :  *
      21              :  * Once we do decide to block, we use randomly increasing pg_usleep()
      22              :  * delays. The first delay is 1 msec, then the delay randomly increases to
      23              :  * about one second, after which we reset to 1 msec and start again.  The
      24              :  * idea here is that in the presence of heavy contention we need to
      25              :  * increase the delay, else the spinlock holder may never get to run and
      26              :  * release the lock.  (Consider situation where spinlock holder has been
      27              :  * nice'd down in priority by the scheduler --- it will not get scheduled
      28              :  * until all would-be acquirers are sleeping, so if we always use a 1-msec
      29              :  * sleep, there is a real possibility of starvation.)  But we can't just
      30              :  * clamp the delay to an upper bound, else it would take a long time to
      31              :  * make a reasonable number of tries.
      32              :  *
      33              :  * We time out and declare error after NUM_DELAYS delays (thus, exactly
      34              :  * that many tries).  With the given settings, this will usually take 2 or
      35              :  * so minutes.  It seems better to fix the total number of tries (and thus
      36              :  * the probability of unintended failure) than to fix the total time
      37              :  * spent.
      38              :  *
      39              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      40              :  * Portions Copyright (c) 1994, Regents of the University of California
      41              :  *
      42              :  *
      43              :  * IDENTIFICATION
      44              :  *    src/backend/storage/lmgr/s_lock.c
      45              :  *
      46              :  *-------------------------------------------------------------------------
      47              :  */
      48              : #include "postgres.h"
      49              : 
      50              : #include <time.h>
      51              : #include <unistd.h>
      52              : 
      53              : #include "common/pg_prng.h"
      54              : #include "storage/s_lock.h"
      55              : #include "utils/wait_event.h"
      56              : 
      57              : #define MIN_SPINS_PER_DELAY 10
      58              : #define MAX_SPINS_PER_DELAY 1000
      59              : #define NUM_DELAYS          1000
      60              : #define MIN_DELAY_USEC      1000L
      61              : #define MAX_DELAY_USEC      1000000L
      62              : 
      63              : #ifdef S_LOCK_TEST
      64              : /*
      65              :  * These are needed by pgstat_report_wait_start in the standalone compile of
      66              :  * s_lock_test.
      67              :  */
      68              : static uint32 local_my_wait_event_info;
      69              : uint32     *my_wait_event_info = &local_my_wait_event_info;
      70              : #endif
      71              : 
/*
 * Number of spin iterations to perform before sleeping.  Seeded from
 * DEFAULT_SPINS_PER_DELAY, then adapted at runtime by finish_spin_delay()
 * and synchronized with the cluster-wide estimate via
 * set_spins_per_delay()/update_spins_per_delay().
 */
static int  spins_per_delay = DEFAULT_SPINS_PER_DELAY;
      73              : 
      74              : 
/*
 * s_lock_stuck() - complain about a stuck spinlock
 *
 * Reached after NUM_DELAYS sleeps without acquiring the lock.  There is
 * no recovery path: the standalone S_LOCK_TEST build reports to stderr
 * and exits, while a normal backend PANICs (forcing a cluster restart,
 * since a stuck spinlock means either a locking bug or a wedged holder).
 */
static void
s_lock_stuck(const char *file, int line, const char *func)
{
	/* callers may pass a NULL function name; substitute a placeholder */
	if (!func)
		func = "(unknown)";
#if defined(S_LOCK_TEST)
	fprintf(stderr,
			"\nStuck spinlock detected at %s, %s:%d.\n",
			func, file, line);
	exit(1);
#else
	elog(PANIC, "stuck spinlock detected at %s, %s:%d",
		 func, file, line);
#endif
}
      93              : 
      94              : /*
      95              :  * s_lock(lock) - platform-independent portion of waiting for a spinlock.
      96              :  */
      97              : int
      98         8123 : s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
      99              : {
     100              :     SpinDelayStatus delayStatus;
     101              : 
     102         8123 :     init_spin_delay(&delayStatus, file, line, func);
     103              : 
     104       204438 :     while (TAS_SPIN(lock))
     105              :     {
     106       196315 :         perform_spin_delay(&delayStatus);
     107              :     }
     108              : 
     109         8123 :     finish_spin_delay(&delayStatus);
     110              : 
     111         8123 :     return delayStatus.delays;
     112              : }
     113              : 
     114              : #ifdef USE_DEFAULT_S_UNLOCK
     115              : void
     116              : s_unlock(volatile slock_t *lock)
     117              : {
     118              :     *lock = 0;
     119              : }
     120              : #endif
     121              : 
     122              : /*
     123              :  * Wait while spinning on a contended spinlock.
     124              :  */
     125              : void
     126       369721 : perform_spin_delay(SpinDelayStatus *status)
     127              : {
     128              :     /* CPU-specific delay each time through the loop */
     129       369721 :     SPIN_DELAY();
     130              : 
     131              :     /* Block the process every spins_per_delay tries */
     132       369721 :     if (++(status->spins) >= spins_per_delay)
     133              :     {
     134          331 :         if (++(status->delays) > NUM_DELAYS)
     135            0 :             s_lock_stuck(status->file, status->line, status->func);
     136              : 
     137          331 :         if (status->cur_delay == 0) /* first time to delay? */
     138          181 :             status->cur_delay = MIN_DELAY_USEC;
     139              : 
     140              :         /*
     141              :          * Once we start sleeping, the overhead of reporting a wait event is
     142              :          * justified. Actively spinning easily stands out in profilers, but
     143              :          * sleeping with an exponential backoff is harder to spot...
     144              :          *
     145              :          * We might want to report something more granular at some point, but
     146              :          * this is better than nothing.
     147              :          */
     148          331 :         pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
     149          331 :         pg_usleep(status->cur_delay);
     150          331 :         pgstat_report_wait_end();
     151              : 
     152              : #if defined(S_LOCK_TEST)
     153              :         fprintf(stdout, "*");
     154              :         fflush(stdout);
     155              : #endif
     156              : 
     157              :         /* increase delay by a random fraction between 1X and 2X */
     158          662 :         status->cur_delay += (int) (status->cur_delay *
     159          331 :                                     pg_prng_double(&pg_global_prng_state) + 0.5);
     160              :         /* wrap back to minimum delay when max is exceeded */
     161          331 :         if (status->cur_delay > MAX_DELAY_USEC)
     162            0 :             status->cur_delay = MIN_DELAY_USEC;
     163              : 
     164          331 :         status->spins = 0;
     165              :     }
     166       369721 : }
     167              : 
     168              : /*
     169              :  * After acquiring a spinlock, update estimates about how long to loop.
     170              :  *
     171              :  * If we were able to acquire the lock without delaying, it's a good
     172              :  * indication we are in a multiprocessor.  If we had to delay, it's a sign
     173              :  * (but not a sure thing) that we are in a uniprocessor. Hence, we
     174              :  * decrement spins_per_delay slowly when we had to delay, and increase it
     175              :  * rapidly when we didn't.  It's expected that spins_per_delay will
     176              :  * converge to the minimum value on a uniprocessor and to the maximum
     177              :  * value on a multiprocessor.
     178              :  *
     179              :  * Note: spins_per_delay is local within our current process. We want to
     180              :  * average these observations across multiple backends, since it's
     181              :  * relatively rare for this function to even get entered, and so a single
     182              :  * backend might not live long enough to converge on a good value.  That
     183              :  * is handled by the two routines below.
     184              :  */
     185              : void
     186        53284 : finish_spin_delay(SpinDelayStatus *status)
     187              : {
     188        53284 :     if (status->cur_delay == 0)
     189              :     {
     190              :         /* we never had to delay */
     191        53103 :         if (spins_per_delay < MAX_SPINS_PER_DELAY)
     192         3342 :             spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY);
     193              :     }
     194              :     else
     195              :     {
     196          181 :         if (spins_per_delay > MIN_SPINS_PER_DELAY)
     197          181 :             spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);
     198              :     }
     199        53284 : }
     200              : 
/*
 * Set local copy of spins_per_delay during backend startup.
 *
 * shared_spins_per_delay is the cluster-wide estimate being inherited;
 * it simply overwrites this process's local value.
 *
 * NB: this has to be pretty fast as it is called while holding a spinlock
 */
void
set_spins_per_delay(int shared_spins_per_delay)
{
    spins_per_delay = shared_spins_per_delay;
}
     211              : 
     212              : /*
     213              :  * Update shared estimate of spins_per_delay during backend exit.
     214              :  *
     215              :  * NB: this has to be pretty fast as it is called while holding a spinlock
     216              :  */
     217              : int
     218        23417 : update_spins_per_delay(int shared_spins_per_delay)
     219              : {
     220              :     /*
     221              :      * We use an exponential moving average with a relatively slow adaption
     222              :      * rate, so that noise in any one backend's result won't affect the shared
     223              :      * value too much.  As long as both inputs are within the allowed range,
     224              :      * the result must be too, so we need not worry about clamping the result.
     225              :      *
     226              :      * We deliberately truncate rather than rounding; this is so that single
     227              :      * adjustments inside a backend can affect the shared estimate (see the
     228              :      * asymmetric adjustment rules above).
     229              :      */
     230        23417 :     return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
     231              : }
     232              : 
     233              : 
     234              : /*****************************************************************************/
     235              : #if defined(S_LOCK_TEST)
     236              : 
/*
 * test program for verifying a port's spinlock support.
 */

/*
 * The lock word is sandwiched between two guard bytes so the test can
 * detect an slock_t declaration that is smaller than the region TAS()
 * actually writes (a clobbered pad byte reveals the mismatch).
 */
struct test_lock_struct
{
    char        pad1;           /* guard byte before the lock */
    slock_t     lock;
    char        pad2;           /* guard byte after the lock */
};

volatile struct test_lock_struct test_lock;
     249              : 
     250              : int
     251              : main()
     252              : {
     253              :     pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
     254              : 
     255              :     test_lock.pad1 = test_lock.pad2 = 0x44;
     256              : 
     257              :     S_INIT_LOCK(&test_lock.lock);
     258              : 
     259              :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     260              :     {
     261              :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     262              :         return 1;
     263              :     }
     264              : 
     265              :     S_LOCK(&test_lock.lock);
     266              : 
     267              :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     268              :     {
     269              :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     270              :         return 1;
     271              :     }
     272              : 
     273              :     S_UNLOCK(&test_lock.lock);
     274              : 
     275              :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     276              :     {
     277              :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     278              :         return 1;
     279              :     }
     280              : 
     281              :     S_LOCK(&test_lock.lock);
     282              : 
     283              :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     284              :     {
     285              :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     286              :         return 1;
     287              :     }
     288              : 
     289              :     printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
     290              :     printf("             exit with a 'stuck spinlock' message\n");
     291              :     printf("             if S_LOCK() and TAS() are working.\n");
     292              :     fflush(stdout);
     293              : 
     294              :     s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
     295              : 
     296              :     printf("S_LOCK_TEST: failed, lock not locked\n");
     297              :     return 1;
     298              : }
     299              : 
     300              : #endif                          /* S_LOCK_TEST */
        

Generated by: LCOV version 2.0-1