LCOV - code coverage report
Current view: top level - src/backend/storage/lmgr - s_lock.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 32 38 84.2 %
Date: 2024-04-19 02:11:33 Functions: 5 6 83.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * s_lock.c
       4             :  *     Hardware-dependent implementation of spinlocks.
       5             :  *
       6             :  * When waiting for a contended spinlock we loop tightly for a while, then
       7             :  * delay using pg_usleep() and try again.  Preferably, "a while" should be a
       8             :  * small multiple of the maximum time we expect a spinlock to be held.  100
       9             :  * iterations seems about right as an initial guess.  However, on a
      10             :  * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario
      11             :  * it's usually better to spin a bit longer than to call the kernel, so we try
      12             :  * to adapt the spin loop count depending on whether we seem to be in a
      13             :  * uniprocessor or multiprocessor.
      14             :  *
      15             :  * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
      16             :  * be wrong; there are platforms where that can result in a "stuck
      17             :  * spinlock" failure.  This has been seen particularly on Alphas; it seems
      18             :  * that the first TAS after returning from kernel space will always fail
      19             :  * on that hardware.
      20             :  *
      21             :  * Once we do decide to block, we use randomly increasing pg_usleep()
      22             :  * delays. The first delay is 1 msec, then the delay randomly increases to
      23             :  * about one second, after which we reset to 1 msec and start again.  The
      24             :  * idea here is that in the presence of heavy contention we need to
      25             :  * increase the delay, else the spinlock holder may never get to run and
      26             :  * release the lock.  (Consider situation where spinlock holder has been
      27             :  * nice'd down in priority by the scheduler --- it will not get scheduled
      28             :  * until all would-be acquirers are sleeping, so if we always use a 1-msec
      29             :  * sleep, there is a real possibility of starvation.)  But we can't just
      30             :  * clamp the delay to an upper bound, else it would take a long time to
      31             :  * make a reasonable number of tries.
      32             :  *
      33             :  * We time out and declare error after NUM_DELAYS delays (thus, exactly
      34             :  * that many tries).  With the given settings, this will usually take 2 or
      35             :  * so minutes.  It seems better to fix the total number of tries (and thus
      36             :  * the probability of unintended failure) than to fix the total time
      37             :  * spent.
      38             :  *
      39             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      40             :  * Portions Copyright (c) 1994, Regents of the University of California
      41             :  *
      42             :  *
      43             :  * IDENTIFICATION
      44             :  *    src/backend/storage/lmgr/s_lock.c
      45             :  *
      46             :  *-------------------------------------------------------------------------
      47             :  */
      48             : #include "postgres.h"
      49             : 
      50             : #include <time.h>
      51             : #include <unistd.h>
      52             : 
      53             : #include "common/pg_prng.h"
      54             : #include "port/atomics.h"
      55             : #include "storage/s_lock.h"
      56             : #include "utils/wait_event.h"
      57             : 
      58             : #define MIN_SPINS_PER_DELAY 10	/* floor for spins_per_delay */
      59             : #define MAX_SPINS_PER_DELAY 1000	/* ceiling for spins_per_delay */
      60             : #define NUM_DELAYS          1000	/* sleeps allowed before a spinlock is reported stuck */
      61             : #define MIN_DELAY_USEC      1000L	/* first sleep length, in microseconds */
      62             : #define MAX_DELAY_USEC      1000000L	/* once exceeded, sleep length wraps back to the minimum */
      63             : 
      64             : #ifdef S_LOCK_TEST
      65             : /*
      66             :  * These are needed by pgstat_report_wait_start in the standalone compile of
      67             :  * s_lock_test.
      68             :  */
      69             : static uint32 local_my_wait_event_info;	/* private storage for the pointer below */
      70             : uint32     *my_wait_event_info = &local_my_wait_event_info;
      71             : #endif							/* S_LOCK_TEST */
      72             : 
      73             : static int  spins_per_delay = DEFAULT_SPINS_PER_DELAY;	/* this process's current estimate: failed tries between sleeps */
      74             : 
      75             : 
      76             : /*
      77             :  * s_lock_stuck() - complain about a stuck spinlock, naming the given func, file and line
      78             :  */
      79             : static void
      80           0 : s_lock_stuck(const char *file, int line, const char *func)
      81             : {
      82           0 :     if (!func)					/* no function name supplied */
      83           0 :         func = "(unknown)";
      84             : #if defined(S_LOCK_TEST)
      85             :     fprintf(stderr,
      86             :             "\nStuck spinlock detected at %s, %s:%d.\n",
      87             :             func, file, line);
      88             :     exit(1);					/* standalone test build: message on stderr, then quit */
      89             : #else
      90           0 :     elog(PANIC, "stuck spinlock detected at %s, %s:%d",
      91             :          func, file, line);		/* regular build: report through elog at PANIC level */
      92             : #endif
      93             : }
      94             : 
      95             : /*
      96             :  * s_lock(lock) - platform-independent portion of waiting for a spinlock.  Returns the number of sleeps taken.
      97             :  */
      98             : int
      99       12604 : s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
     100             : {
     101             :     SpinDelayStatus delayStatus;	/* spin/sleep bookkeeping for this one acquisition */
     102             : 
     103       12604 :     init_spin_delay(&delayStatus, file, line, func);	/* file/line/func are kept for a possible stuck-lock report */
     104             : 
     105      199504 :     while (TAS_SPIN(lock))		/* nonzero result: not acquired yet, so wait and retry */
     106             :     {
     107      186900 :         perform_spin_delay(&delayStatus);
     108             :     }
     109             : 
     110       12604 :     finish_spin_delay(&delayStatus);	/* feed the outcome back into spins_per_delay */
     111             : 
     112       12604 :     return delayStatus.delays;
     113             : }
     114             : 
     115             : #ifdef USE_DEFAULT_S_UNLOCK		/* out-of-line release routine, compiled only when this symbol is defined */
     116             : void
     117             : s_unlock(volatile slock_t *lock)
     118             : {
     119             : #ifdef TAS_ACTIVE_WORD
     120             :     /* HP's PA-RISC */
     121             :     *TAS_ACTIVE_WORD(lock) = -1;	/* on this platform the released state is written as -1 to the active word */
     122             : #else
     123             :     *lock = 0;					/* otherwise releasing is a plain store of zero */
     124             : #endif
     125             : }
     126             : #endif							/* USE_DEFAULT_S_UNLOCK */
     127             : 
     128             : /*
     129             :  * Wait while spinning on a contended spinlock.  Called once per failed attempt; sleeps on every spins_per_delay-th call.
     130             :  */
     131             : void
     132      204286 : perform_spin_delay(SpinDelayStatus *status)
     133             : {
     134             :     /* CPU-specific delay each time through the loop */
     135      204286 :     SPIN_DELAY();
     136             : 
     137             :     /* Block the process every spins_per_delay tries */
     138      204286 :     if (++(status->spins) >= spins_per_delay)
     139             :     {
     140         198 :         if (++(status->delays) > NUM_DELAYS)	/* sleep budget exhausted: give up */
     141           0 :             s_lock_stuck(status->file, status->line, status->func);
     142             : 
     143         198 :         if (status->cur_delay == 0) /* first time to delay? */
     144          90 :             status->cur_delay = MIN_DELAY_USEC;
     145             : 
     146             :         /*
     147             :          * Once we start sleeping, the overhead of reporting a wait event is
     148             :          * justified. Actively spinning easily stands out in profilers, but
     149             :          * sleeping with an exponential backoff is harder to spot...
     150             :          *
     151             :          * We might want to report something more granular at some point, but
     152             :          * this is better than nothing.
     153             :          */
     154         198 :         pgstat_report_wait_start(WAIT_EVENT_SPIN_DELAY);
     155         198 :         pg_usleep(status->cur_delay);
     156         198 :         pgstat_report_wait_end();
     157             : 
     158             : #if defined(S_LOCK_TEST)
     159             :         fprintf(stdout, "*");	/* one star per sleep; the test program tells the user how many to expect */
     160             :         fflush(stdout);
     161             : #endif
     162             : 
     163             :         /* grow the delay to between 1X and 2X its current value: add a random fraction of itself, rounded */
     164         396 :         status->cur_delay += (int) (status->cur_delay *
     165         198 :                                     pg_prng_double(&pg_global_prng_state) + 0.5);
     166             :         /* wrap back to minimum delay when max is exceeded */
     167         198 :         if (status->cur_delay > MAX_DELAY_USEC)
     168           0 :             status->cur_delay = MIN_DELAY_USEC;
     169             : 
     170         198 :         status->spins = 0;		/* begin a fresh round of spinning before the next sleep */
     171             :     }
     172      204286 : }
     173             : 
     174             : /*
     175             :  * After acquiring a spinlock, update estimates about how long to loop.
     176             :  *
     177             :  * If we were able to acquire the lock without delaying, it's a good
     178             :  * indication we are in a multiprocessor.  If we had to delay, it's a sign
     179             :  * (but not a sure thing) that we are in a uniprocessor. Hence, we
     180             :  * decrement spins_per_delay slowly when we had to delay, and increase it
     181             :  * rapidly when we didn't.  It's expected that spins_per_delay will
     182             :  * converge to the minimum value on a uniprocessor and to the maximum
     183             :  * value on a multiprocessor.
     184             :  *
     185             :  * Note: spins_per_delay is local within our current process. We want to
     186             :  * average these observations across multiple backends, since it's
     187             :  * relatively rare for this function to even get entered, and so a single
     188             :  * backend might not live long enough to converge on a good value.  That
     189             :  * is handled by the two routines below.
     190             :  */
     191             : void
     192    47369574 : finish_spin_delay(SpinDelayStatus *status)
     193             : {
     194    47369574 :     if (status->cur_delay == 0)	/* cur_delay is only set once a sleep happens */
     195             :     {
     196             :         /* we never had to delay */
     197    47369484 :         if (spins_per_delay < MAX_SPINS_PER_DELAY)
     198       91994 :             spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY);	/* step up by 100, capped at the maximum */
     199             :     }
     200             :     else
     201             :     {
     202          90 :         if (spins_per_delay > MIN_SPINS_PER_DELAY)
     203          90 :             spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);	/* step down by 1, floored at the minimum */
     204             :     }
     205    47369574 : }
     206             : 
     207             : /*
     208             :  * Set local copy of spins_per_delay during backend startup.
     209             :  *
     210             :  * NB: this has to be pretty fast as it is called while holding a spinlock
     211             :  */
     212             : void
     213       29864 : set_spins_per_delay(int shared_spins_per_delay)
     214             : {
     215       29864 :     spins_per_delay = shared_spins_per_delay;	/* taken as-is; no range check is applied here */
     216       29864 : }
     217             : 
     218             : /*
     219             :  * Update shared estimate of spins_per_delay during backend exit.
     220             :  *
     221             :  * NB: this has to be pretty fast as it is called while holding a spinlock
     222             :  */
     223             : int
     224       29860 : update_spins_per_delay(int shared_spins_per_delay)
     225             : {
     226             :     /*
     227             :      * We use an exponential moving average with a relatively slow adaptation
     228             :      * rate, so that noise in any one backend's result won't affect the shared
     229             :      * value too much.  As long as both inputs are within the allowed range,
     230             :      * the result must be too, so we need not worry about clamping the result.
     231             :      *
     232             :      * We deliberately truncate rather than rounding; this is so that single
     233             :      * adjustments inside a backend can affect the shared estimate (see the
     234             :      * asymmetric adjustment rules above).
     235             :      */
     236       29860 :     return (shared_spins_per_delay * 15 + spins_per_delay) / 16;	/* weights: 15/16 shared value, 1/16 local value; integer division truncates */
     237             : }
     238             : 
     239             : 
     240             : /*****************************************************************************/
     241             : #if defined(S_LOCK_TEST)
     242             : 
     243             : /*
     244             :  * test program for verifying a port's spinlock support.
     245             :  */
     246             : 
     247             : struct test_lock_struct			/* a lock sandwiched between two guard bytes */
     248             : {
     249             :     char        pad1;			/* guard byte before the lock; main() checks it keeps its value */
     250             :     slock_t     lock;			/* the spinlock under test */
     251             :     char        pad2;			/* guard byte after the lock; main() checks it keeps its value */
     252             : };
     253             : 
     254             : volatile struct test_lock_struct test_lock;	/* the single instance exercised by main() */
     255             : 
     256             : int
     257             : main()
     258             : {
     259             :     pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));	/* seed the global PRNG from the current time */
     260             : 
     261             :     test_lock.pad1 = test_lock.pad2 = 0x44;	/* known pattern in both guard bytes */
     262             : 
     263             :     S_INIT_LOCK(&test_lock.lock);
     264             : 
     265             :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)	/* a changed guard byte means the lock operation wrote outside slock_t */
     266             :     {
     267             :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     268             :         return 1;
     269             :     }
     270             : 
     271             :     if (!S_LOCK_FREE(&test_lock.lock))	/* a freshly initialized lock must read as free */
     272             :     {
     273             :         printf("S_LOCK_TEST: failed, lock not initialized\n");
     274             :         return 1;
     275             :     }
     276             : 
     277             :     S_LOCK(&test_lock.lock);	/* first acquisition */
     278             : 
     279             :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     280             :     {
     281             :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     282             :         return 1;
     283             :     }
     284             : 
     285             :     if (S_LOCK_FREE(&test_lock.lock))	/* must now read as held */
     286             :     {
     287             :         printf("S_LOCK_TEST: failed, lock not locked\n");
     288             :         return 1;
     289             :     }
     290             : 
     291             :     S_UNLOCK(&test_lock.lock);	/* release */
     292             : 
     293             :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     294             :     {
     295             :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     296             :         return 1;
     297             :     }
     298             : 
     299             :     if (!S_LOCK_FREE(&test_lock.lock))	/* must read as free again */
     300             :     {
     301             :         printf("S_LOCK_TEST: failed, lock not unlocked\n");
     302             :         return 1;
     303             :     }
     304             : 
     305             :     S_LOCK(&test_lock.lock);	/* second acquisition; the lock stays held from here on */
     306             : 
     307             :     if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
     308             :     {
     309             :         printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
     310             :         return 1;
     311             :     }
     312             : 
     313             :     if (S_LOCK_FREE(&test_lock.lock))
     314             :     {
     315             :         printf("S_LOCK_TEST: failed, lock not re-locked\n");
     316             :         return 1;
     317             :     }
     318             : 
     319             :     printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
     320             :     printf("             exit with a 'stuck spinlock' message\n");
     321             :     printf("             if S_LOCK() and TAS() are working.\n");
     322             :     fflush(stdout);
     323             : 
     324             :     s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);	/* lock is already held: expected to sleep NUM_DELAYS times and then exit via s_lock_stuck() */
     325             : 
     326             :     printf("S_LOCK_TEST: failed, lock not locked\n");	/* reached only if s_lock() returned, i.e. it acquired a held lock */
     327             :     return 1;
     328             : }
     329             : 
     330             : #endif                          /* S_LOCK_TEST */

Generated by: LCOV version 1.14