LCOV - code coverage report
Current view:  top level - src/include/storage - s_lock.h  (source / functions)
Test:          PostgreSQL 15devel
Date:          2021-12-05 02:08:31
Coverage:      Lines: 15 of 15 hit (100.0 %)    Functions: 3 of 3 hit (100.0 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * s_lock.h
       4             :  *     Hardware-dependent implementation of spinlocks.
       5             :  *
       6             :  *  NOTE: none of the macros in this file are intended to be called directly.
       7             :  *  Call them through the hardware-independent macros in spin.h.
       8             :  *
       9             :  *  The following hardware-dependent macros must be provided for each
      10             :  *  supported platform:
      11             :  *
      12             :  *  void S_INIT_LOCK(slock_t *lock)
      13             :  *      Initialize a spinlock (to the unlocked state).
      14             :  *
      15             :  *  int S_LOCK(slock_t *lock)
      16             :  *      Acquire a spinlock, waiting if necessary.
      17             :  *      Time out and abort() if unable to acquire the lock in a
      18             :  *      "reasonable" amount of time --- typically ~ 1 minute.
      19             :  *      Should return number of "delays"; see s_lock.c
      20             :  *
      21             :  *  void S_UNLOCK(slock_t *lock)
      22             :  *      Unlock a previously acquired lock.
      23             :  *
      24             :  *  bool S_LOCK_FREE(slock_t *lock)
      25             :  *      Tests if the lock is free. Returns true if free, false if locked.
      26             :  *      This does *not* change the state of the lock.
      27             :  *
      28             :  *  void SPIN_DELAY(void)
      29             :  *      Delay operation to occur inside spinlock wait loop.
      30             :  *
      31             :  *  Note to implementors: there are default implementations for all these
      32             :  *  macros at the bottom of the file.  Check if your platform can use
      33             :  *  these or needs to override them.
      34             :  *
      35             :  *  Usually, S_LOCK() is implemented in terms of even lower-level macros
      36             :  *  TAS() and TAS_SPIN():
      37             :  *
      38             :  *  int TAS(slock_t *lock)
      39             :  *      Atomic test-and-set instruction.  Attempt to acquire the lock,
      40             :  *      but do *not* wait.  Returns 0 if successful, nonzero if unable
      41             :  *      to acquire the lock.
      42             :  *
      43             :  *  int TAS_SPIN(slock_t *lock)
      44             :  *      Like TAS(), but this version is used when waiting for a lock
      45             :  *      previously found to be contended.  By default, this is the
      46             :  *      same as TAS(), but on some architectures it's better to poll a
      47             :  *      contended lock using an unlocked instruction and retry the
      48             :  *      atomic test-and-set only when it appears free.
      49             :  *
      50             :  *  TAS() and TAS_SPIN() are NOT part of the API, and should never be called
      51             :  *  directly.
      52             :  *
      53             :  *  CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
      54             :  *  failure to acquire a lock even when the lock is not locked.  For example,
      55             :  *  on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
      56             :  *  always be used, even if you are certain the lock is free.
      57             :  *
      58             :  *  It is the responsibility of these macros to make sure that the compiler
      59             :  *  does not re-order accesses to shared memory to precede the actual lock
      60             :  *  acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
      61             :  *  was the caller's responsibility, which meant that callers had to use
      62             :  *  volatile-qualified pointers to refer to both the spinlock itself and the
      63             :  *  shared data being accessed within the spinlocked critical section.  This
      64             :  *  was notationally awkward, easy to forget (and thus error-prone), and
      65             :  *  prevented some useful compiler optimizations.  For these reasons, we
      66             :  *  now require that the macros themselves prevent compiler re-ordering,
      67             :  *  so that the caller doesn't need to take special precautions.
      68             :  *
      69             :  *  On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
      70             :  *  S_UNLOCK() macros must further include hardware-level memory fence
      71             :  *  instructions to prevent similar re-ordering at the hardware level.
      72             :  *  TAS() and TAS_SPIN() must guarantee that loads and stores issued after
      73             :  *  the macro are not executed until the lock has been obtained.  Conversely,
      74             :  *  S_UNLOCK() must guarantee that loads and stores issued before the macro
      75             :  *  have been executed before the lock is released.
      76             :  *
      77             :  *  On most supported platforms, TAS() uses a tas() function written
      78             :  *  in assembly language to execute a hardware atomic-test-and-set
      79             :  *  instruction.  Equivalent OS-supplied mutex routines could be used too.
      80             :  *
      81             :  *  If no system-specific TAS() is available (ie, HAVE_SPINLOCKS is not
      82             :  *  defined), then we fall back on an emulation that uses SysV semaphores
      83             :  *  (see spin.c).  This emulation will be MUCH MUCH slower than a proper TAS()
      84             :  *  implementation, because of the cost of a kernel call per lock or unlock.
      85             :  *  An old report is that Postgres spends around 40% of its time in semop(2)
      86             :  *  when using the SysV semaphore code.
      87             :  *
      88             :  *
      89             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
      90             :  * Portions Copyright (c) 1994, Regents of the University of California
      91             :  *
      92             :  *    src/include/storage/s_lock.h
      93             :  *
      94             :  *-------------------------------------------------------------------------
      95             :  */
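
Callers never use the macros above directly; as the header comment says, they go through the hardware-independent wrappers in spin.h.  A minimal caller-side sketch, assuming the usual spin.h wrappers (SpinLockInit / SpinLockAcquire / SpinLockRelease) and a hypothetical shared structure (SharedCounter and bump_counter are made-up names for illustration):

    #include "storage/spin.h"

    typedef struct SharedCounter        /* hypothetical shared-memory struct */
    {
        slock_t     mutex;
        int         count;
    } SharedCounter;

    void
    bump_counter(SharedCounter *shared)
    {
        SpinLockAcquire(&shared->mutex);    /* S_LOCK(): loops until acquired */
        shared->count++;                    /* keep the held section short    */
        SpinLockRelease(&shared->mutex);    /* S_UNLOCK()                     */
    }

    /* SpinLockInit(&shared->mutex) must run once when the struct is created. */

SpinLockAcquire() expands to S_LOCK() and thus to the TAS()-based fast path defined below; SpinLockRelease() expands to S_UNLOCK().
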
      96             : #ifndef S_LOCK_H
      97             : #define S_LOCK_H
      98             : 
      99             : #ifdef FRONTEND
     100             : #error "s_lock.h may not be included from frontend code"
     101             : #endif
     102             : 
     103             : #ifdef HAVE_SPINLOCKS   /* skip spinlocks if requested */
     104             : 
     105             : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     106             : /*************************************************************************
     107             :  * All the gcc inlines
     108             :  * Gcc consistently defines the CPU as __cpu__.
     109             :  * Other compilers use __cpu or __cpu__ so we test for both in those cases.
     110             :  */
     111             : 
     112             : /*----------
     113             :  * Standard gcc asm format (assuming "volatile slock_t *lock"):
     114             : 
     115             :     __asm__ __volatile__(
     116             :         "  instruction \n"
     117             :         "  instruction \n"
     118             :         "  instruction \n"
     119             : :       "=r"(_res), "+m"(*lock)     // return register, in/out lock value
     120             : :       "r"(lock)                 // lock pointer, in input register
     121             : :       "memory", "cc");            // show clobbered registers here
     122             : 
     123             :  * The output-operands list (after first colon) should always include
     124             :  * "+m"(*lock), whether or not the asm code actually refers to this
     125             :  * operand directly.  This ensures that gcc believes the value in the
     126             :  * lock variable is used and set by the asm code.  Also, the clobbers
     127             :  * list (after third colon) should always include "memory"; this prevents
     128             :  * gcc from thinking it can cache the values of shared-memory fields
     129             :  * across the asm code.  Add "cc" if your asm code changes the condition
     130             :  * code register, and also list any temp registers the code uses.
     131             :  *----------
     132             :  */
     133             : 
     134             : 
     135             : #ifdef __i386__     /* 32-bit i386 */
     136             : #define HAS_TEST_AND_SET
     137             : 
     138             : typedef unsigned char slock_t;
     139             : 
     140             : #define TAS(lock) tas(lock)
     141             : 
     142             : static __inline__ int
     143             : tas(volatile slock_t *lock)
     144             : {
     145             :     register slock_t _res = 1;
     146             : 
     147             :     /*
     148             :      * Use a non-locking test before asserting the bus lock.  Note that the
     149             :      * extra test appears to be a small loss on some x86 platforms and a small
     150             :      * win on others; it's by no means clear that we should keep it.
     151             :      *
     152             :      * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
     153             :      * macros.  Nowadays it probably would be better to do a non-locking test
     154             :      * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
     155             :      * testing to verify that.  Without some empirical evidence, better to
     156             :      * leave it alone.
     157             :      */
     158             :     __asm__ __volatile__(
     159             :         "  cmpb    $0,%1   \n"
     160             :         "  jne     1f      \n"
     161             :         "  lock            \n"
     162             :         "  xchgb   %0,%1   \n"
     163             :         "1: \n"
     164             : :       "+q"(_res), "+m"(*lock)
     165             : :       /* no inputs */
     166             : :       "memory", "cc");
     167             :     return (int) _res;
     168             : }
     169             : 
     170             : #define SPIN_DELAY() spin_delay()
     171             : 
     172             : static __inline__ void
     173             : spin_delay(void)
     174             : {
     175             :     /*
     176             :      * This sequence is equivalent to the PAUSE instruction ("rep" is
     177             :      * ignored by old IA32 processors if the following instruction is
     178             :      * not a string operation); the IA-32 Architecture Software
     179             :      * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
     180             :      * PAUSE in the inner loop of a spin lock is necessary for good
     181             :      * performance:
     182             :      *
     183             :      *     The PAUSE instruction improves the performance of IA-32
     184             :      *     processors supporting Hyper-Threading Technology when
     185             :      *     executing spin-wait loops and other routines where one
     186             :      *     thread is accessing a shared lock or semaphore in a tight
     187             :      *     polling loop. When executing a spin-wait loop, the
     188             :      *     processor can suffer a severe performance penalty when
     189             :      *     exiting the loop because it detects a possible memory order
     190             :      *     violation and flushes the core processor's pipeline. The
     191             :      *     PAUSE instruction provides a hint to the processor that the
     192             :      *     code sequence is a spin-wait loop. The processor uses this
     193             :      *     hint to avoid the memory order violation and prevent the
     194             :      *     pipeline flush. In addition, the PAUSE instruction
     195             :      *     de-pipelines the spin-wait loop to prevent it from
     196             :      *     consuming execution resources excessively.
     197             :      */
     198             :     __asm__ __volatile__(
     199             :         " rep; nop         \n");
     200             : }
     201             : 
     202             : #endif   /* __i386__ */
     203             : 
     204             : 
     205             : #ifdef __x86_64__       /* AMD Opteron, Intel EM64T */
     206             : #define HAS_TEST_AND_SET
     207             : 
     208             : typedef unsigned char slock_t;
     209             : 
     210             : #define TAS(lock) tas(lock)
     211             : 
     212             : /*
     213             :  * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
     214             :  * but only when spinning.
     215             :  *
     216             :  * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
     217             :  * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
     218             :  * available at:
     219             :  * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
     220             :  */
     221             : #define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
     222             : 
     223             : static __inline__ int
     224    74015026 : tas(volatile slock_t *lock)
     225             : {
     226    74015026 :     register slock_t _res = 1;
     227             : 
     228    74015026 :     __asm__ __volatile__(
     229             :         "  lock            \n"
     230             :         "  xchgb   %0,%1   \n"
     231             : :       "+q"(_res), "+m"(*lock)
     232             : :       /* no inputs */
     233             : :       "memory", "cc");
     234    74015026 :     return (int) _res;
     235             : }
     236             : 
     237             : #define SPIN_DELAY() spin_delay()
     238             : 
     239             : static __inline__ void
     240      109652 : spin_delay(void)
     241             : {
     242             :     /*
     243             :      * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
     244             :      * Opteron, but it may be of some use on EM64T, so we keep it.
     245             :      */
     246      109652 :     __asm__ __volatile__(
     247             :         " rep; nop         \n");
     248      109652 : }
     249             : 
     250             : #endif   /* __x86_64__ */
     251             : 
     252             : 
     253             : #if defined(__ia64__) || defined(__ia64)
     254             : /*
     255             :  * Intel Itanium, gcc or Intel's compiler.
     256             :  *
     257             :  * Itanium has weak memory ordering, but we rely on the compiler to enforce
     258             :  * strict ordering of accesses to volatile data.  In particular, while the
     259             :  * xchg instruction implicitly acts as a memory barrier with 'acquire'
     260             :  * semantics, we do not have an explicit memory fence instruction in the
     261             :  * S_UNLOCK macro.  We use a regular assignment to clear the spinlock, and
     262             :  * trust that the compiler marks the generated store instruction with the
     263             :  * ".rel" opcode.
     264             :  *
     265             :  * Testing shows that assumption to hold on gcc, although I could not find
     266             :  * any explicit statement on that in the gcc manual.  In Intel's compiler,
     267             :  * the -m[no-]serialize-volatile option controls that, and testing shows that
     268             :  * it is enabled by default.
     269             :  *
     270             :  * While icc accepts gcc asm blocks on x86[_64], this is not true on ia64
     271             :  * (at least not in icc versions before 12.x).  So we have to carry a separate
     272             :  * compiler-intrinsic-based implementation for it.
     273             :  */
     274             : #define HAS_TEST_AND_SET
     275             : 
     276             : typedef unsigned int slock_t;
     277             : 
     278             : #define TAS(lock) tas(lock)
     279             : 
     280             : /* On IA64, it's a win to use a non-locking test before the xchg proper */
     281             : #define TAS_SPIN(lock)  (*(lock) ? 1 : TAS(lock))
     282             : 
     283             : #ifndef __INTEL_COMPILER
     284             : 
     285             : static __inline__ int
     286             : tas(volatile slock_t *lock)
     287             : {
     288             :     long int    ret;
     289             : 
     290             :     __asm__ __volatile__(
     291             :         "  xchg4   %0=%1,%2    \n"
     292             : :       "=r"(ret), "+m"(*lock)
     293             : :       "r"(1)
     294             : :       "memory");
     295             :     return (int) ret;
     296             : }
     297             : 
     298             : #else /* __INTEL_COMPILER */
     299             : 
     300             : static __inline__ int
     301             : tas(volatile slock_t *lock)
     302             : {
     303             :     int     ret;
     304             : 
     305             :     ret = _InterlockedExchange(lock,1); /* this is a xchg asm macro */
     306             : 
     307             :     return ret;
     308             : }
     309             : 
     310             : /* icc can't use the regular gcc S_UNLOCK() macro either in this case */
     311             : #define S_UNLOCK(lock)  \
     312             :     do { __memory_barrier(); *(lock) = 0; } while (0)
     313             : 
     314             : #endif /* __INTEL_COMPILER */
     315             : #endif   /* __ia64__ || __ia64 */
     316             : 
     317             : 
     318             : /*
     319             :  * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
     320             :  *
     321             :  * We use the int-width variant of the builtin because it works on more chips
     322             :  * than other widths.
     323             :  */
     324             : #if defined(__arm__) || defined(__arm) || defined(__aarch64__) || defined(__aarch64)
     325             : #ifdef HAVE_GCC__SYNC_INT32_TAS
     326             : #define HAS_TEST_AND_SET
     327             : 
     328             : #define TAS(lock) tas(lock)
     329             : 
     330             : typedef int slock_t;
     331             : 
     332             : static __inline__ int
     333             : tas(volatile slock_t *lock)
     334             : {
     335             :     return __sync_lock_test_and_set(lock, 1);
     336             : }
     337             : 
     338             : #define S_UNLOCK(lock) __sync_lock_release(lock)
     339             : 
     340             : #endif   /* HAVE_GCC__SYNC_INT32_TAS */
     341             : #endif   /* __arm__ || __arm || __aarch64__ || __aarch64 */
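
Per the GCC documentation (not something this file states), __sync_lock_test_and_set() acts as an acquire barrier and __sync_lock_release() as a release barrier, which matches the TAS()/S_UNLOCK() ordering contract described at the top of the file.  A self-contained sketch of that pairing, using hypothetical names (demo_lock, demo_count, demo_increment):

    /* sketch only: a trivial busy-wait lock built on the same builtins */
    static volatile int demo_lock;          /* hypothetical */
    static volatile int demo_count;         /* hypothetical shared data   */

    static inline void
    demo_increment(void)
    {
        /* __sync_lock_test_and_set returns the old value: 0 means acquired */
        while (__sync_lock_test_and_set(&demo_lock, 1) != 0)
            ;                               /* real code would SPIN_DELAY() here */
        demo_count++;                       /* cannot be hoisted above the acquire */
        __sync_lock_release(&demo_lock);    /* release barrier, then stores 0 */
    }
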
     342             : 
     343             : 
     344             : /*
     345             :  * RISC-V likewise uses __sync_lock_test_and_set(int *, int) if available.
     346             :  */
     347             : #if defined(__riscv)
     348             : #ifdef HAVE_GCC__SYNC_INT32_TAS
     349             : #define HAS_TEST_AND_SET
     350             : 
     351             : #define TAS(lock) tas(lock)
     352             : 
     353             : typedef int slock_t;
     354             : 
     355             : static __inline__ int
     356             : tas(volatile slock_t *lock)
     357             : {
     358             :     return __sync_lock_test_and_set(lock, 1);
     359             : }
     360             : 
     361             : #define S_UNLOCK(lock) __sync_lock_release(lock)
     362             : 
     363             : #endif   /* HAVE_GCC__SYNC_INT32_TAS */
     364             : #endif   /* __riscv */
     365             : 
     366             : 
     367             : /* S/390 and S/390x Linux (32- and 64-bit zSeries) */
     368             : #if defined(__s390__) || defined(__s390x__)
     369             : #define HAS_TEST_AND_SET
     370             : 
     371             : typedef unsigned int slock_t;
     372             : 
     373             : #define TAS(lock)      tas(lock)
     374             : 
     375             : static __inline__ int
     376             : tas(volatile slock_t *lock)
     377             : {
     378             :     int         _res = 0;
     379             : 
     380             :     __asm__ __volatile__(
     381             :         "  cs  %0,%3,0(%2)     \n"
     382             : :       "+d"(_res), "+m"(*lock)
     383             : :       "a"(lock), "d"(1)
     384             : :       "memory", "cc");
     385             :     return _res;
     386             : }
     387             : 
     388             : #endif   /* __s390__ || __s390x__ */
     389             : 
     390             : 
     391             : #if defined(__sparc__)      /* Sparc */
     392             : /*
      393             :  * Solaris has always run sparc processors in TSO (total store order) mode,
      394             :  * but Linux didn't use to and the *BSDs still don't.  So, be careful about
      395             :  * acquire/release semantics.  The CPU will treat superfluous membars as
      396             :  * NOPs, so the only cost is a little code space.
     397             :  */
     398             : #define HAS_TEST_AND_SET
     399             : 
     400             : typedef unsigned char slock_t;
     401             : 
     402             : #define TAS(lock) tas(lock)
     403             : 
     404             : static __inline__ int
     405             : tas(volatile slock_t *lock)
     406             : {
     407             :     register slock_t _res;
     408             : 
     409             :     /*
     410             :      *  See comment in src/backend/port/tas/sunstudio_sparc.s for why this
     411             :      *  uses "ldstub", and that file uses "cas".  gcc currently generates
     412             :      *  sparcv7-targeted binaries, so "cas" use isn't possible.
     413             :      */
     414             :     __asm__ __volatile__(
     415             :         "  ldstub  [%2], %0    \n"
     416             : :       "=r"(_res), "+m"(*lock)
     417             : :       "r"(lock)
     418             : :       "memory");
     419             : #if defined(__sparcv7) || defined(__sparc_v7__)
     420             :     /*
     421             :      * No stbar or membar available, luckily no actually produced hardware
     422             :      * requires a barrier.
     423             :      */
     424             : #elif defined(__sparcv8) || defined(__sparc_v8__)
     425             :     /* stbar is available (and required for both PSO, RMO), membar isn't */
     426             :     __asm__ __volatile__ ("stbar    \n":::"memory");
     427             : #else
     428             :     /*
     429             :      * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
     430             :      * barrier for sparcv8+ upwards.
     431             :      */
     432             :     __asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
     433             : #endif
     434             :     return (int) _res;
     435             : }
     436             : 
     437             : #if defined(__sparcv7) || defined(__sparc_v7__)
     438             : /*
     439             :  * No stbar or membar available, luckily no actually produced hardware
     440             :  * requires a barrier.  We fall through to the default gcc definition of
     441             :  * S_UNLOCK in this case.
     442             :  */
     443             : #elif defined(__sparcv8) || defined(__sparc_v8__)
     444             : /* stbar is available (and required for both PSO, RMO), membar isn't */
     445             : #define S_UNLOCK(lock)  \
     446             : do \
     447             : { \
     448             :     __asm__ __volatile__ ("stbar    \n":::"memory"); \
     449             :     *((volatile slock_t *) (lock)) = 0; \
     450             : } while (0)
     451             : #else
     452             : /*
     453             :  * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
     454             :  * release barrier for sparcv8+ upwards.
     455             :  */
     456             : #define S_UNLOCK(lock)  \
     457             : do \
     458             : { \
     459             :     __asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
     460             :     *((volatile slock_t *) (lock)) = 0; \
     461             : } while (0)
     462             : #endif
     463             : 
     464             : #endif   /* __sparc__ */
     465             : 
     466             : 
     467             : /* PowerPC */
     468             : #if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
     469             : #define HAS_TEST_AND_SET
     470             : 
     471             : typedef unsigned int slock_t;
     472             : 
     473             : #define TAS(lock) tas(lock)
     474             : 
     475             : /* On PPC, it's a win to use a non-locking test before the lwarx */
     476             : #define TAS_SPIN(lock)  (*(lock) ? 1 : TAS(lock))
     477             : 
     478             : /*
     479             :  * The second operand of addi can hold a constant zero or a register number,
     480             :  * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
     481             :  * base register"; most operands having this register-or-zero property are
     482             :  * address bases, e.g. the second operand of lwax.
     483             :  *
     484             :  * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
     485             :  * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
     486             :  * On newer machines, we can use lwsync instead for better performance.
     487             :  *
     488             :  * Ordinarily, we'd code the branches here using GNU-style local symbols, that
     489             :  * is "1f" referencing "1:" and so on.  But some people run gcc on AIX with
     490             :  * IBM's assembler as backend, and IBM's assembler doesn't do local symbols.
     491             :  * So hand-code the branch offsets; fortunately, all PPC instructions are
     492             :  * exactly 4 bytes each, so it's not too hard to count.
     493             :  */
     494             : static __inline__ int
     495             : tas(volatile slock_t *lock)
     496             : {
     497             :     slock_t _t;
     498             :     int _res;
     499             : 
     500             :     __asm__ __volatile__(
     501             : #ifdef USE_PPC_LWARX_MUTEX_HINT
     502             : "  lwarx   %0,0,%3,1   \n"
     503             : #else
     504             : "  lwarx   %0,0,%3     \n"
     505             : #endif
     506             : "  cmpwi   %0,0        \n"
     507             : "  bne     $+16        \n"        /* branch to li %1,1 */
     508             : "  addi    %0,%0,1     \n"
     509             : "  stwcx.  %0,0,%3     \n"
     510             : "  beq     $+12        \n"        /* branch to lwsync/isync */
     511             : "  li      %1,1        \n"
     512             : "  b       $+12        \n"        /* branch to end of asm sequence */
     513             : #ifdef USE_PPC_LWSYNC
     514             : "  lwsync              \n"
     515             : #else
     516             : "  isync               \n"
     517             : #endif
     518             : "  li      %1,0        \n"
     519             : 
     520             : :   "=&b"(_t), "=r"(_res), "+m"(*lock)
     521             : :   "r"(lock)
     522             : :   "memory", "cc");
     523             :     return _res;
     524             : }
     525             : 
     526             : /*
     527             :  * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
     528             :  * On newer machines, we can use lwsync instead for better performance.
     529             :  */
     530             : #ifdef USE_PPC_LWSYNC
     531             : #define S_UNLOCK(lock)  \
     532             : do \
     533             : { \
     534             :     __asm__ __volatile__ ("    lwsync \n" ::: "memory"); \
     535             :     *((volatile slock_t *) (lock)) = 0; \
     536             : } while (0)
     537             : #else
     538             : #define S_UNLOCK(lock)  \
     539             : do \
     540             : { \
     541             :     __asm__ __volatile__ ("    sync \n" ::: "memory"); \
     542             :     *((volatile slock_t *) (lock)) = 0; \
     543             : } while (0)
     544             : #endif /* USE_PPC_LWSYNC */
     545             : 
     546             : #endif /* powerpc */
     547             : 
     548             : 
     549             : /* Linux Motorola 68k */
     550             : #if (defined(__mc68000__) || defined(__m68k__)) && defined(__linux__)
     551             : #define HAS_TEST_AND_SET
     552             : 
     553             : typedef unsigned char slock_t;
     554             : 
     555             : #define TAS(lock) tas(lock)
     556             : 
     557             : static __inline__ int
     558             : tas(volatile slock_t *lock)
     559             : {
     560             :     register int rv;
     561             : 
     562             :     __asm__ __volatile__(
     563             :         "  clrl    %0      \n"
     564             :         "  tas     %1      \n"
     565             :         "  sne     %0      \n"
     566             : :       "=d"(rv), "+m"(*lock)
     567             : :       /* no inputs */
     568             : :       "memory", "cc");
     569             :     return rv;
     570             : }
     571             : 
     572             : #endif   /* (__mc68000__ || __m68k__) && __linux__ */
     573             : 
     574             : 
     575             : /* Motorola 88k */
     576             : #if defined(__m88k__)
     577             : #define HAS_TEST_AND_SET
     578             : 
     579             : typedef unsigned int slock_t;
     580             : 
     581             : #define TAS(lock) tas(lock)
     582             : 
     583             : static __inline__ int
     584             : tas(volatile slock_t *lock)
     585             : {
     586             :     register slock_t _res = 1;
     587             : 
     588             :     __asm__ __volatile__(
     589             :         "  xmem    %0, %2, %%r0    \n"
     590             : :       "+r"(_res), "+m"(*lock)
     591             : :       "r"(lock)
     592             : :       "memory");
     593             :     return (int) _res;
     594             : }
     595             : 
     596             : #endif   /* __m88k__ */
     597             : 
     598             : 
     599             : /*
     600             :  * VAXen -- even multiprocessor ones
     601             :  * (thanks to Tom Ivar Helbekkmo)
     602             :  */
     603             : #if defined(__vax__)
     604             : #define HAS_TEST_AND_SET
     605             : 
     606             : typedef unsigned char slock_t;
     607             : 
     608             : #define TAS(lock) tas(lock)
     609             : 
     610             : static __inline__ int
     611             : tas(volatile slock_t *lock)
     612             : {
     613             :     register int    _res;
     614             : 
     615             :     __asm__ __volatile__(
     616             :         "  movl    $1, %0          \n"
     617             :         "  bbssi   $0, (%2), 1f    \n"
     618             :         "  clrl    %0              \n"
     619             :         "1: \n"
     620             : :       "=&r"(_res), "+m"(*lock)
     621             : :       "r"(lock)
     622             : :       "memory");
     623             :     return _res;
     624             : }
     625             : 
     626             : #endif   /* __vax__ */
     627             : 
     628             : 
     629             : #if defined(__mips__) && !defined(__sgi)    /* non-SGI MIPS */
     630             : #define HAS_TEST_AND_SET
     631             : 
     632             : typedef unsigned int slock_t;
     633             : 
     634             : #define TAS(lock) tas(lock)
     635             : 
     636             : /*
     637             :  * Original MIPS-I processors lacked the LL/SC instructions, but if we are
     638             :  * so unfortunate as to be running on one of those, we expect that the kernel
     639             :  * will handle the illegal-instruction traps and emulate them for us.  On
     640             :  * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
     641             :  * choice because any other synchronization method must involve a kernel
     642             :  * call.  Unfortunately, many toolchains still default to MIPS-I as the
     643             :  * codegen target; if the symbol __mips shows that that's the case, we
     644             :  * have to force the assembler to accept LL/SC.
     645             :  *
     646             :  * R10000 and up processors require a separate SYNC, which has the same
     647             :  * issues as LL/SC.
     648             :  */
     649             : #if __mips < 2
     650             : #define MIPS_SET_MIPS2  "       .set mips2          \n"
     651             : #else
     652             : #define MIPS_SET_MIPS2
     653             : #endif
     654             : 
     655             : static __inline__ int
     656             : tas(volatile slock_t *lock)
     657             : {
     658             :     register volatile slock_t *_l = lock;
     659             :     register int _res;
     660             :     register int _tmp;
     661             : 
     662             :     __asm__ __volatile__(
     663             :         "       .set push           \n"
     664             :         MIPS_SET_MIPS2
     665             :         "       .set noreorder      \n"
     666             :         "       .set nomacro        \n"
     667             :         "       ll      %0, %2      \n"
     668             :         "       or      %1, %0, 1   \n"
     669             :         "       sc      %1, %2      \n"
     670             :         "       xori    %1, 1       \n"
     671             :         "       or      %0, %0, %1  \n"
     672             :         "       sync                \n"
     673             :         "       .set pop              "
     674             : :       "=&r" (_res), "=&r" (_tmp), "+R" (*_l)
     675             : :       /* no inputs */
     676             : :       "memory");
     677             :     return _res;
     678             : }
     679             : 
     680             : /* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
     681             : #define S_UNLOCK(lock)  \
     682             : do \
     683             : { \
     684             :     __asm__ __volatile__( \
     685             :         "       .set push           \n" \
     686             :         MIPS_SET_MIPS2 \
     687             :         "       .set noreorder      \n" \
     688             :         "       .set nomacro        \n" \
     689             :         "       sync                \n" \
     690             :         "       .set pop              " \
     691             : :       /* no outputs */ \
     692             : :       /* no inputs */ \
     693             : :       "memory"); \
     694             :     *((volatile slock_t *) (lock)) = 0; \
     695             : } while (0)
     696             : 
     697             : #endif /* __mips__ && !__sgi */
     698             : 
     699             : 
     700             : #if defined(__m32r__) && defined(HAVE_SYS_TAS_H)    /* Renesas' M32R */
     701             : #define HAS_TEST_AND_SET
     702             : 
     703             : #include <sys/tas.h>
     704             : 
     705             : typedef int slock_t;
     706             : 
     707             : #define TAS(lock) tas(lock)
     708             : 
     709             : #endif /* __m32r__ */
     710             : 
     711             : 
     712             : #if defined(__sh__)             /* Renesas' SuperH */
     713             : #define HAS_TEST_AND_SET
     714             : 
     715             : typedef unsigned char slock_t;
     716             : 
     717             : #define TAS(lock) tas(lock)
     718             : 
     719             : static __inline__ int
     720             : tas(volatile slock_t *lock)
     721             : {
     722             :     register int _res;
     723             : 
     724             :     /*
     725             :      * This asm is coded as if %0 could be any register, but actually SuperH
     726             :      * restricts the target of xor-immediate to be R0.  That's handled by
     727             :      * the "z" constraint on _res.
     728             :      */
     729             :     __asm__ __volatile__(
     730             :         "  tas.b @%2    \n"
     731             :         "  movt  %0     \n"
     732             :         "  xor   #1,%0  \n"
     733             : :       "=z"(_res), "+m"(*lock)
     734             : :       "r"(lock)
     735             : :       "memory", "t");
     736             :     return _res;
     737             : }
     738             : 
     739             : #endif   /* __sh__ */
     740             : 
     741             : 
     742             : /* These live in s_lock.c, but only for gcc */
     743             : 
     744             : 
     745             : #if defined(__m68k__) && !defined(__linux__)    /* non-Linux Motorola 68k */
     746             : #define HAS_TEST_AND_SET
     747             : 
     748             : typedef unsigned char slock_t;
     749             : #endif
     750             : 
     751             : /*
     752             :  * Default implementation of S_UNLOCK() for gcc/icc.
     753             :  *
     754             :  * Note that this implementation is unsafe for any platform that can reorder
     755             :  * a memory access (either load or store) after a following store.  That
     756             :  * happens not to be possible on x86 and most legacy architectures (some are
     757             :  * single-processor!), but many modern systems have weaker memory ordering.
     758             :  * Those that do must define their own version of S_UNLOCK() rather than
     759             :  * relying on this one.
     760             :  */
     761             : #if !defined(S_UNLOCK)
     762             : #define S_UNLOCK(lock)  \
     763             :     do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
     764             : #endif
     765             : 
     766             : #endif  /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
     767             : 
     768             : 
     769             : 
     770             : /*
     771             :  * ---------------------------------------------------------------------
     772             :  * Platforms that use non-gcc inline assembly:
     773             :  * ---------------------------------------------------------------------
     774             :  */
     775             : 
     776             : #if !defined(HAS_TEST_AND_SET)  /* We didn't trigger above, let's try here */
     777             : 
     778             : 
     779             : #if defined(__hppa) || defined(__hppa__)    /* HP PA-RISC, GCC and HP compilers */
     780             : /*
     781             :  * HP's PA-RISC
     782             :  *
     783             :  * See src/backend/port/hpux/tas.c.template for details about LDCWX.  Because
     784             :  * LDCWX requires a 16-byte-aligned address, we declare slock_t as a 16-byte
     785             :  * struct.  The active word in the struct is whichever has the aligned address;
     786             :  * the other three words just sit at -1.
     787             :  *
     788             :  * When using gcc, we can inline the required assembly code.
     789             :  */
     790             : #define HAS_TEST_AND_SET
     791             : 
     792             : typedef struct
     793             : {
     794             :     int         sema[4];
     795             : } slock_t;
     796             : 
     797             : #define TAS_ACTIVE_WORD(lock)   ((volatile int *) (((uintptr_t) (lock) + 15) & ~15))
     798             : 
     799             : #if defined(__GNUC__)
     800             : 
     801             : static __inline__ int
     802             : tas(volatile slock_t *lock)
     803             : {
     804             :     volatile int *lockword = TAS_ACTIVE_WORD(lock);
     805             :     register int lockval;
     806             : 
     807             :     __asm__ __volatile__(
     808             :         "  ldcwx   0(0,%2),%0  \n"
     809             : :       "=r"(lockval), "+m"(*lockword)
     810             : :       "r"(lockword)
     811             : :       "memory");
     812             :     return (lockval == 0);
     813             : }
     814             : 
     815             : /*
      816             :  * The hppa implementation doesn't follow the rules of this file and provides
     817             :  * a gcc specific implementation outside of the above defined(__GNUC__). It
     818             :  * does so to avoid duplication between the HP compiler and gcc. So undefine
     819             :  * the generic fallback S_UNLOCK from above.
     820             :  */
     821             : #ifdef S_UNLOCK
     822             : #undef S_UNLOCK
     823             : #endif
     824             : #define S_UNLOCK(lock)  \
     825             :     do { \
     826             :         __asm__ __volatile__("" : : : "memory"); \
     827             :         *TAS_ACTIVE_WORD(lock) = -1; \
     828             :     } while (0)
     829             : 
     830             : #endif /* __GNUC__ */
     831             : 
     832             : #define S_INIT_LOCK(lock) \
     833             :     do { \
     834             :         volatile slock_t *lock_ = (lock); \
     835             :         lock_->sema[0] = -1; \
     836             :         lock_->sema[1] = -1; \
     837             :         lock_->sema[2] = -1; \
     838             :         lock_->sema[3] = -1; \
     839             :     } while (0)
     840             : 
     841             : #define S_LOCK_FREE(lock)   (*TAS_ACTIVE_WORD(lock) != 0)
     842             : 
     843             : #endif   /* __hppa || __hppa__ */
     844             : 
     845             : 
     846             : #if defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
     847             : /*
     848             :  * HP-UX on Itanium, non-gcc/icc compiler
     849             :  *
     850             :  * We assume that the compiler enforces strict ordering of loads/stores on
     851             :  * volatile data (see comments on the gcc-version earlier in this file).
     852             :  * Note that this assumption does *not* hold if you use the
     853             :  * +Ovolatile=__unordered option on the HP-UX compiler, so don't do that.
     854             :  *
     855             :  * See also Implementing Spinlocks on the Intel Itanium Architecture and
     856             :  * PA-RISC, by Tor Ekqvist and David Graves, for more information.  As of
     857             :  * this writing, version 1.0 of the manual is available at:
     858             :  * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
     859             :  */
     860             : #define HAS_TEST_AND_SET
     861             : 
     862             : typedef unsigned int slock_t;
     863             : 
     864             : #include <ia64/sys/inline.h>
     865             : #define TAS(lock) _Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
     866             : /* On IA64, it's a win to use a non-locking test before the xchg proper */
     867             : #define TAS_SPIN(lock)  (*(lock) ? 1 : TAS(lock))
     868             : #define S_UNLOCK(lock)  \
     869             :     do { _Asm_mf(); (*(lock)) = 0; } while (0)
     870             : 
     871             : #endif  /* HPUX on IA64, non gcc/icc */
     872             : 
     873             : #if defined(_AIX)   /* AIX */
     874             : /*
     875             :  * AIX (POWER)
     876             :  */
     877             : #define HAS_TEST_AND_SET
     878             : 
     879             : #include <sys/atomic_op.h>
     880             : 
     881             : typedef int slock_t;
     882             : 
     883             : #define TAS(lock)           _check_lock((slock_t *) (lock), 0, 1)
     884             : #define S_UNLOCK(lock)      _clear_lock((slock_t *) (lock), 0)
     885             : #endif   /* _AIX */
     886             : 
     887             : 
     888             : /* These are in sunstudio_(sparc|x86).s */
     889             : 
     890             : #if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
     891             : #define HAS_TEST_AND_SET
     892             : 
     893             : #if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
     894             : typedef unsigned int slock_t;
     895             : #else
     896             : typedef unsigned char slock_t;
     897             : #endif
     898             : 
     899             : extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
     900             :                                       slock_t cmp);
     901             : 
     902             : #define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
     903             : #endif
     904             : 
     905             : 
     906             : #ifdef _MSC_VER
     907             : typedef LONG slock_t;
     908             : 
     909             : #define HAS_TEST_AND_SET
     910             : #define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
     911             : 
     912             : #define SPIN_DELAY() spin_delay()
     913             : 
     914             : /* If using Visual C++ on Win64, inline assembly is unavailable.
     915             :  * Use a _mm_pause intrinsic instead of rep nop.
     916             :  */
     917             : #if defined(_WIN64)
     918             : static __forceinline void
     919             : spin_delay(void)
     920             : {
     921             :     _mm_pause();
     922             : }
     923             : #else
     924             : static __forceinline void
     925             : spin_delay(void)
     926             : {
     927             :     /* See comment for gcc code. Same code, MASM syntax */
     928             :     __asm rep nop;
     929             : }
     930             : #endif
     931             : 
     932             : #include <intrin.h>
     933             : #pragma intrinsic(_ReadWriteBarrier)
     934             : 
     935             : #define S_UNLOCK(lock)  \
     936             :     do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
     937             : 
     938             : #endif
     939             : 
     940             : 
     941             : #endif  /* !defined(HAS_TEST_AND_SET) */
     942             : 
     943             : 
     944             : /* Blow up if we didn't have any way to do spinlocks */
     945             : #ifndef HAS_TEST_AND_SET
     946             : #error PostgreSQL does not have native spinlock support on this platform.  To continue the compilation, rerun configure using --disable-spinlocks.  However, performance will be poor.  Please report this to pgsql-bugs@lists.postgresql.org.
     947             : #endif
     948             : 
     949             : 
     950             : #else   /* !HAVE_SPINLOCKS */
     951             : 
     952             : 
     953             : /*
     954             :  * Fake spinlock implementation using semaphores --- slow and prone
     955             :  * to fall foul of kernel limits on number of semaphores, so don't use this
     956             :  * unless you must!  The subroutines appear in spin.c.
     957             :  */
     958             : typedef int slock_t;
     959             : 
     960             : extern bool s_lock_free_sema(volatile slock_t *lock);
     961             : extern void s_unlock_sema(volatile slock_t *lock);
     962             : extern void s_init_lock_sema(volatile slock_t *lock, bool nested);
     963             : extern int  tas_sema(volatile slock_t *lock);
     964             : 
     965             : #define S_LOCK_FREE(lock)   s_lock_free_sema(lock)
     966             : #define S_UNLOCK(lock)   s_unlock_sema(lock)
     967             : #define S_INIT_LOCK(lock)   s_init_lock_sema(lock, false)
     968             : #define TAS(lock)   tas_sema(lock)
     969             : 
     970             : 
     971             : #endif  /* HAVE_SPINLOCKS */
     972             : 
     973             : 
     974             : /*
     975             :  * Default Definitions - override these above as needed.
     976             :  */
     977             : 
     978             : #if !defined(S_LOCK)
     979             : #define S_LOCK(lock) \
     980             :     (TAS(lock) ? s_lock((lock), __FILE__, __LINE__, PG_FUNCNAME_MACRO) : 0)
     981             : #endif   /* S_LOCK */
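
The default S_LOCK() above tries TAS() once and falls into the out-of-line s_lock() (declared below) only on contention.  A sketch of that slow path, modeled on s_lock.c rather than quoted from it: it is the mandatory retry loop from the CAUTION at the top of the file, built from TAS_SPIN() and the spin-delay support defined further down, and it eventually times out and reports a stuck spinlock, as the header comment promises.

    int
    s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
    {
        SpinDelayStatus delayStatus;

        init_spin_delay(&delayStatus, file, line, func);
        while (TAS_SPIN(lock))
            perform_spin_delay(&delayStatus);   /* SPIN_DELAY(), then escalating sleeps */
        finish_spin_delay(&delayStatus);        /* feeds back into spins_per_delay */
        return delayStatus.delays;              /* "number of delays", as documented above */
    }
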
     982             : 
     983             : #if !defined(S_LOCK_FREE)
     984             : #define S_LOCK_FREE(lock)   (*(lock) == 0)
     985             : #endif   /* S_LOCK_FREE */
     986             : 
     987             : #if !defined(S_UNLOCK)
     988             : /*
     989             :  * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
     990             :  * is unsafe if the platform can reorder a memory access (either load or
     991             :  * store) after a following store; platforms where this is possible must
     992             :  * define their own S_UNLOCK.  But CPU reordering is not the only concern:
     993             :  * if we simply defined S_UNLOCK() as an inline macro, the compiler might
     994             :  * reorder instructions from inside the critical section to occur after the
     995             :  * lock release.  Since the compiler probably can't know what the external
     996             :  * function s_unlock is doing, putting the same logic there should be adequate.
     997             :  * A sufficiently-smart globally optimizing compiler could break that
     998             :  * assumption, though, and the cost of a function call for every spinlock
     999             :  * release may hurt performance significantly, so we use this implementation
    1000             :  * only for platforms where we don't know of a suitable intrinsic.  For the
    1001             :  * most part, those are relatively obscure platform/compiler combinations to
    1002             :  * which the PostgreSQL project does not have access.
    1003             :  */
    1004             : #define USE_DEFAULT_S_UNLOCK
    1005             : extern void s_unlock(volatile slock_t *lock);
    1006             : #define S_UNLOCK(lock)      s_unlock(lock)
    1007             : #endif   /* S_UNLOCK */
    1008             : 
    1009             : #if !defined(S_INIT_LOCK)
    1010             : #define S_INIT_LOCK(lock)   S_UNLOCK(lock)
    1011             : #endif   /* S_INIT_LOCK */
    1012             : 
    1013             : #if !defined(SPIN_DELAY)
    1014             : #define SPIN_DELAY()    ((void) 0)
    1015             : #endif   /* SPIN_DELAY */
    1016             : 
    1017             : #if !defined(TAS)
    1018             : extern int  tas(volatile slock_t *lock);        /* in port/.../tas.s, or
    1019             :                                                  * s_lock.c */
    1020             : 
    1021             : #define TAS(lock)       tas(lock)
    1022             : #endif   /* TAS */
    1023             : 
    1024             : #if !defined(TAS_SPIN)
    1025             : #define TAS_SPIN(lock)  TAS(lock)
    1026             : #endif   /* TAS_SPIN */
    1027             : 
    1028             : extern slock_t dummy_spinlock;
    1029             : 
    1030             : /*
    1031             :  * Platform-independent out-of-line support routines
    1032             :  */
    1033             : extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
    1034             : 
    1035             : /* Support for dynamic adjustment of spins_per_delay */
    1036             : #define DEFAULT_SPINS_PER_DELAY  100
    1037             : 
    1038             : extern void set_spins_per_delay(int shared_spins_per_delay);
    1039             : extern int  update_spins_per_delay(int shared_spins_per_delay);
    1040             : 
    1041             : /*
    1042             :  * Support for spin delay which is useful in various places where
    1043             :  * spinlock-like procedures take place.
    1044             :  */
    1045             : typedef struct
    1046             : {
    1047             :     int         spins;
    1048             :     int         delays;
    1049             :     int         cur_delay;
    1050             :     const char *file;
    1051             :     int         line;
    1052             :     const char *func;
    1053             : } SpinDelayStatus;
    1054             : 
    1055             : static inline void
    1056    73551058 : init_spin_delay(SpinDelayStatus *status,
    1057             :                 const char *file, int line, const char *func)
    1058             : {
    1059    73551058 :     status->spins = 0;
    1060    73551058 :     status->delays = 0;
    1061    73551058 :     status->cur_delay = 0;
    1062    73551058 :     status->file = file;
    1063    73551058 :     status->line = line;
    1064    73551058 :     status->func = func;
    1065    73551058 : }
    1066             : 
    1067             : #define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, PG_FUNCNAME_MACRO)
    1068             : void perform_spin_delay(SpinDelayStatus *status);
    1069             : void finish_spin_delay(SpinDelayStatus *status);
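
The same spin-delay machinery is also used directly, outside S_LOCK() proper, wherever backend code busy-waits on something spinlock-like.  A rough sketch of the usual pattern, where resource_is_free() stands in for whatever condition is being polled:

    SpinDelayStatus delayStatus;

    init_local_spin_delay(&delayStatus);        /* records __FILE__/__LINE__/function    */
    while (!resource_is_free())                 /* hypothetical condition being polled   */
        perform_spin_delay(&delayStatus);       /* SPIN_DELAY(); sleeps as delays mount  */
    finish_spin_delay(&delayStatus);            /* adjusts spins_per_delay for next time */
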
    1070             : 
    1071             : #endif   /* S_LOCK_H */

Generated by: LCOV version 1.14