LCOV - code coverage report
Current view: top level - src/include/port/atomics - arch-x86.h (source / functions)
Test:         PostgreSQL 17devel
Date:         2024-04-27 00:11:45
Coverage:     Lines: 22 / 22 (100.0 %)    Functions: 6 / 6 (100.0 %)

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * arch-x86.h
       4             :  *    Atomic operation considerations specific to Intel x86
       5             :  *
       6             :  * Note that we actually require a 486 or newer, because the 386 doesn't
       7             :  * support xadd and cmpxchg. Given that the 386 isn't supported anywhere
       8             :  * anymore, that is luckily not much of a restriction.
       9             :  *
      10             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  * NOTES:
      14             :  *
      15             :  * src/include/port/atomics/arch-x86.h
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : 
      20             : /*
      21             :  * Neither 32 nor 64 bit x86 allows loads to be reordered with other loads,
      22             :  * or stores with other stores, but a load may be reordered ahead of an
      23             :  * earlier store to a different location (store/load reordering).
      24             :  *
      25             :  * Technically, some x86-ish chips support uncached memory access and/or
      26             :  * special instructions that are weakly ordered.  In those cases we'd need
      27             :  * the read and write barriers to be lfence and sfence.  But since we don't
      28             :  * do those things, a compiler barrier should be enough.
      29             :  *
      30             :  * "lock; addl" has been supported for longer than "mfence", and it is also
      31             :  * rumored to be faster in many scenarios.
      32             :  */
      33             : 
      34             : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      35             : #if defined(__i386__) || defined(__i386)
      36             : #define pg_memory_barrier_impl()        \
      37             :     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
      38             : #elif defined(__x86_64__)
      39             : #define pg_memory_barrier_impl()        \
      40             :     __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
      41             : #endif
      42             : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      43             : 
      44             : #define pg_read_barrier_impl()      pg_compiler_barrier_impl()
      45             : #define pg_write_barrier_impl()     pg_compiler_barrier_impl()
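
A minimal sketch (editorial addition, not part of the header) of how these barriers are typically consumed through the generic wrappers pg_read_barrier()/pg_write_barrier()/pg_memory_barrier() from port/atomics.h, which expand to the *_impl() definitions above. On x86 the read and write barriers cost nothing at runtime, while pg_memory_barrier() emits the "lock; addl" shown above.

    #include "postgres.h"
    #include "port/atomics.h"

    static int  payload;
    static volatile int ready = 0;

    /* Writer: publish the data, then the flag. */
    static void
    producer(void)
    {
        payload = 42;
        pg_write_barrier();     /* just a compiler barrier on x86 (TSO) */
        ready = 1;
    }

    /* Reader: once the flag is seen, the data is safe to read. */
    static void
    consumer(void)
    {
        if (ready)
        {
            pg_read_barrier();  /* also only a compiler barrier on x86 */
            Assert(payload == 42);
        }
    }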
      46             : 
      47             : /*
      48             :  * Provide implementations for atomics using inline assembly on x86 gcc. It's
      49             :  * nice to support older gcc versions, and the compare/exchange implementation
      50             :  * here is actually more efficient than the __sync variant.
      51             :  */
      52             : #if defined(HAVE_ATOMICS)
      53             : 
      54             : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      55             : 
      56             : #define PG_HAVE_ATOMIC_FLAG_SUPPORT
      57             : typedef struct pg_atomic_flag
      58             : {
      59             :     volatile char value;
      60             : } pg_atomic_flag;
      61             : 
      62             : #define PG_HAVE_ATOMIC_U32_SUPPORT
      63             : typedef struct pg_atomic_uint32
      64             : {
      65             :     volatile uint32 value;
      66             : } pg_atomic_uint32;
      67             : 
      68             : /*
      69             :  * It's too complicated to write inline asm for 64 bit types on 32 bit x86,
      70             :  * and the 486 can't do it anyway.
      71             :  */
      72             : #ifdef __x86_64__
      73             : #define PG_HAVE_ATOMIC_U64_SUPPORT
      74             : typedef struct pg_atomic_uint64
      75             : {
      76             :     /* alignment guaranteed due to being on a 64bit platform */
      77             :     volatile uint64 value;
      78             : } pg_atomic_uint64;
      79             : #endif  /* __x86_64__ */
      80             : 
      81             : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      82             : 
      83             : #endif /* defined(HAVE_ATOMICS) */
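
The structs above are not meant to be touched directly; callers go through the generic API in port/atomics.h, which routes to the *_impl functions further down in this file. A minimal usage sketch (editorial addition; the SharedCounter struct is hypothetical):

    #include "postgres.h"
    #include "port/atomics.h"

    /* hypothetical structure living in shared memory */
    typedef struct SharedCounter
    {
        pg_atomic_uint32 nrequests;
    } SharedCounter;

    static void
    counter_demo(SharedCounter *sc)
    {
        pg_atomic_init_u32(&sc->nrequests, 0);          /* once, at setup time */
        pg_atomic_fetch_add_u32(&sc->nrequests, 1);     /* lock-free increment */
        (void) pg_atomic_read_u32(&sc->nrequests);      /* unlocked read */
    }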
      84             : 
      85             : #if !defined(PG_HAVE_SPIN_DELAY)
      86             : /*
      87             :  * This sequence is equivalent to the PAUSE instruction ("rep" is
      88             :  * ignored by old IA32 processors if the following instruction is
      89             :  * not a string operation); the IA-32 Architecture Software
      90             :  * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
      91             :  * PAUSE in the inner loop of a spin lock is necessary for good
      92             :  * performance:
      93             :  *
      94             :  *     The PAUSE instruction improves the performance of IA-32
      95             :  *     processors supporting Hyper-Threading Technology when
      96             :  *     executing spin-wait loops and other routines where one
      97             :  *     thread is accessing a shared lock or semaphore in a tight
      98             :  *     polling loop. When executing a spin-wait loop, the
      99             :  *     processor can suffer a severe performance penalty when
     100             :  *     exiting the loop because it detects a possible memory order
     101             :  *     violation and flushes the core processor's pipeline. The
     102             :  *     PAUSE instruction provides a hint to the processor that the
     103             :  *     code sequence is a spin-wait loop. The processor uses this
     104             :  *     hint to avoid the memory order violation and prevent the
     105             :  *     pipeline flush. In addition, the PAUSE instruction
     106             :  *     de-pipelines the spin-wait loop to prevent it from
     107             :  *     consuming execution resources excessively.
     108             :  */
     109             : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     110             : #define PG_HAVE_SPIN_DELAY
     111             : static __inline__ void
     112             : pg_spin_delay_impl(void)
     113             : {
     114             :     __asm__ __volatile__(" rep; nop            \n");
     115             : }
     116             : #elif defined(_MSC_VER) && defined(__x86_64__)
     117             : #define PG_HAVE_SPIN_DELAY
     118             : static __forceinline void
     119             : pg_spin_delay_impl(void)
     120             : {
     121             :     _mm_pause();
     122             : }
     123             : #elif defined(_MSC_VER)
     124             : #define PG_HAVE_SPIN_DELAY
     125             : static __forceinline void
     126             : pg_spin_delay_impl(void)
     127             : {
     128             :     /* See comment for gcc code. Same code, MASM syntax */
     129             :     __asm rep nop;
     130             : }
     131             : #endif
     132             : #endif /* !defined(PG_HAVE_SPIN_DELAY) */
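
An illustration (editorial addition) of the kind of spin-wait loop the PAUSE hint is meant for; real PostgreSQL code normally relies on the spinlock primitives and their backoff logic rather than open-coding a loop like this.

    /* Busy-wait until *flag becomes nonzero, hinting the CPU each iteration. */
    static void
    spin_until_set(volatile int *flag)
    {
        while (*flag == 0)
            pg_spin_delay_impl();   /* "rep; nop", i.e. PAUSE, on x86 */
    }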
     133             : 
     134             : 
     135             : #if defined(HAVE_ATOMICS)
     136             : 
     137             : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     138             : 
     139             : #define PG_HAVE_ATOMIC_TEST_SET_FLAG
     140             : static inline bool
     141         254 : pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
     142             : {
     143         254 :     char        _res = 1;
     144             : 
     145         254 :     __asm__ __volatile__(
     146             :         "  lock            \n"
     147             :         "  xchgb   %0,%1   \n"
     148             : :       "+q"(_res), "+m"(ptr->value)
     149             : :
     150             : :       "memory");
     151         254 :     return _res == 0;
     152             : }
     153             : 
     154             : #define PG_HAVE_ATOMIC_CLEAR_FLAG
     155             : static inline void
     156        5342 : pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
     157             : {
     158             :     /*
     159             :      * On a TSO architecture like x86 it's sufficient to use a compiler
     160             :      * barrier to achieve release semantics.
     161             :      */
     162        5342 :     __asm__ __volatile__("" ::: "memory");
     163        5342 :     ptr->value = 0;
     164        5342 : }
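
A toy sketch (editorial addition) of a test-and-set lock built on the generic pg_atomic_flag wrappers from port/atomics.h. pg_atomic_test_set_flag() returns true only when the flag was previously clear, matching the `_res == 0` check above, and clearing the flag is just a compiler barrier plus a plain store because x86 is TSO.

    #include "port/atomics.h"

    static void
    toy_lock(volatile pg_atomic_flag *lock)
    {
        /* keep trying until we are the one who set the flag */
        while (!pg_atomic_test_set_flag(lock))
            pg_spin_delay_impl();       /* PAUSE between attempts, see above */
    }

    static void
    toy_unlock(volatile pg_atomic_flag *lock)
    {
        pg_atomic_clear_flag(lock);     /* release: barrier + plain store */
    }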
     165             : 
     166             : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
     167             : static inline bool
     168   598182230 : pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
     169             :                                     uint32 *expected, uint32 newval)
     170             : {
     171             :     char    ret;
     172             : 
     173             :     /*
     174             :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     175             :      * equal to measure the success.
     176             :      */
     177   598182230 :     __asm__ __volatile__(
     178             :         "  lock                \n"
     179             :         "  cmpxchgl    %4,%5   \n"
     180             :         "   setz       %2      \n"
     181             : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     182   598182230 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     183             : :       "memory", "cc");
     184   598182230 :     return (bool) ret;
     185             : }
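
The "=a" (*expected) output constraint means that on failure the caller's expected value is refreshed with what is currently in memory, so a retry loop never needs a separate reload. A sketch of the usual pattern through the generic wrapper (editorial addition; atomic_max_u32 is hypothetical):

    /* Atomically raise *target to at least 'candidate'. */
    static void
    atomic_max_u32(volatile pg_atomic_uint32 *target, uint32 candidate)
    {
        uint32      cur = pg_atomic_read_u32(target);

        while (cur < candidate &&
               !pg_atomic_compare_exchange_u32(target, &cur, candidate))
        {
            /* CAS failed: cur now holds the latest value, loop re-checks */
        }
    }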
     186             : 
     187             : #define PG_HAVE_ATOMIC_FETCH_ADD_U32
     188             : static inline uint32
     189     9819864 : pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
     190             : {
     191             :     uint32 res;
     192     9819864 :     __asm__ __volatile__(
     193             :         "  lock                \n"
     194             :         "  xaddl   %0,%1       \n"
     195             : :       "=q"(res), "=m"(ptr->value)
     196             : :       "0" (add_), "m"(ptr->value)
     197             : :       "memory", "cc");
     198     9819864 :     return res;
     199             : }
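
xaddl leaves the pre-addition value in the output register, which is why the wrapper is a fetch-then-add rather than an add-then-fetch. A short sketch (editorial addition; next_ticket is hypothetical):

    /* Hand out distinct, increasing ticket numbers to concurrent callers. */
    static uint32
    next_ticket(volatile pg_atomic_uint32 *counter)
    {
        return pg_atomic_fetch_add_u32(counter, 1);     /* returns the old value */
    }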
     200             : 
     201             : #ifdef __x86_64__
     202             : 
     203             : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
     204             : static inline bool
     205     3262584 : pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
     206             :                                     uint64 *expected, uint64 newval)
     207             : {
     208             :     char    ret;
     209             : 
     210             :     /*
     211             :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     212             :      * equal to measure the success.
     213             :      */
     214     3262584 :     __asm__ __volatile__(
     215             :         "  lock                \n"
     216             :         "  cmpxchgq    %4,%5   \n"
     217             :         "   setz       %2      \n"
     218             : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     219     3262584 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     220             : :       "memory", "cc");
     221     3262584 :     return (bool) ret;
     222             : }
     223             : 
     224             : #define PG_HAVE_ATOMIC_FETCH_ADD_U64
     225             : static inline uint64
     226     1446222 : pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
     227             : {
     228             :     uint64 res;
     229     1446222 :     __asm__ __volatile__(
     230             :         "  lock                \n"
     231             :         "  xaddq   %0,%1       \n"
     232             : :       "=q"(res), "=m"(ptr->value)
     233             : :       "0" (add_), "m"(ptr->value)
     234             : :       "memory", "cc");
     235     1446222 :     return res;
     236             : }
     237             : 
     238             : #endif /* __x86_64__ */
     239             : 
     240             : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
     241             : 
     242             : /*
     243             :  * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
     244             :  * since at least the 586, as well as on all x86-64 CPUs.
     245             :  */
     246             : #if defined(__i586__) || defined(__i686__) || /* gcc i586+ */  \
     247             :     (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
     248             :     defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
     249             : #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
     250             : #endif /* 8 byte single-copy atomicity */
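
What the macro promises, sketched below (editorial addition): an aligned 8-byte load or store is performed as one indivisible access, so a concurrent reader never observes a half-written value; the generic atomics layer can therefore implement 64-bit reads and writes as plain loads and stores on these CPUs.

    /* With 8-byte single-copy atomicity, this plain load cannot "tear":
     * it returns either the old or the new 64 bit value, never a mix. */
    static uint64
    read_u64_untorn(volatile uint64 *ptr)
    {
        return *ptr;
    }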
     251             : 
     252             : #endif /* HAVE_ATOMICS */

Generated by: LCOV version 1.14