LCOV - code coverage report

Current view:  top level - src/include/port/atomics - arch-x86.h (source / functions)
Test:          PostgreSQL 19devel
Test Date:     2026-03-02 04:14:39

               Coverage    Total    Hit
Lines:          100.0 %       22     22
Functions:      100.0 %        6      6

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * arch-x86.h
       4              :  *    Atomic operations considerations specific to intel x86
       5              :  *
       6              :  * Note that we actually require a 486 upwards because the 386 doesn't have
       7              :  * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere
       8              :  * anymore, that's luckily not much of a restriction.
       9              :  *
      10              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      11              :  * Portions Copyright (c) 1994, Regents of the University of California
      12              :  *
      13              :  * NOTES:
      14              :  *
      15              :  * src/include/port/atomics/arch-x86.h
      16              :  *
      17              :  *-------------------------------------------------------------------------
      18              :  */
      19              : 
      20              : /*
      21              :  * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
      22              :  * or stores to be reordered with other stores, but a load can be performed
       23              :  * before a preceding store becomes globally visible.
      24              :  *
      25              :  * Technically, some x86-ish chips support uncached memory access and/or
      26              :  * special instructions that are weakly ordered.  In those cases we'd need
      27              :  * the read and write barriers to be lfence and sfence.  But since we don't
      28              :  * do those things, a compiler barrier should be enough.
      29              :  *
      30              :  * "lock; addl" has worked for longer than "mfence". It's also rumored to be
      31              :  * faster in many scenarios.
      32              :  */
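
A minimal sketch of the handoff pattern this reasoning permits, assuming the
public pg_write_barrier() / pg_read_barrier() wrappers that atomics.h layers
on top of these _impl macros (on x86 both reduce to compiler barriers, but
the pattern stays correct on weaker architectures):

    static volatile int payload;
    static volatile int ready;      /* published only after payload is set */

    static void
    producer(void)
    {
        payload = 42;
        pg_write_barrier();         /* on x86: compiler barrier only */
        ready = 1;
    }

    static int
    consumer(void)
    {
        if (ready)
        {
            pg_read_barrier();      /* on x86: compiler barrier only */
            return payload;
        }
        return -1;                  /* not yet published */
    }
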
      33              : 
      34              : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      35              : #if defined(__i386__) || defined(__i386)
      36              : #define pg_memory_barrier_impl()        \
      37              :     __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
      38              : #elif defined(__x86_64__)
      39              : #define pg_memory_barrier_impl()        \
      40              :     __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
      41              : #endif
      42              : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      43              : 
      44              : #define pg_read_barrier_impl()      pg_compiler_barrier_impl()
      45              : #define pg_write_barrier_impl()     pg_compiler_barrier_impl()
      46              : 
      47              : /*
      48              :  * Provide implementation for atomics using inline assembly on x86 gcc. It's
      49              :  * nice to support older gcc's and the compare/exchange implementation here is
       50              :  * actually more efficient than the __sync variant.
      51              :  */
      52              : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
      53              : 
      54              : #define PG_HAVE_ATOMIC_FLAG_SUPPORT
      55              : typedef struct pg_atomic_flag
      56              : {
      57              :     volatile char value;
      58              : } pg_atomic_flag;
      59              : 
      60              : #define PG_HAVE_ATOMIC_U32_SUPPORT
      61              : typedef struct pg_atomic_uint32
      62              : {
      63              :     volatile uint32 value;
      64              : } pg_atomic_uint32;
      65              : 
      66              : /*
      67              :  * It's too complicated to write inline asm for 64bit types on 32bit and the
      68              :  * 486 can't do it anyway.
      69              :  */
      70              : #ifdef __x86_64__
      71              : #define PG_HAVE_ATOMIC_U64_SUPPORT
      72              : typedef struct pg_atomic_uint64
      73              : {
      74              :     /* alignment guaranteed due to being on a 64bit platform */
      75              :     volatile uint64 value;
      76              : } pg_atomic_uint64;
      77              : #endif  /* __x86_64__ */
      78              : 
      79              : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
      80              : 
      81              : #if !defined(PG_HAVE_SPIN_DELAY)
      82              : /*
      83              :  * This sequence is equivalent to the PAUSE instruction ("rep" is
      84              :  * ignored by old IA32 processors if the following instruction is
      85              :  * not a string operation); the IA-32 Architecture Software
      86              :  * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
      87              :  * PAUSE in the inner loop of a spin lock is necessary for good
      88              :  * performance:
      89              :  *
      90              :  *     The PAUSE instruction improves the performance of IA-32
      91              :  *     processors supporting Hyper-Threading Technology when
      92              :  *     executing spin-wait loops and other routines where one
      93              :  *     thread is accessing a shared lock or semaphore in a tight
      94              :  *     polling loop. When executing a spin-wait loop, the
      95              :  *     processor can suffer a severe performance penalty when
      96              :  *     exiting the loop because it detects a possible memory order
      97              :  *     violation and flushes the core processor's pipeline. The
      98              :  *     PAUSE instruction provides a hint to the processor that the
      99              :  *     code sequence is a spin-wait loop. The processor uses this
     100              :  *     hint to avoid the memory order violation and prevent the
     101              :  *     pipeline flush. In addition, the PAUSE instruction
     102              :  *     de-pipelines the spin-wait loop to prevent it from
     103              :  *     consuming execution resources excessively.
     104              :  */
     105              : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     106              : #define PG_HAVE_SPIN_DELAY
     107              : static inline void
     108              : pg_spin_delay_impl(void)
     109              : {
     110              :     __asm__ __volatile__(" rep; nop            \n");
     111              : }
     112              : #elif defined(_MSC_VER) && defined(__x86_64__)
     113              : #define PG_HAVE_SPIN_DELAY
     114              : static __forceinline void
     115              : pg_spin_delay_impl(void)
     116              : {
     117              :     _mm_pause();
     118              : }
     119              : #elif defined(_MSC_VER)
     120              : #define PG_HAVE_SPIN_DELAY
     121              : static __forceinline void
     122              : pg_spin_delay_impl(void)
     123              : {
     124              :     /* See comment for gcc code. Same code, MASM syntax */
     125              :     __asm rep nop;
     126              : }
     127              : #endif
     128              : #endif /* !defined(PG_HAVE_SPIN_DELAY) */
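
To make the PAUSE discussion above concrete, a sketch of the spin-wait shape
it has in mind; the lock word and the __sync_lock_test_and_set() builtin are
used only for illustration (PostgreSQL's real spinlock lives in s_lock.h),
while pg_spin_delay_impl() is the routine defined above:

    static void
    spin_until_acquired(volatile int *lock)
    {
        /* test-and-test-and-set: spin read-only, PAUSE between polls */
        while (__sync_lock_test_and_set(lock, 1) != 0)
        {
            while (*lock != 0)
                pg_spin_delay_impl();   /* "rep; nop" == PAUSE */
        }
    }
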
     129              : 
     130              : 
     131              : #if defined(__GNUC__) || defined(__INTEL_COMPILER)
     132              : 
     133              : #define PG_HAVE_ATOMIC_TEST_SET_FLAG
     134              : static inline bool
     135       209823 : pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
     136              : {
     137       209823 :     char        _res = 1;
     138              : 
     139       209823 :     __asm__ __volatile__(
     140              :         "  lock            \n"
     141              :         "  xchgb   %0,%1   \n"
     142              : :       "+q"(_res), "+m"(ptr->value)
     143              : :
     144              : :       "memory");
     145       209823 :     return _res == 0;
     146              : }
     147              : 
     148              : #define PG_HAVE_ATOMIC_CLEAR_FLAG
     149              : static inline void
     150        14407 : pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
     151              : {
     152              :     /*
     153              :      * On a TSO architecture like x86 it's sufficient to use a compiler
     154              :      * barrier to achieve release semantics.
     155              :      */
     156        14407 :     __asm__ __volatile__("" ::: "memory");
     157        14407 :     ptr->value = 0;
     158        14407 : }
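
Taken together, the two flag primitives above form an acquire/release pair.
A sketch of how a caller might use them through the public
pg_atomic_test_set_flag() / pg_atomic_clear_flag() wrappers from atomics.h
(the flag is assumed to have been set up with pg_atomic_init_flag()):

    static pg_atomic_flag my_lock;  /* assume pg_atomic_init_flag(&my_lock) ran */

    static void
    with_my_lock(void)
    {
        /* test-set returns true when the flag was clear and is now ours */
        while (!pg_atomic_test_set_flag(&my_lock))
            pg_spin_delay_impl();       /* be polite while waiting, see above */

        /* ... critical section ... */

        pg_atomic_clear_flag(&my_lock); /* release: barrier + plain store */
    }
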
     159              : 
     160              : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
     161              : static inline bool
     162    311363463 : pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
     163              :                                     uint32 *expected, uint32 newval)
     164              : {
     165              :     char    ret;
     166              : 
     167              :     /*
     168              :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     169              :      * equal to measure the success.
     170              :      */
     171    311363463 :     __asm__ __volatile__(
     172              :         "  lock                \n"
     173              :         "  cmpxchgl    %4,%5   \n"
     174              :         "   setz       %2      \n"
     175              : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     176    311363463 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     177              : :       "memory", "cc");
     178    311363463 :     return (bool) ret;
     179              : }
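
As a usage sketch: the generic atomics layer builds other read-modify-write
operations on top of this compare/exchange by retrying until the CAS
succeeds. Roughly (a simplified rendering of the retry-loop pattern used in
generic.h, not the literal code there):

    static uint32
    fetch_or_u32_sketch(volatile pg_atomic_uint32 *ptr, uint32 or_)
    {
        uint32      old;

        old = ptr->value;           /* unlocked read as the first guess */
        while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
        {
            /* on failure, cmpxchg has reloaded 'old' with the current value */
        }
        return old;
    }
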
     180              : 
     181              : #define PG_HAVE_ATOMIC_FETCH_ADD_U32
     182              : static inline uint32
     183      7734136 : pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
     184              : {
     185              :     uint32 res;
     186      7734136 :     __asm__ __volatile__(
     187              :         "  lock                \n"
     188              :         "  xaddl   %0,%1       \n"
     189              : :       "=q"(res), "=m"(ptr->value)
     190              : :       "0" (add_), "m"(ptr->value)
     191              : :       "memory", "cc");
     192      7734136 :     return res;
     193              : }
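
Because xadd leaves the previous value behind, the function returns the
counter as it was before the addition. A trivial usage sketch with the
public pg_atomic_fetch_add_u32() wrapper from atomics.h:

    /* hand out unique, ascending ticket numbers from a shared counter */
    static uint32
    next_ticket(pg_atomic_uint32 *counter)
    {
        return pg_atomic_fetch_add_u32(counter, 1);     /* old value == ticket */
    }
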
     194              : 
     195              : #ifdef __x86_64__
     196              : 
     197              : #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
     198              : static inline bool
     199    194267678 : pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
     200              :                                     uint64 *expected, uint64 newval)
     201              : {
     202              :     char    ret;
     203              : 
     204              :     AssertPointerAlignment(expected, 8);
     205              : 
     206              :     /*
     207              :      * Perform cmpxchg and use the zero flag which it implicitly sets when
     208              :      * equal to measure the success.
     209              :      */
     210    194267678 :     __asm__ __volatile__(
     211              :         "  lock                \n"
     212              :         "  cmpxchgq    %4,%5   \n"
     213              :         "   setz       %2      \n"
     214              : :       "=a" (*expected), "=m"(ptr->value), "=q" (ret)
     215    194267678 : :       "a" (*expected), "r" (newval), "m"(ptr->value)
     216              : :       "memory", "cc");
     217    194267678 :     return (bool) ret;
     218              : }
     219              : 
     220              : #define PG_HAVE_ATOMIC_FETCH_ADD_U64
     221              : static inline uint64
     222      2450650 : pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
     223              : {
     224              :     uint64 res;
     225      2450650 :     __asm__ __volatile__(
     226              :         "  lock                \n"
     227              :         "  xaddq   %0,%1       \n"
     228              : :       "=q"(res), "=m"(ptr->value)
     229              : :       "0" (add_), "m"(ptr->value)
     230              : :       "memory", "cc");
     231      2450650 :     return res;
     232              : }
     233              : 
     234              : #endif /* __x86_64__ */
     235              : 
     236              : #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
     237              : 
     238              : /*
     239              :  * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
      240              :  * since at least the 586, as well as on all x86-64 CPUs.
     241              :  */
      242              : #if defined(__i586__) || defined(__i686__) || /* gcc i586+ */  \
     243              :     (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
     244              :     defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, msvc */
     245              : #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
     246              : #endif /* 8 byte single-copy atomicity */
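
What the macro buys, roughly: with PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
defined, an aligned 8-byte load or store cannot be torn, so a 64-bit atomic
can be read with a plain load instead of a locked compare/exchange. A
simplified sketch of that distinction (the real fallback logic lives in
generic.h):

    static uint64
    read_u64_sketch(volatile pg_atomic_uint64 *ptr)
    {
    #ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
        return ptr->value;          /* aligned 8-byte load, never torn */
    #else
        uint64      old = 0;

        /*
         * No untorn plain load available: a CAS against 0 returns the
         * current value without changing it (it either swaps 0 for 0 or
         * fails and writes the current value into 'old').
         */
        pg_atomic_compare_exchange_u64_impl(ptr, &old, 0);
        return old;
    #endif
    }
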
        

Generated by: LCOV version 2.0-1