LCOV - code coverage report
Current view: top level - src/port - pg_cpu_x86.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 81.2 % 48 39
Test Date: 2026-05-04 19:16:35 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_cpu_x86.c
       4              :  *    Runtime CPU feature detection for x86
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/port/pg_cpu_x86.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #include "c.h"
      17              : 
      18              : #if defined(USE_SSE2) || defined(__i386__)
      19              : 
      20              : #ifdef _MSC_VER
      21              : #include <intrin.h>
      22              : #else
      23              : #include <cpuid.h>
      24              : #endif
      25              : 
      26              : #ifdef HAVE_XSAVE_INTRINSICS
      27              : #include <immintrin.h>
      28              : #endif
      29              : 
      30              : #include "port/pg_cpu.h"
      31              : 
      32              : /*
      33              :  * XSAVE state component bits that we need
      34              :  *
      35              :  * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
      36              :  * Chapter "MANAGING STATE USING THE XSAVE FEATURE SET"
      37              :  */
      38              : #define XMM         (1<<1)
      39              : #define YMM         (1<<2)
      40              : #define OPMASK      (1<<5)
      41              : #define ZMM0_15     (1<<6)
      42              : #define ZMM16_31    (1<<7)
      43              : 
      44              : 
      45              : /* array indexed by enum X86FeatureId */
      46              : bool        X86Features[X86FeaturesSize] = {0};
      47              : 
      48              : static bool
      49         3630 : mask_available(uint32 value, uint32 mask)
      50              : {
      51         3630 :     return (value & mask) == mask;
      52              : }
      53              : 
      54              : /* Named indexes for CPUID register array */
      55              : #define EAX 0
      56              : #define EBX 1
      57              : #define ECX 2
      58              : #define EDX 3
      59              : 
      60              : /*
      61              :  * Request CPUID information for the specified leaf.
      62              :  */
      63              : static inline void
      64         5445 : pg_cpuid(int leaf, unsigned int *reg)
      65              : {
      66              : #if defined(HAVE__GET_CPUID)
      67         5445 :     __get_cpuid(leaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]);
      68              : #elif defined(HAVE__CPUID)
      69              :     __cpuid((int *) reg, leaf);
      70              : #else
      71              : #error cpuid instruction not available
      72              : #endif
      73         5445 : }
      74              : 
      75              : /*
      76              :  * Request CPUID information for the specified leaf and subleaf.
      77              :  *
      78              :  * Returns true if the CPUID leaf/subleaf is supported, false otherwise.
      79              :  */
      80              : static inline bool
      81         1815 : pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
      82              : {
      83         1815 :     memset(reg, 0, 4 * sizeof(unsigned int));
      84              : #if defined(HAVE__GET_CPUID_COUNT)
      85         1815 :     return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
      86              : #elif defined(HAVE__CPUIDEX)
      87              :     __cpuidex((int *) reg, leaf, subleaf);
      88              :     return true;
      89              : #else
      90              :     return false;
      91              : #endif
      92              : }
      93              : 
      94              : /*
      95              :  * Parse the CPU ID info for runtime checks.
      96              :  */
      97              : #ifdef HAVE_XSAVE_INTRINSICS
      98              : pg_attribute_target("xsave")
      99              : #endif
     100              : void
     101         1815 : set_x86_features(void)
     102              : {
     103         1815 :     unsigned int reg[4] = {0};
     104              :     bool        have_osxsave;
     105              : 
     106         1815 :     pg_cpuid(0x01, reg);
     107              : 
     108         1815 :     X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
     109         1815 :     X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
     110         1815 :     X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
     111         1815 :     have_osxsave = reg[ECX] >> 27 & 1;
     112              : 
     113         1815 :     pg_cpuid_subleaf(0x07, 0, reg);
     114              : 
     115         1815 :     X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
     116              : 
     117              :     /* leaf 7 features that depend on OSXSAVE */
     118         1815 :     if (have_osxsave)
     119              :     {
     120         1815 :         uint32      xcr0_val = 0;
     121              : 
     122              : #ifdef HAVE_XSAVE_INTRINSICS
     123              :         /* get value of Extended Control Register */
     124         1815 :         xcr0_val = _xgetbv(0);
     125              : #endif
     126              : 
     127              :         /* Are YMM registers enabled? */
     128         1815 :         if (mask_available(xcr0_val, XMM | YMM))
     129         1815 :             X86Features[PG_AVX2] = reg[EBX] >> 5 & 1;
     130              : 
     131              :         /* Are ZMM registers enabled? */
     132         1815 :         if (mask_available(xcr0_val, XMM | YMM |
     133              :                            OPMASK | ZMM0_15 | ZMM16_31))
     134              :         {
     135         1815 :             X86Features[PG_AVX512_BW] = reg[EBX] >> 30 & 1;
     136         1815 :             X86Features[PG_AVX512_VL] = reg[EBX] >> 31 & 1;
     137              : 
     138         1815 :             X86Features[PG_AVX512_VPCLMULQDQ] = reg[ECX] >> 10 & 1;
     139         1815 :             X86Features[PG_AVX512_VPOPCNTDQ] = reg[ECX] >> 14 & 1;
     140              :         }
     141              :     }
     142              : 
     143              :     /* Check for other TSC related flags */
     144         1815 :     pg_cpuid(0x80000001, reg);
     145         1815 :     X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
     146              : 
     147         1815 :     pg_cpuid(0x80000007, reg);
     148         1815 :     X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
     149              : 
     150         1815 :     X86Features[INIT_PG_X86] = true;
     151         1815 : }
     152              : 
     153              : /* TSC (Time-stamp Counter) handling code */
     154              : 
     155              : static uint32 x86_hypervisor_tsc_frequency_khz(void);
     156              : 
     157              : /*
     158              :  * Determine the TSC frequency of the CPU through CPUID, where supported.
     159              :  *
     160              :  * Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
     161              :  * 0 indicates the frequency information was not accessible via CPUID.
     162              :  */
     163              : uint32
     164         1290 : x86_tsc_frequency_khz(void)
     165              : {
     166         1290 :     unsigned int reg[4] = {0};
     167              : 
     168              :     /*
     169              :      * If we're inside a virtual machine, try to fetch the TSC frequency from
     170              :      * the hypervisor, using a hypervisor specific method.
     171              :      *
     172              :      * Note it is not safe to utilize the regular 0x15/0x16 CPUID registers
     173              :      * (i.e. the logic below) in virtual machines, as they have been observed
     174              :      * to be wildly incorrect when virtualized.
     175              :      */
     176         1290 :     if (x86_feature_available(PG_HYPERVISOR))
     177         1290 :         return x86_hypervisor_tsc_frequency_khz();
     178              : 
     179              :     /*
     180              :      * On modern Intel CPUs, the TSC is implemented by invariant timekeeping
     181              :      * hardware, also called "Always Running Timer", or ART. The ART stays
     182              :      * consistent even if the CPU changes frequency due to changing power
     183              :      * levels.
     184              :      *
     185              :      * As documented in "Determining the Processor Base Frequency" in the
     186              :      * "Intel® 64 and IA-32 Architectures Software Developer's Manual",
     187              :      * February 2026 Edition, we can get the TSC frequency as follows:
     188              :      *
     189              :      * Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
     190              :      * CPUID.15H:EAX[31:0]
     191              :      *
     192              :      * With CPUID.15H:ECX representing the nominal core crystal clock
     193              :      * frequency, and EAX/EBX representing values used to translate the TSC
     194              :      * value to that frequency, see Chapter 20.17 "Time-Stamp Counter" of
     195              :      * that manual.
     196              :      *
     197              :      * Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
     198              :      * such we fall back to alternate approaches.
     199              :      */
     200            0 :     pg_cpuid(0x15, reg);
     201            0 :     if (reg[ECX] > 0)
     202              :     {
     203              :         /*
     204              :          * EBX not being set indicates invariant TSC is not available. Require
     205              :          * EAX being non-zero too, to avoid a theoretical divide by zero.
     206              :          */
     207            0 :         if (reg[EAX] == 0 || reg[EBX] == 0)
     208            0 :             return 0;
     209              : 
     210            0 :         return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
     211              :     }
     212              : 
     213              :     /*
     214              :      * When CPUID.15H is not available/incomplete, we can instead try to get
     215              :      * the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
     216              :      * Frequency Information Leaf".
     217              :      */
     218            0 :     pg_cpuid(0x16, reg);
     219            0 :     if (reg[EAX] > 0)
     220            0 :         return reg[EAX] * 1000;
     221              : 
     222            0 :     return 0;
     223              : }
     224              : 
     225              : /*
     226              :  * Support for reading TSC frequency for hypervisors passing it to a guest VM.
     227              :  *
     228              :  * Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
     229              :  * available at the vendor-specific 0x40000010 leaf in the EAX register.
     230              :  *
     231              :  * For some other Hypervisors that have an invariant TSC, e.g. Hyper-V, we would
     232              :  * need to access a model-specific register (MSR) to get the frequency. MSRs are
     233              :  * separate from CPUID and typically not available for unprivileged processes,
     234              :  * so we can't get the frequency this way.
     235              :  */
     236              : #define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177)   /* VMwareVMware */
     237              : #define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d)  /* KVMKVMKVM */
     238              : static uint32
     239         1290 : x86_hypervisor_tsc_frequency_khz(void)
     240              : {
     241              : #if defined(HAVE__CPUIDEX)
     242              :     unsigned int reg[4] = {0};
     243              : 
     244              :     /*
     245              :      * The hypervisor is determined using the 0x40000000 Hypervisor
     246              :      * information leaf, which requires use of __cpuidex to set ECX to 0 to
     247              :      * access it.
     248              :      *
     249              :      * The similar __get_cpuid_count function does not work as expected since
     250              :      * it contains a check for __get_cpuid_max, which has been observed to be
     251              :      * lower than the special Hypervisor leaf, despite it being available.
     252              :      */
     253              :     __cpuidex((int *) reg, 0x40000000, 0);
     254              : 
     255              :     if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
     256              :     {
     257              :         __cpuidex((int *) reg, 0x40000010, 0);
     258              :         if (reg[EAX] > 0)
     259              :             return reg[EAX];
     260              :     }
     261              : #endif                          /* HAVE__CPUIDEX */
     262              : 
     263         1290 :     return 0;
     264              : }
     265              : 
     266              : #else                           /* defined(USE_SSE2) || defined(__i386__) */
     267              : 
     268              : /* prevent linker complaints about empty module */
     269              : extern int  pg_cpu_x86_dummy_variable;
     270              : int         pg_cpu_x86_dummy_variable = 0;
     271              : 
     272              : #endif                          /* ! (USE_SSE2 || __i386__) */
        

Generated by: LCOV version 2.0-1