LCOV - code coverage report
Current view: top level - src/port - pg_cpu_x86.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 75.9 % 58 44
Test Date: 2026-05-25 09:16:16 Functions: 100.0 % 6 6
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * pg_cpu_x86.c
       4              :  *    Runtime CPU feature detection for x86
       5              :  *
       6              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
       7              :  * Portions Copyright (c) 1994, Regents of the University of California
       8              :  *
       9              :  *
      10              :  * IDENTIFICATION
      11              :  *    src/port/pg_cpu_x86.c
      12              :  *
      13              :  *-------------------------------------------------------------------------
      14              :  */
      15              : 
      16              : #ifndef FRONTEND
      17              : #include "postgres.h"
      18              : #else
      19              : #include "postgres_fe.h"
      20              : #endif
      21              : 
      22              : #if defined(USE_SSE2) || defined(__i386__)
      23              : 
      24              : #ifdef _MSC_VER
      25              : #include <intrin.h>
      26              : #else
      27              : #include <cpuid.h>
      28              : #endif
      29              : 
      30              : #ifdef HAVE_XSAVE_INTRINSICS
      31              : #include <immintrin.h>
      32              : #endif
      33              : 
      34              : #include "port/pg_cpu.h"
      35              : 
      36              : /*
      37              :  * XSAVE state component bits that we need
      38              :  *
      39              :  * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
      40              :  * Chapter "MANAGING STATE USING THE XSAVE FEATURE SET"
      41              :  */
      42              : #define XMM         (1<<1)
      43              : #define YMM         (1<<2)
      44              : #define OPMASK      (1<<5)
      45              : #define ZMM0_15     (1<<6)
      46              : #define ZMM16_31    (1<<7)
      47              : 
      48              : 
      49              : /* array indexed by enum X86FeatureId */
      50              : bool        X86Features[X86FeaturesSize] = {0};
      51              : 
      52              : static bool
      53         3638 : mask_available(uint32 value, uint32 mask)
      54              : {
      55         3638 :     return (value & mask) == mask;
      56              : }
      57              : 
      58              : /* Named indexes for CPUID register array */
      59              : #define EAX 0
      60              : #define EBX 1
      61              : #define ECX 2
      62              : #define EDX 3
      63              : 
      64              : /*
      65              :  * Request CPUID information for the specified leaf.
      66              :  */
      67              : static inline void
      68         5457 : pg_cpuid(int leaf, unsigned int *reg)
      69              : {
      70         5457 :     memset(reg, 0, 4 * sizeof(unsigned int));
      71              : #if defined(HAVE__GET_CPUID)
      72         5457 :     __get_cpuid(leaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]);
      73              : #elif defined(HAVE__CPUID)
      74              :     __cpuid((int *) reg, leaf);
      75              : #endif
      76         5457 : }
      77              : 
      78              : /*
      79              :  * Request CPUID information for the specified leaf and subleaf.
      80              :  *
      81              :  * Returns true if the CPUID leaf/subleaf is supported, false otherwise.
      82              :  */
      83              : static inline bool
      84         1819 : pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
      85              : {
      86         1819 :     memset(reg, 0, 4 * sizeof(unsigned int));
      87              : #if defined(HAVE__GET_CPUID_COUNT)
      88         1819 :     return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
      89              : #elif defined(HAVE__CPUIDEX)
      90              :     __cpuidex((int *) reg, leaf, subleaf);
      91              :     return true;
      92              : #else
      93              :     return false;
      94              : #endif
      95              : }
      96              : 
      97              : /*
      98              :  * Parse the CPU ID info for runtime checks.
      99              :  */
     100              : #ifdef HAVE_XSAVE_INTRINSICS
     101              : pg_attribute_target("xsave")
     102              : #endif
     103              : void
     104         1819 : set_x86_features(void)
     105              : {
     106         1819 :     unsigned int reg[4] = {0};
     107              :     bool        have_osxsave;
     108              : 
     109         1819 :     pg_cpuid(0x01, reg);
     110              : 
     111         1819 :     X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
     112         1819 :     X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
     113         1819 :     X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
     114         1819 :     have_osxsave = reg[ECX] >> 27 & 1;
     115              : 
     116         1819 :     pg_cpuid_subleaf(0x07, 0, reg);
     117              : 
     118         1819 :     X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
     119              : 
     120              :     /* leaf 7 features that depend on OSXSAVE */
     121         1819 :     if (have_osxsave)
     122              :     {
     123         1819 :         uint32      xcr0_val = 0;
     124              : 
     125              : #ifdef HAVE_XSAVE_INTRINSICS
     126              :         /* get value of Extended Control Register */
     127         1819 :         xcr0_val = _xgetbv(0);
     128              : #endif
     129              : 
     130              :         /* Are YMM registers enabled? */
     131         1819 :         if (mask_available(xcr0_val, XMM | YMM))
     132         1819 :             X86Features[PG_AVX2] = reg[EBX] >> 5 & 1;
     133              : 
     134              :         /* Are ZMM registers enabled? */
     135         1819 :         if (mask_available(xcr0_val, XMM | YMM |
     136              :                            OPMASK | ZMM0_15 | ZMM16_31))
     137              :         {
     138         1819 :             X86Features[PG_AVX512_BW] = reg[EBX] >> 30 & 1;
     139         1819 :             X86Features[PG_AVX512_VL] = reg[EBX] >> 31 & 1;
     140              : 
     141         1819 :             X86Features[PG_AVX512_VPCLMULQDQ] = reg[ECX] >> 10 & 1;
     142         1819 :             X86Features[PG_AVX512_VPOPCNTDQ] = reg[ECX] >> 14 & 1;
     143              :         }
     144              :     }
     145              : 
     146              :     /* Check for other TSC related flags */
     147         1819 :     pg_cpuid(0x80000001, reg);
     148         1819 :     X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
     149              : 
     150         1819 :     pg_cpuid(0x80000007, reg);
     151         1819 :     X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
     152              : 
     153         1819 :     X86Features[INIT_PG_X86] = true;
     154         1819 : }
     155              : 
     156              : /* TSC (Time-stamp Counter) handling code */
     157              : 
     158              : static uint32 x86_hypervisor_tsc_frequency_khz(void);
     159              : 
     160              : /*
     161              :  * Determine the TSC frequency of the CPU through CPUID, where supported.
     162              :  *
     163              :  * Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
     164              :  * 0 indicates the frequency information was not accessible via CPUID.
     165              :  *
     166              :  * If source is not NULL, it must be a string buffer of capacity source_size;
     167              :  * info on the TSC frequency source is appended to it.
     168              :  */
     169              : uint32
     170         1293 : x86_tsc_frequency_khz(char *source, size_t source_size)
     171              : {
     172         1293 :     unsigned int reg[4] = {0};
     173              : 
     174              :     /*
     175              :      * If we're inside a virtual machine, try to fetch the TSC frequency from
     176              :      * the hypervisor, using a hypervisor specific method.
     177              :      *
     178              :      * Note that it is not safe to rely on the regular 0x15/0x16 CPUID leaves
     179              :      * (i.e. the logic below) in virtual machines, as their values have been
     180              :      * observed to be wildly incorrect when virtualized.
     181              :      */
     182         1293 :     if (x86_feature_available(PG_HYPERVISOR))
     183              :     {
     184         1293 :         uint32      freq = x86_hypervisor_tsc_frequency_khz();
     185              : 
     186         1293 :         if (source)
     187              :         {
     188         1293 :             strlcat(source, ", hypervisor", source_size);
     189         1293 :             if (freq > 0)
     190            0 :                 strlcat(source, ", cpuid 0x40000010", source_size);
     191              :         }
     192         1293 :         return freq;
     193              :     }
     194              : 
     195              :     /*
     196              :      * On modern Intel CPUs, the TSC is implemented by invariant timekeeping
     197              :      * hardware, also called "Always Running Timer", or ART. The ART stays
     198              :      * consistent even if the CPU changes frequency due to changing power
     199              :      * levels.
     200              :      *
     201              :      * As documented in "Determining the Processor Base Frequency" in the
     202              :      * "Intel® 64 and IA-32 Architectures Software Developer's Manual",
     203              :      * February 2026 Edition, we can get the TSC frequency as follows:
     204              :      *
     205              :      * Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
     206              :      * CPUID.15H:EAX[31:0]
     207              :      *
     208              :      * Here CPUID.15H:ECX is the nominal core crystal clock frequency in
     209              :      * Hz, and EBX/EAX is the ratio of the TSC frequency to that crystal
     210              :      * clock frequency; see Chapter 20.17, "Time-Stamp Counter", of that
     211              :      * manual.
     212              :      *
     213              :      * Older Intel CPUs and CPUs from other vendors do not set CPUID.15H:ECX;
     214              :      * in that case we fall back to alternative approaches.
     215              :      */
     216            0 :     pg_cpuid(0x15, reg);
     217            0 :     if (reg[ECX] > 0)
     218              :     {
     219              :         /*
     220              :          * EBX not being set indicates invariant TSC is not available. Require
     221              :          * EAX being non-zero too, to avoid a theoretical divide by zero.
     222              :          */
     223            0 :         if (reg[EAX] == 0 || reg[EBX] == 0)
     224            0 :             return 0;
     225              : 
     226            0 :         if (source)
     227            0 :             strlcat(source, ", cpuid 0x15", source_size);
     228              : 
     229            0 :         return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
     230              :     }
     231              : 
     232              :     /*
     233              :      * When CPUID.15H is not available/incomplete, we can instead try to get
     234              :      * the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
     235              :      * Frequency Information Leaf".
     236              :      */
     237            0 :     pg_cpuid(0x16, reg);
     238            0 :     if (reg[EAX] > 0)
     239              :     {
     240            0 :         if (source)
     241            0 :             strlcat(source, ", cpuid 0x16", source_size);
     242              : 
     243            0 :         return reg[EAX] * 1000;
     244              :     }
     245              : 
     246            0 :     return 0;
     247              : }
     248              : 
     249              : /*
     250              :  * Support for reading TSC frequency for hypervisors passing it to a guest VM.
     251              :  *
     252              :  * Two hypervisors (VMware and KVM) are known to make the TSC frequency in kHz
     253              :  * available at the vendor-specific 0x40000010 leaf in the EAX register.
     254              :  *
     255              :  * For some other hypervisors that have an invariant TSC, e.g. Hyper-V, we would
     256              :  * need to access a model-specific register (MSR) to get the frequency. MSRs are
     257              :  * separate from CPUID and typically not available for unprivileged processes,
     258              :  * so we can't get the frequency this way.
     259              :  */
     260              : #define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177)   /* VMwareVMware */
     261              : #define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d)  /* KVMKVMKVM */
     262              : static uint32
     263         1293 : x86_hypervisor_tsc_frequency_khz(void)
     264              : {
     265              : #if defined(HAVE__CPUIDEX)
     266              :     unsigned int reg[4] = {0};
     267              : 
     268              :     /*
     269              :      * The hypervisor is determined using the 0x40000000 Hypervisor
     270              :      * information leaf, which requires use of __cpuidex to set ECX to 0 to
     271              :      * access it.
     272              :      *
     273              :      * The similar __get_cpuid_count function does not work as expected since
     274              :      * it contains a check for __get_cpuid_max, which has been observed to be
     275              :      * lower than the special Hypervisor leaf, despite it being available.
     276              :      */
     277              :     __cpuidex((int *) reg, 0x40000000, 0);
     278              : 
     279              :     if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
     280              :     {
     281              :         __cpuidex((int *) reg, 0x40000010, 0);
     282              :         if (reg[EAX] > 0)
     283              :             return reg[EAX];
     284              :     }
     285              : #endif                          /* HAVE__CPUIDEX */
     286              : 
     287         1293 :     return 0;
     288              : }
     289              : 
     290              : #else                           /* defined(USE_SSE2) || defined(__i386__) */
     291              : 
     292              : /* prevent linker complaints about empty module */
     293              : extern int  pg_cpu_x86_dummy_variable;
     294              : int         pg_cpu_x86_dummy_variable = 0;
     295              : 
     296              : #endif                          /* ! (USE_SSE2 || __i386__) */
        

Generated by: LCOV version 2.0-1