Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_cpu_x86.c
4 : * Runtime CPU feature detection for x86
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/port/pg_cpu_x86.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "c.h"
17 :
18 : #if defined(USE_SSE2) || defined(__i386__)
19 :
20 : #ifdef _MSC_VER
21 : #include <intrin.h>
22 : #else
23 : #include <cpuid.h>
24 : #endif
25 :
26 : #ifdef HAVE_XSAVE_INTRINSICS
27 : #include <immintrin.h>
28 : #endif
29 :
30 : #include "port/pg_cpu.h"
31 :
/*
 * XSAVE state-component bits we care about.  set_x86_features() tests these
 * against the value returned by _xgetbv(0) before trusting the AVX2 and
 * AVX-512 CPUID bits.
 *
 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
 * Chapter "MANAGING STATE USING THE XSAVE FEATURE SET"
 */
#define XMM			(1 << 1)
#define YMM			(1 << 2)
#define OPMASK		(1 << 5)
#define ZMM0_15		(1 << 6)
#define ZMM16_31	(1 << 7)
43 :
44 :
45 : /* array indexed by enum X86FeatureId */
46 : bool X86Features[X86FeaturesSize] = {0};
47 :
48 : static bool
49 3630 : mask_available(uint32 value, uint32 mask)
50 : {
51 3630 : return (value & mask) == mask;
52 : }
53 :
/*
 * Positions of the individual registers within the four-element array that
 * the CPUID helpers in this file fill in.
 */
#define EAX		0
#define EBX		1
#define ECX		2
#define EDX		3
59 :
60 : /*
61 : * Request CPUID information for the specified leaf.
62 : */
63 : static inline void
64 5445 : pg_cpuid(int leaf, unsigned int *reg)
65 : {
66 : #if defined(HAVE__GET_CPUID)
67 5445 : __get_cpuid(leaf, ®[EAX], ®[EBX], ®[ECX], ®[EDX]);
68 : #elif defined(HAVE__CPUID)
69 : __cpuid((int *) reg, leaf);
70 : #else
71 : #error cpuid instruction not available
72 : #endif
73 5445 : }
74 :
/*
 * Request CPUID information for the specified leaf and subleaf.
 *
 * "reg" must point to an array of four unsigned ints.  It is zeroed first,
 * so the caller sees all-zero registers whenever the query could not be
 * performed.
 *
 * Returns true if the CPUID leaf/subleaf is supported, false otherwise.
 * With __cpuidex() there is no support indication, so that path always
 * returns true; when neither intrinsic is available the result is false.
 */
static inline bool
pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
{
	memset(reg, 0, 4 * sizeof(unsigned int));
#if defined(HAVE__GET_CPUID_COUNT)
	return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
#elif defined(HAVE__CPUIDEX)
	__cpuidex((int *) reg, leaf, subleaf);
	return true;
#else
	return false;
#endif
}
93 :
94 : /*
95 : * Parse the CPU ID info for runtime checks.
96 : */
97 : #ifdef HAVE_XSAVE_INTRINSICS
98 : pg_attribute_target("xsave")
99 : #endif
100 : void
101 1815 : set_x86_features(void)
102 : {
103 1815 : unsigned int reg[4] = {0};
104 : bool have_osxsave;
105 :
106 1815 : pg_cpuid(0x01, reg);
107 :
108 1815 : X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
109 1815 : X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
110 1815 : X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
111 1815 : have_osxsave = reg[ECX] >> 27 & 1;
112 :
113 1815 : pg_cpuid_subleaf(0x07, 0, reg);
114 :
115 1815 : X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
116 :
117 : /* leaf 7 features that depend on OSXSAVE */
118 1815 : if (have_osxsave)
119 : {
120 1815 : uint32 xcr0_val = 0;
121 :
122 : #ifdef HAVE_XSAVE_INTRINSICS
123 : /* get value of Extended Control Register */
124 1815 : xcr0_val = _xgetbv(0);
125 : #endif
126 :
127 : /* Are YMM registers enabled? */
128 1815 : if (mask_available(xcr0_val, XMM | YMM))
129 1815 : X86Features[PG_AVX2] = reg[EBX] >> 5 & 1;
130 :
131 : /* Are ZMM registers enabled? */
132 1815 : if (mask_available(xcr0_val, XMM | YMM |
133 : OPMASK | ZMM0_15 | ZMM16_31))
134 : {
135 1815 : X86Features[PG_AVX512_BW] = reg[EBX] >> 30 & 1;
136 1815 : X86Features[PG_AVX512_VL] = reg[EBX] >> 31 & 1;
137 :
138 1815 : X86Features[PG_AVX512_VPCLMULQDQ] = reg[ECX] >> 10 & 1;
139 1815 : X86Features[PG_AVX512_VPOPCNTDQ] = reg[ECX] >> 14 & 1;
140 : }
141 : }
142 :
143 : /* Check for other TSC related flags */
144 1815 : pg_cpuid(0x80000001, reg);
145 1815 : X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
146 :
147 1815 : pg_cpuid(0x80000007, reg);
148 1815 : X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
149 :
150 1815 : X86Features[INIT_PG_X86] = true;
151 1815 : }
152 :
153 : /* TSC (Time-stamp Counter) handling code */
154 :
155 : static uint32 x86_hypervisor_tsc_frequency_khz(void);
156 :
157 : /*
158 : * Determine the TSC frequency of the CPU through CPUID, where supported.
159 : *
160 : * Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
161 : * 0 indicates the frequency information was not accessible via CPUID.
162 : */
163 : uint32
164 1290 : x86_tsc_frequency_khz(void)
165 : {
166 1290 : unsigned int reg[4] = {0};
167 :
168 : /*
169 : * If we're inside a virtual machine, try to fetch the TSC frequency from
170 : * the hypervisor, using a hypervisor specific method.
171 : *
172 : * Note it is not safe to utilize the regular 0x15/0x16 CPUID registers
173 : * (i.e. the logic below) in virtual machines, as they have been observed
174 : * to be wildly incorrect when virtualized.
175 : */
176 1290 : if (x86_feature_available(PG_HYPERVISOR))
177 1290 : return x86_hypervisor_tsc_frequency_khz();
178 :
179 : /*
180 : * On modern Intel CPUs, the TSC is implemented by invariant timekeeping
181 : * hardware, also called "Always Running Timer", or ART. The ART stays
182 : * consistent even if the CPU changes frequency due to changing power
183 : * levels.
184 : *
185 : * As documented in "Determining the Processor Base Frequency" in the
186 : * "IntelĀ® 64 and IA-32 Architectures Software Developer's Manual",
187 : * February 2026 Edition, we can get the TSC frequency as follows:
188 : *
189 : * Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
190 : * CPUID.15H:EAX[31:0]
191 : *
192 : * With CPUID.15H:ECX representing the nominal core crystal clock
193 : * frequency, and EAX/EBX representing values used to translate the TSC
194 : * value to that frequency, see "Chapter 20.17 "Time-Stamp Counter" of
195 : * that manual.
196 : *
197 : * Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
198 : * such we fall back to alternate approaches.
199 : */
200 0 : pg_cpuid(0x15, reg);
201 0 : if (reg[ECX] > 0)
202 : {
203 : /*
204 : * EBX not being set indicates invariant TSC is not available. Require
205 : * EAX being non-zero too, to avoid a theoretical divide by zero.
206 : */
207 0 : if (reg[EAX] == 0 || reg[EBX] == 0)
208 0 : return 0;
209 :
210 0 : return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
211 : }
212 :
213 : /*
214 : * When CPUID.15H is not available/incomplete, we can instead try to get
215 : * the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
216 : * Frequency Information Leaf".
217 : */
218 0 : pg_cpuid(0x16, reg);
219 0 : if (reg[EAX] > 0)
220 0 : return reg[EAX] * 1000;
221 :
222 0 : return 0;
223 : }
224 :
225 : /*
226 : * Support for reading TSC frequency for hypervisors passing it to a guest VM.
227 : *
228 : * Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
229 : * available at the vendor-specific 0x40000010 leaf in the EAX register.
230 : *
231 : * For some other Hypervisors that have an invariant TSC, e.g. HyperV, we would
232 : * need to access a model-specific register (MSR) to get the frequency. MSRs are
233 : * separate from CPUID and typically not available for unprivileged processes,
234 : * so we can't get the frequency this way.
235 : */
236 : #define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177) /* VMwareVMware */
237 : #define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d) /* KVMKVMKVM */
238 : static uint32
239 1290 : x86_hypervisor_tsc_frequency_khz(void)
240 : {
241 : #if defined(HAVE__CPUIDEX)
242 : unsigned int reg[4] = {0};
243 :
244 : /*
245 : * The hypervisor is determined using the 0x40000000 Hypervisor
246 : * information leaf, which requires use of __cpuidex to set ECX to 0 to
247 : * access it.
248 : *
249 : * The similar __get_cpuid_count function does not work as expected since
250 : * it contains a check for __get_cpuid_max, which has been observed to be
251 : * lower than the special Hypervisor leaf, despite it being available.
252 : */
253 : __cpuidex((int *) reg, 0x40000000, 0);
254 :
255 : if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
256 : {
257 : __cpuidex((int *) reg, 0x40000010, 0);
258 : if (reg[EAX] > 0)
259 : return reg[EAX];
260 : }
261 : #endif /* HAVE__CPUIDEX */
262 :
263 1290 : return 0;
264 : }
265 :
266 : #else /* defined(USE_SSE2) || defined(__i386__) */
267 :
/*
 * Nothing above was compiled for this target; define one symbol to prevent
 * linker complaints about an empty module.
 */
extern int	pg_cpu_x86_dummy_variable;
int			pg_cpu_x86_dummy_variable = 0;
271 :
272 : #endif /* ! (USE_SSE2 || __i386__) */
|