Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_cpu_x86.c
4 : * Runtime CPU feature detection for x86
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/port/pg_cpu_x86.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #ifndef FRONTEND
17 : #include "postgres.h"
18 : #else
19 : #include "postgres_fe.h"
20 : #endif
21 :
22 : #if defined(USE_SSE2) || defined(__i386__)
23 :
24 : #ifdef _MSC_VER
25 : #include <intrin.h>
26 : #else
27 : #include <cpuid.h>
28 : #endif
29 :
30 : #ifdef HAVE_XSAVE_INTRINSICS
31 : #include <immintrin.h>
32 : #endif
33 :
34 : #include "port/pg_cpu.h"
35 :
/*
 * XSAVE state component bits that we need.  These are tested against the
 * value read from XCR0 in set_x86_features().
 *
 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf
 * Chapter "MANAGING STATE USING THE XSAVE FEATURE SET"
 */
#define XMM			0x02		/* bit 1 */
#define YMM			0x04		/* bit 2 */
#define OPMASK		0x20		/* bit 5 */
#define ZMM0_15		0x40		/* bit 6 */
#define ZMM16_31	0x80		/* bit 7 */
47 :
48 :
49 : /* array indexed by enum X86FeatureId */
50 : bool X86Features[X86FeaturesSize] = {0};
51 :
52 : static bool
53 3638 : mask_available(uint32 value, uint32 mask)
54 : {
55 3638 : return (value & mask) == mask;
56 : }
57 :
/*
 * Named indexes for the four-element CPUID register array that pg_cpuid()
 * and pg_cpuid_subleaf() fill in.
 */
#define EAX		0
#define EBX		1
#define ECX		2
#define EDX		3
63 :
/*
 * Request CPUID information for the specified leaf.
 *
 * reg must point to an array of at least four unsigned ints.  It is cleared
 * first and then receives EAX, EBX, ECX and EDX, in that order (see the
 * EAX..EDX index macros).  If no CPUID intrinsic was found at build time, or
 * the intrinsic does not write to the array, the caller sees all zeros, i.e.
 * "no feature bits set".
 */
static inline void
pg_cpuid(int leaf, unsigned int *reg)
{
	memset(reg, 0, 4 * sizeof(unsigned int));
#if defined(HAVE__GET_CPUID)
	/* result deliberately ignored: on failure reg keeps its zero fill */
	(void) __get_cpuid(leaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]);
#elif defined(HAVE__CPUID)
	__cpuid((int *) reg, leaf);
#endif
}
77 :
/*
 * Request CPUID information for the specified leaf and subleaf.
 *
 * reg must point to an array of at least four unsigned ints.  It is cleared
 * first and then receives EAX, EBX, ECX and EDX, in that order.
 *
 * Returns true if the CPUID leaf/subleaf is supported, false otherwise.  Note
 * that the __cpuidex variant performs no support check and always returns
 * true, while a build with neither intrinsic always returns false and leaves
 * reg zeroed.
 */
static inline bool
pg_cpuid_subleaf(int leaf, int subleaf, unsigned int *reg)
{
	memset(reg, 0, 4 * sizeof(unsigned int));
#if defined(HAVE__GET_CPUID_COUNT)
	return __get_cpuid_count(leaf, subleaf, &reg[EAX], &reg[EBX], &reg[ECX], &reg[EDX]) == 1;
#elif defined(HAVE__CPUIDEX)
	__cpuidex((int *) reg, leaf, subleaf);
	return true;
#else
	return false;
#endif
}
96 :
97 : /*
98 : * Parse the CPU ID info for runtime checks.
99 : */
100 : #ifdef HAVE_XSAVE_INTRINSICS
101 : pg_attribute_target("xsave")
102 : #endif
103 : void
104 1819 : set_x86_features(void)
105 : {
106 1819 : unsigned int reg[4] = {0};
107 : bool have_osxsave;
108 :
109 1819 : pg_cpuid(0x01, reg);
110 :
111 1819 : X86Features[PG_SSE4_2] = reg[ECX] >> 20 & 1;
112 1819 : X86Features[PG_POPCNT] = reg[ECX] >> 23 & 1;
113 1819 : X86Features[PG_HYPERVISOR] = reg[ECX] >> 31 & 1;
114 1819 : have_osxsave = reg[ECX] >> 27 & 1;
115 :
116 1819 : pg_cpuid_subleaf(0x07, 0, reg);
117 :
118 1819 : X86Features[PG_TSC_ADJUST] = reg[EBX] >> 1 & 1;
119 :
120 : /* leaf 7 features that depend on OSXSAVE */
121 1819 : if (have_osxsave)
122 : {
123 1819 : uint32 xcr0_val = 0;
124 :
125 : #ifdef HAVE_XSAVE_INTRINSICS
126 : /* get value of Extended Control Register */
127 1819 : xcr0_val = _xgetbv(0);
128 : #endif
129 :
130 : /* Are YMM registers enabled? */
131 1819 : if (mask_available(xcr0_val, XMM | YMM))
132 1819 : X86Features[PG_AVX2] = reg[EBX] >> 5 & 1;
133 :
134 : /* Are ZMM registers enabled? */
135 1819 : if (mask_available(xcr0_val, XMM | YMM |
136 : OPMASK | ZMM0_15 | ZMM16_31))
137 : {
138 1819 : X86Features[PG_AVX512_BW] = reg[EBX] >> 30 & 1;
139 1819 : X86Features[PG_AVX512_VL] = reg[EBX] >> 31 & 1;
140 :
141 1819 : X86Features[PG_AVX512_VPCLMULQDQ] = reg[ECX] >> 10 & 1;
142 1819 : X86Features[PG_AVX512_VPOPCNTDQ] = reg[ECX] >> 14 & 1;
143 : }
144 : }
145 :
146 : /* Check for other TSC related flags */
147 1819 : pg_cpuid(0x80000001, reg);
148 1819 : X86Features[PG_RDTSCP] = reg[EDX] >> 27 & 1;
149 :
150 1819 : pg_cpuid(0x80000007, reg);
151 1819 : X86Features[PG_TSC_INVARIANT] = reg[EDX] >> 8 & 1;
152 :
153 1819 : X86Features[INIT_PG_X86] = true;
154 1819 : }
155 :
156 : /* TSC (Time-stamp Counter) handling code */
157 :
158 : static uint32 x86_hypervisor_tsc_frequency_khz(void);
159 :
160 : /*
161 : * Determine the TSC frequency of the CPU through CPUID, where supported.
162 : *
163 : * Needed to interpret the tick value returned by RDTSC/RDTSCP. Return value of
164 : * 0 indicates the frequency information was not accessible via CPUID.
165 : *
166 : * The optional source argument may contain a pre-allocated string of capacity
167 : * source_size that will be concatenated with info on the TSC frequency source.
168 : */
169 : uint32
170 1293 : x86_tsc_frequency_khz(char *source, size_t source_size)
171 : {
172 1293 : unsigned int reg[4] = {0};
173 :
174 : /*
175 : * If we're inside a virtual machine, try to fetch the TSC frequency from
176 : * the hypervisor, using a hypervisor specific method.
177 : *
178 : * Note it is not safe to utilize the regular 0x15/0x16 CPUID registers
179 : * (i.e. the logic below) in virtual machines, as they have been observed
180 : * to be wildly incorrect when virtualized.
181 : */
182 1293 : if (x86_feature_available(PG_HYPERVISOR))
183 : {
184 1293 : uint32 freq = x86_hypervisor_tsc_frequency_khz();
185 :
186 1293 : if (source)
187 : {
188 1293 : strlcat(source, ", hypervisor", source_size);
189 1293 : if (freq > 0)
190 0 : strlcat(source, ", cpuid 0x40000010", source_size);
191 : }
192 1293 : return freq;
193 : }
194 :
195 : /*
196 : * On modern Intel CPUs, the TSC is implemented by invariant timekeeping
197 : * hardware, also called "Always Running Timer", or ART. The ART stays
198 : * consistent even if the CPU changes frequency due to changing power
199 : * levels.
200 : *
201 : * As documented in "Determining the Processor Base Frequency" in the
202 : * "IntelĀ® 64 and IA-32 Architectures Software Developer's Manual",
203 : * February 2026 Edition, we can get the TSC frequency as follows:
204 : *
205 : * Nominal TSC frequency = ( CPUID.15H:ECX[31:0] * CPUID.15H:EBX[31:0] ) /
206 : * CPUID.15H:EAX[31:0]
207 : *
208 : * With CPUID.15H:ECX representing the nominal core crystal clock
209 : * frequency, and EAX/EBX representing values used to translate the TSC
210 : * value to that frequency, see "Chapter 20.17 "Time-Stamp Counter" of
211 : * that manual.
212 : *
213 : * Older Intel CPUs, and other vendors do not set CPUID.15H:ECX, and as
214 : * such we fall back to alternate approaches.
215 : */
216 0 : pg_cpuid(0x15, reg);
217 0 : if (reg[ECX] > 0)
218 : {
219 : /*
220 : * EBX not being set indicates invariant TSC is not available. Require
221 : * EAX being non-zero too, to avoid a theoretical divide by zero.
222 : */
223 0 : if (reg[EAX] == 0 || reg[EBX] == 0)
224 0 : return 0;
225 :
226 0 : if (source)
227 0 : strlcat(source, ", cpuid 0x15", source_size);
228 :
229 0 : return reg[ECX] / 1000 * reg[EBX] / reg[EAX];
230 : }
231 :
232 : /*
233 : * When CPUID.15H is not available/incomplete, we can instead try to get
234 : * the processor base frequency in MHz from CPUID.16H:EAX, the "Processor
235 : * Frequency Information Leaf".
236 : */
237 0 : pg_cpuid(0x16, reg);
238 0 : if (reg[EAX] > 0)
239 : {
240 0 : if (source)
241 0 : strlcat(source, ", cpuid 0x16", source_size);
242 :
243 0 : return reg[EAX] * 1000;
244 : }
245 :
246 0 : return 0;
247 : }
248 :
249 : /*
250 : * Support for reading TSC frequency for hypervisors passing it to a guest VM.
251 : *
252 : * Two Hypervisors (VMware and KVM) are known to make TSC frequency in KHz
253 : * available at the vendor-specific 0x40000010 leaf in the EAX register.
254 : *
255 : * For some other Hypervisors that have an invariant TSC, e.g. HyperV, we would
256 : * need to access a model-specific register (MSR) to get the frequency. MSRs are
257 : * separate from CPUID and typically not available for unprivileged processes,
258 : * so we can't get the frequency this way.
259 : */
260 : #define CPUID_HYPERVISOR_VMWARE(r) (r[EBX] == 0x61774d56 && r[ECX] == 0x4d566572 && r[EDX] == 0x65726177) /* VMwareVMware */
261 : #define CPUID_HYPERVISOR_KVM(r) (r[EBX] == 0x4b4d564b && r[ECX] == 0x564b4d56 && r[EDX] == 0x0000004d) /* KVMKVMKVM */
262 : static uint32
263 1293 : x86_hypervisor_tsc_frequency_khz(void)
264 : {
265 : #if defined(HAVE__CPUIDEX)
266 : unsigned int reg[4] = {0};
267 :
268 : /*
269 : * The hypervisor is determined using the 0x40000000 Hypervisor
270 : * information leaf, which requires use of __cpuidex to set ECX to 0 to
271 : * access it.
272 : *
273 : * The similar __get_cpuid_count function does not work as expected since
274 : * it contains a check for __get_cpuid_max, which has been observed to be
275 : * lower than the special Hypervisor leaf, despite it being available.
276 : */
277 : __cpuidex((int *) reg, 0x40000000, 0);
278 :
279 : if (reg[EAX] >= 0x40000010 && (CPUID_HYPERVISOR_VMWARE(reg) || CPUID_HYPERVISOR_KVM(reg)))
280 : {
281 : __cpuidex((int *) reg, 0x40000010, 0);
282 : if (reg[EAX] > 0)
283 : return reg[EAX];
284 : }
285 : #endif /* HAVE__CPUIDEX */
286 :
287 1293 : return 0;
288 : }
289 :
290 : #else /* defined(USE_SSE2) || defined(__i386__) */
291 :
292 : /* prevent linker complaints about empty module */
293 : extern int pg_cpu_x86_dummy_variable;
294 : int pg_cpu_x86_dummy_variable = 0;
295 :
296 : #endif /* ! (USE_SSE2 || __i386__) */
|