Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * instr_time.h
4 : * portable high-precision interval timing
5 : *
6 : * This file provides an abstraction layer to hide portability issues in
7 : * interval timing. On x86 we use the RDTSC/RDTSCP instruction directly in
8 : * certain cases, or alternatively clock_gettime() on Unix-like systems and
9 : * QueryPerformanceCounter() on Windows. These macros also give some breathing
10 : * room to use other high-precision-timing APIs.
11 : *
12 : * The basic data type is instr_time, which all callers should treat as an
13 : * opaque typedef. instr_time can store either an absolute time (of
14 : * unspecified reference time) or an interval. The operations provided
15 : * for it are:
16 : *
17 : * INSTR_TIME_IS_ZERO(t) is t equal to zero?
18 : *
19 : * INSTR_TIME_SET_ZERO(t) set t to zero (memset is acceptable too)
20 : *
21 : * INSTR_TIME_SET_CURRENT_FAST(t) set t to current time without waiting
22 : * for instructions in out-of-order window
23 : *
24 : * INSTR_TIME_SET_CURRENT(t) set t to current time while waiting for
25 : * instructions in OOO to retire
26 : *
27 : *
28 : * INSTR_TIME_ADD(x, y) x += y
29 : *
30 : * INSTR_TIME_ADD_NANOSEC(t, n) t += n in nanoseconds (converts to ticks)
31 : *
32 : * INSTR_TIME_SUBTRACT(x, y) x -= y
33 : *
34 : * INSTR_TIME_ACCUM_DIFF(x, y, z) x += (y - z)
35 : *
36 : * INSTR_TIME_GT(x, y) x > y
37 : *
38 : * INSTR_TIME_GET_DOUBLE(t) convert t to double (in seconds)
39 : *
40 : * INSTR_TIME_GET_MILLISEC(t) convert t to double (in milliseconds)
41 : *
42 : * INSTR_TIME_GET_MICROSEC(t) convert t to int64 (in microseconds)
43 : *
44 : * INSTR_TIME_GET_NANOSEC(t) convert t to int64 (in nanoseconds)
45 : *
46 : * Note that INSTR_TIME_SUBTRACT and INSTR_TIME_ACCUM_DIFF convert
47 : * absolute times to intervals. The INSTR_TIME_GET_xxx operations are
48 : * only useful on intervals.
49 : *
50 : * When summing multiple measurements, it's recommended to leave the
51 : * running sum in instr_time form (ie, use INSTR_TIME_ADD or
52 : * INSTR_TIME_ACCUM_DIFF) and convert to a result format only at the end.
53 : *
54 : * Beware of multiple evaluations of the macro arguments.
55 : *
56 : *
57 : * Copyright (c) 2001-2026, PostgreSQL Global Development Group
58 : *
59 : * src/include/portability/instr_time.h
60 : *
61 : *-------------------------------------------------------------------------
62 : */
63 : #ifndef INSTR_TIME_H
64 : #define INSTR_TIME_H
65 :
66 :
67 : /*
68 : * We store interval times as an int64 integer on all platforms, as int64 is
69 : * cheap to add/subtract, the most common operation for instr_time. The
70 : * acquisition of time and converting to specific units of time is platform
71 : * specific.
72 : *
73 : * To avoid users of the API relying on the integer representation, we wrap
74 : * the 64bit integer in a struct.
75 : */
76 : typedef struct instr_time
77 : {
78 : int64 ticks; /* in platforms specific unit */
79 : } instr_time;
80 :
81 :
/* Helper macros used by the platform-specific code below. */

/* Nanoseconds per second, millisecond and microsecond, as int64 constants. */
#define NS_PER_S	INT64CONST(1000000000)
#define NS_PER_MS	INT64CONST(1000000)
#define NS_PER_US	INT64CONST(1000)

/*
 * Number of fractional bits in the fixed-point scale factor used for the
 * ticks <=> nanoseconds conversion.
 */
#define TICKS_TO_NS_SHIFT 14
90 :
/*
 * Compile-time switches for the timing code.
 *
 * PG_INSTR_TSC_CLOCK: whether the TSC clock source is compiled in; when it
 * is, timing_tsc_enabled decides at run time whether it is actually used.
 * Only x86-64 builds have it.
 *
 * PG_INSTR_TICKS_TO_NS: whether pg_ticks_to_ns/pg_ns_to_ticks need to check
 * ticks_per_ns_scaled and potentially convert ticks <=> nanoseconds.  That
 * is the case whenever the TSC clock is compiled in, and on Windows.
 */
#if defined(__x86_64__) || defined(_M_X64)
#define PG_INSTR_TSC_CLOCK 1
#else
#define PG_INSTR_TSC_CLOCK 0
#endif

#if PG_INSTR_TSC_CLOCK || defined(WIN32)
#define PG_INSTR_TICKS_TO_NS 1
#else
#define PG_INSTR_TICKS_TO_NS 0
#endif
108 :
109 : /*
110 : * Variables used to translate ticks to nanoseconds, initialized by
111 : * pg_initialize_timing and adjusted by pg_set_timing_clock_source calls or
112 : * changes of the "timing_clock_source" GUC.
113 : *
114 : * Note that changing these values after setting an instr_time and before
115 : * reading/converting it will lead to incorrect results. This is technically
116 : * possible because the GUC can be changed at runtime, but unlikely, and we
117 : * allow changing this at runtime to simplify testing of different sources.
118 : */
119 : extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
120 : extern PGDLLIMPORT uint64 max_ticks_no_overflow;
121 : extern PGDLLIMPORT bool timing_initialized;
122 :
/*
 * Selectable timing clock sources.  TIMING_CLOCK_SOURCE_TSC only exists in
 * builds where PG_INSTR_TSC_CLOCK is enabled.
 */
typedef enum
{
	TIMING_CLOCK_SOURCE_AUTO = 0,
	TIMING_CLOCK_SOURCE_SYSTEM = 1,
#if PG_INSTR_TSC_CLOCK
	TIMING_CLOCK_SOURCE_TSC = 2,
#endif
} TimingClockSourceType;
131 :
132 : extern PGDLLIMPORT int timing_clock_source;
133 :
134 : /*
135 : * Initialize timing infrastructure
136 : *
137 : * This must be called at least once before using INSTR_TIME_SET_CURRENT*
138 : * macros.
139 : *
140 : * If you want to use the TSC clock source in a client program,
141 : * pg_set_timing_clock_source() needs to also be called.
142 : */
143 : extern void pg_initialize_timing(void);
144 :
145 : /*
146 : * Sets the time source to be used. Mainly intended for frontend programs,
147 : * the backend should set it via the timing_clock_source GUC instead.
148 : *
149 : * Returns false if the clock source could not be set, for example when TSC
150 : * is not available despite being explicitly set.
151 : */
152 : extern bool pg_set_timing_clock_source(TimingClockSourceType source);
153 :
154 : /* Whether to actually use TSC based on availability and GUC settings. */
155 : extern PGDLLIMPORT bool timing_tsc_enabled;
156 :
157 : /*
158 : * TSC frequency in kHz, set during initialization.
159 : *
160 : * -1 = not yet initialized, 0 = TSC not usable, >0 = frequency in kHz.
161 : */
162 : extern PGDLLIMPORT int32 timing_tsc_frequency_khz;
163 :
164 : #if PG_INSTR_TSC_CLOCK
165 :
166 : extern void pg_initialize_timing_tsc(void);
167 :
168 : extern uint32 pg_tsc_calibrate_frequency(void);
169 :
170 : #endif /* PG_INSTR_TSC_CLOCK */
171 :
172 : /*
173 : * Returns the current timing clock source effectively in use, resolving
174 : * TIMING_CLOCK_SOURCE_AUTO to either TIMING_CLOCK_SOURCE_SYSTEM or
175 : * TIMING_CLOCK_SOURCE_TSC.
176 : */
177 : static inline TimingClockSourceType
178 1957 : pg_current_timing_clock_source(void)
179 : {
180 : #if PG_INSTR_TSC_CLOCK
181 1957 : if (timing_tsc_enabled)
182 1956 : return TIMING_CLOCK_SOURCE_TSC;
183 : #endif
184 1 : return TIMING_CLOCK_SOURCE_SYSTEM;
185 : }
186 :
187 : #ifndef WIN32
188 :
189 : /* On POSIX, use clock_gettime() for system clock source */
190 :
191 : #include <time.h>
192 :
/*
 * Choose the clockid passed to clock_gettime() for the system clock source.
 *
 * POSIX recommends CLOCK_MONOTONIC, which gives reliable interval timing
 * even when the system clock is changed.  POSIX only requires
 * CLOCK_REALTIME to exist, though, so that is the fallback when
 * CLOCK_MONOTONIC is not defined.
 *
 * Some implementations have nonstandard clockids with better properties.
 * In particular, as of macOS 10.12 Apple provides CLOCK_MONOTONIC_RAW, which
 * is both faster to read and higher resolution than their CLOCK_MONOTONIC,
 * so it takes precedence there.
 *
 * None of this is used when the TSC clock source is active, since that reads
 * the time with architecture-specific instructions (e.g. RDTSC) directly.
 */
#if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW)
#define PG_INSTR_SYSTEM_CLOCK		CLOCK_MONOTONIC_RAW
#define PG_INSTR_SYSTEM_CLOCK_NAME	"clock_gettime (CLOCK_MONOTONIC_RAW)"
#elif !defined(CLOCK_MONOTONIC)
#define PG_INSTR_SYSTEM_CLOCK		CLOCK_REALTIME
#define PG_INSTR_SYSTEM_CLOCK_NAME	"clock_gettime (CLOCK_REALTIME)"
#else
#define PG_INSTR_SYSTEM_CLOCK		CLOCK_MONOTONIC
#define PG_INSTR_SYSTEM_CLOCK_NAME	"clock_gettime (CLOCK_MONOTONIC)"
#endif
218 :
219 : static inline instr_time
220 5731604 : pg_get_ticks_system(void)
221 : {
222 : instr_time now;
223 : struct timespec tmp;
224 :
225 : Assert(timing_initialized);
226 :
227 5731604 : clock_gettime(PG_INSTR_SYSTEM_CLOCK, &tmp);
228 5731604 : now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
229 :
230 5731604 : return now;
231 : }
232 :
233 : #else /* WIN32 */
234 :
235 : /* On Windows, use QueryPerformanceCounter() for system clock source */
236 :
237 : #define PG_INSTR_SYSTEM_CLOCK_NAME "QueryPerformanceCounter"
238 : static inline instr_time
239 : pg_get_ticks_system(void)
240 : {
241 : instr_time now;
242 : LARGE_INTEGER tmp;
243 :
244 : Assert(timing_initialized);
245 :
246 : QueryPerformanceCounter(&tmp);
247 : now.ticks = tmp.QuadPart;
248 :
249 : return now;
250 : }
251 :
252 : #endif /* WIN32 */
253 :
254 : static inline int64
255 72067011 : pg_ticks_to_ns(int64 ticks)
256 : {
257 : #if PG_INSTR_TICKS_TO_NS
258 72067011 : int64 ns = 0;
259 :
260 : Assert(timing_initialized);
261 :
262 : /*
263 : * Avoid doing work if we don't use scaled ticks, e.g. system clock on
264 : * Unix (in that case ticks is counted in nanoseconds)
265 : */
266 72067011 : if (ticks_per_ns_scaled == 0)
267 5729069 : return ticks;
268 :
269 : /*
270 : * Would multiplication overflow? If so perform computation in two parts.
271 : */
272 66337942 : if (unlikely(ticks > (int64) max_ticks_no_overflow))
273 : {
274 : /*
275 : * To avoid overflow, first scale total ticks down by the fixed
276 : * factor, and *afterwards* multiply them by the frequency-based scale
277 : * factor.
278 : *
279 : * The remaining ticks can follow the regular formula, since they
280 : * won't overflow.
281 : */
282 4 : int64 count = ticks >> TICKS_TO_NS_SHIFT;
283 :
284 4 : ns = count * ticks_per_ns_scaled;
285 4 : ticks -= (count << TICKS_TO_NS_SHIFT);
286 : }
287 :
288 66337942 : ns += (ticks * ticks_per_ns_scaled) >> TICKS_TO_NS_SHIFT;
289 :
290 66337942 : return ns;
291 : #else
292 : Assert(timing_initialized);
293 :
294 : return ticks;
295 : #endif /* PG_INSTR_TICKS_TO_NS */
296 : }
297 :
298 : static inline int64
299 15 : pg_ns_to_ticks(int64 ns)
300 : {
301 : #if PG_INSTR_TICKS_TO_NS
302 15 : int64 ticks = 0;
303 :
304 : Assert(timing_initialized);
305 :
306 : /*
307 : * If ticks_per_ns_scaled is zero, ticks are already in nanoseconds (e.g.
308 : * system clock on Unix).
309 : */
310 15 : if (ticks_per_ns_scaled == 0)
311 1 : return ns;
312 :
313 : /*
314 : * The reverse of pg_ticks_to_ns to avoid a similar overflow problem.
315 : */
316 14 : if (unlikely(ns > (INT64_MAX >> TICKS_TO_NS_SHIFT)))
317 : {
318 4 : int64 count = ns / ticks_per_ns_scaled;
319 :
320 4 : ticks = count << TICKS_TO_NS_SHIFT;
321 4 : ns -= count * ticks_per_ns_scaled;
322 : }
323 :
324 14 : ticks += (ns << TICKS_TO_NS_SHIFT) / ticks_per_ns_scaled;
325 :
326 14 : return ticks;
327 : #else
328 : Assert(timing_initialized);
329 :
330 : return ns;
331 : #endif /* PG_INSTR_TICKS_TO_NS */
332 : }
333 :
334 : #if PG_INSTR_TSC_CLOCK
335 :
/* Names of the TSC read instructions: regular and "fast" variants. */
#define PG_INSTR_TSC_CLOCK_NAME			"RDTSCP"
#define PG_INSTR_TSC_CLOCK_NAME_FAST	"RDTSC"
338 :
339 : #ifdef _MSC_VER
340 : #include <intrin.h>
341 : #endif /* defined(_MSC_VER) */
342 :
343 : /* Helpers to abstract compiler differences for reading the x86 TSC. */
344 : static inline int64
345 29554194 : pg_rdtsc(void)
346 : {
347 : #ifdef _MSC_VER
348 : return __rdtsc();
349 : #else
350 29554194 : return __builtin_ia32_rdtsc();
351 : #endif /* defined(_MSC_VER) */
352 : }
353 :
354 : static inline int64
355 13842206 : pg_rdtscp(void)
356 : {
357 : uint32 unused;
358 :
359 : #ifdef _MSC_VER
360 : return __rdtscp(&unused);
361 : #else
362 13842206 : return __builtin_ia32_rdtscp(&unused);
363 : #endif /* defined(_MSC_VER) */
364 : }
365 :
366 : /*
367 : * Marked always_inline due to a shortcoming in gcc's heuristics leading to
368 : * only inlining the function partially.
369 : * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124795
370 : */
371 : static pg_attribute_always_inline instr_time
372 16803434 : pg_get_ticks(void)
373 : {
374 16803434 : if (likely(timing_tsc_enabled))
375 : {
376 : instr_time now;
377 :
378 11071830 : now.ticks = pg_rdtscp();
379 11071830 : return now;
380 : }
381 :
382 5731604 : return pg_get_ticks_system();
383 : }
384 :
385 : static pg_attribute_always_inline instr_time
386 29554194 : pg_get_ticks_fast(void)
387 : {
388 29554194 : if (likely(timing_tsc_enabled))
389 : {
390 : instr_time now;
391 :
392 29554194 : now.ticks = pg_rdtsc();
393 29554194 : return now;
394 : }
395 :
396 0 : return pg_get_ticks_system();
397 : }
398 :
399 : #else
400 :
401 : static pg_attribute_always_inline instr_time
402 : pg_get_ticks(void)
403 : {
404 : return pg_get_ticks_system();
405 : }
406 :
407 : static pg_attribute_always_inline instr_time
408 : pg_get_ticks_fast(void)
409 : {
410 : return pg_get_ticks_system();
411 : }
412 :
413 : #endif /* PG_INSTR_TSC_CLOCK */
414 :
/*
 * Common macros
 *
 * All of these operate on the ticks field of their instr_time arguments.
 * Arguments may be evaluated more than once.
 */

/* Tests against, and resets to, zero. */
#define INSTR_TIME_IS_ZERO(t)	((t).ticks == 0)
#define INSTR_TIME_SET_ZERO(t)	((t).ticks = 0)

/* Store the current time; see pg_get_ticks_fast() and pg_get_ticks(). */
#define INSTR_TIME_SET_CURRENT_FAST(t)	((t) = pg_get_ticks_fast())
#define INSTR_TIME_SET_CURRENT(t)		((t) = pg_get_ticks())

/* x += y */
#define INSTR_TIME_ADD(x,y)				((x).ticks += (y).ticks)

/* t += n, where n is in nanoseconds and is converted to ticks first */
#define INSTR_TIME_ADD_NANOSEC(t, n)	((t).ticks += pg_ns_to_ticks(n))

/* x -= y */
#define INSTR_TIME_SUBTRACT(x,y)		((x).ticks -= (y).ticks)

/* x += (y - z) */
#define INSTR_TIME_ACCUM_DIFF(x,y,z)	((x).ticks += (y).ticks - (z).ticks)

/* x > y */
#define INSTR_TIME_GT(x,y)				((x).ticks > (y).ticks)

/* Conversions to int64 nanoseconds and microseconds. */
#define INSTR_TIME_GET_NANOSEC(t)	(pg_ticks_to_ns((t).ticks))
#define INSTR_TIME_GET_MICROSEC(t)	(INSTR_TIME_GET_NANOSEC(t) / NS_PER_US)

/* Conversions to double seconds and milliseconds. */
#define INSTR_TIME_GET_DOUBLE(t) \
	((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
#define INSTR_TIME_GET_MILLISEC(t) \
	((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_MS)
456 :
457 : #endif /* INSTR_TIME_H */
|