Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_bitutils.c
4 : * Miscellaneous functions for bit-wise operations.
5 : *
6 : * Copyright (c) 2019-2025, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/port/pg_bitutils.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "c.h"
14 :
15 : #ifdef HAVE__GET_CPUID
16 : #include <cpuid.h>
17 : #endif
18 : #ifdef HAVE__CPUID
19 : #include <intrin.h>
20 : #endif
21 :
22 : #include "port/pg_bitutils.h"
23 :
24 :
25 : /*
26 : * Array giving the position of the left-most set bit for each possible
27 : * byte value. We count the right-most position as the 0th bit, and the
28 : * left-most the 7th bit. The 0th entry of the array should not be used.
29 : *
30 : * Note: this is not used by the functions in pg_bitutils.h when
31 : * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 : * extensions possibly compiled with a different compiler can use it.
33 : */
34 : const uint8 pg_leftmost_one_pos[256] = {
35 : 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 : 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 : 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 : 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51 : };
52 :
53 : /*
54 : * Array giving the position of the right-most set bit for each possible
55 : * byte value. We count the right-most position as the 0th bit, and the
56 : * left-most the 7th bit. The 0th entry of the array should not be used.
57 : *
58 : * Note: this is not used by the functions in pg_bitutils.h when
59 : * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 : * extensions possibly compiled with a different compiler can use it.
61 : */
62 : const uint8 pg_rightmost_one_pos[256] = {
63 : 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 : 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 : 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 : 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79 : };
80 :
81 : /*
82 : * Array giving the number of 1-bits in each possible byte value.
83 : *
84 : * Note: we export this for use by functions in which explicit use
85 : * of the popcount functions seems unlikely to be a win.
86 : */
87 : const uint8 pg_number_of_ones[256] = {
88 : 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 : 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104 : };
105 :
106 : /*
107 : * If we are building the Neon versions, we don't need the "slow" fallbacks.
108 : */
109 : #ifndef POPCNT_AARCH64
110 : static inline int pg_popcount32_slow(uint32 word);
111 : static inline int pg_popcount64_slow(uint64 word);
112 : static uint64 pg_popcount_slow(const char *buf, int bytes);
113 : static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
114 : #endif
115 :
116 : #ifdef TRY_POPCNT_X86_64
117 : static bool pg_popcount_available(void);
118 : static int pg_popcount32_choose(uint32 word);
119 : static int pg_popcount64_choose(uint64 word);
120 : static uint64 pg_popcount_choose(const char *buf, int bytes);
121 : static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
122 : static inline int pg_popcount32_fast(uint32 word);
123 : static inline int pg_popcount64_fast(uint64 word);
124 : static uint64 pg_popcount_fast(const char *buf, int bytes);
125 : static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
126 :
127 : int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
128 : int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
129 : uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
130 : uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
131 : #endif /* TRY_POPCNT_X86_64 */
132 :
133 : #ifdef TRY_POPCNT_X86_64
134 :
135 : /*
136 : * Return true if CPUID indicates that the POPCNT instruction is available.
137 : */
138 : static bool
139 12974 : pg_popcount_available(void)
140 : {
141 12974 : unsigned int exx[4] = {0, 0, 0, 0};
142 :
143 : #if defined(HAVE__GET_CPUID)
144 12974 : __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
145 : #elif defined(HAVE__CPUID)
146 : __cpuid(exx, 1);
147 : #else
148 : #error cpuid instruction not available
149 : #endif
150 :
151 12974 : return (exx[2] & (1 << 23)) != 0; /* POPCNT */
152 : }
153 :
154 : /*
155 : * These functions get called on the first call to pg_popcount32 etc.
156 : * They detect whether we can use the asm implementations, and replace
157 : * the function pointers so that subsequent calls are routed directly to
158 : * the chosen implementation.
159 : */
160 : static inline void
161 12974 : choose_popcount_functions(void)
162 : {
163 12974 : if (pg_popcount_available())
164 : {
165 12974 : pg_popcount32 = pg_popcount32_fast;
166 12974 : pg_popcount64 = pg_popcount64_fast;
167 12974 : pg_popcount_optimized = pg_popcount_fast;
168 12974 : pg_popcount_masked_optimized = pg_popcount_masked_fast;
169 : }
170 : else
171 : {
172 0 : pg_popcount32 = pg_popcount32_slow;
173 0 : pg_popcount64 = pg_popcount64_slow;
174 0 : pg_popcount_optimized = pg_popcount_slow;
175 0 : pg_popcount_masked_optimized = pg_popcount_masked_slow;
176 : }
177 :
178 : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
179 12974 : if (pg_popcount_avx512_available())
180 : {
181 0 : pg_popcount_optimized = pg_popcount_avx512;
182 0 : pg_popcount_masked_optimized = pg_popcount_masked_avx512;
183 : }
184 : #endif
185 12974 : }
186 :
187 : static int
188 0 : pg_popcount32_choose(uint32 word)
189 : {
190 0 : choose_popcount_functions();
191 0 : return pg_popcount32(word);
192 : }
193 :
194 : static int
195 11866 : pg_popcount64_choose(uint64 word)
196 : {
197 11866 : choose_popcount_functions();
198 11866 : return pg_popcount64(word);
199 : }
200 :
201 : static uint64
202 4 : pg_popcount_choose(const char *buf, int bytes)
203 : {
204 4 : choose_popcount_functions();
205 4 : return pg_popcount_optimized(buf, bytes);
206 : }
207 :
208 : static uint64
209 1104 : pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
210 : {
211 1104 : choose_popcount_functions();
212 1104 : return pg_popcount_masked(buf, bytes, mask);
213 : }
214 :
215 : /*
216 : * pg_popcount32_fast
217 : * Return the number of 1 bits set in word
218 : */
219 : static inline int
220 0 : pg_popcount32_fast(uint32 word)
221 : {
222 : #ifdef _MSC_VER
223 : return __popcnt(word);
224 : #else
225 : uint32 res;
226 :
227 0 : __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
228 0 : return (int) res;
229 : #endif
230 : }
231 :
232 : /*
233 : * pg_popcount64_fast
234 : * Return the number of 1 bits set in word
235 : */
236 : static inline int
237 110298170 : pg_popcount64_fast(uint64 word)
238 : {
239 : #ifdef _MSC_VER
240 : return __popcnt64(word);
241 : #else
242 : uint64 res;
243 :
244 110298170 : __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
245 110298170 : return (int) res;
246 : #endif
247 : }
248 :
249 : /*
250 : * pg_popcount_fast
251 : * Returns the number of 1-bits in buf
252 : */
253 : static uint64
254 28 : pg_popcount_fast(const char *buf, int bytes)
255 : {
256 28 : uint64 popcnt = 0;
257 :
258 : #if SIZEOF_VOID_P >= 8
259 : /* Process in 64-bit chunks if the buffer is aligned. */
260 28 : if (buf == (const char *) TYPEALIGN(8, buf))
261 : {
262 28 : const uint64 *words = (const uint64 *) buf;
263 :
264 524508 : while (bytes >= 8)
265 : {
266 524480 : popcnt += pg_popcount64_fast(*words++);
267 524480 : bytes -= 8;
268 : }
269 :
270 28 : buf = (const char *) words;
271 : }
272 : #else
273 : /* Process in 32-bit chunks if the buffer is aligned. */
274 : if (buf == (const char *) TYPEALIGN(4, buf))
275 : {
276 : const uint32 *words = (const uint32 *) buf;
277 :
278 : while (bytes >= 4)
279 : {
280 : popcnt += pg_popcount32_fast(*words++);
281 : bytes -= 4;
282 : }
283 :
284 : buf = (const char *) words;
285 : }
286 : #endif
287 :
288 : /* Process any remaining bytes */
289 148 : while (bytes--)
290 120 : popcnt += pg_number_of_ones[(unsigned char) *buf++];
291 :
292 28 : return popcnt;
293 : }
294 :
295 : /*
296 : * pg_popcount_masked_fast
297 : * Returns the number of 1-bits in buf after applying the mask to each byte
298 : */
299 : static uint64
300 105676 : pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
301 : {
302 105676 : uint64 popcnt = 0;
303 :
304 : #if SIZEOF_VOID_P >= 8
305 : /* Process in 64-bit chunks if the buffer is aligned */
306 105676 : uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
307 :
308 105676 : if (buf == (const char *) TYPEALIGN(8, buf))
309 : {
310 105676 : const uint64 *words = (const uint64 *) buf;
311 :
312 108000872 : while (bytes >= 8)
313 : {
314 107895196 : popcnt += pg_popcount64_fast(*words++ & maskv);
315 107895196 : bytes -= 8;
316 : }
317 :
318 105676 : buf = (const char *) words;
319 : }
320 : #else
321 : /* Process in 32-bit chunks if the buffer is aligned. */
322 : uint32 maskv = ~((uint32) 0) / 0xFF * mask;
323 :
324 : if (buf == (const char *) TYPEALIGN(4, buf))
325 : {
326 : const uint32 *words = (const uint32 *) buf;
327 :
328 : while (bytes >= 4)
329 : {
330 : popcnt += pg_popcount32_fast(*words++ & maskv);
331 : bytes -= 4;
332 : }
333 :
334 : buf = (const char *) words;
335 : }
336 : #endif
337 :
338 : /* Process any remaining bytes */
339 105676 : while (bytes--)
340 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
341 :
342 105676 : return popcnt;
343 : }
344 :
345 : #endif /* TRY_POPCNT_X86_64 */
346 :
347 : /*
348 : * If we are building the Neon versions, we don't need the "slow" fallbacks.
349 : */
350 : #ifndef POPCNT_AARCH64
351 :
352 : /*
353 : * pg_popcount32_slow
354 : * Return the number of 1 bits set in word
355 : */
356 : static inline int
357 0 : pg_popcount32_slow(uint32 word)
358 : {
359 : #ifdef HAVE__BUILTIN_POPCOUNT
360 0 : return __builtin_popcount(word);
361 : #else /* !HAVE__BUILTIN_POPCOUNT */
362 : int result = 0;
363 :
364 : while (word != 0)
365 : {
366 : result += pg_number_of_ones[word & 255];
367 : word >>= 8;
368 : }
369 :
370 : return result;
371 : #endif /* HAVE__BUILTIN_POPCOUNT */
372 : }
373 :
374 : /*
375 : * pg_popcount64_slow
376 : * Return the number of 1 bits set in word
377 : */
378 : static inline int
379 0 : pg_popcount64_slow(uint64 word)
380 : {
381 : #ifdef HAVE__BUILTIN_POPCOUNT
382 : #if SIZEOF_LONG == 8
383 0 : return __builtin_popcountl(word);
384 : #elif SIZEOF_LONG_LONG == 8
385 : return __builtin_popcountll(word);
386 : #else
387 : #error "cannot find integer of the same size as uint64_t"
388 : #endif
389 : #else /* !HAVE__BUILTIN_POPCOUNT */
390 : int result = 0;
391 :
392 : while (word != 0)
393 : {
394 : result += pg_number_of_ones[word & 255];
395 : word >>= 8;
396 : }
397 :
398 : return result;
399 : #endif /* HAVE__BUILTIN_POPCOUNT */
400 : }
401 :
402 : /*
403 : * pg_popcount_slow
404 : * Returns the number of 1-bits in buf
405 : */
406 : static uint64
407 0 : pg_popcount_slow(const char *buf, int bytes)
408 : {
409 0 : uint64 popcnt = 0;
410 :
411 : #if SIZEOF_VOID_P >= 8
412 : /* Process in 64-bit chunks if the buffer is aligned. */
413 0 : if (buf == (const char *) TYPEALIGN(8, buf))
414 : {
415 0 : const uint64 *words = (const uint64 *) buf;
416 :
417 0 : while (bytes >= 8)
418 : {
419 0 : popcnt += pg_popcount64_slow(*words++);
420 0 : bytes -= 8;
421 : }
422 :
423 0 : buf = (const char *) words;
424 : }
425 : #else
426 : /* Process in 32-bit chunks if the buffer is aligned. */
427 : if (buf == (const char *) TYPEALIGN(4, buf))
428 : {
429 : const uint32 *words = (const uint32 *) buf;
430 :
431 : while (bytes >= 4)
432 : {
433 : popcnt += pg_popcount32_slow(*words++);
434 : bytes -= 4;
435 : }
436 :
437 : buf = (const char *) words;
438 : }
439 : #endif
440 :
441 : /* Process any remaining bytes */
442 0 : while (bytes--)
443 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++];
444 :
445 0 : return popcnt;
446 : }
447 :
448 : /*
449 : * pg_popcount_masked_slow
450 : * Returns the number of 1-bits in buf after applying the mask to each byte
451 : */
452 : static uint64
453 0 : pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
454 : {
455 0 : uint64 popcnt = 0;
456 :
457 : #if SIZEOF_VOID_P >= 8
458 : /* Process in 64-bit chunks if the buffer is aligned */
459 0 : uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
460 :
461 0 : if (buf == (const char *) TYPEALIGN(8, buf))
462 : {
463 0 : const uint64 *words = (const uint64 *) buf;
464 :
465 0 : while (bytes >= 8)
466 : {
467 0 : popcnt += pg_popcount64_slow(*words++ & maskv);
468 0 : bytes -= 8;
469 : }
470 :
471 0 : buf = (const char *) words;
472 : }
473 : #else
474 : /* Process in 32-bit chunks if the buffer is aligned. */
475 : uint32 maskv = ~((uint32) 0) / 0xFF * mask;
476 :
477 : if (buf == (const char *) TYPEALIGN(4, buf))
478 : {
479 : const uint32 *words = (const uint32 *) buf;
480 :
481 : while (bytes >= 4)
482 : {
483 : popcnt += pg_popcount32_slow(*words++ & maskv);
484 : bytes -= 4;
485 : }
486 :
487 : buf = (const char *) words;
488 : }
489 : #endif
490 :
491 : /* Process any remaining bytes */
492 0 : while (bytes--)
493 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
494 :
495 0 : return popcnt;
496 : }
497 :
498 : #endif /* ! POPCNT_AARCH64 */
499 :
500 : #if !defined(TRY_POPCNT_X86_64) && !defined(POPCNT_AARCH64)
501 :
502 : /*
503 : * When special CPU instructions are not available, there's no point in using
504 : * function pointers to vary the implementation between the fast and slow
505 : * method. We instead just make these actual external functions. The compiler
506 : * should be able to inline the slow versions here.
507 : */
508 : int
509 : pg_popcount32(uint32 word)
510 : {
511 : return pg_popcount32_slow(word);
512 : }
513 :
514 : int
515 : pg_popcount64(uint64 word)
516 : {
517 : return pg_popcount64_slow(word);
518 : }
519 :
520 : /*
521 : * pg_popcount_optimized
522 : * Returns the number of 1-bits in buf
523 : */
524 : uint64
525 : pg_popcount_optimized(const char *buf, int bytes)
526 : {
527 : return pg_popcount_slow(buf, bytes);
528 : }
529 :
530 : /*
531 : * pg_popcount_masked_optimized
532 : * Returns the number of 1-bits in buf after applying the mask to each byte
533 : */
534 : uint64
535 : pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
536 : {
537 : return pg_popcount_masked_slow(buf, bytes, mask);
538 : }
539 :
540 : #endif /* ! TRY_POPCNT_X86_64 && ! POPCNT_AARCH64 */
|