Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pg_bitutils.c
4 : * Miscellaneous functions for bit-wise operations.
5 : *
6 : * Copyright (c) 2019-2024, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/port/pg_bitutils.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "c.h"
14 :
15 : #ifdef HAVE__GET_CPUID
16 : #include <cpuid.h>
17 : #endif
18 : #ifdef HAVE__CPUID
19 : #include <intrin.h>
20 : #endif
21 :
22 : #include "port/pg_bitutils.h"
23 :
24 :
25 : /*
26 : * Array giving the position of the left-most set bit for each possible
27 : * byte value. We count the right-most position as the 0th bit, and the
28 : * left-most the 7th bit. The 0th entry of the array should not be used.
29 : *
30 : * Note: this is not used by the functions in pg_bitutils.h when
31 : * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 : * extensions possibly compiled with a different compiler can use it.
33 : */
34 : const uint8 pg_leftmost_one_pos[256] = {
35 : 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 : 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 : 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 : 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 : 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 : 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51 : };
52 :
53 : /*
54 : * Array giving the position of the right-most set bit for each possible
55 : * byte value. We count the right-most position as the 0th bit, and the
56 : * left-most the 7th bit. The 0th entry of the array should not be used.
57 : *
58 : * Note: this is not used by the functions in pg_bitutils.h when
59 : * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 : * extensions possibly compiled with a different compiler can use it.
61 : */
62 : const uint8 pg_rightmost_one_pos[256] = {
63 : 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 : 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 : 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 : 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 : 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 : 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79 : };
80 :
81 : /*
82 : * Array giving the number of 1-bits in each possible byte value.
83 : *
84 : * Note: we export this for use by functions in which explicit use
85 : * of the popcount functions seems unlikely to be a win.
86 : */
87 : const uint8 pg_number_of_ones[256] = {
88 : 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 : 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 : 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 : 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 : 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104 : };
105 :
106 : static inline int pg_popcount32_slow(uint32 word);
107 : static inline int pg_popcount64_slow(uint64 word);
108 : static uint64 pg_popcount_slow(const char *buf, int bytes);
109 : static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
110 :
111 : #ifdef TRY_POPCNT_FAST
112 : static bool pg_popcount_available(void);
113 : static int pg_popcount32_choose(uint32 word);
114 : static int pg_popcount64_choose(uint64 word);
115 : static uint64 pg_popcount_choose(const char *buf, int bytes);
116 : static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
117 : static inline int pg_popcount32_fast(uint32 word);
118 : static inline int pg_popcount64_fast(uint64 word);
119 : static uint64 pg_popcount_fast(const char *buf, int bytes);
120 : static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
121 :
122 : int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
123 : int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
124 : uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
125 : uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
126 : #endif /* TRY_POPCNT_FAST */
127 :
128 : #ifdef TRY_POPCNT_FAST
129 :
130 : /*
131 : * Return true if CPUID indicates that the POPCNT instruction is available.
132 : */
133 : static bool
134 12034 : pg_popcount_available(void)
135 : {
136 12034 : unsigned int exx[4] = {0, 0, 0, 0};
137 :
138 : #if defined(HAVE__GET_CPUID)
139 12034 : __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140 : #elif defined(HAVE__CPUID)
141 : __cpuid(exx, 1);
142 : #else
143 : #error cpuid instruction not available
144 : #endif
145 :
146 12034 : return (exx[2] & (1 << 23)) != 0; /* POPCNT */
147 : }
148 :
149 : /*
150 : * These functions get called on the first call to pg_popcount32 etc.
151 : * They detect whether we can use the asm implementations, and replace
152 : * the function pointers so that subsequent calls are routed directly to
153 : * the chosen implementation.
154 : */
155 : static inline void
156 12034 : choose_popcount_functions(void)
157 : {
158 12034 : if (pg_popcount_available())
159 : {
160 12034 : pg_popcount32 = pg_popcount32_fast;
161 12034 : pg_popcount64 = pg_popcount64_fast;
162 12034 : pg_popcount_optimized = pg_popcount_fast;
163 12034 : pg_popcount_masked_optimized = pg_popcount_masked_fast;
164 : }
165 : else
166 : {
167 0 : pg_popcount32 = pg_popcount32_slow;
168 0 : pg_popcount64 = pg_popcount64_slow;
169 0 : pg_popcount_optimized = pg_popcount_slow;
170 0 : pg_popcount_masked_optimized = pg_popcount_masked_slow;
171 : }
172 :
173 : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 12034 : if (pg_popcount_avx512_available())
175 : {
176 0 : pg_popcount_optimized = pg_popcount_avx512;
177 0 : pg_popcount_masked_optimized = pg_popcount_masked_avx512;
178 : }
179 : #endif
180 12034 : }
181 :
182 : static int
183 0 : pg_popcount32_choose(uint32 word)
184 : {
185 0 : choose_popcount_functions();
186 0 : return pg_popcount32(word);
187 : }
188 :
189 : static int
190 11098 : pg_popcount64_choose(uint64 word)
191 : {
192 11098 : choose_popcount_functions();
193 11098 : return pg_popcount64(word);
194 : }
195 :
196 : static uint64
197 10 : pg_popcount_choose(const char *buf, int bytes)
198 : {
199 10 : choose_popcount_functions();
200 10 : return pg_popcount_optimized(buf, bytes);
201 : }
202 :
203 : static uint64
204 926 : pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
205 : {
206 926 : choose_popcount_functions();
207 926 : return pg_popcount_masked(buf, bytes, mask);
208 : }
209 :
210 : /*
211 : * pg_popcount32_fast
212 : * Return the number of 1 bits set in word
213 : */
214 : static inline int
215 0 : pg_popcount32_fast(uint32 word)
216 : {
217 : #ifdef _MSC_VER
218 : return __popcnt(word);
219 : #else
220 : uint32 res;
221 :
222 0 : __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
223 0 : return (int) res;
224 : #endif
225 : }
226 :
227 : /*
228 : * pg_popcount64_fast
229 : * Return the number of 1 bits set in word
230 : */
231 : static inline int
232 46051852 : pg_popcount64_fast(uint64 word)
233 : {
234 : #ifdef _MSC_VER
235 : return __popcnt64(word);
236 : #else
237 : uint64 res;
238 :
239 46051852 : __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
240 46051852 : return (int) res;
241 : #endif
242 : }
243 :
244 : /*
245 : * pg_popcount_fast
246 : * Returns the number of 1-bits in buf
247 : */
248 : static uint64
249 28 : pg_popcount_fast(const char *buf, int bytes)
250 : {
251 28 : uint64 popcnt = 0;
252 :
253 : #if SIZEOF_VOID_P >= 8
254 : /* Process in 64-bit chunks if the buffer is aligned. */
255 28 : if (buf == (const char *) TYPEALIGN(8, buf))
256 : {
257 28 : const uint64 *words = (const uint64 *) buf;
258 :
259 524508 : while (bytes >= 8)
260 : {
261 524480 : popcnt += pg_popcount64_fast(*words++);
262 524480 : bytes -= 8;
263 : }
264 :
265 28 : buf = (const char *) words;
266 : }
267 : #else
268 : /* Process in 32-bit chunks if the buffer is aligned. */
269 : if (buf == (const char *) TYPEALIGN(4, buf))
270 : {
271 : const uint32 *words = (const uint32 *) buf;
272 :
273 : while (bytes >= 4)
274 : {
275 : popcnt += pg_popcount32_fast(*words++);
276 : bytes -= 4;
277 : }
278 :
279 : buf = (const char *) words;
280 : }
281 : #endif
282 :
283 : /* Process any remaining bytes */
284 148 : while (bytes--)
285 120 : popcnt += pg_number_of_ones[(unsigned char) *buf++];
286 :
287 28 : return popcnt;
288 : }
289 :
290 : /*
291 : * pg_popcount_masked_fast
292 : * Returns the number of 1-bits in buf after applying the mask to each byte
293 : */
294 : static uint64
295 43024 : pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
296 : {
297 43024 : uint64 popcnt = 0;
298 :
299 : #if SIZEOF_VOID_P >= 8
300 : /* Process in 64-bit chunks if the buffer is aligned */
301 43024 : uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
302 :
303 43024 : if (buf == (const char *) TYPEALIGN(8, buf))
304 : {
305 43024 : const uint64 *words = (const uint64 *) buf;
306 :
307 43970528 : while (bytes >= 8)
308 : {
309 43927504 : popcnt += pg_popcount64_fast(*words++ & maskv);
310 43927504 : bytes -= 8;
311 : }
312 :
313 43024 : buf = (const char *) words;
314 : }
315 : #else
316 : /* Process in 32-bit chunks if the buffer is aligned. */
317 : uint32 maskv = ~((uint32) 0) / 0xFF * mask;
318 :
319 : if (buf == (const char *) TYPEALIGN(4, buf))
320 : {
321 : const uint32 *words = (const uint32 *) buf;
322 :
323 : while (bytes >= 4)
324 : {
325 : popcnt += pg_popcount32_fast(*words++ & maskv);
326 : bytes -= 4;
327 : }
328 :
329 : buf = (const char *) words;
330 : }
331 : #endif
332 :
333 : /* Process any remaining bytes */
334 43024 : while (bytes--)
335 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
336 :
337 43024 : return popcnt;
338 : }
339 :
340 : #endif /* TRY_POPCNT_FAST */
341 :
342 :
343 : /*
344 : * pg_popcount32_slow
345 : * Return the number of 1 bits set in word
346 : */
347 : static inline int
348 0 : pg_popcount32_slow(uint32 word)
349 : {
350 : #ifdef HAVE__BUILTIN_POPCOUNT
351 0 : return __builtin_popcount(word);
352 : #else /* !HAVE__BUILTIN_POPCOUNT */
353 : int result = 0;
354 :
355 : while (word != 0)
356 : {
357 : result += pg_number_of_ones[word & 255];
358 : word >>= 8;
359 : }
360 :
361 : return result;
362 : #endif /* HAVE__BUILTIN_POPCOUNT */
363 : }
364 :
365 : /*
366 : * pg_popcount64_slow
367 : * Return the number of 1 bits set in word
368 : */
369 : static inline int
370 0 : pg_popcount64_slow(uint64 word)
371 : {
372 : #ifdef HAVE__BUILTIN_POPCOUNT
373 : #if defined(HAVE_LONG_INT_64)
374 0 : return __builtin_popcountl(word);
375 : #elif defined(HAVE_LONG_LONG_INT_64)
376 : return __builtin_popcountll(word);
377 : #else
378 : #error must have a working 64-bit integer datatype
379 : #endif
380 : #else /* !HAVE__BUILTIN_POPCOUNT */
381 : int result = 0;
382 :
383 : while (word != 0)
384 : {
385 : result += pg_number_of_ones[word & 255];
386 : word >>= 8;
387 : }
388 :
389 : return result;
390 : #endif /* HAVE__BUILTIN_POPCOUNT */
391 : }
392 :
393 : /*
394 : * pg_popcount_slow
395 : * Returns the number of 1-bits in buf
396 : */
397 : static uint64
398 0 : pg_popcount_slow(const char *buf, int bytes)
399 : {
400 0 : uint64 popcnt = 0;
401 :
402 : #if SIZEOF_VOID_P >= 8
403 : /* Process in 64-bit chunks if the buffer is aligned. */
404 0 : if (buf == (const char *) TYPEALIGN(8, buf))
405 : {
406 0 : const uint64 *words = (const uint64 *) buf;
407 :
408 0 : while (bytes >= 8)
409 : {
410 0 : popcnt += pg_popcount64_slow(*words++);
411 0 : bytes -= 8;
412 : }
413 :
414 0 : buf = (const char *) words;
415 : }
416 : #else
417 : /* Process in 32-bit chunks if the buffer is aligned. */
418 : if (buf == (const char *) TYPEALIGN(4, buf))
419 : {
420 : const uint32 *words = (const uint32 *) buf;
421 :
422 : while (bytes >= 4)
423 : {
424 : popcnt += pg_popcount32_slow(*words++);
425 : bytes -= 4;
426 : }
427 :
428 : buf = (const char *) words;
429 : }
430 : #endif
431 :
432 : /* Process any remaining bytes */
433 0 : while (bytes--)
434 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++];
435 :
436 0 : return popcnt;
437 : }
438 :
439 : /*
440 : * pg_popcount_masked_slow
441 : * Returns the number of 1-bits in buf after applying the mask to each byte
442 : */
443 : static uint64
444 0 : pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
445 : {
446 0 : uint64 popcnt = 0;
447 :
448 : #if SIZEOF_VOID_P >= 8
449 : /* Process in 64-bit chunks if the buffer is aligned */
450 0 : uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
451 :
452 0 : if (buf == (const char *) TYPEALIGN(8, buf))
453 : {
454 0 : const uint64 *words = (const uint64 *) buf;
455 :
456 0 : while (bytes >= 8)
457 : {
458 0 : popcnt += pg_popcount64_slow(*words++ & maskv);
459 0 : bytes -= 8;
460 : }
461 :
462 0 : buf = (const char *) words;
463 : }
464 : #else
465 : /* Process in 32-bit chunks if the buffer is aligned. */
466 : uint32 maskv = ~((uint32) 0) / 0xFF * mask;
467 :
468 : if (buf == (const char *) TYPEALIGN(4, buf))
469 : {
470 : const uint32 *words = (const uint32 *) buf;
471 :
472 : while (bytes >= 4)
473 : {
474 : popcnt += pg_popcount32_slow(*words++ & maskv);
475 : bytes -= 4;
476 : }
477 :
478 : buf = (const char *) words;
479 : }
480 : #endif
481 :
482 : /* Process any remaining bytes */
483 0 : while (bytes--)
484 0 : popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
485 :
486 0 : return popcnt;
487 : }
488 :
489 : #ifndef TRY_POPCNT_FAST
490 :
491 : /*
492 : * When the POPCNT instruction is not available, there's no point in using
493 : * function pointers to vary the implementation between the fast and slow
494 : * method. We instead just make these actual external functions when
495 : * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
496 : * the slow versions here.
497 : */
498 : int
499 : pg_popcount32(uint32 word)
500 : {
501 : return pg_popcount32_slow(word);
502 : }
503 :
504 : int
505 : pg_popcount64(uint64 word)
506 : {
507 : return pg_popcount64_slow(word);
508 : }
509 :
510 : /*
511 : * pg_popcount_optimized
512 : * Returns the number of 1-bits in buf
513 : */
514 : uint64
515 : pg_popcount_optimized(const char *buf, int bytes)
516 : {
517 : return pg_popcount_slow(buf, bytes);
518 : }
519 :
520 : /*
521 : * pg_popcount_masked_optimized
522 : * Returns the number of 1-bits in buf after applying the mask to each byte
523 : */
524 : uint64
525 : pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
526 : {
527 : return pg_popcount_masked_slow(buf, bytes, mask);
528 : }
529 :
530 : #endif /* !TRY_POPCNT_FAST */
|