Line data Source code
1 : /*------------------------------------------------------------------------- 2 : * 3 : * pg_crc32c_sse42_choose.c 4 : * Choose between Intel SSE 4.2 and software CRC-32C implementation. 5 : * 6 : * On first call, checks if the CPU we're running on supports Intel SSE 7 : * 4.2. If it does, use the special SSE instructions for CRC-32C 8 : * computation. Otherwise, fall back to the pure software implementation 9 : * (slicing-by-8). 10 : * 11 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group 12 : * Portions Copyright (c) 1994, Regents of the University of California 13 : * 14 : * 15 : * IDENTIFICATION 16 : * src/port/pg_crc32c_sse42_choose.c 17 : * 18 : *------------------------------------------------------------------------- 19 : */ 20 : 21 : #include "c.h" 22 : 23 : #if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) 24 : #include <cpuid.h> 25 : #endif 26 : 27 : #if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) 28 : #include <intrin.h> 29 : #endif 30 : 31 : #ifdef HAVE_XSAVE_INTRINSICS 32 : #include <immintrin.h> 33 : #endif 34 : 35 : #include "port/pg_crc32c.h" 36 : 37 : /* 38 : * Does XGETBV say the ZMM registers are enabled? 39 : * 40 : * NB: Caller is responsible for verifying that osxsave is available 41 : * before calling this. 42 : */ 43 : #ifdef HAVE_XSAVE_INTRINSICS 44 : pg_attribute_target("xsave") 45 : #endif 46 : static bool 47 2740 : zmm_regs_available(void) 48 : { 49 : #ifdef HAVE_XSAVE_INTRINSICS 50 2740 : return (_xgetbv(0) & 0xe6) == 0xe6; 51 : #else 52 : return false; 53 : #endif 54 : } 55 : 56 : /* 57 : * This gets called on the first call. It replaces the function pointer 58 : * so that subsequent calls are routed directly to the chosen implementation. 59 : */ 60 : static pg_crc32c 61 2740 : pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) 62 : { 63 2740 : unsigned int exx[4] = {0, 0, 0, 0}; 64 : 65 : /* 66 : * Set fallback. We must guard since slicing-by-8 is not visible 67 : * everywhere. 68 : */ 69 : #ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 70 2740 : pg_comp_crc32c = pg_comp_crc32c_sb8; 71 : #endif 72 : 73 : #if defined(HAVE__GET_CPUID) 74 2740 : __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); 75 : #elif defined(HAVE__CPUID) 76 : __cpuid(exx, 1); 77 : #else 78 : #error cpuid instruction not available 79 : #endif 80 : 81 2740 : if ((exx[2] & (1 << 20)) != 0) /* SSE 4.2 */ 82 : { 83 2740 : pg_comp_crc32c = pg_comp_crc32c_sse42; 84 : 85 5480 : if (exx[2] & (1 << 27) && /* OSXSAVE */ 86 2740 : zmm_regs_available()) 87 : { 88 : /* second cpuid call on leaf 7 to check extended AVX-512 support */ 89 : 90 2740 : memset(exx, 0, 4 * sizeof(exx[0])); 91 : 92 : #if defined(HAVE__GET_CPUID_COUNT) 93 2740 : __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); 94 : #elif defined(HAVE__CPUIDEX) 95 : __cpuidex(exx, 7, 0); 96 : #endif 97 : 98 : #ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK 99 2740 : if (exx[2] & (1 << 10) && /* VPCLMULQDQ */ 100 0 : exx[1] & (1 << 31)) /* AVX512-VL */ 101 0 : pg_comp_crc32c = pg_comp_crc32c_avx512; 102 : #endif 103 : } 104 : } 105 : 106 2740 : return pg_comp_crc32c(crc, data, len); 107 : } 108 : 109 : pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;