Line data Source code
1 : /*--------------------------------------------------------------------------- 2 : * 3 : * Ryu floating-point output for double precision. 4 : * 5 : * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group 6 : * 7 : * IDENTIFICATION 8 : * src/common/d2s_intrinsics.h 9 : * 10 : * This is a modification of code taken from github.com/ulfjack/ryu under the 11 : * terms of the Boost license (not the Apache license). The original copyright 12 : * notice follows: 13 : * 14 : * Copyright 2018 Ulf Adams 15 : * 16 : * The contents of this file may be used under the terms of the Apache 17 : * License, Version 2.0. 18 : * 19 : * (See accompanying file LICENSE-Apache or copy at 20 : * http://www.apache.org/licenses/LICENSE-2.0) 21 : * 22 : * Alternatively, the contents of this file may be used under the terms of the 23 : * Boost Software License, Version 1.0. 24 : * 25 : * (See accompanying file LICENSE-Boost or copy at 26 : * https://www.boost.org/LICENSE_1_0.txt) 27 : * 28 : * Unless required by applicable law or agreed to in writing, this software is 29 : * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 30 : * KIND, either express or implied. 31 : * 32 : *--------------------------------------------------------------------------- 33 : */ 34 : #ifndef RYU_D2S_INTRINSICS_H 35 : #define RYU_D2S_INTRINSICS_H 36 : 37 : #if defined(HAS_64_BIT_INTRINSICS) 38 : 39 : #include <intrin.h> 40 : 41 : static inline uint64 42 : umul128(const uint64 a, const uint64 b, uint64 *const productHi) 43 : { 44 : return _umul128(a, b, productHi); 45 : } 46 : 47 : static inline uint64 48 : shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) 49 : { 50 : /* 51 : * For the __shiftright128 intrinsic, the shift value is always modulo 64. 52 : * In the current implementation of the double-precision version of Ryu, 53 : * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, 54 : * the shift value is in the range [49, 58]. Otherwise in the range [2, 55 : * 59].) Check this here in case a future change requires larger shift 56 : * values. In this case this function needs to be adjusted. 57 : */ 58 : Assert(dist < 64); 59 : return __shiftright128(lo, hi, (unsigned char) dist); 60 : } 61 : 62 : #else /* defined(HAS_64_BIT_INTRINSICS) */ 63 : 64 : static inline uint64 65 : umul128(const uint64 a, const uint64 b, uint64 *const productHi) 66 : { 67 : /* 68 : * The casts here help MSVC to avoid calls to the __allmul library 69 : * function. 70 : */ 71 : const uint32 aLo = (uint32) a; 72 : const uint32 aHi = (uint32) (a >> 32); 73 : const uint32 bLo = (uint32) b; 74 : const uint32 bHi = (uint32) (b >> 32); 75 : 76 : const uint64 b00 = (uint64) aLo * bLo; 77 : const uint64 b01 = (uint64) aLo * bHi; 78 : const uint64 b10 = (uint64) aHi * bLo; 79 : const uint64 b11 = (uint64) aHi * bHi; 80 : 81 : const uint32 b00Lo = (uint32) b00; 82 : const uint32 b00Hi = (uint32) (b00 >> 32); 83 : 84 : const uint64 mid1 = b10 + b00Hi; 85 : const uint32 mid1Lo = (uint32) (mid1); 86 : const uint32 mid1Hi = (uint32) (mid1 >> 32); 87 : 88 : const uint64 mid2 = b01 + mid1Lo; 89 : const uint32 mid2Lo = (uint32) (mid2); 90 : const uint32 mid2Hi = (uint32) (mid2 >> 32); 91 : 92 : const uint64 pHi = b11 + mid1Hi + mid2Hi; 93 : const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; 94 : 95 : *productHi = pHi; 96 : return pLo; 97 : } 98 : 99 : static inline uint64 100 : shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) 101 : { 102 : /* We don't need to handle the case dist >= 64 here (see above). */ 103 : Assert(dist < 64); 104 : #if !defined(RYU_32_BIT_PLATFORM) 105 : Assert(dist > 0); 106 : return (hi << (64 - dist)) | (lo >> dist); 107 : #else 108 : /* Avoid a 64-bit shift by taking advantage of the range of shift values. */ 109 : Assert(dist >= 32); 110 : return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); 111 : #endif 112 : } 113 : 114 : #endif /* // defined(HAS_64_BIT_INTRINSICS) */ 115 : 116 : #ifdef RYU_32_BIT_PLATFORM 117 : 118 : /* Returns the high 64 bits of the 128-bit product of a and b. */ 119 : static inline uint64 120 : umulh(const uint64 a, const uint64 b) 121 : { 122 : /* 123 : * Reuse the umul128 implementation. Optimizers will likely eliminate the 124 : * instructions used to compute the low part of the product. 125 : */ 126 : uint64 hi; 127 : 128 : umul128(a, b, &hi); 129 : return hi; 130 : } 131 : 132 : /*---- 133 : * On 32-bit platforms, compilers typically generate calls to library 134 : * functions for 64-bit divisions, even if the divisor is a constant. 135 : * 136 : * E.g.: 137 : * https://bugs.llvm.org/show_bug.cgi?id=37932 138 : * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 139 : * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 140 : * 141 : * The functions here perform division-by-constant using multiplications 142 : * in the same way as 64-bit compilers would do. 143 : * 144 : * NB: 145 : * The multipliers and shift values are the ones generated by clang x64 146 : * for expressions like x/5, x/10, etc. 147 : *---- 148 : */ 149 : 150 : static inline uint64 151 : div5(const uint64 x) 152 : { 153 : return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; 154 : } 155 : 156 : static inline uint64 157 : div10(const uint64 x) 158 : { 159 : return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; 160 : } 161 : 162 : static inline uint64 163 : div100(const uint64 x) 164 : { 165 : return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; 166 : } 167 : 168 : static inline uint64 169 : div1e8(const uint64 x) 170 : { 171 : return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; 172 : } 173 : 174 : #else /* RYU_32_BIT_PLATFORM */ 175 : 176 : static inline uint64 177 7322 : div5(const uint64 x) 178 : { 179 7322 : return x / 5; 180 : } 181 : 182 : static inline uint64 183 74553632 : div10(const uint64 x) 184 : { 185 74553632 : return x / 10; 186 : } 187 : 188 : static inline uint64 189 8755154 : div100(const uint64 x) 190 : { 191 8755154 : return x / 100; 192 : } 193 : 194 : static inline uint64 195 1710134 : div1e8(const uint64 x) 196 : { 197 1710134 : return x / 100000000; 198 : } 199 : 200 : #endif /* RYU_32_BIT_PLATFORM */ 201 : 202 : #endif /* RYU_D2S_INTRINSICS_H */