LCOV - code coverage report
Current view: top level - src/common - d2s_intrinsics.h (source / functions) Hit Total Coverage
Test: PostgreSQL 17devel Lines: 8 8 100.0 %
Date: 2024-04-26 18:11:23 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*---------------------------------------------------------------------------
       2             :  *
       3             :  * Ryu floating-point output for double precision.
       4             :  *
       5             :  * Portions Copyright (c) 2018-2024, PostgreSQL Global Development Group
       6             :  *
       7             :  * IDENTIFICATION
       8             :  *    src/common/d2s_intrinsics.h
       9             :  *
      10             :  * This is a modification of code taken from github.com/ulfjack/ryu under the
      11             :  * terms of the Boost license (not the Apache license). The original copyright
      12             :  * notice follows:
      13             :  *
      14             :  * Copyright 2018 Ulf Adams
      15             :  *
      16             :  * The contents of this file may be used under the terms of the Apache
      17             :  * License, Version 2.0.
      18             :  *
      19             :  *     (See accompanying file LICENSE-Apache or copy at
      20             :  *      http://www.apache.org/licenses/LICENSE-2.0)
      21             :  *
      22             :  * Alternatively, the contents of this file may be used under the terms of the
      23             :  * Boost Software License, Version 1.0.
      24             :  *
      25             :  *     (See accompanying file LICENSE-Boost or copy at
      26             :  *      https://www.boost.org/LICENSE_1_0.txt)
      27             :  *
      28             :  * Unless required by applicable law or agreed to in writing, this software is
      29             :  * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
      30             :  * KIND, either express or implied.
      31             :  *
      32             :  *---------------------------------------------------------------------------
      33             :  */
      34             : #ifndef RYU_D2S_INTRINSICS_H
      35             : #define RYU_D2S_INTRINSICS_H
      36             : 
      37             : #if defined(HAS_64_BIT_INTRINSICS)
      38             : 
      39             : #include <intrin.h>
      40             : 
      41             : static inline uint64
      42             : umul128(const uint64 a, const uint64 b, uint64 *const productHi)
      43             : {
      44             :     return _umul128(a, b, productHi);
      45             : }
      46             : 
      47             : static inline uint64
      48             : shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
      49             : {
      50             :     /*
      51             :      * For the __shiftright128 intrinsic, the shift value is always taken
      52             :      * modulo 64. In the current implementation of the double-precision
      53             :      * version of Ryu, the shift value is always < 64. (With
      54             :      * RYU_OPTIMIZE_SIZE == 0 it is in the range [49, 58]; otherwise it is
      55             :      * in the range [2, 59].) Check this here in case a future change requires
      56             :      * larger shift values; in that case this function needs to be adjusted.
      57             :      */
      58             :     Assert(dist < 64);
      59             :     return __shiftright128(lo, hi, (unsigned char) dist);
      60             : }
      61             : 
      62             : #else                           /* defined(HAS_64_BIT_INTRINSICS) */
      63             : 
      64             : static inline uint64
      65             : umul128(const uint64 a, const uint64 b, uint64 *const productHi)
      66             : {
      67             :     /*
      68             :      * The casts here help MSVC to avoid calls to the __allmul library
      69             :      * function.
      70             :      */
      71             :     const uint32 aLo = (uint32) a;
      72             :     const uint32 aHi = (uint32) (a >> 32);
      73             :     const uint32 bLo = (uint32) b;
      74             :     const uint32 bHi = (uint32) (b >> 32);
      75             : 
      76             :     const uint64 b00 = (uint64) aLo * bLo;
      77             :     const uint64 b01 = (uint64) aLo * bHi;
      78             :     const uint64 b10 = (uint64) aHi * bLo;
      79             :     const uint64 b11 = (uint64) aHi * bHi;
      80             : 
      81             :     const uint32 b00Lo = (uint32) b00;
      82             :     const uint32 b00Hi = (uint32) (b00 >> 32);
      83             : 
      84             :     const uint64 mid1 = b10 + b00Hi;
      85             :     const uint32 mid1Lo = (uint32) (mid1);
      86             :     const uint32 mid1Hi = (uint32) (mid1 >> 32);
      87             : 
      88             :     const uint64 mid2 = b01 + mid1Lo;
      89             :     const uint32 mid2Lo = (uint32) (mid2);
      90             :     const uint32 mid2Hi = (uint32) (mid2 >> 32);
      91             : 
      92             :     const uint64 pHi = b11 + mid1Hi + mid2Hi;
      93             :     const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo;
      94             : 
      95             :     *productHi = pHi;
      96             :     return pLo;
      97             : }
      98             : 
      99             : static inline uint64
     100             : shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
     101             : {
     102             :     /* We don't need to handle the case dist >= 64 here (see above). */
     103             :     Assert(dist < 64);
     104             : #if !defined(RYU_32_BIT_PLATFORM)
     105             :     Assert(dist > 0);
     106             :     return (hi << (64 - dist)) | (lo >> dist);
     107             : #else
     108             :     /* Avoid a 64-bit shift by taking advantage of the range of shift values. */
     109             :     Assert(dist >= 32);
     110             :     return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32));
     111             : #endif
     112             : }
     113             : 
     114             : #endif                          /* defined(HAS_64_BIT_INTRINSICS) */
     115             : 
     116             : #ifdef RYU_32_BIT_PLATFORM
     117             : 
     118             : /*  Returns the high 64 bits of the 128-bit product of a and b. */
     119             : static inline uint64
     120             : umulh(const uint64 a, const uint64 b)
     121             : {
     122             :     /*
     123             :      * Reuse the umul128 implementation. Optimizers will likely eliminate the
     124             :      * instructions used to compute the low part of the product.
     125             :      */
     126             :     uint64      hi;
     127             : 
     128             :     umul128(a, b, &hi);
     129             :     return hi;
     130             : }
     131             : 
     132             : /*----
     133             :  *  On 32-bit platforms, compilers typically generate calls to library
     134             :  *  functions for 64-bit divisions, even if the divisor is a constant.
     135             :  *
     136             :  *  E.g.:
     137             :  *  https://bugs.llvm.org/show_bug.cgi?id=37932
     138             :  *  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
     139             :  *  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
     140             :  *
     141             :  *  The functions here perform division-by-constant using multiplications
     142             :  *  in the same way as 64-bit compilers would do.
     143             :  *
     144             :  *  NB:
     145             :  *  The multipliers and shift values are the ones generated by clang x64
     146             :  *  for expressions like x/5, x/10, etc.
     147             :  *----
     148             :  */
     149             : 
     150             : static inline uint64
     151             : div5(const uint64 x)
     152             : {
     153             :     return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2;
     154             : }
     155             : 
     156             : static inline uint64
     157             : div10(const uint64 x)
     158             : {
     159             :     return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3;
     160             : }
     161             : 
     162             : static inline uint64
     163             : div100(const uint64 x)
     164             : {
     165             :     return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2;
     166             : }
     167             : 
     168             : static inline uint64
     169             : div1e8(const uint64 x)
     170             : {
     171             :     return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26;
     172             : }
     173             : 
     174             : #else                           /* RYU_32_BIT_PLATFORM */
     175             : 
     176             : static inline uint64
     177        7322 : div5(const uint64 x)
     178             : {
     179        7322 :     return x / 5;
     180             : }
     181             : 
     182             : static inline uint64
     183    72241348 : div10(const uint64 x)
     184             : {
     185    72241348 :     return x / 10;
     186             : }
     187             : 
     188             : static inline uint64
     189     8059246 : div100(const uint64 x)
     190             : {
     191     8059246 :     return x / 100;
     192             : }
     193             : 
     194             : static inline uint64
     195     1502388 : div1e8(const uint64 x)
     196             : {
     197     1502388 :     return x / 100000000;
     198             : }
     199             : 
     200             : #endif                          /* RYU_32_BIT_PLATFORM */
     201             : 
     202             : #endif                          /* RYU_D2S_INTRINSICS_H */

Generated by: LCOV version 1.14