/*-------------------------------------------------------------------------
 *
 * simd.h
 *    Support for platform-specific vector operations.
 *
 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/port/simd.h
 *
 * NOTES
 * - VectorN in this file refers to a register where the element operands
 *   are N bits wide.  The vector width is platform-specific, so users that
 *   care about the width will need to inspect "sizeof(VectorN)".
 *
 *-------------------------------------------------------------------------
 */
#ifndef SIMD_H
#define SIMD_H

#if defined(USE_SSE2)
/*
 * We use emmintrin.h rather than the comprehensive header immintrin.h in
 * order to exclude extensions beyond SSE2.  This is because MSVC, at least,
 * will allow the use of intrinsics that haven't been enabled at compile
 * time.
 */
#include <emmintrin.h>
typedef __m128i Vector8;
typedef __m128i Vector32;

#elif defined(USE_NEON)
#include <arm_neon.h>
typedef uint8x16_t Vector8;
typedef uint32x4_t Vector32;

#else
/*
 * If no SIMD instructions are available, we can in some cases emulate vector
 * operations using bitwise operations on unsigned integers.  Note that many
 * of the functions in this file presently do not have non-SIMD
 * implementations.  In particular, none of the functions involving Vector32
 * are implemented without SIMD, since it's likely not worthwhile to
 * represent two 32-bit integers using a uint64.
 */
#define USE_NO_SIMD
typedef uint64 Vector8;
#endif
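
/*
 * Illustrative note: sizeof(Vector8) is 16 under SSE2 or NEON but 8 in the
 * uint64 fallback, so callers that carve a buffer into vector-sized chunks
 * must derive the chunk size from the type, e.g. (for an assumed buffer
 * length "len") "Size nchunks = len / sizeof(Vector8);", rather than
 * hard-coding a 16-byte register width.
 */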
      49             : 
      50             : /* load/store operations */
      51             : static inline void vector8_load(Vector8 *v, const uint8 *s);
      52             : #ifndef USE_NO_SIMD
      53             : static inline void vector32_load(Vector32 *v, const uint32 *s);
      54             : #endif
      55             : 
      56             : /* assignment operations */
      57             : static inline Vector8 vector8_broadcast(const uint8 c);
      58             : #ifndef USE_NO_SIMD
      59             : static inline Vector32 vector32_broadcast(const uint32 c);
      60             : #endif
      61             : 
      62             : /* element-wise comparisons to a scalar */
      63             : static inline bool vector8_has(const Vector8 v, const uint8 c);
      64             : static inline bool vector8_has_zero(const Vector8 v);
      65             : static inline bool vector8_has_le(const Vector8 v, const uint8 c);
      66             : static inline bool vector8_is_highbit_set(const Vector8 v);
      67             : #ifndef USE_NO_SIMD
      68             : static inline bool vector32_is_highbit_set(const Vector32 v);
      69             : static inline uint32 vector8_highbit_mask(const Vector8 v);
      70             : #endif
      71             : 
      72             : /* arithmetic operations */
      73             : static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2);
      74             : #ifndef USE_NO_SIMD
      75             : static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2);
      76             : #endif
      77             : 
      78             : /*
      79             :  * comparisons between vectors
      80             :  *
      81             :  * Note: These return a vector rather than boolean, which is why we don't
      82             :  * have non-SIMD implementations.
      83             :  */
      84             : #ifndef USE_NO_SIMD
      85             : static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);
      86             : static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);
      87             : static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);
      88             : #endif
      89             : 
      90             : /*
      91             :  * Load a chunk of memory into the given vector.
      92             :  */
      93             : static inline void
      94    29842100 : vector8_load(Vector8 *v, const uint8 *s)
      95             : {
      96             : #if defined(USE_SSE2)
      97    29842100 :     *v = _mm_loadu_si128((const __m128i *) s);
      98             : #elif defined(USE_NEON)
      99             :     *v = vld1q_u8(s);
     100             : #else
     101             :     memcpy(v, s, sizeof(Vector8));
     102             : #endif
     103    29842100 : }
     104             : 
     105             : #ifndef USE_NO_SIMD
     106             : static inline void
     107        1184 : vector32_load(Vector32 *v, const uint32 *s)
     108             : {
     109             : #ifdef USE_SSE2
     110        1184 :     *v = _mm_loadu_si128((const __m128i *) s);
     111             : #elif defined(USE_NEON)
     112             :     *v = vld1q_u32(s);
     113             : #endif
     114        1184 : }
     115             : #endif                          /* ! USE_NO_SIMD */
     116             : 
     117             : /*
     118             :  * Store a vector into the given memory address.
     119             :  */
     120             : #ifndef USE_NO_SIMD
     121             : static inline void
     122     4635480 : vector8_store(uint8 *s, Vector8 v)
     123             : {
     124             : #ifdef USE_SSE2
     125             :     _mm_storeu_si128((Vector8 *) s, v);
     126             : #elif defined(USE_NEON)
     127             :     vst1q_u8(s, v);
     128             : #endif
     129     4635480 : }
     130             : #endif                          /* ! USE_NO_SIMD */
     131             : 
     132             : /*
     133             :  * Create a vector with all elements set to the same value.
     134             :  */
     135             : static inline Vector8
     136    48039362 : vector8_broadcast(const uint8 c)
     137             : {
     138             : #if defined(USE_SSE2)
     139    96078724 :     return _mm_set1_epi8(c);
     140             : #elif defined(USE_NEON)
     141             :     return vdupq_n_u8(c);
     142             : #else
     143             :     return ~UINT64CONST(0) / 0xFF * c;
     144             : #endif
     145             : }
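
/*
 * In the non-SIMD branch above, ~UINT64CONST(0) / 0xFF evaluates to
 * 0x0101010101010101, so multiplying by c replicates the byte into every
 * lane; for example, c = 0x2A yields 0x2A2A2A2A2A2A2A2A.
 */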

#ifndef USE_NO_SIMD
static inline Vector32
vector32_broadcast(const uint32 c)
{
#ifdef USE_SSE2
    return _mm_set1_epi32(c);
#elif defined(USE_NEON)
    return vdupq_n_u32(c);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return true if any elements in the vector are equal to the given scalar.
 */
static inline bool
vector8_has(const Vector8 v, const uint8 c)
{
    bool        result;

    /* pre-compute the result for assert checking */
#ifdef USE_ASSERT_CHECKING
    bool        assert_result = false;

    for (Size i = 0; i < sizeof(Vector8); i++)
    {
        if (((const uint8 *) &v)[i] == c)
        {
            assert_result = true;
            break;
        }
    }
#endif                          /* USE_ASSERT_CHECKING */

#if defined(USE_NO_SIMD)
    /* any bytes in v equal to c will evaluate to zero via XOR */
    result = vector8_has_zero(v ^ vector8_broadcast(c));
#else
    result = vector8_is_highbit_set(vector8_eq(v, vector8_broadcast(c)));
#endif

    Assert(assert_result == result);
    return result;
}
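
/*
 * Usage sketch (illustrative only; "buf" and "len" are assumed caller
 * variables): scan a buffer one vector at a time, falling back to plain
 * byte comparisons for any remaining tail shorter than sizeof(Vector8).
 *
 *     Vector8     chunk;
 *
 *     for (Size i = 0; i + sizeof(Vector8) <= len; i += sizeof(Vector8))
 *     {
 *         vector8_load(&chunk, &buf[i]);
 *         if (vector8_has(chunk, 0x0A))
 *             return true;
 *     }
 */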

/*
 * Convenience function equivalent to vector8_has(v, 0).
 */
static inline bool
vector8_has_zero(const Vector8 v)
{
#if defined(USE_NO_SIMD)
    /*
     * We cannot call vector8_has() here, because that would lead to a
     * circular definition.
     */
    return vector8_has_le(v, 0);
#else
    return vector8_has(v, 0);
#endif
}

/*
 * Return true if any elements in the vector are less than or equal to the
 * given scalar.
 */
static inline bool
vector8_has_le(const Vector8 v, const uint8 c)
{
    bool        result = false;
#ifdef USE_SSE2
    Vector8     umin;
    Vector8     cmpe;
#endif

    /* pre-compute the result for assert checking */
#ifdef USE_ASSERT_CHECKING
    bool        assert_result = false;

    for (Size i = 0; i < sizeof(Vector8); i++)
    {
        if (((const uint8 *) &v)[i] <= c)
        {
            assert_result = true;
            break;
        }
    }
#endif                          /* USE_ASSERT_CHECKING */

#if defined(USE_NO_SIMD)

    /*
     * To find bytes <= c, we can use bitwise operations to find bytes < c+1,
     * but it only works if c+1 <= 128 and if the highest bit in v is not
     * set.  Adapted from
     * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
     */
    if ((int64) v >= 0 && c < 0x80)
        result = (v - vector8_broadcast(c + 1)) & ~v & vector8_broadcast(0x80);
    else
    {
        /* one byte at a time */
        for (Size i = 0; i < sizeof(Vector8); i++)
        {
            if (((const uint8 *) &v)[i] <= c)
            {
                result = true;
                break;
            }
        }
    }
#elif defined(USE_SSE2)
    umin = vector8_min(v, vector8_broadcast(c));
    cmpe = vector8_eq(umin, v);
    result = vector8_is_highbit_set(cmpe);
#elif defined(USE_NEON)
    result = vminvq_u8(v) <= c;
#endif

    Assert(assert_result == result);
    return result;
}
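
/*
 * Worked example of the bithack branch above (illustrative): with c = 0x1F,
 * c + 1 = 0x20, and v = 0x2020202020200A20, only the 0x0A byte is below
 * 0x20, so the subtraction borrows at that byte; the borrow survives the
 * "& ~v & vector8_broadcast(0x80)" mask and the result is nonzero (true).
 * If every byte of v were in the range 0x20..0x7F, no borrow would occur
 * and the masked result would be zero (false).
 */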

/*
 * Return true if any elements in the vector are greater than or equal to the
 * given scalar.
 */
#ifndef USE_NO_SIMD
static inline bool
vector8_has_ge(const Vector8 v, const uint8 c)
{
#ifdef USE_SSE2
    Vector8     umax = _mm_max_epu8(v, vector8_broadcast(c));
    Vector8     cmpe = vector8_eq(umax, v);

    return vector8_is_highbit_set(cmpe);
#elif defined(USE_NEON)
    return vmaxvq_u8(v) >= c;
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return true if the high bit of any element is set.
 */
static inline bool
vector8_is_highbit_set(const Vector8 v)
{
#ifdef USE_SSE2
    return _mm_movemask_epi8(v) != 0;
#elif defined(USE_NEON)
    return vmaxvq_u8(v) > 0x7F;
#else
    return v & vector8_broadcast(0x80);
#endif
}

/*
 * Exactly like vector8_is_highbit_set except for the input type, so it
 * looks at each byte separately.
 *
 * XXX x86 uses the same underlying type for 8-bit, 16-bit, and 32-bit
 * integer elements, but Arm does not, hence the need for a separate
 * function.  We could instead adopt the behavior of Arm's vmaxvq_u32(),
 * i.e. check each 32-bit element, but that would require an additional
 * mask operation on x86.
 */
#ifndef USE_NO_SIMD
static inline bool
vector32_is_highbit_set(const Vector32 v)
{
#if defined(USE_NEON)
    return vector8_is_highbit_set((Vector8) v);
#else
    return vector8_is_highbit_set(v);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return a bitmask formed from the high bit of each element.
 */
#ifndef USE_NO_SIMD
static inline uint32
vector8_highbit_mask(const Vector8 v)
{
#ifdef USE_SSE2
    return (uint32) _mm_movemask_epi8(v);
#elif defined(USE_NEON)
    /*
     * Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but
     * that returns a uint64, making it inconvenient to combine mask values
     * from multiple vectors.
     */
    static const uint8 mask[16] = {
        1 << 0, 1 << 1, 1 << 2, 1 << 3,
        1 << 4, 1 << 5, 1 << 6, 1 << 7,
        1 << 0, 1 << 1, 1 << 2, 1 << 3,
        1 << 4, 1 << 5, 1 << 6, 1 << 7,
    };

    uint8x16_t  masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8((int8x16_t) v, 7));
    uint8x16_t  maskedhi = vextq_u8(masked, masked, 8);

    return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
#endif
}
#endif                          /* ! USE_NO_SIMD */
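
/*
 * Illustrative example: bit i of the returned mask is the high bit of
 * element i, so a vector in which only elements 0 and 3 have their high
 * bits set yields vector8_highbit_mask(v) == 0x9 (binary 1001).
 */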

/*
 * Return the bitwise OR of the inputs.
 */
static inline Vector8
vector8_or(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_or_si128(v1, v2);
#elif defined(USE_NEON)
    return vorrq_u8(v1, v2);
#else
    return v1 | v2;
#endif
}

#ifndef USE_NO_SIMD
static inline Vector32
vector32_or(const Vector32 v1, const Vector32 v2)
{
#ifdef USE_SSE2
    return _mm_or_si128(v1, v2);
#elif defined(USE_NEON)
    return vorrq_u32(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return the bitwise AND of the inputs.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_and(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_and_si128(v1, v2);
#elif defined(USE_NEON)
    return vandq_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return the result of adding the respective elements of the input vectors.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_add(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_add_epi8(v1, v2);
#elif defined(USE_NEON)
    return vaddq_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return the result of subtracting the respective elements of the input
 * vectors using signed saturation (i.e., if the operation would yield a
 * value less than -128, -128 is returned instead).  For more information
 * on saturation arithmetic, see
 * https://en.wikipedia.org/wiki/Saturation_arithmetic
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_issub(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_subs_epi8(v1, v2);
#elif defined(USE_NEON)
    return (Vector8) vqsubq_s8((int8x16_t) v1, (int8x16_t) v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */
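
/*
 * Illustrative example: interpreting lanes as signed, a lane computing
 * (-100) - 100 would wrap to 56 under ordinary 8-bit subtraction, but
 * vector8_issub() clamps it to -128; symmetrically, 100 - (-100)
 * saturates to 127.
 */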

/*
 * Return a vector with all bits set in each lane where the corresponding
 * lanes in the inputs are equal.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_eq(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_cmpeq_epi8(v1, v2);
#elif defined(USE_NEON)
    return vceqq_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

#ifndef USE_NO_SIMD
static inline Vector32
vector32_eq(const Vector32 v1, const Vector32 v2)
{
#ifdef USE_SSE2
    return _mm_cmpeq_epi32(v1, v2);
#elif defined(USE_NEON)
    return vceqq_u32(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Return a vector with all bits set for each lane of v1 that is greater
 * than the corresponding lane of v2.  NB: The comparison treats the
 * elements as signed.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_gt(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_cmpgt_epi8(v1, v2);
#elif defined(USE_NEON)
    return vcgtq_s8((int8x16_t) v1, (int8x16_t) v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Given two vectors, return a vector with the minimum element of each.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_min(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_min_epu8(v1, v2);
#elif defined(USE_NEON)
    return vminq_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Interleave elements of the low halves (e.g., for SSE2, bits 0-63) of the
 * given vectors.  Bytes 0, 2, 4, etc. of the result use v1, and bytes 1, 3,
 * 5, etc. use v2.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_interleave_low(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_unpacklo_epi8(v1, v2);
#elif defined(USE_NEON)
    return vzip1q_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */

/*
 * Interleave elements of the high halves (e.g., for SSE2, bits 64-127) of
 * the given vectors.  Bytes 0, 2, 4, etc. of the result use v1, and bytes
 * 1, 3, 5, etc. use v2.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_interleave_high(const Vector8 v1, const Vector8 v2)
{
#ifdef USE_SSE2
    return _mm_unpackhi_epi8(v1, v2);
#elif defined(USE_NEON)
    return vzip2q_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */
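
/*
 * Illustrative example: with v1 = {a0, a1, ..., a15} and
 * v2 = {b0, b1, ..., b15}, vector8_interleave_low() returns
 * {a0, b0, a1, b1, ..., a7, b7} and vector8_interleave_high() returns
 * {a8, b8, a9, b9, ..., a15, b15}.
 */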

/*
 * Pack 16-bit elements in the given vectors into a single vector of 8-bit
 * elements.  The first half of the result vector (e.g., for SSE2, bits
 * 0-63) comes from v1, and the second half (e.g., for SSE2, bits 64-127)
 * comes from v2.
 *
 * NB: The upper 8 bits of each 16-bit element must be zeros, else this
 * will produce different results on different architectures.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_pack_16(const Vector8 v1, const Vector8 v2)
{
    Vector8     mask PG_USED_FOR_ASSERTS_ONLY;

    mask = vector8_interleave_low(vector8_broadcast(0), vector8_broadcast(0xff));
    Assert(!vector8_has_ge(vector8_and(v1, mask), 1));
    Assert(!vector8_has_ge(vector8_and(v2, mask), 1));
#ifdef USE_SSE2
    return _mm_packus_epi16(v1, v2);
#elif defined(USE_NEON)
    return vuzp1q_u8(v1, v2);
#endif
}
#endif                          /* ! USE_NO_SIMD */
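
/*
 * Illustrative example: with 16-bit elements v1 = {1, 2, ..., 8} and
 * v2 = {9, 10, ..., 16}, vector8_pack_16() returns the 8-bit elements
 * {1, 2, ..., 16}.  The zero-upper-byte requirement exists because the
 * SSE2 path saturates out-of-range values to 255 while the NEON path
 * simply discards the high byte of each element, so inputs above 255
 * would diverge.
 */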

/*
 * Unsigned shift left of each 32-bit element in the vector by "i" bits.
 *
 * XXX AArch64 requires an integer literal for the shift amount, so we have
 * to list all expected values of "i" from all callers in a switch
 * statement.  If you add a new caller, be sure your expected values of "i"
 * are handled.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_shift_left(const Vector8 v1, int i)
{
#ifdef USE_SSE2
    return _mm_slli_epi32(v1, i);
#elif defined(USE_NEON)
    switch (i)
    {
        case 4:
            return (Vector8) vshlq_n_u32((Vector32) v1, 4);
        default:
            Assert(false);
            return vector8_broadcast(0);
    }
#endif
}
#endif                          /* ! USE_NO_SIMD */
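
/*
 * For example, a hypothetical new caller needing vector8_shift_left(v, 8)
 * would have to extend the NEON switch above with:
 *
 *     case 8:
 *         return (Vector8) vshlq_n_u32((Vector32) v1, 8);
 *
 * because vshlq_n_u32() accepts only a compile-time constant shift amount.
 */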

/*
 * Unsigned shift right of each 32-bit element in the vector by "i" bits.
 *
 * XXX AArch64 requires an integer literal for the shift amount, so we have
 * to list all expected values of "i" from all callers in a switch
 * statement.  If you add a new caller, be sure your expected values of "i"
 * are handled.
 */
#ifndef USE_NO_SIMD
static inline Vector8
vector8_shift_right(const Vector8 v1, int i)
{
#ifdef USE_SSE2
    return _mm_srli_epi32(v1, i);
#elif defined(USE_NEON)
    switch (i)
    {
        case 4:
            return (Vector8) vshrq_n_u32((Vector32) v1, 4);
        case 8:
            return (Vector8) vshrq_n_u32((Vector32) v1, 8);
        default:
            Assert(false);
            return vector8_broadcast(0);
    }
#endif
}
#endif                          /* ! USE_NO_SIMD */

#endif                          /* SIMD_H */