LCOV - code coverage report
Current view: top level - /usr/lib/gcc/x86_64-linux-gnu/14/include - smmintrin.h Coverage Total Hit
Test: PostgreSQL 19devel Lines: 100.0 % 3 3
Test Date: 2026-02-27 04:14:43 Functions: - 0 0
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 2007-2024 Free Software Foundation, Inc.
       2              : 
       3              :    This file is part of GCC.
       4              : 
       5              :    GCC is free software; you can redistribute it and/or modify
       6              :    it under the terms of the GNU General Public License as published by
       7              :    the Free Software Foundation; either version 3, or (at your option)
       8              :    any later version.
       9              : 
      10              :    GCC is distributed in the hope that it will be useful,
      11              :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              :    GNU General Public License for more details.
      14              : 
      15              :    Under Section 7 of GPL version 3, you are granted additional
      16              :    permissions described in the GCC Runtime Library Exception, version
      17              :    3.1, as published by the Free Software Foundation.
      18              : 
      19              :    You should have received a copy of the GNU General Public License and
      20              :    a copy of the GCC Runtime Library Exception along with this program;
      21              :    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22              :    <http://www.gnu.org/licenses/>.  */
      23              : 
      24              : /* Implemented from the specification included in the Intel C++ Compiler
      25              :    User Guide and Reference, version 10.0.  */
      26              : 
      27              : #ifndef _SMMINTRIN_H_INCLUDED
      28              : #define _SMMINTRIN_H_INCLUDED
      29              : 
      30              : /* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
      31              :    files.  */
      32              : #include <tmmintrin.h>
      33              : 
      34              : #ifndef __SSE4_1__
      35              : #pragma GCC push_options
      36              : #pragma GCC target("sse4.1")
      37              : #define __DISABLE_SSE4_1__
      38              : #endif /* __SSE4_1__ */
      39              : 
      40              : /* Rounding mode macros. */
      41              : #define _MM_FROUND_TO_NEAREST_INT   0x00
      42              : #define _MM_FROUND_TO_NEG_INF       0x01
      43              : #define _MM_FROUND_TO_POS_INF       0x02
      44              : #define _MM_FROUND_TO_ZERO      0x03
      45              : #define _MM_FROUND_CUR_DIRECTION    0x04
      46              : 
      47              : #define _MM_FROUND_RAISE_EXC        0x00
      48              : #define _MM_FROUND_NO_EXC       0x08
      49              : 
      50              : #define _MM_FROUND_NINT     \
      51              :   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
      52              : #define _MM_FROUND_FLOOR    \
      53              :   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
      54              : #define _MM_FROUND_CEIL     \
      55              :   (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
      56              : #define _MM_FROUND_TRUNC    \
      57              :   (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
      58              : #define _MM_FROUND_RINT     \
      59              :   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
      60              : #define _MM_FROUND_NEARBYINT    \
      61              :   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
      62              : 
      63              : /* Test Instruction */
      64              : /* Packed integer 128-bit bitwise comparison. Return 1 if
      65              :    (__V & __M) == 0.  */
      66              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      67              : _mm_testz_si128 (__m128i __M, __m128i __V)
      68              : {
      69              :   return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
      70              : }
      71              : 
      72              : /* Packed integer 128-bit bitwise comparison. Return 1 if
      73              :    (__V & ~__M) == 0.  */
      74              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      75              : _mm_testc_si128 (__m128i __M, __m128i __V)
      76              : {
      77              :   return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
      78              : }
      79              : 
      80              : /* Packed integer 128-bit bitwise comparison. Return 1 if
      81              :    (__V & __M) != 0 && (__V & ~__M) != 0.  */
      82              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      83              : _mm_testnzc_si128 (__m128i __M, __m128i __V)
      84              : {
      85              :   return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
      86              : }
      87              : 
      88              : /* Macros for packed integer 128-bit comparison intrinsics.  */
      89              : #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
      90              : 
      91              : #define _mm_test_all_ones(V) \
      92              :   _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
      93              : 
      94              : #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
      95              : 
      96              : /* Packed/scalar double precision floating point rounding.  */
      97              : 
      98              : #ifdef __OPTIMIZE__
      99              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     100              : _mm_round_pd (__m128d __V, const int __M)
     101              : {
     102              :   return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
     103              : }
     104              : 
     105              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     106              : _mm_round_sd(__m128d __D, __m128d __V, const int __M)
     107              : {
     108              :   return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
     109              :                        (__v2df)__V,
     110              :                        __M);
     111              : }
     112              : #else
     113              : #define _mm_round_pd(V, M) \
     114              :   ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
     115              : 
     116              : #define _mm_round_sd(D, V, M)                       \
     117              :   ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D),      \
     118              :                      (__v2df)(__m128d)(V), (int)(M)))
     119              : #endif
     120              : 
     121              : /* Packed/scalar single precision floating point rounding.  */
     122              : 
     123              : #ifdef __OPTIMIZE__
     124              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     125              : _mm_round_ps (__m128 __V, const int __M)
     126              : {
     127              :   return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
     128              : }
     129              : 
     130              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     131              : _mm_round_ss (__m128 __D, __m128 __V, const int __M)
     132              : {
     133              :   return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
     134              :                       (__v4sf)__V,
     135              :                       __M);
     136              : }
     137              : #else
     138              : #define _mm_round_ps(V, M) \
     139              :   ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
     140              : 
     141              : #define _mm_round_ss(D, V, M)                       \
     142              :   ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D),        \
     143              :                     (__v4sf)(__m128)(V), (int)(M)))
     144              : #endif
     145              : 
     146              : /* Macros for ceil/floor intrinsics.  */
     147              : #define _mm_ceil_pd(V)     _mm_round_pd ((V), _MM_FROUND_CEIL)
     148              : #define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
     149              : 
     150              : #define _mm_floor_pd(V)    _mm_round_pd((V), _MM_FROUND_FLOOR)
     151              : #define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
     152              : 
     153              : #define _mm_ceil_ps(V)     _mm_round_ps ((V), _MM_FROUND_CEIL)
     154              : #define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
     155              : 
     156              : #define _mm_floor_ps(V)    _mm_round_ps ((V), _MM_FROUND_FLOOR)
     157              : #define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
     158              : 
     159              : /* SSE4.1 */
     160              : 
     161              : /* Integer blend instructions - select data from 2 sources using
     162              :    constant/variable mask.  */
     163              : 
     164              : #ifdef __OPTIMIZE__
     165              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     166              : _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
     167              : {
     168              :   return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
     169              :                           (__v8hi)__Y,
     170              :                           __M);
     171              : }
     172              : #else
     173              : #define _mm_blend_epi16(X, Y, M)                    \
     174              :   ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X),       \
     175              :                     (__v8hi)(__m128i)(Y), (int)(M)))
     176              : #endif
     177              : 
     178              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     179              : _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
     180              : {
     181              :   return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
     182              :                            (__v16qi)__Y,
     183              :                            (__v16qi)__M);
     184              : }
     185              : 
     186              : /* Single precision floating point blend instructions - select data
     187              :    from 2 sources using constant/variable mask.  */
     188              : 
     189              : #ifdef __OPTIMIZE__
     190              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     191              : _mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
     192              : {
     193              :   return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
     194              :                       (__v4sf)__Y,
     195              :                       __M);
     196              : }
     197              : #else
     198              : #define _mm_blend_ps(X, Y, M)                       \
     199              :   ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X),        \
     200              :                     (__v4sf)(__m128)(Y), (int)(M)))
     201              : #endif
     202              : 
     203              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     204              : _mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
     205              : {
     206              :   return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
     207              :                        (__v4sf)__Y,
     208              :                        (__v4sf)__M);
     209              : }
     210              : 
     211              : /* Double precision floating point blend instructions - select data
     212              :    from 2 sources using constant/variable mask.  */
     213              : 
     214              : #ifdef __OPTIMIZE__
     215              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     216              : _mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
     217              : {
     218              :   return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
     219              :                        (__v2df)__Y,
     220              :                        __M);
     221              : }
     222              : #else
     223              : #define _mm_blend_pd(X, Y, M)                       \
     224              :   ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X),      \
     225              :                      (__v2df)(__m128d)(Y), (int)(M)))
     226              : #endif
     227              : 
     228              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     229              : _mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
     230              : {
     231              :   return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
     232              :                         (__v2df)__Y,
     233              :                         (__v2df)__M);
     234              : }
     235              : 
     236              : /* Dot product instructions with mask-defined summing and zeroing parts
     237              :    of result.  */
     238              : 
     239              : #ifdef __OPTIMIZE__
     240              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     241              : _mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
     242              : {
     243              :   return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
     244              :                        (__v4sf)__Y,
     245              :                        __M);
     246              : }
     247              : 
     248              : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     249              : _mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
     250              : {
     251              :   return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
     252              :                     (__v2df)__Y,
     253              :                     __M);
     254              : }
     255              : #else
     256              : #define _mm_dp_ps(X, Y, M)                      \
     257              :   ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X),           \
     258              :                  (__v4sf)(__m128)(Y), (int)(M)))
     259              : 
     260              : #define _mm_dp_pd(X, Y, M)                      \
     261              :   ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),         \
     262              :                   (__v2df)(__m128d)(Y), (int)(M)))
     263              : #endif
     264              : 
     265              : /* Packed integer 64-bit comparison, zeroing or filling with ones
     266              :    corresponding parts of result.  */
     267              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     268              : _mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
     269              : {
     270              :   return (__m128i) ((__v2di)__X == (__v2di)__Y);
     271              : }
     272              : 
     273              : /*  Min/max packed integer instructions.  */
     274              : 
     275              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     276              : _mm_min_epi8 (__m128i __X, __m128i __Y)
     277              : {
     278              :   return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
     279              : }
     280              : 
     281              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     282              : _mm_max_epi8 (__m128i __X, __m128i __Y)
     283              : {
     284              :   return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
     285              : }
     286              : 
     287              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     288              : _mm_min_epu16 (__m128i __X, __m128i __Y)
     289              : {
     290              :   return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
     291              : }
     292              : 
     293              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     294              : _mm_max_epu16 (__m128i __X, __m128i __Y)
     295              : {
     296              :   return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
     297              : }
     298              : 
     299              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     300              : _mm_min_epi32 (__m128i __X, __m128i __Y)
     301              : {
     302              :   return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
     303              : }
     304              : 
     305              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     306              : _mm_max_epi32 (__m128i __X, __m128i __Y)
     307              : {
     308              :   return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
     309              : }
     310              : 
     311              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     312              : _mm_min_epu32 (__m128i __X, __m128i __Y)
     313              : {
     314              :   return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
     315              : }
     316              : 
     317              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     318              : _mm_max_epu32 (__m128i __X, __m128i __Y)
     319              : {
     320              :   return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
     321              : }
     322              : 
     323              : /* Packed integer 32-bit multiplication with truncation of upper
     324              :    halves of results.  */
     325              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     326              : _mm_mullo_epi32 (__m128i __X, __m128i __Y)
     327              : {
     328              :   return (__m128i) ((__v4su)__X * (__v4su)__Y);
     329              : }
     330              : 
     331              : /* Packed integer 32-bit multiplication of 2 pairs of operands
     332              :    with two 64-bit results.  */
     333              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     334              : _mm_mul_epi32 (__m128i __X, __m128i __Y)
     335              : {
     336              :   return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
     337              : }
     338              : 
     339              : /* Insert single precision float into packed single precision array
     340              :    element selected by index N.  The bits [7-6] of N define S
     341              :    index, the bits [5-4] define D index, and bits [3-0] define
     342              :    zeroing mask for D.  */
     343              : 
     344              : #ifdef __OPTIMIZE__
     345              : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     346              : _mm_insert_ps (__m128 __D, __m128 __S, const int __N)
     347              : {
     348              :   return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
     349              :                           (__v4sf)__S,
     350              :                           __N);
     351              : }
     352              : #else
     353              : #define _mm_insert_ps(D, S, N)                      \
     354              :   ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D),        \
     355              :                     (__v4sf)(__m128)(S), (int)(N)))
     356              : #endif
     357              : 
     358              : /* Helper macro to create the N value for _mm_insert_ps.  */
     359              : #define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
     360              : 
     361              : /* Extract binary representation of single precision float from packed
     362              :    single precision array element of X selected by index N.  */
     363              : 
     364              : #ifdef __OPTIMIZE__
     365              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     366              : _mm_extract_ps (__m128 __X, const int __N)
     367              : {
     368              :   union { int __i; float __f; } __tmp;
     369              :   __tmp.__f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
     370              :   return __tmp.__i;
     371              : }
     372              : #else
     373              : #define _mm_extract_ps(X, N)                        \
     374              :   (__extension__                            \
     375              :    ({                                   \
     376              :      union { int __i; float __f; } __tmp;               \
     377              :      __tmp.__f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X),  \
     378              :                           (int)(N));        \
     379              :      __tmp.__i;                             \
     380              :    }))
     381              : #endif
     382              : 
     383              : /* Extract binary representation of single precision float into
     384              :    D from packed single precision array element of S selected
     385              :    by index N.  */
     386              : #define _MM_EXTRACT_FLOAT(D, S, N) \
     387              :   { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
     388              :   
     389              : /* Extract specified single precision float element into the lower
     390              :    part of __m128.  */
     391              : #define _MM_PICK_OUT_PS(X, N)               \
     392              :   _mm_insert_ps (_mm_setzero_ps (), (X),        \
     393              :          _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
     394              : 
     395              : /* Insert integer, S, into packed integer array element of D
     396              :    selected by index N.  */
     397              : 
     398              : #ifdef __OPTIMIZE__
     399              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     400              : _mm_insert_epi8 (__m128i __D, int __S, const int __N)
     401              : {
     402              :   return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
     403              :                          __S, __N);
     404              : }
     405              : 
     406              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     407              : _mm_insert_epi32 (__m128i __D, int __S, const int __N)
     408              : {
     409              :   return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
     410              :                          __S, __N);
     411              : }
     412              : 
     413              : #ifdef __x86_64__
     414              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     415              : _mm_insert_epi64 (__m128i __D, long long __S, const int __N)
     416              : {
     417              :   return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
     418              :                          __S, __N);
     419              : }
     420              : #endif
     421              : #else
     422              : #define _mm_insert_epi8(D, S, N)                    \
     423              :   ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D),   \
     424              :                        (int)(S), (int)(N)))
     425              : 
     426              : #define _mm_insert_epi32(D, S, N)               \
     427              :   ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
     428              :                       (int)(S), (int)(N)))
     429              : 
     430              : #ifdef __x86_64__
     431              : #define _mm_insert_epi64(D, S, N)                   \
     432              :   ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D),     \
     433              :                       (long long)(S), (int)(N)))
     434              : #endif
     435              : #endif
     436              : 
     437              : /* Extract integer from packed integer array element of X selected by
     438              :    index N.  */
     439              : 
     440              : #ifdef __OPTIMIZE__
     441              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     442              : _mm_extract_epi8 (__m128i __X, const int __N)
     443              : {
     444              :    return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
     445              : }
     446              : 
     447              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     448              : _mm_extract_epi32 (__m128i __X, const int __N)
     449              : {
     450              :    return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
     451              : }
     452              : 
     453              : #ifdef __x86_64__
     454              : extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     455              : _mm_extract_epi64 (__m128i __X, const int __N)
     456              : {
     457              :   return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
     458              : }
     459              : #endif
     460              : #else
     461              : #define _mm_extract_epi8(X, N) \
     462              :   ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
     463              : #define _mm_extract_epi32(X, N) \
     464              :   ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
     465              : 
     466              : #ifdef __x86_64__
     467              : #define _mm_extract_epi64(X, N) \
     468              :   ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
     469              : #endif
     470              : #endif
     471              : 
     472              : /* Return horizontal packed word minimum and its index in bits [15:0]
     473              :    and bits [18:16] respectively.  */
     474              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     475              : _mm_minpos_epu16 (__m128i __X)
     476              : {
     477              :   return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
     478              : }
     479              : 
     480              : /* Packed integer sign-extension.  */
     481              : 
     482              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     483              : _mm_cvtepi8_epi32 (__m128i __X)
     484              : {
     485              :   return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
     486              : }
     487              : 
     488              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     489              : _mm_cvtepi16_epi32 (__m128i __X)
     490              : {
     491              :   return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
     492              : }
     493              : 
     494              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     495              : _mm_cvtepi8_epi64 (__m128i __X)
     496              : {
     497              :   return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
     498              : }
     499              : 
     500              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     501              : _mm_cvtepi32_epi64 (__m128i __X)
     502              : {
     503              :   return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
     504              : }
     505              : 
     506              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     507              : _mm_cvtepi16_epi64 (__m128i __X)
     508              : {
     509              :   return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
     510              : }
     511              : 
     512              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     513              : _mm_cvtepi8_epi16 (__m128i __X)
     514              : {
     515              :   return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
     516              : }
     517              : 
     518              : /* Packed integer zero-extension. */
     519              : 
     520              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     521              : _mm_cvtepu8_epi32 (__m128i __X)
     522              : {
     523              :   return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
     524              : }
     525              : 
     526              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     527              : _mm_cvtepu16_epi32 (__m128i __X)
     528              : {
     529              :   return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
     530              : }
     531              : 
     532              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     533              : _mm_cvtepu8_epi64 (__m128i __X)
     534              : {
     535              :   return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
     536              : }
     537              : 
     538              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Zero-extend the two lowest 32-bit elements of __X to two 64-bit
                      :    elements (SSE4.1 PMOVZXDQ).  */
     539              : _mm_cvtepu32_epi64 (__m128i __X)
     540              : {
     541              :   return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
     542              : }
     543              : 
     544              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Zero-extend the two lowest 16-bit elements of __X to two 64-bit
                      :    elements (SSE4.1 PMOVZXWQ).  */
     545              : _mm_cvtepu16_epi64 (__m128i __X)
     546              : {
     547              :   return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
     548              : }
     549              : 
     550              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Zero-extend the eight lowest 8-bit elements of __X to eight 16-bit
                      :    elements (SSE4.1 PMOVZXBW).  */
     551              : _mm_cvtepu8_epi16 (__m128i __X)
     552              : {
     553              :   return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
     554              : }
     555              : 
     556              : /* Pack 8 double words from 2 operands into 8 words of result with
     557              :    unsigned saturation. */
     558              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Pack the four signed 32-bit elements of __X and of __Y into eight
                      :    16-bit results with unsigned saturation (SSE4.1 PACKUSDW);
                      :    __X supplies the low half of the result, __Y the high half.  */
     559              : _mm_packus_epi32 (__m128i __X, __m128i __Y)
     560              : {
     561              :   return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
     562              : }
     563              : 
     564              : /* Sum absolute 8-bit integer difference of adjacent groups of 4
     565              :    byte integers in the first 2 operands.  Starting offsets within
     566              :    operands are determined by the 3rd mask operand.  */
     567              : 
     568              : #ifdef __OPTIMIZE__
                      : /* MPSADBW requires its mask as an 8-bit immediate, so __M must be a
                      :    compile-time constant.  With optimization the always-inline
                      :    function form folds the constant into the builtin; otherwise the
                      :    macro form below is used so the constant reaches the builtin
                      :    directly.  */
     569              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     570              : _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
     571              : {
     572              :   return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
     573              :                           (__v16qi)__Y, __M);
     574              : }
     575              : #else
     576              : #define _mm_mpsadbw_epu8(X, Y, M)                   \
     577              :   ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X),      \
     578              :                     (__v16qi)(__m128i)(Y), (int)(M)))
     579              : #endif
     580              : 
     581              : /* Load double quadword using non-temporal aligned hint.  */
     582              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Load 128 bits from the 16-byte-aligned address __X with a
                      :    non-temporal hint (SSE4.1 MOVNTDQA).  */
     583              : _mm_stream_load_si128 (__m128i *__X)
     584              : {
     585              :   return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
     586              : }
     587              : 
     588              : #ifndef __SSE4_2__
     589              : #pragma GCC push_options
     590              : #pragma GCC target("sse4.2")
     591              : #define __DISABLE_SSE4_2__
     592              : #endif /* __SSE4_2__ */
     593              : 
     594              : /* These macros specify the source data format.  */
                      : /* One value from each of the four groups below is OR'ed together to
                      :    form the 8-bit immediate operand of the _mm_cmpXstrY intrinsics;
                      :    the groups occupy disjoint bit fields (0x03, 0x0c, 0x30, 0x40).  */
     595              : #define _SIDD_UBYTE_OPS         0x00
     596              : #define _SIDD_UWORD_OPS         0x01
     597              : #define _SIDD_SBYTE_OPS         0x02
     598              : #define _SIDD_SWORD_OPS         0x03
     599              : 
     600              : /* These macros specify the comparison operation.  */
     601              : #define _SIDD_CMP_EQUAL_ANY     0x00
     602              : #define _SIDD_CMP_RANGES        0x04
     603              : #define _SIDD_CMP_EQUAL_EACH        0x08
     604              : #define _SIDD_CMP_EQUAL_ORDERED     0x0c
     605              : 
     606              : /* These macros specify the polarity.  */
     607              : #define _SIDD_POSITIVE_POLARITY     0x00
     608              : #define _SIDD_NEGATIVE_POLARITY     0x10
     609              : #define _SIDD_MASKED_POSITIVE_POLARITY  0x20
     610              : #define _SIDD_MASKED_NEGATIVE_POLARITY  0x30
     611              : 
     612              : /* These macros specify the output selection in _mm_cmpXstri ().  */
     613              : #define _SIDD_LEAST_SIGNIFICANT     0x00
     614              : #define _SIDD_MOST_SIGNIFICANT      0x40
     615              : 
     616              : /* These macros specify the output selection in _mm_cmpXstrm ().  */
     617              : #define _SIDD_BIT_MASK          0x00
     618              : #define _SIDD_UNIT_MASK         0x40
     619              : 
     620              : /* Intrinsics for text/string processing.  */
     621              : 
     622              : #ifdef __OPTIMIZE__
                      : /* SSE4.2 string-compare intrinsics.  The "i" (implicit-length)
                      :    variants treat __X/__Y as NUL-terminated data; the "e"
                      :    (explicit-length) variants take element counts __LX/__LY.
                      :    "...strm" returns the comparison result as a mask in an XMM
                      :    register, "...stri" returns it as an index.  __M is the control
                      :    immediate built from the _SIDD_* macros above and must be a
                      :    compile-time constant, hence the macro fallback when not
                      :    optimizing.  */
     623              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     624              : _mm_cmpistrm (__m128i __X, __m128i __Y, const int __M)
     625              : {
     626              :   return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
     627              :                         (__v16qi)__Y,
     628              :                         __M);
     629              : }
     630              : 
     631              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     632              : _mm_cmpistri (__m128i __X, __m128i __Y, const int __M)
     633              : {
     634              :   return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
     635              :                       (__v16qi)__Y,
     636              :                       __M);
     637              : }
     638              : 
     639              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     640              : _mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     641              : {
     642              :   return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
     643              :                         (__v16qi)__Y, __LY,
     644              :                         __M);
     645              : }
     646              : 
     647              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     648              : _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     649              : {
     650              :   return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
     651              :                       (__v16qi)__Y, __LY,
     652              :                       __M);
     653              : }
     654              : #else
                      : /* Macro forms for -O0: keep the _SIDD_* immediate a literal constant
                      :    expression when the inline functions would not be folded.  */
     655              : #define _mm_cmpistrm(X, Y, M)                       \
     656              :   ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X),    \
     657              :                       (__v16qi)(__m128i)(Y), (int)(M)))
     658              : #define _mm_cmpistri(X, Y, M)                       \
     659              :   ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X),        \
     660              :                       (__v16qi)(__m128i)(Y), (int)(M)))
     661              : 
     662              : #define _mm_cmpestrm(X, LX, Y, LY, M)                   \
     663              :   ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X),    \
     664              :                       (int)(LX), (__v16qi)(__m128i)(Y), \
     665              :                       (int)(LY), (int)(M)))
     666              : #define _mm_cmpestri(X, LX, Y, LY, M)                   \
     667              :   ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
     668              :                       (__v16qi)(__m128i)(Y), (int)(LY), \
     669              :                       (int)(M)))
     670              : #endif
     671              : 
     672              : /* Intrinsics for text/string processing and reading values of
     673              :    EFlags.  */
     674              : 
     675              : #ifdef __OPTIMIZE__
                      : /* EFlags-reading variants of the SSE4.2 string comparisons: same
                      :    operands as _mm_cmpistri/_mm_cmpestri above, but each returns one
                      :    flag produced by the instruction instead of the index.  The suffix
                      :    selects the flag — a: "above" (CF set and ZF clear), c: CF,
                      :    o: OF, s: SF, z: ZF — per the builtin names pcmpXstr{ia,ic,io,is,iz}
                      :    (flag meanings per the Intel SDM; confirm against the PCMPISTRI /
                      :    PCMPESTRI documentation).  */
     676              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     677              : _mm_cmpistra (__m128i __X, __m128i __Y, const int __M)
     678              : {
     679              :   return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
     680              :                        (__v16qi)__Y,
     681              :                        __M);
     682              : }
     683              : 
     684              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     685              : _mm_cmpistrc (__m128i __X, __m128i __Y, const int __M)
     686              : {
     687              :   return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
     688              :                        (__v16qi)__Y,
     689              :                        __M);
     690              : }
     691              : 
     692              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     693              : _mm_cmpistro (__m128i __X, __m128i __Y, const int __M)
     694              : {
     695              :   return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
     696              :                        (__v16qi)__Y,
     697              :                        __M);
     698              : }
     699              : 
     700              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     701              : _mm_cmpistrs (__m128i __X, __m128i __Y, const int __M)
     702              : {
     703              :   return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
     704              :                        (__v16qi)__Y,
     705              :                        __M);
     706              : }
     707              : 
     708              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     709              : _mm_cmpistrz (__m128i __X, __m128i __Y, const int __M)
     710              : {
     711              :   return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
     712              :                        (__v16qi)__Y,
     713              :                        __M);
     714              : }
     715              : 
     716              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     717              : _mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     718              : {
     719              :   return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
     720              :                        (__v16qi)__Y, __LY,
     721              :                        __M);
     722              : }
     723              : 
     724              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     725              : _mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     726              : {
     727              :   return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
     728              :                        (__v16qi)__Y, __LY,
     729              :                        __M);
     730              : }
     731              : 
     732              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     733              : _mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     734              : {
     735              :   return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
     736              :                        (__v16qi)__Y, __LY,
     737              :                        __M);
     738              : }
     739              : 
     740              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     741              : _mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     742              : {
     743              :   return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
     744              :                        (__v16qi)__Y, __LY,
     745              :                        __M);
     746              : }
     747              : 
     748              : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     749              : _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M)
     750              : {
     751              :   return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
     752              :                        (__v16qi)__Y, __LY,
     753              :                        __M);
     754              : }
     755              : #else
                      : /* Macro forms for -O0 — see the comment above.  */
     756              : #define _mm_cmpistra(X, Y, M)                       \
     757              :   ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X),       \
     758              :                        (__v16qi)(__m128i)(Y), (int)(M)))
     759              : #define _mm_cmpistrc(X, Y, M)                       \
     760              :   ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X),       \
     761              :                        (__v16qi)(__m128i)(Y), (int)(M)))
     762              : #define _mm_cmpistro(X, Y, M)                       \
     763              :   ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X),       \
     764              :                        (__v16qi)(__m128i)(Y), (int)(M)))
     765              : #define _mm_cmpistrs(X, Y, M)                       \
     766              :   ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X),       \
     767              :                        (__v16qi)(__m128i)(Y), (int)(M)))
     768              : #define _mm_cmpistrz(X, Y, M)                       \
     769              :   ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X),       \
     770              :                        (__v16qi)(__m128i)(Y), (int)(M)))
     771              : 
     772              : #define _mm_cmpestra(X, LX, Y, LY, M)                   \
     773              :   ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
     774              :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     775              :                        (int)(M)))
     776              : #define _mm_cmpestrc(X, LX, Y, LY, M)                   \
     777              :   ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
     778              :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     779              :                        (int)(M)))
     780              : #define _mm_cmpestro(X, LX, Y, LY, M)                   \
     781              :   ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
     782              :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     783              :                        (int)(M)))
     784              : #define _mm_cmpestrs(X, LX, Y, LY, M)                   \
     785              :   ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
     786              :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     787              :                        (int)(M)))
     788              : #define _mm_cmpestrz(X, LX, Y, LY, M)                   \
     789              :   ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
     790              :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     791              :                        (int)(M)))
     792              : #endif
     793              : 
     794              : /* Packed integer 64-bit comparison, zeroing or filling with ones
     795              :    corresponding parts of result.  */
     796              : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Compare the two signed 64-bit elements of __X and __Y for
                      :    greater-than; each result element is all-ones where
                      :    __X[i] > __Y[i], else all-zeros.  Implemented with GCC's vector
                      :    comparison extension rather than a builtin.  */
     797              : _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
     798              : {
     799              :   return (__m128i) ((__v2di)__X > (__v2di)__Y);
     800              : }
     801              : 
     802              : #ifdef __DISABLE_SSE4_2__
     803              : #undef __DISABLE_SSE4_2__
     804              : #pragma GCC pop_options
     805              : #endif /* __DISABLE_SSE4_2__ */
     806              : 
     807              : #ifdef __DISABLE_SSE4_1__
     808              : #undef __DISABLE_SSE4_1__
     809              : #pragma GCC pop_options
     810              : #endif /* __DISABLE_SSE4_1__ */
     811              : 
     812              : #include <popcntintrin.h>
     813              : 
     814              : #ifndef __CRC32__
     815              : #pragma GCC push_options
     816              : #pragma GCC target("crc32")
     817              : #define __DISABLE_CRC32__
     818              : #endif /* __CRC32__ */
     819              : 
     820              : /* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  */
     821              : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Fold one byte __V into the running CRC32 value __C
                      :    (polynomial 0x11EDC6F41 per the header comment above; CRC32
                      :    instruction).  Returns the updated CRC.  */
     822              : _mm_crc32_u8 (unsigned int __C, unsigned char __V)
     823              : {
     824     88242106 :   return __builtin_ia32_crc32qi (__C, __V);
     825              : }
     826              : 
     827              : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Fold one 16-bit value __V into the running CRC32 value __C
                      :    (CRC32 instruction).  Returns the updated CRC.  */
     828              : _mm_crc32_u16 (unsigned int __C, unsigned short __V)
     829              : {
     830              :   return __builtin_ia32_crc32hi (__C, __V);
     831              : }
     832              : 
     833              : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
                      : /* Fold one 32-bit value __V into the running CRC32 value __C
                      :    (CRC32 instruction).  Returns the updated CRC.  */
     834              : _mm_crc32_u32 (unsigned int __C, unsigned int __V)
     835              : {
     836     50926579 :   return __builtin_ia32_crc32si (__C, __V);
     837              : }
     838              : 
     839              : #ifdef __x86_64__
                      : /* 64-bit-at-a-time CRC32 accumulation; the REX.W-encoded CRC32
                      :    instruction exists only in 64-bit mode, hence the guard.  The
                      :    running CRC lives in the low 32 bits of __C; returns the updated
                      :    value zero-extended to 64 bits.  */
     840              : extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     841              : _mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
     842              : {
     843   3978223312 :   return __builtin_ia32_crc32di (__C, __V);
     844              : }
     845              : #endif
     846              : 
     847              : #ifdef __DISABLE_CRC32__
     848              : #undef __DISABLE_CRC32__
     849              : #pragma GCC pop_options
     850              : #endif /* __DISABLE_CRC32__ */
     851              : 
     852              : #endif /* _SMMINTRIN_H_INCLUDED */
        

Generated by: LCOV version 2.0-1