LCOV - code coverage report
Current view: top level - usr/lib/gcc/x86_64-linux-gnu/9/include - emmintrin.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 89 96 92.7 %
Date: 2019-11-25 17:12:20 Functions: 0 0 -

          Line data    Source code
       1             : /* Copyright (C) 2003-2019 Free Software Foundation, Inc.
       2             : 
       3             :    This file is part of GCC.
       4             : 
       5             :    GCC is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3, or (at your option)
       8             :    any later version.
       9             : 
      10             :    GCC is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    Under Section 7 of GPL version 3, you are granted additional
      16             :    permissions described in the GCC Runtime Library Exception, version
      17             :    3.1, as published by the Free Software Foundation.
      18             : 
      19             :    You should have received a copy of the GNU General Public License and
      20             :    a copy of the GCC Runtime Library Exception along with this program;
      21             :    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22             :    <http://www.gnu.org/licenses/>.  */
      23             : 
      24             : /* Implemented from the specification included in the Intel C++ Compiler
      25             :    User Guide and Reference, version 9.0.  */
      26             : 
      27             : #ifndef _EMMINTRIN_H_INCLUDED
      28             : #define _EMMINTRIN_H_INCLUDED
      29             : 
      30             : /* We need definitions from the SSE header files.  */
      31             : #include <xmmintrin.h>
      32             : 
      33             : #ifndef __SSE2__
      34             : #pragma GCC push_options
      35             : #pragma GCC target("sse2")
      36             : #define __DISABLE_SSE2__
      37             : #endif /* __SSE2__ */
      38             : 
      39             : /* SSE2 */
      40             : typedef double __v2df __attribute__ ((__vector_size__ (16)));
      41             : typedef long long __v2di __attribute__ ((__vector_size__ (16)));
      42             : typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
      43             : typedef int __v4si __attribute__ ((__vector_size__ (16)));
      44             : typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
      45             : typedef short __v8hi __attribute__ ((__vector_size__ (16)));
      46             : typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
      47             : typedef char __v16qi __attribute__ ((__vector_size__ (16)));
      48             : typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
      49             : typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
      50             : 
      51             : /* The Intel API is flexible enough that we must allow aliasing with other
      52             :    vector types, and their scalar components.  */
      53             : typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
      54             : typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
      55             : 
      56             : /* Unaligned version of the same types.  */
      57             : typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      58             : typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      59             : 
      60             : /* Create a selector for use with the SHUFPD instruction.  */
      61             : #define _MM_SHUFFLE2(fp1,fp0) \
      62             :  (((fp1) << 1) | (fp0))
      63             : 
      64             : /* Create a vector with element 0 as F and the rest zero.  */
      65             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      66             : _mm_set_sd (double __F)
      67             : {
      68    11448400 :   return __extension__ (__m128d){ __F, 0.0 };
      69             : }
      70             : 
      71             : /* Create a vector with both elements equal to F.  */
      72             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      73             : _mm_set1_pd (double __F)
      74             : {
      75             :   return __extension__ (__m128d){ __F, __F };
      76             : }
      77             : 
      78             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      79             : _mm_set_pd1 (double __F)
      80             : {
      81             :   return _mm_set1_pd (__F);
      82             : }
      83             : 
      84             : /* Create a vector with the lower value X and upper value W.  */
      85             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      86             : _mm_set_pd (double __W, double __X)
      87             : {
      88             :   return __extension__ (__m128d){ __X, __W };
      89             : }
      90             : 
      91             : /* Create a vector with the lower value W and upper value X.  */
      92             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      93             : _mm_setr_pd (double __W, double __X)
      94             : {
      95             :   return __extension__ (__m128d){ __W, __X };
      96             : }
      97             : 
      98             : /* Create an undefined vector.  */
      99             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     100             : _mm_undefined_pd (void)
     101             : {
     102             :   __m128d __Y = __Y;
     103             :   return __Y;
     104             : }
     105             : 
     106             : /* Create a vector of zeros.  */
     107             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     108             : _mm_setzero_pd (void)
     109             : {
     110             :   return __extension__ (__m128d){ 0.0, 0.0 };
     111             : }
     112             : 
     113             : /* Sets the low DPFP value of A from the low value of B.  */
     114             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     115             : _mm_move_sd (__m128d __A, __m128d __B)
     116             : {
     117             :   return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
     118             : }
     119             : 
     120             : /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
     121             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     122             : _mm_load_pd (double const *__P)
     123             : {
     124             :   return *(__m128d *)__P;
     125             : }
     126             : 
     127             : /* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
     128             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     129             : _mm_loadu_pd (double const *__P)
     130             : {
     131             :   return *(__m128d_u *)__P;
     132             : }
     133             : 
     134             : /* Create a vector with both elements equal to *P.  */
     135             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     136             : _mm_load1_pd (double const *__P)
     137             : {
     138             :   return _mm_set1_pd (*__P);
     139             : }
     140             : 
     141             : /* Create a vector with element 0 as *P and the rest zero.  */
     142             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     143             : _mm_load_sd (double const *__P)
     144             : {
     145    22896700 :   return _mm_set_sd (*__P);
     146             : }
     147             : 
     148             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     149             : _mm_load_pd1 (double const *__P)
     150             : {
     151             :   return _mm_load1_pd (__P);
     152             : }
     153             : 
     154             : /* Load two DPFP values in reverse order.  The address must be 16-byte aligned.  */
     155             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     156             : _mm_loadr_pd (double const *__P)
     157             : {
     158             :   __m128d __tmp = _mm_load_pd (__P);
     159             :   return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
     160             : }
     161             : 
     162             : /* Store two DPFP values.  The address must be 16-byte aligned.  */
     163             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     164             : _mm_store_pd (double *__P, __m128d __A)
     165             : {
     166             :   *(__m128d *)__P = __A;
     167             : }
     168             : 
     169             : /* Store two DPFP values.  The address need not be 16-byte aligned.  */
     170             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     171             : _mm_storeu_pd (double *__P, __m128d __A)
     172             : {
     173             :   *(__m128d_u *)__P = __A;
     174             : }
     175             : 
     176             : /* Stores the lower DPFP value.  */
     177             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     178             : _mm_store_sd (double *__P, __m128d __A)
     179             : {
     180    11448400 :   *__P = ((__v2df)__A)[0];
     181    11448400 : }
     182             : 
     183             : extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     184             : _mm_cvtsd_f64 (__m128d __A)
     185             : {
     186             :   return ((__v2df)__A)[0];
     187             : }
     188             : 
     189             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     190             : _mm_storel_pd (double *__P, __m128d __A)
     191             : {
     192             :   _mm_store_sd (__P, __A);
     193             : }
     194             : 
     195             : /* Stores the upper DPFP value.  */
     196             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     197             : _mm_storeh_pd (double *__P, __m128d __A)
     198             : {
     199   212645086 :   *__P = ((__v2df)__A)[1];
     200   212645086 : }
     201             : 
     202             : /* Store the lower DPFP value across two words.
     203             :    The address must be 16-byte aligned.  */
     204             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     205             : _mm_store1_pd (double *__P, __m128d __A)
     206             : {
     207             :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
     208             : }
     209             : 
     210             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     211             : _mm_store_pd1 (double *__P, __m128d __A)
     212             : {
     213             :   _mm_store1_pd (__P, __A);
     214             : }
     215             : 
     216             : /* Store two DPFP values in reverse order.  The address must be 16-byte aligned.  */
     217             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     218             : _mm_storer_pd (double *__P, __m128d __A)
     219             : {
     220             :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
     221             : }
     222             : 
     223             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     224             : _mm_cvtsi128_si32 (__m128i __A)
     225             : {
     226  2158675906 :   return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
     227             : }
     228             : 
     229             : #ifdef __x86_64__
     230             : /* Intel intrinsic.  */
     231             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     232             : _mm_cvtsi128_si64 (__m128i __A)
     233             : {
     234      103837 :   return ((__v2di)__A)[0];
     235             : }
     236             : 
     237             : /* Microsoft intrinsic.  */
     238             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     239             : _mm_cvtsi128_si64x (__m128i __A)
     240             : {
     241             :   return ((__v2di)__A)[0];
     242             : }
     243             : #endif
     244             : 
     245             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     246             : _mm_add_pd (__m128d __A, __m128d __B)
     247             : {
     248             :   return (__m128d) ((__v2df)__A + (__v2df)__B);
     249             : }
     250             : 
     251             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     252             : _mm_add_sd (__m128d __A, __m128d __B)
     253             : {
     254             :   return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
     255             : }
     256             : 
     257             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     258             : _mm_sub_pd (__m128d __A, __m128d __B)
     259             : {
     260             :   return (__m128d) ((__v2df)__A - (__v2df)__B);
     261             : }
     262             : 
     263             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     264             : _mm_sub_sd (__m128d __A, __m128d __B)
     265             : {
     266             :   return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
     267             : }
     268             : 
     269             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     270             : _mm_mul_pd (__m128d __A, __m128d __B)
     271             : {
     272             :   return (__m128d) ((__v2df)__A * (__v2df)__B);
     273             : }
     274             : 
     275             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     276             : _mm_mul_sd (__m128d __A, __m128d __B)
     277             : {
     278             :   return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
     279             : }
     280             : 
     281             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     282             : _mm_div_pd (__m128d __A, __m128d __B)
     283             : {
     284             :   return (__m128d) ((__v2df)__A / (__v2df)__B);
     285             : }
     286             : 
     287             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     288             : _mm_div_sd (__m128d __A, __m128d __B)
     289             : {
     290             :   return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
     291             : }
     292             : 
     293             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     294             : _mm_sqrt_pd (__m128d __A)
     295             : {
     296             :   return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
     297             : }
     298             : 
     299             : /* Return pair {sqrt (B[0]), A[1]}.  */
     300             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     301             : _mm_sqrt_sd (__m128d __A, __m128d __B)
     302             : {
     303             :   __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
     304             :   return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
     305             : }
     306             : 
     307             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     308             : _mm_min_pd (__m128d __A, __m128d __B)
     309             : {
     310             :   return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
     311             : }
     312             : 
     313             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     314             : _mm_min_sd (__m128d __A, __m128d __B)
     315             : {
     316             :   return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
     317             : }
     318             : 
     319             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     320             : _mm_max_pd (__m128d __A, __m128d __B)
     321             : {
     322             :   return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
     323             : }
     324             : 
     325             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     326             : _mm_max_sd (__m128d __A, __m128d __B)
     327             : {
     328             :   return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
     329             : }
     330             : 
     331             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     332             : _mm_and_pd (__m128d __A, __m128d __B)
     333             : {
     334             :   return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
     335             : }
     336             : 
     337             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     338             : _mm_andnot_pd (__m128d __A, __m128d __B)
     339             : {
     340             :   return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
     341             : }
     342             : 
     343             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     344             : _mm_or_pd (__m128d __A, __m128d __B)
     345             : {
     346             :   return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
     347             : }
     348             : 
     349             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     350             : _mm_xor_pd (__m128d __A, __m128d __B)
     351             : {
     352             :   return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
     353             : }
     354             : 
     355             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     356             : _mm_cmpeq_pd (__m128d __A, __m128d __B)
     357             : {
     358             :   return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
     359             : }
     360             : 
     361             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     362             : _mm_cmplt_pd (__m128d __A, __m128d __B)
     363             : {
     364             :   return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
     365             : }
     366             : 
     367             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     368             : _mm_cmple_pd (__m128d __A, __m128d __B)
     369             : {
     370             :   return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
     371             : }
     372             : 
     373             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     374             : _mm_cmpgt_pd (__m128d __A, __m128d __B)
     375             : {
     376             :   return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
     377             : }
     378             : 
     379             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     380             : _mm_cmpge_pd (__m128d __A, __m128d __B)
     381             : {
     382             :   return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
     383             : }
     384             : 
     385             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     386             : _mm_cmpneq_pd (__m128d __A, __m128d __B)
     387             : {
     388             :   return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
     389             : }
     390             : 
     391             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     392             : _mm_cmpnlt_pd (__m128d __A, __m128d __B)
     393             : {
     394             :   return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
     395             : }
     396             : 
     397             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     398             : _mm_cmpnle_pd (__m128d __A, __m128d __B)
     399             : {
     400             :   return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
     401             : }
     402             : 
     403             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     404             : _mm_cmpngt_pd (__m128d __A, __m128d __B)
     405             : {
     406             :   return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
     407             : }
     408             : 
     409             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     410             : _mm_cmpnge_pd (__m128d __A, __m128d __B)
     411             : {
     412             :   return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
     413             : }
     414             : 
     415             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     416             : _mm_cmpord_pd (__m128d __A, __m128d __B)
     417             : {
     418             :   return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
     419             : }
     420             : 
     421             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     422             : _mm_cmpunord_pd (__m128d __A, __m128d __B)
     423             : {
     424             :   return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
     425             : }
     426             : 
     427             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     428             : _mm_cmpeq_sd (__m128d __A, __m128d __B)
     429             : {
     430             :   return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
     431             : }
     432             : 
     433             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     434             : _mm_cmplt_sd (__m128d __A, __m128d __B)
     435             : {
     436             :   return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
     437             : }
     438             : 
     439             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     440             : _mm_cmple_sd (__m128d __A, __m128d __B)
     441             : {
     442             :   return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
     443             : }
     444             : 
     445             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     446             : _mm_cmpgt_sd (__m128d __A, __m128d __B)
     447             : {
     448             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     449             :                                          (__v2df)
     450             :                                          __builtin_ia32_cmpltsd ((__v2df) __B,
     451             :                                                                  (__v2df)
     452             :                                                                  __A));
     453             : }
     454             : 
     455             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     456             : _mm_cmpge_sd (__m128d __A, __m128d __B)
     457             : {
     458             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     459             :                                          (__v2df)
     460             :                                          __builtin_ia32_cmplesd ((__v2df) __B,
     461             :                                                                  (__v2df)
     462             :                                                                  __A));
     463             : }
     464             : 
     465             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     466             : _mm_cmpneq_sd (__m128d __A, __m128d __B)
     467             : {
     468             :   return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
     469             : }
     470             : 
     471             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     472             : _mm_cmpnlt_sd (__m128d __A, __m128d __B)
     473             : {
     474             :   return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
     475             : }
     476             : 
     477             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     478             : _mm_cmpnle_sd (__m128d __A, __m128d __B)
     479             : {
     480             :   return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
     481             : }
     482             : 
     483             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     484             : _mm_cmpngt_sd (__m128d __A, __m128d __B)
     485             : {
     486             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     487             :                                          (__v2df)
     488             :                                          __builtin_ia32_cmpnltsd ((__v2df) __B,
     489             :                                                                   (__v2df)
     490             :                                                                   __A));
     491             : }
     492             : 
     493             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     494             : _mm_cmpnge_sd (__m128d __A, __m128d __B)
     495             : {
     496             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     497             :                                          (__v2df)
     498             :                                          __builtin_ia32_cmpnlesd ((__v2df) __B,
     499             :                                                                   (__v2df)
     500             :                                                                   __A));
     501             : }
     502             : 
     503             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     504             : _mm_cmpord_sd (__m128d __A, __m128d __B)
     505             : {
     506             :   return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
     507             : }
     508             : 
     509             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     510             : _mm_cmpunord_sd (__m128d __A, __m128d __B)
     511             : {
     512             :   return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
     513             : }
     514             : 
     515             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     516             : _mm_comieq_sd (__m128d __A, __m128d __B)
     517             : {
     518             :   return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
     519             : }
     520             : 
     521             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     522             : _mm_comilt_sd (__m128d __A, __m128d __B)
     523             : {
     524             :   return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
     525             : }
     526             : 
     527             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     528             : _mm_comile_sd (__m128d __A, __m128d __B)
     529             : {
     530             :   return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
     531             : }
     532             : 
     533             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     534             : _mm_comigt_sd (__m128d __A, __m128d __B)
     535             : {
     536             :   return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
     537             : }
     538             : 
     539             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     540             : _mm_comige_sd (__m128d __A, __m128d __B)
     541             : {
     542             :   return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
     543             : }
     544             : 
     545             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     546             : _mm_comineq_sd (__m128d __A, __m128d __B)
     547             : {
     548             :   return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
     549             : }
     550             : 
     551             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     552             : _mm_ucomieq_sd (__m128d __A, __m128d __B)
     553             : {
     554             :   return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
     555             : }
     556             : 
     557             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     558             : _mm_ucomilt_sd (__m128d __A, __m128d __B)
     559             : {
     560             :   return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
     561             : }
     562             : 
     563             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     564             : _mm_ucomile_sd (__m128d __A, __m128d __B)
     565             : {
     566             :   return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
     567             : }
     568             : 
     569             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     570             : _mm_ucomigt_sd (__m128d __A, __m128d __B)
     571             : {
     572             :   return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
     573             : }
     574             : 
     575             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     576             : _mm_ucomige_sd (__m128d __A, __m128d __B)
     577             : {
     578             :   return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
     579             : }
     580             : 
     581             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     582             : _mm_ucomineq_sd (__m128d __A, __m128d __B)
     583             : {
     584             :   return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
     585             : }
     586             : 
     587             : /* Create a vector of Qi, where i is the element number.  */
     588             : 
     589             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     590             : _mm_set_epi64x (long long __q1, long long __q0)
     591             : {
     592 35142599124 :   return __extension__ (__m128i)(__v2di){ __q0, __q1 };
     593             : }
     594             : 
     595             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     596             : _mm_set_epi64 (__m64 __q1,  __m64 __q0)
     597             : {
     598 53166391603 :   return _mm_set_epi64x ((long long)__q1, (long long)__q0);
     599             : }
     600             : 
     601             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     602             : _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
     603             : {
     604  5787177992 :   return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
     605             : }
     606             : 
     607             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     608             : _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
     609             :                short __q3, short __q2, short __q1, short __q0)
     610             : {
     611  2428898418 :   return __extension__ (__m128i)(__v8hi){
     612             :     __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
     613             : }
     614             : 
     615             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     616             : _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
     617             :               char __q11, char __q10, char __q09, char __q08,
     618             :               char __q07, char __q06, char __q05, char __q04,
     619             :               char __q03, char __q02, char __q01, char __q00)
     620             : {
     621   843951020 :   return __extension__ (__m128i)(__v16qi){
     622             :     __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
     623             :     __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
     624             :   };
     625             : }
     626             : 
     627             : /* Set all of the elements of the vector to A.  */
     628             : 
     629             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     630             : _mm_set1_epi64x (long long __A)
     631             : {
     632    23259601 :   return _mm_set_epi64x (__A, __A);
     633             : }
     634             : 
     635             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     636             : _mm_set1_epi64 (__m64 __A)
     637             : {
     638             :   return _mm_set_epi64 (__A, __A);
     639             : }
     640             : 
     641             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     642             : _mm_set1_epi32 (int __A)
     643             : {
     644  2712960151 :   return _mm_set_epi32 (__A, __A, __A, __A);
     645             : }
     646             : 
     647             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     648             : _mm_set1_epi16 (short __A)
     649             : {
     650  4690385610 :   return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
     651             : }
     652             : 
     653             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     654             : _mm_set1_epi8 (char __A)
     655             : {
     656  1421318200 :   return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
     657             :                        __A, __A, __A, __A, __A, __A, __A, __A);
     658             : }
     659             : 
     660             : /* Create a vector of Qi, where i is the element number.
     661             :    The parameter order is reversed from the _mm_set_epi* functions.  */
     662             : 
     663             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     664             : _mm_setr_epi64 (__m64 __q0, __m64 __q1)
     665             : {
     666             :   return _mm_set_epi64 (__q1, __q0);
     667             : }
     668             : 
     669             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     670             : _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
     671             : {
     672   107634490 :   return _mm_set_epi32 (__q3, __q2, __q1, __q0);
     673             : }
     674             : 
     675             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     676             : _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
     677             :                 short __q4, short __q5, short __q6, short __q7)
     678             : {
     679   159031000 :   return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
     680             : }
     681             : 
     682             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     683             : _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
     684             :                char __q04, char __q05, char __q06, char __q07,
     685             :                char __q08, char __q09, char __q10, char __q11,
     686             :                char __q12, char __q13, char __q14, char __q15)
     687             : {
     688   308197240 :   return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
     689             :                        __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
     690             : }
     691             : 
     692             : /* Create a vector with element 0 as *P and the rest zero.  */
     693             : 
     694             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     695             : _mm_load_si128 (__m128i const *__P)
     696             : {
     697  3953729900 :   return *__P;
     698             : }
     699             : 
     700             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     701             : _mm_loadu_si128 (__m128i_u const *__P)
     702             : {
     703 78708654060 :   return *__P;
     704             : }
     705             : 
     706             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     707             : _mm_loadl_epi64 (__m128i_u const *__P)
     708             : {
     709 >10633*10^7 :   return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
     710             : }
     711             : 
     712             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     713             : _mm_loadu_si64 (void const *__P)
     714             : {
     715             :   return _mm_loadl_epi64 ((__m128i_u *)__P);
     716             : }
     717             : 
     718             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     719             : _mm_store_si128 (__m128i *__P, __m128i __B)
     720             : {
     721  2404882926 :   *__P = __B;
     722  1376486213 : }
     723             : 
     724             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     725             : _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
     726             : {
     727  4847199434 :   *__P = __B;
     728  3739346346 : }
     729             : 
     730             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     731             : _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
     732             : {
     733 11288178310 :   *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
     734  6611798031 : }
     735             : 
     736             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     737             : _mm_storeu_si64 (void *__P, __m128i __B)
     738             : {
     739             :   _mm_storel_epi64 ((__m128i_u *)__P, __B);
     740             : }
     741             : 
     742             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     743             : _mm_movepi64_pi64 (__m128i __B)
     744             : {
     745             :   return (__m64) ((__v2di)__B)[0];
     746             : }
     747             : 
     748             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     749             : _mm_movpi64_epi64 (__m64 __A)
     750             : {
     751             :   return _mm_set_epi64 ((__m64)0LL, __A);
     752             : }
     753             : 
     754             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     755             : _mm_move_epi64 (__m128i __A)
     756             : {
     757             :   return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
     758             : }
     759             : 
     760             : /* Create an undefined vector.  */
     761             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     762             : _mm_undefined_si128 (void)
     763             : {
     764             :   __m128i __Y = __Y;
     765             :   return __Y;
     766             : }
     767             : 
     768             : /* Create a vector of zeros.  */
     769             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     770             : _mm_setzero_si128 (void)
     771             : {
     772  2401899273 :   return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
     773             : }
     774             : 
     775             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     776             : _mm_cvtepi32_pd (__m128i __A)
     777             : {
     778             :   return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
     779             : }
     780             : 
     781             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     782             : _mm_cvtepi32_ps (__m128i __A)
     783             : {
     784             :   return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
     785             : }
     786             : 
     787             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     788             : _mm_cvtpd_epi32 (__m128d __A)
     789             : {
     790             :   return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
     791             : }
     792             : 
     793             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     794             : _mm_cvtpd_pi32 (__m128d __A)
     795             : {
     796             :   return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
     797             : }
     798             : 
     799             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     800             : _mm_cvtpd_ps (__m128d __A)
     801             : {
     802             :   return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
     803             : }
     804             : 
     805             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     806             : _mm_cvttpd_epi32 (__m128d __A)
     807             : {
     808             :   return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
     809             : }
     810             : 
     811             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     812             : _mm_cvttpd_pi32 (__m128d __A)
     813             : {
     814             :   return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
     815             : }
     816             : 
     817             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     818             : _mm_cvtpi32_pd (__m64 __A)
     819             : {
     820             :   return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
     821             : }
     822             : 
     823             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     824             : _mm_cvtps_epi32 (__m128 __A)
     825             : {
     826             :   return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
     827             : }
     828             : 
     829             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     830             : _mm_cvttps_epi32 (__m128 __A)
     831             : {
     832             :   return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
     833             : }
     834             : 
     835             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     836             : _mm_cvtps_pd (__m128 __A)
     837             : {
     838             :   return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
     839             : }
     840             : 
     841             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     842             : _mm_cvtsd_si32 (__m128d __A)
     843             : {
     844             :   return __builtin_ia32_cvtsd2si ((__v2df) __A);
     845             : }
     846             : 
     847             : #ifdef __x86_64__
     848             : /* Intel intrinsic.  */
     849             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     850             : _mm_cvtsd_si64 (__m128d __A)
     851             : {
     852             :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     853             : }
     854             : 
     855             : /* Microsoft intrinsic.  */
     856             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     857             : _mm_cvtsd_si64x (__m128d __A)
     858             : {
     859             :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     860             : }
     861             : #endif
     862             : 
     863             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     864             : _mm_cvttsd_si32 (__m128d __A)
     865             : {
     866             :   return __builtin_ia32_cvttsd2si ((__v2df) __A);
     867             : }
     868             : 
     869             : #ifdef __x86_64__
     870             : /* Intel intrinsic.  */
     871             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     872             : _mm_cvttsd_si64 (__m128d __A)
     873             : {
     874             :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     875             : }
     876             : 
     877             : /* Microsoft intrinsic.  */
     878             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     879             : _mm_cvttsd_si64x (__m128d __A)
     880             : {
     881             :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     882             : }
     883             : #endif
     884             : 
     885             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     886             : _mm_cvtsd_ss (__m128 __A, __m128d __B)
     887             : {
     888             :   return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
     889             : }
     890             : 
     891             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     892             : _mm_cvtsi32_sd (__m128d __A, int __B)
     893             : {
     894             :   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
     895             : }
     896             : 
     897             : #ifdef __x86_64__
     898             : /* Intel intrinsic.  */
     899             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     900             : _mm_cvtsi64_sd (__m128d __A, long long __B)
     901             : {
     902             :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     903             : }
     904             : 
     905             : /* Microsoft intrinsic.  */
     906             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     907             : _mm_cvtsi64x_sd (__m128d __A, long long __B)
     908             : {
     909             :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     910             : }
     911             : #endif
     912             : 
     913             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     914             : _mm_cvtss_sd (__m128d __A, __m128 __B)
     915             : {
     916             :   return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
     917             : }
     918             : 
     919             : #ifdef __OPTIMIZE__
     920             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     921             : _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
     922             : {
     923             :   return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
     924             : }
     925             : #else
     926             : #define _mm_shuffle_pd(A, B, N)                                         \
     927             :   ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),                \
     928             :                                    (__v2df)(__m128d)(B), (int)(N)))
     929             : #endif
     930             : 
     931             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     932             : _mm_unpackhi_pd (__m128d __A, __m128d __B)
     933             : {
     934             :   return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
     935             : }
     936             : 
     937             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     938             : _mm_unpacklo_pd (__m128d __A, __m128d __B)
     939             : {
     940             :   return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
     941             : }
     942             : 
     943             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     944             : _mm_loadh_pd (__m128d __A, double const *__B)
     945             : {
     946  1253975340 :   return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
     947             : }
     948             : 
     949             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     950             : _mm_loadl_pd (__m128d __A, double const *__B)
     951             : {
     952             :   return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
     953             : }
     954             : 
     955             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     956             : _mm_movemask_pd (__m128d __A)
     957             : {
     958             :   return __builtin_ia32_movmskpd ((__v2df)__A);
     959             : }
     960             : 
     961             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     962             : _mm_packs_epi16 (__m128i __A, __m128i __B)
     963             : {
     964    76072050 :   return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
     965             : }
     966             : 
     967             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     968             : _mm_packs_epi32 (__m128i __A, __m128i __B)
     969             : {
     970  1722589471 :   return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
     971             : }
     972             : 
     973             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     974             : _mm_packus_epi16 (__m128i __A, __m128i __B)
     975             : {
     976  1949291367 :   return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
     977             : }
     978             : 
     979             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     980             : _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
     981             : {
     982   662399614 :   return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
     983             : }
     984             : 
     985             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     986             : _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
     987             : {
     988  2160674800 :   return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
     989             : }
     990             : 
     991             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     992             : _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
     993             : {
     994   863854524 :   return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
     995             : }
     996             : 
     997             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     998             : _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
     999             : {
    1000   976503458 :   return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
    1001             : }
    1002             : 
    1003             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1004             : _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
    1005             : {
    1006  3058256048 :   return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
    1007             : }
    1008             : 
    1009             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1010             : _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
    1011             : {
    1012 17049888926 :   return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
    1013             : }
    1014             : 
    1015             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1016             : _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
    1017             : {
    1018  1469104124 :   return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
    1019             : }
    1020             : 
    1021             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1022             : _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
    1023             : {
    1024  1289672228 :   return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
    1025             : }
    1026             : 
    1027             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1028             : _mm_add_epi8 (__m128i __A, __m128i __B)
    1029             : {
    1030  1411081696 :   return (__m128i) ((__v16qu)__A + (__v16qu)__B);
    1031             : }
    1032             : 
    1033             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1034             : _mm_add_epi16 (__m128i __A, __m128i __B)
    1035             : {
    1036  4215181985 :   return (__m128i) ((__v8hu)__A + (__v8hu)__B);
    1037             : }
    1038             : 
    1039             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1040             : _mm_add_epi32 (__m128i __A, __m128i __B)
    1041             : {
    1042  8856215428 :   return (__m128i) ((__v4su)__A + (__v4su)__B);
    1043             : }
    1044             : 
    1045             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1046             : _mm_add_epi64 (__m128i __A, __m128i __B)
    1047             : {
    1048   791344375 :   return (__m128i) ((__v2du)__A + (__v2du)__B);
    1049             : }
    1050             : 
    1051             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1052             : _mm_adds_epi8 (__m128i __A, __m128i __B)
    1053             : {
    1054     9539930 :   return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
    1055             : }
    1056             : 
    1057             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1058             : _mm_adds_epi16 (__m128i __A, __m128i __B)
    1059             : {
    1060  1148165360 :   return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
    1061             : }
    1062             : 
    1063             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1064             : _mm_adds_epu8 (__m128i __A, __m128i __B)
    1065             : {
    1066     7343670 :   return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
    1067             : }
    1068             : 
    1069             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1070             : _mm_adds_epu16 (__m128i __A, __m128i __B)
    1071             : {
    1072    53530400 :   return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
    1073             : }
    1074             : 
    1075             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1076             : _mm_sub_epi8 (__m128i __A, __m128i __B)
    1077             : {
    1078   600391000 :   return (__m128i) ((__v16qu)__A - (__v16qu)__B);
    1079             : }
    1080             : 
    1081             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1082             : _mm_sub_epi16 (__m128i __A, __m128i __B)
    1083             : {
    1084   563109866 :   return (__m128i) ((__v8hu)__A - (__v8hu)__B);
    1085             : }
    1086             : 
    1087             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1088             : _mm_sub_epi32 (__m128i __A, __m128i __B)
    1089             : {
    1090  1198966714 :   return (__m128i) ((__v4su)__A - (__v4su)__B);
    1091             : }
    1092             : 
    1093             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1094             : _mm_sub_epi64 (__m128i __A, __m128i __B)
    1095             : {
    1096             :   return (__m128i) ((__v2du)__A - (__v2du)__B);
    1097             : }
    1098             : 
    1099             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1100             : _mm_subs_epi8 (__m128i __A, __m128i __B)
    1101             : {
    1102     9555910 :   return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
    1103             : }
    1104             : 
    1105             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1106             : _mm_subs_epi16 (__m128i __A, __m128i __B)
    1107             : {
    1108   662313720 :   return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
    1109             : }
    1110             : 
    1111             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1112             : _mm_subs_epu8 (__m128i __A, __m128i __B)
    1113             : {
    1114    62622900 :   return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
    1115             : }
    1116             : 
    1117             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1118             : _mm_subs_epu16 (__m128i __A, __m128i __B)
    1119             : {
    1120    41329200 :   return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
    1121             : }
    1122             : 
    1123             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1124             : _mm_madd_epi16 (__m128i __A, __m128i __B)
    1125             : {
    1126  3203897919 :   return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
    1127             : }
    1128             : 
    1129             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1130             : _mm_mulhi_epi16 (__m128i __A, __m128i __B)
    1131             : {
    1132             :   return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
    1133             : }
    1134             : 
    1135             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1136             : _mm_mullo_epi16 (__m128i __A, __m128i __B)
    1137             : {
    1138   768335500 :   return (__m128i) ((__v8hu)__A * (__v8hu)__B);
    1139             : }
    1140             : 
    1141             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1142             : _mm_mul_su32 (__m64 __A, __m64 __B)
    1143             : {
    1144             :   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
    1145             : }
    1146             : 
    1147             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1148             : _mm_mul_epu32 (__m128i __A, __m128i __B)
    1149             : {
    1150           0 :   return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
    1151             : }
    1152             : 
    1153             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1154             : _mm_slli_epi16 (__m128i __A, int __B)
    1155             : {
    1156    82313800 :   return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
    1157             : }
    1158             : 
    1159             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1160             : _mm_slli_epi32 (__m128i __A, int __B)
    1161             : {
    1162   640091300 :   return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
    1163             : }
    1164             : 
    1165             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1166             : _mm_slli_epi64 (__m128i __A, int __B)
    1167             : {
    1168           0 :   return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
    1169             : }
    1170             : 
    1171             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1172             : _mm_srai_epi16 (__m128i __A, int __B)
    1173             : {
    1174  1594263696 :   return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
    1175             : }
    1176             : 
    1177             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1178             : _mm_srai_epi32 (__m128i __A, int __B)
    1179             : {
    1180  2886746280 :   return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
    1181             : }
    1182             : 
    1183             : #ifdef __OPTIMIZE__
    1184             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1185             : _mm_bsrli_si128 (__m128i __A, const int __N)
    1186             : {
    1187             :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1188             : }
    1189             : 
    1190             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1191             : _mm_bslli_si128 (__m128i __A, const int __N)
    1192             : {
    1193             :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1194             : }
    1195             : 
    1196             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1197             : _mm_srli_si128 (__m128i __A, const int __N)
    1198             : {
    1199             :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1200             : }
    1201             : 
    1202             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1203             : _mm_slli_si128 (__m128i __A, const int __N)
    1204             : {
    1205             :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1206             : }
    1207             : #else
    1208             : #define _mm_bsrli_si128(A, N) \
    1209             :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1210             : #define _mm_bslli_si128(A, N) \
    1211             :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1212             : #define _mm_srli_si128(A, N) \
    1213             :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1214             : #define _mm_slli_si128(A, N) \
    1215             :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1216             : #endif
    1217             : 
    1218             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1219             : _mm_srli_epi16 (__m128i __A, int __B)
    1220             : {
    1221   456380726 :   return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
    1222             : }
    1223             : 
    1224             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1225             : _mm_srli_epi32 (__m128i __A, int __B)
    1226             : {
    1227      470331 :   return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
    1228             : }
    1229             : 
    1230             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1231             : _mm_srli_epi64 (__m128i __A, int __B)
    1232             : {
    1233    84744500 :   return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
    1234             : }
    1235             : 
    1236             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1237             : _mm_sll_epi16 (__m128i __A, __m128i __B)
    1238             : {
    1239             :   return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
    1240             : }
    1241             : 
    1242             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1243             : _mm_sll_epi32 (__m128i __A, __m128i __B)
    1244             : {
    1245             :   return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
    1246             : }
    1247             : 
    1248             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1249             : _mm_sll_epi64 (__m128i __A, __m128i __B)
    1250             : {
    1251             :   return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
    1252             : }
    1253             : 
    1254             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1255             : _mm_sra_epi16 (__m128i __A, __m128i __B)
    1256             : {
    1257      567366 :   return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
    1258             : }
    1259             : 
    1260             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1261             : _mm_sra_epi32 (__m128i __A, __m128i __B)
    1262             : {
    1263           0 :   return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
    1264             : }
    1265             : 
    1266             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1267             : _mm_srl_epi16 (__m128i __A, __m128i __B)
    1268             : {
    1269     2206660 :   return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
    1270             : }
    1271             : 
    1272             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1273             : _mm_srl_epi32 (__m128i __A, __m128i __B)
    1274             : {
    1275           0 :   return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
    1276             : }
    1277             : 
    1278             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1279             : _mm_srl_epi64 (__m128i __A, __m128i __B)
    1280             : {
    1281             :   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
    1282             : }
    1283             : 
    1284             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1285             : _mm_and_si128 (__m128i __A, __m128i __B)
    1286             : {
    1287   635892880 :   return (__m128i) ((__v2du)__A & (__v2du)__B);
    1288             : }
    1289             : 
    1290             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1291             : _mm_andnot_si128 (__m128i __A, __m128i __B)
    1292             : {
    1293    68214980 :   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
    1294             : }
    1295             : 
    1296             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1297             : _mm_or_si128 (__m128i __A, __m128i __B)
    1298             : {
    1299    48103464 :   return (__m128i) ((__v2du)__A | (__v2du)__B);
    1300             : }
    1301             : 
    1302             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1303             : _mm_xor_si128 (__m128i __A, __m128i __B)
    1304             : {
    1305    14826700 :   return (__m128i) ((__v2du)__A ^ (__v2du)__B);
    1306             : }
    1307             : 
    1308             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1309             : _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
    1310             : {
    1311    12070500 :   return (__m128i) ((__v16qi)__A == (__v16qi)__B);
    1312             : }
    1313             : 
    1314             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1315             : _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
    1316             : {
    1317           0 :   return (__m128i) ((__v8hi)__A == (__v8hi)__B);
    1318             : }
    1319             : 
    1320             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1321             : _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
    1322             : {
    1323             :   return (__m128i) ((__v4si)__A == (__v4si)__B);
    1324             : }
    1325             : 
    1326             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1327             : _mm_cmplt_epi8 (__m128i __A, __m128i __B)
    1328             : {
    1329             :   return (__m128i) ((__v16qs)__A < (__v16qs)__B);
    1330             : }
    1331             : 
    1332             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1333             : _mm_cmplt_epi16 (__m128i __A, __m128i __B)
    1334             : {
    1335             :   return (__m128i) ((__v8hi)__A < (__v8hi)__B);
    1336             : }
    1337             : 
    1338             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1339             : _mm_cmplt_epi32 (__m128i __A, __m128i __B)
    1340             : {
    1341   544705000 :   return (__m128i) ((__v4si)__A < (__v4si)__B);
    1342             : }
    1343             : 
    1344             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1345             : _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
    1346             : {
    1347    56767900 :   return (__m128i) ((__v16qs)__A > (__v16qs)__B);
    1348             : }
    1349             : 
    1350             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1351             : _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
    1352             : {
    1353    47377394 :   return (__m128i) ((__v8hi)__A > (__v8hi)__B);
    1354             : }
    1355             : 
    1356             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1357             : _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
    1358             : {
    1359    10825300 :   return (__m128i) ((__v4si)__A > (__v4si)__B);
    1360             : }
    1361             : 
    1362             : #ifdef __OPTIMIZE__
    1363             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1364             : _mm_extract_epi16 (__m128i const __A, int const __N)
    1365             : {
    1366             :   return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
    1367             : }
    1368             : 
    1369             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1370             : _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
    1371             : {
    1372             :   return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
    1373             : }
    1374             : #else
    1375             : #define _mm_extract_epi16(A, N) \
    1376             :   ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
    1377             : #define _mm_insert_epi16(A, D, N)                               \
    1378             :   ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
    1379             :                                           (int)(D), (int)(N)))
    1380             : #endif
    1381             : 
    1382             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1383             : _mm_max_epi16 (__m128i __A, __m128i __B)
    1384             : {
    1385   105802200 :   return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
    1386             : }
    1387             : 
    1388             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1389             : _mm_max_epu8 (__m128i __A, __m128i __B)
    1390             : {
    1391    17655200 :   return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
    1392             : }
    1393             : 
    1394             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1395             : _mm_min_epi16 (__m128i __A, __m128i __B)
    1396             : {
    1397           0 :   return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
    1398             : }
    1399             : 
    1400             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1401             : _mm_min_epu8 (__m128i __A, __m128i __B)
    1402             : {
    1403  1664380000 :   return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
    1404             : }
    1405             : 
    1406             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1407             : _mm_movemask_epi8 (__m128i __A)
    1408             : {
    1409             :   return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
    1410             : }
    1411             : 
    1412             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1413             : _mm_mulhi_epu16 (__m128i __A, __m128i __B)
    1414             : {
    1415     2206660 :   return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
    1416             : }
    1417             : 
    1418             : #ifdef __OPTIMIZE__
    1419             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1420             : _mm_shufflehi_epi16 (__m128i __A, const int __mask)
    1421             : {
    1422             :   return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
    1423             : }
    1424             : 
    1425             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1426             : _mm_shufflelo_epi16 (__m128i __A, const int __mask)
    1427             : {
    1428             :   return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
    1429             : }
    1430             : 
    1431             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1432             : _mm_shuffle_epi32 (__m128i __A, const int __mask)
    1433             : {
    1434             :   return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
    1435             : }
    1436             : #else
    1437             : #define _mm_shufflehi_epi16(A, N) \
    1438             :   ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
    1439             : #define _mm_shufflelo_epi16(A, N) \
    1440             :   ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
    1441             : #define _mm_shuffle_epi32(A, N) \
    1442             :   ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
    1443             : #endif
    1444             : 
    1445             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1446             : _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
    1447             : {
    1448             :   __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
    1449             : }
    1450             : 
    1451             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1452             : _mm_avg_epu8 (__m128i __A, __m128i __B)
    1453             : {
    1454   580894300 :   return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
    1455             : }
    1456             : 
    1457             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1458             : _mm_avg_epu16 (__m128i __A, __m128i __B)
    1459             : {
    1460   293931487 :   return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
    1461             : }
    1462             : 
    1463             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1464             : _mm_sad_epu8 (__m128i __A, __m128i __B)
    1465             : {
    1466    14015155 :   return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
    1467             : }
    1468             : 
    1469             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1470             : _mm_stream_si32 (int *__A, int __B)
    1471             : {
    1472             :   __builtin_ia32_movnti (__A, __B);
    1473             : }
    1474             : 
    1475             : #ifdef __x86_64__
    1476             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1477             : _mm_stream_si64 (long long int *__A, long long int __B)
    1478             : {
    1479             :   __builtin_ia32_movnti64 (__A, __B);
    1480             : }
    1481             : #endif
    1482             : 
    1483             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1484             : _mm_stream_si128 (__m128i *__A, __m128i __B)
    1485             : {
    1486             :   __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
    1487             : }
    1488             : 
    1489             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1490             : _mm_stream_pd (double *__A, __m128d __B)
    1491             : {
    1492             :   __builtin_ia32_movntpd (__A, (__v2df)__B);
    1493             : }
    1494             : 
    1495             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1496             : _mm_clflush (void const *__A)
    1497             : {
    1498             :   __builtin_ia32_clflush (__A);
    1499             : }
    1500             : 
    1501             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1502             : _mm_lfence (void)
    1503             : {
    1504             :   __builtin_ia32_lfence ();
    1505             : }
    1506             : 
    1507             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1508             : _mm_mfence (void)
    1509             : {
    1510             :   __builtin_ia32_mfence ();
    1511             : }
    1512             : 
    1513             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1514             : _mm_cvtsi32_si128 (int __A)
    1515             : {
    1516  2962210812 :   return _mm_set_epi32 (0, 0, 0, __A);
    1517             : }
    1518             : 
    1519             : #ifdef __x86_64__
    1520             : /* Intel intrinsic.  */
    1521             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1522             : _mm_cvtsi64_si128 (long long __A)
    1523             : {
    1524           0 :   return _mm_set_epi64x (0, __A);
    1525             : }
    1526             : 
    1527             : /* Microsoft intrinsic.  */
    1528             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1529             : _mm_cvtsi64x_si128 (long long __A)
    1530             : {
    1531             :   return _mm_set_epi64x (0, __A);
    1532             : }
    1533             : #endif
    1534             : 
    1535             : /* Casts between various SP, DP, INT vector types.  Note that these do no
    1536             :    conversion of values, they just change the type.  */
    1537             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1538             : _mm_castpd_ps(__m128d __A)
    1539             : {
    1540             :   return (__m128) __A;
    1541             : }
    1542             : 
    1543             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1544             : _mm_castpd_si128(__m128d __A)
    1545             : {
    1546  1254218140 :   return (__m128i) __A;
    1547             : }
    1548             : 
    1549             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1550             : _mm_castps_pd(__m128 __A)
    1551             : {
    1552             :   return (__m128d) __A;
    1553             : }
    1554             : 
    1555             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1556             : _mm_castps_si128(__m128 __A)
    1557             : {
    1558   336922000 :   return (__m128i) __A;
    1559             : }
    1560             : 
    1561             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1562             : _mm_castsi128_ps(__m128i __A)
    1563             : {
    1564   336937000 :   return (__m128) __A;
    1565             : }
    1566             : 
    1567             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1568             : _mm_castsi128_pd(__m128i __A)
    1569             : {
    1570  1466619820 :   return (__m128d) __A;
    1571             : }
    1572             : 
    1573             : #ifdef __DISABLE_SSE2__
    1574             : #undef __DISABLE_SSE2__
    1575             : #pragma GCC pop_options
    1576             : #endif /* __DISABLE_SSE2__ */
    1577             : 
    1578             : #endif /* _EMMINTRIN_H_INCLUDED */

Generated by: LCOV version 1.14