Line data Source code
1 : /* Copyright (C) 2003-2019 Free Software Foundation, Inc.
2 :
3 : This file is part of GCC.
4 :
5 : GCC is free software; you can redistribute it and/or modify
6 : it under the terms of the GNU General Public License as published by
7 : the Free Software Foundation; either version 3, or (at your option)
8 : any later version.
9 :
10 : GCC is distributed in the hope that it will be useful,
11 : but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : GNU General Public License for more details.
14 :
15 : Under Section 7 of GPL version 3, you are granted additional
16 : permissions described in the GCC Runtime Library Exception, version
17 : 3.1, as published by the Free Software Foundation.
18 :
19 : You should have received a copy of the GNU General Public License and
20 : a copy of the GCC Runtime Library Exception along with this program;
21 : see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 : <http://www.gnu.org/licenses/>. */
23 :
24 : /* Implemented from the specification included in the Intel C++ Compiler
25 : User Guide and Reference, version 9.0. */
26 :
27 : #ifndef _EMMINTRIN_H_INCLUDED
28 : #define _EMMINTRIN_H_INCLUDED
29 :
30 : /* We need definitions from the SSE header files*/
31 : #include <xmmintrin.h>
32 :
33 : #ifndef __SSE2__
34 : #pragma GCC push_options
35 : #pragma GCC target("sse2")
36 : #define __DISABLE_SSE2__
37 : #endif /* __SSE2__ */
38 :
/* SSE2 */
/* Internal 16-byte GNU vector types. Each name encodes the lane count and
   element kind of the typedef it introduces (e.g. __v2df is declared over
   double, __v16qu over unsigned char). */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
typedef short __v8hi __attribute__ ((__vector_size__ (16)));
typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
/* Three byte-lane flavours: plain char, explicitly signed, explicitly
   unsigned. */
typedef char __v16qi __attribute__ ((__vector_size__ (16)));
typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components. */
/* These are the public types used in the intrinsic signatures below; they
   carry __may_alias__ in addition to the 16-byte vector size. */
typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));

/* Unaligned version of the same types. */
/* __aligned__ (1) lowers the alignment requirement to a single byte; the
   unaligned load/store intrinsics below access memory through these. */
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
59 :
/* Build a selector for use with the SHUFPD instruction: FP1 is placed in
   bit 1 and FP0 in bit 0 of the result. */
#define _MM_SHUFFLE2(fp1,fp0) \
  ((fp0) | ((fp1) << 1))
63 :
64 : /* Create a vector with element 0 as F and the rest zero. */
65 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
66 : _mm_set_sd (double __F)
67 : {
68 11448400 : return __extension__ (__m128d){ __F, 0.0 };
69 : }
70 :
71 : /* Create a vector with both elements equal to F. */
72 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73 : _mm_set1_pd (double __F)
74 : {
75 : return __extension__ (__m128d){ __F, __F };
76 : }
77 :
78 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
79 : _mm_set_pd1 (double __F)
80 : {
81 : return _mm_set1_pd (__F);
82 : }
83 :
84 : /* Create a vector with the lower value X and upper value W. */
85 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
86 : _mm_set_pd (double __W, double __X)
87 : {
88 : return __extension__ (__m128d){ __X, __W };
89 : }
90 :
91 : /* Create a vector with the lower value W and upper value X. */
92 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93 : _mm_setr_pd (double __W, double __X)
94 : {
95 : return __extension__ (__m128d){ __W, __X };
96 : }
97 :
98 : /* Create an undefined vector. */
99 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
100 : _mm_undefined_pd (void)
101 : {
102 : __m128d __Y = __Y;
103 : return __Y;
104 : }
105 :
106 : /* Create a vector of zeros. */
107 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
108 : _mm_setzero_pd (void)
109 : {
110 : return __extension__ (__m128d){ 0.0, 0.0 };
111 : }
112 :
113 : /* Sets the low DPFP value of A from the low value of B. */
114 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
115 : _mm_move_sd (__m128d __A, __m128d __B)
116 : {
117 : return __extension__ (__m128d) __builtin_shuffle ((__v2df)__A, (__v2df)__B, (__v2di){2, 1});
118 : }
119 :
120 : /* Load two DPFP values from P. The address must be 16-byte aligned. */
121 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
122 : _mm_load_pd (double const *__P)
123 : {
124 : return *(__m128d *)__P;
125 : }
126 :
127 : /* Load two DPFP values from P. The address need not be 16-byte aligned. */
128 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129 : _mm_loadu_pd (double const *__P)
130 : {
131 : return *(__m128d_u *)__P;
132 : }
133 :
134 : /* Create a vector with all two elements equal to *P. */
135 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136 : _mm_load1_pd (double const *__P)
137 : {
138 : return _mm_set1_pd (*__P);
139 : }
140 :
141 : /* Create a vector with element 0 as *P and the rest zero. */
142 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
143 : _mm_load_sd (double const *__P)
144 : {
145 22896700 : return _mm_set_sd (*__P);
146 : }
147 :
148 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
149 : _mm_load_pd1 (double const *__P)
150 : {
151 : return _mm_load1_pd (__P);
152 : }
153 :
154 : /* Load two DPFP values in reverse order. The address must be aligned. */
155 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156 : _mm_loadr_pd (double const *__P)
157 : {
158 : __m128d __tmp = _mm_load_pd (__P);
159 : return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
160 : }
161 :
162 : /* Store two DPFP values. The address must be 16-byte aligned. */
163 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
164 : _mm_store_pd (double *__P, __m128d __A)
165 : {
166 : *(__m128d *)__P = __A;
167 : }
168 :
169 : /* Store two DPFP values. The address need not be 16-byte aligned. */
170 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
171 : _mm_storeu_pd (double *__P, __m128d __A)
172 : {
173 : *(__m128d_u *)__P = __A;
174 : }
175 :
176 : /* Stores the lower DPFP value. */
177 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
178 : _mm_store_sd (double *__P, __m128d __A)
179 : {
180 11448400 : *__P = ((__v2df)__A)[0];
181 11448400 : }
182 :
183 : extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
184 : _mm_cvtsd_f64 (__m128d __A)
185 : {
186 : return ((__v2df)__A)[0];
187 : }
188 :
189 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
190 : _mm_storel_pd (double *__P, __m128d __A)
191 : {
192 : _mm_store_sd (__P, __A);
193 : }
194 :
195 : /* Stores the upper DPFP value. */
196 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
197 : _mm_storeh_pd (double *__P, __m128d __A)
198 : {
199 212645086 : *__P = ((__v2df)__A)[1];
200 212645086 : }
201 :
202 : /* Store the lower DPFP value across two words.
203 : The address must be 16-byte aligned. */
204 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
205 : _mm_store1_pd (double *__P, __m128d __A)
206 : {
207 : _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
208 : }
209 :
210 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
211 : _mm_store_pd1 (double *__P, __m128d __A)
212 : {
213 : _mm_store1_pd (__P, __A);
214 : }
215 :
216 : /* Store two DPFP values in reverse order. The address must be aligned. */
217 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
218 : _mm_storer_pd (double *__P, __m128d __A)
219 : {
220 : _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
221 : }
222 :
223 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
224 : _mm_cvtsi128_si32 (__m128i __A)
225 : {
226 2158675906 : return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
227 : }
228 :
229 : #ifdef __x86_64__
230 : /* Intel intrinsic. */
231 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
232 : _mm_cvtsi128_si64 (__m128i __A)
233 : {
234 103837 : return ((__v2di)__A)[0];
235 : }
236 :
237 : /* Microsoft intrinsic. */
238 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
239 : _mm_cvtsi128_si64x (__m128i __A)
240 : {
241 : return ((__v2di)__A)[0];
242 : }
243 : #endif
244 :
245 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
246 : _mm_add_pd (__m128d __A, __m128d __B)
247 : {
248 : return (__m128d) ((__v2df)__A + (__v2df)__B);
249 : }
250 :
251 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
252 : _mm_add_sd (__m128d __A, __m128d __B)
253 : {
254 : return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
255 : }
256 :
257 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
258 : _mm_sub_pd (__m128d __A, __m128d __B)
259 : {
260 : return (__m128d) ((__v2df)__A - (__v2df)__B);
261 : }
262 :
263 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
264 : _mm_sub_sd (__m128d __A, __m128d __B)
265 : {
266 : return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
267 : }
268 :
269 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
270 : _mm_mul_pd (__m128d __A, __m128d __B)
271 : {
272 : return (__m128d) ((__v2df)__A * (__v2df)__B);
273 : }
274 :
275 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
276 : _mm_mul_sd (__m128d __A, __m128d __B)
277 : {
278 : return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
279 : }
280 :
281 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
282 : _mm_div_pd (__m128d __A, __m128d __B)
283 : {
284 : return (__m128d) ((__v2df)__A / (__v2df)__B);
285 : }
286 :
287 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
288 : _mm_div_sd (__m128d __A, __m128d __B)
289 : {
290 : return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
291 : }
292 :
293 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
294 : _mm_sqrt_pd (__m128d __A)
295 : {
296 : return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
297 : }
298 :
299 : /* Return pair {sqrt (B[0]), A[1]}. */
300 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
301 : _mm_sqrt_sd (__m128d __A, __m128d __B)
302 : {
303 : __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
304 : return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
305 : }
306 :
307 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308 : _mm_min_pd (__m128d __A, __m128d __B)
309 : {
310 : return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
311 : }
312 :
313 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
314 : _mm_min_sd (__m128d __A, __m128d __B)
315 : {
316 : return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
317 : }
318 :
319 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
320 : _mm_max_pd (__m128d __A, __m128d __B)
321 : {
322 : return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
323 : }
324 :
325 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
326 : _mm_max_sd (__m128d __A, __m128d __B)
327 : {
328 : return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
329 : }
330 :
331 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
332 : _mm_and_pd (__m128d __A, __m128d __B)
333 : {
334 : return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
335 : }
336 :
337 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
338 : _mm_andnot_pd (__m128d __A, __m128d __B)
339 : {
340 : return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
341 : }
342 :
343 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
344 : _mm_or_pd (__m128d __A, __m128d __B)
345 : {
346 : return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
347 : }
348 :
349 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
350 : _mm_xor_pd (__m128d __A, __m128d __B)
351 : {
352 : return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
353 : }
354 :
355 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
356 : _mm_cmpeq_pd (__m128d __A, __m128d __B)
357 : {
358 : return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
359 : }
360 :
361 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
362 : _mm_cmplt_pd (__m128d __A, __m128d __B)
363 : {
364 : return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
365 : }
366 :
367 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
368 : _mm_cmple_pd (__m128d __A, __m128d __B)
369 : {
370 : return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
371 : }
372 :
373 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
374 : _mm_cmpgt_pd (__m128d __A, __m128d __B)
375 : {
376 : return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
377 : }
378 :
379 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
380 : _mm_cmpge_pd (__m128d __A, __m128d __B)
381 : {
382 : return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
383 : }
384 :
385 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
386 : _mm_cmpneq_pd (__m128d __A, __m128d __B)
387 : {
388 : return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
389 : }
390 :
391 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392 : _mm_cmpnlt_pd (__m128d __A, __m128d __B)
393 : {
394 : return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
395 : }
396 :
397 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398 : _mm_cmpnle_pd (__m128d __A, __m128d __B)
399 : {
400 : return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
401 : }
402 :
403 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
404 : _mm_cmpngt_pd (__m128d __A, __m128d __B)
405 : {
406 : return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
407 : }
408 :
409 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
410 : _mm_cmpnge_pd (__m128d __A, __m128d __B)
411 : {
412 : return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
413 : }
414 :
415 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
416 : _mm_cmpord_pd (__m128d __A, __m128d __B)
417 : {
418 : return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
419 : }
420 :
421 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422 : _mm_cmpunord_pd (__m128d __A, __m128d __B)
423 : {
424 : return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
425 : }
426 :
427 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 : _mm_cmpeq_sd (__m128d __A, __m128d __B)
429 : {
430 : return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
431 : }
432 :
433 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434 : _mm_cmplt_sd (__m128d __A, __m128d __B)
435 : {
436 : return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
437 : }
438 :
439 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
440 : _mm_cmple_sd (__m128d __A, __m128d __B)
441 : {
442 : return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
443 : }
444 :
445 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
446 : _mm_cmpgt_sd (__m128d __A, __m128d __B)
447 : {
448 : return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
449 : (__v2df)
450 : __builtin_ia32_cmpltsd ((__v2df) __B,
451 : (__v2df)
452 : __A));
453 : }
454 :
455 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
456 : _mm_cmpge_sd (__m128d __A, __m128d __B)
457 : {
458 : return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
459 : (__v2df)
460 : __builtin_ia32_cmplesd ((__v2df) __B,
461 : (__v2df)
462 : __A));
463 : }
464 :
465 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
466 : _mm_cmpneq_sd (__m128d __A, __m128d __B)
467 : {
468 : return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
469 : }
470 :
471 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
472 : _mm_cmpnlt_sd (__m128d __A, __m128d __B)
473 : {
474 : return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
475 : }
476 :
477 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
478 : _mm_cmpnle_sd (__m128d __A, __m128d __B)
479 : {
480 : return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
481 : }
482 :
483 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
484 : _mm_cmpngt_sd (__m128d __A, __m128d __B)
485 : {
486 : return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
487 : (__v2df)
488 : __builtin_ia32_cmpnltsd ((__v2df) __B,
489 : (__v2df)
490 : __A));
491 : }
492 :
493 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
494 : _mm_cmpnge_sd (__m128d __A, __m128d __B)
495 : {
496 : return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
497 : (__v2df)
498 : __builtin_ia32_cmpnlesd ((__v2df) __B,
499 : (__v2df)
500 : __A));
501 : }
502 :
503 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504 : _mm_cmpord_sd (__m128d __A, __m128d __B)
505 : {
506 : return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
507 : }
508 :
509 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
510 : _mm_cmpunord_sd (__m128d __A, __m128d __B)
511 : {
512 : return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
513 : }
514 :
515 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
516 : _mm_comieq_sd (__m128d __A, __m128d __B)
517 : {
518 : return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
519 : }
520 :
521 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522 : _mm_comilt_sd (__m128d __A, __m128d __B)
523 : {
524 : return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
525 : }
526 :
527 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528 : _mm_comile_sd (__m128d __A, __m128d __B)
529 : {
530 : return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
531 : }
532 :
533 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534 : _mm_comigt_sd (__m128d __A, __m128d __B)
535 : {
536 : return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
537 : }
538 :
539 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540 : _mm_comige_sd (__m128d __A, __m128d __B)
541 : {
542 : return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
543 : }
544 :
545 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
546 : _mm_comineq_sd (__m128d __A, __m128d __B)
547 : {
548 : return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
549 : }
550 :
551 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
552 : _mm_ucomieq_sd (__m128d __A, __m128d __B)
553 : {
554 : return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
555 : }
556 :
557 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558 : _mm_ucomilt_sd (__m128d __A, __m128d __B)
559 : {
560 : return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
561 : }
562 :
563 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
564 : _mm_ucomile_sd (__m128d __A, __m128d __B)
565 : {
566 : return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
567 : }
568 :
569 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
570 : _mm_ucomigt_sd (__m128d __A, __m128d __B)
571 : {
572 : return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
573 : }
574 :
575 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
576 : _mm_ucomige_sd (__m128d __A, __m128d __B)
577 : {
578 : return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
579 : }
580 :
581 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
582 : _mm_ucomineq_sd (__m128d __A, __m128d __B)
583 : {
584 : return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
585 : }
586 :
587 : /* Create a vector of Qi, where i is the element number. */
588 :
589 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
590 : _mm_set_epi64x (long long __q1, long long __q0)
591 : {
592 35142599124 : return __extension__ (__m128i)(__v2di){ __q0, __q1 };
593 : }
594 :
595 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
596 : _mm_set_epi64 (__m64 __q1, __m64 __q0)
597 : {
598 53166391603 : return _mm_set_epi64x ((long long)__q1, (long long)__q0);
599 : }
600 :
601 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
602 : _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
603 : {
604 5787177992 : return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
605 : }
606 :
607 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
608 : _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
609 : short __q3, short __q2, short __q1, short __q0)
610 : {
611 2428898418 : return __extension__ (__m128i)(__v8hi){
612 : __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
613 : }
614 :
615 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616 : _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
617 : char __q11, char __q10, char __q09, char __q08,
618 : char __q07, char __q06, char __q05, char __q04,
619 : char __q03, char __q02, char __q01, char __q00)
620 : {
621 843951020 : return __extension__ (__m128i)(__v16qi){
622 : __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
623 : __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
624 : };
625 : }
626 :
627 : /* Set all of the elements of the vector to A. */
628 :
629 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
630 : _mm_set1_epi64x (long long __A)
631 : {
632 23259601 : return _mm_set_epi64x (__A, __A);
633 : }
634 :
635 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636 : _mm_set1_epi64 (__m64 __A)
637 : {
638 : return _mm_set_epi64 (__A, __A);
639 : }
640 :
641 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642 : _mm_set1_epi32 (int __A)
643 : {
644 2712960151 : return _mm_set_epi32 (__A, __A, __A, __A);
645 : }
646 :
647 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648 : _mm_set1_epi16 (short __A)
649 : {
650 4690385610 : return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
651 : }
652 :
653 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654 : _mm_set1_epi8 (char __A)
655 : {
656 1421318200 : return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
657 : __A, __A, __A, __A, __A, __A, __A, __A);
658 : }
659 :
660 : /* Create a vector of Qi, where i is the element number.
661 : The parameter order is reversed from the _mm_set_epi* functions. */
662 :
663 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
664 : _mm_setr_epi64 (__m64 __q0, __m64 __q1)
665 : {
666 : return _mm_set_epi64 (__q1, __q0);
667 : }
668 :
669 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
670 : _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
671 : {
672 107634490 : return _mm_set_epi32 (__q3, __q2, __q1, __q0);
673 : }
674 :
675 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676 : _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
677 : short __q4, short __q5, short __q6, short __q7)
678 : {
679 159031000 : return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
680 : }
681 :
682 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
683 : _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
684 : char __q04, char __q05, char __q06, char __q07,
685 : char __q08, char __q09, char __q10, char __q11,
686 : char __q12, char __q13, char __q14, char __q15)
687 : {
688 308197240 : return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
689 : __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
690 : }
691 :
692 : /* Create a vector with element 0 as *P and the rest zero. */
693 :
694 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
695 : _mm_load_si128 (__m128i const *__P)
696 : {
697 3953729900 : return *__P;
698 : }
699 :
700 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
701 : _mm_loadu_si128 (__m128i_u const *__P)
702 : {
703 78708654060 : return *__P;
704 : }
705 :
706 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
707 : _mm_loadl_epi64 (__m128i_u const *__P)
708 : {
709 >10633*10^7 : return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
710 : }
711 :
712 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
713 : _mm_loadu_si64 (void const *__P)
714 : {
715 : return _mm_loadl_epi64 ((__m128i_u *)__P);
716 : }
717 :
718 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
719 : _mm_store_si128 (__m128i *__P, __m128i __B)
720 : {
721 2404882926 : *__P = __B;
722 1376486213 : }
723 :
724 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
725 : _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
726 : {
727 4847199434 : *__P = __B;
728 3739346346 : }
729 :
730 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
731 : _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
732 : {
733 11288178310 : *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
734 6611798031 : }
735 :
736 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
737 : _mm_storeu_si64 (void *__P, __m128i __B)
738 : {
739 : _mm_storel_epi64 ((__m128i_u *)__P, __B);
740 : }
741 :
742 : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
743 : _mm_movepi64_pi64 (__m128i __B)
744 : {
745 : return (__m64) ((__v2di)__B)[0];
746 : }
747 :
748 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
749 : _mm_movpi64_epi64 (__m64 __A)
750 : {
751 : return _mm_set_epi64 ((__m64)0LL, __A);
752 : }
753 :
754 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
755 : _mm_move_epi64 (__m128i __A)
756 : {
757 : return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
758 : }
759 :
760 : /* Create an undefined vector. */
761 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762 : _mm_undefined_si128 (void)
763 : {
764 : __m128i __Y = __Y;
765 : return __Y;
766 : }
767 :
768 : /* Create a vector of zeros. */
769 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
770 : _mm_setzero_si128 (void)
771 : {
772 2401899273 : return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
773 : }
774 :
775 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
776 : _mm_cvtepi32_pd (__m128i __A)
777 : {
778 : return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
779 : }
780 :
781 : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
782 : _mm_cvtepi32_ps (__m128i __A)
783 : {
784 : return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
785 : }
786 :
787 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
788 : _mm_cvtpd_epi32 (__m128d __A)
789 : {
790 : return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
791 : }
792 :
793 : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794 : _mm_cvtpd_pi32 (__m128d __A)
795 : {
796 : return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
797 : }
798 :
799 : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
800 : _mm_cvtpd_ps (__m128d __A)
801 : {
802 : return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
803 : }
804 :
805 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
806 : _mm_cvttpd_epi32 (__m128d __A)
807 : {
808 : return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
809 : }
810 :
811 : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
812 : _mm_cvttpd_pi32 (__m128d __A)
813 : {
814 : return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
815 : }
816 :
817 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
818 : _mm_cvtpi32_pd (__m64 __A)
819 : {
820 : return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
821 : }
822 :
823 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
824 : _mm_cvtps_epi32 (__m128 __A)
825 : {
826 : return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
827 : }
828 :
829 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
830 : _mm_cvttps_epi32 (__m128 __A)
831 : {
832 : return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
833 : }
834 :
/* Views __A as __v4sf, hands it to __builtin_ia32_cvtps2pd and returns the
   result as __m128d.  Presumably CVTPS2PD (the two low floats widened to
   doubles); confirm against the Intel reference.  */
835 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
836 : _mm_cvtps_pd (__m128 __A)
837 : {
838 : return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
839 : }
840 :
/* Returns the int produced by __builtin_ia32_cvtsd2si for __A viewed as
   __v2df.  Presumably CVTSD2SI (low double -> rounded int32); confirm
   against the Intel reference.  */
841 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
842 : _mm_cvtsd_si32 (__m128d __A)
843 : {
844 : return __builtin_ia32_cvtsd2si ((__v2df) __A);
845 : }
846 :
/* Only compiled when __x86_64__ is defined.  The Intel and Microsoft
   spellings have identical bodies: both return the long long produced by
   __builtin_ia32_cvtsd2si64.  Presumably the 64-bit form of CVTSD2SI;
   confirm against the Intel reference.  */
847 : #ifdef __x86_64__
848 : /* Intel intrinsic. */
849 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
850 : _mm_cvtsd_si64 (__m128d __A)
851 : {
852 : return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
853 : }
854 :
855 : /* Microsoft intrinsic. */
856 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
857 : _mm_cvtsd_si64x (__m128d __A)
858 : {
859 : return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
860 : }
861 : #endif
862 :
/* Returns the int produced by __builtin_ia32_cvttsd2si for __A viewed as
   __v2df.  Presumably the truncating counterpart of _mm_cvtsd_si32
   (CVTTSD2SI); confirm against the Intel reference.  */
863 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
864 : _mm_cvttsd_si32 (__m128d __A)
865 : {
866 : return __builtin_ia32_cvttsd2si ((__v2df) __A);
867 : }
868 :
/* Only compiled when __x86_64__ is defined.  The Intel and Microsoft
   spellings have identical bodies: both return the long long produced by
   __builtin_ia32_cvttsd2si64.  Presumably the 64-bit truncating CVTTSD2SI;
   confirm against the Intel reference.  */
869 : #ifdef __x86_64__
870 : /* Intel intrinsic. */
871 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
872 : _mm_cvttsd_si64 (__m128d __A)
873 : {
874 : return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
875 : }
876 :
877 : /* Microsoft intrinsic. */
878 : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
879 : _mm_cvttsd_si64x (__m128d __A)
880 : {
881 : return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
882 : }
883 : #endif
884 :
/* Passes __A (as __v4sf) and __B (as __v2df) to __builtin_ia32_cvtsd2ss and
   returns the result as __m128.  Presumably CVTSD2SS: the low double of __B
   becomes the low float, the other three floats come from __A; confirm
   against the Intel reference.  */
885 : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
886 : _mm_cvtsd_ss (__m128 __A, __m128d __B)
887 : {
888 : return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
889 : }
890 :
/* Passes __A (as __v2df) and the scalar __B to __builtin_ia32_cvtsi2sd and
   returns the result as __m128d.  Presumably CVTSI2SD: __B converted into
   the low double, high double taken from __A; confirm against the Intel
   reference.  */
891 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
892 : _mm_cvtsi32_sd (__m128d __A, int __B)
893 : {
894 : return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
895 : }
896 :
/* Only compiled when __x86_64__ is defined.  The Intel and Microsoft
   spellings have identical bodies: both pass __A (as __v2df) and the
   long long __B to __builtin_ia32_cvtsi642sd.  Presumably the 64-bit-source
   form of CVTSI2SD; confirm against the Intel reference.  */
897 : #ifdef __x86_64__
898 : /* Intel intrinsic. */
899 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
900 : _mm_cvtsi64_sd (__m128d __A, long long __B)
901 : {
902 : return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
903 : }
904 :
905 : /* Microsoft intrinsic. */
906 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
907 : _mm_cvtsi64x_sd (__m128d __A, long long __B)
908 : {
909 : return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
910 : }
911 : #endif
912 :
/* Passes __A (as __v2df) and __B (as __v4sf) to __builtin_ia32_cvtss2sd and
   returns the result as __m128d.  Presumably CVTSS2SD: the low float of __B
   widened into the low double, high double taken from __A; confirm against
   the Intel reference.  */
913 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
914 : _mm_cvtss_sd (__m128d __A, __m128 __B)
915 : {
916 : return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
917 : }
918 :
/* _mm_shuffle_pd: forwards __A, __B (both as __v2df) and the selector to
   __builtin_ia32_shufpd.  An inline function is used when __OPTIMIZE__ is
   defined and an equivalent macro otherwise.  NOTE(review): presumably the
   macro exists because the builtin needs a compile-time constant selector,
   which only reaches it through the function when inlining happens --
   confirm.  */
919 : #ifdef __OPTIMIZE__
920 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
921 : _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
922 : {
923 : return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
924 : }
925 : #else
926 : #define _mm_shuffle_pd(A, B, N) \
927 : ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \
928 : (__v2df)(__m128d)(B), (int)(N)))
929 : #endif
930 :
/* Passes __A and __B (as __v2df) to __builtin_ia32_unpckhpd and returns the
   result as __m128d.  Presumably UNPCKHPD (result = { high of __A, high of
   __B }); confirm against the Intel reference.  */
931 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
932 : _mm_unpackhi_pd (__m128d __A, __m128d __B)
933 : {
934 : return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
935 : }
936 :
/* Passes __A and __B (as __v2df) to __builtin_ia32_unpcklpd and returns the
   result as __m128d.  Presumably UNPCKLPD (result = { low of __A, low of
   __B }); confirm against the Intel reference.  */
937 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
938 : _mm_unpacklo_pd (__m128d __A, __m128d __B)
939 : {
940 : return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
941 : }
942 :
/* Passes __A (as __v2df) and the pointer __B to __builtin_ia32_loadhpd and
   returns the result as __m128d.  Presumably MOVHPD load: *__B replaces the
   high double, the low double is kept from __A; confirm against the Intel
   reference.  */
943 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
944 : _mm_loadh_pd (__m128d __A, double const *__B)
945 : {
946 1253975340 : return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
947 : }
948 :
/* Passes __A (as __v2df) and the pointer __B to __builtin_ia32_loadlpd and
   returns the result as __m128d.  Presumably MOVLPD load: *__B replaces the
   low double, the high double is kept from __A; confirm against the Intel
   reference.  */
949 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
950 : _mm_loadl_pd (__m128d __A, double const *__B)
951 : {
952 : return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
953 : }
954 :
/* Returns the int produced by __builtin_ia32_movmskpd for __A viewed as
   __v2df.  Presumably MOVMSKPD (the two sign bits packed into bits 0 and 1);
   confirm against the Intel reference.  */
955 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956 : _mm_movemask_pd (__m128d __A)
957 : {
958 : return __builtin_ia32_movmskpd ((__v2df)__A);
959 : }
960 :
/* Passes __A and __B as __v8hi to __builtin_ia32_packsswb128 and returns the
   result as __m128i.  Presumably PACKSSWB (16-bit lanes narrowed to 8-bit
   with signed saturation); confirm against the Intel reference.  */
961 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
962 : _mm_packs_epi16 (__m128i __A, __m128i __B)
963 : {
964 76072050 : return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
965 : }
966 :
/* Passes __A and __B as __v4si to __builtin_ia32_packssdw128 and returns the
   result as __m128i.  Presumably PACKSSDW (32-bit lanes narrowed to 16-bit
   with signed saturation); confirm against the Intel reference.  */
967 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
968 : _mm_packs_epi32 (__m128i __A, __m128i __B)
969 : {
970 1722589471 : return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
971 : }
972 :
/* Passes __A and __B as __v8hi to __builtin_ia32_packuswb128 and returns the
   result as __m128i.  Presumably PACKUSWB (16-bit lanes narrowed to 8-bit
   with unsigned saturation); confirm against the Intel reference.  */
973 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
974 : _mm_packus_epi16 (__m128i __A, __m128i __B)
975 : {
976 1949291367 : return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
977 : }
978 :
/* Passes __A and __B as __v16qi to __builtin_ia32_punpckhbw128 and returns
   the result as __m128i.  Presumably PUNPCKHBW (bytes of the two high halves
   interleaved); confirm against the Intel reference.  */
979 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
980 : _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
981 : {
982 662399614 : return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
983 : }
984 :
/* Passes __A and __B as __v8hi to __builtin_ia32_punpckhwd128 and returns
   the result as __m128i.  Presumably PUNPCKHWD (16-bit lanes of the two high
   halves interleaved); confirm against the Intel reference.  */
985 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
986 : _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
987 : {
988 2160674800 : return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
989 : }
990 :
/* Passes __A and __B as __v4si to __builtin_ia32_punpckhdq128 and returns
   the result as __m128i.  Presumably PUNPCKHDQ (32-bit lanes of the two high
   halves interleaved); confirm against the Intel reference.  */
991 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
992 : _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
993 : {
994 863854524 : return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
995 : }
996 :
/* Passes __A and __B as __v2di to __builtin_ia32_punpckhqdq128 and returns
   the result as __m128i.  Presumably PUNPCKHQDQ (result = { high 64 bits of
   __A, high 64 bits of __B }); confirm against the Intel reference.  */
997 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
998 : _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
999 : {
1000 976503458 : return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
1001 : }
1002 :
/* Passes __A and __B as __v16qi to __builtin_ia32_punpcklbw128 and returns
   the result as __m128i.  Presumably PUNPCKLBW (bytes of the two low halves
   interleaved); confirm against the Intel reference.  */
1003 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1004 : _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
1005 : {
1006 3058256048 : return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
1007 : }
1008 :
/* Passes __A and __B as __v8hi to __builtin_ia32_punpcklwd128 and returns
   the result as __m128i.  Presumably PUNPCKLWD (16-bit lanes of the two low
   halves interleaved); confirm against the Intel reference.  */
1009 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1010 : _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
1011 : {
1012 17049888926 : return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
1013 : }
1014 :
/* Passes __A and __B as __v4si to __builtin_ia32_punpckldq128 and returns
   the result as __m128i.  Presumably PUNPCKLDQ (32-bit lanes of the two low
   halves interleaved); confirm against the Intel reference.  */
1015 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1016 : _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
1017 : {
1018 1469104124 : return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
1019 : }
1020 :
/* Passes __A and __B as __v2di to __builtin_ia32_punpcklqdq128 and returns
   the result as __m128i.  Presumably PUNPCKLQDQ (result = { low 64 bits of
   __A, low 64 bits of __B }); confirm against the Intel reference.  */
1021 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1022 : _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
1023 : {
1024 1289672228 : return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
1025 : }
1026 :
/* Lane-wise sum of __A and __B using the vector `+' operator on __v16qu, with
   no builtin involved.  NOTE(review): __v16qu is declared outside this view;
   presumably sixteen unsigned char lanes, so each sum wraps -- confirm.  */
1027 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1028 : _mm_add_epi8 (__m128i __A, __m128i __B)
1029 : {
1030 1411081696 : return (__m128i) ((__v16qu)__A + (__v16qu)__B);
1031 : }
1032 :
/* Lane-wise sum of __A and __B using the vector `+' operator on __v8hu (a
   16-byte vector of unsigned short), with no builtin involved.  The lanes are
   unsigned, so each sum wraps on overflow.  */
1033 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1034 : _mm_add_epi16 (__m128i __A, __m128i __B)
1035 : {
1036 4215181985 : return (__m128i) ((__v8hu)__A + (__v8hu)__B);
1037 : }
1038 :
/* Lane-wise sum of __A and __B using the vector `+' operator on __v4su (a
   16-byte vector of unsigned int), with no builtin involved.  The lanes are
   unsigned, so each sum wraps on overflow.  */
1039 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1040 : _mm_add_epi32 (__m128i __A, __m128i __B)
1041 : {
1042 8856215428 : return (__m128i) ((__v4su)__A + (__v4su)__B);
1043 : }
1044 :
/* Lane-wise sum of __A and __B using the vector `+' operator on __v2du (a
   16-byte vector of unsigned long long), with no builtin involved.  The lanes
   are unsigned, so each sum wraps on overflow.  */
1045 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1046 : _mm_add_epi64 (__m128i __A, __m128i __B)
1047 : {
1048 791344375 : return (__m128i) ((__v2du)__A + (__v2du)__B);
1049 : }
1050 :
/* Passes __A and __B as __v16qi to __builtin_ia32_paddsb128 and returns the
   result as __m128i.  Presumably PADDSB (8-bit add with signed saturation);
   confirm against the Intel reference.  */
1051 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1052 : _mm_adds_epi8 (__m128i __A, __m128i __B)
1053 : {
1054 9539930 : return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1055 : }
1056 :
/* Passes __A and __B as __v8hi to __builtin_ia32_paddsw128 and returns the
   result as __m128i.  Presumably PADDSW (16-bit add with signed saturation);
   confirm against the Intel reference.  */
1057 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1058 : _mm_adds_epi16 (__m128i __A, __m128i __B)
1059 : {
1060 1148165360 : return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1061 : }
1062 :
/* Passes __A and __B as __v16qi to __builtin_ia32_paddusb128 and returns the
   result as __m128i.  Presumably PADDUSB (8-bit add with unsigned
   saturation); confirm against the Intel reference.  */
1063 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1064 : _mm_adds_epu8 (__m128i __A, __m128i __B)
1065 : {
1066 7343670 : return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1067 : }
1068 :
/* Passes __A and __B as __v8hi to __builtin_ia32_paddusw128 and returns the
   result as __m128i.  Presumably PADDUSW (16-bit add with unsigned
   saturation); confirm against the Intel reference.  */
1069 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1070 : _mm_adds_epu16 (__m128i __A, __m128i __B)
1071 : {
1072 53530400 : return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1073 : }
1074 :
/* Lane-wise __A - __B using the vector `-' operator on __v16qu, with no
   builtin involved.  NOTE(review): __v16qu is declared outside this view;
   presumably sixteen unsigned char lanes, so each difference wraps --
   confirm.  */
1075 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1076 : _mm_sub_epi8 (__m128i __A, __m128i __B)
1077 : {
1078 600391000 : return (__m128i) ((__v16qu)__A - (__v16qu)__B);
1079 : }
1080 :
/* Lane-wise __A - __B using the vector `-' operator on __v8hu (a 16-byte
   vector of unsigned short), with no builtin involved.  The lanes are
   unsigned, so each difference wraps.  */
1081 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1082 : _mm_sub_epi16 (__m128i __A, __m128i __B)
1083 : {
1084 563109866 : return (__m128i) ((__v8hu)__A - (__v8hu)__B);
1085 : }
1086 :
/* Lane-wise __A - __B using the vector `-' operator on __v4su (a 16-byte
   vector of unsigned int), with no builtin involved.  The lanes are unsigned,
   so each difference wraps.  */
1087 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1088 : _mm_sub_epi32 (__m128i __A, __m128i __B)
1089 : {
1090 1198966714 : return (__m128i) ((__v4su)__A - (__v4su)__B);
1091 : }
1092 :
/* Lane-wise __A - __B using the vector `-' operator on __v2du (a 16-byte
   vector of unsigned long long), with no builtin involved.  The lanes are
   unsigned, so each difference wraps.  */
1093 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1094 : _mm_sub_epi64 (__m128i __A, __m128i __B)
1095 : {
1096 : return (__m128i) ((__v2du)__A - (__v2du)__B);
1097 : }
1098 :
/* Passes __A and __B as __v16qi to __builtin_ia32_psubsb128 and returns the
   result as __m128i.  Presumably PSUBSB (8-bit subtract with signed
   saturation); confirm against the Intel reference.  */
1099 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1100 : _mm_subs_epi8 (__m128i __A, __m128i __B)
1101 : {
1102 9555910 : return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1103 : }
1104 :
/* Passes __A and __B as __v8hi to __builtin_ia32_psubsw128 and returns the
   result as __m128i.  Presumably PSUBSW (16-bit subtract with signed
   saturation); confirm against the Intel reference.  */
1105 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1106 : _mm_subs_epi16 (__m128i __A, __m128i __B)
1107 : {
1108 662313720 : return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1109 : }
1110 :
/* Passes __A and __B as __v16qi to __builtin_ia32_psubusb128 and returns the
   result as __m128i.  Presumably PSUBUSB (8-bit subtract with unsigned
   saturation); confirm against the Intel reference.  */
1111 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1112 : _mm_subs_epu8 (__m128i __A, __m128i __B)
1113 : {
1114 62622900 : return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1115 : }
1116 :
/* Passes __A and __B as __v8hi to __builtin_ia32_psubusw128 and returns the
   result as __m128i.  Presumably PSUBUSW (16-bit subtract with unsigned
   saturation); confirm against the Intel reference.  */
1117 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1118 : _mm_subs_epu16 (__m128i __A, __m128i __B)
1119 : {
1120 41329200 : return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1121 : }
1122 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pmaddwd128 and returns the
   result as __m128i.  Presumably PMADDWD (signed 16-bit products, adjacent
   pairs summed into 32-bit lanes); confirm against the Intel reference.  */
1123 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1124 : _mm_madd_epi16 (__m128i __A, __m128i __B)
1125 : {
1126 3203897919 : return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1127 : }
1128 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pmulhw128 and returns the
   result as __m128i.  Presumably PMULHW (high 16 bits of each signed 16x16
   product); confirm against the Intel reference.  */
1129 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1130 : _mm_mulhi_epi16 (__m128i __A, __m128i __B)
1131 : {
1132 : return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1133 : }
1134 :
/* Lane-wise product of __A and __B using the vector `*' operator on __v8hu
   (a 16-byte vector of unsigned short), with no builtin involved.  Each
   result lane has the same 16-bit width as its operands, so only the low
   16 bits of every product are kept.  */
1135 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1136 : _mm_mullo_epi16 (__m128i __A, __m128i __B)
1137 : {
1138 768335500 : return (__m128i) ((__v8hu)__A * (__v8hu)__B);
1139 : }
1140 :
/* 64-bit (__m64) variant: passes __A and __B as __v2si to
   __builtin_ia32_pmuludq and returns the result as __m64.  Presumably
   PMULUDQ on MMX operands (low unsigned 32-bit lanes multiplied into one
   64-bit product); confirm against the Intel reference.  */
1141 : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1142 : _mm_mul_su32 (__m64 __A, __m64 __B)
1143 : {
1144 : return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1145 : }
1146 :
/* Passes __A and __B as __v4si to __builtin_ia32_pmuludq128 and returns the
   result as __m128i.  Presumably PMULUDQ (unsigned 32-bit lanes 0 and 2
   multiplied into two 64-bit products); confirm against the Intel
   reference.  */
1147 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1148 : _mm_mul_epu32 (__m128i __A, __m128i __B)
1149 : {
1150 0 : return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1151 : }
1152 :
/* Passes __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psllwi128.  Presumably PSLLW with a scalar count (each
   16-bit lane shifted left, zeros shifted in); confirm against the Intel
   reference.  */
1153 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1154 : _mm_slli_epi16 (__m128i __A, int __B)
1155 : {
1156 82313800 : return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
1157 : }
1158 :
/* Passes __A (as __v4si) and the scalar count __B to
   __builtin_ia32_pslldi128.  Presumably PSLLD with a scalar count (each
   32-bit lane shifted left, zeros shifted in); confirm against the Intel
   reference.  */
1159 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1160 : _mm_slli_epi32 (__m128i __A, int __B)
1161 : {
1162 640091300 : return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
1163 : }
1164 :
/* Passes __A (as __v2di) and the scalar count __B to
   __builtin_ia32_psllqi128.  Presumably PSLLQ with a scalar count (each
   64-bit lane shifted left, zeros shifted in); confirm against the Intel
   reference.  */
1165 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1166 : _mm_slli_epi64 (__m128i __A, int __B)
1167 : {
1168 0 : return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
1169 : }
1170 :
/* Passes __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psrawi128.  Presumably PSRAW with a scalar count
   (arithmetic right shift of each 16-bit lane, sign bit replicated);
   confirm against the Intel reference.  */
1171 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1172 : _mm_srai_epi16 (__m128i __A, int __B)
1173 : {
1174 1594263696 : return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
1175 : }
1176 :
/* Passes __A (as __v4si) and the scalar count __B to
   __builtin_ia32_psradi128.  Presumably PSRAD with a scalar count
   (arithmetic right shift of each 32-bit lane, sign bit replicated);
   confirm against the Intel reference.  */
1177 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1178 : _mm_srai_epi32 (__m128i __A, int __B)
1179 : {
1180 2886746280 : return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
1181 : }
1182 :
/* Whole-register shifts.  _mm_bsrli_si128 and _mm_srli_si128 both call
   __builtin_ia32_psrldqi128; _mm_bslli_si128 and _mm_slli_si128 both call
   __builtin_ia32_pslldqi128.  In every variant the count is multiplied by 8
   before it reaches the builtin.  NOTE(review): presumably the intrinsic
   takes a byte count while the builtin expects bits (PSRLDQ/PSLLDQ) --
   confirm.  Inline functions are used when __OPTIMIZE__ is defined and
   equivalent macros otherwise, presumably so the count is still a
   compile-time constant when nothing is inlined -- confirm.  */
1183 : #ifdef __OPTIMIZE__
1184 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1185 : _mm_bsrli_si128 (__m128i __A, const int __N)
1186 : {
1187 : return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
1188 : }
1189 :
1190 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191 : _mm_bslli_si128 (__m128i __A, const int __N)
1192 : {
1193 : return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
1194 : }
1195 :
1196 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1197 : _mm_srli_si128 (__m128i __A, const int __N)
1198 : {
1199 : return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
1200 : }
1201 :
1202 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1203 : _mm_slli_si128 (__m128i __A, const int __N)
1204 : {
1205 : return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
1206 : }
1207 : #else
1208 : #define _mm_bsrli_si128(A, N) \
1209 : ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
1210 : #define _mm_bslli_si128(A, N) \
1211 : ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
1212 : #define _mm_srli_si128(A, N) \
1213 : ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
1214 : #define _mm_slli_si128(A, N) \
1215 : ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
1216 : #endif
1217 :
/* Passes __A (as __v8hi) and the scalar count __B to
   __builtin_ia32_psrlwi128.  Presumably PSRLW with a scalar count (logical
   right shift of each 16-bit lane); confirm against the Intel reference.  */
1218 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1219 : _mm_srli_epi16 (__m128i __A, int __B)
1220 : {
1221 456380726 : return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
1222 : }
1223 :
/* Passes __A (as __v4si) and the scalar count __B to
   __builtin_ia32_psrldi128.  Presumably PSRLD with a scalar count (logical
   right shift of each 32-bit lane); confirm against the Intel reference.  */
1224 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1225 : _mm_srli_epi32 (__m128i __A, int __B)
1226 : {
1227 470331 : return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
1228 : }
1229 :
/* Passes __A (as __v2di) and the scalar count __B to
   __builtin_ia32_psrlqi128.  Presumably PSRLQ with a scalar count (logical
   right shift of each 64-bit lane); confirm against the Intel reference.  */
1230 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1231 : _mm_srli_epi64 (__m128i __A, int __B)
1232 : {
1233 84744500 : return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
1234 : }
1235 :
/* Vector-count variant: passes __A and __B (both as __v8hi) to
   __builtin_ia32_psllw128.  Presumably PSLLW with the shift count taken from
   the low 64 bits of __B; confirm against the Intel reference.  */
1236 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1237 : _mm_sll_epi16 (__m128i __A, __m128i __B)
1238 : {
1239 : return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1240 : }
1241 :
/* Vector-count variant: passes __A and __B (both as __v4si) to
   __builtin_ia32_pslld128.  Presumably PSLLD with the shift count taken from
   the low 64 bits of __B; confirm against the Intel reference.  */
1242 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1243 : _mm_sll_epi32 (__m128i __A, __m128i __B)
1244 : {
1245 : return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1246 : }
1247 :
/* Vector-count variant: passes __A and __B (both as __v2di) to
   __builtin_ia32_psllq128.  Presumably PSLLQ with the shift count taken from
   the low 64 bits of __B; confirm against the Intel reference.  */
1248 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1249 : _mm_sll_epi64 (__m128i __A, __m128i __B)
1250 : {
1251 : return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1252 : }
1253 :
/* Vector-count variant: passes __A and __B (both as __v8hi) to
   __builtin_ia32_psraw128.  Presumably PSRAW (arithmetic right shift) with
   the count taken from the low 64 bits of __B; confirm against the Intel
   reference.  */
1254 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1255 : _mm_sra_epi16 (__m128i __A, __m128i __B)
1256 : {
1257 567366 : return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1258 : }
1259 :
/* Vector-count variant: passes __A and __B (both as __v4si) to
   __builtin_ia32_psrad128.  Presumably PSRAD (arithmetic right shift) with
   the count taken from the low 64 bits of __B; confirm against the Intel
   reference.  */
1260 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1261 : _mm_sra_epi32 (__m128i __A, __m128i __B)
1262 : {
1263 0 : return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1264 : }
1265 :
/* Vector-count variant: passes __A and __B (both as __v8hi) to
   __builtin_ia32_psrlw128.  Presumably PSRLW (logical right shift) with the
   count taken from the low 64 bits of __B; confirm against the Intel
   reference.  */
1266 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1267 : _mm_srl_epi16 (__m128i __A, __m128i __B)
1268 : {
1269 2206660 : return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1270 : }
1271 :
/* Vector-count variant: passes __A and __B (both as __v4si) to
   __builtin_ia32_psrld128.  Presumably PSRLD (logical right shift) with the
   count taken from the low 64 bits of __B; confirm against the Intel
   reference.  */
1272 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1273 : _mm_srl_epi32 (__m128i __A, __m128i __B)
1274 : {
1275 0 : return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1276 : }
1277 :
/* Vector-count variant: passes __A and __B (both as __v2di) to
   __builtin_ia32_psrlq128.  Presumably PSRLQ (logical right shift) with the
   count taken from the low 64 bits of __B; confirm against the Intel
   reference.  */
1278 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1279 : _mm_srl_epi64 (__m128i __A, __m128i __B)
1280 : {
1281 : return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1282 : }
1283 :
/* Bitwise AND of __A and __B, written with the vector `&' operator on __v2du
   (two unsigned long long lanes) rather than a builtin.  The lane width does
   not affect the result of a bitwise operation.  */
1284 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1285 : _mm_and_si128 (__m128i __A, __m128i __B)
1286 : {
1287 635892880 : return (__m128i) ((__v2du)__A & (__v2du)__B);
1288 : }
1289 :
/* Unlike the neighbouring and/or/xor wrappers this one calls a builtin,
   __builtin_ia32_pandn128, with __A and __B viewed as __v2di.  Presumably
   PANDN, i.e. (~__A) & __B -- note which operand is complemented; confirm
   against the Intel reference.  */
1290 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1291 : _mm_andnot_si128 (__m128i __A, __m128i __B)
1292 : {
1293 68214980 : return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1294 : }
1295 :
/* Bitwise OR of __A and __B, written with the vector `|' operator on __v2du
   (two unsigned long long lanes) rather than a builtin.  */
1296 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1297 : _mm_or_si128 (__m128i __A, __m128i __B)
1298 : {
1299 48103464 : return (__m128i) ((__v2du)__A | (__v2du)__B);
1300 : }
1301 :
/* Bitwise XOR of __A and __B, written with the vector `^' operator on __v2du
   (two unsigned long long lanes) rather than a builtin.  */
1302 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1303 : _mm_xor_si128 (__m128i __A, __m128i __B)
1304 : {
1305 14826700 : return (__m128i) ((__v2du)__A ^ (__v2du)__B);
1306 : }
1307 :
/* Lane-wise equality test using the vector `==' operator on __v16qi.  With
   GCC vector extensions a true lane becomes all ones and a false lane zero.
   NOTE(review): __v16qi is declared outside this view; presumably sixteen
   char lanes -- confirm.  */
1308 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1309 : _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1310 : {
1311 12070500 : return (__m128i) ((__v16qi)__A == (__v16qi)__B);
1312 : }
1313 :
/* Lane-wise equality test using the vector `==' operator on __v8hi (eight
   short lanes).  With GCC vector extensions a true lane becomes all ones and
   a false lane zero.  */
1314 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1315 : _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1316 : {
1317 0 : return (__m128i) ((__v8hi)__A == (__v8hi)__B);
1318 : }
1319 :
/* Lane-wise equality test using the vector `==' operator on __v4si (four int
   lanes).  With GCC vector extensions a true lane becomes all ones and a
   false lane zero.  */
1320 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1321 : _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1322 : {
1323 : return (__m128i) ((__v4si)__A == (__v4si)__B);
1324 : }
1325 :
/* Lane-wise __A < __B using the vector `<' operator on __v16qs; true lanes
   become all ones, false lanes zero.  NOTE(review): __v16qs is declared
   outside this view; presumably explicitly signed char lanes, used here
   instead of __v16qi so the ordering does not depend on the signedness of
   plain char -- confirm.  */
1326 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1327 : _mm_cmplt_epi8 (__m128i __A, __m128i __B)
1328 : {
1329 : return (__m128i) ((__v16qs)__A < (__v16qs)__B);
1330 : }
1331 :
/* Lane-wise signed __A < __B using the vector `<' operator on __v8hi (eight
   short lanes); true lanes become all ones, false lanes zero.  */
1332 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1333 : _mm_cmplt_epi16 (__m128i __A, __m128i __B)
1334 : {
1335 : return (__m128i) ((__v8hi)__A < (__v8hi)__B);
1336 : }
1337 :
/* Lane-wise signed __A < __B using the vector `<' operator on __v4si (four
   int lanes); true lanes become all ones, false lanes zero.  */
1338 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1339 : _mm_cmplt_epi32 (__m128i __A, __m128i __B)
1340 : {
1341 544705000 : return (__m128i) ((__v4si)__A < (__v4si)__B);
1342 : }
1343 :
/* Lane-wise __A > __B using the vector `>' operator on __v16qs; true lanes
   become all ones, false lanes zero.  NOTE(review): __v16qs is declared
   outside this view; presumably explicitly signed char lanes -- confirm.  */
1344 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1345 : _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1346 : {
1347 56767900 : return (__m128i) ((__v16qs)__A > (__v16qs)__B);
1348 : }
1349 :
/* Lane-wise signed __A > __B using the vector `>' operator on __v8hi (eight
   short lanes); true lanes become all ones, false lanes zero.  */
1350 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1351 : _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1352 : {
1353 47377394 : return (__m128i) ((__v8hi)__A > (__v8hi)__B);
1354 : }
1355 :
/* Lane-wise signed __A > __B using the vector `>' operator on __v4si (four
   int lanes); true lanes become all ones, false lanes zero.  */
1356 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1357 : _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1358 : {
1359 10825300 : return (__m128i) ((__v4si)__A > (__v4si)__B);
1360 : }
1361 :
/* _mm_extract_epi16 reads lane __N of __A (viewed as __v8hi) through
   __builtin_ia32_vec_ext_v8hi; the result is cast to unsigned short before
   being returned as int, so the 16-bit value is zero-extended rather than
   sign-extended.  _mm_insert_epi16 forwards __A, the value __D and the lane
   index __N to __builtin_ia32_vec_set_v8hi.  NOTE(review): presumably the
   result is __A with lane __N replaced by the low 16 bits of __D -- confirm.
   Inline functions are used when __OPTIMIZE__ is defined and equivalent
   macros otherwise, presumably so the lane index stays a compile-time
   constant -- confirm.  */
1362 : #ifdef __OPTIMIZE__
1363 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1364 : _mm_extract_epi16 (__m128i const __A, int const __N)
1365 : {
1366 : return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
1367 : }
1368 :
1369 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1370 : _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
1371 : {
1372 : return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
1373 : }
1374 : #else
1375 : #define _mm_extract_epi16(A, N) \
1376 : ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
1377 : #define _mm_insert_epi16(A, D, N) \
1378 : ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
1379 : (int)(D), (int)(N)))
1380 : #endif
1381 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pmaxsw128 and returns the
   result as __m128i.  Presumably PMAXSW (lane-wise signed 16-bit maximum);
   confirm against the Intel reference.  */
1382 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1383 : _mm_max_epi16 (__m128i __A, __m128i __B)
1384 : {
1385 105802200 : return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1386 : }
1387 :
/* Passes __A and __B as __v16qi to __builtin_ia32_pmaxub128 and returns the
   result as __m128i.  Presumably PMAXUB (lane-wise unsigned 8-bit maximum);
   confirm against the Intel reference.  */
1388 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1389 : _mm_max_epu8 (__m128i __A, __m128i __B)
1390 : {
1391 17655200 : return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1392 : }
1393 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pminsw128 and returns the
   result as __m128i.  Presumably PMINSW (lane-wise signed 16-bit minimum);
   confirm against the Intel reference.  */
1394 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1395 : _mm_min_epi16 (__m128i __A, __m128i __B)
1396 : {
1397 0 : return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1398 : }
1399 :
/* Passes __A and __B as __v16qi to __builtin_ia32_pminub128 and returns the
   result as __m128i.  Presumably PMINUB (lane-wise unsigned 8-bit minimum);
   confirm against the Intel reference.  */
1400 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1401 : _mm_min_epu8 (__m128i __A, __m128i __B)
1402 : {
1403 1664380000 : return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1404 : }
1405 :
/* Returns the int produced by __builtin_ia32_pmovmskb128 for __A viewed as
   __v16qi.  Presumably PMOVMSKB (the top bit of each of the 16 bytes packed
   into the low 16 bits of the result); confirm against the Intel
   reference.  */
1406 : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1407 : _mm_movemask_epi8 (__m128i __A)
1408 : {
1409 : return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1410 : }
1411 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pmulhuw128 and returns the
   result as __m128i.  Presumably PMULHUW (high 16 bits of each unsigned
   16x16 product); confirm against the Intel reference.  */
1412 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1413 : _mm_mulhi_epu16 (__m128i __A, __m128i __B)
1414 : {
1415 2206660 : return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1416 : }
1417 :
/* Integer shuffles driven by a selector.  _mm_shufflehi_epi16 and
   _mm_shufflelo_epi16 pass __A as __v8hi to __builtin_ia32_pshufhw and
   __builtin_ia32_pshuflw; _mm_shuffle_epi32 passes __A as __v4si to
   __builtin_ia32_pshufd.  NOTE(review): presumably PSHUFHW / PSHUFLW permute
   the upper / lower four 16-bit lanes and PSHUFD permutes the four 32-bit
   lanes -- confirm against the Intel reference.  Inline functions are used
   when __OPTIMIZE__ is defined and equivalent macros otherwise, presumably so
   the selector stays a compile-time constant -- confirm.  */
1418 : #ifdef __OPTIMIZE__
1419 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1420 : _mm_shufflehi_epi16 (__m128i __A, const int __mask)
1421 : {
1422 : return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
1423 : }
1424 :
1425 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1426 : _mm_shufflelo_epi16 (__m128i __A, const int __mask)
1427 : {
1428 : return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
1429 : }
1430 :
1431 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1432 : _mm_shuffle_epi32 (__m128i __A, const int __mask)
1433 : {
1434 : return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
1435 : }
1436 : #else
1437 : #define _mm_shufflehi_epi16(A, N) \
1438 : ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
1439 : #define _mm_shufflelo_epi16(A, N) \
1440 : ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
1441 : #define _mm_shuffle_epi32(A, N) \
1442 : ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
1443 : #endif
1444 :
/* Returns nothing; forwards __A and __B (as __v16qi) and the destination
   pointer __C to __builtin_ia32_maskmovdqu.  Presumably MASKMOVDQU: bytes of
   __A are stored to __C where the matching byte of __B has its top bit set;
   confirm against the Intel reference.  */
1445 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1446 : _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1447 : {
1448 : __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1449 : }
1450 :
/* Passes __A and __B as __v16qi to __builtin_ia32_pavgb128 and returns the
   result as __m128i.  Presumably PAVGB (rounded average of unsigned 8-bit
   lanes); confirm against the Intel reference.  */
1451 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1452 : _mm_avg_epu8 (__m128i __A, __m128i __B)
1453 : {
1454 580894300 : return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1455 : }
1456 :
/* Passes __A and __B as __v8hi to __builtin_ia32_pavgw128 and returns the
   result as __m128i.  Presumably PAVGW (rounded average of unsigned 16-bit
   lanes); confirm against the Intel reference.  */
1457 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1458 : _mm_avg_epu16 (__m128i __A, __m128i __B)
1459 : {
1460 293931487 : return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1461 : }
1462 :
/* Passes __A and __B as __v16qi to __builtin_ia32_psadbw128 and returns the
   result as __m128i.  Presumably PSADBW (sum of absolute differences of
   unsigned bytes, one sum per 8-byte half); confirm against the Intel
   reference.  */
1463 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1464 : _mm_sad_epu8 (__m128i __A, __m128i __B)
1465 : {
1466 14015155 : return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1467 : }
1468 :
/* Returns nothing; forwards the destination __A and the value __B to
   __builtin_ia32_movnti.  Presumably MOVNTI (32-bit store with a
   non-temporal hint); confirm against the Intel reference.  */
1469 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1470 : _mm_stream_si32 (int *__A, int __B)
1471 : {
1472 : __builtin_ia32_movnti (__A, __B);
1473 : }
1474 :
/* Only compiled when __x86_64__ is defined.  Returns nothing; forwards the
   destination __A and the value __B to __builtin_ia32_movnti64.  Presumably
   the 64-bit form of MOVNTI; confirm against the Intel reference.  */
1475 : #ifdef __x86_64__
1476 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1477 : _mm_stream_si64 (long long int *__A, long long int __B)
1478 : {
1479 : __builtin_ia32_movnti64 (__A, __B);
1480 : }
1481 : #endif
1482 :
/* Returns nothing; casts the destination to __v2di * and the value to
   __v2di, then calls __builtin_ia32_movntdq.  Presumably MOVNTDQ (128-bit
   non-temporal store, which requires a 16-byte aligned destination);
   confirm against the Intel reference.  */
1483 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1484 : _mm_stream_si128 (__m128i *__A, __m128i __B)
1485 : {
1486 : __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1487 : }
1488 :
/* Returns nothing; forwards the destination __A and __B (as __v2df) to
   __builtin_ia32_movntpd.  Presumably MOVNTPD (non-temporal store of two
   doubles, which requires a 16-byte aligned destination); confirm against
   the Intel reference.  */
1489 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1490 : _mm_stream_pd (double *__A, __m128d __B)
1491 : {
1492 : __builtin_ia32_movntpd (__A, (__v2df)__B);
1493 : }
1494 :
/* Returns nothing; forwards the address __A unchanged to
   __builtin_ia32_clflush.  Presumably CLFLUSH (flush the cache line that
   contains __A); confirm against the Intel reference.  */
1495 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1496 : _mm_clflush (void const *__A)
1497 : {
1498 : __builtin_ia32_clflush (__A);
1499 : }
1500 :
/* Takes no arguments and returns nothing; calls __builtin_ia32_lfence.
   Presumably emits LFENCE (load fence); confirm against the Intel
   reference.  */
1501 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1502 : _mm_lfence (void)
1503 : {
1504 : __builtin_ia32_lfence ();
1505 : }
1506 :
/* Takes no arguments and returns nothing; calls __builtin_ia32_mfence.
   Presumably emits MFENCE (full load/store fence); confirm against the Intel
   reference.  */
1507 : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1508 : _mm_mfence (void)
1509 : {
1510 : __builtin_ia32_mfence ();
1511 : }
1512 :
/* Builds a vector by calling _mm_set_epi32 with three zero arguments followed
   by __A.  NOTE(review): _mm_set_epi32 is defined outside this view;
   presumably its last argument is the lowest lane, so __A lands in lane 0 and
   the other lanes are zero -- confirm.  */
1513 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1514 : _mm_cvtsi32_si128 (int __A)
1515 : {
1516 2962210812 : return _mm_set_epi32 (0, 0, 0, __A);
1517 : }
1518 :
/* Only compiled when __x86_64__ is defined.  The Intel and Microsoft
   spellings have identical bodies: both return _mm_set_epi64x (0, __A).
   NOTE(review): _mm_set_epi64x is defined outside this view; presumably its
   last argument is the low 64-bit lane, so __A lands in the low half and the
   high half is zero -- confirm.  */
1519 : #ifdef __x86_64__
1520 : /* Intel intrinsic. */
1521 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1522 : _mm_cvtsi64_si128 (long long __A)
1523 : {
1524 0 : return _mm_set_epi64x (0, __A);
1525 : }
1526 :
1527 : /* Microsoft intrinsic. */
1528 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1529 : _mm_cvtsi64x_si128 (long long __A)
1530 : {
1531 : return _mm_set_epi64x (0, __A);
1532 : }
1533 : #endif
1534 :
1535 : /* Casts between various SP, DP, INT vector types. Note that these do no
1536 : conversion of values, they just change the type. */
/* Returns __A cast from __m128d to __m128; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1537 : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1538 : _mm_castpd_ps(__m128d __A)
1539 : {
1540 : return (__m128) __A;
1541 : }
1542 :
/* Returns __A cast from __m128d to __m128i; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1543 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1544 : _mm_castpd_si128(__m128d __A)
1545 : {
1546 1254218140 : return (__m128i) __A;
1547 : }
1548 :
/* Returns __A cast from __m128 to __m128d; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1549 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1550 : _mm_castps_pd(__m128 __A)
1551 : {
1552 : return (__m128d) __A;
1553 : }
1554 :
/* Returns __A cast from __m128 to __m128i; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1555 : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1556 : _mm_castps_si128(__m128 __A)
1557 : {
1558 336922000 : return (__m128i) __A;
1559 : }
1560 :
/* Returns __A cast from __m128i to __m128; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1561 : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1562 : _mm_castsi128_ps(__m128i __A)
1563 : {
1564 336937000 : return (__m128) __A;
1565 : }
1566 :
/* Returns __A cast from __m128i to __m128d; the body is a single vector cast,
   so the 128 bits are reinterpreted rather than converted.  */
1567 : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1568 : _mm_castsi128_pd(__m128i __A)
1569 : {
1570 1466619820 : return (__m128d) __A;
1571 : }
1572 :
1573 : #ifdef __DISABLE_SSE2__
1574 : #undef __DISABLE_SSE2__
1575 : #pragma GCC pop_options
1576 : #endif /* __DISABLE_SSE2__ */
1577 :
1578 : #endif /* _EMMINTRIN_H_INCLUDED */
|