simd.c - OpenGrok cross reference for /xen/tools/tests/x86

Lines Matching defs:x
8 #  define eq(x, y) ({ \  argument
16 #  define eq(x, y) ({ \  argument
31 #  define eq(x, y) ((BR(cmpps, _mask, x, y, 0, -1) & ALL_TRUE) == ALL_TRUE)  argument
33 #  define eq(x, y) (BR(cmppd, _mask, x, y, 0, -1) == ALL_TRUE)  argument
35 #  define eq(x, y) (B(pcmpeqb, _mask, (vqi_t)(x), (vqi_t)(y), -1) == ALL_TRUE)  argument
37 #  define eq(x, y) (B(pcmpeqw, _mask, (vhi_t)(x), (vhi_t)(y), -1) == ALL_TRUE)  argument
39 #  define eq(x, y) (B(pcmpeqd, _mask, (vsi_t)(x), (vsi_t)(y), -1) == ALL_TRUE)  argument
41 #  define eq(x, y) (B(pcmpeqq, _mask, (vdi_t)(x), (vdi_t)(y), -1) == ALL_TRUE)  argument
88 # define eq(x, y) to_bool((x) == (y))  argument
92 # define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); })  argument
94 #  define to_wint(x) ({ long l_ = (x)[0]; touch(l_); ((vec_t){ l_ }); })  argument
104 #   define to_u_int(type, x) ({ \  argument
112 #   define to_u_int(type, x) ({ \  argument
120 #  define to_uint(x) to_u_int(int, x)  argument
122 #   define to_uwint(x) to_u_int(long, x)  argument
126 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))  argument
130 #  define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)  argument
131 #  define to_uint(x) BR(cvtudq2ps, _mask, BR(cvtps2udq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)  argument
133 #   define to_w_int(x, s) ({ \  argument
146 #   define to_wint(x) to_w_int(x, )  argument
147 #   define to_uwint(x) to_w_int(x, u)  argument
150 #  define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)  argument
151 #  define to_uint(x) B(cvtudq2pd, _mask, BR(cvtpd2udq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)  argument
153 #   define to_wint(x) BR(cvtqq2pd, _mask, BR(cvtpd2qq, _mask, x, (vdi_t)undef(), ~0), undef(), ~0)  argument
154 #   define to_uwint(x) BR(cvtuqq2pd, _mask, BR(cvtpd2uqq, _mask, x, (vdi_t)undef(), ~0), undef(), ~…  argument
159 #  define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x))  argument
161 #  define to_int(x) __builtin_ia32_cvtdq2pd(__builtin_ia32_cvtpd2dq(x))  argument
165 #  define to_int(x) __builtin_ia32_cvtdq2ps256(__builtin_ia32_cvtps2dq256(x))  argument
167 #  define to_int(x) __builtin_ia32_cvtdq2pd256(__builtin_ia32_cvtpd2dq256(x))  argument
172 # define scalar_1op(x, op) ({ \  argument
177 # define scalar_2op(x, y, op) ({ \  argument
185 # define low_half(x) (x)  argument
186 # define high_half(x) B_(movhlps, , undef(), x)  argument
191 static inline vec_t movlhps(vec_t x, vec_t y) {  in movlhps()
194 # define insert_pair(x, y, p) \  argument
202 # define recip(x) ({ \  argument
209 # define rsqrt(x) ({ \  argument
218 #  define getexp(x) scalar_1op(x, "vgetexpss %[in], %[out], %[out]")  argument
219 #  define getmant(x) scalar_1op(x, "vgetmantss $0, %[in], %[out], %[out]")  argument
221 #   define recip(x) scalar_1op(x, "vrcp28ss %[in], %[out], %[out]")  argument
222 #   define rsqrt(x) scalar_1op(x, "vrsqrt28ss %[in], %[out], %[out]")  argument
224 #   define recip(x) scalar_1op(x, "vrcp14ss %[in], %[out], %[out]")  argument
225 #   define rsqrt(x) scalar_1op(x, "vrsqrt14ss %[in], %[out], %[out]")  argument
227 #  define scale(x, y) scalar_2op(x, y, "vscalefss %[in2], %[in1], %[out]")  argument
228 #  define sqrt(x) scalar_1op(x, "vsqrtss %[in], %[out], %[out]")  argument
229 #  define trunc(x) scalar_1op(x, "vrndscaless $0b1011, %[in], %[out], %[out]")  argument
231 #  define getexp(x) scalar_1op(x, "vgetexpsd %[in], %[out], %[out]")  argument
232 #  define getmant(x) scalar_1op(x, "vgetmantsd $0, %[in], %[out], %[out]")  argument
234 #   define recip(x) scalar_1op(x, "vrcp28sd %[in], %[out], %[out]")  argument
235 #   define rsqrt(x) scalar_1op(x, "vrsqrt28sd %[in], %[out], %[out]")  argument
237 #   define recip(x) scalar_1op(x, "vrcp14sd %[in], %[out], %[out]")  argument
238 #   define rsqrt(x) scalar_1op(x, "vrsqrt14sd %[in], %[out], %[out]")  argument
240 #  define scale(x, y) scalar_2op(x, y, "vscalefsd %[in2], %[in1], %[out]")  argument
241 #  define sqrt(x) scalar_1op(x, "vsqrtsd %[in], %[out], %[out]")  argument
242 #  define trunc(x) scalar_1op(x, "vrndscalesd $0b1011, %[in], %[out], %[out]")  argument
249 #  define _half(x, lh) ({ \  argument
257 #  define low_half(x)  _half(x, 0)  argument
258 #  define high_half(x) _half(x, 1)  argument
262 #  define low_quarter(x) ({ \  argument
271 #  define broadcast(x) ({ \  argument
278 #   define broadcast_pair(x) ({ \  argument
285 #   define broadcast_octet(x) B(broadcastf32x8_, _mask, x, undef(), ~0)  argument
286 #   define insert_octet(x, y, p) B(insertf32x8_, _mask, x, y, p, undef(), ~0)  argument
289 #   define frac(x) B(reduceps, _mask, x, 0b00001011, undef(), ~0)  argument
291 #  define getexp(x) BR(getexpps, _mask, x, undef(), ~0)  argument
292 #  define getmant(x) BR(getmantps, _mask, x, 0, undef(), ~0)  argument
294 #   define max(x, y) BR(rangeps, _mask, x, y, 0b0101, undef(), ~0)  argument
295 #   define min(x, y) BR(rangeps, _mask, x, y, 0b0100, undef(), ~0)  argument
297 #   define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0)  argument
298 #   define min(x, y) BR_(minps, _mask, x, y, undef(), ~0)  argument
300 #  define mix(x, y) B(blendmps_, _mask, x, y, (0b1010101010101010 & ALL_TRUE))  argument
301 #  define scale(x, y) BR(scalefps, _mask, x, y, undef(), ~0)  argument
303 #   define recip(x) BR(rcp28ps, _mask, x, undef(), ~0)  argument
304 #   define rsqrt(x) BR(rsqrt28ps, _mask, x, undef(), ~0)  argument
306 #   define recip(x) B(rcp14ps, _mask, x, undef(), ~0)  argument
307 #   define rsqrt(x) B(rsqrt14ps, _mask, x, undef(), ~0)  argument
309 #  define shrink1(x) BR_(cvtpd2ps, _mask, (vdf_t)(x), (vsf_half_t){}, ~0)  argument
310 #  define sqrt(x) BR(sqrtps, _mask, x, undef(), ~0)  argument
311 #  define trunc(x) BR(rndscaleps_, _mask, x, 0b1011, undef(), ~0)  argument
312 #  define widen1(x) ((vec_t)BR(cvtps2pd, _mask, x, (vdf_t)undef(), ~0))  argument
314 #   define interleave_hi(x, y) B(unpckhps, _mask, x, y, undef(), ~0)  argument
315 #   define interleave_lo(x, y) B(unpcklps, _mask, x, y, undef(), ~0)  argument
316 #   define swap(x) B(shufps, _mask, x, x, 0b00011011, undef(), ~0)  argument
317 #   define swap2(x) B_(vpermilps, _mask, x, 0b00011011, undef(), ~0)  argument
319 #   define broadcast_quartet(x) B(broadcastf32x4_, _mask, x, undef(), ~0)  argument
320 #   define insert_pair(x, y, p) \  argument
325 #   define insert_quartet(x, y, p) B(insertf32x4_, _mask, x, y, p, undef(), ~0)  argument
326 #   define interleave_hi(x, y) B(vpermi2varps, _mask, x, interleave_hi, y, ~0)  argument
327 #   define interleave_lo(x, y) B(vpermt2varps, _mask, interleave_lo, x, y, ~0)  argument
328 #   define swap(x) ({ \  argument
332 #   define swap2(x) B(vpermilps, _mask, \  argument
339 #   define broadcast(x) ({ \  argument
346 #   define broadcast(x) ({ \  argument
354 #   define broadcast_pair(x) B(broadcastf64x2_, _mask, x, undef(), ~0)  argument
355 #   define insert_pair(x, y, p) B(insertf64x2_, _mask, x, y, p, undef(), ~0)  argument
358 #   define broadcast_quartet(x) B(broadcastf64x4_, , x, undef(), ~0)  argument
359 #   define insert_quartet(x, y, p) B(insertf64x4_, _mask, x, y, p, undef(), ~0)  argument
362 #   define frac(x) B(reducepd, _mask, x, 0b00001011, undef(), ~0)  argument
364 #  define getexp(x) BR(getexppd, _mask, x, undef(), ~0)  argument
365 #  define getmant(x) BR(getmantpd, _mask, x, 0, undef(), ~0)  argument
367 #   define max(x, y) BR(rangepd, _mask, x, y, 0b0101, undef(), ~0)  argument
368 #   define min(x, y) BR(rangepd, _mask, x, y, 0b0100, undef(), ~0)  argument
370 #   define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0)  argument
371 #   define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0)  argument
373 #  define mix(x, y) B(blendmpd_, _mask, x, y, 0b10101010)  argument
374 #  define scale(x, y) BR(scalefpd, _mask, x, y, undef(), ~0)  argument
376 #   define recip(x) BR(rcp28pd, _mask, x, undef(), ~0)  argument
377 #   define rsqrt(x) BR(rsqrt28pd, _mask, x, undef(), ~0)  argument
379 #   define recip(x) B(rcp14pd, _mask, x, undef(), ~0)  argument
380 #   define rsqrt(x) B(rsqrt14pd, _mask, x, undef(), ~0)  argument
382 #  define sqrt(x) BR(sqrtpd, _mask, x, undef(), ~0)  argument
383 #  define trunc(x) BR(rndscalepd_, _mask, x, 0b1011, undef(), ~0)  argument
385 #   define interleave_hi(x, y) B(unpckhpd, _mask, x, y, undef(), ~0)  argument
386 #   define interleave_lo(x, y) B(unpcklpd, _mask, x, y, undef(), ~0)  argument
387 #   define swap(x) B(shufpd, _mask, x, x, 0b01, undef(), ~0)  argument
388 #   define swap2(x) B_(vpermilpd, _mask, x, 0b01, undef(), ~0)  argument
390 #   define interleave_hi(x, y) B(vpermi2varpd, _mask, x, interleave_hi, y, ~0)  argument
391 #   define interleave_lo(x, y) B(vpermt2varpd, _mask, interleave_lo, x, y, ~0)  argument
392 #   define swap(x) ({ \  argument
396 #   define swap2(x) B(vpermilpd, _mask, \  argument
405 #   define broadcast(x) \  argument
408 #   define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })  argument
410 #  define max(x, y) __builtin_ia32_maxps256(x, y)  argument
411 #  define min(x, y) __builtin_ia32_minps256(x, y)  argument
412 #  define recip(x) __builtin_ia32_rcpps256(x)  argument
413 #  define rsqrt(x) __builtin_ia32_rsqrtps256(x)  argument
414 #  define sqrt(x) __builtin_ia32_sqrtps256(x)  argument
415 #  define swap(x) ({ \  argument
420 #   define swap2(x) __builtin_ia32_permvarsf256(x, __builtin_ia32_cvtps2dq256(inv) - 1)  argument
422 #   define swap2(x) ({ \  argument
429 #   define broadcast(x) __builtin_ia32_vbroadcastss_ps((vec_t){ x })  argument
431 #   define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); })  argument
433 #  define interleave_hi(x, y) __builtin_ia32_unpckhps(x, y)  argument
434 #  define interleave_lo(x, y) __builtin_ia32_unpcklps(x, y)  argument
435 #  define max(x, y) __builtin_ia32_maxps(x, y)  argument
436 #  define min(x, y) __builtin_ia32_minps(x, y)  argument
437 #  define recip(x) __builtin_ia32_rcpps(x)  argument
438 #  define rsqrt(x) __builtin_ia32_rsqrtps(x)  argument
439 #  define sqrt(x) __builtin_ia32_sqrtps(x)  argument
440 #  define swap(x) __builtin_ia32_shufps(x, x, 0b00011011)  argument
442 #   define swap2(x) __builtin_ia32_vpermilvarps(x, __builtin_ia32_cvtps2dq(inv) - 1)  argument
445 #  define recip(x) scalar_1op(x, "rcpss %[in], %[out]")  argument
446 #  define rsqrt(x) scalar_1op(x, "rsqrtss %[in], %[out]")  argument
447 #  define sqrt(x) scalar_1op(x, "sqrtss %[in], %[out]")  argument
452 #   define broadcast(x) \  argument
455 #   define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); })  argument
457 #  define max(x, y) __builtin_ia32_maxpd256(x, y)  argument
458 #  define min(x, y) __builtin_ia32_minpd256(x, y)  argument
459 #  define recip(x) ({ \  argument
466 #  define rsqrt(x) ({ \  argument
473 #  define sqrt(x) __builtin_ia32_sqrtpd256(x)  argument
474 #  define swap(x) ({ \  argument
479 #   define swap2(x) __builtin_ia32_permdf256(x, 0b00011011)  argument
482 #  define interleave_hi(x, y) __builtin_ia32_unpckhpd(x, y)  argument
483 #  define interleave_lo(x, y) __builtin_ia32_unpcklpd(x, y)  argument
484 #  define max(x, y) __builtin_ia32_maxpd(x, y)  argument
485 #  define min(x, y) __builtin_ia32_minpd(x, y)  argument
486 #  define recip(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rcpps(__builtin_ia32_cvtpd2ps(x)))  argument
487 #  define rsqrt(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rsqrtps(__builtin_ia32_cvtpd2ps(x)))  argument
488 #  define sqrt(x) __builtin_ia32_sqrtpd(x)  argument
489 #  define swap(x) __builtin_ia32_shufpd(x, x, 0b01)  argument
491 #   define swap2(x) __builtin_ia32_vpermilvarpd(x, __builtin_ia32_pmovsxdq128( \  argument
495 #  define recip(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rcpss %[out], %[out]; cvtss2sd %[out], %[…  argument
496 #  define rsqrt(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rsqrtss %[out], %[out]; cvtss2sd %[out], …  argument
497 #  define sqrt(x) scalar_1op(x, "sqrtsd %[in], %[out]")  argument
505 #  define low_half(x) ({ \  argument
515 #  define low_quarter(x) ({ \  argument
524 #  define broadcast(x) ({ \  argument
530 #  define broadcast2(x) ({ \  argument
536 #   define broadcast_pair(x) ({ \  argument
543 #   define broadcast_octet(x) ((vec_t)B(broadcasti32x8_, _mask, (vsi_octet_t)(x), (vsi_t)undef(), ~…  argument
544 #   define insert_octet(x, y, p) ((vec_t)B(inserti32x8_, _mask, (vsi_t)(x), (vsi_octet_t)(y), p, (v…  argument
547 #   define interleave_hi(x, y) ((vec_t)B(punpckhdq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), …  argument
548 #   define interleave_lo(x, y) ((vec_t)B(punpckldq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), …  argument
549 #   define swap(x) ((vec_t)B(pshufd, _mask, (vsi_t)(x), 0b00011011, (vsi_t)undef(), ~0))  argument
551 #   define broadcast_quartet(x) ((vec_t)B(broadcasti32x4_, _mask, (vsi_quartet_t)(x), (vsi_t)undef(…  argument
552 #   define insert_pair(x, y, p) \  argument
558 #   define insert_quartet(x, y, p) ((vec_t)B(inserti32x4_, _mask, (vsi_t)(x), (vsi_quartet_t)(y), p…  argument
559 #   define interleave_hi(x, y) ((vec_t)B(vpermi2vard, _mask, (vsi_t)(x), interleave_hi, (vsi_t)(y),…  argument
560 #   define interleave_lo(x, y) ((vec_t)B(vpermt2vard, _mask, interleave_lo, (vsi_t)(x), (vsi_t)(y),…  argument
561 #   define swap(x) ((vec_t)B(pshufd, _mask, \  argument
565 #   define swap2(x) ((vec_t)B_(permvarsi, _mask, (vsi_t)(x), (vsi_t)(inv - 1), (vsi_t)undef(), ~0))  argument
567 #  define mix(x, y) ((vec_t)B(blendmd_, _mask, (vsi_t)(x), (vsi_t)(y), \  argument
569 #  define rotr(x, n) ((vec_t)B(alignd, _mask, (vsi_t)(x), (vsi_t)(x), n, (vsi_t)undef(), ~0))  argument
570 #  define shrink1(x) ((half_t)B(pmovqd, _mask, (vdi_t)(x), (vsi_half_t){}, ~0))  argument
572 #  define broadcast(x) ({ \  argument
579 #   define broadcast2(x) ({ \  argument
586 #   define broadcast_pair(x) ((vec_t)B(broadcasti64x2_, _mask, (vdi_pair_t)(x), (vdi_t)undef(), ~0))  argument
587 #   define insert_pair(x, y, p) ((vec_t)B(inserti64x2_, _mask, (vdi_t)(x), (vdi_pair_t)(y), p, (vdi…  argument
590 #   define broadcast_quartet(x) ((vec_t)B(broadcasti64x4_, , (vdi_quartet_t)(x), (vdi_t)undef(), ~0…  argument
591 #   define insert_quartet(x, y, p) ((vec_t)B(inserti64x4_, _mask, (vdi_t)(x), (vdi_quartet_t)(y), p…  argument
594 #   define interleave_hi(x, y) ((vec_t)B(punpckhqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(),…  argument
595 #   define interleave_lo(x, y) ((vec_t)B(punpcklqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(),…  argument
596 #   define swap(x) ((vec_t)B(pshufd, _mask, (vsi_t)(x), 0b01001110, (vsi_t)undef(), ~0))  argument
598 #   define interleave_hi(x, y) ((vec_t)B(vpermi2varq, _mask, (vdi_t)(x), interleave_hi, (vdi_t)(y),…  argument
599 #   define interleave_lo(x, y) ((vec_t)B(vpermt2varq, _mask, interleave_lo, (vdi_t)(x), (vdi_t)(y),…  argument
600 #   define swap(x) ((vec_t)B(pshufd, _mask, \  argument
604 #   define swap2(x) ((vec_t)B(permvardi, _mask, (vdi_t)(x), (vdi_t)(inv - 1), (vdi_t)undef(), ~0))  argument
606 #  define mix(x, y) ((vec_t)B(blendmq_, _mask, (vdi_t)(x), (vdi_t)(y), 0b10101010))  argument
607 #  define rotr(x, n) ((vec_t)B(alignq, _mask, (vdi_t)(x), (vdi_t)(x), n, (vdi_t)undef(), ~0))  argument
609 #   define swap3(x) ((vec_t)B_(permdi, _mask, (vdi_t)(x), 0b00011011, (vdi_t)undef(), ~0))  argument
611 #   define swap3(x) ({ \  argument
618 #  define abs(x) B(pabsd, _mask, x, undef(), ~0)  argument
619 #  define max(x, y) B(pmaxsd, _mask, x, y, undef(), ~0)  argument
620 #  define min(x, y) B(pminsd, _mask, x, y, undef(), ~0)  argument
621 #  define mul_full(x, y) ((vec_t)B(pmuldq, _mask, x, y, (vdi_t)undef(), ~0))  argument
622 #  define widen1(x) ((vec_t)B(pmovsxdq, _mask, x, (vdi_t)undef(), ~0))  argument
624 #  define max(x, y) ((vec_t)B(pmaxud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))  argument
625 #  define min(x, y) ((vec_t)B(pminud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0))  argument
626 #  define mul_full(x, y) ((vec_t)B(pmuludq, _mask, (vsi_t)(x), (vsi_t)(y), (vdi_t)undef(), ~0))  argument
627 #  define widen1(x) ((vec_t)B(pmovzxdq, _mask, (vsi_half_t)(x), (vdi_t)undef(), ~0))  argument
629 #  define abs(x) ((vec_t)B(pabsq, _mask, (vdi_t)(x), (vdi_t)undef(), ~0))  argument
630 #  define max(x, y) ((vec_t)B(pmaxsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))  argument
631 #  define min(x, y) ((vec_t)B(pminsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))  argument
633 #  define max(x, y) ((vec_t)B(pmaxuq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))  argument
634 #  define min(x, y) ((vec_t)B(pminuq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0))  argument
639 #  define broadcast(x) ({ \  argument
645 #  define broadcast2(x) ({ \  argument
651 #   define interleave_hi(x, y) ((vec_t)B(punpckhbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), …  argument
652 #   define interleave_lo(x, y) ((vec_t)B(punpcklbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), …  argument
653 #   define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 8, (vdi_t)undef(), ~0…  argument
654 #   define swap(x) ((vec_t)B(pshufb, _mask, (vqi_t)(x), (vqi_t)(inv - 1), (vqi_t)undef(), ~0))  argument
656 #   define interleave_hi(x, y) ((vec_t)B(vpermi2varqi, _mask, (vqi_t)(x), interleave_hi, (vqi_t)(y)…  argument
657 #   define interleave_lo(x, y) ((vec_t)B(vpermt2varqi, _mask, interleave_lo, (vqi_t)(x), (vqi_t)(y)…  argument
659 #  define mix(x, y) ((vec_t)B(blendmb_, _mask, (vqi_t)(x), (vqi_t)(y), \  argument
661 #  define shrink1(x) ((half_t)B(pmovwb, _mask, (vhi_t)(x), (vqi_half_t){}, ~0))  argument
662 #  define shrink2(x) ((quarter_t)B(pmovdb, _mask, (vsi_t)(x), (vqi_quarter_t){}, ~0))  argument
663 #  define shrink3(x) ((eighth_t)B(pmovqb, _mask, (vdi_t)(x), (vqi_eighth_t){}, ~0))  argument
665 #   define swap2(x) ((vec_t)B(permvarqi, _mask, (vqi_t)(x), (vqi_t)(inv - 1), (vqi_t)undef(), ~0))  argument
668 #  define broadcast(x) ({ \  argument
674 #  define broadcast2(x) ({ \  argument
680 #   define interleave_hi(x, y) ((vec_t)B(punpckhwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), …  argument
681 #   define interleave_lo(x, y) ((vec_t)B(punpcklwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), …  argument
682 #   define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 16, (vdi_t)undef(), ~…  argument
683 #   define swap(x) ((vec_t)B(pshufd, _mask, \  argument
689 #   define interleave_hi(x, y) ((vec_t)B(vpermi2varhi, _mask, (vhi_t)(x), interleave_hi, (vhi_t)(y)…  argument
690 #   define interleave_lo(x, y) ((vec_t)B(vpermt2varhi, _mask, interleave_lo, (vhi_t)(x), (vhi_t)(y)…  argument
692 #  define mix(x, y) ((vec_t)B(blendmw_, _mask, (vhi_t)(x), (vhi_t)(y), \  argument
694 #  define shrink1(x) ((half_t)B(pmovdw, _mask, (vsi_t)(x), (vhi_half_t){}, ~0))  argument
695 #  define shrink2(x) ((quarter_t)B(pmovqw, _mask, (vdi_t)(x), (vhi_quarter_t){}, ~0))  argument
696 #  define swap2(x) ((vec_t)B(permvarhi, _mask, (vhi_t)(x), (vhi_t)(inv - 1), (vhi_t)undef(), ~0))  argument
699 #  define abs(x) ((vec_t)B(pabsb, _mask, (vqi_t)(x), (vqi_t)undef(), ~0))  argument
700 #  define max(x, y) ((vec_t)B(pmaxsb, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))  argument
701 #  define min(x, y) ((vec_t)B(pminsb, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))  argument
702 #  define widen1(x) ((vec_t)B(pmovsxbw, _mask, (vqi_half_t)(x), (vhi_t)undef(), ~0))  argument
703 #  define widen2(x) ((vec_t)B(pmovsxbd, _mask, (vqi_quarter_t)(x), (vsi_t)undef(), ~0))  argument
704 #  define widen3(x) ((vec_t)B(pmovsxbq, _mask, (vqi_eighth_t)(x), (vdi_t)undef(), ~0))  argument
706 #  define max(x, y) ((vec_t)B(pmaxub, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))  argument
707 #  define min(x, y) ((vec_t)B(pminub, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0))  argument
708 #  define widen1(x) ((vec_t)B(pmovzxbw, _mask, (vqi_half_t)(x), (vhi_t)undef(), ~0))  argument
709 #  define widen2(x) ((vec_t)B(pmovzxbd, _mask, (vqi_quarter_t)(x), (vsi_t)undef(), ~0))  argument
710 #  define widen3(x) ((vec_t)B(pmovzxbq, _mask, (vqi_eighth_t)(x), (vdi_t)undef(), ~0))  argument
712 #  define abs(x) B(pabsw, _mask, x, undef(), ~0)  argument
713 #  define max(x, y) B(pmaxsw, _mask, x, y, undef(), ~0)  argument
714 #  define min(x, y) B(pminsw, _mask, x, y, undef(), ~0)  argument
715 #  define mul_hi(x, y) B(pmulhw, _mask, x, y, undef(), ~0)  argument
716 #  define widen1(x) ((vec_t)B(pmovsxwd, _mask, x, (vsi_t)undef(), ~0))  argument
717 #  define widen2(x) ((vec_t)B(pmovsxwq, _mask, x, (vdi_t)undef(), ~0))  argument
719 #  define max(x, y) ((vec_t)B(pmaxuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0))  argument
720 #  define min(x, y) ((vec_t)B(pminuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0))  argument
721 #  define mul_hi(x, y) ((vec_t)B(pmulhuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0))  argument
722 #  define widen1(x) ((vec_t)B(pmovzxwd, _mask, (vhi_half_t)(x), (vsi_t)undef(), ~0))  argument
723 #  define widen2(x) ((vec_t)B(pmovzxwq, _mask, (vhi_quarter_t)(x), (vdi_t)undef(), ~0))  argument
727 #  define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhbw128((vqi_t)(x), (vqi_t)(y)))  argument
728 #  define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklbw128((vqi_t)(x), (vqi_t)(y)))  argument
730 #  define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhwd128((vhi_t)(x), (vhi_t)(y)))  argument
731 #  define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklwd128((vhi_t)(x), (vhi_t)(y)))  argument
732 #  define swap(x) ((vec_t)__builtin_ia32_pshufd( \  argument
736 #  define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhdq128((vsi_t)(x), (vsi_t)(y)))  argument
737 #  define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpckldq128((vsi_t)(x), (vsi_t)(y)))  argument
738 #  define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b00011011))  argument
740 #  define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhqdq128((vdi_t)(x), (vdi_t)(y)))  argument
741 #  define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklqdq128((vdi_t)(x), (vdi_t)(y)))  argument
742 #  define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b01001110))  argument
745 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxub128((vqi_t)(x), (vqi_t)(y)))  argument
746 #  define min(x, y) ((vec_t)__builtin_ia32_pminub128((vqi_t)(x), (vqi_t)(y)))  argument
748 #  define max(x, y) __builtin_ia32_pmaxsw128(x, y)  argument
749 #  define min(x, y) __builtin_ia32_pminsw128(x, y)  argument
750 #  define mul_hi(x, y) __builtin_ia32_pmulhw128(x, y)  argument
752 #  define mul_hi(x, y) ((vec_t)__builtin_ia32_pmulhuw128((vhi_t)(x), (vhi_t)(y)))  argument
754 #  define mul_full(x, y) ((vec_t)__builtin_ia32_pmuludq128((vsi_t)(x), (vsi_t)(y)))  argument
756 # define select(d, x, y, m) ({ \  argument
763 # define swap_lanes(x, y, func, type) ({ \  argument
771 #  define broadcast(x) ({ char s_ = (x); vec_t d_; asm ( "vpbroadcastb %1,%0" : "=x" (d_) : "m" (s_…  argument
772 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignb256((vqi_t)(x), (vqi_t)(y)))  argument
773 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)…  argument
776 #  define broadcast(x) ({ short s_ = (x); vec_t d_; asm ( "vpbroadcastw %1,%0" : "=x" (d_) : "m" (s…  argument
777 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignw256((vhi_t)(x), (vhi_t)(y)))  argument
778 #  define hadd(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phaddw256, vhi_t))  argument
779 #  define hsub(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phsubw256, vhi_t))  argument
780 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendw256((vhi_t)(x), (vhi_t)(y), 0b10101010))  argument
781 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)…  argument
784 #  define broadcast(x) ({ int s_ = (x); vec_t d_; asm ( "vpbroadcastd %1,%0" : "=x" (d_) : "m" (s_)…  argument
785 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignd256((vsi_t)(x), (vsi_t)(y)))  argument
786 #  define hadd(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phaddd256, vsi_t))  argument
787 #  define hsub(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phsubd256, vsi_t))  argument
788 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendd256((vsi_t)(x), (vsi_t)(y), 0b10101010))  argument
789 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)…  argument
791 #  define select(d, x, y, m) ({ \  argument
796 #  define swap(x) ((vec_t)__builtin_ia32_permvarsi256((vsi_t)(x), (vsi_t)inv - 1))  argument
798 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendd256((vsi_t)(x), (vsi_t)(y), 0b11001100))  argument
799 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)…  argument
801 #  define select(d, x, y, m) ({ \  argument
806 #  define swap(x) ((vec_t)__builtin_ia32_permdi256((vdi_t)(x), 0b00011011))  argument
807 #  define swap2(x) ({ \  argument
813 #  define abs(x) ((vec_t)__builtin_ia32_pabsb256((vqi_t)(x)))  argument
814 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxsb256((vqi_t)(x), (vqi_t)(y)))  argument
815 #  define min(x, y) ((vec_t)__builtin_ia32_pminsb256((vqi_t)(x), (vqi_t)(y)))  argument
816 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw256((vqi_t)(x)))  argument
817 #  define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd256((vqi_t)(x)))  argument
818 #  define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq256((vqi_t)(x)))  argument
820 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxub256((vqi_t)(x), (vqi_t)(y)))  argument
821 #  define min(x, y) ((vec_t)__builtin_ia32_pminub256((vqi_t)(x), (vqi_t)(y)))  argument
822 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw256((vqi_t)(x)))  argument
823 #  define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd256((vqi_t)(x)))  argument
824 #  define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq256((vqi_t)(x)))  argument
826 #  define abs(x) __builtin_ia32_pabsw256(x)  argument
827 #  define max(x, y) __builtin_ia32_pmaxsw256(x, y)  argument
828 #  define min(x, y) __builtin_ia32_pminsw256(x, y)  argument
829 #  define mul_hi(x, y) __builtin_ia32_pmulhw256(x, y)  argument
830 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd256(x))  argument
831 #  define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq256(x))  argument
833 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxuw256((vhi_t)(x), (vhi_t)(y)))  argument
834 #  define min(x, y) ((vec_t)__builtin_ia32_pminuw256((vhi_t)(x), (vhi_t)(y)))  argument
835 #  define mul_hi(x, y) ((vec_t)__builtin_ia32_pmulhuw256((vhi_t)(x), (vhi_t)(y)))  argument
836 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd256((vhi_t)(x)))  argument
837 #  define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq256((vhi_t)(x)))  argument
839 #  define abs(x) __builtin_ia32_pabsd256(x)  argument
840 #  define max(x, y) __builtin_ia32_pmaxsd256(x, y)  argument
841 #  define min(x, y) __builtin_ia32_pminsd256(x, y)  argument
842 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq256(x))  argument
844 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxud256((vsi_t)(x), (vsi_t)(y)))  argument
845 #  define min(x, y) ((vec_t)__builtin_ia32_pminud256((vsi_t)(x), (vsi_t)(y)))  argument
846 #  define mul_full(x, y) ((vec_t)__builtin_ia32_pmuludq256((vsi_t)(x), (vsi_t)(y)))  argument
847 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq256((vsi_t)(x)))  argument
849 #  define broadcast(x) ({ \  argument
858 #  define broadcast(x) ({ long long s_ = (x); vec_t d_; asm ( "vpbroadcastq %1,%0" : "=x" (d_) : "m…  argument
863 #  define addsub(x, y) __builtin_ia32_addsubps(x, y)  argument
864 #  define dup_hi(x) __builtin_ia32_movshdup(x)  argument
865 #  define dup_lo(x) __builtin_ia32_movsldup(x)  argument
866 #  define hadd(x, y) __builtin_ia32_haddps(x, y)  argument
867 #  define hsub(x, y) __builtin_ia32_hsubps(x, y)  argument
869 #  define addsub(x, y) __builtin_ia32_addsubpd(x, y)  argument
870 #  define dup_lo(x) ({ \  argument
875 #  define hadd(x, y) __builtin_ia32_haddpd(x, y)  argument
876 #  define hsub(x, y) __builtin_ia32_hsubpd(x, y)  argument
880 #  define addsub(x, y) __builtin_ia32_addsubps256(x, y)  argument
881 #  define dup_hi(x) __builtin_ia32_movshdup256(x)  argument
882 #  define dup_lo(x) __builtin_ia32_movsldup256(x)  argument
884 #   define hadd(x, y) __builtin_ia32_permvarsf256(__builtin_ia32_haddps256(x, y), \  argument
886 #   define hsub(x, y) __builtin_ia32_permvarsf256(__builtin_ia32_hsubps256(x, y), \  argument
889 #   define hadd(x, y) ({ \  argument
893 #   define hsub(x, y) ({ \  argument
899 #  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)  argument
900 #  define dup_lo(x) __builtin_ia32_movddup256(x)  argument
902 #   define hadd(x, y) __builtin_ia32_permdf256(__builtin_ia32_haddpd256(x, y), 0b11011000)  argument
903 #   define hsub(x, y) __builtin_ia32_permdf256(__builtin_ia32_hsubpd256(x, y), 0b11011000)  argument
905 #   define hadd(x, y) ({ \  argument
909 #   define hsub(x, y) ({ \  argument
918 #  define abs(x) ((vec_t)__builtin_ia32_pabsb128((vqi_t)(x)))  argument
920 #  define abs(x) __builtin_ia32_pabsw128(x)  argument
922 #  define abs(x) __builtin_ia32_pabsd128(x)  argument
925 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignb128((vqi_t)(x), (vqi_t)(y)))  argument
926 #  define swap(x) ((vec_t)__builtin_ia32_pshufb128((vqi_t)(x), (vqi_t)(inv - 1)))  argument
927 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 8))  argument
929 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignw128((vhi_t)(x), (vhi_t)(y)))  argument
930 #  define hadd(x, y) ((vec_t)__builtin_ia32_phaddw128((vhi_t)(x), (vhi_t)(y)))  argument
931 #  define hsub(x, y) ((vec_t)__builtin_ia32_phsubw128((vhi_t)(x), (vhi_t)(y)))  argument
932 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 16))  argument
934 #  define copysignz(x, y) ((vec_t)__builtin_ia32_psignd128((vsi_t)(x), (vsi_t)(y)))  argument
935 #  define hadd(x, y) ((vec_t)__builtin_ia32_phaddd128((vsi_t)(x), (vsi_t)(y)))  argument
936 #  define hsub(x, y) ((vec_t)__builtin_ia32_phsubd128((vsi_t)(x), (vsi_t)(y)))  argument
937 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 32))  argument
939 #  define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64))  argument
944 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y)))  argument
945 #  define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y)))  argument
946 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw128((vqi_t)(x)))  argument
947 #  define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd128((vqi_t)(x)))  argument
948 #  define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq128((vqi_t)(x)))  argument
950 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd128(x))  argument
951 #  define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq128(x))  argument
953 #  define max(x, y) __builtin_ia32_pmaxsd128(x, y)  argument
954 #  define min(x, y) __builtin_ia32_pminsd128(x, y)  argument
955 #  define mul_full(x, y) ((vec_t)__builtin_ia32_pmuldq128(x, y))  argument
956 #  define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq128(x))  argument
958 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw128((vqi_t)(x)))  argument
959 #  define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd128((vqi_t)(x)))  argument
960 #  define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq128((vqi_t)(x)))  argument
962 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxuw128((vhi_t)(x), (vhi_t)(y)))  argument
963 #  define min(x, y) ((vec_t)__builtin_ia32_pminuw128((vhi_t)(x), (vhi_t)(y)))  argument
964 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd128((vhi_t)(x)))  argument
965 #  define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq128((vhi_t)(x)))  argument
967 #  define max(x, y) ((vec_t)__builtin_ia32_pmaxud128((vsi_t)(x), (vsi_t)(y)))  argument
968 #  define min(x, y) ((vec_t)__builtin_ia32_pminud128((vsi_t)(x), (vsi_t)(y)))  argument
969 #  define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq128((vsi_t)(x)))  argument
973 #  define select(d, x, y, m) \  argument
976 #  define dot_product(x, y) __builtin_ia32_dpps(x, y, 0b11110001)  argument
977 #  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps(y, x, m))  argument
978 #  define trunc(x) __builtin_ia32_roundps(x, 0b1011)  argument
980 #  define dot_product(x, y) __builtin_ia32_dppd(x, y, 0b00110001)  argument
981 #  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd(y, x, m))  argument
982 #  define trunc(x) __builtin_ia32_roundpd(x, 0b1011)  argument
985 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b10101010))  argument
987 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11001100))  argument
989 #  define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11110000))  argument
991 #  define mix(x, y) __builtin_ia32_blendps(x, y, 0b1010)  argument
993 #  define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10)  argument
998 #  define dot_product(x, y) ({ \  argument
1002 #  define mix(x, y) __builtin_ia32_blendps256(x, y, 0b10101010)  argument
1003 #  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps256(y, x, m))  argument
1004 #  define select2(d, x, y, m) ({ \  argument
1009 #  define trunc(x) __builtin_ia32_roundps256(x, 0b1011)  argument
1011 #  define mix(x, y) __builtin_ia32_blendpd256(x, y, 0b1010)  argument
1012 #  define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd256(y, x, m))  argument
1013 #  define select2(d, x, y, m) ({ \  argument
1018 #  define trunc(x) __builtin_ia32_roundpd256(x, 0b1011)  argument
1022 # define max(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ > y_ ? x_ : y_; })})  argument
1023 # define min(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ < y_ ? x_ : y_; })})  argument
1026 #   define trunc(x) scalar_1op(x, "roundss $0b1011, %[in], %[out]")  argument
1028 #   define trunc(x) scalar_1op(x, "roundsd $0b1011, %[in], %[out]")  argument
1038 #  define select(d, x, y, m) \  argument
1041 #   define swap2(x) ((vec_t)__builtin_ia32_vpperm((vqi_t)(x), (vqi_t)(x), (vqi_t)inv - 1))  argument
1043 #   define swap2(x) \  argument
1048 #   define frac(x) __builtin_ia32_vfrczps(x)  argument
1050 #   define swap2(x) ({ \  argument
1060 #   define frac(x) __builtin_ia32_vfrczpd(x)  argument
1062 #   define swap2(x) ({ \  argument
1076 #   define hadd(x, y) ((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphaddbw((vqi_t)(x)), \  argument
1078 #   define hsub(x, y) ((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphsubbw((vqi_t)(x)), \  argument
1081 #   define hadd(x, y) ((vec_t)__builtin_ia32_packuswb128(__builtin_ia32_vphaddubw((vqi_t)(x)), \  argument
1085 #   define hadd(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphaddwd(x), \  argument
1088 #   define hsub(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphsubwd(x), \  argument
1092 #   define hadd(x, y) ((vec_t)__builtin_ia32_packusdw128(__builtin_ia32_vphadduwd((vhi_t)(x)), \  argument
1097 #  define select(d, x, y, m) \  argument
1100 #   define frac(x) __builtin_ia32_vfrczps256(x)  argument
1102 #   define frac(x) __builtin_ia32_vfrczpd256(x)  argument
1106 #   define frac(x) scalar_1op(x, "vfrczss %[in], %[out]")  argument
1108 #   define frac(x) scalar_1op(x, "vfrczsd %[in], %[out]")  argument
1116 static inline half_t low_half(vec_t x)  in low_half()
1133 static inline quarter_t low_quarter(vec_t x)  in low_quarter()
1150 static inline eighth_t low_eighth(vec_t x)  in low_eighth()
1219     vec_t x, y, z, src, inv, alt, sh;  in simd_test()  local