Lines Matching defs:x

8 #  define eq(x, y) ({ \  argument
16 # define eq(x, y) ({ \ argument
31 # define eq(x, y) ((BR(cmpps, _mask, x, y, 0, -1) & ALL_TRUE) == ALL_TRUE) argument
33 # define eq(x, y) (BR(cmppd, _mask, x, y, 0, -1) == ALL_TRUE) argument
35 # define eq(x, y) (B(pcmpeqb, _mask, (vqi_t)(x), (vqi_t)(y), -1) == ALL_TRUE) argument
37 # define eq(x, y) (B(pcmpeqw, _mask, (vhi_t)(x), (vhi_t)(y), -1) == ALL_TRUE) argument
39 # define eq(x, y) (B(pcmpeqd, _mask, (vsi_t)(x), (vsi_t)(y), -1) == ALL_TRUE) argument
41 # define eq(x, y) (B(pcmpeqq, _mask, (vdi_t)(x), (vdi_t)(y), -1) == ALL_TRUE) argument
88 # define eq(x, y) to_bool((x) == (y)) argument
92 # define to_int(x) ({ int i_ = (x)[0]; touch(i_); ((vec_t){ i_ }); }) argument
94 # define to_wint(x) ({ long l_ = (x)[0]; touch(l_); ((vec_t){ l_ }); }) argument
104 # define to_u_int(type, x) ({ \ argument
112 # define to_u_int(type, x) ({ \ argument
120 # define to_uint(x) to_u_int(int, x) argument
122 # define to_uwint(x) to_u_int(long, x) argument
126 # define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x)) argument
130 # define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0) argument
131 # define to_uint(x) BR(cvtudq2ps, _mask, BR(cvtps2udq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0) argument
133 # define to_w_int(x, s) ({ \ argument
146 # define to_wint(x) to_w_int(x, ) argument
147 # define to_uwint(x) to_w_int(x, u) argument
150 # define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0) argument
151 # define to_uint(x) B(cvtudq2pd, _mask, BR(cvtpd2udq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0) argument
153 # define to_wint(x) BR(cvtqq2pd, _mask, BR(cvtpd2qq, _mask, x, (vdi_t)undef(), ~0), undef(), ~0) argument
154 # define to_uwint(x) BR(cvtuqq2pd, _mask, BR(cvtpd2uqq, _mask, x, (vdi_t)undef(), ~0), undef(), ~… argument
159 # define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x)) argument
161 # define to_int(x) __builtin_ia32_cvtdq2pd(__builtin_ia32_cvtpd2dq(x)) argument
165 # define to_int(x) __builtin_ia32_cvtdq2ps256(__builtin_ia32_cvtps2dq256(x)) argument
167 # define to_int(x) __builtin_ia32_cvtdq2pd256(__builtin_ia32_cvtpd2dq256(x)) argument
172 # define scalar_1op(x, op) ({ \ argument
177 # define scalar_2op(x, y, op) ({ \ argument
185 # define low_half(x) (x) argument
186 # define high_half(x) B_(movhlps, , undef(), x) argument
191 static inline vec_t movlhps(vec_t x, vec_t y) { in movlhps()
194 # define insert_pair(x, y, p) \ argument
202 # define recip(x) ({ \ argument
209 # define rsqrt(x) ({ \ argument
218 # define getexp(x) scalar_1op(x, "vgetexpss %[in], %[out], %[out]") argument
219 # define getmant(x) scalar_1op(x, "vgetmantss $0, %[in], %[out], %[out]") argument
221 # define recip(x) scalar_1op(x, "vrcp28ss %[in], %[out], %[out]") argument
222 # define rsqrt(x) scalar_1op(x, "vrsqrt28ss %[in], %[out], %[out]") argument
224 # define recip(x) scalar_1op(x, "vrcp14ss %[in], %[out], %[out]") argument
225 # define rsqrt(x) scalar_1op(x, "vrsqrt14ss %[in], %[out], %[out]") argument
227 # define scale(x, y) scalar_2op(x, y, "vscalefss %[in2], %[in1], %[out]") argument
228 # define sqrt(x) scalar_1op(x, "vsqrtss %[in], %[out], %[out]") argument
229 # define trunc(x) scalar_1op(x, "vrndscaless $0b1011, %[in], %[out], %[out]") argument
231 # define getexp(x) scalar_1op(x, "vgetexpsd %[in], %[out], %[out]") argument
232 # define getmant(x) scalar_1op(x, "vgetmantsd $0, %[in], %[out], %[out]") argument
234 # define recip(x) scalar_1op(x, "vrcp28sd %[in], %[out], %[out]") argument
235 # define rsqrt(x) scalar_1op(x, "vrsqrt28sd %[in], %[out], %[out]") argument
237 # define recip(x) scalar_1op(x, "vrcp14sd %[in], %[out], %[out]") argument
238 # define rsqrt(x) scalar_1op(x, "vrsqrt14sd %[in], %[out], %[out]") argument
240 # define scale(x, y) scalar_2op(x, y, "vscalefsd %[in2], %[in1], %[out]") argument
241 # define sqrt(x) scalar_1op(x, "vsqrtsd %[in], %[out], %[out]") argument
242 # define trunc(x) scalar_1op(x, "vrndscalesd $0b1011, %[in], %[out], %[out]") argument
249 # define _half(x, lh) ({ \ argument
257 # define low_half(x) _half(x, 0) argument
258 # define high_half(x) _half(x, 1) argument
262 # define low_quarter(x) ({ \ argument
271 # define broadcast(x) ({ \ argument
278 # define broadcast_pair(x) ({ \ argument
285 # define broadcast_octet(x) B(broadcastf32x8_, _mask, x, undef(), ~0) argument
286 # define insert_octet(x, y, p) B(insertf32x8_, _mask, x, y, p, undef(), ~0) argument
289 # define frac(x) B(reduceps, _mask, x, 0b00001011, undef(), ~0) argument
291 # define getexp(x) BR(getexpps, _mask, x, undef(), ~0) argument
292 # define getmant(x) BR(getmantps, _mask, x, 0, undef(), ~0) argument
294 # define max(x, y) BR(rangeps, _mask, x, y, 0b0101, undef(), ~0) argument
295 # define min(x, y) BR(rangeps, _mask, x, y, 0b0100, undef(), ~0) argument
297 # define max(x, y) BR_(maxps, _mask, x, y, undef(), ~0) argument
298 # define min(x, y) BR_(minps, _mask, x, y, undef(), ~0) argument
300 # define mix(x, y) B(blendmps_, _mask, x, y, (0b1010101010101010 & ALL_TRUE)) argument
301 # define scale(x, y) BR(scalefps, _mask, x, y, undef(), ~0) argument
303 # define recip(x) BR(rcp28ps, _mask, x, undef(), ~0) argument
304 # define rsqrt(x) BR(rsqrt28ps, _mask, x, undef(), ~0) argument
306 # define recip(x) B(rcp14ps, _mask, x, undef(), ~0) argument
307 # define rsqrt(x) B(rsqrt14ps, _mask, x, undef(), ~0) argument
309 # define shrink1(x) BR_(cvtpd2ps, _mask, (vdf_t)(x), (vsf_half_t){}, ~0) argument
310 # define sqrt(x) BR(sqrtps, _mask, x, undef(), ~0) argument
311 # define trunc(x) BR(rndscaleps_, _mask, x, 0b1011, undef(), ~0) argument
312 # define widen1(x) ((vec_t)BR(cvtps2pd, _mask, x, (vdf_t)undef(), ~0)) argument
314 # define interleave_hi(x, y) B(unpckhps, _mask, x, y, undef(), ~0) argument
315 # define interleave_lo(x, y) B(unpcklps, _mask, x, y, undef(), ~0) argument
316 # define swap(x) B(shufps, _mask, x, x, 0b00011011, undef(), ~0) argument
317 # define swap2(x) B_(vpermilps, _mask, x, 0b00011011, undef(), ~0) argument
319 # define broadcast_quartet(x) B(broadcastf32x4_, _mask, x, undef(), ~0) argument
320 # define insert_pair(x, y, p) \ argument
325 # define insert_quartet(x, y, p) B(insertf32x4_, _mask, x, y, p, undef(), ~0) argument
326 # define interleave_hi(x, y) B(vpermi2varps, _mask, x, interleave_hi, y, ~0) argument
327 # define interleave_lo(x, y) B(vpermt2varps, _mask, interleave_lo, x, y, ~0) argument
328 # define swap(x) ({ \ argument
332 # define swap2(x) B(vpermilps, _mask, \ argument
339 # define broadcast(x) ({ \ argument
346 # define broadcast(x) ({ \ argument
354 # define broadcast_pair(x) B(broadcastf64x2_, _mask, x, undef(), ~0) argument
355 # define insert_pair(x, y, p) B(insertf64x2_, _mask, x, y, p, undef(), ~0) argument
358 # define broadcast_quartet(x) B(broadcastf64x4_, , x, undef(), ~0) argument
359 # define insert_quartet(x, y, p) B(insertf64x4_, _mask, x, y, p, undef(), ~0) argument
362 # define frac(x) B(reducepd, _mask, x, 0b00001011, undef(), ~0) argument
364 # define getexp(x) BR(getexppd, _mask, x, undef(), ~0) argument
365 # define getmant(x) BR(getmantpd, _mask, x, 0, undef(), ~0) argument
367 # define max(x, y) BR(rangepd, _mask, x, y, 0b0101, undef(), ~0) argument
368 # define min(x, y) BR(rangepd, _mask, x, y, 0b0100, undef(), ~0) argument
370 # define max(x, y) BR_(maxpd, _mask, x, y, undef(), ~0) argument
371 # define min(x, y) BR_(minpd, _mask, x, y, undef(), ~0) argument
373 # define mix(x, y) B(blendmpd_, _mask, x, y, 0b10101010) argument
374 # define scale(x, y) BR(scalefpd, _mask, x, y, undef(), ~0) argument
376 # define recip(x) BR(rcp28pd, _mask, x, undef(), ~0) argument
377 # define rsqrt(x) BR(rsqrt28pd, _mask, x, undef(), ~0) argument
379 # define recip(x) B(rcp14pd, _mask, x, undef(), ~0) argument
380 # define rsqrt(x) B(rsqrt14pd, _mask, x, undef(), ~0) argument
382 # define sqrt(x) BR(sqrtpd, _mask, x, undef(), ~0) argument
383 # define trunc(x) BR(rndscalepd_, _mask, x, 0b1011, undef(), ~0) argument
385 # define interleave_hi(x, y) B(unpckhpd, _mask, x, y, undef(), ~0) argument
386 # define interleave_lo(x, y) B(unpcklpd, _mask, x, y, undef(), ~0) argument
387 # define swap(x) B(shufpd, _mask, x, x, 0b01, undef(), ~0) argument
388 # define swap2(x) B_(vpermilpd, _mask, x, 0b01, undef(), ~0) argument
390 # define interleave_hi(x, y) B(vpermi2varpd, _mask, x, interleave_hi, y, ~0) argument
391 # define interleave_lo(x, y) B(vpermt2varpd, _mask, interleave_lo, x, y, ~0) argument
392 # define swap(x) ({ \ argument
396 # define swap2(x) B(vpermilpd, _mask, \ argument
405 # define broadcast(x) \ argument
408 # define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); }) argument
410 # define max(x, y) __builtin_ia32_maxps256(x, y) argument
411 # define min(x, y) __builtin_ia32_minps256(x, y) argument
412 # define recip(x) __builtin_ia32_rcpps256(x) argument
413 # define rsqrt(x) __builtin_ia32_rsqrtps256(x) argument
414 # define sqrt(x) __builtin_ia32_sqrtps256(x) argument
415 # define swap(x) ({ \ argument
420 # define swap2(x) __builtin_ia32_permvarsf256(x, __builtin_ia32_cvtps2dq256(inv) - 1) argument
422 # define swap2(x) ({ \ argument
429 # define broadcast(x) __builtin_ia32_vbroadcastss_ps((vec_t){ x }) argument
431 # define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); }) argument
433 # define interleave_hi(x, y) __builtin_ia32_unpckhps(x, y) argument
434 # define interleave_lo(x, y) __builtin_ia32_unpcklps(x, y) argument
435 # define max(x, y) __builtin_ia32_maxps(x, y) argument
436 # define min(x, y) __builtin_ia32_minps(x, y) argument
437 # define recip(x) __builtin_ia32_rcpps(x) argument
438 # define rsqrt(x) __builtin_ia32_rsqrtps(x) argument
439 # define sqrt(x) __builtin_ia32_sqrtps(x) argument
440 # define swap(x) __builtin_ia32_shufps(x, x, 0b00011011) argument
442 # define swap2(x) __builtin_ia32_vpermilvarps(x, __builtin_ia32_cvtps2dq(inv) - 1) argument
445 # define recip(x) scalar_1op(x, "rcpss %[in], %[out]") argument
446 # define rsqrt(x) scalar_1op(x, "rsqrtss %[in], %[out]") argument
447 # define sqrt(x) scalar_1op(x, "sqrtss %[in], %[out]") argument
452 # define broadcast(x) \ argument
455 # define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); }) argument
457 # define max(x, y) __builtin_ia32_maxpd256(x, y) argument
458 # define min(x, y) __builtin_ia32_minpd256(x, y) argument
459 # define recip(x) ({ \ argument
466 # define rsqrt(x) ({ \ argument
473 # define sqrt(x) __builtin_ia32_sqrtpd256(x) argument
474 # define swap(x) ({ \ argument
479 # define swap2(x) __builtin_ia32_permdf256(x, 0b00011011) argument
482 # define interleave_hi(x, y) __builtin_ia32_unpckhpd(x, y) argument
483 # define interleave_lo(x, y) __builtin_ia32_unpcklpd(x, y) argument
484 # define max(x, y) __builtin_ia32_maxpd(x, y) argument
485 # define min(x, y) __builtin_ia32_minpd(x, y) argument
486 # define recip(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rcpps(__builtin_ia32_cvtpd2ps(x))) argument
487 # define rsqrt(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rsqrtps(__builtin_ia32_cvtpd2ps(x))) argument
488 # define sqrt(x) __builtin_ia32_sqrtpd(x) argument
489 # define swap(x) __builtin_ia32_shufpd(x, x, 0b01) argument
491 # define swap2(x) __builtin_ia32_vpermilvarpd(x, __builtin_ia32_pmovsxdq128( \ argument
495 # define recip(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rcpss %[out], %[out]; cvtss2sd %[out], %[… argument
496 # define rsqrt(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rsqrtss %[out], %[out]; cvtss2sd %[out], … argument
497 # define sqrt(x) scalar_1op(x, "sqrtsd %[in], %[out]") argument
505 # define low_half(x) ({ \ argument
515 # define low_quarter(x) ({ \ argument
524 # define broadcast(x) ({ \ argument
530 # define broadcast2(x) ({ \ argument
536 # define broadcast_pair(x) ({ \ argument
543 # define broadcast_octet(x) ((vec_t)B(broadcasti32x8_, _mask, (vsi_octet_t)(x), (vsi_t)undef(), ~… argument
544 # define insert_octet(x, y, p) ((vec_t)B(inserti32x8_, _mask, (vsi_t)(x), (vsi_octet_t)(y), p, (v… argument
547 # define interleave_hi(x, y) ((vec_t)B(punpckhdq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), … argument
548 # define interleave_lo(x, y) ((vec_t)B(punpckldq, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), … argument
549 # define swap(x) ((vec_t)B(pshufd, _mask, (vsi_t)(x), 0b00011011, (vsi_t)undef(), ~0)) argument
551 # define broadcast_quartet(x) ((vec_t)B(broadcasti32x4_, _mask, (vsi_quartet_t)(x), (vsi_t)undef(… argument
552 # define insert_pair(x, y, p) \ argument
558 # define insert_quartet(x, y, p) ((vec_t)B(inserti32x4_, _mask, (vsi_t)(x), (vsi_quartet_t)(y), p… argument
559 # define interleave_hi(x, y) ((vec_t)B(vpermi2vard, _mask, (vsi_t)(x), interleave_hi, (vsi_t)(y),… argument
560 # define interleave_lo(x, y) ((vec_t)B(vpermt2vard, _mask, interleave_lo, (vsi_t)(x), (vsi_t)(y),… argument
561 # define swap(x) ((vec_t)B(pshufd, _mask, \ argument
565 # define swap2(x) ((vec_t)B_(permvarsi, _mask, (vsi_t)(x), (vsi_t)(inv - 1), (vsi_t)undef(), ~0)) argument
567 # define mix(x, y) ((vec_t)B(blendmd_, _mask, (vsi_t)(x), (vsi_t)(y), \ argument
569 # define rotr(x, n) ((vec_t)B(alignd, _mask, (vsi_t)(x), (vsi_t)(x), n, (vsi_t)undef(), ~0)) argument
570 # define shrink1(x) ((half_t)B(pmovqd, _mask, (vdi_t)(x), (vsi_half_t){}, ~0)) argument
572 # define broadcast(x) ({ \ argument
579 # define broadcast2(x) ({ \ argument
586 # define broadcast_pair(x) ((vec_t)B(broadcasti64x2_, _mask, (vdi_pair_t)(x), (vdi_t)undef(), ~0)) argument
587 # define insert_pair(x, y, p) ((vec_t)B(inserti64x2_, _mask, (vdi_t)(x), (vdi_pair_t)(y), p, (vdi… argument
590 # define broadcast_quartet(x) ((vec_t)B(broadcasti64x4_, , (vdi_quartet_t)(x), (vdi_t)undef(), ~0… argument
591 # define insert_quartet(x, y, p) ((vec_t)B(inserti64x4_, _mask, (vdi_t)(x), (vdi_quartet_t)(y), p… argument
594 # define interleave_hi(x, y) ((vec_t)B(punpckhqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(),… argument
595 # define interleave_lo(x, y) ((vec_t)B(punpcklqdq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(),… argument
596 # define swap(x) ((vec_t)B(pshufd, _mask, (vsi_t)(x), 0b01001110, (vsi_t)undef(), ~0)) argument
598 # define interleave_hi(x, y) ((vec_t)B(vpermi2varq, _mask, (vdi_t)(x), interleave_hi, (vdi_t)(y),… argument
599 # define interleave_lo(x, y) ((vec_t)B(vpermt2varq, _mask, interleave_lo, (vdi_t)(x), (vdi_t)(y),… argument
600 # define swap(x) ((vec_t)B(pshufd, _mask, \ argument
604 # define swap2(x) ((vec_t)B(permvardi, _mask, (vdi_t)(x), (vdi_t)(inv - 1), (vdi_t)undef(), ~0)) argument
606 # define mix(x, y) ((vec_t)B(blendmq_, _mask, (vdi_t)(x), (vdi_t)(y), 0b10101010)) argument
607 # define rotr(x, n) ((vec_t)B(alignq, _mask, (vdi_t)(x), (vdi_t)(x), n, (vdi_t)undef(), ~0)) argument
609 # define swap3(x) ((vec_t)B_(permdi, _mask, (vdi_t)(x), 0b00011011, (vdi_t)undef(), ~0)) argument
611 # define swap3(x) ({ \ argument
618 # define abs(x) B(pabsd, _mask, x, undef(), ~0) argument
619 # define max(x, y) B(pmaxsd, _mask, x, y, undef(), ~0) argument
620 # define min(x, y) B(pminsd, _mask, x, y, undef(), ~0) argument
621 # define mul_full(x, y) ((vec_t)B(pmuldq, _mask, x, y, (vdi_t)undef(), ~0)) argument
622 # define widen1(x) ((vec_t)B(pmovsxdq, _mask, x, (vdi_t)undef(), ~0)) argument
624 # define max(x, y) ((vec_t)B(pmaxud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) argument
625 # define min(x, y) ((vec_t)B(pminud, _mask, (vsi_t)(x), (vsi_t)(y), (vsi_t)undef(), ~0)) argument
626 # define mul_full(x, y) ((vec_t)B(pmuludq, _mask, (vsi_t)(x), (vsi_t)(y), (vdi_t)undef(), ~0)) argument
627 # define widen1(x) ((vec_t)B(pmovzxdq, _mask, (vsi_half_t)(x), (vdi_t)undef(), ~0)) argument
629 # define abs(x) ((vec_t)B(pabsq, _mask, (vdi_t)(x), (vdi_t)undef(), ~0)) argument
630 # define max(x, y) ((vec_t)B(pmaxsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) argument
631 # define min(x, y) ((vec_t)B(pminsq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) argument
633 # define max(x, y) ((vec_t)B(pmaxuq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) argument
634 # define min(x, y) ((vec_t)B(pminuq, _mask, (vdi_t)(x), (vdi_t)(y), (vdi_t)undef(), ~0)) argument
639 # define broadcast(x) ({ \ argument
645 # define broadcast2(x) ({ \ argument
651 # define interleave_hi(x, y) ((vec_t)B(punpckhbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), … argument
652 # define interleave_lo(x, y) ((vec_t)B(punpcklbw, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), … argument
653 # define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 8, (vdi_t)undef(), ~0… argument
654 # define swap(x) ((vec_t)B(pshufb, _mask, (vqi_t)(x), (vqi_t)(inv - 1), (vqi_t)undef(), ~0)) argument
656 # define interleave_hi(x, y) ((vec_t)B(vpermi2varqi, _mask, (vqi_t)(x), interleave_hi, (vqi_t)(y)… argument
657 # define interleave_lo(x, y) ((vec_t)B(vpermt2varqi, _mask, interleave_lo, (vqi_t)(x), (vqi_t)(y)… argument
659 # define mix(x, y) ((vec_t)B(blendmb_, _mask, (vqi_t)(x), (vqi_t)(y), \ argument
661 # define shrink1(x) ((half_t)B(pmovwb, _mask, (vhi_t)(x), (vqi_half_t){}, ~0)) argument
662 # define shrink2(x) ((quarter_t)B(pmovdb, _mask, (vsi_t)(x), (vqi_quarter_t){}, ~0)) argument
663 # define shrink3(x) ((eighth_t)B(pmovqb, _mask, (vdi_t)(x), (vqi_eighth_t){}, ~0)) argument
665 # define swap2(x) ((vec_t)B(permvarqi, _mask, (vqi_t)(x), (vqi_t)(inv - 1), (vqi_t)undef(), ~0)) argument
668 # define broadcast(x) ({ \ argument
674 # define broadcast2(x) ({ \ argument
680 # define interleave_hi(x, y) ((vec_t)B(punpckhwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), … argument
681 # define interleave_lo(x, y) ((vec_t)B(punpcklwd, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), … argument
682 # define rotr(x, n) ((vec_t)B(palignr, _mask, (vdi_t)(x), (vdi_t)(x), (n) * 16, (vdi_t)undef(), ~… argument
683 # define swap(x) ((vec_t)B(pshufd, _mask, \ argument
689 # define interleave_hi(x, y) ((vec_t)B(vpermi2varhi, _mask, (vhi_t)(x), interleave_hi, (vhi_t)(y)… argument
690 # define interleave_lo(x, y) ((vec_t)B(vpermt2varhi, _mask, interleave_lo, (vhi_t)(x), (vhi_t)(y)… argument
692 # define mix(x, y) ((vec_t)B(blendmw_, _mask, (vhi_t)(x), (vhi_t)(y), \ argument
694 # define shrink1(x) ((half_t)B(pmovdw, _mask, (vsi_t)(x), (vhi_half_t){}, ~0)) argument
695 # define shrink2(x) ((quarter_t)B(pmovqw, _mask, (vdi_t)(x), (vhi_quarter_t){}, ~0)) argument
696 # define swap2(x) ((vec_t)B(permvarhi, _mask, (vhi_t)(x), (vhi_t)(inv - 1), (vhi_t)undef(), ~0)) argument
699 # define abs(x) ((vec_t)B(pabsb, _mask, (vqi_t)(x), (vqi_t)undef(), ~0)) argument
700 # define max(x, y) ((vec_t)B(pmaxsb, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0)) argument
701 # define min(x, y) ((vec_t)B(pminsb, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0)) argument
702 # define widen1(x) ((vec_t)B(pmovsxbw, _mask, (vqi_half_t)(x), (vhi_t)undef(), ~0)) argument
703 # define widen2(x) ((vec_t)B(pmovsxbd, _mask, (vqi_quarter_t)(x), (vsi_t)undef(), ~0)) argument
704 # define widen3(x) ((vec_t)B(pmovsxbq, _mask, (vqi_eighth_t)(x), (vdi_t)undef(), ~0)) argument
706 # define max(x, y) ((vec_t)B(pmaxub, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0)) argument
707 # define min(x, y) ((vec_t)B(pminub, _mask, (vqi_t)(x), (vqi_t)(y), (vqi_t)undef(), ~0)) argument
708 # define widen1(x) ((vec_t)B(pmovzxbw, _mask, (vqi_half_t)(x), (vhi_t)undef(), ~0)) argument
709 # define widen2(x) ((vec_t)B(pmovzxbd, _mask, (vqi_quarter_t)(x), (vsi_t)undef(), ~0)) argument
710 # define widen3(x) ((vec_t)B(pmovzxbq, _mask, (vqi_eighth_t)(x), (vdi_t)undef(), ~0)) argument
712 # define abs(x) B(pabsw, _mask, x, undef(), ~0) argument
713 # define max(x, y) B(pmaxsw, _mask, x, y, undef(), ~0) argument
714 # define min(x, y) B(pminsw, _mask, x, y, undef(), ~0) argument
715 # define mul_hi(x, y) B(pmulhw, _mask, x, y, undef(), ~0) argument
716 # define widen1(x) ((vec_t)B(pmovsxwd, _mask, x, (vsi_t)undef(), ~0)) argument
717 # define widen2(x) ((vec_t)B(pmovsxwq, _mask, x, (vdi_t)undef(), ~0)) argument
719 # define max(x, y) ((vec_t)B(pmaxuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0)) argument
720 # define min(x, y) ((vec_t)B(pminuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0)) argument
721 # define mul_hi(x, y) ((vec_t)B(pmulhuw, _mask, (vhi_t)(x), (vhi_t)(y), (vhi_t)undef(), ~0)) argument
722 # define widen1(x) ((vec_t)B(pmovzxwd, _mask, (vhi_half_t)(x), (vsi_t)undef(), ~0)) argument
723 # define widen2(x) ((vec_t)B(pmovzxwq, _mask, (vhi_quarter_t)(x), (vdi_t)undef(), ~0)) argument
727 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhbw128((vqi_t)(x), (vqi_t)(y))) argument
728 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklbw128((vqi_t)(x), (vqi_t)(y))) argument
730 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhwd128((vhi_t)(x), (vhi_t)(y))) argument
731 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklwd128((vhi_t)(x), (vhi_t)(y))) argument
732 # define swap(x) ((vec_t)__builtin_ia32_pshufd( \ argument
736 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhdq128((vsi_t)(x), (vsi_t)(y))) argument
737 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpckldq128((vsi_t)(x), (vsi_t)(y))) argument
738 # define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b00011011)) argument
740 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhqdq128((vdi_t)(x), (vdi_t)(y))) argument
741 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklqdq128((vdi_t)(x), (vdi_t)(y))) argument
742 # define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b01001110)) argument
745 # define max(x, y) ((vec_t)__builtin_ia32_pmaxub128((vqi_t)(x), (vqi_t)(y))) argument
746 # define min(x, y) ((vec_t)__builtin_ia32_pminub128((vqi_t)(x), (vqi_t)(y))) argument
748 # define max(x, y) __builtin_ia32_pmaxsw128(x, y) argument
749 # define min(x, y) __builtin_ia32_pminsw128(x, y) argument
750 # define mul_hi(x, y) __builtin_ia32_pmulhw128(x, y) argument
752 # define mul_hi(x, y) ((vec_t)__builtin_ia32_pmulhuw128((vhi_t)(x), (vhi_t)(y))) argument
754 # define mul_full(x, y) ((vec_t)__builtin_ia32_pmuludq128((vsi_t)(x), (vsi_t)(y))) argument
756 # define select(d, x, y, m) ({ \ argument
763 # define swap_lanes(x, y, func, type) ({ \ argument
771 # define broadcast(x) ({ char s_ = (x); vec_t d_; asm ( "vpbroadcastb %1,%0" : "=x" (d_) : "m" (s_… argument
772 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignb256((vqi_t)(x), (vqi_t)(y))) argument
773 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)… argument
776 # define broadcast(x) ({ short s_ = (x); vec_t d_; asm ( "vpbroadcastw %1,%0" : "=x" (d_) : "m" (s… argument
777 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignw256((vhi_t)(x), (vhi_t)(y))) argument
778 # define hadd(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phaddw256, vhi_t)) argument
779 # define hsub(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phsubw256, vhi_t)) argument
780 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw256((vhi_t)(x), (vhi_t)(y), 0b10101010)) argument
781 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)… argument
784 # define broadcast(x) ({ int s_ = (x); vec_t d_; asm ( "vpbroadcastd %1,%0" : "=x" (d_) : "m" (s_)… argument
785 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignd256((vsi_t)(x), (vsi_t)(y))) argument
786 # define hadd(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phaddd256, vsi_t)) argument
787 # define hsub(x, y) ((vec_t)swap_lanes(x, y, __builtin_ia32_phsubd256, vsi_t)) argument
788 # define mix(x, y) ((vec_t)__builtin_ia32_pblendd256((vsi_t)(x), (vsi_t)(y), 0b10101010)) argument
789 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)… argument
791 # define select(d, x, y, m) ({ \ argument
796 # define swap(x) ((vec_t)__builtin_ia32_permvarsi256((vsi_t)(x), (vsi_t)inv - 1)) argument
798 # define mix(x, y) ((vec_t)__builtin_ia32_pblendd256((vsi_t)(x), (vsi_t)(y), 0b11001100)) argument
799 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr256(__builtin_ia32_permti256((vdi_t)(x), (vdi_t)… argument
801 # define select(d, x, y, m) ({ \ argument
806 # define swap(x) ((vec_t)__builtin_ia32_permdi256((vdi_t)(x), 0b00011011)) argument
807 # define swap2(x) ({ \ argument
813 # define abs(x) ((vec_t)__builtin_ia32_pabsb256((vqi_t)(x))) argument
814 # define max(x, y) ((vec_t)__builtin_ia32_pmaxsb256((vqi_t)(x), (vqi_t)(y))) argument
815 # define min(x, y) ((vec_t)__builtin_ia32_pminsb256((vqi_t)(x), (vqi_t)(y))) argument
816 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw256((vqi_t)(x))) argument
817 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd256((vqi_t)(x))) argument
818 # define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq256((vqi_t)(x))) argument
820 # define max(x, y) ((vec_t)__builtin_ia32_pmaxub256((vqi_t)(x), (vqi_t)(y))) argument
821 # define min(x, y) ((vec_t)__builtin_ia32_pminub256((vqi_t)(x), (vqi_t)(y))) argument
822 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw256((vqi_t)(x))) argument
823 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd256((vqi_t)(x))) argument
824 # define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq256((vqi_t)(x))) argument
826 # define abs(x) __builtin_ia32_pabsw256(x) argument
827 # define max(x, y) __builtin_ia32_pmaxsw256(x, y) argument
828 # define min(x, y) __builtin_ia32_pminsw256(x, y) argument
829 # define mul_hi(x, y) __builtin_ia32_pmulhw256(x, y) argument
830 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd256(x)) argument
831 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq256(x)) argument
833 # define max(x, y) ((vec_t)__builtin_ia32_pmaxuw256((vhi_t)(x), (vhi_t)(y))) argument
834 # define min(x, y) ((vec_t)__builtin_ia32_pminuw256((vhi_t)(x), (vhi_t)(y))) argument
835 # define mul_hi(x, y) ((vec_t)__builtin_ia32_pmulhuw256((vhi_t)(x), (vhi_t)(y))) argument
836 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd256((vhi_t)(x))) argument
837 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq256((vhi_t)(x))) argument
839 # define abs(x) __builtin_ia32_pabsd256(x) argument
840 # define max(x, y) __builtin_ia32_pmaxsd256(x, y) argument
841 # define min(x, y) __builtin_ia32_pminsd256(x, y) argument
842 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq256(x)) argument
844 # define max(x, y) ((vec_t)__builtin_ia32_pmaxud256((vsi_t)(x), (vsi_t)(y))) argument
845 # define min(x, y) ((vec_t)__builtin_ia32_pminud256((vsi_t)(x), (vsi_t)(y))) argument
846 # define mul_full(x, y) ((vec_t)__builtin_ia32_pmuludq256((vsi_t)(x), (vsi_t)(y))) argument
847 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq256((vsi_t)(x))) argument
849 # define broadcast(x) ({ \ argument
858 # define broadcast(x) ({ long long s_ = (x); vec_t d_; asm ( "vpbroadcastq %1,%0" : "=x" (d_) : "m… argument
863 # define addsub(x, y) __builtin_ia32_addsubps(x, y) argument
864 # define dup_hi(x) __builtin_ia32_movshdup(x) argument
865 # define dup_lo(x) __builtin_ia32_movsldup(x) argument
866 # define hadd(x, y) __builtin_ia32_haddps(x, y) argument
867 # define hsub(x, y) __builtin_ia32_hsubps(x, y) argument
869 # define addsub(x, y) __builtin_ia32_addsubpd(x, y) argument
870 # define dup_lo(x) ({ \ argument
875 # define hadd(x, y) __builtin_ia32_haddpd(x, y) argument
876 # define hsub(x, y) __builtin_ia32_hsubpd(x, y) argument
880 # define addsub(x, y) __builtin_ia32_addsubps256(x, y) argument
881 # define dup_hi(x) __builtin_ia32_movshdup256(x) argument
882 # define dup_lo(x) __builtin_ia32_movsldup256(x) argument
884 # define hadd(x, y) __builtin_ia32_permvarsf256(__builtin_ia32_haddps256(x, y), \ argument
886 # define hsub(x, y) __builtin_ia32_permvarsf256(__builtin_ia32_hsubps256(x, y), \ argument
889 # define hadd(x, y) ({ \ argument
893 # define hsub(x, y) ({ \ argument
899 # define addsub(x, y) __builtin_ia32_addsubpd256(x, y) argument
900 # define dup_lo(x) __builtin_ia32_movddup256(x) argument
902 # define hadd(x, y) __builtin_ia32_permdf256(__builtin_ia32_haddpd256(x, y), 0b11011000) argument
903 # define hsub(x, y) __builtin_ia32_permdf256(__builtin_ia32_hsubpd256(x, y), 0b11011000) argument
905 # define hadd(x, y) ({ \ argument
909 # define hsub(x, y) ({ \ argument
918 # define abs(x) ((vec_t)__builtin_ia32_pabsb128((vqi_t)(x))) argument
920 # define abs(x) __builtin_ia32_pabsw128(x) argument
922 # define abs(x) __builtin_ia32_pabsd128(x) argument
925 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignb128((vqi_t)(x), (vqi_t)(y))) argument
926 # define swap(x) ((vec_t)__builtin_ia32_pshufb128((vqi_t)(x), (vqi_t)(inv - 1))) argument
927 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 8)) argument
929 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignw128((vhi_t)(x), (vhi_t)(y))) argument
930 # define hadd(x, y) ((vec_t)__builtin_ia32_phaddw128((vhi_t)(x), (vhi_t)(y))) argument
931 # define hsub(x, y) ((vec_t)__builtin_ia32_phsubw128((vhi_t)(x), (vhi_t)(y))) argument
932 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 16)) argument
934 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignd128((vsi_t)(x), (vsi_t)(y))) argument
935 # define hadd(x, y) ((vec_t)__builtin_ia32_phaddd128((vsi_t)(x), (vsi_t)(y))) argument
936 # define hsub(x, y) ((vec_t)__builtin_ia32_phsubd128((vsi_t)(x), (vsi_t)(y))) argument
937 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 32)) argument
939 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64)) argument
944 # define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y))) argument
945 # define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y))) argument
946 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw128((vqi_t)(x))) argument
947 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd128((vqi_t)(x))) argument
948 # define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq128((vqi_t)(x))) argument
950 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd128(x)) argument
951 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq128(x)) argument
953 # define max(x, y) __builtin_ia32_pmaxsd128(x, y) argument
954 # define min(x, y) __builtin_ia32_pminsd128(x, y) argument
955 # define mul_full(x, y) ((vec_t)__builtin_ia32_pmuldq128(x, y)) argument
956 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq128(x)) argument
958 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw128((vqi_t)(x))) argument
959 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd128((vqi_t)(x))) argument
960 # define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq128((vqi_t)(x))) argument
962 # define max(x, y) ((vec_t)__builtin_ia32_pmaxuw128((vhi_t)(x), (vhi_t)(y))) argument
963 # define min(x, y) ((vec_t)__builtin_ia32_pminuw128((vhi_t)(x), (vhi_t)(y))) argument
964 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd128((vhi_t)(x))) argument
965 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq128((vhi_t)(x))) argument
967 # define max(x, y) ((vec_t)__builtin_ia32_pmaxud128((vsi_t)(x), (vsi_t)(y))) argument
968 # define min(x, y) ((vec_t)__builtin_ia32_pminud128((vsi_t)(x), (vsi_t)(y))) argument
969 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq128((vsi_t)(x))) argument
973 # define select(d, x, y, m) \ argument
976 # define dot_product(x, y) __builtin_ia32_dpps(x, y, 0b11110001) argument
977 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps(y, x, m)) argument
978 # define trunc(x) __builtin_ia32_roundps(x, 0b1011) argument
980 # define dot_product(x, y) __builtin_ia32_dppd(x, y, 0b00110001) argument
981 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd(y, x, m)) argument
982 # define trunc(x) __builtin_ia32_roundpd(x, 0b1011) argument
985 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b10101010)) argument
987 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11001100)) argument
989 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11110000)) argument
991 # define mix(x, y) __builtin_ia32_blendps(x, y, 0b1010) argument
993 # define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10) argument
998 # define dot_product(x, y) ({ \ argument
1002 # define mix(x, y) __builtin_ia32_blendps256(x, y, 0b10101010) argument
1003 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps256(y, x, m)) argument
1004 # define select2(d, x, y, m) ({ \ argument
1009 # define trunc(x) __builtin_ia32_roundps256(x, 0b1011) argument
1011 # define mix(x, y) __builtin_ia32_blendpd256(x, y, 0b1010) argument
1012 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd256(y, x, m)) argument
1013 # define select2(d, x, y, m) ({ \ argument
1018 # define trunc(x) __builtin_ia32_roundpd256(x, 0b1011) argument
1022 # define max(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ > y_ ? x_ : y_; })}) argument
1023 # define min(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ < y_ ? x_ : y_; })}) argument
1026 # define trunc(x) scalar_1op(x, "roundss $0b1011, %[in], %[out]") argument
1028 # define trunc(x) scalar_1op(x, "roundsd $0b1011, %[in], %[out]") argument
1038 # define select(d, x, y, m) \ argument
1041 # define swap2(x) ((vec_t)__builtin_ia32_vpperm((vqi_t)(x), (vqi_t)(x), (vqi_t)inv - 1)) argument
1043 # define swap2(x) \ argument
1048 # define frac(x) __builtin_ia32_vfrczps(x) argument
1050 # define swap2(x) ({ \ argument
1060 # define frac(x) __builtin_ia32_vfrczpd(x) argument
1062 # define swap2(x) ({ \ argument
1076 # define hadd(x, y) ((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphaddbw((vqi_t)(x)), \ argument
1078 # define hsub(x, y) ((vec_t)__builtin_ia32_packsswb128(__builtin_ia32_vphsubbw((vqi_t)(x)), \ argument
1081 # define hadd(x, y) ((vec_t)__builtin_ia32_packuswb128(__builtin_ia32_vphaddubw((vqi_t)(x)), \ argument
1085 # define hadd(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphaddwd(x), \ argument
1088 # define hsub(x, y) __builtin_ia32_packssdw128(__builtin_ia32_vphsubwd(x), \ argument
1092 # define hadd(x, y) ((vec_t)__builtin_ia32_packusdw128(__builtin_ia32_vphadduwd((vhi_t)(x)), \ argument
1097 # define select(d, x, y, m) \ argument
1100 # define frac(x) __builtin_ia32_vfrczps256(x) argument
1102 # define frac(x) __builtin_ia32_vfrczpd256(x) argument
1106 # define frac(x) scalar_1op(x, "vfrczss %[in], %[out]") argument
1108 # define frac(x) scalar_1op(x, "vfrczsd %[in], %[out]") argument
1116 static inline half_t low_half(vec_t x) in low_half()
1133 static inline quarter_t low_quarter(vec_t x) in low_quarter()
1150 static inline eighth_t low_eighth(vec_t x) in low_eighth()
1219 vec_t x, y, z, src, inv, alt, sh; in simd_test() local