/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

        .text
ENTRY (_ZGVcN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVcN4vl8l8_sincos)

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $160, %rsp
        /* Save the argument vector; %xmm1/%xmm2 hold the four sine
           result pointers and %xmm3/%xmm4 the four cosine result
           pointers.  */
        vmovupd   %ymm0, 64(%rsp)
        lea       (%rsp), %rdi
        vmovdqu   %xmm1, 96(%rdi)
        vmovdqu   %xmm2, 112(%rdi)
        vmovdqu   %xmm3, 128(%rdi)
        vmovdqu   %xmm4, 144(%rdi)
        lea       32(%rsp), %rsi
        vzeroupper
        /* Low half: sines to 0(%rsp), cosines to 32(%rsp).  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovdqu   80(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        /* High half: sines to 16(%rsp), cosines to 48(%rsp).  */
        call      HIDDEN_JUMPTARGET(\callee)
        /* Scatter the four sine and four cosine results through the
           saved pointers.  */
        movq      96(%rsp), %rdx
        movq      104(%rsp), %rsi
        movq      112(%rsp), %r8
        movq      120(%rsp), %r10
        movq      (%rsp), %rax
        movq      8(%rsp), %rcx
        movq      16(%rsp), %rdi
        movq      24(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      128(%rsp), %rax
        movq      136(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      144(%rsp), %rdi
        movq      152(%rsp), %r9
        movq      32(%rsp), %r11
        movq      40(%rsp), %rdx
        movq      48(%rsp), %rsi
        movq      56(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      %rbp, %rsp
        popq      %rbp
        ret
#else
        leal      8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl      $-32, %esp
        pushq     -8(%r10d)
        pushq     %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl      %esp, %ebp
        pushq     %r12
        leal      -80(%rbp), %esi
        pushq     %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal      -112(%rbp), %edi
        movq      %rsi, %r12
        pushq     %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq      %rdi, %rbx
        subl      $152, %esp
        /* x32: pointers are 32 bits, so the four sine result pointers
           fit in %xmm1 and the four cosine result pointers in %xmm2.  */
        vmovaps   %xmm1, -128(%ebp)
        vmovaps   %xmm2, -144(%ebp)
        vmovapd   %ymm0, -176(%ebp)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
        leal      16(%r12), %esi
        vmovupd   -160(%ebp), %xmm0
        leal      16(%rbx), %edi
        call      HIDDEN_JUMPTARGET(\callee)
        /* Scatter the results through the saved 32-bit pointers.  */
        movq      -128(%ebp), %rax
        vmovsd    -112(%ebp), %xmm0
        vmovdqa   -128(%ebp), %xmm5
        vmovdqa   -144(%ebp), %xmm1
        vmovsd    %xmm0, (%eax)
        vmovsd    -104(%ebp), %xmm0
        vpextrd   $1, %xmm5, %eax
        vmovsd    %xmm0, (%eax)
        movq      -120(%ebp), %rax
        vmovsd    -96(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -88(%ebp), %xmm0
        vpextrd   $3, %xmm5, %eax
        vmovsd    %xmm0, (%eax)
        movq      -144(%ebp), %rax
        vmovsd    -80(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -72(%ebp), %xmm0
        vpextrd   $1, %xmm1, %eax
        vmovsd    %xmm0, (%eax)
        movq      -136(%ebp), %rax
        vmovsd    -64(%ebp), %xmm0
        vmovsd    %xmm0, (%eax)
        vmovsd    -56(%ebp), %xmm0
        vpextrd   $3, %xmm1, %eax
        vmovsd    %xmm0, (%eax)
        addl      $152, %esp
        popq      %rbx
        popq      %r10
        .cfi_def_cfa 10, 0
        popq      %r12
        popq      %rbp
        leal      -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVcN4vvv_sincos)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)
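
/* Illustrative usage sketch (an assumption added for clarity, not part
   of the original file): these entry points follow the x86_64 vector
   function ABI mangling _ZGV<isa><mask><len><args>_<name>, where 'c'
   selects AVX, 'N' means notinbranch (unmasked), '4' is the number of
   lanes, 'v' marks a vector argument and 'l8' a pointer argument that
   is linear with stride 8.  A compiler that honours a declaration such
   as

       #pragma omp declare simd notinbranch
       void sincos (double x, double *s, double *c);

   may turn a scalar loop like

       void
       fill (const double *x, double *s, double *c, int n)
       {
         for (int i = 0; i < n; i++)
           sincos (x[i], &s[i], &c[i]);
       }

   into calls to one of these vector variants, passing four doubles in
   %ymm0 and the result pointers in vector registers; the wrapper above
   then forwards each 128-bit half to the SSE entry point
   _ZGVbN2vl8l8_sincos.  The exact compiler and flags needed to trigger
   this vectorization are not specified by this file.  */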