/* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

        .text
ENTRY (_ZGVcN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vl4l4_sincosf)

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
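/* Illustration: a minimal sketch, assuming GCC with AVX and OpenMP SIMD
   support and hypothetical arrays x, s and c, of how this vvv entry
   point can be reached from C:

       #pragma omp declare simd notinbranch
       void sincosf (float x, float *sinp, float *cosp);

       for (int i = 0; i < n; i++)
         sincosf (x[i], &s[i], &c[i]);

   A loop like this may be vectorized into calls to _ZGVcN8vvv_sincosf,
   with eight floats passed in %ymm0 and eight sin plus eight cos
   destination pointers passed as vector arguments; the wrapper below
   unpacks those pointers and scatters the results after calling the
   SSE4 kernel twice.  */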
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $224, %rsp
        /* Save the eight input floats and the destination pointers:
           the eight sin pointers arrive in %xmm1-%xmm4, the first six
           cos pointers in %xmm5-%xmm7, and the last two cos pointers
           on the caller's stack (read below at 16/24(%rbp)).  */
        vmovups   %ymm0, 64(%rsp)
        lea       (%rsp), %rdi
        vmovdqu   %xmm1, 96(%rdi)
        vmovdqu   %xmm2, 112(%rdi)
        vmovdqu   %xmm3, 128(%rdi)
        vmovdqu   %xmm4, 144(%rdi)
        vmovdqu   %xmm5, 160(%rdi)
        lea       32(%rsp), %rsi
        vmovdqu   %xmm6, 144(%rsi)
        vmovdqu   %xmm7, 160(%rsi)
        vzeroupper
        /* sin/cos of the low four floats into 0(%rsp)/32(%rsp).  */
        call      HIDDEN_JUMPTARGET(\callee)
        /* sin/cos of the high four floats into 16(%rsp)/48(%rsp).  */
        vmovdqu   80(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        /* Scatter the sixteen results through the saved pointers.  */
        movq      96(%rsp), %rdx
        movq      104(%rsp), %rsi
        movq      112(%rsp), %r8
        movq      120(%rsp), %r10
        movl      (%rsp), %eax
        movl      4(%rsp), %ecx
        movl      8(%rsp), %edi
        movl      12(%rsp), %r9d
        movl      %eax, (%rdx)
        movl      %ecx, (%rsi)
        movq      128(%rsp), %rax
        movq      136(%rsp), %rcx
        movl      %edi, (%r8)
        movl      %r9d, (%r10)
        movq      144(%rsp), %rdi
        movq      152(%rsp), %r9
        movl      16(%rsp), %r11d
        movl      20(%rsp), %edx
        movl      24(%rsp), %esi
        movl      28(%rsp), %r8d
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movq      160(%rsp), %r11
        movq      168(%rsp), %rdx
        movl      %esi, (%rdi)
        movl      %r8d, (%r9)
        movq      176(%rsp), %rsi
        movq      184(%rsp), %r8
        movl      32(%rsp), %r10d
        movl      36(%rsp), %eax
        movl      40(%rsp), %ecx
        movl      44(%rsp), %edi
        movl      %r10d, (%r11)
        movl      %eax, (%rdx)
        movq      192(%rsp), %r10
        movq      200(%rsp), %rax
        movl      %ecx, (%rsi)
        movl      %edi, (%r8)
        movq      16(%rbp), %rcx
        movq      24(%rbp), %rdi
        movl      48(%rsp), %r9d
        movl      52(%rsp), %r11d
        movl      56(%rsp), %edx
        movl      60(%rsp), %esi
        movl      %r9d, (%r10)
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movl      %esi, (%rdi)
        movq      %rbp, %rsp
        popq      %rbp
        ret
#else
        /* x32: pointers are 32 bits wide.  The eight sin destination
           pointers come in %xmm1-%xmm2 and the eight cos destination
           pointers in %xmm3-%xmm4, four per register.  */
        leal      8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl      $-32, %esp
        pushq     -8(%r10d)
        pushq     %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl      %esp, %ebp
        pushq     %r12
        leal      -80(%rbp), %esi
        pushq     %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal      -112(%rbp), %edi
        movq      %rsi, %r12
        pushq     %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq      %rdi, %rbx
        subl      $184, %esp
        vmovaps   %xmm1, -128(%ebp)
        vmovaps   %xmm2, -144(%ebp)
        vmovaps   %xmm3, -160(%ebp)
        vmovaps   %xmm4, -176(%ebp)
        vmovaps   %ymm0, -208(%ebp)
        vzeroupper
        /* sin/cos of the low four floats into -112(%ebp)/-80(%ebp).  */
        call      HIDDEN_JUMPTARGET(\callee)
        /* sin/cos of the high four floats into -96(%ebp)/-64(%ebp).  */
        leal      16(%r12), %esi
        vmovups   -192(%ebp), %xmm0
        leal      16(%rbx), %edi
        call      HIDDEN_JUMPTARGET(\callee)
        /* Scatter each result through its 32-bit destination pointer.  */
        movq      -128(%ebp), %rax
        vmovss    -112(%ebp), %xmm0
        vmovdqa   -128(%ebp), %xmm7
        vmovdqa   -144(%ebp), %xmm3
        vmovss    %xmm0, (%eax)
        vmovss    -108(%ebp), %xmm0
        vpextrd   $1, %xmm7, %eax
        vmovss    %xmm0, (%eax)
        movq      -120(%ebp), %rax
        vmovss    -104(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -100(%ebp), %xmm0
        vpextrd   $3, %xmm7, %eax
        vmovdqa   -160(%ebp), %xmm7
        vmovss    %xmm0, (%eax)
        movq      -144(%ebp), %rax
        vmovss    -96(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -92(%ebp), %xmm0
        vpextrd   $1, %xmm3, %eax
        vmovss    %xmm0, (%eax)
        movq      -136(%ebp), %rax
        vmovss    -88(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -84(%ebp), %xmm0
        vpextrd   $3, %xmm3, %eax
        vmovss    %xmm0, (%eax)
        movq      -160(%ebp), %rax
        vmovss    -80(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -76(%ebp), %xmm0
        vpextrd   $1, %xmm7, %eax
        vmovss    %xmm0, (%eax)
        movq      -152(%ebp), %rax
        vmovss    -72(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -68(%ebp), %xmm0
        vpextrd   $3, %xmm7, %eax
        vmovss    %xmm0, (%eax)
        movq      -176(%ebp), %rax
        vmovss    -64(%ebp), %xmm0
        vmovdqa   -176(%ebp), %xmm3
        vmovss    %xmm0, (%eax)
        vmovss    -60(%ebp), %xmm0
        vpextrd   $1, %xmm3, %eax
        vmovss    %xmm0, (%eax)
        movq      -168(%ebp), %rax
        vmovss    -56(%ebp), %xmm0
        vmovss    %xmm0, (%eax)
        vmovss    -52(%ebp), %xmm0
        vpextrd   $3, %xmm3, %eax
        vmovss    %xmm0, (%eax)
        addl      $184, %esp
        popq      %rbx
        popq      %r10
        .cfi_def_cfa 10, 0
        popq      %r12
        popq      %rbp
        leal      -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVcN8vvv_sincosf)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vvv_sincosf)