/* Function sincosf vectorized with AVX2, wrapper version.
   Copyright (C) 2014-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
ENTRY (_ZGVdN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vl4l4_sincosf)
libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)

/* AVX2 ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp
	subq	$224, %rsp
	/* Spill the input vector and the four pointer vectors:
	   ymm1/ymm2 carry the 8 sin result pointers, ymm3/ymm4 the
	   8 cos result pointers.  */
	vmovups	%ymm0, 192(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%ymm1, 64(%rdi)
	vmovdqu	%ymm2, 96(%rdi)
	vmovdqu	%ymm3, 128(%rdi)
	vmovdqu	%ymm4, 160(%rdi)
	lea	32(%rsp), %rsi
	vzeroupper
	/* First SSE call: low 4 elements; sin results at (%rsp),
	   cos results at 32(%rsp).  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* Second SSE call: high 4 elements; sin results at 16(%rsp),
	   cos results at 48(%rsp).  */
	vmovups	208(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the 8 sin and 8 cos results through the spilled
	   pointer arrays.  */
	movq	64(%rsp), %rdx
	movq	72(%rsp), %rsi
	movq	80(%rsp), %r8
	movq	88(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	96(%rsp), %rax
	movq	104(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	112(%rsp), %rdi
	movq	120(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movq	128(%rsp), %r11
	movq	136(%rsp), %rdx
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	movq	144(%rsp), %rsi
	movq	152(%rsp), %r8
	movl	32(%rsp), %r10d
	movl	36(%rsp), %eax
	movl	40(%rsp), %ecx
	movl	44(%rsp), %edi
	movl	%r10d, (%r11)
	movl	%eax, (%rdx)
	movq	160(%rsp), %r10
	movq	168(%rsp), %rax
	movl	%ecx, (%rsi)
	movl	%edi, (%r8)
	movq	176(%rsp), %rcx
	movq	184(%rsp), %rdi
	movl	48(%rsp), %r9d
	movl	52(%rsp), %r11d
	movl	56(%rsp), %edx
	movl	60(%rsp), %esi
	movl	%r9d, (%r10)
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$184, %esp
	/* x32: pointers are 32-bit, so ymm1 holds all 8 sin result
	   pointers and ymm2 all 8 cos result pointers.  */
	vmovdqa	%ymm1, -144(%ebp)
	vmovdqa	%ymm2, -176(%ebp)
	vmovaps	%ymm0, -208(%ebp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)
	leal	16(%r12), %esi
	vmovups	-192(%ebp), %xmm0
	leal	16(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
	/* Store each result through its corresponding pointer.  */
	movl	-144(%ebp), %eax
	vmovss	-112(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-140(%ebp), %eax
	vmovss	-108(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-136(%ebp), %eax
	vmovss	-104(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-132(%ebp), %eax
	vmovss	-100(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-128(%ebp), %eax
	vmovss	-96(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-124(%ebp), %eax
	vmovss	-92(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-120(%ebp), %eax
	vmovss	-88(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-116(%ebp), %eax
	vmovss	-84(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-176(%ebp), %eax
	vmovss	-80(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-172(%ebp), %eax
	vmovss	-76(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-168(%ebp), %eax
	vmovss	-72(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-164(%ebp), %eax
	vmovss	-68(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-160(%ebp), %eax
	vmovss	-64(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-156(%ebp), %eax
	vmovss	-60(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-152(%ebp), %eax
	vmovss	-56(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-148(%ebp), %eax
	vmovss	-52(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	addl	$184, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVdN8vvv_sincosf)
WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vvv_sincosf)

#ifndef USE_MULTIARCH
libmvec_hidden_def (_ZGVdN8vvv_sincosf)
#endif