/* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

        .text
ENTRY (_ZGVcN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vl4l4_sincosf)
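
/* For reference, a minimal C-level sketch of what the vl4l4 wrapper above
   computes, assuming the vl4l4 parameters are two result pointers that each
   advance four bytes per lane; the names and prototypes here are
   illustrative only, not the installed declarations:

     #include <immintrin.h>

     extern void _ZGVbN4vl4l4_sincosf (__m128, float *, float *);

     static void
     sincosf8_vl4l4_sketch (__m256 x, float *sin8, float *cos8)
     {
       // Low four lanes, then high four lanes, via the SSE4 kernel.
       _ZGVbN4vl4l4_sincosf (_mm256_castps256_ps128 (x), sin8, cos8);
       _ZGVbN4vl4l4_sincosf (_mm256_extractf128_ps (x, 1), sin8 + 4, cos8 + 4);
     }
   */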

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
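/* A rough C-level sketch of the scatter this macro performs (LP64 case),
   under the assumption, matching the stores of %xmm1-%xmm7 and the two
   %rbp-relative loads below, that the sixteen per-lane result pointers
   arrive two per XMM register with the last two on the caller's stack.
   The helper name and prototypes are illustrative only:

     #include <immintrin.h>

     extern void _ZGVbN4vl4l4_sincosf (__m128, float *, float *);

     static void
     sincosf8_vvv_sketch (__m256 x, float *sinp[8], float *cosp[8])
     {
       float s[8], c[8];
       int i;

       _ZGVbN4vl4l4_sincosf (_mm256_castps256_ps128 (x), s, c);
       _ZGVbN4vl4l4_sincosf (_mm256_extractf128_ps (x, 1), s + 4, c + 4);
       for (i = 0; i < 8; i++)
         {
           *sinp[i] = s[i];
           *cosp[i] = c[i];
         }
     }
   */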
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $224, %rsp
        /* Save the 256-bit argument at 64(%rsp) and the pointer arguments
           from %xmm1-%xmm7 at 96(%rsp) and above; the first SSE4 call
           writes the sine results for lanes 0-3 to (%rsp) and the cosine
           results to 32(%rsp).  */
        vmovups   %ymm0, 64(%rsp)
        lea       (%rsp), %rdi
        vmovdqu   %xmm1, 96(%rdi)
        vmovdqu   %xmm2, 112(%rdi)
        vmovdqu   %xmm3, 128(%rdi)
        vmovdqu   %xmm4, 144(%rdi)
        vmovdqu   %xmm5, 160(%rdi)
        lea       32(%rsp), %rsi
        vmovdqu   %xmm6, 144(%rsi)
        vmovdqu   %xmm7, 160(%rsi)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
        /* Second call handles lanes 4-7: reload the upper half of the
           saved argument and advance both result buffers by 16 bytes.  */
        vmovdqu   80(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        /* Scatter the eight sine and eight cosine results through the
           per-lane pointers; the last two pointers come from the caller's
           stack via %rbp.  */
        movq      96(%rsp), %rdx
        movq      104(%rsp), %rsi
        movq      112(%rsp), %r8
        movq      120(%rsp), %r10
        movl      (%rsp), %eax
        movl      4(%rsp), %ecx
        movl      8(%rsp), %edi
        movl      12(%rsp), %r9d
        movl      %eax, (%rdx)
        movl      %ecx, (%rsi)
        movq      128(%rsp), %rax
        movq      136(%rsp), %rcx
        movl      %edi, (%r8)
        movl      %r9d, (%r10)
        movq      144(%rsp), %rdi
        movq      152(%rsp), %r9
        movl      16(%rsp), %r11d
        movl      20(%rsp), %edx
        movl      24(%rsp), %esi
        movl      28(%rsp), %r8d
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movq      160(%rsp), %r11
        movq      168(%rsp), %rdx
        movl      %esi, (%rdi)
        movl      %r8d, (%r9)
        movq      176(%rsp), %rsi
        movq      184(%rsp), %r8
        movl      32(%rsp), %r10d
        movl      36(%rsp), %eax
        movl      40(%rsp), %ecx
        movl      44(%rsp), %edi
        movl      %r10d, (%r11)
        movl      %eax, (%rdx)
        movq      192(%rsp), %r10
        movq      200(%rsp), %rax
        movl      %ecx, (%rsi)
        movl      %edi, (%r8)
        movq      16(%rbp), %rcx
        movq      24(%rbp), %rdi
        movl      48(%rsp), %r9d
        movl      52(%rsp), %r11d
        movl      56(%rsp), %edx
        movl      60(%rsp), %esi
        movl      %r9d, (%r10)
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movl      %esi, (%rdi)
        movq      %rbp, %rsp
        popq      %rbp
        ret
#else
        leal    8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl    $-32, %esp
        pushq   -8(%r10d)
        pushq   %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl    %esp, %ebp
        pushq   %r12
        leal    -80(%rbp), %esi
        pushq   %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal    -112(%rbp), %edi
        movq    %rsi, %r12
        pushq   %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq    %rdi, %rbx
        subl    $184, %esp
        /* x32: the sixteen 4-byte result pointers arrive in %xmm1-%xmm4;
           sine/cosine results for lanes 0-3 go to -112(%rbp)/-80(%rbp) and
           for lanes 4-7 to the buffers 16 bytes above them.  */
        vmovaps %xmm1, -128(%ebp)
        vmovaps %xmm2, -144(%ebp)
        vmovaps %xmm3, -160(%ebp)
        vmovaps %xmm4, -176(%ebp)
        vmovaps %ymm0, -208(%ebp)
        vzeroupper
        call    HIDDEN_JUMPTARGET(\callee)
        leal    16(%r12), %esi
        vmovups -192(%ebp), %xmm0
        leal    16(%rbx), %edi
        call    HIDDEN_JUMPTARGET(\callee)
        /* Scatter the results: each lane's destination pointer is extracted
           from the saved %xmm1-%xmm4 contents (four 32-bit pointers per
           register in the x32 case).  */
        movq    -128(%ebp), %rax
        vmovss  -112(%ebp), %xmm0
        vmovdqa -128(%ebp), %xmm7
        vmovdqa -144(%ebp), %xmm3
        vmovss  %xmm0, (%eax)
        vmovss  -108(%ebp), %xmm0
        vpextrd $1, %xmm7, %eax
        vmovss  %xmm0, (%eax)
        movq    -120(%ebp), %rax
        vmovss  -104(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -100(%ebp), %xmm0
        vpextrd $3, %xmm7, %eax
        vmovdqa -160(%ebp), %xmm7
        vmovss  %xmm0, (%eax)
        movq    -144(%ebp), %rax
        vmovss  -96(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -92(%ebp), %xmm0
        vpextrd $1, %xmm3, %eax
        vmovss  %xmm0, (%eax)
        movq    -136(%ebp), %rax
        vmovss  -88(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -84(%ebp), %xmm0
        vpextrd $3, %xmm3, %eax
        vmovss  %xmm0, (%eax)
        movq    -160(%ebp), %rax
        vmovss  -80(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -76(%ebp), %xmm0
        vpextrd $1, %xmm7, %eax
        vmovss  %xmm0, (%eax)
        movq    -152(%ebp), %rax
        vmovss  -72(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -68(%ebp), %xmm0
        vpextrd $3, %xmm7, %eax
        vmovss  %xmm0, (%eax)
        movq    -176(%ebp), %rax
        vmovss  -64(%ebp), %xmm0
        vmovdqa -176(%ebp), %xmm3
        vmovss  %xmm0, (%eax)
        vmovss  -60(%ebp), %xmm0
        vpextrd $1, %xmm3, %eax
        vmovss  %xmm0, (%eax)
        movq    -168(%ebp), %rax
        vmovss  -56(%ebp), %xmm0
        vmovss  %xmm0, (%eax)
        vmovss  -52(%ebp), %xmm0
        vpextrd $3, %xmm3, %eax
        vmovss  %xmm0, (%eax)
        addl    $184, %esp
        popq    %rbx
        popq    %r10
        .cfi_def_cfa 10, 0
        popq    %r12
        popq    %rbp
        leal    -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVcN8vvv_sincosf)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVcN8vvv_sincosf)