/* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
   Copyright (C) 2014-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

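/* The vl8l8 variant stores its results through two linear array pointers
   (sin and cos, 8-byte stride); WRAPPER_IMPL_AVX_fFF from
   svml_d_wrapper_impl.h splits the four-element input into two calls to
   the SSE4 two-element kernel.  */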
	.text
ENTRY (_ZGVcN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVcN4vl8l8_sincos)

/* AVX ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX_fFF_vvv callee
#ifndef __ILP32__
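        /* LP64: %ymm0 holds the four double inputs, %xmm1/%xmm2 the four
           64-bit sin-result pointers and %xmm3/%xmm4 the four cos-result
           pointers.  Frame layout after the 32-byte alignment:
             0(%rsp)   ... sin results (two doubles per callee call)
             32(%rsp)  ... cos results
             64(%rsp)  ... saved input vector
             96(%rsp)  ... sin-result pointers
             128(%rsp) ... cos-result pointers  */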
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $160, %rsp
        vmovupd   %ymm0, 64(%rsp)
        lea       (%rsp), %rdi
        vmovdqu   %xmm1, 96(%rdi)
        vmovdqu   %xmm2, 112(%rdi)
        vmovdqu   %xmm3, 128(%rdi)
        vmovdqu   %xmm4, 144(%rdi)
        lea       32(%rsp), %rsi
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
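        /* The first call handled elements 0-1, which were already in
           %xmm0 (the low half of %ymm0).  Reload elements 2-3 from the
           saved input and call the SSE kernel again, writing into the
           second half of each result buffer.  */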
        vmovdqu   80(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
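        /* Scatter the buffered results through the eight destination
           pointers: sin values from 0..24(%rsp) go to the pointers saved
           at 96..120(%rsp), cos values from 32..56(%rsp) to the pointers
           saved at 128..152(%rsp).  */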
        movq      96(%rsp), %rdx
        movq      104(%rsp), %rsi
        movq      112(%rsp), %r8
        movq      120(%rsp), %r10
        movq      (%rsp), %rax
        movq      8(%rsp), %rcx
        movq      16(%rsp), %rdi
        movq      24(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      128(%rsp), %rax
        movq      136(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      144(%rsp), %rdi
        movq      152(%rsp), %r9
        movq      32(%rsp), %r11
        movq      40(%rsp), %rdx
        movq      48(%rsp), %rsi
        movq      56(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      %rbp, %rsp
        popq      %rbp
        ret
#else
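        /* X32: pointers are 32 bits wide, so the four sin-result pointers
           arrive packed in %xmm1 and the four cos-result pointers in
           %xmm2.  Sin results are buffered at -112(%rbp), cos results
           at -80(%rbp).  */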
        leal    8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl    $-32, %esp
        pushq   -8(%r10d)
        pushq   %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl    %esp, %ebp
        pushq   %r12
        leal    -80(%rbp), %esi
        pushq   %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal    -112(%rbp), %edi
        movq    %rsi, %r12
        pushq   %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq    %rdi, %rbx
        subl    $152, %esp
        vmovaps %xmm1, -128(%ebp)
        vmovaps %xmm2, -144(%ebp)
        vmovapd %ymm0, -176(%ebp)
        vzeroupper
        call    HIDDEN_JUMPTARGET(\callee)
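        /* Second call: elements 2-3 of the saved input, with results
           written 16 bytes further into each buffer.  */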
        leal    16(%r12), %esi
        vmovupd -160(%ebp), %xmm0
        leal    16(%rbx), %edi
        call    HIDDEN_JUMPTARGET(\callee)
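        /* Store each result through its 32-bit destination pointer;
           odd-numbered pointers are extracted with vpextrd.  */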
        movq    -128(%ebp), %rax
        vmovsd  -112(%ebp), %xmm0
        vmovdqa -128(%ebp), %xmm5
        vmovdqa -144(%ebp), %xmm1
        vmovsd  %xmm0, (%eax)
        vmovsd  -104(%ebp), %xmm0
        vpextrd $1, %xmm5, %eax
        vmovsd  %xmm0, (%eax)
        movq    -120(%ebp), %rax
        vmovsd  -96(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        vmovsd  -88(%ebp), %xmm0
        vpextrd $3, %xmm5, %eax
        vmovsd  %xmm0, (%eax)
        movq    -144(%ebp), %rax
        vmovsd  -80(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        vmovsd  -72(%ebp), %xmm0
        vpextrd $1, %xmm1, %eax
        vmovsd  %xmm0, (%eax)
        movq    -136(%ebp), %rax
        vmovsd  -64(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        vmovsd  -56(%ebp), %xmm0
        vpextrd $3, %xmm1, %eax
        vmovsd  %xmm0, (%eax)
        addl    $152, %esp
        popq    %rbx
        popq    %r10
        .cfi_def_cfa 10, 0
        popq    %r12
        popq    %rbp
        leal    -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

ENTRY (_ZGVcN4vvv_sincos)
WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVcN4vvv_sincos)