/* Function sincosf vectorized with AVX2, wrapper version.
   Copyright (C) 2014-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
#include "svml_s_wrapper_impl.h"

/* 8-lane AVX2 sincosf (linear-pointer l4l4 variant): implemented by
   invoking the 4-lane SSE kernel twice through the generic fFF
   wrapper macro from svml_s_wrapper_impl.h.
   NOTE(review): stray listing numbers from the extraction were removed
   from these lines; the instructions themselves are unchanged.  */
	.section .text.avx2, "ax", @progbits
ENTRY (_ZGVdN8vl4l4_sincosf)
WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vl4l4_sincosf)
libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)
28 /* AVX2 ISA version as wrapper to SSE ISA version (for vector
29 function declared with #pragma omp declare simd notinbranch). */
/* NOTE(review): the embedded listing numbers jump (30 -> 33, 51 -> 101,
   180 -> 194, ...), so this extract is missing lines: the frame setup
   (pushq %rbp / movq %rsp, %rbp / stack allocation), the per-lane
   scatter stores after each pointer/value load pair below, the
   #ifndef __ILP32__ / #else / #endif guards that apparently separate
   the LP64 path from the x32 (%ebp) path, and the closing .endm.
   Do not assemble as-is; restore the elided lines from the full
   source before use.  */
30 .macro WRAPPER_IMPL_AVX2_fFF_vvv callee
/* ---- LP64 path (presumably under #ifndef __ILP32__ — confirm). ----
   CFI bookkeeping for a saved-%rbp frame; the matching pushq/movq
   instructions are not visible in this extract.  */
33 cfi_adjust_cfa_offset (8)
34 cfi_rel_offset (%rbp, 0)
36 cfi_def_cfa_register (%rbp)
/* Spill the full 8-float input so its high half survives the first
   downcall (reloaded from 208(%rsp) = 192+16 below).  */
39 vmovups %ymm0, 192(%rsp)
/* Save the four vectors of per-lane destination pointers (vvv form:
   %ymm1..%ymm4 presumably carry the sin/cos output addresses — the
   scatter code using them is elided here) into scratch at (%rdi).  */
41 vmovdqu %ymm1, 64(%rdi)
42 vmovdqu %ymm2, 96(%rdi)
43 vmovdqu %ymm3, 128(%rdi)
44 vmovdqu %ymm4, 160(%rdi)
/* First call: 4-lane SSE sincosf on the low half of the input.  */
47 call HIDDEN_JUMPTARGET(\callee)
/* Reload the high 4 lanes of the spilled input and process them.  */
48 vmovups 208(%rsp), %xmm0
51 call HIDDEN_JUMPTARGET(\callee)
/* Frame teardown CFI; the result-scatter and pop instructions that
   belong between the calls and here are elided from this extract.  */
101 cfi_def_cfa_register (%rsp)
103 cfi_adjust_cfa_offset (-8)
/* ---- x32 path — 32-bit pointers, hence the %ebp/%eax/%edi
   addressing below; presumably under #else / __ILP32__ — confirm.
   DWARF CFI expressions describing the CFA and saved registers
   relative to %rbp (0x76 = DW_OP_breg6).  */
112 .cfi_escape 0x10,0x6,0x2,0x76,0
117 .cfi_escape 0xf,0x3,0x76,0x70,0x6
118 .cfi_escape 0x10,0xc,0x2,0x76,0x78
/* Pass the local result buffer at -112(%rbp) as the callee's first
   pointer argument; 32-bit %edi because pointers are 32-bit on x32.  */
119 leal -112(%rbp), %edi
122 .cfi_escape 0x10,0x3,0x2,0x76,0x68
/* Save the two pointer vectors and the 8-float input in the frame.  */
125 vmovdqa %ymm1, -144(%ebp)
126 vmovdqa %ymm2, -176(%ebp)
127 vmovaps %ymm0, -208(%ebp)
/* First 4-lane call on the low half of the input.  */
129 call HIDDEN_JUMPTARGET(\callee)
/* High half of the spilled input (-208+16 = -192) for call two.  */
131 vmovups -192(%ebp), %xmm0
133 call HIDDEN_JUMPTARGET(\callee)
/* Scatter loop, unrolled: for each of 8 lanes, load the lane's
   destination pointer from the saved %ymm1 image (-144..-116) into
   %eax and the lane's computed value from the result buffer
   (-112..-84) into %xmm0.  The paired store through (%eax)
   (vmovss %xmm0, (%eax)) after each pair is elided from this
   extract — TODO restore.  */
134 movl -144(%ebp), %eax
135 vmovss -112(%ebp), %xmm0
137 movl -140(%ebp), %eax
138 vmovss -108(%ebp), %xmm0
140 movl -136(%ebp), %eax
141 vmovss -104(%ebp), %xmm0
143 movl -132(%ebp), %eax
144 vmovss -100(%ebp), %xmm0
146 movl -128(%ebp), %eax
147 vmovss -96(%ebp), %xmm0
149 movl -124(%ebp), %eax
150 vmovss -92(%ebp), %xmm0
152 movl -120(%ebp), %eax
153 vmovss -88(%ebp), %xmm0
155 movl -116(%ebp), %eax
156 vmovss -84(%ebp), %xmm0
/* Same pattern for the second result set: pointers from the saved
   %ymm2 image (-176..-148), values from -80..-52; stores elided.  */
158 movl -176(%ebp), %eax
159 vmovss -80(%ebp), %xmm0
161 movl -172(%ebp), %eax
162 vmovss -76(%ebp), %xmm0
164 movl -168(%ebp), %eax
165 vmovss -72(%ebp), %xmm0
167 movl -164(%ebp), %eax
168 vmovss -68(%ebp), %xmm0
170 movl -160(%ebp), %eax
171 vmovss -64(%ebp), %xmm0
173 movl -156(%ebp), %eax
174 vmovss -60(%ebp), %xmm0
176 movl -152(%ebp), %eax
177 vmovss -56(%ebp), %xmm0
179 movl -148(%ebp), %eax
180 vmovss -52(%ebp), %xmm0
/* 8-lane AVX2 sincosf, vvv variant (per-lane destination pointers),
   built on the WRAPPER_IMPL_AVX2_fFF_vvv macro defined above.
   NOTE(review): stray listing numbers from the extraction were removed
   from these lines; the instructions themselves are unchanged.  */
ENTRY (_ZGVdN8vvv_sincosf)
WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf
END (_ZGVdN8vvv_sincosf)
198 #ifndef USE_MULTIARCH
199 libmvec_hidden_def (_ZGVdN8vvv_sincosf)