/* mathvec/svml_d_sincos4_core.s
   4-lane (AVX/AVX2) double-precision sincos entry points, implemented
   as wrappers around the 2-lane kernel _ZGVbN2vl8l8_sincos.  */
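/* WRAPPER_IMPL_SSE2: build a 2-lane double vector function from a
   scalar CALLEE.  The input vector in %xmm0 is spilled to the stack,
   CALLEE is called once per lane, and the two scalar results are
   repacked into %xmm0 with unpcklpd.  */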
.macro WRAPPER_IMPL_SSE2 callee
        subq      $40, %rsp
        movaps    %xmm0, (%rsp)
        call      \callee
        movsd     %xmm0, 16(%rsp)
        movsd     8(%rsp), %xmm0
        call      \callee
        movsd     16(%rsp), %xmm1
        movsd     %xmm0, 24(%rsp)
        unpcklpd  %xmm0, %xmm1
        movaps    %xmm1, %xmm0
        addq      $40, %rsp
        ret
.endm
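/* WRAPPER_IMPL_SSE2_ff: two-argument form of the wrapper above, for
   f(x, y); both 2-lane inputs in %xmm0/%xmm1 are spilled and CALLEE
   runs once per lane pair.  */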
.macro WRAPPER_IMPL_SSE2_ff callee
        subq      $56, %rsp
        movaps    %xmm0, (%rsp)
        movaps    %xmm1, 16(%rsp)
        call      \callee
        movsd     %xmm0, 32(%rsp)
        movsd     8(%rsp), %xmm0
        movsd     24(%rsp), %xmm1
        call      \callee
        movsd     32(%rsp), %xmm1
        movsd     %xmm0, 40(%rsp)
        unpcklpd  %xmm0, %xmm1
        movaps    %xmm1, %xmm0
        addq      $56, %rsp
        ret
.endm
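/* WRAPPER_IMPL_SSE2_fFF: wrapper for the sincos-style signature
   f(x, ptr1, ptr2).  The caller's two output-vector pointers
   (%rdi/%rsi) are saved in %rbp/%rbx; each CALLEE call writes its
   scalar outputs to stack scratch at 24(%rsp)/16(%rsp), which are
   then copied lane by lane into the caller's buffers.  */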
.macro WRAPPER_IMPL_SSE2_fFF callee
        pushq     %rbp
        pushq     %rbx
        movq      %rdi, %rbp
        movq      %rsi, %rbx
        subq      $40, %rsp
        leaq      16(%rsp), %rsi
        leaq      24(%rsp), %rdi
        movaps    %xmm0, (%rsp)
        call      \callee
        leaq      16(%rsp), %rsi
        leaq      24(%rsp), %rdi
        movsd     24(%rsp), %xmm0
        movapd    (%rsp), %xmm1
        movsd     %xmm0, 0(%rbp)
        unpckhpd  %xmm1, %xmm1
        movsd     16(%rsp), %xmm0
        movsd     %xmm0, (%rbx)
        movapd    %xmm1, %xmm0
        call      \callee
        movsd     24(%rsp), %xmm0
        movsd     %xmm0, 8(%rbp)
        movsd     16(%rsp), %xmm0
        movsd     %xmm0, 8(%rbx)
        addq      $40, %rsp
        popq      %rbx
        popq      %rbp
        ret
.endm
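/* WRAPPER_IMPL_AVX: build a 4-lane AVX function from a 2-lane SSE
   CALLEE.  The high 128 bits of %ymm0 are spilled, CALLEE processes
   each half, and vinsertf128 reassembles the 256-bit result.  */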
.macro WRAPPER_IMPL_AVX callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $32, %rsp
        vextractf128 $1, %ymm0, (%rsp)
        vzeroupper
        call      \callee
        vmovapd   %xmm0, 16(%rsp)
        vmovaps   (%rsp), %xmm0
        call      \callee
        vmovapd   %xmm0, %xmm1
        vmovapd   16(%rsp), %xmm0
        vinsertf128 $1, %xmm1, %ymm0, %ymm0
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
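/* WRAPPER_IMPL_AVX_ff: two-argument AVX form; the high halves of
   %ymm0 and %ymm1 are spilled and CALLEE runs once per 128-bit
   half.  */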
.macro WRAPPER_IMPL_AVX_ff callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $64, %rsp
        vextractf128 $1, %ymm0, 16(%rsp)
        vextractf128 $1, %ymm1, (%rsp)
        vzeroupper
        call      \callee
        vmovaps   %xmm0, 32(%rsp)
        vmovaps   16(%rsp), %xmm0
        vmovaps   (%rsp), %xmm1
        call      \callee
        vmovaps   %xmm0, %xmm1
        vmovaps   32(%rsp), %xmm0
        vinsertf128 $1, %xmm1, %ymm0, %ymm0
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
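/* WRAPPER_IMPL_AVX_fFF: 4-lane sincos-style wrapper.  The first
   CALLEE call writes the low-half results directly through the
   caller's pointers (saved in %r13/%r14); the second call targets
   stack scratch, which is then copied to offset 16 of each output
   buffer.  */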
.macro WRAPPER_IMPL_AVX_fFF callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        pushq     %r13
        pushq     %r14
        subq      $48, %rsp
        movq      %rsi, %r14
        movq      %rdi, %r13
        vextractf128 $1, %ymm0, 32(%rsp)
        vzeroupper
        call      \callee
        vmovaps   32(%rsp), %xmm0
        lea       (%rsp), %rdi
        lea       16(%rsp), %rsi
        call      \callee
        vmovapd   (%rsp), %xmm0
        vmovapd   16(%rsp), %xmm1
        vmovapd   %xmm0, 16(%r13)
        vmovapd   %xmm1, 16(%r14)
        addq      $48, %rsp
        popq      %r14
        popq      %r13
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
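/* WRAPPER_IMPL_AVX512: build an 8-lane AVX-512 function from a
   4-lane CALLEE by spilling %zmm0 and processing each 256-bit
   half.  */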
.macro WRAPPER_IMPL_AVX512 callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-64, %rsp
        subq      $128, %rsp
        vmovups   %zmm0, (%rsp)
        vmovupd   (%rsp), %ymm0
        call      \callee
        vmovupd   %ymm0, 64(%rsp)
        vmovupd   32(%rsp), %ymm0
        call      \callee
        vmovupd   %ymm0, 96(%rsp)
        vmovups   64(%rsp), %zmm0
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
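/* WRAPPER_IMPL_AVX512_ff: two-argument AVX-512 form; both %zmm
   inputs are spilled and CALLEE runs once per 256-bit half.  */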
.macro WRAPPER_IMPL_AVX512_ff callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-64, %rsp
        subq      $192, %rsp
        vmovups   %zmm0, (%rsp)
        vmovups   %zmm1, 64(%rsp)
        vmovupd   (%rsp), %ymm0
        vmovupd   64(%rsp), %ymm1
        call      \callee
        vmovupd   %ymm0, 128(%rsp)
        vmovupd   32(%rsp), %ymm0
        vmovupd   96(%rsp), %ymm1
        call      \callee
        vmovupd   %ymm0, 160(%rsp)
        vmovups   128(%rsp), %zmm0
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
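/* WRAPPER_IMPL_AVX512_fFF: 8-lane sincos-style wrapper.  As in the
   AVX variant, the first CALLEE call uses the caller's own output
   pointers (saved in %r12/%r13) for the low half; the second call
   writes to stack scratch that is copied to offset 32 of each
   buffer.  */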
.macro WRAPPER_IMPL_AVX512_fFF callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-64, %rsp
        pushq     %r12
        pushq     %r13
        subq      $176, %rsp
        movq      %rsi, %r13
        vmovups   %zmm0, (%rsp)
        movq      %rdi, %r12
        vmovupd   (%rsp), %ymm0
        call      \callee
        vmovupd   32(%rsp), %ymm0
        lea       64(%rsp), %rdi
        lea       96(%rsp), %rsi
        call      \callee
        vmovupd   64(%rsp), %ymm0
        vmovupd   96(%rsp), %ymm1
        vmovupd   %ymm0, 32(%r12)
        vmovupd   %ymm1, 32(%r13)
        vzeroupper
        addq      $176, %rsp
        popq      %r13
        popq      %r12
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
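/* _ZGVdN4vl8l8_sincos: per the x86_64 vector function ABI name
   mangling, 'd' selects the AVX2 variant, 'N4' an unmasked 4-lane
   version, and 'vl8l8' a vector argument plus two linear pointer
   arguments of stride 8.  A rough C-level view (declaration assumed
   from the ABI, not stated in this file):

       void _ZGVdN4vl8l8_sincos (__m256d x,
                                 double *sin_out, double *cos_out);

   It is realized here by splitting into two calls to the 2-lane
   kernel _ZGVbN2vl8l8_sincos.  */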
.text
.globl _ZGVdN4vl8l8_sincos
.type _ZGVdN4vl8l8_sincos,@function
.align 1<<4
_ZGVdN4vl8l8_sincos:
        WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
.size _ZGVdN4vl8l8_sincos,.-_ZGVdN4vl8l8_sincos
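/* WRAPPER_IMPL_AVX2_fFF_vvv: variant for the 'vvv' signature, where
   the sin and cos destinations arrive as vectors of four pointers in
   %ymm1/%ymm2 instead of two scalar pointers.  Results are computed
   into stack scratch by two 2-lane CALLEE calls, then scattered
   through the individual lane pointers with scalar moves.  */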
.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
        pushq     %rbp
        movq      %rsp, %rbp
        andq      $-32, %rsp
        subq      $160, %rsp
        vmovupd   %ymm0, 128(%rsp)            /* spill the input vector */
        lea       (%rsp), %rdi
        vmovdqu   %ymm1, 64(%rdi)             /* spill the four sin pointers */
        vmovdqu   %ymm2, 96(%rdi)             /* spill the four cos pointers */
        lea       32(%rsp), %rsi
        vzeroupper
        call      \callee                     /* lanes 0-1 -> (%rsp)/32(%rsp) */
        vmovupd   144(%rsp), %xmm0
        lea       16(%rsp), %rdi
        lea       48(%rsp), %rsi
        call      \callee                     /* lanes 2-3 -> 16(%rsp)/48(%rsp) */
        /* Scatter the per-lane results through the saved pointers.  */
        movq      64(%rsp), %rdx
        movq      96(%rsp), %rsi
        movq      72(%rsp), %r8
        movq      104(%rsp), %r10
        movq      (%rsp), %rax
        movq      32(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      40(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      80(%rsp), %rax
        movq      112(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      88(%rsp), %rdi
        movq      120(%rsp), %r9
        movq      16(%rsp), %r11
        movq      48(%rsp), %rdx
        movq      24(%rsp), %rsi
        movq      56(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      %rbp, %rsp
        popq      %rbp
        ret
.endm
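/* _ZGVdN4vvv_sincos: AVX2 sincos entry point taking per-lane output
   pointers.  A rough C-level view (declaration assumed, not from
   this file):

       void _ZGVdN4vvv_sincos (__m256d x,
                               __m256i sin_ptrs, __m256i cos_ptrs);
 */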
.globl _ZGVdN4vvv_sincos
.type _ZGVdN4vvv_sincos,@function
.align 1<<4
_ZGVdN4vvv_sincos:
        WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
.size _ZGVdN4vvv_sincos,.-_ZGVdN4vvv_sincos