libfaad2: cosmetics: Make local_changes.diff apply cleanly
[mplayer.git] / mp3lib / dct36_3dnow.c
blob4362d0582b7bc72529f83feff7935bca52e1d25a
1 /*
2 * dct36_3dnow.c - 3DNow! optimized dct36()
4 * This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
5 * <squash@mb.kcom.ne.jp>; only two types of changes have been made:
7 * - removed the PREFETCH instruction for speedup
8 * - changed the function name to support 3DNow! automatic detection
10 * You can find Kashiyama's original 3DNow! support patch
11 * (for mpg123-0.59o) at
12 * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
14 * by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
15 * <kim@comtec.co.jp> - after 1.Apr.1999
17 * Modified for use with MPlayer, for details see the changelog at
18 * http://svn.mplayerhq.hu/mplayer/trunk/
19 * $Id$
21 * Original disclaimer:
22 * The author of this program disclaim whole expressed or implied
23 * warranties with regard to this program, and in no event shall the
24 * author of this program liable to whatever resulted from the use of
25 * this program. Use it at your own risk.
27 * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi
30 #include "config.h"
31 #include "mangle.h"
32 #include "mpg123.h"
33 #include "ffmpeg_files/x86_cpu.h"
35 #ifdef DCT36_OPTIMIZE_FOR_K7
36 void dct36_3dnowex(real *inbuf, real *o1,
37 real *o2, real *wintab, real *tsbuf)
38 #else
39 void dct36_3dnow(real *inbuf, real *o1,
40 real *o2, real *wintab, real *tsbuf)
41 #endif
43 __asm__ volatile(
44 "movq (%%"REG_a"),%%mm0\n\t"
45 "movq 4(%%"REG_a"),%%mm1\n\t"
46 "pfadd %%mm1,%%mm0\n\t"
47 "movq %%mm0,4(%%"REG_a")\n\t"
48 "psrlq $32,%%mm1\n\t"
49 "movq 12(%%"REG_a"),%%mm2\n\t"
50 "punpckldq %%mm2,%%mm1\n\t"
51 "pfadd %%mm2,%%mm1\n\t"
52 "movq %%mm1,12(%%"REG_a")\n\t"
53 "psrlq $32,%%mm2\n\t"
54 "movq 20(%%"REG_a"),%%mm3\n\t"
55 "punpckldq %%mm3,%%mm2\n\t"
56 "pfadd %%mm3,%%mm2\n\t"
57 "movq %%mm2,20(%%"REG_a")\n\t"
58 "psrlq $32,%%mm3\n\t"
59 "movq 28(%%"REG_a"),%%mm4\n\t"
60 "punpckldq %%mm4,%%mm3\n\t"
61 "pfadd %%mm4,%%mm3\n\t"
62 "movq %%mm3,28(%%"REG_a")\n\t"
63 "psrlq $32,%%mm4\n\t"
64 "movq 36(%%"REG_a"),%%mm5\n\t"
65 "punpckldq %%mm5,%%mm4\n\t"
66 "pfadd %%mm5,%%mm4\n\t"
67 "movq %%mm4,36(%%"REG_a")\n\t"
68 "psrlq $32,%%mm5\n\t"
69 "movq 44(%%"REG_a"),%%mm6\n\t"
70 "punpckldq %%mm6,%%mm5\n\t"
71 "pfadd %%mm6,%%mm5\n\t"
72 "movq %%mm5,44(%%"REG_a")\n\t"
73 "psrlq $32,%%mm6\n\t"
74 "movq 52(%%"REG_a"),%%mm7\n\t"
75 "punpckldq %%mm7,%%mm6\n\t"
76 "pfadd %%mm7,%%mm6\n\t"
77 "movq %%mm6,52(%%"REG_a")\n\t"
78 "psrlq $32,%%mm7\n\t"
79 "movq 60(%%"REG_a"),%%mm0\n\t"
80 "punpckldq %%mm0,%%mm7\n\t"
81 "pfadd %%mm0,%%mm7\n\t"
82 "movq %%mm7,60(%%"REG_a")\n\t"
83 "psrlq $32,%%mm0\n\t"
84 "movd 68(%%"REG_a"),%%mm1\n\t"
85 "pfadd %%mm1,%%mm0\n\t"
86 "movd %%mm0,68(%%"REG_a")\n\t"
87 "movd 4(%%"REG_a"),%%mm0\n\t"
88 "movd 12(%%"REG_a"),%%mm1\n\t"
89 "punpckldq %%mm1,%%mm0\n\t"
90 "punpckldq 20(%%"REG_a"),%%mm1\n\t"
91 "pfadd %%mm1,%%mm0\n\t"
92 "movd %%mm0,12(%%"REG_a")\n\t"
93 "psrlq $32,%%mm0\n\t"
94 "movd %%mm0,20(%%"REG_a")\n\t"
95 "psrlq $32,%%mm1\n\t"
96 "movd 28(%%"REG_a"),%%mm2\n\t"
97 "punpckldq %%mm2,%%mm1\n\t"
98 "punpckldq 36(%%"REG_a"),%%mm2\n\t"
99 "pfadd %%mm2,%%mm1\n\t"
100 "movd %%mm1,28(%%"REG_a")\n\t"
101 "psrlq $32,%%mm1\n\t"
102 "movd %%mm1,36(%%"REG_a")\n\t"
103 "psrlq $32,%%mm2\n\t"
104 "movd 44(%%"REG_a"),%%mm3\n\t"
105 "punpckldq %%mm3,%%mm2\n\t"
106 "punpckldq 52(%%"REG_a"),%%mm3\n\t"
107 "pfadd %%mm3,%%mm2\n\t"
108 "movd %%mm2,44(%%"REG_a")\n\t"
109 "psrlq $32,%%mm2\n\t"
110 "movd %%mm2,52(%%"REG_a")\n\t"
111 "psrlq $32,%%mm3\n\t"
112 "movd 60(%%"REG_a"),%%mm4\n\t"
113 "punpckldq %%mm4,%%mm3\n\t"
114 "punpckldq 68(%%"REG_a"),%%mm4\n\t"
115 "pfadd %%mm4,%%mm3\n\t"
116 "movd %%mm3,60(%%"REG_a")\n\t"
117 "psrlq $32,%%mm3\n\t"
118 "movd %%mm3,68(%%"REG_a")\n\t"
120 "movq 24(%%"REG_a"),%%mm0\n\t"
121 "movq 48(%%"REG_a"),%%mm1\n\t"
122 "movd "MANGLE(COS9)"+12,%%mm2\n\t"
123 "punpckldq %%mm2,%%mm2\n\t"
124 "movd "MANGLE(COS9)"+24,%%mm3\n\t"
125 "punpckldq %%mm3,%%mm3\n\t"
126 "pfmul %%mm2,%%mm0\n\t"
127 "pfmul %%mm3,%%mm1\n\t"
128 "push %%"REG_a"\n\t"
129 "movl $1,%%eax\n\t"
130 "movd %%eax,%%mm7\n\t"
131 "pi2fd %%mm7,%%mm7\n\t"
132 "pop %%"REG_a"\n\t"
133 "movq 8(%%"REG_a"),%%mm2\n\t"
134 "movd "MANGLE(COS9)"+4,%%mm3\n\t"
135 "punpckldq %%mm3,%%mm3\n\t"
136 "pfmul %%mm3,%%mm2\n\t"
137 "pfadd %%mm0,%%mm2\n\t"
138 "movq 40(%%"REG_a"),%%mm3\n\t"
139 "movd "MANGLE(COS9)"+20,%%mm4\n\t"
140 "punpckldq %%mm4,%%mm4\n\t"
141 "pfmul %%mm4,%%mm3\n\t"
142 "pfadd %%mm3,%%mm2\n\t"
143 "movq 56(%%"REG_a"),%%mm3\n\t"
144 "movd "MANGLE(COS9)"+28,%%mm4\n\t"
145 "punpckldq %%mm4,%%mm4\n\t"
146 "pfmul %%mm4,%%mm3\n\t"
147 "pfadd %%mm3,%%mm2\n\t"
148 "movq (%%"REG_a"),%%mm3\n\t"
149 "movq 16(%%"REG_a"),%%mm4\n\t"
150 "movd "MANGLE(COS9)"+8,%%mm5\n\t"
151 "punpckldq %%mm5,%%mm5\n\t"
152 "pfmul %%mm5,%%mm4\n\t"
153 "pfadd %%mm4,%%mm3\n\t"
154 "movq 32(%%"REG_a"),%%mm4\n\t"
155 "movd "MANGLE(COS9)"+16,%%mm5\n\t"
156 "punpckldq %%mm5,%%mm5\n\t"
157 "pfmul %%mm5,%%mm4\n\t"
158 "pfadd %%mm4,%%mm3\n\t"
159 "pfadd %%mm1,%%mm3\n\t"
160 "movq 64(%%"REG_a"),%%mm4\n\t"
161 "movd "MANGLE(COS9)"+32,%%mm5\n\t"
162 "punpckldq %%mm5,%%mm5\n\t"
163 "pfmul %%mm5,%%mm4\n\t"
164 "pfadd %%mm4,%%mm3\n\t"
165 "movq %%mm2,%%mm4\n\t"
166 "pfadd %%mm3,%%mm4\n\t"
167 "movq %%mm7,%%mm5\n\t"
168 "punpckldq "MANGLE(tfcos36)"+0,%%mm5\n\t"
169 "pfmul %%mm5,%%mm4\n\t"
170 "movq %%mm4,%%mm5\n\t"
171 "pfacc %%mm5,%%mm5\n\t"
172 "movd 108(%%"REG_d"),%%mm6\n\t"
173 "punpckldq 104(%%"REG_d"),%%mm6\n\t"
174 "pfmul %%mm6,%%mm5\n\t"
175 #ifdef DCT36_OPTIMIZE_FOR_K7
176 "pswapd %%mm5,%%mm5\n\t"
177 "movq %%mm5,32(%%"REG_c")\n\t"
178 #else
179 "movd %%mm5,36(%%"REG_c")\n\t"
180 "psrlq $32,%%mm5\n\t"
181 "movd %%mm5,32(%%"REG_c")\n\t"
182 #endif
183 "movq %%mm4,%%mm6\n\t"
184 "punpckldq %%mm6,%%mm5\n\t"
185 "pfsub %%mm6,%%mm5\n\t"
186 "punpckhdq %%mm5,%%mm5\n\t"
187 "movd 32(%%"REG_d"),%%mm6\n\t"
188 "punpckldq 36(%%"REG_d"),%%mm6\n\t"
189 "pfmul %%mm6,%%mm5\n\t"
190 "movd 32(%%"REG_S"),%%mm6\n\t"
191 "punpckldq 36(%%"REG_S"),%%mm6\n\t"
192 "pfadd %%mm6,%%mm5\n\t"
193 "movd %%mm5,1024(%%"REG_D")\n\t"
194 "psrlq $32,%%mm5\n\t"
195 "movd %%mm5,1152(%%"REG_D")\n\t"
196 "movq %%mm3,%%mm4\n\t"
197 "pfsub %%mm2,%%mm4\n\t"
198 "movq %%mm7,%%mm5\n\t"
199 "punpckldq "MANGLE(tfcos36)"+32,%%mm5\n\t"
200 "pfmul %%mm5,%%mm4\n\t"
201 "movq %%mm4,%%mm5\n\t"
202 "pfacc %%mm5,%%mm5\n\t"
203 "movd 140(%%"REG_d"),%%mm6\n\t"
204 "punpckldq 72(%%"REG_d"),%%mm6\n\t"
205 "pfmul %%mm6,%%mm5\n\t"
206 "movd %%mm5,68(%%"REG_c")\n\t"
207 "psrlq $32,%%mm5\n\t"
208 "movd %%mm5,0(%%"REG_c")\n\t"
209 "movq %%mm4,%%mm6\n\t"
210 "punpckldq %%mm6,%%mm5\n\t"
211 "pfsub %%mm6,%%mm5\n\t"
212 "punpckhdq %%mm5,%%mm5\n\t"
213 "movd 0(%%"REG_d"),%%mm6\n\t"
214 "punpckldq 68(%%"REG_d"),%%mm6\n\t"
215 "pfmul %%mm6,%%mm5\n\t"
216 "movd 0(%%"REG_S"),%%mm6\n\t"
217 "punpckldq 68(%%"REG_S"),%%mm6\n\t"
218 "pfadd %%mm6,%%mm5\n\t"
219 "movd %%mm5,0(%%"REG_D")\n\t"
220 "psrlq $32,%%mm5\n\t"
221 "movd %%mm5,2176(%%"REG_D")\n\t"
222 "movq 8(%%"REG_a"),%%mm2\n\t"
223 "movq 40(%%"REG_a"),%%mm3\n\t"
224 "pfsub %%mm3,%%mm2\n\t"
225 "movq 56(%%"REG_a"),%%mm3\n\t"
226 "pfsub %%mm3,%%mm2\n\t"
227 "movd "MANGLE(COS9)"+12,%%mm3\n\t"
228 "punpckldq %%mm3,%%mm3\n\t"
229 "pfmul %%mm3,%%mm2\n\t"
230 "movq 16(%%"REG_a"),%%mm3\n\t"
231 "movq 32(%%"REG_a"),%%mm4\n\t"
232 "pfsub %%mm4,%%mm3\n\t"
233 "movq 64(%%"REG_a"),%%mm4\n\t"
234 "pfsub %%mm4,%%mm3\n\t"
235 "movd "MANGLE(COS9)"+24,%%mm4\n\t"
236 "punpckldq %%mm4,%%mm4\n\t"
237 "pfmul %%mm4,%%mm3\n\t"
238 "movq 48(%%"REG_a"),%%mm4\n\t"
239 "pfsub %%mm4,%%mm3\n\t"
240 "movq (%%"REG_a"),%%mm4\n\t"
241 "pfadd %%mm4,%%mm3\n\t"
242 "movq %%mm2,%%mm4\n\t"
243 "pfadd %%mm3,%%mm4\n\t"
244 "movq %%mm7,%%mm5\n\t"
245 "punpckldq "MANGLE(tfcos36)"+4,%%mm5\n\t"
246 "pfmul %%mm5,%%mm4\n\t"
247 "movq %%mm4,%%mm5\n\t"
248 "pfacc %%mm5,%%mm5\n\t"
249 "movd 112(%%"REG_d"),%%mm6\n\t"
250 "punpckldq 100(%%"REG_d"),%%mm6\n\t"
251 "pfmul %%mm6,%%mm5\n\t"
252 "movd %%mm5,40(%%"REG_c")\n\t"
253 "psrlq $32,%%mm5\n\t"
254 "movd %%mm5,28(%%"REG_c")\n\t"
255 "movq %%mm4,%%mm6\n\t"
256 "punpckldq %%mm6,%%mm5\n\t"
257 "pfsub %%mm6,%%mm5\n\t"
258 "punpckhdq %%mm5,%%mm5\n\t"
259 "movd 28(%%"REG_d"),%%mm6\n\t"
260 "punpckldq 40(%%"REG_d"),%%mm6\n\t"
261 "pfmul %%mm6,%%mm5\n\t"
262 "movd 28(%%"REG_S"),%%mm6\n\t"
263 "punpckldq 40(%%"REG_S"),%%mm6\n\t"
264 "pfadd %%mm6,%%mm5\n\t"
265 "movd %%mm5,896(%%"REG_D")\n\t"
266 "psrlq $32,%%mm5\n\t"
267 "movd %%mm5,1280(%%"REG_D")\n\t"
268 "movq %%mm3,%%mm4\n\t"
269 "pfsub %%mm2,%%mm4\n\t"
270 "movq %%mm7,%%mm5\n\t"
271 "punpckldq "MANGLE(tfcos36)"+28,%%mm5\n\t"
272 "pfmul %%mm5,%%mm4\n\t"
273 "movq %%mm4,%%mm5\n\t"
274 "pfacc %%mm5,%%mm5\n\t"
275 "movd 136(%%"REG_d"),%%mm6\n\t"
276 "punpckldq 76(%%"REG_d"),%%mm6\n\t"
277 "pfmul %%mm6,%%mm5\n\t"
278 "movd %%mm5,64(%%"REG_c")\n\t"
279 "psrlq $32,%%mm5\n\t"
280 "movd %%mm5,4(%%"REG_c")\n\t"
281 "movq %%mm4,%%mm6\n\t"
282 "punpckldq %%mm6,%%mm5\n\t"
283 "pfsub %%mm6,%%mm5\n\t"
284 "punpckhdq %%mm5,%%mm5\n\t"
285 "movd 4(%%"REG_d"),%%mm6\n\t"
286 "punpckldq 64(%%"REG_d"),%%mm6\n\t"
287 "pfmul %%mm6,%%mm5\n\t"
288 "movd 4(%%"REG_S"),%%mm6\n\t"
289 "punpckldq 64(%%"REG_S"),%%mm6\n\t"
290 "pfadd %%mm6,%%mm5\n\t"
291 "movd %%mm5,128(%%"REG_D")\n\t"
292 "psrlq $32,%%mm5\n\t"
293 "movd %%mm5,2048(%%"REG_D")\n\t"
295 "movq 8(%%"REG_a"),%%mm2\n\t"
296 "movd "MANGLE(COS9)"+20,%%mm3\n\t"
297 "punpckldq %%mm3,%%mm3\n\t"
298 "pfmul %%mm3,%%mm2\n\t"
299 "pfsub %%mm0,%%mm2\n\t"
300 "movq 40(%%"REG_a"),%%mm3\n\t"
301 "movd "MANGLE(COS9)"+28,%%mm4\n\t"
302 "punpckldq %%mm4,%%mm4\n\t"
303 "pfmul %%mm4,%%mm3\n\t"
304 "pfsub %%mm3,%%mm2\n\t"
305 "movq 56(%%"REG_a"),%%mm3\n\t"
306 "movd "MANGLE(COS9)"+4,%%mm4\n\t"
307 "punpckldq %%mm4,%%mm4\n\t"
308 "pfmul %%mm4,%%mm3\n\t"
309 "pfadd %%mm3,%%mm2\n\t"
310 "movq (%%"REG_a"),%%mm3\n\t"
311 "movq 16(%%"REG_a"),%%mm4\n\t"
312 "movd "MANGLE(COS9)"+32,%%mm5\n\t"
313 "punpckldq %%mm5,%%mm5\n\t"
314 "pfmul %%mm5,%%mm4\n\t"
315 "pfsub %%mm4,%%mm3\n\t"
316 "movq 32(%%"REG_a"),%%mm4\n\t"
317 "movd "MANGLE(COS9)"+8,%%mm5\n\t"
318 "punpckldq %%mm5,%%mm5\n\t"
319 "pfmul %%mm5,%%mm4\n\t"
320 "pfsub %%mm4,%%mm3\n\t"
321 "pfadd %%mm1,%%mm3\n\t"
322 "movq 64(%%"REG_a"),%%mm4\n\t"
323 "movd "MANGLE(COS9)"+16,%%mm5\n\t"
324 "punpckldq %%mm5,%%mm5\n\t"
325 "pfmul %%mm5,%%mm4\n\t"
326 "pfadd %%mm4,%%mm3\n\t"
327 "movq %%mm2,%%mm4\n\t"
328 "pfadd %%mm3,%%mm4\n\t"
329 "movq %%mm7,%%mm5\n\t"
330 "punpckldq "MANGLE(tfcos36)"+8,%%mm5\n\t"
331 "pfmul %%mm5,%%mm4\n\t"
332 "movq %%mm4,%%mm5\n\t"
333 "pfacc %%mm5,%%mm5\n\t"
334 "movd 116(%%"REG_d"),%%mm6\n\t"
335 "punpckldq 96(%%"REG_d"),%%mm6\n\t"
336 "pfmul %%mm6,%%mm5\n\t"
337 "movd %%mm5,44(%%"REG_c")\n\t"
338 "psrlq $32,%%mm5\n\t"
339 "movd %%mm5,24(%%"REG_c")\n\t"
340 "movq %%mm4,%%mm6\n\t"
341 "punpckldq %%mm6,%%mm5\n\t"
342 "pfsub %%mm6,%%mm5\n\t"
343 "punpckhdq %%mm5,%%mm5\n\t"
344 "movd 24(%%"REG_d"),%%mm6\n\t"
345 "punpckldq 44(%%"REG_d"),%%mm6\n\t"
346 "pfmul %%mm6,%%mm5\n\t"
347 "movd 24(%%"REG_S"),%%mm6\n\t"
348 "punpckldq 44(%%"REG_S"),%%mm6\n\t"
349 "pfadd %%mm6,%%mm5\n\t"
350 "movd %%mm5,768(%%"REG_D")\n\t"
351 "psrlq $32,%%mm5\n\t"
352 "movd %%mm5,1408(%%"REG_D")\n\t"
353 "movq %%mm3,%%mm4\n\t"
354 "pfsub %%mm2,%%mm4\n\t"
355 "movq %%mm7,%%mm5\n\t"
356 "punpckldq "MANGLE(tfcos36)"+24,%%mm5\n\t"
357 "pfmul %%mm5,%%mm4\n\t"
358 "movq %%mm4,%%mm5\n\t"
359 "pfacc %%mm5,%%mm5\n\t"
360 "movd 132(%%"REG_d"),%%mm6\n\t"
361 "punpckldq 80(%%"REG_d"),%%mm6\n\t"
362 "pfmul %%mm6,%%mm5\n\t"
363 "movd %%mm5,60(%%"REG_c")\n\t"
364 "psrlq $32,%%mm5\n\t"
365 "movd %%mm5,8(%%"REG_c")\n\t"
366 "movq %%mm4,%%mm6\n\t"
367 "punpckldq %%mm6,%%mm5\n\t"
368 "pfsub %%mm6,%%mm5\n\t"
369 "punpckhdq %%mm5,%%mm5\n\t"
370 "movd 8(%%"REG_d"),%%mm6\n\t"
371 "punpckldq 60(%%"REG_d"),%%mm6\n\t"
372 "pfmul %%mm6,%%mm5\n\t"
373 "movd 8(%%"REG_S"),%%mm6\n\t"
374 "punpckldq 60(%%"REG_S"),%%mm6\n\t"
375 "pfadd %%mm6,%%mm5\n\t"
376 "movd %%mm5,256(%%"REG_D")\n\t"
377 "psrlq $32,%%mm5\n\t"
378 "movd %%mm5,1920(%%"REG_D")\n\t"
379 "movq 8(%%"REG_a"),%%mm2\n\t"
380 "movd "MANGLE(COS9)"+28,%%mm3\n\t"
381 "punpckldq %%mm3,%%mm3\n\t"
382 "pfmul %%mm3,%%mm2\n\t"
383 "pfsub %%mm0,%%mm2\n\t"
384 "movq 40(%%"REG_a"),%%mm3\n\t"
385 "movd "MANGLE(COS9)"+4,%%mm4\n\t"
386 "punpckldq %%mm4,%%mm4\n\t"
387 "pfmul %%mm4,%%mm3\n\t"
388 "pfadd %%mm3,%%mm2\n\t"
389 "movq 56(%%"REG_a"),%%mm3\n\t"
390 "movd "MANGLE(COS9)"+20,%%mm4\n\t"
391 "punpckldq %%mm4,%%mm4\n\t"
392 "pfmul %%mm4,%%mm3\n\t"
393 "pfsub %%mm3,%%mm2\n\t"
394 "movq (%%"REG_a"),%%mm3\n\t"
395 "movq 16(%%"REG_a"),%%mm4\n\t"
396 "movd "MANGLE(COS9)"+16,%%mm5\n\t"
397 "punpckldq %%mm5,%%mm5\n\t"
398 "pfmul %%mm5,%%mm4\n\t"
399 "pfsub %%mm4,%%mm3\n\t"
400 "movq 32(%%"REG_a"),%%mm4\n\t"
401 "movd "MANGLE(COS9)"+32,%%mm5\n\t"
402 "punpckldq %%mm5,%%mm5\n\t"
403 "pfmul %%mm5,%%mm4\n\t"
404 "pfadd %%mm4,%%mm3\n\t"
405 "pfadd %%mm1,%%mm3\n\t"
406 "movq 64(%%"REG_a"),%%mm4\n\t"
407 "movd "MANGLE(COS9)"+8,%%mm5\n\t"
408 "punpckldq %%mm5,%%mm5\n\t"
409 "pfmul %%mm5,%%mm4\n\t"
410 "pfsub %%mm4,%%mm3\n\t"
411 "movq %%mm2,%%mm4\n\t"
412 "pfadd %%mm3,%%mm4\n\t"
413 "movq %%mm7,%%mm5\n\t"
414 "punpckldq "MANGLE(tfcos36)"+12,%%mm5\n\t"
415 "pfmul %%mm5,%%mm4\n\t"
416 "movq %%mm4,%%mm5\n\t"
417 "pfacc %%mm5,%%mm5\n\t"
418 "movd 120(%%"REG_d"),%%mm6\n\t"
419 "punpckldq 92(%%"REG_d"),%%mm6\n\t"
420 "pfmul %%mm6,%%mm5\n\t"
421 "movd %%mm5,48(%%"REG_c")\n\t"
422 "psrlq $32,%%mm5\n\t"
423 "movd %%mm5,20(%%"REG_c")\n\t"
424 "movq %%mm4,%%mm6\n\t"
425 "punpckldq %%mm6,%%mm5\n\t"
426 "pfsub %%mm6,%%mm5\n\t"
427 "punpckhdq %%mm5,%%mm5\n\t"
428 "movd 20(%%"REG_d"),%%mm6\n\t"
429 "punpckldq 48(%%"REG_d"),%%mm6\n\t"
430 "pfmul %%mm6,%%mm5\n\t"
431 "movd 20(%%"REG_S"),%%mm6\n\t"
432 "punpckldq 48(%%"REG_S"),%%mm6\n\t"
433 "pfadd %%mm6,%%mm5\n\t"
434 "movd %%mm5,640(%%"REG_D")\n\t"
435 "psrlq $32,%%mm5\n\t"
436 "movd %%mm5,1536(%%"REG_D")\n\t"
437 "movq %%mm3,%%mm4\n\t"
438 "pfsub %%mm2,%%mm4\n\t"
439 "movq %%mm7,%%mm5\n\t"
440 "punpckldq "MANGLE(tfcos36)"+20,%%mm5\n\t"
441 "pfmul %%mm5,%%mm4\n\t"
442 "movq %%mm4,%%mm5\n\t"
443 "pfacc %%mm5,%%mm5\n\t"
444 "movd 128(%%"REG_d"),%%mm6\n\t"
445 "punpckldq 84(%%"REG_d"),%%mm6\n\t"
446 "pfmul %%mm6,%%mm5\n\t"
447 "movd %%mm5,56(%%"REG_c")\n\t"
448 "psrlq $32,%%mm5\n\t"
449 "movd %%mm5,12(%%"REG_c")\n\t"
450 "movq %%mm4,%%mm6\n\t"
451 "punpckldq %%mm6,%%mm5\n\t"
452 "pfsub %%mm6,%%mm5\n\t"
453 "punpckhdq %%mm5,%%mm5\n\t"
454 "movd 12(%%"REG_d"),%%mm6\n\t"
455 "punpckldq 56(%%"REG_d"),%%mm6\n\t"
456 "pfmul %%mm6,%%mm5\n\t"
457 "movd 12(%%"REG_S"),%%mm6\n\t"
458 "punpckldq 56(%%"REG_S"),%%mm6\n\t"
459 "pfadd %%mm6,%%mm5\n\t"
460 "movd %%mm5,384(%%"REG_D")\n\t"
461 "psrlq $32,%%mm5\n\t"
462 "movd %%mm5,1792(%%"REG_D")\n\t"
464 "movq (%%"REG_a"),%%mm4\n\t"
465 "movq 16(%%"REG_a"),%%mm3\n\t"
466 "pfsub %%mm3,%%mm4\n\t"
467 "movq 32(%%"REG_a"),%%mm3\n\t"
468 "pfadd %%mm3,%%mm4\n\t"
469 "movq 48(%%"REG_a"),%%mm3\n\t"
470 "pfsub %%mm3,%%mm4\n\t"
471 "movq 64(%%"REG_a"),%%mm3\n\t"
472 "pfadd %%mm3,%%mm4\n\t"
473 "movq %%mm7,%%mm5\n\t"
474 "punpckldq "MANGLE(tfcos36)"+16,%%mm5\n\t"
475 "pfmul %%mm5,%%mm4\n\t"
476 "movq %%mm4,%%mm5\n\t"
477 "pfacc %%mm5,%%mm5\n\t"
478 "movd 124(%%"REG_d"),%%mm6\n\t"
479 "punpckldq 88(%%"REG_d"),%%mm6\n\t"
480 "pfmul %%mm6,%%mm5\n\t"
481 "movd %%mm5,52(%%"REG_c")\n\t"
482 "psrlq $32,%%mm5\n\t"
483 "movd %%mm5,16(%%"REG_c")\n\t"
484 "movq %%mm4,%%mm6\n\t"
485 "punpckldq %%mm6,%%mm5\n\t"
486 "pfsub %%mm6,%%mm5\n\t"
487 "punpckhdq %%mm5,%%mm5\n\t"
488 "movd 16(%%"REG_d"),%%mm6\n\t"
489 "punpckldq 52(%%"REG_d"),%%mm6\n\t"
490 "pfmul %%mm6,%%mm5\n\t"
491 "movd 16(%%"REG_S"),%%mm6\n\t"
492 "punpckldq 52(%%"REG_S"),%%mm6\n\t"
493 "pfadd %%mm6,%%mm5\n\t"
494 "movd %%mm5,512(%%"REG_D")\n\t"
495 "psrlq $32,%%mm5\n\t"
496 "movd %%mm5,1664(%%"REG_D")\n\t"
498 "femms\n\t"
500 : "a" (inbuf), "S" (o1), "c" (o2), "d" (wintab), "D" (tsbuf)
501 : "memory");