vo_glamo: sub.h was moved to sub directory in c9026cb3210205b07e2e068467a18ee40f9259a3
[mplayer/glamo.git] / mp3lib / dct64_3dnow.c
blob7cd8603941fe480a5132f3db566c4e3f6f0f2837
1 /*
2 * This code was taken from http://www.mpg123.org
3 * See ChangeLog of mpg123-0.59s-pre.1 for details
4 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
5 * Partial 3dnow! optimization by Nick Kurshev
7 * TODO: optimize scalar 3dnow! code
8 * Warning: Phases 7 & 8 are not tested
9 */
11 #include "config.h"
12 #include "mangle.h"
13 #include "mpg123.h"
14 #include "ffmpeg_files/x86_cpu.h"
/*
 * Sign mask: only bit 63 is set. The asm in dct64_MMX_3dnow loads this into
 * mm7 and pxor's it against packed pairs of floats, which flips the sign of
 * the float held in the upper 32 bits and leaves the lower one untouched.
 * It is fetched with a 64-bit movq (presumably the reason for aligned(8)).
 * It is referenced only by name inside the asm text (via MANGLE), never from
 * C code. NOTE(review): attribute_used presumably keeps it from being
 * discarded as unused - confirm against the macro's definition.
 */
16 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL;
/*
 * Constant 1.0. The asm loads it with movd into the low half of mm6, fills
 * the high half from offset 120 of costab_mmx, and then uses mm6 as a pfmul
 * multiplier. Like the mask above, it is referenced only from the asm text.
 */
17 static float attribute_used plus_1f = 1.0;
19 void dct64_MMX_3dnow(short *a,short *b,real *c)
21 char tmp[256];
22 __asm__ volatile(
23 " mov %2,%%"REG_a"\n\t"
25 " lea 128+%3,%%"REG_d"\n\t"
26 " mov %0,%%"REG_S"\n\t"
27 " mov %1,%%"REG_D"\n\t"
28 " mov $"MANGLE(costab_mmx)",%%"REG_b"\n\t"
29 " lea %3,%%"REG_c"\n\t"
31 /* Phase 1*/
32 " movq (%%"REG_a"), %%mm0\n\t"
33 " movq 8(%%"REG_a"), %%mm4\n\t"
34 " movq %%mm0, %%mm3\n\t"
35 " movq %%mm4, %%mm7\n\t"
36 " movq 120(%%"REG_a"), %%mm1\n\t"
37 " movq 112(%%"REG_a"), %%mm5\n\t"
38 /* n.b.: pswapd*/
39 " movq %%mm1, %%mm2\n\t"
40 " movq %%mm5, %%mm6\n\t"
41 " psrlq $32, %%mm1\n\t"
42 " psrlq $32, %%mm5\n\t"
43 " punpckldq %%mm2, %%mm1\n\t"
44 " punpckldq %%mm6, %%mm5\n\t"
45 /**/
46 " pfadd %%mm1, %%mm0\n\t"
47 " pfadd %%mm5, %%mm4\n\t"
48 " movq %%mm0, (%%"REG_d")\n\t"
49 " movq %%mm4, 8(%%"REG_d")\n\t"
50 " pfsub %%mm1, %%mm3\n\t"
51 " pfsub %%mm5, %%mm7\n\t"
52 " pfmul (%%"REG_b"), %%mm3\n\t"
53 " pfmul 8(%%"REG_b"), %%mm7\n\t"
54 " movd %%mm3, 124(%%"REG_d")\n\t"
55 " movd %%mm7, 116(%%"REG_d")\n\t"
56 " psrlq $32, %%mm3\n\t"
57 " psrlq $32, %%mm7\n\t"
58 " movd %%mm3, 120(%%"REG_d")\n\t"
59 " movd %%mm7, 112(%%"REG_d")\n\t"
61 " movq 16(%%"REG_a"), %%mm0\n\t"
62 " movq 24(%%"REG_a"), %%mm4\n\t"
63 " movq %%mm0, %%mm3\n\t"
64 " movq %%mm4, %%mm7\n\t"
65 " movq 104(%%"REG_a"), %%mm1\n\t"
66 " movq 96(%%"REG_a"), %%mm5\n\t"
67 /* n.b.: pswapd*/
68 " movq %%mm1, %%mm2\n\t"
69 " movq %%mm5, %%mm6\n\t"
70 " psrlq $32, %%mm1\n\t"
71 " psrlq $32, %%mm5\n\t"
72 " punpckldq %%mm2, %%mm1\n\t"
73 " punpckldq %%mm6, %%mm5\n\t"
74 /**/
75 " pfadd %%mm1, %%mm0\n\t"
76 " pfadd %%mm5, %%mm4\n\t"
77 " movq %%mm0, 16(%%"REG_d")\n\t"
78 " movq %%mm4, 24(%%"REG_d")\n\t"
79 " pfsub %%mm1, %%mm3\n\t"
80 " pfsub %%mm5, %%mm7\n\t"
81 " pfmul 16(%%"REG_b"), %%mm3\n\t"
82 " pfmul 24(%%"REG_b"), %%mm7\n\t"
83 " movd %%mm3, 108(%%"REG_d")\n\t"
84 " movd %%mm7, 100(%%"REG_d")\n\t"
85 " psrlq $32, %%mm3\n\t"
86 " psrlq $32, %%mm7\n\t"
87 " movd %%mm3, 104(%%"REG_d")\n\t"
88 " movd %%mm7, 96(%%"REG_d")\n\t"
90 " movq 32(%%"REG_a"), %%mm0\n\t"
91 " movq 40(%%"REG_a"), %%mm4\n\t"
92 " movq %%mm0, %%mm3\n\t"
93 " movq %%mm4, %%mm7\n\t"
94 " movq 88(%%"REG_a"), %%mm1\n\t"
95 " movq 80(%%"REG_a"), %%mm5\n\t"
96 /* n.b.: pswapd*/
97 " movq %%mm1, %%mm2\n\t"
98 " movq %%mm5, %%mm6\n\t"
99 " psrlq $32, %%mm1\n\t"
100 " psrlq $32, %%mm5\n\t"
101 " punpckldq %%mm2, %%mm1\n\t"
102 " punpckldq %%mm6, %%mm5\n\t"
103 /**/
104 " pfadd %%mm1, %%mm0\n\t"
105 " pfadd %%mm5, %%mm4\n\t"
106 " movq %%mm0, 32(%%"REG_d")\n\t"
107 " movq %%mm4, 40(%%"REG_d")\n\t"
108 " pfsub %%mm1, %%mm3\n\t"
109 " pfsub %%mm5, %%mm7\n\t"
110 " pfmul 32(%%"REG_b"), %%mm3\n\t"
111 " pfmul 40(%%"REG_b"), %%mm7\n\t"
112 " movd %%mm3, 92(%%"REG_d")\n\t"
113 " movd %%mm7, 84(%%"REG_d")\n\t"
114 " psrlq $32, %%mm3\n\t"
115 " psrlq $32, %%mm7\n\t"
116 " movd %%mm3, 88(%%"REG_d")\n\t"
117 " movd %%mm7, 80(%%"REG_d")\n\t"
119 " movq 48(%%"REG_a"), %%mm0\n\t"
120 " movq 56(%%"REG_a"), %%mm4\n\t"
121 " movq %%mm0, %%mm3\n\t"
122 " movq %%mm4, %%mm7\n\t"
123 " movq 72(%%"REG_a"), %%mm1\n\t"
124 " movq 64(%%"REG_a"), %%mm5\n\t"
125 /* n.b.: pswapd*/
126 " movq %%mm1, %%mm2\n\t"
127 " movq %%mm5, %%mm6\n\t"
128 " psrlq $32, %%mm1\n\t"
129 " psrlq $32, %%mm5\n\t"
130 " punpckldq %%mm2, %%mm1\n\t"
131 " punpckldq %%mm6, %%mm5\n\t"
132 /**/
133 " pfadd %%mm1, %%mm0\n\t"
134 " pfadd %%mm5, %%mm4\n\t"
135 " movq %%mm0, 48(%%"REG_d")\n\t"
136 " movq %%mm4, 56(%%"REG_d")\n\t"
137 " pfsub %%mm1, %%mm3\n\t"
138 " pfsub %%mm5, %%mm7\n\t"
139 " pfmul 48(%%"REG_b"), %%mm3\n\t"
140 " pfmul 56(%%"REG_b"), %%mm7\n\t"
141 " movd %%mm3, 76(%%"REG_d")\n\t"
142 " movd %%mm7, 68(%%"REG_d")\n\t"
143 " psrlq $32, %%mm3\n\t"
144 " psrlq $32, %%mm7\n\t"
145 " movd %%mm3, 72(%%"REG_d")\n\t"
146 " movd %%mm7, 64(%%"REG_d")\n\t"
148 /* Phase 2*/
150 " movq (%%"REG_d"), %%mm0\n\t"
151 " movq 8(%%"REG_d"), %%mm4\n\t"
152 " movq %%mm0, %%mm3\n\t"
153 " movq %%mm4, %%mm7\n\t"
154 " movq 56(%%"REG_d"), %%mm1\n\t"
155 " movq 48(%%"REG_d"), %%mm5\n\t"
156 /* n.b.: pswapd*/
157 " movq %%mm1, %%mm2\n\t"
158 " movq %%mm5, %%mm6\n\t"
159 " psrlq $32, %%mm1\n\t"
160 " psrlq $32, %%mm5\n\t"
161 " punpckldq %%mm2, %%mm1\n\t"
162 " punpckldq %%mm6, %%mm5\n\t"
163 /**/
164 " pfadd %%mm1, %%mm0\n\t"
165 " pfadd %%mm5, %%mm4\n\t"
166 " movq %%mm0, (%%"REG_c")\n\t"
167 " movq %%mm4, 8(%%"REG_c")\n\t"
168 " pfsub %%mm1, %%mm3\n\t"
169 " pfsub %%mm5, %%mm7\n\t"
170 " pfmul 64(%%"REG_b"), %%mm3\n\t"
171 " pfmul 72(%%"REG_b"), %%mm7\n\t"
172 " movd %%mm3, 60(%%"REG_c")\n\t"
173 " movd %%mm7, 52(%%"REG_c")\n\t"
174 " psrlq $32, %%mm3\n\t"
175 " psrlq $32, %%mm7\n\t"
176 " movd %%mm3, 56(%%"REG_c")\n\t"
177 " movd %%mm7, 48(%%"REG_c")\n\t"
179 " movq 16(%%"REG_d"), %%mm0\n\t"
180 " movq 24(%%"REG_d"), %%mm4\n\t"
181 " movq %%mm0, %%mm3\n\t"
182 " movq %%mm4, %%mm7\n\t"
183 " movq 40(%%"REG_d"), %%mm1\n\t"
184 " movq 32(%%"REG_d"), %%mm5\n\t"
185 /* n.b.: pswapd*/
186 " movq %%mm1, %%mm2\n\t"
187 " movq %%mm5, %%mm6\n\t"
188 " psrlq $32, %%mm1\n\t"
189 " psrlq $32, %%mm5\n\t"
190 " punpckldq %%mm2, %%mm1\n\t"
191 " punpckldq %%mm6, %%mm5\n\t"
192 /**/
193 " pfadd %%mm1, %%mm0\n\t"
194 " pfadd %%mm5, %%mm4\n\t"
195 " movq %%mm0, 16(%%"REG_c")\n\t"
196 " movq %%mm4, 24(%%"REG_c")\n\t"
197 " pfsub %%mm1, %%mm3\n\t"
198 " pfsub %%mm5, %%mm7\n\t"
199 " pfmul 80(%%"REG_b"), %%mm3\n\t"
200 " pfmul 88(%%"REG_b"), %%mm7\n\t"
201 " movd %%mm3, 44(%%"REG_c")\n\t"
202 " movd %%mm7, 36(%%"REG_c")\n\t"
203 " psrlq $32, %%mm3\n\t"
204 " psrlq $32, %%mm7\n\t"
205 " movd %%mm3, 40(%%"REG_c")\n\t"
206 " movd %%mm7, 32(%%"REG_c")\n\t"
208 /* Phase 3*/
210 " movq 64(%%"REG_d"), %%mm0\n\t"
211 " movq 72(%%"REG_d"), %%mm4\n\t"
212 " movq %%mm0, %%mm3\n\t"
213 " movq %%mm4, %%mm7\n\t"
214 " movq 120(%%"REG_d"), %%mm1\n\t"
215 " movq 112(%%"REG_d"), %%mm5\n\t"
216 /* n.b.: pswapd*/
217 " movq %%mm1, %%mm2\n\t"
218 " movq %%mm5, %%mm6\n\t"
219 " psrlq $32, %%mm1\n\t"
220 " psrlq $32, %%mm5\n\t"
221 " punpckldq %%mm2, %%mm1\n\t"
222 " punpckldq %%mm6, %%mm5\n\t"
223 /**/
224 " pfadd %%mm1, %%mm0\n\t"
225 " pfadd %%mm5, %%mm4\n\t"
226 " movq %%mm0, 64(%%"REG_c")\n\t"
227 " movq %%mm4, 72(%%"REG_c")\n\t"
228 " pfsubr %%mm1, %%mm3\n\t"
229 " pfsubr %%mm5, %%mm7\n\t"
230 " pfmul 64(%%"REG_b"), %%mm3\n\t"
231 " pfmul 72(%%"REG_b"), %%mm7\n\t"
232 " movd %%mm3, 124(%%"REG_c")\n\t"
233 " movd %%mm7, 116(%%"REG_c")\n\t"
234 " psrlq $32, %%mm3\n\t"
235 " psrlq $32, %%mm7\n\t"
236 " movd %%mm3, 120(%%"REG_c")\n\t"
237 " movd %%mm7, 112(%%"REG_c")\n\t"
239 " movq 80(%%"REG_d"), %%mm0\n\t"
240 " movq 88(%%"REG_d"), %%mm4\n\t"
241 " movq %%mm0, %%mm3\n\t"
242 " movq %%mm4, %%mm7\n\t"
243 " movq 104(%%"REG_d"), %%mm1\n\t"
244 " movq 96(%%"REG_d"), %%mm5\n\t"
245 /* n.b.: pswapd*/
246 " movq %%mm1, %%mm2\n\t"
247 " movq %%mm5, %%mm6\n\t"
248 " psrlq $32, %%mm1\n\t"
249 " psrlq $32, %%mm5\n\t"
250 " punpckldq %%mm2, %%mm1\n\t"
251 " punpckldq %%mm6, %%mm5\n\t"
252 /**/
253 " pfadd %%mm1, %%mm0\n\t"
254 " pfadd %%mm5, %%mm4\n\t"
255 " movq %%mm0, 80(%%"REG_c")\n\t"
256 " movq %%mm4, 88(%%"REG_c")\n\t"
257 " pfsubr %%mm1, %%mm3\n\t"
258 " pfsubr %%mm5, %%mm7\n\t"
259 " pfmul 80(%%"REG_b"), %%mm3\n\t"
260 " pfmul 88(%%"REG_b"), %%mm7\n\t"
261 " movd %%mm3, 108(%%"REG_c")\n\t"
262 " movd %%mm7, 100(%%"REG_c")\n\t"
263 " psrlq $32, %%mm3\n\t"
264 " psrlq $32, %%mm7\n\t"
265 " movd %%mm3, 104(%%"REG_c")\n\t"
266 " movd %%mm7, 96(%%"REG_c")\n\t"
268 /* Phase 4*/
270 " movq (%%"REG_c"), %%mm0\n\t"
271 " movq 8(%%"REG_c"), %%mm4\n\t"
272 " movq %%mm0, %%mm3\n\t"
273 " movq %%mm4, %%mm7\n\t"
274 " movq 24(%%"REG_c"), %%mm1\n\t"
275 " movq 16(%%"REG_c"), %%mm5\n\t"
276 /* n.b.: pswapd*/
277 " movq %%mm1, %%mm2\n\t"
278 " movq %%mm5, %%mm6\n\t"
279 " psrlq $32, %%mm1\n\t"
280 " psrlq $32, %%mm5\n\t"
281 " punpckldq %%mm2, %%mm1\n\t"
282 " punpckldq %%mm6, %%mm5\n\t"
283 /**/
284 " pfadd %%mm1, %%mm0\n\t"
285 " pfadd %%mm5, %%mm4\n\t"
286 " movq %%mm0, (%%"REG_d")\n\t"
287 " movq %%mm4, 8(%%"REG_d")\n\t"
288 " pfsub %%mm1, %%mm3\n\t"
289 " pfsub %%mm5, %%mm7\n\t"
290 " pfmul 96(%%"REG_b"), %%mm3\n\t"
291 " pfmul 104(%%"REG_b"), %%mm7\n\t"
292 " movd %%mm3, 28(%%"REG_d")\n\t"
293 " movd %%mm7, 20(%%"REG_d")\n\t"
294 " psrlq $32, %%mm3\n\t"
295 " psrlq $32, %%mm7\n\t"
296 " movd %%mm3, 24(%%"REG_d")\n\t"
297 " movd %%mm7, 16(%%"REG_d")\n\t"
299 " movq 32(%%"REG_c"), %%mm0\n\t"
300 " movq 40(%%"REG_c"), %%mm4\n\t"
301 " movq %%mm0, %%mm3\n\t"
302 " movq %%mm4, %%mm7\n\t"
303 " movq 56(%%"REG_c"), %%mm1\n\t"
304 " movq 48(%%"REG_c"), %%mm5\n\t"
305 /* n.b.: pswapd*/
306 " movq %%mm1, %%mm2\n\t"
307 " movq %%mm5, %%mm6\n\t"
308 " psrlq $32, %%mm1\n\t"
309 " psrlq $32, %%mm5\n\t"
310 " punpckldq %%mm2, %%mm1\n\t"
311 " punpckldq %%mm6, %%mm5\n\t"
312 /**/
313 " pfadd %%mm1, %%mm0\n\t"
314 " pfadd %%mm5, %%mm4\n\t"
315 " movq %%mm0, 32(%%"REG_d")\n\t"
316 " movq %%mm4, 40(%%"REG_d")\n\t"
317 " pfsubr %%mm1, %%mm3\n\t"
318 " pfsubr %%mm5, %%mm7\n\t"
319 " pfmul 96(%%"REG_b"), %%mm3\n\t"
320 " pfmul 104(%%"REG_b"), %%mm7\n\t"
321 " movd %%mm3, 60(%%"REG_d")\n\t"
322 " movd %%mm7, 52(%%"REG_d")\n\t"
323 " psrlq $32, %%mm3\n\t"
324 " psrlq $32, %%mm7\n\t"
325 " movd %%mm3, 56(%%"REG_d")\n\t"
326 " movd %%mm7, 48(%%"REG_d")\n\t"
328 " movq 64(%%"REG_c"), %%mm0\n\t"
329 " movq 72(%%"REG_c"), %%mm4\n\t"
330 " movq %%mm0, %%mm3\n\t"
331 " movq %%mm4, %%mm7\n\t"
332 " movq 88(%%"REG_c"), %%mm1\n\t"
333 " movq 80(%%"REG_c"), %%mm5\n\t"
334 /* n.b.: pswapd*/
335 " movq %%mm1, %%mm2\n\t"
336 " movq %%mm5, %%mm6\n\t"
337 " psrlq $32, %%mm1\n\t"
338 " psrlq $32, %%mm5\n\t"
339 " punpckldq %%mm2, %%mm1\n\t"
340 " punpckldq %%mm6, %%mm5\n\t"
341 /**/
342 " pfadd %%mm1, %%mm0\n\t"
343 " pfadd %%mm5, %%mm4\n\t"
344 " movq %%mm0, 64(%%"REG_d")\n\t"
345 " movq %%mm4, 72(%%"REG_d")\n\t"
346 " pfsub %%mm1, %%mm3\n\t"
347 " pfsub %%mm5, %%mm7\n\t"
348 " pfmul 96(%%"REG_b"), %%mm3\n\t"
349 " pfmul 104(%%"REG_b"), %%mm7\n\t"
350 " movd %%mm3, 92(%%"REG_d")\n\t"
351 " movd %%mm7, 84(%%"REG_d")\n\t"
352 " psrlq $32, %%mm3\n\t"
353 " psrlq $32, %%mm7\n\t"
354 " movd %%mm3, 88(%%"REG_d")\n\t"
355 " movd %%mm7, 80(%%"REG_d")\n\t"
357 " movq 96(%%"REG_c"), %%mm0\n\t"
358 " movq 104(%%"REG_c"), %%mm4\n\t"
359 " movq %%mm0, %%mm3\n\t"
360 " movq %%mm4, %%mm7\n\t"
361 " movq 120(%%"REG_c"), %%mm1\n\t"
362 " movq 112(%%"REG_c"), %%mm5\n\t"
363 /* n.b.: pswapd*/
364 " movq %%mm1, %%mm2\n\t"
365 " movq %%mm5, %%mm6\n\t"
366 " psrlq $32, %%mm1\n\t"
367 " psrlq $32, %%mm5\n\t"
368 " punpckldq %%mm2, %%mm1\n\t"
369 " punpckldq %%mm6, %%mm5\n\t"
370 /**/
371 " pfadd %%mm1, %%mm0\n\t"
372 " pfadd %%mm5, %%mm4\n\t"
373 " movq %%mm0, 96(%%"REG_d")\n\t"
374 " movq %%mm4, 104(%%"REG_d")\n\t"
375 " pfsubr %%mm1, %%mm3\n\t"
376 " pfsubr %%mm5, %%mm7\n\t"
377 " pfmul 96(%%"REG_b"), %%mm3\n\t"
378 " pfmul 104(%%"REG_b"), %%mm7\n\t"
379 " movd %%mm3, 124(%%"REG_d")\n\t"
380 " movd %%mm7, 116(%%"REG_d")\n\t"
381 " psrlq $32, %%mm3\n\t"
382 " psrlq $32, %%mm7\n\t"
383 " movd %%mm3, 120(%%"REG_d")\n\t"
384 " movd %%mm7, 112(%%"REG_d")\n\t"
386 /* Phase 5 */
388 " movq (%%"REG_d"), %%mm0\n\t"
389 " movq 16(%%"REG_d"), %%mm4\n\t"
390 " movq %%mm0, %%mm3\n\t"
391 " movq %%mm4, %%mm7\n\t"
392 " movq 8(%%"REG_d"), %%mm1\n\t"
393 " movq 24(%%"REG_d"), %%mm5\n\t"
394 /* n.b.: pswapd*/
395 " movq %%mm1, %%mm2\n\t"
396 " movq %%mm5, %%mm6\n\t"
397 " psrlq $32, %%mm1\n\t"
398 " psrlq $32, %%mm5\n\t"
399 " punpckldq %%mm2, %%mm1\n\t"
400 " punpckldq %%mm6, %%mm5\n\t"
401 /**/
402 " pfadd %%mm1, %%mm0\n\t"
403 " pfadd %%mm5, %%mm4\n\t"
404 " movq %%mm0, (%%"REG_c")\n\t"
405 " movq %%mm4, 16(%%"REG_c")\n\t"
406 " pfsub %%mm1, %%mm3\n\t"
407 " pfsubr %%mm5, %%mm7\n\t"
408 " pfmul 112(%%"REG_b"), %%mm3\n\t"
409 " pfmul 112(%%"REG_b"), %%mm7\n\t"
410 " movd %%mm3, 12(%%"REG_c")\n\t"
411 " movd %%mm7, 28(%%"REG_c")\n\t"
412 " psrlq $32, %%mm3\n\t"
413 " psrlq $32, %%mm7\n\t"
414 " movd %%mm3, 8(%%"REG_c")\n\t"
415 " movd %%mm7, 24(%%"REG_c")\n\t"
417 " movq 32(%%"REG_d"), %%mm0\n\t"
418 " movq 48(%%"REG_d"), %%mm4\n\t"
419 " movq %%mm0, %%mm3\n\t"
420 " movq %%mm4, %%mm7\n\t"
421 " movq 40(%%"REG_d"), %%mm1\n\t"
422 " movq 56(%%"REG_d"), %%mm5\n\t"
423 /* n.b.: pswapd*/
424 " movq %%mm1, %%mm2\n\t"
425 " movq %%mm5, %%mm6\n\t"
426 " psrlq $32, %%mm1\n\t"
427 " psrlq $32, %%mm5\n\t"
428 " punpckldq %%mm2, %%mm1\n\t"
429 " punpckldq %%mm6, %%mm5\n\t"
430 /**/
431 " pfadd %%mm1, %%mm0\n\t"
432 " pfadd %%mm5, %%mm4\n\t"
433 " movq %%mm0, 32(%%"REG_c")\n\t"
434 " movq %%mm4, 48(%%"REG_c")\n\t"
435 " pfsub %%mm1, %%mm3\n\t"
436 " pfsubr %%mm5, %%mm7\n\t"
437 " pfmul 112(%%"REG_b"), %%mm3\n\t"
438 " pfmul 112(%%"REG_b"), %%mm7\n\t"
439 " movd %%mm3, 44(%%"REG_c")\n\t"
440 " movd %%mm7, 60(%%"REG_c")\n\t"
441 " psrlq $32, %%mm3\n\t"
442 " psrlq $32, %%mm7\n\t"
443 " movd %%mm3, 40(%%"REG_c")\n\t"
444 " movd %%mm7, 56(%%"REG_c")\n\t"
446 " movq 64(%%"REG_d"), %%mm0\n\t"
447 " movq 80(%%"REG_d"), %%mm4\n\t"
448 " movq %%mm0, %%mm3\n\t"
449 " movq %%mm4, %%mm7\n\t"
450 " movq 72(%%"REG_d"), %%mm1\n\t"
451 " movq 88(%%"REG_d"), %%mm5\n\t"
452 /* n.b.: pswapd*/
453 " movq %%mm1, %%mm2\n\t"
454 " movq %%mm5, %%mm6\n\t"
455 " psrlq $32, %%mm1\n\t"
456 " psrlq $32, %%mm5\n\t"
457 " punpckldq %%mm2, %%mm1\n\t"
458 " punpckldq %%mm6, %%mm5\n\t"
459 /**/
460 " pfadd %%mm1, %%mm0\n\t"
461 " pfadd %%mm5, %%mm4\n\t"
462 " movq %%mm0, 64(%%"REG_c")\n\t"
463 " movq %%mm4, 80(%%"REG_c")\n\t"
464 " pfsub %%mm1, %%mm3\n\t"
465 " pfsubr %%mm5, %%mm7\n\t"
466 " pfmul 112(%%"REG_b"), %%mm3\n\t"
467 " pfmul 112(%%"REG_b"), %%mm7\n\t"
468 " movd %%mm3, 76(%%"REG_c")\n\t"
469 " movd %%mm7, 92(%%"REG_c")\n\t"
470 " psrlq $32, %%mm3\n\t"
471 " psrlq $32, %%mm7\n\t"
472 " movd %%mm3, 72(%%"REG_c")\n\t"
473 " movd %%mm7, 88(%%"REG_c")\n\t"
475 " movq 96(%%"REG_d"), %%mm0\n\t"
476 " movq 112(%%"REG_d"), %%mm4\n\t"
477 " movq %%mm0, %%mm3\n\t"
478 " movq %%mm4, %%mm7\n\t"
479 " movq 104(%%"REG_d"), %%mm1\n\t"
480 " movq 120(%%"REG_d"), %%mm5\n\t"
481 /* n.b.: pswapd*/
482 " movq %%mm1, %%mm2\n\t"
483 " movq %%mm5, %%mm6\n\t"
484 " psrlq $32, %%mm1\n\t"
485 " psrlq $32, %%mm5\n\t"
486 " punpckldq %%mm2, %%mm1\n\t"
487 " punpckldq %%mm6, %%mm5\n\t"
488 /**/
489 " pfadd %%mm1, %%mm0\n\t"
490 " pfadd %%mm5, %%mm4\n\t"
491 " movq %%mm0, 96(%%"REG_c")\n\t"
492 " movq %%mm4, 112(%%"REG_c")\n\t"
493 " pfsub %%mm1, %%mm3\n\t"
494 " pfsubr %%mm5, %%mm7\n\t"
495 " pfmul 112(%%"REG_b"), %%mm3\n\t"
496 " pfmul 112(%%"REG_b"), %%mm7\n\t"
497 " movd %%mm3, 108(%%"REG_c")\n\t"
498 " movd %%mm7, 124(%%"REG_c")\n\t"
499 " psrlq $32, %%mm3\n\t"
500 " psrlq $32, %%mm7\n\t"
501 " movd %%mm3, 104(%%"REG_c")\n\t"
502 " movd %%mm7, 120(%%"REG_c")\n\t"
504 /* Phase 6. This is the end of easy road. */
505 /* Code below is coded in scalar mode. Should be optimized */
507 " movd "MANGLE(plus_1f)", %%mm6\n\t"
508 " punpckldq 120(%%"REG_b"), %%mm6\n\t" /* mm6 = 1.0 | 120(%%"REG_b")*/
509 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */
511 " movq 32(%%"REG_c"), %%mm0\n\t"
512 " movq 64(%%"REG_c"), %%mm2\n\t"
513 " movq %%mm0, %%mm1\n\t"
514 " movq %%mm2, %%mm3\n\t"
515 " pxor %%mm7, %%mm1\n\t"
516 " pxor %%mm7, %%mm3\n\t"
517 " pfacc %%mm1, %%mm0\n\t"
518 " pfacc %%mm3, %%mm2\n\t"
519 " pfmul %%mm6, %%mm0\n\t"
520 " pfmul %%mm6, %%mm2\n\t"
521 " movq %%mm0, 32(%%"REG_d")\n\t"
522 " movq %%mm2, 64(%%"REG_d")\n\t"
524 " movd 44(%%"REG_c"), %%mm0\n\t"
525 " movd 40(%%"REG_c"), %%mm2\n\t"
526 " movd 120(%%"REG_b"), %%mm3\n\t"
527 " punpckldq 76(%%"REG_c"), %%mm0\n\t"
528 " punpckldq 72(%%"REG_c"), %%mm2\n\t"
529 " punpckldq %%mm3, %%mm3\n\t"
530 " movq %%mm0, %%mm4\n\t"
531 " movq %%mm2, %%mm5\n\t"
532 " pfsub %%mm2, %%mm0\n\t"
533 " pfmul %%mm3, %%mm0\n\t"
534 " movq %%mm0, %%mm1\n\t"
535 " pfadd %%mm5, %%mm0\n\t"
536 " pfadd %%mm4, %%mm0\n\t"
537 " movq %%mm0, %%mm2\n\t"
538 " punpckldq %%mm1, %%mm0\n\t"
539 " punpckhdq %%mm1, %%mm2\n\t"
540 " movq %%mm0, 40(%%"REG_d")\n\t"
541 " movq %%mm2, 72(%%"REG_d")\n\t"
543 " movd 48(%%"REG_c"), %%mm3\n\t"
544 " movd 60(%%"REG_c"), %%mm2\n\t"
545 " pfsub 52(%%"REG_c"), %%mm3\n\t"
546 " pfsub 56(%%"REG_c"), %%mm2\n\t"
547 " pfmul 120(%%"REG_b"), %%mm3\n\t"
548 " pfmul 120(%%"REG_b"), %%mm2\n\t"
549 " movq %%mm2, %%mm1\n\t"
551 " pfadd 56(%%"REG_c"), %%mm1\n\t"
552 " pfadd 60(%%"REG_c"), %%mm1\n\t"
553 " movq %%mm1, %%mm0\n\t"
555 " pfadd 48(%%"REG_c"), %%mm0\n\t"
556 " pfadd 52(%%"REG_c"), %%mm0\n\t"
557 " pfadd %%mm3, %%mm1\n\t"
558 " punpckldq %%mm2, %%mm1\n\t"
559 " pfadd %%mm3, %%mm2\n\t"
560 " punpckldq %%mm2, %%mm0\n\t"
561 " movq %%mm1, 56(%%"REG_d")\n\t"
562 " movq %%mm0, 48(%%"REG_d")\n\t"
564 /*---*/
566 " movd 92(%%"REG_c"), %%mm1\n\t"
567 " pfsub 88(%%"REG_c"), %%mm1\n\t"
568 " pfmul 120(%%"REG_b"), %%mm1\n\t"
569 " movd %%mm1, 92(%%"REG_d")\n\t"
570 " pfadd 92(%%"REG_c"), %%mm1\n\t"
571 " pfadd 88(%%"REG_c"), %%mm1\n\t"
572 " movq %%mm1, %%mm0\n\t"
574 " pfadd 80(%%"REG_c"), %%mm0\n\t"
575 " pfadd 84(%%"REG_c"), %%mm0\n\t"
576 " movd %%mm0, 80(%%"REG_d")\n\t"
578 " movd 80(%%"REG_c"), %%mm0\n\t"
579 " pfsub 84(%%"REG_c"), %%mm0\n\t"
580 " pfmul 120(%%"REG_b"), %%mm0\n\t"
581 " pfadd %%mm0, %%mm1\n\t"
582 " pfadd 92(%%"REG_d"), %%mm0\n\t"
583 " punpckldq %%mm1, %%mm0\n\t"
584 " movq %%mm0, 84(%%"REG_d")\n\t"
586 " movq 96(%%"REG_c"), %%mm0\n\t"
587 " movq %%mm0, %%mm1\n\t"
588 " pxor %%mm7, %%mm1\n\t"
589 " pfacc %%mm1, %%mm0\n\t"
590 " pfmul %%mm6, %%mm0\n\t"
591 " movq %%mm0, 96(%%"REG_d")\n\t"
593 " movd 108(%%"REG_c"), %%mm0\n\t"
594 " pfsub 104(%%"REG_c"), %%mm0\n\t"
595 " pfmul 120(%%"REG_b"), %%mm0\n\t"
596 " movd %%mm0, 108(%%"REG_d")\n\t"
597 " pfadd 104(%%"REG_c"), %%mm0\n\t"
598 " pfadd 108(%%"REG_c"), %%mm0\n\t"
599 " movd %%mm0, 104(%%"REG_d")\n\t"
601 " movd 124(%%"REG_c"), %%mm1\n\t"
602 " pfsub 120(%%"REG_c"), %%mm1\n\t"
603 " pfmul 120(%%"REG_b"), %%mm1\n\t"
604 " movd %%mm1, 124(%%"REG_d")\n\t"
605 " pfadd 120(%%"REG_c"), %%mm1\n\t"
606 " pfadd 124(%%"REG_c"), %%mm1\n\t"
607 " movq %%mm1, %%mm0\n\t"
609 " pfadd 112(%%"REG_c"), %%mm0\n\t"
610 " pfadd 116(%%"REG_c"), %%mm0\n\t"
611 " movd %%mm0, 112(%%"REG_d")\n\t"
613 " movd 112(%%"REG_c"), %%mm0\n\t"
614 " pfsub 116(%%"REG_c"), %%mm0\n\t"
615 " pfmul 120(%%"REG_b"), %%mm0\n\t"
616 " pfadd %%mm0,%%mm1\n\t"
617 " pfadd 124(%%"REG_d"), %%mm0\n\t"
618 " punpckldq %%mm1, %%mm0\n\t"
619 " movq %%mm0, 116(%%"REG_d")\n\t"
621 // this code is broken, there is nothing modifying the z flag above.
622 #if 0
623 " jnz .L01\n\t"
625 /* Phase 7*/
626 /* Code below is coded in scalar mode. Should be optimized */
628 " movd (%%"REG_c"), %%mm0\n\t"
629 " pfadd 4(%%"REG_c"), %%mm0\n\t"
630 " movd %%mm0, 1024(%%"REG_S")\n\t"
632 " movd (%%"REG_c"), %%mm0\n\t"
633 " pfsub 4(%%"REG_c"), %%mm0\n\t"
634 " pfmul 120(%%"REG_b"), %%mm0\n\t"
635 " movd %%mm0, (%%"REG_S")\n\t"
636 " movd %%mm0, (%%"REG_D")\n\t"
638 " movd 12(%%"REG_c"), %%mm0\n\t"
639 " pfsub 8(%%"REG_c"), %%mm0\n\t"
640 " pfmul 120(%%"REG_b"), %%mm0\n\t"
641 " movd %%mm0, 512(%%"REG_D")\n\t"
642 " pfadd 12(%%"REG_c"), %%mm0\n\t"
643 " pfadd 8(%%"REG_c"), %%mm0\n\t"
644 " movd %%mm0, 512(%%"REG_S")\n\t"
646 " movd 16(%%"REG_c"), %%mm0\n\t"
647 " pfsub 20(%%"REG_c"), %%mm0\n\t"
648 " pfmul 120(%%"REG_b"), %%mm0\n\t"
649 " movq %%mm0, %%mm3\n\t"
651 " movd 28(%%"REG_c"), %%mm0\n\t"
652 " pfsub 24(%%"REG_c"), %%mm0\n\t"
653 " pfmul 120(%%"REG_b"), %%mm0\n\t"
654 " movd %%mm0, 768(%%"REG_D")\n\t"
655 " movq %%mm0, %%mm2\n\t"
657 " pfadd 24(%%"REG_c"), %%mm0\n\t"
658 " pfadd 28(%%"REG_c"), %%mm0\n\t"
659 " movq %%mm0, %%mm1\n\t"
661 " pfadd 16(%%"REG_c"), %%mm0\n\t"
662 " pfadd 20(%%"REG_c"), %%mm0\n\t"
663 " movd %%mm0, 768(%%"REG_S")\n\t"
664 " pfadd %%mm3, %%mm1\n\t"
665 " movd %%mm1, 256(%%"REG_S")\n\t"
666 " pfadd %%mm3, %%mm2\n\t"
667 " movd %%mm2, 256(%%"REG_D")\n\t"
669 /* Phase 8*/
671 " movq 32(%%"REG_d"), %%mm0\n\t"
672 " movq 48(%%"REG_d"), %%mm1\n\t"
673 " pfadd 48(%%"REG_d"), %%mm0\n\t"
674 " pfadd 40(%%"REG_d"), %%mm1\n\t"
675 " movd %%mm0, 896(%%"REG_S")\n\t"
676 " movd %%mm1, 640(%%"REG_S")\n\t"
677 " psrlq $32, %%mm0\n\t"
678 " psrlq $32, %%mm1\n\t"
679 " movd %%mm0, 128(%%"REG_D")\n\t"
680 " movd %%mm1, 384(%%"REG_D")\n\t"
682 " movd 40(%%"REG_d"), %%mm0\n\t"
683 " pfadd 56(%%"REG_d"), %%mm0\n\t"
684 " movd %%mm0, 384(%%"REG_S")\n\t"
686 " movd 56(%%"REG_d"), %%mm0\n\t"
687 " pfadd 36(%%"REG_d"), %%mm0\n\t"
688 " movd %%mm0, 128(%%"REG_S")\n\t"
690 " movd 60(%%"REG_d"), %%mm0\n\t"
691 " movd %%mm0, 896(%%"REG_D")\n\t"
692 " pfadd 44(%%"REG_d"), %%mm0\n\t"
693 " movd %%mm0, 640(%%"REG_D")\n\t"
695 " movq 96(%%"REG_d"), %%mm0\n\t"
696 " movq 112(%%"REG_d"), %%mm2\n\t"
697 " movq 104(%%"REG_d"), %%mm4\n\t"
698 " pfadd 112(%%"REG_d"), %%mm0\n\t"
699 " pfadd 104(%%"REG_d"), %%mm2\n\t"
700 " pfadd 120(%%"REG_d"), %%mm4\n\t"
701 " movq %%mm0, %%mm1\n\t"
702 " movq %%mm2, %%mm3\n\t"
703 " movq %%mm4, %%mm5\n\t"
704 " pfadd 64(%%"REG_d"), %%mm0\n\t"
705 " pfadd 80(%%"REG_d"), %%mm2\n\t"
706 " pfadd 72(%%"REG_d"), %%mm4\n\t"
707 " movd %%mm0, 960(%%"REG_S")\n\t"
708 " movd %%mm2, 704(%%"REG_S")\n\t"
709 " movd %%mm4, 448(%%"REG_S")\n\t"
710 " psrlq $32, %%mm0\n\t"
711 " psrlq $32, %%mm2\n\t"
712 " psrlq $32, %%mm4\n\t"
713 " movd %%mm0, 64(%%"REG_D")\n\t"
714 " movd %%mm2, 320(%%"REG_D")\n\t"
715 " movd %%mm4, 576(%%"REG_D")\n\t"
716 " pfadd 80(%%"REG_d"), %%mm1\n\t"
717 " pfadd 72(%%"REG_d"), %%mm3\n\t"
718 " pfadd 88(%%"REG_d"), %%mm5\n\t"
719 " movd %%mm1, 832(%%"REG_S")\n\t"
720 " movd %%mm3, 576(%%"REG_S")\n\t"
721 " movd %%mm5, 320(%%"REG_S")\n\t"
722 " psrlq $32, %%mm1\n\t"
723 " psrlq $32, %%mm3\n\t"
724 " psrlq $32, %%mm5\n\t"
725 " movd %%mm1, 192(%%"REG_D")\n\t"
726 " movd %%mm3, 448(%%"REG_D")\n\t"
727 " movd %%mm5, 704(%%"REG_D")\n\t"
729 " movd 120(%%"REG_d"), %%mm0\n\t"
730 " pfadd 100(%%"REG_d"), %%mm0\n\t"
731 " movq %%mm0, %%mm1\n\t"
732 " pfadd 88(%%"REG_d"), %%mm0\n\t"
733 " movd %%mm0, 192(%%"REG_S")\n\t"
734 " pfadd 68(%%"REG_d"), %%mm1\n\t"
735 " movd %%mm1, 64(%%"REG_S")\n\t"
737 " movd 124(%%"REG_d"), %%mm0\n\t"
738 " movd %%mm0, 960(%%"REG_D")\n\t"
739 " pfadd 92(%%"REG_d"), %%mm0\n\t"
740 " movd %%mm0, 832(%%"REG_D")\n\t"
742 " jmp .L_bye\n\t"
743 ".L01:\n\t"
744 #endif
745 /* Phase 9*/
747 " movq (%%"REG_c"), %%mm0\n\t"
748 " movq %%mm0, %%mm1\n\t"
749 " pxor %%mm7, %%mm1\n\t"
750 " pfacc %%mm1, %%mm0\n\t"
751 " pfmul %%mm6, %%mm0\n\t"
752 " pf2id %%mm0, %%mm0\n\t"
753 " packssdw %%mm0, %%mm0\n\t"
754 " movd %%mm0, %%"REG_a"\n\t"
755 " movw %%ax, 512(%%"REG_S")\n\t"
756 " shr $16, %%"REG_a"\n\t"
757 " movw %%ax, (%%"REG_S")\n\t"
759 " movd 12(%%"REG_c"), %%mm0\n\t"
760 " pfsub 8(%%"REG_c"), %%mm0\n\t"
761 " pfmul 120(%%"REG_b"), %%mm0\n\t"
762 " pf2id %%mm0, %%mm7\n\t"
763 " packssdw %%mm7, %%mm7\n\t"
764 " movd %%mm7, %%"REG_a"\n\t"
765 " movw %%ax, 256(%%"REG_D")\n\t"
766 " pfadd 12(%%"REG_c"), %%mm0\n\t"
767 " pfadd 8(%%"REG_c"), %%mm0\n\t"
768 " pf2id %%mm0, %%mm0\n\t"
769 " packssdw %%mm0, %%mm0\n\t"
770 " movd %%mm0, %%"REG_a"\n\t"
771 " movw %%ax, 256(%%"REG_S")\n\t"
773 " movd 16(%%"REG_c"), %%mm3\n\t"
774 " pfsub 20(%%"REG_c"), %%mm3\n\t"
775 " pfmul 120(%%"REG_b"), %%mm3\n\t"
776 " movq %%mm3, %%mm2\n\t"
778 " movd 28(%%"REG_c"), %%mm2\n\t"
779 " pfsub 24(%%"REG_c"), %%mm2\n\t"
780 " pfmul 120(%%"REG_b"), %%mm2\n\t"
781 " movq %%mm2, %%mm1\n\t"
783 " pf2id %%mm2, %%mm7\n\t"
784 " packssdw %%mm7, %%mm7\n\t"
785 " movd %%mm7, %%"REG_a"\n\t"
786 " movw %%ax, 384(%%"REG_D")\n\t"
788 " pfadd 24(%%"REG_c"), %%mm1\n\t"
789 " pfadd 28(%%"REG_c"), %%mm1\n\t"
790 " movq %%mm1, %%mm0\n\t"
792 " pfadd 16(%%"REG_c"), %%mm0\n\t"
793 " pfadd 20(%%"REG_c"), %%mm0\n\t"
794 " pf2id %%mm0, %%mm0\n\t"
795 " packssdw %%mm0, %%mm0\n\t"
796 " movd %%mm0, %%"REG_a"\n\t"
797 " movw %%ax, 384(%%"REG_S")\n\t"
798 " pfadd %%mm3, %%mm1\n\t"
799 " pf2id %%mm1, %%mm1\n\t"
800 " packssdw %%mm1, %%mm1\n\t"
801 " movd %%mm1, %%"REG_a"\n\t"
802 " movw %%ax, 128(%%"REG_S")\n\t"
803 " pfadd %%mm3, %%mm2\n\t"
804 " pf2id %%mm2, %%mm2\n\t"
805 " packssdw %%mm2, %%mm2\n\t"
806 " movd %%mm2, %%"REG_a"\n\t"
807 " movw %%ax, 128(%%"REG_D")\n\t"
809 /* Phase 10*/
811 " movq 32(%%"REG_d"), %%mm0\n\t"
812 " movq 48(%%"REG_d"), %%mm1\n\t"
813 " pfadd 48(%%"REG_d"), %%mm0\n\t"
814 " pfadd 40(%%"REG_d"), %%mm1\n\t"
815 " pf2id %%mm0, %%mm0\n\t"
816 " pf2id %%mm1, %%mm1\n\t"
817 " packssdw %%mm0, %%mm0\n\t"
818 " packssdw %%mm1, %%mm1\n\t"
819 " movd %%mm0, %%"REG_a"\n\t"
820 " movd %%mm1, %%"REG_c"\n\t"
821 " movw %%ax, 448(%%"REG_S")\n\t"
822 " movw %%cx, 320(%%"REG_S")\n\t"
823 " shr $16, %%"REG_a"\n\t"
824 " shr $16, %%"REG_c"\n\t"
825 " movw %%ax, 64(%%"REG_D")\n\t"
826 " movw %%cx, 192(%%"REG_D")\n\t"
828 " movd 40(%%"REG_d"), %%mm3\n\t"
829 " movd 56(%%"REG_d"), %%mm4\n\t"
830 " movd 60(%%"REG_d"), %%mm0\n\t"
831 " movd 44(%%"REG_d"), %%mm2\n\t"
832 " movd 120(%%"REG_d"), %%mm5\n\t"
833 " punpckldq %%mm4, %%mm3\n\t"
834 " punpckldq 124(%%"REG_d"), %%mm0\n\t"
835 " pfadd 100(%%"REG_d"), %%mm5\n\t"
836 " punpckldq 36(%%"REG_d"), %%mm4\n\t"
837 " punpckldq 92(%%"REG_d"), %%mm2\n\t"
838 " movq %%mm5, %%mm6\n\t"
839 " pfadd %%mm4, %%mm3\n\t"
840 " pf2id %%mm0, %%mm1\n\t"
841 " pf2id %%mm3, %%mm3\n\t"
842 " packssdw %%mm1, %%mm1\n\t"
843 " packssdw %%mm3, %%mm3\n\t"
844 " pfadd 88(%%"REG_d"), %%mm5\n\t"
845 " movd %%mm1, %%"REG_a"\n\t"
846 " movd %%mm3, %%"REG_c"\n\t"
847 " movw %%ax, 448(%%"REG_D")\n\t"
848 " movw %%cx, 192(%%"REG_S")\n\t"
849 " pf2id %%mm5, %%mm5\n\t"
850 " packssdw %%mm5, %%mm5\n\t"
851 " shr $16, %%"REG_a"\n\t"
852 " shr $16, %%"REG_c"\n\t"
853 " movd %%mm5, %%"REG_b"\n\t"
854 " movw %%bx, 96(%%"REG_S")\n\t"
855 " movw %%ax, 480(%%"REG_D")\n\t"
856 " movw %%cx, 64(%%"REG_S")\n\t"
857 " pfadd %%mm2, %%mm0\n\t"
858 " pf2id %%mm0, %%mm0\n\t"
859 " packssdw %%mm0, %%mm0\n\t"
860 " movd %%mm0, %%"REG_a"\n\t"
861 " pfadd 68(%%"REG_d"), %%mm6\n\t"
862 " movw %%ax, 320(%%"REG_D")\n\t"
863 " shr $16, %%"REG_a"\n\t"
864 " pf2id %%mm6, %%mm6\n\t"
865 " packssdw %%mm6, %%mm6\n\t"
866 " movd %%mm6, %%"REG_b"\n\t"
867 " movw %%ax, 416(%%"REG_D")\n\t"
868 " movw %%bx, 32(%%"REG_S")\n\t"
870 " movq 96(%%"REG_d"), %%mm0\n\t"
871 " movq 112(%%"REG_d"), %%mm2\n\t"
872 " movq 104(%%"REG_d"), %%mm4\n\t"
873 " pfadd %%mm2, %%mm0\n\t"
874 " pfadd %%mm4, %%mm2\n\t"
875 " pfadd 120(%%"REG_d"), %%mm4\n\t"
876 " movq %%mm0, %%mm1\n\t"
877 " movq %%mm2, %%mm3\n\t"
878 " movq %%mm4, %%mm5\n\t"
879 " pfadd 64(%%"REG_d"), %%mm0\n\t"
880 " pfadd 80(%%"REG_d"), %%mm2\n\t"
881 " pfadd 72(%%"REG_d"), %%mm4\n\t"
882 " pf2id %%mm0, %%mm0\n\t"
883 " pf2id %%mm2, %%mm2\n\t"
884 " pf2id %%mm4, %%mm4\n\t"
885 " packssdw %%mm0, %%mm0\n\t"
886 " packssdw %%mm2, %%mm2\n\t"
887 " packssdw %%mm4, %%mm4\n\t"
888 " movd %%mm0, %%"REG_a"\n\t"
889 " movd %%mm2, %%"REG_c"\n\t"
890 " movd %%mm4, %%"REG_b"\n\t"
891 " movw %%ax, 480(%%"REG_S")\n\t"
892 " movw %%cx, 352(%%"REG_S")\n\t"
893 " movw %%bx, 224(%%"REG_S")\n\t"
894 " shr $16, %%"REG_a"\n\t"
895 " shr $16, %%"REG_c"\n\t"
896 " shr $16, %%"REG_b"\n\t"
897 " movw %%ax, 32(%%"REG_D")\n\t"
898 " movw %%cx, 160(%%"REG_D")\n\t"
899 " movw %%bx, 288(%%"REG_D")\n\t"
900 " pfadd 80(%%"REG_d"), %%mm1\n\t"
901 " pfadd 72(%%"REG_d"), %%mm3\n\t"
902 " pfadd 88(%%"REG_d"), %%mm5\n\t"
903 " pf2id %%mm1, %%mm1\n\t"
904 " pf2id %%mm3, %%mm3\n\t"
905 " pf2id %%mm5, %%mm5\n\t"
906 " packssdw %%mm1, %%mm1\n\t"
907 " packssdw %%mm3, %%mm3\n\t"
908 " packssdw %%mm5, %%mm5\n\t"
909 " movd %%mm1, %%"REG_a"\n\t"
910 " movd %%mm3, %%"REG_c"\n\t"
911 " movd %%mm5, %%"REG_b"\n\t"
912 " movw %%ax, 416(%%"REG_S")\n\t"
913 " movw %%cx, 288(%%"REG_S")\n\t"
914 " movw %%bx, 160(%%"REG_S")\n\t"
915 " shr $16, %%"REG_a"\n\t"
916 " shr $16, %%"REG_c"\n\t"
917 " shr $16, %%"REG_b"\n\t"
918 " movw %%ax, 96(%%"REG_D")\n\t"
919 " movw %%cx, 224(%%"REG_D")\n\t"
920 " movw %%bx, 352(%%"REG_D")\n\t"
922 " movsw\n\t"
924 ".L_bye:\n\t"
925 " femms\n\t"
927 :"m"(a),"m"(b),"m"(c),"m"(tmp[0])
928 :"memory","%eax","%ebx","%ecx","%edx","%esi","%edi");