Some people confuse vidix with kernel drivers, so let's add a note about it
[mplayer/glamo.git] / liba52 / resample_mmx.c
blob6f45d88ea75abdb7d34bd798abaa9170882f9112
// MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL)

/* optimization TODO / NOTES
   movntq is slightly faster (0.5% with the current test.c benchmark)
   (but that's just test.c, so it needs to be tested in reality)
   and it would mean (C / MMX2 / MMX / 3DNOW) versions
*/
10 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
11 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
12 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
13 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
15 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
16 int32_t * f = (int32_t *) _f;
17 asm volatile(
18 "movl $-512, %%esi \n\t"
19 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
20 "movq "MANGLE(wm1100)", %%mm3 \n\t"
21 "movq "MANGLE(wm0101)", %%mm4 \n\t"
22 "movq "MANGLE(wm1010)", %%mm5 \n\t"
23 "pxor %%mm6, %%mm6 \n\t"
24 "1: \n\t"
25 "movq (%1, %%esi, 2), %%mm0 \n\t"
26 "movq 8(%1, %%esi, 2), %%mm1 \n\t"
27 "leal (%%esi, %%esi, 4), %%edi \n\t"
28 "psubd %%mm7, %%mm0 \n\t"
29 "psubd %%mm7, %%mm1 \n\t"
30 "packssdw %%mm1, %%mm0 \n\t"
31 "movq %%mm0, %%mm1 \n\t"
32 "pand %%mm4, %%mm0 \n\t"
33 "pand %%mm5, %%mm1 \n\t"
34 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0
35 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0
36 "pand %%mm3, %%mm0 \n\t"
37 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0
38 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B
39 "pand %%mm3, %%mm1 \n\t"
40 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0
41 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0
42 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 B
43 "addl $8, %%esi \n\t"
44 " jnz 1b \n\t"
45 "emms \n\t"
46 :: "r" (s16+1280), "r" (f+256)
47 :"%esi", "%edi", "memory"
49 return 5*256;
52 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
53 int32_t * f = (int32_t *) _f;
54 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
55 #ifdef HAVE_SSE
56 asm volatile(
57 "movl $-1024, %%esi \n\t"
58 "1: \n\t"
59 "cvtps2pi (%1, %%esi), %%mm0 \n\t"
60 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
61 "movq %%mm0, %%mm1 \n\t"
62 "punpcklwd %%mm2, %%mm0 \n\t"
63 "punpckhwd %%mm2, %%mm1 \n\t"
64 "movq %%mm0, (%0, %%esi) \n\t"
65 "movq %%mm1, 8(%0, %%esi) \n\t"
66 "addl $16, %%esi \n\t"
67 " jnz 1b \n\t"
68 "emms \n\t"
69 :: "r" (s16+512), "r" (f+256)
70 :"%esi", "memory"
71 );*/
72 asm volatile(
73 "movl $-1024, %%esi \n\t"
74 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
75 "1: \n\t"
76 "movq (%1, %%esi), %%mm0 \n\t"
77 "movq 8(%1, %%esi), %%mm1 \n\t"
78 "movq 1024(%1, %%esi), %%mm2 \n\t"
79 "movq 1032(%1, %%esi), %%mm3 \n\t"
80 "psubd %%mm7, %%mm0 \n\t"
81 "psubd %%mm7, %%mm1 \n\t"
82 "psubd %%mm7, %%mm2 \n\t"
83 "psubd %%mm7, %%mm3 \n\t"
84 "packssdw %%mm1, %%mm0 \n\t"
85 "packssdw %%mm3, %%mm2 \n\t"
86 "movq %%mm0, %%mm1 \n\t"
87 "punpcklwd %%mm2, %%mm0 \n\t"
88 "punpckhwd %%mm2, %%mm1 \n\t"
89 "movq %%mm0, (%0, %%esi) \n\t"
90 "movq %%mm1, 8(%0, %%esi) \n\t"
91 "addl $16, %%esi \n\t"
92 " jnz 1b \n\t"
93 "emms \n\t"
94 :: "r" (s16+512), "r" (f+256)
95 :"%esi", "memory"
97 return 2*256;
100 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
101 int32_t * f = (int32_t *) _f;
102 asm volatile(
103 "movl $-1024, %%esi \n\t"
104 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
105 "pxor %%mm6, %%mm6 \n\t"
106 "movq %%mm7, %%mm5 \n\t"
107 "punpckldq %%mm6, %%mm5 \n\t"
108 "1: \n\t"
109 "movd (%1, %%esi), %%mm0 \n\t"
110 "punpckldq 2048(%1, %%esi), %%mm0\n\t"
111 "movd 1024(%1, %%esi), %%mm1 \n\t"
112 "punpckldq 4(%1, %%esi), %%mm1 \n\t"
113 "movd 2052(%1, %%esi), %%mm2 \n\t"
114 "movq %%mm7, %%mm3 \n\t"
115 "punpckldq 1028(%1, %%esi), %%mm3\n\t"
116 "movd 8(%1, %%esi), %%mm4 \n\t"
117 "punpckldq 2056(%1, %%esi), %%mm4\n\t"
118 "leal (%%esi, %%esi, 4), %%edi \n\t"
119 "sarl $1, %%edi \n\t"
120 "psubd %%mm7, %%mm0 \n\t"
121 "psubd %%mm7, %%mm1 \n\t"
122 "psubd %%mm5, %%mm2 \n\t"
123 "psubd %%mm7, %%mm3 \n\t"
124 "psubd %%mm7, %%mm4 \n\t"
125 "packssdw %%mm6, %%mm0 \n\t"
126 "packssdw %%mm2, %%mm1 \n\t"
127 "packssdw %%mm4, %%mm3 \n\t"
128 "movq %%mm0, (%0, %%edi) \n\t"
129 "movq %%mm1, 8(%0, %%edi) \n\t"
130 "movq %%mm3, 16(%0, %%edi) \n\t"
132 "movd 1032(%1, %%esi), %%mm1 \n\t"
133 "punpckldq 12(%1, %%esi), %%mm1\n\t"
134 "movd 2060(%1, %%esi), %%mm2 \n\t"
135 "movq %%mm7, %%mm3 \n\t"
136 "punpckldq 1036(%1, %%esi), %%mm3\n\t"
137 "pxor %%mm0, %%mm0 \n\t"
138 "psubd %%mm7, %%mm1 \n\t"
139 "psubd %%mm5, %%mm2 \n\t"
140 "psubd %%mm7, %%mm3 \n\t"
141 "packssdw %%mm1, %%mm0 \n\t"
142 "packssdw %%mm3, %%mm2 \n\t"
143 "movq %%mm0, 24(%0, %%edi) \n\t"
144 "movq %%mm2, 32(%0, %%edi) \n\t"
146 "addl $16, %%esi \n\t"
147 " jnz 1b \n\t"
148 "emms \n\t"
149 :: "r" (s16+1280), "r" (f+256)
150 :"%esi", "%edi", "memory"
152 return 5*256;
155 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
156 int32_t * f = (int32_t *) _f;
157 asm volatile(
158 "movl $-1024, %%esi \n\t"
159 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
160 "1: \n\t"
161 "movq (%1, %%esi), %%mm0 \n\t"
162 "movq 8(%1, %%esi), %%mm1 \n\t"
163 "movq 1024(%1, %%esi), %%mm2 \n\t"
164 "movq 1032(%1, %%esi), %%mm3 \n\t"
165 "psubd %%mm7, %%mm0 \n\t"
166 "psubd %%mm7, %%mm1 \n\t"
167 "psubd %%mm7, %%mm2 \n\t"
168 "psubd %%mm7, %%mm3 \n\t"
169 "packssdw %%mm1, %%mm0 \n\t"
170 "packssdw %%mm3, %%mm2 \n\t"
171 "movq 2048(%1, %%esi), %%mm3 \n\t"
172 "movq 2056(%1, %%esi), %%mm4 \n\t"
173 "movq 3072(%1, %%esi), %%mm5 \n\t"
174 "movq 3080(%1, %%esi), %%mm6 \n\t"
175 "psubd %%mm7, %%mm3 \n\t"
176 "psubd %%mm7, %%mm4 \n\t"
177 "psubd %%mm7, %%mm5 \n\t"
178 "psubd %%mm7, %%mm6 \n\t"
179 "packssdw %%mm4, %%mm3 \n\t"
180 "packssdw %%mm6, %%mm5 \n\t"
181 "movq %%mm0, %%mm1 \n\t"
182 "movq %%mm3, %%mm4 \n\t"
183 "punpcklwd %%mm2, %%mm0 \n\t"
184 "punpckhwd %%mm2, %%mm1 \n\t"
185 "punpcklwd %%mm5, %%mm3 \n\t"
186 "punpckhwd %%mm5, %%mm4 \n\t"
187 "movq %%mm0, %%mm2 \n\t"
188 "movq %%mm1, %%mm5 \n\t"
189 "punpckldq %%mm3, %%mm0 \n\t"
190 "punpckhdq %%mm3, %%mm2 \n\t"
191 "punpckldq %%mm4, %%mm1 \n\t"
192 "punpckhdq %%mm4, %%mm5 \n\t"
193 "movq %%mm0, (%0, %%esi,2) \n\t"
194 "movq %%mm2, 8(%0, %%esi,2) \n\t"
195 "movq %%mm1, 16(%0, %%esi,2) \n\t"
196 "movq %%mm5, 24(%0, %%esi,2) \n\t"
197 "addl $16, %%esi \n\t"
198 " jnz 1b \n\t"
199 "emms \n\t"
200 :: "r" (s16+1024), "r" (f+256)
201 :"%esi", "memory"
203 return 4*256;
206 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
207 int32_t * f = (int32_t *) _f;
208 asm volatile(
209 "movl $-1024, %%esi \n\t"
210 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
211 "1: \n\t"
212 "movd (%1, %%esi), %%mm0 \n\t"
213 "punpckldq 2048(%1, %%esi), %%mm0\n\t"
214 "movd 3072(%1, %%esi), %%mm1 \n\t"
215 "punpckldq 4096(%1, %%esi), %%mm1\n\t"
216 "movd 1024(%1, %%esi), %%mm2 \n\t"
217 "punpckldq 4(%1, %%esi), %%mm2 \n\t"
218 "movd 2052(%1, %%esi), %%mm3 \n\t"
219 "punpckldq 3076(%1, %%esi), %%mm3\n\t"
220 "movd 4100(%1, %%esi), %%mm4 \n\t"
221 "punpckldq 1028(%1, %%esi), %%mm4\n\t"
222 "movd 8(%1, %%esi), %%mm5 \n\t"
223 "punpckldq 2056(%1, %%esi), %%mm5\n\t"
224 "leal (%%esi, %%esi, 4), %%edi \n\t"
225 "sarl $1, %%edi \n\t"
226 "psubd %%mm7, %%mm0 \n\t"
227 "psubd %%mm7, %%mm1 \n\t"
228 "psubd %%mm7, %%mm2 \n\t"
229 "psubd %%mm7, %%mm3 \n\t"
230 "psubd %%mm7, %%mm4 \n\t"
231 "psubd %%mm7, %%mm5 \n\t"
232 "packssdw %%mm1, %%mm0 \n\t"
233 "packssdw %%mm3, %%mm2 \n\t"
234 "packssdw %%mm5, %%mm4 \n\t"
235 "movq %%mm0, (%0, %%edi) \n\t"
236 "movq %%mm2, 8(%0, %%edi) \n\t"
237 "movq %%mm4, 16(%0, %%edi) \n\t"
239 "movd 3080(%1, %%esi), %%mm0 \n\t"
240 "punpckldq 4104(%1, %%esi), %%mm0\n\t"
241 "movd 1032(%1, %%esi), %%mm1 \n\t"
242 "punpckldq 12(%1, %%esi), %%mm1\n\t"
243 "movd 2060(%1, %%esi), %%mm2 \n\t"
244 "punpckldq 3084(%1, %%esi), %%mm2\n\t"
245 "movd 4108(%1, %%esi), %%mm3 \n\t"
246 "punpckldq 1036(%1, %%esi), %%mm3\n\t"
247 "psubd %%mm7, %%mm0 \n\t"
248 "psubd %%mm7, %%mm1 \n\t"
249 "psubd %%mm7, %%mm2 \n\t"
250 "psubd %%mm7, %%mm3 \n\t"
251 "packssdw %%mm1, %%mm0 \n\t"
252 "packssdw %%mm3, %%mm2 \n\t"
253 "movq %%mm0, 24(%0, %%edi) \n\t"
254 "movq %%mm2, 32(%0, %%edi) \n\t"
256 "addl $16, %%esi \n\t"
257 " jnz 1b \n\t"
258 "emms \n\t"
259 :: "r" (s16+1280), "r" (f+256)
260 :"%esi", "%edi", "memory"
262 return 5*256;
265 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
266 int32_t * f = (int32_t *) _f;
267 asm volatile(
268 "movl $-1024, %%esi \n\t"
269 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
270 "pxor %%mm6, %%mm6 \n\t"
271 "1: \n\t"
272 "movq 1024(%1, %%esi), %%mm0 \n\t"
273 "movq 1032(%1, %%esi), %%mm1 \n\t"
274 "movq (%1, %%esi), %%mm2 \n\t"
275 "movq 8(%1, %%esi), %%mm3 \n\t"
276 "psubd %%mm7, %%mm0 \n\t"
277 "psubd %%mm7, %%mm1 \n\t"
278 "psubd %%mm7, %%mm2 \n\t"
279 "psubd %%mm7, %%mm3 \n\t"
280 "packssdw %%mm1, %%mm0 \n\t"
281 "packssdw %%mm3, %%mm2 \n\t"
282 "movq %%mm0, %%mm1 \n\t"
283 "punpcklwd %%mm2, %%mm0 \n\t"
284 "punpckhwd %%mm2, %%mm1 \n\t"
285 "leal (%%esi, %%esi, 2), %%edi \n\t"
286 "movq %%mm6, (%0, %%edi) \n\t"
287 "movd %%mm0, 8(%0, %%edi) \n\t"
288 "punpckhdq %%mm0, %%mm0 \n\t"
289 "movq %%mm6, 12(%0, %%edi) \n\t"
290 "movd %%mm0, 20(%0, %%edi) \n\t"
291 "movq %%mm6, 24(%0, %%edi) \n\t"
292 "movd %%mm1, 32(%0, %%edi) \n\t"
293 "punpckhdq %%mm1, %%mm1 \n\t"
294 "movq %%mm6, 36(%0, %%edi) \n\t"
295 "movd %%mm1, 44(%0, %%edi) \n\t"
296 "addl $16, %%esi \n\t"
297 " jnz 1b \n\t"
298 "emms \n\t"
299 :: "r" (s16+1536), "r" (f+256)
300 :"%esi", "%edi", "memory"
302 return 6*256;
305 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
306 int32_t * f = (int32_t *) _f;
307 asm volatile(
308 "movl $-1024, %%esi \n\t"
309 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
310 "pxor %%mm6, %%mm6 \n\t"
311 "1: \n\t"
312 "movq 1024(%1, %%esi), %%mm0 \n\t"
313 "movq 2048(%1, %%esi), %%mm1 \n\t"
314 "movq (%1, %%esi), %%mm5 \n\t"
315 "psubd %%mm7, %%mm0 \n\t"
316 "psubd %%mm7, %%mm1 \n\t"
317 "psubd %%mm7, %%mm5 \n\t"
318 "leal (%%esi, %%esi, 2), %%edi \n\t"
320 "pxor %%mm4, %%mm4 \n\t"
321 "packssdw %%mm5, %%mm0 \n\t" // FfAa
322 "packssdw %%mm4, %%mm1 \n\t" // 00Bb
323 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
324 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
325 "movq %%mm0, %%mm1 \n\t" // BAba
326 "punpckldq %%mm4, %%mm3 \n\t" // f0XX
327 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
328 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
330 "movq %%mm0, (%0, %%edi) \n\t" // 00ba
331 "punpckhdq %%mm4, %%mm0 \n\t" // F000
332 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
333 "movq %%mm0, 16(%0, %%edi) \n\t" // F000
334 "addl $8, %%esi \n\t"
335 " jnz 1b \n\t"
336 "emms \n\t"
337 :: "r" (s16+1536), "r" (f+256)
338 :"%esi", "%edi", "memory"
340 return 6*256;
343 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
344 int32_t * f = (int32_t *) _f;
345 asm volatile(
346 "movl $-1024, %%esi \n\t"
347 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
348 "pxor %%mm6, %%mm6 \n\t"
349 "1: \n\t"
350 "movq 1024(%1, %%esi), %%mm0 \n\t"
351 "movq 3072(%1, %%esi), %%mm1 \n\t"
352 "movq 2048(%1, %%esi), %%mm4 \n\t"
353 "movq (%1, %%esi), %%mm5 \n\t"
354 "psubd %%mm7, %%mm0 \n\t"
355 "psubd %%mm7, %%mm1 \n\t"
356 "psubd %%mm7, %%mm4 \n\t"
357 "psubd %%mm7, %%mm5 \n\t"
358 "leal (%%esi, %%esi, 2), %%edi \n\t"
360 "packssdw %%mm4, %%mm0 \n\t" // EeAa
361 "packssdw %%mm5, %%mm1 \n\t" // FfBb
362 "movq %%mm0, %%mm2 \n\t" // EeAa
363 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
364 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe
365 "movq %%mm0, %%mm1 \n\t" // BAba
366 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
367 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
369 "movq %%mm0, (%0, %%edi) \n\t"
370 "punpckhdq %%mm2, %%mm0 \n\t" // FE00
371 "punpckldq %%mm1, %%mm2 \n\t" // BAfe
372 "movq %%mm2, 8(%0, %%edi) \n\t"
373 "movq %%mm0, 16(%0, %%edi) \n\t"
374 "addl $8, %%esi \n\t"
375 " jnz 1b \n\t"
376 "emms \n\t"
377 :: "r" (s16+1536), "r" (f+256)
378 :"%esi", "%edi", "memory"
380 return 6*256;
383 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
384 int32_t * f = (int32_t *) _f;
385 asm volatile(
386 "movl $-1024, %%esi \n\t"
387 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
388 // "pxor %%mm6, %%mm6 \n\t"
389 "1: \n\t"
390 "movq 1024(%1, %%esi), %%mm0 \n\t"
391 "movq 2048(%1, %%esi), %%mm1 \n\t"
392 "movq 3072(%1, %%esi), %%mm2 \n\t"
393 "movq 4096(%1, %%esi), %%mm3 \n\t"
394 "movq (%1, %%esi), %%mm5 \n\t"
395 "psubd %%mm7, %%mm0 \n\t"
396 "psubd %%mm7, %%mm1 \n\t"
397 "psubd %%mm7, %%mm2 \n\t"
398 "psubd %%mm7, %%mm3 \n\t"
399 "psubd %%mm7, %%mm5 \n\t"
400 "leal (%%esi, %%esi, 2), %%edi \n\t"
402 "packssdw %%mm2, %%mm0 \n\t" // CcAa
403 "packssdw %%mm3, %%mm1 \n\t" // DdBb
404 "packssdw %%mm5, %%mm5 \n\t" // FfFf
405 "movq %%mm0, %%mm2 \n\t" // CcAa
406 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
407 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
408 "pxor %%mm4, %%mm4 \n\t" // 0000
409 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0
410 "movq %%mm0, %%mm1 \n\t" // BAba
411 "movq %%mm4, %%mm3 \n\t" // F0f0
412 "punpckldq %%mm2, %%mm0 \n\t" // dcba
413 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
414 "punpckldq %%mm1, %%mm4 \n\t" // BAf0
415 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC
417 "movq %%mm0, (%0, %%edi) \n\t"
418 "movq %%mm4, 8(%0, %%edi) \n\t"
419 "movq %%mm2, 16(%0, %%edi) \n\t"
420 "addl $8, %%esi \n\t"
421 " jnz 1b \n\t"
422 "emms \n\t"
423 :: "r" (s16+1536), "r" (f+256)
424 :"%esi", "%edi", "memory"
426 return 6*256;
429 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
430 int32_t * f = (int32_t *) _f;
431 asm volatile(
432 "movl $-1024, %%esi \n\t"
433 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
434 // "pxor %%mm6, %%mm6 \n\t"
435 "1: \n\t"
436 "movq 1024(%1, %%esi), %%mm0 \n\t"
437 "movq 3072(%1, %%esi), %%mm1 \n\t"
438 "movq 4096(%1, %%esi), %%mm2 \n\t"
439 "movq 5120(%1, %%esi), %%mm3 \n\t"
440 "movq 2048(%1, %%esi), %%mm4 \n\t"
441 "movq (%1, %%esi), %%mm5 \n\t"
442 "psubd %%mm7, %%mm0 \n\t"
443 "psubd %%mm7, %%mm1 \n\t"
444 "psubd %%mm7, %%mm2 \n\t"
445 "psubd %%mm7, %%mm3 \n\t"
446 "psubd %%mm7, %%mm4 \n\t"
447 "psubd %%mm7, %%mm5 \n\t"
448 "leal (%%esi, %%esi, 2), %%edi \n\t"
450 "packssdw %%mm2, %%mm0 \n\t" // CcAa
451 "packssdw %%mm3, %%mm1 \n\t" // DdBb
452 "packssdw %%mm4, %%mm4 \n\t" // EeEe
453 "packssdw %%mm5, %%mm5 \n\t" // FfFf
454 "movq %%mm0, %%mm2 \n\t" // CcAa
455 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
456 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
457 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
458 "movq %%mm0, %%mm1 \n\t" // BAba
459 "movq %%mm4, %%mm3 \n\t" // FEfe
460 "punpckldq %%mm2, %%mm0 \n\t" // dcba
461 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
462 "punpckldq %%mm1, %%mm4 \n\t" // BAfe
463 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
465 "movq %%mm0, (%0, %%edi) \n\t"
466 "movq %%mm4, 8(%0, %%edi) \n\t"
467 "movq %%mm2, 16(%0, %%edi) \n\t"
468 "addl $8, %%esi \n\t"
469 " jnz 1b \n\t"
470 "emms \n\t"
471 :: "r" (s16+1536), "r" (f+256)
472 :"%esi", "%edi", "memory"
474 return 6*256;
478 static void* a52_resample_MMX(int flags, int ch){
479 switch (flags) {
480 case A52_MONO:
481 if(ch==5) return a52_resample_MONO_to_5_MMX;
482 break;
483 case A52_CHANNEL:
484 case A52_STEREO:
485 case A52_DOLBY:
486 if(ch==2) return a52_resample_STEREO_to_2_MMX;
487 break;
488 case A52_3F:
489 if(ch==5) return a52_resample_3F_to_5_MMX;
490 break;
491 case A52_2F2R:
492 if(ch==4) return a52_resample_2F_2R_to_4_MMX;
493 break;
494 case A52_3F2R:
495 if(ch==5) return a52_resample_3F_2R_to_5_MMX;
496 break;
497 case A52_MONO | A52_LFE:
498 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
499 break;
500 case A52_CHANNEL | A52_LFE:
501 case A52_STEREO | A52_LFE:
502 case A52_DOLBY | A52_LFE:
503 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
504 break;
505 case A52_3F | A52_LFE:
506 if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
507 break;
508 case A52_2F2R | A52_LFE:
509 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
510 break;
511 case A52_3F2R | A52_LFE:
512 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
513 break;
515 return NULL;