Add explanatory comments to the #endif part of multiple inclusion guards.
[mplayer/greg.git] / liba52 / resample_mmx.c
blob a08a7a8b0c81521059fd47077f2828697213f7b5
1 /*
2 * resample_mmx.c
3 * Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
5 * This file is part of a52dec, a free ATSC A-52 stream decoder.
6 * See http://liba52.sourceforge.net/ for updates.
8 * File added for use with MPlayer and not part of original a52dec.
10 * a52dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * a52dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
25 /* optimization TODO / NOTES
26 * movntq is slightly faster (0.5% with the current test.c benchmark)
27 * (but that is just test.c so that needs to be tested in reality)
28 * and it would mean (C / MMX2 / MMX / 3DNOW) versions.
29 */
31 #include "a52_internal.h"
/*
 * 64-bit, 8-byte aligned constants used as MMX memory operands by the asm
 * blocks in this file (they are only referenced through MANGLE()).
 *
 * magicF2W : two copies of 0x43c00000, which is the IEEE-754 single precision
 *            bit pattern of 384.0f. The asm subtracts it (psubd) from the raw
 *            bit patterns of the input floats before packing to 16 bit.
 *            NOTE(review): presumably the decoder output is biased by 384 so
 *            that the low mantissa bits already hold the 16-bit sample; confirm
 *            against the caller.
 * wm1010   : keeps 16-bit words 1 and 3 of a quadword, clears words 0 and 2.
 * wm0101   : keeps 16-bit words 0 and 2 of a quadword, clears words 1 and 3.
 * wm1100   : keeps the upper 32 bits of a quadword, clears the lower 32 bits.
 *
 * NOTE(review): attribute_used is defined outside this file; presumably it
 * stops the compiler from discarding these symbols, which no C code
 * references. Confirm against its definition.
 */
34 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
35 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
36 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
37 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
/*
 * Expand one plane of 256 input values into 256 interleaved frames of five
 * int16 words each.
 *
 * _f  : input; the raw 32-bit patterns of _f[0] .. _f[255] are read.
 * s16 : output; 5*256 int16 words are written.
 * Returns 5*256.
 *
 * Both pointers are handed to the asm pointing at the END of their buffers
 * and REG_S counts from a negative value up to zero, so the loop terminates
 * on the zero flag set by the final "add" (jnz). Each iteration converts four
 * input values (A, B, C, D): magicF2W is subtracted from the raw bit patterns
 * and the results are packed to int16 with signed saturation. The converted
 * value is stored in slot 4 (the fifth word) of its frame; slots 0..3 are
 * written as zero.
 *
 * The trailing comments on the stores list the 16-bit words in memory order.
 */
39 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
40 int32_t * f = (int32_t *) _f;
41 asm volatile(
// REG_S is scaled by 2 when addressing the input, so -512 covers 1024 bytes.
42 "mov $-512, %%"REG_S" \n\t"
43 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
44 "movq "MANGLE(wm1100)", %%mm3 \n\t"
45 "movq "MANGLE(wm0101)", %%mm4 \n\t"
46 "movq "MANGLE(wm1010)", %%mm5 \n\t"
// mm6 stays zero for the whole loop and supplies the empty slots.
47 "pxor %%mm6, %%mm6 \n\t"
48 "1: \n\t"
49 "movq (%1, %%"REG_S", 2), %%mm0 \n\t"
50 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t"
// REG_D = 5*REG_S: byte offset into the output (10 output bytes per 4-byte input).
51 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
52 "psubd %%mm7, %%mm0 \n\t"
53 "psubd %%mm7, %%mm1 \n\t"
54 "packssdw %%mm1, %%mm0 \n\t"
// mm0 keeps samples A and C (even words), mm1 keeps B and D (odd words).
55 "movq %%mm0, %%mm1 \n\t"
56 "pand %%mm4, %%mm0 \n\t"
57 "pand %%mm5, %%mm1 \n\t"
58 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0
59 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0
60 "pand %%mm3, %%mm0 \n\t"
61 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0
62 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B
63 "pand %%mm3, %%mm1 \n\t"
64 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0
65 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0
66 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 D
67 "add $8, %%"REG_S" \n\t"
68 " jnz 1b \n\t"
// Leave MMX state so that x87 floating point code can run afterwards.
69 "emms \n\t"
70 :: "r" (s16+1280), "r" (f+256)
71 :"%"REG_S, "%"REG_D, "memory"
73 return 5*256;
/*
 * Interleave two planes of 256 input values into 256 two-word int16 frames.
 *
 * _f  : input; plane 0 is _f[0..255], plane 1 is _f[256..511] (the asm reads
 *       at byte offsets 0 and 1024 from the running position).
 * s16 : output; 2*256 int16 words are written as {plane0, plane1} pairs.
 * Returns 2*256.
 *
 * REG_S runs from -1024 to 0 in steps of 16 bytes and indexes both the input
 * planes and the output, because each iteration reads 16 bytes per plane and
 * writes 16 bytes in total. magicF2W is subtracted from the raw 32-bit
 * patterns, the results are packed to int16 with signed saturation, and
 * punpcklwd/punpckhwd interleave the two planes word by word.
 *
 * The SSE variant below is kept commented out, as its own note explains.
 */
76 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
77 int32_t * f = (int32_t *) _f;
78 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
79 #ifdef HAVE_SSE
80 asm volatile(
81 "mov $-1024, %%"REG_S" \n\t"
82 "1: \n\t"
83 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
84 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t"
85 "movq %%mm0, %%mm1 \n\t"
86 "punpcklwd %%mm2, %%mm0 \n\t"
87 "punpckhwd %%mm2, %%mm1 \n\t"
88 "movq %%mm0, (%0, %%"REG_S") \n\t"
89 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
90 "add $16, %%"REG_S" \n\t"
91 " jnz 1b \n\t"
92 "emms \n\t"
93 :: "r" (s16+512), "r" (f+256)
94 :"%"REG_S, "memory"
95 );*/
96 asm volatile(
97 "mov $-1024, %%"REG_S" \n\t"
98 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
99 "1: \n\t"
// Load four values from plane 0 (mm0, mm1) and four from plane 1 (mm2, mm3).
100 "movq (%1, %%"REG_S"), %%mm0 \n\t"
101 "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
102 "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
103 "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
104 "psubd %%mm7, %%mm0 \n\t"
105 "psubd %%mm7, %%mm1 \n\t"
106 "psubd %%mm7, %%mm2 \n\t"
107 "psubd %%mm7, %%mm3 \n\t"
108 "packssdw %%mm1, %%mm0 \n\t"
109 "packssdw %%mm3, %%mm2 \n\t"
// Interleave: mm0 = frames 0 and 1, mm1 = frames 2 and 3.
110 "movq %%mm0, %%mm1 \n\t"
111 "punpcklwd %%mm2, %%mm0 \n\t"
112 "punpckhwd %%mm2, %%mm1 \n\t"
113 "movq %%mm0, (%0, %%"REG_S") \n\t"
114 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
115 "add $16, %%"REG_S" \n\t"
116 " jnz 1b \n\t"
117 "emms \n\t"
118 :: "r" (s16+512), "r" (f+256)
119 :"%"REG_S, "memory"
121 return 2*256;
/*
 * Interleave three planes of 256 input values into 256 five-word int16
 * frames.
 *
 * _f  : input; planes 0, 1 and 2 start at _f, _f+256 and _f+512 (byte
 *       offsets 0, 1024 and 2048 in the asm).
 * s16 : output; 5*256 int16 words are written.
 * Returns 5*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane0, plane2, 0, 0, plane1 }
 *
 * Each iteration handles four samples (16 input bytes per plane) and writes
 * 40 output bytes. REG_S is the negative input byte offset; REG_D is the
 * matching output byte offset, REG_S*5/2.
 *
 * The zero slots are produced without extra instructions:
 *   - mm5 holds magicF2W in its low dword and 0 in its high dword, so
 *     subtracting it from a register filled by movd (high dword 0) leaves
 *     the high dword at 0.
 *   - mm3 is preloaded with magicF2W, so after "psubd mm7" its low dword is 0.
 */
124 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
125 int32_t * f = (int32_t *) _f;
126 asm volatile(
127 "mov $-1024, %%"REG_S" \n\t"
128 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
129 "pxor %%mm6, %%mm6 \n\t"
// mm5 = { magicF2W (low dword), 0 (high dword) }
130 "movq %%mm7, %%mm5 \n\t"
131 "punpckldq %%mm6, %%mm5 \n\t"
132 "1: \n\t"
// Gather samples 0..2 in the order in which they are stored.
133 "movd (%1, %%"REG_S"), %%mm0 \n\t"
134 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
135 "movd 1024(%1, %%"REG_S"), %%mm1\n\t"
136 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t"
137 "movd 2052(%1, %%"REG_S"), %%mm2\n\t"
138 "movq %%mm7, %%mm3 \n\t"
139 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t"
140 "movd 8(%1, %%"REG_S"), %%mm4 \n\t"
141 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t"
// REG_D = REG_S * 5 / 2 (10 output bytes per 4-byte input value)
142 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
143 "sar $1, %%"REG_D" \n\t"
144 "psubd %%mm7, %%mm0 \n\t"
145 "psubd %%mm7, %%mm1 \n\t"
146 "psubd %%mm5, %%mm2 \n\t"
147 "psubd %%mm7, %%mm3 \n\t"
148 "psubd %%mm7, %%mm4 \n\t"
149 "packssdw %%mm6, %%mm0 \n\t"
150 "packssdw %%mm2, %%mm1 \n\t"
151 "packssdw %%mm4, %%mm3 \n\t"
152 "movq %%mm0, (%0, %%"REG_D") \n\t"
153 "movq %%mm1, 8(%0, %%"REG_D") \n\t"
154 "movq %%mm3, 16(%0, %%"REG_D") \n\t"
// Remainder of sample 2 and all of sample 3.
155 "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
156 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
157 "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
158 "movq %%mm7, %%mm3 \n\t"
159 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
160 "pxor %%mm0, %%mm0 \n\t"
161 "psubd %%mm7, %%mm1 \n\t"
162 "psubd %%mm5, %%mm2 \n\t"
163 "psubd %%mm7, %%mm3 \n\t"
164 "packssdw %%mm1, %%mm0 \n\t"
165 "packssdw %%mm3, %%mm2 \n\t"
166 "movq %%mm0, 24(%0, %%"REG_D") \n\t"
167 "movq %%mm2, 32(%0, %%"REG_D") \n\t"
169 "add $16, %%"REG_S" \n\t"
170 " jnz 1b \n\t"
171 "emms \n\t"
172 :: "r" (s16+1280), "r" (f+256)
173 :"%"REG_S, "%"REG_D, "memory"
175 return 5*256;
/*
 * Interleave four planes of 256 input values into 256 four-word int16
 * frames.
 *
 * _f  : input; planes 0..3 start at _f, _f+256, _f+512 and _f+768 (byte
 *       offsets 0, 1024, 2048 and 3072 in the asm).
 * s16 : output; 4*256 int16 words are written.
 * Returns 4*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane0, plane1, plane2, plane3 }
 *
 * Each iteration converts four samples from every plane (subtract magicF2W,
 * pack to int16 with signed saturation) and then transposes them: a word
 * interleave (punpck?wd) pairs planes 0/1 and planes 2/3, and a dword
 * interleave (punpck?dq) joins the pairs into complete frames. The output is
 * addressed with REG_S scaled by 2, because 32 bytes are written for every
 * 16 input bytes per plane.
 */
178 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
179 int32_t * f = (int32_t *) _f;
180 asm volatile(
181 "mov $-1024, %%"REG_S" \n\t"
182 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
183 "1: \n\t"
// Planes 0 and 1 -> mm0, mm2 (four int16 each)
184 "movq (%1, %%"REG_S"), %%mm0 \n\t"
185 "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
186 "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
187 "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
188 "psubd %%mm7, %%mm0 \n\t"
189 "psubd %%mm7, %%mm1 \n\t"
190 "psubd %%mm7, %%mm2 \n\t"
191 "psubd %%mm7, %%mm3 \n\t"
192 "packssdw %%mm1, %%mm0 \n\t"
193 "packssdw %%mm3, %%mm2 \n\t"
// Planes 2 and 3 -> mm3, mm5 (four int16 each)
194 "movq 2048(%1, %%"REG_S"), %%mm3\n\t"
195 "movq 2056(%1, %%"REG_S"), %%mm4\n\t"
196 "movq 3072(%1, %%"REG_S"), %%mm5\n\t"
197 "movq 3080(%1, %%"REG_S"), %%mm6\n\t"
198 "psubd %%mm7, %%mm3 \n\t"
199 "psubd %%mm7, %%mm4 \n\t"
200 "psubd %%mm7, %%mm5 \n\t"
201 "psubd %%mm7, %%mm6 \n\t"
202 "packssdw %%mm4, %%mm3 \n\t"
203 "packssdw %%mm6, %%mm5 \n\t"
// Word interleave: pair plane0/plane1 and plane2/plane3.
204 "movq %%mm0, %%mm1 \n\t"
205 "movq %%mm3, %%mm4 \n\t"
206 "punpcklwd %%mm2, %%mm0 \n\t"
207 "punpckhwd %%mm2, %%mm1 \n\t"
208 "punpcklwd %%mm5, %%mm3 \n\t"
209 "punpckhwd %%mm5, %%mm4 \n\t"
// Dword interleave: mm0, mm2, mm1, mm5 become frames 0, 1, 2, 3.
210 "movq %%mm0, %%mm2 \n\t"
211 "movq %%mm1, %%mm5 \n\t"
212 "punpckldq %%mm3, %%mm0 \n\t"
213 "punpckhdq %%mm3, %%mm2 \n\t"
214 "punpckldq %%mm4, %%mm1 \n\t"
215 "punpckhdq %%mm4, %%mm5 \n\t"
216 "movq %%mm0, (%0, %%"REG_S",2) \n\t"
217 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t"
218 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t"
219 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t"
220 "add $16, %%"REG_S" \n\t"
221 " jnz 1b \n\t"
222 "emms \n\t"
223 :: "r" (s16+1024), "r" (f+256)
224 :"%"REG_S, "memory"
226 return 4*256;
/*
 * Interleave five planes of 256 input values into 256 five-word int16
 * frames.
 *
 * _f  : input; planes 0..4 start at _f + 256*k (byte offsets 0, 1024, 2048,
 *       3072 and 4096 in the asm).
 * s16 : output; 5*256 int16 words are written.
 * Returns 5*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane0, plane2, plane3, plane4, plane1 }
 *
 * Each iteration handles four samples. The values are gathered with
 * movd/punpckldq directly in the order in which they are stored, so after
 * subtracting magicF2W and packing to int16 (signed saturation) the five
 * quadwords can be written out back to back. REG_D = REG_S*5/2 is the output
 * byte offset that corresponds to the input byte offset REG_S.
 */
229 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
230 int32_t * f = (int32_t *) _f;
231 asm volatile(
232 "mov $-1024, %%"REG_S" \n\t"
233 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
234 "1: \n\t"
// First 12 output words: frames 0 and 1 plus the first two words of frame 2.
235 "movd (%1, %%"REG_S"), %%mm0 \n\t"
236 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
237 "movd 3072(%1, %%"REG_S"), %%mm1\n\t"
238 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t"
239 "movd 1024(%1, %%"REG_S"), %%mm2\n\t"
240 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t"
241 "movd 2052(%1, %%"REG_S"), %%mm3\n\t"
242 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t"
243 "movd 4100(%1, %%"REG_S"), %%mm4\n\t"
244 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t"
245 "movd 8(%1, %%"REG_S"), %%mm5 \n\t"
246 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t"
// REG_D = REG_S * 5 / 2 (10 output bytes per 4-byte input value)
247 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
248 "sar $1, %%"REG_D" \n\t"
249 "psubd %%mm7, %%mm0 \n\t"
250 "psubd %%mm7, %%mm1 \n\t"
251 "psubd %%mm7, %%mm2 \n\t"
252 "psubd %%mm7, %%mm3 \n\t"
253 "psubd %%mm7, %%mm4 \n\t"
254 "psubd %%mm7, %%mm5 \n\t"
255 "packssdw %%mm1, %%mm0 \n\t"
256 "packssdw %%mm3, %%mm2 \n\t"
257 "packssdw %%mm5, %%mm4 \n\t"
258 "movq %%mm0, (%0, %%"REG_D") \n\t"
259 "movq %%mm2, 8(%0, %%"REG_D") \n\t"
260 "movq %%mm4, 16(%0, %%"REG_D") \n\t"
// Remaining 8 output words: rest of frame 2 and all of frame 3.
262 "movd 3080(%1, %%"REG_S"), %%mm0\n\t"
263 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t"
264 "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
265 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
266 "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
267 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t"
268 "movd 4108(%1, %%"REG_S"), %%mm3\n\t"
269 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
270 "psubd %%mm7, %%mm0 \n\t"
271 "psubd %%mm7, %%mm1 \n\t"
272 "psubd %%mm7, %%mm2 \n\t"
273 "psubd %%mm7, %%mm3 \n\t"
274 "packssdw %%mm1, %%mm0 \n\t"
275 "packssdw %%mm3, %%mm2 \n\t"
276 "movq %%mm0, 24(%0, %%"REG_D") \n\t"
277 "movq %%mm2, 32(%0, %%"REG_D") \n\t"
279 "add $16, %%"REG_S" \n\t"
280 " jnz 1b \n\t"
281 "emms \n\t"
282 :: "r" (s16+1280), "r" (f+256)
283 :"%"REG_S, "%"REG_D, "memory"
285 return 5*256;
/*
 * Interleave two planes of 256 input values into 256 six-word int16 frames.
 *
 * _f  : input; plane 0 is _f[0..255], plane 1 is _f[256..511] (byte offsets
 *       0 and 1024 in the asm).
 * s16 : output; 6*256 int16 words are written.
 * Returns 6*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { 0, 0, 0, 0, plane1, plane0 }
 *
 * Each iteration handles four samples. After conversion (subtract magicF2W,
 * pack to int16 with signed saturation) the two planes are word-interleaved
 * so that every dword holds one {plane1, plane0} pair. For each frame a zero
 * quadword (mm6) is stored first, followed by that pair. REG_D = 3*REG_S is
 * the output byte offset (12 output bytes per 4-byte input value).
 */
288 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
289 int32_t * f = (int32_t *) _f;
290 asm volatile(
291 "mov $-1024, %%"REG_S" \n\t"
292 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
293 "pxor %%mm6, %%mm6 \n\t"
294 "1: \n\t"
295 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
296 "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
297 "movq (%1, %%"REG_S"), %%mm2 \n\t"
298 "movq 8(%1, %%"REG_S"), %%mm3 \n\t"
299 "psubd %%mm7, %%mm0 \n\t"
300 "psubd %%mm7, %%mm1 \n\t"
301 "psubd %%mm7, %%mm2 \n\t"
302 "psubd %%mm7, %%mm3 \n\t"
303 "packssdw %%mm1, %%mm0 \n\t"
304 "packssdw %%mm3, %%mm2 \n\t"
// mm0 = pairs for samples 0 and 1, mm1 = pairs for samples 2 and 3.
305 "movq %%mm0, %%mm1 \n\t"
306 "punpcklwd %%mm2, %%mm0 \n\t"
307 "punpckhwd %%mm2, %%mm1 \n\t"
308 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
309 "movq %%mm6, (%0, %%"REG_D") \n\t"
310 "movd %%mm0, 8(%0, %%"REG_D") \n\t"
// Move the high pair into the low dword so that movd can store it.
311 "punpckhdq %%mm0, %%mm0 \n\t"
312 "movq %%mm6, 12(%0, %%"REG_D") \n\t"
313 "movd %%mm0, 20(%0, %%"REG_D") \n\t"
314 "movq %%mm6, 24(%0, %%"REG_D") \n\t"
315 "movd %%mm1, 32(%0, %%"REG_D") \n\t"
316 "punpckhdq %%mm1, %%mm1 \n\t"
317 "movq %%mm6, 36(%0, %%"REG_D") \n\t"
318 "movd %%mm1, 44(%0, %%"REG_D") \n\t"
319 "add $16, %%"REG_S" \n\t"
320 " jnz 1b \n\t"
321 "emms \n\t"
322 :: "r" (s16+1536), "r" (f+256)
323 :"%"REG_S, "%"REG_D, "memory"
325 return 6*256;
/*
 * Interleave three planes of 256 input values into 256 six-word int16
 * frames.
 *
 * _f  : input; planes 0, 1 and 2 start at _f, _f+256 and _f+512 (byte
 *       offsets 0, 1024 and 2048 in the asm).
 * s16 : output; 6*256 int16 words are written.
 * Returns 6*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane1, plane2, 0, 0, 0, plane0 }
 *
 * Each iteration handles two samples (REG_S advances by 8 bytes) and writes
 * 24 output bytes at REG_D = 3*REG_S.
 *
 * The trailing comments show register contents as 16-bit words from most to
 * least significant. Lower case letters belong to the first sample of the
 * iteration, upper case to the second: a/A = plane 1, b/B = plane 2,
 * f/F = plane 0, 0 = zero, X = don't care.
 *
 * mm3 is read by punpckldq before this asm block has written it; only its
 * low dword is used there and that dword is discarded by the following
 * punpckhdq, so the stale contents never reach the output.
 */
328 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
329 int32_t * f = (int32_t *) _f;
330 asm volatile(
331 "mov $-1024, %%"REG_S" \n\t"
332 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
333 "pxor %%mm6, %%mm6 \n\t"
334 "1: \n\t"
335 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
336 "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
337 "movq (%1, %%"REG_S"), %%mm5 \n\t"
338 "psubd %%mm7, %%mm0 \n\t"
339 "psubd %%mm7, %%mm1 \n\t"
340 "psubd %%mm7, %%mm5 \n\t"
// REG_D = 3*REG_S (12 output bytes per 4-byte input value)
341 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
343 "pxor %%mm4, %%mm4 \n\t"
344 "packssdw %%mm5, %%mm0 \n\t" // FfAa
345 "packssdw %%mm4, %%mm1 \n\t" // 00Bb
346 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
347 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
348 "movq %%mm0, %%mm1 \n\t" // BAba
349 "punpckldq %%mm4, %%mm3 \n\t" // f0XX
350 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
351 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
353 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba
354 "punpckhdq %%mm4, %%mm0 \n\t" // F000
355 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0
356 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000
357 "add $8, %%"REG_S" \n\t"
358 " jnz 1b \n\t"
359 "emms \n\t"
360 :: "r" (s16+1536), "r" (f+256)
361 :"%"REG_S, "%"REG_D, "memory"
363 return 6*256;
/*
 * Interleave four planes of 256 input values into 256 six-word int16 frames.
 *
 * _f  : input; planes 0..3 start at _f + 256*k (byte offsets 0, 1024, 2048
 *       and 3072 in the asm).
 * s16 : output; 6*256 int16 words are written.
 * Returns 6*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane1, plane3, 0, 0, plane2, plane0 }
 *
 * Each iteration handles two samples (REG_S advances by 8 bytes) and writes
 * 24 output bytes at REG_D = 3*REG_S.
 *
 * The trailing comments show register contents as 16-bit words from most to
 * least significant. Lower case letters belong to the first sample of the
 * iteration, upper case to the second: a/A = plane 1, b/B = plane 3,
 * e/E = plane 2, f/F = plane 0, 0 = zero.
 */
366 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
367 int32_t * f = (int32_t *) _f;
368 asm volatile(
369 "mov $-1024, %%"REG_S" \n\t"
370 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
// mm6 stays zero for the whole loop and supplies the two empty slots.
371 "pxor %%mm6, %%mm6 \n\t"
372 "1: \n\t"
373 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
374 "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
375 "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
376 "movq (%1, %%"REG_S"), %%mm5 \n\t"
377 "psubd %%mm7, %%mm0 \n\t"
378 "psubd %%mm7, %%mm1 \n\t"
379 "psubd %%mm7, %%mm4 \n\t"
380 "psubd %%mm7, %%mm5 \n\t"
// REG_D = 3*REG_S (12 output bytes per 4-byte input value)
381 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
383 "packssdw %%mm4, %%mm0 \n\t" // EeAa
384 "packssdw %%mm5, %%mm1 \n\t" // FfBb
385 "movq %%mm0, %%mm2 \n\t" // EeAa
386 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
387 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe
388 "movq %%mm0, %%mm1 \n\t" // BAba
389 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
390 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
392 "movq %%mm0, (%0, %%"REG_D") \n\t"
393 "punpckhdq %%mm2, %%mm0 \n\t" // FE00
394 "punpckldq %%mm1, %%mm2 \n\t" // BAfe
395 "movq %%mm2, 8(%0, %%"REG_D") \n\t"
396 "movq %%mm0, 16(%0, %%"REG_D") \n\t"
397 "add $8, %%"REG_S" \n\t"
398 " jnz 1b \n\t"
399 "emms \n\t"
400 :: "r" (s16+1536), "r" (f+256)
401 :"%"REG_S, "%"REG_D, "memory"
403 return 6*256;
/*
 * Interleave five planes of 256 input values into 256 six-word int16 frames.
 *
 * _f  : input; planes 0..4 start at _f + 256*k (byte offsets 0, 1024, 2048,
 *       3072 and 4096 in the asm).
 * s16 : output; 6*256 int16 words are written.
 * Returns 6*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane1, plane2, plane3, plane4, 0, plane0 }
 *
 * Each iteration handles two samples (REG_S advances by 8 bytes) and writes
 * 24 output bytes at REG_D = 3*REG_S.
 *
 * The trailing comments show register contents as 16-bit words from most to
 * least significant. Lower case letters belong to the first sample of the
 * iteration, upper case to the second: a/A = plane 1, b/B = plane 2,
 * c/C = plane 3, d/D = plane 4, f/F = plane 0, 0 = zero.
 *
 * No register is kept at zero across iterations here (the pxor of mm6 is
 * commented out); the single empty slot comes from clearing mm4 inside the
 * loop.
 */
406 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
407 int32_t * f = (int32_t *) _f;
408 asm volatile(
409 "mov $-1024, %%"REG_S" \n\t"
410 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
411 // "pxor %%mm6, %%mm6 \n\t"
412 "1: \n\t"
413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
414 "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
415 "movq 3072(%1, %%"REG_S"), %%mm2\n\t"
416 "movq 4096(%1, %%"REG_S"), %%mm3\n\t"
417 "movq (%1, %%"REG_S"), %%mm5 \n\t"
418 "psubd %%mm7, %%mm0 \n\t"
419 "psubd %%mm7, %%mm1 \n\t"
420 "psubd %%mm7, %%mm2 \n\t"
421 "psubd %%mm7, %%mm3 \n\t"
422 "psubd %%mm7, %%mm5 \n\t"
// REG_D = 3*REG_S (12 output bytes per 4-byte input value)
423 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
425 "packssdw %%mm2, %%mm0 \n\t" // CcAa
426 "packssdw %%mm3, %%mm1 \n\t" // DdBb
427 "packssdw %%mm5, %%mm5 \n\t" // FfFf
428 "movq %%mm0, %%mm2 \n\t" // CcAa
429 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
430 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
431 "pxor %%mm4, %%mm4 \n\t" // 0000
432 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0
433 "movq %%mm0, %%mm1 \n\t" // BAba
434 "movq %%mm4, %%mm3 \n\t" // F0f0
435 "punpckldq %%mm2, %%mm0 \n\t" // dcba
436 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
437 "punpckldq %%mm1, %%mm4 \n\t" // BAf0
438 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC
440 "movq %%mm0, (%0, %%"REG_D") \n\t"
441 "movq %%mm4, 8(%0, %%"REG_D") \n\t"
442 "movq %%mm2, 16(%0, %%"REG_D") \n\t"
443 "add $8, %%"REG_S" \n\t"
444 " jnz 1b \n\t"
445 "emms \n\t"
446 :: "r" (s16+1536), "r" (f+256)
447 :"%"REG_S, "%"REG_D, "memory"
449 return 6*256;
/*
 * Interleave six planes of 256 input values into 256 six-word int16 frames.
 *
 * _f  : input; planes 0..5 start at _f + 256*k (byte offsets 0, 1024, 2048,
 *       3072, 4096 and 5120 in the asm).
 * s16 : output; 6*256 int16 words are written.
 * Returns 6*256.
 *
 * Output frame layout (16-bit words, memory order):
 *   { plane1, plane3, plane4, plane5, plane2, plane0 }
 *
 * Each iteration handles two samples (REG_S advances by 8 bytes) and writes
 * 24 output bytes at REG_D = 3*REG_S.
 *
 * The trailing comments show register contents as 16-bit words from most to
 * least significant. Lower case letters belong to the first sample of the
 * iteration, upper case to the second: a/A = plane 1, b/B = plane 3,
 * c/C = plane 4, d/D = plane 5, e/E = plane 2, f/F = plane 0.
 */
452 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
453 int32_t * f = (int32_t *) _f;
454 asm volatile(
455 "mov $-1024, %%"REG_S" \n\t"
456 "movq "MANGLE(magicF2W)", %%mm7 \n\t"
457 // "pxor %%mm6, %%mm6 \n\t"
458 "1: \n\t"
459 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
460 "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
461 "movq 4096(%1, %%"REG_S"), %%mm2\n\t"
462 "movq 5120(%1, %%"REG_S"), %%mm3\n\t"
463 "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
464 "movq (%1, %%"REG_S"), %%mm5 \n\t"
465 "psubd %%mm7, %%mm0 \n\t"
466 "psubd %%mm7, %%mm1 \n\t"
467 "psubd %%mm7, %%mm2 \n\t"
468 "psubd %%mm7, %%mm3 \n\t"
469 "psubd %%mm7, %%mm4 \n\t"
470 "psubd %%mm7, %%mm5 \n\t"
// REG_D = 3*REG_S (12 output bytes per 4-byte input value)
471 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
473 "packssdw %%mm2, %%mm0 \n\t" // CcAa
474 "packssdw %%mm3, %%mm1 \n\t" // DdBb
475 "packssdw %%mm4, %%mm4 \n\t" // EeEe
476 "packssdw %%mm5, %%mm5 \n\t" // FfFf
477 "movq %%mm0, %%mm2 \n\t" // CcAa
478 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
479 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
480 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
481 "movq %%mm0, %%mm1 \n\t" // BAba
482 "movq %%mm4, %%mm3 \n\t" // FEfe
483 "punpckldq %%mm2, %%mm0 \n\t" // dcba
484 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
485 "punpckldq %%mm1, %%mm4 \n\t" // BAfe
486 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
488 "movq %%mm0, (%0, %%"REG_D") \n\t"
489 "movq %%mm4, 8(%0, %%"REG_D") \n\t"
490 "movq %%mm2, 16(%0, %%"REG_D") \n\t"
491 "add $8, %%"REG_S" \n\t"
492 " jnz 1b \n\t"
493 "emms \n\t"
494 :: "r" (s16+1536), "r" (f+256)
495 :"%"REG_S, "%"REG_D, "memory"
497 return 6*256;
/*
 * Select the MMX resampling routine for a channel configuration.
 *
 * flags : one of the A52_* channel layout constants, optionally OR-ed with
 *         A52_LFE (both come from outside this file).
 * ch    : number of interleaved output channels requested.
 *
 * Returns the address of the matching a52_resample_*_MMX function as a
 * void *, or NULL when this file has no routine for the flags/ch
 * combination. Every routine selected here has the signature
 * int (float *, int16_t *).
 * NOTE(review): the caller presumably casts the result back to that function
 * pointer type and falls back to another implementation on NULL; confirm at
 * the call site.
 *
 * A52_CHANNEL, A52_STEREO and A52_DOLBY share one routine, both with and
 * without A52_LFE.
 */
501 static void* a52_resample_MMX(int flags, int ch){
502 switch (flags) {
503 case A52_MONO:
504 if(ch==5) return a52_resample_MONO_to_5_MMX;
505 break;
506 case A52_CHANNEL:
507 case A52_STEREO:
508 case A52_DOLBY:
509 if(ch==2) return a52_resample_STEREO_to_2_MMX;
510 break;
511 case A52_3F:
512 if(ch==5) return a52_resample_3F_to_5_MMX;
513 break;
514 case A52_2F2R:
515 if(ch==4) return a52_resample_2F_2R_to_4_MMX;
516 break;
517 case A52_3F2R:
518 if(ch==5) return a52_resample_3F_2R_to_5_MMX;
519 break;
520 case A52_MONO | A52_LFE:
521 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
522 break;
523 case A52_CHANNEL | A52_LFE:
524 case A52_STEREO | A52_LFE:
525 case A52_DOLBY | A52_LFE:
526 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
527 break;
528 case A52_3F | A52_LFE:
529 if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
530 break;
531 case A52_2F2R | A52_LFE:
532 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
533 break;
534 case A52_3F2R | A52_LFE:
535 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
536 break;
// Any other flags value, or a ch that does not match, falls through to NULL.
538 return NULL;