typo fix
[mplayer/greg.git] / liba52 / downmix.c
blob91c21a2ef7f31546962a046e6acb88929b8af51e
1 /*
2 * downmix.c
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
7 * See http://liba52.sourceforge.net/ for updates.
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff.
10 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
11 * $Id$
13 * a52dec is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * a52dec is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
30 #include "config.h"
32 #include <string.h>
33 #include <inttypes.h>
35 #include "a52.h"
36 #include "a52_internal.h"
37 #include "mm_accel.h"
39 #define CONVERT(acmod,output) (((output) << 3) + (acmod))
42 void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias,
43 sample_t clev, sample_t slev)= NULL;
44 void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL;
46 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
47 sample_t clev, sample_t slev);
48 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
49 sample_t clev, sample_t slev);
50 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
51 sample_t clev, sample_t slev);
52 static void upmix_MMX (sample_t * samples, int acmod, int output);
53 static void upmix_C (sample_t * samples, int acmod, int output);
55 void downmix_accel_init(uint32_t mm_accel)
57 a52_upmix= upmix_C;
58 a52_downmix= downmix_C;
59 #if defined(ARCH_X86) || defined(ARCH_X86_64)
60 if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
61 if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
62 if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
63 #endif
66 int a52_downmix_init (int input, int flags, sample_t * level,
67 sample_t clev, sample_t slev)
69 static uint8_t table[11][8] = {
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
72 {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
73 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
75 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
77 A52_STEREO, A52_3F, A52_STEREO, A52_3F},
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
79 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
81 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
82 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
83 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
84 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
85 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
86 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
87 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
88 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
89 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
90 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
91 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
93 int output;
95 output = flags & A52_CHANNEL_MASK;
96 if (output > A52_DOLBY)
97 return -1;
99 output = table[output][input & 7];
101 if ((output == A52_STEREO) &&
102 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
103 output = A52_DOLBY;
105 if (flags & A52_ADJUST_LEVEL)
106 switch (CONVERT (input & 7, output)) {
108 case CONVERT (A52_3F, A52_MONO):
109 *level *= LEVEL_3DB / (1 + clev);
110 break;
112 case CONVERT (A52_STEREO, A52_MONO):
113 case CONVERT (A52_2F2R, A52_2F1R):
114 case CONVERT (A52_3F2R, A52_3F1R):
115 level_3db:
116 *level *= LEVEL_3DB;
117 break;
119 case CONVERT (A52_3F2R, A52_2F1R):
120 if (clev < LEVEL_PLUS3DB - 1)
121 goto level_3db;
122 /* break thru */
123 case CONVERT (A52_3F, A52_STEREO):
124 case CONVERT (A52_3F1R, A52_2F1R):
125 case CONVERT (A52_3F1R, A52_2F2R):
126 case CONVERT (A52_3F2R, A52_2F2R):
127 *level /= 1 + clev;
128 break;
130 case CONVERT (A52_2F1R, A52_MONO):
131 *level *= LEVEL_PLUS3DB / (2 + slev);
132 break;
134 case CONVERT (A52_2F1R, A52_STEREO):
135 case CONVERT (A52_3F1R, A52_3F):
136 *level /= 1 + slev * LEVEL_3DB;
137 break;
139 case CONVERT (A52_3F1R, A52_MONO):
140 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
141 break;
143 case CONVERT (A52_3F1R, A52_STEREO):
144 *level /= 1 + clev + slev * LEVEL_3DB;
145 break;
147 case CONVERT (A52_2F2R, A52_MONO):
148 *level *= LEVEL_3DB / (1 + slev);
149 break;
151 case CONVERT (A52_2F2R, A52_STEREO):
152 case CONVERT (A52_3F2R, A52_3F):
153 *level /= 1 + slev;
154 break;
156 case CONVERT (A52_3F2R, A52_MONO):
157 *level *= LEVEL_3DB / (1 + clev + slev);
158 break;
160 case CONVERT (A52_3F2R, A52_STEREO):
161 *level /= 1 + clev + slev;
162 break;
164 case CONVERT (A52_MONO, A52_DOLBY):
165 *level *= LEVEL_PLUS3DB;
166 break;
168 case CONVERT (A52_3F, A52_DOLBY):
169 case CONVERT (A52_2F1R, A52_DOLBY):
170 *level *= 1 / (1 + LEVEL_3DB);
171 break;
173 case CONVERT (A52_3F1R, A52_DOLBY):
174 case CONVERT (A52_2F2R, A52_DOLBY):
175 *level *= 1 / (1 + 2 * LEVEL_3DB);
176 break;
178 case CONVERT (A52_3F2R, A52_DOLBY):
179 *level *= 1 / (1 + 3 * LEVEL_3DB);
180 break;
183 return output;
186 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
187 sample_t clev, sample_t slev)
189 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
191 case CONVERT (A52_CHANNEL, A52_CHANNEL):
192 case CONVERT (A52_MONO, A52_MONO):
193 case CONVERT (A52_STEREO, A52_STEREO):
194 case CONVERT (A52_3F, A52_3F):
195 case CONVERT (A52_2F1R, A52_2F1R):
196 case CONVERT (A52_3F1R, A52_3F1R):
197 case CONVERT (A52_2F2R, A52_2F2R):
198 case CONVERT (A52_3F2R, A52_3F2R):
199 case CONVERT (A52_STEREO, A52_DOLBY):
200 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
201 return 0;
203 case CONVERT (A52_CHANNEL, A52_MONO):
204 coeff[0] = coeff[1] = level * LEVEL_6DB;
205 return 3;
207 case CONVERT (A52_STEREO, A52_MONO):
208 coeff[0] = coeff[1] = level * LEVEL_3DB;
209 return 3;
211 case CONVERT (A52_3F, A52_MONO):
212 coeff[0] = coeff[2] = level * LEVEL_3DB;
213 coeff[1] = level * clev * LEVEL_PLUS3DB;
214 return 7;
216 case CONVERT (A52_2F1R, A52_MONO):
217 coeff[0] = coeff[1] = level * LEVEL_3DB;
218 coeff[2] = level * slev * LEVEL_3DB;
219 return 7;
221 case CONVERT (A52_2F2R, A52_MONO):
222 coeff[0] = coeff[1] = level * LEVEL_3DB;
223 coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
224 return 15;
226 case CONVERT (A52_3F1R, A52_MONO):
227 coeff[0] = coeff[2] = level * LEVEL_3DB;
228 coeff[1] = level * clev * LEVEL_PLUS3DB;
229 coeff[3] = level * slev * LEVEL_3DB;
230 return 15;
232 case CONVERT (A52_3F2R, A52_MONO):
233 coeff[0] = coeff[2] = level * LEVEL_3DB;
234 coeff[1] = level * clev * LEVEL_PLUS3DB;
235 coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
236 return 31;
238 case CONVERT (A52_MONO, A52_DOLBY):
239 coeff[0] = level * LEVEL_3DB;
240 return 0;
242 case CONVERT (A52_3F, A52_DOLBY):
243 clev = LEVEL_3DB;
244 case CONVERT (A52_3F, A52_STEREO):
245 case CONVERT (A52_3F1R, A52_2F1R):
246 case CONVERT (A52_3F2R, A52_2F2R):
247 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
248 coeff[1] = level * clev;
249 return 7;
251 case CONVERT (A52_2F1R, A52_DOLBY):
252 slev = 1;
253 case CONVERT (A52_2F1R, A52_STEREO):
254 coeff[0] = coeff[1] = level;
255 coeff[2] = level * slev * LEVEL_3DB;
256 return 7;
258 case CONVERT (A52_3F1R, A52_DOLBY):
259 clev = LEVEL_3DB;
260 slev = 1;
261 case CONVERT (A52_3F1R, A52_STEREO):
262 coeff[0] = coeff[2] = level;
263 coeff[1] = level * clev;
264 coeff[3] = level * slev * LEVEL_3DB;
265 return 15;
267 case CONVERT (A52_2F2R, A52_DOLBY):
268 slev = LEVEL_3DB;
269 case CONVERT (A52_2F2R, A52_STEREO):
270 coeff[0] = coeff[1] = level;
271 coeff[2] = coeff[3] = level * slev;
272 return 15;
274 case CONVERT (A52_3F2R, A52_DOLBY):
275 clev = LEVEL_3DB;
276 case CONVERT (A52_3F2R, A52_2F1R):
277 slev = LEVEL_3DB;
278 case CONVERT (A52_3F2R, A52_STEREO):
279 coeff[0] = coeff[2] = level;
280 coeff[1] = level * clev;
281 coeff[3] = coeff[4] = level * slev;
282 return 31;
284 case CONVERT (A52_3F1R, A52_3F):
285 coeff[0] = coeff[1] = coeff[2] = level;
286 coeff[3] = level * slev * LEVEL_3DB;
287 return 13;
289 case CONVERT (A52_3F2R, A52_3F):
290 coeff[0] = coeff[1] = coeff[2] = level;
291 coeff[3] = coeff[4] = level * slev;
292 return 29;
294 case CONVERT (A52_2F2R, A52_2F1R):
295 coeff[0] = coeff[1] = level;
296 coeff[2] = coeff[3] = level * LEVEL_3DB;
297 return 12;
299 case CONVERT (A52_3F2R, A52_3F1R):
300 coeff[0] = coeff[1] = coeff[2] = level;
301 coeff[3] = coeff[4] = level * LEVEL_3DB;
302 return 24;
304 case CONVERT (A52_2F1R, A52_2F2R):
305 coeff[0] = coeff[1] = level;
306 coeff[2] = level * LEVEL_3DB;
307 return 0;
309 case CONVERT (A52_3F1R, A52_2F2R):
310 coeff[0] = coeff[2] = level;
311 coeff[1] = level * clev;
312 coeff[3] = level * LEVEL_3DB;
313 return 7;
315 case CONVERT (A52_3F1R, A52_3F2R):
316 coeff[0] = coeff[1] = coeff[2] = level;
317 coeff[3] = level * LEVEL_3DB;
318 return 0;
320 case CONVERT (A52_CHANNEL, A52_CHANNEL1):
321 coeff[0] = level;
322 coeff[1] = 0;
323 return 0;
325 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
326 coeff[0] = 0;
327 coeff[1] = level;
328 return 0;
331 return -1; /* NOTREACHED */
334 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
336 int i;
338 for (i = 0; i < 256; i++)
339 dest[i] += src[i] + bias;
342 static void mix3to1 (sample_t * samples, sample_t bias)
344 int i;
346 for (i = 0; i < 256; i++)
347 samples[i] += samples[i + 256] + samples[i + 512] + bias;
350 static void mix4to1 (sample_t * samples, sample_t bias)
352 int i;
354 for (i = 0; i < 256; i++)
355 samples[i] += (samples[i + 256] + samples[i + 512] +
356 samples[i + 768] + bias);
359 static void mix5to1 (sample_t * samples, sample_t bias)
361 int i;
363 for (i = 0; i < 256; i++)
364 samples[i] += (samples[i + 256] + samples[i + 512] +
365 samples[i + 768] + samples[i + 1024] + bias);
368 static void mix3to2 (sample_t * samples, sample_t bias)
370 int i;
371 sample_t common;
373 for (i = 0; i < 256; i++) {
374 common = samples[i + 256] + bias;
375 samples[i] += common;
376 samples[i + 256] = samples[i + 512] + common;
380 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
382 int i;
383 sample_t common;
385 for (i = 0; i < 256; i++) {
386 common = right[i + 256] + bias;
387 left[i] += common;
388 right[i] += common;
392 static void mix21toS (sample_t * samples, sample_t bias)
394 int i;
395 sample_t surround;
397 for (i = 0; i < 256; i++) {
398 surround = samples[i + 512];
399 samples[i] += bias - surround;
400 samples[i + 256] += bias + surround;
404 static void mix31to2 (sample_t * samples, sample_t bias)
406 int i;
407 sample_t common;
409 for (i = 0; i < 256; i++) {
410 common = samples[i + 256] + samples[i + 768] + bias;
411 samples[i] += common;
412 samples[i + 256] = samples[i + 512] + common;
416 static void mix31toS (sample_t * samples, sample_t bias)
418 int i;
419 sample_t common, surround;
421 for (i = 0; i < 256; i++) {
422 common = samples[i + 256] + bias;
423 surround = samples[i + 768];
424 samples[i] += common - surround;
425 samples[i + 256] = samples[i + 512] + common + surround;
429 static void mix22toS (sample_t * samples, sample_t bias)
431 int i;
432 sample_t surround;
434 for (i = 0; i < 256; i++) {
435 surround = samples[i + 512] + samples[i + 768];
436 samples[i] += bias - surround;
437 samples[i + 256] += bias + surround;
441 static void mix32to2 (sample_t * samples, sample_t bias)
443 int i;
444 sample_t common;
446 for (i = 0; i < 256; i++) {
447 common = samples[i + 256] + bias;
448 samples[i] += common + samples[i + 768];
449 samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
453 static void mix32toS (sample_t * samples, sample_t bias)
455 int i;
456 sample_t common, surround;
458 for (i = 0; i < 256; i++) {
459 common = samples[i + 256] + bias;
460 surround = samples[i + 768] + samples[i + 1024];
461 samples[i] += common - surround;
462 samples[i + 256] = samples[i + 512] + common + surround;
466 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
468 int i;
470 for (i = 0; i < 256; i++)
471 dest[i] = src[i] + src[i + 256] + bias;
474 static void zero (sample_t * samples)
476 int i;
478 for (i = 0; i < 256; i++)
479 samples[i] = 0;
482 void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
483 sample_t clev, sample_t slev)
485 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
487 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
488 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
489 break;
491 case CONVERT (A52_CHANNEL, A52_MONO):
492 case CONVERT (A52_STEREO, A52_MONO):
493 mix_2to1:
494 mix2to1 (samples, samples + 256, bias);
495 break;
497 case CONVERT (A52_2F1R, A52_MONO):
498 if (slev == 0)
499 goto mix_2to1;
500 case CONVERT (A52_3F, A52_MONO):
501 mix_3to1:
502 mix3to1 (samples, bias);
503 break;
505 case CONVERT (A52_3F1R, A52_MONO):
506 if (slev == 0)
507 goto mix_3to1;
508 case CONVERT (A52_2F2R, A52_MONO):
509 if (slev == 0)
510 goto mix_2to1;
511 mix4to1 (samples, bias);
512 break;
514 case CONVERT (A52_3F2R, A52_MONO):
515 if (slev == 0)
516 goto mix_3to1;
517 mix5to1 (samples, bias);
518 break;
520 case CONVERT (A52_MONO, A52_DOLBY):
521 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
522 break;
524 case CONVERT (A52_3F, A52_STEREO):
525 case CONVERT (A52_3F, A52_DOLBY):
526 mix_3to2:
527 mix3to2 (samples, bias);
528 break;
530 case CONVERT (A52_2F1R, A52_STEREO):
531 if (slev == 0)
532 break;
533 mix21to2 (samples, samples + 256, bias);
534 break;
536 case CONVERT (A52_2F1R, A52_DOLBY):
537 mix21toS (samples, bias);
538 break;
540 case CONVERT (A52_3F1R, A52_STEREO):
541 if (slev == 0)
542 goto mix_3to2;
543 mix31to2 (samples, bias);
544 break;
546 case CONVERT (A52_3F1R, A52_DOLBY):
547 mix31toS (samples, bias);
548 break;
550 case CONVERT (A52_2F2R, A52_STEREO):
551 if (slev == 0)
552 break;
553 mix2to1 (samples, samples + 512, bias);
554 mix2to1 (samples + 256, samples + 768, bias);
555 break;
557 case CONVERT (A52_2F2R, A52_DOLBY):
558 mix22toS (samples, bias);
559 break;
561 case CONVERT (A52_3F2R, A52_STEREO):
562 if (slev == 0)
563 goto mix_3to2;
564 mix32to2 (samples, bias);
565 break;
567 case CONVERT (A52_3F2R, A52_DOLBY):
568 mix32toS (samples, bias);
569 break;
571 case CONVERT (A52_3F1R, A52_3F):
572 if (slev == 0)
573 break;
574 mix21to2 (samples, samples + 512, bias);
575 break;
577 case CONVERT (A52_3F2R, A52_3F):
578 if (slev == 0)
579 break;
580 mix2to1 (samples, samples + 768, bias);
581 mix2to1 (samples + 512, samples + 1024, bias);
582 break;
584 case CONVERT (A52_3F1R, A52_2F1R):
585 mix3to2 (samples, bias);
586 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
587 break;
589 case CONVERT (A52_2F2R, A52_2F1R):
590 mix2to1 (samples + 512, samples + 768, bias);
591 break;
593 case CONVERT (A52_3F2R, A52_2F1R):
594 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
595 move2to1 (samples + 768, samples + 512, bias);
596 break;
598 case CONVERT (A52_3F2R, A52_3F1R):
599 mix2to1 (samples + 768, samples + 1024, bias);
600 break;
602 case CONVERT (A52_2F1R, A52_2F2R):
603 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
604 break;
606 case CONVERT (A52_3F1R, A52_2F2R):
607 mix3to2 (samples, bias);
608 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
609 break;
611 case CONVERT (A52_3F2R, A52_2F2R):
612 mix3to2 (samples, bias);
613 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
614 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
615 break;
617 case CONVERT (A52_3F1R, A52_3F2R):
618 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
619 break;
623 void upmix_C (sample_t * samples, int acmod, int output)
625 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
627 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
628 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
629 break;
631 case CONVERT (A52_3F2R, A52_MONO):
632 zero (samples + 1024);
633 case CONVERT (A52_3F1R, A52_MONO):
634 case CONVERT (A52_2F2R, A52_MONO):
635 zero (samples + 768);
636 case CONVERT (A52_3F, A52_MONO):
637 case CONVERT (A52_2F1R, A52_MONO):
638 zero (samples + 512);
639 case CONVERT (A52_CHANNEL, A52_MONO):
640 case CONVERT (A52_STEREO, A52_MONO):
641 zero (samples + 256);
642 break;
644 case CONVERT (A52_3F2R, A52_STEREO):
645 case CONVERT (A52_3F2R, A52_DOLBY):
646 zero (samples + 1024);
647 case CONVERT (A52_3F1R, A52_STEREO):
648 case CONVERT (A52_3F1R, A52_DOLBY):
649 zero (samples + 768);
650 case CONVERT (A52_3F, A52_STEREO):
651 case CONVERT (A52_3F, A52_DOLBY):
652 mix_3to2:
653 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
654 zero (samples + 256);
655 break;
657 case CONVERT (A52_2F2R, A52_STEREO):
658 case CONVERT (A52_2F2R, A52_DOLBY):
659 zero (samples + 768);
660 case CONVERT (A52_2F1R, A52_STEREO):
661 case CONVERT (A52_2F1R, A52_DOLBY):
662 zero (samples + 512);
663 break;
665 case CONVERT (A52_3F2R, A52_3F):
666 zero (samples + 1024);
667 case CONVERT (A52_3F1R, A52_3F):
668 case CONVERT (A52_2F2R, A52_2F1R):
669 zero (samples + 768);
670 break;
672 case CONVERT (A52_3F2R, A52_3F1R):
673 zero (samples + 1024);
674 break;
676 case CONVERT (A52_3F2R, A52_2F1R):
677 zero (samples + 1024);
678 case CONVERT (A52_3F1R, A52_2F1R):
679 mix_31to21:
680 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
681 goto mix_3to2;
683 case CONVERT (A52_3F2R, A52_2F2R):
684 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
685 goto mix_31to21;
689 #if defined(ARCH_X86) || defined(ARCH_X86_64)
690 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
692 asm volatile(
693 "movlps %2, %%xmm7 \n\t"
694 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
695 "mov $-1024, %%"REG_S" \n\t"
696 ASMALIGN(4)
697 "1: \n\t"
698 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
699 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
700 "addps (%1, %%"REG_S"), %%xmm0 \n\t"
701 "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
702 "addps %%xmm7, %%xmm0 \n\t"
703 "addps %%xmm7, %%xmm1 \n\t"
704 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
705 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
706 "add $32, %%"REG_S" \n\t"
707 " jnz 1b \n\t"
708 :: "r" (src+256), "r" (dest+256), "m" (bias)
709 : "%"REG_S
713 static void mix3to1_SSE (sample_t * samples, sample_t bias)
715 asm volatile(
716 "movlps %1, %%xmm7 \n\t"
717 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
718 "mov $-1024, %%"REG_S" \n\t"
719 ASMALIGN(4)
720 "1: \n\t"
721 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
722 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
723 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
724 "addps %%xmm7, %%xmm1 \n\t"
725 "addps %%xmm1, %%xmm0 \n\t"
726 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
727 "add $16, %%"REG_S" \n\t"
728 " jnz 1b \n\t"
729 :: "r" (samples+256), "m" (bias)
730 : "%"REG_S
734 static void mix4to1_SSE (sample_t * samples, sample_t bias)
736 asm volatile(
737 "movlps %1, %%xmm7 \n\t"
738 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
739 "mov $-1024, %%"REG_S" \n\t"
740 ASMALIGN(4)
741 "1: \n\t"
742 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
743 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
744 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
745 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
746 "addps %%xmm7, %%xmm0 \n\t"
747 "addps %%xmm1, %%xmm0 \n\t"
748 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
749 "add $16, %%"REG_S" \n\t"
750 " jnz 1b \n\t"
751 :: "r" (samples+256), "m" (bias)
752 : "%"REG_S
756 static void mix5to1_SSE (sample_t * samples, sample_t bias)
758 asm volatile(
759 "movlps %1, %%xmm7 \n\t"
760 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
761 "mov $-1024, %%"REG_S" \n\t"
762 ASMALIGN(4)
763 "1: \n\t"
764 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
765 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
766 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
767 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
768 "addps %%xmm7, %%xmm0 \n\t"
769 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
770 "addps %%xmm1, %%xmm0 \n\t"
771 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
772 "add $16, %%"REG_S" \n\t"
773 " jnz 1b \n\t"
774 :: "r" (samples+256), "m" (bias)
775 : "%"REG_S
779 static void mix3to2_SSE (sample_t * samples, sample_t bias)
781 asm volatile(
782 "movlps %1, %%xmm7 \n\t"
783 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
784 "mov $-1024, %%"REG_S" \n\t"
785 ASMALIGN(4)
786 "1: \n\t"
787 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
788 "addps %%xmm7, %%xmm0 \n\t" //common
789 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
790 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
791 "addps %%xmm0, %%xmm1 \n\t"
792 "addps %%xmm0, %%xmm2 \n\t"
793 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
794 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
795 "add $16, %%"REG_S" \n\t"
796 " jnz 1b \n\t"
797 :: "r" (samples+256), "m" (bias)
798 : "%"REG_S
802 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
804 asm volatile(
805 "movlps %2, %%xmm7 \n\t"
806 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
807 "mov $-1024, %%"REG_S" \n\t"
808 ASMALIGN(4)
809 "1: \n\t"
810 "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
811 "addps %%xmm7, %%xmm0 \n\t" //common
812 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
813 "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
814 "addps %%xmm0, %%xmm1 \n\t"
815 "addps %%xmm0, %%xmm2 \n\t"
816 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
817 "movaps %%xmm2, (%1, %%"REG_S") \n\t"
818 "add $16, %%"REG_S" \n\t"
819 " jnz 1b \n\t"
820 :: "r" (left+256), "r" (right+256), "m" (bias)
821 : "%"REG_S
825 static void mix21toS_SSE (sample_t * samples, sample_t bias)
827 asm volatile(
828 "movlps %1, %%xmm7 \n\t"
829 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
830 "mov $-1024, %%"REG_S" \n\t"
831 ASMALIGN(4)
832 "1: \n\t"
833 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
834 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
835 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
836 "addps %%xmm7, %%xmm1 \n\t"
837 "addps %%xmm7, %%xmm2 \n\t"
838 "subps %%xmm0, %%xmm1 \n\t"
839 "addps %%xmm0, %%xmm2 \n\t"
840 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
841 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
842 "add $16, %%"REG_S" \n\t"
843 " jnz 1b \n\t"
844 :: "r" (samples+256), "m" (bias)
845 : "%"REG_S
849 static void mix31to2_SSE (sample_t * samples, sample_t bias)
851 asm volatile(
852 "movlps %1, %%xmm7 \n\t"
853 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
854 "mov $-1024, %%"REG_S" \n\t"
855 ASMALIGN(4)
856 "1: \n\t"
857 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
858 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
859 "addps %%xmm7, %%xmm0 \n\t" // common
860 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
861 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
862 "addps %%xmm0, %%xmm1 \n\t"
863 "addps %%xmm0, %%xmm2 \n\t"
864 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
865 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
866 "add $16, %%"REG_S" \n\t"
867 " jnz 1b \n\t"
868 :: "r" (samples+256), "m" (bias)
869 : "%"REG_S
873 static void mix31toS_SSE (sample_t * samples, sample_t bias)
875 asm volatile(
876 "movlps %1, %%xmm7 \n\t"
877 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
878 "mov $-1024, %%"REG_S" \n\t"
879 ASMALIGN(4)
880 "1: \n\t"
881 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
882 "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
883 "addps %%xmm7, %%xmm0 \n\t" // common
884 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
885 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
886 "addps %%xmm0, %%xmm1 \n\t"
887 "addps %%xmm0, %%xmm2 \n\t"
888 "subps %%xmm3, %%xmm1 \n\t"
889 "addps %%xmm3, %%xmm2 \n\t"
890 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
891 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
892 "add $16, %%"REG_S" \n\t"
893 " jnz 1b \n\t"
894 :: "r" (samples+256), "m" (bias)
895 : "%"REG_S
899 static void mix22toS_SSE (sample_t * samples, sample_t bias)
901 asm volatile(
902 "movlps %1, %%xmm7 \n\t"
903 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
904 "mov $-1024, %%"REG_S" \n\t"
905 ASMALIGN(4)
906 "1: \n\t"
907 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
908 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
909 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
910 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
911 "addps %%xmm7, %%xmm1 \n\t"
912 "addps %%xmm7, %%xmm2 \n\t"
913 "subps %%xmm0, %%xmm1 \n\t"
914 "addps %%xmm0, %%xmm2 \n\t"
915 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
916 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
917 "add $16, %%"REG_S" \n\t"
918 " jnz 1b \n\t"
919 :: "r" (samples+256), "m" (bias)
920 : "%"REG_S
924 static void mix32to2_SSE (sample_t * samples, sample_t bias)
926 asm volatile(
927 "movlps %1, %%xmm7 \n\t"
928 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
929 "mov $-1024, %%"REG_S" \n\t"
930 ASMALIGN(4)
931 "1: \n\t"
932 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
933 "addps %%xmm7, %%xmm0 \n\t" // common
934 "movaps %%xmm0, %%xmm1 \n\t" // common
935 "addps (%0, %%"REG_S"), %%xmm0 \n\t"
936 "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
937 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
938 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
939 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
940 "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
941 "add $16, %%"REG_S" \n\t"
942 " jnz 1b \n\t"
943 :: "r" (samples+256), "m" (bias)
944 : "%"REG_S
948 static void mix32toS_SSE (sample_t * samples, sample_t bias)
950 asm volatile(
951 "movlps %1, %%xmm7 \n\t"
952 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
953 "mov $-1024, %%"REG_S" \n\t"
954 ASMALIGN(4)
955 "1: \n\t"
956 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
957 "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
958 "addps %%xmm7, %%xmm0 \n\t" // common
959 "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
960 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
961 "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
962 "subps %%xmm2, %%xmm1 \n\t"
963 "addps %%xmm2, %%xmm3 \n\t"
964 "addps %%xmm0, %%xmm1 \n\t"
965 "addps %%xmm0, %%xmm3 \n\t"
966 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
967 "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
968 "add $16, %%"REG_S" \n\t"
969 " jnz 1b \n\t"
970 :: "r" (samples+256), "m" (bias)
971 : "%"REG_S
975 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
977 asm volatile(
978 "movlps %2, %%xmm7 \n\t"
979 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
980 "mov $-1024, %%"REG_S" \n\t"
981 ASMALIGN(4)
982 "1: \n\t"
983 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
984 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
985 "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
986 "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
987 "addps %%xmm7, %%xmm0 \n\t"
988 "addps %%xmm7, %%xmm1 \n\t"
989 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
990 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
991 "add $32, %%"REG_S" \n\t"
992 " jnz 1b \n\t"
993 :: "r" (src+256), "r" (dest+256), "m" (bias)
994 : "%"REG_S
998 static void zero_MMX(sample_t * samples)
1000 asm volatile(
1001 "mov $-1024, %%"REG_S" \n\t"
1002 "pxor %%mm0, %%mm0 \n\t"
1003 ASMALIGN(4)
1004 "1: \n\t"
1005 "movq %%mm0, (%0, %%"REG_S") \n\t"
1006 "movq %%mm0, 8(%0, %%"REG_S") \n\t"
1007 "movq %%mm0, 16(%0, %%"REG_S") \n\t"
1008 "movq %%mm0, 24(%0, %%"REG_S") \n\t"
1009 "add $32, %%"REG_S" \n\t"
1010 " jnz 1b \n\t"
1011 "emms"
1012 :: "r" (samples+256)
1013 : "%"REG_S
/*
  Copy `size` bytes from src to dest in 64-byte chunks using MMX.
  size is divided by 64 and any remainder is NOT copied, so callers
  should pass a multiple of 64.  movq has no alignment requirement, but
  8-byte aligned buffers are the intended use.
  Note: unused in this file.

  Fixes over the earlier version: the asm operands used to be the pointer
  variables themselves ("m"(src)) rather than the buffers they point to,
  and the pointers were never advanced between iterations.  emms is
  issued at the end so that floating-point code can follow safely.
*/
static void copy_MMX(void *dest,const void *src,unsigned size)
{
  const char *in = src;
  char *out = dest;
  unsigned i;

  size /= 64;
  for(i=0;i<size;i++)
  {
    __asm __volatile(
	"movq	  (%0), %%mm0\n\t"
	"movq	 8(%0), %%mm1\n\t"
	"movq	16(%0), %%mm2\n\t"
	"movq	24(%0), %%mm3\n\t"
	"movq	32(%0), %%mm4\n\t"
	"movq	40(%0), %%mm5\n\t"
	"movq	48(%0), %%mm6\n\t"
	"movq	56(%0), %%mm7\n\t"
	"movq	%%mm0,   (%1)\n\t"
	"movq	%%mm1,  8(%1)\n\t"
	"movq	%%mm2, 16(%1)\n\t"
	"movq	%%mm3, 24(%1)\n\t"
	"movq	%%mm4, 32(%1)\n\t"
	"movq	%%mm5, 40(%1)\n\t"
	"movq	%%mm6, 48(%1)\n\t"
	"movq	%%mm7, 56(%1)\n\t"
	:
	:"r"(in),"r"(out)
	:"memory");
    in  += 64;
    out += 64;
  }
  /* leave MMX state so the x87 FPU is usable again */
  __asm __volatile("emms" : : : "memory");
}
1050 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
1051 sample_t clev, sample_t slev)
1053 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1055 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1056 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1057 break;
1059 case CONVERT (A52_CHANNEL, A52_MONO):
1060 case CONVERT (A52_STEREO, A52_MONO):
1061 mix_2to1_SSE:
1062 mix2to1_SSE (samples, samples + 256, bias);
1063 break;
1065 case CONVERT (A52_2F1R, A52_MONO):
1066 if (slev == 0)
1067 goto mix_2to1_SSE;
1068 case CONVERT (A52_3F, A52_MONO):
1069 mix_3to1_SSE:
1070 mix3to1_SSE (samples, bias);
1071 break;
1073 case CONVERT (A52_3F1R, A52_MONO):
1074 if (slev == 0)
1075 goto mix_3to1_SSE;
1076 case CONVERT (A52_2F2R, A52_MONO):
1077 if (slev == 0)
1078 goto mix_2to1_SSE;
1079 mix4to1_SSE (samples, bias);
1080 break;
1082 case CONVERT (A52_3F2R, A52_MONO):
1083 if (slev == 0)
1084 goto mix_3to1_SSE;
1085 mix5to1_SSE (samples, bias);
1086 break;
1088 case CONVERT (A52_MONO, A52_DOLBY):
1089 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1090 break;
1092 case CONVERT (A52_3F, A52_STEREO):
1093 case CONVERT (A52_3F, A52_DOLBY):
1094 mix_3to2_SSE:
1095 mix3to2_SSE (samples, bias);
1096 break;
1098 case CONVERT (A52_2F1R, A52_STEREO):
1099 if (slev == 0)
1100 break;
1101 mix21to2_SSE (samples, samples + 256, bias);
1102 break;
1104 case CONVERT (A52_2F1R, A52_DOLBY):
1105 mix21toS_SSE (samples, bias);
1106 break;
1108 case CONVERT (A52_3F1R, A52_STEREO):
1109 if (slev == 0)
1110 goto mix_3to2_SSE;
1111 mix31to2_SSE (samples, bias);
1112 break;
1114 case CONVERT (A52_3F1R, A52_DOLBY):
1115 mix31toS_SSE (samples, bias);
1116 break;
1118 case CONVERT (A52_2F2R, A52_STEREO):
1119 if (slev == 0)
1120 break;
1121 mix2to1_SSE (samples, samples + 512, bias);
1122 mix2to1_SSE (samples + 256, samples + 768, bias);
1123 break;
1125 case CONVERT (A52_2F2R, A52_DOLBY):
1126 mix22toS_SSE (samples, bias);
1127 break;
1129 case CONVERT (A52_3F2R, A52_STEREO):
1130 if (slev == 0)
1131 goto mix_3to2_SSE;
1132 mix32to2_SSE (samples, bias);
1133 break;
1135 case CONVERT (A52_3F2R, A52_DOLBY):
1136 mix32toS_SSE (samples, bias);
1137 break;
1139 case CONVERT (A52_3F1R, A52_3F):
1140 if (slev == 0)
1141 break;
1142 mix21to2_SSE (samples, samples + 512, bias);
1143 break;
1145 case CONVERT (A52_3F2R, A52_3F):
1146 if (slev == 0)
1147 break;
1148 mix2to1_SSE (samples, samples + 768, bias);
1149 mix2to1_SSE (samples + 512, samples + 1024, bias);
1150 break;
1152 case CONVERT (A52_3F1R, A52_2F1R):
1153 mix3to2_SSE (samples, bias);
1154 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1155 break;
1157 case CONVERT (A52_2F2R, A52_2F1R):
1158 mix2to1_SSE (samples + 512, samples + 768, bias);
1159 break;
1161 case CONVERT (A52_3F2R, A52_2F1R):
1162 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1163 move2to1_SSE (samples + 768, samples + 512, bias);
1164 break;
1166 case CONVERT (A52_3F2R, A52_3F1R):
1167 mix2to1_SSE (samples + 768, samples + 1024, bias);
1168 break;
1170 case CONVERT (A52_2F1R, A52_2F2R):
1171 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1172 break;
1174 case CONVERT (A52_3F1R, A52_2F2R):
1175 mix3to2_SSE (samples, bias);
1176 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1177 break;
1179 case CONVERT (A52_3F2R, A52_2F2R):
1180 mix3to2_SSE (samples, bias);
1181 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1182 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1183 break;
1185 case CONVERT (A52_3F1R, A52_3F2R):
1186 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1187 break;
1191 static void upmix_MMX (sample_t * samples, int acmod, int output)
1193 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1195 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1196 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1197 break;
1199 case CONVERT (A52_3F2R, A52_MONO):
1200 zero_MMX (samples + 1024);
1201 case CONVERT (A52_3F1R, A52_MONO):
1202 case CONVERT (A52_2F2R, A52_MONO):
1203 zero_MMX (samples + 768);
1204 case CONVERT (A52_3F, A52_MONO):
1205 case CONVERT (A52_2F1R, A52_MONO):
1206 zero_MMX (samples + 512);
1207 case CONVERT (A52_CHANNEL, A52_MONO):
1208 case CONVERT (A52_STEREO, A52_MONO):
1209 zero_MMX (samples + 256);
1210 break;
1212 case CONVERT (A52_3F2R, A52_STEREO):
1213 case CONVERT (A52_3F2R, A52_DOLBY):
1214 zero_MMX (samples + 1024);
1215 case CONVERT (A52_3F1R, A52_STEREO):
1216 case CONVERT (A52_3F1R, A52_DOLBY):
1217 zero_MMX (samples + 768);
1218 case CONVERT (A52_3F, A52_STEREO):
1219 case CONVERT (A52_3F, A52_DOLBY):
1220 mix_3to2_MMX:
1221 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
1222 zero_MMX (samples + 256);
1223 break;
1225 case CONVERT (A52_2F2R, A52_STEREO):
1226 case CONVERT (A52_2F2R, A52_DOLBY):
1227 zero_MMX (samples + 768);
1228 case CONVERT (A52_2F1R, A52_STEREO):
1229 case CONVERT (A52_2F1R, A52_DOLBY):
1230 zero_MMX (samples + 512);
1231 break;
1233 case CONVERT (A52_3F2R, A52_3F):
1234 zero_MMX (samples + 1024);
1235 case CONVERT (A52_3F1R, A52_3F):
1236 case CONVERT (A52_2F2R, A52_2F1R):
1237 zero_MMX (samples + 768);
1238 break;
1240 case CONVERT (A52_3F2R, A52_3F1R):
1241 zero_MMX (samples + 1024);
1242 break;
1244 case CONVERT (A52_3F2R, A52_2F1R):
1245 zero_MMX (samples + 1024);
1246 case CONVERT (A52_3F1R, A52_2F1R):
1247 mix_31to21_MMX:
1248 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1249 goto mix_3to2_MMX;
1251 case CONVERT (A52_3F2R, A52_2F2R):
1252 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1253 goto mix_31to21_MMX;
1257 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
1259 asm volatile(
1260 "movd %2, %%mm7 \n\t"
1261 "punpckldq %2, %%mm7 \n\t"
1262 "mov $-1024, %%"REG_S" \n\t"
1263 ASMALIGN(4)
1264 "1: \n\t"
1265 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1266 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1267 "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
1268 "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
1269 "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
1270 "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
1271 "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
1272 "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
1273 "pfadd %%mm7, %%mm0 \n\t"
1274 "pfadd %%mm7, %%mm1 \n\t"
1275 "pfadd %%mm7, %%mm2 \n\t"
1276 "pfadd %%mm7, %%mm3 \n\t"
1277 "movq %%mm0, (%1, %%"REG_S") \n\t"
1278 "movq %%mm1, 8(%1, %%"REG_S") \n\t"
1279 "movq %%mm2, 16(%1, %%"REG_S") \n\t"
1280 "movq %%mm3, 24(%1, %%"REG_S") \n\t"
1281 "add $32, %%"REG_S" \n\t"
1282 " jnz 1b \n\t"
1283 :: "r" (src+256), "r" (dest+256), "m" (bias)
1284 : "%"REG_S
1288 static void mix3to1_3dnow (sample_t * samples, sample_t bias)
1290 asm volatile(
1291 "movd %1, %%mm7 \n\t"
1292 "punpckldq %1, %%mm7 \n\t"
1293 "mov $-1024, %%"REG_S" \n\t"
1294 ASMALIGN(4)
1295 "1: \n\t"
1296 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1297 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1298 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1299 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1300 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1301 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1302 "pfadd %%mm7, %%mm0 \n\t"
1303 "pfadd %%mm7, %%mm1 \n\t"
1304 "pfadd %%mm2, %%mm0 \n\t"
1305 "pfadd %%mm3, %%mm1 \n\t"
1306 "movq %%mm0, (%0, %%"REG_S") \n\t"
1307 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1308 "add $16, %%"REG_S" \n\t"
1309 " jnz 1b \n\t"
1310 :: "r" (samples+256), "m" (bias)
1311 : "%"REG_S
1315 static void mix4to1_3dnow (sample_t * samples, sample_t bias)
1317 asm volatile(
1318 "movd %1, %%mm7 \n\t"
1319 "punpckldq %1, %%mm7 \n\t"
1320 "mov $-1024, %%"REG_S" \n\t"
1321 ASMALIGN(4)
1322 "1: \n\t"
1323 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1324 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1325 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1326 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1327 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1328 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1329 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
1330 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
1331 "pfadd %%mm7, %%mm0 \n\t"
1332 "pfadd %%mm7, %%mm1 \n\t"
1333 "pfadd %%mm2, %%mm0 \n\t"
1334 "pfadd %%mm3, %%mm1 \n\t"
1335 "movq %%mm0, (%0, %%"REG_S") \n\t"
1336 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1337 "add $16, %%"REG_S" \n\t"
1338 " jnz 1b \n\t"
1339 :: "r" (samples+256), "m" (bias)
1340 : "%"REG_S
1344 static void mix5to1_3dnow (sample_t * samples, sample_t bias)
1346 asm volatile(
1347 "movd %1, %%mm7 \n\t"
1348 "punpckldq %1, %%mm7 \n\t"
1349 "mov $-1024, %%"REG_S" \n\t"
1350 ASMALIGN(4)
1351 "1: \n\t"
1352 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1353 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1354 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1355 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1356 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1357 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1358 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
1359 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
1360 "pfadd %%mm7, %%mm0 \n\t"
1361 "pfadd %%mm7, %%mm1 \n\t"
1362 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
1363 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
1364 "pfadd %%mm2, %%mm0 \n\t"
1365 "pfadd %%mm3, %%mm1 \n\t"
1366 "movq %%mm0, (%0, %%"REG_S") \n\t"
1367 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1368 "add $16, %%"REG_S" \n\t"
1369 " jnz 1b \n\t"
1370 :: "r" (samples+256), "m" (bias)
1371 : "%"REG_S
1375 static void mix3to2_3dnow (sample_t * samples, sample_t bias)
1377 asm volatile(
1378 "movd %1, %%mm7 \n\t"
1379 "punpckldq %1, %%mm7 \n\t"
1380 "mov $-1024, %%"REG_S" \n\t"
1381 ASMALIGN(4)
1382 "1: \n\t"
1383 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1384 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1385 "pfadd %%mm7, %%mm0 \n\t" //common
1386 "pfadd %%mm7, %%mm1 \n\t" //common
1387 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1388 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1389 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1390 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1391 "pfadd %%mm0, %%mm2 \n\t"
1392 "pfadd %%mm1, %%mm3 \n\t"
1393 "pfadd %%mm0, %%mm4 \n\t"
1394 "pfadd %%mm1, %%mm5 \n\t"
1395 "movq %%mm2, (%0, %%"REG_S") \n\t"
1396 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1397 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1398 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1399 "add $16, %%"REG_S" \n\t"
1400 " jnz 1b \n\t"
1401 :: "r" (samples+256), "m" (bias)
1402 : "%"REG_S
1406 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
1408 asm volatile(
1409 "movd %2, %%mm7 \n\t"
1410 "punpckldq %2, %%mm7 \n\t"
1411 "mov $-1024, %%"REG_S" \n\t"
1412 ASMALIGN(4)
1413 "1: \n\t"
1414 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
1415 "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
1416 "pfadd %%mm7, %%mm0 \n\t" //common
1417 "pfadd %%mm7, %%mm1 \n\t" //common
1418 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1419 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1420 "movq (%1, %%"REG_S"), %%mm4 \n\t"
1421 "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
1422 "pfadd %%mm0, %%mm2 \n\t"
1423 "pfadd %%mm1, %%mm3 \n\t"
1424 "pfadd %%mm0, %%mm4 \n\t"
1425 "pfadd %%mm1, %%mm5 \n\t"
1426 "movq %%mm2, (%0, %%"REG_S") \n\t"
1427 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1428 "movq %%mm4, (%1, %%"REG_S") \n\t"
1429 "movq %%mm5, 8(%1, %%"REG_S") \n\t"
1430 "add $16, %%"REG_S" \n\t"
1431 " jnz 1b \n\t"
1432 :: "r" (left+256), "r" (right+256), "m" (bias)
1433 : "%"REG_S
1437 static void mix21toS_3dnow (sample_t * samples, sample_t bias)
1439 asm volatile(
1440 "movd %1, %%mm7 \n\t"
1441 "punpckldq %1, %%mm7 \n\t"
1442 "mov $-1024, %%"REG_S" \n\t"
1443 ASMALIGN(4)
1444 "1: \n\t"
1445 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
1446 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
1447 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1448 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1449 "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
1450 "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
1451 "pfadd %%mm7, %%mm2 \n\t"
1452 "pfadd %%mm7, %%mm3 \n\t"
1453 "pfadd %%mm7, %%mm4 \n\t"
1454 "pfadd %%mm7, %%mm5 \n\t"
1455 "pfsub %%mm0, %%mm2 \n\t"
1456 "pfsub %%mm1, %%mm3 \n\t"
1457 "pfadd %%mm0, %%mm4 \n\t"
1458 "pfadd %%mm1, %%mm5 \n\t"
1459 "movq %%mm2, (%0, %%"REG_S") \n\t"
1460 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1461 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1462 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1463 "add $16, %%"REG_S" \n\t"
1464 " jnz 1b \n\t"
1465 :: "r" (samples+256), "m" (bias)
1466 : "%"REG_S
1470 static void mix31to2_3dnow (sample_t * samples, sample_t bias)
1472 asm volatile(
1473 "movd %1, %%mm7 \n\t"
1474 "punpckldq %1, %%mm7 \n\t"
1475 "mov $-1024, %%"REG_S" \n\t"
1476 ASMALIGN(4)
1477 "1: \n\t"
1478 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1479 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1480 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
1481 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
1482 "pfadd %%mm7, %%mm0 \n\t" // common
1483 "pfadd %%mm7, %%mm1 \n\t" // common
1484 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1485 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1486 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1487 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1488 "pfadd %%mm0, %%mm2 \n\t"
1489 "pfadd %%mm1, %%mm3 \n\t"
1490 "pfadd %%mm0, %%mm4 \n\t"
1491 "pfadd %%mm1, %%mm5 \n\t"
1492 "movq %%mm2, (%0, %%"REG_S") \n\t"
1493 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1494 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1495 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1496 "add $16, %%"REG_S" \n\t"
1497 " jnz 1b \n\t"
1498 :: "r" (samples+256), "m" (bias)
1499 : "%"REG_S
1503 static void mix31toS_3dnow (sample_t * samples, sample_t bias)
1505 asm volatile(
1506 "movd %1, %%mm7 \n\t"
1507 "punpckldq %1, %%mm7 \n\t"
1508 "mov $-1024, %%"REG_S" \n\t"
1509 ASMALIGN(4)
1510 "1: \n\t"
1511 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1512 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1513 "pfadd %%mm7, %%mm0 \n\t" // common
1514 "pfadd %%mm7, %%mm1 \n\t" // common
1515 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1516 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1517 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1518 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1519 "pfadd %%mm0, %%mm2 \n\t"
1520 "pfadd %%mm1, %%mm3 \n\t"
1521 "pfadd %%mm0, %%mm4 \n\t"
1522 "pfadd %%mm1, %%mm5 \n\t"
1523 "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
1524 "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
1525 "pfsub %%mm0, %%mm2 \n\t"
1526 "pfsub %%mm1, %%mm3 \n\t"
1527 "pfadd %%mm0, %%mm4 \n\t"
1528 "pfadd %%mm1, %%mm5 \n\t"
1529 "movq %%mm2, (%0, %%"REG_S") \n\t"
1530 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1531 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1532 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1533 "add $16, %%"REG_S" \n\t"
1534 " jnz 1b \n\t"
1535 :: "r" (samples+256), "m" (bias)
1536 : "%"REG_S
1540 static void mix22toS_3dnow (sample_t * samples, sample_t bias)
1542 asm volatile(
1543 "movd %1, %%mm7 \n\t"
1544 "punpckldq %1, %%mm7 \n\t"
1545 "mov $-1024, %%"REG_S" \n\t"
1546 ASMALIGN(4)
1547 "1: \n\t"
1548 "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
1549 "movq 2056(%0, %%"REG_S"), %%mm1\n\t"
1550 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
1551 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
1552 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1553 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1554 "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
1555 "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
1556 "pfadd %%mm7, %%mm2 \n\t"
1557 "pfadd %%mm7, %%mm3 \n\t"
1558 "pfadd %%mm7, %%mm4 \n\t"
1559 "pfadd %%mm7, %%mm5 \n\t"
1560 "pfsub %%mm0, %%mm2 \n\t"
1561 "pfsub %%mm1, %%mm3 \n\t"
1562 "pfadd %%mm0, %%mm4 \n\t"
1563 "pfadd %%mm1, %%mm5 \n\t"
1564 "movq %%mm2, (%0, %%"REG_S") \n\t"
1565 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1566 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1567 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1568 "add $16, %%"REG_S" \n\t"
1569 " jnz 1b \n\t"
1570 :: "r" (samples+256), "m" (bias)
1571 : "%"REG_S
1575 static void mix32to2_3dnow (sample_t * samples, sample_t bias)
1577 asm volatile(
1578 "movd %1, %%mm7 \n\t"
1579 "punpckldq %1, %%mm7 \n\t"
1580 "mov $-1024, %%"REG_S" \n\t"
1581 ASMALIGN(4)
1582 "1: \n\t"
1583 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1584 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1585 "pfadd %%mm7, %%mm0 \n\t" // common
1586 "pfadd %%mm7, %%mm1 \n\t" // common
1587 "movq %%mm0, %%mm2 \n\t" // common
1588 "movq %%mm1, %%mm3 \n\t" // common
1589 "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
1590 "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t"
1591 "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
1592 "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t"
1593 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
1594 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
1595 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
1596 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
1597 "movq %%mm0, (%0, %%"REG_S") \n\t"
1598 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1599 "movq %%mm2, 1024(%0, %%"REG_S")\n\t"
1600 "movq %%mm3, 1032(%0, %%"REG_S")\n\t"
1601 "add $16, %%"REG_S" \n\t"
1602 " jnz 1b \n\t"
1603 :: "r" (samples+256), "m" (bias)
1604 : "%"REG_S
1608 /* todo: should be optimized better */
1609 static void mix32toS_3dnow (sample_t * samples, sample_t bias)
1611 asm volatile(
1612 "mov $-1024, %%"REG_S" \n\t"
1613 ASMALIGN(4)
1614 "1: \n\t"
1615 "movd %1, %%mm7 \n\t"
1616 "punpckldq %1, %%mm7 \n\t"
1617 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1618 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1619 "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
1620 "movq 3080(%0, %%"REG_S"), %%mm5\n\t"
1621 "pfadd %%mm7, %%mm0 \n\t" // common
1622 "pfadd %%mm7, %%mm1 \n\t" // common
1623 "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
1624 "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround
1625 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1626 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1627 "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
1628 "movq 2056(%0, %%"REG_S"), %%mm7\n\t"
1629 "pfsub %%mm4, %%mm2 \n\t"
1630 "pfsub %%mm5, %%mm3 \n\t"
1631 "pfadd %%mm4, %%mm6 \n\t"
1632 "pfadd %%mm5, %%mm7 \n\t"
1633 "pfadd %%mm0, %%mm2 \n\t"
1634 "pfadd %%mm1, %%mm3 \n\t"
1635 "pfadd %%mm0, %%mm6 \n\t"
1636 "pfadd %%mm1, %%mm7 \n\t"
1637 "movq %%mm2, (%0, %%"REG_S") \n\t"
1638 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1639 "movq %%mm6, 1024(%0, %%"REG_S")\n\t"
1640 "movq %%mm7, 1032(%0, %%"REG_S")\n\t"
1641 "add $16, %%"REG_S" \n\t"
1642 " jnz 1b \n\t"
1643 :: "r" (samples+256), "m" (bias)
1644 : "%"REG_S
1648 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
1650 asm volatile(
1651 "movd %2, %%mm7 \n\t"
1652 "punpckldq %2, %%mm7 \n\t"
1653 "mov $-1024, %%"REG_S" \n\t"
1654 ASMALIGN(4)
1655 "1: \n\t"
1656 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1657 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1658 "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
1659 "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
1660 "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t"
1661 "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t"
1662 "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t"
1663 "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t"
1664 "pfadd %%mm7, %%mm0 \n\t"
1665 "pfadd %%mm7, %%mm1 \n\t"
1666 "pfadd %%mm7, %%mm2 \n\t"
1667 "pfadd %%mm7, %%mm3 \n\t"
1668 "movq %%mm0, (%1, %%"REG_S") \n\t"
1669 "movq %%mm1, 8(%1, %%"REG_S") \n\t"
1670 "movq %%mm2, 16(%1, %%"REG_S") \n\t"
1671 "movq %%mm3, 24(%1, %%"REG_S") \n\t"
1672 "add $32, %%"REG_S" \n\t"
1673 " jnz 1b \n\t"
1674 :: "r" (src+256), "r" (dest+256), "m" (bias)
1675 : "%"REG_S
1679 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
1680 sample_t clev, sample_t slev)
1682 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1684 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1685 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1686 break;
1688 case CONVERT (A52_CHANNEL, A52_MONO):
1689 case CONVERT (A52_STEREO, A52_MONO):
1690 mix_2to1_3dnow:
1691 mix2to1_3dnow (samples, samples + 256, bias);
1692 break;
1694 case CONVERT (A52_2F1R, A52_MONO):
1695 if (slev == 0)
1696 goto mix_2to1_3dnow;
1697 case CONVERT (A52_3F, A52_MONO):
1698 mix_3to1_3dnow:
1699 mix3to1_3dnow (samples, bias);
1700 break;
1702 case CONVERT (A52_3F1R, A52_MONO):
1703 if (slev == 0)
1704 goto mix_3to1_3dnow;
1705 case CONVERT (A52_2F2R, A52_MONO):
1706 if (slev == 0)
1707 goto mix_2to1_3dnow;
1708 mix4to1_3dnow (samples, bias);
1709 break;
1711 case CONVERT (A52_3F2R, A52_MONO):
1712 if (slev == 0)
1713 goto mix_3to1_3dnow;
1714 mix5to1_3dnow (samples, bias);
1715 break;
1717 case CONVERT (A52_MONO, A52_DOLBY):
1718 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1719 break;
1721 case CONVERT (A52_3F, A52_STEREO):
1722 case CONVERT (A52_3F, A52_DOLBY):
1723 mix_3to2_3dnow:
1724 mix3to2_3dnow (samples, bias);
1725 break;
1727 case CONVERT (A52_2F1R, A52_STEREO):
1728 if (slev == 0)
1729 break;
1730 mix21to2_3dnow (samples, samples + 256, bias);
1731 break;
1733 case CONVERT (A52_2F1R, A52_DOLBY):
1734 mix21toS_3dnow (samples, bias);
1735 break;
1737 case CONVERT (A52_3F1R, A52_STEREO):
1738 if (slev == 0)
1739 goto mix_3to2_3dnow;
1740 mix31to2_3dnow (samples, bias);
1741 break;
1743 case CONVERT (A52_3F1R, A52_DOLBY):
1744 mix31toS_3dnow (samples, bias);
1745 break;
1747 case CONVERT (A52_2F2R, A52_STEREO):
1748 if (slev == 0)
1749 break;
1750 mix2to1_3dnow (samples, samples + 512, bias);
1751 mix2to1_3dnow (samples + 256, samples + 768, bias);
1752 break;
1754 case CONVERT (A52_2F2R, A52_DOLBY):
1755 mix22toS_3dnow (samples, bias);
1756 break;
1758 case CONVERT (A52_3F2R, A52_STEREO):
1759 if (slev == 0)
1760 goto mix_3to2_3dnow;
1761 mix32to2_3dnow (samples, bias);
1762 break;
1764 case CONVERT (A52_3F2R, A52_DOLBY):
1765 mix32toS_3dnow (samples, bias);
1766 break;
1768 case CONVERT (A52_3F1R, A52_3F):
1769 if (slev == 0)
1770 break;
1771 mix21to2_3dnow (samples, samples + 512, bias);
1772 break;
1774 case CONVERT (A52_3F2R, A52_3F):
1775 if (slev == 0)
1776 break;
1777 mix2to1_3dnow (samples, samples + 768, bias);
1778 mix2to1_3dnow (samples + 512, samples + 1024, bias);
1779 break;
1781 case CONVERT (A52_3F1R, A52_2F1R):
1782 mix3to2_3dnow (samples, bias);
1783 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1784 break;
1786 case CONVERT (A52_2F2R, A52_2F1R):
1787 mix2to1_3dnow (samples + 512, samples + 768, bias);
1788 break;
1790 case CONVERT (A52_3F2R, A52_2F1R):
1791 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1792 move2to1_3dnow (samples + 768, samples + 512, bias);
1793 break;
1795 case CONVERT (A52_3F2R, A52_3F1R):
1796 mix2to1_3dnow (samples + 768, samples + 1024, bias);
1797 break;
1799 case CONVERT (A52_2F1R, A52_2F2R):
1800 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1801 break;
1803 case CONVERT (A52_3F1R, A52_2F2R):
1804 mix3to2_3dnow (samples, bias);
1805 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1806 break;
1808 case CONVERT (A52_3F2R, A52_2F2R):
1809 mix3to2_3dnow (samples, bias);
1810 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1811 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1812 break;
1814 case CONVERT (A52_3F1R, A52_3F2R):
1815 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1816 break;
1818 __asm __volatile("femms":::"memory");
1821 #endif // ARCH_X86 || ARCH_X86_64