Fix:
[mplayer/glamo.git] / liba52 / downmix.c
blob67eee7a89eb4c652348250df878da729c7504c14
1 /*
2 * downmix.c
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
7 * See http://liba52.sourceforge.net/ for updates.
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff.
10 * detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/
11 * $Id$
13 * a52dec is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * a52dec is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
30 #include "config.h"
31 #include "asmalign.h"
33 #include <string.h>
34 #include <inttypes.h>
36 #include "a52.h"
37 #include "a52_internal.h"
38 #include "mm_accel.h"
40 #define CONVERT(acmod,output) (((output) << 3) + (acmod))
43 void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
44 sample_t clev, sample_t slev)= NULL;
45 void (*upmix)(sample_t * samples, int acmod, int output)= NULL;
47 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
48 sample_t clev, sample_t slev);
49 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
50 sample_t clev, sample_t slev);
51 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
52 sample_t clev, sample_t slev);
53 static void upmix_MMX (sample_t * samples, int acmod, int output);
54 static void upmix_C (sample_t * samples, int acmod, int output);
56 void downmix_accel_init(uint32_t mm_accel)
58 upmix= upmix_C;
59 downmix= downmix_C;
60 #if defined(ARCH_X86) || defined(ARCH_X86_64)
61 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX;
62 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE;
63 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow;
64 #endif
67 int downmix_init (int input, int flags, sample_t * level,
68 sample_t clev, sample_t slev)
70 static uint8_t table[11][8] = {
71 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
72 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
73 {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
74 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
75 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
76 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
77 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
78 A52_STEREO, A52_3F, A52_STEREO, A52_3F},
79 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
80 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
81 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
82 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
83 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
84 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
85 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
86 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
87 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
88 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
89 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
90 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
91 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
92 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
94 int output;
96 output = flags & A52_CHANNEL_MASK;
97 if (output > A52_DOLBY)
98 return -1;
100 output = table[output][input & 7];
102 if ((output == A52_STEREO) &&
103 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
104 output = A52_DOLBY;
106 if (flags & A52_ADJUST_LEVEL)
107 switch (CONVERT (input & 7, output)) {
109 case CONVERT (A52_3F, A52_MONO):
110 *level *= LEVEL_3DB / (1 + clev);
111 break;
113 case CONVERT (A52_STEREO, A52_MONO):
114 case CONVERT (A52_2F2R, A52_2F1R):
115 case CONVERT (A52_3F2R, A52_3F1R):
116 level_3db:
117 *level *= LEVEL_3DB;
118 break;
120 case CONVERT (A52_3F2R, A52_2F1R):
121 if (clev < LEVEL_PLUS3DB - 1)
122 goto level_3db;
123 /* break thru */
124 case CONVERT (A52_3F, A52_STEREO):
125 case CONVERT (A52_3F1R, A52_2F1R):
126 case CONVERT (A52_3F1R, A52_2F2R):
127 case CONVERT (A52_3F2R, A52_2F2R):
128 *level /= 1 + clev;
129 break;
131 case CONVERT (A52_2F1R, A52_MONO):
132 *level *= LEVEL_PLUS3DB / (2 + slev);
133 break;
135 case CONVERT (A52_2F1R, A52_STEREO):
136 case CONVERT (A52_3F1R, A52_3F):
137 *level /= 1 + slev * LEVEL_3DB;
138 break;
140 case CONVERT (A52_3F1R, A52_MONO):
141 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
142 break;
144 case CONVERT (A52_3F1R, A52_STEREO):
145 *level /= 1 + clev + slev * LEVEL_3DB;
146 break;
148 case CONVERT (A52_2F2R, A52_MONO):
149 *level *= LEVEL_3DB / (1 + slev);
150 break;
152 case CONVERT (A52_2F2R, A52_STEREO):
153 case CONVERT (A52_3F2R, A52_3F):
154 *level /= 1 + slev;
155 break;
157 case CONVERT (A52_3F2R, A52_MONO):
158 *level *= LEVEL_3DB / (1 + clev + slev);
159 break;
161 case CONVERT (A52_3F2R, A52_STEREO):
162 *level /= 1 + clev + slev;
163 break;
165 case CONVERT (A52_MONO, A52_DOLBY):
166 *level *= LEVEL_PLUS3DB;
167 break;
169 case CONVERT (A52_3F, A52_DOLBY):
170 case CONVERT (A52_2F1R, A52_DOLBY):
171 *level *= 1 / (1 + LEVEL_3DB);
172 break;
174 case CONVERT (A52_3F1R, A52_DOLBY):
175 case CONVERT (A52_2F2R, A52_DOLBY):
176 *level *= 1 / (1 + 2 * LEVEL_3DB);
177 break;
179 case CONVERT (A52_3F2R, A52_DOLBY):
180 *level *= 1 / (1 + 3 * LEVEL_3DB);
181 break;
183 return output;
186 int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
187 sample_t clev, sample_t slev)
189 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
191 case CONVERT (A52_CHANNEL, A52_CHANNEL):
192 case CONVERT (A52_MONO, A52_MONO):
193 case CONVERT (A52_STEREO, A52_STEREO):
194 case CONVERT (A52_3F, A52_3F):
195 case CONVERT (A52_2F1R, A52_2F1R):
196 case CONVERT (A52_3F1R, A52_3F1R):
197 case CONVERT (A52_2F2R, A52_2F2R):
198 case CONVERT (A52_3F2R, A52_3F2R):
199 case CONVERT (A52_STEREO, A52_DOLBY):
200 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
201 return 0;
203 case CONVERT (A52_CHANNEL, A52_MONO):
204 coeff[0] = coeff[1] = level * LEVEL_6DB;
205 return 3;
207 case CONVERT (A52_STEREO, A52_MONO):
208 coeff[0] = coeff[1] = level * LEVEL_3DB;
209 return 3;
211 case CONVERT (A52_3F, A52_MONO):
212 coeff[0] = coeff[2] = level * LEVEL_3DB;
213 coeff[1] = level * clev * LEVEL_PLUS3DB;
214 return 7;
216 case CONVERT (A52_2F1R, A52_MONO):
217 coeff[0] = coeff[1] = level * LEVEL_3DB;
218 coeff[2] = level * slev * LEVEL_3DB;
219 return 7;
221 case CONVERT (A52_2F2R, A52_MONO):
222 coeff[0] = coeff[1] = level * LEVEL_3DB;
223 coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
224 return 15;
226 case CONVERT (A52_3F1R, A52_MONO):
227 coeff[0] = coeff[2] = level * LEVEL_3DB;
228 coeff[1] = level * clev * LEVEL_PLUS3DB;
229 coeff[3] = level * slev * LEVEL_3DB;
230 return 15;
232 case CONVERT (A52_3F2R, A52_MONO):
233 coeff[0] = coeff[2] = level * LEVEL_3DB;
234 coeff[1] = level * clev * LEVEL_PLUS3DB;
235 coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
236 return 31;
238 case CONVERT (A52_MONO, A52_DOLBY):
239 coeff[0] = level * LEVEL_3DB;
240 return 0;
242 case CONVERT (A52_3F, A52_DOLBY):
243 clev = LEVEL_3DB;
244 case CONVERT (A52_3F, A52_STEREO):
245 case CONVERT (A52_3F1R, A52_2F1R):
246 case CONVERT (A52_3F2R, A52_2F2R):
247 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
248 coeff[1] = level * clev;
249 return 7;
251 case CONVERT (A52_2F1R, A52_DOLBY):
252 slev = 1;
253 case CONVERT (A52_2F1R, A52_STEREO):
254 coeff[0] = coeff[1] = level;
255 coeff[2] = level * slev * LEVEL_3DB;
256 return 7;
258 case CONVERT (A52_3F1R, A52_DOLBY):
259 clev = LEVEL_3DB;
260 slev = 1;
261 case CONVERT (A52_3F1R, A52_STEREO):
262 coeff[0] = coeff[2] = level;
263 coeff[1] = level * clev;
264 coeff[3] = level * slev * LEVEL_3DB;
265 return 15;
267 case CONVERT (A52_2F2R, A52_DOLBY):
268 slev = LEVEL_3DB;
269 case CONVERT (A52_2F2R, A52_STEREO):
270 coeff[0] = coeff[1] = level;
271 coeff[2] = coeff[3] = level * slev;
272 return 15;
274 case CONVERT (A52_3F2R, A52_DOLBY):
275 clev = LEVEL_3DB;
276 case CONVERT (A52_3F2R, A52_2F1R):
277 slev = LEVEL_3DB;
278 case CONVERT (A52_3F2R, A52_STEREO):
279 coeff[0] = coeff[2] = level;
280 coeff[1] = level * clev;
281 coeff[3] = coeff[4] = level * slev;
282 return 31;
284 case CONVERT (A52_3F1R, A52_3F):
285 coeff[0] = coeff[1] = coeff[2] = level;
286 coeff[3] = level * slev * LEVEL_3DB;
287 return 13;
289 case CONVERT (A52_3F2R, A52_3F):
290 coeff[0] = coeff[1] = coeff[2] = level;
291 coeff[3] = coeff[4] = level * slev;
292 return 29;
294 case CONVERT (A52_2F2R, A52_2F1R):
295 coeff[0] = coeff[1] = level;
296 coeff[2] = coeff[3] = level * LEVEL_3DB;
297 return 12;
299 case CONVERT (A52_3F2R, A52_3F1R):
300 coeff[0] = coeff[1] = coeff[2] = level;
301 coeff[3] = coeff[4] = level * LEVEL_3DB;
302 return 24;
304 case CONVERT (A52_2F1R, A52_2F2R):
305 coeff[0] = coeff[1] = level;
306 coeff[2] = level * LEVEL_3DB;
307 return 0;
309 case CONVERT (A52_3F1R, A52_2F2R):
310 coeff[0] = coeff[2] = level;
311 coeff[1] = level * clev;
312 coeff[3] = level * LEVEL_3DB;
313 return 7;
315 case CONVERT (A52_3F1R, A52_3F2R):
316 coeff[0] = coeff[1] = coeff[2] = level;
317 coeff[3] = level * LEVEL_3DB;
318 return 0;
320 case CONVERT (A52_CHANNEL, A52_CHANNEL1):
321 coeff[0] = level;
322 coeff[1] = 0;
323 return 0;
325 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
326 coeff[0] = 0;
327 coeff[1] = level;
328 return 0;
331 return -1; /* NOTREACHED */
334 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
336 int i;
338 for (i = 0; i < 256; i++)
339 dest[i] += src[i] + bias;
342 static void mix3to1 (sample_t * samples, sample_t bias)
344 int i;
346 for (i = 0; i < 256; i++)
347 samples[i] += samples[i + 256] + samples[i + 512] + bias;
350 static void mix4to1 (sample_t * samples, sample_t bias)
352 int i;
354 for (i = 0; i < 256; i++)
355 samples[i] += (samples[i + 256] + samples[i + 512] +
356 samples[i + 768] + bias);
359 static void mix5to1 (sample_t * samples, sample_t bias)
361 int i;
363 for (i = 0; i < 256; i++)
364 samples[i] += (samples[i + 256] + samples[i + 512] +
365 samples[i + 768] + samples[i + 1024] + bias);
368 static void mix3to2 (sample_t * samples, sample_t bias)
370 int i;
371 sample_t common;
373 for (i = 0; i < 256; i++) {
374 common = samples[i + 256] + bias;
375 samples[i] += common;
376 samples[i + 256] = samples[i + 512] + common;
380 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
382 int i;
383 sample_t common;
385 for (i = 0; i < 256; i++) {
386 common = right[i + 256] + bias;
387 left[i] += common;
388 right[i] += common;
392 static void mix21toS (sample_t * samples, sample_t bias)
394 int i;
395 sample_t surround;
397 for (i = 0; i < 256; i++) {
398 surround = samples[i + 512];
399 samples[i] += bias - surround;
400 samples[i + 256] += bias + surround;
404 static void mix31to2 (sample_t * samples, sample_t bias)
406 int i;
407 sample_t common;
409 for (i = 0; i < 256; i++) {
410 common = samples[i + 256] + samples[i + 768] + bias;
411 samples[i] += common;
412 samples[i + 256] = samples[i + 512] + common;
416 static void mix31toS (sample_t * samples, sample_t bias)
418 int i;
419 sample_t common, surround;
421 for (i = 0; i < 256; i++) {
422 common = samples[i + 256] + bias;
423 surround = samples[i + 768];
424 samples[i] += common - surround;
425 samples[i + 256] = samples[i + 512] + common + surround;
429 static void mix22toS (sample_t * samples, sample_t bias)
431 int i;
432 sample_t surround;
434 for (i = 0; i < 256; i++) {
435 surround = samples[i + 512] + samples[i + 768];
436 samples[i] += bias - surround;
437 samples[i + 256] += bias + surround;
441 static void mix32to2 (sample_t * samples, sample_t bias)
443 int i;
444 sample_t common;
446 for (i = 0; i < 256; i++) {
447 common = samples[i + 256] + bias;
448 samples[i] += common + samples[i + 768];
449 samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
453 static void mix32toS (sample_t * samples, sample_t bias)
455 int i;
456 sample_t common, surround;
458 for (i = 0; i < 256; i++) {
459 common = samples[i + 256] + bias;
460 surround = samples[i + 768] + samples[i + 1024];
461 samples[i] += common - surround;
462 samples[i + 256] = samples[i + 512] + common + surround;
466 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
468 int i;
470 for (i = 0; i < 256; i++)
471 dest[i] = src[i] + src[i + 256] + bias;
474 static void zero (sample_t * samples)
476 int i;
477 for (i = 0; i < 256; i++)
478 samples[i] = 0;
481 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
482 sample_t clev, sample_t slev)
484 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
486 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
487 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
488 break;
490 case CONVERT (A52_CHANNEL, A52_MONO):
491 case CONVERT (A52_STEREO, A52_MONO):
492 mix_2to1:
493 mix2to1 (samples, samples + 256, bias);
494 break;
496 case CONVERT (A52_2F1R, A52_MONO):
497 if (slev == 0)
498 goto mix_2to1;
499 case CONVERT (A52_3F, A52_MONO):
500 mix_3to1:
501 mix3to1 (samples, bias);
502 break;
504 case CONVERT (A52_3F1R, A52_MONO):
505 if (slev == 0)
506 goto mix_3to1;
507 case CONVERT (A52_2F2R, A52_MONO):
508 if (slev == 0)
509 goto mix_2to1;
510 mix4to1 (samples, bias);
511 break;
513 case CONVERT (A52_3F2R, A52_MONO):
514 if (slev == 0)
515 goto mix_3to1;
516 mix5to1 (samples, bias);
517 break;
519 case CONVERT (A52_MONO, A52_DOLBY):
520 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
521 break;
523 case CONVERT (A52_3F, A52_STEREO):
524 case CONVERT (A52_3F, A52_DOLBY):
525 mix_3to2:
526 mix3to2 (samples, bias);
527 break;
529 case CONVERT (A52_2F1R, A52_STEREO):
530 if (slev == 0)
531 break;
532 mix21to2 (samples, samples + 256, bias);
533 break;
535 case CONVERT (A52_2F1R, A52_DOLBY):
536 mix21toS (samples, bias);
537 break;
539 case CONVERT (A52_3F1R, A52_STEREO):
540 if (slev == 0)
541 goto mix_3to2;
542 mix31to2 (samples, bias);
543 break;
545 case CONVERT (A52_3F1R, A52_DOLBY):
546 mix31toS (samples, bias);
547 break;
549 case CONVERT (A52_2F2R, A52_STEREO):
550 if (slev == 0)
551 break;
552 mix2to1 (samples, samples + 512, bias);
553 mix2to1 (samples + 256, samples + 768, bias);
554 break;
556 case CONVERT (A52_2F2R, A52_DOLBY):
557 mix22toS (samples, bias);
558 break;
560 case CONVERT (A52_3F2R, A52_STEREO):
561 if (slev == 0)
562 goto mix_3to2;
563 mix32to2 (samples, bias);
564 break;
566 case CONVERT (A52_3F2R, A52_DOLBY):
567 mix32toS (samples, bias);
568 break;
570 case CONVERT (A52_3F1R, A52_3F):
571 if (slev == 0)
572 break;
573 mix21to2 (samples, samples + 512, bias);
574 break;
576 case CONVERT (A52_3F2R, A52_3F):
577 if (slev == 0)
578 break;
579 mix2to1 (samples, samples + 768, bias);
580 mix2to1 (samples + 512, samples + 1024, bias);
581 break;
583 case CONVERT (A52_3F1R, A52_2F1R):
584 mix3to2 (samples, bias);
585 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
586 break;
588 case CONVERT (A52_2F2R, A52_2F1R):
589 mix2to1 (samples + 512, samples + 768, bias);
590 break;
592 case CONVERT (A52_3F2R, A52_2F1R):
593 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
594 move2to1 (samples + 768, samples + 512, bias);
595 break;
597 case CONVERT (A52_3F2R, A52_3F1R):
598 mix2to1 (samples + 768, samples + 1024, bias);
599 break;
601 case CONVERT (A52_2F1R, A52_2F2R):
602 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
603 break;
605 case CONVERT (A52_3F1R, A52_2F2R):
606 mix3to2 (samples, bias);
607 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
608 break;
610 case CONVERT (A52_3F2R, A52_2F2R):
611 mix3to2 (samples, bias);
612 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
613 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
614 break;
616 case CONVERT (A52_3F1R, A52_3F2R):
617 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
618 break;
622 static void upmix_C (sample_t * samples, int acmod, int output)
624 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
626 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
627 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
628 break;
630 case CONVERT (A52_3F2R, A52_MONO):
631 zero (samples + 1024);
632 case CONVERT (A52_3F1R, A52_MONO):
633 case CONVERT (A52_2F2R, A52_MONO):
634 zero (samples + 768);
635 case CONVERT (A52_3F, A52_MONO):
636 case CONVERT (A52_2F1R, A52_MONO):
637 zero (samples + 512);
638 case CONVERT (A52_CHANNEL, A52_MONO):
639 case CONVERT (A52_STEREO, A52_MONO):
640 zero (samples + 256);
641 break;
643 case CONVERT (A52_3F2R, A52_STEREO):
644 case CONVERT (A52_3F2R, A52_DOLBY):
645 zero (samples + 1024);
646 case CONVERT (A52_3F1R, A52_STEREO):
647 case CONVERT (A52_3F1R, A52_DOLBY):
648 zero (samples + 768);
649 case CONVERT (A52_3F, A52_STEREO):
650 case CONVERT (A52_3F, A52_DOLBY):
651 mix_3to2:
652 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
653 zero (samples + 256);
654 break;
656 case CONVERT (A52_2F2R, A52_STEREO):
657 case CONVERT (A52_2F2R, A52_DOLBY):
658 zero (samples + 768);
659 case CONVERT (A52_2F1R, A52_STEREO):
660 case CONVERT (A52_2F1R, A52_DOLBY):
661 zero (samples + 512);
662 break;
664 case CONVERT (A52_3F2R, A52_3F):
665 zero (samples + 1024);
666 case CONVERT (A52_3F1R, A52_3F):
667 case CONVERT (A52_2F2R, A52_2F1R):
668 zero (samples + 768);
669 break;
671 case CONVERT (A52_3F2R, A52_3F1R):
672 zero (samples + 1024);
673 break;
675 case CONVERT (A52_3F2R, A52_2F1R):
676 zero (samples + 1024);
677 case CONVERT (A52_3F1R, A52_2F1R):
678 mix_31to21:
679 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
680 goto mix_3to2;
682 case CONVERT (A52_3F2R, A52_2F2R):
683 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
684 goto mix_31to21;
688 #if defined(ARCH_X86) || defined(ARCH_X86_64)
689 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
691 asm volatile(
692 "movlps %2, %%xmm7 \n\t"
693 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
694 "mov $-1024, %%"REG_S" \n\t"
695 ASMALIGN16
696 "1: \n\t"
697 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
698 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
699 "addps (%1, %%"REG_S"), %%xmm0 \n\t"
700 "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
701 "addps %%xmm7, %%xmm0 \n\t"
702 "addps %%xmm7, %%xmm1 \n\t"
703 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
704 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
705 "add $32, %%"REG_S" \n\t"
706 " jnz 1b \n\t"
707 :: "r" (src+256), "r" (dest+256), "m" (bias)
708 : "%"REG_S
712 static void mix3to1_SSE (sample_t * samples, sample_t bias)
714 asm volatile(
715 "movlps %1, %%xmm7 \n\t"
716 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
717 "mov $-1024, %%"REG_S" \n\t"
718 ASMALIGN16
719 "1: \n\t"
720 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
721 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
722 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
723 "addps %%xmm7, %%xmm1 \n\t"
724 "addps %%xmm1, %%xmm0 \n\t"
725 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
726 "add $16, %%"REG_S" \n\t"
727 " jnz 1b \n\t"
728 :: "r" (samples+256), "m" (bias)
729 : "%"REG_S
733 static void mix4to1_SSE (sample_t * samples, sample_t bias)
735 asm volatile(
736 "movlps %1, %%xmm7 \n\t"
737 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
738 "mov $-1024, %%"REG_S" \n\t"
739 ASMALIGN16
740 "1: \n\t"
741 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
742 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
743 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
744 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
745 "addps %%xmm7, %%xmm0 \n\t"
746 "addps %%xmm1, %%xmm0 \n\t"
747 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
748 "add $16, %%"REG_S" \n\t"
749 " jnz 1b \n\t"
750 :: "r" (samples+256), "m" (bias)
751 : "%"REG_S
755 static void mix5to1_SSE (sample_t * samples, sample_t bias)
757 asm volatile(
758 "movlps %1, %%xmm7 \n\t"
759 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
760 "mov $-1024, %%"REG_S" \n\t"
761 ASMALIGN16
762 "1: \n\t"
763 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
764 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
765 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
766 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
767 "addps %%xmm7, %%xmm0 \n\t"
768 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
769 "addps %%xmm1, %%xmm0 \n\t"
770 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
771 "add $16, %%"REG_S" \n\t"
772 " jnz 1b \n\t"
773 :: "r" (samples+256), "m" (bias)
774 : "%"REG_S
778 static void mix3to2_SSE (sample_t * samples, sample_t bias)
780 asm volatile(
781 "movlps %1, %%xmm7 \n\t"
782 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
783 "mov $-1024, %%"REG_S" \n\t"
784 ASMALIGN16
785 "1: \n\t"
786 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
787 "addps %%xmm7, %%xmm0 \n\t" //common
788 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
789 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
790 "addps %%xmm0, %%xmm1 \n\t"
791 "addps %%xmm0, %%xmm2 \n\t"
792 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
793 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
794 "add $16, %%"REG_S" \n\t"
795 " jnz 1b \n\t"
796 :: "r" (samples+256), "m" (bias)
797 : "%"REG_S
801 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
803 asm volatile(
804 "movlps %2, %%xmm7 \n\t"
805 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
806 "mov $-1024, %%"REG_S" \n\t"
807 ASMALIGN16
808 "1: \n\t"
809 "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
810 "addps %%xmm7, %%xmm0 \n\t" //common
811 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
812 "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
813 "addps %%xmm0, %%xmm1 \n\t"
814 "addps %%xmm0, %%xmm2 \n\t"
815 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
816 "movaps %%xmm2, (%1, %%"REG_S") \n\t"
817 "add $16, %%"REG_S" \n\t"
818 " jnz 1b \n\t"
819 :: "r" (left+256), "r" (right+256), "m" (bias)
820 : "%"REG_S
824 static void mix21toS_SSE (sample_t * samples, sample_t bias)
826 asm volatile(
827 "movlps %1, %%xmm7 \n\t"
828 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
829 "mov $-1024, %%"REG_S" \n\t"
830 ASMALIGN16
831 "1: \n\t"
832 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
833 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
834 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
835 "addps %%xmm7, %%xmm1 \n\t"
836 "addps %%xmm7, %%xmm2 \n\t"
837 "subps %%xmm0, %%xmm1 \n\t"
838 "addps %%xmm0, %%xmm2 \n\t"
839 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
840 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
841 "add $16, %%"REG_S" \n\t"
842 " jnz 1b \n\t"
843 :: "r" (samples+256), "m" (bias)
844 : "%"REG_S
848 static void mix31to2_SSE (sample_t * samples, sample_t bias)
850 asm volatile(
851 "movlps %1, %%xmm7 \n\t"
852 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
853 "mov $-1024, %%"REG_S" \n\t"
854 ASMALIGN16
855 "1: \n\t"
856 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
857 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
858 "addps %%xmm7, %%xmm0 \n\t" // common
859 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
860 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
861 "addps %%xmm0, %%xmm1 \n\t"
862 "addps %%xmm0, %%xmm2 \n\t"
863 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
864 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
865 "add $16, %%"REG_S" \n\t"
866 " jnz 1b \n\t"
867 :: "r" (samples+256), "m" (bias)
868 : "%"REG_S
872 static void mix31toS_SSE (sample_t * samples, sample_t bias)
874 asm volatile(
875 "movlps %1, %%xmm7 \n\t"
876 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
877 "mov $-1024, %%"REG_S" \n\t"
878 ASMALIGN16
879 "1: \n\t"
880 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
881 "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
882 "addps %%xmm7, %%xmm0 \n\t" // common
883 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
884 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
885 "addps %%xmm0, %%xmm1 \n\t"
886 "addps %%xmm0, %%xmm2 \n\t"
887 "subps %%xmm3, %%xmm1 \n\t"
888 "addps %%xmm3, %%xmm2 \n\t"
889 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
890 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
891 "add $16, %%"REG_S" \n\t"
892 " jnz 1b \n\t"
893 :: "r" (samples+256), "m" (bias)
894 : "%"REG_S
898 static void mix22toS_SSE (sample_t * samples, sample_t bias)
900 asm volatile(
901 "movlps %1, %%xmm7 \n\t"
902 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
903 "mov $-1024, %%"REG_S" \n\t"
904 ASMALIGN16
905 "1: \n\t"
906 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
907 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
908 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
909 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
910 "addps %%xmm7, %%xmm1 \n\t"
911 "addps %%xmm7, %%xmm2 \n\t"
912 "subps %%xmm0, %%xmm1 \n\t"
913 "addps %%xmm0, %%xmm2 \n\t"
914 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
915 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
916 "add $16, %%"REG_S" \n\t"
917 " jnz 1b \n\t"
918 :: "r" (samples+256), "m" (bias)
919 : "%"REG_S
923 static void mix32to2_SSE (sample_t * samples, sample_t bias)
925 asm volatile(
926 "movlps %1, %%xmm7 \n\t"
927 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
928 "mov $-1024, %%"REG_S" \n\t"
929 ASMALIGN16
930 "1: \n\t"
931 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
932 "addps %%xmm7, %%xmm0 \n\t" // common
933 "movaps %%xmm0, %%xmm1 \n\t" // common
934 "addps (%0, %%"REG_S"), %%xmm0 \n\t"
935 "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
936 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
937 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
938 "movaps %%xmm0, (%0, %%"REG_S") \n\t"
939 "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
940 "add $16, %%"REG_S" \n\t"
941 " jnz 1b \n\t"
942 :: "r" (samples+256), "m" (bias)
943 : "%"REG_S
947 static void mix32toS_SSE (sample_t * samples, sample_t bias)
949 asm volatile(
950 "movlps %1, %%xmm7 \n\t"
951 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
952 "mov $-1024, %%"REG_S" \n\t"
953 ASMALIGN16
954 "1: \n\t"
955 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
956 "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
957 "addps %%xmm7, %%xmm0 \n\t" // common
958 "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
959 "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
960 "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
961 "subps %%xmm2, %%xmm1 \n\t"
962 "addps %%xmm2, %%xmm3 \n\t"
963 "addps %%xmm0, %%xmm1 \n\t"
964 "addps %%xmm0, %%xmm3 \n\t"
965 "movaps %%xmm1, (%0, %%"REG_S") \n\t"
966 "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
967 "add $16, %%"REG_S" \n\t"
968 " jnz 1b \n\t"
969 :: "r" (samples+256), "m" (bias)
970 : "%"REG_S
974 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
976 asm volatile(
977 "movlps %2, %%xmm7 \n\t"
978 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
979 "mov $-1024, %%"REG_S" \n\t"
980 ASMALIGN16
981 "1: \n\t"
982 "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
983 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
984 "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
985 "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
986 "addps %%xmm7, %%xmm0 \n\t"
987 "addps %%xmm7, %%xmm1 \n\t"
988 "movaps %%xmm0, (%1, %%"REG_S") \n\t"
989 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
990 "add $32, %%"REG_S" \n\t"
991 " jnz 1b \n\t"
992 :: "r" (src+256), "r" (dest+256), "m" (bias)
993 : "%"REG_S
997 static void zero_MMX(sample_t * samples)
999 asm volatile(
1000 "mov $-1024, %%"REG_S" \n\t"
1001 "pxor %%mm0, %%mm0 \n\t"
1002 ASMALIGN16
1003 "1: \n\t"
1004 "movq %%mm0, (%0, %%"REG_S") \n\t"
1005 "movq %%mm0, 8(%0, %%"REG_S") \n\t"
1006 "movq %%mm0, 16(%0, %%"REG_S") \n\t"
1007 "movq %%mm0, 24(%0, %%"REG_S") \n\t"
1008 "add $32, %%"REG_S" \n\t"
1009 " jnz 1b \n\t"
1010 "emms"
1011 :: "r" (samples+256)
1012 : "%"REG_S
/*
 * Copy size bytes from src to dest in 64-byte chunks through the MMX
 * registers. Only whole chunks are copied: the trailing size % 64 bytes
 * are left untouched, so size should be a multiple of 64. movq has no
 * alignment requirement, but dest and src are expected to be at least
 * 8-byte aligned for speed. The regions must not overlap.
 *
 * Note: not called anywhere in this file.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    unsigned char *out = dest;
    const unsigned char *in = src;
    unsigned chunks = size / 64;
    unsigned i;

    for (i = 0; i < chunks; i++) {
        __asm__ volatile(
            "movq   (%0), %%mm0 \n\t"
            "movq  8(%0), %%mm1 \n\t"
            "movq 16(%0), %%mm2 \n\t"
            "movq 24(%0), %%mm3 \n\t"
            "movq 32(%0), %%mm4 \n\t"
            "movq 40(%0), %%mm5 \n\t"
            "movq 48(%0), %%mm6 \n\t"
            "movq 56(%0), %%mm7 \n\t"
            "movq %%mm0,   (%1) \n\t"
            "movq %%mm1,  8(%1) \n\t"
            "movq %%mm2, 16(%1) \n\t"
            "movq %%mm3, 24(%1) \n\t"
            "movq %%mm4, 32(%1) \n\t"
            "movq %%mm5, 40(%1) \n\t"
            "movq %%mm6, 48(%1) \n\t"
            "movq %%mm7, 56(%1) \n\t"
            :
            /* pass the buffer addresses in registers, not the pointer variables */
            : "r" (in), "r" (out)
            : "memory"
        );
        /* advance to the next 64-byte chunk */
        in += 64;
        out += 64;
    }

    /* leave MMX state so that later x87 code works */
    __asm__ volatile("emms" ::: "memory");
}
1049 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
1050 sample_t clev, sample_t slev)
1052 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1054 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1055 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1056 break;
1058 case CONVERT (A52_CHANNEL, A52_MONO):
1059 case CONVERT (A52_STEREO, A52_MONO):
1060 mix_2to1_SSE:
1061 mix2to1_SSE (samples, samples + 256, bias);
1062 break;
1064 case CONVERT (A52_2F1R, A52_MONO):
1065 if (slev == 0)
1066 goto mix_2to1_SSE;
1067 case CONVERT (A52_3F, A52_MONO):
1068 mix_3to1_SSE:
1069 mix3to1_SSE (samples, bias);
1070 break;
1072 case CONVERT (A52_3F1R, A52_MONO):
1073 if (slev == 0)
1074 goto mix_3to1_SSE;
1075 case CONVERT (A52_2F2R, A52_MONO):
1076 if (slev == 0)
1077 goto mix_2to1_SSE;
1078 mix4to1_SSE (samples, bias);
1079 break;
1081 case CONVERT (A52_3F2R, A52_MONO):
1082 if (slev == 0)
1083 goto mix_3to1_SSE;
1084 mix5to1_SSE (samples, bias);
1085 break;
1087 case CONVERT (A52_MONO, A52_DOLBY):
1088 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1089 break;
1091 case CONVERT (A52_3F, A52_STEREO):
1092 case CONVERT (A52_3F, A52_DOLBY):
1093 mix_3to2_SSE:
1094 mix3to2_SSE (samples, bias);
1095 break;
1097 case CONVERT (A52_2F1R, A52_STEREO):
1098 if (slev == 0)
1099 break;
1100 mix21to2_SSE (samples, samples + 256, bias);
1101 break;
1103 case CONVERT (A52_2F1R, A52_DOLBY):
1104 mix21toS_SSE (samples, bias);
1105 break;
1107 case CONVERT (A52_3F1R, A52_STEREO):
1108 if (slev == 0)
1109 goto mix_3to2_SSE;
1110 mix31to2_SSE (samples, bias);
1111 break;
1113 case CONVERT (A52_3F1R, A52_DOLBY):
1114 mix31toS_SSE (samples, bias);
1115 break;
1117 case CONVERT (A52_2F2R, A52_STEREO):
1118 if (slev == 0)
1119 break;
1120 mix2to1_SSE (samples, samples + 512, bias);
1121 mix2to1_SSE (samples + 256, samples + 768, bias);
1122 break;
1124 case CONVERT (A52_2F2R, A52_DOLBY):
1125 mix22toS_SSE (samples, bias);
1126 break;
1128 case CONVERT (A52_3F2R, A52_STEREO):
1129 if (slev == 0)
1130 goto mix_3to2_SSE;
1131 mix32to2_SSE (samples, bias);
1132 break;
1134 case CONVERT (A52_3F2R, A52_DOLBY):
1135 mix32toS_SSE (samples, bias);
1136 break;
1138 case CONVERT (A52_3F1R, A52_3F):
1139 if (slev == 0)
1140 break;
1141 mix21to2_SSE (samples, samples + 512, bias);
1142 break;
1144 case CONVERT (A52_3F2R, A52_3F):
1145 if (slev == 0)
1146 break;
1147 mix2to1_SSE (samples, samples + 768, bias);
1148 mix2to1_SSE (samples + 512, samples + 1024, bias);
1149 break;
1151 case CONVERT (A52_3F1R, A52_2F1R):
1152 mix3to2_SSE (samples, bias);
1153 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1154 break;
1156 case CONVERT (A52_2F2R, A52_2F1R):
1157 mix2to1_SSE (samples + 512, samples + 768, bias);
1158 break;
1160 case CONVERT (A52_3F2R, A52_2F1R):
1161 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1162 move2to1_SSE (samples + 768, samples + 512, bias);
1163 break;
1165 case CONVERT (A52_3F2R, A52_3F1R):
1166 mix2to1_SSE (samples + 768, samples + 1024, bias);
1167 break;
1169 case CONVERT (A52_2F1R, A52_2F2R):
1170 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1171 break;
1173 case CONVERT (A52_3F1R, A52_2F2R):
1174 mix3to2_SSE (samples, bias);
1175 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1176 break;
1178 case CONVERT (A52_3F2R, A52_2F2R):
1179 mix3to2_SSE (samples, bias);
1180 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1181 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1182 break;
1184 case CONVERT (A52_3F1R, A52_3F2R):
1185 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1186 break;
1190 static void upmix_MMX (sample_t * samples, int acmod, int output)
1192 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1194 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1195 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1196 break;
1198 case CONVERT (A52_3F2R, A52_MONO):
1199 zero_MMX (samples + 1024);
1200 case CONVERT (A52_3F1R, A52_MONO):
1201 case CONVERT (A52_2F2R, A52_MONO):
1202 zero_MMX (samples + 768);
1203 case CONVERT (A52_3F, A52_MONO):
1204 case CONVERT (A52_2F1R, A52_MONO):
1205 zero_MMX (samples + 512);
1206 case CONVERT (A52_CHANNEL, A52_MONO):
1207 case CONVERT (A52_STEREO, A52_MONO):
1208 zero_MMX (samples + 256);
1209 break;
1211 case CONVERT (A52_3F2R, A52_STEREO):
1212 case CONVERT (A52_3F2R, A52_DOLBY):
1213 zero_MMX (samples + 1024);
1214 case CONVERT (A52_3F1R, A52_STEREO):
1215 case CONVERT (A52_3F1R, A52_DOLBY):
1216 zero_MMX (samples + 768);
1217 case CONVERT (A52_3F, A52_STEREO):
1218 case CONVERT (A52_3F, A52_DOLBY):
1219 mix_3to2_MMX:
1220 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
1221 zero_MMX (samples + 256);
1222 break;
1224 case CONVERT (A52_2F2R, A52_STEREO):
1225 case CONVERT (A52_2F2R, A52_DOLBY):
1226 zero_MMX (samples + 768);
1227 case CONVERT (A52_2F1R, A52_STEREO):
1228 case CONVERT (A52_2F1R, A52_DOLBY):
1229 zero_MMX (samples + 512);
1230 break;
1232 case CONVERT (A52_3F2R, A52_3F):
1233 zero_MMX (samples + 1024);
1234 case CONVERT (A52_3F1R, A52_3F):
1235 case CONVERT (A52_2F2R, A52_2F1R):
1236 zero_MMX (samples + 768);
1237 break;
1239 case CONVERT (A52_3F2R, A52_3F1R):
1240 zero_MMX (samples + 1024);
1241 break;
1243 case CONVERT (A52_3F2R, A52_2F1R):
1244 zero_MMX (samples + 1024);
1245 case CONVERT (A52_3F1R, A52_2F1R):
1246 mix_31to21_MMX:
1247 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1248 goto mix_3to2_MMX;
1250 case CONVERT (A52_3F2R, A52_2F2R):
1251 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1252 goto mix_31to21_MMX;
1256 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
1258 asm volatile(
1259 "movd %2, %%mm7 \n\t"
1260 "punpckldq %2, %%mm7 \n\t"
1261 "mov $-1024, %%"REG_S" \n\t"
1262 ASMALIGN16
1263 "1: \n\t"
1264 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1265 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1266 "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
1267 "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
1268 "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
1269 "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
1270 "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
1271 "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
1272 "pfadd %%mm7, %%mm0 \n\t"
1273 "pfadd %%mm7, %%mm1 \n\t"
1274 "pfadd %%mm7, %%mm2 \n\t"
1275 "pfadd %%mm7, %%mm3 \n\t"
1276 "movq %%mm0, (%1, %%"REG_S") \n\t"
1277 "movq %%mm1, 8(%1, %%"REG_S") \n\t"
1278 "movq %%mm2, 16(%1, %%"REG_S") \n\t"
1279 "movq %%mm3, 24(%1, %%"REG_S") \n\t"
1280 "add $32, %%"REG_S" \n\t"
1281 " jnz 1b \n\t"
1282 :: "r" (src+256), "r" (dest+256), "m" (bias)
1283 : "%"REG_S
1287 static void mix3to1_3dnow (sample_t * samples, sample_t bias)
1289 asm volatile(
1290 "movd %1, %%mm7 \n\t"
1291 "punpckldq %1, %%mm7 \n\t"
1292 "mov $-1024, %%"REG_S" \n\t"
1293 ASMALIGN16
1294 "1: \n\t"
1295 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1296 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1297 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1298 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1299 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1300 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1301 "pfadd %%mm7, %%mm0 \n\t"
1302 "pfadd %%mm7, %%mm1 \n\t"
1303 "pfadd %%mm2, %%mm0 \n\t"
1304 "pfadd %%mm3, %%mm1 \n\t"
1305 "movq %%mm0, (%0, %%"REG_S") \n\t"
1306 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1307 "add $16, %%"REG_S" \n\t"
1308 " jnz 1b \n\t"
1309 :: "r" (samples+256), "m" (bias)
1310 : "%"REG_S
1314 static void mix4to1_3dnow (sample_t * samples, sample_t bias)
1316 asm volatile(
1317 "movd %1, %%mm7 \n\t"
1318 "punpckldq %1, %%mm7 \n\t"
1319 "mov $-1024, %%"REG_S" \n\t"
1320 ASMALIGN16
1321 "1: \n\t"
1322 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1323 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1324 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1325 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1326 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1327 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1328 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
1329 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
1330 "pfadd %%mm7, %%mm0 \n\t"
1331 "pfadd %%mm7, %%mm1 \n\t"
1332 "pfadd %%mm2, %%mm0 \n\t"
1333 "pfadd %%mm3, %%mm1 \n\t"
1334 "movq %%mm0, (%0, %%"REG_S") \n\t"
1335 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1336 "add $16, %%"REG_S" \n\t"
1337 " jnz 1b \n\t"
1338 :: "r" (samples+256), "m" (bias)
1339 : "%"REG_S
1343 static void mix5to1_3dnow (sample_t * samples, sample_t bias)
1345 asm volatile(
1346 "movd %1, %%mm7 \n\t"
1347 "punpckldq %1, %%mm7 \n\t"
1348 "mov $-1024, %%"REG_S" \n\t"
1349 ASMALIGN16
1350 "1: \n\t"
1351 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1352 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1353 "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
1354 "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
1355 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
1356 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
1357 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
1358 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
1359 "pfadd %%mm7, %%mm0 \n\t"
1360 "pfadd %%mm7, %%mm1 \n\t"
1361 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
1362 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
1363 "pfadd %%mm2, %%mm0 \n\t"
1364 "pfadd %%mm3, %%mm1 \n\t"
1365 "movq %%mm0, (%0, %%"REG_S") \n\t"
1366 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1367 "add $16, %%"REG_S" \n\t"
1368 " jnz 1b \n\t"
1369 :: "r" (samples+256), "m" (bias)
1370 : "%"REG_S
1374 static void mix3to2_3dnow (sample_t * samples, sample_t bias)
1376 asm volatile(
1377 "movd %1, %%mm7 \n\t"
1378 "punpckldq %1, %%mm7 \n\t"
1379 "mov $-1024, %%"REG_S" \n\t"
1380 ASMALIGN16
1381 "1: \n\t"
1382 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1383 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1384 "pfadd %%mm7, %%mm0 \n\t" //common
1385 "pfadd %%mm7, %%mm1 \n\t" //common
1386 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1387 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1388 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1389 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1390 "pfadd %%mm0, %%mm2 \n\t"
1391 "pfadd %%mm1, %%mm3 \n\t"
1392 "pfadd %%mm0, %%mm4 \n\t"
1393 "pfadd %%mm1, %%mm5 \n\t"
1394 "movq %%mm2, (%0, %%"REG_S") \n\t"
1395 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1396 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1397 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1398 "add $16, %%"REG_S" \n\t"
1399 " jnz 1b \n\t"
1400 :: "r" (samples+256), "m" (bias)
1401 : "%"REG_S
1405 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
1407 asm volatile(
1408 "movd %2, %%mm7 \n\t"
1409 "punpckldq %2, %%mm7 \n\t"
1410 "mov $-1024, %%"REG_S" \n\t"
1411 ASMALIGN16
1412 "1: \n\t"
1413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
1414 "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
1415 "pfadd %%mm7, %%mm0 \n\t" //common
1416 "pfadd %%mm7, %%mm1 \n\t" //common
1417 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1418 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1419 "movq (%1, %%"REG_S"), %%mm4 \n\t"
1420 "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
1421 "pfadd %%mm0, %%mm2 \n\t"
1422 "pfadd %%mm1, %%mm3 \n\t"
1423 "pfadd %%mm0, %%mm4 \n\t"
1424 "pfadd %%mm1, %%mm5 \n\t"
1425 "movq %%mm2, (%0, %%"REG_S") \n\t"
1426 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1427 "movq %%mm4, (%1, %%"REG_S") \n\t"
1428 "movq %%mm5, 8(%1, %%"REG_S") \n\t"
1429 "add $16, %%"REG_S" \n\t"
1430 " jnz 1b \n\t"
1431 :: "r" (left+256), "r" (right+256), "m" (bias)
1432 : "%"REG_S
1436 static void mix21toS_3dnow (sample_t * samples, sample_t bias)
1438 asm volatile(
1439 "movd %1, %%mm7 \n\t"
1440 "punpckldq %1, %%mm7 \n\t"
1441 "mov $-1024, %%"REG_S" \n\t"
1442 ASMALIGN16
1443 "1: \n\t"
1444 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
1445 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
1446 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1447 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1448 "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
1449 "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
1450 "pfadd %%mm7, %%mm2 \n\t"
1451 "pfadd %%mm7, %%mm3 \n\t"
1452 "pfadd %%mm7, %%mm4 \n\t"
1453 "pfadd %%mm7, %%mm5 \n\t"
1454 "pfsub %%mm0, %%mm2 \n\t"
1455 "pfsub %%mm1, %%mm3 \n\t"
1456 "pfadd %%mm0, %%mm4 \n\t"
1457 "pfadd %%mm1, %%mm5 \n\t"
1458 "movq %%mm2, (%0, %%"REG_S") \n\t"
1459 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1460 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1461 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1462 "add $16, %%"REG_S" \n\t"
1463 " jnz 1b \n\t"
1464 :: "r" (samples+256), "m" (bias)
1465 : "%"REG_S
1469 static void mix31to2_3dnow (sample_t * samples, sample_t bias)
1471 asm volatile(
1472 "movd %1, %%mm7 \n\t"
1473 "punpckldq %1, %%mm7 \n\t"
1474 "mov $-1024, %%"REG_S" \n\t"
1475 ASMALIGN16
1476 "1: \n\t"
1477 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1478 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1479 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
1480 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
1481 "pfadd %%mm7, %%mm0 \n\t" // common
1482 "pfadd %%mm7, %%mm1 \n\t" // common
1483 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1484 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1485 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1486 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1487 "pfadd %%mm0, %%mm2 \n\t"
1488 "pfadd %%mm1, %%mm3 \n\t"
1489 "pfadd %%mm0, %%mm4 \n\t"
1490 "pfadd %%mm1, %%mm5 \n\t"
1491 "movq %%mm2, (%0, %%"REG_S") \n\t"
1492 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1493 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1494 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1495 "add $16, %%"REG_S" \n\t"
1496 " jnz 1b \n\t"
1497 :: "r" (samples+256), "m" (bias)
1498 : "%"REG_S
1502 static void mix31toS_3dnow (sample_t * samples, sample_t bias)
1504 asm volatile(
1505 "movd %1, %%mm7 \n\t"
1506 "punpckldq %1, %%mm7 \n\t"
1507 "mov $-1024, %%"REG_S" \n\t"
1508 ASMALIGN16
1509 "1: \n\t"
1510 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1511 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1512 "pfadd %%mm7, %%mm0 \n\t" // common
1513 "pfadd %%mm7, %%mm1 \n\t" // common
1514 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1515 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1516 "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
1517 "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
1518 "pfadd %%mm0, %%mm2 \n\t"
1519 "pfadd %%mm1, %%mm3 \n\t"
1520 "pfadd %%mm0, %%mm4 \n\t"
1521 "pfadd %%mm1, %%mm5 \n\t"
1522 "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
1523 "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
1524 "pfsub %%mm0, %%mm2 \n\t"
1525 "pfsub %%mm1, %%mm3 \n\t"
1526 "pfadd %%mm0, %%mm4 \n\t"
1527 "pfadd %%mm1, %%mm5 \n\t"
1528 "movq %%mm2, (%0, %%"REG_S") \n\t"
1529 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1530 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1531 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1532 "add $16, %%"REG_S" \n\t"
1533 " jnz 1b \n\t"
1534 :: "r" (samples+256), "m" (bias)
1535 : "%"REG_S
1539 static void mix22toS_3dnow (sample_t * samples, sample_t bias)
1541 asm volatile(
1542 "movd %1, %%mm7 \n\t"
1543 "punpckldq %1, %%mm7 \n\t"
1544 "mov $-1024, %%"REG_S" \n\t"
1545 ASMALIGN16
1546 "1: \n\t"
1547 "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
1548 "movq 2056(%0, %%"REG_S"), %%mm1\n\t"
1549 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
1550 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
1551 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1552 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1553 "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
1554 "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
1555 "pfadd %%mm7, %%mm2 \n\t"
1556 "pfadd %%mm7, %%mm3 \n\t"
1557 "pfadd %%mm7, %%mm4 \n\t"
1558 "pfadd %%mm7, %%mm5 \n\t"
1559 "pfsub %%mm0, %%mm2 \n\t"
1560 "pfsub %%mm1, %%mm3 \n\t"
1561 "pfadd %%mm0, %%mm4 \n\t"
1562 "pfadd %%mm1, %%mm5 \n\t"
1563 "movq %%mm2, (%0, %%"REG_S") \n\t"
1564 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1565 "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
1566 "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
1567 "add $16, %%"REG_S" \n\t"
1568 " jnz 1b \n\t"
1569 :: "r" (samples+256), "m" (bias)
1570 : "%"REG_S
1574 static void mix32to2_3dnow (sample_t * samples, sample_t bias)
1576 asm volatile(
1577 "movd %1, %%mm7 \n\t"
1578 "punpckldq %1, %%mm7 \n\t"
1579 "mov $-1024, %%"REG_S" \n\t"
1580 ASMALIGN16
1581 "1: \n\t"
1582 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1583 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1584 "pfadd %%mm7, %%mm0 \n\t" // common
1585 "pfadd %%mm7, %%mm1 \n\t" // common
1586 "movq %%mm0, %%mm2 \n\t" // common
1587 "movq %%mm1, %%mm3 \n\t" // common
1588 "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
1589 "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t"
1590 "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
1591 "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t"
1592 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
1593 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
1594 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
1595 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
1596 "movq %%mm0, (%0, %%"REG_S") \n\t"
1597 "movq %%mm1, 8(%0, %%"REG_S") \n\t"
1598 "movq %%mm2, 1024(%0, %%"REG_S")\n\t"
1599 "movq %%mm3, 1032(%0, %%"REG_S")\n\t"
1600 "add $16, %%"REG_S" \n\t"
1601 " jnz 1b \n\t"
1602 :: "r" (samples+256), "m" (bias)
1603 : "%"REG_S
1607 /* todo: should be optimized better */
1608 static void mix32toS_3dnow (sample_t * samples, sample_t bias)
1610 asm volatile(
1611 "mov $-1024, %%"REG_S" \n\t"
1612 ASMALIGN16
1613 "1: \n\t"
1614 "movd %1, %%mm7 \n\t"
1615 "punpckldq %1, %%mm7 \n\t"
1616 "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
1617 "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
1618 "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
1619 "movq 3080(%0, %%"REG_S"), %%mm5\n\t"
1620 "pfadd %%mm7, %%mm0 \n\t" // common
1621 "pfadd %%mm7, %%mm1 \n\t" // common
1622 "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
1623 "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround
1624 "movq (%0, %%"REG_S"), %%mm2 \n\t"
1625 "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
1626 "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
1627 "movq 2056(%0, %%"REG_S"), %%mm7\n\t"
1628 "pfsub %%mm4, %%mm2 \n\t"
1629 "pfsub %%mm5, %%mm3 \n\t"
1630 "pfadd %%mm4, %%mm6 \n\t"
1631 "pfadd %%mm5, %%mm7 \n\t"
1632 "pfadd %%mm0, %%mm2 \n\t"
1633 "pfadd %%mm1, %%mm3 \n\t"
1634 "pfadd %%mm0, %%mm6 \n\t"
1635 "pfadd %%mm1, %%mm7 \n\t"
1636 "movq %%mm2, (%0, %%"REG_S") \n\t"
1637 "movq %%mm3, 8(%0, %%"REG_S") \n\t"
1638 "movq %%mm6, 1024(%0, %%"REG_S")\n\t"
1639 "movq %%mm7, 1032(%0, %%"REG_S")\n\t"
1640 "add $16, %%"REG_S" \n\t"
1641 " jnz 1b \n\t"
1642 :: "r" (samples+256), "m" (bias)
1643 : "%"REG_S
1647 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
1649 asm volatile(
1650 "movd %2, %%mm7 \n\t"
1651 "punpckldq %2, %%mm7 \n\t"
1652 "mov $-1024, %%"REG_S" \n\t"
1653 ASMALIGN16
1654 "1: \n\t"
1655 "movq (%0, %%"REG_S"), %%mm0 \n\t"
1656 "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
1657 "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
1658 "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
1659 "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t"
1660 "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t"
1661 "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t"
1662 "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t"
1663 "pfadd %%mm7, %%mm0 \n\t"
1664 "pfadd %%mm7, %%mm1 \n\t"
1665 "pfadd %%mm7, %%mm2 \n\t"
1666 "pfadd %%mm7, %%mm3 \n\t"
1667 "movq %%mm0, (%1, %%"REG_S") \n\t"
1668 "movq %%mm1, 8(%1, %%"REG_S") \n\t"
1669 "movq %%mm2, 16(%1, %%"REG_S") \n\t"
1670 "movq %%mm3, 24(%1, %%"REG_S") \n\t"
1671 "add $32, %%"REG_S" \n\t"
1672 " jnz 1b \n\t"
1673 :: "r" (src+256), "r" (dest+256), "m" (bias)
1674 : "%"REG_S
1678 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
1679 sample_t clev, sample_t slev)
1681 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1683 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1684 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1685 break;
1687 case CONVERT (A52_CHANNEL, A52_MONO):
1688 case CONVERT (A52_STEREO, A52_MONO):
1689 mix_2to1_3dnow:
1690 mix2to1_3dnow (samples, samples + 256, bias);
1691 break;
1693 case CONVERT (A52_2F1R, A52_MONO):
1694 if (slev == 0)
1695 goto mix_2to1_3dnow;
1696 case CONVERT (A52_3F, A52_MONO):
1697 mix_3to1_3dnow:
1698 mix3to1_3dnow (samples, bias);
1699 break;
1701 case CONVERT (A52_3F1R, A52_MONO):
1702 if (slev == 0)
1703 goto mix_3to1_3dnow;
1704 case CONVERT (A52_2F2R, A52_MONO):
1705 if (slev == 0)
1706 goto mix_2to1_3dnow;
1707 mix4to1_3dnow (samples, bias);
1708 break;
1710 case CONVERT (A52_3F2R, A52_MONO):
1711 if (slev == 0)
1712 goto mix_3to1_3dnow;
1713 mix5to1_3dnow (samples, bias);
1714 break;
1716 case CONVERT (A52_MONO, A52_DOLBY):
1717 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1718 break;
1720 case CONVERT (A52_3F, A52_STEREO):
1721 case CONVERT (A52_3F, A52_DOLBY):
1722 mix_3to2_3dnow:
1723 mix3to2_3dnow (samples, bias);
1724 break;
1726 case CONVERT (A52_2F1R, A52_STEREO):
1727 if (slev == 0)
1728 break;
1729 mix21to2_3dnow (samples, samples + 256, bias);
1730 break;
1732 case CONVERT (A52_2F1R, A52_DOLBY):
1733 mix21toS_3dnow (samples, bias);
1734 break;
1736 case CONVERT (A52_3F1R, A52_STEREO):
1737 if (slev == 0)
1738 goto mix_3to2_3dnow;
1739 mix31to2_3dnow (samples, bias);
1740 break;
1742 case CONVERT (A52_3F1R, A52_DOLBY):
1743 mix31toS_3dnow (samples, bias);
1744 break;
1746 case CONVERT (A52_2F2R, A52_STEREO):
1747 if (slev == 0)
1748 break;
1749 mix2to1_3dnow (samples, samples + 512, bias);
1750 mix2to1_3dnow (samples + 256, samples + 768, bias);
1751 break;
1753 case CONVERT (A52_2F2R, A52_DOLBY):
1754 mix22toS_3dnow (samples, bias);
1755 break;
1757 case CONVERT (A52_3F2R, A52_STEREO):
1758 if (slev == 0)
1759 goto mix_3to2_3dnow;
1760 mix32to2_3dnow (samples, bias);
1761 break;
1763 case CONVERT (A52_3F2R, A52_DOLBY):
1764 mix32toS_3dnow (samples, bias);
1765 break;
1767 case CONVERT (A52_3F1R, A52_3F):
1768 if (slev == 0)
1769 break;
1770 mix21to2_3dnow (samples, samples + 512, bias);
1771 break;
1773 case CONVERT (A52_3F2R, A52_3F):
1774 if (slev == 0)
1775 break;
1776 mix2to1_3dnow (samples, samples + 768, bias);
1777 mix2to1_3dnow (samples + 512, samples + 1024, bias);
1778 break;
1780 case CONVERT (A52_3F1R, A52_2F1R):
1781 mix3to2_3dnow (samples, bias);
1782 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1783 break;
1785 case CONVERT (A52_2F2R, A52_2F1R):
1786 mix2to1_3dnow (samples + 512, samples + 768, bias);
1787 break;
1789 case CONVERT (A52_3F2R, A52_2F1R):
1790 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1791 move2to1_3dnow (samples + 768, samples + 512, bias);
1792 break;
1794 case CONVERT (A52_3F2R, A52_3F1R):
1795 mix2to1_3dnow (samples + 768, samples + 1024, bias);
1796 break;
1798 case CONVERT (A52_2F1R, A52_2F2R):
1799 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1800 break;
1802 case CONVERT (A52_3F1R, A52_2F2R):
1803 mix3to2_3dnow (samples, bias);
1804 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1805 break;
1807 case CONVERT (A52_3F2R, A52_2F2R):
1808 mix3to2_3dnow (samples, bias);
1809 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1810 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1811 break;
1813 case CONVERT (A52_3F1R, A52_3F2R):
1814 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1815 break;
1817 __asm __volatile("femms":::"memory");
1820 #endif // ARCH_X86 || ARCH_X86_64