obvious gcc warning fix, approved by Nico
[mplayer/glamo.git] / liba52 / downmix.c
blob55e2536c9985ce3c4e18ac00031de74de6548fe4
1 /*
2 * downmix.c
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
7 * See http://liba52.sourceforge.net/ for updates.
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff.
10 * detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/
11 * $Id$
13 * a52dec is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * a52dec is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
30 #include "config.h"
32 #include <string.h>
33 #include <inttypes.h>
35 #include "a52.h"
36 #include "a52_internal.h"
37 #include "mm_accel.h"
39 #define CONVERT(acmod,output) (((output) << 3) + (acmod))
42 void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
43 sample_t clev, sample_t slev)= NULL;
44 void (*upmix)(sample_t * samples, int acmod, int output)= NULL;
46 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
47 sample_t clev, sample_t slev);
48 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
49 sample_t clev, sample_t slev);
50 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
51 sample_t clev, sample_t slev);
52 static void upmix_MMX (sample_t * samples, int acmod, int output);
53 static void upmix_C (sample_t * samples, int acmod, int output);
55 void downmix_accel_init(uint32_t mm_accel)
57 upmix= upmix_C;
58 downmix= downmix_C;
59 #ifdef ARCH_X86
60 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX;
61 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE;
62 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow;
63 #endif
66 int downmix_init (int input, int flags, sample_t * level,
67 sample_t clev, sample_t slev)
69 static uint8_t table[11][8] = {
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
72 {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
73 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
75 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
77 A52_STEREO, A52_3F, A52_STEREO, A52_3F},
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
79 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
81 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
82 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
83 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
84 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
85 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
86 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
87 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
88 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
89 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
90 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
91 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
93 int output;
95 output = flags & A52_CHANNEL_MASK;
96 if (output > A52_DOLBY)
97 return -1;
99 output = table[output][input & 7];
101 if ((output == A52_STEREO) &&
102 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
103 output = A52_DOLBY;
105 if (flags & A52_ADJUST_LEVEL)
106 switch (CONVERT (input & 7, output)) {
108 case CONVERT (A52_3F, A52_MONO):
109 *level *= LEVEL_3DB / (1 + clev);
110 break;
112 case CONVERT (A52_STEREO, A52_MONO):
113 case CONVERT (A52_2F2R, A52_2F1R):
114 case CONVERT (A52_3F2R, A52_3F1R):
115 level_3db:
116 *level *= LEVEL_3DB;
117 break;
119 case CONVERT (A52_3F2R, A52_2F1R):
120 if (clev < LEVEL_PLUS3DB - 1)
121 goto level_3db;
122 /* break thru */
123 case CONVERT (A52_3F, A52_STEREO):
124 case CONVERT (A52_3F1R, A52_2F1R):
125 case CONVERT (A52_3F1R, A52_2F2R):
126 case CONVERT (A52_3F2R, A52_2F2R):
127 *level /= 1 + clev;
128 break;
130 case CONVERT (A52_2F1R, A52_MONO):
131 *level *= LEVEL_PLUS3DB / (2 + slev);
132 break;
134 case CONVERT (A52_2F1R, A52_STEREO):
135 case CONVERT (A52_3F1R, A52_3F):
136 *level /= 1 + slev * LEVEL_3DB;
137 break;
139 case CONVERT (A52_3F1R, A52_MONO):
140 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
141 break;
143 case CONVERT (A52_3F1R, A52_STEREO):
144 *level /= 1 + clev + slev * LEVEL_3DB;
145 break;
147 case CONVERT (A52_2F2R, A52_MONO):
148 *level *= LEVEL_3DB / (1 + slev);
149 break;
151 case CONVERT (A52_2F2R, A52_STEREO):
152 case CONVERT (A52_3F2R, A52_3F):
153 *level /= 1 + slev;
154 break;
156 case CONVERT (A52_3F2R, A52_MONO):
157 *level *= LEVEL_3DB / (1 + clev + slev);
158 break;
160 case CONVERT (A52_3F2R, A52_STEREO):
161 *level /= 1 + clev + slev;
162 break;
164 case CONVERT (A52_MONO, A52_DOLBY):
165 *level *= LEVEL_PLUS3DB;
166 break;
168 case CONVERT (A52_3F, A52_DOLBY):
169 case CONVERT (A52_2F1R, A52_DOLBY):
170 *level *= 1 / (1 + LEVEL_3DB);
171 break;
173 case CONVERT (A52_3F1R, A52_DOLBY):
174 case CONVERT (A52_2F2R, A52_DOLBY):
175 *level *= 1 / (1 + 2 * LEVEL_3DB);
176 break;
178 case CONVERT (A52_3F2R, A52_DOLBY):
179 *level *= 1 / (1 + 3 * LEVEL_3DB);
180 break;
182 return output;
185 int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
186 sample_t clev, sample_t slev)
188 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
190 case CONVERT (A52_CHANNEL, A52_CHANNEL):
191 case CONVERT (A52_MONO, A52_MONO):
192 case CONVERT (A52_STEREO, A52_STEREO):
193 case CONVERT (A52_3F, A52_3F):
194 case CONVERT (A52_2F1R, A52_2F1R):
195 case CONVERT (A52_3F1R, A52_3F1R):
196 case CONVERT (A52_2F2R, A52_2F2R):
197 case CONVERT (A52_3F2R, A52_3F2R):
198 case CONVERT (A52_STEREO, A52_DOLBY):
199 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
200 return 0;
202 case CONVERT (A52_CHANNEL, A52_MONO):
203 coeff[0] = coeff[1] = level * LEVEL_6DB;
204 return 3;
206 case CONVERT (A52_STEREO, A52_MONO):
207 coeff[0] = coeff[1] = level * LEVEL_3DB;
208 return 3;
210 case CONVERT (A52_3F, A52_MONO):
211 coeff[0] = coeff[2] = level * LEVEL_3DB;
212 coeff[1] = level * clev * LEVEL_PLUS3DB;
213 return 7;
215 case CONVERT (A52_2F1R, A52_MONO):
216 coeff[0] = coeff[1] = level * LEVEL_3DB;
217 coeff[2] = level * slev * LEVEL_3DB;
218 return 7;
220 case CONVERT (A52_2F2R, A52_MONO):
221 coeff[0] = coeff[1] = level * LEVEL_3DB;
222 coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
223 return 15;
225 case CONVERT (A52_3F1R, A52_MONO):
226 coeff[0] = coeff[2] = level * LEVEL_3DB;
227 coeff[1] = level * clev * LEVEL_PLUS3DB;
228 coeff[3] = level * slev * LEVEL_3DB;
229 return 15;
231 case CONVERT (A52_3F2R, A52_MONO):
232 coeff[0] = coeff[2] = level * LEVEL_3DB;
233 coeff[1] = level * clev * LEVEL_PLUS3DB;
234 coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
235 return 31;
237 case CONVERT (A52_MONO, A52_DOLBY):
238 coeff[0] = level * LEVEL_3DB;
239 return 0;
241 case CONVERT (A52_3F, A52_DOLBY):
242 clev = LEVEL_3DB;
243 case CONVERT (A52_3F, A52_STEREO):
244 case CONVERT (A52_3F1R, A52_2F1R):
245 case CONVERT (A52_3F2R, A52_2F2R):
246 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
247 coeff[1] = level * clev;
248 return 7;
250 case CONVERT (A52_2F1R, A52_DOLBY):
251 slev = 1;
252 case CONVERT (A52_2F1R, A52_STEREO):
253 coeff[0] = coeff[1] = level;
254 coeff[2] = level * slev * LEVEL_3DB;
255 return 7;
257 case CONVERT (A52_3F1R, A52_DOLBY):
258 clev = LEVEL_3DB;
259 slev = 1;
260 case CONVERT (A52_3F1R, A52_STEREO):
261 coeff[0] = coeff[2] = level;
262 coeff[1] = level * clev;
263 coeff[3] = level * slev * LEVEL_3DB;
264 return 15;
266 case CONVERT (A52_2F2R, A52_DOLBY):
267 slev = LEVEL_3DB;
268 case CONVERT (A52_2F2R, A52_STEREO):
269 coeff[0] = coeff[1] = level;
270 coeff[2] = coeff[3] = level * slev;
271 return 15;
273 case CONVERT (A52_3F2R, A52_DOLBY):
274 clev = LEVEL_3DB;
275 case CONVERT (A52_3F2R, A52_2F1R):
276 slev = LEVEL_3DB;
277 case CONVERT (A52_3F2R, A52_STEREO):
278 coeff[0] = coeff[2] = level;
279 coeff[1] = level * clev;
280 coeff[3] = coeff[4] = level * slev;
281 return 31;
283 case CONVERT (A52_3F1R, A52_3F):
284 coeff[0] = coeff[1] = coeff[2] = level;
285 coeff[3] = level * slev * LEVEL_3DB;
286 return 13;
288 case CONVERT (A52_3F2R, A52_3F):
289 coeff[0] = coeff[1] = coeff[2] = level;
290 coeff[3] = coeff[4] = level * slev;
291 return 29;
293 case CONVERT (A52_2F2R, A52_2F1R):
294 coeff[0] = coeff[1] = level;
295 coeff[2] = coeff[3] = level * LEVEL_3DB;
296 return 12;
298 case CONVERT (A52_3F2R, A52_3F1R):
299 coeff[0] = coeff[1] = coeff[2] = level;
300 coeff[3] = coeff[4] = level * LEVEL_3DB;
301 return 24;
303 case CONVERT (A52_2F1R, A52_2F2R):
304 coeff[0] = coeff[1] = level;
305 coeff[2] = level * LEVEL_3DB;
306 return 0;
308 case CONVERT (A52_3F1R, A52_2F2R):
309 coeff[0] = coeff[2] = level;
310 coeff[1] = level * clev;
311 coeff[3] = level * LEVEL_3DB;
312 return 7;
314 case CONVERT (A52_3F1R, A52_3F2R):
315 coeff[0] = coeff[1] = coeff[2] = level;
316 coeff[3] = level * LEVEL_3DB;
317 return 0;
319 case CONVERT (A52_CHANNEL, A52_CHANNEL1):
320 coeff[0] = level;
321 coeff[1] = 0;
322 return 0;
324 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
325 coeff[0] = 0;
326 coeff[1] = level;
327 return 0;
330 return -1; /* NOTREACHED */
333 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
335 int i;
337 for (i = 0; i < 256; i++)
338 dest[i] += src[i] + bias;
341 static void mix3to1 (sample_t * samples, sample_t bias)
343 int i;
345 for (i = 0; i < 256; i++)
346 samples[i] += samples[i + 256] + samples[i + 512] + bias;
349 static void mix4to1 (sample_t * samples, sample_t bias)
351 int i;
353 for (i = 0; i < 256; i++)
354 samples[i] += (samples[i + 256] + samples[i + 512] +
355 samples[i + 768] + bias);
358 static void mix5to1 (sample_t * samples, sample_t bias)
360 int i;
362 for (i = 0; i < 256; i++)
363 samples[i] += (samples[i + 256] + samples[i + 512] +
364 samples[i + 768] + samples[i + 1024] + bias);
367 static void mix3to2 (sample_t * samples, sample_t bias)
369 int i;
370 sample_t common;
372 for (i = 0; i < 256; i++) {
373 common = samples[i + 256] + bias;
374 samples[i] += common;
375 samples[i + 256] = samples[i + 512] + common;
379 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
381 int i;
382 sample_t common;
384 for (i = 0; i < 256; i++) {
385 common = right[i + 256] + bias;
386 left[i] += common;
387 right[i] += common;
391 static void mix21toS (sample_t * samples, sample_t bias)
393 int i;
394 sample_t surround;
396 for (i = 0; i < 256; i++) {
397 surround = samples[i + 512];
398 samples[i] += bias - surround;
399 samples[i + 256] += bias + surround;
403 static void mix31to2 (sample_t * samples, sample_t bias)
405 int i;
406 sample_t common;
408 for (i = 0; i < 256; i++) {
409 common = samples[i + 256] + samples[i + 768] + bias;
410 samples[i] += common;
411 samples[i + 256] = samples[i + 512] + common;
415 static void mix31toS (sample_t * samples, sample_t bias)
417 int i;
418 sample_t common, surround;
420 for (i = 0; i < 256; i++) {
421 common = samples[i + 256] + bias;
422 surround = samples[i + 768];
423 samples[i] += common - surround;
424 samples[i + 256] = samples[i + 512] + common + surround;
428 static void mix22toS (sample_t * samples, sample_t bias)
430 int i;
431 sample_t surround;
433 for (i = 0; i < 256; i++) {
434 surround = samples[i + 512] + samples[i + 768];
435 samples[i] += bias - surround;
436 samples[i + 256] += bias + surround;
440 static void mix32to2 (sample_t * samples, sample_t bias)
442 int i;
443 sample_t common;
445 for (i = 0; i < 256; i++) {
446 common = samples[i + 256] + bias;
447 samples[i] += common + samples[i + 768];
448 samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
452 static void mix32toS (sample_t * samples, sample_t bias)
454 int i;
455 sample_t common, surround;
457 for (i = 0; i < 256; i++) {
458 common = samples[i + 256] + bias;
459 surround = samples[i + 768] + samples[i + 1024];
460 samples[i] += common - surround;
461 samples[i + 256] = samples[i + 512] + common + surround;
465 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
467 int i;
469 for (i = 0; i < 256; i++)
470 dest[i] = src[i] + src[i + 256] + bias;
473 static void zero (sample_t * samples)
475 int i;
476 for (i = 0; i < 256; i++)
477 samples[i] = 0;
480 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
481 sample_t clev, sample_t slev)
483 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
485 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
486 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
487 break;
489 case CONVERT (A52_CHANNEL, A52_MONO):
490 case CONVERT (A52_STEREO, A52_MONO):
491 mix_2to1:
492 mix2to1 (samples, samples + 256, bias);
493 break;
495 case CONVERT (A52_2F1R, A52_MONO):
496 if (slev == 0)
497 goto mix_2to1;
498 case CONVERT (A52_3F, A52_MONO):
499 mix_3to1:
500 mix3to1 (samples, bias);
501 break;
503 case CONVERT (A52_3F1R, A52_MONO):
504 if (slev == 0)
505 goto mix_3to1;
506 case CONVERT (A52_2F2R, A52_MONO):
507 if (slev == 0)
508 goto mix_2to1;
509 mix4to1 (samples, bias);
510 break;
512 case CONVERT (A52_3F2R, A52_MONO):
513 if (slev == 0)
514 goto mix_3to1;
515 mix5to1 (samples, bias);
516 break;
518 case CONVERT (A52_MONO, A52_DOLBY):
519 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
520 break;
522 case CONVERT (A52_3F, A52_STEREO):
523 case CONVERT (A52_3F, A52_DOLBY):
524 mix_3to2:
525 mix3to2 (samples, bias);
526 break;
528 case CONVERT (A52_2F1R, A52_STEREO):
529 if (slev == 0)
530 break;
531 mix21to2 (samples, samples + 256, bias);
532 break;
534 case CONVERT (A52_2F1R, A52_DOLBY):
535 mix21toS (samples, bias);
536 break;
538 case CONVERT (A52_3F1R, A52_STEREO):
539 if (slev == 0)
540 goto mix_3to2;
541 mix31to2 (samples, bias);
542 break;
544 case CONVERT (A52_3F1R, A52_DOLBY):
545 mix31toS (samples, bias);
546 break;
548 case CONVERT (A52_2F2R, A52_STEREO):
549 if (slev == 0)
550 break;
551 mix2to1 (samples, samples + 512, bias);
552 mix2to1 (samples + 256, samples + 768, bias);
553 break;
555 case CONVERT (A52_2F2R, A52_DOLBY):
556 mix22toS (samples, bias);
557 break;
559 case CONVERT (A52_3F2R, A52_STEREO):
560 if (slev == 0)
561 goto mix_3to2;
562 mix32to2 (samples, bias);
563 break;
565 case CONVERT (A52_3F2R, A52_DOLBY):
566 mix32toS (samples, bias);
567 break;
569 case CONVERT (A52_3F1R, A52_3F):
570 if (slev == 0)
571 break;
572 mix21to2 (samples, samples + 512, bias);
573 break;
575 case CONVERT (A52_3F2R, A52_3F):
576 if (slev == 0)
577 break;
578 mix2to1 (samples, samples + 768, bias);
579 mix2to1 (samples + 512, samples + 1024, bias);
580 break;
582 case CONVERT (A52_3F1R, A52_2F1R):
583 mix3to2 (samples, bias);
584 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
585 break;
587 case CONVERT (A52_2F2R, A52_2F1R):
588 mix2to1 (samples + 512, samples + 768, bias);
589 break;
591 case CONVERT (A52_3F2R, A52_2F1R):
592 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
593 move2to1 (samples + 768, samples + 512, bias);
594 break;
596 case CONVERT (A52_3F2R, A52_3F1R):
597 mix2to1 (samples + 768, samples + 1024, bias);
598 break;
600 case CONVERT (A52_2F1R, A52_2F2R):
601 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
602 break;
604 case CONVERT (A52_3F1R, A52_2F2R):
605 mix3to2 (samples, bias);
606 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
607 break;
609 case CONVERT (A52_3F2R, A52_2F2R):
610 mix3to2 (samples, bias);
611 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
612 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
613 break;
615 case CONVERT (A52_3F1R, A52_3F2R):
616 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
617 break;
621 static void upmix_C (sample_t * samples, int acmod, int output)
623 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
625 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
626 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
627 break;
629 case CONVERT (A52_3F2R, A52_MONO):
630 zero (samples + 1024);
631 case CONVERT (A52_3F1R, A52_MONO):
632 case CONVERT (A52_2F2R, A52_MONO):
633 zero (samples + 768);
634 case CONVERT (A52_3F, A52_MONO):
635 case CONVERT (A52_2F1R, A52_MONO):
636 zero (samples + 512);
637 case CONVERT (A52_CHANNEL, A52_MONO):
638 case CONVERT (A52_STEREO, A52_MONO):
639 zero (samples + 256);
640 break;
642 case CONVERT (A52_3F2R, A52_STEREO):
643 case CONVERT (A52_3F2R, A52_DOLBY):
644 zero (samples + 1024);
645 case CONVERT (A52_3F1R, A52_STEREO):
646 case CONVERT (A52_3F1R, A52_DOLBY):
647 zero (samples + 768);
648 case CONVERT (A52_3F, A52_STEREO):
649 case CONVERT (A52_3F, A52_DOLBY):
650 mix_3to2:
651 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
652 zero (samples + 256);
653 break;
655 case CONVERT (A52_2F2R, A52_STEREO):
656 case CONVERT (A52_2F2R, A52_DOLBY):
657 zero (samples + 768);
658 case CONVERT (A52_2F1R, A52_STEREO):
659 case CONVERT (A52_2F1R, A52_DOLBY):
660 zero (samples + 512);
661 break;
663 case CONVERT (A52_3F2R, A52_3F):
664 zero (samples + 1024);
665 case CONVERT (A52_3F1R, A52_3F):
666 case CONVERT (A52_2F2R, A52_2F1R):
667 zero (samples + 768);
668 break;
670 case CONVERT (A52_3F2R, A52_3F1R):
671 zero (samples + 1024);
672 break;
674 case CONVERT (A52_3F2R, A52_2F1R):
675 zero (samples + 1024);
676 case CONVERT (A52_3F1R, A52_2F1R):
677 mix_31to21:
678 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
679 goto mix_3to2;
681 case CONVERT (A52_3F2R, A52_2F2R):
682 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
683 goto mix_31to21;
687 #ifdef ARCH_X86
688 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
690 asm volatile(
691 "movlps %2, %%xmm7 \n\t"
692 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
693 "movl $-1024, %%esi \n\t"
694 ".balign 16\n\t"
695 "1: \n\t"
696 "movaps (%0, %%esi), %%xmm0 \n\t"
697 "movaps 16(%0, %%esi), %%xmm1 \n\t"
698 "addps (%1, %%esi), %%xmm0 \n\t"
699 "addps 16(%1, %%esi), %%xmm1 \n\t"
700 "addps %%xmm7, %%xmm0 \n\t"
701 "addps %%xmm7, %%xmm1 \n\t"
702 "movaps %%xmm0, (%1, %%esi) \n\t"
703 "movaps %%xmm1, 16(%1, %%esi) \n\t"
704 "addl $32, %%esi \n\t"
705 " jnz 1b \n\t"
706 :: "r" (src+256), "r" (dest+256), "m" (bias)
707 : "%esi"
711 static void mix3to1_SSE (sample_t * samples, sample_t bias)
713 asm volatile(
714 "movlps %1, %%xmm7 \n\t"
715 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
716 "movl $-1024, %%esi \n\t"
717 ".balign 16\n\t"
718 "1: \n\t"
719 "movaps (%0, %%esi), %%xmm0 \n\t"
720 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
721 "addps 2048(%0, %%esi), %%xmm0 \n\t"
722 "addps %%xmm7, %%xmm1 \n\t"
723 "addps %%xmm1, %%xmm0 \n\t"
724 "movaps %%xmm0, (%0, %%esi) \n\t"
725 "addl $16, %%esi \n\t"
726 " jnz 1b \n\t"
727 :: "r" (samples+256), "m" (bias)
728 : "%esi"
732 static void mix4to1_SSE (sample_t * samples, sample_t bias)
734 asm volatile(
735 "movlps %1, %%xmm7 \n\t"
736 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
737 "movl $-1024, %%esi \n\t"
738 ".balign 16\n\t"
739 "1: \n\t"
740 "movaps (%0, %%esi), %%xmm0 \n\t"
741 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
742 "addps 2048(%0, %%esi), %%xmm0 \n\t"
743 "addps 3072(%0, %%esi), %%xmm1 \n\t"
744 "addps %%xmm7, %%xmm0 \n\t"
745 "addps %%xmm1, %%xmm0 \n\t"
746 "movaps %%xmm0, (%0, %%esi) \n\t"
747 "addl $16, %%esi \n\t"
748 " jnz 1b \n\t"
749 :: "r" (samples+256), "m" (bias)
750 : "%esi"
754 static void mix5to1_SSE (sample_t * samples, sample_t bias)
756 asm volatile(
757 "movlps %1, %%xmm7 \n\t"
758 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
759 "movl $-1024, %%esi \n\t"
760 ".balign 16\n\t"
761 "1: \n\t"
762 "movaps (%0, %%esi), %%xmm0 \n\t"
763 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
764 "addps 2048(%0, %%esi), %%xmm0 \n\t"
765 "addps 3072(%0, %%esi), %%xmm1 \n\t"
766 "addps %%xmm7, %%xmm0 \n\t"
767 "addps 4096(%0, %%esi), %%xmm1 \n\t"
768 "addps %%xmm1, %%xmm0 \n\t"
769 "movaps %%xmm0, (%0, %%esi) \n\t"
770 "addl $16, %%esi \n\t"
771 " jnz 1b \n\t"
772 :: "r" (samples+256), "m" (bias)
773 : "%esi"
777 static void mix3to2_SSE (sample_t * samples, sample_t bias)
779 asm volatile(
780 "movlps %1, %%xmm7 \n\t"
781 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
782 "movl $-1024, %%esi \n\t"
783 ".balign 16\n\t"
784 "1: \n\t"
785 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
786 "addps %%xmm7, %%xmm0 \n\t" //common
787 "movaps (%0, %%esi), %%xmm1 \n\t"
788 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
789 "addps %%xmm0, %%xmm1 \n\t"
790 "addps %%xmm0, %%xmm2 \n\t"
791 "movaps %%xmm1, (%0, %%esi) \n\t"
792 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
793 "addl $16, %%esi \n\t"
794 " jnz 1b \n\t"
795 :: "r" (samples+256), "m" (bias)
796 : "%esi"
800 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
802 asm volatile(
803 "movlps %2, %%xmm7 \n\t"
804 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
805 "movl $-1024, %%esi \n\t"
806 ".balign 16\n\t"
807 "1: \n\t"
808 "movaps 1024(%1, %%esi), %%xmm0 \n\t"
809 "addps %%xmm7, %%xmm0 \n\t" //common
810 "movaps (%0, %%esi), %%xmm1 \n\t"
811 "movaps (%1, %%esi), %%xmm2 \n\t"
812 "addps %%xmm0, %%xmm1 \n\t"
813 "addps %%xmm0, %%xmm2 \n\t"
814 "movaps %%xmm1, (%0, %%esi) \n\t"
815 "movaps %%xmm2, (%1, %%esi) \n\t"
816 "addl $16, %%esi \n\t"
817 " jnz 1b \n\t"
818 :: "r" (left+256), "r" (right+256), "m" (bias)
819 : "%esi"
823 static void mix21toS_SSE (sample_t * samples, sample_t bias)
825 asm volatile(
826 "movlps %1, %%xmm7 \n\t"
827 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
828 "movl $-1024, %%esi \n\t"
829 ".balign 16\n\t"
830 "1: \n\t"
831 "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround
832 "movaps (%0, %%esi), %%xmm1 \n\t"
833 "movaps 1024(%0, %%esi), %%xmm2 \n\t"
834 "addps %%xmm7, %%xmm1 \n\t"
835 "addps %%xmm7, %%xmm2 \n\t"
836 "subps %%xmm0, %%xmm1 \n\t"
837 "addps %%xmm0, %%xmm2 \n\t"
838 "movaps %%xmm1, (%0, %%esi) \n\t"
839 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
840 "addl $16, %%esi \n\t"
841 " jnz 1b \n\t"
842 :: "r" (samples+256), "m" (bias)
843 : "%esi"
847 static void mix31to2_SSE (sample_t * samples, sample_t bias)
849 asm volatile(
850 "movlps %1, %%xmm7 \n\t"
851 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
852 "movl $-1024, %%esi \n\t"
853 ".balign 16\n\t"
854 "1: \n\t"
855 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
856 "addps 3072(%0, %%esi), %%xmm0 \n\t"
857 "addps %%xmm7, %%xmm0 \n\t" // common
858 "movaps (%0, %%esi), %%xmm1 \n\t"
859 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
860 "addps %%xmm0, %%xmm1 \n\t"
861 "addps %%xmm0, %%xmm2 \n\t"
862 "movaps %%xmm1, (%0, %%esi) \n\t"
863 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
864 "addl $16, %%esi \n\t"
865 " jnz 1b \n\t"
866 :: "r" (samples+256), "m" (bias)
867 : "%esi"
871 static void mix31toS_SSE (sample_t * samples, sample_t bias)
873 asm volatile(
874 "movlps %1, %%xmm7 \n\t"
875 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
876 "movl $-1024, %%esi \n\t"
877 ".balign 16\n\t"
878 "1: \n\t"
879 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
880 "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround
881 "addps %%xmm7, %%xmm0 \n\t" // common
882 "movaps (%0, %%esi), %%xmm1 \n\t"
883 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
884 "addps %%xmm0, %%xmm1 \n\t"
885 "addps %%xmm0, %%xmm2 \n\t"
886 "subps %%xmm3, %%xmm1 \n\t"
887 "addps %%xmm3, %%xmm2 \n\t"
888 "movaps %%xmm1, (%0, %%esi) \n\t"
889 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
890 "addl $16, %%esi \n\t"
891 " jnz 1b \n\t"
892 :: "r" (samples+256), "m" (bias)
893 : "%esi"
897 static void mix22toS_SSE (sample_t * samples, sample_t bias)
899 asm volatile(
900 "movlps %1, %%xmm7 \n\t"
901 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
902 "movl $-1024, %%esi \n\t"
903 ".balign 16\n\t"
904 "1: \n\t"
905 "movaps 2048(%0, %%esi), %%xmm0 \n\t"
906 "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround
907 "movaps (%0, %%esi), %%xmm1 \n\t"
908 "movaps 1024(%0, %%esi), %%xmm2 \n\t"
909 "addps %%xmm7, %%xmm1 \n\t"
910 "addps %%xmm7, %%xmm2 \n\t"
911 "subps %%xmm0, %%xmm1 \n\t"
912 "addps %%xmm0, %%xmm2 \n\t"
913 "movaps %%xmm1, (%0, %%esi) \n\t"
914 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
915 "addl $16, %%esi \n\t"
916 " jnz 1b \n\t"
917 :: "r" (samples+256), "m" (bias)
918 : "%esi"
922 static void mix32to2_SSE (sample_t * samples, sample_t bias)
924 asm volatile(
925 "movlps %1, %%xmm7 \n\t"
926 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
927 "movl $-1024, %%esi \n\t"
928 ".balign 16\n\t"
929 "1: \n\t"
930 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
931 "addps %%xmm7, %%xmm0 \n\t" // common
932 "movaps %%xmm0, %%xmm1 \n\t" // common
933 "addps (%0, %%esi), %%xmm0 \n\t"
934 "addps 2048(%0, %%esi), %%xmm1 \n\t"
935 "addps 3072(%0, %%esi), %%xmm0 \n\t"
936 "addps 4096(%0, %%esi), %%xmm1 \n\t"
937 "movaps %%xmm0, (%0, %%esi) \n\t"
938 "movaps %%xmm1, 1024(%0, %%esi) \n\t"
939 "addl $16, %%esi \n\t"
940 " jnz 1b \n\t"
941 :: "r" (samples+256), "m" (bias)
942 : "%esi"
946 static void mix32toS_SSE (sample_t * samples, sample_t bias)
948 asm volatile(
949 "movlps %1, %%xmm7 \n\t"
950 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
951 "movl $-1024, %%esi \n\t"
952 ".balign 16\n\t"
953 "1: \n\t"
954 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
955 "movaps 3072(%0, %%esi), %%xmm2 \n\t"
956 "addps %%xmm7, %%xmm0 \n\t" // common
957 "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround
958 "movaps (%0, %%esi), %%xmm1 \n\t"
959 "movaps 2048(%0, %%esi), %%xmm3 \n\t"
960 "subps %%xmm2, %%xmm1 \n\t"
961 "addps %%xmm2, %%xmm3 \n\t"
962 "addps %%xmm0, %%xmm1 \n\t"
963 "addps %%xmm0, %%xmm3 \n\t"
964 "movaps %%xmm1, (%0, %%esi) \n\t"
965 "movaps %%xmm3, 1024(%0, %%esi) \n\t"
966 "addl $16, %%esi \n\t"
967 " jnz 1b \n\t"
968 :: "r" (samples+256), "m" (bias)
969 : "%esi"
973 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
975 asm volatile(
976 "movlps %2, %%xmm7 \n\t"
977 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
978 "movl $-1024, %%esi \n\t"
979 ".balign 16\n\t"
980 "1: \n\t"
981 "movaps (%0, %%esi), %%xmm0 \n\t"
982 "movaps 16(%0, %%esi), %%xmm1 \n\t"
983 "addps 1024(%0, %%esi), %%xmm0 \n\t"
984 "addps 1040(%0, %%esi), %%xmm1 \n\t"
985 "addps %%xmm7, %%xmm0 \n\t"
986 "addps %%xmm7, %%xmm1 \n\t"
987 "movaps %%xmm0, (%1, %%esi) \n\t"
988 "movaps %%xmm1, 16(%1, %%esi) \n\t"
989 "addl $32, %%esi \n\t"
990 " jnz 1b \n\t"
991 :: "r" (src+256), "r" (dest+256), "m" (bias)
992 : "%esi"
996 static void zero_MMX(sample_t * samples)
998 asm volatile(
999 "movl $-1024, %%esi \n\t"
1000 "pxor %%mm0, %%mm0 \n\t"
1001 ".balign 16\n\t"
1002 "1: \n\t"
1003 "movq %%mm0, (%0, %%esi) \n\t"
1004 "movq %%mm0, 8(%0, %%esi) \n\t"
1005 "movq %%mm0, 16(%0, %%esi) \n\t"
1006 "movq %%mm0, 24(%0, %%esi) \n\t"
1007 "addl $32, %%esi \n\t"
1008 " jnz 1b \n\t"
1009 "emms"
1010 :: "r" (samples+256)
1011 : "%esi"
/*
 * Copy size bytes from src to dest in 64-byte chunks through the eight
 * MMX registers.  Only whole chunks are copied: a tail of size % 64 bytes
 * is left untouched.  movq has no alignment requirement, but 8-byte
 * aligned buffers are the intended use.
 * Note: untested and unused.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    const unsigned char *from = src;
    unsigned char *to = dest;
    unsigned i;

    size /= 64;
    for(i=0;i<size;i++)
    {
        /*
         * The pointers are passed in registers and dereferenced inside the
         * asm.  Passing "m"(src) would name the pointer variable itself,
         * not the buffer it points to.
         */
        __asm __volatile(
        "movq   (%0), %%mm0\n\t"
        "movq  8(%0), %%mm1\n\t"
        "movq 16(%0), %%mm2\n\t"
        "movq 24(%0), %%mm3\n\t"
        "movq 32(%0), %%mm4\n\t"
        "movq 40(%0), %%mm5\n\t"
        "movq 48(%0), %%mm6\n\t"
        "movq 56(%0), %%mm7\n\t"
        "movq %%mm0,   (%1)\n\t"
        "movq %%mm1,  8(%1)\n\t"
        "movq %%mm2, 16(%1)\n\t"
        "movq %%mm3, 24(%1)\n\t"
        "movq %%mm4, 32(%1)\n\t"
        "movq %%mm5, 40(%1)\n\t"
        "movq %%mm6, 48(%1)\n\t"
        "movq %%mm7, 56(%1)\n\t"
        :
        : "r"(from), "r"(to)
        : "memory");
        from += 64;
        to += 64;
    }
    /* Leave MMX state so later x87 floating-point code works. */
    __asm __volatile("emms" ::: "memory");
}
1048 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
1049 sample_t clev, sample_t slev)
1051 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1053 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1054 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1055 break;
1057 case CONVERT (A52_CHANNEL, A52_MONO):
1058 case CONVERT (A52_STEREO, A52_MONO):
1059 mix_2to1_SSE:
1060 mix2to1_SSE (samples, samples + 256, bias);
1061 break;
1063 case CONVERT (A52_2F1R, A52_MONO):
1064 if (slev == 0)
1065 goto mix_2to1_SSE;
1066 case CONVERT (A52_3F, A52_MONO):
1067 mix_3to1_SSE:
1068 mix3to1_SSE (samples, bias);
1069 break;
1071 case CONVERT (A52_3F1R, A52_MONO):
1072 if (slev == 0)
1073 goto mix_3to1_SSE;
1074 case CONVERT (A52_2F2R, A52_MONO):
1075 if (slev == 0)
1076 goto mix_2to1_SSE;
1077 mix4to1_SSE (samples, bias);
1078 break;
1080 case CONVERT (A52_3F2R, A52_MONO):
1081 if (slev == 0)
1082 goto mix_3to1_SSE;
1083 mix5to1_SSE (samples, bias);
1084 break;
1086 case CONVERT (A52_MONO, A52_DOLBY):
1087 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1088 break;
1090 case CONVERT (A52_3F, A52_STEREO):
1091 case CONVERT (A52_3F, A52_DOLBY):
1092 mix_3to2_SSE:
1093 mix3to2_SSE (samples, bias);
1094 break;
1096 case CONVERT (A52_2F1R, A52_STEREO):
1097 if (slev == 0)
1098 break;
1099 mix21to2_SSE (samples, samples + 256, bias);
1100 break;
1102 case CONVERT (A52_2F1R, A52_DOLBY):
1103 mix21toS_SSE (samples, bias);
1104 break;
1106 case CONVERT (A52_3F1R, A52_STEREO):
1107 if (slev == 0)
1108 goto mix_3to2_SSE;
1109 mix31to2_SSE (samples, bias);
1110 break;
1112 case CONVERT (A52_3F1R, A52_DOLBY):
1113 mix31toS_SSE (samples, bias);
1114 break;
1116 case CONVERT (A52_2F2R, A52_STEREO):
1117 if (slev == 0)
1118 break;
1119 mix2to1_SSE (samples, samples + 512, bias);
1120 mix2to1_SSE (samples + 256, samples + 768, bias);
1121 break;
1123 case CONVERT (A52_2F2R, A52_DOLBY):
1124 mix22toS_SSE (samples, bias);
1125 break;
1127 case CONVERT (A52_3F2R, A52_STEREO):
1128 if (slev == 0)
1129 goto mix_3to2_SSE;
1130 mix32to2_SSE (samples, bias);
1131 break;
1133 case CONVERT (A52_3F2R, A52_DOLBY):
1134 mix32toS_SSE (samples, bias);
1135 break;
1137 case CONVERT (A52_3F1R, A52_3F):
1138 if (slev == 0)
1139 break;
1140 mix21to2_SSE (samples, samples + 512, bias);
1141 break;
1143 case CONVERT (A52_3F2R, A52_3F):
1144 if (slev == 0)
1145 break;
1146 mix2to1_SSE (samples, samples + 768, bias);
1147 mix2to1_SSE (samples + 512, samples + 1024, bias);
1148 break;
1150 case CONVERT (A52_3F1R, A52_2F1R):
1151 mix3to2_SSE (samples, bias);
1152 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1153 break;
1155 case CONVERT (A52_2F2R, A52_2F1R):
1156 mix2to1_SSE (samples + 512, samples + 768, bias);
1157 break;
1159 case CONVERT (A52_3F2R, A52_2F1R):
1160 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1161 move2to1_SSE (samples + 768, samples + 512, bias);
1162 break;
1164 case CONVERT (A52_3F2R, A52_3F1R):
1165 mix2to1_SSE (samples + 768, samples + 1024, bias);
1166 break;
1168 case CONVERT (A52_2F1R, A52_2F2R):
1169 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1170 break;
1172 case CONVERT (A52_3F1R, A52_2F2R):
1173 mix3to2_SSE (samples, bias);
1174 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1175 break;
1177 case CONVERT (A52_3F2R, A52_2F2R):
1178 mix3to2_SSE (samples, bias);
1179 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1180 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1181 break;
1183 case CONVERT (A52_3F1R, A52_3F2R):
1184 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1185 break;
1189 static void upmix_MMX (sample_t * samples, int acmod, int output)
1191 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1193 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1194 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1195 break;
1197 case CONVERT (A52_3F2R, A52_MONO):
1198 zero_MMX (samples + 1024);
1199 case CONVERT (A52_3F1R, A52_MONO):
1200 case CONVERT (A52_2F2R, A52_MONO):
1201 zero_MMX (samples + 768);
1202 case CONVERT (A52_3F, A52_MONO):
1203 case CONVERT (A52_2F1R, A52_MONO):
1204 zero_MMX (samples + 512);
1205 case CONVERT (A52_CHANNEL, A52_MONO):
1206 case CONVERT (A52_STEREO, A52_MONO):
1207 zero_MMX (samples + 256);
1208 break;
1210 case CONVERT (A52_3F2R, A52_STEREO):
1211 case CONVERT (A52_3F2R, A52_DOLBY):
1212 zero_MMX (samples + 1024);
1213 case CONVERT (A52_3F1R, A52_STEREO):
1214 case CONVERT (A52_3F1R, A52_DOLBY):
1215 zero_MMX (samples + 768);
1216 case CONVERT (A52_3F, A52_STEREO):
1217 case CONVERT (A52_3F, A52_DOLBY):
1218 mix_3to2_MMX:
1219 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
1220 zero_MMX (samples + 256);
1221 break;
1223 case CONVERT (A52_2F2R, A52_STEREO):
1224 case CONVERT (A52_2F2R, A52_DOLBY):
1225 zero_MMX (samples + 768);
1226 case CONVERT (A52_2F1R, A52_STEREO):
1227 case CONVERT (A52_2F1R, A52_DOLBY):
1228 zero_MMX (samples + 512);
1229 break;
1231 case CONVERT (A52_3F2R, A52_3F):
1232 zero_MMX (samples + 1024);
1233 case CONVERT (A52_3F1R, A52_3F):
1234 case CONVERT (A52_2F2R, A52_2F1R):
1235 zero_MMX (samples + 768);
1236 break;
1238 case CONVERT (A52_3F2R, A52_3F1R):
1239 zero_MMX (samples + 1024);
1240 break;
1242 case CONVERT (A52_3F2R, A52_2F1R):
1243 zero_MMX (samples + 1024);
1244 case CONVERT (A52_3F1R, A52_2F1R):
1245 mix_31to21_MMX:
1246 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1247 goto mix_3to2_MMX;
1249 case CONVERT (A52_3F2R, A52_2F2R):
1250 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1251 goto mix_31to21_MMX;
1255 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
1257 asm volatile(
1258 "movd %2, %%mm7 \n\t"
1259 "punpckldq %2, %%mm7 \n\t"
1260 "movl $-1024, %%esi \n\t"
1261 ".balign 16\n\t"
1262 "1: \n\t"
1263 "movq (%0, %%esi), %%mm0 \n\t"
1264 "movq 8(%0, %%esi), %%mm1 \n\t"
1265 "movq 16(%0, %%esi), %%mm2 \n\t"
1266 "movq 24(%0, %%esi), %%mm3 \n\t"
1267 "pfadd (%1, %%esi), %%mm0 \n\t"
1268 "pfadd 8(%1, %%esi), %%mm1 \n\t"
1269 "pfadd 16(%1, %%esi), %%mm2 \n\t"
1270 "pfadd 24(%1, %%esi), %%mm3 \n\t"
1271 "pfadd %%mm7, %%mm0 \n\t"
1272 "pfadd %%mm7, %%mm1 \n\t"
1273 "pfadd %%mm7, %%mm2 \n\t"
1274 "pfadd %%mm7, %%mm3 \n\t"
1275 "movq %%mm0, (%1, %%esi) \n\t"
1276 "movq %%mm1, 8(%1, %%esi) \n\t"
1277 "movq %%mm2, 16(%1, %%esi) \n\t"
1278 "movq %%mm3, 24(%1, %%esi) \n\t"
1279 "addl $32, %%esi \n\t"
1280 " jnz 1b \n\t"
1281 :: "r" (src+256), "r" (dest+256), "m" (bias)
1282 : "%esi"
1286 static void mix3to1_3dnow (sample_t * samples, sample_t bias)
1288 asm volatile(
1289 "movd %1, %%mm7 \n\t"
1290 "punpckldq %1, %%mm7 \n\t"
1291 "movl $-1024, %%esi \n\t"
1292 ".balign 16\n\t"
1293 "1: \n\t"
1294 "movq (%0, %%esi), %%mm0 \n\t"
1295 "movq 8(%0, %%esi), %%mm1 \n\t"
1296 "movq 1024(%0, %%esi), %%mm2 \n\t"
1297 "movq 1032(%0, %%esi), %%mm3 \n\t"
1298 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
1299 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
1300 "pfadd %%mm7, %%mm0 \n\t"
1301 "pfadd %%mm7, %%mm1 \n\t"
1302 "pfadd %%mm2, %%mm0 \n\t"
1303 "pfadd %%mm3, %%mm1 \n\t"
1304 "movq %%mm0, (%0, %%esi) \n\t"
1305 "movq %%mm1, 8(%0, %%esi) \n\t"
1306 "addl $16, %%esi \n\t"
1307 " jnz 1b \n\t"
1308 :: "r" (samples+256), "m" (bias)
1309 : "%esi"
1313 static void mix4to1_3dnow (sample_t * samples, sample_t bias)
1315 asm volatile(
1316 "movd %1, %%mm7 \n\t"
1317 "punpckldq %1, %%mm7 \n\t"
1318 "movl $-1024, %%esi \n\t"
1319 ".balign 16\n\t"
1320 "1: \n\t"
1321 "movq (%0, %%esi), %%mm0 \n\t"
1322 "movq 8(%0, %%esi), %%mm1 \n\t"
1323 "movq 1024(%0, %%esi), %%mm2 \n\t"
1324 "movq 1032(%0, %%esi), %%mm3 \n\t"
1325 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
1326 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
1327 "pfadd 3072(%0, %%esi), %%mm2 \n\t"
1328 "pfadd 3080(%0, %%esi), %%mm3 \n\t"
1329 "pfadd %%mm7, %%mm0 \n\t"
1330 "pfadd %%mm7, %%mm1 \n\t"
1331 "pfadd %%mm2, %%mm0 \n\t"
1332 "pfadd %%mm3, %%mm1 \n\t"
1333 "movq %%mm0, (%0, %%esi) \n\t"
1334 "movq %%mm1, 8(%0, %%esi) \n\t"
1335 "addl $16, %%esi \n\t"
1336 " jnz 1b \n\t"
1337 :: "r" (samples+256), "m" (bias)
1338 : "%esi"
1342 static void mix5to1_3dnow (sample_t * samples, sample_t bias)
1344 asm volatile(
1345 "movd %1, %%mm7 \n\t"
1346 "punpckldq %1, %%mm7 \n\t"
1347 "movl $-1024, %%esi \n\t"
1348 ".balign 16\n\t"
1349 "1: \n\t"
1350 "movq (%0, %%esi), %%mm0 \n\t"
1351 "movq 8(%0, %%esi), %%mm1 \n\t"
1352 "movq 1024(%0, %%esi), %%mm2 \n\t"
1353 "movq 1032(%0, %%esi), %%mm3 \n\t"
1354 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
1355 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
1356 "pfadd 3072(%0, %%esi), %%mm2 \n\t"
1357 "pfadd 3080(%0, %%esi), %%mm3 \n\t"
1358 "pfadd %%mm7, %%mm0 \n\t"
1359 "pfadd %%mm7, %%mm1 \n\t"
1360 "pfadd 4096(%0, %%esi), %%mm2 \n\t"
1361 "pfadd 4104(%0, %%esi), %%mm3 \n\t"
1362 "pfadd %%mm2, %%mm0 \n\t"
1363 "pfadd %%mm3, %%mm1 \n\t"
1364 "movq %%mm0, (%0, %%esi) \n\t"
1365 "movq %%mm1, 8(%0, %%esi) \n\t"
1366 "addl $16, %%esi \n\t"
1367 " jnz 1b \n\t"
1368 :: "r" (samples+256), "m" (bias)
1369 : "%esi"
1373 static void mix3to2_3dnow (sample_t * samples, sample_t bias)
1375 asm volatile(
1376 "movd %1, %%mm7 \n\t"
1377 "punpckldq %1, %%mm7 \n\t"
1378 "movl $-1024, %%esi \n\t"
1379 ".balign 16\n\t"
1380 "1: \n\t"
1381 "movq 1024(%0, %%esi), %%mm0 \n\t"
1382 "movq 1032(%0, %%esi), %%mm1 \n\t"
1383 "pfadd %%mm7, %%mm0 \n\t" //common
1384 "pfadd %%mm7, %%mm1 \n\t" //common
1385 "movq (%0, %%esi), %%mm2 \n\t"
1386 "movq 8(%0, %%esi), %%mm3 \n\t"
1387 "movq 2048(%0, %%esi), %%mm4 \n\t"
1388 "movq 2056(%0, %%esi), %%mm5 \n\t"
1389 "pfadd %%mm0, %%mm2 \n\t"
1390 "pfadd %%mm1, %%mm3 \n\t"
1391 "pfadd %%mm0, %%mm4 \n\t"
1392 "pfadd %%mm1, %%mm5 \n\t"
1393 "movq %%mm2, (%0, %%esi) \n\t"
1394 "movq %%mm3, 8(%0, %%esi) \n\t"
1395 "movq %%mm4, 1024(%0, %%esi) \n\t"
1396 "movq %%mm5, 1032(%0, %%esi) \n\t"
1397 "addl $16, %%esi \n\t"
1398 " jnz 1b \n\t"
1399 :: "r" (samples+256), "m" (bias)
1400 : "%esi"
1404 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
1406 asm volatile(
1407 "movd %2, %%mm7 \n\t"
1408 "punpckldq %2, %%mm7 \n\t"
1409 "movl $-1024, %%esi \n\t"
1410 ".balign 16\n\t"
1411 "1: \n\t"
1412 "movq 1024(%1, %%esi), %%mm0 \n\t"
1413 "movq 1032(%1, %%esi), %%mm1 \n\t"
1414 "pfadd %%mm7, %%mm0 \n\t" //common
1415 "pfadd %%mm7, %%mm1 \n\t" //common
1416 "movq (%0, %%esi), %%mm2 \n\t"
1417 "movq 8(%0, %%esi), %%mm3 \n\t"
1418 "movq (%1, %%esi), %%mm4 \n\t"
1419 "movq 8(%1, %%esi), %%mm5 \n\t"
1420 "pfadd %%mm0, %%mm2 \n\t"
1421 "pfadd %%mm1, %%mm3 \n\t"
1422 "pfadd %%mm0, %%mm4 \n\t"
1423 "pfadd %%mm1, %%mm5 \n\t"
1424 "movq %%mm2, (%0, %%esi) \n\t"
1425 "movq %%mm3, 8(%0, %%esi) \n\t"
1426 "movq %%mm4, (%1, %%esi) \n\t"
1427 "movq %%mm5, 8(%1, %%esi) \n\t"
1428 "addl $16, %%esi \n\t"
1429 " jnz 1b \n\t"
1430 :: "r" (left+256), "r" (right+256), "m" (bias)
1431 : "%esi"
1435 static void mix21toS_3dnow (sample_t * samples, sample_t bias)
1437 asm volatile(
1438 "movd %1, %%mm7 \n\t"
1439 "punpckldq %1, %%mm7 \n\t"
1440 "movl $-1024, %%esi \n\t"
1441 ".balign 16\n\t"
1442 "1: \n\t"
1443 "movq 2048(%0, %%esi), %%mm0 \n\t" // surround
1444 "movq 2056(%0, %%esi), %%mm1 \n\t" // surround
1445 "movq (%0, %%esi), %%mm2 \n\t"
1446 "movq 8(%0, %%esi), %%mm3 \n\t"
1447 "movq 1024(%0, %%esi), %%mm4 \n\t"
1448 "movq 1032(%0, %%esi), %%mm5 \n\t"
1449 "pfadd %%mm7, %%mm2 \n\t"
1450 "pfadd %%mm7, %%mm3 \n\t"
1451 "pfadd %%mm7, %%mm4 \n\t"
1452 "pfadd %%mm7, %%mm5 \n\t"
1453 "pfsub %%mm0, %%mm2 \n\t"
1454 "pfsub %%mm1, %%mm3 \n\t"
1455 "pfadd %%mm0, %%mm4 \n\t"
1456 "pfadd %%mm1, %%mm5 \n\t"
1457 "movq %%mm2, (%0, %%esi) \n\t"
1458 "movq %%mm3, 8(%0, %%esi) \n\t"
1459 "movq %%mm4, 1024(%0, %%esi) \n\t"
1460 "movq %%mm5, 1032(%0, %%esi) \n\t"
1461 "addl $16, %%esi \n\t"
1462 " jnz 1b \n\t"
1463 :: "r" (samples+256), "m" (bias)
1464 : "%esi"
1468 static void mix31to2_3dnow (sample_t * samples, sample_t bias)
1470 asm volatile(
1471 "movd %1, %%mm7 \n\t"
1472 "punpckldq %1, %%mm7 \n\t"
1473 "movl $-1024, %%esi \n\t"
1474 ".balign 16\n\t"
1475 "1: \n\t"
1476 "movq 1024(%0, %%esi), %%mm0 \n\t"
1477 "movq 1032(%0, %%esi), %%mm1 \n\t"
1478 "pfadd 3072(%0, %%esi), %%mm0 \n\t"
1479 "pfadd 3080(%0, %%esi), %%mm1 \n\t"
1480 "pfadd %%mm7, %%mm0 \n\t" // common
1481 "pfadd %%mm7, %%mm1 \n\t" // common
1482 "movq (%0, %%esi), %%mm2 \n\t"
1483 "movq 8(%0, %%esi), %%mm3 \n\t"
1484 "movq 2048(%0, %%esi), %%mm4 \n\t"
1485 "movq 2056(%0, %%esi), %%mm5 \n\t"
1486 "pfadd %%mm0, %%mm2 \n\t"
1487 "pfadd %%mm1, %%mm3 \n\t"
1488 "pfadd %%mm0, %%mm4 \n\t"
1489 "pfadd %%mm1, %%mm5 \n\t"
1490 "movq %%mm2, (%0, %%esi) \n\t"
1491 "movq %%mm3, 8(%0, %%esi) \n\t"
1492 "movq %%mm4, 1024(%0, %%esi) \n\t"
1493 "movq %%mm5, 1032(%0, %%esi) \n\t"
1494 "addl $16, %%esi \n\t"
1495 " jnz 1b \n\t"
1496 :: "r" (samples+256), "m" (bias)
1497 : "%esi"
1501 static void mix31toS_3dnow (sample_t * samples, sample_t bias)
1503 asm volatile(
1504 "movd %1, %%mm7 \n\t"
1505 "punpckldq %1, %%mm7 \n\t"
1506 "movl $-1024, %%esi \n\t"
1507 ".balign 16\n\t"
1508 "1: \n\t"
1509 "movq 1024(%0, %%esi), %%mm0 \n\t"
1510 "movq 1032(%0, %%esi), %%mm1 \n\t"
1511 "pfadd %%mm7, %%mm0 \n\t" // common
1512 "pfadd %%mm7, %%mm1 \n\t" // common
1513 "movq (%0, %%esi), %%mm2 \n\t"
1514 "movq 8(%0, %%esi), %%mm3 \n\t"
1515 "movq 2048(%0, %%esi), %%mm4 \n\t"
1516 "movq 2056(%0, %%esi), %%mm5 \n\t"
1517 "pfadd %%mm0, %%mm2 \n\t"
1518 "pfadd %%mm1, %%mm3 \n\t"
1519 "pfadd %%mm0, %%mm4 \n\t"
1520 "pfadd %%mm1, %%mm5 \n\t"
1521 "movq 3072(%0, %%esi), %%mm0 \n\t" // surround
1522 "movq 3080(%0, %%esi), %%mm1 \n\t" // surround
1523 "pfsub %%mm0, %%mm2 \n\t"
1524 "pfsub %%mm1, %%mm3 \n\t"
1525 "pfadd %%mm0, %%mm4 \n\t"
1526 "pfadd %%mm1, %%mm5 \n\t"
1527 "movq %%mm2, (%0, %%esi) \n\t"
1528 "movq %%mm3, 8(%0, %%esi) \n\t"
1529 "movq %%mm4, 1024(%0, %%esi) \n\t"
1530 "movq %%mm5, 1032(%0, %%esi) \n\t"
1531 "addl $16, %%esi \n\t"
1532 " jnz 1b \n\t"
1533 :: "r" (samples+256), "m" (bias)
1534 : "%esi"
1538 static void mix22toS_3dnow (sample_t * samples, sample_t bias)
1540 asm volatile(
1541 "movd %1, %%mm7 \n\t"
1542 "punpckldq %1, %%mm7 \n\t"
1543 "movl $-1024, %%esi \n\t"
1544 ".balign 16\n\t"
1545 "1: \n\t"
1546 "movq 2048(%0, %%esi), %%mm0 \n\t"
1547 "movq 2056(%0, %%esi), %%mm1 \n\t"
1548 "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround
1549 "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround
1550 "movq (%0, %%esi), %%mm2 \n\t"
1551 "movq 8(%0, %%esi), %%mm3 \n\t"
1552 "movq 1024(%0, %%esi), %%mm4 \n\t"
1553 "movq 1032(%0, %%esi), %%mm5 \n\t"
1554 "pfadd %%mm7, %%mm2 \n\t"
1555 "pfadd %%mm7, %%mm3 \n\t"
1556 "pfadd %%mm7, %%mm4 \n\t"
1557 "pfadd %%mm7, %%mm5 \n\t"
1558 "pfsub %%mm0, %%mm2 \n\t"
1559 "pfsub %%mm1, %%mm3 \n\t"
1560 "pfadd %%mm0, %%mm4 \n\t"
1561 "pfadd %%mm1, %%mm5 \n\t"
1562 "movq %%mm2, (%0, %%esi) \n\t"
1563 "movq %%mm3, 8(%0, %%esi) \n\t"
1564 "movq %%mm4, 1024(%0, %%esi) \n\t"
1565 "movq %%mm5, 1032(%0, %%esi) \n\t"
1566 "addl $16, %%esi \n\t"
1567 " jnz 1b \n\t"
1568 :: "r" (samples+256), "m" (bias)
1569 : "%esi"
1573 static void mix32to2_3dnow (sample_t * samples, sample_t bias)
1575 asm volatile(
1576 "movd %1, %%mm7 \n\t"
1577 "punpckldq %1, %%mm7 \n\t"
1578 "movl $-1024, %%esi \n\t"
1579 ".balign 16\n\t"
1580 "1: \n\t"
1581 "movq 1024(%0, %%esi), %%mm0 \n\t"
1582 "movq 1032(%0, %%esi), %%mm1 \n\t"
1583 "pfadd %%mm7, %%mm0 \n\t" // common
1584 "pfadd %%mm7, %%mm1 \n\t" // common
1585 "movq %%mm0, %%mm2 \n\t" // common
1586 "movq %%mm1, %%mm3 \n\t" // common
1587 "pfadd (%0, %%esi), %%mm0 \n\t"
1588 "pfadd 8(%0, %%esi), %%mm1 \n\t"
1589 "pfadd 2048(%0, %%esi), %%mm2 \n\t"
1590 "pfadd 2056(%0, %%esi), %%mm3 \n\t"
1591 "pfadd 3072(%0, %%esi), %%mm0 \n\t"
1592 "pfadd 3080(%0, %%esi), %%mm1 \n\t"
1593 "pfadd 4096(%0, %%esi), %%mm2 \n\t"
1594 "pfadd 4104(%0, %%esi), %%mm3 \n\t"
1595 "movq %%mm0, (%0, %%esi) \n\t"
1596 "movq %%mm1, 8(%0, %%esi) \n\t"
1597 "movq %%mm2, 1024(%0, %%esi) \n\t"
1598 "movq %%mm3, 1032(%0, %%esi) \n\t"
1599 "addl $16, %%esi \n\t"
1600 " jnz 1b \n\t"
1601 :: "r" (samples+256), "m" (bias)
1602 : "%esi"
1606 /* todo: should be optimized better */
1607 static void mix32toS_3dnow (sample_t * samples, sample_t bias)
1609 asm volatile(
1610 "movl $-1024, %%esi \n\t"
1611 ".balign 16\n\t"
1612 "1: \n\t"
1613 "movd %1, %%mm7 \n\t"
1614 "punpckldq %1, %%mm7 \n\t"
1615 "movq 1024(%0, %%esi), %%mm0 \n\t"
1616 "movq 1032(%0, %%esi), %%mm1 \n\t"
1617 "movq 3072(%0, %%esi), %%mm4 \n\t"
1618 "movq 3080(%0, %%esi), %%mm5 \n\t"
1619 "pfadd %%mm7, %%mm0 \n\t" // common
1620 "pfadd %%mm7, %%mm1 \n\t" // common
1621 "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround
1622 "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround
1623 "movq (%0, %%esi), %%mm2 \n\t"
1624 "movq 8(%0, %%esi), %%mm3 \n\t"
1625 "movq 2048(%0, %%esi), %%mm6 \n\t"
1626 "movq 2056(%0, %%esi), %%mm7 \n\t"
1627 "pfsub %%mm4, %%mm2 \n\t"
1628 "pfsub %%mm5, %%mm3 \n\t"
1629 "pfadd %%mm4, %%mm6 \n\t"
1630 "pfadd %%mm5, %%mm7 \n\t"
1631 "pfadd %%mm0, %%mm2 \n\t"
1632 "pfadd %%mm1, %%mm3 \n\t"
1633 "pfadd %%mm0, %%mm6 \n\t"
1634 "pfadd %%mm1, %%mm7 \n\t"
1635 "movq %%mm2, (%0, %%esi) \n\t"
1636 "movq %%mm3, 8(%0, %%esi) \n\t"
1637 "movq %%mm6, 1024(%0, %%esi) \n\t"
1638 "movq %%mm7, 1032(%0, %%esi) \n\t"
1639 "addl $16, %%esi \n\t"
1640 " jnz 1b \n\t"
1641 :: "r" (samples+256), "m" (bias)
1642 : "%esi"
1646 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
1648 asm volatile(
1649 "movd %2, %%mm7 \n\t"
1650 "punpckldq %2, %%mm7 \n\t"
1651 "movl $-1024, %%esi \n\t"
1652 ".balign 16\n\t"
1653 "1: \n\t"
1654 "movq (%0, %%esi), %%mm0 \n\t"
1655 "movq 8(%0, %%esi), %%mm1 \n\t"
1656 "movq 16(%0, %%esi), %%mm2 \n\t"
1657 "movq 24(%0, %%esi), %%mm3 \n\t"
1658 "pfadd 1024(%0, %%esi), %%mm0 \n\t"
1659 "pfadd 1032(%0, %%esi), %%mm1 \n\t"
1660 "pfadd 1040(%0, %%esi), %%mm2 \n\t"
1661 "pfadd 1048(%0, %%esi), %%mm3 \n\t"
1662 "pfadd %%mm7, %%mm0 \n\t"
1663 "pfadd %%mm7, %%mm1 \n\t"
1664 "pfadd %%mm7, %%mm2 \n\t"
1665 "pfadd %%mm7, %%mm3 \n\t"
1666 "movq %%mm0, (%1, %%esi) \n\t"
1667 "movq %%mm1, 8(%1, %%esi) \n\t"
1668 "movq %%mm2, 16(%1, %%esi) \n\t"
1669 "movq %%mm3, 24(%1, %%esi) \n\t"
1670 "addl $32, %%esi \n\t"
1671 " jnz 1b \n\t"
1672 :: "r" (src+256), "r" (dest+256), "m" (bias)
1673 : "%esi"
1677 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
1678 sample_t clev, sample_t slev)
1680 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
1682 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
1683 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
1684 break;
1686 case CONVERT (A52_CHANNEL, A52_MONO):
1687 case CONVERT (A52_STEREO, A52_MONO):
1688 mix_2to1_3dnow:
1689 mix2to1_3dnow (samples, samples + 256, bias);
1690 break;
1692 case CONVERT (A52_2F1R, A52_MONO):
1693 if (slev == 0)
1694 goto mix_2to1_3dnow;
1695 case CONVERT (A52_3F, A52_MONO):
1696 mix_3to1_3dnow:
1697 mix3to1_3dnow (samples, bias);
1698 break;
1700 case CONVERT (A52_3F1R, A52_MONO):
1701 if (slev == 0)
1702 goto mix_3to1_3dnow;
1703 case CONVERT (A52_2F2R, A52_MONO):
1704 if (slev == 0)
1705 goto mix_2to1_3dnow;
1706 mix4to1_3dnow (samples, bias);
1707 break;
1709 case CONVERT (A52_3F2R, A52_MONO):
1710 if (slev == 0)
1711 goto mix_3to1_3dnow;
1712 mix5to1_3dnow (samples, bias);
1713 break;
1715 case CONVERT (A52_MONO, A52_DOLBY):
1716 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
1717 break;
1719 case CONVERT (A52_3F, A52_STEREO):
1720 case CONVERT (A52_3F, A52_DOLBY):
1721 mix_3to2_3dnow:
1722 mix3to2_3dnow (samples, bias);
1723 break;
1725 case CONVERT (A52_2F1R, A52_STEREO):
1726 if (slev == 0)
1727 break;
1728 mix21to2_3dnow (samples, samples + 256, bias);
1729 break;
1731 case CONVERT (A52_2F1R, A52_DOLBY):
1732 mix21toS_3dnow (samples, bias);
1733 break;
1735 case CONVERT (A52_3F1R, A52_STEREO):
1736 if (slev == 0)
1737 goto mix_3to2_3dnow;
1738 mix31to2_3dnow (samples, bias);
1739 break;
1741 case CONVERT (A52_3F1R, A52_DOLBY):
1742 mix31toS_3dnow (samples, bias);
1743 break;
1745 case CONVERT (A52_2F2R, A52_STEREO):
1746 if (slev == 0)
1747 break;
1748 mix2to1_3dnow (samples, samples + 512, bias);
1749 mix2to1_3dnow (samples + 256, samples + 768, bias);
1750 break;
1752 case CONVERT (A52_2F2R, A52_DOLBY):
1753 mix22toS_3dnow (samples, bias);
1754 break;
1756 case CONVERT (A52_3F2R, A52_STEREO):
1757 if (slev == 0)
1758 goto mix_3to2_3dnow;
1759 mix32to2_3dnow (samples, bias);
1760 break;
1762 case CONVERT (A52_3F2R, A52_DOLBY):
1763 mix32toS_3dnow (samples, bias);
1764 break;
1766 case CONVERT (A52_3F1R, A52_3F):
1767 if (slev == 0)
1768 break;
1769 mix21to2_3dnow (samples, samples + 512, bias);
1770 break;
1772 case CONVERT (A52_3F2R, A52_3F):
1773 if (slev == 0)
1774 break;
1775 mix2to1_3dnow (samples, samples + 768, bias);
1776 mix2to1_3dnow (samples + 512, samples + 1024, bias);
1777 break;
1779 case CONVERT (A52_3F1R, A52_2F1R):
1780 mix3to2_3dnow (samples, bias);
1781 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1782 break;
1784 case CONVERT (A52_2F2R, A52_2F1R):
1785 mix2to1_3dnow (samples + 512, samples + 768, bias);
1786 break;
1788 case CONVERT (A52_3F2R, A52_2F1R):
1789 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
1790 move2to1_3dnow (samples + 768, samples + 512, bias);
1791 break;
1793 case CONVERT (A52_3F2R, A52_3F1R):
1794 mix2to1_3dnow (samples + 768, samples + 1024, bias);
1795 break;
1797 case CONVERT (A52_2F1R, A52_2F2R):
1798 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
1799 break;
1801 case CONVERT (A52_3F1R, A52_2F2R):
1802 mix3to2_3dnow (samples, bias);
1803 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1804 break;
1806 case CONVERT (A52_3F2R, A52_2F2R):
1807 mix3to2_3dnow (samples, bias);
1808 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
1809 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
1810 break;
1812 case CONVERT (A52_3F1R, A52_3F2R):
1813 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
1814 break;
1816 __asm __volatile("femms":::"memory");
1819 #endif //ARCH_X86