3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
7 * See http://liba52.sourceforge.net/ for updates.
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff.
10 * detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/
13 * a52dec is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
18 * a52dec is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
37 #include "a52_internal.h"
/*
 * Pack an (acmod, output) pair into a single integer so that a pair of
 * channel configurations can be used as one switch/case label.
 * The output mode is shifted left by 3 bits and the acmod is added, so the
 * encoding is unique as long as acmod stays in the range 0..7 (callers in
 * this file mask the input with 7 before passing it in).
 */
#define CONVERT(acmod,output) (((output) << 3) + (acmod))
43 void (*downmix
)(sample_t
* samples
, int acmod
, int output
, sample_t bias
,
44 sample_t clev
, sample_t slev
)= NULL
;
45 void (*upmix
)(sample_t
* samples
, int acmod
, int output
)= NULL
;
47 static void downmix_SSE (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
48 sample_t clev
, sample_t slev
);
49 static void downmix_3dnow (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
50 sample_t clev
, sample_t slev
);
51 static void downmix_C (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
52 sample_t clev
, sample_t slev
);
53 static void upmix_MMX (sample_t
* samples
, int acmod
, int output
);
54 static void upmix_C (sample_t
* samples
, int acmod
, int output
);
56 void downmix_accel_init(uint32_t mm_accel
)
60 #if defined(ARCH_X86) || defined(ARCH_X86_64)
61 if(mm_accel
& MM_ACCEL_X86_MMX
) upmix
= upmix_MMX
;
62 if(mm_accel
& MM_ACCEL_X86_SSE
) downmix
= downmix_SSE
;
63 if(mm_accel
& MM_ACCEL_X86_3DNOW
) downmix
= downmix_3dnow
;
67 int downmix_init (int input
, int flags
, sample_t
* level
,
68 sample_t clev
, sample_t slev
)
70 static uint8_t table
[11][8] = {
71 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_STEREO
,
72 A52_STEREO
, A52_STEREO
, A52_STEREO
, A52_STEREO
},
73 {A52_MONO
, A52_MONO
, A52_MONO
, A52_MONO
,
74 A52_MONO
, A52_MONO
, A52_MONO
, A52_MONO
},
75 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_STEREO
,
76 A52_STEREO
, A52_STEREO
, A52_STEREO
, A52_STEREO
},
77 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_3F
,
78 A52_STEREO
, A52_3F
, A52_STEREO
, A52_3F
},
79 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_STEREO
,
80 A52_2F1R
, A52_2F1R
, A52_2F1R
, A52_2F1R
},
81 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_STEREO
,
82 A52_2F1R
, A52_3F1R
, A52_2F1R
, A52_3F1R
},
83 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_3F
,
84 A52_2F2R
, A52_2F2R
, A52_2F2R
, A52_2F2R
},
85 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_3F
,
86 A52_2F2R
, A52_3F2R
, A52_2F2R
, A52_3F2R
},
87 {A52_CHANNEL1
, A52_MONO
, A52_MONO
, A52_MONO
,
88 A52_MONO
, A52_MONO
, A52_MONO
, A52_MONO
},
89 {A52_CHANNEL2
, A52_MONO
, A52_MONO
, A52_MONO
,
90 A52_MONO
, A52_MONO
, A52_MONO
, A52_MONO
},
91 {A52_CHANNEL
, A52_DOLBY
, A52_STEREO
, A52_DOLBY
,
92 A52_DOLBY
, A52_DOLBY
, A52_DOLBY
, A52_DOLBY
}
96 output
= flags
& A52_CHANNEL_MASK
;
97 if (output
> A52_DOLBY
)
100 output
= table
[output
][input
& 7];
102 if ((output
== A52_STEREO
) &&
103 ((input
== A52_DOLBY
) || ((input
== A52_3F
) && (clev
== LEVEL_3DB
))))
106 if (flags
& A52_ADJUST_LEVEL
)
107 switch (CONVERT (input
& 7, output
)) {
109 case CONVERT (A52_3F
, A52_MONO
):
110 *level
*= LEVEL_3DB
/ (1 + clev
);
113 case CONVERT (A52_STEREO
, A52_MONO
):
114 case CONVERT (A52_2F2R
, A52_2F1R
):
115 case CONVERT (A52_3F2R
, A52_3F1R
):
120 case CONVERT (A52_3F2R
, A52_2F1R
):
121 if (clev
< LEVEL_PLUS3DB
- 1)
124 case CONVERT (A52_3F
, A52_STEREO
):
125 case CONVERT (A52_3F1R
, A52_2F1R
):
126 case CONVERT (A52_3F1R
, A52_2F2R
):
127 case CONVERT (A52_3F2R
, A52_2F2R
):
131 case CONVERT (A52_2F1R
, A52_MONO
):
132 *level
*= LEVEL_PLUS3DB
/ (2 + slev
);
135 case CONVERT (A52_2F1R
, A52_STEREO
):
136 case CONVERT (A52_3F1R
, A52_3F
):
137 *level
/= 1 + slev
* LEVEL_3DB
;
140 case CONVERT (A52_3F1R
, A52_MONO
):
141 *level
*= LEVEL_3DB
/ (1 + clev
+ 0.5 * slev
);
144 case CONVERT (A52_3F1R
, A52_STEREO
):
145 *level
/= 1 + clev
+ slev
* LEVEL_3DB
;
148 case CONVERT (A52_2F2R
, A52_MONO
):
149 *level
*= LEVEL_3DB
/ (1 + slev
);
152 case CONVERT (A52_2F2R
, A52_STEREO
):
153 case CONVERT (A52_3F2R
, A52_3F
):
157 case CONVERT (A52_3F2R
, A52_MONO
):
158 *level
*= LEVEL_3DB
/ (1 + clev
+ slev
);
161 case CONVERT (A52_3F2R
, A52_STEREO
):
162 *level
/= 1 + clev
+ slev
;
165 case CONVERT (A52_MONO
, A52_DOLBY
):
166 *level
*= LEVEL_PLUS3DB
;
169 case CONVERT (A52_3F
, A52_DOLBY
):
170 case CONVERT (A52_2F1R
, A52_DOLBY
):
171 *level
*= 1 / (1 + LEVEL_3DB
);
174 case CONVERT (A52_3F1R
, A52_DOLBY
):
175 case CONVERT (A52_2F2R
, A52_DOLBY
):
176 *level
*= 1 / (1 + 2 * LEVEL_3DB
);
179 case CONVERT (A52_3F2R
, A52_DOLBY
):
180 *level
*= 1 / (1 + 3 * LEVEL_3DB
);
186 int downmix_coeff (sample_t
* coeff
, int acmod
, int output
, sample_t level
,
187 sample_t clev
, sample_t slev
)
189 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
191 case CONVERT (A52_CHANNEL
, A52_CHANNEL
):
192 case CONVERT (A52_MONO
, A52_MONO
):
193 case CONVERT (A52_STEREO
, A52_STEREO
):
194 case CONVERT (A52_3F
, A52_3F
):
195 case CONVERT (A52_2F1R
, A52_2F1R
):
196 case CONVERT (A52_3F1R
, A52_3F1R
):
197 case CONVERT (A52_2F2R
, A52_2F2R
):
198 case CONVERT (A52_3F2R
, A52_3F2R
):
199 case CONVERT (A52_STEREO
, A52_DOLBY
):
200 coeff
[0] = coeff
[1] = coeff
[2] = coeff
[3] = coeff
[4] = level
;
203 case CONVERT (A52_CHANNEL
, A52_MONO
):
204 coeff
[0] = coeff
[1] = level
* LEVEL_6DB
;
207 case CONVERT (A52_STEREO
, A52_MONO
):
208 coeff
[0] = coeff
[1] = level
* LEVEL_3DB
;
211 case CONVERT (A52_3F
, A52_MONO
):
212 coeff
[0] = coeff
[2] = level
* LEVEL_3DB
;
213 coeff
[1] = level
* clev
* LEVEL_PLUS3DB
;
216 case CONVERT (A52_2F1R
, A52_MONO
):
217 coeff
[0] = coeff
[1] = level
* LEVEL_3DB
;
218 coeff
[2] = level
* slev
* LEVEL_3DB
;
221 case CONVERT (A52_2F2R
, A52_MONO
):
222 coeff
[0] = coeff
[1] = level
* LEVEL_3DB
;
223 coeff
[2] = coeff
[3] = level
* slev
* LEVEL_3DB
;
226 case CONVERT (A52_3F1R
, A52_MONO
):
227 coeff
[0] = coeff
[2] = level
* LEVEL_3DB
;
228 coeff
[1] = level
* clev
* LEVEL_PLUS3DB
;
229 coeff
[3] = level
* slev
* LEVEL_3DB
;
232 case CONVERT (A52_3F2R
, A52_MONO
):
233 coeff
[0] = coeff
[2] = level
* LEVEL_3DB
;
234 coeff
[1] = level
* clev
* LEVEL_PLUS3DB
;
235 coeff
[3] = coeff
[4] = level
* slev
* LEVEL_3DB
;
238 case CONVERT (A52_MONO
, A52_DOLBY
):
239 coeff
[0] = level
* LEVEL_3DB
;
242 case CONVERT (A52_3F
, A52_DOLBY
):
244 case CONVERT (A52_3F
, A52_STEREO
):
245 case CONVERT (A52_3F1R
, A52_2F1R
):
246 case CONVERT (A52_3F2R
, A52_2F2R
):
247 coeff
[0] = coeff
[2] = coeff
[3] = coeff
[4] = level
;
248 coeff
[1] = level
* clev
;
251 case CONVERT (A52_2F1R
, A52_DOLBY
):
253 case CONVERT (A52_2F1R
, A52_STEREO
):
254 coeff
[0] = coeff
[1] = level
;
255 coeff
[2] = level
* slev
* LEVEL_3DB
;
258 case CONVERT (A52_3F1R
, A52_DOLBY
):
261 case CONVERT (A52_3F1R
, A52_STEREO
):
262 coeff
[0] = coeff
[2] = level
;
263 coeff
[1] = level
* clev
;
264 coeff
[3] = level
* slev
* LEVEL_3DB
;
267 case CONVERT (A52_2F2R
, A52_DOLBY
):
269 case CONVERT (A52_2F2R
, A52_STEREO
):
270 coeff
[0] = coeff
[1] = level
;
271 coeff
[2] = coeff
[3] = level
* slev
;
274 case CONVERT (A52_3F2R
, A52_DOLBY
):
276 case CONVERT (A52_3F2R
, A52_2F1R
):
278 case CONVERT (A52_3F2R
, A52_STEREO
):
279 coeff
[0] = coeff
[2] = level
;
280 coeff
[1] = level
* clev
;
281 coeff
[3] = coeff
[4] = level
* slev
;
284 case CONVERT (A52_3F1R
, A52_3F
):
285 coeff
[0] = coeff
[1] = coeff
[2] = level
;
286 coeff
[3] = level
* slev
* LEVEL_3DB
;
289 case CONVERT (A52_3F2R
, A52_3F
):
290 coeff
[0] = coeff
[1] = coeff
[2] = level
;
291 coeff
[3] = coeff
[4] = level
* slev
;
294 case CONVERT (A52_2F2R
, A52_2F1R
):
295 coeff
[0] = coeff
[1] = level
;
296 coeff
[2] = coeff
[3] = level
* LEVEL_3DB
;
299 case CONVERT (A52_3F2R
, A52_3F1R
):
300 coeff
[0] = coeff
[1] = coeff
[2] = level
;
301 coeff
[3] = coeff
[4] = level
* LEVEL_3DB
;
304 case CONVERT (A52_2F1R
, A52_2F2R
):
305 coeff
[0] = coeff
[1] = level
;
306 coeff
[2] = level
* LEVEL_3DB
;
309 case CONVERT (A52_3F1R
, A52_2F2R
):
310 coeff
[0] = coeff
[2] = level
;
311 coeff
[1] = level
* clev
;
312 coeff
[3] = level
* LEVEL_3DB
;
315 case CONVERT (A52_3F1R
, A52_3F2R
):
316 coeff
[0] = coeff
[1] = coeff
[2] = level
;
317 coeff
[3] = level
* LEVEL_3DB
;
320 case CONVERT (A52_CHANNEL
, A52_CHANNEL1
):
325 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
331 return -1; /* NOTREACHED */
334 static void mix2to1 (sample_t
* dest
, sample_t
* src
, sample_t bias
)
338 for (i
= 0; i
< 256; i
++)
339 dest
[i
] += src
[i
] + bias
;
342 static void mix3to1 (sample_t
* samples
, sample_t bias
)
346 for (i
= 0; i
< 256; i
++)
347 samples
[i
] += samples
[i
+ 256] + samples
[i
+ 512] + bias
;
350 static void mix4to1 (sample_t
* samples
, sample_t bias
)
354 for (i
= 0; i
< 256; i
++)
355 samples
[i
] += (samples
[i
+ 256] + samples
[i
+ 512] +
356 samples
[i
+ 768] + bias
);
359 static void mix5to1 (sample_t
* samples
, sample_t bias
)
363 for (i
= 0; i
< 256; i
++)
364 samples
[i
] += (samples
[i
+ 256] + samples
[i
+ 512] +
365 samples
[i
+ 768] + samples
[i
+ 1024] + bias
);
368 static void mix3to2 (sample_t
* samples
, sample_t bias
)
373 for (i
= 0; i
< 256; i
++) {
374 common
= samples
[i
+ 256] + bias
;
375 samples
[i
] += common
;
376 samples
[i
+ 256] = samples
[i
+ 512] + common
;
380 static void mix21to2 (sample_t
* left
, sample_t
* right
, sample_t bias
)
385 for (i
= 0; i
< 256; i
++) {
386 common
= right
[i
+ 256] + bias
;
392 static void mix21toS (sample_t
* samples
, sample_t bias
)
397 for (i
= 0; i
< 256; i
++) {
398 surround
= samples
[i
+ 512];
399 samples
[i
] += bias
- surround
;
400 samples
[i
+ 256] += bias
+ surround
;
404 static void mix31to2 (sample_t
* samples
, sample_t bias
)
409 for (i
= 0; i
< 256; i
++) {
410 common
= samples
[i
+ 256] + samples
[i
+ 768] + bias
;
411 samples
[i
] += common
;
412 samples
[i
+ 256] = samples
[i
+ 512] + common
;
416 static void mix31toS (sample_t
* samples
, sample_t bias
)
419 sample_t common
, surround
;
421 for (i
= 0; i
< 256; i
++) {
422 common
= samples
[i
+ 256] + bias
;
423 surround
= samples
[i
+ 768];
424 samples
[i
] += common
- surround
;
425 samples
[i
+ 256] = samples
[i
+ 512] + common
+ surround
;
429 static void mix22toS (sample_t
* samples
, sample_t bias
)
434 for (i
= 0; i
< 256; i
++) {
435 surround
= samples
[i
+ 512] + samples
[i
+ 768];
436 samples
[i
] += bias
- surround
;
437 samples
[i
+ 256] += bias
+ surround
;
441 static void mix32to2 (sample_t
* samples
, sample_t bias
)
446 for (i
= 0; i
< 256; i
++) {
447 common
= samples
[i
+ 256] + bias
;
448 samples
[i
] += common
+ samples
[i
+ 768];
449 samples
[i
+ 256] = common
+ samples
[i
+ 512] + samples
[i
+ 1024];
453 static void mix32toS (sample_t
* samples
, sample_t bias
)
456 sample_t common
, surround
;
458 for (i
= 0; i
< 256; i
++) {
459 common
= samples
[i
+ 256] + bias
;
460 surround
= samples
[i
+ 768] + samples
[i
+ 1024];
461 samples
[i
] += common
- surround
;
462 samples
[i
+ 256] = samples
[i
+ 512] + common
+ surround
;
466 static void move2to1 (sample_t
* src
, sample_t
* dest
, sample_t bias
)
470 for (i
= 0; i
< 256; i
++)
471 dest
[i
] = src
[i
] + src
[i
+ 256] + bias
;
474 static void zero (sample_t
* samples
)
477 for (i
= 0; i
< 256; i
++)
481 static void downmix_C (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
482 sample_t clev
, sample_t slev
)
484 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
486 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
487 memcpy (samples
, samples
+ 256, 256 * sizeof (sample_t
));
490 case CONVERT (A52_CHANNEL
, A52_MONO
):
491 case CONVERT (A52_STEREO
, A52_MONO
):
493 mix2to1 (samples
, samples
+ 256, bias
);
496 case CONVERT (A52_2F1R
, A52_MONO
):
499 case CONVERT (A52_3F
, A52_MONO
):
501 mix3to1 (samples
, bias
);
504 case CONVERT (A52_3F1R
, A52_MONO
):
507 case CONVERT (A52_2F2R
, A52_MONO
):
510 mix4to1 (samples
, bias
);
513 case CONVERT (A52_3F2R
, A52_MONO
):
516 mix5to1 (samples
, bias
);
519 case CONVERT (A52_MONO
, A52_DOLBY
):
520 memcpy (samples
+ 256, samples
, 256 * sizeof (sample_t
));
523 case CONVERT (A52_3F
, A52_STEREO
):
524 case CONVERT (A52_3F
, A52_DOLBY
):
526 mix3to2 (samples
, bias
);
529 case CONVERT (A52_2F1R
, A52_STEREO
):
532 mix21to2 (samples
, samples
+ 256, bias
);
535 case CONVERT (A52_2F1R
, A52_DOLBY
):
536 mix21toS (samples
, bias
);
539 case CONVERT (A52_3F1R
, A52_STEREO
):
542 mix31to2 (samples
, bias
);
545 case CONVERT (A52_3F1R
, A52_DOLBY
):
546 mix31toS (samples
, bias
);
549 case CONVERT (A52_2F2R
, A52_STEREO
):
552 mix2to1 (samples
, samples
+ 512, bias
);
553 mix2to1 (samples
+ 256, samples
+ 768, bias
);
556 case CONVERT (A52_2F2R
, A52_DOLBY
):
557 mix22toS (samples
, bias
);
560 case CONVERT (A52_3F2R
, A52_STEREO
):
563 mix32to2 (samples
, bias
);
566 case CONVERT (A52_3F2R
, A52_DOLBY
):
567 mix32toS (samples
, bias
);
570 case CONVERT (A52_3F1R
, A52_3F
):
573 mix21to2 (samples
, samples
+ 512, bias
);
576 case CONVERT (A52_3F2R
, A52_3F
):
579 mix2to1 (samples
, samples
+ 768, bias
);
580 mix2to1 (samples
+ 512, samples
+ 1024, bias
);
583 case CONVERT (A52_3F1R
, A52_2F1R
):
584 mix3to2 (samples
, bias
);
585 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
588 case CONVERT (A52_2F2R
, A52_2F1R
):
589 mix2to1 (samples
+ 512, samples
+ 768, bias
);
592 case CONVERT (A52_3F2R
, A52_2F1R
):
593 mix3to2 (samples
, bias
); //FIXME possible bug? (output doesnt seem to be used)
594 move2to1 (samples
+ 768, samples
+ 512, bias
);
597 case CONVERT (A52_3F2R
, A52_3F1R
):
598 mix2to1 (samples
+ 768, samples
+ 1024, bias
);
601 case CONVERT (A52_2F1R
, A52_2F2R
):
602 memcpy (samples
+ 768, samples
+ 512, 256 * sizeof (sample_t
));
605 case CONVERT (A52_3F1R
, A52_2F2R
):
606 mix3to2 (samples
, bias
);
607 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
610 case CONVERT (A52_3F2R
, A52_2F2R
):
611 mix3to2 (samples
, bias
);
612 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
613 memcpy (samples
+ 768, samples
+ 1024, 256 * sizeof (sample_t
));
616 case CONVERT (A52_3F1R
, A52_3F2R
):
617 memcpy (samples
+ 1024, samples
+ 768, 256 * sizeof (sample_t
));
622 static void upmix_C (sample_t
* samples
, int acmod
, int output
)
624 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
626 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
627 memcpy (samples
+ 256, samples
, 256 * sizeof (sample_t
));
630 case CONVERT (A52_3F2R
, A52_MONO
):
631 zero (samples
+ 1024);
632 case CONVERT (A52_3F1R
, A52_MONO
):
633 case CONVERT (A52_2F2R
, A52_MONO
):
634 zero (samples
+ 768);
635 case CONVERT (A52_3F
, A52_MONO
):
636 case CONVERT (A52_2F1R
, A52_MONO
):
637 zero (samples
+ 512);
638 case CONVERT (A52_CHANNEL
, A52_MONO
):
639 case CONVERT (A52_STEREO
, A52_MONO
):
640 zero (samples
+ 256);
643 case CONVERT (A52_3F2R
, A52_STEREO
):
644 case CONVERT (A52_3F2R
, A52_DOLBY
):
645 zero (samples
+ 1024);
646 case CONVERT (A52_3F1R
, A52_STEREO
):
647 case CONVERT (A52_3F1R
, A52_DOLBY
):
648 zero (samples
+ 768);
649 case CONVERT (A52_3F
, A52_STEREO
):
650 case CONVERT (A52_3F
, A52_DOLBY
):
652 memcpy (samples
+ 512, samples
+ 256, 256 * sizeof (sample_t
));
653 zero (samples
+ 256);
656 case CONVERT (A52_2F2R
, A52_STEREO
):
657 case CONVERT (A52_2F2R
, A52_DOLBY
):
658 zero (samples
+ 768);
659 case CONVERT (A52_2F1R
, A52_STEREO
):
660 case CONVERT (A52_2F1R
, A52_DOLBY
):
661 zero (samples
+ 512);
664 case CONVERT (A52_3F2R
, A52_3F
):
665 zero (samples
+ 1024);
666 case CONVERT (A52_3F1R
, A52_3F
):
667 case CONVERT (A52_2F2R
, A52_2F1R
):
668 zero (samples
+ 768);
671 case CONVERT (A52_3F2R
, A52_3F1R
):
672 zero (samples
+ 1024);
675 case CONVERT (A52_3F2R
, A52_2F1R
):
676 zero (samples
+ 1024);
677 case CONVERT (A52_3F1R
, A52_2F1R
):
679 memcpy (samples
+ 768, samples
+ 512, 256 * sizeof (sample_t
));
682 case CONVERT (A52_3F2R
, A52_2F2R
):
683 memcpy (samples
+ 1024, samples
+ 768, 256 * sizeof (sample_t
));
688 #if defined(ARCH_X86) || defined(ARCH_X86_64)
689 static void mix2to1_SSE (sample_t
* dest
, sample_t
* src
, sample_t bias
)
692 "movlps %2, %%xmm7 \n\t"
693 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
694 "mov $-1024, %%"REG_S
" \n\t"
697 "movaps (%0, %%"REG_S
"), %%xmm0 \n\t"
698 "movaps 16(%0, %%"REG_S
"), %%xmm1\n\t"
699 "addps (%1, %%"REG_S
"), %%xmm0 \n\t"
700 "addps 16(%1, %%"REG_S
"), %%xmm1\n\t"
701 "addps %%xmm7, %%xmm0 \n\t"
702 "addps %%xmm7, %%xmm1 \n\t"
703 "movaps %%xmm0, (%1, %%"REG_S
") \n\t"
704 "movaps %%xmm1, 16(%1, %%"REG_S
")\n\t"
705 "add $32, %%"REG_S
" \n\t"
707 :: "r" (src
+256), "r" (dest
+256), "m" (bias
)
712 static void mix3to1_SSE (sample_t
* samples
, sample_t bias
)
715 "movlps %1, %%xmm7 \n\t"
716 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
717 "mov $-1024, %%"REG_S
" \n\t"
720 "movaps (%0, %%"REG_S
"), %%xmm0 \n\t"
721 "movaps 1024(%0, %%"REG_S
"), %%xmm1\n\t"
722 "addps 2048(%0, %%"REG_S
"), %%xmm0\n\t"
723 "addps %%xmm7, %%xmm1 \n\t"
724 "addps %%xmm1, %%xmm0 \n\t"
725 "movaps %%xmm0, (%0, %%"REG_S
") \n\t"
726 "add $16, %%"REG_S
" \n\t"
728 :: "r" (samples
+256), "m" (bias
)
733 static void mix4to1_SSE (sample_t
* samples
, sample_t bias
)
736 "movlps %1, %%xmm7 \n\t"
737 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
738 "mov $-1024, %%"REG_S
" \n\t"
741 "movaps (%0, %%"REG_S
"), %%xmm0 \n\t"
742 "movaps 1024(%0, %%"REG_S
"), %%xmm1\n\t"
743 "addps 2048(%0, %%"REG_S
"), %%xmm0\n\t"
744 "addps 3072(%0, %%"REG_S
"), %%xmm1\n\t"
745 "addps %%xmm7, %%xmm0 \n\t"
746 "addps %%xmm1, %%xmm0 \n\t"
747 "movaps %%xmm0, (%0, %%"REG_S
") \n\t"
748 "add $16, %%"REG_S
" \n\t"
750 :: "r" (samples
+256), "m" (bias
)
755 static void mix5to1_SSE (sample_t
* samples
, sample_t bias
)
758 "movlps %1, %%xmm7 \n\t"
759 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
760 "mov $-1024, %%"REG_S
" \n\t"
763 "movaps (%0, %%"REG_S
"), %%xmm0 \n\t"
764 "movaps 1024(%0, %%"REG_S
"), %%xmm1\n\t"
765 "addps 2048(%0, %%"REG_S
"), %%xmm0\n\t"
766 "addps 3072(%0, %%"REG_S
"), %%xmm1\n\t"
767 "addps %%xmm7, %%xmm0 \n\t"
768 "addps 4096(%0, %%"REG_S
"), %%xmm1\n\t"
769 "addps %%xmm1, %%xmm0 \n\t"
770 "movaps %%xmm0, (%0, %%"REG_S
") \n\t"
771 "add $16, %%"REG_S
" \n\t"
773 :: "r" (samples
+256), "m" (bias
)
778 static void mix3to2_SSE (sample_t
* samples
, sample_t bias
)
781 "movlps %1, %%xmm7 \n\t"
782 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
783 "mov $-1024, %%"REG_S
" \n\t"
786 "movaps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
787 "addps %%xmm7, %%xmm0 \n\t" //common
788 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
789 "movaps 2048(%0, %%"REG_S
"), %%xmm2\n\t"
790 "addps %%xmm0, %%xmm1 \n\t"
791 "addps %%xmm0, %%xmm2 \n\t"
792 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
793 "movaps %%xmm2, 1024(%0, %%"REG_S
")\n\t"
794 "add $16, %%"REG_S
" \n\t"
796 :: "r" (samples
+256), "m" (bias
)
801 static void mix21to2_SSE (sample_t
* left
, sample_t
* right
, sample_t bias
)
804 "movlps %2, %%xmm7 \n\t"
805 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
806 "mov $-1024, %%"REG_S
" \n\t"
809 "movaps 1024(%1, %%"REG_S
"), %%xmm0\n\t"
810 "addps %%xmm7, %%xmm0 \n\t" //common
811 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
812 "movaps (%1, %%"REG_S
"), %%xmm2 \n\t"
813 "addps %%xmm0, %%xmm1 \n\t"
814 "addps %%xmm0, %%xmm2 \n\t"
815 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
816 "movaps %%xmm2, (%1, %%"REG_S
") \n\t"
817 "add $16, %%"REG_S
" \n\t"
819 :: "r" (left
+256), "r" (right
+256), "m" (bias
)
824 static void mix21toS_SSE (sample_t
* samples
, sample_t bias
)
827 "movlps %1, %%xmm7 \n\t"
828 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
829 "mov $-1024, %%"REG_S
" \n\t"
832 "movaps 2048(%0, %%"REG_S
"), %%xmm0\n\t" // surround
833 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
834 "movaps 1024(%0, %%"REG_S
"), %%xmm2\n\t"
835 "addps %%xmm7, %%xmm1 \n\t"
836 "addps %%xmm7, %%xmm2 \n\t"
837 "subps %%xmm0, %%xmm1 \n\t"
838 "addps %%xmm0, %%xmm2 \n\t"
839 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
840 "movaps %%xmm2, 1024(%0, %%"REG_S
")\n\t"
841 "add $16, %%"REG_S
" \n\t"
843 :: "r" (samples
+256), "m" (bias
)
848 static void mix31to2_SSE (sample_t
* samples
, sample_t bias
)
851 "movlps %1, %%xmm7 \n\t"
852 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
853 "mov $-1024, %%"REG_S
" \n\t"
856 "movaps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
857 "addps 3072(%0, %%"REG_S
"), %%xmm0\n\t"
858 "addps %%xmm7, %%xmm0 \n\t" // common
859 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
860 "movaps 2048(%0, %%"REG_S
"), %%xmm2\n\t"
861 "addps %%xmm0, %%xmm1 \n\t"
862 "addps %%xmm0, %%xmm2 \n\t"
863 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
864 "movaps %%xmm2, 1024(%0, %%"REG_S
")\n\t"
865 "add $16, %%"REG_S
" \n\t"
867 :: "r" (samples
+256), "m" (bias
)
872 static void mix31toS_SSE (sample_t
* samples
, sample_t bias
)
875 "movlps %1, %%xmm7 \n\t"
876 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
877 "mov $-1024, %%"REG_S
" \n\t"
880 "movaps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
881 "movaps 3072(%0, %%"REG_S
"), %%xmm3\n\t" // surround
882 "addps %%xmm7, %%xmm0 \n\t" // common
883 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
884 "movaps 2048(%0, %%"REG_S
"), %%xmm2\n\t"
885 "addps %%xmm0, %%xmm1 \n\t"
886 "addps %%xmm0, %%xmm2 \n\t"
887 "subps %%xmm3, %%xmm1 \n\t"
888 "addps %%xmm3, %%xmm2 \n\t"
889 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
890 "movaps %%xmm2, 1024(%0, %%"REG_S
")\n\t"
891 "add $16, %%"REG_S
" \n\t"
893 :: "r" (samples
+256), "m" (bias
)
898 static void mix22toS_SSE (sample_t
* samples
, sample_t bias
)
901 "movlps %1, %%xmm7 \n\t"
902 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
903 "mov $-1024, %%"REG_S
" \n\t"
906 "movaps 2048(%0, %%"REG_S
"), %%xmm0\n\t"
907 "addps 3072(%0, %%"REG_S
"), %%xmm0\n\t" // surround
908 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
909 "movaps 1024(%0, %%"REG_S
"), %%xmm2\n\t"
910 "addps %%xmm7, %%xmm1 \n\t"
911 "addps %%xmm7, %%xmm2 \n\t"
912 "subps %%xmm0, %%xmm1 \n\t"
913 "addps %%xmm0, %%xmm2 \n\t"
914 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
915 "movaps %%xmm2, 1024(%0, %%"REG_S
")\n\t"
916 "add $16, %%"REG_S
" \n\t"
918 :: "r" (samples
+256), "m" (bias
)
923 static void mix32to2_SSE (sample_t
* samples
, sample_t bias
)
926 "movlps %1, %%xmm7 \n\t"
927 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
928 "mov $-1024, %%"REG_S
" \n\t"
931 "movaps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
932 "addps %%xmm7, %%xmm0 \n\t" // common
933 "movaps %%xmm0, %%xmm1 \n\t" // common
934 "addps (%0, %%"REG_S
"), %%xmm0 \n\t"
935 "addps 2048(%0, %%"REG_S
"), %%xmm1\n\t"
936 "addps 3072(%0, %%"REG_S
"), %%xmm0\n\t"
937 "addps 4096(%0, %%"REG_S
"), %%xmm1\n\t"
938 "movaps %%xmm0, (%0, %%"REG_S
") \n\t"
939 "movaps %%xmm1, 1024(%0, %%"REG_S
")\n\t"
940 "add $16, %%"REG_S
" \n\t"
942 :: "r" (samples
+256), "m" (bias
)
947 static void mix32toS_SSE (sample_t
* samples
, sample_t bias
)
950 "movlps %1, %%xmm7 \n\t"
951 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
952 "mov $-1024, %%"REG_S
" \n\t"
955 "movaps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
956 "movaps 3072(%0, %%"REG_S
"), %%xmm2\n\t"
957 "addps %%xmm7, %%xmm0 \n\t" // common
958 "addps 4096(%0, %%"REG_S
"), %%xmm2\n\t" // surround
959 "movaps (%0, %%"REG_S
"), %%xmm1 \n\t"
960 "movaps 2048(%0, %%"REG_S
"), %%xmm3\n\t"
961 "subps %%xmm2, %%xmm1 \n\t"
962 "addps %%xmm2, %%xmm3 \n\t"
963 "addps %%xmm0, %%xmm1 \n\t"
964 "addps %%xmm0, %%xmm3 \n\t"
965 "movaps %%xmm1, (%0, %%"REG_S
") \n\t"
966 "movaps %%xmm3, 1024(%0, %%"REG_S
")\n\t"
967 "add $16, %%"REG_S
" \n\t"
969 :: "r" (samples
+256), "m" (bias
)
974 static void move2to1_SSE (sample_t
* src
, sample_t
* dest
, sample_t bias
)
977 "movlps %2, %%xmm7 \n\t"
978 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
979 "mov $-1024, %%"REG_S
" \n\t"
982 "movaps (%0, %%"REG_S
"), %%xmm0 \n\t"
983 "movaps 16(%0, %%"REG_S
"), %%xmm1\n\t"
984 "addps 1024(%0, %%"REG_S
"), %%xmm0\n\t"
985 "addps 1040(%0, %%"REG_S
"), %%xmm1\n\t"
986 "addps %%xmm7, %%xmm0 \n\t"
987 "addps %%xmm7, %%xmm1 \n\t"
988 "movaps %%xmm0, (%1, %%"REG_S
") \n\t"
989 "movaps %%xmm1, 16(%1, %%"REG_S
")\n\t"
990 "add $32, %%"REG_S
" \n\t"
992 :: "r" (src
+256), "r" (dest
+256), "m" (bias
)
997 static void zero_MMX(sample_t
* samples
)
1000 "mov $-1024, %%"REG_S
" \n\t"
1001 "pxor %%mm0, %%mm0 \n\t"
1004 "movq %%mm0, (%0, %%"REG_S
") \n\t"
1005 "movq %%mm0, 8(%0, %%"REG_S
") \n\t"
1006 "movq %%mm0, 16(%0, %%"REG_S
") \n\t"
1007 "movq %%mm0, 24(%0, %%"REG_S
") \n\t"
1008 "add $32, %%"REG_S
" \n\t"
1011 :: "r" (samples
+256)
/*
 * This relies on dest and src being at least 8-byte aligned and on size
 * being an exact multiple of 8 (no remainder).
 * Note: untested and unused.
 */
1021 static void copy_MMX(void *dest
,const void *src
,unsigned size
)
1028 "movq %0, %%mm0\n\t"
1029 "movq 8%0, %%mm1\n\t"
1030 "movq 16%0, %%mm2\n\t"
1031 "movq 24%0, %%mm3\n\t"
1032 "movq 32%0, %%mm4\n\t"
1033 "movq 40%0, %%mm5\n\t"
1034 "movq 48%0, %%mm6\n\t"
1035 "movq 56%0, %%mm7\n\t"
1036 "movq %%mm0, %1\n\t"
1037 "movq %%mm1, 8%1\n\t"
1038 "movq %%mm2, 16%1\n\t"
1039 "movq %%mm3, 24%1\n\t"
1040 "movq %%mm4, 32%1\n\t"
1041 "movq %%mm5, 40%1\n\t"
1042 "movq %%mm6, 48%1\n\t"
1043 "movq %%mm7, 56%1\n\t"
1045 :"m"(src
),"m"(dest
));
1049 static void downmix_SSE (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
1050 sample_t clev
, sample_t slev
)
1052 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
1054 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
1055 memcpy (samples
, samples
+ 256, 256 * sizeof (sample_t
));
1058 case CONVERT (A52_CHANNEL
, A52_MONO
):
1059 case CONVERT (A52_STEREO
, A52_MONO
):
1061 mix2to1_SSE (samples
, samples
+ 256, bias
);
1064 case CONVERT (A52_2F1R
, A52_MONO
):
1067 case CONVERT (A52_3F
, A52_MONO
):
1069 mix3to1_SSE (samples
, bias
);
1072 case CONVERT (A52_3F1R
, A52_MONO
):
1075 case CONVERT (A52_2F2R
, A52_MONO
):
1078 mix4to1_SSE (samples
, bias
);
1081 case CONVERT (A52_3F2R
, A52_MONO
):
1084 mix5to1_SSE (samples
, bias
);
1087 case CONVERT (A52_MONO
, A52_DOLBY
):
1088 memcpy (samples
+ 256, samples
, 256 * sizeof (sample_t
));
1091 case CONVERT (A52_3F
, A52_STEREO
):
1092 case CONVERT (A52_3F
, A52_DOLBY
):
1094 mix3to2_SSE (samples
, bias
);
1097 case CONVERT (A52_2F1R
, A52_STEREO
):
1100 mix21to2_SSE (samples
, samples
+ 256, bias
);
1103 case CONVERT (A52_2F1R
, A52_DOLBY
):
1104 mix21toS_SSE (samples
, bias
);
1107 case CONVERT (A52_3F1R
, A52_STEREO
):
1110 mix31to2_SSE (samples
, bias
);
1113 case CONVERT (A52_3F1R
, A52_DOLBY
):
1114 mix31toS_SSE (samples
, bias
);
1117 case CONVERT (A52_2F2R
, A52_STEREO
):
1120 mix2to1_SSE (samples
, samples
+ 512, bias
);
1121 mix2to1_SSE (samples
+ 256, samples
+ 768, bias
);
1124 case CONVERT (A52_2F2R
, A52_DOLBY
):
1125 mix22toS_SSE (samples
, bias
);
1128 case CONVERT (A52_3F2R
, A52_STEREO
):
1131 mix32to2_SSE (samples
, bias
);
1134 case CONVERT (A52_3F2R
, A52_DOLBY
):
1135 mix32toS_SSE (samples
, bias
);
1138 case CONVERT (A52_3F1R
, A52_3F
):
1141 mix21to2_SSE (samples
, samples
+ 512, bias
);
1144 case CONVERT (A52_3F2R
, A52_3F
):
1147 mix2to1_SSE (samples
, samples
+ 768, bias
);
1148 mix2to1_SSE (samples
+ 512, samples
+ 1024, bias
);
1151 case CONVERT (A52_3F1R
, A52_2F1R
):
1152 mix3to2_SSE (samples
, bias
);
1153 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
1156 case CONVERT (A52_2F2R
, A52_2F1R
):
1157 mix2to1_SSE (samples
+ 512, samples
+ 768, bias
);
1160 case CONVERT (A52_3F2R
, A52_2F1R
):
1161 mix3to2_SSE (samples
, bias
); //FIXME possible bug? (output doesnt seem to be used)
1162 move2to1_SSE (samples
+ 768, samples
+ 512, bias
);
1165 case CONVERT (A52_3F2R
, A52_3F1R
):
1166 mix2to1_SSE (samples
+ 768, samples
+ 1024, bias
);
1169 case CONVERT (A52_2F1R
, A52_2F2R
):
1170 memcpy (samples
+ 768, samples
+ 512, 256 * sizeof (sample_t
));
1173 case CONVERT (A52_3F1R
, A52_2F2R
):
1174 mix3to2_SSE (samples
, bias
);
1175 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
1178 case CONVERT (A52_3F2R
, A52_2F2R
):
1179 mix3to2_SSE (samples
, bias
);
1180 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
1181 memcpy (samples
+ 768, samples
+ 1024, 256 * sizeof (sample_t
));
1184 case CONVERT (A52_3F1R
, A52_3F2R
):
1185 memcpy (samples
+ 1024, samples
+ 768, 256 * sizeof (sample_t
));
1190 static void upmix_MMX (sample_t
* samples
, int acmod
, int output
)
1192 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
1194 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
1195 memcpy (samples
+ 256, samples
, 256 * sizeof (sample_t
));
1198 case CONVERT (A52_3F2R
, A52_MONO
):
1199 zero_MMX (samples
+ 1024);
1200 case CONVERT (A52_3F1R
, A52_MONO
):
1201 case CONVERT (A52_2F2R
, A52_MONO
):
1202 zero_MMX (samples
+ 768);
1203 case CONVERT (A52_3F
, A52_MONO
):
1204 case CONVERT (A52_2F1R
, A52_MONO
):
1205 zero_MMX (samples
+ 512);
1206 case CONVERT (A52_CHANNEL
, A52_MONO
):
1207 case CONVERT (A52_STEREO
, A52_MONO
):
1208 zero_MMX (samples
+ 256);
1211 case CONVERT (A52_3F2R
, A52_STEREO
):
1212 case CONVERT (A52_3F2R
, A52_DOLBY
):
1213 zero_MMX (samples
+ 1024);
1214 case CONVERT (A52_3F1R
, A52_STEREO
):
1215 case CONVERT (A52_3F1R
, A52_DOLBY
):
1216 zero_MMX (samples
+ 768);
1217 case CONVERT (A52_3F
, A52_STEREO
):
1218 case CONVERT (A52_3F
, A52_DOLBY
):
1220 memcpy (samples
+ 512, samples
+ 256, 256 * sizeof (sample_t
));
1221 zero_MMX (samples
+ 256);
1224 case CONVERT (A52_2F2R
, A52_STEREO
):
1225 case CONVERT (A52_2F2R
, A52_DOLBY
):
1226 zero_MMX (samples
+ 768);
1227 case CONVERT (A52_2F1R
, A52_STEREO
):
1228 case CONVERT (A52_2F1R
, A52_DOLBY
):
1229 zero_MMX (samples
+ 512);
1232 case CONVERT (A52_3F2R
, A52_3F
):
1233 zero_MMX (samples
+ 1024);
1234 case CONVERT (A52_3F1R
, A52_3F
):
1235 case CONVERT (A52_2F2R
, A52_2F1R
):
1236 zero_MMX (samples
+ 768);
1239 case CONVERT (A52_3F2R
, A52_3F1R
):
1240 zero_MMX (samples
+ 1024);
1243 case CONVERT (A52_3F2R
, A52_2F1R
):
1244 zero_MMX (samples
+ 1024);
1245 case CONVERT (A52_3F1R
, A52_2F1R
):
1247 memcpy (samples
+ 768, samples
+ 512, 256 * sizeof (sample_t
));
1250 case CONVERT (A52_3F2R
, A52_2F2R
):
1251 memcpy (samples
+ 1024, samples
+ 768, 256 * sizeof (sample_t
));
1252 goto mix_31to21_MMX
;
1256 static void mix2to1_3dnow (sample_t
* dest
, sample_t
* src
, sample_t bias
)
1259 "movd %2, %%mm7 \n\t"
1260 "punpckldq %2, %%mm7 \n\t"
1261 "mov $-1024, %%"REG_S
" \n\t"
1264 "movq (%0, %%"REG_S
"), %%mm0 \n\t"
1265 "movq 8(%0, %%"REG_S
"), %%mm1 \n\t"
1266 "movq 16(%0, %%"REG_S
"), %%mm2 \n\t"
1267 "movq 24(%0, %%"REG_S
"), %%mm3 \n\t"
1268 "pfadd (%1, %%"REG_S
"), %%mm0 \n\t"
1269 "pfadd 8(%1, %%"REG_S
"), %%mm1 \n\t"
1270 "pfadd 16(%1, %%"REG_S
"), %%mm2 \n\t"
1271 "pfadd 24(%1, %%"REG_S
"), %%mm3 \n\t"
1272 "pfadd %%mm7, %%mm0 \n\t"
1273 "pfadd %%mm7, %%mm1 \n\t"
1274 "pfadd %%mm7, %%mm2 \n\t"
1275 "pfadd %%mm7, %%mm3 \n\t"
1276 "movq %%mm0, (%1, %%"REG_S
") \n\t"
1277 "movq %%mm1, 8(%1, %%"REG_S
") \n\t"
1278 "movq %%mm2, 16(%1, %%"REG_S
") \n\t"
1279 "movq %%mm3, 24(%1, %%"REG_S
") \n\t"
1280 "add $32, %%"REG_S
" \n\t"
1282 :: "r" (src
+256), "r" (dest
+256), "m" (bias
)
/*
 * Sum three consecutive channel planes plus bias into the first plane:
 *     samples[i] = samples[i] + samples[i+256] + samples[i+512] + bias
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm). bias is broadcast into both lanes of mm7.
 * Each pass covers 16 bytes.
 */
1287 static void mix3to1_3dnow (sample_t
* samples
, sample_t bias
)
1290 "movd %1, %%mm7 \n\t"
1291 "punpckldq %1, %%mm7 \n\t"
1292 "mov $-1024, %%"REG_S
" \n\t"
1295 "movq (%0, %%"REG_S
"), %%mm0 \n\t"
1296 "movq 8(%0, %%"REG_S
"), %%mm1 \n\t"
1297 "movq 1024(%0, %%"REG_S
"), %%mm2\n\t"
1298 "movq 1032(%0, %%"REG_S
"), %%mm3\n\t"
1299 "pfadd 2048(%0, %%"REG_S
"), %%mm0\n\t"
1300 "pfadd 2056(%0, %%"REG_S
"), %%mm1\n\t"
1301 "pfadd %%mm7, %%mm0 \n\t"
1302 "pfadd %%mm7, %%mm1 \n\t"
1303 "pfadd %%mm2, %%mm0 \n\t"
1304 "pfadd %%mm3, %%mm1 \n\t"
/* result overwrites the first plane */
1305 "movq %%mm0, (%0, %%"REG_S
") \n\t"
1306 "movq %%mm1, 8(%0, %%"REG_S
") \n\t"
1307 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1309 :: "r" (samples
+256), "m" (bias
)
/*
 * Sum four consecutive channel planes plus bias into the first plane:
 *     samples[i] = samples[i] + samples[i+256] + samples[i+512]
 *                + samples[i+768] + bias
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm). Planes 0 and 2 are accumulated in mm0/mm1,
 * planes 1 and 3 in mm2/mm3, and the two partial sums are then combined.
 */
1314 static void mix4to1_3dnow (sample_t
* samples
, sample_t bias
)
1317 "movd %1, %%mm7 \n\t"
1318 "punpckldq %1, %%mm7 \n\t"
1319 "mov $-1024, %%"REG_S
" \n\t"
1322 "movq (%0, %%"REG_S
"), %%mm0 \n\t"
1323 "movq 8(%0, %%"REG_S
"), %%mm1 \n\t"
1324 "movq 1024(%0, %%"REG_S
"), %%mm2\n\t"
1325 "movq 1032(%0, %%"REG_S
"), %%mm3\n\t"
1326 "pfadd 2048(%0, %%"REG_S
"), %%mm0\n\t"
1327 "pfadd 2056(%0, %%"REG_S
"), %%mm1\n\t"
1328 "pfadd 3072(%0, %%"REG_S
"), %%mm2\n\t"
1329 "pfadd 3080(%0, %%"REG_S
"), %%mm3\n\t"
1330 "pfadd %%mm7, %%mm0 \n\t"
1331 "pfadd %%mm7, %%mm1 \n\t"
1332 "pfadd %%mm2, %%mm0 \n\t"
1333 "pfadd %%mm3, %%mm1 \n\t"
/* result overwrites the first plane */
1334 "movq %%mm0, (%0, %%"REG_S
") \n\t"
1335 "movq %%mm1, 8(%0, %%"REG_S
") \n\t"
1336 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1338 :: "r" (samples
+256), "m" (bias
)
/*
 * Sum five consecutive channel planes plus bias into the first plane:
 *     samples[i] = samples[i] + samples[i+256] + samples[i+512]
 *                + samples[i+768] + samples[i+1024] + bias
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm). Planes 0 and 2 go through mm0/mm1, planes
 * 1, 3 and 4 through mm2/mm3, and the partial sums are combined last.
 */
1343 static void mix5to1_3dnow (sample_t
* samples
, sample_t bias
)
1346 "movd %1, %%mm7 \n\t"
1347 "punpckldq %1, %%mm7 \n\t"
1348 "mov $-1024, %%"REG_S
" \n\t"
1351 "movq (%0, %%"REG_S
"), %%mm0 \n\t"
1352 "movq 8(%0, %%"REG_S
"), %%mm1 \n\t"
1353 "movq 1024(%0, %%"REG_S
"), %%mm2\n\t"
1354 "movq 1032(%0, %%"REG_S
"), %%mm3\n\t"
1355 "pfadd 2048(%0, %%"REG_S
"), %%mm0\n\t"
1356 "pfadd 2056(%0, %%"REG_S
"), %%mm1\n\t"
1357 "pfadd 3072(%0, %%"REG_S
"), %%mm2\n\t"
1358 "pfadd 3080(%0, %%"REG_S
"), %%mm3\n\t"
1359 "pfadd %%mm7, %%mm0 \n\t"
1360 "pfadd %%mm7, %%mm1 \n\t"
1361 "pfadd 4096(%0, %%"REG_S
"), %%mm2\n\t"
1362 "pfadd 4104(%0, %%"REG_S
"), %%mm3\n\t"
1363 "pfadd %%mm2, %%mm0 \n\t"
1364 "pfadd %%mm3, %%mm1 \n\t"
/* result overwrites the first plane */
1365 "movq %%mm0, (%0, %%"REG_S
") \n\t"
1366 "movq %%mm1, 8(%0, %%"REG_S
") \n\t"
1367 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1369 :: "r" (samples
+256), "m" (bias
)
/*
 * Fold the middle of three planes into the two outer ones:
 *     common         = samples[i+256] + bias
 *     samples[i]     = samples[i]     + common
 *     samples[i+256] = samples[i+512] + common
 * The second output lands in plane 1, so plane 2 is only read.
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm).
 */
1374 static void mix3to2_3dnow (sample_t
* samples
, sample_t bias
)
1377 "movd %1, %%mm7 \n\t"
1378 "punpckldq %1, %%mm7 \n\t"
1379 "mov $-1024, %%"REG_S
" \n\t"
1382 "movq 1024(%0, %%"REG_S
"), %%mm0\n\t"
1383 "movq 1032(%0, %%"REG_S
"), %%mm1\n\t"
1384 "pfadd %%mm7, %%mm0 \n\t" //common
1385 "pfadd %%mm7, %%mm1 \n\t" //common
1386 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1387 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1388 "movq 2048(%0, %%"REG_S
"), %%mm4\n\t"
1389 "movq 2056(%0, %%"REG_S
"), %%mm5\n\t"
1390 "pfadd %%mm0, %%mm2 \n\t"
1391 "pfadd %%mm1, %%mm3 \n\t"
1392 "pfadd %%mm0, %%mm4 \n\t"
1393 "pfadd %%mm1, %%mm5 \n\t"
1394 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1395 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1396 "movq %%mm4, 1024(%0, %%"REG_S
")\n\t"
1397 "movq %%mm5, 1032(%0, %%"REG_S
")\n\t"
1398 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1400 :: "r" (samples
+256), "m" (bias
)
/*
 * Add the plane that follows `right` (plus bias) to both left and right:
 *     common   = right[i+256] + bias
 *     left[i]  = left[i]  + common
 *     right[i] = right[i] + common
 * The extra plane is read 1024 bytes past right (256 samples if sample_t
 * is 4 bytes -- TODO confirm). left and right are independent pointers.
 */
1405 static void mix21to2_3dnow (sample_t
* left
, sample_t
* right
, sample_t bias
)
1408 "movd %2, %%mm7 \n\t"
1409 "punpckldq %2, %%mm7 \n\t"
1410 "mov $-1024, %%"REG_S
" \n\t"
1413 "movq 1024(%1, %%"REG_S
"), %%mm0\n\t"
1414 "movq 1032(%1, %%"REG_S
"), %%mm1\n\t"
1415 "pfadd %%mm7, %%mm0 \n\t" //common
1416 "pfadd %%mm7, %%mm1 \n\t" //common
1417 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1418 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1419 "movq (%1, %%"REG_S
"), %%mm4 \n\t"
1420 "movq 8(%1, %%"REG_S
"), %%mm5 \n\t"
1421 "pfadd %%mm0, %%mm2 \n\t"
1422 "pfadd %%mm1, %%mm3 \n\t"
1423 "pfadd %%mm0, %%mm4 \n\t"
1424 "pfadd %%mm1, %%mm5 \n\t"
1425 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1426 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1427 "movq %%mm4, (%1, %%"REG_S
") \n\t"
1428 "movq %%mm5, 8(%1, %%"REG_S
") \n\t"
1429 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = left + 256, %1 = right + 256, %2 = bias (memory) */
1431 :: "r" (left
+256), "r" (right
+256), "m" (bias
)
/*
 * Mix the third plane into the first two with opposite signs:
 *     surround       = samples[i+512]
 *     samples[i]     = samples[i]     + bias - surround
 *     samples[i+256] = samples[i+256] + bias + surround
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm).
 */
1436 static void mix21toS_3dnow (sample_t
* samples
, sample_t bias
)
1439 "movd %1, %%mm7 \n\t"
1440 "punpckldq %1, %%mm7 \n\t"
1441 "mov $-1024, %%"REG_S
" \n\t"
1444 "movq 2048(%0, %%"REG_S
"), %%mm0\n\t" // surround
1445 "movq 2056(%0, %%"REG_S
"), %%mm1\n\t" // surround
1446 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1447 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1448 "movq 1024(%0, %%"REG_S
"), %%mm4\n\t"
1449 "movq 1032(%0, %%"REG_S
"), %%mm5\n\t"
1450 "pfadd %%mm7, %%mm2 \n\t"
1451 "pfadd %%mm7, %%mm3 \n\t"
1452 "pfadd %%mm7, %%mm4 \n\t"
1453 "pfadd %%mm7, %%mm5 \n\t"
/* surround is subtracted from plane 0 and added to plane 1 */
1454 "pfsub %%mm0, %%mm2 \n\t"
1455 "pfsub %%mm1, %%mm3 \n\t"
1456 "pfadd %%mm0, %%mm4 \n\t"
1457 "pfadd %%mm1, %%mm5 \n\t"
1458 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1459 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1460 "movq %%mm4, 1024(%0, %%"REG_S
")\n\t"
1461 "movq %%mm5, 1032(%0, %%"REG_S
")\n\t"
1462 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1464 :: "r" (samples
+256), "m" (bias
)
/*
 * Fold planes 1 and 3 (plus bias) into two output planes:
 *     common         = samples[i+256] + samples[i+768] + bias
 *     samples[i]     = samples[i]     + common
 *     samples[i+256] = samples[i+512] + common
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm).
 */
1469 static void mix31to2_3dnow (sample_t
* samples
, sample_t bias
)
1472 "movd %1, %%mm7 \n\t"
1473 "punpckldq %1, %%mm7 \n\t"
1474 "mov $-1024, %%"REG_S
" \n\t"
1477 "movq 1024(%0, %%"REG_S
"), %%mm0\n\t"
1478 "movq 1032(%0, %%"REG_S
"), %%mm1\n\t"
1479 "pfadd 3072(%0, %%"REG_S
"), %%mm0\n\t"
1480 "pfadd 3080(%0, %%"REG_S
"), %%mm1\n\t"
1481 "pfadd %%mm7, %%mm0 \n\t" // common
1482 "pfadd %%mm7, %%mm1 \n\t" // common
1483 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1484 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1485 "movq 2048(%0, %%"REG_S
"), %%mm4\n\t"
1486 "movq 2056(%0, %%"REG_S
"), %%mm5\n\t"
1487 "pfadd %%mm0, %%mm2 \n\t"
1488 "pfadd %%mm1, %%mm3 \n\t"
1489 "pfadd %%mm0, %%mm4 \n\t"
1490 "pfadd %%mm1, %%mm5 \n\t"
1491 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1492 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1493 "movq %%mm4, 1024(%0, %%"REG_S
")\n\t"
1494 "movq %%mm5, 1032(%0, %%"REG_S
")\n\t"
1495 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1497 :: "r" (samples
+256), "m" (bias
)
/*
 * Fold plane 1 into both outputs and plane 3 with opposite signs:
 *     common         = samples[i+256] + bias
 *     surround       = samples[i+768]
 *     samples[i]     = samples[i]     + common - surround
 *     samples[i+256] = samples[i+512] + common + surround
 * mm0/mm1 are reused: they hold common first and are then reloaded with
 * the surround plane. Planes are addressed 1024 bytes apart (256 samples
 * if sample_t is 4 bytes -- TODO confirm).
 */
1502 static void mix31toS_3dnow (sample_t
* samples
, sample_t bias
)
1505 "movd %1, %%mm7 \n\t"
1506 "punpckldq %1, %%mm7 \n\t"
1507 "mov $-1024, %%"REG_S
" \n\t"
1510 "movq 1024(%0, %%"REG_S
"), %%mm0\n\t"
1511 "movq 1032(%0, %%"REG_S
"), %%mm1\n\t"
1512 "pfadd %%mm7, %%mm0 \n\t" // common
1513 "pfadd %%mm7, %%mm1 \n\t" // common
1514 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1515 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1516 "movq 2048(%0, %%"REG_S
"), %%mm4\n\t"
1517 "movq 2056(%0, %%"REG_S
"), %%mm5\n\t"
1518 "pfadd %%mm0, %%mm2 \n\t"
1519 "pfadd %%mm1, %%mm3 \n\t"
1520 "pfadd %%mm0, %%mm4 \n\t"
1521 "pfadd %%mm1, %%mm5 \n\t"
1522 "movq 3072(%0, %%"REG_S
"), %%mm0\n\t" // surround
1523 "movq 3080(%0, %%"REG_S
"), %%mm1\n\t" // surround
1524 "pfsub %%mm0, %%mm2 \n\t"
1525 "pfsub %%mm1, %%mm3 \n\t"
1526 "pfadd %%mm0, %%mm4 \n\t"
1527 "pfadd %%mm1, %%mm5 \n\t"
1528 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1529 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1530 "movq %%mm4, 1024(%0, %%"REG_S
")\n\t"
1531 "movq %%mm5, 1032(%0, %%"REG_S
")\n\t"
1532 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1534 :: "r" (samples
+256), "m" (bias
)
/*
 * Combine planes 2 and 3 and mix them into planes 0 and 1 with opposite
 * signs:
 *     surround       = samples[i+512] + samples[i+768]
 *     samples[i]     = samples[i]     + bias - surround
 *     samples[i+256] = samples[i+256] + bias + surround
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm).
 */
1539 static void mix22toS_3dnow (sample_t
* samples
, sample_t bias
)
1542 "movd %1, %%mm7 \n\t"
1543 "punpckldq %1, %%mm7 \n\t"
1544 "mov $-1024, %%"REG_S
" \n\t"
1547 "movq 2048(%0, %%"REG_S
"), %%mm0\n\t"
1548 "movq 2056(%0, %%"REG_S
"), %%mm1\n\t"
1549 "pfadd 3072(%0, %%"REG_S
"), %%mm0\n\t" // surround
1550 "pfadd 3080(%0, %%"REG_S
"), %%mm1\n\t" // surround
1551 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1552 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1553 "movq 1024(%0, %%"REG_S
"), %%mm4\n\t"
1554 "movq 1032(%0, %%"REG_S
"), %%mm5\n\t"
1555 "pfadd %%mm7, %%mm2 \n\t"
1556 "pfadd %%mm7, %%mm3 \n\t"
1557 "pfadd %%mm7, %%mm4 \n\t"
1558 "pfadd %%mm7, %%mm5 \n\t"
/* surround is subtracted from plane 0 and added to plane 1 */
1559 "pfsub %%mm0, %%mm2 \n\t"
1560 "pfsub %%mm1, %%mm3 \n\t"
1561 "pfadd %%mm0, %%mm4 \n\t"
1562 "pfadd %%mm1, %%mm5 \n\t"
1563 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1564 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1565 "movq %%mm4, 1024(%0, %%"REG_S
")\n\t"
1566 "movq %%mm5, 1032(%0, %%"REG_S
")\n\t"
1567 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1569 :: "r" (samples
+256), "m" (bias
)
/*
 * Reduce five planes to two:
 *     common         = samples[i+256] + bias
 *     samples[i]     = common + samples[i]     + samples[i+768]
 *     samples[i+256] = common + samples[i+512] + samples[i+1024]
 * common is computed once in mm0/mm1 and copied to mm2/mm3 so each output
 * accumulates onto its own copy. Planes are addressed 1024 bytes apart
 * (256 samples if sample_t is 4 bytes -- TODO confirm).
 */
1574 static void mix32to2_3dnow (sample_t
* samples
, sample_t bias
)
1577 "movd %1, %%mm7 \n\t"
1578 "punpckldq %1, %%mm7 \n\t"
1579 "mov $-1024, %%"REG_S
" \n\t"
1582 "movq 1024(%0, %%"REG_S
"), %%mm0\n\t"
1583 "movq 1032(%0, %%"REG_S
"), %%mm1\n\t"
1584 "pfadd %%mm7, %%mm0 \n\t" // common
1585 "pfadd %%mm7, %%mm1 \n\t" // common
1586 "movq %%mm0, %%mm2 \n\t" // common
1587 "movq %%mm1, %%mm3 \n\t" // common
1588 "pfadd (%0, %%"REG_S
"), %%mm0 \n\t"
1589 "pfadd 8(%0, %%"REG_S
"), %%mm1 \n\t"
1590 "pfadd 2048(%0, %%"REG_S
"), %%mm2\n\t"
1591 "pfadd 2056(%0, %%"REG_S
"), %%mm3\n\t"
1592 "pfadd 3072(%0, %%"REG_S
"), %%mm0\n\t"
1593 "pfadd 3080(%0, %%"REG_S
"), %%mm1\n\t"
1594 "pfadd 4096(%0, %%"REG_S
"), %%mm2\n\t"
1595 "pfadd 4104(%0, %%"REG_S
"), %%mm3\n\t"
1596 "movq %%mm0, (%0, %%"REG_S
") \n\t"
1597 "movq %%mm1, 8(%0, %%"REG_S
") \n\t"
1598 "movq %%mm2, 1024(%0, %%"REG_S
")\n\t"
1599 "movq %%mm3, 1032(%0, %%"REG_S
")\n\t"
1600 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1602 :: "r" (samples
+256), "m" (bias
)
1607 /* todo: should be optimized better */
/*
 * Reduce five planes to two, mixing the last two planes with opposite
 * signs:
 *     common         = samples[i+256] + bias
 *     surround       = samples[i+768] + samples[i+1024]
 *     samples[i]     = samples[i]     - surround + common
 *     samples[i+256] = samples[i+512] + surround + common
 * mm7 is also used as a data register further down, so the bias is loaded
 * after the index is initialised rather than before it.
 * NOTE(review): presumably that reload sits inside the loop and is what
 * the "todo" refers to -- confirm against the loop label.
 * Planes are addressed 1024 bytes apart (256 samples if sample_t is
 * 4 bytes -- TODO confirm).
 */
1608 static void mix32toS_3dnow (sample_t
* samples
, sample_t bias
)
1611 "mov $-1024, %%"REG_S
" \n\t"
1614 "movd %1, %%mm7 \n\t"
1615 "punpckldq %1, %%mm7 \n\t"
1616 "movq 1024(%0, %%"REG_S
"), %%mm0\n\t"
1617 "movq 1032(%0, %%"REG_S
"), %%mm1\n\t"
1618 "movq 3072(%0, %%"REG_S
"), %%mm4\n\t"
1619 "movq 3080(%0, %%"REG_S
"), %%mm5\n\t"
1620 "pfadd %%mm7, %%mm0 \n\t" // common
1621 "pfadd %%mm7, %%mm1 \n\t" // common
1622 "pfadd 4096(%0, %%"REG_S
"), %%mm4\n\t" // surround
1623 "pfadd 4104(%0, %%"REG_S
"), %%mm5\n\t" // surround
1624 "movq (%0, %%"REG_S
"), %%mm2 \n\t"
1625 "movq 8(%0, %%"REG_S
"), %%mm3 \n\t"
1626 "movq 2048(%0, %%"REG_S
"), %%mm6\n\t"
/* mm7 stops holding the bias from here on */
1627 "movq 2056(%0, %%"REG_S
"), %%mm7\n\t"
1628 "pfsub %%mm4, %%mm2 \n\t"
1629 "pfsub %%mm5, %%mm3 \n\t"
1630 "pfadd %%mm4, %%mm6 \n\t"
1631 "pfadd %%mm5, %%mm7 \n\t"
1632 "pfadd %%mm0, %%mm2 \n\t"
1633 "pfadd %%mm1, %%mm3 \n\t"
1634 "pfadd %%mm0, %%mm6 \n\t"
1635 "pfadd %%mm1, %%mm7 \n\t"
1636 "movq %%mm2, (%0, %%"REG_S
") \n\t"
1637 "movq %%mm3, 8(%0, %%"REG_S
") \n\t"
1638 "movq %%mm6, 1024(%0, %%"REG_S
")\n\t"
1639 "movq %%mm7, 1032(%0, %%"REG_S
")\n\t"
1640 "add $16, %%"REG_S
" \n\t"
/* operands: %0 = samples + 256, %1 = bias (memory) */
1642 :: "r" (samples
+256), "m" (bias
)
/*
 * Sum two adjacent source planes plus bias and store the result in dest:
 *     dest[i] = src[i] + src[i+256] + bias
 * Unlike mix2to1_3dnow, dest is only written, never read.
 * The second source plane is read 1024 bytes past src (256 samples if
 * sample_t is 4 bytes -- TODO confirm). Each pass covers 32 bytes.
 */
1647 static void move2to1_3dnow (sample_t
* src
, sample_t
* dest
, sample_t bias
)
1650 "movd %2, %%mm7 \n\t"
1651 "punpckldq %2, %%mm7 \n\t"
1652 "mov $-1024, %%"REG_S
" \n\t"
1655 "movq (%0, %%"REG_S
"), %%mm0 \n\t"
1656 "movq 8(%0, %%"REG_S
"), %%mm1 \n\t"
1657 "movq 16(%0, %%"REG_S
"), %%mm2 \n\t"
1658 "movq 24(%0, %%"REG_S
"), %%mm3 \n\t"
1659 "pfadd 1024(%0, %%"REG_S
"), %%mm0\n\t"
1660 "pfadd 1032(%0, %%"REG_S
"), %%mm1\n\t"
1661 "pfadd 1040(%0, %%"REG_S
"), %%mm2\n\t"
1662 "pfadd 1048(%0, %%"REG_S
"), %%mm3\n\t"
1663 "pfadd %%mm7, %%mm0 \n\t"
1664 "pfadd %%mm7, %%mm1 \n\t"
1665 "pfadd %%mm7, %%mm2 \n\t"
1666 "pfadd %%mm7, %%mm3 \n\t"
/* stores go through %1 (dest); all loads above went through %0 (src) */
1667 "movq %%mm0, (%1, %%"REG_S
") \n\t"
1668 "movq %%mm1, 8(%1, %%"REG_S
") \n\t"
1669 "movq %%mm2, 16(%1, %%"REG_S
") \n\t"
1670 "movq %%mm3, 24(%1, %%"REG_S
") \n\t"
1671 "add $32, %%"REG_S
" \n\t"
/* operands: %0 = src + 256, %1 = dest + 256, %2 = bias (memory) */
1673 :: "r" (src
+256), "r" (dest
+256), "m" (bias
)
/*
 * 3DNow! downmix dispatcher.
 *
 * Switches on CONVERT (acmod, output & A52_CHANNEL_MASK) and applies the
 * matching mixing helper (or a plain memcpy) to `samples` in place. The
 * buffer is treated as consecutive planes of 256 sample_t each, addressed
 * as samples, samples + 256, samples + 512, ...
 *
 * samples  channel planes, modified in place
 * acmod    source channel configuration (A52_* constant)
 * output   requested output configuration; only the A52_CHANNEL_MASK bits
 *          take part in the dispatch
 * bias     constant forwarded to every mixing helper
 * clev, slev
 *          NOTE(review): presumably the centre / surround mix levels that
 *          gate the goto shortcuts below -- confirm against the guards.
 *
 * The final femms leaves MMX/3DNow! state; its "memory" clobber tells the
 * compiler that memory may have changed.
 */
1678 static void downmix_3dnow (sample_t
* samples
, int acmod
, int output
, sample_t bias
,
1679 sample_t clev
, sample_t slev
)
1681 switch (CONVERT (acmod
, output
& A52_CHANNEL_MASK
)) {
/* dual mono, second channel selected: copy plane 1 over plane 0 */
1683 case CONVERT (A52_CHANNEL
, A52_CHANNEL2
):
1684 memcpy (samples
, samples
+ 256, 256 * sizeof (sample_t
));
/* ---- outputs to mono ---- */
1687 case CONVERT (A52_CHANNEL
, A52_MONO
):
1688 case CONVERT (A52_STEREO
, A52_MONO
):
1690 mix2to1_3dnow (samples
, samples
+ 256, bias
);
1693 case CONVERT (A52_2F1R
, A52_MONO
):
1695 goto mix_2to1_3dnow
;
1696 case CONVERT (A52_3F
, A52_MONO
):
1698 mix3to1_3dnow (samples
, bias
);
1701 case CONVERT (A52_3F1R
, A52_MONO
):
1703 goto mix_3to1_3dnow
;
/* NOTE(review): the goto here is presumably a guarded shortcut to the
 * cheaper mix; the full 4-plane mix follows -- confirm the guard. */
1704 case CONVERT (A52_2F2R
, A52_MONO
):
1706 goto mix_2to1_3dnow
;
1707 mix4to1_3dnow (samples
, bias
);
/* NOTE(review): same shortcut pattern, with the full 5-plane mix after it */
1710 case CONVERT (A52_3F2R
, A52_MONO
):
1712 goto mix_3to1_3dnow
;
1713 mix5to1_3dnow (samples
, bias
);
/* ---- outputs to two channels (A52_STEREO / A52_DOLBY) ---- */
/* mono source: duplicate plane 0 into plane 1 */
1716 case CONVERT (A52_MONO
, A52_DOLBY
):
1717 memcpy (samples
+ 256, samples
, 256 * sizeof (sample_t
));
1720 case CONVERT (A52_3F
, A52_STEREO
):
1721 case CONVERT (A52_3F
, A52_DOLBY
):
1723 mix3to2_3dnow (samples
, bias
);
/* plane 2 (+ bias) is added to both plane 0 and plane 1 */
1726 case CONVERT (A52_2F1R
, A52_STEREO
):
1729 mix21to2_3dnow (samples
, samples
+ 256, bias
);
1732 case CONVERT (A52_2F1R
, A52_DOLBY
):
1733 mix21toS_3dnow (samples
, bias
);
1736 case CONVERT (A52_3F1R
, A52_STEREO
):
1738 goto mix_3to2_3dnow
;
1739 mix31to2_3dnow (samples
, bias
);
1742 case CONVERT (A52_3F1R
, A52_DOLBY
):
1743 mix31toS_3dnow (samples
, bias
);
/* plane 0 += plane 2, plane 1 += plane 3 (each with bias) */
1746 case CONVERT (A52_2F2R
, A52_STEREO
):
1749 mix2to1_3dnow (samples
, samples
+ 512, bias
);
1750 mix2to1_3dnow (samples
+ 256, samples
+ 768, bias
);
1753 case CONVERT (A52_2F2R
, A52_DOLBY
):
1754 mix22toS_3dnow (samples
, bias
);
1757 case CONVERT (A52_3F2R
, A52_STEREO
):
1759 goto mix_3to2_3dnow
;
1760 mix32to2_3dnow (samples
, bias
);
1763 case CONVERT (A52_3F2R
, A52_DOLBY
):
1764 mix32toS_3dnow (samples
, bias
);
/* ---- outputs to three or more channels ---- */
/* plane 3 (+ bias) is added to both plane 0 and plane 2 */
1767 case CONVERT (A52_3F1R
, A52_3F
):
1770 mix21to2_3dnow (samples
, samples
+ 512, bias
);
/* plane 0 += plane 3, plane 2 += plane 4 (each with bias) */
1773 case CONVERT (A52_3F2R
, A52_3F
):
1776 mix2to1_3dnow (samples
, samples
+ 768, bias
);
1777 mix2to1_3dnow (samples
+ 512, samples
+ 1024, bias
);
/* after the 3-to-2 mix, move plane 3 down into plane 2 */
1780 case CONVERT (A52_3F1R
, A52_2F1R
):
1781 mix3to2_3dnow (samples
, bias
);
1782 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
/* plane 2 += plane 3 + bias */
1785 case CONVERT (A52_2F2R
, A52_2F1R
):
1786 mix2to1_3dnow (samples
+ 512, samples
+ 768, bias
);
/* after the 3-to-2 mix, plane 2 = plane 3 + plane 4 + bias */
1789 case CONVERT (A52_3F2R
, A52_2F1R
):
1790 mix3to2_3dnow (samples
, bias
); // FIXME: possible bug? (output does not seem to be used)
1791 move2to1_3dnow (samples
+ 768, samples
+ 512, bias
);
/* plane 3 += plane 4 + bias */
1794 case CONVERT (A52_3F2R
, A52_3F1R
):
1795 mix2to1_3dnow (samples
+ 768, samples
+ 1024, bias
);
/* duplicate plane 2 into plane 3 */
1798 case CONVERT (A52_2F1R
, A52_2F2R
):
1799 memcpy (samples
+ 768, samples
+ 512, 256 * sizeof (sample_t
));
1802 case CONVERT (A52_3F1R
, A52_2F2R
):
1803 mix3to2_3dnow (samples
, bias
);
1804 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
/* after the 3-to-2 mix, shift planes 3 and 4 down by one plane each */
1807 case CONVERT (A52_3F2R
, A52_2F2R
):
1808 mix3to2_3dnow (samples
, bias
);
1809 memcpy (samples
+ 512, samples
+ 768, 256 * sizeof (sample_t
));
1810 memcpy (samples
+ 768, samples
+ 1024, 256 * sizeof (sample_t
));
/* duplicate plane 3 into plane 4 */
1813 case CONVERT (A52_3F1R
, A52_3F2R
):
1814 memcpy (samples
+ 1024, samples
+ 768, 256 * sizeof (sample_t
));
/* leave MMX/3DNow! state before returning to the caller */
1817 __asm
__volatile("femms":::"memory");
1820 #endif // ARCH_X86 || ARCH_X86_64