libwmapro: use codeclib asm macros for XNPROD31, speeds up decoding of a 55kbps wma...
[maemo-rb.git] / apps / codecs / libwmapro / wmapro_mdct.c
blob 3f5439e5f318a0c8ee1681e8125ba28f44144e2f
1 #include <inttypes.h>
2 #include "wmapro_mdct.h"
3 #include "mdct_tables.h" /* for sincos_lookup_wmap */
4 #include "../lib/mdct_lookup.h" /* for revtab */
5 #include "../lib/fft.h" /* for FFT data structures */
6 #include "codeclib.h"
8 /* for XNPROD31 */
9 #include "asm_arm.h"
10 #include "asm_mcf5249.h"
11 #include "codeclib_misc.h"
13 #include "wmapro_math.h"
15 void imdct_half(unsigned int nbits, int32_t *output, const int32_t *input){
16 int k, n8, n4, n2, n, j;
17 const int32_t *in1, *in2;
18 FFTComplex *z = (FFTComplex *)output;
20 n = 1 << nbits;
21 n2 = n >> 1;
22 n4 = n >> 2;
23 n8 = n >> 3;
25 const int32_t *T = sincos_lookup_wmap + ((n2) - (1<<7));
27 /* pre rotation */
28 const int revtab_shift = (14- nbits);
29 in1 = input;
30 in2 = input + n2 - 1;
31 for(k = 0; k < n4; k++) {
32 j=revtab[k]>>revtab_shift;
33 XNPROD31(*in2<<2, *in1<<2, T[1]<<14, T[0]<<14, &z[j].re, &z[j].im );
34 in1 += 2;
35 in2 -= 2;
36 T += 2;
39 ff_fft_calc_c(nbits-2, z);
41 /* post rotation + reordering */
42 T = sincos_lookup_wmap + ((n2) - (1<<7)) + n4;
43 const int32_t *V = T;
44 for(k = 0; k < n8; k++) {
45 int32_t r0, i0, r1, i1;
46 XNPROD31(z[n8-k-1].im, z[n8-k-1].re, T[0]<<8, T[1]<<8, &r0, &i1 );
47 XNPROD31(z[n8+k ].im, z[n8+k ].re, V[0]<<8, V[1]<<8, &r1, &i0 );
48 z[n8-k-1].re = r0;
49 z[n8-k-1].im = i0;
50 z[n8+k ].re = r1;
51 z[n8+k ].im = i1;
52 T-=2;
53 V+=2;