1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
39 #include "mdct_lookup.h"
40 #include <codecs/lib/codeclib.h>
42 #if defined(CPU_ARM) && CONFIG_CPU != S3C2440
43 /* C code is faster on S3C2440 */
45 extern void mdct_butterfly_32(int32_t *x
);
46 extern void mdct_butterfly_generic_loop(int32_t *x1
, int32_t *x2
,
47 const int32_t *T0
, int step
,
50 static inline void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
51 mdct_butterfly_generic_loop(x
+ points
, x
+ (points
>>1), sincos_lookup0
, step
, sincos_lookup0
+1024);
56 /* 8 point butterfly (in place) */
57 static inline void mdct_butterfly_8(int32_t *x
){
58 register int32_t r0
= x
[4] + x
[0];
59 register int32_t r1
= x
[4] - x
[0];
60 register int32_t r2
= x
[5] + x
[1];
61 register int32_t r3
= x
[5] - x
[1];
62 register int32_t r4
= x
[6] + x
[2];
63 register int32_t r5
= x
[6] - x
[2];
64 register int32_t r6
= x
[7] + x
[3];
65 register int32_t r7
= x
[7] - x
[3];
78 /* 16 point butterfly (in place, 4 register) */
79 static inline void mdct_butterfly_16(int32_t *x
){
81 register int32_t r0
, r1
;
83 r0
= x
[ 0] - x
[ 8]; x
[ 8] += x
[ 0];
84 r1
= x
[ 1] - x
[ 9]; x
[ 9] += x
[ 1];
85 x
[ 0] = MULT31((r0
+ r1
) , cPI2_8
);
86 x
[ 1] = MULT31((r1
- r0
) , cPI2_8
);
89 r0
= x
[10] - x
[ 2]; x
[10] += x
[ 2];
90 r1
= x
[ 3] - x
[11]; x
[11] += x
[ 3];
91 x
[ 2] = r1
; x
[ 3] = r0
;
94 r0
= x
[12] - x
[ 4]; x
[12] += x
[ 4];
95 r1
= x
[13] - x
[ 5]; x
[13] += x
[ 5];
96 x
[ 4] = MULT31((r0
- r1
) , cPI2_8
);
97 x
[ 5] = MULT31((r0
+ r1
) , cPI2_8
);
100 r0
= x
[14] - x
[ 6]; x
[14] += x
[ 6];
101 r1
= x
[15] - x
[ 7]; x
[15] += x
[ 7];
102 x
[ 6] = r0
; x
[ 7] = r1
;
106 mdct_butterfly_8(x
+8);
109 /* 32 point butterfly (in place, 4 register) */
110 static inline void mdct_butterfly_32(int32_t *x
){
112 register int32_t r0
, r1
;
114 r0
= x
[30] - x
[14]; x
[30] += x
[14];
115 r1
= x
[31] - x
[15]; x
[31] += x
[15];
116 x
[14] = r0
; x
[15] = r1
;
119 r0
= x
[28] - x
[12]; x
[28] += x
[12];
120 r1
= x
[29] - x
[13]; x
[29] += x
[13];
121 XNPROD31( r0
, r1
, cPI1_8
, cPI3_8
, &x
[12], &x
[13] );
124 r0
= x
[26] - x
[10]; x
[26] += x
[10];
125 r1
= x
[27] - x
[11]; x
[27] += x
[11];
126 x
[10] = MULT31((r0
- r1
) , cPI2_8
);
127 x
[11] = MULT31((r0
+ r1
) , cPI2_8
);
130 r0
= x
[24] - x
[ 8]; x
[24] += x
[ 8];
131 r1
= x
[25] - x
[ 9]; x
[25] += x
[ 9];
132 XNPROD31( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 8], &x
[ 9] );
135 r0
= x
[22] - x
[ 6]; x
[22] += x
[ 6];
136 r1
= x
[ 7] - x
[23]; x
[23] += x
[ 7];
137 x
[ 6] = r1
; x
[ 7] = r0
;
140 r0
= x
[ 4] - x
[20]; x
[20] += x
[ 4];
141 r1
= x
[ 5] - x
[21]; x
[21] += x
[ 5];
142 XPROD31 ( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 4], &x
[ 5] );
145 r0
= x
[ 2] - x
[18]; x
[18] += x
[ 2];
146 r1
= x
[ 3] - x
[19]; x
[19] += x
[ 3];
147 x
[ 2] = MULT31((r1
+ r0
) , cPI2_8
);
148 x
[ 3] = MULT31((r1
- r0
) , cPI2_8
);
151 r0
= x
[ 0] - x
[16]; x
[16] += x
[ 0];
152 r1
= x
[ 1] - x
[17]; x
[17] += x
[ 1];
153 XPROD31 ( r0
, r1
, cPI1_8
, cPI3_8
, &x
[ 0], &x
[ 1] );
156 mdct_butterfly_16(x
);
157 mdct_butterfly_16(x
+16);
160 /* N/stage point generic N stage butterfly (in place, 4 register) */
161 void mdct_butterfly_generic(int32_t *x
,int points
, int step
)
162 ICODE_ATTR_TREMOR_MDCT
;
163 void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
164 const int32_t *T
= sincos_lookup0
;
165 int32_t *x1
= x
+ points
- 8;
166 int32_t *x2
= x
+ (points
>>1) - 8;
173 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
174 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
175 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
176 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
177 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
178 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
180 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
181 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
182 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
183 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
184 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
185 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
188 }while(T
<sincos_lookup0
+1024);
190 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
191 r1
= x1
[7] - x2
[7]; x1
[7] += x2
[7];
192 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
193 r3
= x1
[5] - x2
[5]; x1
[5] += x2
[5];
194 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
195 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
197 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
198 r1
= x1
[3] - x2
[3]; x1
[3] += x2
[3];
199 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
200 r3
= x1
[1] - x2
[1]; x1
[1] += x2
[1];
201 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
202 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
205 }while(T
>sincos_lookup0
);
207 r0
= x2
[6] - x1
[6]; x1
[6] += x2
[6];
208 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
209 r2
= x2
[4] - x1
[4]; x1
[4] += x2
[4];
210 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
211 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
212 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
214 r0
= x2
[2] - x1
[2]; x1
[2] += x2
[2];
215 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
216 r2
= x2
[0] - x1
[0]; x1
[0] += x2
[0];
217 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
218 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
219 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
222 }while(T
<sincos_lookup0
+1024);
224 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
225 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
226 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
227 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
228 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
229 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
231 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
232 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
233 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
234 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
235 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
236 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
239 }while(T
>sincos_lookup0
);
244 static inline void mdct_butterflies(int32_t *x
,int points
,int shift
) {
249 for(i
=0;--stages
>0;i
++){
250 for(j
=0;j
<(1<<i
);j
++)
251 mdct_butterfly_generic(x
+(points
>>i
)*j
,points
>>i
,4<<(i
+shift
));
254 for(j
=0;j
<points
;j
+=32)
255 mdct_butterfly_32(x
+j
);
259 static const unsigned char bitrev
[16] ICONST_ATTR
=
260 {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
262 static inline int bitrev12(int x
){
263 return bitrev
[x
>>8]|(bitrev
[(x
&0x0f0)>>4]<<4)|(((int)bitrev
[x
&0x00f])<<8);
266 static inline void mdct_bitreverse(int32_t *x
,int n
,int step
,int shift
) {
270 int32_t *w1
= x
= w0
+(n
>>1);
271 const int32_t *T
= (step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
272 const int32_t *Ttop
= T
+1024;
276 register int32_t r3
= bitrev12(bit
++);
277 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
278 int32_t *x1
= x
+ (r3
>>shift
);
280 register int32_t r0
= x0
[0] + x1
[0];
281 register int32_t r1
= x1
[1] - x0
[1];
283 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
287 r0
= (x0
[1] + x1
[1])>>1;
288 r1
= (x0
[0] - x1
[0])>>1;
294 r3
= bitrev12(bit
++);
295 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
296 x1
= x
+ (r3
>>shift
);
301 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
303 r0
= (x0
[1] + x1
[1])>>1;
304 r1
= (x0
[0] - x1
[0])>>1;
313 register int32_t r3
= bitrev12(bit
++);
314 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
315 int32_t *x1
= x
+ (r3
>>shift
);
317 register int32_t r0
= x0
[0] + x1
[0];
318 register int32_t r1
= x1
[1] - x0
[1];
320 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
324 r0
= (x0
[1] + x1
[1])>>1;
325 r1
= (x0
[0] - x1
[0])>>1;
331 r3
= bitrev12(bit
++);
332 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
333 x1
= x
+ (r3
>>shift
);
338 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
340 r0
= (x0
[1] + x1
[1])>>1;
341 r1
= (x0
[0] - x1
[0])>>1;
352 void mdct_backward(int n
, int32_t *in
, int32_t *out
)
353 ICODE_ATTR_TREMOR_MDCT
;
354 void mdct_backward(int n
, int32_t *in
, int32_t *out
) {
363 for (shift
=6;!(n
&(1<<shift
));shift
++);
375 XPROD31( iX
[4], iX
[6], T
[0], T
[1], &oX
[2], &oX
[3] ); T
+=step
;
376 XPROD31( iX
[0], iX
[2], T
[0], T
[1], &oX
[0], &oX
[1] ); T
+=step
;
381 XPROD31( iX
[4], iX
[6], T
[1], T
[0], &oX
[2], &oX
[3] ); T
-=step
;
382 XPROD31( iX
[0], iX
[2], T
[1], T
[0], &oX
[0], &oX
[1] ); T
-=step
;
391 T
+=step
; XNPROD31( iX
[6], iX
[4], T
[0], T
[1], &oX
[0], &oX
[1] );
392 T
+=step
; XNPROD31( iX
[2], iX
[0], T
[0], T
[1], &oX
[2], &oX
[3] );
397 T
-=step
; XNPROD31( iX
[6], iX
[4], T
[1], T
[0], &oX
[0], &oX
[1] );
398 T
-=step
; XNPROD31( iX
[2], iX
[0], T
[1], T
[0], &oX
[2], &oX
[3] );
403 mdct_butterflies(out
+n2
,n2
,shift
);
404 mdct_bitreverse(out
,n
,step
,shift
);
405 /* rotate + window */
409 int32_t *oX1
=out
+n2
+n4
;
410 int32_t *oX2
=out
+n2
+n4
;
415 T
=(step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
418 XPROD31( iX
[0], -iX
[1], T
[0], T
[1], &oX1
[3], &oX2
[0] ); T
+=step
;
419 XPROD31( iX
[2], -iX
[3], T
[0], T
[1], &oX1
[2], &oX2
[1] ); T
+=step
;
420 XPROD31( iX
[4], -iX
[5], T
[0], T
[1], &oX1
[1], &oX2
[2] ); T
+=step
;
421 XPROD31( iX
[6], -iX
[7], T
[0], T
[1], &oX1
[0], &oX2
[3] ); T
+=step
;
429 /* linear interpolation between table values: offset=0.5, step=1 */
430 register int32_t t0
,t1
,v0
,v1
;
438 t0
+= (v0
= (*V
++)>>1);
439 t1
+= (v1
= (*V
++)>>1);
440 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
441 v0
+= (t0
= (*T
++)>>1);
442 v1
+= (t1
= (*T
++)>>1);
443 XPROD31( iX
[2], -iX
[3], v0
, v1
, &oX1
[2], &oX2
[1] );
444 t0
+= (v0
= (*V
++)>>1);
445 t1
+= (v1
= (*V
++)>>1);
446 XPROD31( iX
[4], -iX
[5], t0
, t1
, &oX1
[1], &oX2
[2] );
447 v0
+= (t0
= (*T
++)>>1);
448 v1
+= (t1
= (*T
++)>>1);
449 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
458 /* linear interpolation between table values: offset=0.25, step=0.5 */
459 register int32_t t0
,t1
,v0
,v1
,q0
,q1
;
469 t0
+= (q0
= (v0
-t0
)>>2);
470 t1
+= (q1
= (v1
-t1
)>>2);
471 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
474 XPROD31( iX
[2], -iX
[3], t0
, t1
, &oX1
[2], &oX2
[1] );
478 v0
+= (q0
= (t0
-v0
)>>2);
479 v1
+= (q1
= (t1
-v1
)>>2);
480 XPROD31( iX
[4], -iX
[5], v0
, v1
, &oX1
[1], &oX2
[2] );
483 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
500 oX2
[0] = -(oX1
[3] = iX
[3]);
501 oX2
[1] = -(oX1
[2] = iX
[2]);
502 oX2
[2] = -(oX1
[1] = iX
[1]);
503 oX2
[3] = -(oX1
[0] = iX
[0]);