1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
39 #include "mdct_lookup.h"
41 #include <codecs/lib/codeclib.h>
44 #if defined(CPU_ARM) && CONFIG_CPU != S3C2440
45 /* C code is faster on S3C2440 */
47 extern void mdct_butterfly_32(int32_t *x
);
48 extern void mdct_butterfly_generic_loop(int32_t *x1
, int32_t *x2
,
49 const int32_t *T0
, int step
,
52 static inline void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
53 mdct_butterfly_generic_loop(x
+ points
, x
+ (points
>>1), sincos_lookup0
, step
, sincos_lookup0
+1024);
58 /* 8 point butterfly (in place) */
59 static inline void mdct_butterfly_8(int32_t *x
){
60 register int32_t r0
= x
[4] + x
[0];
61 register int32_t r1
= x
[4] - x
[0];
62 register int32_t r2
= x
[5] + x
[1];
63 register int32_t r3
= x
[5] - x
[1];
64 register int32_t r4
= x
[6] + x
[2];
65 register int32_t r5
= x
[6] - x
[2];
66 register int32_t r6
= x
[7] + x
[3];
67 register int32_t r7
= x
[7] - x
[3];
80 /* 16 point butterfly (in place, 4 register) */
81 static inline void mdct_butterfly_16(int32_t *x
){
83 register int32_t r0
, r1
;
85 r0
= x
[ 0] - x
[ 8]; x
[ 8] += x
[ 0];
86 r1
= x
[ 1] - x
[ 9]; x
[ 9] += x
[ 1];
87 x
[ 0] = MULT31((r0
+ r1
) , cPI2_8
);
88 x
[ 1] = MULT31((r1
- r0
) , cPI2_8
);
91 r0
= x
[10] - x
[ 2]; x
[10] += x
[ 2];
92 r1
= x
[ 3] - x
[11]; x
[11] += x
[ 3];
93 x
[ 2] = r1
; x
[ 3] = r0
;
96 r0
= x
[12] - x
[ 4]; x
[12] += x
[ 4];
97 r1
= x
[13] - x
[ 5]; x
[13] += x
[ 5];
98 x
[ 4] = MULT31((r0
- r1
) , cPI2_8
);
99 x
[ 5] = MULT31((r0
+ r1
) , cPI2_8
);
102 r0
= x
[14] - x
[ 6]; x
[14] += x
[ 6];
103 r1
= x
[15] - x
[ 7]; x
[15] += x
[ 7];
104 x
[ 6] = r0
; x
[ 7] = r1
;
108 mdct_butterfly_8(x
+8);
111 /* 32 point butterfly (in place, 4 register) */
112 static inline void mdct_butterfly_32(int32_t *x
){
114 register int32_t r0
, r1
;
116 r0
= x
[30] - x
[14]; x
[30] += x
[14];
117 r1
= x
[31] - x
[15]; x
[31] += x
[15];
118 x
[14] = r0
; x
[15] = r1
;
121 r0
= x
[28] - x
[12]; x
[28] += x
[12];
122 r1
= x
[29] - x
[13]; x
[29] += x
[13];
123 XNPROD31( r0
, r1
, cPI1_8
, cPI3_8
, &x
[12], &x
[13] );
126 r0
= x
[26] - x
[10]; x
[26] += x
[10];
127 r1
= x
[27] - x
[11]; x
[27] += x
[11];
128 x
[10] = MULT31((r0
- r1
) , cPI2_8
);
129 x
[11] = MULT31((r0
+ r1
) , cPI2_8
);
132 r0
= x
[24] - x
[ 8]; x
[24] += x
[ 8];
133 r1
= x
[25] - x
[ 9]; x
[25] += x
[ 9];
134 XNPROD31( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 8], &x
[ 9] );
137 r0
= x
[22] - x
[ 6]; x
[22] += x
[ 6];
138 r1
= x
[ 7] - x
[23]; x
[23] += x
[ 7];
139 x
[ 6] = r1
; x
[ 7] = r0
;
142 r0
= x
[ 4] - x
[20]; x
[20] += x
[ 4];
143 r1
= x
[ 5] - x
[21]; x
[21] += x
[ 5];
144 XPROD31 ( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 4], &x
[ 5] );
147 r0
= x
[ 2] - x
[18]; x
[18] += x
[ 2];
148 r1
= x
[ 3] - x
[19]; x
[19] += x
[ 3];
149 x
[ 2] = MULT31((r1
+ r0
) , cPI2_8
);
150 x
[ 3] = MULT31((r1
- r0
) , cPI2_8
);
153 r0
= x
[ 0] - x
[16]; x
[16] += x
[ 0];
154 r1
= x
[ 1] - x
[17]; x
[17] += x
[ 1];
155 XPROD31 ( r0
, r1
, cPI1_8
, cPI3_8
, &x
[ 0], &x
[ 1] );
158 mdct_butterfly_16(x
);
159 mdct_butterfly_16(x
+16);
162 /* N/stage point generic N stage butterfly (in place, 4 register) */
163 void mdct_butterfly_generic(int32_t *x
,int points
, int step
)
164 ICODE_ATTR_TREMOR_MDCT
;
165 void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
166 const int32_t *T
= sincos_lookup0
;
167 int32_t *x1
= x
+ points
- 8;
168 int32_t *x2
= x
+ (points
>>1) - 8;
175 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
176 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
177 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
178 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
179 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
180 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
182 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
183 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
184 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
185 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
186 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
187 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
190 }while(T
<sincos_lookup0
+1024);
192 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
193 r1
= x1
[7] - x2
[7]; x1
[7] += x2
[7];
194 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
195 r3
= x1
[5] - x2
[5]; x1
[5] += x2
[5];
196 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
197 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
199 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
200 r1
= x1
[3] - x2
[3]; x1
[3] += x2
[3];
201 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
202 r3
= x1
[1] - x2
[1]; x1
[1] += x2
[1];
203 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
204 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
207 }while(T
>sincos_lookup0
);
209 r0
= x2
[6] - x1
[6]; x1
[6] += x2
[6];
210 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
211 r2
= x2
[4] - x1
[4]; x1
[4] += x2
[4];
212 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
213 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
214 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
216 r0
= x2
[2] - x1
[2]; x1
[2] += x2
[2];
217 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
218 r2
= x2
[0] - x1
[0]; x1
[0] += x2
[0];
219 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
220 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
221 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
224 }while(T
<sincos_lookup0
+1024);
226 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
227 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
228 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
229 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
230 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
231 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
233 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
234 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
235 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
236 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
237 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
238 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
241 }while(T
>sincos_lookup0
);
246 static inline void mdct_butterflies(int32_t *x
,int points
,int shift
) {
251 for(i
=0;--stages
>0;i
++){
252 for(j
=0;j
<(1<<i
);j
++)
253 mdct_butterfly_generic(x
+(points
>>i
)*j
,points
>>i
,4<<(i
+shift
));
256 for(j
=0;j
<points
;j
+=32)
257 mdct_butterfly_32(x
+j
);
261 static const unsigned char bitrev
[16] ICONST_ATTR
=
262 {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
264 static inline int bitrev12(int x
){
265 return bitrev
[x
>>8]|(bitrev
[(x
&0x0f0)>>4]<<4)|(((int)bitrev
[x
&0x00f])<<8);
268 static inline void mdct_bitreverse(int32_t *x
,int n
,int step
,int shift
) {
272 int32_t *w1
= x
= w0
+(n
>>1);
273 const int32_t *T
= (step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
274 const int32_t *Ttop
= T
+1024;
278 register int32_t r3
= bitrev12(bit
++);
279 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
280 int32_t *x1
= x
+ (r3
>>shift
);
282 register int32_t r0
= x0
[0] + x1
[0];
283 register int32_t r1
= x1
[1] - x0
[1];
285 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
289 r0
= (x0
[1] + x1
[1])>>1;
290 r1
= (x0
[0] - x1
[0])>>1;
296 r3
= bitrev12(bit
++);
297 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
298 x1
= x
+ (r3
>>shift
);
303 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
305 r0
= (x0
[1] + x1
[1])>>1;
306 r1
= (x0
[0] - x1
[0])>>1;
315 register int32_t r3
= bitrev12(bit
++);
316 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
317 int32_t *x1
= x
+ (r3
>>shift
);
319 register int32_t r0
= x0
[0] + x1
[0];
320 register int32_t r1
= x1
[1] - x0
[1];
322 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
326 r0
= (x0
[1] + x1
[1])>>1;
327 r1
= (x0
[0] - x1
[0])>>1;
333 r3
= bitrev12(bit
++);
334 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
335 x1
= x
+ (r3
>>shift
);
340 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
342 r0
= (x0
[1] + x1
[1])>>1;
343 r1
= (x0
[0] - x1
[0])>>1;
354 void mdct_backward(int n
, int32_t *in
, int32_t *out
)
355 ICODE_ATTR_TREMOR_MDCT
;
356 void mdct_backward(int n
, int32_t *in
, int32_t *out
) {
365 for (shift
=6;!(n
&(1<<shift
));shift
++);
377 XPROD31( iX
[4], iX
[6], T
[0], T
[1], &oX
[2], &oX
[3] ); T
+=step
;
378 XPROD31( iX
[0], iX
[2], T
[0], T
[1], &oX
[0], &oX
[1] ); T
+=step
;
383 XPROD31( iX
[4], iX
[6], T
[1], T
[0], &oX
[2], &oX
[3] ); T
-=step
;
384 XPROD31( iX
[0], iX
[2], T
[1], T
[0], &oX
[0], &oX
[1] ); T
-=step
;
393 T
+=step
; XNPROD31( iX
[6], iX
[4], T
[0], T
[1], &oX
[0], &oX
[1] );
394 T
+=step
; XNPROD31( iX
[2], iX
[0], T
[0], T
[1], &oX
[2], &oX
[3] );
399 T
-=step
; XNPROD31( iX
[6], iX
[4], T
[1], T
[0], &oX
[0], &oX
[1] );
400 T
-=step
; XNPROD31( iX
[2], iX
[0], T
[1], T
[0], &oX
[2], &oX
[3] );
405 mdct_butterflies(out
+n2
,n2
,shift
);
406 mdct_bitreverse(out
,n
,step
,shift
);
407 /* rotate + window */
411 int32_t *oX1
=out
+n2
+n4
;
412 int32_t *oX2
=out
+n2
+n4
;
417 T
=(step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
420 XPROD31( iX
[0], -iX
[1], T
[0], T
[1], &oX1
[3], &oX2
[0] ); T
+=step
;
421 XPROD31( iX
[2], -iX
[3], T
[0], T
[1], &oX1
[2], &oX2
[1] ); T
+=step
;
422 XPROD31( iX
[4], -iX
[5], T
[0], T
[1], &oX1
[1], &oX2
[2] ); T
+=step
;
423 XPROD31( iX
[6], -iX
[7], T
[0], T
[1], &oX1
[0], &oX2
[3] ); T
+=step
;
431 /* linear interpolation between table values: offset=0.5, step=1 */
432 register int32_t t0
,t1
,v0
,v1
;
440 t0
+= (v0
= (*V
++)>>1);
441 t1
+= (v1
= (*V
++)>>1);
442 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
443 v0
+= (t0
= (*T
++)>>1);
444 v1
+= (t1
= (*T
++)>>1);
445 XPROD31( iX
[2], -iX
[3], v0
, v1
, &oX1
[2], &oX2
[1] );
446 t0
+= (v0
= (*V
++)>>1);
447 t1
+= (v1
= (*V
++)>>1);
448 XPROD31( iX
[4], -iX
[5], t0
, t1
, &oX1
[1], &oX2
[2] );
449 v0
+= (t0
= (*T
++)>>1);
450 v1
+= (t1
= (*T
++)>>1);
451 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
460 /* linear interpolation between table values: offset=0.25, step=0.5 */
461 register int32_t t0
,t1
,v0
,v1
,q0
,q1
;
471 t0
+= (q0
= (v0
-t0
)>>2);
472 t1
+= (q1
= (v1
-t1
)>>2);
473 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
476 XPROD31( iX
[2], -iX
[3], t0
, t1
, &oX1
[2], &oX2
[1] );
480 v0
+= (q0
= (t0
-v0
)>>2);
481 v1
+= (q1
= (t1
-v1
)>>2);
482 XPROD31( iX
[4], -iX
[5], v0
, v1
, &oX1
[1], &oX2
[2] );
485 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
502 oX2
[0] = -(oX1
[3] = iX
[3]);
503 oX2
[1] = -(oX1
[2] = iX
[2]);
504 oX2
[2] = -(oX1
[1] = iX
[1]);
505 oX2
[3] = -(oX1
[0] = iX
[0]);