1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
39 #include "mdct_lookup.h"
41 #include <codecs/lib/codeclib.h>
46 extern void mdct_butterfly_32(int32_t *x
);
47 extern void mdct_butterfly_generic_loop(int32_t *x1
, int32_t *x2
,
48 const int32_t *T0
, int step
,
51 static inline void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
52 mdct_butterfly_generic_loop(x
+ points
, x
+ (points
>>1), sincos_lookup0
, step
, sincos_lookup0
+1024);
57 /* 8 point butterfly (in place) */
58 static inline void mdct_butterfly_8(int32_t *x
){
59 register int32_t r0
= x
[4] + x
[0];
60 register int32_t r1
= x
[4] - x
[0];
61 register int32_t r2
= x
[5] + x
[1];
62 register int32_t r3
= x
[5] - x
[1];
63 register int32_t r4
= x
[6] + x
[2];
64 register int32_t r5
= x
[6] - x
[2];
65 register int32_t r6
= x
[7] + x
[3];
66 register int32_t r7
= x
[7] - x
[3];
78 /* 16 point butterfly (in place, 4 register) */
79 static inline void mdct_butterfly_16(int32_t *x
){
81 register int32_t r0
, r1
;
83 r0
= x
[ 0] - x
[ 8]; x
[ 8] += x
[ 0];
84 r1
= x
[ 1] - x
[ 9]; x
[ 9] += x
[ 1];
85 x
[ 0] = MULT31((r0
+ r1
) , cPI2_8
);
86 x
[ 1] = MULT31((r1
- r0
) , cPI2_8
);
88 r0
= x
[10] - x
[ 2]; x
[10] += x
[ 2];
89 r1
= x
[ 3] - x
[11]; x
[11] += x
[ 3];
90 x
[ 2] = r1
; x
[ 3] = r0
;
92 r0
= x
[12] - x
[ 4]; x
[12] += x
[ 4];
93 r1
= x
[13] - x
[ 5]; x
[13] += x
[ 5];
94 x
[ 4] = MULT31((r0
- r1
) , cPI2_8
);
95 x
[ 5] = MULT31((r0
+ r1
) , cPI2_8
);
97 r0
= x
[14] - x
[ 6]; x
[14] += x
[ 6];
98 r1
= x
[15] - x
[ 7]; x
[15] += x
[ 7];
99 x
[ 6] = r0
; x
[ 7] = r1
;
102 mdct_butterfly_8(x
+8);
105 /* 32 point butterfly (in place, 4 register) */
106 static inline void mdct_butterfly_32(int32_t *x
){
108 register int32_t r0
, r1
;
110 r0
= x
[30] - x
[14]; x
[30] += x
[14];
111 r1
= x
[31] - x
[15]; x
[31] += x
[15];
112 x
[14] = r0
; x
[15] = r1
;
114 r0
= x
[28] - x
[12]; x
[28] += x
[12];
115 r1
= x
[29] - x
[13]; x
[29] += x
[13];
116 XNPROD31( r0
, r1
, cPI1_8
, cPI3_8
, &x
[12], &x
[13] );
118 r0
= x
[26] - x
[10]; x
[26] += x
[10];
119 r1
= x
[27] - x
[11]; x
[27] += x
[11];
120 x
[10] = MULT31((r0
- r1
) , cPI2_8
);
121 x
[11] = MULT31((r0
+ r1
) , cPI2_8
);
123 r0
= x
[24] - x
[ 8]; x
[24] += x
[ 8];
124 r1
= x
[25] - x
[ 9]; x
[25] += x
[ 9];
125 XNPROD31( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 8], &x
[ 9] );
127 r0
= x
[22] - x
[ 6]; x
[22] += x
[ 6];
128 r1
= x
[ 7] - x
[23]; x
[23] += x
[ 7];
129 x
[ 6] = r1
; x
[ 7] = r0
;
131 r0
= x
[ 4] - x
[20]; x
[20] += x
[ 4];
132 r1
= x
[ 5] - x
[21]; x
[21] += x
[ 5];
133 XPROD31 ( r0
, r1
, cPI3_8
, cPI1_8
, &x
[ 4], &x
[ 5] );
135 r0
= x
[ 2] - x
[18]; x
[18] += x
[ 2];
136 r1
= x
[ 3] - x
[19]; x
[19] += x
[ 3];
137 x
[ 2] = MULT31((r1
+ r0
) , cPI2_8
);
138 x
[ 3] = MULT31((r1
- r0
) , cPI2_8
);
140 r0
= x
[ 0] - x
[16]; x
[16] += x
[ 0];
141 r1
= x
[ 1] - x
[17]; x
[17] += x
[ 1];
142 XPROD31 ( r0
, r1
, cPI1_8
, cPI3_8
, &x
[ 0], &x
[ 1] );
144 mdct_butterfly_16(x
);
145 mdct_butterfly_16(x
+16);
148 /* N/stage point generic N stage butterfly (in place, 4 register) */
149 void mdct_butterfly_generic(int32_t *x
,int points
, int step
)
150 ICODE_ATTR_TREMOR_MDCT
;
151 void mdct_butterfly_generic(int32_t *x
,int points
, int step
){
152 const int32_t *T
= sincos_lookup0
;
153 int32_t *x1
= x
+ points
- 8;
154 int32_t *x2
= x
+ (points
>>1) - 8;
161 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
162 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
163 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
164 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
165 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
166 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
168 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
169 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
170 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
171 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
172 XPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
173 XPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
176 }while(T
<sincos_lookup0
+1024);
178 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
179 r1
= x1
[7] - x2
[7]; x1
[7] += x2
[7];
180 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
181 r3
= x1
[5] - x2
[5]; x1
[5] += x2
[5];
182 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
183 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
185 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
186 r1
= x1
[3] - x2
[3]; x1
[3] += x2
[3];
187 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
188 r3
= x1
[1] - x2
[1]; x1
[1] += x2
[1];
189 XNPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
190 XNPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
193 }while(T
>sincos_lookup0
);
195 r0
= x2
[6] - x1
[6]; x1
[6] += x2
[6];
196 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
197 r2
= x2
[4] - x1
[4]; x1
[4] += x2
[4];
198 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
199 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
+=step
;
200 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
+=step
;
202 r0
= x2
[2] - x1
[2]; x1
[2] += x2
[2];
203 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
204 r2
= x2
[0] - x1
[0]; x1
[0] += x2
[0];
205 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
206 XPROD31( r0
, r1
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
+=step
;
207 XPROD31( r2
, r3
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
+=step
;
210 }while(T
<sincos_lookup0
+1024);
212 r0
= x1
[6] - x2
[6]; x1
[6] += x2
[6];
213 r1
= x2
[7] - x1
[7]; x1
[7] += x2
[7];
214 r2
= x1
[4] - x2
[4]; x1
[4] += x2
[4];
215 r3
= x2
[5] - x1
[5]; x1
[5] += x2
[5];
216 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[6], &x2
[7] ); T
-=step
;
217 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[4], &x2
[5] ); T
-=step
;
219 r0
= x1
[2] - x2
[2]; x1
[2] += x2
[2];
220 r1
= x2
[3] - x1
[3]; x1
[3] += x2
[3];
221 r2
= x1
[0] - x2
[0]; x1
[0] += x2
[0];
222 r3
= x2
[1] - x1
[1]; x1
[1] += x2
[1];
223 XNPROD31( r1
, r0
, T
[0], T
[1], &x2
[2], &x2
[3] ); T
-=step
;
224 XNPROD31( r3
, r2
, T
[0], T
[1], &x2
[0], &x2
[1] ); T
-=step
;
227 }while(T
>sincos_lookup0
);
232 static inline void mdct_butterflies(int32_t *x
,int points
,int shift
) {
237 for(i
=0;--stages
>0;i
++){
238 for(j
=0;j
<(1<<i
);j
++)
239 mdct_butterfly_generic(x
+(points
>>i
)*j
,points
>>i
,4<<(i
+shift
));
242 for(j
=0;j
<points
;j
+=32)
243 mdct_butterfly_32(x
+j
);
246 static const unsigned char bitrev
[] ICONST_ATTR
=
248 0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
249 2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
250 1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
251 3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63
254 static inline int bitrev12(int x
){
255 return bitrev
[x
>>6]|((bitrev
[x
&0x03f])<<6);
258 static inline void mdct_bitreverse(int32_t *x
,int n
,int step
,int shift
) {
262 int32_t *w1
= x
= w0
+(n
>>1);
263 const int32_t *T
= (step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
264 const int32_t *Ttop
= T
+1024;
268 register int32_t r3
= bitrev12(bit
++);
269 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
270 int32_t *x1
= x
+ (r3
>>shift
);
272 register int32_t r0
= x0
[0] + x1
[0];
273 register int32_t r1
= x1
[1] - x0
[1];
275 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
279 r0
= (x0
[1] + x1
[1])>>1;
280 r1
= (x0
[0] - x1
[0])>>1;
286 r3
= bitrev12(bit
++);
287 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
288 x1
= x
+ (r3
>>shift
);
293 XPROD32( r0
, r1
, T
[1], T
[0], r2
, r3
); T
+=step
;
295 r0
= (x0
[1] + x1
[1])>>1;
296 r1
= (x0
[0] - x1
[0])>>1;
305 register int32_t r3
= bitrev12(bit
++);
306 int32_t *x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
307 int32_t *x1
= x
+ (r3
>>shift
);
309 register int32_t r0
= x0
[0] + x1
[0];
310 register int32_t r1
= x1
[1] - x0
[1];
312 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
316 r0
= (x0
[1] + x1
[1])>>1;
317 r1
= (x0
[0] - x1
[0])>>1;
323 r3
= bitrev12(bit
++);
324 x0
= x
+ ((r3
^ 0xfff)>>shift
) -1;
325 x1
= x
+ (r3
>>shift
);
330 T
-=step
; XPROD32( r0
, r1
, T
[0], T
[1], r2
, r3
);
332 r0
= (x0
[1] + x1
[1])>>1;
333 r1
= (x0
[0] - x1
[0])>>1;
344 void mdct_backward(int n
, int32_t *in
, int32_t *out
)
345 ICODE_ATTR_TREMOR_MDCT
;
346 void mdct_backward(int n
, int32_t *in
, int32_t *out
) {
355 for (shift
=6;!(n
&(1<<shift
));shift
++);
367 XPROD31( iX
[4], iX
[6], T
[0], T
[1], &oX
[2], &oX
[3] ); T
+=step
;
368 XPROD31( iX
[0], iX
[2], T
[0], T
[1], &oX
[0], &oX
[1] ); T
+=step
;
373 XPROD31( iX
[4], iX
[6], T
[1], T
[0], &oX
[2], &oX
[3] ); T
-=step
;
374 XPROD31( iX
[0], iX
[2], T
[1], T
[0], &oX
[0], &oX
[1] ); T
-=step
;
383 T
+=step
; XNPROD31( iX
[6], iX
[4], T
[0], T
[1], &oX
[0], &oX
[1] );
384 T
+=step
; XNPROD31( iX
[2], iX
[0], T
[0], T
[1], &oX
[2], &oX
[3] );
389 T
-=step
; XNPROD31( iX
[6], iX
[4], T
[1], T
[0], &oX
[0], &oX
[1] );
390 T
-=step
; XNPROD31( iX
[2], iX
[0], T
[1], T
[0], &oX
[2], &oX
[3] );
395 mdct_butterflies(out
+n2
,n2
,shift
);
396 mdct_bitreverse(out
,n
,step
,shift
);
397 /* rotate + window */
401 int32_t *oX1
=out
+n2
+n4
;
402 int32_t *oX2
=out
+n2
+n4
;
407 T
=(step
>=4)?(sincos_lookup0
+(step
>>1)):sincos_lookup1
;
410 XPROD31( iX
[0], -iX
[1], T
[0], T
[1], &oX1
[3], &oX2
[0] ); T
+=step
;
411 XPROD31( iX
[2], -iX
[3], T
[0], T
[1], &oX1
[2], &oX2
[1] ); T
+=step
;
412 XPROD31( iX
[4], -iX
[5], T
[0], T
[1], &oX1
[1], &oX2
[2] ); T
+=step
;
413 XPROD31( iX
[6], -iX
[7], T
[0], T
[1], &oX1
[0], &oX2
[3] ); T
+=step
;
421 /* linear interpolation between table values: offset=0.5, step=1 */
422 register int32_t t0
,t1
,v0
,v1
;
430 t0
+= (v0
= (*V
++)>>1);
431 t1
+= (v1
= (*V
++)>>1);
432 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
433 v0
+= (t0
= (*T
++)>>1);
434 v1
+= (t1
= (*T
++)>>1);
435 XPROD31( iX
[2], -iX
[3], v0
, v1
, &oX1
[2], &oX2
[1] );
436 t0
+= (v0
= (*V
++)>>1);
437 t1
+= (v1
= (*V
++)>>1);
438 XPROD31( iX
[4], -iX
[5], t0
, t1
, &oX1
[1], &oX2
[2] );
439 v0
+= (t0
= (*T
++)>>1);
440 v1
+= (t1
= (*T
++)>>1);
441 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
450 /* linear interpolation between table values: offset=0.25, step=0.5 */
451 register int32_t t0
,t1
,v0
,v1
,q0
,q1
;
461 t0
+= (q0
= (v0
-t0
)>>2);
462 t1
+= (q1
= (v1
-t1
)>>2);
463 XPROD31( iX
[0], -iX
[1], t0
, t1
, &oX1
[3], &oX2
[0] );
466 XPROD31( iX
[2], -iX
[3], t0
, t1
, &oX1
[2], &oX2
[1] );
470 v0
+= (q0
= (t0
-v0
)>>2);
471 v1
+= (q1
= (t1
-v1
)>>2);
472 XPROD31( iX
[4], -iX
[5], v0
, v1
, &oX1
[1], &oX2
[2] );
475 XPROD31( iX
[6], -iX
[7], v0
, v1
, &oX1
[0], &oX2
[3] );
492 oX2
[0] = -(oX1
[3] = iX
[3]);
493 oX2
[1] = -(oX1
[2] = iX
[2]);
494 oX2
[2] = -(oX1
[1] = iX
[1]);
495 oX2
[3] = -(oX1
[0] = iX
[0]);