Modify the mdct library to enable using it outside rockbox - No functional changes.
[kugel-rb.git] / apps / codecs / lib / mdct2.c
blob6d9168804c9388fc2a90a8628693fc0dcbe0bb73
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
38 #include "mdct2.h"
39 #include "mdct_lookup.h"
40 #ifdef ROCKBOX
41 #include <codecs/lib/codeclib.h>
42 #endif /* ROCKBOX */
44 #if defined(CPU_ARM) && CONFIG_CPU != S3C2440
45 /* C code is faster on S3C2440 */
47 extern void mdct_butterfly_32(int32_t *x);
48 extern void mdct_butterfly_generic_loop(int32_t *x1, int32_t *x2,
49 const int32_t *T0, int step,
50 const int32_t *Ttop);
52 static inline void mdct_butterfly_generic(int32_t *x,int points, int step){
53 mdct_butterfly_generic_loop(x + points, x + (points>>1), sincos_lookup0, step, sincos_lookup0+1024);
56 #else
58 /* 8 point butterfly (in place) */
59 static inline void mdct_butterfly_8(int32_t *x){
60 register int32_t r0 = x[4] + x[0];
61 register int32_t r1 = x[4] - x[0];
62 register int32_t r2 = x[5] + x[1];
63 register int32_t r3 = x[5] - x[1];
64 register int32_t r4 = x[6] + x[2];
65 register int32_t r5 = x[6] - x[2];
66 register int32_t r6 = x[7] + x[3];
67 register int32_t r7 = x[7] - x[3];
69 x[0] = r5 + r3;
70 x[1] = r7 - r1;
71 x[2] = r5 - r3;
72 x[3] = r7 + r1;
73 x[4] = r4 - r0;
74 x[5] = r6 - r2;
75 x[6] = r4 + r0;
76 x[7] = r6 + r2;
77 MB();
80 /* 16 point butterfly (in place, 4 register) */
81 static inline void mdct_butterfly_16(int32_t *x){
83 register int32_t r0, r1;
85 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
86 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
87 x[ 0] = MULT31((r0 + r1) , cPI2_8);
88 x[ 1] = MULT31((r1 - r0) , cPI2_8);
89 MB();
91 r0 = x[10] - x[ 2]; x[10] += x[ 2];
92 r1 = x[ 3] - x[11]; x[11] += x[ 3];
93 x[ 2] = r1; x[ 3] = r0;
94 MB();
96 r0 = x[12] - x[ 4]; x[12] += x[ 4];
97 r1 = x[13] - x[ 5]; x[13] += x[ 5];
98 x[ 4] = MULT31((r0 - r1) , cPI2_8);
99 x[ 5] = MULT31((r0 + r1) , cPI2_8);
100 MB();
102 r0 = x[14] - x[ 6]; x[14] += x[ 6];
103 r1 = x[15] - x[ 7]; x[15] += x[ 7];
104 x[ 6] = r0; x[ 7] = r1;
105 MB();
107 mdct_butterfly_8(x);
108 mdct_butterfly_8(x+8);
111 /* 32 point butterfly (in place, 4 register) */
112 static inline void mdct_butterfly_32(int32_t *x){
114 register int32_t r0, r1;
116 r0 = x[30] - x[14]; x[30] += x[14];
117 r1 = x[31] - x[15]; x[31] += x[15];
118 x[14] = r0; x[15] = r1;
119 MB();
121 r0 = x[28] - x[12]; x[28] += x[12];
122 r1 = x[29] - x[13]; x[29] += x[13];
123 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
124 MB();
126 r0 = x[26] - x[10]; x[26] += x[10];
127 r1 = x[27] - x[11]; x[27] += x[11];
128 x[10] = MULT31((r0 - r1) , cPI2_8);
129 x[11] = MULT31((r0 + r1) , cPI2_8);
130 MB();
132 r0 = x[24] - x[ 8]; x[24] += x[ 8];
133 r1 = x[25] - x[ 9]; x[25] += x[ 9];
134 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
135 MB();
137 r0 = x[22] - x[ 6]; x[22] += x[ 6];
138 r1 = x[ 7] - x[23]; x[23] += x[ 7];
139 x[ 6] = r1; x[ 7] = r0;
140 MB();
142 r0 = x[ 4] - x[20]; x[20] += x[ 4];
143 r1 = x[ 5] - x[21]; x[21] += x[ 5];
144 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
145 MB();
147 r0 = x[ 2] - x[18]; x[18] += x[ 2];
148 r1 = x[ 3] - x[19]; x[19] += x[ 3];
149 x[ 2] = MULT31((r1 + r0) , cPI2_8);
150 x[ 3] = MULT31((r1 - r0) , cPI2_8);
151 MB();
153 r0 = x[ 0] - x[16]; x[16] += x[ 0];
154 r1 = x[ 1] - x[17]; x[17] += x[ 1];
155 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
156 MB();
158 mdct_butterfly_16(x);
159 mdct_butterfly_16(x+16);
162 /* N/stage point generic N stage butterfly (in place, 4 register) */
163 void mdct_butterfly_generic(int32_t *x,int points, int step)
164 ICODE_ATTR_TREMOR_MDCT;
165 void mdct_butterfly_generic(int32_t *x,int points, int step){
166 const int32_t *T = sincos_lookup0;
167 int32_t *x1 = x + points - 8;
168 int32_t *x2 = x + (points>>1) - 8;
169 register int32_t r0;
170 register int32_t r1;
171 register int32_t r2;
172 register int32_t r3;
175 r0 = x1[6] - x2[6]; x1[6] += x2[6];
176 r1 = x2[7] - x1[7]; x1[7] += x2[7];
177 r2 = x1[4] - x2[4]; x1[4] += x2[4];
178 r3 = x2[5] - x1[5]; x1[5] += x2[5];
179 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
180 XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
182 r0 = x1[2] - x2[2]; x1[2] += x2[2];
183 r1 = x2[3] - x1[3]; x1[3] += x2[3];
184 r2 = x1[0] - x2[0]; x1[0] += x2[0];
185 r3 = x2[1] - x1[1]; x1[1] += x2[1];
186 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
187 XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
189 x1-=8; x2-=8;
190 }while(T<sincos_lookup0+1024);
192 r0 = x1[6] - x2[6]; x1[6] += x2[6];
193 r1 = x1[7] - x2[7]; x1[7] += x2[7];
194 r2 = x1[4] - x2[4]; x1[4] += x2[4];
195 r3 = x1[5] - x2[5]; x1[5] += x2[5];
196 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
197 XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
199 r0 = x1[2] - x2[2]; x1[2] += x2[2];
200 r1 = x1[3] - x2[3]; x1[3] += x2[3];
201 r2 = x1[0] - x2[0]; x1[0] += x2[0];
202 r3 = x1[1] - x2[1]; x1[1] += x2[1];
203 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
204 XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
206 x1-=8; x2-=8;
207 }while(T>sincos_lookup0);
209 r0 = x2[6] - x1[6]; x1[6] += x2[6];
210 r1 = x2[7] - x1[7]; x1[7] += x2[7];
211 r2 = x2[4] - x1[4]; x1[4] += x2[4];
212 r3 = x2[5] - x1[5]; x1[5] += x2[5];
213 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
214 XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
216 r0 = x2[2] - x1[2]; x1[2] += x2[2];
217 r1 = x2[3] - x1[3]; x1[3] += x2[3];
218 r2 = x2[0] - x1[0]; x1[0] += x2[0];
219 r3 = x2[1] - x1[1]; x1[1] += x2[1];
220 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
221 XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
223 x1-=8; x2-=8;
224 }while(T<sincos_lookup0+1024);
226 r0 = x1[6] - x2[6]; x1[6] += x2[6];
227 r1 = x2[7] - x1[7]; x1[7] += x2[7];
228 r2 = x1[4] - x2[4]; x1[4] += x2[4];
229 r3 = x2[5] - x1[5]; x1[5] += x2[5];
230 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
231 XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
233 r0 = x1[2] - x2[2]; x1[2] += x2[2];
234 r1 = x2[3] - x1[3]; x1[3] += x2[3];
235 r2 = x1[0] - x2[0]; x1[0] += x2[0];
236 r3 = x2[1] - x1[1]; x1[1] += x2[1];
237 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
238 XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
240 x1-=8; x2-=8;
241 }while(T>sincos_lookup0);
244 #endif /* CPU_ARM */
246 static inline void mdct_butterflies(int32_t *x,int points,int shift) {
248 int stages=8-shift;
249 int i,j;
251 for(i=0;--stages>0;i++){
252 for(j=0;j<(1<<i);j++)
253 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
256 for(j=0;j<points;j+=32)
257 mdct_butterfly_32(x+j);
261 static const unsigned char bitrev[16] ICONST_ATTR =
262 {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
264 static inline int bitrev12(int x){
265 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
268 static inline void mdct_bitreverse(int32_t *x,int n,int step,int shift) {
270 int bit = 0;
271 int32_t *w0 = x;
272 int32_t *w1 = x = w0+(n>>1);
273 const int32_t *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
274 const int32_t *Ttop = T+1024;
275 register int32_t r2;
278 register int32_t r3 = bitrev12(bit++);
279 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
280 int32_t *x1 = x + (r3>>shift);
282 register int32_t r0 = x0[0] + x1[0];
283 register int32_t r1 = x1[1] - x0[1];
285 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
287 w1 -= 4;
289 r0 = (x0[1] + x1[1])>>1;
290 r1 = (x0[0] - x1[0])>>1;
291 w0[0] = r0 + r2;
292 w0[1] = r1 + r3;
293 w1[2] = r0 - r2;
294 w1[3] = r3 - r1;
296 r3 = bitrev12(bit++);
297 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
298 x1 = x + (r3>>shift);
300 r0 = x0[0] + x1[0];
301 r1 = x1[1] - x0[1];
303 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
305 r0 = (x0[1] + x1[1])>>1;
306 r1 = (x0[0] - x1[0])>>1;
307 w0[2] = r0 + r2;
308 w0[3] = r1 + r3;
309 w1[0] = r0 - r2;
310 w1[1] = r3 - r1;
312 w0 += 4;
313 }while(T<Ttop);
315 register int32_t r3 = bitrev12(bit++);
316 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
317 int32_t *x1 = x + (r3>>shift);
319 register int32_t r0 = x0[0] + x1[0];
320 register int32_t r1 = x1[1] - x0[1];
322 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
324 w1 -= 4;
326 r0 = (x0[1] + x1[1])>>1;
327 r1 = (x0[0] - x1[0])>>1;
328 w0[0] = r0 + r2;
329 w0[1] = r1 + r3;
330 w1[2] = r0 - r2;
331 w1[3] = r3 - r1;
333 r3 = bitrev12(bit++);
334 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
335 x1 = x + (r3>>shift);
337 r0 = x0[0] + x1[0];
338 r1 = x1[1] - x0[1];
340 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
342 r0 = (x0[1] + x1[1])>>1;
343 r1 = (x0[0] - x1[0])>>1;
344 w0[2] = r0 + r2;
345 w0[3] = r1 + r3;
346 w1[0] = r0 - r2;
347 w1[1] = r3 - r1;
349 w0 += 4;
350 }while(w0<w1);
354 void mdct_backward(int n, int32_t *in, int32_t *out)
355 ICODE_ATTR_TREMOR_MDCT;
356 void mdct_backward(int n, int32_t *in, int32_t *out) {
357 int n2=n>>1;
358 int n4=n>>2;
359 int32_t *iX;
360 int32_t *oX;
361 const int32_t *T;
362 const int32_t *V;
363 int shift;
364 int step;
365 for (shift=6;!(n&(1<<shift));shift++);
366 shift=13-shift;
367 step=2<<shift;
369 /* rotate */
371 iX = in+n2-7;
372 oX = out+n2+n4;
373 T = sincos_lookup0;
376 oX-=4;
377 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
378 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
379 iX-=8;
380 }while(iX>=in+n4);
382 oX-=4;
383 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
384 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
385 iX-=8;
386 }while(iX>=in);
388 iX = in+n2-8;
389 oX = out+n2+n4;
390 T = sincos_lookup0;
393 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
394 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
395 iX-=8;
396 oX+=4;
397 }while(iX>=in+n4);
399 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
400 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
401 iX-=8;
402 oX+=4;
403 }while(iX>=in);
405 mdct_butterflies(out+n2,n2,shift);
406 mdct_bitreverse(out,n,step,shift);
407 /* rotate + window */
409 step>>=2;
411 int32_t *oX1=out+n2+n4;
412 int32_t *oX2=out+n2+n4;
413 int32_t *iX =out;
415 switch(step) {
416 default: {
417 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
419 oX1-=4;
420 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
421 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
422 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
423 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
424 oX2+=4;
425 iX+=8;
426 }while(iX<oX1);
427 break;
430 case 1: {
431 /* linear interpolation between table values: offset=0.5, step=1 */
432 register int32_t t0,t1,v0,v1;
433 T = sincos_lookup0;
434 V = sincos_lookup1;
435 t0 = (*T++)>>1;
436 t1 = (*T++)>>1;
438 oX1-=4;
440 t0 += (v0 = (*V++)>>1);
441 t1 += (v1 = (*V++)>>1);
442 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
443 v0 += (t0 = (*T++)>>1);
444 v1 += (t1 = (*T++)>>1);
445 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
446 t0 += (v0 = (*V++)>>1);
447 t1 += (v1 = (*V++)>>1);
448 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
449 v0 += (t0 = (*T++)>>1);
450 v1 += (t1 = (*T++)>>1);
451 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
453 oX2+=4;
454 iX+=8;
455 }while(iX<oX1);
456 break;
459 case 0: {
460 /* linear interpolation between table values: offset=0.25, step=0.5 */
461 register int32_t t0,t1,v0,v1,q0,q1;
462 T = sincos_lookup0;
463 V = sincos_lookup1;
464 t0 = *T++;
465 t1 = *T++;
467 oX1-=4;
469 v0 = *V++;
470 v1 = *V++;
471 t0 += (q0 = (v0-t0)>>2);
472 t1 += (q1 = (v1-t1)>>2);
473 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
474 t0 = v0-q0;
475 t1 = v1-q1;
476 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
478 t0 = *T++;
479 t1 = *T++;
480 v0 += (q0 = (t0-v0)>>2);
481 v1 += (q1 = (t1-v1)>>2);
482 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
483 v0 = t0-q0;
484 v1 = t1-q1;
485 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
487 oX2+=4;
488 iX+=8;
489 }while(iX<oX1);
490 break;
494 iX=out+n2+n4;
495 oX1=out+n4;
496 oX2=oX1;
499 oX1-=4;
500 iX-=4;
502 oX2[0] = -(oX1[3] = iX[3]);
503 oX2[1] = -(oX1[2] = iX[2]);
504 oX2[2] = -(oX1[1] = iX[1]);
505 oX2[3] = -(oX1[0] = iX[0]);
507 oX2+=4;
508 }while(oX2<iX);
510 iX=out+n2+n4;
511 oX1=out+n2+n4;
512 oX2=out+n2;
515 oX1-=4;
516 oX1[0]= iX[3];
517 oX1[1]= iX[2];
518 oX1[2]= iX[1];
519 oX1[3]= iX[0];
520 iX+=4;
521 }while(oX1>oX2);