Rearrange the MDCT library lookup tables so that codecs can access them. Access...
[kugel-rb.git] / apps / codecs / lib / mdct2.c
blob7c448e1a01f3b92746c352a3d2538add7f043724
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
38 #include "mdct2.h"
39 #include "mdct_lookup.h"
40 #include <codecs/lib/codeclib.h>
42 #if defined(CPU_ARM) && CONFIG_CPU != S3C2440
43 /* C code is faster on S3C2440 */
45 extern void mdct_butterfly_32(int32_t *x);
46 extern void mdct_butterfly_generic_loop(int32_t *x1, int32_t *x2,
47 const int32_t *T0, int step,
48 const int32_t *Ttop);
50 static inline void mdct_butterfly_generic(int32_t *x,int points, int step){
51 mdct_butterfly_generic_loop(x + points, x + (points>>1), sincos_lookup0, step, sincos_lookup0+1024);
54 #else
56 /* 8 point butterfly (in place) */
57 static inline void mdct_butterfly_8(int32_t *x){
58 register int32_t r0 = x[4] + x[0];
59 register int32_t r1 = x[4] - x[0];
60 register int32_t r2 = x[5] + x[1];
61 register int32_t r3 = x[5] - x[1];
62 register int32_t r4 = x[6] + x[2];
63 register int32_t r5 = x[6] - x[2];
64 register int32_t r6 = x[7] + x[3];
65 register int32_t r7 = x[7] - x[3];
67 x[0] = r5 + r3;
68 x[1] = r7 - r1;
69 x[2] = r5 - r3;
70 x[3] = r7 + r1;
71 x[4] = r4 - r0;
72 x[5] = r6 - r2;
73 x[6] = r4 + r0;
74 x[7] = r6 + r2;
75 MB();
78 /* 16 point butterfly (in place, 4 register) */
79 static inline void mdct_butterfly_16(int32_t *x){
81 register int32_t r0, r1;
83 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
84 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
85 x[ 0] = MULT31((r0 + r1) , cPI2_8);
86 x[ 1] = MULT31((r1 - r0) , cPI2_8);
87 MB();
89 r0 = x[10] - x[ 2]; x[10] += x[ 2];
90 r1 = x[ 3] - x[11]; x[11] += x[ 3];
91 x[ 2] = r1; x[ 3] = r0;
92 MB();
94 r0 = x[12] - x[ 4]; x[12] += x[ 4];
95 r1 = x[13] - x[ 5]; x[13] += x[ 5];
96 x[ 4] = MULT31((r0 - r1) , cPI2_8);
97 x[ 5] = MULT31((r0 + r1) , cPI2_8);
98 MB();
100 r0 = x[14] - x[ 6]; x[14] += x[ 6];
101 r1 = x[15] - x[ 7]; x[15] += x[ 7];
102 x[ 6] = r0; x[ 7] = r1;
103 MB();
105 mdct_butterfly_8(x);
106 mdct_butterfly_8(x+8);
109 /* 32 point butterfly (in place, 4 register) */
110 static inline void mdct_butterfly_32(int32_t *x){
112 register int32_t r0, r1;
114 r0 = x[30] - x[14]; x[30] += x[14];
115 r1 = x[31] - x[15]; x[31] += x[15];
116 x[14] = r0; x[15] = r1;
117 MB();
119 r0 = x[28] - x[12]; x[28] += x[12];
120 r1 = x[29] - x[13]; x[29] += x[13];
121 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
122 MB();
124 r0 = x[26] - x[10]; x[26] += x[10];
125 r1 = x[27] - x[11]; x[27] += x[11];
126 x[10] = MULT31((r0 - r1) , cPI2_8);
127 x[11] = MULT31((r0 + r1) , cPI2_8);
128 MB();
130 r0 = x[24] - x[ 8]; x[24] += x[ 8];
131 r1 = x[25] - x[ 9]; x[25] += x[ 9];
132 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
133 MB();
135 r0 = x[22] - x[ 6]; x[22] += x[ 6];
136 r1 = x[ 7] - x[23]; x[23] += x[ 7];
137 x[ 6] = r1; x[ 7] = r0;
138 MB();
140 r0 = x[ 4] - x[20]; x[20] += x[ 4];
141 r1 = x[ 5] - x[21]; x[21] += x[ 5];
142 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
143 MB();
145 r0 = x[ 2] - x[18]; x[18] += x[ 2];
146 r1 = x[ 3] - x[19]; x[19] += x[ 3];
147 x[ 2] = MULT31((r1 + r0) , cPI2_8);
148 x[ 3] = MULT31((r1 - r0) , cPI2_8);
149 MB();
151 r0 = x[ 0] - x[16]; x[16] += x[ 0];
152 r1 = x[ 1] - x[17]; x[17] += x[ 1];
153 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
154 MB();
156 mdct_butterfly_16(x);
157 mdct_butterfly_16(x+16);
160 /* N/stage point generic N stage butterfly (in place, 4 register) */
161 void mdct_butterfly_generic(int32_t *x,int points, int step)
162 ICODE_ATTR_TREMOR_MDCT;
163 void mdct_butterfly_generic(int32_t *x,int points, int step){
164 const int32_t *T = sincos_lookup0;
165 int32_t *x1 = x + points - 8;
166 int32_t *x2 = x + (points>>1) - 8;
167 register int32_t r0;
168 register int32_t r1;
169 register int32_t r2;
170 register int32_t r3;
173 r0 = x1[6] - x2[6]; x1[6] += x2[6];
174 r1 = x2[7] - x1[7]; x1[7] += x2[7];
175 r2 = x1[4] - x2[4]; x1[4] += x2[4];
176 r3 = x2[5] - x1[5]; x1[5] += x2[5];
177 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
178 XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
180 r0 = x1[2] - x2[2]; x1[2] += x2[2];
181 r1 = x2[3] - x1[3]; x1[3] += x2[3];
182 r2 = x1[0] - x2[0]; x1[0] += x2[0];
183 r3 = x2[1] - x1[1]; x1[1] += x2[1];
184 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
185 XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
187 x1-=8; x2-=8;
188 }while(T<sincos_lookup0+1024);
190 r0 = x1[6] - x2[6]; x1[6] += x2[6];
191 r1 = x1[7] - x2[7]; x1[7] += x2[7];
192 r2 = x1[4] - x2[4]; x1[4] += x2[4];
193 r3 = x1[5] - x2[5]; x1[5] += x2[5];
194 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
195 XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
197 r0 = x1[2] - x2[2]; x1[2] += x2[2];
198 r1 = x1[3] - x2[3]; x1[3] += x2[3];
199 r2 = x1[0] - x2[0]; x1[0] += x2[0];
200 r3 = x1[1] - x2[1]; x1[1] += x2[1];
201 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
202 XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
204 x1-=8; x2-=8;
205 }while(T>sincos_lookup0);
207 r0 = x2[6] - x1[6]; x1[6] += x2[6];
208 r1 = x2[7] - x1[7]; x1[7] += x2[7];
209 r2 = x2[4] - x1[4]; x1[4] += x2[4];
210 r3 = x2[5] - x1[5]; x1[5] += x2[5];
211 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
212 XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
214 r0 = x2[2] - x1[2]; x1[2] += x2[2];
215 r1 = x2[3] - x1[3]; x1[3] += x2[3];
216 r2 = x2[0] - x1[0]; x1[0] += x2[0];
217 r3 = x2[1] - x1[1]; x1[1] += x2[1];
218 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
219 XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
221 x1-=8; x2-=8;
222 }while(T<sincos_lookup0+1024);
224 r0 = x1[6] - x2[6]; x1[6] += x2[6];
225 r1 = x2[7] - x1[7]; x1[7] += x2[7];
226 r2 = x1[4] - x2[4]; x1[4] += x2[4];
227 r3 = x2[5] - x1[5]; x1[5] += x2[5];
228 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
229 XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
231 r0 = x1[2] - x2[2]; x1[2] += x2[2];
232 r1 = x2[3] - x1[3]; x1[3] += x2[3];
233 r2 = x1[0] - x2[0]; x1[0] += x2[0];
234 r3 = x2[1] - x1[1]; x1[1] += x2[1];
235 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
236 XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
238 x1-=8; x2-=8;
239 }while(T>sincos_lookup0);
242 #endif /* CPU_ARM */
244 static inline void mdct_butterflies(int32_t *x,int points,int shift) {
246 int stages=8-shift;
247 int i,j;
249 for(i=0;--stages>0;i++){
250 for(j=0;j<(1<<i);j++)
251 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
254 for(j=0;j<points;j+=32)
255 mdct_butterfly_32(x+j);
259 static const unsigned char bitrev[16] ICONST_ATTR =
260 {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
262 static inline int bitrev12(int x){
263 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
266 static inline void mdct_bitreverse(int32_t *x,int n,int step,int shift) {
268 int bit = 0;
269 int32_t *w0 = x;
270 int32_t *w1 = x = w0+(n>>1);
271 const int32_t *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
272 const int32_t *Ttop = T+1024;
273 register int32_t r2;
276 register int32_t r3 = bitrev12(bit++);
277 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
278 int32_t *x1 = x + (r3>>shift);
280 register int32_t r0 = x0[0] + x1[0];
281 register int32_t r1 = x1[1] - x0[1];
283 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
285 w1 -= 4;
287 r0 = (x0[1] + x1[1])>>1;
288 r1 = (x0[0] - x1[0])>>1;
289 w0[0] = r0 + r2;
290 w0[1] = r1 + r3;
291 w1[2] = r0 - r2;
292 w1[3] = r3 - r1;
294 r3 = bitrev12(bit++);
295 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
296 x1 = x + (r3>>shift);
298 r0 = x0[0] + x1[0];
299 r1 = x1[1] - x0[1];
301 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
303 r0 = (x0[1] + x1[1])>>1;
304 r1 = (x0[0] - x1[0])>>1;
305 w0[2] = r0 + r2;
306 w0[3] = r1 + r3;
307 w1[0] = r0 - r2;
308 w1[1] = r3 - r1;
310 w0 += 4;
311 }while(T<Ttop);
313 register int32_t r3 = bitrev12(bit++);
314 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
315 int32_t *x1 = x + (r3>>shift);
317 register int32_t r0 = x0[0] + x1[0];
318 register int32_t r1 = x1[1] - x0[1];
320 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
322 w1 -= 4;
324 r0 = (x0[1] + x1[1])>>1;
325 r1 = (x0[0] - x1[0])>>1;
326 w0[0] = r0 + r2;
327 w0[1] = r1 + r3;
328 w1[2] = r0 - r2;
329 w1[3] = r3 - r1;
331 r3 = bitrev12(bit++);
332 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
333 x1 = x + (r3>>shift);
335 r0 = x0[0] + x1[0];
336 r1 = x1[1] - x0[1];
338 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
340 r0 = (x0[1] + x1[1])>>1;
341 r1 = (x0[0] - x1[0])>>1;
342 w0[2] = r0 + r2;
343 w0[3] = r1 + r3;
344 w1[0] = r0 - r2;
345 w1[1] = r3 - r1;
347 w0 += 4;
348 }while(w0<w1);
352 void mdct_backward(int n, int32_t *in, int32_t *out)
353 ICODE_ATTR_TREMOR_MDCT;
354 void mdct_backward(int n, int32_t *in, int32_t *out) {
355 int n2=n>>1;
356 int n4=n>>2;
357 int32_t *iX;
358 int32_t *oX;
359 const int32_t *T;
360 const int32_t *V;
361 int shift;
362 int step;
363 for (shift=6;!(n&(1<<shift));shift++);
364 shift=13-shift;
365 step=2<<shift;
367 /* rotate */
369 iX = in+n2-7;
370 oX = out+n2+n4;
371 T = sincos_lookup0;
374 oX-=4;
375 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
376 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
377 iX-=8;
378 }while(iX>=in+n4);
380 oX-=4;
381 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
382 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
383 iX-=8;
384 }while(iX>=in);
386 iX = in+n2-8;
387 oX = out+n2+n4;
388 T = sincos_lookup0;
391 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
392 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
393 iX-=8;
394 oX+=4;
395 }while(iX>=in+n4);
397 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
398 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
399 iX-=8;
400 oX+=4;
401 }while(iX>=in);
403 mdct_butterflies(out+n2,n2,shift);
404 mdct_bitreverse(out,n,step,shift);
405 /* rotate + window */
407 step>>=2;
409 int32_t *oX1=out+n2+n4;
410 int32_t *oX2=out+n2+n4;
411 int32_t *iX =out;
413 switch(step) {
414 default: {
415 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
417 oX1-=4;
418 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
419 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
420 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
421 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
422 oX2+=4;
423 iX+=8;
424 }while(iX<oX1);
425 break;
428 case 1: {
429 /* linear interpolation between table values: offset=0.5, step=1 */
430 register int32_t t0,t1,v0,v1;
431 T = sincos_lookup0;
432 V = sincos_lookup1;
433 t0 = (*T++)>>1;
434 t1 = (*T++)>>1;
436 oX1-=4;
438 t0 += (v0 = (*V++)>>1);
439 t1 += (v1 = (*V++)>>1);
440 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
441 v0 += (t0 = (*T++)>>1);
442 v1 += (t1 = (*T++)>>1);
443 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
444 t0 += (v0 = (*V++)>>1);
445 t1 += (v1 = (*V++)>>1);
446 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
447 v0 += (t0 = (*T++)>>1);
448 v1 += (t1 = (*T++)>>1);
449 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
451 oX2+=4;
452 iX+=8;
453 }while(iX<oX1);
454 break;
457 case 0: {
458 /* linear interpolation between table values: offset=0.25, step=0.5 */
459 register int32_t t0,t1,v0,v1,q0,q1;
460 T = sincos_lookup0;
461 V = sincos_lookup1;
462 t0 = *T++;
463 t1 = *T++;
465 oX1-=4;
467 v0 = *V++;
468 v1 = *V++;
469 t0 += (q0 = (v0-t0)>>2);
470 t1 += (q1 = (v1-t1)>>2);
471 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
472 t0 = v0-q0;
473 t1 = v1-q1;
474 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
476 t0 = *T++;
477 t1 = *T++;
478 v0 += (q0 = (t0-v0)>>2);
479 v1 += (q1 = (t1-v1)>>2);
480 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
481 v0 = t0-q0;
482 v1 = t1-q1;
483 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
485 oX2+=4;
486 iX+=8;
487 }while(iX<oX1);
488 break;
492 iX=out+n2+n4;
493 oX1=out+n4;
494 oX2=oX1;
497 oX1-=4;
498 iX-=4;
500 oX2[0] = -(oX1[3] = iX[3]);
501 oX2[1] = -(oX1[2] = iX[2]);
502 oX2[2] = -(oX1[1] = iX[1]);
503 oX2[3] = -(oX1[0] = iX[0]);
505 oX2+=4;
506 }while(oX2<iX);
508 iX=out+n2+n4;
509 oX1=out+n2+n4;
510 oX2=out+n2;
513 oX1-=4;
514 oX1[0]= iX[3];
515 oX1[1]= iX[2];
516 oX1[2]= iX[1];
517 oX1[3]= iX[0];
518 iX+=4;
519 }while(oX1>oX2);