Updated our source code header to explicitly mention that we are GPL v2 or
[Rockbox.git] / apps / codecs / Tremor / mdct.c
blobdce2a9d1969758e7c90b7624cc8ba5fc67e587b3
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
16 last mod: $Id$
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
33 ********************************************************************/
35 #include "ivorbiscodec.h"
36 #include "os.h"
37 #include "misc.h"
38 #include "mdct.h"
39 #include "mdct_lookup.h"
41 #if defined(CPU_ARM) && CONFIG_CPU != S3C2440
42 /* C code is faster on S3C2440 */
44 extern void mdct_butterfly_32(DATA_TYPE *x);
45 extern void mdct_butterfly_generic_loop(DATA_TYPE *x1, DATA_TYPE *x2,
46 LOOKUP_T *T0, int step,
47 LOOKUP_T *Ttop);
49 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
50 mdct_butterfly_generic_loop(x + points, x + (points>>1),
51 sincos_lookup0, step, sincos_lookup0+1024);
54 #else
56 /* 8 point butterfly (in place) */
57 STIN void mdct_butterfly_8(DATA_TYPE *x){
58 REG_TYPE r0 = x[4] + x[0];
59 REG_TYPE r1 = x[4] - x[0];
60 REG_TYPE r2 = x[5] + x[1];
61 REG_TYPE r3 = x[5] - x[1];
62 REG_TYPE r4 = x[6] + x[2];
63 REG_TYPE r5 = x[6] - x[2];
64 REG_TYPE r6 = x[7] + x[3];
65 REG_TYPE r7 = x[7] - x[3];
67 x[0] = r5 + r3;
68 x[1] = r7 - r1;
69 x[2] = r5 - r3;
70 x[3] = r7 + r1;
71 x[4] = r4 - r0;
72 x[5] = r6 - r2;
73 x[6] = r4 + r0;
74 x[7] = r6 + r2;
75 MB();
78 /* 16 point butterfly (in place, 4 register) */
79 STIN void mdct_butterfly_16(DATA_TYPE *x){
81 REG_TYPE r0, r1;
83 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
84 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
85 x[ 0] = MULT31((r0 + r1) , cPI2_8);
86 x[ 1] = MULT31((r1 - r0) , cPI2_8);
87 MB();
89 r0 = x[10] - x[ 2]; x[10] += x[ 2];
90 r1 = x[ 3] - x[11]; x[11] += x[ 3];
91 x[ 2] = r1; x[ 3] = r0;
92 MB();
94 r0 = x[12] - x[ 4]; x[12] += x[ 4];
95 r1 = x[13] - x[ 5]; x[13] += x[ 5];
96 x[ 4] = MULT31((r0 - r1) , cPI2_8);
97 x[ 5] = MULT31((r0 + r1) , cPI2_8);
98 MB();
100 r0 = x[14] - x[ 6]; x[14] += x[ 6];
101 r1 = x[15] - x[ 7]; x[15] += x[ 7];
102 x[ 6] = r0; x[ 7] = r1;
103 MB();
105 mdct_butterfly_8(x);
106 mdct_butterfly_8(x+8);
109 /* 32 point butterfly (in place, 4 register) */
110 STIN void mdct_butterfly_32(DATA_TYPE *x){
112 REG_TYPE r0, r1;
114 r0 = x[30] - x[14]; x[30] += x[14];
115 r1 = x[31] - x[15]; x[31] += x[15];
116 x[14] = r0; x[15] = r1;
117 MB();
119 r0 = x[28] - x[12]; x[28] += x[12];
120 r1 = x[29] - x[13]; x[29] += x[13];
121 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
122 MB();
124 r0 = x[26] - x[10]; x[26] += x[10];
125 r1 = x[27] - x[11]; x[27] += x[11];
126 x[10] = MULT31((r0 - r1) , cPI2_8);
127 x[11] = MULT31((r0 + r1) , cPI2_8);
128 MB();
130 r0 = x[24] - x[ 8]; x[24] += x[ 8];
131 r1 = x[25] - x[ 9]; x[25] += x[ 9];
132 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
133 MB();
135 r0 = x[22] - x[ 6]; x[22] += x[ 6];
136 r1 = x[ 7] - x[23]; x[23] += x[ 7];
137 x[ 6] = r1; x[ 7] = r0;
138 MB();
140 r0 = x[ 4] - x[20]; x[20] += x[ 4];
141 r1 = x[ 5] - x[21]; x[21] += x[ 5];
142 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
143 MB();
145 r0 = x[ 2] - x[18]; x[18] += x[ 2];
146 r1 = x[ 3] - x[19]; x[19] += x[ 3];
147 x[ 2] = MULT31((r1 + r0) , cPI2_8);
148 x[ 3] = MULT31((r1 - r0) , cPI2_8);
149 MB();
151 r0 = x[ 0] - x[16]; x[16] += x[ 0];
152 r1 = x[ 1] - x[17]; x[17] += x[ 1];
153 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
154 MB();
156 mdct_butterfly_16(x);
157 mdct_butterfly_16(x+16);
160 /* N/stage point generic N stage butterfly (in place, 4 register) */
161 void mdct_butterfly_generic(DATA_TYPE *x,int points, int step)
162 ICODE_ATTR_TREMOR_MDCT;
163 void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
164 LOOKUP_T *T = sincos_lookup0;
165 DATA_TYPE *x1 = x + points - 8;
166 DATA_TYPE *x2 = x + (points>>1) - 8;
167 REG_TYPE r0;
168 REG_TYPE r1;
169 REG_TYPE r2;
170 REG_TYPE r3;
173 r0 = x1[6] - x2[6]; x1[6] += x2[6];
174 r1 = x2[7] - x1[7]; x1[7] += x2[7];
175 r2 = x1[4] - x2[4]; x1[4] += x2[4];
176 r3 = x2[5] - x1[5]; x1[5] += x2[5];
177 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
178 XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
180 r0 = x1[2] - x2[2]; x1[2] += x2[2];
181 r1 = x2[3] - x1[3]; x1[3] += x2[3];
182 r2 = x1[0] - x2[0]; x1[0] += x2[0];
183 r3 = x2[1] - x1[1]; x1[1] += x2[1];
184 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
185 XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
187 x1-=8; x2-=8;
188 }while(T<sincos_lookup0+1024);
190 r0 = x1[6] - x2[6]; x1[6] += x2[6];
191 r1 = x1[7] - x2[7]; x1[7] += x2[7];
192 r2 = x1[4] - x2[4]; x1[4] += x2[4];
193 r3 = x1[5] - x2[5]; x1[5] += x2[5];
194 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
195 XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
197 r0 = x1[2] - x2[2]; x1[2] += x2[2];
198 r1 = x1[3] - x2[3]; x1[3] += x2[3];
199 r2 = x1[0] - x2[0]; x1[0] += x2[0];
200 r3 = x1[1] - x2[1]; x1[1] += x2[1];
201 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
202 XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
204 x1-=8; x2-=8;
205 }while(T>sincos_lookup0);
207 r0 = x2[6] - x1[6]; x1[6] += x2[6];
208 r1 = x2[7] - x1[7]; x1[7] += x2[7];
209 r2 = x2[4] - x1[4]; x1[4] += x2[4];
210 r3 = x2[5] - x1[5]; x1[5] += x2[5];
211 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
212 XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
214 r0 = x2[2] - x1[2]; x1[2] += x2[2];
215 r1 = x2[3] - x1[3]; x1[3] += x2[3];
216 r2 = x2[0] - x1[0]; x1[0] += x2[0];
217 r3 = x2[1] - x1[1]; x1[1] += x2[1];
218 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
219 XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
221 x1-=8; x2-=8;
222 }while(T<sincos_lookup0+1024);
224 r0 = x1[6] - x2[6]; x1[6] += x2[6];
225 r1 = x2[7] - x1[7]; x1[7] += x2[7];
226 r2 = x1[4] - x2[4]; x1[4] += x2[4];
227 r3 = x2[5] - x1[5]; x1[5] += x2[5];
228 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
229 XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
231 r0 = x1[2] - x2[2]; x1[2] += x2[2];
232 r1 = x2[3] - x1[3]; x1[3] += x2[3];
233 r2 = x1[0] - x2[0]; x1[0] += x2[0];
234 r3 = x2[1] - x1[1]; x1[1] += x2[1];
235 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
236 XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
238 x1-=8; x2-=8;
239 }while(T>sincos_lookup0);
242 #endif /* CPU_ARM */
244 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) {
246 int stages=8-shift;
247 int i,j;
249 for(i=0;--stages>0;i++){
250 for(j=0;j<(1<<i);j++)
251 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
254 for(j=0;j<points;j+=32)
255 mdct_butterfly_32(x+j);
259 static const unsigned char bitrev[16] ICONST_ATTR =
260 {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
262 STIN int bitrev12(int x){
263 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
266 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift) {
268 int bit = 0;
269 DATA_TYPE *w0 = x;
270 DATA_TYPE *w1 = x = w0+(n>>1);
271 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
272 LOOKUP_T *Ttop = T+1024;
273 REG_TYPE r2;
276 REG_TYPE r3 = bitrev12(bit++);
277 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
278 DATA_TYPE *x1 = x + (r3>>shift);
280 REG_TYPE r0 = x0[0] + x1[0];
281 REG_TYPE r1 = x1[1] - x0[1];
283 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
285 w1 -= 4;
287 r0 = (x0[1] + x1[1])>>1;
288 r1 = (x0[0] - x1[0])>>1;
289 w0[0] = r0 + r2;
290 w0[1] = r1 + r3;
291 w1[2] = r0 - r2;
292 w1[3] = r3 - r1;
294 r3 = bitrev12(bit++);
295 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
296 x1 = x + (r3>>shift);
298 r0 = x0[0] + x1[0];
299 r1 = x1[1] - x0[1];
301 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
303 r0 = (x0[1] + x1[1])>>1;
304 r1 = (x0[0] - x1[0])>>1;
305 w0[2] = r0 + r2;
306 w0[3] = r1 + r3;
307 w1[0] = r0 - r2;
308 w1[1] = r3 - r1;
310 w0 += 4;
311 }while(T<Ttop);
313 REG_TYPE r3 = bitrev12(bit++);
314 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
315 DATA_TYPE *x1 = x + (r3>>shift);
317 REG_TYPE r0 = x0[0] + x1[0];
318 REG_TYPE r1 = x1[1] - x0[1];
320 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
322 w1 -= 4;
324 r0 = (x0[1] + x1[1])>>1;
325 r1 = (x0[0] - x1[0])>>1;
326 w0[0] = r0 + r2;
327 w0[1] = r1 + r3;
328 w1[2] = r0 - r2;
329 w1[3] = r3 - r1;
331 r3 = bitrev12(bit++);
332 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
333 x1 = x + (r3>>shift);
335 r0 = x0[0] + x1[0];
336 r1 = x1[1] - x0[1];
338 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
340 r0 = (x0[1] + x1[1])>>1;
341 r1 = (x0[0] - x1[0])>>1;
342 w0[2] = r0 + r2;
343 w0[3] = r1 + r3;
344 w1[0] = r0 - r2;
345 w1[1] = r3 - r1;
347 w0 += 4;
348 }while(w0<w1);
352 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out)
353 ICODE_ATTR_TREMOR_MDCT;
354 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out) {
355 int n2=n>>1;
356 int n4=n>>2;
357 DATA_TYPE *iX;
358 DATA_TYPE *oX;
359 LOOKUP_T *T;
360 LOOKUP_T *V;
361 int shift;
362 int step;
364 for (shift=6;!(n&(1<<shift));shift++);
365 shift=13-shift;
366 step=2<<shift;
368 /* rotate */
370 iX = in+n2-7;
371 oX = out+n2+n4;
372 T = sincos_lookup0;
375 oX-=4;
376 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
377 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
378 iX-=8;
379 }while(iX>=in+n4);
381 oX-=4;
382 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
383 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
384 iX-=8;
385 }while(iX>=in);
387 iX = in+n2-8;
388 oX = out+n2+n4;
389 T = sincos_lookup0;
392 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
393 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
394 iX-=8;
395 oX+=4;
396 }while(iX>=in+n4);
398 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
399 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
400 iX-=8;
401 oX+=4;
402 }while(iX>=in);
404 mdct_butterflies(out+n2,n2,shift);
405 mdct_bitreverse(out,n,step,shift);
406 /* rotate + window */
408 step>>=2;
410 DATA_TYPE *oX1=out+n2+n4;
411 DATA_TYPE *oX2=out+n2+n4;
412 DATA_TYPE *iX =out;
414 switch(step) {
415 default: {
416 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
418 oX1-=4;
419 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
420 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
421 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
422 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
423 oX2+=4;
424 iX+=8;
425 }while(iX<oX1);
426 break;
429 case 1: {
430 /* linear interpolation between table values: offset=0.5, step=1 */
431 REG_TYPE t0,t1,v0,v1;
432 T = sincos_lookup0;
433 V = sincos_lookup1;
434 t0 = (*T++)>>1;
435 t1 = (*T++)>>1;
437 oX1-=4;
439 t0 += (v0 = (*V++)>>1);
440 t1 += (v1 = (*V++)>>1);
441 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
442 v0 += (t0 = (*T++)>>1);
443 v1 += (t1 = (*T++)>>1);
444 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
445 t0 += (v0 = (*V++)>>1);
446 t1 += (v1 = (*V++)>>1);
447 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
448 v0 += (t0 = (*T++)>>1);
449 v1 += (t1 = (*T++)>>1);
450 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
452 oX2+=4;
453 iX+=8;
454 }while(iX<oX1);
455 break;
458 case 0: {
459 /* linear interpolation between table values: offset=0.25, step=0.5 */
460 REG_TYPE t0,t1,v0,v1,q0,q1;
461 T = sincos_lookup0;
462 V = sincos_lookup1;
463 t0 = *T++;
464 t1 = *T++;
466 oX1-=4;
468 v0 = *V++;
469 v1 = *V++;
470 t0 += (q0 = (v0-t0)>>2);
471 t1 += (q1 = (v1-t1)>>2);
472 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
473 t0 = v0-q0;
474 t1 = v1-q1;
475 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
477 t0 = *T++;
478 t1 = *T++;
479 v0 += (q0 = (t0-v0)>>2);
480 v1 += (q1 = (t1-v1)>>2);
481 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
482 v0 = t0-q0;
483 v1 = t1-q1;
484 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
486 oX2+=4;
487 iX+=8;
488 }while(iX<oX1);
489 break;
493 iX=out+n2+n4;
494 oX1=out+n4;
495 oX2=oX1;
498 oX1-=4;
499 iX-=4;
501 oX2[0] = -(oX1[3] = iX[3]);
502 oX2[1] = -(oX1[2] = iX[2]);
503 oX2[2] = -(oX1[1] = iX[1]);
504 oX2[3] = -(oX1[0] = iX[0]);
506 oX2+=4;
507 }while(oX2<iX);
509 iX=out+n2+n4;
510 oX1=out+n2+n4;
511 oX2=out+n2;
514 oX1-=4;
515 oX1[0]= iX[3];
516 oX1[1]= iX[2];
517 oX1[2]= iX[1];
518 oX1[3]= iX[0];
519 iX+=4;
520 }while(oX1>oX2);