Update of Czech language - FS#11371 by Marek Salaba
[kugel-rb.git] / apps / codecs / lib / mdct2.c
blobba8b5ca6bee08c3a8a882d2dc67bb0071f15c578
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
33 ********************************************************************/
35 /*Tremor IMDCT adapted for use with libwmai*/
38 #include "mdct2.h"
39 #include "mdct_lookup.h"
40 #ifdef ROCKBOX
41 #include <codecs/lib/codeclib.h>
42 #endif /* ROCKBOX */
44 #if defined(CPU_ARM)
46 extern void mdct_butterfly_32(int32_t *x);
47 extern void mdct_butterfly_generic_loop(int32_t *x1, int32_t *x2,
48 const int32_t *T0, int step,
49 const int32_t *Ttop);
51 static inline void mdct_butterfly_generic(int32_t *x,int points, int step){
52 mdct_butterfly_generic_loop(x + points, x + (points>>1), sincos_lookup0, step, sincos_lookup0+1024);
55 #else
57 /* 8 point butterfly (in place) */
58 static inline void mdct_butterfly_8(int32_t *x){
59 register int32_t r0 = x[4] + x[0];
60 register int32_t r1 = x[4] - x[0];
61 register int32_t r2 = x[5] + x[1];
62 register int32_t r3 = x[5] - x[1];
63 register int32_t r4 = x[6] + x[2];
64 register int32_t r5 = x[6] - x[2];
65 register int32_t r6 = x[7] + x[3];
66 register int32_t r7 = x[7] - x[3];
68 x[0] = r5 + r3;
69 x[1] = r7 - r1;
70 x[2] = r5 - r3;
71 x[3] = r7 + r1;
72 x[4] = r4 - r0;
73 x[5] = r6 - r2;
74 x[6] = r4 + r0;
75 x[7] = r6 + r2;
78 /* 16 point butterfly (in place, 4 register) */
79 static inline void mdct_butterfly_16(int32_t *x){
81 register int32_t r0, r1;
83 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
84 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
85 x[ 0] = MULT31((r0 + r1) , cPI2_8);
86 x[ 1] = MULT31((r1 - r0) , cPI2_8);
88 r0 = x[10] - x[ 2]; x[10] += x[ 2];
89 r1 = x[ 3] - x[11]; x[11] += x[ 3];
90 x[ 2] = r1; x[ 3] = r0;
92 r0 = x[12] - x[ 4]; x[12] += x[ 4];
93 r1 = x[13] - x[ 5]; x[13] += x[ 5];
94 x[ 4] = MULT31((r0 - r1) , cPI2_8);
95 x[ 5] = MULT31((r0 + r1) , cPI2_8);
97 r0 = x[14] - x[ 6]; x[14] += x[ 6];
98 r1 = x[15] - x[ 7]; x[15] += x[ 7];
99 x[ 6] = r0; x[ 7] = r1;
101 mdct_butterfly_8(x);
102 mdct_butterfly_8(x+8);
105 /* 32 point butterfly (in place, 4 register) */
106 static inline void mdct_butterfly_32(int32_t *x){
108 register int32_t r0, r1;
110 r0 = x[30] - x[14]; x[30] += x[14];
111 r1 = x[31] - x[15]; x[31] += x[15];
112 x[14] = r0; x[15] = r1;
114 r0 = x[28] - x[12]; x[28] += x[12];
115 r1 = x[29] - x[13]; x[29] += x[13];
116 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
118 r0 = x[26] - x[10]; x[26] += x[10];
119 r1 = x[27] - x[11]; x[27] += x[11];
120 x[10] = MULT31((r0 - r1) , cPI2_8);
121 x[11] = MULT31((r0 + r1) , cPI2_8);
123 r0 = x[24] - x[ 8]; x[24] += x[ 8];
124 r1 = x[25] - x[ 9]; x[25] += x[ 9];
125 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
127 r0 = x[22] - x[ 6]; x[22] += x[ 6];
128 r1 = x[ 7] - x[23]; x[23] += x[ 7];
129 x[ 6] = r1; x[ 7] = r0;
131 r0 = x[ 4] - x[20]; x[20] += x[ 4];
132 r1 = x[ 5] - x[21]; x[21] += x[ 5];
133 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
135 r0 = x[ 2] - x[18]; x[18] += x[ 2];
136 r1 = x[ 3] - x[19]; x[19] += x[ 3];
137 x[ 2] = MULT31((r1 + r0) , cPI2_8);
138 x[ 3] = MULT31((r1 - r0) , cPI2_8);
140 r0 = x[ 0] - x[16]; x[16] += x[ 0];
141 r1 = x[ 1] - x[17]; x[17] += x[ 1];
142 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
144 mdct_butterfly_16(x);
145 mdct_butterfly_16(x+16);
148 /* N/stage point generic N stage butterfly (in place, 4 register) */
149 void mdct_butterfly_generic(int32_t *x,int points, int step)
150 ICODE_ATTR_TREMOR_MDCT;
151 void mdct_butterfly_generic(int32_t *x,int points, int step){
152 const int32_t *T = sincos_lookup0;
153 int32_t *x1 = x + points - 8;
154 int32_t *x2 = x + (points>>1) - 8;
155 register int32_t r0;
156 register int32_t r1;
157 register int32_t r2;
158 register int32_t r3;
161 r0 = x1[6] - x2[6]; x1[6] += x2[6];
162 r1 = x2[7] - x1[7]; x1[7] += x2[7];
163 r2 = x1[4] - x2[4]; x1[4] += x2[4];
164 r3 = x2[5] - x1[5]; x1[5] += x2[5];
165 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
166 XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
168 r0 = x1[2] - x2[2]; x1[2] += x2[2];
169 r1 = x2[3] - x1[3]; x1[3] += x2[3];
170 r2 = x1[0] - x2[0]; x1[0] += x2[0];
171 r3 = x2[1] - x1[1]; x1[1] += x2[1];
172 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
173 XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
175 x1-=8; x2-=8;
176 }while(T<sincos_lookup0+1024);
178 r0 = x1[6] - x2[6]; x1[6] += x2[6];
179 r1 = x1[7] - x2[7]; x1[7] += x2[7];
180 r2 = x1[4] - x2[4]; x1[4] += x2[4];
181 r3 = x1[5] - x2[5]; x1[5] += x2[5];
182 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
183 XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
185 r0 = x1[2] - x2[2]; x1[2] += x2[2];
186 r1 = x1[3] - x2[3]; x1[3] += x2[3];
187 r2 = x1[0] - x2[0]; x1[0] += x2[0];
188 r3 = x1[1] - x2[1]; x1[1] += x2[1];
189 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
190 XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
192 x1-=8; x2-=8;
193 }while(T>sincos_lookup0);
195 r0 = x2[6] - x1[6]; x1[6] += x2[6];
196 r1 = x2[7] - x1[7]; x1[7] += x2[7];
197 r2 = x2[4] - x1[4]; x1[4] += x2[4];
198 r3 = x2[5] - x1[5]; x1[5] += x2[5];
199 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
200 XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
202 r0 = x2[2] - x1[2]; x1[2] += x2[2];
203 r1 = x2[3] - x1[3]; x1[3] += x2[3];
204 r2 = x2[0] - x1[0]; x1[0] += x2[0];
205 r3 = x2[1] - x1[1]; x1[1] += x2[1];
206 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
207 XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
209 x1-=8; x2-=8;
210 }while(T<sincos_lookup0+1024);
212 r0 = x1[6] - x2[6]; x1[6] += x2[6];
213 r1 = x2[7] - x1[7]; x1[7] += x2[7];
214 r2 = x1[4] - x2[4]; x1[4] += x2[4];
215 r3 = x2[5] - x1[5]; x1[5] += x2[5];
216 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
217 XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
219 r0 = x1[2] - x2[2]; x1[2] += x2[2];
220 r1 = x2[3] - x1[3]; x1[3] += x2[3];
221 r2 = x1[0] - x2[0]; x1[0] += x2[0];
222 r3 = x2[1] - x1[1]; x1[1] += x2[1];
223 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
224 XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
226 x1-=8; x2-=8;
227 }while(T>sincos_lookup0);
230 #endif /* CPU_ARM */
232 static inline void mdct_butterflies(int32_t *x,int points,int shift) {
234 int stages=8-shift;
235 int i,j;
237 for(i=0;--stages>0;i++){
238 for(j=0;j<(1<<i);j++)
239 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
242 for(j=0;j<points;j+=32)
243 mdct_butterfly_32(x+j);
246 static const unsigned char bitrev[] ICONST_ATTR =
248 0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
249 2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
250 1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
251 3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63
254 static inline int bitrev12(int x){
255 return bitrev[x>>6]|((bitrev[x&0x03f])<<6);
258 static inline void mdct_bitreverse(int32_t *x,int n,int step,int shift) {
260 int bit = 0;
261 int32_t *w0 = x;
262 int32_t *w1 = x = w0+(n>>1);
263 const int32_t *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
264 const int32_t *Ttop = T+1024;
265 register int32_t r2;
268 register int32_t r3 = bitrev12(bit++);
269 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
270 int32_t *x1 = x + (r3>>shift);
272 register int32_t r0 = x0[0] + x1[0];
273 register int32_t r1 = x1[1] - x0[1];
275 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
277 w1 -= 4;
279 r0 = (x0[1] + x1[1])>>1;
280 r1 = (x0[0] - x1[0])>>1;
281 w0[0] = r0 + r2;
282 w0[1] = r1 + r3;
283 w1[2] = r0 - r2;
284 w1[3] = r3 - r1;
286 r3 = bitrev12(bit++);
287 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
288 x1 = x + (r3>>shift);
290 r0 = x0[0] + x1[0];
291 r1 = x1[1] - x0[1];
293 XPROD32( r0, r1, T[1], T[0], r2, r3 ); T+=step;
295 r0 = (x0[1] + x1[1])>>1;
296 r1 = (x0[0] - x1[0])>>1;
297 w0[2] = r0 + r2;
298 w0[3] = r1 + r3;
299 w1[0] = r0 - r2;
300 w1[1] = r3 - r1;
302 w0 += 4;
303 }while(T<Ttop);
305 register int32_t r3 = bitrev12(bit++);
306 int32_t *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
307 int32_t *x1 = x + (r3>>shift);
309 register int32_t r0 = x0[0] + x1[0];
310 register int32_t r1 = x1[1] - x0[1];
312 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
314 w1 -= 4;
316 r0 = (x0[1] + x1[1])>>1;
317 r1 = (x0[0] - x1[0])>>1;
318 w0[0] = r0 + r2;
319 w0[1] = r1 + r3;
320 w1[2] = r0 - r2;
321 w1[3] = r3 - r1;
323 r3 = bitrev12(bit++);
324 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
325 x1 = x + (r3>>shift);
327 r0 = x0[0] + x1[0];
328 r1 = x1[1] - x0[1];
330 T-=step; XPROD32( r0, r1, T[0], T[1], r2, r3 );
332 r0 = (x0[1] + x1[1])>>1;
333 r1 = (x0[0] - x1[0])>>1;
334 w0[2] = r0 + r2;
335 w0[3] = r1 + r3;
336 w1[0] = r0 - r2;
337 w1[1] = r3 - r1;
339 w0 += 4;
340 }while(w0<w1);
344 void mdct_backward(int n, int32_t *in, int32_t *out)
345 ICODE_ATTR_TREMOR_MDCT;
346 void mdct_backward(int n, int32_t *in, int32_t *out) {
347 int n2=n>>1;
348 int n4=n>>2;
349 int32_t *iX;
350 int32_t *oX;
351 const int32_t *T;
352 const int32_t *V;
353 int shift;
354 int step;
355 for (shift=6;!(n&(1<<shift));shift++);
356 shift=13-shift;
357 step=2<<shift;
359 /* rotate */
361 iX = in+n2-7;
362 oX = out+n2+n4;
363 T = sincos_lookup0;
366 oX-=4;
367 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
368 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
369 iX-=8;
370 }while(iX>=in+n4);
372 oX-=4;
373 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
374 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
375 iX-=8;
376 }while(iX>=in);
378 iX = in+n2-8;
379 oX = out+n2+n4;
380 T = sincos_lookup0;
383 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
384 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
385 iX-=8;
386 oX+=4;
387 }while(iX>=in+n4);
389 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
390 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
391 iX-=8;
392 oX+=4;
393 }while(iX>=in);
395 mdct_butterflies(out+n2,n2,shift);
396 mdct_bitreverse(out,n,step,shift);
397 /* rotate + window */
399 step>>=2;
401 int32_t *oX1=out+n2+n4;
402 int32_t *oX2=out+n2+n4;
403 int32_t *iX =out;
405 switch(step) {
406 default: {
407 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
409 oX1-=4;
410 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
411 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
412 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
413 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
414 oX2+=4;
415 iX+=8;
416 }while(iX<oX1);
417 break;
420 case 1: {
421 /* linear interpolation between table values: offset=0.5, step=1 */
422 register int32_t t0,t1,v0,v1;
423 T = sincos_lookup0;
424 V = sincos_lookup1;
425 t0 = (*T++)>>1;
426 t1 = (*T++)>>1;
428 oX1-=4;
430 t0 += (v0 = (*V++)>>1);
431 t1 += (v1 = (*V++)>>1);
432 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
433 v0 += (t0 = (*T++)>>1);
434 v1 += (t1 = (*T++)>>1);
435 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
436 t0 += (v0 = (*V++)>>1);
437 t1 += (v1 = (*V++)>>1);
438 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
439 v0 += (t0 = (*T++)>>1);
440 v1 += (t1 = (*T++)>>1);
441 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
443 oX2+=4;
444 iX+=8;
445 }while(iX<oX1);
446 break;
449 case 0: {
450 /* linear interpolation between table values: offset=0.25, step=0.5 */
451 register int32_t t0,t1,v0,v1,q0,q1;
452 T = sincos_lookup0;
453 V = sincos_lookup1;
454 t0 = *T++;
455 t1 = *T++;
457 oX1-=4;
459 v0 = *V++;
460 v1 = *V++;
461 t0 += (q0 = (v0-t0)>>2);
462 t1 += (q1 = (v1-t1)>>2);
463 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
464 t0 = v0-q0;
465 t1 = v1-q1;
466 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
468 t0 = *T++;
469 t1 = *T++;
470 v0 += (q0 = (t0-v0)>>2);
471 v1 += (q1 = (t1-v1)>>2);
472 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
473 v0 = t0-q0;
474 v1 = t1-q1;
475 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
477 oX2+=4;
478 iX+=8;
479 }while(iX<oX1);
480 break;
484 iX=out+n2+n4;
485 oX1=out+n4;
486 oX2=oX1;
489 oX1-=4;
490 iX-=4;
492 oX2[0] = -(oX1[3] = iX[3]);
493 oX2[1] = -(oX1[2] = iX[2]);
494 oX2[2] = -(oX1[1] = iX[1]);
495 oX2[3] = -(oX1[0] = iX[0]);
497 oX2+=4;
498 }while(oX2<iX);
500 iX=out+n2+n4;
501 oX1=out+n2+n4;
502 oX2=out+n2;
505 oX1-=4;
506 oX1[0]= iX[3];
507 oX1[1]= iX[2];
508 oX1[2]= iX[1];
509 oX1[3]= iX[0];
510 iX+=4;
511 }while(oX1>oX2);