Add support for VDPAU video out, including hardware decoding.
[mplayer/glamo.git] / tremor / mdct.c
blobcc201b210672d935ec707e0471b019bf99eba46b
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
4 * *
5 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
6 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * *
9 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
10 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
11 * *
12 ********************************************************************
14 function: normalized modified discrete cosine transform
15 power of two length transform only [64 <= n ]
16 last mod: $Id$
18 Original algorithm adapted long ago from _The use of multirate filter
19 banks for coding of high quality digital audio_, by T. Sporer,
20 K. Brandenburg and B. Edler, collection of the European Signal
21 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22 211-214
24 The below code implements an algorithm that no longer looks much like
25 that presented in the paper, but the basic structure remains if you
26 dig deep enough to see it.
28 This module DOES NOT INCLUDE code to generate/apply the window
29 function. Everybody has their own weird favorite including me... I
30 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31 vehemently disagree.
33 ********************************************************************/
35 #include "ivorbiscodec.h"
36 #include "os.h"
37 #include "misc.h"
38 #include "mdct.h"
39 #include "mdct_lookup.h"
42 /* 8 point butterfly (in place) */
43 STIN void mdct_butterfly_8(DATA_TYPE *x){
45 REG_TYPE r0 = x[4] + x[0];
46 REG_TYPE r1 = x[4] - x[0];
47 REG_TYPE r2 = x[5] + x[1];
48 REG_TYPE r3 = x[5] - x[1];
49 REG_TYPE r4 = x[6] + x[2];
50 REG_TYPE r5 = x[6] - x[2];
51 REG_TYPE r6 = x[7] + x[3];
52 REG_TYPE r7 = x[7] - x[3];
54 x[0] = r5 + r3;
55 x[1] = r7 - r1;
56 x[2] = r5 - r3;
57 x[3] = r7 + r1;
58 x[4] = r4 - r0;
59 x[5] = r6 - r2;
60 x[6] = r4 + r0;
61 x[7] = r6 + r2;
62 MB();
65 /* 16 point butterfly (in place, 4 register) */
66 STIN void mdct_butterfly_16(DATA_TYPE *x){
68 REG_TYPE r0, r1;
70 r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
71 r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
72 x[ 0] = MULT31((r0 + r1) , cPI2_8);
73 x[ 1] = MULT31((r1 - r0) , cPI2_8);
74 MB();
76 r0 = x[10] - x[ 2]; x[10] += x[ 2];
77 r1 = x[ 3] - x[11]; x[11] += x[ 3];
78 x[ 2] = r1; x[ 3] = r0;
79 MB();
81 r0 = x[12] - x[ 4]; x[12] += x[ 4];
82 r1 = x[13] - x[ 5]; x[13] += x[ 5];
83 x[ 4] = MULT31((r0 - r1) , cPI2_8);
84 x[ 5] = MULT31((r0 + r1) , cPI2_8);
85 MB();
87 r0 = x[14] - x[ 6]; x[14] += x[ 6];
88 r1 = x[15] - x[ 7]; x[15] += x[ 7];
89 x[ 6] = r0; x[ 7] = r1;
90 MB();
92 mdct_butterfly_8(x);
93 mdct_butterfly_8(x+8);
96 /* 32 point butterfly (in place, 4 register) */
97 STIN void mdct_butterfly_32(DATA_TYPE *x){
99 REG_TYPE r0, r1;
101 r0 = x[30] - x[14]; x[30] += x[14];
102 r1 = x[31] - x[15]; x[31] += x[15];
103 x[14] = r0; x[15] = r1;
104 MB();
106 r0 = x[28] - x[12]; x[28] += x[12];
107 r1 = x[29] - x[13]; x[29] += x[13];
108 XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
109 MB();
111 r0 = x[26] - x[10]; x[26] += x[10];
112 r1 = x[27] - x[11]; x[27] += x[11];
113 x[10] = MULT31((r0 - r1) , cPI2_8);
114 x[11] = MULT31((r0 + r1) , cPI2_8);
115 MB();
117 r0 = x[24] - x[ 8]; x[24] += x[ 8];
118 r1 = x[25] - x[ 9]; x[25] += x[ 9];
119 XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
120 MB();
122 r0 = x[22] - x[ 6]; x[22] += x[ 6];
123 r1 = x[ 7] - x[23]; x[23] += x[ 7];
124 x[ 6] = r1; x[ 7] = r0;
125 MB();
127 r0 = x[ 4] - x[20]; x[20] += x[ 4];
128 r1 = x[ 5] - x[21]; x[21] += x[ 5];
129 XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
130 MB();
132 r0 = x[ 2] - x[18]; x[18] += x[ 2];
133 r1 = x[ 3] - x[19]; x[19] += x[ 3];
134 x[ 2] = MULT31((r1 + r0) , cPI2_8);
135 x[ 3] = MULT31((r1 - r0) , cPI2_8);
136 MB();
138 r0 = x[ 0] - x[16]; x[16] += x[ 0];
139 r1 = x[ 1] - x[17]; x[17] += x[ 1];
140 XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
141 MB();
143 mdct_butterfly_16(x);
144 mdct_butterfly_16(x+16);
147 /* N/stage point generic N stage butterfly (in place, 2 register) */
148 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
150 LOOKUP_T *T = sincos_lookup0;
151 DATA_TYPE *x1 = x + points - 8;
152 DATA_TYPE *x2 = x + (points>>1) - 8;
153 REG_TYPE r0;
154 REG_TYPE r1;
157 r0 = x1[6] - x2[6]; x1[6] += x2[6];
158 r1 = x2[7] - x1[7]; x1[7] += x2[7];
159 XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
161 r0 = x1[4] - x2[4]; x1[4] += x2[4];
162 r1 = x2[5] - x1[5]; x1[5] += x2[5];
163 XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
165 r0 = x1[2] - x2[2]; x1[2] += x2[2];
166 r1 = x2[3] - x1[3]; x1[3] += x2[3];
167 XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
169 r0 = x1[0] - x2[0]; x1[0] += x2[0];
170 r1 = x2[1] - x1[1]; x1[1] += x2[1];
171 XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
173 x1-=8; x2-=8;
174 }while(T<sincos_lookup0+1024);
176 r0 = x1[6] - x2[6]; x1[6] += x2[6];
177 r1 = x1[7] - x2[7]; x1[7] += x2[7];
178 XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
180 r0 = x1[4] - x2[4]; x1[4] += x2[4];
181 r1 = x1[5] - x2[5]; x1[5] += x2[5];
182 XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
184 r0 = x1[2] - x2[2]; x1[2] += x2[2];
185 r1 = x1[3] - x2[3]; x1[3] += x2[3];
186 XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
188 r0 = x1[0] - x2[0]; x1[0] += x2[0];
189 r1 = x1[1] - x2[1]; x1[1] += x2[1];
190 XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
192 x1-=8; x2-=8;
193 }while(T>sincos_lookup0);
195 r0 = x2[6] - x1[6]; x1[6] += x2[6];
196 r1 = x2[7] - x1[7]; x1[7] += x2[7];
197 XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
199 r0 = x2[4] - x1[4]; x1[4] += x2[4];
200 r1 = x2[5] - x1[5]; x1[5] += x2[5];
201 XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
203 r0 = x2[2] - x1[2]; x1[2] += x2[2];
204 r1 = x2[3] - x1[3]; x1[3] += x2[3];
205 XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
207 r0 = x2[0] - x1[0]; x1[0] += x2[0];
208 r1 = x2[1] - x1[1]; x1[1] += x2[1];
209 XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
211 x1-=8; x2-=8;
212 }while(T<sincos_lookup0+1024);
214 r0 = x1[6] - x2[6]; x1[6] += x2[6];
215 r1 = x2[7] - x1[7]; x1[7] += x2[7];
216 XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
218 r0 = x1[4] - x2[4]; x1[4] += x2[4];
219 r1 = x2[5] - x1[5]; x1[5] += x2[5];
220 XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
222 r0 = x1[2] - x2[2]; x1[2] += x2[2];
223 r1 = x2[3] - x1[3]; x1[3] += x2[3];
224 XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
226 r0 = x1[0] - x2[0]; x1[0] += x2[0];
227 r1 = x2[1] - x1[1]; x1[1] += x2[1];
228 XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
230 x1-=8; x2-=8;
231 }while(T>sincos_lookup0);
234 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
236 int stages=8-shift;
237 int i,j;
239 for(i=0;--stages>0;i++){
240 for(j=0;j<(1<<i);j++)
241 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
244 for(j=0;j<points;j+=32)
245 mdct_butterfly_32(x+j);
249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
251 STIN int bitrev12(int x){
252 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
257 int bit = 0;
258 DATA_TYPE *w0 = x;
259 DATA_TYPE *w1 = x = w0+(n>>1);
260 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
261 LOOKUP_T *Ttop = T+1024;
262 DATA_TYPE r2;
265 DATA_TYPE r3 = bitrev12(bit++);
266 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
267 DATA_TYPE *x1 = x + (r3>>shift);
269 REG_TYPE r0 = x0[0] + x1[0];
270 REG_TYPE r1 = x1[1] - x0[1];
272 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
274 w1 -= 4;
276 r0 = (x0[1] + x1[1])>>1;
277 r1 = (x0[0] - x1[0])>>1;
278 w0[0] = r0 + r2;
279 w0[1] = r1 + r3;
280 w1[2] = r0 - r2;
281 w1[3] = r3 - r1;
283 r3 = bitrev12(bit++);
284 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
285 x1 = x + (r3>>shift);
287 r0 = x0[0] + x1[0];
288 r1 = x1[1] - x0[1];
290 XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
292 r0 = (x0[1] + x1[1])>>1;
293 r1 = (x0[0] - x1[0])>>1;
294 w0[2] = r0 + r2;
295 w0[3] = r1 + r3;
296 w1[0] = r0 - r2;
297 w1[1] = r3 - r1;
299 w0 += 4;
300 }while(T<Ttop);
302 DATA_TYPE r3 = bitrev12(bit++);
303 DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
304 DATA_TYPE *x1 = x + (r3>>shift);
306 REG_TYPE r0 = x0[0] + x1[0];
307 REG_TYPE r1 = x1[1] - x0[1];
309 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
311 w1 -= 4;
313 r0 = (x0[1] + x1[1])>>1;
314 r1 = (x0[0] - x1[0])>>1;
315 w0[0] = r0 + r2;
316 w0[1] = r1 + r3;
317 w1[2] = r0 - r2;
318 w1[3] = r3 - r1;
320 r3 = bitrev12(bit++);
321 x0 = x + ((r3 ^ 0xfff)>>shift) -1;
322 x1 = x + (r3>>shift);
324 r0 = x0[0] + x1[0];
325 r1 = x1[1] - x0[1];
327 T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
329 r0 = (x0[1] + x1[1])>>1;
330 r1 = (x0[0] - x1[0])>>1;
331 w0[2] = r0 + r2;
332 w0[3] = r1 + r3;
333 w1[0] = r0 - r2;
334 w1[1] = r3 - r1;
336 w0 += 4;
337 }while(w0<w1);
340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
341 int n2=n>>1;
342 int n4=n>>2;
343 DATA_TYPE *iX;
344 DATA_TYPE *oX;
345 LOOKUP_T *T;
346 LOOKUP_T *V;
347 int shift;
348 int step;
350 for (shift=6;!(n&(1<<shift));shift++);
351 shift=13-shift;
352 step=2<<shift;
354 /* rotate */
356 iX = in+n2-7;
357 oX = out+n2+n4;
358 T = sincos_lookup0;
361 oX-=4;
362 XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
363 XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
364 iX-=8;
365 }while(iX>=in+n4);
367 oX-=4;
368 XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
369 XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
370 iX-=8;
371 }while(iX>=in);
373 iX = in+n2-8;
374 oX = out+n2+n4;
375 T = sincos_lookup0;
378 T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
379 T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
380 iX-=8;
381 oX+=4;
382 }while(iX>=in+n4);
384 T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
385 T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
386 iX-=8;
387 oX+=4;
388 }while(iX>=in);
390 mdct_butterflies(out+n2,n2,shift);
391 mdct_bitreverse(out,n,step,shift);
393 /* rotate + window */
395 step>>=2;
397 DATA_TYPE *oX1=out+n2+n4;
398 DATA_TYPE *oX2=out+n2+n4;
399 DATA_TYPE *iX =out;
401 switch(step) {
402 default: {
403 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
405 oX1-=4;
406 XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
407 XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
408 XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
409 XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
410 oX2+=4;
411 iX+=8;
412 }while(iX<oX1);
413 break;
416 case 1: {
417 /* linear interpolation between table values: offset=0.5, step=1 */
418 REG_TYPE t0,t1,v0,v1;
419 T = sincos_lookup0;
420 V = sincos_lookup1;
421 t0 = (*T++)>>1;
422 t1 = (*T++)>>1;
424 oX1-=4;
426 t0 += (v0 = (*V++)>>1);
427 t1 += (v1 = (*V++)>>1);
428 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
429 v0 += (t0 = (*T++)>>1);
430 v1 += (t1 = (*T++)>>1);
431 XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
432 t0 += (v0 = (*V++)>>1);
433 t1 += (v1 = (*V++)>>1);
434 XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
435 v0 += (t0 = (*T++)>>1);
436 v1 += (t1 = (*T++)>>1);
437 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
439 oX2+=4;
440 iX+=8;
441 }while(iX<oX1);
442 break;
445 case 0: {
446 /* linear interpolation between table values: offset=0.25, step=0.5 */
447 REG_TYPE t0,t1,v0,v1,q0,q1;
448 T = sincos_lookup0;
449 V = sincos_lookup1;
450 t0 = *T++;
451 t1 = *T++;
453 oX1-=4;
455 v0 = *V++;
456 v1 = *V++;
457 t0 += (q0 = (v0-t0)>>2);
458 t1 += (q1 = (v1-t1)>>2);
459 XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
460 t0 = v0-q0;
461 t1 = v1-q1;
462 XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
464 t0 = *T++;
465 t1 = *T++;
466 v0 += (q0 = (t0-v0)>>2);
467 v1 += (q1 = (t1-v1)>>2);
468 XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
469 v0 = t0-q0;
470 v1 = t1-q1;
471 XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
473 oX2+=4;
474 iX+=8;
475 }while(iX<oX1);
476 break;
480 iX=out+n2+n4;
481 oX1=out+n4;
482 oX2=oX1;
485 oX1-=4;
486 iX-=4;
488 oX2[0] = -(oX1[3] = iX[3]);
489 oX2[1] = -(oX1[2] = iX[2]);
490 oX2[2] = -(oX1[1] = iX[1]);
491 oX2[3] = -(oX1[0] = iX[0]);
493 oX2+=4;
494 }while(oX2<iX);
496 iX=out+n2+n4;
497 oX1=out+n2+n4;
498 oX2=out+n2;
501 oX1-=4;
502 oX1[0]= iX[3];
503 oX1[1]= iX[2];
504 oX1[2]= iX[1];
505 oX1[3]= iX[0];
506 iX+=4;
507 }while(oX1>oX2);