2 * Modified for use with MPlayer, for details see the changelog at
3 * http://svn.mplayerhq.hu/mplayer/trunk/
8 // This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
9 // Saved one multiplication by doing the 'twiddle factor' stuff
10 // together with the window mul. (MH)
12 // This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
13 // 9 point IDCT needs to be reduced further. Unfortunately, I don't
14 // know how to do that, because 9 is not an even number. - Jeff.
16 //////////////////////////////////////////////////////////////////
18 // 9 Point Inverse Discrete Cosine Transform
20 // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
21 // by anybody. The algorithm itself is of course in the public domain.
23 // Again derived heuristically from the 9-point WFTA.
25 // The algorithm is optimized (?) for speed, not for small rounding errors or
28 // 36 additions, 11 multiplications
30 // Again this is very likely sub-optimal.
32 // The code is optimized to use a minimum number of temporary variables,
33 // so it should compile quite well even on 8-register Intel x86 processors.
34 // This makes the code quite obfuscated and very difficult to understand.
37 // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
38 // Mathematics of Computation, Volume 32, Number 141, January 1978,
42 /*------------------------------------------------------------------*/
44 /* Function: Calculation of the inverse MDCT */
46 /*------------------------------------------------------------------*/
48 static void dct36(real
*inbuf
,real
*o1
,real
*o2
,real
*wintab
,real
*tsbuf
)
55 register real
*in
= inbuf
;
57 in
[17]+=in
[16]; in
[16]+=in
[15]; in
[15]+=in
[14];
58 in
[14]+=in
[13]; in
[13]+=in
[12]; in
[12]+=in
[11];
59 in
[11]+=in
[10]; in
[10]+=in
[9]; in
[9] +=in
[8];
60 in
[8] +=in
[7]; in
[7] +=in
[6]; in
[6] +=in
[5];
61 in
[5] +=in
[4]; in
[4] +=in
[3]; in
[3] +=in
[2];
62 in
[2] +=in
[1]; in
[1] +=in
[0];
64 in
[17]+=in
[15]; in
[15]+=in
[13]; in
[13]+=in
[11]; in
[11]+=in
[9];
65 in
[9] +=in
[7]; in
[7] +=in
[5]; in
[5] +=in
[3]; in
[3] +=in
[1];
70 real t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
73 t2
= COS6_2
* (in
[8] + in
[16] - in
[4]);
79 t0
= cos9
[0] * (in
[4] + in
[8]);
80 t1
= cos9
[1] * (in
[8] - in
[16]);
82 tmp
[4] = t4
+ t2
+ t2
;
83 t2
= cos9
[2] * (in
[4] + in
[16]);
89 t2
= cos18
[0] * (in
[2] + in
[10]);
90 t4
= cos18
[1] * (in
[10] - in
[14]);
96 t1
= cos18
[2] * (in
[2] + in
[14]);
100 t0
= COS6_1
* (in
[10] + in
[14] - in
[2]);
112 real t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
;
114 t1
= COS6_2
* in
[13];
115 t2
= COS6_2
* (in
[9] + in
[17] - in
[5]);
118 t4
= in
[1] - t1
- t1
;
121 t0
= cos9
[0] * (in
[5] + in
[9]);
122 t1
= cos9
[1] * (in
[9] - in
[17]);
124 tmp
[13] = (t4
+ t2
+ t2
) * tfcos36
[17-13];
125 t2
= cos9
[2] * (in
[5] + in
[17]);
131 t2
= cos18
[0] * (in
[3] + in
[11]);
132 t4
= cos18
[1] * (in
[11] - in
[15]);
136 tmp
[17] = (t0
+ t1
) * tfcos36
[17-17];
137 tmp
[9] = (t0
- t1
) * tfcos36
[17-9];
138 t1
= cos18
[2] * (in
[3] + in
[15]);
141 tmp
[14] = (t3
+ t2
) * tfcos36
[17-14];
142 t0
= COS6_1
* (in
[11] + in
[15] - in
[3]);
143 tmp
[12] = (t3
- t2
) * tfcos36
[17-12];
147 tmp
[16] = (t5
- t0
) * tfcos36
[17-16];
148 tmp
[10] = (t5
+ t0
) * tfcos36
[17-10];
149 tmp
[15] = (t6
+ t4
) * tfcos36
[17-15];
150 tmp
[11] = (t6
- t4
) * tfcos36
[17-11];
155 real sum0 = tmp[(v)]; \
156 real sum1 = tmp[17-(v)]; \
157 out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
158 out2[8-(v)] = tmpval * w[26-(v)]; \
160 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
161 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
164 register real
*out2
= o2
;
165 register real
*w
= wintab
;
166 register real
*out1
= o1
;
167 register real
*ts
= tsbuf
;
184 #define MACRO0(v) { \
186 out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
187 out2[8-(v)] = tmp * w[26-(v)]; } \
189 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
190 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
191 #define MACRO1(v) { \
193 sum0 = tmp1a + tmp2a; \
194 sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
196 #define MACRO2(v) { \
198 sum0 = tmp2a - tmp1a; \
199 sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
202 register const real
*c
= COS9
;
203 register real
*out2
= o2
;
204 register real
*w
= wintab
;
205 register real
*out1
= o1
;
206 register real
*ts
= tsbuf
;
208 real ta33
,ta66
,tb33
,tb66
;
210 ta33
= in
[2*3+0] * c
[3];
211 ta66
= in
[2*6+0] * c
[6];
212 tb33
= in
[2*3+1] * c
[3];
213 tb66
= in
[2*6+1] * c
[6];
216 real tmp1a
,tmp2a
,tmp1b
,tmp2b
;
217 tmp1a
= in
[2*1+0] * c
[1] + ta33
+ in
[2*5+0] * c
[5] + in
[2*7+0] * c
[7];
218 tmp1b
= in
[2*1+1] * c
[1] + tb33
+ in
[2*5+1] * c
[5] + in
[2*7+1] * c
[7];
219 tmp2a
= in
[2*0+0] + in
[2*2+0] * c
[2] + in
[2*4+0] * c
[4] + ta66
+ in
[2*8+0] * c
[8];
220 tmp2b
= in
[2*0+1] + in
[2*2+1] * c
[2] + in
[2*4+1] * c
[4] + tb66
+ in
[2*8+1] * c
[8];
227 real tmp1a
,tmp2a
,tmp1b
,tmp2b
;
228 tmp1a
= ( in
[2*1+0] - in
[2*5+0] - in
[2*7+0] ) * c
[3];
229 tmp1b
= ( in
[2*1+1] - in
[2*5+1] - in
[2*7+1] ) * c
[3];
230 tmp2a
= ( in
[2*2+0] - in
[2*4+0] - in
[2*8+0] ) * c
[6] - in
[2*6+0] + in
[2*0+0];
231 tmp2b
= ( in
[2*2+1] - in
[2*4+1] - in
[2*8+1] ) * c
[6] - in
[2*6+1] + in
[2*0+1];
238 real tmp1a
,tmp2a
,tmp1b
,tmp2b
;
239 tmp1a
= in
[2*1+0] * c
[5] - ta33
- in
[2*5+0] * c
[7] + in
[2*7+0] * c
[1];
240 tmp1b
= in
[2*1+1] * c
[5] - tb33
- in
[2*5+1] * c
[7] + in
[2*7+1] * c
[1];
241 tmp2a
= in
[2*0+0] - in
[2*2+0] * c
[8] - in
[2*4+0] * c
[2] + ta66
+ in
[2*8+0] * c
[4];
242 tmp2b
= in
[2*0+1] - in
[2*2+1] * c
[8] - in
[2*4+1] * c
[2] + tb66
+ in
[2*8+1] * c
[4];
249 real tmp1a
,tmp2a
,tmp1b
,tmp2b
;
250 tmp1a
= in
[2*1+0] * c
[7] - ta33
+ in
[2*5+0] * c
[1] - in
[2*7+0] * c
[5];
251 tmp1b
= in
[2*1+1] * c
[7] - tb33
+ in
[2*5+1] * c
[1] - in
[2*7+1] * c
[5];
252 tmp2a
= in
[2*0+0] - in
[2*2+0] * c
[4] + in
[2*4+0] * c
[8] + ta66
- in
[2*8+0] * c
[2];
253 tmp2b
= in
[2*0+1] - in
[2*2+1] * c
[4] + in
[2*4+1] * c
[8] + tb66
- in
[2*8+1] * c
[2];
261 sum0
= in
[2*0+0] - in
[2*2+0] + in
[2*4+0] - in
[2*6+0] + in
[2*8+0];
262 sum1
= (in
[2*0+1] - in
[2*2+1] + in
[2*4+1] - in
[2*6+1] + in
[2*8+1] ) * tfcos36
[4];