rpm: Remove MEncoder from rpm packaging
[mplayer/glamo.git] / mp3lib / dct36.c
blob22ec2732777f955c2e318f9b3db089e76c3e5579
1 /*
2 * Modified for use with MPlayer, for details see the changelog at
3 * http://svn.mplayerhq.hu/mplayer/trunk/
4 * $Id$
5 */
7 /*
8 // This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
9 // Saved one multiplication by doing the 'twiddle factor' stuff
10 // together with the window mul. (MH)
12 // This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
13 // 9 point IDCT needs to be reduced further. Unfortunately, I don't
14 // know how to do that, because 9 is not an even number. - Jeff.
16 //////////////////////////////////////////////////////////////////
18 // 9 Point Inverse Discrete Cosine Transform
20 // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
21 // by anybody. The algorithm itself is of course in the public domain.
23 // Again derived heuristically from the 9-point WFTA.
25 // The algorithm is optimized (?) for speed, not for small rounding errors or
26 // good readability.
28 // 36 additions, 11 multiplications
30 // Again this is very likely sub-optimal.
32 // The code is optimized to use a minimum number of temporary variables,
33 // so it should compile quite well even on 8-register Intel x86 processors.
34 // This makes the code quite obfuscated and very difficult to understand.
36 // References:
37 // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
38 // Mathematics of Computation, Volume 32, Number 141, January 1978,
39 // Pages 175-199
42 /*------------------------------------------------------------------*/
43 /* */
44 /* Function: Calculation of the inverse MDCT */
45 /* */
46 /*------------------------------------------------------------------*/
/*
 * dct36 - inverse MDCT step (see the "Calculation of the inverse MDCT"
 * banner directly above).
 *
 * inbuf:  18 input values, accessed as in[0..17]; they are modified in
 *         place by the two pre-addition passes below.
 * o1:     18 values, read as out1[0..17] and added into the results
 *         that are stored to tsbuf.
 * o2:     18 values, written as out2[0..17].
 * wintab: window coefficients, read at indices 0..35.
 * tsbuf:  output, written at ts[SBLIMIT*k] for k = 0..17, i.e. with a
 *         stride of SBLIMIT elements.
 *
 * real, SBLIMIT, COS6_1, COS6_2, cos9, cos18, COS9 and tfcos36 are not
 * defined in this section.
 *
 * NOTE(review): the embedded line numbers skip wherever the original had
 * a blank or brace-only line, so the block braces of this function are
 * presumably missing from this copy - confirm against upstream.
 */
48 static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
/* Scratch for the two 9-point passes; only declared for the NEW_DCT9 variant. */
50 #ifdef NEW_DCT9
51 real tmp[18];
52 #endif
55 register real *in = inbuf;
/* First pass: in[k] += in[k-1] for k = 17 down to 1. Walking downward
 * means every addend is still the unmodified input value. */
57 in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
58 in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
59 in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8];
60 in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5];
61 in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2];
62 in[2] +=in[1]; in[1] +=in[0];
/* Second pass, odd indices only: in[k] += in[k-2] for k = 17, 15, ..., 3,
 * again top-down so each addend has not yet been touched by this pass. */
64 in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
65 in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1];
/* Variant selected by NEW_DCT9: two 9-point passes into tmp[], followed
 * by windowing through MACRO(). */
68 #ifdef NEW_DCT9
/* 9-point pass over the even-indexed inputs in[0], in[2], ..., in[16];
 * fills tmp[0..8]. The t0..t7 temporaries are reused several times, so
 * statement order matters. */
70 real t0, t1, t2, t3, t4, t5, t6, t7;
72 t1 = COS6_2 * in[12];
73 t2 = COS6_2 * (in[8] + in[16] - in[4]);
75 t3 = in[0] + t1;
76 t4 = in[0] - t1 - t1;
77 t5 = t4 - t2;
79 t0 = cos9[0] * (in[4] + in[8]);
80 t1 = cos9[1] * (in[8] - in[16]);
82 tmp[4] = t4 + t2 + t2;
83 t2 = cos9[2] * (in[4] + in[16]);
85 t6 = t3 - t0 - t2;
86 t0 += t3 + t1;
87 t3 += t2 - t1;
89 t2 = cos18[0] * (in[2] + in[10]);
90 t4 = cos18[1] * (in[10] - in[14]);
91 t7 = COS6_1 * in[6];
93 t1 = t2 + t4 + t7;
94 tmp[0] = t0 + t1;
95 tmp[8] = t0 - t1;
96 t1 = cos18[2] * (in[2] + in[14]);
97 t2 += t1 - t7;
99 tmp[3] = t3 + t2;
100 t0 = COS6_1 * (in[10] + in[14] - in[2]);
101 tmp[5] = t3 - t2;
103 t4 -= t1 + t7;
105 tmp[1] = t5 - t0;
106 tmp[7] = t5 + t0;
107 tmp[2] = t6 + t4;
108 tmp[6] = t6 - t4;
/* Same 9-point pass over the odd-indexed inputs in[1], in[3], ..., in[17];
 * fills tmp[9..17]. Each result tmp[k] is additionally scaled by
 * tfcos36[17-k] as it is stored. */
112 real t0, t1, t2, t3, t4, t5, t6, t7;
114 t1 = COS6_2 * in[13];
115 t2 = COS6_2 * (in[9] + in[17] - in[5]);
117 t3 = in[1] + t1;
118 t4 = in[1] - t1 - t1;
119 t5 = t4 - t2;
121 t0 = cos9[0] * (in[5] + in[9]);
122 t1 = cos9[1] * (in[9] - in[17]);
124 tmp[13] = (t4 + t2 + t2) * tfcos36[17-13];
125 t2 = cos9[2] * (in[5] + in[17]);
127 t6 = t3 - t0 - t2;
128 t0 += t3 + t1;
129 t3 += t2 - t1;
131 t2 = cos18[0] * (in[3] + in[11]);
132 t4 = cos18[1] * (in[11] - in[15]);
133 t7 = COS6_1 * in[7];
135 t1 = t2 + t4 + t7;
136 tmp[17] = (t0 + t1) * tfcos36[17-17];
137 tmp[9] = (t0 - t1) * tfcos36[17-9];
138 t1 = cos18[2] * (in[3] + in[15]);
139 t2 += t1 - t7;
141 tmp[14] = (t3 + t2) * tfcos36[17-14];
142 t0 = COS6_1 * (in[11] + in[15] - in[3]);
143 tmp[12] = (t3 - t2) * tfcos36[17-12];
145 t4 -= t1 + t7;
147 tmp[16] = (t5 - t0) * tfcos36[17-16];
148 tmp[10] = (t5 + t0) * tfcos36[17-10];
149 tmp[15] = (t6 + t4) * tfcos36[17-15];
150 tmp[11] = (t6 - t4) * tfcos36[17-11];
/* MACRO(v): combine tmp[v] with its mirror tmp[17-v].
 *   - the sum, times w[27+v] and w[26-v], is stored to out2[9+v] and out2[8-v];
 *   - the difference, times w[8-v] and w[9+v], is added to out1[8-v] and
 *     out1[9+v] and stored to ts at rows (8-v) and (9+v), stride SBLIMIT. */
153 #define MACRO(v) { \
154 real tmpval; \
155 real sum0 = tmp[(v)]; \
156 real sum1 = tmp[17-(v)]; \
157 out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
158 out2[8-(v)] = tmpval * w[26-(v)]; \
159 sum0 -= sum1; \
160 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
161 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
/* Local aliases for the pointer arguments; these are the names MACRO() uses. */
164 register real *out2 = o2;
165 register real *w = wintab;
166 register real *out1 = o1;
167 register real *ts = tsbuf;
/* v = 0..8 covers all 18 entries of out2 and all 18 rows of ts. */
169 MACRO(0);
170 MACRO(1);
171 MACRO(2);
172 MACRO(3);
173 MACRO(4);
174 MACRO(5);
175 MACRO(6);
176 MACRO(7);
177 MACRO(8);
/* Variant used when NEW_DCT9 is not defined: the terms are evaluated
 * directly from the COS9 table, without the tmp[] array. */
180 #else
/* MACRO0(v): windowing step; expects sum0 and sum1 in the invoking scope.
 * The braces close right after the two out2 stores, so only the scalar
 * tmp is scoped; "sum0 -= sum1" and the two ts stores execute in the
 * invoking scope and modify the invoker's sum0. The expansion already
 * ends in ';', so the ';' written after an invocation is an empty
 * statement. */
184 #define MACRO0(v) { \
185 real tmp; \
186 out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
187 out2[8-(v)] = tmp * w[26-(v)]; } \
188 sum0 -= sum1; \
189 ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
190 ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
/* MACRO1(v): sum0 = tmp1a + tmp2a, sum1 = (tmp1b + tmp2b) * tfcos36[v],
 * then MACRO0(v). Expects tmp1a, tmp2a, tmp1b, tmp2b in scope. */
191 #define MACRO1(v) { \
192 real sum0, sum1; \
193 sum0 = tmp1a + tmp2a; \
194 sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
195 MACRO0(v); }
/* MACRO2(v): as MACRO1 but with the differences tmp2a - tmp1a and
 * tmp2b - tmp1b, so one set of four terms serves a pair of v values. */
196 #define MACRO2(v) { \
197 real sum0, sum1; \
198 sum0 = tmp2a - tmp1a; \
199 sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
200 MACRO0(v); }
/* c aliases the COS9 table; the other pointers alias the arguments under
 * the names the macros use. */
202 register const real *c = COS9;
203 register real *out2 = o2;
204 register real *w = wintab;
205 register real *out1 = o1;
206 register real *ts = tsbuf;
/* Products computed once and reused by the (0,8), (2,6) and (3,5) groups:
 * in[6]*c[3], in[12]*c[6] and the odd-index counterparts in[7]*c[3],
 * in[13]*c[6]. */
208 real ta33,ta66,tb33,tb66;
210 ta33 = in[2*3+0] * c[3];
211 ta66 = in[2*6+0] * c[6];
212 tb33 = in[2*3+1] * c[3];
213 tb66 = in[2*6+1] * c[6];
/* Pair v = 0 and v = 8. Throughout, the "a" terms are built from the
 * even-indexed inputs in[2k+0] and the "b" terms from the odd-indexed
 * inputs in[2k+1]. */
216 real tmp1a,tmp2a,tmp1b,tmp2b;
217 tmp1a = in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7];
218 tmp1b = in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7];
219 tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8];
220 tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8];
222 MACRO1(0);
223 MACRO2(8);
/* Pair v = 1 and v = 7; only c[3] and c[6] are needed here, and the
 * shared ta/tb products are not used. */
227 real tmp1a,tmp2a,tmp1b,tmp2b;
228 tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3];
229 tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3];
230 tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0];
231 tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1];
233 MACRO1(1);
234 MACRO2(7);
/* Pair v = 2 and v = 6. */
238 real tmp1a,tmp2a,tmp1b,tmp2b;
239 tmp1a = in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1];
240 tmp1b = in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1];
241 tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4];
242 tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4];
244 MACRO1(2);
245 MACRO2(6);
/* Pair v = 3 and v = 5. */
249 real tmp1a,tmp2a,tmp1b,tmp2b;
250 tmp1a = in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5];
251 tmp1b = in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5];
252 tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2];
253 tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2];
255 MACRO1(3);
256 MACRO2(5);
/* Middle case v = 4: sum0 and sum1 are formed directly as alternating-sign
 * sums of the even- and odd-indexed inputs and passed straight to MACRO0. */
260 real sum0,sum1;
261 sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
262 sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
263 MACRO0(4);
266 #endif