mp3lib/dct36.c

   1 /*
   2  * Modified for use with MPlayer, for details see the CVS changelog at
   3  * http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/
   4  * $Id$
   5  */
   6
   7 /*
   8 // This is an optimized DCT from Jeff Tsay's maplay 1.2+ package.
   9 // Saved one multiplication by doing the 'twiddle factor' stuff
  10 // together with the window mul. (MH)
  11 //
  12 // This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the
  13 // 9 point IDCT needs to be reduced further. Unfortunately, I don't
  14 // know how to do that, because 9 is not an even number. - Jeff.
  15 //
  16 //////////////////////////////////////////////////////////////////
  17 //
  18 // 9 Point Inverse Discrete Cosine Transform
  19 //
  20 // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
  21 // by anybody. The algorithm itself is of course in the public domain.
  22 //
  23 // Again derived heuristically from the 9-point WFTA.
  24 //
  25 // The algorithm is optimized (?) for speed, not for small rounding errors or
  26 // good readability.
  27 //
  28 // 36 additions, 11 multiplications
  29 //
  30 // Again this is very likely sub-optimal.
  31 //
  32 // The code is optimized to use a minimum number of temporary variables,
  33 // so it should compile quite well even on 8-register Intel x86 processors.
  34 // This makes the code quite obfuscated and very difficult to understand.
  35 //
  36 // References:
  37 // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
  38 //     Mathematics of Computation, Volume 32, Number 141, January 1978,
  39 //     Pages 175-199
  40 */
  41
  42 /*------------------------------------------------------------------*/
  43 /*                                                                  */
  44 /*    Function: Calculation of the inverse MDCT                     */
  45 /*                                                                  */
  46 /*------------------------------------------------------------------*/
  47
   48 static void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf)
   49 {
      /*
       * Inverse MDCT of one 18-value block, with the window multiplication
       * and the addition of o1 fused into the final output stage.
       *
       *   inbuf  - 18 input values. inbuf[1..17] are overwritten by the
       *            in-place pre-additions below; inbuf[0] is only read.
       *   o1     - only read here: o1[0..17] is added to the part of the
       *            result that is windowed with wintab[0..17].
       *   o2     - only written here: o2[0..17] receives the part of the
       *            result that is windowed with wintab[18..35].
       *            NOTE(review): presumably o1/o2 are the overlap buffers of
       *            the previous/next block - confirm against the caller.
       *   wintab - window coefficients; indices 0..35 are read.
       *   tsbuf  - output; 18 values are stored at tsbuf[SBLIMIT*k], k = 0..17.
       *
       * `real`, SBLIMIT and the coefficient tables (COS6_1, COS6_2, cos9,
       * cos18, COS9, tfcos36) are not defined in this function.
       *
       * NEW_DCT9 selects between two implementations of the middle stage;
       * both finish with the same windowing/output pattern.
       */
   50 #ifdef NEW_DCT9
      /* Scratch results of the two 9-point stages; only used in this path. */
   51   real tmp[18];
   52 #endif
   53
   54   {
   55     register real *in = inbuf;
   56
          /*
           * In-place pre-addition, pass 1: in[i] += in[i-1] for i = 17..1.
           * Processed from the highest index down so that every in[i-1]
           * is still the unmodified input when it is added.
           */
   57     in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14];
   58     in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11];
   59     in[11]+=in[10]; in[10]+=in[9];  in[9] +=in[8];
   60     in[8] +=in[7];  in[7] +=in[6];  in[6] +=in[5];
   61     in[5] +=in[4];  in[4] +=in[3];  in[3] +=in[2];
   62     in[2] +=in[1];  in[1] +=in[0];
   63
          /*
           * Pass 2, odd indices only: in[i] += in[i-2] for i = 17,15,...,3,
           * again highest index first so in[i-2] is the pass-1 value.
           */
   64     in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
   65     in[9] +=in[7];  in[7] +=in[5];  in[5] +=in[3];  in[3] +=in[1];
   66
   67
   68 #ifdef NEW_DCT9
          /*
           * 9-point stage on the even-indexed inputs in[0], in[2], ..., in[16],
           * producing tmp[0..8] (see the 9-point IDCT notes in the file header).
           * t0..t7 are reused for different intermediate values as the
           * computation proceeds, so the statement order must not be changed.
           */
   69     {
   70       real t0, t1, t2, t3, t4, t5, t6, t7;
   71
   72       t1 = COS6_2 * in[12];
   73       t2 = COS6_2 * (in[8] + in[16] - in[4]);
   74
   75       t3 = in[0] + t1;
   76       t4 = in[0] - t1 - t1;
   77       t5 = t4 - t2;
   78
   79       t0 = cos9[0] * (in[4] + in[8]);
   80       t1 = cos9[1] * (in[8] - in[16]);
   81
   82       tmp[4] = t4 + t2 + t2;
   83       t2 = cos9[2] * (in[4] + in[16]);
   84
   85       t6 = t3 - t0 - t2;
   86       t0 += t3 + t1;
   87       t3 += t2 - t1;
   88
   89       t2 = cos18[0] * (in[2]  + in[10]);
   90       t4 = cos18[1] * (in[10] - in[14]);
   91       t7 = COS6_1 * in[6];
   92
   93       t1 = t2 + t4 + t7;
   94       tmp[0] = t0 + t1;
   95       tmp[8] = t0 - t1;
   96       t1 = cos18[2] * (in[2] + in[14]);
   97       t2 += t1 - t7;
   98
   99       tmp[3] = t3 + t2;
  100       t0 = COS6_1 * (in[10] + in[14] - in[2]);
  101       tmp[5] = t3 - t2;
  102
  103       t4 -= t1 + t7;
  104
  105       tmp[1] = t5 - t0;
  106       tmp[7] = t5 + t0;
  107       tmp[2] = t6 + t4;
  108       tmp[6] = t6 - t4;
  109     }
  110
          /*
           * Same statement sequence on the odd-indexed inputs in[1], in[3],
           * ..., in[17]. The value that corresponds to tmp[j] in the block
           * above is multiplied by tfcos36[j] and stored mirrored at
           * tmp[17-j], filling tmp[9..17].
           */
  111     {
  112       real t0, t1, t2, t3, t4, t5, t6, t7;
  113
  114       t1 = COS6_2 * in[13];
  115       t2 = COS6_2 * (in[9] + in[17] - in[5]);
  116
  117       t3 = in[1] + t1;
  118       t4 = in[1] - t1 - t1;
  119       t5 = t4 - t2;
  120
  121       t0 = cos9[0] * (in[5] + in[9]);
  122       t1 = cos9[1] * (in[9] - in[17]);
  123
  124       tmp[13] = (t4 + t2 + t2) * tfcos36[17-13];
  125       t2 = cos9[2] * (in[5] + in[17]);
  126
  127       t6 = t3 - t0 - t2;
  128       t0 += t3 + t1;
  129       t3 += t2 - t1;
  130
  131       t2 = cos18[0] * (in[3]  + in[11]);
  132       t4 = cos18[1] * (in[11] - in[15]);
  133       t7 = COS6_1 * in[7];
  134
  135       t1 = t2 + t4 + t7;
  136       tmp[17] = (t0 + t1) * tfcos36[17-17];
  137       tmp[9]  = (t0 - t1) * tfcos36[17-9];
  138       t1 = cos18[2] * (in[3] + in[15]);
  139       t2 += t1 - t7;
  140
  141       tmp[14] = (t3 + t2) * tfcos36[17-14];
  142       t0 = COS6_1 * (in[11] + in[15] - in[3]);
  143       tmp[12] = (t3 - t2) * tfcos36[17-12];
  144
  145       t4 -= t1 + t7;
  146
  147       tmp[16] = (t5 - t0) * tfcos36[17-16];
  148       tmp[10] = (t5 + t0) * tfcos36[17-10];
  149       tmp[15] = (t6 + t4) * tfcos36[17-15];
  150       tmp[11] = (t6 - t4) * tfcos36[17-11];
  151    }
  152
      /*
       * MACRO(v), v = 0..8: output stage for the pair tmp[v] / tmp[17-v].
       *   - the sum  tmp[v] + tmp[17-v] is multiplied by w[27+v] and w[26-v]
       *     and stored to out2[9+v] and out2[8-v];
       *   - the difference tmp[v] - tmp[17-v] is multiplied by w[8-v] and
       *     w[9+v], added to out1[8-v] / out1[9+v], and stored to ts at
       *     offsets SBLIMIT*(8-v) / SBLIMIT*(9+v).
       * Uses tmp, out1, out2, w and ts from the enclosing scope.
       * The macro is not #undef'd inside this function.
       */
  153 #define MACRO(v) { \
  154     real tmpval; \
  155     real sum0 = tmp[(v)]; \
  156     real sum1 = tmp[17-(v)]; \
  157     out2[9+(v)] = (tmpval = sum0 + sum1) * w[27+(v)]; \
  158     out2[8-(v)] = tmpval * w[26-(v)]; \
  159     sum0 -= sum1; \
  160     ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
  161     ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)]; }
  162
      /* Local aliases for the caller's buffers, then all nine output pairs. */
  163 {
  164    register real *out2 = o2;
  165    register real *w = wintab;
  166    register real *out1 = o1;
  167    register real *ts = tsbuf;
  168
  169    MACRO(0);
  170    MACRO(1);
  171    MACRO(2);
  172    MACRO(3);
  173    MACRO(4);
  174    MACRO(5);
  175    MACRO(6);
  176    MACRO(7);
  177    MACRO(8);
  178 }
  179
  180 #else
  181
      /*
       * Alternative path (NEW_DCT9 not defined): each output pair is formed
       * directly from sums of products with the COS9 table, without the
       * tmp[] scratch array.
       */
  182   {
  183
      /*
       * MACRO0(v): windowing/output stage, same store pattern as described
       * for the NEW_DCT9 path: (sum0 + sum1) goes to out2[9+v] / out2[8-v],
       * (sum0 - sum1) is added to out1[8-v] / out1[9+v] and stored to ts.
       * Note the braces close after the two out2 stores; the remaining
       * statements are emitted into the caller's scope and modify the
       * caller's sum0. sum0 and sum1 must already be declared where the
       * macro is expanded.
       */
  184 #define MACRO0(v) { \
  185     real tmp; \
  186     out2[9+(v)] = (tmp = sum0 + sum1) * w[27+(v)]; \
  187     out2[8-(v)] = tmp * w[26-(v)];  } \
  188     sum0 -= sum1; \
  189     ts[SBLIMIT*(8-(v))] = out1[8-(v)] + sum0 * w[8-(v)]; \
  190     ts[SBLIMIT*(9+(v))] = out1[9+(v)] + sum0 * w[9+(v)];
      /*
       * MACRO1(v): sum0 = tmp1a + tmp2a, sum1 = (tmp1b + tmp2b) * tfcos36[v],
       * then MACRO0(v). Expects tmp1a/tmp2a/tmp1b/tmp2b in the enclosing scope.
       */
  191 #define MACRO1(v) { \
  192         real sum0,sum1; \
  193     sum0 = tmp1a + tmp2a; \
  194         sum1 = (tmp1b + tmp2b) * tfcos36[(v)]; \
  195         MACRO0(v); }
      /*
       * MACRO2(v): like MACRO1 but with differences:
       * sum0 = tmp2a - tmp1a, sum1 = (tmp2b - tmp1b) * tfcos36[v].
       */
  196 #define MACRO2(v) { \
  197     real sum0,sum1; \
  198     sum0 = tmp2a - tmp1a; \
  199     sum1 = (tmp2b - tmp1b) * tfcos36[(v)]; \
  200         MACRO0(v); }
  201
  202     register const real *c = COS9;
  203     register real *out2 = o2;
  204         register real *w = wintab;
  205         register real *out1 = o1;
  206         register real *ts = tsbuf;
  207
          /*
           * Products shared by several of the groups below, computed once:
           * "a" values come from even-indexed inputs (in[2k+0]),
           * "b" values from odd-indexed inputs (in[2k+1]).
           */
  208     real ta33,ta66,tb33,tb66;
  209
  210     ta33 = in[2*3+0] * c[3];
  211     ta66 = in[2*6+0] * c[6];
  212     tb33 = in[2*3+1] * c[3];
  213     tb66 = in[2*6+1] * c[6];
  214
          /*
           * In each group: tmp1a/tmp1b combine the inputs with k = 1,3,5,7,
           * tmp2a/tmp2b the inputs with k = 0,2,4,6,8 (index in[2k] for "a",
           * in[2k+1] for "b"). MACRO1 emits output pair v, MACRO2 pair 8-v.
           * This group: pairs 0 and 8.
           */
  215     {
  216       real tmp1a,tmp2a,tmp1b,tmp2b;
  217       tmp1a =             in[2*1+0] * c[1] + ta33 + in[2*5+0] * c[5] + in[2*7+0] * c[7];
  218       tmp1b =             in[2*1+1] * c[1] + tb33 + in[2*5+1] * c[5] + in[2*7+1] * c[7];
  219       tmp2a = in[2*0+0] + in[2*2+0] * c[2] + in[2*4+0] * c[4] + ta66 + in[2*8+0] * c[8];
  220       tmp2b = in[2*0+1] + in[2*2+1] * c[2] + in[2*4+1] * c[4] + tb66 + in[2*8+1] * c[8];
  221
  222       MACRO1(0);
  223       MACRO2(8);
  224     }
  225
          /* Pairs 1 and 7: only c[3] and c[6] are needed as multipliers. */
  226     {
  227       real tmp1a,tmp2a,tmp1b,tmp2b;
  228       tmp1a = ( in[2*1+0] - in[2*5+0] - in[2*7+0] ) * c[3];
  229       tmp1b = ( in[2*1+1] - in[2*5+1] - in[2*7+1] ) * c[3];
  230       tmp2a = ( in[2*2+0] - in[2*4+0] - in[2*8+0] ) * c[6] - in[2*6+0] + in[2*0+0];
  231       tmp2b = ( in[2*2+1] - in[2*4+1] - in[2*8+1] ) * c[6] - in[2*6+1] + in[2*0+1];
  232
  233       MACRO1(1);
  234       MACRO2(7);
  235     }
  236
          /* Pairs 2 and 6. */
  237     {
  238       real tmp1a,tmp2a,tmp1b,tmp2b;
  239       tmp1a =             in[2*1+0] * c[5] - ta33 - in[2*5+0] * c[7] + in[2*7+0] * c[1];
  240       tmp1b =             in[2*1+1] * c[5] - tb33 - in[2*5+1] * c[7] + in[2*7+1] * c[1];
  241       tmp2a = in[2*0+0] - in[2*2+0] * c[8] - in[2*4+0] * c[2] + ta66 + in[2*8+0] * c[4];
  242       tmp2b = in[2*0+1] - in[2*2+1] * c[8] - in[2*4+1] * c[2] + tb66 + in[2*8+1] * c[4];
  243
  244       MACRO1(2);
  245       MACRO2(6);
  246     }
  247
          /* Pairs 3 and 5. */
  248     {
  249       real tmp1a,tmp2a,tmp1b,tmp2b;
  250       tmp1a =             in[2*1+0] * c[7] - ta33 + in[2*5+0] * c[1] - in[2*7+0] * c[5];
  251       tmp1b =             in[2*1+1] * c[7] - tb33 + in[2*5+1] * c[1] - in[2*7+1] * c[5];
  252       tmp2a = in[2*0+0] - in[2*2+0] * c[4] + in[2*4+0] * c[8] + ta66 - in[2*8+0] * c[2];
  253       tmp2b = in[2*0+1] - in[2*2+1] * c[4] + in[2*4+1] * c[8] + tb66 - in[2*8+1] * c[2];
  254
  255       MACRO1(3);
  256       MACRO2(5);
  257     }
  258
          /*
           * Middle pair (v = 4): alternating-sign sum of the k = 0,2,4,6,8
           * inputs with no COS9 multiplications; only the "b" sum is scaled,
           * by tfcos36[4]. MACRO0 is used directly with local sum0/sum1.
           */
  259         {
  260                 real sum0,sum1;
  261         sum0 =  in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0];
  262         sum1 = (in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ) * tfcos36[4];
  263                 MACRO0(4);
  264         }
  265   }
  266 #endif
  267
      /* Closes the block that declared `in`. */
  268   }
  269 }
 270