doc/transforms.py

   1 # Lossy compression algorithms very often make use of DCT or DFT calculations,
   2 # or variations of these calculations. This file is intended to be a short
   3 # reference about classical DCT and DFT algorithms.
   4
   5
   6 import math
   7 import cmath
   8
   9 pi = math.pi
  10 sin = math.sin
  11 cos = math.cos
  12 sqrt = math.sqrt
  13
  14 def exp_j (alpha):
  15     return cmath.exp (alpha * 1j)
  16
  17 def conjugate (c):
  18     c = c + 0j
  19     return c.real - 1j * c.imag
  20
  21 def vector (N):
  22     return [0j] * N
  23
  24
  25 # Let us start withthe canonical definition of the unscaled DFT algorithm :
  26 # (I can not draw sigmas in a text file so I'll use python code instead)  :)
  27
  28 def W (k, N):
  29     return exp_j ((-2*pi*k)/N)
  30
  31 def unscaled_DFT (N, input, output):
  32     for o in range(N):          # o is output index
  33         output[o] = 0
  34         for i in range(N):
  35             output[o] = output[o] + input[i] * W (i*o, N)
  36
  37 # This algorithm takes complex input and output. There are N*N complex
  38 # multiplications and N*(N-1) complex additions.
  39
  40
  41 # Of course this algorithm is an extremely naive implementation and there are
  42 # some ways to use the trigonometric properties of the coefficients to find
  43 # some decompositions that can accelerate the calculation by several orders
  44 # of magnitude... This is a well known and studied problem. One of the
  45 # available explanations of this process is at this url :
  46 # www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fft.html
  47
  48
  49 # Let's start with the radix-2 decimation-in-time algorithm :
  50
  51 def unscaled_DFT_radix2_time (N, input, output):
  52     even_input = vector(N/2)
  53     odd_input = vector(N/2)
  54     even_output = vector(N/2)
  55     odd_output = vector(N/2)
  56
  57     for i in range(N/2):
  58         even_input[i] = input[2*i]
  59         odd_input[i] = input[2*i+1]
  60
  61     unscaled_DFT (N/2, even_input, even_output)
  62     unscaled_DFT (N/2, odd_input, odd_output)
  63
  64     for i in range(N/2):
  65         odd_output[i] = odd_output[i] * W (i, N)
  66
  67     for i in range(N/2):
  68         output[i] = even_output[i] + odd_output[i]
  69         output[i+N/2] = even_output[i] - odd_output[i]
  70
  71 # This algorithm takes complex input and output.
  72
  73 # We divide the DFT calculation into 2 DFT calculations of size N/2
  74 # We then do N/2 complex multiplies followed by N complex additions.
  75 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
  76 # multiplies... we will skip 1 for i=0 and 1 for i=N/4. Also for i=N/8 and for
  77 # i=3*N/8 the W(i,N) values can be special-cased to implement the complex
  78 # multiplication using only 2 real additions and 2 real multiplies)
  79
  80 # Also note that all the basic stages of this DFT algorithm are easily
  81 # reversible, so we can calculate the IDFT with the same complexity.
  82
  83
  84 # A varient of this is the radix-2 decimation-in-frequency algorithm :
  85
  86 def unscaled_DFT_radix2_freq (N, input, output):
  87     even_input = vector(N/2)
  88     odd_input = vector(N/2)
  89     even_output = vector(N/2)
  90     odd_output = vector(N/2)
  91
  92     for i in range(N/2):
  93         even_input[i] = input[i] + input[i+N/2]
  94         odd_input[i] = input[i] - input[i+N/2]
  95
  96     for i in range(N/2):
  97         odd_input[i] = odd_input[i] * W (i, N)
  98
  99     unscaled_DFT (N/2, even_input, even_output)
 100     unscaled_DFT (N/2, odd_input, odd_output)
 101
 102     for i in range(N/2):
 103         output[2*i] = even_output[i]
 104         output[2*i+1] = odd_output[i]
 105
 106 # Note that the decimation-in-time and the decimation-in-frequency varients
 107 # have exactly the same complexity, they only do the operations in a different
 108 # order.
 109
 110 # Actually, if you look at the decimation-in-time varient of the DFT, and
 111 # reverse it to calculate an IDFT, you get something that is extremely close
 112 # to the decimation-in-frequency DFT algorithm...
 113
 114
 115 # The radix-4 algorithms are slightly more efficient : they take into account
 116 # the fact that with complex numbers, multiplications by j and -j are also
 117 # "free"... i.e. when you code them using real numbers, they translate into
 118 # a few data moves but no real operation.
 119
 120 # Let's start with the radix-4 decimation-in-time algorithm :
 121
 122 def unscaled_DFT_radix4_time (N, input, output):
 123     input_0 = vector(N/4)
 124     input_1 = vector(N/4)
 125     input_2 = vector(N/4)
 126     input_3 = vector(N/4)
 127     output_0 = vector(N/4)
 128     output_1 = vector(N/4)
 129     output_2 = vector(N/4)
 130     output_3 = vector(N/4)
 131     tmp_0 = vector(N/4)
 132     tmp_1 = vector(N/4)
 133     tmp_2 = vector(N/4)
 134     tmp_3 = vector(N/4)
 135
 136     for i in range(N/4):
 137         input_0[i] = input[4*i]
 138         input_1[i] = input[4*i+1]
 139         input_2[i] = input[4*i+2]
 140         input_3[i] = input[4*i+3]
 141
 142     unscaled_DFT (N/4, input_0, output_0)
 143     unscaled_DFT (N/4, input_1, output_1)
 144     unscaled_DFT (N/4, input_2, output_2)
 145     unscaled_DFT (N/4, input_3, output_3)
 146
 147     for i in range(N/4):
 148         output_1[i] = output_1[i] * W (i, N)
 149         output_2[i] = output_2[i] * W (2*i, N)
 150         output_3[i] = output_3[i] * W (3*i, N)
 151
 152     for i in range(N/4):
 153         tmp_0[i] = output_0[i] + output_2[i]
 154         tmp_1[i] = output_0[i] - output_2[i]
 155         tmp_2[i] = output_1[i] + output_3[i]
 156         tmp_3[i] = output_1[i] - output_3[i]
 157
 158     for i in range(N/4):
 159         output[i] = tmp_0[i] + tmp_2[i]
 160         output[i+N/4] = tmp_1[i] - 1j * tmp_3[i]
 161         output[i+N/2] = tmp_0[i] - tmp_2[i]
 162         output[i+3*N/4] = tmp_1[i] + 1j * tmp_3[i]
 163
 164 # This algorithm takes complex input and output.
 165
 166 # We divide the DFT calculation into 4 DFT calculations of size N/4
 167 # We then do 3*N/4 complex multiplies followed by 2*N complex additions.
 168 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
 169 # multiplies... we will skip 3 for i=0 and 1 for i=N/8. Also for i=N/8
 170 # the remaining W(i,N) and W(3*i,N) multiplies can be implemented using only
 171 # 2 real additions and 2 real multiplies. For i=N/16 and i=3*N/16 we can also
 172 # optimise the W(2*i/N) multiply this way.
 173
 174 # If we wanted to do the same decomposition with one radix-2 decomposition
 175 # of size N and 2 radix-2 decompositions of size N/2, the total cost would be
 176 # N complex multiplies and 2*N complex additions. Thus we see that the
 177 # decomposition of one DFT calculation of size N into 4 calculations of size
 178 # N/4 using the radix-4 algorithm instead of the radix-2 algorithm saved N/4
 179 # complex multiplies...
 180
 181
 182 # The radix-4 decimation-in-frequency algorithm is similar :
 183
 184 def unscaled_DFT_radix4_freq (N, input, output):
 185     input_0 = vector(N/4)
 186     input_1 = vector(N/4)
 187     input_2 = vector(N/4)
 188     input_3 = vector(N/4)
 189     output_0 = vector(N/4)
 190     output_1 = vector(N/4)
 191     output_2 = vector(N/4)
 192     output_3 = vector(N/4)
 193     tmp_0 = vector(N/4)
 194     tmp_1 = vector(N/4)
 195     tmp_2 = vector(N/4)
 196     tmp_3 = vector(N/4)
 197
 198     for i in range(N/4):
 199         tmp_0[i] = input[i] + input[i+N/2]
 200         tmp_1[i] = input[i+N/4] + input[i+3*N/4]
 201         tmp_2[i] = input[i] - input[i+N/2]
 202         tmp_3[i] = input[i+N/4] - input[i+3*N/4]
 203
 204     for i in range(N/4):
 205         input_0[i] = tmp_0[i] + tmp_1[i]
 206         input_1[i] = tmp_2[i] - 1j * tmp_3[i]
 207         input_2[i] = tmp_0[i] - tmp_1[i]
 208         input_3[i] = tmp_2[i] + 1j * tmp_3[i]
 209
 210     for i in range(N/4):
 211         input_1[i] = input_1[i] * W (i, N)
 212         input_2[i] = input_2[i] * W (2*i, N)
 213         input_3[i] = input_3[i] * W (3*i, N)
 214
 215     unscaled_DFT (N/4, input_0, output_0)
 216     unscaled_DFT (N/4, input_1, output_1)
 217     unscaled_DFT (N/4, input_2, output_2)
 218     unscaled_DFT (N/4, input_3, output_3)
 219
 220     for i in range(N/4):
 221         output[4*i] = output_0[i]
 222         output[4*i+1] = output_1[i]
 223         output[4*i+2] = output_2[i]
 224         output[4*i+3] = output_3[i]
 225
 226 # Once again the complexity is exactly the same as for the radix-4
 227 # decimation-in-time DFT algorithm, only the order of the operations is
 228 # different.
 229
 230
 231 # Now let us reorder the radix-4 algorithms in a different way :
 232
 233 #def unscaled_DFT_radix4_time (N, input, output):
 234 #   input_0 = vector(N/4)
 235 #   input_1 = vector(N/4)
 236 #   input_2 = vector(N/4)
 237 #   input_3 = vector(N/4)
 238 #   output_0 = vector(N/4)
 239 #   output_1 = vector(N/4)
 240 #   output_2 = vector(N/4)
 241 #   output_3 = vector(N/4)
 242 #   tmp_0 = vector(N/4)
 243 #   tmp_1 = vector(N/4)
 244 #   tmp_2 = vector(N/4)
 245 #   tmp_3 = vector(N/4)
 246 #
 247 #   for i in range(N/4):
 248 #       input_0[i] = input[4*i]
 249 #       input_2[i] = input[4*i+2]
 250 #
 251 #   unscaled_DFT (N/4, input_0, output_0)
 252 #   unscaled_DFT (N/4, input_2, output_2)
 253 #
 254 #   for i in range(N/4):
 255 #       output_2[i] = output_2[i] * W (2*i, N)
 256 #
 257 #   for i in range(N/4):
 258 #       tmp_0[i] = output_0[i] + output_2[i]
 259 #       tmp_1[i] = output_0[i] - output_2[i]
 260 #
 261 #   for i in range(N/4):
 262 #       input_1[i] = input[4*i+1]
 263 #       input_3[i] = input[4*i+3]
 264 #
 265 #   unscaled_DFT (N/4, input_1, output_1)
 266 #   unscaled_DFT (N/4, input_3, output_3)
 267 #
 268 #   for i in range(N/4):
 269 #       output_1[i] = output_1[i] * W (i, N)
 270 #       output_3[i] = output_3[i] * W (3*i, N)
 271 #
 272 #   for i in range(N/4):
 273 #       tmp_2[i] = output_1[i] + output_3[i]
 274 #       tmp_3[i] = output_1[i] - output_3[i]
 275 #
 276 #   for i in range(N/4):
 277 #       output[i] = tmp_0[i] + tmp_2[i]
 278 #       output[i+N/4] = tmp_1[i] - 1j * tmp_3[i]
 279 #       output[i+N/2] = tmp_0[i] - tmp_2[i]
 280 #       output[i+3*N/4] = tmp_1[i] + 1j * tmp_3[i]
 281
 282 # We didnt do anything here, only reorder the operations. But now, look at the
 283 # first part of this function, up to the calculations of tmp0 and tmp1 : this
 284 # is extremely similar to the radix-2 decimation-in-time algorithm ! or more
 285 # precisely, it IS the radix-2 decimation-in-time algorithm, with size N/2,
 286 # applied on a vector representing the even input coefficients, and giving
 287 # an output vector that is the concatenation of tmp0 and tmp1.
 288 # This is important to notice, because this means we can now choose to
 289 # calculate tmp0 and tmp1 using any DFT algorithm that we want, and we know
 290 # that some of them are more efficient than radix-2...
 291
 292 # This leads us directly to the split-radix decimation-in-time algorithm :
 293
 294 def unscaled_DFT_split_radix_time (N, input, output):
 295     even_input = vector(N/2)
 296     input_1 = vector(N/4)
 297     input_3 = vector(N/4)
 298     even_output = vector(N/2)
 299     output_1 = vector(N/4)
 300     output_3 = vector(N/4)
 301     tmp_0 = vector(N/4)
 302     tmp_1 = vector(N/4)
 303
 304     for i in range(N/2):
 305         even_input[i] = input[2*i]
 306
 307     for i in range(N/4):
 308         input_1[i] = input[4*i+1]
 309         input_3[i] = input[4*i+3]
 310
 311     unscaled_DFT (N/2, even_input, even_output)
 312     unscaled_DFT (N/4, input_1, output_1)
 313     unscaled_DFT (N/4, input_3, output_3)
 314
 315     for i in range(N/4):
 316         output_1[i] = output_1[i] * W (i, N)
 317         output_3[i] = output_3[i] * W (3*i, N)
 318
 319     for i in range(N/4):
 320         tmp_0[i] = output_1[i] + output_3[i]
 321         tmp_1[i] = output_1[i] - output_3[i]
 322
 323     for i in range(N/4):
 324         output[i] = even_output[i] + tmp_0[i]
 325         output[i+N/4] = even_output[i+N/4] - 1j * tmp_1[i]
 326         output[i+N/2] = even_output[i] - tmp_0[i]
 327         output[i+3*N/4] = even_output[i+N/4] + 1j * tmp_1[i]
 328
 329 # This function performs one DFT of size N/2 and two of size N/4, followed by
 330 # N/2 complex multiplies and 3*N/2 complex additions.
 331 # (actually W(0,N) = 1 and W(N/4,N) = -j so we can skip a few of these complex
 332 # multiplies... we will skip 2 for i=0. Also for i=N/8 the W(i,N) and W(3*i,N)
 333 # multiplies can be implemented using only 2 real additions and 2 real
 334 # multiplies)
 335
 336
 337 # We can similarly define the split-radix decimation-in-frequency DFT :
 338
 339 def unscaled_DFT_split_radix_freq (N, input, output):
 340     even_input = vector(N/2)
 341     input_1 = vector(N/4)
 342     input_3 = vector(N/4)
 343     even_output = vector(N/2)
 344     output_1 = vector(N/4)
 345     output_3 = vector(N/4)
 346     tmp_0 = vector(N/4)
 347     tmp_1 = vector(N/4)
 348
 349     for i in range(N/2):
 350         even_input[i] = input[i] + input[i+N/2]
 351
 352     for i in range(N/4):
 353         tmp_0[i] = input[i] - input[i+N/2]
 354         tmp_1[i] = input[i+N/4] - input[i+3*N/4]
 355
 356     for i in range(N/4):
 357         input_1[i] = tmp_0[i] - 1j * tmp_1[i]
 358         input_3[i] = tmp_0[i] + 1j * tmp_1[i]
 359
 360     for i in range(N/4):
 361         input_1[i] = input_1[i] * W (i, N)
 362         input_3[i] = input_3[i] * W (3*i, N)
 363
 364     unscaled_DFT (N/2, even_input, even_output)
 365     unscaled_DFT (N/4, input_1, output_1)
 366     unscaled_DFT (N/4, input_3, output_3)
 367
 368     for i in range(N/2):
 369         output[2*i] = even_output[i]
 370
 371     for i in range(N/4):
 372         output[4*i+1] = output_1[i]
 373         output[4*i+3] = output_3[i]
 374
 375 # The complexity is again the same as for the decimation-in-time varient.
 376
 377
 378 # Now let us now summarize our various algorithms for DFT decomposition :
 379
 380 # radix-2 : DFT(N) -> 2*DFT(N/2) using N/2 multiplies and N additions
 381 # radix-4 : DFT(N) -> 4*DFT(N/2) using 3*N/4 multiplies and 2*N additions
 382 # split-radix : DFT(N) -> DFT(N/2) + 2*DFT(N/4) using N/2 muls and 3*N/2 adds
 383
 384 # (we are always speaking of complex multiplies and complex additions... a
 385 # complex addition is implemented with 2 real additions, and a complex
 386 # multiply is implemented with either 2 adds and 4 muls or 3 adds and 3 muls,
 387 # so we will keep a separate count of these)
 388
 389 # If we want to take into account the special values of W(i,N), we can remove
 390 # a few complex multiplies. Supposing N>=16 we can remove :
 391 # radix-2 : remove 2 complex multiplies, simplify 2 others
 392 # radix-4 : remove 4 complex multiplies, simplify 4 others
 393 # split-radix : remove 2 complex multiplies, simplify 2 others
 394
 395 # This gives the following table for the complexity of a complex DFT :
 396 #       N       real additions  real multiplies complex multiplies
 397 #       1               0               0               0
 398 #       2               4               0               0
 399 #       4              16               0               0
 400 #       8              52               4               0
 401 #      16             136               8               4
 402 #      32             340              20              16
 403 #      64             808              40              52
 404 #     128            1876              84             144
 405 #     256            4264             168             372
 406 #     512            9556             340             912
 407 #    1024           21160             680            2164
 408 #    2048           46420            1364            5008
 409 #    4096          101032            2728           11380
 410 #    8192          218452            5460           25488
 411 #   16384          469672           10920           56436
 412 #   32768         1004884           21844          123792
 413 #   65536         2140840           43688          269428
 414
 415 # If we chose to implement complex multiplies with 3 real muls + 3 real adds,
 416 # then these results are consistent with the table at the end of the
 417 # www.cmlab.csie.ntu.edu.tw DFT tutorial that I mentionned earlier.
 418
 419
 420 # Now another important case for the DFT is the one where the inputs are
 421 # real numbers instead of complex ones. We have to find ways to optimize for
 422 # this important case.
 423
 424 # If the DFT inputs are real-valued, then the DFT outputs have nice properties
 425 # too : output[0] and output[N/2] will be real numbers, and output[N-i] will
 426 # be the conjugate of output[i] for i in 0...N/2-1
 427
 428 # Likewise, if the DFT inputs are purely imaginary numbers, then the DFT
 429 # outputs will have special properties too : output[0] and output[N/2] will be
 430 # purely imaginary, and output[N-i] will be the opposite of the conjugate of
 431 # output[i] for i in 0...N/2-1
 432
 433 # We can use these properties to calculate two real-valued DFT at once :
 434
 435 def two_real_unscaled_DFT (N, input1, input2, output1, output2):
 436     input = vector(N)
 437     output = vector(N)
 438
 439     for i in range(N):
 440         input[i] = input1[i] + 1j * input2[i]
 441
 442     unscaled_DFT (N, input, output)
 443
 444     output1[0] = output[0].real + 0j
 445     output2[0] = output[0].imag + 0j
 446
 447     for i in range(N/2)[1:]:
 448         output1[i] = 0.5 * (output[i] + conjugate(output[N-i]))
 449         output2[i] = -0.5j * (output[i] - conjugate(output[N-i]))
 450
 451         output1[N-i] = conjugate(output1[i])
 452         output2[N-i] = conjugate(output2[i])
 453
 454     output1[N/2] = output[N/2].real + 0j
 455     output2[N/2] = output[N/2].imag + 0j
 456
 457 # This routine does a total of N-2 complex additions and N-2 complex
 458 # multiplies by 0.5
 459
 460 # This routine can also be inverted to calculate the IDFT of two vectors at
 461 # once if we know that the outputs will be real-valued.
 462
 463
 464 # If we have only one real-valued DFT calculation to do, we can still cut this
 465 # calculation in several parts using one of the decimate-in-time methods
 466 # (so that the different parts are still real-valued)
 467
 468 # As with complex DFT calculations, the best method is to use a split radix.
 469 # There are a lot of symetries in the DFT outputs that we can exploit to
 470 # reduce the number of operations...
 471
 472 def real_unscaled_DFT_split_radix_time_1 (N, input, output):
 473     even_input = vector(N/2)
 474     even_output = vector(N/2)
 475     input_1 = vector(N/4)
 476     output_1 = vector(N/4)
 477     input_3 = vector(N/4)
 478     output_3 = vector(N/4)
 479     tmp_0 = vector(N/4)
 480     tmp_1 = vector(N/4)
 481
 482     for i in range(N/2):
 483         even_input[i] = input[2*i]
 484
 485     for i in range(N/4):
 486         input_1[i] = input[4*i+1]
 487         input_3[i] = input[4*i+3]
 488
 489     unscaled_DFT (N/2, even_input, even_output)
 490     # this is again a real DFT !
 491     # we will only use even_output[i] for i in 0 ... N/4 included. we know that
 492     # even_output[N/2-i] is the conjugate of even_output[i]... also we know
 493     # that even_output[0] and even_output[N/4] are purely real.
 494
 495     unscaled_DFT (N/4, input_1, output_1)
 496     unscaled_DFT (N/4, input_3, output_3)
 497     # these are real DFTs too... with symetries in the outputs... once again
 498
 499     tmp_0[0] = output_1[0] + output_3[0]        # real numbers
 500     tmp_1[0] = output_1[0] - output_3[0]        # real numbers
 501
 502     tmp__0 = (output_1[N/8] + output_3[N/8]) * sqrt(0.5)        # real numbers
 503     tmp__1 = (output_1[N/8] - output_3[N/8]) * sqrt(0.5)        # real numbers
 504     tmp_0[N/8] = tmp__1 - 1j * tmp__0           # real + 1j * real
 505     tmp_1[N/8] = tmp__0 - 1j * tmp__1           # real + 1j * real
 506
 507     for i in range(N/8)[1:]:
 508         output_1[i] = output_1[i] * W (i, N)
 509         output_3[i] = output_3[i] * W (3*i, N)
 510
 511         tmp_0[i] = output_1[i] + output_3[i]
 512         tmp_1[i] = output_1[i] - output_3[i]
 513
 514         tmp_0[N/4-i] = -1j * conjugate(tmp_1[i])
 515         tmp_1[N/4-i] = -1j * conjugate(tmp_0[i])
 516
 517     output[0] = even_output[0] + tmp_0[0]               # real numbers
 518     output[N/4] = even_output[N/4] - 1j * tmp_1[0]      # real + 1j * real
 519     output[N/2] = even_output[0] - tmp_0[0]             # real numbers
 520     output[3*N/4] = even_output[N/4] + 1j * tmp_1[0]    # real + 1j * real
 521
 522     for i in range(N/4)[1:]:
 523         output[i] = even_output[i] + tmp_0[i]
 524         output[i+N/4] = conjugate(even_output[N/4-i]) - 1j * tmp_1[i]
 525
 526         output[N-i] = conjugate(output[i])
 527         output[3*N/4-i] = conjugate(output[i+N/4])
 528
 529 # This function performs one real DFT of size N/2 and two real DFT of size
 530 # N/4, followed by 6 real additions, 2 real multiplies, 3*N/4-4 complex
 531 # additions and N/4-2 complex multiplies.
 532
 533
 534 # We can also try to combine the two real DFT of size N/4 into a single complex
 535 # DFT :
 536
 537 def real_unscaled_DFT_split_radix_time_2 (N, input, output):
 538     even_input = vector(N/2)
 539     even_output = vector(N/2)
 540     odd_input = vector(N/4)
 541     odd_output = vector(N/4)
 542     tmp_0 = vector(N/4)
 543     tmp_1 = vector(N/4)
 544
 545     for i in range(N/2):
 546         even_input[i] = input[2*i]
 547
 548     for i in range(N/4):
 549         odd_input[i] = input[4*i+1] + 1j * input[4*i+3]
 550
 551     unscaled_DFT (N/2, even_input, even_output)
 552     # this is again a real DFT !
 553     # we will only use even_output[i] for i in 0 ... N/4 included. we know that
 554     # even_output[N/2-i] is the conjugate of even_output[i]... also we know
 555     # that even_output[0] and even_output[N/4] are purely real.
 556
 557     unscaled_DFT (N/4, odd_input, odd_output)
 558     # but this one is a complex DFT so no special properties here
 559
 560     output_1 = odd_output[0].real
 561     output_3 = odd_output[0].imag
 562     tmp_0[0] = output_1 + output_3      # real numbers
 563     tmp_1[0] = output_1 - output_3      # real numbers
 564
 565     output_1 = odd_output[N/8].real
 566     output_3 = odd_output[N/8].imag
 567     tmp__0 = (output_1 + output_3) * sqrt(0.5)  # real numbers
 568     tmp__1 = (output_1 - output_3) * sqrt(0.5)  # real numbers
 569     tmp_0[N/8] = tmp__1 - 1j * tmp__0           # real + 1j * real
 570     tmp_1[N/8] = tmp__0 - 1j * tmp__1           # real + 1j * real
 571
 572     for i in range(N/8)[1:]:
 573         output_1 = odd_output[i] + conjugate(odd_output[N/4-i])
 574         output_3 = odd_output[i] - conjugate(odd_output[N/4-i])
 575
 576         output_1 = output_1 * 0.5 * W (i, N)
 577         output_3 = output_3 * -0.5j * W (3*i, N)
 578
 579         tmp_0[i] = output_1 + output_3
 580         tmp_1[i] = output_1 - output_3
 581
 582         tmp_0[N/4-i] = -1j * conjugate(tmp_1[i])
 583         tmp_1[N/4-i] = -1j * conjugate(tmp_0[i])
 584
 585     output[0] = even_output[0] + tmp_0[0]               # real numbers
 586     output[N/4] = even_output[N/4] - 1j * tmp_1[0]      # real + 1j * real
 587     output[N/2] = even_output[0] - tmp_0[0]             # real numbers
 588     output[3*N/4] = even_output[N/4] + 1j * tmp_1[0]    # real + 1j * real
 589
 590     for i in range(N/4)[1:]:
 591         output[i] = even_output[i] + tmp_0[i]
 592         output[i+N/4] = conjugate(even_output[N/4-i]) - 1j * tmp_1[i]
 593
 594         output[N-i] = conjugate(output[i])
 595         output[3*N/4-i] = conjugate(output[i+N/4])
 596
 597 # This function performs one real DFT of size N/2 and one complex DFT of size
 598 # N/4, followed by 6 real additions, 2 real multiplies, N-6 complex additions
 599 # and N/4-2 complex multiplies.
 600
 601 # After comparing the performance, it turns out that for real-valued DFT, the
 602 # version of the algorithm that subdivides the calculation into one real
 603 # DFT of size N/2 and two real DFT of size N/4 is the most efficient one.
 604 # The other version gives exactly the same number of multiplies and a few more
 605 # real additions.
 606
 607
 608 # Now we can also try the decimate-in-frequency method for a real-valued DFT.
 609 # Using the split-radix algorithm, and by taking into account the symetries of
 610 # the outputs :
 611
 612 def real_unscaled_DFT_split_radix_freq (N, input, output):
 613     even_input = vector(N/2)
 614     input_1 = vector(N/4)
 615     even_output = vector(N/2)
 616     output_1 = vector(N/4)
 617     tmp_0 = vector(N/4)
 618     tmp_1 = vector(N/4)
 619
 620     for i in range(N/2):
 621         even_input[i] = input[i] + input[i+N/2]
 622
 623     for i in range(N/4):
 624         tmp_0[i] = input[i] - input[i+N/2]
 625         tmp_1[i] = input[i+N/4] - input[i+3*N/4]
 626
 627     for i in range(N/4):
 628         input_1[i] = tmp_0[i] - 1j * tmp_1[i]
 629
 630     for i in range(N/4):
 631         input_1[i] = input_1[i] * W (i, N)
 632
 633     unscaled_DFT (N/2, even_input, even_output)
 634     # This is still a real-valued DFT
 635
 636     unscaled_DFT (N/4, input_1, output_1)
 637     # But that one is a complex-valued DFT
 638
 639     for i in range(N/2):
 640         output[2*i] = even_output[i]
 641
 642     for i in range(N/4):
 643         output[4*i+1] = output_1[i]
 644         output[N-1-4*i] = conjugate(output_1[i])
 645
 646 # I think this implementation is much more elegant than the decimate-in-time
 647 # version ! It looks very much like the complex-valued version, all we had to
 648 # do was remove one of the complex-valued internal DFT calls because we could
 649 # deduce the outputs by using the symetries of the problem.
 650
 651 # As for performance, we did N real additions, N/4 complex multiplies (a bit
 652 # less actually, because W(0,N) = 1 and W(N/8,N) is a "simple" multiply), then
 653 # one real DFT of size N/2 and one complex DFT of size N/4.
 654
 655 # It turns out that even if the methods are so different, the number of
 656 # operations is exactly the same as for the best of the two decimation-in-time
 657 # methods that we tried.
 658
 659
 660 # This gives us the following performance for real-valued DFT :
 661 #       N       real additions  real multiplies complex multiplies
 662 #       2               2               0               0
 663 #       4               6               0               0
 664 #       8              20               2               0
 665 #      16              54               4               2
 666 #      32             140              10               8
 667 #      64             342              20              26
 668 #     128             812              42              72
 669 #     256            1878              84             186
 670 #     512            4268             170             456
 671 #    1024            9558             340            1082
 672 #    2048           21164             682            2504
 673 #    4096           46422            1364            5690
 674 #    8192          101036            2730           12744
 675 #   16384          218454            5460           28218
 676 #   32768          469676           10922           61896
 677 #   65536         1004886           21844          134714
 678
 679
 680 # As an example, this is an implementation of the real-valued DFT8 :
 681
 682 def DFT8 (input, output):
 683     even_0 = input[0] + input[4]
 684     even_1 = input[1] + input[5]
 685     even_2 = input[2] + input[6]
 686     even_3 = input[3] + input[7]
 687
 688     tmp_0 = even_0 + even_2
 689     tmp_1 = even_0 - even_2
 690     tmp_2 = even_1 + even_3
 691     tmp_3 = even_1 - even_3
 692
 693     output[0] = tmp_0 + tmp_2
 694     output[2] = tmp_1 - 1j * tmp_3
 695     output[4] = tmp_0 - tmp_2
 696
 697     odd_0_r = input[0] - input[4]
 698     odd_0_i = input[2] - input[6]
 699
 700     tmp_0 = input[1] - input[5]
 701     tmp_1 = input[3] - input[7]
 702     odd_1_r = (tmp_0 - tmp_1) * sqrt(0.5)
 703     odd_1_i = (tmp_0 + tmp_1) * sqrt(0.5)
 704
 705     output[1] = (odd_0_r + odd_1_r) - 1j * (odd_0_i + odd_1_i)
 706     output[5] = (odd_0_r - odd_1_r) - 1j * (odd_0_i - odd_1_i)
 707
 708     output[3] = conjugate(output[5])
 709     output[6] = conjugate(output[2])
 710     output[7] = conjugate(output[1])
 711
 712
 713 # Also a basic implementation of the real-valued DFT4 :
 714
 715 def DFT4 (input, output):
 716     tmp_0 = input[0] + input[2]
 717     tmp_1 = input[0] - input[2]
 718     tmp_2 = input[1] + input[3]
 719     tmp_3 = input[1] - input[3]
 720
 721     output[0] = tmp_0 + tmp_2
 722     output[1] = tmp_1 - 1j * tmp_3
 723     output[2] = tmp_0 - tmp_2
 724     output[3] = tmp_1 + 1j * tmp_3
 725
 726
 727 # A similar idea might be used to calculate only the real part of the output
 728 # of a complex DFT : we take an DFT algorithm for real inputs and complex
 729 # outputs and we simply reverse it. The resulting algorithm will only work
 730 # with inputs that satisfy the conjugaison rule (input[i] is the conjugate of
 731 # input[N-i]) so we can do a first pass to modify the input so that it follows
 732 # this rule. An example implementation is as follows (adapted from the
 733 # unscaled_DFT_split_radix_time algorithm) :
 734
 735 def complex2real_unscaled_DFT_split_radix_time (N, input, output):
 736     even_input = vector(N/2)
 737     input_1 = vector(N/4)
 738     even_output = vector(N/2)
 739     output_1 = vector(N/4)
 740
 741     for i in range(N/2):
 742         even_input[i] = input[2*i]
 743
 744     for i in range(N/4):
 745         input_1[i] = input[4*i+1] + conjugate(input[N-1-4*i])
 746
 747     unscaled_DFT (N/2, even_input, even_output)
 748     unscaled_DFT (N/4, input_1, output_1)
 749
 750     for i in range(N/4):
 751         output_1[i] = output_1[i] * W (i, N)
 752
 753     for i in range(N/4):
 754         output[i] = even_output[i] + output_1[i].real
 755         output[i+N/4] = even_output[i+N/4] + output_1[i].imag
 756         output[i+N/2] = even_output[i] - output_1[i].real
 757         output[i+3*N/4] = even_output[i+N/4] - output_1[i].imag
 758
 759 # This algorithm does N/4 complex additions, N/4-1 complex multiplies
 760 # (including one "simple" multiply for i=N/8), N real additions, one
 761 # "complex-to-real" DFT of size N/2, and one complex DFT of size N/4.
 762 # Also, in the complex DFT of size N/4, we do not care about the imaginary
 763 # part of output_1[0], which in practice allows us to save one real addition.
 764
 765 # This gives us the following performance for complex DFT with real outputs :
 766 #       N       real additions  real multiplies complex multiplies
 767 #       1               0               0               0
 768 #       2               2               0               0
 769 #       4               8               0               0
 770 #       8              25               2               0
 771 #      16              66               4               2
 772 #      32             167              10               8
 773 #      64             400              20              26
 774 #     128             933              42              72
 775 #     256            2126              84             186
 776 #     512            4771             170             456
 777 #    1024           10572             340            1082
 778 #    2048           23201             682            2504
 779 #    4096           50506            1364            5690
 780 #    8192          109215            2730           12744
 781 #   16384          234824            5460           28218
 782 #   32768          502429           10922           61896
 783 #   65536         1070406           21844          134714
 784
 785
 786 # Now let's talk about the DCT algorithm. The canonical definition for it is
 787 # as follows :
 788
 789 def C (k, N):
 790     return cos ((k*pi)/(2*N))
 791
 792 def unscaled_DCT (N, input, output):
 793     for o in range(N):          # o is output index
 794         output[o] = 0
 795         for i in range(N):      # i is input index
 796             output[o] = output[o] + input[i] * C ((2*i+1)*o, N)
 797
 798 # This trivial algorithm uses N*N multiplications and N*(N-1) additions.
 799
 800
 801 # One possible decomposition on this calculus is to use the fact that C (i, N)
 802 # and C (2*N-i, N) are opposed. This can lead to this decomposition :
 803
 804 #def unscaled_DCT (N, input, output):
 805 #   even_input = vector (N)
 806 #   odd_input = vector (N)
 807 #   even_output = vector (N)
 808 #   odd_output = vector (N)
 809 #
 810 #   for i in range(N/2):
 811 #       even_input[i] = input[i] + input[N-1-i]
 812 #       odd_input[i] = input[i] - input[N-1-i]
 813 #
 814 #   unscaled_DCT (N, even_input, even_output)
 815 #   unscaled_DCT (N, odd_input, odd_output)
 816 #
 817 #   for i in range(N/2):
 818 #       output[2*i] = even_output[2*i]
 819 #       output[2*i+1] = odd_output[2*i+1]
 820
 821 # Now the even part can easily be calculated : by looking at the C(k,N)
 822 # formula, we see that the even part is actually an unscaled DCT of size N/2.
 823 # The odd part looks like a DCT of size N/2, but the coefficients are
 824 # actually C ((2*i+1)*(2*o+1), 2*N) instead of C ((2*i+1)*o, N).
 825
 826 # We use a trigonometric relation here :
 827 # 2 * C ((a+b)/2, N) * C ((a-b)/2, N) = C (a, N) + C (b, N)
 828 # Thus with a = (2*i+1)*o and b = (2*i+1)*(o+1) :
 829 # 2 * C((2*i+1)*(2*o+1),2N) * C(2*i+1,2N) = C((2*i+1)*o,N) + C((2*i+1)*(o+1),N)
 830
 831 # This leads us to the Lee DCT algorithm :
 832
 833 def unscaled_DCT_Lee (N, input, output):
 834     even_input = vector(N/2)
 835     odd_input = vector(N/2)
 836     even_output = vector(N/2)
 837     odd_output = vector(N/2)
 838
 839     for i in range(N/2):
 840         even_input[i] = input[i] + input[N-1-i]
 841         odd_input[i] = input[i] - input[N-1-i]
 842
 843     for i in range(N/2):
 844         odd_input[i] = odd_input[i] * (0.5 / C (2*i+1, N))
 845
 846     unscaled_DCT (N/2, even_input, even_output)
 847     unscaled_DCT (N/2, odd_input, odd_output)
 848
 849     for i in range(N/2-1):
 850         odd_output[i] = odd_output[i] + odd_output[i+1]
 851
 852     for i in range(N/2):
 853         output[2*i] = even_output[i]
 854         output[2*i+1] = odd_output[i];
 855
 856 # Notes about this algorithm :
 857
 858 # The algorithm can be easily inverted to calculate the IDCT instead :
 859 # each of the basic stages are separately inversible...
 860
 861 # This function does N adds, then N/2 muls, then 2 recursive calls with
 862 # size N/2, then N/2-1 adds again. If we apply it recursively, the total
 863 # number of operations will be N*log2(N)/2 multiplies and N*(3*log2(N)/2-1) + 1
 864 # additions. So this is much faster than the canonical algorithm.
 865
 866 # Some of the multiplication coefficients 0.5/cos(...) can get quite large.
 867 # This means that a small error in the input will give a large error on the
 868 # output... For a DCT of size N the biggest coefficient will be at i=N/2-1
 869 # and it will be slighly more than N/pi which can be large for large N's.
 870
 871 # In the IDCT however, the multiplication coefficients for the reverse
 872 # transformation are of the form 2*cos(...) so they can not get big and there
 873 # is no accuracy problem.
 874
 875 # You can find another description of this algorithm at
 876 # http://www.intel.com/drg/mmx/appnotes/ap533.htm
 877
 878
 879
 880 # Another idea is to observe that the DCT calculation can be made to look like
 881 # the DFT calculation : C (k, N) is the real part of W (k, 4*N) or W (-k, 4*N).
 882 # We can use this idea translate the DCT algorithm into a call to the DFT
 883 # algorithm :
 884
 885 def unscaled_DCT_DFT (N, input, output):
 886     DFT_input = vector (4*N)
 887     DFT_output = vector (4*N)
 888
 889     for i in range(N):
 890         DFT_input[2*i+1] = input[i]
 891         #DFT_input[4*N-2*i-1] = input[i]        # We could use this instead
 892
 893     unscaled_DFT (4*N, DFT_input, DFT_output)
 894
 895     for i in range(N):
 896         output[i] = DFT_output[i].real
 897
 898
 899 # We can then use our knowledge of the DFT calculation to optimize for this
 900 # particular case. For example using the radix-2 decimation-in-time method :
 901
 902 #def unscaled_DCT_DFT (N, input, output):
 903 #   DFT_input = vector (2*N)
 904 #   DFT_output = vector (2*N)
 905 #
 906 #   for i in range(N):
 907 #       DFT_input[i] = input[i]
 908 #       #DFT_input[2*N-1-i] = input[i]  # We could use this instead
 909 #
 910 #   unscaled_DFT (2*N, DFT_input, DFT_output)
 911 #
 912 #   for i in range(N):
 913 #       DFT_output[i] = DFT_output[i] * W (i, 4*N)
 914 #
 915 #   for i in range(N):
 916 #       output[i] = DFT_output[i].real
 917
 918 # This leads us to the AAN implementation of the DCT algorithm : if we set
 919 # both DFT_input[i] and DFT_input[2*N-1-i] to input[i], then the imaginary
 920 # parts of W(2*i+1) and W(-2*i-1) will compensate, and output_DFT[i] will
 921 # already be a real after the multiplication by W(i,4*N). Which means that
 922 # before the multiplication, it is the product of a real number and W(-i,4*N).
 923 # This leads to the following code, called the AAN algorithm :
 924
 925 def unscaled_DCT_AAN (N, input, output):
 926     DFT_input = vector (2*N)
 927     DFT_output = vector (2*N)
 928
 929     for i in range(N):
 930         DFT_input[i] = input[i]
 931         DFT_input[2*N-1-i] = input[i]
 932
 933     symetrical_unscaled_DFT (2*N, DFT_input, DFT_output)
 934
 935     for i in range(N):
 936         output[i] = DFT_output[i].real * (0.5 / C (i, N))
 937
 938 # Notes about the AAN algorithm :
 939
 940 # The cost of this function is N real multiplies and a DFT of size 2*N. The
 941 # DFT to calculate has special properties : the inputs are real and symmetric.
 942 # Also, we only need to calculate the real parts of the N first DFT outputs.
 943 # We can try to take advantage of all that.
 944
 945 # We can invert this algorithm to calculate the IDCT. The final multiply
 946 # stage is trivially invertible. The DFT stage is invertible too, but we have
 947 # to take into account the special properties of this particular DFT for that.
 948
 949 # Once again we have to take care of numerical precision for the DFT : the
 950 # output coefficients can get large, so that a small error in the input will
 951 # give a large error on the output... For a DCT of size N the biggest
 952 # coefficient will be at i=N/2-1 and it will be slightly more than N/pi
 953
 954 # You can find another description of this algorithm at this url :
 955 # www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fastdct.html
 956 # (It is the same server where we already found a description of the fast DFT)
 957
 958
 959 # To optimize the DFT calculation, we can take a lot of specific things into
 960 # account : the input is real and symetric, and we only care about the real
 961 # part of the output. Also, we only care about the N first output coefficients,
 962 # but that one does not save operations actually, because the other
 963 # coefficients are the conjugates of the ones we look anyway.
 964
 965 # One useful way to use the symetry of the input is to use the radix-2
 966 # decimation-in-frequency algorithm. We can write a version of
 967 # unscaled_DFT_radix2_freq for the case where the input is symetrical :
 968 # we have removed a few additions in the first stages because even_input
 969 # is symetrical and odd_input is antisymetrical. Also, we have modified the
 970 # odd_input vector so that the second half of it is set to zero and the real
 971 # part of the DFT output is not modified. After that modification, the second
 972 # part of the odd_input was null so we used the radix-2 decimation-in-frequency
 973 # again on the odd DFT. Also odd_output is symetrical because input is real...
 974
 975 def symetrical_unscaled_DFT (N, input, output):
 976     even_input = vector(N/2)
 977     odd_tmp = vector(N/2)
 978     odd_input = vector(N/2)
 979     even_output = vector(N/2)
 980     odd_output = vector(N/2)
 981
 982     for i in range(N/4):
 983         even_input[N/2-i-1] = even_input[i] = input[i] + input[N/2-1-i]
 984
 985     for i in range(N/4):
 986         odd_tmp[i] = input[i] - input[N/2-1-i]
 987
 988     odd_input[0] = odd_tmp[0]
 989     for i in range(N/4)[1:]:
 990         odd_input[i] = (odd_tmp[i] + odd_tmp[i-1]) * W (i, N)
 991
 992     unscaled_DFT (N/2, even_input, even_output)
 993     # symetrical real inputs, real outputs
 994
 995     unscaled_DFT (N/4, odd_input, odd_output)
 996     # complex inputs, real outputs
 997
 998     for i in range(N/2):
 999         output[2*i] = even_output[i]
1000
1001     for i in range(N/4):
1002         output[N-1-4*i] = output[4*i+1] = odd_output[i]
1003
1004 # This procedure takes 3*N/4-1 real additions and N/2-3 real multiplies,
1005 # followed by another symetrical real DFT of size N/2 and a "complex to real"
1006 # DFT of size N/4.
1007
1008 # We thus get the following performance results :
1009 #       N       real additions  real multiplies complex multiplies
1010 #       1               0               0               0
1011 #       2               0               0               0
1012 #       4               2               0               0
1013 #       8               9               1               0
1014 #      16              28               6               0
1015 #      32              76              21               0
1016 #      64             189              54               2
1017 #     128             451             125              10
1018 #     256            1042             270              36
1019 #     512            2358             565             108
1020 #    1024            5251            1158             294
1021 #    2048           11557            2349             750
1022 #    4096           25200            4734            1832
1023 #    8192           54544            9509            4336
1024 #   16384          117337           19062           10026
1025 #   32768          251127           38173           22770
1026 #   65536          535102           76398           50988
1027
1028
1029 # We thus get a better performance with the AAN DCT algorithm than with the
1030 # Lee DCT algorithm : we can do a DCT of size 32 with 189 additions, 54+32 real
1031 # multiplies, and 2 complex multiplies. The Lee algorithm would have used 209
1032 # additions and 80 multiplies. With the AAN algorithm, we also have the
1033 # advantage that a big number of the multiplies are actually grouped at the
1034 # output stage of the algorithm, so if we want to do a DCT followed by a
1035 # quantization stage, we will be able to group the multiply of the output with
1036 # the multiply of the quantization stage, thus saving 32 more operations. In
1037 # the mpeg audio layer 1 or 2 processing, we can also group the multiply of the
1038 # output with the multiply of the convolution stage...
1039
1040 # Another source code for the AAN algorithm (implemented on 8 points, and
1041 # without all of the explanations) can be found at this URL :
1042 # http://developer.intel.com/drg/pentiumII/appnotes/aan_org.c . This
1043 # implementation uses 28 adds and 6+8 muls instead of 29 adds and 5+8 muls -
1044 # the difference is that in the symetrical_unscaled_DFT procedure, they noticed
1045 # how odd_input[i] and odd_input[N/4-i] will be combined at the start of the
1046 # complex-to-real DFT and they took advantage of this to convert 2 real adds
1047 # and 4 real muls into one complex multiply.
1048
1049
1050 # TODO : write about multi-dimentional DCT
1051
1052
1053 # TEST CODE
1054
1055 def dump (vector):
1056     str = ""
1057     for i in range(len(vector)):
1058         if i:
1059             str = str + ", "
1060         vector[i] = vector[i] + 0j
1061         realstr = "%+.4f" % vector[i].real
1062         imagstr = "%+.4fj" % vector[i].imag
1063         if (realstr == "-0.0000"):
1064             realstr = "+0.0000"
1065         if (imagstr == "-0.0000j"):
1066             imagstr = "+0.0000j"
1067         str = str + realstr #+ imagstr
1068     return "[%s]" % str
1069
1070 import whrandom
1071
1072 def test(N):
1073     input = vector(N)
1074     output = vector(N)
1075     verify = vector(N)
1076
1077     for i in range(N):
1078         input[i] = whrandom.random() + 1j * whrandom.random()
1079
1080     unscaled_DFT (N, input, output)
1081     unscaled_DFT (N, input, verify)
1082
1083     if (dump(output) != dump(verify)):
1084         print dump(output)
1085         print dump(verify)
1086
1087 #test (64)
1088
1089
1090 # PERFORMANCE ANALYSIS CODE
1091
1092 def display (table):
1093     N = 1
1094     print "#\tN\treal additions\treal multiplies\tcomplex multiplies"
1095     while table.has_key(N):
1096         print "#%8d%16d%16d%16d" % (N, table[N][0], table[N][1], table[N][2])
1097         N = 2*N
1098     print
1099
1100 best_complex_DFT = {}
1101
1102 def complex_DFT (max_N):
1103     best_complex_DFT[1] = (0,0,0)
1104     best_complex_DFT[2] = (4,0,0)
1105     best_complex_DFT[4] = (16,0,0)
1106     N = 8
1107     while (N<=max_N):
1108         # best method = split radix
1109         best2 = best_complex_DFT[N/2]
1110         best4 = best_complex_DFT[N/4]
1111         best_complex_DFT[N] = (best2[0] + 2*best4[0] + 3*N + 4,
1112                                best2[1] + 2*best4[1] + 4,
1113                                best2[2] + 2*best4[2] + N/2 - 4)
1114         N = 2*N
1115
1116 best_real_DFT = {}
1117
1118 def real_DFT (max_N):
1119     best_real_DFT[1] = (0,0,0)
1120     best_real_DFT[2] = (2,0,0)
1121     best_real_DFT[4] = (6,0,0)
1122     N = 8
1123     while (N<=max_N):
1124         # best method = split radix decimate-in-frequency
1125         best2 = best_real_DFT[N/2]
1126         best4 = best_complex_DFT[N/4]
1127         best_real_DFT[N] = (best2[0] + best4[0] + N + 2,
1128                             best2[1] + best4[1] + 2,
1129                             best2[2] + best4[2] + N/4 - 2)
1130         N = 2*N
1131
1132 best_complex2real_DFT = {}
1133
1134 def complex2real_DFT (max_N):
1135     best_complex2real_DFT[1] = (0,0,0)
1136     best_complex2real_DFT[2] = (2,0,0)
1137     best_complex2real_DFT[4] = (8,0,0)
1138     N = 8
1139     while (N<=max_N):
1140         best2 = best_complex2real_DFT[N/2]
1141         best4 = best_complex_DFT[N/4]
1142         best_complex2real_DFT[N] = (best2[0] + best4[0] + 3*N/2 + 1,
1143                                     best2[1] + best4[1] + 2,
1144                                     best2[2] + best4[2] + N/4 - 2)
1145         N = 2*N
1146
1147 best_real_symetric_DFT = {}
1148
1149 def real_symetric_DFT (max_N):
1150     best_real_symetric_DFT[1] = (0,0,0)
1151     best_real_symetric_DFT[2] = (0,0,0)
1152     best_real_symetric_DFT[4] = (2,0,0)
1153     N = 8
1154     while (N<=max_N):
1155         best2 = best_real_symetric_DFT[N/2]
1156         best4 = best_complex2real_DFT[N/4]
1157         best_real_symetric_DFT[N] = (best2[0] + best4[0] + 3*N/4 - 1,
1158                                      best2[1] + best4[1] + N/2 - 3,
1159                                      best2[2] + best4[2])
1160         N = 2*N
1161
1162 complex_DFT (65536)
1163 real_DFT (65536)
1164 complex2real_DFT (65536)
1165 real_symetric_DFT (65536)
1166
1167
1168 print "complex DFT"
1169 display (best_complex_DFT)
1170
1171 print "real DFT"
1172 display (best_real_DFT)
1173
1174 print "complex2real DFT"
1175 display (best_complex2real_DFT)
1176
1177 print "real symetric DFT"
1178 display (best_real_symetric_DFT)