apps/plugins/pdbox/PDa/src/d_imayer_fft.c

   1 /*
   2 ** Algorithm: complex multiplication
   3 **
   4 ** Performance: Bad accuracy, great speed.
   5 **
   6 ** The simplest, way of generating trig values.  Note, this method is
   7 ** not stable, and errors accumulate rapidly.
   8 **
   9 ** Computation: 2 *, 1 + per value.
  10 */
  11
  12
  13 #include "m_fixed.h"
  14
  15
  16 #define TRIG_FAST
  17
  18 #ifdef TRIG_FAST
  19 char mtrig_algorithm[] = "Simple";
  20 #define TRIG_VARS                                        \
  21       t_fixed t_c,t_s;
  22 #define TRIG_INIT(k,c,s)                                 \
  23     {                                                    \
  24      t_c  = fcostab[k];                                   \
  25      t_s  = fsintab[k];                                   \
  26      c    = itofix(1);                                   \
  27      s    = 0;                                           \
  28     }
  29 #define TRIG_NEXT(k,c,s)                                 \
  30     {                                                    \
  31      t_fixed   t = c;                                    \
  32      c   = mult(t,t_c) - mult(s,t_s);              \
  33      s   = mult(t,t_s) + mult(s,t_c);              \
  34     }
  35 #define TRIG_23(k,c1,s1,c2,s2,c3,s3)                     \
  36         {                                                \
  37          c2 = mult(c1,c1) - mult(s1,s1);           \
  38          s2 = (mult(c1,s1)<<2);                    \
  39          c3 = mult(c2,c1) - mult(s2,s1);           \
  40          s3 = mult(c2,s1) + mult(s2,c1);           \
  41         }
  42 #endif
  43 #define TRIG_RESET(k,c,s)
  44
  45 /*
  46 ** Algorithm: O. Buneman's trig generator.
  47 **
  48 ** Performance: Good accuracy, mediocre speed.
  49 **
  50 **   Manipulates a log(n) table to stably create n evenly spaced trig
  51 **   values. The newly generated values lay halfway between two
  52 **   known values, and are calculated by appropriatelly scaling the
  53 **   average of the known trig values appropriatelly according to the
  54 **   angle between them.  This process is inherently stable; and is
  55 **   about as accurate as most trig library functions.  The errors
  56 **   caused by this code segment are primarily due to the less stable
  57 **   method to calculate values for 2t and s 3t. Note: I believe this
  58 **   algorithm is patented(!), see readme file for more details.
  59 **
  60 ** Computation: 1 +, 1 *, much integer math,  per trig value
  61 **
  62 */
  63
  64 #ifdef TRIG_GOOD
  65 #define TRIG_VARS                                                \
  66       int t_lam=0;                                               \
  67       double coswrk[TRIG_TABLE_SIZE],sinwrk[TRIG_TABLE_SIZE];
  68 #define TRIG_INIT(k,c,s)                                         \
  69      {                                                           \
  70       int i;                                                     \
  71       for (i=0 ; i<=k ; i++)                                     \
  72           {coswrk[i]=fcostab[i];sinwrk[i]=fsintab[i];}             \
  73       t_lam = 0;                                                 \
  74       c = 1;                                                     \
  75       s = 0;                                                     \
  76      }
  77 #define TRIG_NEXT(k,c,s)                                         \
  78      {                                                           \
  79          int i,j;                                                \
  80          (t_lam)++;                                              \
  81          for (i=0 ; !((1<<i)&t_lam) ; i++);                      \
  82          i = k-i;                                                \
  83          s = sinwrk[i];                                          \
  84          c = coswrk[i];                                          \
  85          if (i>1)                                                \
  86             {                                                    \
  87              for (j=k-i+2 ; (1<<j)&t_lam ; j++);                 \
  88              j         = k - j;                                  \
  89              sinwrk[i] = halsec[i] * (sinwrk[i-1] + sinwrk[j]);  \
  90              coswrk[i] = halsec[i] * (coswrk[i-1] + coswrk[j]);  \
  91             }                                                    \
  92      }
  93 #endif
  94
  95
  96 #define TRIG_TAB_SIZE 22
  97
  98 static long long halsec[TRIG_TAB_SIZE]= {1,2,3};
  99
 100 #define FFTmult(x,y) mult(x,y)
 101
 102
 103
 104
 105 #include "d_imayer_tables.h"
 106
 107 #define SQRT2    ftofix(1.414213562373095048801688724209698)
 108
 109
 110 void imayer_fht(t_fixed *fz, int n)
 111 {
 112  int  k,k1,k2,k3,k4,kx;
 113  t_fixed *fi,*fn,*gi;
 114  TRIG_VARS;
 115
 116  for (k1=1,k2=0;k1<n;k1++)
 117     {
 118      t_fixed aa;
 119      for (k=n>>1; (!((k2^=k)&k)); k>>=1);
 120      if (k1>k2)
 121         {
 122              aa=fz[k1];fz[k1]=fz[k2];fz[k2]=aa;
 123         }
 124     }
 125  for ( k=0 ; (1<<k)<n ; k++ );
 126  k  &= 1;
 127  if (k==0)
 128     {
 129          for (fi=fz,fn=fz+n;fi<fn;fi+=4)
 130             {
 131              t_fixed f0,f1,f2,f3;
 132              f1     = fi[0 ]-fi[1 ];
 133              f0     = fi[0 ]+fi[1 ];
 134              f3     = fi[2 ]-fi[3 ];
 135              f2     = fi[2 ]+fi[3 ];
 136              fi[2 ] = (f0-f2);
 137              fi[0 ] = (f0+f2);
 138              fi[3 ] = (f1-f3);
 139              fi[1 ] = (f1+f3);
 140             }
 141     }
 142  else
 143     {
 144          for (fi=fz,fn=fz+n,gi=fi+1;fi<fn;fi+=8,gi+=8)
 145             {
 146              t_fixed bs1,bc1,bs2,bc2,bs3,bc3,bs4,bc4,
 147                 bg0,bf0,bf1,bg1,bf2,bg2,bf3,bg3;
 148              bc1     = fi[0 ] - gi[0 ];
 149              bs1     = fi[0 ] + gi[0 ];
 150              bc2     = fi[2 ] - gi[2 ];
 151              bs2     = fi[2 ] + gi[2 ];
 152              bc3     = fi[4 ] - gi[4 ];
 153              bs3     = fi[4 ] + gi[4 ];
 154              bc4     = fi[6 ] - gi[6 ];
 155              bs4     = fi[6 ] + gi[6 ];
 156              bf1     = (bs1 - bs2);
 157              bf0     = (bs1 + bs2);
 158              bg1     = (bc1 - bc2);
 159              bg0     = (bc1 + bc2);
 160              bf3     = (bs3 - bs4);
 161              bf2     = (bs3 + bs4);
 162              bg3     = FFTmult(SQRT2,bc4);
 163              bg2     = FFTmult(SQRT2,bc3);
 164              fi[4 ] = bf0 - bf2;
 165              fi[0 ] = bf0 + bf2;
 166              fi[6 ] = bf1 - bf3;
 167              fi[2 ] = bf1 + bf3;
 168              gi[4 ] = bg0 - bg2;
 169              gi[0 ] = bg0 + bg2;
 170              gi[6 ] = bg1 - bg3;
 171              gi[2 ] = bg1 + bg3;
 172             }
 173     }
 174  if (n<16) return;
 175
 176  do
 177     {
 178      t_fixed s1,c1;
 179      int ii;
 180      k  += 2;
 181      k1  = 1  << k;
 182      k2  = k1 << 1;
 183      k4  = k2 << 1;
 184      k3  = k2 + k1;
 185      kx  = k1 >> 1;
 186          fi  = fz;
 187          gi  = fi + kx;
 188          fn  = fz + n;
 189          do
 190             {
 191              t_fixed g0,f0,f1,g1,f2,g2,f3,g3;
 192              f1      = fi[0 ] - fi[k1];
 193              f0      = fi[0 ] + fi[k1];
 194              f3      = fi[k2] - fi[k3];
 195              f2      = fi[k2] + fi[k3];
 196              fi[k2]  = f0         - f2;
 197              fi[0 ]  = f0         + f2;
 198              fi[k3]  = f1         - f3;
 199              fi[k1]  = f1         + f3;
 200              g1      = gi[0 ] - gi[k1];
 201              g0      = gi[0 ] + gi[k1];
 202              g3      = FFTmult(SQRT2, gi[k3]);
 203              g2      = FFTmult(SQRT2, gi[k2]);
 204              gi[k2]  = g0         - g2;
 205              gi[0 ]  = g0         + g2;
 206              gi[k3]  = g1         - g3;
 207              gi[k1]  = g1         + g3;
 208              gi     += k4;
 209              fi     += k4;
 210             } while (fi<fn);
 211      TRIG_INIT(k,c1,s1);
 212      for (ii=1;ii<kx;ii++)
 213         {
 214          t_fixed c2,s2;
 215          TRIG_NEXT(k,c1,s1);
 216          c2 = FFTmult(c1,c1) - FFTmult(s1,s1);
 217          s2 = 2*FFTmult(c1,s1);
 218              fn = fz + n;
 219              fi = fz +ii;
 220              gi = fz +k1-ii;
 221              do
 222                 {
 223                  t_fixed a,b,g0,f0,f1,g1,f2,g2,f3,g3;
 224                  b       = FFTmult(s2,fi[k1]) - FFTmult(c2,gi[k1]);
 225                  a       = FFTmult(c2,fi[k1]) + FFTmult(s2,gi[k1]);
 226                  f1      = fi[0 ]    - a;
 227                  f0      = fi[0 ]    + a;
 228                  g1      = gi[0 ]    - b;
 229                  g0      = gi[0 ]    + b;
 230                  b       = FFTmult(s2,fi[k3]) - FFTmult(c2,gi[k3]);
 231                  a       = FFTmult(c2,fi[k3]) + FFTmult(s2,gi[k3]);
 232                  f3      = fi[k2]    - a;
 233                  f2      = fi[k2]    + a;
 234                  g3      = gi[k2]    - b;
 235                  g2      = gi[k2]    + b;
 236                  b       = FFTmult(s1,f2)     - FFTmult(c1,g3);
 237                  a       = FFTmult(c1,f2)     + FFTmult(s1,g3);
 238                  fi[k2]  = f0        - a;
 239                  fi[0 ]  = f0        + a;
 240                  gi[k3]  = g1        - b;
 241                  gi[k1]  = g1        + b;
 242                  b       = FFTmult(c1,g2)     - FFTmult(s1,f3);
 243                  a       = FFTmult(s1,g2)     + FFTmult(c1,f3);
 244                  gi[k2]  = g0        - a;
 245                  gi[0 ]  = g0        + a;
 246                  fi[k3]  = f1        - b;
 247                  fi[k1]  = f1        + b;
 248                  gi     += k4;
 249                  fi     += k4;
 250                 } while (fi<fn);
 251         }
 252      TRIG_RESET(k,c1,s1);
 253     } while (k4<n);
 254 }
 255
 256
 257 void imayer_fft(int n, t_fixed *real, t_fixed *imag)
 258 {
 259  t_fixed a,b,c,d;
 260  t_fixed q,r,s,t;
 261  int i,j,k;
 262  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 263   a = real[i]; b = real[j];  q=a+b; r=a-b;
 264   c = imag[i]; d = imag[j];  s=c+d; t=c-d;
 265   real[i] = (q+t)>>1; real[j] = (q-t)>>1;
 266   imag[i] = (s-r)>>1; imag[j] = (s+r)>>1;
 267  }
 268  imayer_fht(real,n);
 269  imayer_fht(imag,n);
 270 }
 271
 272 void imayer_ifft(int n, t_fixed *real, t_fixed *imag)
 273 {
 274  t_fixed a,b,c,d;
 275  t_fixed q,r,s,t;
 276  int i,j,k;
 277  imayer_fht(real,n);
 278  imayer_fht(imag,n);
 279  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 280   a = real[i]; b = real[j];  q=a+b; r=a-b;
 281   c = imag[i]; d = imag[j];  s=c+d; t=c-d;
 282   imag[i] = (s+r)>>1;  imag[j] = (s-r)>>1;
 283   real[i] = (q-t)>>1;  real[j] = (q+t)>>1;
 284  }
 285 }
 286
 287 void imayer_realfft(int n, t_fixed *real)
 288 {
 289  t_fixed a,b,c,d;
 290  int i,j,k;
 291  imayer_fht(real,n);
 292  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 293   a = real[i];
 294   b = real[j];
 295   real[j] = (a-b)>>1;
 296   real[i] = (a+b)>>1;
 297  }
 298 }
 299
 300 void imayer_realifft(int n, t_fixed *real)
 301 {
 302  t_fixed a,b,c,d;
 303  int i,j,k;
 304  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 305   a = real[i];
 306   b = real[j];
 307   real[j] = (a-b);
 308   real[i] = (a+b);
 309  }
 310  imayer_fht(real,n);
 311 }
 312
 313
 314
 315 #ifdef TEST
 316
 317 static double dfcostab[TRIG_TAB_SIZE]=
 318     {
 319      .00000000000000000000000000000000000000000000000000,
 320      .70710678118654752440084436210484903928483593768847,
 321      .92387953251128675612818318939678828682241662586364,
 322      .98078528040323044912618223613423903697393373089333,
 323      .99518472667219688624483695310947992157547486872985,
 324      .99879545620517239271477160475910069444320361470461,
 325      .99969881869620422011576564966617219685006108125772,
 326      .99992470183914454092164649119638322435060646880221,
 327      .99998117528260114265699043772856771617391725094433,
 328      .99999529380957617151158012570011989955298763362218,
 329      .99999882345170190992902571017152601904826792288976,
 330      .99999970586288221916022821773876567711626389934930,
 331      .99999992646571785114473148070738785694820115568892,
 332      .99999998161642929380834691540290971450507605124278,
 333      .99999999540410731289097193313960614895889430318945,
 334      .99999999885102682756267330779455410840053741619428,
 335      .99999999971275670684941397221864177608908945791828,
 336      .99999999992818917670977509588385049026048033310951
 337     };
 338 static double dfsintab[TRIG_TAB_SIZE]=
 339     {
 340      1.0000000000000000000000000000000000000000000000000,
 341      .70710678118654752440084436210484903928483593768846,
 342      .38268343236508977172845998403039886676134456248561,
 343      .19509032201612826784828486847702224092769161775195,
 344      .09801714032956060199419556388864184586113667316749,
 345      .04906767432741801425495497694268265831474536302574,
 346      .02454122852291228803173452945928292506546611923944,
 347      .01227153828571992607940826195100321214037231959176,
 348      .00613588464915447535964023459037258091705788631738,
 349      .00306795676296597627014536549091984251894461021344,
 350      .00153398018628476561230369715026407907995486457522,
 351      .00076699031874270452693856835794857664314091945205,
 352      .00038349518757139558907246168118138126339502603495,
 353      .00019174759731070330743990956198900093346887403385,
 354      .00009587379909597734587051721097647635118706561284,
 355      .00004793689960306688454900399049465887274686668768,
 356      .00002396844980841821872918657716502182009476147489,
 357      .00001198422490506970642152156159698898480473197753
 358     };
 359
 360 static double dhalsec[TRIG_TAB_SIZE]=
 361     {
 362      0,
 363      0,
 364      .54119610014619698439972320536638942006107206337801,
 365      .50979557910415916894193980398784391368261849190893,
 366      .50241928618815570551167011928012092247859337193963,
 367      .50060299823519630134550410676638239611758632599591,
 368      .50015063602065098821477101271097658495974913010340,
 369      .50003765191554772296778139077905492847503165398345,
 370      .50000941253588775676512870469186533538523133757983,
 371      .50000235310628608051401267171204408939326297376426,
 372      .50000058827484117879868526730916804925780637276181,
 373      .50000014706860214875463798283871198206179118093251,
 374      .50000003676714377807315864400643020315103490883972,
 375      .50000000919178552207366560348853455333939112569380,
 376      .50000000229794635411562887767906868558991922348920,
 377      .50000000057448658687873302235147272458812263401372,
 378      .50000000014362164661654736863252589967935073278768,
 379      .50000000003590541164769084922906986545517021050714
 380     };
 381
 382
 383 #include <stdio.h>
 384
 385
 386 int writetables()
 387 {
 388   int i;
 389
 390   printf("/* Tables for fixed point lookup with %d bit precision*/\n\n",fix1);
 391
 392   printf("static int fsintab[TRIG_TAB_SIZE]= {\n");
 393
 394   for (i=0;i<TRIG_TAB_SIZE-1;i++)
 395     printf("%ld, \n",ftofix(dfsintab[i]));
 396   printf("%ld }; \n",ftofix(dfsintab[i]));
 397
 398
 399   printf("\n\nstatic int fcostab[TRIG_TAB_SIZE]= {\n");
 400
 401   for (i=0;i<TRIG_TAB_SIZE-1;i++)
 402     printf("%ld, \n",ftofix(dfcostab[i]));
 403   printf("%ld }; \n",ftofix(dfcostab[i]));
 404
 405 }
 406
 407
 408 #define OUTPUT \
 409   fprintf(stderr,"Integer - Float\n");\
 410   for (i=0;i<NP;i++)\
 411     fprintf(stderr,"%f %f --> %f %f\n",fixtof(r[i]),fixtof(im[i]),\
 412       fr[i],fim[i]);\
 413   fprintf(stderr,"\n\n");
 414
 415
 416
 417 int main()
 418 {
 419   int i;
 420   t_fixed r[256];
 421   t_fixed im[256];
 422   float fr[256];
 423   float fim[256];
 424
 425
 426 #if 1
 427      writetables();
 428      exit(0);
 429 #endif
 430
 431
 432 #if 0
 433   t_fixed c1,s1,c2,s2,c3,s3;
 434   int k;
 435   int i;
 436
 437   TRIG_VARS;
 438
 439   for (k=2;k<10;k+=2) {
 440     TRIG_INIT(k,c1,s1);
 441     for (i=0;i<8;i++) {
 442       TRIG_NEXT(k,c1,s1);
 443       TRIG_23(k,c1,s1,c2,s2,c3,s3);
 444       printf("TRIG value k=%d,%d val1 = %f %f\n",k,i,fixtof(c1),fixtof(s1));
 445     }
 446   }
 447 #endif
 448
 449
 450
 451 #if 1
 452
 453   #define NP 16
 454
 455   for (i=0;i<NP;i++) {
 456     fr[i] = 0.;
 457     r[i] = 0.;
 458     fim[i] = 0.;
 459     im[i] = 0;
 460   }
 461
 462 #if 0
 463   for (i=0;i<NP;i++) {
 464     if (i&1) {
 465       fr[i] = 0.1*i;
 466       r[i] = ftofix(0.1*i);
 467     }
 468     else {
 469          fr[i] = 0.;
 470          r[i] = 0.;
 471     }
 472   }
 473 #endif
 474 #if 0
 475   for (i=0;i<NP;i++) {
 476        fr[i] = 0.1;
 477        r[i] = ftofix(0.1);
 478   }
 479 #endif
 480
 481   r[1] = ftofix(0.1);
 482   fr[1] = 0.1;
 483
 484
 485
 486   /* TEST RUN */
 487
 488   OUTPUT
 489
 490   imayer_fft(NP,r,im);
 491   mayer_fft(NP,fr,fim);
 492 //  imayer_fht(r,NP);
 493 //  mayer_fht(fr,NP);
 494
 495 #if 1
 496   for (i=0;i<NP;i++) {
 497        r[i] = mult(ftofix(0.01),r[i]);
 498        fr[i] = 0.01*fr[i];
 499   }
 500 #endif
 501
 502   OUTPUT
 503
 504
 505   imayer_fft(NP,r,im);
 506   mayer_fft(NP,fr,fim);
 507
 508   OUTPUT
 509
 510
 511 #endif
 512
 513
 514 }
 515
 516 #endif
 517 /*
 518 ** Algorithm: complex multiplication
 519 **
 520 ** Performance: Bad accuracy, great speed.
 521 **
 522 ** The simplest, way of generating trig values.  Note, this method is
 523 ** not stable, and errors accumulate rapidly.
 524 **
 525 ** Computation: 2 *, 1 + per value.
 526 */
 527
 528
 529 #include "m_fixed.h"
 530
 531
 532 #define TRIG_FAST
 533
 534 #ifdef TRIG_FAST
 535 char mtrig_algorithm[] = "Simple";
 536 #define TRIG_VARS                                        \
 537       t_fixed t_c,t_s;
 538 #define TRIG_INIT(k,c,s)                                 \
 539     {                                                    \
 540      t_c  = fcostab[k];                                   \
 541      t_s  = fsintab[k];                                   \
 542      c    = itofix(1);                                   \
 543      s    = 0;                                           \
 544     }
 545 #define TRIG_NEXT(k,c,s)                                 \
 546     {                                                    \
 547      t_fixed   t = c;                                    \
 548      c   = mult(t,t_c) - mult(s,t_s);              \
 549      s   = mult(t,t_s) + mult(s,t_c);              \
 550     }
 551 #define TRIG_23(k,c1,s1,c2,s2,c3,s3)                     \
 552         {                                                \
 553          c2 = mult(c1,c1) - mult(s1,s1);           \
 554          s2 = (mult(c1,s1)<<2);                    \
 555          c3 = mult(c2,c1) - mult(s2,s1);           \
 556          s3 = mult(c2,s1) + mult(s2,c1);           \
 557         }
 558 #endif
 559 #define TRIG_RESET(k,c,s)
 560
 561 /*
 562 ** Algorithm: O. Buneman's trig generator.
 563 **
 564 ** Performance: Good accuracy, mediocre speed.
 565 **
 566 **   Manipulates a log(n) table to stably create n evenly spaced trig
 567 **   values. The newly generated values lay halfway between two
 568 **   known values, and are calculated by appropriatelly scaling the
 569 **   average of the known trig values appropriatelly according to the
 570 **   angle between them.  This process is inherently stable; and is
 571 **   about as accurate as most trig library functions.  The errors
 572 **   caused by this code segment are primarily due to the less stable
 573 **   method to calculate values for 2t and s 3t. Note: I believe this
 574 **   algorithm is patented(!), see readme file for more details.
 575 **
 576 ** Computation: 1 +, 1 *, much integer math,  per trig value
 577 **
 578 */
 579
 580 #ifdef TRIG_GOOD
 581 #define TRIG_VARS                                                \
 582       int t_lam=0;                                               \
 583       double coswrk[TRIG_TABLE_SIZE],sinwrk[TRIG_TABLE_SIZE];
 584 #define TRIG_INIT(k,c,s)                                         \
 585      {                                                           \
 586       int i;                                                     \
 587       for (i=0 ; i<=k ; i++)                                     \
 588           {coswrk[i]=fcostab[i];sinwrk[i]=fsintab[i];}             \
 589       t_lam = 0;                                                 \
 590       c = 1;                                                     \
 591       s = 0;                                                     \
 592      }
 593 #define TRIG_NEXT(k,c,s)                                         \
 594      {                                                           \
 595          int i,j;                                                \
 596          (t_lam)++;                                              \
 597          for (i=0 ; !((1<<i)&t_lam) ; i++);                      \
 598          i = k-i;                                                \
 599          s = sinwrk[i];                                          \
 600          c = coswrk[i];                                          \
 601          if (i>1)                                                \
 602             {                                                    \
 603              for (j=k-i+2 ; (1<<j)&t_lam ; j++);                 \
 604              j         = k - j;                                  \
 605              sinwrk[i] = halsec[i] * (sinwrk[i-1] + sinwrk[j]);  \
 606              coswrk[i] = halsec[i] * (coswrk[i-1] + coswrk[j]);  \
 607             }                                                    \
 608      }
 609 #endif
 610
 611
 612 #define TRIG_TAB_SIZE 22
 613
 614 static long long halsec[TRIG_TAB_SIZE]= {1,2,3};
 615
 616 #define FFTmult(x,y) mult(x,y)
 617
 618
 619
 620
 621 #include "d_imayer_tables.h"
 622
 623 #define SQRT2    ftofix(1.414213562373095048801688724209698)
 624
 625
 626 void imayer_fht(t_fixed *fz, int n)
 627 {
 628  int  k,k1,k2,k3,k4,kx;
 629  t_fixed *fi,*fn,*gi;
 630  TRIG_VARS;
 631
 632  for (k1=1,k2=0;k1<n;k1++)
 633     {
 634      t_fixed aa;
 635      for (k=n>>1; (!((k2^=k)&k)); k>>=1);
 636      if (k1>k2)
 637         {
 638              aa=fz[k1];fz[k1]=fz[k2];fz[k2]=aa;
 639         }
 640     }
 641  for ( k=0 ; (1<<k)<n ; k++ );
 642  k  &= 1;
 643  if (k==0)
 644     {
 645          for (fi=fz,fn=fz+n;fi<fn;fi+=4)
 646             {
 647              t_fixed f0,f1,f2,f3;
 648              f1     = fi[0 ]-fi[1 ];
 649              f0     = fi[0 ]+fi[1 ];
 650              f3     = fi[2 ]-fi[3 ];
 651              f2     = fi[2 ]+fi[3 ];
 652              fi[2 ] = (f0-f2);
 653              fi[0 ] = (f0+f2);
 654              fi[3 ] = (f1-f3);
 655              fi[1 ] = (f1+f3);
 656             }
 657     }
 658  else
 659     {
 660          for (fi=fz,fn=fz+n,gi=fi+1;fi<fn;fi+=8,gi+=8)
 661             {
 662              t_fixed bs1,bc1,bs2,bc2,bs3,bc3,bs4,bc4,
 663                 bg0,bf0,bf1,bg1,bf2,bg2,bf3,bg3;
 664              bc1     = fi[0 ] - gi[0 ];
 665              bs1     = fi[0 ] + gi[0 ];
 666              bc2     = fi[2 ] - gi[2 ];
 667              bs2     = fi[2 ] + gi[2 ];
 668              bc3     = fi[4 ] - gi[4 ];
 669              bs3     = fi[4 ] + gi[4 ];
 670              bc4     = fi[6 ] - gi[6 ];
 671              bs4     = fi[6 ] + gi[6 ];
 672              bf1     = (bs1 - bs2);
 673              bf0     = (bs1 + bs2);
 674              bg1     = (bc1 - bc2);
 675              bg0     = (bc1 + bc2);
 676              bf3     = (bs3 - bs4);
 677              bf2     = (bs3 + bs4);
 678              bg3     = FFTmult(SQRT2,bc4);
 679              bg2     = FFTmult(SQRT2,bc3);
 680              fi[4 ] = bf0 - bf2;
 681              fi[0 ] = bf0 + bf2;
 682              fi[6 ] = bf1 - bf3;
 683              fi[2 ] = bf1 + bf3;
 684              gi[4 ] = bg0 - bg2;
 685              gi[0 ] = bg0 + bg2;
 686              gi[6 ] = bg1 - bg3;
 687              gi[2 ] = bg1 + bg3;
 688             }
 689     }
 690  if (n<16) return;
 691
 692  do
 693     {
 694      t_fixed s1,c1;
 695      int ii;
 696      k  += 2;
 697      k1  = 1  << k;
 698      k2  = k1 << 1;
 699      k4  = k2 << 1;
 700      k3  = k2 + k1;
 701      kx  = k1 >> 1;
 702          fi  = fz;
 703          gi  = fi + kx;
 704          fn  = fz + n;
 705          do
 706             {
 707              t_fixed g0,f0,f1,g1,f2,g2,f3,g3;
 708              f1      = fi[0 ] - fi[k1];
 709              f0      = fi[0 ] + fi[k1];
 710              f3      = fi[k2] - fi[k3];
 711              f2      = fi[k2] + fi[k3];
 712              fi[k2]  = f0         - f2;
 713              fi[0 ]  = f0         + f2;
 714              fi[k3]  = f1         - f3;
 715              fi[k1]  = f1         + f3;
 716              g1      = gi[0 ] - gi[k1];
 717              g0      = gi[0 ] + gi[k1];
 718              g3      = FFTmult(SQRT2, gi[k3]);
 719              g2      = FFTmult(SQRT2, gi[k2]);
 720              gi[k2]  = g0         - g2;
 721              gi[0 ]  = g0         + g2;
 722              gi[k3]  = g1         - g3;
 723              gi[k1]  = g1         + g3;
 724              gi     += k4;
 725              fi     += k4;
 726             } while (fi<fn);
 727      TRIG_INIT(k,c1,s1);
 728      for (ii=1;ii<kx;ii++)
 729         {
 730          t_fixed c2,s2;
 731          TRIG_NEXT(k,c1,s1);
 732          c2 = FFTmult(c1,c1) - FFTmult(s1,s1);
 733          s2 = 2*FFTmult(c1,s1);
 734              fn = fz + n;
 735              fi = fz +ii;
 736              gi = fz +k1-ii;
 737              do
 738                 {
 739                  t_fixed a,b,g0,f0,f1,g1,f2,g2,f3,g3;
 740                  b       = FFTmult(s2,fi[k1]) - FFTmult(c2,gi[k1]);
 741                  a       = FFTmult(c2,fi[k1]) + FFTmult(s2,gi[k1]);
 742                  f1      = fi[0 ]    - a;
 743                  f0      = fi[0 ]    + a;
 744                  g1      = gi[0 ]    - b;
 745                  g0      = gi[0 ]    + b;
 746                  b       = FFTmult(s2,fi[k3]) - FFTmult(c2,gi[k3]);
 747                  a       = FFTmult(c2,fi[k3]) + FFTmult(s2,gi[k3]);
 748                  f3      = fi[k2]    - a;
 749                  f2      = fi[k2]    + a;
 750                  g3      = gi[k2]    - b;
 751                  g2      = gi[k2]    + b;
 752                  b       = FFTmult(s1,f2)     - FFTmult(c1,g3);
 753                  a       = FFTmult(c1,f2)     + FFTmult(s1,g3);
 754                  fi[k2]  = f0        - a;
 755                  fi[0 ]  = f0        + a;
 756                  gi[k3]  = g1        - b;
 757                  gi[k1]  = g1        + b;
 758                  b       = FFTmult(c1,g2)     - FFTmult(s1,f3);
 759                  a       = FFTmult(s1,g2)     + FFTmult(c1,f3);
 760                  gi[k2]  = g0        - a;
 761                  gi[0 ]  = g0        + a;
 762                  fi[k3]  = f1        - b;
 763                  fi[k1]  = f1        + b;
 764                  gi     += k4;
 765                  fi     += k4;
 766                 } while (fi<fn);
 767         }
 768      TRIG_RESET(k,c1,s1);
 769     } while (k4<n);
 770 }
 771
 772
 773 void imayer_fft(int n, t_fixed *real, t_fixed *imag)
 774 {
 775  t_fixed a,b,c,d;
 776  t_fixed q,r,s,t;
 777  int i,j,k;
 778  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 779   a = real[i]; b = real[j];  q=a+b; r=a-b;
 780   c = imag[i]; d = imag[j];  s=c+d; t=c-d;
 781   real[i] = (q+t)>>1; real[j] = (q-t)>>1;
 782   imag[i] = (s-r)>>1; imag[j] = (s+r)>>1;
 783  }
 784  imayer_fht(real,n);
 785  imayer_fht(imag,n);
 786 }
 787
 788 void imayer_ifft(int n, t_fixed *real, t_fixed *imag)
 789 {
 790  t_fixed a,b,c,d;
 791  t_fixed q,r,s,t;
 792  int i,j,k;
 793  imayer_fht(real,n);
 794  imayer_fht(imag,n);
 795  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 796   a = real[i]; b = real[j];  q=a+b; r=a-b;
 797   c = imag[i]; d = imag[j];  s=c+d; t=c-d;
 798   imag[i] = (s+r)>>1;  imag[j] = (s-r)>>1;
 799   real[i] = (q-t)>>1;  real[j] = (q+t)>>1;
 800  }
 801 }
 802
 803 void imayer_realfft(int n, t_fixed *real)
 804 {
 805  t_fixed a,b,c,d;
 806  int i,j,k;
 807  imayer_fht(real,n);
 808  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 809   a = real[i];
 810   b = real[j];
 811   real[j] = (a-b)>>1;
 812   real[i] = (a+b)>>1;
 813  }
 814 }
 815
 816 void imayer_realifft(int n, t_fixed *real)
 817 {
 818  t_fixed a,b,c,d;
 819  int i,j,k;
 820  for (i=1,j=n-1,k=n/2;i<k;i++,j--) {
 821   a = real[i];
 822   b = real[j];
 823   real[j] = (a-b);
 824   real[i] = (a+b);
 825  }
 826  imayer_fht(real,n);
 827 }
 828
 829
 830
 831 #ifdef TEST
 832
 833 static double dfcostab[TRIG_TAB_SIZE]=
 834     {
 835      .00000000000000000000000000000000000000000000000000,
 836      .70710678118654752440084436210484903928483593768847,
 837      .92387953251128675612818318939678828682241662586364,
 838      .98078528040323044912618223613423903697393373089333,
 839      .99518472667219688624483695310947992157547486872985,
 840      .99879545620517239271477160475910069444320361470461,
 841      .99969881869620422011576564966617219685006108125772,
 842      .99992470183914454092164649119638322435060646880221,
 843      .99998117528260114265699043772856771617391725094433,
 844      .99999529380957617151158012570011989955298763362218,
 845      .99999882345170190992902571017152601904826792288976,
 846      .99999970586288221916022821773876567711626389934930,
 847      .99999992646571785114473148070738785694820115568892,
 848      .99999998161642929380834691540290971450507605124278,
 849      .99999999540410731289097193313960614895889430318945,
 850      .99999999885102682756267330779455410840053741619428,
 851      .99999999971275670684941397221864177608908945791828,
 852      .99999999992818917670977509588385049026048033310951
 853     };
 854 static double dfsintab[TRIG_TAB_SIZE]=
 855     {
 856      1.0000000000000000000000000000000000000000000000000,
 857      .70710678118654752440084436210484903928483593768846,
 858      .38268343236508977172845998403039886676134456248561,
 859      .19509032201612826784828486847702224092769161775195,
 860      .09801714032956060199419556388864184586113667316749,
 861      .04906767432741801425495497694268265831474536302574,
 862      .02454122852291228803173452945928292506546611923944,
 863      .01227153828571992607940826195100321214037231959176,
 864      .00613588464915447535964023459037258091705788631738,
 865      .00306795676296597627014536549091984251894461021344,
 866      .00153398018628476561230369715026407907995486457522,
 867      .00076699031874270452693856835794857664314091945205,
 868      .00038349518757139558907246168118138126339502603495,
 869      .00019174759731070330743990956198900093346887403385,
 870      .00009587379909597734587051721097647635118706561284,
 871      .00004793689960306688454900399049465887274686668768,
 872      .00002396844980841821872918657716502182009476147489,
 873      .00001198422490506970642152156159698898480473197753
 874     };
 875
 876 static double dhalsec[TRIG_TAB_SIZE]=
 877     {
 878      0,
 879      0,
 880      .54119610014619698439972320536638942006107206337801,
 881      .50979557910415916894193980398784391368261849190893,
 882      .50241928618815570551167011928012092247859337193963,
 883      .50060299823519630134550410676638239611758632599591,
 884      .50015063602065098821477101271097658495974913010340,
 885      .50003765191554772296778139077905492847503165398345,
 886      .50000941253588775676512870469186533538523133757983,
 887      .50000235310628608051401267171204408939326297376426,
 888      .50000058827484117879868526730916804925780637276181,
 889      .50000014706860214875463798283871198206179118093251,
 890      .50000003676714377807315864400643020315103490883972,
 891      .50000000919178552207366560348853455333939112569380,
 892      .50000000229794635411562887767906868558991922348920,
 893      .50000000057448658687873302235147272458812263401372,
 894      .50000000014362164661654736863252589967935073278768,
 895      .50000000003590541164769084922906986545517021050714
 896     };
 897
 898
 899 #include <stdio.h>
 900
 901
 902 int writetables()
 903 {
 904   int i;
 905
 906   printf("/* Tables for fixed point lookup with %d bit precision*/\n\n",fix1);
 907
 908   printf("static int fsintab[TRIG_TAB_SIZE]= {\n");
 909
 910   for (i=0;i<TRIG_TAB_SIZE-1;i++)
 911     printf("%ld, \n",ftofix(dfsintab[i]));
 912   printf("%ld }; \n",ftofix(dfsintab[i]));
 913
 914
 915   printf("\n\nstatic int fcostab[TRIG_TAB_SIZE]= {\n");
 916
 917   for (i=0;i<TRIG_TAB_SIZE-1;i++)
 918     printf("%ld, \n",ftofix(dfcostab[i]));
 919   printf("%ld }; \n",ftofix(dfcostab[i]));
 920
 921 }
 922
 923
 924 #define OUTPUT \
 925   fprintf(stderr,"Integer - Float\n");\
 926   for (i=0;i<NP;i++)\
 927     fprintf(stderr,"%f %f --> %f %f\n",fixtof(r[i]),fixtof(im[i]),\
 928       fr[i],fim[i]);\
 929   fprintf(stderr,"\n\n");
 930
 931
 932
 933 int main()
 934 {
 935   int i;
 936   t_fixed r[256];
 937   t_fixed im[256];
 938   float fr[256];
 939   float fim[256];
 940
 941
 942 #if 1
 943      writetables();
 944      exit(0);
 945 #endif
 946
 947
 948 #if 0
 949   t_fixed c1,s1,c2,s2,c3,s3;
 950   int k;
 951   int i;
 952
 953   TRIG_VARS;
 954
 955   for (k=2;k<10;k+=2) {
 956     TRIG_INIT(k,c1,s1);
 957     for (i=0;i<8;i++) {
 958       TRIG_NEXT(k,c1,s1);
 959       TRIG_23(k,c1,s1,c2,s2,c3,s3);
 960       printf("TRIG value k=%d,%d val1 = %f %f\n",k,i,fixtof(c1),fixtof(s1));
 961     }
 962   }
 963 #endif
 964
 965
 966
 967 #if 1
 968
 969   #define NP 16
 970
 971   for (i=0;i<NP;i++) {
 972     fr[i] = 0.;
 973     r[i] = 0.;
 974     fim[i] = 0.;
 975     im[i] = 0;
 976   }
 977
 978 #if 0
 979   for (i=0;i<NP;i++) {
 980     if (i&1) {
 981       fr[i] = 0.1*i;
 982       r[i] = ftofix(0.1*i);
 983     }
 984     else {
 985          fr[i] = 0.;
 986          r[i] = 0.;
 987     }
 988   }
 989 #endif
 990 #if 0
 991   for (i=0;i<NP;i++) {
 992        fr[i] = 0.1;
 993        r[i] = ftofix(0.1);
 994   }
 995 #endif
 996
 997   r[1] = ftofix(0.1);
 998   fr[1] = 0.1;
 999
1000
1001
1002   /* TEST RUN */
1003
1004   OUTPUT
1005
1006   imayer_fft(NP,r,im);
1007   mayer_fft(NP,fr,fim);
1008 //  imayer_fht(r,NP);
1009 //  mayer_fht(fr,NP);
1010
1011 #if 1
1012   for (i=0;i<NP;i++) {
1013        r[i] = mult(ftofix(0.01),r[i]);
1014        fr[i] = 0.01*fr[i];
1015   }
1016 #endif
1017
1018   OUTPUT
1019
1020
1021   imayer_fft(NP,r,im);
1022   mayer_fft(NP,fr,fim);
1023
1024   OUTPUT
1025
1026
1027 #endif
1028
1029
1030 }
1031
1032 #endif