Fixed include statements such that double precision version of genborn.c
[gromacs/rigid-bodies.git] / src / mdlib / gmx_fft_fftpack.c
bloba834e0c7d24ed2210cfd34dbd4922c984c90b4cf
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs 4.0 Copyright (c) 1991-2003
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
18 #ifdef HAVE_CONFIG_H
19 #include <config.h>
20 #endif
22 #ifdef GMX_FFT_FFTPACK
24 #include <math.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <errno.h>
30 #include "gmx_fft.h"
31 #include "gmx_fatal.h"
34 /** Contents of the FFTPACK fft datatype.
36 * FFTPACK only does 1d transforms, so we use a pointers to another fft for
37 * the transform in the next dimension.
38 * Thus, a 3d-structure contains a pointer to a 2d one, which in turns contains
39 * a pointer to a 1d. The 1d structure has next==NULL.
41 struct gmx_fft
43 int ndim; /**< Dimensions, including our subdimensions. */
44 int n; /**< Number of points in this dimension. */
45 int ifac[15]; /**< 15 bytes needed for cfft and rfft */
46 struct gmx_fft *next; /**< Pointer to next dimension, or NULL. */
47 real * work; /**< 1st 4n reserved for cfft, 1st 2n for rfft */
50 #include <math.h>
51 #include <stdio.h>
55 static void
56 fftpack_passf2(int ido,
57 int l1,
58 real cc[],
59 real ch[],
60 real wa1[],
61 int isign)
63 int i, k, ah, ac;
64 real ti2, tr2;
66 if (ido <= 2)
68 for (k=0; k<l1; k++)
70 ah = k*ido;
71 ac = 2*k*ido;
72 ch[ah] = cc[ac] + cc[ac + ido];
73 ch[ah + ido*l1] = cc[ac] - cc[ac + ido];
74 ch[ah+1] = cc[ac+1] + cc[ac + ido + 1];
75 ch[ah + ido*l1 + 1] = cc[ac+1] - cc[ac + ido + 1];
78 else
80 for (k=0; k<l1; k++)
82 for (i=0; i<ido-1; i+=2)
84 ah = i + k*ido;
85 ac = i + 2*k*ido;
86 ch[ah] = cc[ac] + cc[ac + ido];
87 tr2 = cc[ac] - cc[ac + ido];
88 ch[ah+1] = cc[ac+1] + cc[ac + 1 + ido];
89 ti2 = cc[ac+1] - cc[ac + 1 + ido];
90 ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2;
91 ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2;
99 static void
100 fftpack_passf3(int ido,
101 int l1,
102 real cc[],
103 real ch[],
104 real wa1[],
105 real wa2[],
106 int isign)
108 const real taur = -0.5;
109 const real taui = 0.866025403784439;
111 int i, k, ac, ah;
112 real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
114 if (ido == 2)
116 for (k=1; k<=l1; k++)
118 ac = (3*k - 2)*ido;
119 tr2 = cc[ac] + cc[ac + ido];
120 cr2 = cc[ac - ido] + taur*tr2;
121 ah = (k - 1)*ido;
122 ch[ah] = cc[ac - ido] + tr2;
124 ti2 = cc[ac + 1] + cc[ac + ido + 1];
125 ci2 = cc[ac - ido + 1] + taur*ti2;
126 ch[ah + 1] = cc[ac - ido + 1] + ti2;
128 cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
129 ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
130 ch[ah + l1*ido] = cr2 - ci3;
131 ch[ah + 2*l1*ido] = cr2 + ci3;
132 ch[ah + l1*ido + 1] = ci2 + cr3;
133 ch[ah + 2*l1*ido + 1] = ci2 - cr3;
136 else
138 for (k=1; k<=l1; k++)
140 for (i=0; i<ido-1; i+=2)
142 ac = i + (3*k - 2)*ido;
143 tr2 = cc[ac] + cc[ac + ido];
144 cr2 = cc[ac - ido] + taur*tr2;
145 ah = i + (k-1)*ido;
146 ch[ah] = cc[ac - ido] + tr2;
147 ti2 = cc[ac + 1] + cc[ac + ido + 1];
148 ci2 = cc[ac - ido + 1] + taur*ti2;
149 ch[ah + 1] = cc[ac - ido + 1] + ti2;
150 cr3 = isign*taui*(cc[ac] - cc[ac + ido]);
151 ci3 = isign*taui*(cc[ac + 1] - cc[ac + ido + 1]);
152 dr2 = cr2 - ci3;
153 dr3 = cr2 + ci3;
154 di2 = ci2 + cr3;
155 di3 = ci2 - cr3;
156 ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
157 ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
158 ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
159 ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
166 static void
167 fftpack_passf4(int ido,
168 int l1,
169 real cc[],
170 real ch[],
171 real wa1[],
172 real wa2[],
173 real wa3[],
174 int isign)
176 int i, k, ac, ah;
177 real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
179 if (ido == 2)
181 for (k=0; k<l1; k++)
183 ac = 4*k*ido + 1;
184 ti1 = cc[ac] - cc[ac + 2*ido];
185 ti2 = cc[ac] + cc[ac + 2*ido];
186 tr4 = cc[ac + 3*ido] - cc[ac + ido];
187 ti3 = cc[ac + ido] + cc[ac + 3*ido];
188 tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
189 tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
190 ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
191 tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
192 ah = k*ido;
193 ch[ah] = tr2 + tr3;
194 ch[ah + 2*l1*ido] = tr2 - tr3;
195 ch[ah + 1] = ti2 + ti3;
196 ch[ah + 2*l1*ido + 1] = ti2 - ti3;
197 ch[ah + l1*ido] = tr1 + isign*tr4;
198 ch[ah + 3*l1*ido] = tr1 - isign*tr4;
199 ch[ah + l1*ido + 1] = ti1 + isign*ti4;
200 ch[ah + 3*l1*ido + 1] = ti1 - isign*ti4;
203 else
205 for (k=0; k<l1; k++)
207 for (i=0; i<ido-1; i+=2)
209 ac = i + 1 + 4*k*ido;
210 ti1 = cc[ac] - cc[ac + 2*ido];
211 ti2 = cc[ac] + cc[ac + 2*ido];
212 ti3 = cc[ac + ido] + cc[ac + 3*ido];
213 tr4 = cc[ac + 3*ido] - cc[ac + ido];
214 tr1 = cc[ac - 1] - cc[ac + 2*ido - 1];
215 tr2 = cc[ac - 1] + cc[ac + 2*ido - 1];
216 ti4 = cc[ac + ido - 1] - cc[ac + 3*ido - 1];
217 tr3 = cc[ac + ido - 1] + cc[ac + 3*ido - 1];
218 ah = i + k*ido;
219 ch[ah] = tr2 + tr3;
220 cr3 = tr2 - tr3;
221 ch[ah + 1] = ti2 + ti3;
222 ci3 = ti2 - ti3;
223 cr2 = tr1 + isign*tr4;
224 cr4 = tr1 - isign*tr4;
225 ci2 = ti1 + isign*ti4;
226 ci4 = ti1 - isign*ti4;
227 ch[ah + l1*ido] = wa1[i]*cr2 - isign*wa1[i + 1]*ci2;
228 ch[ah + l1*ido + 1] = wa1[i]*ci2 + isign*wa1[i + 1]*cr2;
229 ch[ah + 2*l1*ido] = wa2[i]*cr3 - isign*wa2[i + 1]*ci3;
230 ch[ah + 2*l1*ido + 1] = wa2[i]*ci3 + isign*wa2[i + 1]*cr3;
231 ch[ah + 3*l1*ido] = wa3[i]*cr4 -isign*wa3[i + 1]*ci4;
232 ch[ah + 3*l1*ido + 1] = wa3[i]*ci4 + isign*wa3[i + 1]*cr4;
239 static void
240 fftpack_passf5(int ido,
241 int l1,
242 real cc[],
243 real ch[],
244 real wa1[],
245 real wa2[],
246 real wa3[],
247 real wa4[],
248 int isign)
250 const real tr11 = 0.309016994374947;
251 const real ti11 = 0.951056516295154;
252 const real tr12 = -0.809016994374947;
253 const real ti12 = 0.587785252292473;
255 int i, k, ac, ah;
256 real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
257 ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
259 if (ido == 2)
261 for (k = 1; k <= l1; ++k)
263 ac = (5*k - 4)*ido + 1;
264 ti5 = cc[ac] - cc[ac + 3*ido];
265 ti2 = cc[ac] + cc[ac + 3*ido];
266 ti4 = cc[ac + ido] - cc[ac + 2*ido];
267 ti3 = cc[ac + ido] + cc[ac + 2*ido];
268 tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
269 tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
270 tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
271 tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
272 ah = (k - 1)*ido;
273 ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
274 ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
275 cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
276 ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
277 cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
278 ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
279 cr5 = isign*(ti11*tr5 + ti12*tr4);
280 ci5 = isign*(ti11*ti5 + ti12*ti4);
281 cr4 = isign*(ti12*tr5 - ti11*tr4);
282 ci4 = isign*(ti12*ti5 - ti11*ti4);
283 ch[ah + l1*ido] = cr2 - ci5;
284 ch[ah + 4*l1*ido] = cr2 + ci5;
285 ch[ah + l1*ido + 1] = ci2 + cr5;
286 ch[ah + 2*l1*ido + 1] = ci3 + cr4;
287 ch[ah + 2*l1*ido] = cr3 - ci4;
288 ch[ah + 3*l1*ido] = cr3 + ci4;
289 ch[ah + 3*l1*ido + 1] = ci3 - cr4;
290 ch[ah + 4*l1*ido + 1] = ci2 - cr5;
293 else
295 for (k=1; k<=l1; k++)
297 for (i=0; i<ido-1; i+=2)
299 ac = i + 1 + (k*5 - 4)*ido;
300 ti5 = cc[ac] - cc[ac + 3*ido];
301 ti2 = cc[ac] + cc[ac + 3*ido];
302 ti4 = cc[ac + ido] - cc[ac + 2*ido];
303 ti3 = cc[ac + ido] + cc[ac + 2*ido];
304 tr5 = cc[ac - 1] - cc[ac + 3*ido - 1];
305 tr2 = cc[ac - 1] + cc[ac + 3*ido - 1];
306 tr4 = cc[ac + ido - 1] - cc[ac + 2*ido - 1];
307 tr3 = cc[ac + ido - 1] + cc[ac + 2*ido - 1];
308 ah = i + (k - 1)*ido;
309 ch[ah] = cc[ac - ido - 1] + tr2 + tr3;
310 ch[ah + 1] = cc[ac - ido] + ti2 + ti3;
311 cr2 = cc[ac - ido - 1] + tr11*tr2 + tr12*tr3;
312 ci2 = cc[ac - ido] + tr11*ti2 + tr12*ti3;
313 cr3 = cc[ac - ido - 1] + tr12*tr2 + tr11*tr3;
314 ci3 = cc[ac - ido] + tr12*ti2 + tr11*ti3;
315 cr5 = isign*(ti11*tr5 + ti12*tr4);
316 ci5 = isign*(ti11*ti5 + ti12*ti4);
317 cr4 = isign*(ti12*tr5 - ti11*tr4);
318 ci4 = isign*(ti12*ti5 - ti11*ti4);
319 dr3 = cr3 - ci4;
320 dr4 = cr3 + ci4;
321 di3 = ci3 + cr4;
322 di4 = ci3 - cr4;
323 dr5 = cr2 + ci5;
324 dr2 = cr2 - ci5;
325 di5 = ci2 - cr5;
326 di2 = ci2 + cr5;
327 ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
328 ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
329 ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
330 ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
331 ch[ah + 3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4;
332 ch[ah + 3*l1*ido + 1] = wa3[i]*di4 + isign*wa3[i+1]*dr4;
333 ch[ah + 4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5;
334 ch[ah + 4*l1*ido + 1] = wa4[i]*di5 + isign*wa4[i+1]*dr5;
341 static void
342 fftpack_passf(int * nac,
343 int ido,
344 int ip,
345 int l1,
346 int idl1,
347 real cc[],
348 real ch[],
349 real wa[],
350 int isign)
352 int idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl, inc,idp;
353 real wai, war;
355 idot = ido / 2;
356 nt = ip*idl1;
357 ipph = (ip + 1) / 2;
358 idp = ip*ido;
359 if (ido >= l1)
361 for (j=1; j<ipph; j++)
363 jc = ip - j;
364 for (k=0; k<l1; k++)
366 for (i=0; i<ido; i++)
368 ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
369 ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
373 for (k=0; k<l1; k++)
374 for (i=0; i<ido; i++)
375 ch[i + k*ido] = cc[i + k*ip*ido];
377 else
379 for (j=1; j<ipph; j++)
381 jc = ip - j;
382 for (i=0; i<ido; i++)
384 for (k=0; k<l1; k++)
386 ch[i + (k + j*l1)*ido] = cc[i + (j + k*ip)*ido] + cc[i + (jc + k*ip)*ido];
387 ch[i + (k + jc*l1)*ido] = cc[i + (j + k*ip)*ido] - cc[i + (jc + k*ip)*ido];
391 for (i=0; i<ido; i++)
392 for (k=0; k<l1; k++)
393 ch[i + k*ido] = cc[i + k*ip*ido];
396 idl = 2 - ido;
397 inc = 0;
398 for (l=1; l<ipph; l++)
400 lc = ip - l;
401 idl += ido;
402 for (ik=0; ik<idl1; ik++)
404 cc[ik + l*idl1] = ch[ik] + wa[idl - 2]*ch[ik + idl1];
405 cc[ik + lc*idl1] = isign*wa[idl-1]*ch[ik + (ip-1)*idl1];
407 idlj = idl;
408 inc += ido;
409 for (j=2; j<ipph; j++)
411 jc = ip - j;
412 idlj += inc;
413 if (idlj > idp) idlj -= idp;
414 war = wa[idlj - 2];
415 wai = wa[idlj-1];
416 for (ik=0; ik<idl1; ik++)
418 cc[ik + l*idl1] += war*ch[ik + j*idl1];
419 cc[ik + lc*idl1] += isign*wai*ch[ik + jc*idl1];
423 for (j=1; j<ipph; j++)
424 for (ik=0; ik<idl1; ik++)
425 ch[ik] += ch[ik + j*idl1];
426 for (j=1; j<ipph; j++)
428 jc = ip - j;
429 for (ik=1; ik<idl1; ik+=2)
431 ch[ik - 1 + j*idl1] = cc[ik - 1 + j*idl1] - cc[ik + jc*idl1];
432 ch[ik - 1 + jc*idl1] = cc[ik - 1 + j*idl1] + cc[ik + jc*idl1];
433 ch[ik + j*idl1] = cc[ik + j*idl1] + cc[ik - 1 + jc*idl1];
434 ch[ik + jc*idl1] = cc[ik + j*idl1] - cc[ik - 1 + jc*idl1];
437 *nac = 1;
438 if (ido == 2)
439 return;
440 *nac = 0;
441 for (ik=0; ik<idl1; ik++)
443 cc[ik] = ch[ik];
445 for (j=1; j<ip; j++)
447 for (k=0; k<l1; k++)
449 cc[(k + j*l1)*ido + 0] = ch[(k + j*l1)*ido + 0];
450 cc[(k + j*l1)*ido + 1] = ch[(k + j*l1)*ido + 1];
453 if (idot <= l1)
455 idij = 0;
456 for (j=1; j<ip; j++)
458 idij += 2;
459 for (i=3; i<ido; i+=2)
461 idij += 2;
462 for (k=0; k<l1; k++)
464 cc[i - 1 + (k + j*l1)*ido] =
465 wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
466 isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
467 cc[i + (k + j*l1)*ido] =
468 wa[idij - 2]*ch[i + (k + j*l1)*ido] +
469 isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
474 else
476 idj = 2 - ido;
477 for (j=1; j<ip; j++)
479 idj += ido;
480 for (k = 0; k < l1; k++)
482 idij = idj;
483 for (i=3; i<ido; i+=2)
485 idij += 2;
486 cc[i - 1 + (k + j*l1)*ido] =
487 wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
488 isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
489 cc[i + (k + j*l1)*ido] =
490 wa[idij - 2]*ch[i + (k + j*l1)*ido] +
491 isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
500 static void
501 fftpack_radf2(int ido,
502 int l1,
503 real cc[],
504 real ch[],
505 real wa1[])
507 int i, k, ic;
508 real ti2, tr2;
509 for (k=0; k<l1; k++)
511 ch[2*k*ido] = cc[k*ido] + cc[(k + l1)*ido];
512 ch[(2*k+1)*ido + ido-1] = cc[k*ido] - cc[(k + l1)*ido];
514 if (ido < 2)
515 return;
516 if (ido != 2)
518 for (k=0; k<l1; k++)
520 for (i=2; i<ido; i+=2)
522 ic = ido - i;
523 tr2 = wa1[i - 2]*cc[i-1 + (k + l1)*ido] + wa1[i - 1]*cc[i + (k + l1)*ido];
524 ti2 = wa1[i - 2]*cc[i + (k + l1)*ido] - wa1[i - 1]*cc[i-1 + (k + l1)*ido];
525 ch[i + 2*k*ido] = cc[i + k*ido] + ti2;
526 ch[ic + (2*k+1)*ido] = ti2 - cc[i + k*ido];
527 ch[i - 1 + 2*k*ido] = cc[i - 1 + k*ido] + tr2;
528 ch[ic - 1 + (2*k+1)*ido] = cc[i - 1 + k*ido] - tr2;
531 if (ido % 2 == 1)
532 return;
534 for (k=0; k<l1; k++)
536 ch[(2*k+1)*ido] = -cc[ido-1 + (k + l1)*ido];
537 ch[ido-1 + 2*k*ido] = cc[ido-1 + k*ido];
542 static void
543 fftpack_radb2(int ido,
544 int l1,
545 real cc[],
546 real ch[],
547 real wa1[])
549 int i, k, ic;
550 real ti2, tr2;
551 for (k=0; k<l1; k++)
553 ch[k*ido] = cc[2*k*ido] + cc[ido-1 + (2*k+1)*ido];
554 ch[(k + l1)*ido] = cc[2*k*ido] - cc[ido-1 + (2*k+1)*ido];
556 if (ido < 2)
557 return;
558 if (ido != 2)
560 for (k = 0; k < l1; ++k)
562 for (i = 2; i < ido; i += 2)
564 ic = ido - i;
565 ch[i-1 + k*ido] = cc[i-1 + 2*k*ido] + cc[ic-1 + (2*k+1)*ido];
566 tr2 = cc[i-1 + 2*k*ido] - cc[ic-1 + (2*k+1)*ido];
567 ch[i + k*ido] = cc[i + 2*k*ido] - cc[ic + (2*k+1)*ido];
568 ti2 = cc[i + (2*k)*ido] + cc[ic + (2*k+1)*ido];
569 ch[i-1 + (k + l1)*ido] = wa1[i - 2]*tr2 - wa1[i - 1]*ti2;
570 ch[i + (k + l1)*ido] = wa1[i - 2]*ti2 + wa1[i - 1]*tr2;
573 if (ido % 2 == 1)
574 return;
576 for (k = 0; k < l1; k++)
578 ch[ido-1 + k*ido] = 2*cc[ido-1 + 2*k*ido];
579 ch[ido-1 + (k + l1)*ido] = -2*cc[(2*k+1)*ido];
584 static void
585 fftpack_radf3(int ido,
586 int l1,
587 real cc[],
588 real ch[],
589 real wa1[],
590 real wa2[])
592 const real taur = -0.5;
593 const real taui = 0.866025403784439;
594 int i, k, ic;
595 real ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
597 for (k=0; k<l1; k++)
599 cr2 = cc[(k + l1)*ido] + cc[(k + 2*l1)*ido];
600 ch[3*k*ido] = cc[k*ido] + cr2;
601 ch[(3*k+2)*ido] = taui*(cc[(k + l1*2)*ido] - cc[(k + l1)*ido]);
602 ch[ido-1 + (3*k + 1)*ido] = cc[k*ido] + taur*cr2;
604 if (ido == 1)
605 return;
606 for (k=0; k<l1; k++)
608 for (i=2; i<ido; i+=2)
610 ic = ido - i;
611 dr2 = wa1[i - 2]*cc[i - 1 + (k + l1)*ido] +wa1[i - 1]*cc[i + (k + l1)*ido];
612 di2 = wa1[i - 2]*cc[i + (k + l1)*ido] - wa1[i - 1]*cc[i - 1 + (k + l1)*ido];
613 dr3 = wa2[i - 2]*cc[i - 1 + (k + l1*2)*ido] + wa2[i - 1]*cc[i + (k + l1*2)*ido];
614 di3 = wa2[i - 2]*cc[i + (k + l1*2)*ido] - wa2[i - 1]*cc[i - 1 + (k + l1*2)*ido];
615 cr2 = dr2 + dr3;
616 ci2 = di2 + di3;
617 ch[i - 1 + 3*k*ido] = cc[i - 1 + k*ido] + cr2;
618 ch[i + 3*k*ido] = cc[i + k*ido] + ci2;
619 tr2 = cc[i - 1 + k*ido] + taur*cr2;
620 ti2 = cc[i + k*ido] + taur*ci2;
621 tr3 = taui*(di2 - di3);
622 ti3 = taui*(dr3 - dr2);
623 ch[i - 1 + (3*k + 2)*ido] = tr2 + tr3;
624 ch[ic - 1 + (3*k + 1)*ido] = tr2 - tr3;
625 ch[i + (3*k + 2)*ido] = ti2 + ti3;
626 ch[ic + (3*k + 1)*ido] = ti3 - ti2;
632 static void
633 fftpack_radb3(int ido,
634 int l1,
635 real cc[],
636 real ch[],
637 real wa1[],
638 real wa2[])
640 const real taur = -0.5;
641 const real taui = 0.866025403784439;
642 int i, k, ic;
643 real ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
645 for (k=0; k<l1; k++)
647 tr2 = 2*cc[ido-1 + (3*k + 1)*ido];
648 cr2 = cc[3*k*ido] + taur*tr2;
649 ch[k*ido] = cc[3*k*ido] + tr2;
650 ci3 = 2*taui*cc[(3*k + 2)*ido];
651 ch[(k + l1)*ido] = cr2 - ci3;
652 ch[(k + 2*l1)*ido] = cr2 + ci3;
654 if (ido == 1)
655 return;
657 for (k=0; k<l1; k++)
659 for (i=2; i<ido; i+=2)
661 ic = ido - i;
662 tr2 = cc[i - 1 + (3*k + 2)*ido] + cc[ic - 1 + (3*k + 1)*ido];
663 cr2 = cc[i - 1 + 3*k*ido] + taur*tr2;
664 ch[i - 1 + k*ido] = cc[i - 1 + 3*k*ido] + tr2;
665 ti2 = cc[i + (3*k + 2)*ido]- cc[ic + (3*k + 1)*ido];
666 ci2 = cc[i + 3*k*ido] + taur*ti2;
667 ch[i + k*ido] = cc[i + 3*k*ido] + ti2;
668 cr3 = taui*(cc[i - 1 + (3*k + 2)*ido] - cc[ic - 1 + (3*k + 1)*ido]);
669 ci3 = taui*(cc[i + (3*k + 2)*ido] + cc[ic + (3*k + 1)*ido]);
670 dr2 = cr2 - ci3;
671 dr3 = cr2 + ci3;
672 di2 = ci2 + cr3;
673 di3 = ci2 - cr3;
674 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*dr2 - wa1[i - 1]*di2;
675 ch[i + (k + l1)*ido] = wa1[i - 2]*di2 + wa1[i - 1]*dr2;
676 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*dr3 - wa2[i - 1]*di3;
677 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*di3 + wa2[i - 1]*dr3;
683 static void
684 fftpack_radf4(int ido,
685 int l1,
686 real cc[],
687 real ch[],
688 real wa1[],
689 real wa2[],
690 real wa3[])
692 const real hsqt2 = 0.7071067811865475;
693 int i, k, ic;
694 real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
696 for (k=0; k<l1; k++)
698 tr1 = cc[(k + l1)*ido] + cc[(k + 3*l1)*ido];
699 tr2 = cc[k*ido] + cc[(k + 2*l1)*ido];
700 ch[4*k*ido] = tr1 + tr2;
701 ch[ido-1 + (4*k + 3)*ido] = tr2 - tr1;
702 ch[ido-1 + (4*k + 1)*ido] = cc[k*ido] - cc[(k + 2*l1)*ido];
703 ch[(4*k + 2)*ido] = cc[(k + 3*l1)*ido] - cc[(k + l1)*ido];
705 if (ido < 2)
706 return;
707 if (ido != 2)
709 for (k=0; k<l1; k++)
711 for (i=2; i<ido; i += 2)
713 ic = ido - i;
714 cr2 = wa1[i - 2]*cc[i - 1 + (k + l1)*ido] + wa1[i - 1]*cc[i + (k + l1)*ido];
715 ci2 = wa1[i - 2]*cc[i + (k + l1)*ido] - wa1[i - 1]*cc[i - 1 + (k + l1)*ido];
716 cr3 = wa2[i - 2]*cc[i - 1 + (k + 2*l1)*ido] + wa2[i - 1]*cc[i + (k + 2*l1)*ido];
717 ci3 = wa2[i - 2]*cc[i + (k + 2*l1)*ido] - wa2[i - 1]*cc[i - 1 + (k + 2*l1)*ido];
718 cr4 = wa3[i - 2]*cc[i - 1 + (k + 3*l1)*ido] + wa3[i - 1]*cc[i + (k + 3*l1)*ido];
719 ci4 = wa3[i - 2]*cc[i + (k + 3*l1)*ido] - wa3[i - 1]*cc[i - 1 + (k + 3*l1)*ido];
720 tr1 = cr2 + cr4;
721 tr4 = cr4 - cr2;
722 ti1 = ci2 + ci4;
723 ti4 = ci2 - ci4;
724 ti2 = cc[i + k*ido] + ci3;
725 ti3 = cc[i + k*ido] - ci3;
726 tr2 = cc[i - 1 + k*ido] + cr3;
727 tr3 = cc[i - 1 + k*ido] - cr3;
728 ch[i - 1 + 4*k*ido] = tr1 + tr2;
729 ch[ic - 1 + (4*k + 3)*ido] = tr2 - tr1;
730 ch[i + 4*k*ido] = ti1 + ti2;
731 ch[ic + (4*k + 3)*ido] = ti1 - ti2;
732 ch[i - 1 + (4*k + 2)*ido] = ti4 + tr3;
733 ch[ic - 1 + (4*k + 1)*ido] = tr3 - ti4;
734 ch[i + (4*k + 2)*ido] = tr4 + ti3;
735 ch[ic + (4*k + 1)*ido] = tr4 - ti3;
738 if (ido % 2 == 1)
739 return;
741 for (k=0; k<l1; k++)
743 ti1 = -hsqt2*(cc[ido-1 + (k + l1)*ido] + cc[ido-1 + (k + 3*l1)*ido]);
744 tr1 = hsqt2*(cc[ido-1 + (k + l1)*ido] - cc[ido-1 + (k + 3*l1)*ido]);
745 ch[ido-1 + 4*k*ido] = tr1 + cc[ido-1 + k*ido];
746 ch[ido-1 + (4*k + 2)*ido] = cc[ido-1 + k*ido] - tr1;
747 ch[(4*k + 1)*ido] = ti1 - cc[ido-1 + (k + 2*l1)*ido];
748 ch[(4*k + 3)*ido] = ti1 + cc[ido-1 + (k + 2*l1)*ido];
753 static void
754 fftpack_radb4(int ido,
755 int l1,
756 real cc[],
757 real ch[],
758 real wa1[],
759 real wa2[],
760 real wa3[])
762 const real sqrt2 = 1.414213562373095;
763 int i, k, ic;
764 real ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
765 for (k = 0; k < l1; k++)
767 tr1 = cc[4*k*ido] - cc[ido-1 + (4*k + 3)*ido];
768 tr2 = cc[4*k*ido] + cc[ido-1 + (4*k + 3)*ido];
769 tr3 = cc[ido-1 + (4*k + 1)*ido] + cc[ido-1 + (4*k + 1)*ido];
770 tr4 = cc[(4*k + 2)*ido] + cc[(4*k + 2)*ido];
771 ch[k*ido] = tr2 + tr3;
772 ch[(k + l1)*ido] = tr1 - tr4;
773 ch[(k + 2*l1)*ido] = tr2 - tr3;
774 ch[(k + 3*l1)*ido] = tr1 + tr4;
776 if (ido < 2)
777 return;
778 if (ido != 2)
780 for (k = 0; k < l1; ++k)
782 for (i = 2; i < ido; i += 2)
784 ic = ido - i;
785 ti1 = cc[i + 4*k*ido] + cc[ic + (4*k + 3)*ido];
786 ti2 = cc[i + 4*k*ido] - cc[ic + (4*k + 3)*ido];
787 ti3 = cc[i + (4*k + 2)*ido] - cc[ic + (4*k + 1)*ido];
788 tr4 = cc[i + (4*k + 2)*ido] + cc[ic + (4*k + 1)*ido];
789 tr1 = cc[i - 1 + 4*k*ido] - cc[ic - 1 + (4*k + 3)*ido];
790 tr2 = cc[i - 1 + 4*k*ido] + cc[ic - 1 + (4*k + 3)*ido];
791 ti4 = cc[i - 1 + (4*k + 2)*ido] - cc[ic - 1 + (4*k + 1)*ido];
792 tr3 = cc[i - 1 + (4*k + 2)*ido] + cc[ic - 1 + (4*k + 1)*ido];
793 ch[i - 1 + k*ido] = tr2 + tr3;
794 cr3 = tr2 - tr3;
795 ch[i + k*ido] = ti2 + ti3;
796 ci3 = ti2 - ti3;
797 cr2 = tr1 - tr4;
798 cr4 = tr1 + tr4;
799 ci2 = ti1 + ti4;
800 ci4 = ti1 - ti4;
801 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*cr2 - wa1[i - 1]*ci2;
802 ch[i + (k + l1)*ido] = wa1[i - 2]*ci2 + wa1[i - 1]*cr2;
803 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*cr3 - wa2[i - 1]*ci3;
804 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*ci3 + wa2[i - 1]*cr3;
805 ch[i - 1 + (k + 3*l1)*ido] = wa3[i - 2]*cr4 - wa3[i - 1]*ci4;
806 ch[i + (k + 3*l1)*ido] = wa3[i - 2]*ci4 + wa3[i - 1]*cr4;
809 if (ido % 2 == 1)
810 return;
812 for (k = 0; k < l1; k++)
814 ti1 = cc[(4*k + 1)*ido] + cc[(4*k + 3)*ido];
815 ti2 = cc[(4*k + 3)*ido] - cc[(4*k + 1)*ido];
816 tr1 = cc[ido-1 + 4*k*ido] - cc[ido-1 + (4*k + 2)*ido];
817 tr2 = cc[ido-1 + 4*k*ido] + cc[ido-1 + (4*k + 2)*ido];
818 ch[ido-1 + k*ido] = tr2 + tr2;
819 ch[ido-1 + (k + l1)*ido] = sqrt2*(tr1 - ti1);
820 ch[ido-1 + (k + 2*l1)*ido] = ti2 + ti2;
821 ch[ido-1 + (k + 3*l1)*ido] = -sqrt2*(tr1 + ti1);
826 static void
827 fftpack_radf5(int ido,
828 int l1,
829 real cc[],
830 real ch[],
831 real wa1[],
832 real wa2[],
833 real wa3[],
834 real wa4[])
836 const real tr11 = 0.309016994374947;
837 const real ti11 = 0.951056516295154;
838 const real tr12 = -0.809016994374947;
839 const real ti12 = 0.587785252292473;
840 int i, k, ic;
841 real ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
842 cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
844 for (k = 0; k < l1; k++)
846 cr2 = cc[(k + 4*l1)*ido] + cc[(k + l1)*ido];
847 ci5 = cc[(k + 4*l1)*ido] - cc[(k + l1)*ido];
848 cr3 = cc[(k + 3*l1)*ido] + cc[(k + 2*l1)*ido];
849 ci4 = cc[(k + 3*l1)*ido] - cc[(k + 2*l1)*ido];
850 ch[5*k*ido] = cc[k*ido] + cr2 + cr3;
851 ch[ido-1 + (5*k + 1)*ido] = cc[k*ido] + tr11*cr2 + tr12*cr3;
852 ch[(5*k + 2)*ido] = ti11*ci5 + ti12*ci4;
853 ch[ido-1 + (5*k + 3)*ido] = cc[k*ido] + tr12*cr2 + tr11*cr3;
854 ch[(5*k + 4)*ido] = ti12*ci5 - ti11*ci4;
856 if (ido == 1)
857 return;
858 for (k = 0; k < l1; ++k)
860 for (i = 2; i < ido; i += 2)
862 ic = ido - i;
863 dr2 = wa1[i - 2]*cc[i - 1 + (k + l1)*ido] + wa1[i - 1]*cc[i + (k + l1)*ido];
864 di2 = wa1[i - 2]*cc[i + (k + l1)*ido] - wa1[i - 1]*cc[i - 1 + (k + l1)*ido];
865 dr3 = wa2[i - 2]*cc[i - 1 + (k + 2*l1)*ido] + wa2[i - 1]*cc[i + (k + 2*l1)*ido];
866 di3 = wa2[i - 2]*cc[i + (k + 2*l1)*ido] - wa2[i - 1]*cc[i - 1 + (k + 2*l1)*ido];
867 dr4 = wa3[i - 2]*cc[i - 1 + (k + 3*l1)*ido] + wa3[i - 1]*cc[i + (k + 3*l1)*ido];
868 di4 = wa3[i - 2]*cc[i + (k + 3*l1)*ido] - wa3[i - 1]*cc[i - 1 + (k + 3*l1)*ido];
869 dr5 = wa4[i - 2]*cc[i - 1 + (k + 4*l1)*ido] + wa4[i - 1]*cc[i + (k + 4*l1)*ido];
870 di5 = wa4[i - 2]*cc[i + (k + 4*l1)*ido] - wa4[i - 1]*cc[i - 1 + (k + 4*l1)*ido];
871 cr2 = dr2 + dr5;
872 ci5 = dr5 - dr2;
873 cr5 = di2 - di5;
874 ci2 = di2 + di5;
875 cr3 = dr3 + dr4;
876 ci4 = dr4 - dr3;
877 cr4 = di3 - di4;
878 ci3 = di3 + di4;
879 ch[i - 1 + 5*k*ido] = cc[i - 1 + k*ido] + cr2 + cr3;
880 ch[i + 5*k*ido] = cc[i + k*ido] + ci2 + ci3;
881 tr2 = cc[i - 1 + k*ido] + tr11*cr2 + tr12*cr3;
882 ti2 = cc[i + k*ido] + tr11*ci2 + tr12*ci3;
883 tr3 = cc[i - 1 + k*ido] + tr12*cr2 + tr11*cr3;
884 ti3 = cc[i + k*ido] + tr12*ci2 + tr11*ci3;
885 tr5 = ti11*cr5 + ti12*cr4;
886 ti5 = ti11*ci5 + ti12*ci4;
887 tr4 = ti12*cr5 - ti11*cr4;
888 ti4 = ti12*ci5 - ti11*ci4;
889 ch[i - 1 + (5*k + 2)*ido] = tr2 + tr5;
890 ch[ic - 1 + (5*k + 1)*ido] = tr2 - tr5;
891 ch[i + (5*k + 2)*ido] = ti2 + ti5;
892 ch[ic + (5*k + 1)*ido] = ti5 - ti2;
893 ch[i - 1 + (5*k + 4)*ido] = tr3 + tr4;
894 ch[ic - 1 + (5*k + 3)*ido] = tr3 - tr4;
895 ch[i + (5*k + 4)*ido] = ti3 + ti4;
896 ch[ic + (5*k + 3)*ido] = ti4 - ti3;
902 static void
903 fftpack_radb5(int ido,
904 int l1,
905 real cc[],
906 real ch[],
907 real wa1[],
908 real wa2[],
909 real wa3[],
910 real wa4[])
912 const real tr11 = 0.309016994374947;
913 const real ti11 = 0.951056516295154;
914 const real tr12 = -0.809016994374947;
915 const real ti12 = 0.587785252292473;
917 int i, k, ic;
918 real ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
919 ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
921 for (k = 0; k < l1; k++)
923 ti5 = 2*cc[(5*k + 2)*ido];
924 ti4 = 2*cc[(5*k + 4)*ido];
925 tr2 = 2*cc[ido-1 + (5*k + 1)*ido];
926 tr3 = 2*cc[ido-1 + (5*k + 3)*ido];
927 ch[k*ido] = cc[5*k*ido] + tr2 + tr3;
928 cr2 = cc[5*k*ido] + tr11*tr2 + tr12*tr3;
929 cr3 = cc[5*k*ido] + tr12*tr2 + tr11*tr3;
930 ci5 = ti11*ti5 + ti12*ti4;
931 ci4 = ti12*ti5 - ti11*ti4;
932 ch[(k + l1)*ido] = cr2 - ci5;
933 ch[(k + 2*l1)*ido] = cr3 - ci4;
934 ch[(k + 3*l1)*ido] = cr3 + ci4;
935 ch[(k + 4*l1)*ido] = cr2 + ci5;
937 if (ido == 1) return;
938 for (k = 0; k < l1; ++k)
940 for (i = 2; i < ido; i += 2)
942 ic = ido - i;
943 ti5 = cc[i + (5*k + 2)*ido] + cc[ic + (5*k + 1)*ido];
944 ti2 = cc[i + (5*k + 2)*ido] - cc[ic + (5*k + 1)*ido];
945 ti4 = cc[i + (5*k + 4)*ido] + cc[ic + (5*k + 3)*ido];
946 ti3 = cc[i + (5*k + 4)*ido] - cc[ic + (5*k + 3)*ido];
947 tr5 = cc[i - 1 + (5*k + 2)*ido] - cc[ic - 1 + (5*k + 1)*ido];
948 tr2 = cc[i - 1 + (5*k + 2)*ido] + cc[ic - 1 + (5*k + 1)*ido];
949 tr4 = cc[i - 1 + (5*k + 4)*ido] - cc[ic - 1 + (5*k + 3)*ido];
950 tr3 = cc[i - 1 + (5*k + 4)*ido] + cc[ic - 1 + (5*k + 3)*ido];
951 ch[i - 1 + k*ido] = cc[i - 1 + 5*k*ido] + tr2 + tr3;
952 ch[i + k*ido] = cc[i + 5*k*ido] + ti2 + ti3;
953 cr2 = cc[i - 1 + 5*k*ido] + tr11*tr2 + tr12*tr3;
954 ci2 = cc[i + 5*k*ido] + tr11*ti2 + tr12*ti3;
955 cr3 = cc[i - 1 + 5*k*ido] + tr12*tr2 + tr11*tr3;
956 ci3 = cc[i + 5*k*ido] + tr12*ti2 + tr11*ti3;
957 cr5 = ti11*tr5 + ti12*tr4;
958 ci5 = ti11*ti5 + ti12*ti4;
959 cr4 = ti12*tr5 - ti11*tr4;
960 ci4 = ti12*ti5 - ti11*ti4;
961 dr3 = cr3 - ci4;
962 dr4 = cr3 + ci4;
963 di3 = ci3 + cr4;
964 di4 = ci3 - cr4;
965 dr5 = cr2 + ci5;
966 dr2 = cr2 - ci5;
967 di5 = ci2 - cr5;
968 di2 = ci2 + cr5;
969 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*dr2 - wa1[i - 1]*di2;
970 ch[i + (k + l1)*ido] = wa1[i - 2]*di2 + wa1[i - 1]*dr2;
971 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*dr3 - wa2[i - 1]*di3;
972 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*di3 + wa2[i - 1]*dr3;
973 ch[i - 1 + (k + 3*l1)*ido] = wa3[i - 2]*dr4 - wa3[i - 1]*di4;
974 ch[i + (k + 3*l1)*ido] = wa3[i - 2]*di4 + wa3[i - 1]*dr4;
975 ch[i - 1 + (k + 4*l1)*ido] = wa4[i - 2]*dr5 - wa4[i - 1]*di5;
976 ch[i + (k + 4*l1)*ido] = wa4[i - 2]*di5 + wa4[i - 1]*dr5;
982 static void
983 fftpack_radfg(int ido,
984 int ip,
985 int l1,
986 int idl1,
987 real cc[],
988 real ch[],
989 real wa[])
991 const real twopi = 6.28318530717959;
992 int idij, ipph, i, j, k, l, j2, ic, jc, lc, ik, is, nbd;
993 real dc2, ai1, ai2, ar1, ar2, ds2, dcp, arg, dsp, ar1h, ar2h;
994 arg = twopi / ip;
995 dcp = cos(arg);
996 dsp = sin(arg);
997 ipph = (ip + 1) / 2;
998 nbd = (ido - 1) / 2;
999 if (ido != 1)
1001 for (ik=0; ik<idl1; ik++) ch[ik] = cc[ik];
1002 for (j=1; j<ip; j++)
1003 for (k=0; k<l1; k++)
1004 ch[(k + j*l1)*ido] = cc[(k + j*l1)*ido];
1005 if (nbd <= l1)
1007 is = -ido;
1008 for (j=1; j<ip; j++)
1010 is += ido;
1011 idij = is-1;
1012 for (i=2; i<ido; i+=2)
1014 idij += 2;
1015 for (k=0; k<l1; k++)
1017 ch[i - 1 + (k + j*l1)*ido] =
1018 wa[idij - 1]*cc[i - 1 + (k + j*l1)*ido] + wa[idij]*cc[i + (k + j*l1)*ido];
1019 ch[i + (k + j*l1)*ido] =
1020 wa[idij - 1]*cc[i + (k + j*l1)*ido] - wa[idij]*cc[i - 1 + (k + j*l1)*ido];
1025 else
1027 is = -ido;
1028 for (j=1; j<ip; j++)
1030 is += ido;
1031 for (k=0; k<l1; k++)
1033 idij = is-1;
1034 for (i=2; i<ido; i+=2)
1036 idij += 2;
1037 ch[i - 1 + (k + j*l1)*ido] =
1038 wa[idij - 1]*cc[i - 1 + (k + j*l1)*ido] + wa[idij]*cc[i + (k + j*l1)*ido];
1039 ch[i + (k + j*l1)*ido] =
1040 wa[idij - 1]*cc[i + (k + j*l1)*ido] - wa[idij]*cc[i - 1 + (k + j*l1)*ido];
1045 if (nbd >= l1)
1047 for (j=1; j<ipph; j++)
1049 jc = ip - j;
1050 for (k=0; k<l1; k++)
1052 for (i=2; i<ido; i+=2)
1054 cc[i - 1 + (k + j*l1)*ido] = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1055 cc[i - 1 + (k + jc*l1)*ido] = ch[i + (k + j*l1)*ido] - ch[i + (k + jc*l1)*ido];
1056 cc[i + (k + j*l1)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1057 cc[i + (k + jc*l1)*ido] = ch[i - 1 + (k + jc*l1)*ido] - ch[i - 1 + (k + j*l1)*ido];
1062 else
1064 for (j=1; j<ipph; j++)
1066 jc = ip - j;
1067 for (i=2; i<ido; i+=2)
1069 for (k=0; k<l1; k++)
1071 cc[i - 1 + (k + j*l1)*ido] =
1072 ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1073 cc[i - 1 + (k + jc*l1)*ido] = ch[i + (k + j*l1)*ido] - ch[i + (k + jc*l1)*ido];
1074 cc[i + (k + j*l1)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1075 cc[i + (k + jc*l1)*ido] = ch[i - 1 + (k + jc*l1)*ido] - ch[i - 1 + (k + j*l1)*ido];
1081 else
1083 for (ik=0; ik<idl1; ik++)
1084 cc[ik] = ch[ik];
1086 for (j=1; j<ipph; j++)
1088 jc = ip - j;
1089 for (k=0; k<l1; k++)
1091 cc[(k + j*l1)*ido] = ch[(k + j*l1)*ido] + ch[(k + jc*l1)*ido];
1092 cc[(k + jc*l1)*ido] = ch[(k + jc*l1)*ido] - ch[(k + j*l1)*ido];
1096 ar1 = 1;
1097 ai1 = 0;
1098 for (l=1; l<ipph; l++)
1100 lc = ip - l;
1101 ar1h = dcp*ar1 - dsp*ai1;
1102 ai1 = dcp*ai1 + dsp*ar1;
1103 ar1 = ar1h;
1104 for (ik=0; ik<idl1; ik++)
1106 ch[ik + l*idl1] = cc[ik] + ar1*cc[ik + idl1];
1107 ch[ik + lc*idl1] = ai1*cc[ik + (ip-1)*idl1];
1109 dc2 = ar1;
1110 ds2 = ai1;
1111 ar2 = ar1;
1112 ai2 = ai1;
1113 for (j=2; j<ipph; j++)
1115 jc = ip - j;
1116 ar2h = dc2*ar2 - ds2*ai2;
1117 ai2 = dc2*ai2 + ds2*ar2;
1118 ar2 = ar2h;
1119 for (ik=0; ik<idl1; ik++)
1121 ch[ik + l*idl1] += ar2*cc[ik + j*idl1];
1122 ch[ik + lc*idl1] += ai2*cc[ik + jc*idl1];
1126 for (j=1; j<ipph; j++)
1127 for (ik=0; ik<idl1; ik++)
1128 ch[ik] += cc[ik + j*idl1];
1130 if (ido >= l1)
1132 for (k=0; k<l1; k++)
1134 for (i=0; i<ido; i++)
1136 cc[i + k*ip*ido] = ch[i + k*ido];
1140 else
1142 for (i=0; i<ido; i++)
1144 for (k=0; k<l1; k++)
1146 cc[i + k*ip*ido] = ch[i + k*ido];
1150 for (j=1; j<ipph; j++)
1152 jc = ip - j;
1153 j2 = 2*j;
1154 for (k=0; k<l1; k++)
1156 cc[ido-1 + (j2 - 1 + k*ip)*ido] = ch[(k + j*l1)*ido];
1157 cc[(j2 + k*ip)*ido] = ch[(k + jc*l1)*ido];
1160 if (ido == 1) return;
1161 if (nbd >= l1)
1163 for (j=1; j<ipph; j++)
1165 jc = ip - j;
1166 j2 = 2*j;
1167 for (k=0; k<l1; k++)
1169 for (i=2; i<ido; i+=2)
1171 ic = ido - i;
1172 cc[i - 1 + (j2 + k*ip)*ido] = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1173 cc[ic - 1 + (j2 - 1 + k*ip)*ido] = ch[i - 1 + (k + j*l1)*ido] - ch[i - 1 + (k + jc*l1)*ido];
1174 cc[i + (j2 + k*ip)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1175 cc[ic + (j2 - 1 + k*ip)*ido] = ch[i + (k + jc*l1)*ido] - ch[i + (k + j*l1)*ido];
1180 else
1182 for (j=1; j<ipph; j++)
1184 jc = ip - j;
1185 j2 = 2*j;
1186 for (i=2; i<ido; i+=2)
1188 ic = ido - i;
1189 for (k=0; k<l1; k++)
1191 cc[i - 1 + (j2 + k*ip)*ido] = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1192 cc[ic - 1 + (j2 - 1 + k*ip)*ido] = ch[i - 1 + (k + j*l1)*ido] - ch[i - 1 + (k + jc*l1)*ido];
1193 cc[i + (j2 + k*ip)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1194 cc[ic + (j2 - 1 + k*ip)*ido] = ch[i + (k + jc*l1)*ido] - ch[i + (k + j*l1)*ido];
1202 static void
1203 fftpack_radbg(int ido,
1204 int ip,
1205 int l1,
1206 int idl1,
1207 real cc[],
1208 real ch[],
1209 real wa[])
1211 const real twopi = 6.28318530717959;
1212 int idij, ipph, i, j, k, l, j2, ic, jc, lc, ik, is;
1213 real dc2, ai1, ai2, ar1, ar2, ds2;
1214 int nbd;
1215 real dcp, arg, dsp, ar1h, ar2h;
1216 arg = twopi / ip;
1217 dcp = cos(arg);
1218 dsp = sin(arg);
1219 nbd = (ido - 1) / 2;
1220 ipph = (ip + 1) / 2;
1222 if (ido >= l1)
1224 for (k=0; k<l1; k++)
1226 for (i=0; i<ido; i++)
1228 ch[i + k*ido] = cc[i + k*ip*ido];
1232 else
1234 for (i=0; i<ido; i++)
1236 for (k=0; k<l1; k++)
1238 ch[i + k*ido] = cc[i + k*ip*ido];
1242 for (j=1; j<ipph; j++)
1244 jc = ip - j;
1245 j2 = 2*j;
1246 for (k=0; k<l1; k++)
1248 ch[(k + j*l1)*ido] = cc[ido-1 + (j2 - 1 + k*ip)*ido] + cc[ido-1 + (j2 - 1 + k*ip)*ido];
1249 ch[(k + jc*l1)*ido] = cc[(j2 + k*ip)*ido] + cc[(j2 + k*ip)*ido];
1253 if (ido != 1)
1255 if (nbd >= l1)
1257 for (j=1; j<ipph; j++)
1259 jc = ip - j;
1260 for (k=0; k<l1; k++)
1262 for (i=2; i<ido; i+=2)
1264 ic = ido - i;
1265 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (2*j + k*ip)*ido] + cc[ic - 1 + (2*j - 1 + k*ip)*ido];
1266 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (2*j + k*ip)*ido] - cc[ic - 1 + (2*j - 1 + k*ip)*ido];
1267 ch[i + (k + j*l1)*ido] = cc[i + (2*j + k*ip)*ido] - cc[ic + (2*j - 1 + k*ip)*ido];
1268 ch[i + (k + jc*l1)*ido] = cc[i + (2*j + k*ip)*ido] + cc[ic + (2*j - 1 + k*ip)*ido];
1273 else
1275 for (j=1; j<ipph; j++)
1277 jc = ip - j;
1278 for (i=2; i<ido; i+=2)
1280 ic = ido - i;
1281 for (k=0; k<l1; k++)
1283 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (2*j + k*ip)*ido] + cc[ic - 1 + (2*j - 1 + k*ip)*ido];
1284 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (2*j + k*ip)*ido] - cc[ic - 1 + (2*j - 1 + k*ip)*ido];
1285 ch[i + (k + j*l1)*ido] = cc[i + (2*j + k*ip)*ido] - cc[ic + (2*j - 1 + k*ip)*ido];
1286 ch[i + (k + jc*l1)*ido] = cc[i + (2*j + k*ip)*ido] + cc[ic + (2*j - 1 + k*ip)*ido];
1293 ar1 = 1;
1294 ai1 = 0;
1295 for (l=1; l<ipph; l++)
1297 lc = ip - l;
1298 ar1h = dcp*ar1 - dsp*ai1;
1299 ai1 = dcp*ai1 + dsp*ar1;
1300 ar1 = ar1h;
1301 for (ik=0; ik<idl1; ik++)
1303 cc[ik + l*idl1] = ch[ik] + ar1*ch[ik + idl1];
1304 cc[ik + lc*idl1] = ai1*ch[ik + (ip-1)*idl1];
1306 dc2 = ar1;
1307 ds2 = ai1;
1308 ar2 = ar1;
1309 ai2 = ai1;
1310 for (j=2; j<ipph; j++)
1312 jc = ip - j;
1313 ar2h = dc2*ar2 - ds2*ai2;
1314 ai2 = dc2*ai2 + ds2*ar2;
1315 ar2 = ar2h;
1316 for (ik=0; ik<idl1; ik++)
1318 cc[ik + l*idl1] += ar2*ch[ik + j*idl1];
1319 cc[ik + lc*idl1] += ai2*ch[ik + jc*idl1];
1323 for (j=1; j<ipph; j++)
1325 for (ik=0; ik<idl1; ik++)
1327 ch[ik] += ch[ik + j*idl1];
1330 for (j=1; j<ipph; j++)
1332 jc = ip - j;
1333 for (k=0; k<l1; k++)
1335 ch[(k + j*l1)*ido] = cc[(k + j*l1)*ido] - cc[(k + jc*l1)*ido];
1336 ch[(k + jc*l1)*ido] = cc[(k + j*l1)*ido] + cc[(k + jc*l1)*ido];
1340 if (ido == 1) return;
1341 if (nbd >= l1)
1343 for (j=1; j<ipph; j++)
1345 jc = ip - j;
1346 for (k=0; k<l1; k++)
1348 for (i=2; i<ido; i+=2)
1350 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] - cc[i + (k + jc*l1)*ido];
1351 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] + cc[i + (k + jc*l1)*ido];
1352 ch[i + (k + j*l1)*ido] = cc[i + (k + j*l1)*ido] + cc[i - 1 + (k + jc*l1)*ido];
1353 ch[i + (k + jc*l1)*ido] = cc[i + (k + j*l1)*ido] - cc[i - 1 + (k + jc*l1)*ido];
1358 else
1360 for (j=1; j<ipph; j++)
1362 jc = ip - j;
1363 for (i=2; i<ido; i+=2)
1365 for (k=0; k<l1; k++)
1367 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] - cc[i + (k + jc*l1)*ido];
1368 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (k + j *l1)*ido] + cc[i + (k + jc*l1)*ido];
1369 ch[i + (k + j*l1)*ido] = cc[i + (k + j*l1)*ido] + cc[i - 1 + (k + jc*l1)*ido];
1370 ch[i + (k + jc*l1)*ido] = cc[i + (k + j*l1)*ido] - cc[i - 1 + (k + jc*l1)*ido];
1375 for (ik=0; ik<idl1; ik++)
1377 cc[ik] = ch[ik];
1379 for (j=1; j<ip; j++)
1380 for (k=0; k<l1; k++)
1381 cc[(k + j*l1)*ido] = ch[(k + j*l1)*ido];
1383 if (nbd <= l1)
1385 is = -ido;
1386 for (j=1; j<ip; j++)
1388 is += ido;
1389 idij = is-1;
1390 for (i=2; i<ido; i+=2)
1392 idij += 2;
1393 for (k=0; k<l1; k++)
1395 cc[i - 1 + (k + j*l1)*ido] = wa[idij - 1]*ch[i - 1 + (k + j*l1)*ido] - wa[idij]*ch[i + (k + j*l1)*ido];
1396 cc[i + (k + j*l1)*ido] = wa[idij - 1]*ch[i + (k + j*l1)*ido] + wa[idij]*ch[i - 1 + (k + j*l1)*ido];
1401 else
1403 is = -ido;
1404 for (j=1; j<ip; j++)
1406 is += ido;
1407 for (k=0; k<l1; k++)
1409 idij = is;
1410 for (i=2; i<ido; i+=2)
1412 idij += 2;
1413 cc[i - 1 + (k + j*l1)*ido] = wa[idij-1]*ch[i - 1 + (k + j*l1)*ido] - wa[idij]*ch[i + (k + j*l1)*ido];
1414 cc[i + (k + j*l1)*ido] = wa[idij-1]*ch[i + (k + j*l1)*ido] + wa[idij]*ch[i - 1 + (k + j*l1)*ido];
1423 static void
1424 fftpack_cfftf1(int n,
1425 real c[],
1426 real ch[],
1427 real wa[],
1428 int ifac[15],
1429 int isign)
1431 int idot, i;
1432 int k1, l1, l2;
1433 int na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1;
1434 real *cinput, *coutput;
1435 nf = ifac[1];
1436 na = 0;
1437 l1 = 1;
1438 iw = 0;
1440 for (k1=2; k1<=nf+1; k1++)
1442 ip = ifac[k1];
1443 l2 = ip*l1;
1444 ido = n / l2;
1445 idot = ido + ido;
1446 idl1 = idot*l1;
1447 if (na)
1449 cinput = ch;
1450 coutput = c;
1452 else
1454 cinput = c;
1455 coutput = ch;
1457 switch (ip)
1459 case 4:
1460 ix2 = iw + idot;
1461 ix3 = ix2 + idot;
1462 fftpack_passf4(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], isign);
1463 na = !na;
1464 break;
1465 case 2:
1466 fftpack_passf2(idot, l1, cinput, coutput, &wa[iw], isign);
1467 na = !na;
1468 break;
1469 case 3:
1470 ix2 = iw + idot;
1471 fftpack_passf3(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], isign);
1472 na = !na;
1473 break;
1474 case 5:
1475 ix2 = iw + idot;
1476 ix3 = ix2 + idot;
1477 ix4 = ix3 + idot;
1478 fftpack_passf5(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
1479 na = !na;
1480 break;
1481 default:
1482 fftpack_passf(&nac, idot, ip, l1, idl1, cinput, coutput, &wa[iw], isign);
1483 if (nac != 0) na = !na;
1485 l1 = l2;
1486 iw += (ip - 1)*idot;
1488 if (na == 0)
1489 return;
1490 for (i=0; i<2*n; i++)
1491 c[i] = ch[i];
1495 void
1496 fftpack_cfftf(int n,
1497 real c[],
1498 real wsave[])
1500 int iw1, iw2;
1502 if (n == 1)
1503 return;
1504 iw1 = 2*n;
1505 iw2 = iw1 + 2*n;
1506 fftpack_cfftf1(n, c, wsave, wsave+iw1, (int*)(wsave+iw2), -1);
1510 void
1511 fftpack_cfftb(int n,
1512 real c[],
1513 real wsave[])
1515 int iw1, iw2;
1517 if (n == 1)
1518 return;
1519 iw1 = 2*n;
1520 iw2 = iw1 + 2*n;
1521 fftpack_cfftf1(n, c, wsave, wsave+iw1, (int*)(wsave+iw2), +1);
1525 static void
1526 fftpack_factorize(int n,
1527 int ifac[15])
1529 static const int ntryh[4] = { 3,4,2,5 };
1530 int ntry=3, i, j=0, ib, nf=0, nl=n, nq, nr;
1532 startloop:
1533 if (j < 4)
1534 ntry = ntryh[j];
1535 else
1536 ntry+= 2;
1537 j++;
1540 nq = nl / ntry;
1541 nr = nl - ntry*nq;
1542 if (nr != 0) goto startloop;
1543 nf++;
1544 ifac[nf + 1] = ntry;
1545 nl = nq;
1546 if (ntry == 2 && nf != 1)
1548 for (i=2; i<=nf; i++)
1550 ib = nf - i + 2;
1551 ifac[ib + 1] = ifac[ib];
1553 ifac[2] = 2;
1556 while (nl != 1);
1557 ifac[0] = n;
1558 ifac[1] = nf;
1562 static void
1563 fftpack_cffti1(int n,
1564 real wa[],
1565 int ifac[15])
1567 const real twopi = 6.28318530717959;
1568 real arg, argh, argld, fi;
1569 int idot, i, j;
1570 int i1, k1, l1, l2;
1571 int ld, ii, nf, ip;
1572 int ido, ipm;
1574 fftpack_factorize(n,ifac);
1575 nf = ifac[1];
1576 argh = twopi/(real)n;
1577 i = 1;
1578 l1 = 1;
1579 for (k1=1; k1<=nf; k1++)
1581 ip = ifac[k1+1];
1582 ld = 0;
1583 l2 = l1*ip;
1584 ido = n / l2;
1585 idot = ido + ido + 2;
1586 ipm = ip - 1;
1587 for (j=1; j<=ipm; j++)
1589 i1 = i;
1590 wa[i-1] = 1;
1591 wa[i] = 0;
1592 ld += l1;
1593 fi = 0;
1594 argld = ld*argh;
1595 for (ii=4; ii<=idot; ii+=2)
1597 i+= 2;
1598 fi+= 1;
1599 arg = fi*argld;
1600 wa[i-1] = cos(arg);
1601 wa[i] = sin(arg);
1603 if (ip > 5)
1605 wa[i1-1] = wa[i-1];
1606 wa[i1] = wa[i];
1609 l1 = l2;
1616 static void
1617 fftpack_rfftf1(int n,
1618 real c[],
1619 real ch[],
1620 real wa[],
1621 int ifac[15])
1623 int i;
1624 int k1, l1, l2, na, kh, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
1625 real *cinput, *coutput;
1626 nf = ifac[1];
1627 na = 1;
1628 l2 = n;
1629 iw = n-1;
1630 for (k1 = 1; k1 <= nf; ++k1)
1632 kh = nf - k1;
1633 ip = ifac[kh + 2];
1634 l1 = l2 / ip;
1635 ido = n / l2;
1636 idl1 = ido*l1;
1637 iw -= (ip - 1)*ido;
1638 na = !na;
1639 if (na)
1641 cinput = ch;
1642 coutput = c;
1644 else
1646 cinput = c;
1647 coutput = ch;
1649 switch (ip)
1651 case 4:
1652 ix2 = iw + ido;
1653 ix3 = ix2 + ido;
1654 fftpack_radf4(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3]);
1655 break;
1656 case 2:
1657 fftpack_radf2(ido, l1, cinput, coutput, &wa[iw]);
1658 break;
1659 case 3:
1660 ix2 = iw + ido;
1661 fftpack_radf3(ido, l1, cinput, coutput, &wa[iw], &wa[ix2]);
1662 break;
1663 case 5:
1664 ix2 = iw + ido;
1665 ix3 = ix2 + ido;
1666 ix4 = ix3 + ido;
1667 fftpack_radf5(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1668 break;
1669 default:
1670 if (ido == 1)
1671 na = !na;
1672 if (na == 0)
1674 fftpack_radfg(ido, ip, l1, idl1, c, ch, &wa[iw]);
1675 na = 1;
1677 else
1679 fftpack_radfg(ido, ip, l1, idl1, ch, c, &wa[iw]);
1680 na = 0;
1683 l2 = l1;
1685 if (na == 1)
1686 return;
1687 for (i = 0; i < n; i++)
1688 c[i] = ch[i];
1692 static void
1693 fftpack_rfftb1(int n,
1694 real c[],
1695 real ch[],
1696 real wa[],
1697 int ifac[15])
1699 int i;
1700 int k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
1701 real *cinput, *coutput;
1702 nf = ifac[1];
1703 na = 0;
1704 l1 = 1;
1705 iw = 0;
1707 for (k1=1; k1<=nf; k1++)
1709 ip = ifac[k1 + 1];
1710 l2 = ip*l1;
1711 ido = n / l2;
1712 idl1 = ido*l1;
1713 if (na)
1715 cinput = ch;
1716 coutput = c;
1718 else
1720 cinput = c;
1721 coutput = ch;
1723 switch (ip)
1725 case 4:
1726 ix2 = iw + ido;
1727 ix3 = ix2 + ido;
1728 fftpack_radb4(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3]);
1729 na = !na;
1730 break;
1731 case 2:
1732 fftpack_radb2(ido, l1, cinput, coutput, &wa[iw]);
1733 na = !na;
1734 break;
1735 case 3:
1736 ix2 = iw + ido;
1737 fftpack_radb3(ido, l1, cinput, coutput, &wa[iw], &wa[ix2]);
1738 na = !na;
1739 break;
1740 case 5:
1741 ix2 = iw + ido;
1742 ix3 = ix2 + ido;
1743 ix4 = ix3 + ido;
1744 fftpack_radb5(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1745 na = !na;
1746 break;
1747 default:
1748 fftpack_radbg(ido, ip, l1, idl1, cinput, coutput, &wa[iw]);
1749 if (ido == 1) na = !na;
1751 l1 = l2;
1752 iw += (ip - 1)*ido;
1754 if (na == 0)
1755 return;
1756 for (i=0; i<n; i++)
1757 c[i] = ch[i];
1763 static void
1764 fftpack_rffti1(int n,
1765 real wa[],
1766 int ifac[15])
1768 const real twopi = 6.28318530717959;
1769 real arg, argh, argld, fi;
1770 int i, j;
1771 int k1, l1, l2;
1772 int ld, ii, nf, ip, is;
1773 int ido, ipm, nfm1;
1774 fftpack_factorize(n,ifac);
1775 nf = ifac[1];
1776 argh = twopi / n;
1777 is = 0;
1778 nfm1 = nf - 1;
1779 l1 = 1;
1780 if (nfm1 == 0) return;
1781 for (k1 = 1; k1 <= nfm1; k1++)
1783 ip = ifac[k1 + 1];
1784 ld = 0;
1785 l2 = l1*ip;
1786 ido = n / l2;
1787 ipm = ip - 1;
1788 for (j = 1; j <= ipm; ++j)
1790 ld += l1;
1791 i = is;
1792 argld = (real) ld*argh;
1793 fi = 0;
1794 for (ii = 3; ii <= ido; ii += 2)
1796 i += 2;
1797 fi += 1;
1798 arg = fi*argld;
1799 wa[i - 2] = cos(arg);
1800 wa[i - 1] = sin(arg);
1802 is += ido;
1804 l1 = l2;
1811 /* End of fftpack - begin GROMACS code */
1815 gmx_fft_init_1d(gmx_fft_t * pfft,
1816 int nx,
1817 int flags)
1819 gmx_fft_t fft;
1821 if(pfft==NULL)
1823 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
1824 return EINVAL;
1826 *pfft = NULL;
1828 if( (fft = (struct gmx_fft *)malloc(sizeof(struct gmx_fft))) == NULL)
1830 return ENOMEM;
1833 fft->next = NULL;
1834 fft->n = nx;
1836 /* Need 4*n storage for 1D complex FFT */
1837 if( (fft->work = (real *)malloc(sizeof(real)*(4*nx))) == NULL)
1839 free(fft);
1840 return ENOMEM;
1843 if(fft->n>1)
1844 fftpack_cffti1(nx,fft->work,fft->ifac);
1846 *pfft = fft;
1847 return 0;
1853 gmx_fft_init_1d_real(gmx_fft_t * pfft,
1854 int nx,
1855 int flags)
1857 gmx_fft_t fft;
1859 if(pfft==NULL)
1861 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
1862 return EINVAL;
1864 *pfft = NULL;
1866 if( (fft = (struct gmx_fft *)malloc(sizeof(struct gmx_fft))) == NULL)
1868 return ENOMEM;
1871 fft->next = NULL;
1872 fft->n = nx;
1874 /* Need 2*n storage for 1D real FFT */
1875 if((fft->work = (real *)malloc(sizeof(real)*(2*nx)))==NULL)
1877 free(fft);
1878 return ENOMEM;
1881 if(fft->n>1)
1882 fftpack_rffti1(nx,fft->work,fft->ifac);
1884 *pfft = fft;
1885 return 0;
1891 gmx_fft_init_2d(gmx_fft_t * pfft,
1892 int nx,
1893 int ny,
1894 int flags)
1896 gmx_fft_t fft;
1897 int rc;
1899 if(pfft==NULL)
1901 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
1902 return EINVAL;
1904 *pfft = NULL;
1906 /* Create the X transform */
1907 if( (rc = gmx_fft_init_1d(&fft,nx,flags)) != 0)
1909 return rc;
1912 /* Create Y transform as a link from X */
1913 if( (rc=gmx_fft_init_1d(&(fft->next),ny,flags)) != 0)
1915 free(fft);
1916 return rc;
1919 *pfft = fft;
1920 return 0;
1925 gmx_fft_init_2d_real(gmx_fft_t * pfft,
1926 int nx,
1927 int ny,
1928 int flags)
1930 gmx_fft_t fft;
1931 int nyc = (ny/2 + 1);
1932 int rc;
1934 if(pfft==NULL)
1936 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
1937 return EINVAL;
1939 *pfft = NULL;
1941 /* Create the X transform */
1942 if( (fft = (struct gmx_fft *)malloc(sizeof(struct gmx_fft))) == NULL)
1944 return ENOMEM;
1947 fft->n = nx;
1949 /* Need 4*nx storage for 1D complex FFT, and another
1950 * 2*nx*nyc elements for complex-to-real storage in our high-level routine.
1952 if( (fft->work = (real *)malloc(sizeof(real)*(4*nx+2*nx*nyc))) == NULL)
1954 free(fft);
1955 return ENOMEM;
1957 fftpack_cffti1(nx,fft->work,fft->ifac);
1959 /* Create real Y transform as a link from X */
1960 if( (rc=gmx_fft_init_1d_real(&(fft->next),ny,flags)) != 0)
1962 free(fft);
1963 return rc;
1966 *pfft = fft;
1967 return 0;
1972 gmx_fft_init_3d(gmx_fft_t * pfft,
1973 int nx,
1974 int ny,
1975 int nz,
1976 int flags)
1978 gmx_fft_t fft;
1979 int rc;
1981 if(pfft==NULL)
1983 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
1984 return EINVAL;
1986 *pfft = NULL;
1988 /* Create the X transform */
1990 if( (fft = (struct gmx_fft *)malloc(sizeof(struct gmx_fft))) == NULL)
1992 return ENOMEM;
1995 fft->n = nx;
1997 /* Need 4*nx storage for 1D complex FFT, and another
1998 * 2*nz elements for gmx_fft_transpose_2d_nelem() storage.
2000 if( (fft->work = (real *)malloc(sizeof(real)*(4*nx+2*nz))) == NULL)
2002 free(fft);
2003 return ENOMEM;
2006 fftpack_cffti1(nx,fft->work,fft->ifac);
2009 /* Create 2D Y/Z transforms as a link from X */
2010 if( (rc=gmx_fft_init_2d(&(fft->next),ny,nz,flags)) != 0)
2012 free(fft);
2013 return rc;
2016 *pfft = fft;
2017 return 0;
2022 gmx_fft_init_3d_real(gmx_fft_t * pfft,
2023 int nx,
2024 int ny,
2025 int nz,
2026 int flags)
2028 gmx_fft_t fft;
2029 int nzc = (nz/2 + 1);
2030 int rc;
2032 if(pfft==NULL)
2034 gmx_fatal(FARGS,"Invalid FFT opaque type pointer.");
2035 return EINVAL;
2037 *pfft = NULL;
2039 /* Create the X transform */
2040 if( (fft = (struct gmx_fft *)malloc(sizeof(struct gmx_fft))) == NULL)
2042 return ENOMEM;
2045 fft->n = nx;
2047 /* Need 4*nx storage for 1D complex FFT, another
2048 * 2*nx*ny*nzc elements to copy the entire 3D matrix when
2049 * doing out-of-place complex-to-real FFTs, and finally
2050 * 2*nzc elements for transpose work space.
2052 if( (fft->work = (real *)malloc(sizeof(real)*(4*nx+2*nx*ny*nzc+2*nzc))) == NULL)
2054 free(fft);
2055 return ENOMEM;
2057 fftpack_cffti1(nx,fft->work,fft->ifac);
2059 /* Create 2D real Y/Z transform as a link from X */
2060 if( (rc=gmx_fft_init_2d_real(&(fft->next),ny,nz,flags)) != 0)
2062 free(fft);
2063 return rc;
2066 *pfft = fft;
2067 return 0;
2071 int
2072 gmx_fft_1d (gmx_fft_t fft,
2073 enum gmx_fft_direction dir,
2074 void * in_data,
2075 void * out_data)
2077 int i,n;
2078 real * p1;
2079 real * p2;
2081 n=fft->n;
2083 if(n==1)
2085 p1 = (real *)in_data;
2086 p2 = (real *)out_data;
2087 p2[0] = p1[0];
2088 p2[1] = p1[1];
2091 /* FFTPACK only does in-place transforms, so emulate out-of-place
2092 * by copying data to the output array first.
2094 if( in_data != out_data )
2096 p1 = (real *)in_data;
2097 p2 = (real *)out_data;
2099 /* n complex = 2*n real elements */
2100 for(i=0;i<2*n;i++)
2102 p2[i] = p1[i];
2106 /* Elements 0 .. 2*n-1 in work are used for ffac values,
2107 * Elements 2*n .. 4*n-1 are internal FFTPACK work space.
2110 if(dir == GMX_FFT_FORWARD)
2112 fftpack_cfftf1(n,(real *)out_data,fft->work+2*n,fft->work,fft->ifac, -1);
2114 else if(dir == GMX_FFT_BACKWARD)
2116 fftpack_cfftf1(n,(real *)out_data,fft->work+2*n,fft->work,fft->ifac, 1);
2118 else
2120 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
2121 return EINVAL;
2124 return 0;
2129 int
2130 gmx_fft_1d_real (gmx_fft_t fft,
2131 enum gmx_fft_direction dir,
2132 void * in_data,
2133 void * out_data)
2135 int i,n;
2136 real * p1;
2137 real * p2;
2139 n = fft->n;
2141 if(n==1)
2143 p1 = (real *)in_data;
2144 p2 = (real *)out_data;
2145 p2[0] = p1[0];
2146 if(dir == GMX_FFT_REAL_TO_COMPLEX)
2147 p2[1] = 0.0;
2150 if(dir == GMX_FFT_REAL_TO_COMPLEX)
2152 /* FFTPACK only does in-place transforms, so emulate out-of-place
2153 * by copying data to the output array first. This works fine, since
2154 * the complex array must be larger than the real.
2156 if( in_data != out_data )
2158 p1 = (real *)in_data;
2159 p2 = (real *)out_data;
2161 for(i=0;i<2*(n/2+1);i++)
2163 p2[i] = p1[i];
2167 /* Elements 0 .. n-1 in work are used for ffac values,
2168 * Elements n .. 2*n-1 are internal FFTPACK work space.
2170 fftpack_rfftf1(n,(real *)out_data,fft->work+n,fft->work,fft->ifac);
2173 * FFTPACK has a slightly more compact storage than we, time to
2174 * convert it: ove most of the array one step up to make room for
2175 * zero imaginary parts.
2177 p2 = (real *)out_data;
2178 for(i=n-1;i>0;i--)
2180 p2[i+1] = p2[i];
2182 /* imaginary zero freq. */
2183 p2[1] = 0;
2185 /* Is n even? */
2186 if( (n & 0x1) == 0 )
2188 p2[n+1] = 0;
2192 else if(dir == GMX_FFT_COMPLEX_TO_REAL)
2194 /* FFTPACK only does in-place transforms, and we cannot just copy
2195 * input to output first here since our real array is smaller than
2196 * the complex one. However, since the FFTPACK complex storage format
2197 * is more compact than ours (2 reals) it will fit, so compact it
2198 * and copy on-the-fly to the output array.
2200 p1 = (real *) in_data;
2201 p2 = (real *)out_data;
2203 p2[0] = p1[0];
2204 for(i=1;i<n;i++)
2206 p2[i] = p1[i+1];
2208 fftpack_rfftb1(n,(real *)out_data,fft->work+n,fft->work,fft->ifac);
2210 else
2212 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
2213 return EINVAL;
2216 return 0;
2220 int
2221 gmx_fft_2d (gmx_fft_t fft,
2222 enum gmx_fft_direction dir,
2223 void * in_data,
2224 void * out_data)
2226 int i,nx,ny;
2227 t_complex * data;
2229 nx = fft->n;
2230 ny = fft->next->n;
2232 /* FFTPACK only does in-place transforms, so emulate out-of-place
2233 * by copying data to the output array first.
2234 * For 2D there is likely enough data to benefit from memcpy().
2236 if( in_data != out_data )
2238 memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
2241 /* Much easier to do pointer arithmetic when base has the correct type */
2242 data = (t_complex *)out_data;
2244 /* y transforms */
2245 for(i=0;i<nx;i++)
2247 gmx_fft_1d(fft->next,dir,data+i*ny,data+i*ny);
2250 /* Transpose in-place to get data in place for x transform now */
2251 gmx_fft_transpose_2d(data,data,nx,ny);
2253 /* x transforms */
2254 for(i=0;i<ny;i++)
2256 gmx_fft_1d(fft,dir,data+i*nx,data+i*nx);
2259 /* Transpose in-place to get data back in original order */
2260 gmx_fft_transpose_2d(data,data,ny,nx);
2262 return 0;
2267 int
2268 gmx_fft_2d_real (gmx_fft_t fft,
2269 enum gmx_fft_direction dir,
2270 void * in_data,
2271 void * out_data)
2273 int i,j,nx,ny,nyc;
2274 t_complex * data;
2275 real * work;
2276 real * p1;
2277 real * p2;
2279 nx=fft->n;
2280 ny=fft->next->n;
2281 /* Number of complex elements in y direction */
2282 nyc=(ny/2+1);
2284 work = fft->work+4*nx;
2286 if(dir==GMX_FFT_REAL_TO_COMPLEX)
2288 /* If we are doing an in-place transform the 2D array is already
2289 * properly padded by the user, and we are all set.
2291 * For out-of-place there is no array padding, but FFTPACK only
2292 * does in-place FFTs internally, so we need to start by copying
2293 * data from the input to the padded (larger) output array.
2295 if( in_data != out_data )
2297 p1 = (real *)in_data;
2298 p2 = (real *)out_data;
2300 for(i=0;i<nx;i++)
2302 for(j=0;j<ny;j++)
2304 p2[i*nyc*2+j] = p1[i*ny+j];
2308 data = (t_complex *)out_data;
2310 /* y real-to-complex FFTs */
2311 for(i=0;i<nx;i++)
2313 gmx_fft_1d_real(fft->next,GMX_FFT_REAL_TO_COMPLEX,data+i*nyc,data+i*nyc);
2316 /* Transform to get X data in place */
2317 gmx_fft_transpose_2d(data,data,nx,nyc);
2319 /* Complex-to-complex X FFTs */
2320 for(i=0;i<nyc;i++)
2322 gmx_fft_1d(fft,GMX_FFT_FORWARD,data+i*nx,data+i*nx);
2325 /* Transpose back */
2326 gmx_fft_transpose_2d(data,data,nyc,nx);
2329 else if(dir==GMX_FFT_COMPLEX_TO_REAL)
2331 /* An in-place complex-to-real transform is straightforward,
2332 * since the output array must be large enough for the padding to fit.
2334 * For out-of-place complex-to-real transforms we cannot just copy
2335 * data to the output array, since it is smaller than the input.
2336 * In this case there's nothing to do but employing temporary work data,
2337 * starting at work+4*nx and using nx*nyc*2 elements.
2339 if(in_data != out_data)
2341 memcpy(work,in_data,sizeof(t_complex)*nx*nyc);
2342 data = (t_complex *)work;
2344 else
2346 /* in-place */
2347 data = (t_complex *)out_data;
2350 /* Transpose to get X arrays */
2351 gmx_fft_transpose_2d(data,data,nx,nyc);
2353 /* Do X iFFTs */
2354 for(i=0;i<nyc;i++)
2356 gmx_fft_1d(fft,GMX_FFT_BACKWARD,data+i*nx,data+i*nx);
2359 /* Transpose to get Y arrays */
2360 gmx_fft_transpose_2d(data,data,nyc,nx);
2362 /* Do Y iFFTs */
2363 for(i=0;i<nx;i++)
2365 gmx_fft_1d_real(fft->next,GMX_FFT_COMPLEX_TO_REAL,data+i*nyc,data+i*nyc);
2368 if( in_data != out_data )
2370 /* Output (pointed to by data) is now in padded format.
2371 * Pack it into out_data if we were doing an out-of-place transform.
2373 p1 = (real *)data;
2374 p2 = (real *)out_data;
2376 for(i=0;i<nx;i++)
2378 for(j=0;j<ny;j++)
2380 p2[i*ny+j] = p1[i*nyc*2+j];
2385 else
2387 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
2388 return EINVAL;
2391 return 0;
2396 int
2397 gmx_fft_3d (gmx_fft_t fft,
2398 enum gmx_fft_direction dir,
2399 void * in_data,
2400 void * out_data)
2402 int i,nx,ny,nz,rc;
2403 t_complex * data;
2404 t_complex * work;
2405 nx=fft->n;
2406 ny=fft->next->n;
2407 nz=fft->next->next->n;
2409 /* First 4*nx positions are FFTPACK workspace, then ours starts */
2410 work = (t_complex *)(fft->work+4*nx);
2412 /* FFTPACK only does in-place transforms, so emulate out-of-place
2413 * by copying data to the output array first.
2414 * For 3D there is likely enough data to benefit from memcpy().
2416 if( in_data != out_data )
2418 memcpy(out_data,in_data,sizeof(t_complex)*nx*ny*nz);
2421 /* Much easier to do pointer arithmetic when base has the correct type */
2422 data = (t_complex *)out_data;
2424 /* Perform z transforms */
2425 for(i=0;i<nx*ny;i++)
2426 gmx_fft_1d(fft->next->next,dir,data+i*nz,data+i*nz);
2428 /* For each X slice, transpose the y & z dimensions inside the slice */
2429 for(i=0;i<nx;i++)
2431 gmx_fft_transpose_2d(data+i*ny*nz,data+i*ny*nz,ny,nz);
2434 /* Array is now (nx,nz,ny) - perform y transforms */
2435 for(i=0;i<nx*nz;i++)
2437 gmx_fft_1d(fft->next,dir,data+i*ny,data+i*ny);
2440 /* Transpose back to (nx,ny,nz) */
2441 for(i=0;i<nx;i++)
2443 gmx_fft_transpose_2d(data+i*ny*nz,data+i*ny*nz,nz,ny);
2446 /* Transpose entire x & y slices to go from
2447 * (nx,ny,nz) to (ny,nx,nz).
2448 * Use work data elements 4*n .. 4*n+2*nz-1.
2450 rc=gmx_fft_transpose_2d_nelem(data,data,nx,ny,nz,work);
2451 if( rc != 0)
2453 gmx_fatal(FARGS,"Cannot transpose X & Y/Z in gmx_fft_3d().");
2454 return rc;
2457 /* Then go from (ny,nx,nz) to (ny,nz,nx) */
2458 for(i=0;i<ny;i++)
2460 gmx_fft_transpose_2d(data+i*nx*nz,data+i*nx*nz,nx,nz);
2463 /* Perform x transforms */
2464 for(i=0;i<ny*nz;i++)
2466 gmx_fft_1d(fft,dir,data+i*nx,data+i*nx);
2469 /* Transpose back from (ny,nz,nx) to (ny,nx,nz) */
2470 for(i=0;i<ny;i++)
2472 gmx_fft_transpose_2d(data+i*nz*nx,data+i*nz*nx,nz,nx);
2475 /* Transpose from (ny,nx,nz) to (nx,ny,nz)
2476 * Use work data elements 4*n .. 4*n+2*nz-1.
2478 rc = gmx_fft_transpose_2d_nelem(data,data,ny,nx,nz,work);
2479 if( rc != 0)
2481 gmx_fatal(FARGS,"Cannot transpose Y/Z & X in gmx_fft_3d().");
2482 return rc;
2485 return 0;
2489 int
2490 gmx_fft_3d_real (gmx_fft_t fft,
2491 enum gmx_fft_direction dir,
2492 void * in_data,
2493 void * out_data)
2495 int i,j,k;
2496 int nx,ny,nz,nzc,rc;
2497 t_complex * data;
2498 t_complex * work_transp;
2499 t_complex * work_c2r;
2500 real * p1;
2501 real * p2;
2503 nx=fft->n;
2504 ny=fft->next->n;
2505 nz=fft->next->next->n;
2506 nzc=(nz/2+1);
2509 /* First 4*nx positions are FFTPACK workspace, then ours starts.
2510 * We have 2*nx*ny*nzc elements for temp complex-to-real storage when
2511 * doing out-of-place transforms, and another 2*nzc for transpose data.
2513 work_c2r = (t_complex *)(fft->work+4*nx);
2514 work_transp = (t_complex *)(fft->work+4*nx+2*nx*ny*nzc);
2516 /* Much easier to do pointer arithmetic when base has the correct type */
2517 data = (t_complex *)out_data;
2519 if(dir==GMX_FFT_REAL_TO_COMPLEX)
2521 /* FFTPACK only does in-place transforms, so emulate out-of-place
2522 * by copying data to the output array first. This is guaranteed to
2523 * work for real-to-complex since complex data is larger than the real.
2524 * For 3D there is likely enough data to benefit from memcpy().
2526 if( in_data != out_data )
2528 p1 = (real *)in_data;
2529 p2 = (real *)out_data;
2531 for(i=0;i<nx;i++)
2533 for(j=0;j<ny;j++)
2535 for(k=0;k<nz;k++)
2537 p2[(i*ny+j)*2*nzc+k] = p1[(i*ny+j)*nz+k];
2542 data = (t_complex *)out_data;
2544 /* Transform the Y/Z slices real-to-complex */
2545 for(i=0;i<nx;i++)
2547 gmx_fft_2d_real(fft->next,dir,data+i*ny*nzc,data+i*ny*nzc);
2550 /* Transpose x & y slices to go from
2551 * (nx,ny,nzc) to (ny,nx,nzc).
2553 rc=gmx_fft_transpose_2d_nelem(data,data,nx,ny,nzc,work_transp);
2554 if( rc != 0)
2556 gmx_fatal(FARGS,"Cannot transpose X & Y/Z gmx_fft_3d_real().");
2557 return rc;
2560 /* Then transpose from (ny,nx,nzc) to (ny,nzc,nx) */
2561 for(i=0;i<ny;i++)
2563 gmx_fft_transpose_2d(data+i*nx*nzc,data+i*nx*nzc,nx,nzc);
2566 /* Perform x transforms */
2567 for(i=0;i<ny*nzc;i++)
2569 gmx_fft_1d(fft,GMX_FFT_FORWARD,data+i*nx,data+i*nx);
2572 /* Transpose from (ny,nzc,nx) back to (ny,nx,nzc) */
2573 for(i=0;i<ny;i++)
2575 gmx_fft_transpose_2d(data+i*nzc*nx,data+i*nzc*nx,nzc,nx);
2578 /* Transpose back from (ny,nx,nzc) to (nx,ny,nz) */
2579 rc=gmx_fft_transpose_2d_nelem(data,data,ny,nx,nzc,work_transp);
2580 if( rc != 0)
2582 gmx_fatal(FARGS,"Cannot transpose Y/Z & X in gmx_fft_3d_real().");
2583 return rc;
2587 else if(dir==GMX_FFT_COMPLEX_TO_REAL)
2589 /* An in-place complex-to-real transform is straightforward,
2590 * since the output array must be large enough for the padding to fit.
2592 * For out-of-place complex-to-real transforms we cannot just copy
2593 * data to the output array, since it is smaller than the input.
2594 * In this case there's nothing to do but employing temporary work data.
2596 if(in_data != out_data)
2598 memcpy(work_c2r,in_data,sizeof(t_complex)*nx*ny*nzc);
2599 data = (t_complex *)work_c2r;
2601 else
2603 /* in-place */
2604 data = (t_complex *)out_data;
2607 /* Transpose x & y slices to go from
2608 * (nx,ny,nz) to (ny,nx,nz).
2610 gmx_fft_transpose_2d_nelem(data,data,nx,ny,nzc,work_transp);
2612 /* Then go from (ny,nx,nzc) to (ny,nzc,nx) */
2613 for(i=0;i<ny;i++)
2615 gmx_fft_transpose_2d(data+i*nx*nzc,data+i*nx*nzc,nx,nzc);
2619 /* Perform x transforms */
2620 for(i=0;i<ny*nzc;i++)
2622 gmx_fft_1d(fft,GMX_FFT_BACKWARD,data+i*nx,data+i*nx);
2625 /* Transpose back from (ny,nzc,nx) to (ny,nx,nzc) */
2626 for(i=0;i<ny;i++)
2628 gmx_fft_transpose_2d(data+i*nzc*nx,data+i*nzc*nx,nzc,nx);
2631 /* Transpose back from (ny,nx,nzc) to (nx,ny,nz) */
2632 gmx_fft_transpose_2d_nelem(data,data,ny,nx,nzc,work_transp);
2635 /* Do 2D complex-to-real */
2636 for(i=0;i<nx;i++)
2638 gmx_fft_2d_real(fft->next,dir,data+i*ny*nzc,data+i*ny*nzc);
2641 if( in_data != out_data )
2643 /* Output (pointed to by data) is now in padded format.
2644 * Pack it into out_data if we were doing an out-of-place transform.
2646 p1 = (real *)data;
2647 p2 = (real *)out_data;
2649 for(i=0;i<nx;i++)
2651 for(j=0;j<ny;j++)
2653 for(k=0;k<nz;k++)
2655 p2[(i*ny+j)*nz+k] = p1[(i*ny+j)*nzc*2+k];
2662 else
2664 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
2665 return EINVAL;
2668 return 0;
2674 void
2675 gmx_fft_destroy(gmx_fft_t fft)
2677 if(fft != NULL)
2679 free(fft->work);
2680 if(fft->next != NULL)
2681 gmx_fft_destroy(fft->next);
2682 free(fft);
2685 #else
2687 gmx_fft_fftpack_empty;
2688 #endif /* GMX_FFT_FFTPACK */