1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs 4.0 Copyright (c) 1991-2003
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
22 #ifdef GMX_FFT_FFTPACK
31 #include "gmx_fatal.h"
34 /** Contents of the FFTPACK fft datatype.
36 * FFTPACK only does 1d transforms, so we use a pointers to another fft for
37 * the transform in the next dimension.
38 * Thus, a 3d-structure contains a pointer to a 2d one, which in turns contains
39 * a pointer to a 1d. The 1d structure has next==NULL.
43 int ndim
; /**< Dimensions, including our subdimensions. */
44 int n
; /**< Number of points in this dimension. */
45 int ifac
[15]; /**< 15 bytes needed for cfft and rfft */
46 struct gmx_fft
*next
; /**< Pointer to next dimension, or NULL. */
47 real
* work
; /**< 1st 4n reserved for cfft, 1st 2n for rfft */
56 fftpack_passf2(int ido
,
72 ch
[ah
] = cc
[ac
] + cc
[ac
+ ido
];
73 ch
[ah
+ ido
*l1
] = cc
[ac
] - cc
[ac
+ ido
];
74 ch
[ah
+1] = cc
[ac
+1] + cc
[ac
+ ido
+ 1];
75 ch
[ah
+ ido
*l1
+ 1] = cc
[ac
+1] - cc
[ac
+ ido
+ 1];
82 for (i
=0; i
<ido
-1; i
+=2)
86 ch
[ah
] = cc
[ac
] + cc
[ac
+ ido
];
87 tr2
= cc
[ac
] - cc
[ac
+ ido
];
88 ch
[ah
+1] = cc
[ac
+1] + cc
[ac
+ 1 + ido
];
89 ti2
= cc
[ac
+1] - cc
[ac
+ 1 + ido
];
90 ch
[ah
+l1
*ido
+1] = wa1
[i
]*ti2
+ isign
*wa1
[i
+1]*tr2
;
91 ch
[ah
+l1
*ido
] = wa1
[i
]*tr2
- isign
*wa1
[i
+1]*ti2
;
100 fftpack_passf3(int ido
,
108 const real taur
= -0.5;
109 const real taui
= 0.866025403784439;
112 real ci2
, ci3
, di2
, di3
, cr2
, cr3
, dr2
, dr3
, ti2
, tr2
;
116 for (k
=1; k
<=l1
; k
++)
119 tr2
= cc
[ac
] + cc
[ac
+ ido
];
120 cr2
= cc
[ac
- ido
] + taur
*tr2
;
122 ch
[ah
] = cc
[ac
- ido
] + tr2
;
124 ti2
= cc
[ac
+ 1] + cc
[ac
+ ido
+ 1];
125 ci2
= cc
[ac
- ido
+ 1] + taur
*ti2
;
126 ch
[ah
+ 1] = cc
[ac
- ido
+ 1] + ti2
;
128 cr3
= isign
*taui
*(cc
[ac
] - cc
[ac
+ ido
]);
129 ci3
= isign
*taui
*(cc
[ac
+ 1] - cc
[ac
+ ido
+ 1]);
130 ch
[ah
+ l1
*ido
] = cr2
- ci3
;
131 ch
[ah
+ 2*l1
*ido
] = cr2
+ ci3
;
132 ch
[ah
+ l1
*ido
+ 1] = ci2
+ cr3
;
133 ch
[ah
+ 2*l1
*ido
+ 1] = ci2
- cr3
;
138 for (k
=1; k
<=l1
; k
++)
140 for (i
=0; i
<ido
-1; i
+=2)
142 ac
= i
+ (3*k
- 2)*ido
;
143 tr2
= cc
[ac
] + cc
[ac
+ ido
];
144 cr2
= cc
[ac
- ido
] + taur
*tr2
;
146 ch
[ah
] = cc
[ac
- ido
] + tr2
;
147 ti2
= cc
[ac
+ 1] + cc
[ac
+ ido
+ 1];
148 ci2
= cc
[ac
- ido
+ 1] + taur
*ti2
;
149 ch
[ah
+ 1] = cc
[ac
- ido
+ 1] + ti2
;
150 cr3
= isign
*taui
*(cc
[ac
] - cc
[ac
+ ido
]);
151 ci3
= isign
*taui
*(cc
[ac
+ 1] - cc
[ac
+ ido
+ 1]);
156 ch
[ah
+ l1
*ido
+ 1] = wa1
[i
]*di2
+ isign
*wa1
[i
+1]*dr2
;
157 ch
[ah
+ l1
*ido
] = wa1
[i
]*dr2
- isign
*wa1
[i
+1]*di2
;
158 ch
[ah
+ 2*l1
*ido
+ 1] = wa2
[i
]*di3
+ isign
*wa2
[i
+1]*dr3
;
159 ch
[ah
+ 2*l1
*ido
] = wa2
[i
]*dr3
- isign
*wa2
[i
+1]*di3
;
167 fftpack_passf4(int ido
,
177 real ci2
, ci3
, ci4
, cr2
, cr3
, cr4
, ti1
, ti2
, ti3
, ti4
, tr1
, tr2
, tr3
, tr4
;
184 ti1
= cc
[ac
] - cc
[ac
+ 2*ido
];
185 ti2
= cc
[ac
] + cc
[ac
+ 2*ido
];
186 tr4
= cc
[ac
+ 3*ido
] - cc
[ac
+ ido
];
187 ti3
= cc
[ac
+ ido
] + cc
[ac
+ 3*ido
];
188 tr1
= cc
[ac
- 1] - cc
[ac
+ 2*ido
- 1];
189 tr2
= cc
[ac
- 1] + cc
[ac
+ 2*ido
- 1];
190 ti4
= cc
[ac
+ ido
- 1] - cc
[ac
+ 3*ido
- 1];
191 tr3
= cc
[ac
+ ido
- 1] + cc
[ac
+ 3*ido
- 1];
194 ch
[ah
+ 2*l1
*ido
] = tr2
- tr3
;
195 ch
[ah
+ 1] = ti2
+ ti3
;
196 ch
[ah
+ 2*l1
*ido
+ 1] = ti2
- ti3
;
197 ch
[ah
+ l1
*ido
] = tr1
+ isign
*tr4
;
198 ch
[ah
+ 3*l1
*ido
] = tr1
- isign
*tr4
;
199 ch
[ah
+ l1
*ido
+ 1] = ti1
+ isign
*ti4
;
200 ch
[ah
+ 3*l1
*ido
+ 1] = ti1
- isign
*ti4
;
207 for (i
=0; i
<ido
-1; i
+=2)
209 ac
= i
+ 1 + 4*k
*ido
;
210 ti1
= cc
[ac
] - cc
[ac
+ 2*ido
];
211 ti2
= cc
[ac
] + cc
[ac
+ 2*ido
];
212 ti3
= cc
[ac
+ ido
] + cc
[ac
+ 3*ido
];
213 tr4
= cc
[ac
+ 3*ido
] - cc
[ac
+ ido
];
214 tr1
= cc
[ac
- 1] - cc
[ac
+ 2*ido
- 1];
215 tr2
= cc
[ac
- 1] + cc
[ac
+ 2*ido
- 1];
216 ti4
= cc
[ac
+ ido
- 1] - cc
[ac
+ 3*ido
- 1];
217 tr3
= cc
[ac
+ ido
- 1] + cc
[ac
+ 3*ido
- 1];
221 ch
[ah
+ 1] = ti2
+ ti3
;
223 cr2
= tr1
+ isign
*tr4
;
224 cr4
= tr1
- isign
*tr4
;
225 ci2
= ti1
+ isign
*ti4
;
226 ci4
= ti1
- isign
*ti4
;
227 ch
[ah
+ l1
*ido
] = wa1
[i
]*cr2
- isign
*wa1
[i
+ 1]*ci2
;
228 ch
[ah
+ l1
*ido
+ 1] = wa1
[i
]*ci2
+ isign
*wa1
[i
+ 1]*cr2
;
229 ch
[ah
+ 2*l1
*ido
] = wa2
[i
]*cr3
- isign
*wa2
[i
+ 1]*ci3
;
230 ch
[ah
+ 2*l1
*ido
+ 1] = wa2
[i
]*ci3
+ isign
*wa2
[i
+ 1]*cr3
;
231 ch
[ah
+ 3*l1
*ido
] = wa3
[i
]*cr4
-isign
*wa3
[i
+ 1]*ci4
;
232 ch
[ah
+ 3*l1
*ido
+ 1] = wa3
[i
]*ci4
+ isign
*wa3
[i
+ 1]*cr4
;
240 fftpack_passf5(int ido
,
250 const real tr11
= 0.309016994374947;
251 const real ti11
= 0.951056516295154;
252 const real tr12
= -0.809016994374947;
253 const real ti12
= 0.587785252292473;
256 real ci2
, ci3
, ci4
, ci5
, di3
, di4
, di5
, di2
, cr2
, cr3
, cr5
, cr4
, ti2
, ti3
,
257 ti4
, ti5
, dr3
, dr4
, dr5
, dr2
, tr2
, tr3
, tr4
, tr5
;
261 for (k
= 1; k
<= l1
; ++k
)
263 ac
= (5*k
- 4)*ido
+ 1;
264 ti5
= cc
[ac
] - cc
[ac
+ 3*ido
];
265 ti2
= cc
[ac
] + cc
[ac
+ 3*ido
];
266 ti4
= cc
[ac
+ ido
] - cc
[ac
+ 2*ido
];
267 ti3
= cc
[ac
+ ido
] + cc
[ac
+ 2*ido
];
268 tr5
= cc
[ac
- 1] - cc
[ac
+ 3*ido
- 1];
269 tr2
= cc
[ac
- 1] + cc
[ac
+ 3*ido
- 1];
270 tr4
= cc
[ac
+ ido
- 1] - cc
[ac
+ 2*ido
- 1];
271 tr3
= cc
[ac
+ ido
- 1] + cc
[ac
+ 2*ido
- 1];
273 ch
[ah
] = cc
[ac
- ido
- 1] + tr2
+ tr3
;
274 ch
[ah
+ 1] = cc
[ac
- ido
] + ti2
+ ti3
;
275 cr2
= cc
[ac
- ido
- 1] + tr11
*tr2
+ tr12
*tr3
;
276 ci2
= cc
[ac
- ido
] + tr11
*ti2
+ tr12
*ti3
;
277 cr3
= cc
[ac
- ido
- 1] + tr12
*tr2
+ tr11
*tr3
;
278 ci3
= cc
[ac
- ido
] + tr12
*ti2
+ tr11
*ti3
;
279 cr5
= isign
*(ti11
*tr5
+ ti12
*tr4
);
280 ci5
= isign
*(ti11
*ti5
+ ti12
*ti4
);
281 cr4
= isign
*(ti12
*tr5
- ti11
*tr4
);
282 ci4
= isign
*(ti12
*ti5
- ti11
*ti4
);
283 ch
[ah
+ l1
*ido
] = cr2
- ci5
;
284 ch
[ah
+ 4*l1
*ido
] = cr2
+ ci5
;
285 ch
[ah
+ l1
*ido
+ 1] = ci2
+ cr5
;
286 ch
[ah
+ 2*l1
*ido
+ 1] = ci3
+ cr4
;
287 ch
[ah
+ 2*l1
*ido
] = cr3
- ci4
;
288 ch
[ah
+ 3*l1
*ido
] = cr3
+ ci4
;
289 ch
[ah
+ 3*l1
*ido
+ 1] = ci3
- cr4
;
290 ch
[ah
+ 4*l1
*ido
+ 1] = ci2
- cr5
;
295 for (k
=1; k
<=l1
; k
++)
297 for (i
=0; i
<ido
-1; i
+=2)
299 ac
= i
+ 1 + (k
*5 - 4)*ido
;
300 ti5
= cc
[ac
] - cc
[ac
+ 3*ido
];
301 ti2
= cc
[ac
] + cc
[ac
+ 3*ido
];
302 ti4
= cc
[ac
+ ido
] - cc
[ac
+ 2*ido
];
303 ti3
= cc
[ac
+ ido
] + cc
[ac
+ 2*ido
];
304 tr5
= cc
[ac
- 1] - cc
[ac
+ 3*ido
- 1];
305 tr2
= cc
[ac
- 1] + cc
[ac
+ 3*ido
- 1];
306 tr4
= cc
[ac
+ ido
- 1] - cc
[ac
+ 2*ido
- 1];
307 tr3
= cc
[ac
+ ido
- 1] + cc
[ac
+ 2*ido
- 1];
308 ah
= i
+ (k
- 1)*ido
;
309 ch
[ah
] = cc
[ac
- ido
- 1] + tr2
+ tr3
;
310 ch
[ah
+ 1] = cc
[ac
- ido
] + ti2
+ ti3
;
311 cr2
= cc
[ac
- ido
- 1] + tr11
*tr2
+ tr12
*tr3
;
312 ci2
= cc
[ac
- ido
] + tr11
*ti2
+ tr12
*ti3
;
313 cr3
= cc
[ac
- ido
- 1] + tr12
*tr2
+ tr11
*tr3
;
314 ci3
= cc
[ac
- ido
] + tr12
*ti2
+ tr11
*ti3
;
315 cr5
= isign
*(ti11
*tr5
+ ti12
*tr4
);
316 ci5
= isign
*(ti11
*ti5
+ ti12
*ti4
);
317 cr4
= isign
*(ti12
*tr5
- ti11
*tr4
);
318 ci4
= isign
*(ti12
*ti5
- ti11
*ti4
);
327 ch
[ah
+ l1
*ido
] = wa1
[i
]*dr2
- isign
*wa1
[i
+1]*di2
;
328 ch
[ah
+ l1
*ido
+ 1] = wa1
[i
]*di2
+ isign
*wa1
[i
+1]*dr2
;
329 ch
[ah
+ 2*l1
*ido
] = wa2
[i
]*dr3
- isign
*wa2
[i
+1]*di3
;
330 ch
[ah
+ 2*l1
*ido
+ 1] = wa2
[i
]*di3
+ isign
*wa2
[i
+1]*dr3
;
331 ch
[ah
+ 3*l1
*ido
] = wa3
[i
]*dr4
- isign
*wa3
[i
+1]*di4
;
332 ch
[ah
+ 3*l1
*ido
+ 1] = wa3
[i
]*di4
+ isign
*wa3
[i
+1]*dr4
;
333 ch
[ah
+ 4*l1
*ido
] = wa4
[i
]*dr5
- isign
*wa4
[i
+1]*di5
;
334 ch
[ah
+ 4*l1
*ido
+ 1] = wa4
[i
]*di5
+ isign
*wa4
[i
+1]*dr5
;
342 fftpack_passf(int * nac
,
352 int idij
, idlj
, idot
, ipph
, i
, j
, k
, l
, jc
, lc
, ik
, nt
, idj
, idl
, inc
,idp
;
361 for (j
=1; j
<ipph
; j
++)
366 for (i
=0; i
<ido
; i
++)
368 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (j
+ k
*ip
)*ido
] + cc
[i
+ (jc
+ k
*ip
)*ido
];
369 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (j
+ k
*ip
)*ido
] - cc
[i
+ (jc
+ k
*ip
)*ido
];
374 for (i
=0; i
<ido
; i
++)
375 ch
[i
+ k
*ido
] = cc
[i
+ k
*ip
*ido
];
379 for (j
=1; j
<ipph
; j
++)
382 for (i
=0; i
<ido
; i
++)
386 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (j
+ k
*ip
)*ido
] + cc
[i
+ (jc
+ k
*ip
)*ido
];
387 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (j
+ k
*ip
)*ido
] - cc
[i
+ (jc
+ k
*ip
)*ido
];
391 for (i
=0; i
<ido
; i
++)
393 ch
[i
+ k
*ido
] = cc
[i
+ k
*ip
*ido
];
398 for (l
=1; l
<ipph
; l
++)
402 for (ik
=0; ik
<idl1
; ik
++)
404 cc
[ik
+ l
*idl1
] = ch
[ik
] + wa
[idl
- 2]*ch
[ik
+ idl1
];
405 cc
[ik
+ lc
*idl1
] = isign
*wa
[idl
-1]*ch
[ik
+ (ip
-1)*idl1
];
409 for (j
=2; j
<ipph
; j
++)
413 if (idlj
> idp
) idlj
-= idp
;
416 for (ik
=0; ik
<idl1
; ik
++)
418 cc
[ik
+ l
*idl1
] += war
*ch
[ik
+ j
*idl1
];
419 cc
[ik
+ lc
*idl1
] += isign
*wai
*ch
[ik
+ jc
*idl1
];
423 for (j
=1; j
<ipph
; j
++)
424 for (ik
=0; ik
<idl1
; ik
++)
425 ch
[ik
] += ch
[ik
+ j
*idl1
];
426 for (j
=1; j
<ipph
; j
++)
429 for (ik
=1; ik
<idl1
; ik
+=2)
431 ch
[ik
- 1 + j
*idl1
] = cc
[ik
- 1 + j
*idl1
] - cc
[ik
+ jc
*idl1
];
432 ch
[ik
- 1 + jc
*idl1
] = cc
[ik
- 1 + j
*idl1
] + cc
[ik
+ jc
*idl1
];
433 ch
[ik
+ j
*idl1
] = cc
[ik
+ j
*idl1
] + cc
[ik
- 1 + jc
*idl1
];
434 ch
[ik
+ jc
*idl1
] = cc
[ik
+ j
*idl1
] - cc
[ik
- 1 + jc
*idl1
];
441 for (ik
=0; ik
<idl1
; ik
++)
449 cc
[(k
+ j
*l1
)*ido
+ 0] = ch
[(k
+ j
*l1
)*ido
+ 0];
450 cc
[(k
+ j
*l1
)*ido
+ 1] = ch
[(k
+ j
*l1
)*ido
+ 1];
459 for (i
=3; i
<ido
; i
+=2)
464 cc
[i
- 1 + (k
+ j
*l1
)*ido
] =
465 wa
[idij
- 2]*ch
[i
- 1 + (k
+ j
*l1
)*ido
] -
466 isign
*wa
[idij
-1]*ch
[i
+ (k
+ j
*l1
)*ido
];
467 cc
[i
+ (k
+ j
*l1
)*ido
] =
468 wa
[idij
- 2]*ch
[i
+ (k
+ j
*l1
)*ido
] +
469 isign
*wa
[idij
-1]*ch
[i
- 1 + (k
+ j
*l1
)*ido
];
480 for (k
= 0; k
< l1
; k
++)
483 for (i
=3; i
<ido
; i
+=2)
486 cc
[i
- 1 + (k
+ j
*l1
)*ido
] =
487 wa
[idij
- 2]*ch
[i
- 1 + (k
+ j
*l1
)*ido
] -
488 isign
*wa
[idij
-1]*ch
[i
+ (k
+ j
*l1
)*ido
];
489 cc
[i
+ (k
+ j
*l1
)*ido
] =
490 wa
[idij
- 2]*ch
[i
+ (k
+ j
*l1
)*ido
] +
491 isign
*wa
[idij
-1]*ch
[i
- 1 + (k
+ j
*l1
)*ido
];
501 fftpack_radf2(int ido
,
511 ch
[2*k
*ido
] = cc
[k
*ido
] + cc
[(k
+ l1
)*ido
];
512 ch
[(2*k
+1)*ido
+ ido
-1] = cc
[k
*ido
] - cc
[(k
+ l1
)*ido
];
520 for (i
=2; i
<ido
; i
+=2)
523 tr2
= wa1
[i
- 2]*cc
[i
-1 + (k
+ l1
)*ido
] + wa1
[i
- 1]*cc
[i
+ (k
+ l1
)*ido
];
524 ti2
= wa1
[i
- 2]*cc
[i
+ (k
+ l1
)*ido
] - wa1
[i
- 1]*cc
[i
-1 + (k
+ l1
)*ido
];
525 ch
[i
+ 2*k
*ido
] = cc
[i
+ k
*ido
] + ti2
;
526 ch
[ic
+ (2*k
+1)*ido
] = ti2
- cc
[i
+ k
*ido
];
527 ch
[i
- 1 + 2*k
*ido
] = cc
[i
- 1 + k
*ido
] + tr2
;
528 ch
[ic
- 1 + (2*k
+1)*ido
] = cc
[i
- 1 + k
*ido
] - tr2
;
536 ch
[(2*k
+1)*ido
] = -cc
[ido
-1 + (k
+ l1
)*ido
];
537 ch
[ido
-1 + 2*k
*ido
] = cc
[ido
-1 + k
*ido
];
543 fftpack_radb2(int ido
,
553 ch
[k
*ido
] = cc
[2*k
*ido
] + cc
[ido
-1 + (2*k
+1)*ido
];
554 ch
[(k
+ l1
)*ido
] = cc
[2*k
*ido
] - cc
[ido
-1 + (2*k
+1)*ido
];
560 for (k
= 0; k
< l1
; ++k
)
562 for (i
= 2; i
< ido
; i
+= 2)
565 ch
[i
-1 + k
*ido
] = cc
[i
-1 + 2*k
*ido
] + cc
[ic
-1 + (2*k
+1)*ido
];
566 tr2
= cc
[i
-1 + 2*k
*ido
] - cc
[ic
-1 + (2*k
+1)*ido
];
567 ch
[i
+ k
*ido
] = cc
[i
+ 2*k
*ido
] - cc
[ic
+ (2*k
+1)*ido
];
568 ti2
= cc
[i
+ (2*k
)*ido
] + cc
[ic
+ (2*k
+1)*ido
];
569 ch
[i
-1 + (k
+ l1
)*ido
] = wa1
[i
- 2]*tr2
- wa1
[i
- 1]*ti2
;
570 ch
[i
+ (k
+ l1
)*ido
] = wa1
[i
- 2]*ti2
+ wa1
[i
- 1]*tr2
;
576 for (k
= 0; k
< l1
; k
++)
578 ch
[ido
-1 + k
*ido
] = 2*cc
[ido
-1 + 2*k
*ido
];
579 ch
[ido
-1 + (k
+ l1
)*ido
] = -2*cc
[(2*k
+1)*ido
];
585 fftpack_radf3(int ido
,
592 const real taur
= -0.5;
593 const real taui
= 0.866025403784439;
595 real ci2
, di2
, di3
, cr2
, dr2
, dr3
, ti2
, ti3
, tr2
, tr3
;
599 cr2
= cc
[(k
+ l1
)*ido
] + cc
[(k
+ 2*l1
)*ido
];
600 ch
[3*k
*ido
] = cc
[k
*ido
] + cr2
;
601 ch
[(3*k
+2)*ido
] = taui
*(cc
[(k
+ l1
*2)*ido
] - cc
[(k
+ l1
)*ido
]);
602 ch
[ido
-1 + (3*k
+ 1)*ido
] = cc
[k
*ido
] + taur
*cr2
;
608 for (i
=2; i
<ido
; i
+=2)
611 dr2
= wa1
[i
- 2]*cc
[i
- 1 + (k
+ l1
)*ido
] +wa1
[i
- 1]*cc
[i
+ (k
+ l1
)*ido
];
612 di2
= wa1
[i
- 2]*cc
[i
+ (k
+ l1
)*ido
] - wa1
[i
- 1]*cc
[i
- 1 + (k
+ l1
)*ido
];
613 dr3
= wa2
[i
- 2]*cc
[i
- 1 + (k
+ l1
*2)*ido
] + wa2
[i
- 1]*cc
[i
+ (k
+ l1
*2)*ido
];
614 di3
= wa2
[i
- 2]*cc
[i
+ (k
+ l1
*2)*ido
] - wa2
[i
- 1]*cc
[i
- 1 + (k
+ l1
*2)*ido
];
617 ch
[i
- 1 + 3*k
*ido
] = cc
[i
- 1 + k
*ido
] + cr2
;
618 ch
[i
+ 3*k
*ido
] = cc
[i
+ k
*ido
] + ci2
;
619 tr2
= cc
[i
- 1 + k
*ido
] + taur
*cr2
;
620 ti2
= cc
[i
+ k
*ido
] + taur
*ci2
;
621 tr3
= taui
*(di2
- di3
);
622 ti3
= taui
*(dr3
- dr2
);
623 ch
[i
- 1 + (3*k
+ 2)*ido
] = tr2
+ tr3
;
624 ch
[ic
- 1 + (3*k
+ 1)*ido
] = tr2
- tr3
;
625 ch
[i
+ (3*k
+ 2)*ido
] = ti2
+ ti3
;
626 ch
[ic
+ (3*k
+ 1)*ido
] = ti3
- ti2
;
633 fftpack_radb3(int ido
,
640 const real taur
= -0.5;
641 const real taui
= 0.866025403784439;
643 real ci2
, ci3
, di2
, di3
, cr2
, cr3
, dr2
, dr3
, ti2
, tr2
;
647 tr2
= 2*cc
[ido
-1 + (3*k
+ 1)*ido
];
648 cr2
= cc
[3*k
*ido
] + taur
*tr2
;
649 ch
[k
*ido
] = cc
[3*k
*ido
] + tr2
;
650 ci3
= 2*taui
*cc
[(3*k
+ 2)*ido
];
651 ch
[(k
+ l1
)*ido
] = cr2
- ci3
;
652 ch
[(k
+ 2*l1
)*ido
] = cr2
+ ci3
;
659 for (i
=2; i
<ido
; i
+=2)
662 tr2
= cc
[i
- 1 + (3*k
+ 2)*ido
] + cc
[ic
- 1 + (3*k
+ 1)*ido
];
663 cr2
= cc
[i
- 1 + 3*k
*ido
] + taur
*tr2
;
664 ch
[i
- 1 + k
*ido
] = cc
[i
- 1 + 3*k
*ido
] + tr2
;
665 ti2
= cc
[i
+ (3*k
+ 2)*ido
]- cc
[ic
+ (3*k
+ 1)*ido
];
666 ci2
= cc
[i
+ 3*k
*ido
] + taur
*ti2
;
667 ch
[i
+ k
*ido
] = cc
[i
+ 3*k
*ido
] + ti2
;
668 cr3
= taui
*(cc
[i
- 1 + (3*k
+ 2)*ido
] - cc
[ic
- 1 + (3*k
+ 1)*ido
]);
669 ci3
= taui
*(cc
[i
+ (3*k
+ 2)*ido
] + cc
[ic
+ (3*k
+ 1)*ido
]);
674 ch
[i
- 1 + (k
+ l1
)*ido
] = wa1
[i
- 2]*dr2
- wa1
[i
- 1]*di2
;
675 ch
[i
+ (k
+ l1
)*ido
] = wa1
[i
- 2]*di2
+ wa1
[i
- 1]*dr2
;
676 ch
[i
- 1 + (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*dr3
- wa2
[i
- 1]*di3
;
677 ch
[i
+ (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*di3
+ wa2
[i
- 1]*dr3
;
684 fftpack_radf4(int ido
,
692 const real hsqt2
= 0.7071067811865475;
694 real ci2
, ci3
, ci4
, cr2
, cr3
, cr4
, ti1
, ti2
, ti3
, ti4
, tr1
, tr2
, tr3
, tr4
;
698 tr1
= cc
[(k
+ l1
)*ido
] + cc
[(k
+ 3*l1
)*ido
];
699 tr2
= cc
[k
*ido
] + cc
[(k
+ 2*l1
)*ido
];
700 ch
[4*k
*ido
] = tr1
+ tr2
;
701 ch
[ido
-1 + (4*k
+ 3)*ido
] = tr2
- tr1
;
702 ch
[ido
-1 + (4*k
+ 1)*ido
] = cc
[k
*ido
] - cc
[(k
+ 2*l1
)*ido
];
703 ch
[(4*k
+ 2)*ido
] = cc
[(k
+ 3*l1
)*ido
] - cc
[(k
+ l1
)*ido
];
711 for (i
=2; i
<ido
; i
+= 2)
714 cr2
= wa1
[i
- 2]*cc
[i
- 1 + (k
+ l1
)*ido
] + wa1
[i
- 1]*cc
[i
+ (k
+ l1
)*ido
];
715 ci2
= wa1
[i
- 2]*cc
[i
+ (k
+ l1
)*ido
] - wa1
[i
- 1]*cc
[i
- 1 + (k
+ l1
)*ido
];
716 cr3
= wa2
[i
- 2]*cc
[i
- 1 + (k
+ 2*l1
)*ido
] + wa2
[i
- 1]*cc
[i
+ (k
+ 2*l1
)*ido
];
717 ci3
= wa2
[i
- 2]*cc
[i
+ (k
+ 2*l1
)*ido
] - wa2
[i
- 1]*cc
[i
- 1 + (k
+ 2*l1
)*ido
];
718 cr4
= wa3
[i
- 2]*cc
[i
- 1 + (k
+ 3*l1
)*ido
] + wa3
[i
- 1]*cc
[i
+ (k
+ 3*l1
)*ido
];
719 ci4
= wa3
[i
- 2]*cc
[i
+ (k
+ 3*l1
)*ido
] - wa3
[i
- 1]*cc
[i
- 1 + (k
+ 3*l1
)*ido
];
724 ti2
= cc
[i
+ k
*ido
] + ci3
;
725 ti3
= cc
[i
+ k
*ido
] - ci3
;
726 tr2
= cc
[i
- 1 + k
*ido
] + cr3
;
727 tr3
= cc
[i
- 1 + k
*ido
] - cr3
;
728 ch
[i
- 1 + 4*k
*ido
] = tr1
+ tr2
;
729 ch
[ic
- 1 + (4*k
+ 3)*ido
] = tr2
- tr1
;
730 ch
[i
+ 4*k
*ido
] = ti1
+ ti2
;
731 ch
[ic
+ (4*k
+ 3)*ido
] = ti1
- ti2
;
732 ch
[i
- 1 + (4*k
+ 2)*ido
] = ti4
+ tr3
;
733 ch
[ic
- 1 + (4*k
+ 1)*ido
] = tr3
- ti4
;
734 ch
[i
+ (4*k
+ 2)*ido
] = tr4
+ ti3
;
735 ch
[ic
+ (4*k
+ 1)*ido
] = tr4
- ti3
;
743 ti1
= -hsqt2
*(cc
[ido
-1 + (k
+ l1
)*ido
] + cc
[ido
-1 + (k
+ 3*l1
)*ido
]);
744 tr1
= hsqt2
*(cc
[ido
-1 + (k
+ l1
)*ido
] - cc
[ido
-1 + (k
+ 3*l1
)*ido
]);
745 ch
[ido
-1 + 4*k
*ido
] = tr1
+ cc
[ido
-1 + k
*ido
];
746 ch
[ido
-1 + (4*k
+ 2)*ido
] = cc
[ido
-1 + k
*ido
] - tr1
;
747 ch
[(4*k
+ 1)*ido
] = ti1
- cc
[ido
-1 + (k
+ 2*l1
)*ido
];
748 ch
[(4*k
+ 3)*ido
] = ti1
+ cc
[ido
-1 + (k
+ 2*l1
)*ido
];
754 fftpack_radb4(int ido
,
762 const real sqrt2
= 1.414213562373095;
764 real ci2
, ci3
, ci4
, cr2
, cr3
, cr4
, ti1
, ti2
, ti3
, ti4
, tr1
, tr2
, tr3
, tr4
;
765 for (k
= 0; k
< l1
; k
++)
767 tr1
= cc
[4*k
*ido
] - cc
[ido
-1 + (4*k
+ 3)*ido
];
768 tr2
= cc
[4*k
*ido
] + cc
[ido
-1 + (4*k
+ 3)*ido
];
769 tr3
= cc
[ido
-1 + (4*k
+ 1)*ido
] + cc
[ido
-1 + (4*k
+ 1)*ido
];
770 tr4
= cc
[(4*k
+ 2)*ido
] + cc
[(4*k
+ 2)*ido
];
771 ch
[k
*ido
] = tr2
+ tr3
;
772 ch
[(k
+ l1
)*ido
] = tr1
- tr4
;
773 ch
[(k
+ 2*l1
)*ido
] = tr2
- tr3
;
774 ch
[(k
+ 3*l1
)*ido
] = tr1
+ tr4
;
780 for (k
= 0; k
< l1
; ++k
)
782 for (i
= 2; i
< ido
; i
+= 2)
785 ti1
= cc
[i
+ 4*k
*ido
] + cc
[ic
+ (4*k
+ 3)*ido
];
786 ti2
= cc
[i
+ 4*k
*ido
] - cc
[ic
+ (4*k
+ 3)*ido
];
787 ti3
= cc
[i
+ (4*k
+ 2)*ido
] - cc
[ic
+ (4*k
+ 1)*ido
];
788 tr4
= cc
[i
+ (4*k
+ 2)*ido
] + cc
[ic
+ (4*k
+ 1)*ido
];
789 tr1
= cc
[i
- 1 + 4*k
*ido
] - cc
[ic
- 1 + (4*k
+ 3)*ido
];
790 tr2
= cc
[i
- 1 + 4*k
*ido
] + cc
[ic
- 1 + (4*k
+ 3)*ido
];
791 ti4
= cc
[i
- 1 + (4*k
+ 2)*ido
] - cc
[ic
- 1 + (4*k
+ 1)*ido
];
792 tr3
= cc
[i
- 1 + (4*k
+ 2)*ido
] + cc
[ic
- 1 + (4*k
+ 1)*ido
];
793 ch
[i
- 1 + k
*ido
] = tr2
+ tr3
;
795 ch
[i
+ k
*ido
] = ti2
+ ti3
;
801 ch
[i
- 1 + (k
+ l1
)*ido
] = wa1
[i
- 2]*cr2
- wa1
[i
- 1]*ci2
;
802 ch
[i
+ (k
+ l1
)*ido
] = wa1
[i
- 2]*ci2
+ wa1
[i
- 1]*cr2
;
803 ch
[i
- 1 + (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*cr3
- wa2
[i
- 1]*ci3
;
804 ch
[i
+ (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*ci3
+ wa2
[i
- 1]*cr3
;
805 ch
[i
- 1 + (k
+ 3*l1
)*ido
] = wa3
[i
- 2]*cr4
- wa3
[i
- 1]*ci4
;
806 ch
[i
+ (k
+ 3*l1
)*ido
] = wa3
[i
- 2]*ci4
+ wa3
[i
- 1]*cr4
;
812 for (k
= 0; k
< l1
; k
++)
814 ti1
= cc
[(4*k
+ 1)*ido
] + cc
[(4*k
+ 3)*ido
];
815 ti2
= cc
[(4*k
+ 3)*ido
] - cc
[(4*k
+ 1)*ido
];
816 tr1
= cc
[ido
-1 + 4*k
*ido
] - cc
[ido
-1 + (4*k
+ 2)*ido
];
817 tr2
= cc
[ido
-1 + 4*k
*ido
] + cc
[ido
-1 + (4*k
+ 2)*ido
];
818 ch
[ido
-1 + k
*ido
] = tr2
+ tr2
;
819 ch
[ido
-1 + (k
+ l1
)*ido
] = sqrt2
*(tr1
- ti1
);
820 ch
[ido
-1 + (k
+ 2*l1
)*ido
] = ti2
+ ti2
;
821 ch
[ido
-1 + (k
+ 3*l1
)*ido
] = -sqrt2
*(tr1
+ ti1
);
827 fftpack_radf5(int ido
,
836 const real tr11
= 0.309016994374947;
837 const real ti11
= 0.951056516295154;
838 const real tr12
= -0.809016994374947;
839 const real ti12
= 0.587785252292473;
841 real ci2
, di2
, ci4
, ci5
, di3
, di4
, di5
, ci3
, cr2
, cr3
, dr2
, dr3
, dr4
, dr5
,
842 cr5
, cr4
, ti2
, ti3
, ti5
, ti4
, tr2
, tr3
, tr4
, tr5
;
844 for (k
= 0; k
< l1
; k
++)
846 cr2
= cc
[(k
+ 4*l1
)*ido
] + cc
[(k
+ l1
)*ido
];
847 ci5
= cc
[(k
+ 4*l1
)*ido
] - cc
[(k
+ l1
)*ido
];
848 cr3
= cc
[(k
+ 3*l1
)*ido
] + cc
[(k
+ 2*l1
)*ido
];
849 ci4
= cc
[(k
+ 3*l1
)*ido
] - cc
[(k
+ 2*l1
)*ido
];
850 ch
[5*k
*ido
] = cc
[k
*ido
] + cr2
+ cr3
;
851 ch
[ido
-1 + (5*k
+ 1)*ido
] = cc
[k
*ido
] + tr11
*cr2
+ tr12
*cr3
;
852 ch
[(5*k
+ 2)*ido
] = ti11
*ci5
+ ti12
*ci4
;
853 ch
[ido
-1 + (5*k
+ 3)*ido
] = cc
[k
*ido
] + tr12
*cr2
+ tr11
*cr3
;
854 ch
[(5*k
+ 4)*ido
] = ti12
*ci5
- ti11
*ci4
;
858 for (k
= 0; k
< l1
; ++k
)
860 for (i
= 2; i
< ido
; i
+= 2)
863 dr2
= wa1
[i
- 2]*cc
[i
- 1 + (k
+ l1
)*ido
] + wa1
[i
- 1]*cc
[i
+ (k
+ l1
)*ido
];
864 di2
= wa1
[i
- 2]*cc
[i
+ (k
+ l1
)*ido
] - wa1
[i
- 1]*cc
[i
- 1 + (k
+ l1
)*ido
];
865 dr3
= wa2
[i
- 2]*cc
[i
- 1 + (k
+ 2*l1
)*ido
] + wa2
[i
- 1]*cc
[i
+ (k
+ 2*l1
)*ido
];
866 di3
= wa2
[i
- 2]*cc
[i
+ (k
+ 2*l1
)*ido
] - wa2
[i
- 1]*cc
[i
- 1 + (k
+ 2*l1
)*ido
];
867 dr4
= wa3
[i
- 2]*cc
[i
- 1 + (k
+ 3*l1
)*ido
] + wa3
[i
- 1]*cc
[i
+ (k
+ 3*l1
)*ido
];
868 di4
= wa3
[i
- 2]*cc
[i
+ (k
+ 3*l1
)*ido
] - wa3
[i
- 1]*cc
[i
- 1 + (k
+ 3*l1
)*ido
];
869 dr5
= wa4
[i
- 2]*cc
[i
- 1 + (k
+ 4*l1
)*ido
] + wa4
[i
- 1]*cc
[i
+ (k
+ 4*l1
)*ido
];
870 di5
= wa4
[i
- 2]*cc
[i
+ (k
+ 4*l1
)*ido
] - wa4
[i
- 1]*cc
[i
- 1 + (k
+ 4*l1
)*ido
];
879 ch
[i
- 1 + 5*k
*ido
] = cc
[i
- 1 + k
*ido
] + cr2
+ cr3
;
880 ch
[i
+ 5*k
*ido
] = cc
[i
+ k
*ido
] + ci2
+ ci3
;
881 tr2
= cc
[i
- 1 + k
*ido
] + tr11
*cr2
+ tr12
*cr3
;
882 ti2
= cc
[i
+ k
*ido
] + tr11
*ci2
+ tr12
*ci3
;
883 tr3
= cc
[i
- 1 + k
*ido
] + tr12
*cr2
+ tr11
*cr3
;
884 ti3
= cc
[i
+ k
*ido
] + tr12
*ci2
+ tr11
*ci3
;
885 tr5
= ti11
*cr5
+ ti12
*cr4
;
886 ti5
= ti11
*ci5
+ ti12
*ci4
;
887 tr4
= ti12
*cr5
- ti11
*cr4
;
888 ti4
= ti12
*ci5
- ti11
*ci4
;
889 ch
[i
- 1 + (5*k
+ 2)*ido
] = tr2
+ tr5
;
890 ch
[ic
- 1 + (5*k
+ 1)*ido
] = tr2
- tr5
;
891 ch
[i
+ (5*k
+ 2)*ido
] = ti2
+ ti5
;
892 ch
[ic
+ (5*k
+ 1)*ido
] = ti5
- ti2
;
893 ch
[i
- 1 + (5*k
+ 4)*ido
] = tr3
+ tr4
;
894 ch
[ic
- 1 + (5*k
+ 3)*ido
] = tr3
- tr4
;
895 ch
[i
+ (5*k
+ 4)*ido
] = ti3
+ ti4
;
896 ch
[ic
+ (5*k
+ 3)*ido
] = ti4
- ti3
;
903 fftpack_radb5(int ido
,
912 const real tr11
= 0.309016994374947;
913 const real ti11
= 0.951056516295154;
914 const real tr12
= -0.809016994374947;
915 const real ti12
= 0.587785252292473;
918 real ci2
, ci3
, ci4
, ci5
, di3
, di4
, di5
, di2
, cr2
, cr3
, cr5
, cr4
, ti2
, ti3
,
919 ti4
, ti5
, dr3
, dr4
, dr5
, dr2
, tr2
, tr3
, tr4
, tr5
;
921 for (k
= 0; k
< l1
; k
++)
923 ti5
= 2*cc
[(5*k
+ 2)*ido
];
924 ti4
= 2*cc
[(5*k
+ 4)*ido
];
925 tr2
= 2*cc
[ido
-1 + (5*k
+ 1)*ido
];
926 tr3
= 2*cc
[ido
-1 + (5*k
+ 3)*ido
];
927 ch
[k
*ido
] = cc
[5*k
*ido
] + tr2
+ tr3
;
928 cr2
= cc
[5*k
*ido
] + tr11
*tr2
+ tr12
*tr3
;
929 cr3
= cc
[5*k
*ido
] + tr12
*tr2
+ tr11
*tr3
;
930 ci5
= ti11
*ti5
+ ti12
*ti4
;
931 ci4
= ti12
*ti5
- ti11
*ti4
;
932 ch
[(k
+ l1
)*ido
] = cr2
- ci5
;
933 ch
[(k
+ 2*l1
)*ido
] = cr3
- ci4
;
934 ch
[(k
+ 3*l1
)*ido
] = cr3
+ ci4
;
935 ch
[(k
+ 4*l1
)*ido
] = cr2
+ ci5
;
937 if (ido
== 1) return;
938 for (k
= 0; k
< l1
; ++k
)
940 for (i
= 2; i
< ido
; i
+= 2)
943 ti5
= cc
[i
+ (5*k
+ 2)*ido
] + cc
[ic
+ (5*k
+ 1)*ido
];
944 ti2
= cc
[i
+ (5*k
+ 2)*ido
] - cc
[ic
+ (5*k
+ 1)*ido
];
945 ti4
= cc
[i
+ (5*k
+ 4)*ido
] + cc
[ic
+ (5*k
+ 3)*ido
];
946 ti3
= cc
[i
+ (5*k
+ 4)*ido
] - cc
[ic
+ (5*k
+ 3)*ido
];
947 tr5
= cc
[i
- 1 + (5*k
+ 2)*ido
] - cc
[ic
- 1 + (5*k
+ 1)*ido
];
948 tr2
= cc
[i
- 1 + (5*k
+ 2)*ido
] + cc
[ic
- 1 + (5*k
+ 1)*ido
];
949 tr4
= cc
[i
- 1 + (5*k
+ 4)*ido
] - cc
[ic
- 1 + (5*k
+ 3)*ido
];
950 tr3
= cc
[i
- 1 + (5*k
+ 4)*ido
] + cc
[ic
- 1 + (5*k
+ 3)*ido
];
951 ch
[i
- 1 + k
*ido
] = cc
[i
- 1 + 5*k
*ido
] + tr2
+ tr3
;
952 ch
[i
+ k
*ido
] = cc
[i
+ 5*k
*ido
] + ti2
+ ti3
;
953 cr2
= cc
[i
- 1 + 5*k
*ido
] + tr11
*tr2
+ tr12
*tr3
;
954 ci2
= cc
[i
+ 5*k
*ido
] + tr11
*ti2
+ tr12
*ti3
;
955 cr3
= cc
[i
- 1 + 5*k
*ido
] + tr12
*tr2
+ tr11
*tr3
;
956 ci3
= cc
[i
+ 5*k
*ido
] + tr12
*ti2
+ tr11
*ti3
;
957 cr5
= ti11
*tr5
+ ti12
*tr4
;
958 ci5
= ti11
*ti5
+ ti12
*ti4
;
959 cr4
= ti12
*tr5
- ti11
*tr4
;
960 ci4
= ti12
*ti5
- ti11
*ti4
;
969 ch
[i
- 1 + (k
+ l1
)*ido
] = wa1
[i
- 2]*dr2
- wa1
[i
- 1]*di2
;
970 ch
[i
+ (k
+ l1
)*ido
] = wa1
[i
- 2]*di2
+ wa1
[i
- 1]*dr2
;
971 ch
[i
- 1 + (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*dr3
- wa2
[i
- 1]*di3
;
972 ch
[i
+ (k
+ 2*l1
)*ido
] = wa2
[i
- 2]*di3
+ wa2
[i
- 1]*dr3
;
973 ch
[i
- 1 + (k
+ 3*l1
)*ido
] = wa3
[i
- 2]*dr4
- wa3
[i
- 1]*di4
;
974 ch
[i
+ (k
+ 3*l1
)*ido
] = wa3
[i
- 2]*di4
+ wa3
[i
- 1]*dr4
;
975 ch
[i
- 1 + (k
+ 4*l1
)*ido
] = wa4
[i
- 2]*dr5
- wa4
[i
- 1]*di5
;
976 ch
[i
+ (k
+ 4*l1
)*ido
] = wa4
[i
- 2]*di5
+ wa4
[i
- 1]*dr5
;
983 fftpack_radfg(int ido
,
991 const real twopi
= 6.28318530717959;
992 int idij
, ipph
, i
, j
, k
, l
, j2
, ic
, jc
, lc
, ik
, is
, nbd
;
993 real dc2
, ai1
, ai2
, ar1
, ar2
, ds2
, dcp
, arg
, dsp
, ar1h
, ar2h
;
1001 for (ik
=0; ik
<idl1
; ik
++) ch
[ik
] = cc
[ik
];
1002 for (j
=1; j
<ip
; j
++)
1003 for (k
=0; k
<l1
; k
++)
1004 ch
[(k
+ j
*l1
)*ido
] = cc
[(k
+ j
*l1
)*ido
];
1008 for (j
=1; j
<ip
; j
++)
1012 for (i
=2; i
<ido
; i
+=2)
1015 for (k
=0; k
<l1
; k
++)
1017 ch
[i
- 1 + (k
+ j
*l1
)*ido
] =
1018 wa
[idij
- 1]*cc
[i
- 1 + (k
+ j
*l1
)*ido
] + wa
[idij
]*cc
[i
+ (k
+ j
*l1
)*ido
];
1019 ch
[i
+ (k
+ j
*l1
)*ido
] =
1020 wa
[idij
- 1]*cc
[i
+ (k
+ j
*l1
)*ido
] - wa
[idij
]*cc
[i
- 1 + (k
+ j
*l1
)*ido
];
1028 for (j
=1; j
<ip
; j
++)
1031 for (k
=0; k
<l1
; k
++)
1034 for (i
=2; i
<ido
; i
+=2)
1037 ch
[i
- 1 + (k
+ j
*l1
)*ido
] =
1038 wa
[idij
- 1]*cc
[i
- 1 + (k
+ j
*l1
)*ido
] + wa
[idij
]*cc
[i
+ (k
+ j
*l1
)*ido
];
1039 ch
[i
+ (k
+ j
*l1
)*ido
] =
1040 wa
[idij
- 1]*cc
[i
+ (k
+ j
*l1
)*ido
] - wa
[idij
]*cc
[i
- 1 + (k
+ j
*l1
)*ido
];
1047 for (j
=1; j
<ipph
; j
++)
1050 for (k
=0; k
<l1
; k
++)
1052 for (i
=2; i
<ido
; i
+=2)
1054 cc
[i
- 1 + (k
+ j
*l1
)*ido
] = ch
[i
- 1 + (k
+ j
*l1
)*ido
] + ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1055 cc
[i
- 1 + (k
+ jc
*l1
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] - ch
[i
+ (k
+ jc
*l1
)*ido
];
1056 cc
[i
+ (k
+ j
*l1
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] + ch
[i
+ (k
+ jc
*l1
)*ido
];
1057 cc
[i
+ (k
+ jc
*l1
)*ido
] = ch
[i
- 1 + (k
+ jc
*l1
)*ido
] - ch
[i
- 1 + (k
+ j
*l1
)*ido
];
1064 for (j
=1; j
<ipph
; j
++)
1067 for (i
=2; i
<ido
; i
+=2)
1069 for (k
=0; k
<l1
; k
++)
1071 cc
[i
- 1 + (k
+ j
*l1
)*ido
] =
1072 ch
[i
- 1 + (k
+ j
*l1
)*ido
] + ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1073 cc
[i
- 1 + (k
+ jc
*l1
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] - ch
[i
+ (k
+ jc
*l1
)*ido
];
1074 cc
[i
+ (k
+ j
*l1
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] + ch
[i
+ (k
+ jc
*l1
)*ido
];
1075 cc
[i
+ (k
+ jc
*l1
)*ido
] = ch
[i
- 1 + (k
+ jc
*l1
)*ido
] - ch
[i
- 1 + (k
+ j
*l1
)*ido
];
1083 for (ik
=0; ik
<idl1
; ik
++)
1086 for (j
=1; j
<ipph
; j
++)
1089 for (k
=0; k
<l1
; k
++)
1091 cc
[(k
+ j
*l1
)*ido
] = ch
[(k
+ j
*l1
)*ido
] + ch
[(k
+ jc
*l1
)*ido
];
1092 cc
[(k
+ jc
*l1
)*ido
] = ch
[(k
+ jc
*l1
)*ido
] - ch
[(k
+ j
*l1
)*ido
];
1098 for (l
=1; l
<ipph
; l
++)
1101 ar1h
= dcp
*ar1
- dsp
*ai1
;
1102 ai1
= dcp
*ai1
+ dsp
*ar1
;
1104 for (ik
=0; ik
<idl1
; ik
++)
1106 ch
[ik
+ l
*idl1
] = cc
[ik
] + ar1
*cc
[ik
+ idl1
];
1107 ch
[ik
+ lc
*idl1
] = ai1
*cc
[ik
+ (ip
-1)*idl1
];
1113 for (j
=2; j
<ipph
; j
++)
1116 ar2h
= dc2
*ar2
- ds2
*ai2
;
1117 ai2
= dc2
*ai2
+ ds2
*ar2
;
1119 for (ik
=0; ik
<idl1
; ik
++)
1121 ch
[ik
+ l
*idl1
] += ar2
*cc
[ik
+ j
*idl1
];
1122 ch
[ik
+ lc
*idl1
] += ai2
*cc
[ik
+ jc
*idl1
];
1126 for (j
=1; j
<ipph
; j
++)
1127 for (ik
=0; ik
<idl1
; ik
++)
1128 ch
[ik
] += cc
[ik
+ j
*idl1
];
1132 for (k
=0; k
<l1
; k
++)
1134 for (i
=0; i
<ido
; i
++)
1136 cc
[i
+ k
*ip
*ido
] = ch
[i
+ k
*ido
];
1142 for (i
=0; i
<ido
; i
++)
1144 for (k
=0; k
<l1
; k
++)
1146 cc
[i
+ k
*ip
*ido
] = ch
[i
+ k
*ido
];
1150 for (j
=1; j
<ipph
; j
++)
1154 for (k
=0; k
<l1
; k
++)
1156 cc
[ido
-1 + (j2
- 1 + k
*ip
)*ido
] = ch
[(k
+ j
*l1
)*ido
];
1157 cc
[(j2
+ k
*ip
)*ido
] = ch
[(k
+ jc
*l1
)*ido
];
1160 if (ido
== 1) return;
1163 for (j
=1; j
<ipph
; j
++)
1167 for (k
=0; k
<l1
; k
++)
1169 for (i
=2; i
<ido
; i
+=2)
1172 cc
[i
- 1 + (j2
+ k
*ip
)*ido
] = ch
[i
- 1 + (k
+ j
*l1
)*ido
] + ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1173 cc
[ic
- 1 + (j2
- 1 + k
*ip
)*ido
] = ch
[i
- 1 + (k
+ j
*l1
)*ido
] - ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1174 cc
[i
+ (j2
+ k
*ip
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] + ch
[i
+ (k
+ jc
*l1
)*ido
];
1175 cc
[ic
+ (j2
- 1 + k
*ip
)*ido
] = ch
[i
+ (k
+ jc
*l1
)*ido
] - ch
[i
+ (k
+ j
*l1
)*ido
];
1182 for (j
=1; j
<ipph
; j
++)
1186 for (i
=2; i
<ido
; i
+=2)
1189 for (k
=0; k
<l1
; k
++)
1191 cc
[i
- 1 + (j2
+ k
*ip
)*ido
] = ch
[i
- 1 + (k
+ j
*l1
)*ido
] + ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1192 cc
[ic
- 1 + (j2
- 1 + k
*ip
)*ido
] = ch
[i
- 1 + (k
+ j
*l1
)*ido
] - ch
[i
- 1 + (k
+ jc
*l1
)*ido
];
1193 cc
[i
+ (j2
+ k
*ip
)*ido
] = ch
[i
+ (k
+ j
*l1
)*ido
] + ch
[i
+ (k
+ jc
*l1
)*ido
];
1194 cc
[ic
+ (j2
- 1 + k
*ip
)*ido
] = ch
[i
+ (k
+ jc
*l1
)*ido
] - ch
[i
+ (k
+ j
*l1
)*ido
];
1203 fftpack_radbg(int ido
,
1211 const real twopi
= 6.28318530717959;
1212 int idij
, ipph
, i
, j
, k
, l
, j2
, ic
, jc
, lc
, ik
, is
;
1213 real dc2
, ai1
, ai2
, ar1
, ar2
, ds2
;
1215 real dcp
, arg
, dsp
, ar1h
, ar2h
;
1219 nbd
= (ido
- 1) / 2;
1220 ipph
= (ip
+ 1) / 2;
1224 for (k
=0; k
<l1
; k
++)
1226 for (i
=0; i
<ido
; i
++)
1228 ch
[i
+ k
*ido
] = cc
[i
+ k
*ip
*ido
];
1234 for (i
=0; i
<ido
; i
++)
1236 for (k
=0; k
<l1
; k
++)
1238 ch
[i
+ k
*ido
] = cc
[i
+ k
*ip
*ido
];
1242 for (j
=1; j
<ipph
; j
++)
1246 for (k
=0; k
<l1
; k
++)
1248 ch
[(k
+ j
*l1
)*ido
] = cc
[ido
-1 + (j2
- 1 + k
*ip
)*ido
] + cc
[ido
-1 + (j2
- 1 + k
*ip
)*ido
];
1249 ch
[(k
+ jc
*l1
)*ido
] = cc
[(j2
+ k
*ip
)*ido
] + cc
[(j2
+ k
*ip
)*ido
];
1257 for (j
=1; j
<ipph
; j
++)
1260 for (k
=0; k
<l1
; k
++)
1262 for (i
=2; i
<ido
; i
+=2)
1265 ch
[i
- 1 + (k
+ j
*l1
)*ido
] = cc
[i
- 1 + (2*j
+ k
*ip
)*ido
] + cc
[ic
- 1 + (2*j
- 1 + k
*ip
)*ido
];
1266 ch
[i
- 1 + (k
+ jc
*l1
)*ido
] = cc
[i
- 1 + (2*j
+ k
*ip
)*ido
] - cc
[ic
- 1 + (2*j
- 1 + k
*ip
)*ido
];
1267 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (2*j
+ k
*ip
)*ido
] - cc
[ic
+ (2*j
- 1 + k
*ip
)*ido
];
1268 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (2*j
+ k
*ip
)*ido
] + cc
[ic
+ (2*j
- 1 + k
*ip
)*ido
];
1275 for (j
=1; j
<ipph
; j
++)
1278 for (i
=2; i
<ido
; i
+=2)
1281 for (k
=0; k
<l1
; k
++)
1283 ch
[i
- 1 + (k
+ j
*l1
)*ido
] = cc
[i
- 1 + (2*j
+ k
*ip
)*ido
] + cc
[ic
- 1 + (2*j
- 1 + k
*ip
)*ido
];
1284 ch
[i
- 1 + (k
+ jc
*l1
)*ido
] = cc
[i
- 1 + (2*j
+ k
*ip
)*ido
] - cc
[ic
- 1 + (2*j
- 1 + k
*ip
)*ido
];
1285 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (2*j
+ k
*ip
)*ido
] - cc
[ic
+ (2*j
- 1 + k
*ip
)*ido
];
1286 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (2*j
+ k
*ip
)*ido
] + cc
[ic
+ (2*j
- 1 + k
*ip
)*ido
];
1295 for (l
=1; l
<ipph
; l
++)
1298 ar1h
= dcp
*ar1
- dsp
*ai1
;
1299 ai1
= dcp
*ai1
+ dsp
*ar1
;
1301 for (ik
=0; ik
<idl1
; ik
++)
1303 cc
[ik
+ l
*idl1
] = ch
[ik
] + ar1
*ch
[ik
+ idl1
];
1304 cc
[ik
+ lc
*idl1
] = ai1
*ch
[ik
+ (ip
-1)*idl1
];
1310 for (j
=2; j
<ipph
; j
++)
1313 ar2h
= dc2
*ar2
- ds2
*ai2
;
1314 ai2
= dc2
*ai2
+ ds2
*ar2
;
1316 for (ik
=0; ik
<idl1
; ik
++)
1318 cc
[ik
+ l
*idl1
] += ar2
*ch
[ik
+ j
*idl1
];
1319 cc
[ik
+ lc
*idl1
] += ai2
*ch
[ik
+ jc
*idl1
];
1323 for (j
=1; j
<ipph
; j
++)
1325 for (ik
=0; ik
<idl1
; ik
++)
1327 ch
[ik
] += ch
[ik
+ j
*idl1
];
1330 for (j
=1; j
<ipph
; j
++)
1333 for (k
=0; k
<l1
; k
++)
1335 ch
[(k
+ j
*l1
)*ido
] = cc
[(k
+ j
*l1
)*ido
] - cc
[(k
+ jc
*l1
)*ido
];
1336 ch
[(k
+ jc
*l1
)*ido
] = cc
[(k
+ j
*l1
)*ido
] + cc
[(k
+ jc
*l1
)*ido
];
1340 if (ido
== 1) return;
1343 for (j
=1; j
<ipph
; j
++)
1346 for (k
=0; k
<l1
; k
++)
1348 for (i
=2; i
<ido
; i
+=2)
1350 ch
[i
- 1 + (k
+ j
*l1
)*ido
] = cc
[i
- 1 + (k
+ j
*l1
)*ido
] - cc
[i
+ (k
+ jc
*l1
)*ido
];
1351 ch
[i
- 1 + (k
+ jc
*l1
)*ido
] = cc
[i
- 1 + (k
+ j
*l1
)*ido
] + cc
[i
+ (k
+ jc
*l1
)*ido
];
1352 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (k
+ j
*l1
)*ido
] + cc
[i
- 1 + (k
+ jc
*l1
)*ido
];
1353 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (k
+ j
*l1
)*ido
] - cc
[i
- 1 + (k
+ jc
*l1
)*ido
];
1360 for (j
=1; j
<ipph
; j
++)
1363 for (i
=2; i
<ido
; i
+=2)
1365 for (k
=0; k
<l1
; k
++)
1367 ch
[i
- 1 + (k
+ j
*l1
)*ido
] = cc
[i
- 1 + (k
+ j
*l1
)*ido
] - cc
[i
+ (k
+ jc
*l1
)*ido
];
1368 ch
[i
- 1 + (k
+ jc
*l1
)*ido
] = cc
[i
- 1 + (k
+ j
*l1
)*ido
] + cc
[i
+ (k
+ jc
*l1
)*ido
];
1369 ch
[i
+ (k
+ j
*l1
)*ido
] = cc
[i
+ (k
+ j
*l1
)*ido
] + cc
[i
- 1 + (k
+ jc
*l1
)*ido
];
1370 ch
[i
+ (k
+ jc
*l1
)*ido
] = cc
[i
+ (k
+ j
*l1
)*ido
] - cc
[i
- 1 + (k
+ jc
*l1
)*ido
];
1375 for (ik
=0; ik
<idl1
; ik
++)
1379 for (j
=1; j
<ip
; j
++)
1380 for (k
=0; k
<l1
; k
++)
1381 cc
[(k
+ j
*l1
)*ido
] = ch
[(k
+ j
*l1
)*ido
];
1386 for (j
=1; j
<ip
; j
++)
1390 for (i
=2; i
<ido
; i
+=2)
1393 for (k
=0; k
<l1
; k
++)
1395 cc
[i
- 1 + (k
+ j
*l1
)*ido
] = wa
[idij
- 1]*ch
[i
- 1 + (k
+ j
*l1
)*ido
] - wa
[idij
]*ch
[i
+ (k
+ j
*l1
)*ido
];
1396 cc
[i
+ (k
+ j
*l1
)*ido
] = wa
[idij
- 1]*ch
[i
+ (k
+ j
*l1
)*ido
] + wa
[idij
]*ch
[i
- 1 + (k
+ j
*l1
)*ido
];
1404 for (j
=1; j
<ip
; j
++)
1407 for (k
=0; k
<l1
; k
++)
1410 for (i
=2; i
<ido
; i
+=2)
1413 cc
[i
- 1 + (k
+ j
*l1
)*ido
] = wa
[idij
-1]*ch
[i
- 1 + (k
+ j
*l1
)*ido
] - wa
[idij
]*ch
[i
+ (k
+ j
*l1
)*ido
];
1414 cc
[i
+ (k
+ j
*l1
)*ido
] = wa
[idij
-1]*ch
[i
+ (k
+ j
*l1
)*ido
] + wa
[idij
]*ch
[i
- 1 + (k
+ j
*l1
)*ido
];
1424 fftpack_cfftf1(int n
,
1433 int na
, nf
, ip
, iw
, ix2
, ix3
, ix4
, nac
, ido
, idl1
;
1434 real
*cinput
, *coutput
;
1440 for (k1
=2; k1
<=nf
+1; k1
++)
1462 fftpack_passf4(idot
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
], isign
);
1466 fftpack_passf2(idot
, l1
, cinput
, coutput
, &wa
[iw
], isign
);
1471 fftpack_passf3(idot
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], isign
);
1478 fftpack_passf5(idot
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
], &wa
[ix4
], isign
);
1482 fftpack_passf(&nac
, idot
, ip
, l1
, idl1
, cinput
, coutput
, &wa
[iw
], isign
);
1483 if (nac
!= 0) na
= !na
;
1486 iw
+= (ip
- 1)*idot
;
1490 for (i
=0; i
<2*n
; i
++)
1496 fftpack_cfftf(int n
,
1506 fftpack_cfftf1(n
, c
, wsave
, wsave
+iw1
, (int*)(wsave
+iw2
), -1);
1511 fftpack_cfftb(int n
,
1521 fftpack_cfftf1(n
, c
, wsave
, wsave
+iw1
, (int*)(wsave
+iw2
), +1);
1526 fftpack_factorize(int n
,
1529 static const int ntryh
[4] = { 3,4,2,5 };
1530 int ntry
=3, i
, j
=0, ib
, nf
=0, nl
=n
, nq
, nr
;
1542 if (nr
!= 0) goto startloop
;
1544 ifac
[nf
+ 1] = ntry
;
1546 if (ntry
== 2 && nf
!= 1)
1548 for (i
=2; i
<=nf
; i
++)
1551 ifac
[ib
+ 1] = ifac
[ib
];
1563 fftpack_cffti1(int n
,
1567 const real twopi
= 6.28318530717959;
1568 real arg
, argh
, argld
, fi
;
1574 fftpack_factorize(n
,ifac
);
1576 argh
= twopi
/(real
)n
;
1579 for (k1
=1; k1
<=nf
; k1
++)
1585 idot
= ido
+ ido
+ 2;
1587 for (j
=1; j
<=ipm
; j
++)
1595 for (ii
=4; ii
<=idot
; ii
+=2)
1617 fftpack_rfftf1(int n
,
1624 int k1
, l1
, l2
, na
, kh
, nf
, ip
, iw
, ix2
, ix3
, ix4
, ido
, idl1
;
1625 real
*cinput
, *coutput
;
1630 for (k1
= 1; k1
<= nf
; ++k1
)
1654 fftpack_radf4(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
]);
1657 fftpack_radf2(ido
, l1
, cinput
, coutput
, &wa
[iw
]);
1661 fftpack_radf3(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
]);
1667 fftpack_radf5(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
], &wa
[ix4
]);
1674 fftpack_radfg(ido
, ip
, l1
, idl1
, c
, ch
, &wa
[iw
]);
1679 fftpack_radfg(ido
, ip
, l1
, idl1
, ch
, c
, &wa
[iw
]);
1687 for (i
= 0; i
< n
; i
++)
1693 fftpack_rfftb1(int n
,
1700 int k1
, l1
, l2
, na
, nf
, ip
, iw
, ix2
, ix3
, ix4
, ido
, idl1
;
1701 real
*cinput
, *coutput
;
1707 for (k1
=1; k1
<=nf
; k1
++)
1728 fftpack_radb4(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
]);
1732 fftpack_radb2(ido
, l1
, cinput
, coutput
, &wa
[iw
]);
1737 fftpack_radb3(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
]);
1744 fftpack_radb5(ido
, l1
, cinput
, coutput
, &wa
[iw
], &wa
[ix2
], &wa
[ix3
], &wa
[ix4
]);
1748 fftpack_radbg(ido
, ip
, l1
, idl1
, cinput
, coutput
, &wa
[iw
]);
1749 if (ido
== 1) na
= !na
;
1764 fftpack_rffti1(int n
,
1768 const real twopi
= 6.28318530717959;
1769 real arg
, argh
, argld
, fi
;
1772 int ld
, ii
, nf
, ip
, is
;
1774 fftpack_factorize(n
,ifac
);
1780 if (nfm1
== 0) return;
1781 for (k1
= 1; k1
<= nfm1
; k1
++)
1788 for (j
= 1; j
<= ipm
; ++j
)
1792 argld
= (real
) ld
*argh
;
1794 for (ii
= 3; ii
<= ido
; ii
+= 2)
1799 wa
[i
- 2] = cos(arg
);
1800 wa
[i
- 1] = sin(arg
);
1811 /* End of fftpack - begin GROMACS code */
1815 gmx_fft_init_1d(gmx_fft_t
* pfft
,
1823 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
1828 if( (fft
= (struct gmx_fft
*)malloc(sizeof(struct gmx_fft
))) == NULL
)
1836 /* Need 4*n storage for 1D complex FFT */
1837 if( (fft
->work
= (real
*)malloc(sizeof(real
)*(4*nx
))) == NULL
)
1844 fftpack_cffti1(nx
,fft
->work
,fft
->ifac
);
1853 gmx_fft_init_1d_real(gmx_fft_t
* pfft
,
1861 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
1866 if( (fft
= (struct gmx_fft
*)malloc(sizeof(struct gmx_fft
))) == NULL
)
1874 /* Need 2*n storage for 1D real FFT */
1875 if((fft
->work
= (real
*)malloc(sizeof(real
)*(2*nx
)))==NULL
)
1882 fftpack_rffti1(nx
,fft
->work
,fft
->ifac
);
1891 gmx_fft_init_2d(gmx_fft_t
* pfft
,
1901 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
1906 /* Create the X transform */
1907 if( (rc
= gmx_fft_init_1d(&fft
,nx
,flags
)) != 0)
1912 /* Create Y transform as a link from X */
1913 if( (rc
=gmx_fft_init_1d(&(fft
->next
),ny
,flags
)) != 0)
1925 gmx_fft_init_2d_real(gmx_fft_t
* pfft
,
1931 int nyc
= (ny
/2 + 1);
1936 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
1941 /* Create the X transform */
1942 if( (fft
= (struct gmx_fft
*)malloc(sizeof(struct gmx_fft
))) == NULL
)
1949 /* Need 4*nx storage for 1D complex FFT, and another
1950 * 2*nx*nyc elements for complex-to-real storage in our high-level routine.
1952 if( (fft
->work
= (real
*)malloc(sizeof(real
)*(4*nx
+2*nx
*nyc
))) == NULL
)
1957 fftpack_cffti1(nx
,fft
->work
,fft
->ifac
);
1959 /* Create real Y transform as a link from X */
1960 if( (rc
=gmx_fft_init_1d_real(&(fft
->next
),ny
,flags
)) != 0)
1972 gmx_fft_init_3d(gmx_fft_t
* pfft
,
1983 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
1988 /* Create the X transform */
1990 if( (fft
= (struct gmx_fft
*)malloc(sizeof(struct gmx_fft
))) == NULL
)
1997 /* Need 4*nx storage for 1D complex FFT, and another
1998 * 2*nz elements for gmx_fft_transpose_2d_nelem() storage.
2000 if( (fft
->work
= (real
*)malloc(sizeof(real
)*(4*nx
+2*nz
))) == NULL
)
2006 fftpack_cffti1(nx
,fft
->work
,fft
->ifac
);
2009 /* Create 2D Y/Z transforms as a link from X */
2010 if( (rc
=gmx_fft_init_2d(&(fft
->next
),ny
,nz
,flags
)) != 0)
2022 gmx_fft_init_3d_real(gmx_fft_t
* pfft
,
2029 int nzc
= (nz
/2 + 1);
2034 gmx_fatal(FARGS
,"Invalid FFT opaque type pointer.");
2039 /* Create the X transform */
2040 if( (fft
= (struct gmx_fft
*)malloc(sizeof(struct gmx_fft
))) == NULL
)
2047 /* Need 4*nx storage for 1D complex FFT, another
2048 * 2*nx*ny*nzc elements to copy the entire 3D matrix when
2049 * doing out-of-place complex-to-real FFTs, and finally
2050 * 2*nzc elements for transpose work space.
2052 if( (fft
->work
= (real
*)malloc(sizeof(real
)*(4*nx
+2*nx
*ny
*nzc
+2*nzc
))) == NULL
)
2057 fftpack_cffti1(nx
,fft
->work
,fft
->ifac
);
2059 /* Create 2D real Y/Z transform as a link from X */
2060 if( (rc
=gmx_fft_init_2d_real(&(fft
->next
),ny
,nz
,flags
)) != 0)
2072 gmx_fft_1d (gmx_fft_t fft
,
2073 enum gmx_fft_direction dir
,
2085 p1
= (real
*)in_data
;
2086 p2
= (real
*)out_data
;
2091 /* FFTPACK only does in-place transforms, so emulate out-of-place
2092 * by copying data to the output array first.
2094 if( in_data
!= out_data
)
2096 p1
= (real
*)in_data
;
2097 p2
= (real
*)out_data
;
2099 /* n complex = 2*n real elements */
2106 /* Elements 0 .. 2*n-1 in work are used for ffac values,
2107 * Elements 2*n .. 4*n-1 are internal FFTPACK work space.
2110 if(dir
== GMX_FFT_FORWARD
)
2112 fftpack_cfftf1(n
,(real
*)out_data
,fft
->work
+2*n
,fft
->work
,fft
->ifac
, -1);
2114 else if(dir
== GMX_FFT_BACKWARD
)
2116 fftpack_cfftf1(n
,(real
*)out_data
,fft
->work
+2*n
,fft
->work
,fft
->ifac
, 1);
2120 gmx_fatal(FARGS
,"FFT plan mismatch - bad plan or direction.");
2130 gmx_fft_1d_real (gmx_fft_t fft
,
2131 enum gmx_fft_direction dir
,
2143 p1
= (real
*)in_data
;
2144 p2
= (real
*)out_data
;
2146 if(dir
== GMX_FFT_REAL_TO_COMPLEX
)
2150 if(dir
== GMX_FFT_REAL_TO_COMPLEX
)
2152 /* FFTPACK only does in-place transforms, so emulate out-of-place
2153 * by copying data to the output array first. This works fine, since
2154 * the complex array must be larger than the real.
2156 if( in_data
!= out_data
)
2158 p1
= (real
*)in_data
;
2159 p2
= (real
*)out_data
;
2161 for(i
=0;i
<2*(n
/2+1);i
++)
2167 /* Elements 0 .. n-1 in work are used for ffac values,
2168 * Elements n .. 2*n-1 are internal FFTPACK work space.
2170 fftpack_rfftf1(n
,(real
*)out_data
,fft
->work
+n
,fft
->work
,fft
->ifac
);
2173 * FFTPACK has a slightly more compact storage than we, time to
2174 * convert it: ove most of the array one step up to make room for
2175 * zero imaginary parts.
2177 p2
= (real
*)out_data
;
2182 /* imaginary zero freq. */
2186 if( (n
& 0x1) == 0 )
2192 else if(dir
== GMX_FFT_COMPLEX_TO_REAL
)
2194 /* FFTPACK only does in-place transforms, and we cannot just copy
2195 * input to output first here since our real array is smaller than
2196 * the complex one. However, since the FFTPACK complex storage format
2197 * is more compact than ours (2 reals) it will fit, so compact it
2198 * and copy on-the-fly to the output array.
2200 p1
= (real
*) in_data
;
2201 p2
= (real
*)out_data
;
2208 fftpack_rfftb1(n
,(real
*)out_data
,fft
->work
+n
,fft
->work
,fft
->ifac
);
2212 gmx_fatal(FARGS
,"FFT plan mismatch - bad plan or direction.");
2221 gmx_fft_2d (gmx_fft_t fft
,
2222 enum gmx_fft_direction dir
,
2232 /* FFTPACK only does in-place transforms, so emulate out-of-place
2233 * by copying data to the output array first.
2234 * For 2D there is likely enough data to benefit from memcpy().
2236 if( in_data
!= out_data
)
2238 memcpy(out_data
,in_data
,sizeof(t_complex
)*nx
*ny
);
2241 /* Much easier to do pointer arithmetic when base has the correct type */
2242 data
= (t_complex
*)out_data
;
2247 gmx_fft_1d(fft
->next
,dir
,data
+i
*ny
,data
+i
*ny
);
2250 /* Transpose in-place to get data in place for x transform now */
2251 gmx_fft_transpose_2d(data
,data
,nx
,ny
);
2256 gmx_fft_1d(fft
,dir
,data
+i
*nx
,data
+i
*nx
);
2259 /* Transpose in-place to get data back in original order */
2260 gmx_fft_transpose_2d(data
,data
,ny
,nx
);
2268 gmx_fft_2d_real (gmx_fft_t fft
,
2269 enum gmx_fft_direction dir
,
2281 /* Number of complex elements in y direction */
2284 work
= fft
->work
+4*nx
;
2286 if(dir
==GMX_FFT_REAL_TO_COMPLEX
)
2288 /* If we are doing an in-place transform the 2D array is already
2289 * properly padded by the user, and we are all set.
2291 * For out-of-place there is no array padding, but FFTPACK only
2292 * does in-place FFTs internally, so we need to start by copying
2293 * data from the input to the padded (larger) output array.
2295 if( in_data
!= out_data
)
2297 p1
= (real
*)in_data
;
2298 p2
= (real
*)out_data
;
2304 p2
[i
*nyc
*2+j
] = p1
[i
*ny
+j
];
2308 data
= (t_complex
*)out_data
;
2310 /* y real-to-complex FFTs */
2313 gmx_fft_1d_real(fft
->next
,GMX_FFT_REAL_TO_COMPLEX
,data
+i
*nyc
,data
+i
*nyc
);
2316 /* Transform to get X data in place */
2317 gmx_fft_transpose_2d(data
,data
,nx
,nyc
);
2319 /* Complex-to-complex X FFTs */
2322 gmx_fft_1d(fft
,GMX_FFT_FORWARD
,data
+i
*nx
,data
+i
*nx
);
2325 /* Transpose back */
2326 gmx_fft_transpose_2d(data
,data
,nyc
,nx
);
2329 else if(dir
==GMX_FFT_COMPLEX_TO_REAL
)
2331 /* An in-place complex-to-real transform is straightforward,
2332 * since the output array must be large enough for the padding to fit.
2334 * For out-of-place complex-to-real transforms we cannot just copy
2335 * data to the output array, since it is smaller than the input.
2336 * In this case there's nothing to do but employing temporary work data,
2337 * starting at work+4*nx and using nx*nyc*2 elements.
2339 if(in_data
!= out_data
)
2341 memcpy(work
,in_data
,sizeof(t_complex
)*nx
*nyc
);
2342 data
= (t_complex
*)work
;
2347 data
= (t_complex
*)out_data
;
2350 /* Transpose to get X arrays */
2351 gmx_fft_transpose_2d(data
,data
,nx
,nyc
);
2356 gmx_fft_1d(fft
,GMX_FFT_BACKWARD
,data
+i
*nx
,data
+i
*nx
);
2359 /* Transpose to get Y arrays */
2360 gmx_fft_transpose_2d(data
,data
,nyc
,nx
);
2365 gmx_fft_1d_real(fft
->next
,GMX_FFT_COMPLEX_TO_REAL
,data
+i
*nyc
,data
+i
*nyc
);
2368 if( in_data
!= out_data
)
2370 /* Output (pointed to by data) is now in padded format.
2371 * Pack it into out_data if we were doing an out-of-place transform.
2374 p2
= (real
*)out_data
;
2380 p2
[i
*ny
+j
] = p1
[i
*nyc
*2+j
];
2387 gmx_fatal(FARGS
,"FFT plan mismatch - bad plan or direction.");
2397 gmx_fft_3d (gmx_fft_t fft
,
2398 enum gmx_fft_direction dir
,
2407 nz
=fft
->next
->next
->n
;
2409 /* First 4*nx positions are FFTPACK workspace, then ours starts */
2410 work
= (t_complex
*)(fft
->work
+4*nx
);
2412 /* FFTPACK only does in-place transforms, so emulate out-of-place
2413 * by copying data to the output array first.
2414 * For 3D there is likely enough data to benefit from memcpy().
2416 if( in_data
!= out_data
)
2418 memcpy(out_data
,in_data
,sizeof(t_complex
)*nx
*ny
*nz
);
2421 /* Much easier to do pointer arithmetic when base has the correct type */
2422 data
= (t_complex
*)out_data
;
2424 /* Perform z transforms */
2425 for(i
=0;i
<nx
*ny
;i
++)
2426 gmx_fft_1d(fft
->next
->next
,dir
,data
+i
*nz
,data
+i
*nz
);
2428 /* For each X slice, transpose the y & z dimensions inside the slice */
2431 gmx_fft_transpose_2d(data
+i
*ny
*nz
,data
+i
*ny
*nz
,ny
,nz
);
2434 /* Array is now (nx,nz,ny) - perform y transforms */
2435 for(i
=0;i
<nx
*nz
;i
++)
2437 gmx_fft_1d(fft
->next
,dir
,data
+i
*ny
,data
+i
*ny
);
2440 /* Transpose back to (nx,ny,nz) */
2443 gmx_fft_transpose_2d(data
+i
*ny
*nz
,data
+i
*ny
*nz
,nz
,ny
);
2446 /* Transpose entire x & y slices to go from
2447 * (nx,ny,nz) to (ny,nx,nz).
2448 * Use work data elements 4*n .. 4*n+2*nz-1.
2450 rc
=gmx_fft_transpose_2d_nelem(data
,data
,nx
,ny
,nz
,work
);
2453 gmx_fatal(FARGS
,"Cannot transpose X & Y/Z in gmx_fft_3d().");
2457 /* Then go from (ny,nx,nz) to (ny,nz,nx) */
2460 gmx_fft_transpose_2d(data
+i
*nx
*nz
,data
+i
*nx
*nz
,nx
,nz
);
2463 /* Perform x transforms */
2464 for(i
=0;i
<ny
*nz
;i
++)
2466 gmx_fft_1d(fft
,dir
,data
+i
*nx
,data
+i
*nx
);
2469 /* Transpose back from (ny,nz,nx) to (ny,nx,nz) */
2472 gmx_fft_transpose_2d(data
+i
*nz
*nx
,data
+i
*nz
*nx
,nz
,nx
);
2475 /* Transpose from (ny,nx,nz) to (nx,ny,nz)
2476 * Use work data elements 4*n .. 4*n+2*nz-1.
2478 rc
= gmx_fft_transpose_2d_nelem(data
,data
,ny
,nx
,nz
,work
);
2481 gmx_fatal(FARGS
,"Cannot transpose Y/Z & X in gmx_fft_3d().");
2490 gmx_fft_3d_real (gmx_fft_t fft
,
2491 enum gmx_fft_direction dir
,
2496 int nx
,ny
,nz
,nzc
,rc
;
2498 t_complex
* work_transp
;
2499 t_complex
* work_c2r
;
2505 nz
=fft
->next
->next
->n
;
2509 /* First 4*nx positions are FFTPACK workspace, then ours starts.
2510 * We have 2*nx*ny*nzc elements for temp complex-to-real storage when
2511 * doing out-of-place transforms, and another 2*nzc for transpose data.
2513 work_c2r
= (t_complex
*)(fft
->work
+4*nx
);
2514 work_transp
= (t_complex
*)(fft
->work
+4*nx
+2*nx
*ny
*nzc
);
2516 /* Much easier to do pointer arithmetic when base has the correct type */
2517 data
= (t_complex
*)out_data
;
2519 if(dir
==GMX_FFT_REAL_TO_COMPLEX
)
2521 /* FFTPACK only does in-place transforms, so emulate out-of-place
2522 * by copying data to the output array first. This is guaranteed to
2523 * work for real-to-complex since complex data is larger than the real.
2524 * For 3D there is likely enough data to benefit from memcpy().
2526 if( in_data
!= out_data
)
2528 p1
= (real
*)in_data
;
2529 p2
= (real
*)out_data
;
2537 p2
[(i
*ny
+j
)*2*nzc
+k
] = p1
[(i
*ny
+j
)*nz
+k
];
2542 data
= (t_complex
*)out_data
;
2544 /* Transform the Y/Z slices real-to-complex */
2547 gmx_fft_2d_real(fft
->next
,dir
,data
+i
*ny
*nzc
,data
+i
*ny
*nzc
);
2550 /* Transpose x & y slices to go from
2551 * (nx,ny,nzc) to (ny,nx,nzc).
2553 rc
=gmx_fft_transpose_2d_nelem(data
,data
,nx
,ny
,nzc
,work_transp
);
2556 gmx_fatal(FARGS
,"Cannot transpose X & Y/Z gmx_fft_3d_real().");
2560 /* Then transpose from (ny,nx,nzc) to (ny,nzc,nx) */
2563 gmx_fft_transpose_2d(data
+i
*nx
*nzc
,data
+i
*nx
*nzc
,nx
,nzc
);
2566 /* Perform x transforms */
2567 for(i
=0;i
<ny
*nzc
;i
++)
2569 gmx_fft_1d(fft
,GMX_FFT_FORWARD
,data
+i
*nx
,data
+i
*nx
);
2572 /* Transpose from (ny,nzc,nx) back to (ny,nx,nzc) */
2575 gmx_fft_transpose_2d(data
+i
*nzc
*nx
,data
+i
*nzc
*nx
,nzc
,nx
);
2578 /* Transpose back from (ny,nx,nzc) to (nx,ny,nz) */
2579 rc
=gmx_fft_transpose_2d_nelem(data
,data
,ny
,nx
,nzc
,work_transp
);
2582 gmx_fatal(FARGS
,"Cannot transpose Y/Z & X in gmx_fft_3d_real().");
2587 else if(dir
==GMX_FFT_COMPLEX_TO_REAL
)
2589 /* An in-place complex-to-real transform is straightforward,
2590 * since the output array must be large enough for the padding to fit.
2592 * For out-of-place complex-to-real transforms we cannot just copy
2593 * data to the output array, since it is smaller than the input.
2594 * In this case there's nothing to do but employing temporary work data.
2596 if(in_data
!= out_data
)
2598 memcpy(work_c2r
,in_data
,sizeof(t_complex
)*nx
*ny
*nzc
);
2599 data
= (t_complex
*)work_c2r
;
2604 data
= (t_complex
*)out_data
;
2607 /* Transpose x & y slices to go from
2608 * (nx,ny,nz) to (ny,nx,nz).
2610 gmx_fft_transpose_2d_nelem(data
,data
,nx
,ny
,nzc
,work_transp
);
2612 /* Then go from (ny,nx,nzc) to (ny,nzc,nx) */
2615 gmx_fft_transpose_2d(data
+i
*nx
*nzc
,data
+i
*nx
*nzc
,nx
,nzc
);
2619 /* Perform x transforms */
2620 for(i
=0;i
<ny
*nzc
;i
++)
2622 gmx_fft_1d(fft
,GMX_FFT_BACKWARD
,data
+i
*nx
,data
+i
*nx
);
2625 /* Transpose back from (ny,nzc,nx) to (ny,nx,nzc) */
2628 gmx_fft_transpose_2d(data
+i
*nzc
*nx
,data
+i
*nzc
*nx
,nzc
,nx
);
2631 /* Transpose back from (ny,nx,nzc) to (nx,ny,nz) */
2632 gmx_fft_transpose_2d_nelem(data
,data
,ny
,nx
,nzc
,work_transp
);
2635 /* Do 2D complex-to-real */
2638 gmx_fft_2d_real(fft
->next
,dir
,data
+i
*ny
*nzc
,data
+i
*ny
*nzc
);
2641 if( in_data
!= out_data
)
2643 /* Output (pointed to by data) is now in padded format.
2644 * Pack it into out_data if we were doing an out-of-place transform.
2647 p2
= (real
*)out_data
;
2655 p2
[(i
*ny
+j
)*nz
+k
] = p1
[(i
*ny
+j
)*nzc
*2+k
];
2664 gmx_fatal(FARGS
,"FFT plan mismatch - bad plan or direction.");
2675 gmx_fft_destroy(gmx_fft_t fft
)
2680 if(fft
->next
!= NULL
)
2681 gmx_fft_destroy(fft
->next
);
2687 gmx_fft_fftpack_empty
;
2688 #endif /* GMX_FFT_FFTPACK */