2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/fft/fft.h"
47 #include "gromacs/utility/exceptions.h"
48 #include "gromacs/utility/fatalerror.h"
49 #include "gromacs/utility/mutex.h"
52 # define FFTWPREFIX(name) fftw_##name
54 # define FFTWPREFIX(name) fftwf_##name
57 /* none of the fftw3 calls, except execute(), are thread-safe, so
58 we need to serialize them with this mutex. */
59 static gmx::Mutex big_fftw_mutex
;
63 big_fftw_mutex.lock(); \
65 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
69 big_fftw_mutex.unlock(); \
71 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
73 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
75 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
76 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
77 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
81 * Contents of the FFTW3 fft datatype.
83 * Note that this is one of several possible implementations of gmx_fft_t.
94 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
95 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
96 * first index: 0=unaligned, 1=aligned
97 * second index: 0=out-of-place, 1=in-place
98 * third index: 0=backward, 1=forward
100 FFTWPREFIX(plan
) plan
[2][2][2];
101 /** Used to catch user mistakes */
103 /** Number of dimensions in the FFT */
107 int gmx_fft_init_1d(gmx_fft_t
* pfft
, int nx
, gmx_fft_flag flags
)
109 return gmx_fft_init_many_1d(pfft
, nx
, 1, flags
);
113 int gmx_fft_init_many_1d(gmx_fft_t
* pfft
, int nx
, int howmany
, gmx_fft_flag flags
)
116 FFTWPREFIX(complex) * p1
, *p2
, *up1
, *up2
;
121 #if GMX_DISABLE_FFTW_MEASURE
122 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
125 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
129 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
135 if ((fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
141 /* allocate aligned, and extra memory to make it unaligned */
142 p1
= static_cast<FFTWPREFIX(complex)*>(
143 FFTWPREFIX(malloc
)(sizeof(FFTWPREFIX(complex)) * (nx
+ 2) * howmany
));
146 FFTWPREFIX(free
)(fft
);
151 p2
= static_cast<FFTWPREFIX(complex)*>(
152 FFTWPREFIX(malloc
)(sizeof(FFTWPREFIX(complex)) * (nx
+ 2) * howmany
));
155 FFTWPREFIX(free
)(p1
);
156 FFTWPREFIX(free
)(fft
);
161 /* make unaligned pointers.
162 * In double precision the actual complex datatype will be 16 bytes,
163 * so go to a char pointer and force an offset of 8 bytes instead.
165 pc
= reinterpret_cast<char*>(p1
);
167 up1
= reinterpret_cast<FFTWPREFIX(complex)*>(pc
);
169 pc
= reinterpret_cast<char*>(p2
);
171 up2
= reinterpret_cast<FFTWPREFIX(complex)*>(pc
);
173 /* int rank, const int *n, int howmany,
174 fftw_complex *in, const int *inembed,
175 int istride, int idist,
176 fftw_complex *out, const int *onembed,
177 int ostride, int odist,
178 int sign, unsigned flags */
179 fft
->plan
[0][0][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up2
, &nx
, 1,
180 nx
, FFTW_BACKWARD
, fftw_flags
);
181 fft
->plan
[0][0][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up2
, &nx
, 1,
182 nx
, FFTW_FORWARD
, fftw_flags
);
183 fft
->plan
[0][1][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up1
, &nx
, 1,
184 nx
, FFTW_BACKWARD
, fftw_flags
);
185 fft
->plan
[0][1][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, up1
, &nx
, 1, nx
, up1
, &nx
, 1,
186 nx
, FFTW_FORWARD
, fftw_flags
);
187 fft
->plan
[1][0][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p2
, &nx
, 1, nx
,
188 FFTW_BACKWARD
, fftw_flags
);
189 fft
->plan
[1][0][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p2
, &nx
, 1, nx
,
190 FFTW_FORWARD
, fftw_flags
);
191 fft
->plan
[1][1][0] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p1
, &nx
, 1, nx
,
192 FFTW_BACKWARD
, fftw_flags
);
193 fft
->plan
[1][1][1] = FFTWPREFIX(plan_many_dft
)(1, &nx
, howmany
, p1
, &nx
, 1, nx
, p1
, &nx
, 1, nx
,
194 FFTW_FORWARD
, fftw_flags
);
196 for (i
= 0; i
< 2; i
++)
198 for (j
= 0; j
< 2; j
++)
200 for (k
= 0; k
< 2; k
++)
202 if (fft
->plan
[i
][j
][k
] == nullptr)
204 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
206 gmx_fft_destroy(fft
);
208 FFTWPREFIX(free
)(p1
);
209 FFTWPREFIX(free
)(p2
);
217 FFTWPREFIX(free
)(p1
);
218 FFTWPREFIX(free
)(p2
);
220 fft
->real_transform
= 0;
228 int gmx_fft_init_1d_real(gmx_fft_t
* pfft
, int nx
, gmx_fft_flag flags
)
230 return gmx_fft_init_many_1d_real(pfft
, nx
, 1, flags
);
233 int gmx_fft_init_many_1d_real(gmx_fft_t
* pfft
, int nx
, int howmany
, gmx_fft_flag flags
)
236 real
* p1
, *p2
, *up1
, *up2
;
241 #if GMX_DISABLE_FFTW_MEASURE
242 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
245 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
249 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
255 if ((fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
261 /* allocate aligned, and extra memory to make it unaligned */
262 p1
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) * (nx
/ 2 + 1) * 2 * howmany
+ 8));
265 FFTWPREFIX(free
)(fft
);
270 p2
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) * (nx
/ 2 + 1) * 2 * howmany
+ 8));
273 FFTWPREFIX(free
)(p1
);
274 FFTWPREFIX(free
)(fft
);
279 /* make unaligned pointers.
280 * In double precision the actual complex datatype will be 16 bytes,
281 * so go to a char pointer and force an offset of 8 bytes instead.
283 pc
= reinterpret_cast<char*>(p1
);
285 up1
= reinterpret_cast<real
*>(pc
);
287 pc
= reinterpret_cast<char*>(p2
);
289 up2
= reinterpret_cast<real
*>(pc
);
291 /* int rank, const int *n, int howmany,
292 double *in, const int *inembed,
293 int istride, int idist,
294 fftw_complex *out, const int *onembed,
295 int ostride, int odist,
297 fft
->plan
[0][0][1] = FFTWPREFIX(plan_many_dft_r2c
)(
298 1, &nx
, howmany
, up1
, nullptr, 1, (nx
/ 2 + 1) * 2,
299 reinterpret_cast<FFTWPREFIX(complex)*>(up2
), nullptr, 1, (nx
/ 2 + 1), fftw_flags
);
300 fft
->plan
[0][1][1] = FFTWPREFIX(plan_many_dft_r2c
)(
301 1, &nx
, howmany
, up1
, nullptr, 1, (nx
/ 2 + 1) * 2,
302 reinterpret_cast<FFTWPREFIX(complex)*>(up1
), nullptr, 1, (nx
/ 2 + 1), fftw_flags
);
303 fft
->plan
[1][0][1] = FFTWPREFIX(plan_many_dft_r2c
)(
304 1, &nx
, howmany
, p1
, nullptr, 1, (nx
/ 2 + 1) * 2,
305 reinterpret_cast<FFTWPREFIX(complex)*>(p2
), nullptr, 1, (nx
/ 2 + 1), fftw_flags
);
306 fft
->plan
[1][1][1] = FFTWPREFIX(plan_many_dft_r2c
)(
307 1, &nx
, howmany
, p1
, nullptr, 1, (nx
/ 2 + 1) * 2,
308 reinterpret_cast<FFTWPREFIX(complex)*>(p1
), nullptr, 1, (nx
/ 2 + 1), fftw_flags
);
310 fft
->plan
[0][0][0] = FFTWPREFIX(plan_many_dft_c2r
)(
311 1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex)*>(up1
), nullptr, 1, (nx
/ 2 + 1),
312 up2
, nullptr, 1, (nx
/ 2 + 1) * 2, fftw_flags
);
313 fft
->plan
[0][1][0] = FFTWPREFIX(plan_many_dft_c2r
)(
314 1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex)*>(up1
), nullptr, 1, (nx
/ 2 + 1),
315 up1
, nullptr, 1, (nx
/ 2 + 1) * 2, fftw_flags
);
316 fft
->plan
[1][0][0] = FFTWPREFIX(plan_many_dft_c2r
)(
317 1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex)*>(p1
), nullptr, 1, (nx
/ 2 + 1),
318 p2
, nullptr, 1, (nx
/ 2 + 1) * 2, fftw_flags
);
319 fft
->plan
[1][1][0] = FFTWPREFIX(plan_many_dft_c2r
)(
320 1, &nx
, howmany
, reinterpret_cast<FFTWPREFIX(complex)*>(p1
), nullptr, 1, (nx
/ 2 + 1),
321 p1
, nullptr, 1, (nx
/ 2 + 1) * 2, fftw_flags
);
323 for (i
= 0; i
< 2; i
++)
325 for (j
= 0; j
< 2; j
++)
327 for (k
= 0; k
< 2; k
++)
329 if (fft
->plan
[i
][j
][k
] == nullptr)
331 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
333 gmx_fft_destroy(fft
);
335 FFTWPREFIX(free
)(p1
);
336 FFTWPREFIX(free
)(p2
);
344 FFTWPREFIX(free
)(p1
);
345 FFTWPREFIX(free
)(p2
);
347 fft
->real_transform
= 1;
356 int gmx_fft_init_2d_real(gmx_fft_t
* pfft
, int nx
, int ny
, gmx_fft_flag flags
)
359 real
* p1
, *p2
, *up1
, *up2
;
364 #if GMX_DISABLE_FFTW_MEASURE
365 flags
|= GMX_FFT_FLAG_CONSERVATIVE
;
368 fftw_flags
= (flags
& GMX_FFT_FLAG_CONSERVATIVE
) ? FFTW_ESTIMATE
: FFTW_MEASURE
;
372 gmx_fatal(FARGS
, "Invalid opaque FFT datatype pointer.");
378 if ((fft
= static_cast<gmx_fft_t
>(FFTWPREFIX(malloc
)(sizeof(struct gmx_fft
)))) == nullptr)
384 /* allocate aligned, and extra memory to make it unaligned */
385 p1
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) * (nx
* (ny
/ 2 + 1) * 2 + 2)));
388 FFTWPREFIX(free
)(fft
);
393 p2
= static_cast<real
*>(FFTWPREFIX(malloc
)(sizeof(real
) * (nx
* (ny
/ 2 + 1) * 2 + 2)));
396 FFTWPREFIX(free
)(p1
);
397 FFTWPREFIX(free
)(fft
);
402 /* make unaligned pointers.
403 * In double precision the actual complex datatype will be 16 bytes,
404 * so go to a char pointer and force an offset of 8 bytes instead.
406 pc
= reinterpret_cast<char*>(p1
);
408 up1
= reinterpret_cast<real
*>(pc
);
410 pc
= reinterpret_cast<char*>(p2
);
412 up2
= reinterpret_cast<real
*>(pc
);
415 fft
->plan
[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d
)(
416 nx
, ny
, reinterpret_cast<FFTWPREFIX(complex)*>(up1
), up2
, fftw_flags
);
417 fft
->plan
[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d
)(
418 nx
, ny
, up1
, reinterpret_cast<FFTWPREFIX(complex)*>(up2
), fftw_flags
);
419 fft
->plan
[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d
)(
420 nx
, ny
, reinterpret_cast<FFTWPREFIX(complex)*>(up1
), up1
, fftw_flags
);
421 fft
->plan
[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d
)(
422 nx
, ny
, up1
, reinterpret_cast<FFTWPREFIX(complex)*>(up1
), fftw_flags
);
424 fft
->plan
[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d
)(
425 nx
, ny
, reinterpret_cast<FFTWPREFIX(complex)*>(p1
), p2
, fftw_flags
);
426 fft
->plan
[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d
)(
427 nx
, ny
, p1
, reinterpret_cast<FFTWPREFIX(complex)*>(p2
), fftw_flags
);
428 fft
->plan
[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d
)(
429 nx
, ny
, reinterpret_cast<FFTWPREFIX(complex)*>(p1
), p1
, fftw_flags
);
430 fft
->plan
[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d
)(
431 nx
, ny
, p1
, reinterpret_cast<FFTWPREFIX(complex)*>(p1
), fftw_flags
);
434 for (i
= 0; i
< 2; i
++)
436 for (j
= 0; j
< 2; j
++)
438 for (k
= 0; k
< 2; k
++)
440 if (fft
->plan
[i
][j
][k
] == nullptr)
442 gmx_fatal(FARGS
, "Error initializing FFTW3 plan.");
444 gmx_fft_destroy(fft
);
446 FFTWPREFIX(free
)(p1
);
447 FFTWPREFIX(free
)(p2
);
455 FFTWPREFIX(free
)(p1
);
456 FFTWPREFIX(free
)(p2
);
458 fft
->real_transform
= 1;
466 int gmx_fft_1d(gmx_fft_t fft
, enum gmx_fft_direction dir
, void* in_data
, void* out_data
)
468 bool aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
469 bool inplace
= (in_data
== out_data
);
470 bool isforward
= (dir
== GMX_FFT_FORWARD
);
473 if ((fft
->real_transform
== 1) || (fft
->ndim
!= 1)
474 || ((dir
!= GMX_FFT_FORWARD
) && (dir
!= GMX_FFT_BACKWARD
)))
476 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
480 FFTWPREFIX(execute_dft
)
481 (fft
->plan
[aligned
][inplace
][isforward
], static_cast<FFTWPREFIX(complex)*>(in_data
),
482 static_cast<FFTWPREFIX(complex)*>(out_data
));
487 int gmx_fft_many_1d(gmx_fft_t fft
, enum gmx_fft_direction dir
, void* in_data
, void* out_data
)
489 return gmx_fft_1d(fft
, dir
, in_data
, out_data
);
492 int gmx_fft_1d_real(gmx_fft_t fft
, enum gmx_fft_direction dir
, void* in_data
, void* out_data
)
494 bool aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
495 bool inplace
= (in_data
== out_data
);
496 bool isforward
= (dir
== GMX_FFT_REAL_TO_COMPLEX
);
499 if ((fft
->real_transform
!= 1) || (fft
->ndim
!= 1)
500 || ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)))
502 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
508 FFTWPREFIX(execute_dft_r2c
)
509 (fft
->plan
[aligned
][inplace
][isforward
], static_cast<real
*>(in_data
),
510 static_cast<FFTWPREFIX(complex)*>(out_data
));
514 FFTWPREFIX(execute_dft_c2r
)
515 (fft
->plan
[aligned
][inplace
][isforward
], static_cast<FFTWPREFIX(complex)*>(in_data
),
516 static_cast<real
*>(out_data
));
522 int gmx_fft_many_1d_real(gmx_fft_t fft
, enum gmx_fft_direction dir
, void* in_data
, void* out_data
)
524 return gmx_fft_1d_real(fft
, dir
, in_data
, out_data
);
527 int gmx_fft_2d_real(gmx_fft_t fft
, enum gmx_fft_direction dir
, void* in_data
, void* out_data
)
529 bool aligned
= (((size_t(in_data
) | size_t(out_data
)) & 0xf) == 0);
530 bool inplace
= (in_data
== out_data
);
531 bool isforward
= (dir
== GMX_FFT_REAL_TO_COMPLEX
);
534 if ((fft
->real_transform
!= 1) || (fft
->ndim
!= 2)
535 || ((dir
!= GMX_FFT_REAL_TO_COMPLEX
) && (dir
!= GMX_FFT_COMPLEX_TO_REAL
)))
537 gmx_fatal(FARGS
, "FFT plan mismatch - bad plan or direction.");
543 FFTWPREFIX(execute_dft_r2c
)
544 (fft
->plan
[aligned
][inplace
][isforward
], static_cast<real
*>(in_data
),
545 static_cast<FFTWPREFIX(complex)*>(out_data
));
549 FFTWPREFIX(execute_dft_c2r
)
550 (fft
->plan
[aligned
][inplace
][isforward
], static_cast<FFTWPREFIX(complex)*>(in_data
),
551 static_cast<real
*>(out_data
));
558 void gmx_fft_destroy(gmx_fft_t fft
)
564 for (i
= 0; i
< 2; i
++)
566 for (j
= 0; j
< 2; j
++)
568 for (k
= 0; k
< 2; k
++)
570 if (fft
->plan
[i
][j
][k
] != nullptr)
573 FFTWPREFIX(destroy_plan
)(fft
->plan
[i
][j
][k
]);
575 fft
->plan
[i
][j
][k
] = nullptr;
581 FFTWPREFIX(free
)(fft
);
586 void gmx_many_fft_destroy(gmx_fft_t fft
)
588 gmx_fft_destroy(fft
);
591 void gmx_fft_cleanup()
593 FFTWPREFIX(cleanup
)();