Split lines with many copyright years
[gromacs.git] / src / gromacs / fft / fft_fftw3.cpp
blob33c9b8c1841a904ede1190da3a9f2aada1d5a27e
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "config.h"
41 #include <cerrno>
42 #include <cstdlib>
44 #include <fftw3.h>
46 #include "gromacs/fft/fft.h"
47 #include "gromacs/utility/exceptions.h"
48 #include "gromacs/utility/fatalerror.h"
49 #include "gromacs/utility/mutex.h"
51 #if GMX_DOUBLE
52 # define FFTWPREFIX(name) fftw_##name
53 #else
54 # define FFTWPREFIX(name) fftwf_##name
55 #endif
57 /* none of the fftw3 calls, except execute(), are thread-safe, so
58 we need to serialize them with this mutex. */
59 static gmx::Mutex big_fftw_mutex;
60 #define FFTW_LOCK \
61 try \
62 { \
63 big_fftw_mutex.lock(); \
64 } \
65 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
66 #define FFTW_UNLOCK \
67 try \
68 { \
69 big_fftw_mutex.unlock(); \
70 } \
71 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
73 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
74 Consequesences:
75 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
76 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
77 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
79 /*! \internal
80 * \brief
81 * Contents of the FFTW3 fft datatype.
83 * Note that this is one of several possible implementations of gmx_fft_t.
85 #ifdef DOXYGEN
86 struct gmx_fft_fftw3
87 #else
88 struct gmx_fft
89 #endif
91 /*! \brief
92 * FFTW plans.
94 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
95 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
96 * first index: 0=unaligned, 1=aligned
97 * second index: 0=out-of-place, 1=in-place
98 * third index: 0=backward, 1=forward
100 FFTWPREFIX(plan) plan[2][2][2];
101 /** Used to catch user mistakes */
102 int real_transform;
103 /** Number of dimensions in the FFT */
104 int ndim;
107 int gmx_fft_init_1d(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
109 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
113 int gmx_fft_init_many_1d(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
115 gmx_fft_t fft;
116 FFTWPREFIX(complex) * p1, *p2, *up1, *up2;
117 char* pc;
118 int i, j, k;
119 int fftw_flags;
121 #if GMX_DISABLE_FFTW_MEASURE
122 flags |= GMX_FFT_FLAG_CONSERVATIVE;
123 #endif
125 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
127 if (pfft == nullptr)
129 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
130 return EINVAL;
132 *pfft = nullptr;
134 FFTW_LOCK
135 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
137 FFTW_UNLOCK
138 return ENOMEM;
141 /* allocate aligned, and extra memory to make it unaligned */
142 p1 = static_cast<FFTWPREFIX(complex)*>(
143 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
144 if (p1 == nullptr)
146 FFTWPREFIX(free)(fft);
147 FFTW_UNLOCK
148 return ENOMEM;
151 p2 = static_cast<FFTWPREFIX(complex)*>(
152 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
153 if (p2 == nullptr)
155 FFTWPREFIX(free)(p1);
156 FFTWPREFIX(free)(fft);
157 FFTW_UNLOCK
158 return ENOMEM;
161 /* make unaligned pointers.
162 * In double precision the actual complex datatype will be 16 bytes,
163 * so go to a char pointer and force an offset of 8 bytes instead.
165 pc = reinterpret_cast<char*>(p1);
166 pc += 8;
167 up1 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
169 pc = reinterpret_cast<char*>(p2);
170 pc += 8;
171 up2 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
173 /* int rank, const int *n, int howmany,
174 fftw_complex *in, const int *inembed,
175 int istride, int idist,
176 fftw_complex *out, const int *onembed,
177 int ostride, int odist,
178 int sign, unsigned flags */
179 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1,
180 nx, FFTW_BACKWARD, fftw_flags);
181 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1,
182 nx, FFTW_FORWARD, fftw_flags);
183 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1,
184 nx, FFTW_BACKWARD, fftw_flags);
185 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1,
186 nx, FFTW_FORWARD, fftw_flags);
187 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx,
188 FFTW_BACKWARD, fftw_flags);
189 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx,
190 FFTW_FORWARD, fftw_flags);
191 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx,
192 FFTW_BACKWARD, fftw_flags);
193 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx,
194 FFTW_FORWARD, fftw_flags);
196 for (i = 0; i < 2; i++)
198 for (j = 0; j < 2; j++)
200 for (k = 0; k < 2; k++)
202 if (fft->plan[i][j][k] == nullptr)
204 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
205 FFTW_UNLOCK
206 gmx_fft_destroy(fft);
207 FFTW_LOCK
208 FFTWPREFIX(free)(p1);
209 FFTWPREFIX(free)(p2);
210 FFTW_UNLOCK
211 return -1;
217 FFTWPREFIX(free)(p1);
218 FFTWPREFIX(free)(p2);
220 fft->real_transform = 0;
221 fft->ndim = 1;
223 *pfft = fft;
224 FFTW_UNLOCK
225 return 0;
228 int gmx_fft_init_1d_real(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
230 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
233 int gmx_fft_init_many_1d_real(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
235 gmx_fft_t fft;
236 real * p1, *p2, *up1, *up2;
237 char* pc;
238 int i, j, k;
239 int fftw_flags;
241 #if GMX_DISABLE_FFTW_MEASURE
242 flags |= GMX_FFT_FLAG_CONSERVATIVE;
243 #endif
245 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
247 if (pfft == nullptr)
249 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
250 return EINVAL;
252 *pfft = nullptr;
254 FFTW_LOCK
255 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
257 FFTW_UNLOCK
258 return ENOMEM;
261 /* allocate aligned, and extra memory to make it unaligned */
262 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
263 if (p1 == nullptr)
265 FFTWPREFIX(free)(fft);
266 FFTW_UNLOCK
267 return ENOMEM;
270 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
271 if (p2 == nullptr)
273 FFTWPREFIX(free)(p1);
274 FFTWPREFIX(free)(fft);
275 FFTW_UNLOCK
276 return ENOMEM;
279 /* make unaligned pointers.
280 * In double precision the actual complex datatype will be 16 bytes,
281 * so go to a char pointer and force an offset of 8 bytes instead.
283 pc = reinterpret_cast<char*>(p1);
284 pc += 8;
285 up1 = reinterpret_cast<real*>(pc);
287 pc = reinterpret_cast<char*>(p2);
288 pc += 8;
289 up2 = reinterpret_cast<real*>(pc);
291 /* int rank, const int *n, int howmany,
292 double *in, const int *inembed,
293 int istride, int idist,
294 fftw_complex *out, const int *onembed,
295 int ostride, int odist,
296 unsigned flag */
297 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(
298 1, &nx, howmany, up1, nullptr, 1, (nx / 2 + 1) * 2,
299 reinterpret_cast<FFTWPREFIX(complex)*>(up2), nullptr, 1, (nx / 2 + 1), fftw_flags);
300 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(
301 1, &nx, howmany, up1, nullptr, 1, (nx / 2 + 1) * 2,
302 reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1), fftw_flags);
303 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(
304 1, &nx, howmany, p1, nullptr, 1, (nx / 2 + 1) * 2,
305 reinterpret_cast<FFTWPREFIX(complex)*>(p2), nullptr, 1, (nx / 2 + 1), fftw_flags);
306 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(
307 1, &nx, howmany, p1, nullptr, 1, (nx / 2 + 1) * 2,
308 reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1), fftw_flags);
310 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(
311 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1),
312 up2, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
313 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(
314 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1),
315 up1, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
316 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(
317 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1),
318 p2, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
319 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(
320 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1),
321 p1, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
323 for (i = 0; i < 2; i++)
325 for (j = 0; j < 2; j++)
327 for (k = 0; k < 2; k++)
329 if (fft->plan[i][j][k] == nullptr)
331 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
332 FFTW_UNLOCK
333 gmx_fft_destroy(fft);
334 FFTW_LOCK
335 FFTWPREFIX(free)(p1);
336 FFTWPREFIX(free)(p2);
337 FFTW_UNLOCK
338 return -1;
344 FFTWPREFIX(free)(p1);
345 FFTWPREFIX(free)(p2);
347 fft->real_transform = 1;
348 fft->ndim = 1;
350 *pfft = fft;
351 FFTW_UNLOCK
352 return 0;
356 int gmx_fft_init_2d_real(gmx_fft_t* pfft, int nx, int ny, gmx_fft_flag flags)
358 gmx_fft_t fft;
359 real * p1, *p2, *up1, *up2;
360 char* pc;
361 int i, j, k;
362 int fftw_flags;
364 #if GMX_DISABLE_FFTW_MEASURE
365 flags |= GMX_FFT_FLAG_CONSERVATIVE;
366 #endif
368 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
370 if (pfft == nullptr)
372 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
373 return EINVAL;
375 *pfft = nullptr;
377 FFTW_LOCK
378 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
380 FFTW_UNLOCK
381 return ENOMEM;
384 /* allocate aligned, and extra memory to make it unaligned */
385 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
386 if (p1 == nullptr)
388 FFTWPREFIX(free)(fft);
389 FFTW_UNLOCK
390 return ENOMEM;
393 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
394 if (p2 == nullptr)
396 FFTWPREFIX(free)(p1);
397 FFTWPREFIX(free)(fft);
398 FFTW_UNLOCK
399 return ENOMEM;
402 /* make unaligned pointers.
403 * In double precision the actual complex datatype will be 16 bytes,
404 * so go to a char pointer and force an offset of 8 bytes instead.
406 pc = reinterpret_cast<char*>(p1);
407 pc += 8;
408 up1 = reinterpret_cast<real*>(pc);
410 pc = reinterpret_cast<char*>(p2);
411 pc += 8;
412 up2 = reinterpret_cast<real*>(pc);
415 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
416 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up2, fftw_flags);
417 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
418 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up2), fftw_flags);
419 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
420 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up1, fftw_flags);
421 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
422 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up1), fftw_flags);
424 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
425 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p2, fftw_flags);
426 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
427 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p2), fftw_flags);
428 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
429 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p1, fftw_flags);
430 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
431 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p1), fftw_flags);
434 for (i = 0; i < 2; i++)
436 for (j = 0; j < 2; j++)
438 for (k = 0; k < 2; k++)
440 if (fft->plan[i][j][k] == nullptr)
442 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
443 FFTW_UNLOCK
444 gmx_fft_destroy(fft);
445 FFTW_LOCK
446 FFTWPREFIX(free)(p1);
447 FFTWPREFIX(free)(p2);
448 FFTW_UNLOCK
449 return -1;
455 FFTWPREFIX(free)(p1);
456 FFTWPREFIX(free)(p2);
458 fft->real_transform = 1;
459 fft->ndim = 2;
461 *pfft = fft;
462 FFTW_UNLOCK
463 return 0;
466 int gmx_fft_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
468 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
469 bool inplace = (in_data == out_data);
470 bool isforward = (dir == GMX_FFT_FORWARD);
472 /* Some checks */
473 if ((fft->real_transform == 1) || (fft->ndim != 1)
474 || ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)))
476 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
477 return EINVAL;
480 FFTWPREFIX(execute_dft)
481 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
482 static_cast<FFTWPREFIX(complex)*>(out_data));
484 return 0;
487 int gmx_fft_many_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
489 return gmx_fft_1d(fft, dir, in_data, out_data);
492 int gmx_fft_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
494 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
495 bool inplace = (in_data == out_data);
496 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
498 /* Some checks */
499 if ((fft->real_transform != 1) || (fft->ndim != 1)
500 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
502 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
503 return EINVAL;
506 if (isforward)
508 FFTWPREFIX(execute_dft_r2c)
509 (fft->plan[aligned][inplace][isforward], static_cast<real*>(in_data),
510 static_cast<FFTWPREFIX(complex)*>(out_data));
512 else
514 FFTWPREFIX(execute_dft_c2r)
515 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
516 static_cast<real*>(out_data));
519 return 0;
522 int gmx_fft_many_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
524 return gmx_fft_1d_real(fft, dir, in_data, out_data);
527 int gmx_fft_2d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
529 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
530 bool inplace = (in_data == out_data);
531 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
533 /* Some checks */
534 if ((fft->real_transform != 1) || (fft->ndim != 2)
535 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
537 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
538 return EINVAL;
541 if (isforward)
543 FFTWPREFIX(execute_dft_r2c)
544 (fft->plan[aligned][inplace][isforward], static_cast<real*>(in_data),
545 static_cast<FFTWPREFIX(complex)*>(out_data));
547 else
549 FFTWPREFIX(execute_dft_c2r)
550 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
551 static_cast<real*>(out_data));
555 return 0;
558 void gmx_fft_destroy(gmx_fft_t fft)
560 int i, j, k;
562 if (fft != nullptr)
564 for (i = 0; i < 2; i++)
566 for (j = 0; j < 2; j++)
568 for (k = 0; k < 2; k++)
570 if (fft->plan[i][j][k] != nullptr)
572 FFTW_LOCK
573 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
574 FFTW_UNLOCK
575 fft->plan[i][j][k] = nullptr;
580 FFTW_LOCK
581 FFTWPREFIX(free)(fft);
582 FFTW_UNLOCK
586 void gmx_many_fft_destroy(gmx_fft_t fft)
588 gmx_fft_destroy(fft);
591 void gmx_fft_cleanup()
593 FFTWPREFIX(cleanup)();