Reduce preprocessor dependency in resourcedivision.cpp
[gromacs.git] / src / gromacs / fft / fft_fftw3.cpp
blob69f314cfe2fba5753fbc347740914afc913a252a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include "config.h"
40 #include <cerrno>
41 #include <cstdlib>
43 #include <fftw3.h>
45 #include "gromacs/fft/fft.h"
46 #include "gromacs/utility/exceptions.h"
47 #include "gromacs/utility/fatalerror.h"
48 #include "gromacs/utility/mutex.h"
50 #if GMX_DOUBLE
51 #define FFTWPREFIX(name) fftw_ ## name
52 #else
53 #define FFTWPREFIX(name) fftwf_ ## name
54 #endif
56 /* none of the fftw3 calls, except execute(), are thread-safe, so
57 we need to serialize them with this mutex. */
58 static gmx::Mutex big_fftw_mutex;
59 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
60 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
62 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
63 Consequesences:
64 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
65 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
66 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
68 /*! \internal
69 * \brief
70 * Contents of the FFTW3 fft datatype.
72 * Note that this is one of several possible implementations of gmx_fft_t.
74 #ifdef DOXYGEN
75 struct gmx_fft_fftw3
76 #else
77 struct gmx_fft
78 #endif
80 /*! \brief
81 * FFTW plans.
83 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
84 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
85 * first index: 0=unaligned, 1=aligned
86 * second index: 0=out-of-place, 1=in-place
87 * third index: 0=backward, 1=forward
89 FFTWPREFIX(plan) plan[2][2][2];
90 /** Used to catch user mistakes */
91 int real_transform;
92 /** Number of dimensions in the FFT */
93 int ndim;
96 int
97 gmx_fft_init_1d(gmx_fft_t * pfft,
98 int nx,
99 gmx_fft_flag flags)
101 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
106 gmx_fft_init_many_1d(gmx_fft_t * pfft,
107 int nx,
108 int howmany,
109 gmx_fft_flag flags)
111 gmx_fft_t fft;
112 FFTWPREFIX(complex) *p1, *p2, *up1, *up2;
113 char* pc;
114 int i, j, k;
115 int fftw_flags;
117 #if GMX_DISABLE_FFTW_MEASURE
118 flags |= GMX_FFT_FLAG_CONSERVATIVE;
119 #endif
121 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
123 if (pfft == nullptr)
125 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
126 return EINVAL;
128 *pfft = nullptr;
130 FFTW_LOCK;
131 if ( (fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
133 FFTW_UNLOCK;
134 return ENOMEM;
137 /* allocate aligned, and extra memory to make it unaligned */
138 p1 = static_cast<FFTWPREFIX(complex) *>(FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany));
139 if (p1 == nullptr)
141 FFTWPREFIX(free)(fft);
142 FFTW_UNLOCK;
143 return ENOMEM;
146 p2 = static_cast<FFTWPREFIX(complex) *>(FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany));
147 if (p2 == nullptr)
149 FFTWPREFIX(free)(p1);
150 FFTWPREFIX(free)(fft);
151 FFTW_UNLOCK;
152 return ENOMEM;
155 /* make unaligned pointers.
156 * In double precision the actual complex datatype will be 16 bytes,
157 * so go to a char pointer and force an offset of 8 bytes instead.
159 pc = reinterpret_cast<char*>(p1);
160 pc += 8;
161 up1 = reinterpret_cast<FFTWPREFIX(complex) *>(pc);
163 pc = reinterpret_cast<char*>(p2);
164 pc += 8;
165 up2 = reinterpret_cast<FFTWPREFIX(complex) *>(pc);
167 /* int rank, const int *n, int howmany,
168 fftw_complex *in, const int *inembed,
169 int istride, int idist,
170 fftw_complex *out, const int *onembed,
171 int ostride, int odist,
172 int sign, unsigned flags */
173 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
174 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
175 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
176 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
177 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
178 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
179 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
180 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
182 for (i = 0; i < 2; i++)
184 for (j = 0; j < 2; j++)
186 for (k = 0; k < 2; k++)
188 if (fft->plan[i][j][k] == nullptr)
190 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
191 FFTW_UNLOCK;
192 gmx_fft_destroy(fft);
193 FFTW_LOCK;
194 FFTWPREFIX(free)(p1);
195 FFTWPREFIX(free)(p2);
196 FFTW_UNLOCK;
197 return -1;
203 FFTWPREFIX(free)(p1);
204 FFTWPREFIX(free)(p2);
206 fft->real_transform = 0;
207 fft->ndim = 1;
209 *pfft = fft;
210 FFTW_UNLOCK;
211 return 0;
215 gmx_fft_init_1d_real(gmx_fft_t * pfft,
216 int nx,
217 gmx_fft_flag flags)
219 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
223 gmx_fft_init_many_1d_real(gmx_fft_t * pfft,
224 int nx,
225 int howmany,
226 gmx_fft_flag flags)
228 gmx_fft_t fft;
229 real *p1, *p2, *up1, *up2;
230 char* pc;
231 int i, j, k;
232 int fftw_flags;
234 #if GMX_DISABLE_FFTW_MEASURE
235 flags |= GMX_FFT_FLAG_CONSERVATIVE;
236 #endif
238 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
240 if (pfft == nullptr)
242 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
243 return EINVAL;
245 *pfft = nullptr;
247 FFTW_LOCK;
248 if ( (fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
250 FFTW_UNLOCK;
251 return ENOMEM;
254 /* allocate aligned, and extra memory to make it unaligned */
255 p1 = static_cast<real *>(FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8));
256 if (p1 == nullptr)
258 FFTWPREFIX(free)(fft);
259 FFTW_UNLOCK;
260 return ENOMEM;
263 p2 = static_cast<real *>(FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8));
264 if (p2 == nullptr)
266 FFTWPREFIX(free)(p1);
267 FFTWPREFIX(free)(fft);
268 FFTW_UNLOCK;
269 return ENOMEM;
272 /* make unaligned pointers.
273 * In double precision the actual complex datatype will be 16 bytes,
274 * so go to a char pointer and force an offset of 8 bytes instead.
276 pc = reinterpret_cast<char*>(p1);
277 pc += 8;
278 up1 = reinterpret_cast<real*>(pc);
280 pc = reinterpret_cast<char*>(p2);
281 pc += 8;
282 up2 = reinterpret_cast<real*>(pc);
284 /* int rank, const int *n, int howmany,
285 double *in, const int *inembed,
286 int istride, int idist,
287 fftw_complex *out, const int *onembed,
288 int ostride, int odist,
289 unsigned flag */
290 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, nullptr, 1, (nx/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(up2), nullptr, 1, (nx/2+1), fftw_flags);
291 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, nullptr, 1, (nx/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(up1), nullptr, 1, (nx/2+1), fftw_flags);
292 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, nullptr, 1, (nx/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(p2), nullptr, 1, (nx/2+1), fftw_flags);
293 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, nullptr, 1, (nx/2+1) *2, reinterpret_cast<FFTWPREFIX(complex) *>(p1), nullptr, 1, (nx/2+1), fftw_flags);
295 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex) *>(up1), nullptr, 1, (nx/2+1), up2, nullptr, 1, (nx/2+1) *2, fftw_flags);
296 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex) *>(up1), nullptr, 1, (nx/2+1), up1, nullptr, 1, (nx/2+1) *2, fftw_flags);
297 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex) *>(p1), nullptr, 1, (nx/2+1), p2, nullptr, 1, (nx/2+1) *2, fftw_flags);
298 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex) *>(p1), nullptr, 1, (nx/2+1), p1, nullptr, 1, (nx/2+1) *2, fftw_flags);
300 for (i = 0; i < 2; i++)
302 for (j = 0; j < 2; j++)
304 for (k = 0; k < 2; k++)
306 if (fft->plan[i][j][k] == nullptr)
308 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
309 FFTW_UNLOCK;
310 gmx_fft_destroy(fft);
311 FFTW_LOCK;
312 FFTWPREFIX(free)(p1);
313 FFTWPREFIX(free)(p2);
314 FFTW_UNLOCK;
315 return -1;
321 FFTWPREFIX(free)(p1);
322 FFTWPREFIX(free)(p2);
324 fft->real_transform = 1;
325 fft->ndim = 1;
327 *pfft = fft;
328 FFTW_UNLOCK;
329 return 0;
334 gmx_fft_init_2d_real(gmx_fft_t * pfft,
335 int nx,
336 int ny,
337 gmx_fft_flag flags)
339 gmx_fft_t fft;
340 real *p1, *p2, *up1, *up2;
341 char* pc;
342 int i, j, k;
343 int fftw_flags;
345 #if GMX_DISABLE_FFTW_MEASURE
346 flags |= GMX_FFT_FLAG_CONSERVATIVE;
347 #endif
349 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
351 if (pfft == nullptr)
353 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
354 return EINVAL;
356 *pfft = nullptr;
358 FFTW_LOCK;
359 if ( (fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
361 FFTW_UNLOCK;
362 return ENOMEM;
365 /* allocate aligned, and extra memory to make it unaligned */
366 p1 = static_cast<real *>(FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) ));
367 if (p1 == nullptr)
369 FFTWPREFIX(free)(fft);
370 FFTW_UNLOCK;
371 return ENOMEM;
374 p2 = static_cast<real *>(FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) ));
375 if (p2 == nullptr)
377 FFTWPREFIX(free)(p1);
378 FFTWPREFIX(free)(fft);
379 FFTW_UNLOCK;
380 return ENOMEM;
383 /* make unaligned pointers.
384 * In double precision the actual complex datatype will be 16 bytes,
385 * so go to a char pointer and force an offset of 8 bytes instead.
387 pc = reinterpret_cast<char*>(p1);
388 pc += 8;
389 up1 = reinterpret_cast<real*>(pc);
391 pc = reinterpret_cast<char*>(p2);
392 pc += 8;
393 up2 = reinterpret_cast<real*>(pc);
396 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, reinterpret_cast<FFTWPREFIX(complex) *>(up1), up2, fftw_flags);
397 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex) *>(up2), fftw_flags);
398 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, reinterpret_cast<FFTWPREFIX(complex) *>(up1), up1, fftw_flags);
399 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex) *>(up1), fftw_flags);
401 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, reinterpret_cast<FFTWPREFIX(complex) *>(p1), p2, fftw_flags);
402 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex) *>(p2), fftw_flags);
403 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, reinterpret_cast<FFTWPREFIX(complex) *>(p1), p1, fftw_flags);
404 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex) *>(p1), fftw_flags);
407 for (i = 0; i < 2; i++)
409 for (j = 0; j < 2; j++)
411 for (k = 0; k < 2; k++)
413 if (fft->plan[i][j][k] == nullptr)
415 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
416 FFTW_UNLOCK;
417 gmx_fft_destroy(fft);
418 FFTW_LOCK;
419 FFTWPREFIX(free)(p1);
420 FFTWPREFIX(free)(p2);
421 FFTW_UNLOCK;
422 return -1;
428 FFTWPREFIX(free)(p1);
429 FFTWPREFIX(free)(p2);
431 fft->real_transform = 1;
432 fft->ndim = 2;
434 *pfft = fft;
435 FFTW_UNLOCK;
436 return 0;
440 gmx_fft_1d (gmx_fft_t fft,
441 enum gmx_fft_direction dir,
442 void * in_data,
443 void * out_data)
445 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
446 bool inplace = (in_data == out_data);
447 bool isforward = (dir == GMX_FFT_FORWARD);
449 /* Some checks */
450 if ( (fft->real_transform == 1) || (fft->ndim != 1) ||
451 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
453 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
454 return EINVAL;
457 FFTWPREFIX(execute_dft)(fft->plan[aligned][inplace][isforward],
458 static_cast<FFTWPREFIX(complex) *>(in_data),
459 static_cast<FFTWPREFIX(complex) *>(out_data));
461 return 0;
465 gmx_fft_many_1d (gmx_fft_t fft,
466 enum gmx_fft_direction dir,
467 void * in_data,
468 void * out_data)
470 return gmx_fft_1d(fft, dir, in_data, out_data);
474 gmx_fft_1d_real (gmx_fft_t fft,
475 enum gmx_fft_direction dir,
476 void * in_data,
477 void * out_data)
479 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
480 bool inplace = (in_data == out_data);
481 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
483 /* Some checks */
484 if ( (fft->real_transform != 1) || (fft->ndim != 1) ||
485 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
487 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
488 return EINVAL;
491 if (isforward)
493 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
494 static_cast<real *>(in_data), static_cast<FFTWPREFIX(complex) *>(out_data));
496 else
498 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
499 static_cast<FFTWPREFIX(complex) *>(in_data), static_cast<real *>(out_data));
502 return 0;
506 gmx_fft_many_1d_real (gmx_fft_t fft,
507 enum gmx_fft_direction dir,
508 void * in_data,
509 void * out_data)
511 return gmx_fft_1d_real(fft, dir, in_data, out_data);
515 gmx_fft_2d_real (gmx_fft_t fft,
516 enum gmx_fft_direction dir,
517 void * in_data,
518 void * out_data)
520 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
521 bool inplace = (in_data == out_data);
522 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
524 /* Some checks */
525 if ( (fft->real_transform != 1) || (fft->ndim != 2) ||
526 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
528 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
529 return EINVAL;
532 if (isforward)
534 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
535 static_cast<real *>(in_data),
536 static_cast<FFTWPREFIX(complex) *>(out_data));
538 else
540 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
541 static_cast<FFTWPREFIX(complex) *>(in_data),
542 static_cast<real *>(out_data));
546 return 0;
549 void
550 gmx_fft_destroy(gmx_fft_t fft)
552 int i, j, k;
554 if (fft != nullptr)
556 for (i = 0; i < 2; i++)
558 for (j = 0; j < 2; j++)
560 for (k = 0; k < 2; k++)
562 if (fft->plan[i][j][k] != nullptr)
564 FFTW_LOCK;
565 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
566 FFTW_UNLOCK;
567 fft->plan[i][j][k] = nullptr;
572 FFTW_LOCK;
573 FFTWPREFIX(free)(fft);
574 FFTW_UNLOCK;
579 void
580 gmx_many_fft_destroy(gmx_fft_t fft)
582 gmx_fft_destroy(fft);
585 void gmx_fft_cleanup()
587 FFTWPREFIX(cleanup)();