Move responsibility for FFT description string
[gromacs.git] / src / gromacs / fft / fft_mkl.cpp
blob037818b9a73904099e05b8ba4972f61ea5a9761d
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include <errno.h>
39 #include <stdlib.h>
41 #include <mkl_dfti.h>
42 #include <mkl_service.h>
44 #include "gromacs/fft/fft.h"
45 #include "gromacs/utility/fatalerror.h"
48 /* For MKL version (<10.0), we should define MKL_LONG. */
49 #ifndef MKL_LONG
50 #define MKL_LONG long int
51 #endif
54 #ifdef GMX_DOUBLE
55 #define GMX_DFTI_PREC DFTI_DOUBLE
56 #else
57 #define GMX_DFTI_PREC DFTI_SINGLE
58 #endif
60 /*! \internal
61 * \brief
62 * Contents of the Intel MKL FFT fft datatype.
64 * Note that this is one of several possible implementations of gmx_fft_t.
66 * The MKL _API_ supports 1D,2D, and 3D transforms, including real-to-complex.
67 * Unfortunately the actual library implementation does not support 3D real
68 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
69 * either. In addition, the multi-dimensional storage format for real data
70 * is not compatible with our padding.
72 * To work around this we roll our own 2D and 3D real-to-complex transforms,
73 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
74 * (nx*ny) transforms at once when necessary. To perform strided multiple
75 * transforms out-of-place (i.e., without padding in the last dimension)
76 * on the fly we also need to separate the forward and backward
77 * handles for real-to-complex/complex-to-real data permutation.
79 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
80 * the out-of-place transforms. Still, whenever possible we try to use
81 * a single 3D-transform handle instead.
83 * So, the handles are enumerated as follows:
85 * 1D FFT (real too): Index 0 is the handle for the entire FFT
86 * 2D complex FFT: Index 0 is the handle for the entire FFT
87 * 3D complex FFT: Index 0 is the handle for the entire FFT
88 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
89 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
90 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
91 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
93 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
95 #ifdef DOXYGEN
96 struct gmx_fft_mkl
97 #else
98 struct gmx_fft
99 #endif
101 int ndim; /**< Number of dimensions in FFT */
102 int nx; /**< Length of X transform */
103 int ny; /**< Length of Y transform */
104 int nz; /**< Length of Z transform */
105 int real_fft; /**< 1 if real FFT, otherwise 0 */
106 DFTI_DESCRIPTOR * inplace[3]; /**< in-place FFT */
107 DFTI_DESCRIPTOR * ooplace[4]; /**< out-of-place FFT */
108 t_complex * work; /**< Enable out-of-place c2r FFT */
114 gmx_fft_init_1d(gmx_fft_t * pfft,
115 int nx,
116 gmx_fft_flag flags)
118 gmx_fft_t fft;
119 int d;
120 int status;
122 if (pfft == NULL)
124 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
125 return EINVAL;
127 *pfft = NULL;
129 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
131 return ENOMEM;
134 /* Mark all handles invalid */
135 for (d = 0; d < 3; d++)
137 fft->inplace[d] = fft->ooplace[d] = NULL;
139 fft->ooplace[3] = NULL;
142 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
144 if (status == 0)
146 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
149 if (status == 0)
151 status = DftiCommitDescriptor(fft->inplace[0]);
155 if (status == 0)
157 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
160 if (status == 0)
162 DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
165 if (status == 0)
167 DftiCommitDescriptor(fft->ooplace[0]);
171 if (status != 0)
173 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
174 gmx_fft_destroy(fft);
175 return status;
178 fft->ndim = 1;
179 fft->nx = nx;
180 fft->real_fft = 0;
181 fft->work = NULL;
183 *pfft = fft;
184 return 0;
190 gmx_fft_init_1d_real(gmx_fft_t * pfft,
191 int nx,
192 gmx_fft_flag flags)
194 gmx_fft_t fft;
195 int d;
196 int status;
198 if (pfft == NULL)
200 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
201 return EINVAL;
203 *pfft = NULL;
205 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
207 return ENOMEM;
210 /* Mark all handles invalid */
211 for (d = 0; d < 3; d++)
213 fft->inplace[d] = fft->ooplace[d] = NULL;
215 fft->ooplace[3] = NULL;
217 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
219 if (status == 0)
221 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
224 if (status == 0)
226 status = DftiCommitDescriptor(fft->inplace[0]);
230 if (status == 0)
232 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
235 if (status == 0)
237 status = DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
240 if (status == 0)
242 status = DftiCommitDescriptor(fft->ooplace[0]);
246 if (status == DFTI_UNIMPLEMENTED)
248 gmx_fatal(FARGS,
249 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
250 gmx_fft_destroy(fft);
251 return status;
255 if (status != 0)
257 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
258 gmx_fft_destroy(fft);
259 return status;
262 fft->ndim = 1;
263 fft->nx = nx;
264 fft->real_fft = 1;
265 fft->work = NULL;
267 *pfft = fft;
268 return 0;
274 gmx_fft_init_2d_real(gmx_fft_t * pfft,
275 int nx,
276 int ny,
277 gmx_fft_flag flags)
279 gmx_fft_t fft;
280 int d;
281 int status;
282 MKL_LONG stride[2];
283 MKL_LONG nyc;
285 if (pfft == NULL)
287 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
288 return EINVAL;
290 *pfft = NULL;
292 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
294 return ENOMEM;
297 nyc = (ny/2 + 1);
299 /* Mark all handles invalid */
300 for (d = 0; d < 3; d++)
302 fft->inplace[d] = fft->ooplace[d] = NULL;
304 fft->ooplace[3] = NULL;
306 /* Roll our own 2D real transform using multiple transforms in MKL,
307 * since the current MKL versions does not support our storage format,
308 * and all but the most recent don't even have 2D real FFTs.
311 /* In-place X FFT */
312 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
314 if (status == 0)
316 stride[0] = 0;
317 stride[1] = nyc;
319 status =
320 (DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE) ||
321 DftiSetValue(fft->inplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) ||
322 DftiSetValue(fft->inplace[0], DFTI_INPUT_DISTANCE, 1) ||
323 DftiSetValue(fft->inplace[0], DFTI_INPUT_STRIDES, stride) ||
324 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
325 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_STRIDES, stride));
328 if (status == 0)
330 status = DftiCommitDescriptor(fft->inplace[0]);
333 /* Out-of-place X FFT */
334 if (status == 0)
336 status = DftiCreateDescriptor(&(fft->ooplace[0]), GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
339 if (status == 0)
341 stride[0] = 0;
342 stride[1] = nyc;
344 status =
345 (DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
346 DftiSetValue(fft->ooplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) ||
347 DftiSetValue(fft->ooplace[0], DFTI_INPUT_DISTANCE, 1) ||
348 DftiSetValue(fft->ooplace[0], DFTI_INPUT_STRIDES, stride) ||
349 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
350 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_STRIDES, stride));
353 if (status == 0)
355 status = DftiCommitDescriptor(fft->ooplace[0]);
359 /* In-place Y FFT */
360 if (status == 0)
362 status = DftiCreateDescriptor(&fft->inplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
365 if (status == 0)
367 stride[0] = 0;
368 stride[1] = 1;
370 status =
371 (DftiSetValue(fft->inplace[1], DFTI_PLACEMENT, DFTI_INPLACE) ||
372 DftiSetValue(fft->inplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
373 DftiSetValue(fft->inplace[1], DFTI_INPUT_DISTANCE, 2*nyc) ||
374 DftiSetValue(fft->inplace[1], DFTI_INPUT_STRIDES, stride) ||
375 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) ||
376 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_STRIDES, stride) ||
377 DftiCommitDescriptor(fft->inplace[1]));
381 /* Out-of-place real-to-complex (affects output distance) Y FFT */
382 if (status == 0)
384 status = DftiCreateDescriptor(&fft->ooplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
387 if (status == 0)
389 stride[0] = 0;
390 stride[1] = 1;
392 status =
393 (DftiSetValue(fft->ooplace[1], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
394 DftiSetValue(fft->ooplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
395 DftiSetValue(fft->ooplace[1], DFTI_INPUT_DISTANCE, (MKL_LONG)ny) ||
396 DftiSetValue(fft->ooplace[1], DFTI_INPUT_STRIDES, stride) ||
397 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) ||
398 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_STRIDES, stride) ||
399 DftiCommitDescriptor(fft->ooplace[1]));
403 /* Out-of-place complex-to-real (affects output distance) Y FFT */
404 if (status == 0)
406 status = DftiCreateDescriptor(&fft->ooplace[2], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
409 if (status == 0)
411 stride[0] = 0;
412 stride[1] = 1;
414 status =
415 (DftiSetValue(fft->ooplace[2], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
416 DftiSetValue(fft->ooplace[2], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
417 DftiSetValue(fft->ooplace[2], DFTI_INPUT_DISTANCE, 2*nyc) ||
418 DftiSetValue(fft->ooplace[2], DFTI_INPUT_STRIDES, stride) ||
419 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_DISTANCE, (MKL_LONG)ny) ||
420 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_STRIDES, stride) ||
421 DftiCommitDescriptor(fft->ooplace[2]));
425 if (status == 0)
427 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL)
429 status = ENOMEM;
433 if (status != 0)
435 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
436 gmx_fft_destroy(fft);
437 return status;
440 fft->ndim = 2;
441 fft->nx = nx;
442 fft->ny = ny;
443 fft->real_fft = 1;
445 *pfft = fft;
446 return 0;
450 gmx_fft_1d(gmx_fft_t fft,
451 enum gmx_fft_direction dir,
452 void * in_data,
453 void * out_data)
455 int inplace = (in_data == out_data);
456 int status = 0;
458 if ( (fft->real_fft == 1) || (fft->ndim != 1) ||
459 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
461 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
462 return EINVAL;
465 if (dir == GMX_FFT_FORWARD)
467 if (inplace)
469 status = DftiComputeForward(fft->inplace[0], in_data);
471 else
473 status = DftiComputeForward(fft->ooplace[0], in_data, out_data);
476 else
478 if (inplace)
480 status = DftiComputeBackward(fft->inplace[0], in_data);
482 else
484 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
488 if (status != 0)
490 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
491 status = -1;
494 return status;
500 gmx_fft_1d_real(gmx_fft_t fft,
501 enum gmx_fft_direction dir,
502 void * in_data,
503 void * out_data)
505 int inplace = (in_data == out_data);
506 int status = 0;
508 if ( (fft->real_fft != 1) || (fft->ndim != 1) ||
509 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
511 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
512 return EINVAL;
515 if (dir == GMX_FFT_REAL_TO_COMPLEX)
517 if (inplace)
519 status = DftiComputeForward(fft->inplace[0], in_data);
521 else
523 status = DftiComputeForward(fft->ooplace[0], in_data, out_data);
526 else
528 if (inplace)
530 status = DftiComputeBackward(fft->inplace[0], in_data);
532 else
534 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
538 if (status != 0)
540 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
541 status = -1;
544 return status;
549 gmx_fft_2d_real(gmx_fft_t fft,
550 enum gmx_fft_direction dir,
551 void * in_data,
552 void * out_data)
554 int inplace = (in_data == out_data);
555 int status = 0;
557 if ( (fft->real_fft != 1) || (fft->ndim != 2) ||
558 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
560 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
561 return EINVAL;
564 if (dir == GMX_FFT_REAL_TO_COMPLEX)
566 if (inplace)
568 /* real-to-complex in Y dimension, in-place */
569 status = DftiComputeForward(fft->inplace[1], in_data);
571 /* complex-to-complex in X dimension, in-place */
572 if (status == 0)
574 status = DftiComputeForward(fft->inplace[0], in_data);
577 else
579 /* real-to-complex in Y dimension, in_data to out_data */
580 status = DftiComputeForward(fft->ooplace[1], in_data, out_data);
582 /* complex-to-complex in X dimension, in-place to out_data */
583 if (status == 0)
585 status = DftiComputeForward(fft->inplace[0], out_data);
589 else
591 /* prior implementation was incorrect. See fft.cpp unit test */
592 gmx_incons("Complex -> Real is not supported by MKL.");
595 if (status != 0)
597 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
598 status = -1;
601 return status;
604 void
605 gmx_fft_destroy(gmx_fft_t fft)
607 int d;
609 if (fft != NULL)
611 for (d = 0; d < 3; d++)
613 if (fft->inplace[d] != NULL)
615 DftiFreeDescriptor(&fft->inplace[d]);
617 if (fft->ooplace[d] != NULL)
619 DftiFreeDescriptor(&fft->ooplace[d]);
622 if (fft->ooplace[3] != NULL)
624 DftiFreeDescriptor(&fft->ooplace[3]);
626 if (fft->work != NULL)
628 free(fft->work);
630 free(fft);
634 void gmx_fft_cleanup()
636 mkl_free_buffers();