Fixed include statements such that double precision version of genborn.c
[gromacs/rigid-bodies.git] / src / mdlib / gmx_fft_mkl.c
blob8f4cd4d0dbca7c5656045f3df29f27cb88596236
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs 4.0 Copyright (c) 1991-2003
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
18 #ifdef HAVE_CONFIG_H
19 #include <config.h>
20 #endif
22 #ifdef GMX_FFT_MKL
24 #include <errno.h>
25 #include <stdlib.h>
27 #include <mkl_dfti.h>
30 #include "gmx_fft.h"
31 #include "gmx_fatal.h"
34 /* For MKL version (<10.0), we should define MKL_LONG. */
35 #ifndef MKL_LONG
36 #define MKL_LONG long int
37 #endif
40 #ifdef GMX_DOUBLE
41 #define GMX_DFTI_PREC DFTI_DOUBLE
42 #else
43 #define GMX_DFTI_PREC DFTI_SINGLE
44 #endif
46 /* Contents of the Intel MKL FFT fft datatype.
48 * Note that this is one of several possible implementations of gmx_fft_t.
50 * The MKL _API_ supports 1D,2D, and 3D transforms, including real-to-complex.
51 * Unfortunately the actual library implementation does not support 3D real
52 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
53 * either. In addition, the multi-dimensional storage format for real data
54 * is not compatible with our padding.
56 * To work around this we roll our own 2D and 3D real-to-complex transforms,
57 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
58 * (nx*ny) transforms at once when necessary. To perform strided multiple
59 * transforms out-of-place (i.e., without padding in the last dimension)
60 * on the fly we also need to separate the forward and backward
61 * handles for real-to-complex/complex-to-real data permutation.
63 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
64 * the out-of-place transforms. Still, whenever possible we try to use
65 * a single 3D-transform handle instead.
67 * So, the handles are enumerated as follows:
69 * 1D FFT (real too): Index 0 is the handle for the entire FFT
70 * 2D complex FFT: Index 0 is the handle for the entire FFT
71 * 3D complex FFT: Index 0 is the handle for the entire FFT
72 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
73 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
74 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
75 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
77 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
79 */
80 struct gmx_fft
82 int ndim; /**< Number of dimensions in FFT */
83 int nx; /**< Length of X transform */
84 int ny; /**< Length of Y transform */
85 int nz; /**< Length of Z transform */
86 int real_fft; /**< 1 if real FFT, otherwise 0 */
87 DFTI_DESCRIPTOR * inplace[3]; /**< in-place FFT */
88 DFTI_DESCRIPTOR * ooplace[4]; /**< out-of-place FFT */
89 t_complex * work; /**< Enable out-of-place c2r FFT */
94 int
95 gmx_fft_init_1d(gmx_fft_t * pfft,
96 int nx,
97 gmx_fft_flag flags)
99 gmx_fft_t fft;
100 int d;
101 int status;
103 if(pfft==NULL)
105 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
106 return EINVAL;
108 *pfft = NULL;
110 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
112 return ENOMEM;
115 /* Mark all handles invalid */
116 for(d=0;d<3;d++)
118 fft->inplace[d] = fft->ooplace[d] = NULL;
120 fft->ooplace[3] = NULL;
123 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
125 if( status == 0 )
126 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
128 if( status == 0 )
129 status = DftiCommitDescriptor(fft->inplace[0]);
132 if( status == 0 )
133 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
135 if( status == 0)
136 DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
138 if( status == 0)
139 DftiCommitDescriptor(fft->ooplace[0]);
142 if( status != 0 )
144 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
145 gmx_fft_destroy(fft);
146 return status;
149 fft->ndim = 1;
150 fft->nx = nx;
151 fft->real_fft = 0;
152 fft->work = NULL;
154 *pfft = fft;
155 return 0;
161 gmx_fft_init_1d_real(gmx_fft_t * pfft,
162 int nx,
163 gmx_fft_flag flags)
165 gmx_fft_t fft;
166 int d;
167 int status;
169 if(pfft==NULL)
171 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
172 return EINVAL;
174 *pfft = NULL;
176 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
178 return ENOMEM;
181 /* Mark all handles invalid */
182 for(d=0;d<3;d++)
184 fft->inplace[d] = fft->ooplace[d] = NULL;
186 fft->ooplace[3] = NULL;
188 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
190 if( status == 0 )
191 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
193 if( status == 0 )
194 status = DftiCommitDescriptor(fft->inplace[0]);
197 if( status == 0 )
198 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
200 if( status == 0 )
201 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
203 if( status == 0 )
204 status = DftiCommitDescriptor(fft->ooplace[0]);
207 if(status == DFTI_UNIMPLEMENTED)
209 gmx_fatal(FARGS,
210 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
211 gmx_fft_destroy(fft);
212 return status;
216 if( status != 0 )
218 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
219 gmx_fft_destroy(fft);
220 return status;
223 fft->ndim = 1;
224 fft->nx = nx;
225 fft->real_fft = 1;
226 fft->work = NULL;
228 *pfft = fft;
229 return 0;
235 gmx_fft_init_2d(gmx_fft_t * pfft,
236 int nx,
237 int ny,
238 gmx_fft_flag flags)
240 gmx_fft_t fft;
241 int d;
242 int status;
243 MKL_LONG length[2];
245 if(pfft==NULL)
247 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
248 return EINVAL;
250 *pfft = NULL;
252 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
254 return ENOMEM;
257 /* Mark all handles invalid */
258 for(d=0;d<3;d++)
260 fft->inplace[d] = fft->ooplace[d] = NULL;
262 fft->ooplace[3] = NULL;
264 length[0] = nx;
265 length[1] = ny;
267 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
269 if( status == 0 )
270 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
272 if( status == 0 )
273 status = DftiCommitDescriptor(fft->inplace[0]);
276 if( status == 0 )
277 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
279 if( status == 0 )
280 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
282 if( status == 0 )
283 status = DftiCommitDescriptor(fft->ooplace[0]);
286 if( status != 0 )
288 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
289 gmx_fft_destroy(fft);
290 return status;
293 fft->ndim = 2;
294 fft->nx = nx;
295 fft->ny = ny;
296 fft->real_fft = 0;
297 fft->work = NULL;
299 *pfft = fft;
300 return 0;
305 int
306 gmx_fft_init_2d_real(gmx_fft_t * pfft,
307 int nx,
308 int ny,
309 gmx_fft_flag flags)
311 gmx_fft_t fft;
312 int d;
313 int status;
314 MKL_LONG stride[2];
315 MKL_LONG nyc;
317 if(pfft==NULL)
319 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
320 return EINVAL;
322 *pfft = NULL;
324 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
326 return ENOMEM;
329 nyc = (ny/2 + 1);
331 /* Mark all handles invalid */
332 for(d=0;d<3;d++)
334 fft->inplace[d] = fft->ooplace[d] = NULL;
336 fft->ooplace[3] = NULL;
338 /* Roll our own 2D real transform using multiple transforms in MKL,
339 * since the current MKL versions does not support our storage format,
340 * and all but the most recent don't even have 2D real FFTs.
343 /* In-place X FFT */
344 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
346 if ( status == 0 )
348 stride[0] = 0;
349 stride[1] = nyc;
351 status =
352 (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE) ||
353 DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc) ||
354 DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1) ||
355 DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride) ||
356 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1) ||
357 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride));
360 if( status == 0 )
361 status = DftiCommitDescriptor(fft->inplace[0]);
363 /* Out-of-place X FFT */
364 if( status == 0 )
365 status = DftiCreateDescriptor(&(fft->ooplace[0]),GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
367 if( status == 0 )
369 stride[0] = 0;
370 stride[1] = nyc;
372 status =
373 (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
374 DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc) ||
375 DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1) ||
376 DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride) ||
377 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1) ||
378 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride));
381 if( status == 0 )
382 status = DftiCommitDescriptor(fft->ooplace[0]);
385 /* In-place Y FFT */
386 if( status == 0 )
387 status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
389 if( status == 0 )
391 stride[0] = 0;
392 stride[1] = 1;
394 status =
395 (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE) ||
396 DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
397 DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,2*nyc) ||
398 DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride) ||
399 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,2*nyc) ||
400 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride) ||
401 DftiCommitDescriptor(fft->inplace[1]));
405 /* Out-of-place real-to-complex (affects output distance) Y FFT */
406 if( status == 0 )
407 status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
409 if( status == 0 )
411 stride[0] = 0;
412 stride[1] = 1;
414 status =
415 (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
416 DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
417 DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)ny) ||
418 DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride) ||
419 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,2*nyc) ||
420 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride) ||
421 DftiCommitDescriptor(fft->ooplace[1]));
425 /* Out-of-place complex-to-real (affects output distance) Y FFT */
426 if( status == 0 )
427 status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
429 if( status == 0 )
431 stride[0] = 0;
432 stride[1] = 1;
434 status =
435 (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
436 DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
437 DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,2*nyc) ||
438 DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride) ||
439 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)ny) ||
440 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride) ||
441 DftiCommitDescriptor(fft->ooplace[2]));
445 if ( status == 0 )
447 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL)
449 status = ENOMEM;
453 if( status != 0 )
455 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
456 gmx_fft_destroy(fft);
457 return status;
460 fft->ndim = 2;
461 fft->nx = nx;
462 fft->ny = ny;
463 fft->real_fft = 1;
465 *pfft = fft;
466 return 0;
472 gmx_fft_init_3d(gmx_fft_t * pfft,
473 int nx,
474 int ny,
475 int nz,
476 gmx_fft_flag flags)
478 gmx_fft_t fft;
479 int d;
480 MKL_LONG length[3];
481 int status;
483 if(pfft==NULL)
485 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
486 return EINVAL;
488 *pfft = NULL;
490 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
492 return ENOMEM;
495 /* Mark all handles invalid */
496 for(d=0;d<3;d++)
498 fft->inplace[d] = fft->ooplace[d] = NULL;
500 fft->ooplace[3] = NULL;
502 length[0] = nx;
503 length[1] = ny;
504 length[2] = nz;
506 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,(MKL_LONG)3,length);
508 if( status == 0 )
509 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
511 if( status == 0 )
512 status = DftiCommitDescriptor(fft->inplace[0]);
515 if( status == 0 )
516 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,(MKL_LONG)3,length);
518 if( status == 0 )
519 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
521 if( status == 0 )
522 status = DftiCommitDescriptor(fft->ooplace[0]);
525 if( status != 0 )
527 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
528 gmx_fft_destroy(fft);
529 return status;
533 fft->ndim = 3;
534 fft->nx = nx;
535 fft->ny = ny;
536 fft->nz = nz;
537 fft->real_fft = 0;
538 fft->work = NULL;
540 *pfft = fft;
541 return 0;
548 gmx_fft_init_3d_real(gmx_fft_t * pfft,
549 int nx,
550 int ny,
551 int nz,
552 gmx_fft_flag flags)
554 gmx_fft_t fft;
555 int d;
556 int status;
557 MKL_LONG stride[2];
558 int nzc;
560 if(pfft==NULL)
562 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
563 return EINVAL;
565 *pfft = NULL;
567 nzc = (nz/2 + 1);
569 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
571 return ENOMEM;
574 /* Mark all handles invalid */
575 for(d=0;d<3;d++)
577 fft->inplace[d] = fft->ooplace[d] = NULL;
579 fft->ooplace[3] = NULL;
581 /* Roll our own 3D real transform using multiple transforms in MKL,
582 * since the current MKL versions does not support our storage format
583 * or 3D real transforms.
586 /* In-place X FFT.
587 * ny*nzc complex-to-complex transforms, length nx
588 * transform distance: 1
589 * element strides: ny*nzc
591 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
593 if ( status == 0)
595 stride[0] = 0;
596 stride[1] = ny*nzc;
598 status =
599 (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE) ||
600 DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc) ||
601 DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1) ||
602 DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride) ||
603 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1) ||
604 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride) ||
605 DftiCommitDescriptor(fft->inplace[0]));
608 /* Out-of-place X FFT:
609 * ny*nzc complex-to-complex transforms, length nx
610 * transform distance: 1
611 * element strides: ny*nzc
613 if( status == 0 )
614 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
616 if( status == 0 )
618 stride[0] = 0;
619 stride[1] = ny*nzc;
621 status =
622 (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
623 DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc) ||
624 DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1) ||
625 DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride) ||
626 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1) ||
627 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride) ||
628 DftiCommitDescriptor(fft->ooplace[0]));
632 /* In-place Y FFT.
633 * We cannot do all NX*NZC transforms at once, so define a handle to do
634 * NZC transforms, and then execute it NX times.
635 * nzc complex-to-complex transforms, length ny
636 * transform distance: 1
637 * element strides: nzc
639 if( status == 0 )
640 status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
642 if( status == 0 )
644 stride[0] = 0;
645 stride[1] = nzc;
647 status =
648 (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE) ||
649 DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc) ||
650 DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,1) ||
651 DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride) ||
652 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,1) ||
653 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride) ||
654 DftiCommitDescriptor(fft->inplace[1]));
658 /* Out-of-place Y FFT:
659 * We cannot do all NX*NZC transforms at once, so define a handle to do
660 * NZC transforms, and then execute it NX times.
661 * nzc complex-to-complex transforms, length ny
662 * transform distance: 1
663 * element strides: nzc
665 if( status == 0 )
666 status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
668 if( status == 0 )
670 stride[0] = 0;
671 stride[1] = nzc;
673 status =
674 (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
675 DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc) ||
676 DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,1) ||
677 DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride) ||
678 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,1) ||
679 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride) ||
680 DftiCommitDescriptor(fft->ooplace[1]));
683 /* In-place Z FFT:
684 * nx*ny real-to-complex transforms, length nz
685 * transform distance: nzc*2 -> nzc*2
686 * element strides: 1
688 if( status == 0 )
689 status = DftiCreateDescriptor(&fft->inplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
691 if( status == 0 )
693 stride[0] = 0;
694 stride[1] = 1;
696 status =
697 (DftiSetValue(fft->inplace[2],DFTI_PLACEMENT,DFTI_INPLACE) ||
698 DftiSetValue(fft->inplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
699 DftiSetValue(fft->inplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2) ||
700 DftiSetValue(fft->inplace[2],DFTI_INPUT_STRIDES,stride) ||
701 DftiSetValue(fft->inplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2) ||
702 DftiSetValue(fft->inplace[2],DFTI_OUTPUT_STRIDES,stride) ||
703 DftiCommitDescriptor(fft->inplace[2]));
707 /* Out-of-place real-to-complex (affects distance) Z FFT:
708 * nx*ny real-to-complex transforms, length nz
709 * transform distance: nz -> nzc*2
710 * element STRIDES: 1
712 if( status == 0 )
713 status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
715 if( status == 0 )
717 stride[0] = 0;
718 stride[1] = 1;
720 status =
721 (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
722 DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
723 DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nz) ||
724 DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride) ||
725 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2) ||
726 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride) ||
727 DftiCommitDescriptor(fft->ooplace[2]));
731 /* Out-of-place complex-to-real (affects distance) Z FFT:
732 * nx*ny real-to-complex transforms, length nz
733 * transform distance: nzc*2 -> nz
734 * element STRIDES: 1
736 if( status == 0 )
737 status = DftiCreateDescriptor(&fft->ooplace[3],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
739 if( status == 0 )
741 stride[0] = 0;
742 stride[1] = 1;
744 status =
745 (DftiSetValue(fft->ooplace[3],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
746 DftiSetValue(fft->ooplace[3],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
747 DftiSetValue(fft->ooplace[3],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2) ||
748 DftiSetValue(fft->ooplace[3],DFTI_INPUT_STRIDES,stride) ||
749 DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nz) ||
750 DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_STRIDES,stride) ||
751 DftiCommitDescriptor(fft->ooplace[3]));
755 if ( status == 0 )
757 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*ny*(nz/2+1)))) == NULL)
759 status = ENOMEM;
764 if( status != 0 )
766 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
767 gmx_fft_destroy(fft);
768 return status;
772 fft->ndim = 3;
773 fft->nx = nx;
774 fft->ny = ny;
775 fft->nz = nz;
776 fft->real_fft = 1;
778 *pfft = fft;
779 return 0;
785 int
786 gmx_fft_1d(gmx_fft_t fft,
787 enum gmx_fft_direction dir,
788 void * in_data,
789 void * out_data)
791 int inplace = (in_data == out_data);
792 int status = 0;
794 if( (fft->real_fft == 1) || (fft->ndim != 1) ||
795 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
797 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
798 return EINVAL;
801 if(dir==GMX_FFT_FORWARD)
803 if(inplace)
805 status = DftiComputeForward(fft->inplace[0],in_data);
807 else
809 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
812 else
814 if(inplace)
816 status = DftiComputeBackward(fft->inplace[0],in_data);
818 else
820 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
824 if( status != 0 )
826 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
827 status = -1;
830 return status;
835 int
836 gmx_fft_1d_real(gmx_fft_t fft,
837 enum gmx_fft_direction dir,
838 void * in_data,
839 void * out_data)
841 int inplace = (in_data == out_data);
842 int status = 0;
844 if( (fft->real_fft != 1) || (fft->ndim != 1) ||
845 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
847 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
848 return EINVAL;
851 if(dir==GMX_FFT_REAL_TO_COMPLEX)
853 if(inplace)
855 status = DftiComputeForward(fft->inplace[0],in_data);
857 else
859 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
862 else
864 if(inplace)
866 status = DftiComputeBackward(fft->inplace[0],in_data);
868 else
870 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
874 if( status != 0 )
876 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
877 status = -1;
880 return status;
884 int
885 gmx_fft_2d(gmx_fft_t fft,
886 enum gmx_fft_direction dir,
887 void * in_data,
888 void * out_data)
890 int inplace = (in_data == out_data);
891 int status = 0;
893 if( (fft->real_fft == 1) || (fft->ndim != 2) ||
894 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
896 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
897 return EINVAL;
900 if(dir==GMX_FFT_FORWARD)
902 if(inplace)
904 status = DftiComputeForward(fft->inplace[0],in_data);
906 else
908 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
911 else
913 if(inplace)
915 status = DftiComputeBackward(fft->inplace[0],in_data);
917 else
919 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
923 if( status != 0 )
925 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
926 status = -1;
929 return status;
933 int
934 gmx_fft_2d_real(gmx_fft_t fft,
935 enum gmx_fft_direction dir,
936 void * in_data,
937 void * out_data)
939 int inplace = (in_data == out_data);
940 int status = 0;
942 if( (fft->real_fft != 1) || (fft->ndim != 2) ||
943 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
945 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
946 return EINVAL;
949 if(dir==GMX_FFT_REAL_TO_COMPLEX)
951 if(inplace)
953 /* real-to-complex in Y dimension, in-place */
954 status = DftiComputeForward(fft->inplace[1],in_data);
956 /* complex-to-complex in X dimension, in-place */
957 if ( status == 0 )
958 status = DftiComputeForward(fft->inplace[0],in_data);
960 else
962 /* real-to-complex in Y dimension, in_data to out_data */
963 status = DftiComputeForward(fft->ooplace[1],in_data,out_data);
965 /* complex-to-complex in X dimension, in-place to out_data */
966 if ( status == 0 )
967 status = DftiComputeForward(fft->inplace[0],out_data);
970 else
972 if(inplace)
974 /* complex-to-complex in X dimension, in-place */
975 status = DftiComputeBackward(fft->inplace[0],in_data);
977 /* complex-to-real in Y dimension, in-place */
978 if ( status == 0 )
979 status = DftiComputeBackward(fft->inplace[1],in_data);
982 else
984 /* complex-to-complex in X dimension, from in_data to work */
985 status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
987 /* complex-to-real in Y dimension, from work to out_data */
988 if ( status == 0 )
989 status = DftiComputeBackward(fft->ooplace[1],fft->work,out_data);
994 if( status != 0 )
996 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
997 status = -1;
1000 return status;
1004 int
1005 gmx_fft_3d(gmx_fft_t fft,
1006 enum gmx_fft_direction dir,
1007 void * in_data,
1008 void * out_data)
1010 int inplace = (in_data == out_data);
1011 int status = 0;
1013 if( (fft->real_fft == 1) || (fft->ndim != 3) ||
1014 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
1016 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1017 return EINVAL;
1020 if(dir==GMX_FFT_FORWARD)
1022 if(inplace)
1024 status = DftiComputeForward(fft->inplace[0],in_data);
1026 else
1028 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
1031 else
1033 if(inplace)
1035 status = DftiComputeBackward(fft->inplace[0],in_data);
1037 else
1039 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
1043 if( status != 0 )
1045 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1046 status = -1;
1049 return status;
1053 int
1054 gmx_fft_3d_real(gmx_fft_t fft,
1055 enum gmx_fft_direction dir,
1056 void * in_data,
1057 void * out_data)
1059 int inplace = (in_data == out_data);
1060 int status = 0;
1061 int i;
1062 int nx,ny,nzc;
1064 nx = fft->nx;
1065 ny = fft->ny;
1066 nzc = fft->nz/2 + 1;
1068 if( (fft->real_fft != 1) || (fft->ndim != 3) ||
1069 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
1071 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1072 return EINVAL;
1075 if(dir==GMX_FFT_REAL_TO_COMPLEX)
1077 if(inplace)
1079 /* real-to-complex in Z dimension, in-place */
1080 status = DftiComputeForward(fft->inplace[2],in_data);
1082 /* complex-to-complex in Y dimension, in-place */
1083 for(i=0;i<nx;i++)
1085 if ( status == 0 )
1086 status = DftiComputeForward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1089 /* complex-to-complex in X dimension, in-place */
1090 if ( status == 0 )
1091 status = DftiComputeForward(fft->inplace[0],in_data);
1093 else
1095 /* real-to-complex in Z dimension, from in_data to out_data */
1096 status = DftiComputeForward(fft->ooplace[2],in_data,out_data);
1098 /* complex-to-complex in Y dimension, in-place */
1099 for(i=0;i<nx;i++)
1101 if ( status == 0 )
1102 status = DftiComputeForward(fft->inplace[1],(t_complex *)out_data+i*ny*nzc);
1105 /* complex-to-complex in X dimension, in-place */
1106 if ( status == 0 )
1107 status = DftiComputeForward(fft->inplace[0],out_data);
1110 else
1112 if(inplace)
1114 /* complex-to-complex in X dimension, in-place */
1115 status = DftiComputeBackward(fft->inplace[0],in_data);
1117 /* complex-to-complex in Y dimension, in-place */
1118 for(i=0;i<nx;i++)
1120 if ( status == 0 )
1121 status = DftiComputeBackward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1124 /* complex-to-real in Z dimension, in-place */
1125 if ( status == 0 )
1126 status = DftiComputeBackward(fft->inplace[2],in_data);
1128 else
1130 /* complex-to-complex in X dimension, from in_data to work */
1131 status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
1133 /* complex-to-complex in Y dimension, in-place */
1134 for(i=0;i<nx;i++)
1136 if ( status == 0 )
1137 status = DftiComputeBackward(fft->inplace[1],fft->work+i*ny*nzc);
1140 /* complex-to-real in Z dimension, work to out_data */
1141 if ( status == 0 )
1142 status = DftiComputeBackward(fft->ooplace[2],fft->work,out_data);
1146 if( status != 0 )
1148 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1149 status = -1;
1152 return status;
1157 void
1158 gmx_fft_destroy(gmx_fft_t fft)
1160 int d;
1162 if(fft != NULL)
1164 for(d=0;d<3;d++)
1166 if(fft->inplace[d] != NULL)
1168 DftiFreeDescriptor(&fft->inplace[d]);
1170 if(fft->ooplace[d] != NULL)
1172 DftiFreeDescriptor(&fft->ooplace[d]);
1175 if(fft->ooplace[3] != NULL)
1177 DftiFreeDescriptor(&fft->ooplace[3]);
1179 free(fft);
1183 #else
1185 gmx_fft_mkl_empty;
1186 #endif /* GMX_FFT_MKL */