Merge branch 'master' of git@git.gromacs.org:gromacs
[gromacs/rigid-bodies.git] / include / gmx_parallel_3dfft.h
blobb5fe72ee554df9ad09cd11760bbf73cc54e17c2d
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs Copyright (c) 1991-2005
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
15 * And Hey:
16 * Gnomes, ROck Monsters And Chili Sauce
19 #ifndef _gmx_parallel_3dfft_h_
20 #define _gmx_parallel_3dfft_h_
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
26 #ifdef GMX_MPI
28 #include "types/simple.h"
29 #include "gmxcomplex.h"
30 #include "gmx_fft.h"
32 /* We NEED MPI here. */
33 #ifdef GMX_LIB_MPI
34 #include <mpi.h>
35 #endif
36 #ifdef GMX_THREADS
37 #include "tmpi.h"
38 #endif
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
44 typedef struct gmx_parallel_3dfft *
45 gmx_parallel_3dfft_t;
49 /*! \brief Initialize parallel MPI-based 3D-FFT.
51 * This routine performs real-to-complex and complex-to-real parallel 3D FFTs,
52 * but not complex-to-complex.
54 * The routine is optimized for small-to-medium size FFTs used for PME and
55 * PPPM algorithms, and allocates extra workspace whenever it might improve
56 * performance.
58 * \param pfft_setup Pointer to parallel 3dfft setup structure, previously
59 * allocated or with automatic storage.
60 * \param ngridx Global number of grid cells in the x direction. Must be
61 * divisible by the number of nodes.
62 * \param ngridy Global number of grid cells in the y direction. Must be
63 * divisible by the number of nodes.
64 * \param ngridz Global number of grid cells in the z direction.
65 * \param node2slab Node id to slab index array, can be NULL.
66 * \param slab2grid_x Slab index to grid_x array (nnodes+1), can be NULL.
67 * \param comm MPI communicator, must have been initialized.
68 * \param bReproducible Try to avoid FFT timing optimizations and other stuff
69 * that could make results differ for two runs with
70 * identical input (reproducibility for debugging).
72 * \return 0 or a standard error code.
74 int
75 gmx_parallel_3dfft_init (gmx_parallel_3dfft_t * pfft_setup,
76 int ngridx,
77 int ngridy,
78 int ngridz,
79 int *node2slab,
80 int *slab2grid_x,
81 MPI_Comm comm,
82 bool bReproducible);
88 /*! \brief Get direct space grid index limits
90 * The z dimension is never distributed. In the direct space, the x dimension
91 * is distributed over nodes, and after the real-to-complex FFT we work with
92 * a transposed grid where the y dimension is partitioned over nodes.
94 * The node2slab array translates node ids to slab indices;
95 * when NULL the slab ids are assumed to be identical to the node ids
96 * in the communicator comm.
98 int
99 gmx_parallel_3dfft_limits(gmx_parallel_3dfft_t pfft_setup,
100 int * local_x_start,
101 int * local_nx,
102 int * local_y_start,
103 int * local_ny);
/* NOTE(review): this prototype carries no documentation in the header, and the
 * line holding its return type is not visible here. Judging only by its name
 * and parameters, it presumably redistributes the complex grid in data between
 * the x-partitioned and y-partitioned layouts over the nodes in comm, with work
 * as scratch space. node2slab presumably follows the convention described for
 * the neighbouring routines (NULL means slab id == node id).
 * TODO: confirm all of the above against the implementation. */
107 gmx_parallel_transpose(t_complex * data,
108 t_complex * work,
109 int nx,
110 int ny,
111 int local_x_start,
112 int local_nx,
113 int local_y_start,
114 int local_ny,
115 int nelem,
116 int nnodes,
117 int *node2slab,
118 MPI_Comm comm);
121 /*! \brief Perform a parallel MPI 3D FFT in the given direction.
123 * Direction is either GMX_FFT_REAL_TO_COMPLEX or GMX_FFT_COMPLEX_TO_REAL.
125 * If input and output arrays are separate there is no packing to consider.
126 * Input is simply nx*ny*nz in real, and output ny*nx*nzc in complex.
128 * If they are identical we need to make sure there is room for the complex data
129 * (length nzc=nz/2+1) in the array, so the _real_ space z dimension is
130 * always padded to nzc*2.
131 * In this case, the real dimensions are nx*ny*(nzc*2) while the complex
132 * dimensions are ny*nx*nzc (of type complex).
134 * Note that the X and Y dimensions are transposed in the reciprocal space
135 * to avoid extra communication!
137 * The node2slab array translates node ids to slab indices;
138 * when NULL the slab ids are assumed to be identical to the node ids
139 * in the communicator comm.
142 gmx_parallel_3dfft(gmx_parallel_3dfft_t pfft_setup,
143 enum gmx_fft_direction dir,
144 void * in_data,
145 void * out_data);
149 /*! \brief Release all data in parallel fft setup
151 * All temporary storage and FFT plans are released. The structure itself
152 * is not released, but its contents are invalid after this call.
154 * \param pfft_setup Parallel 3dfft setup.
156 * \return 0 or a standard error code.
159 gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t pfft_setup);
161 #ifdef __cplusplus
163 #endif
165 #endif /* GMX_MPI */
167 #endif /* _gmx_parallel_3dfft_h_ */