Two sets of coefficients for Coulomb FEP PME on GPU
[gromacs.git] / src / gromacs / ewald / pme_gpu_types.h
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */

/*! \internal \file
 * \brief Defines the PME GPU data structures
 * (the GPU function parameters used both on host and device sides).
 *
 * \author Aleksei Iupinov <a.yupinov@gmail.com>
 * \ingroup module_ewald
 */

#ifndef GMX_EWALD_PME_GPU_TYPES_H
#define GMX_EWALD_PME_GPU_TYPES_H

/*
 * In OpenCL, the structures must be laid out on the host and device exactly the same way.
 * If something is off, one might get a CL_INVALID_ARG_SIZE error if a structure's size
 * does not match between the two sides. What's worse, structures might be of the same size
 * but have their members aligned differently, resulting in wrong kernel results.
 * The structures below are aligned manually. The pattern is to order the members of a
 * struct from smallest to largest sizeof (arrays behave the same way as sequences of
 * separate fields), as described in "The Lost Art of C Structure Packing"; see the
 * sketch below.
 *
 * However, if the need arises at some point, they can all be aligned forcefully:
 *
 * #define GMX_GPU_ALIGNED __attribute__ ((aligned(8)))
 * struct GMX_GPU_ALIGNED PmeGpuConstParams
 * struct GMX_GPU_ALIGNED PmeGpuGridParams
 * etc...
 *
 * One might also try __attribute__ ((packed)), but it does not work with DeviceBuffer,
 * as DeviceBuffer does not appear to be POD.
 */
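
/* A minimal sketch (not part of GROMACS; both structs are hypothetical) of the
 * padding effect the note above refers to. On a typical 64-bit host where an
 * 8-byte member needs 8-byte alignment, the badly ordered struct picks up
 * padding around the small members, while the smallest-to-largest ordering
 * packs them tightly:
 */
#ifndef __OPENCL_C_VERSION__
struct IllustrationBadlyOrdered
{
    float small1; /* 4 bytes, then 4 bytes of padding to align the pointer */
    void* large;  /* 8 bytes */
    float small2; /* 4 bytes, then 4 bytes of tail padding */
};                /* typically sizeof == 24 */
struct IllustrationWellOrdered
{
    float small1; /* 4 bytes */
    float small2; /* 4 bytes - occupies what was padding above */
    void* large;  /* 8 bytes */
};                /* typically sizeof == 16 */
static_assert(sizeof(IllustrationWellOrdered) <= sizeof(IllustrationBadlyOrdered),
              "ordering members from smallest to largest sizeof does not add padding");
#endif
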
/*! \brief A workaround to hide the DeviceBuffer template from OpenCL kernel compilation:
 * it is turned into a dummy of the same size as the host-side implementation of the
 * device buffer. As we only care about 64-bit platforms, 8 bytes is fine.
 * TODO: what we should be doing is providing separate device-side views of the same
 * structures - then there would be no need for this macro.
 */
#ifndef __OPENCL_C_VERSION__
#    include "gromacs/gpu_utils/devicebuffer.h"
#    define HIDE_FROM_OPENCL_COMPILER(x) x
static_assert(sizeof(DeviceBuffer<float>) == 8,
              "DeviceBuffer is defined as an 8 byte stub for OpenCL C");
static_assert(sizeof(DeviceBuffer<int>) == 8,
              "DeviceBuffer is defined as an 8 byte stub for OpenCL C");
#else
#    define HIDE_FROM_OPENCL_COMPILER(x) char8
#endif
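
/* A minimal usage sketch (hypothetical struct, not part of this file): the same
 * member declaration compiles to a real DeviceBuffer<float> on the host and to
 * an 8-byte char8 dummy under OpenCL C, so the member's size and alignment
 * match on both sides; the explicit padding float follows the manual-alignment
 * pattern described above:
 */
struct ExampleDeviceParams
{
    float scaleFactor; /* 4 bytes */
    float padding;     /* 4 bytes, keeps the 8-byte member below 8-byte aligned */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_exampleData; /* 8 bytes on both sides */
};
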
#ifndef NUMFEPSTATES
//! Number of FEP states.
#    define NUMFEPSTATES 2
#endif

/* What follows is all the PME GPU function arguments,
 * sorted into several device-side structures depending on the update rate.
 * This is GPU agnostic (float3 replaced by float[3], etc.).
 * The GPU-framework specifics (e.g. cudaTextureObject_t handles) are described
 * in the larger structure PmeGpuCudaKernelParams in pme.cuh.
 */

/*! \internal \brief
 * A GPU data structure for storing the constant PME data.
 * This only has to be initialized once.
 */
struct PmeGpuConstParams
{
    /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */
    float elFactor;
    /*! \brief Virial and energy GPU array. Size is c_virialAndEnergyCount (7) floats.
     * The element order is virxx, viryy, virzz, virxy, virxz, viryz, energy. */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_virialAndEnergy[NUMFEPSTATES];
};
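
/* A host-side sketch (not GROMACS code; h_virialAndEnergy stands for a host
 * copy of one state's d_virialAndEnergy buffer) of how the documented element
 * order unpacks into a symmetric virial matrix and an energy value:
 */
inline void unpackVirialAndEnergy(const float h_virialAndEnergy[7], float virial[3][3], float* energy)
{
    virial[0][0] = h_virialAndEnergy[0];                /* virxx */
    virial[1][1] = h_virialAndEnergy[1];                /* viryy */
    virial[2][2] = h_virialAndEnergy[2];                /* virzz */
    virial[0][1] = virial[1][0] = h_virialAndEnergy[3]; /* virxy */
    virial[0][2] = virial[2][0] = h_virialAndEnergy[4]; /* virxz */
    virial[1][2] = virial[2][1] = h_virialAndEnergy[5]; /* viryz */
    *energy      = h_virialAndEnergy[6];
}
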
/*! \internal \brief
 * A GPU data structure for storing the PME data related to the grid sizes and cut-off.
 * This only has to be updated at every DD step.
 */
struct PmeGpuGridParams
{
    /*! \brief Ewald solving factor = (M_PI / pme->ewaldcoeff_q)^2 */
    float ewaldFactor;

    /* Grid sizes */
    /*! \brief Real-space grid data dimensions. */
    int realGridSize[DIM];
    /*! \brief Real-space grid dimensions, only converted to floating point. */
    float realGridSizeFP[DIM];
    /*! \brief Real-space grid dimensions (padded). The padding as compared to realGridSize includes the (order - 1) overlap. */
    int realGridSizePadded[DIM]; /* Is major dimension of this ever used in kernels? */
    /*! \brief Fourier grid dimensions. This counts the complex numbers! */
    int complexGridSize[DIM];
    /*! \brief Fourier grid dimensions (padded). This counts the complex numbers! */
    int complexGridSizePadded[DIM];

    /*! \brief Offsets for X/Y/Z components of d_splineModuli */
    int splineValuesOffset[DIM];
    /*! \brief Offsets for X/Y/Z components of d_fractShiftsTable and d_gridlineIndicesTable */
    int tablesOffsets[DIM];

    /* Grid arrays */
    /*! \brief Real space grid. */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_realGrid[NUMFEPSTATES];
    /*! \brief Complex grid - used in FFT/solve. If in-place cu/clFFT is used, then it is the same handle as realGrid. */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_fourierGrid[NUMFEPSTATES];

    /*! \brief Grid spline values as in pme->bsp_mod
     * (laid out sequentially (XXX....XYYY......YZZZ.....Z))
     */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_splineModuli[NUMFEPSTATES];
    /*! \brief Fractional shifts lookup table as in pme->fshx/fshy/fshz, laid out sequentially (XXX....XYYY......YZZZ.....Z) */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_fractShiftsTable;
    /*! \brief Gridline indices lookup table
     * (modulo lookup table as in pme->nnx/nny/nnz, laid out sequentially (XXX....XYYY......YZZZ.....Z)) */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<int>) d_gridlineIndicesTable;
};
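
/* A host-side sketch (an assumption for illustration, not GROMACS code) of how
 * offsets into the sequential XXX....XYYY......YZZZ.....Z layouts above are
 * meant to work: each dimension's block starts where the previous one ends.
 * The real tables may carry extra per-dimension padding beyond the grid size:
 */
inline void illustrateSequentialOffsets(const int sizes[3], int offsets[3])
{
    offsets[0] = 0;                   /* X block starts at the beginning */
    offsets[1] = sizes[0];            /* Y block follows the X block */
    offsets[2] = sizes[0] + sizes[1]; /* Z block follows the X and Y blocks */
}
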
/*! \internal \brief
 * A GPU data structure for storing the PME data of the atoms, local to this process' domain
 * partition. This only has to be updated every DD step.
 */
struct PmeGpuAtomParams
{
    /*! \brief Number of local atoms */
    int nAtoms;
    /*! \brief Global GPU memory array handle with input rvec atom coordinates.
     * The coordinates themselves change and need to be copied to the GPU for every PME computation,
     * but reallocation happens only at DD.
     */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<gmx::RVec>) d_coordinates;
    /*! \brief Global GPU memory array handle with input atom charges in states A and B.
     * The charges only need to be reallocated and copied to the GPU at every DD step.
     */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_coefficients[NUMFEPSTATES];
    /*! \brief Global GPU memory array handle with input/output rvec atom forces.
     * The forces change and need to be copied from (and possibly to) the GPU for every PME
     * computation, but reallocation happens only at DD.
     */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_forces;
    /*! \brief Global GPU memory array handle with ivec atom gridline indices.
     * Computed on GPU in the spline calculation part.
     */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<int>) d_gridlineIndices;
    /* B-spline parameters are computed entirely on GPU for every PME computation, not copied.
     * Unless we want to try something like GPU spread + CPU gather?
     */
    /*! \brief Global GPU memory array handle with B-spline values */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_theta;
    /*! \brief Global GPU memory array handle with B-spline derivative values */
    HIDE_FROM_OPENCL_COMPILER(DeviceBuffer<float>) d_dtheta;
};
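
/* A sketch (an assumption, not GROMACS code) of why charges are stored per FEP
 * state: a free-energy perturbation result interpolates between the A and B
 * states with a coupling parameter lambda, which is what the per-state buffers
 * above and the scale member of PmeGpuDynamicParams below support:
 */
inline float illustrateFepInterpolation(const float valuePerState[NUMFEPSTATES], float lambda)
{
    /* State 0 is A (weight 1 - lambda), state 1 is B (weight lambda) */
    return (1.0F - lambda) * valuePerState[0] + lambda * valuePerState[1];
}
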
/*! \internal \brief
 * A GPU data structure for storing the PME data which might change for each new PME computation.
 */
struct PmeGpuDynamicParams
{
    /* The box parameters. The box only changes size with pressure coupling enabled. */
    /*! \brief
     * Reciprocal (inverted unit cell) box.
     *
     * The box is transposed as compared to the CPU pme->recipbox.
     * Basically, spread uses matrix columns (while solve and gather use rows).
     * This storage format might not be optimal, since the box is always triangular, so it contains zeroes.
     */
    float recipBox[DIM][DIM];
    /*! \brief The unit cell volume for solving. */
    float boxVolume;

    /*! \brief The current coefficient scaling value. */
    float scale;
};
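
/* A device-style sketch (an assumption, not an actual GROMACS kernel) of the
 * access pattern the transposed storage enables: spreading converts a Cartesian
 * coordinate to fractional coordinates by reading recipBox rows, which are the
 * columns of the CPU-side pme->recipbox, assuming the zeroes of the triangular
 * box sit below the diagonal in this storage:
 */
inline void illustrateCartesianToFractional(const float recipBox[3][3], const float x[3], float t[3])
{
    t[0] = x[0] * recipBox[0][0] + x[1] * recipBox[0][1] + x[2] * recipBox[0][2];
    t[1] = x[1] * recipBox[1][1] + x[2] * recipBox[1][2]; /* triangular: [1][0] is zero */
    t[2] = x[2] * recipBox[2][2];                         /* triangular: [2][0], [2][1] are zero */
}
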
/*! \internal \brief
 * A single structure encompassing all the PME data used in GPU kernels on device.
 * To extend the list with platform-specific parameters, this can be inherited by the
 * GPU framework-specific structure.
 */
struct PmeGpuKernelParamsBase
{
    /*! \brief Constant data that is set once. */
    struct PmeGpuConstParams constants;
    /*! \brief Data dependent on the grid size/cutoff. */
    struct PmeGpuGridParams grid;
    /*! \brief Data dependent on the DD and local atoms. */
    struct PmeGpuAtomParams atoms;
    /*! \brief Data that possibly changes for every new PME computation.
     * This should be kept up-to-date by calling pme_gpu_prepare_computation(...)
     * before launching spreading.
     */
    struct PmeGpuDynamicParams current;
    /* These texture objects are only used in CUDA and are related to the grid size. */
    /*! \brief Texture object for accessing grid.d_fractShiftsTable */
    HIDE_FROM_OPENCL_COMPILER(DeviceTexture) fractShiftsTableTexture;
    /*! \brief Texture object for accessing grid.d_gridlineIndicesTable */
    HIDE_FROM_OPENCL_COMPILER(DeviceTexture) gridlineIndicesTableTexture;
};
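
/* A sketch (hypothetical name and member; the real CUDA structure is
 * PmeGpuCudaKernelParams in pme.cuh) of how a GPU framework extends this base
 * with platform-specific kernel arguments, as the brief above describes:
 */
#ifndef __OPENCL_C_VERSION__
struct ExampleFrameworkKernelParams : PmeGpuKernelParamsBase
{
    /* Platform-specific additions would go here, e.g. texture handles in CUDA */
    int exampleFrameworkHandle; /* hypothetical member */
};
#endif
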
#endif