Fix for the CUDA version of LINCS
[gromacs.git] / src / gromacs / mdlib / lincs_cuda_impl.h
blob880ae7a045076f01b97c52c3b3c3a9c00f7b3052
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \internal \file
37 * \brief Declares CUDA implementation class for LINCS
39 * This header file is needed to include from both the device-side
40 * kernels file, and the host-side management code.
42 * \author Artem Zhmurov <zhmurov@gmail.com>
44 * \ingroup module_mdlib
46 #ifndef GMX_MDLIB_LINCS_CUDA_IMPL_H
47 #define GMX_MDLIB_LINCS_CUDA_IMPL_H
50 #include "gromacs/mdlib/constr.h"
51 #include "gromacs/mdlib/lincs_cuda.h"
52 #include "gromacs/mdtypes/mdatom.h"
53 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
54 #include "gromacs/topology/idef.h"
56 namespace gmx
59 /* \brief LINCS parameters and GPU pointers
61 * This is used to accumulate all the parameters and pointers so they can be passed
62 * to the GPU as a single structure.
65 struct LincsCudaKernelParameters
67 //! Periodic boundary data
68 PbcAiuc pbcAiuc;
69 //! Order of expansion when inverting the matrix
70 int expansionOrder;
71 //! Number of iterations used to correct the projection
72 int numIterations;
73 //! Coordinates before the timestep (in the GPU memory)
74 float3 *d_x;
75 //! Coordinates after the timestep, before constraining (on GPU)
76 float3 *d_xp;
77 //! Velocities of atoms (on GPU)
78 float3 *d_v;
79 //! 1/mass for all atoms (GPU)
80 float *d_inverseMasses;
81 //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ], GPU)
82 float *d_virialScaled;
83 /*! \brief Total number of threads.
85 * This covers all constraints and gaps in the ends of the thread blocks
86 * that are necessary to avoid inter-block synchronizations.
87 * Should be a multiple of block size (the last block is filled with dummy to the end).
89 int numConstraintsThreads;
90 //! List of constrained atoms (GPU memory)
91 int2 *d_constraints;
92 //! Equilibrium distances for the constraints (GPU)
93 float *d_constraintsTargetLengths;
94 //! Number of constraints, coupled with the current one (GPU)
95 int *d_coupledConstraintsCounts;
96 //! List of coupled with the current one (GPU)
97 int *d_coupledConstraintsIndices;
98 //! Elements of the coupling matrix.
99 float *d_matrixA;
100 //! Mass factors (GPU)
101 float *d_massFactors;
104 /*! \internal \brief Class with interfaces and data for CUDA version of LINCS. */
105 class LincsCuda::Impl
108 public:
110 /*! \brief Constructor.
112 * \param[in] numAtoms Number of atoms
113 * \param[in] numIterations Number of iteration for the correction of the projection.
114 * \param[in] expansionOrder Order of the matrix inversion algorithm.
116 Impl(int numAtoms,
117 int numIterations,
118 int expansionOrder);
119 /*! \brief Destructor.*/
120 ~Impl();
122 /*! \brief Apply LINCS.
124 * Applies LINCS to coordinates and velocities, stored on GPU.
125 * Data at pointers xPrime and v (class fields) change in the GPU
126 * memory. The results are not automatically copied back to the CPU
127 * memory. Method uses this class data structures which should be
128 * updated when needed using update method.
130 * \param[in] updateVelocities If the velocities should be constrained.
131 * \param[in] invdt Reciprocal timestep (to scale Lagrange
132 * multipliers when velocities are updated)
133 * \param[in] computeVirial If virial should be updated.
134 * \param[in,out] virialScaled Scaled virial tensor to be updated.
136 void apply(bool updateVelocities,
137 real invdt,
138 bool computeVirial,
139 tensor virialScaled);
141 /*! \brief
142 * Update data-structures (e.g. after NB search step).
144 * Updates the constraints data and copies it to the GPU. Should be
145 * called if the particles were sorted, redistributed between domains, etc.
146 * This version uses common data formats so it can be called from anywhere
147 * in the code. Does not recycle the data preparation routines from the CPU
148 * version. Works only with simple case when all the constraints in idef are
149 * are handled by a single GPU. Triangles are not handled as special case.
151 * Information about constraints is taken from:
152 * idef.il[F_CONSTR].iatoms --- type (T) of constraint and two atom indexes (i1, i2)
153 * idef.iparams[T].constr.dA --- target length for constraint of type T
154 * From t_mdatom, the code takes:
155 * md.invmass --- array of inverse square root of masses for each atom in the system.
157 * \param[in] idef Local topology data to get information on constraints from.
158 * \param[in] md Atoms data to get atom masses from.
160 void set(const t_idef &idef,
161 const t_mdatoms &md);
163 /*! \brief
164 * Update PBC data.
166 * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
168 * \param[in] pbc The PBC data in t_pbc format.
170 void setPbc(const t_pbc *pbc);
172 /*! \brief
173 * Copy coordinates from provided CPU location to GPU.
175 * Copies the coordinates before the integration step (x) and coordinates
176 * after the integration step (xp) from the provided CPU location to GPU.
177 * The data are assumed to be in float3/fvec format (single precision).
179 * \param[in] h_x CPU pointer where coordinates should be copied from.
180 * \param[in] h_xp CPU pointer where coordinates should be copied from.
182 void copyCoordinatesToGpu(const rvec *h_x, const rvec *h_xp);
184 /*! \brief
185 * Copy velocities from provided CPU location to GPU.
187 * Nothing is done if the argument provided is a nullptr.
188 * The data are assumed to be in float3/fvec format (single precision).
190 * \param[in] h_v CPU pointer where velocities should be copied from.
192 void copyVelocitiesToGpu(const rvec *h_v);
194 /*! \brief
195 * Copy coordinates from GPU to provided CPU location.
197 * Copies the constrained coordinates to the provided location. The coordinates
198 * are assumed to be in float3/fvec format (single precision).
200 * \param[out] h_xp CPU pointer where coordinates should be copied to.
202 void copyCoordinatesFromGpu(rvec *h_xp);
204 /*! \brief
205 * Copy velocities from GPU to provided CPU location.
207 * The velocities are assumed to be in float3/fvec format (single precision).
209 * \param[in] h_v Pointer to velocities data.
211 void copyVelocitiesFromGpu(rvec *h_v);
213 /*! \brief
214 * Set the internal GPU-memory x, xprime and v pointers.
216 * Data is not copied. The data are assumed to be in float3/fvec format
217 * (float3 is used internally, but the data layout should be identical).
219 * \param[in] d_x Pointer to the coordinates before integrator update (on GPU)
220 * \param[in] d_xp Pointer to the coordinates after integrator update, before update (on GPU)
221 * \param[in] d_v Pointer to the velocities before integrator update (on GPU)
223 void setXVPointers(rvec *d_x, rvec *d_xp, rvec *d_v);
225 private:
227 /*! \brief CUDA stream
229 * \todo Currently default stream (nullptr) is used. The streams should be managed outside this module
230 * and passed here if needed.
232 cudaStream_t stream_;
234 //! Parameters and pointers, passed to the CUDA kernel
235 LincsCudaKernelParameters kernelParams_;
237 /*! \brief Number of atoms
239 * \todo Probably, it should not be stored by this class and passed as an argument when needed.
240 * This question is coupled with the general relocation of the coordinates and velocities.
242 int numAtoms_;
244 //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ])
245 std::vector<float> h_virialScaled_;
247 /*! \brief Maximum total of constraints assigned to this class so far.
249 * If the new number of constraints is larger then previous maximum, the GPU data arrays are
250 * reallocated.
252 int maxConstraintsNumberSoFar_;
255 } // namespace gmx
257 #endif