src/gromacs/mdlib/lincs_cuda_impl.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *
  37  * \brief Declares CUDA implementation class for LINCS
  38  *
  39  * This header file is needed to include from both the device-side
  40  * kernels file, and the host-side management code.
  41  *
  42  * \author Artem Zhmurov <zhmurov@gmail.com>
  43  *
  44  * \ingroup module_mdlib
  45  */
  46 #ifndef GMX_MDLIB_LINCS_CUDA_IMPL_H
  47 #define GMX_MDLIB_LINCS_CUDA_IMPL_H
  48
  49
  50 #include "gromacs/mdlib/constr.h"
  51 #include "gromacs/mdlib/lincs_cuda.h"
  52 #include "gromacs/mdtypes/mdatom.h"
  53 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
  54 #include "gromacs/topology/idef.h"
  55
  56 namespace gmx
  57 {
  58
  59 /* \brief LINCS parameters and GPU pointers
  60  *
  61  * This is used to accumulate all the parameters and pointers so they can be passed
  62  * to the GPU as a single structure.
  63  *
  64  */
  65 struct LincsCudaKernelParameters
  66 {
  67     //! Periodic boundary data
  68     PbcAiuc             pbcAiuc;
  69     //! Order of expansion when inverting the matrix
  70     int                 expansionOrder;
  71     //! Number of iterations used to correct the projection
  72     int                 numIterations;
  73     //! Coordinates before the timestep (in the GPU memory)
  74     float3             *d_x;
  75     //! Coordinates after the timestep, before constraining (on GPU)
  76     float3             *d_xp;
  77     //! Velocities of atoms (on GPU)
  78     float3             *d_v;
  79     //! 1/mass for all atoms (GPU)
  80     float              *d_inverseMasses;
  81     //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ], GPU)
  82     float              *d_virialScaled;
  83     /*! \brief Total number of threads.
  84      *
  85      *  This covers all constraints and gaps in the ends of the thread blocks
  86      *  that are necessary to avoid inter-block synchronizations.
  87      *  Should be a multiple of block size (the last block is filled with dummy to the end).
  88      */
  89     int                 numConstraintsThreads;
  90     //! List of constrained atoms (GPU memory)
  91     int2               *d_constraints;
  92     //! Equilibrium distances for the constraints (GPU)
  93     float              *d_constraintsTargetLengths;
  94     //! Number of constraints, coupled with the current one (GPU)
  95     int                *d_coupledConstraintsCounts;
  96     //! List of coupled with the current one (GPU)
  97     int                *d_coupledConstraintsIndices;
  98     //! Elements of the coupling matrix.
  99     float              *d_matrixA;
 100     //! Mass factors (GPU)
 101     float              *d_massFactors;
 102 };
 103
 104 /*! \internal \brief Class with interfaces and data for CUDA version of LINCS. */
 105 class LincsCuda::Impl
 106 {
 107
 108     public:
 109
 110         /*! \brief Constructor.
 111          *
 112          * \param[in] numAtoms         Number of atoms
 113          * \param[in] numIterations    Number of iteration for the correction of the projection.
 114          * \param[in] expansionOrder   Order of the matrix inversion algorithm.
 115          */
 116         Impl(int numAtoms,
 117              int numIterations,
 118              int expansionOrder);
 119         /*! \brief Destructor.*/
 120         ~Impl();
 121
 122         /*! \brief Apply LINCS.
 123          *
 124          * Applies LINCS to coordinates and velocities, stored on GPU.
 125          * Data at pointers xPrime and v (class fields) change in the GPU
 126          * memory. The results are not automatically copied back to the CPU
 127          * memory. Method uses this class data structures which should be
 128          * updated when needed using update method.
 129          *
 130          * \param[in] updateVelocities  If the velocities should be constrained.
 131          * \param[in] invdt             Reciprocal timestep (to scale Lagrange
 132          *                              multipliers when velocities are updated)
 133          * \param[in] computeVirial     If virial should be updated.
 134          * \param[in,out] virialScaled  Scaled virial tensor to be updated.
 135          */
 136         void apply(bool    updateVelocities,
 137                    real    invdt,
 138                    bool    computeVirial,
 139                    tensor  virialScaled);
 140
 141         /*! \brief
 142          * Update data-structures (e.g. after NB search step).
 143          *
 144          * Updates the constraints data and copies it to the GPU. Should be
 145          * called if the particles were sorted, redistributed between domains, etc.
 146          * This version uses common data formats so it can be called from anywhere
 147          * in the code. Does not recycle the data preparation routines from the CPU
 148          * version. Works only with simple case when all the constraints in idef are
 149          * are handled by a single GPU. Triangles are not handled as special case.
 150          *
 151          * Information about constraints is taken from:
 152          *     idef.il[F_CONSTR].iatoms  --- type (T) of constraint and two atom indexes (i1, i2)
 153          *     idef.iparams[T].constr.dA --- target length for constraint of type T
 154          * From t_mdatom, the code takes:
 155          *     md.invmass  --- array of inverse square root of masses for each atom in the system.
 156          *
 157          * \param[in] idef  Local topology data to get information on constraints from.
 158          * \param[in] md    Atoms data to get atom masses from.
 159          */
 160         void set(const t_idef    &idef,
 161                  const t_mdatoms &md);
 162
 163         /*! \brief
 164          * Update PBC data.
 165          *
 166          * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter.
 167          *
 168          * \param[in] pbc The PBC data in t_pbc format.
 169          */
 170         void setPbc(const t_pbc *pbc);
 171
 172         /*! \brief
 173          * Copy coordinates from provided CPU location to GPU.
 174          *
 175          * Copies the coordinates before the integration step (x) and coordinates
 176          * after the integration step (xp) from the provided CPU location to GPU.
 177          * The data are assumed to be in float3/fvec format (single precision).
 178          *
 179          * \param[in] h_x   CPU pointer where coordinates should be copied from.
 180          * \param[in] h_xp  CPU pointer where coordinates should be copied from.
 181          */
 182         void copyCoordinatesToGpu(const rvec *h_x, const rvec *h_xp);
 183
 184         /*! \brief
 185          * Copy velocities from provided CPU location to GPU.
 186          *
 187          * Nothing is done if the argument provided is a nullptr.
 188          * The data are assumed to be in float3/fvec format (single precision).
 189          *
 190          * \param[in] h_v  CPU pointer where velocities should be copied from.
 191          */
 192         void copyVelocitiesToGpu(const rvec *h_v);
 193
 194         /*! \brief
 195          * Copy coordinates from GPU to provided CPU location.
 196          *
 197          * Copies the constrained coordinates to the provided location. The coordinates
 198          * are assumed to be in float3/fvec format (single precision).
 199          *
 200          * \param[out] h_xp CPU pointer where coordinates should be copied to.
 201          */
 202         void copyCoordinatesFromGpu(rvec *h_xp);
 203
 204         /*! \brief
 205          * Copy velocities from GPU to provided CPU location.
 206          *
 207          * The velocities are assumed to be in float3/fvec format (single precision).
 208          *
 209          * \param[in] h_v  Pointer to velocities data.
 210          */
 211         void copyVelocitiesFromGpu(rvec *h_v);
 212
 213         /*! \brief
 214          * Set the internal GPU-memory x, xprime and v pointers.
 215          *
 216          * Data is not copied. The data are assumed to be in float3/fvec format
 217          * (float3 is used internally, but the data layout should be identical).
 218          *
 219          * \param[in] d_x   Pointer to the coordinates before integrator update (on GPU)
 220          * \param[in] d_xp  Pointer to the coordinates after integrator update, before update (on GPU)
 221          * \param[in] d_v   Pointer to the velocities before integrator update (on GPU)
 222          */
 223         void setXVPointers(rvec *d_x, rvec *d_xp, rvec *d_v);
 224
 225     private:
 226
 227         /*! \brief CUDA stream
 228          *
 229          *  \todo Currently default stream (nullptr) is used. The streams should be managed outside this module
 230          *        and passed here if needed.
 231          */
 232         cudaStream_t              stream_;
 233
 234         //! Parameters and pointers, passed to the CUDA kernel
 235         LincsCudaKernelParameters kernelParams_;
 236
 237         /*! \brief Number of atoms
 238          *
 239          * \todo Probably, it should not be stored by this class and passed as an argument when needed.
 240          *       This question is coupled with the general relocation of the coordinates and velocities.
 241          */
 242         int                       numAtoms_;
 243
 244         //! Scaled virial tensor (6 floats: [XX, XY, XZ, YY, YZ, ZZ])
 245         std::vector<float>        h_virialScaled_;
 246
 247         /*! \brief Maximum total of constraints assigned to this class so far.
 248          *
 249          * If the new number of constraints is larger then previous maximum, the GPU data arrays are
 250          * reallocated.
 251          */
 252         int                       maxConstraintsNumberSoFar_;
 253 };
 254
 255 } // namespace gmx
 256
 257 #endif