From 1c8eb7c5398332b4d485f2cba086c3d8d80068c2 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Mon, 18 Mar 2019 19:03:59 +0100 Subject: [PATCH] Combine CUDA Leap-Frog, LINCS and SETTLE. I. This is the first step in combining constraints and integrator into "UpdateAndConstraints" module. The initial merge does not imply any performance optimisation or code clean-up. Hence, this patch keeps all the temporary infrastructure that was built around SETTLE, LINCS and Leap-Frog to allow them to function as a separate units. In the following commits, this infrastructure will be removed and these three implementations will be more closely integrated. To enable, set GMX_UPDATE_CONSTRAIN_GPU environment variable. Note, that environment variables GMX_LINCS_GPU, GMX_SETTLE_GPU and GMX_INTEGRATE_GPU will no longer work. Refs #2816, #2888 Change-Id: I8730aad0ecaa0230686fe89d1157b0da2f01f7bc --- src/gromacs/mdlib/CMakeLists.txt | 8 +- src/gromacs/mdlib/constr.cpp | 144 +++------- src/gromacs/mdlib/update_constrain_cuda.h | 188 +++++++++++++ src/gromacs/mdlib/update_constrain_cuda_impl.cpp | 139 ++++++++++ src/gromacs/mdlib/update_constrain_cuda_impl.cu | 335 +++++++++++++++++++++++ src/gromacs/mdlib/update_constrain_cuda_impl.h | 215 +++++++++++++++ src/gromacs/mdrun/md.cpp | 73 +++-- 7 files changed, 954 insertions(+), 148 deletions(-) create mode 100644 src/gromacs/mdlib/update_constrain_cuda.h create mode 100644 src/gromacs/mdlib/update_constrain_cuda_impl.cpp create mode 100644 src/gromacs/mdlib/update_constrain_cuda_impl.cu create mode 100644 src/gromacs/mdlib/update_constrain_cuda_impl.h diff --git a/src/gromacs/mdlib/CMakeLists.txt b/src/gromacs/mdlib/CMakeLists.txt index e613fecab6..d543f04651 100644 --- a/src/gromacs/mdlib/CMakeLists.txt +++ b/src/gromacs/mdlib/CMakeLists.txt @@ -40,14 +40,10 @@ if (BUILD_TESTING) endif() if(GMX_USE_CUDA) gmx_add_libgromacs_sources( + leapfrog_cuda_impl.cu lincs_cuda_impl.cu settle_cuda_impl.cu - ) -endif() - -if(GMX_USE_CUDA) - gmx_add_libgromacs_sources( - leapfrog_cuda_impl.cu + update_constrain_cuda_impl.cu ) endif() diff --git a/src/gromacs/mdlib/constr.cpp b/src/gromacs/mdlib/constr.cpp index 3e2673ce75..fe61663bff 100644 --- a/src/gromacs/mdlib/constr.cpp +++ b/src/gromacs/mdlib/constr.cpp @@ -62,9 +62,7 @@ #include "gromacs/math/vec.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/lincs.h" -#include "gromacs/mdlib/lincs_cuda.h" #include "gromacs/mdlib/settle.h" -#include "gromacs/mdlib/settle_cuda.h" #include "gromacs/mdlib/shake.h" #include "gromacs/mdtypes/commrec.h" #include "gromacs/mdtypes/inputrec.h" @@ -89,11 +87,6 @@ namespace gmx { -//! Whether the GPU version of LINCS should be used. -static const bool c_useGpuLincs = (getenv("GMX_LINCS_GPU") != nullptr); -//! Whether the GPU version of SETTLE should be used. -static const bool c_useGpuSettle = (getenv("GMX_SETTLE_GPU") != nullptr); - /* \brief Impl class for Constraints * * \todo Members like md, idef are valid only for the lifetime of a @@ -185,15 +178,11 @@ class Constraints::Impl /*!\brief Input options. * * \todo Replace with IMdpOptions */ - const t_inputrec &ir; + const t_inputrec &ir; //! Flop counting support. - t_nrnb *nrnb = nullptr; + t_nrnb *nrnb = nullptr; //! Tracks wallcycle usage. - gmx_wallcycle *wcycle; - //! Valid LINCS CUDA object when that implementation is being used, nullptr otherwise. - std::unique_ptr lincsCuda; - //! SETTLE CUDA object or a dummy if CUDA is not enabled for SETTLE. - std::unique_ptr settleCuda; + gmx_wallcycle *wcycle; }; Constraints::~Constraints() = default; @@ -493,25 +482,6 @@ Constraints::Impl::apply(bool bLog, bDump = TRUE; } } - else - { - if (lincsCuda != nullptr && c_useGpuLincs) - { - GMX_RELEASE_ASSERT(ir.efep == efepNO || dvdlambda == nullptr, - "Free energy perturbation is not supported by the GPU version of LINCS.\n"); - lincsCuda->setPbc(pbc_null); - lincsCuda->copyCoordinatesToGpu(x, xprime); - lincsCuda->copyVelocitiesToGpu(v); - lincsCuda->apply(v != nullptr, - invdt, - vir != nullptr, vir_r_m_dr); - lincsCuda->copyCoordinatesFromGpu(xprime); - if (v != nullptr) - { - lincsCuda->copyVelocitiesFromGpu(v); - } - } - } if (shaked != nullptr) { @@ -540,61 +510,41 @@ Constraints::Impl::apply(bool bLog, switch (econq) { case ConstraintVariable::Positions: - // If settled was initialized, use CPU version of settle - if (settled != nullptr) - { #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) + for (th = 0; th < nth; th++) + { + try { - try + if (th > 0) { - if (th > 0) - { - clear_mat(vir_r_m_dr_th[th]); - } - - csettle(settled, - nth, th, - pbc_null, - x[0], xprime[0], - invdt, v ? v[0] : nullptr, - vir != nullptr, - th == 0 ? vir_r_m_dr : vir_r_m_dr_th[th], - th == 0 ? &bSettleErrorHasOccurred0 : &bSettleErrorHasOccurred[th]); + clear_mat(vir_r_m_dr_th[th]); } - GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; - } - inc_nrnb(nrnb, eNR_SETTLE, nsettle); - if (v != nullptr) - { - inc_nrnb(nrnb, eNR_CONSTR_V, nsettle*3); - } - if (vir != nullptr) - { - inc_nrnb(nrnb, eNR_CONSTR_VIR, nsettle*3); + + csettle(settled, + nth, th, + pbc_null, + x[0], xprime[0], + invdt, v ? v[0] : nullptr, + vir != nullptr, + th == 0 ? vir_r_m_dr : vir_r_m_dr_th[th], + th == 0 ? &bSettleErrorHasOccurred0 : &bSettleErrorHasOccurred[th]); } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } - else + inc_nrnb(nrnb, eNR_SETTLE, nsettle); + if (v != nullptr) { - // If CPU version of SETTLE was not initialized, GPU version should have being. - GMX_ASSERT(settleCuda != nullptr, "There are settles, but nither CPU nor CUDA version of SETTLE was initialized."); - settleCuda->setPbc(pbc_null); - settleCuda->copyCoordinatesToGpu(x, xprime); - settleCuda->copyVelocitiesToGpu(v); - settleCuda->apply(v != nullptr, invdt, - vir != nullptr, vir_r_m_dr); - settleCuda->copyCoordinatesFromGpu(xprime); - if (v != nullptr) - { - settleCuda->copyVelocitiesFromGpu(v); - } + inc_nrnb(nrnb, eNR_CONSTR_V, nsettle*3); + } + if (vir != nullptr) + { + inc_nrnb(nrnb, eNR_CONSTR_VIR, nsettle*3); } break; case ConstraintVariable::Velocities: case ConstraintVariable::Derivative: case ConstraintVariable::Force: case ConstraintVariable::ForceDispl: - GMX_RELEASE_ASSERT(settled != nullptr, "SETTLE projection correction is not implemented on GPU."); #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { @@ -967,14 +917,7 @@ Constraints::Impl::setConstraints(const gmx_localtop_t &top, */ if (ir.eConstrAlg == econtLINCS) { - if (c_useGpuLincs) - { - lincsCuda->set(top.idef, md); - } - else - { - set_lincs(top.idef, md, EI_DYNAMICS(ir.eI), cr, lincsd); - } + set_lincs(top.idef, md, EI_DYNAMICS(ir.eI), cr, lincsd); } if (ir.eConstrAlg == econtSHAKE) { @@ -996,10 +939,6 @@ Constraints::Impl::setConstraints(const gmx_localtop_t &top, settle_set_constraints(settled, &idef->il[F_SETTLE], md); } - else if (settleCuda != nullptr && c_useGpuSettle) - { - settleCuda->set(top.idef, md); - } /* Make a selection of the local atoms for essential dynamics */ if (ed && cr->dd) @@ -1128,20 +1067,10 @@ Constraints::Impl::Impl(const gmx_mtop_t &mtop_p, if (ir.eConstrAlg == econtLINCS) { - if (c_useGpuLincs) - { - fprintf(log, "Initializing LINCS on a GPU for %d atoms\n", mtop.natoms); - GMX_RELEASE_ASSERT(nflexcon == 0, "Flexible constraints are not supported by the GPU-based implementation of LINCS.\n"); - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), "CUDA version of LINCS is not supported with domain decomposition"); - lincsCuda = std::make_unique(mtop.natoms, ir.nLincsIter, ir.nProjOrder); - } - else - { - lincsd = init_lincs(log, mtop, - nflexcon, at2con_mt, - DOMAINDECOMP(cr) && cr->dd->splitConstraints, - ir.nLincsIter, ir.nProjOrder); - } + lincsd = init_lincs(log, mtop, + nflexcon, at2con_mt, + DOMAINDECOMP(cr) && cr->dd->splitConstraints, + ir.nLincsIter, ir.nProjOrder); } if (ir.eConstrAlg == econtSHAKE) @@ -1170,17 +1099,8 @@ Constraints::Impl::Impl(const gmx_mtop_t &mtop_p, bInterCGsettles = inter_charge_group_settles(mtop); - if (c_useGpuSettle) - { - fprintf(log, "Initializing SETTLE on a GPU for %d atoms\n", mtop.natoms); - GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr), "CUDA version of SETTLE is not supported with domain decomposition"); - gmx_omp_nthreads_set(emntSETTLE, 1); - settleCuda = std::make_unique(mtop.natoms, mtop); - } - else - { - settled = settle_init(mtop); - } + settled = settle_init(mtop); + /* Make an atom to settle index for use in domain decomposition */ for (size_t mt = 0; mt < mtop.moltype.size(); mt++) { diff --git a/src/gromacs/mdlib/update_constrain_cuda.h b/src/gromacs/mdlib/update_constrain_cuda.h new file mode 100644 index 0000000000..f80b7c91dd --- /dev/null +++ b/src/gromacs/mdlib/update_constrain_cuda.h @@ -0,0 +1,188 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \libinternal \file + * + * \brief Declaration of high-level functions of CUDA implementation of update and constrain class. + * + * \todo This should only list interfaces needed for libgromacs clients (e.g. + * management of coordinates, velocities and forces should not be here). + * \todo Change "cuda" suffix to "gpu" + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + * \inlibraryapi + */ +#ifndef GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_H +#define GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_H + +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/mdatom.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/topology/idef.h" +#include "gromacs/utility/classhelpers.h" + +namespace gmx +{ + +class UpdateConstrainCuda +{ + + public: + /*! \brief Create Update-Constrain object. + * + * \param[in] numAtoms Number of atoms. + * + * \param[in] ir Input record data: LINCS takes number of iterations and order of + * projection from it. + * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms + * and target O-H and H-H distances from this object. + */ + UpdateConstrainCuda(int numAtoms, + const t_inputrec &ir, + const gmx_mtop_t &mtop); + + ~UpdateConstrainCuda(); + + /*! \brief Integrate + * + * Integrates the equation of motion using Leap-Frog algorithm and applies + * LINCS and SETTLE constraints. + * Updates d_xp_ and d_v_ fields of this object. + * + * \param[in] dt Timestep + * \param[in] updateVelocities If the velocities should be constrained. + * \param[in] computeVirial If virial should be updated. + * \param[out] virial Place to save virial tensor. + */ + void integrate(real dt, + bool updateVelocities, + bool computeVirial, + tensor virial); + + /*! \brief + * Update data-structures (e.g. after NB search step). + * + * \param[in] idef System topology + * \param[in] md Atoms data. Can be used to update masses if needed (not used now). + */ + void set(const t_idef &idef, + const t_mdatoms &md); + + /*! \brief + * Update PBC data. + * + * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter. + * + * \param[in] pbc The PBC data in t_pbc format. + */ + void setPbc(const t_pbc *pbc); + + /*! \brief + * Copy coordinates from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_x CPU pointer where coordinates should be copied from. + */ + void copyCoordinatesToGpu(const rvec *h_x); + + /*! \brief + * Copy velocities from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v CPU pointer where velocities should be copied from. + */ + void copyVelocitiesToGpu(const rvec *h_v); + + /*! \brief + * Copy forces from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f CPU pointer where forces should be copied from. + */ + void copyForcesToGpu(const rvec *h_f); + + /*! \brief + * Copy coordinates from GPU to CPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[out] h_xp CPU pointer where coordinates should be copied to. + */ + void copyCoordinatesFromGpu(rvec *h_xp); + + /*! \brief + * Copy velocities from GPU to CPU. + * + * The velocities are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v Pointer to velocities data. + */ + void copyVelocitiesFromGpu(rvec *h_v); + + /*! \brief + * Copy forces from GPU to CPU. + * + * The forces are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f Pointer to forces data. + */ + void copyForcesFromGpu(rvec *h_f); + + /*! \brief + * Set the internal GPU-memory x, xprime and v pointers. + * + * Data is not copied. The data are assumed to be in float3/fvec format + * (float3 is used internally, but the data layout should be identical). + * + * \param[in] d_x Pointer to the coordinates for the input (on GPU) + * \param[in] d_xp Pointer to the coordinates for the output (on GPU) + * \param[in] d_v Pointer to the velocities (on GPU) + * \param[in] d_f Pointer to the forces (on GPU) + */ + void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f); + + private: + class Impl; + gmx::PrivateImplPointer impl_; + +}; + +} //namespace gmx + +#endif diff --git a/src/gromacs/mdlib/update_constrain_cuda_impl.cpp b/src/gromacs/mdlib/update_constrain_cuda_impl.cpp new file mode 100644 index 0000000000..fd12faacf0 --- /dev/null +++ b/src/gromacs/mdlib/update_constrain_cuda_impl.cpp @@ -0,0 +1,139 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Stub for update and constraints class CPU implementation. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "config.h" + +#include "gromacs/mdlib/update_constrain_cuda.h" + +#if GMX_GPU != GMX_GPU_CUDA + +namespace gmx +{ + +/*!\brief Impl class stub. */ +class UpdateConstrainCuda::Impl +{ +}; + +/*!\brief Constructor stub. */ +UpdateConstrainCuda::UpdateConstrainCuda(gmx_unused int numAtoms, + gmx_unused const t_inputrec &ir, + gmx_unused const gmx_mtop_t &mtop) + : impl_(nullptr) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +UpdateConstrainCuda::~UpdateConstrainCuda() = default; + +/*!\brief integrate stub. */ +void UpdateConstrainCuda::integrate(gmx_unused const real dt, + gmx_unused const bool updateVelocities, + gmx_unused const bool computeVirial, + gmx_unused tensor virialScaled) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*!\brief Set method stub. */ +void UpdateConstrainCuda::set(gmx_unused const t_idef &idef, + gmx_unused const t_mdatoms &md) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*!\brief Set PBC stub. */ +void UpdateConstrainCuda::setPbc(gmx_unused const t_pbc *pbc) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy coordinates from provided CPU location to GPU stub. */ +void UpdateConstrainCuda::copyCoordinatesToGpu(gmx_unused const rvec *h_x) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy velocities from provided CPU location to GPU stub. */ +void UpdateConstrainCuda::copyVelocitiesToGpu(gmx_unused const rvec *h_v) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy forces from CPU to GPU stub. */ +void UpdateConstrainCuda::copyForcesToGpu(gmx_unused const rvec *h_f) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy coordinates from GPU to provided CPU location stub. */ +void UpdateConstrainCuda::copyCoordinatesFromGpu(gmx_unused rvec *h_xp) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy velocities from GPU to provided CPU location stub. */ +void UpdateConstrainCuda::copyVelocitiesFromGpu(gmx_unused rvec *h_v) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Copy forces from GPU to CPU stub. */ +void UpdateConstrainCuda::copyForcesFromGpu(gmx_unused rvec *h_f) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +/*! \brief Set the internal GPU-memory x, xprime and v pointers stub. */ +void UpdateConstrainCuda::setXVFPointers(gmx_unused rvec *d_x, + gmx_unused rvec *d_xp, + gmx_unused rvec *d_v, + gmx_unused rvec *d_f) +{ + GMX_ASSERT(false, "A CPU stub for UpdateConstrain was called insted of the correct implementation."); +} + +} // namespace gmx + +#endif /* GMX_GPU != GMX_GPU_CUDA */ diff --git a/src/gromacs/mdlib/update_constrain_cuda_impl.cu b/src/gromacs/mdlib/update_constrain_cuda_impl.cu new file mode 100644 index 0000000000..957963f49c --- /dev/null +++ b/src/gromacs/mdlib/update_constrain_cuda_impl.cu @@ -0,0 +1,335 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Implements update and constraints class using CUDA. + * + * The class combines Leap-Frog integrator with LINCS and SETTLE constraints. + * + * \todo This class should take over the management of coordinates, velocities + * forces, virial, and PBC from its members (i.e. from Leap-Frog, LINCS + * and SETTLE). + * \todo The computational procedures in members should be integrated to improve + * computational performance. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ +#include "gmxpre.h" + +#include "update_constrain_cuda_impl.h" + +#include +#include + +#include + +#include + +#include "gromacs/gpu_utils/cudautils.cuh" +#include "gromacs/gpu_utils/devicebuffer.cuh" +#include "gromacs/gpu_utils/gputraits.cuh" +#include "gromacs/gpu_utils/vectype_ops.cuh" +#include "gromacs/math/vec.h" +#include "gromacs/mdlib/leapfrog_cuda.h" +#include "gromacs/mdlib/lincs_cuda.h" +#include "gromacs/mdlib/settle_cuda.h" +#include "gromacs/mdlib/update_constrain_cuda.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh" + +namespace gmx +{ + +/*! \brief Integrate + * + * Integrates the equation of motion using Leap-Frog algorithm and applies + * LINCS and SETTLE constraints. + * Updates d_xp_ and d_v_ fields of this object. + * + * \param[in] dt Timestep + * \param[in] updateVelocities If the velocities should be constrained. + * \param[in] computeVirial If virial should be updated. + * \param[out] virial Place to save virial tensor. + */ +void UpdateConstrainCuda::Impl::integrate(const real dt, + const bool updateVelocities, + const bool computeVirial, + tensor virial) +{ + // Clearing virial matrix + // TODO There is no point in having saparate virial matrix for constraints + clear_mat(virial); + + integrator_->integrate(dt); + lincsCuda_->apply(updateVelocities, 1.0/dt, computeVirial, virial); + settleCuda_->apply(updateVelocities, 1.0/dt, computeVirial, virial); + + // scaledVirial -> virial (methods above returns scaled values) + float scaleFactor = 0.5f/(dt*dt); + for (int i = 0; i < DIM; i++) + { + for (int j = 0; j < DIM; j++) + { + virial[i][j] = scaleFactor*virial[i][j]; + } + } + + return; +} + +/*! \brief Create Update-Constrain object + * + * \param[in] numAtoms Number of atoms. + * \param[in] ir Input record data: LINCS takes number of iterations and order of + * projection from it. + * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms + * and target O-H and H-H distances from this object. + */ +UpdateConstrainCuda::Impl::Impl(int numAtoms, + const t_inputrec &ir, + const gmx_mtop_t &mtop) + : numAtoms_(numAtoms) +{ + allocateDeviceBuffer(&d_x_, numAtoms, nullptr); + allocateDeviceBuffer(&d_xp_, numAtoms, nullptr); + allocateDeviceBuffer(&d_v_, numAtoms, nullptr); + allocateDeviceBuffer(&d_f_, numAtoms, nullptr); + allocateDeviceBuffer(&d_inverseMasses_, numAtoms, nullptr); + + // TODO When the code will be integrated into the schedule, it will be assigned non-default stream. + stream_ = nullptr; + + GMX_RELEASE_ASSERT(numAtoms == mtop.natoms, "State and topology number of atoms should be the same."); + integrator_ = std::make_unique(numAtoms); + lincsCuda_ = std::make_unique(mtop.natoms, ir.nLincsIter, ir.nProjOrder); + settleCuda_ = std::make_unique(mtop.natoms, mtop); + + integrator_->setXVFPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_, (rvec*)d_f_); + lincsCuda_->setXVPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_); + settleCuda_->setXVPointers((rvec*)d_x_, (rvec*)d_xp_, (rvec*)d_v_); +} + +UpdateConstrainCuda::Impl::~Impl() +{ +} + +/*! \brief + * Update data-structures (e.g. after NB search step). + * + * \param[in] idef System topology + * \param[in] md Atoms data. + */ +void UpdateConstrainCuda::Impl::set(const t_idef &idef, + const t_mdatoms &md) +{ + // Integrator should also update something, but it does not even have a method yet + integrator_->set(md); + lincsCuda_->set(idef, md); + settleCuda_->set(idef, md); +} + +/*! \brief + * Update PBC data. + * + * Converts pbc data from t_pbc into the PbcAiuc format and stores the latter. + * + * \param[in] pbc The PBC data in t_pbc format. + */ +void UpdateConstrainCuda::Impl::setPbc(const t_pbc *pbc) +{ + setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_); + integrator_->setPbc(pbc); + lincsCuda_->setPbc(pbc); + settleCuda_->setPbc(pbc); +} + +/*! \brief + * Copy coordinates from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_x CPU pointer where coordinates should be copied from. + */ +void UpdateConstrainCuda::Impl::copyCoordinatesToGpu(const rvec *h_x) +{ + copyToDeviceBuffer(&d_x_, (float3*)h_x, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Copy velocities from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v CPU pointer where velocities should be copied from. + */ +void UpdateConstrainCuda::Impl::copyVelocitiesToGpu(const rvec *h_v) +{ + copyToDeviceBuffer(&d_v_, (float3*)h_v, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Copy forces from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f CPU pointer where forces should be copied from. + */ +void UpdateConstrainCuda::Impl::copyForcesToGpu(const rvec *h_f) +{ + copyToDeviceBuffer(&d_f_, (float3*)h_f, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Copy coordinates from GPU to CPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[out] h_xp CPU pointer where coordinates should be copied to. + */ +void UpdateConstrainCuda::Impl::copyCoordinatesFromGpu(rvec *h_xp) +{ + copyFromDeviceBuffer((float3*)h_xp, &d_xp_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Copy velocities from GPU to CPU. + * + * The velocities are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v Pointer to velocities data. + */ +void UpdateConstrainCuda::Impl::copyVelocitiesFromGpu(rvec *h_v) +{ + copyFromDeviceBuffer((float3*)h_v, &d_v_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Copy forces from GPU to CPU. + * + * The forces are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f Pointer to forces data. + */ +void UpdateConstrainCuda::Impl::copyForcesFromGpu(rvec *h_f) +{ + copyFromDeviceBuffer((float3*)h_f, &d_f_, 0, numAtoms_, stream_, GpuApiCallBehavior::Sync, nullptr); +} + +/*! \brief + * Set the internal GPU-memory x, xprime and v pointers. + * + * Data is not copied. The data are assumed to be in float3/fvec format + * (float3 is used internally, but the data layout should be identical). + * + * \param[in] d_x Pointer to the coordinates for the input (on GPU) + * \param[in] d_xp Pointer to the coordinates for the output (on GPU) + * \param[in] d_v Pointer to the velocities (on GPU) + * \param[in] d_f Pointer to the forces (on GPU) + */ +void UpdateConstrainCuda::Impl::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f) +{ + d_x_ = (float3*)d_x; + d_xp_ = (float3*)d_xp; + d_v_ = (float3*)d_v; + d_f_ = (float3*)d_f; +} + + +UpdateConstrainCuda::UpdateConstrainCuda(int numAtoms, + const t_inputrec &ir, + const gmx_mtop_t &mtop) + : impl_(new Impl(numAtoms, ir, mtop)) +{ +} + +UpdateConstrainCuda::~UpdateConstrainCuda() = default; + +void UpdateConstrainCuda::integrate(const real dt, + const bool updateVelocities, + const bool computeVirial, + tensor virialScaled) +{ + impl_->integrate(dt, updateVelocities, computeVirial, virialScaled); +} + +void UpdateConstrainCuda::set(const t_idef &idef, + const t_mdatoms gmx_unused &md) +{ + impl_->set(idef, md); +} + +void UpdateConstrainCuda::setPbc(const t_pbc *pbc) +{ + impl_->setPbc(pbc); +} + +void UpdateConstrainCuda::copyCoordinatesToGpu(const rvec *h_x) +{ + impl_->copyCoordinatesToGpu(h_x); +} + +void UpdateConstrainCuda::copyVelocitiesToGpu(const rvec *h_v) +{ + impl_->copyVelocitiesToGpu(h_v); +} + +void UpdateConstrainCuda::copyForcesToGpu(const rvec *h_f) +{ + impl_->copyForcesToGpu(h_f); +} + +void UpdateConstrainCuda::copyCoordinatesFromGpu(rvec *h_xp) +{ + impl_->copyCoordinatesFromGpu(h_xp); +} + +void UpdateConstrainCuda::copyVelocitiesFromGpu(rvec *h_v) +{ + impl_->copyVelocitiesFromGpu(h_v); +} + +void UpdateConstrainCuda::copyForcesFromGpu(rvec *h_f) +{ + impl_->copyForcesFromGpu(h_f); +} + +void UpdateConstrainCuda::setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f) +{ + impl_->setXVFPointers(d_x, d_xp, d_v, d_f); +} + +} //namespace gmx diff --git a/src/gromacs/mdlib/update_constrain_cuda_impl.h b/src/gromacs/mdlib/update_constrain_cuda_impl.h new file mode 100644 index 0000000000..ca526431bf --- /dev/null +++ b/src/gromacs/mdlib/update_constrain_cuda_impl.h @@ -0,0 +1,215 @@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2019, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * + * \brief Declares CUDA implementation class for update and constraints. + * + * This header file is needed to include from both the device-side + * kernels file, and the host-side management code. + * + * \author Artem Zhmurov + * + * \ingroup module_mdlib + */ +#ifndef GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_IMPL_H +#define GMX_MDLIB_UPDATE_CONSTRAIN_CUDA_IMPL_H + +#include "gromacs/mdlib/leapfrog_cuda.h" +#include "gromacs/mdlib/lincs_cuda.h" +#include "gromacs/mdlib/settle_cuda.h" +#include "gromacs/mdlib/update_constrain_cuda.h" +#include "gromacs/mdtypes/inputrec.h" +#include "gromacs/pbcutil/pbc.h" +#include "gromacs/pbcutil/pbc_aiuc_cuda.cuh" +#include "gromacs/topology/idef.h" + +namespace gmx +{ + +/*! \internal \brief Class with interfaces and data for CUDA version of Update-Constraint. */ +class UpdateConstrainCuda::Impl +{ + + public: + /*! \brief Create Update-Constrain object + * + * \param[in] numAtoms Number of atoms. + * \param[in] ir Input record data: LINCS takes number of iterations and order of + * projection from it. + * \param[in] mtop Topology of the system: SETTLE gets the masses for O and H atoms + * and target O-H and H-H distances from this object. + */ + Impl(int numAtoms, + const t_inputrec &ir, + const gmx_mtop_t &mtop); + + ~Impl(); + + /*! \brief Integrate + * + * Integrates the equation of motion using Leap-Frog algorithm and applies + * LINCS and SETTLE constraints. + * Updates d_xp_ and d_v_ fields of this object. + * + * \param[in] dt Timestep + * \param[in] updateVelocities If the velocities should be constrained. + * \param[in] computeVirial If virial should be updated. + * \param[out] virial Place to save virial tensor. + */ + void integrate(const real dt, + const bool updateVelocities, + const bool computeVirial, + tensor virial); + + /*! \brief + * Update data-structures (e.g. after NB search step). + * + * \param[in] idef System topology + * \param[in] md Atoms data. Can be used to update masses if needed (not used now). + */ + void set(const t_idef &idef, + const t_mdatoms &md); + + /*! \brief + * Update PBC data. + * + * Converts PBC data from t_pbc into the PbcAiuc format and stores the latter. + * + * \param[in] pbc The PBC data in t_pbc format. + */ + void setPbc(const t_pbc *pbc); + + /*! \brief + * Copy coordinates from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_x CPU pointer where coordinates should be copied from. + */ + void copyCoordinatesToGpu(const rvec *h_x); + + /*! \brief + * Copy velocities from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v CPU pointer where velocities should be copied from. + */ + void copyVelocitiesToGpu(const rvec *h_v); + + /*! \brief + * Copy forces from CPU to GPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f CPU pointer where forces should be copied from. + */ + void copyForcesToGpu(const rvec *h_f); + + /*! \brief + * Copy coordinates from GPU to CPU. + * + * The data are assumed to be in float3/fvec format (single precision). + * + * \param[out] h_xp CPU pointer where coordinates should be copied to. + */ + void copyCoordinatesFromGpu(rvec *h_xp); + + /*! \brief + * Copy velocities from GPU to CPU. + * + * The velocities are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_v Pointer to velocities data. + */ + void copyVelocitiesFromGpu(rvec *h_v); + + /*! \brief + * Copy forces from GPU to CPU. + * + * The forces are assumed to be in float3/fvec format (single precision). + * + * \param[in] h_f Pointer to forces data. + */ + void copyForcesFromGpu(rvec *h_f); + + /*! \brief + * Set the internal GPU-memory x, xprime and v pointers. + * + * Data is not copied. The data are assumed to be in float3/fvec format + * (float3 is used internally, but the data layout should be identical). + * + * \param[in] d_x Pointer to the coordinates for the input (on GPU) + * \param[in] d_xp Pointer to the coordinates for the output (on GPU) + * \param[in] d_v Pointer to the velocities (on GPU) + * \param[in] d_f Pointer to the forces (on GPU) + */ + void setXVFPointers(rvec *d_x, rvec *d_xp, rvec *d_v, rvec *d_f); + + private: + + //! CUDA stream + cudaStream_t stream_; + + //! Periodic boundary data + PbcAiuc pbcAiuc_; + + //! Number of atoms + int numAtoms_; + + //! Coordinates before the timestep (on GPU) + float3 *d_x_; + //! Coordinates after the timestep (on GPU). + float3 *d_xp_; + //! Velocities of atoms (on GPU) + float3 *d_v_; + //! Forces, exerted by atoms (on GPU) + float3 *d_f_; + + //! 1/mass for all atoms (GPU) + real *d_inverseMasses_; + + //! Leap-Frog integrator + std::unique_ptr integrator_; + //! LINCS CUDA object to use for non-water constraints + std::unique_ptr lincsCuda_; + //! SETTLE CUDA object for water constrains + std::unique_ptr settleCuda_; + +}; + +} // namespace gmx + +#endif diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index ab53955c31..aba6ba9ca1 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -83,7 +83,6 @@ #include "gromacs/mdlib/force.h" #include "gromacs/mdlib/force_flags.h" #include "gromacs/mdlib/forcerec.h" -#include "gromacs/mdlib/leapfrog_cuda.h" #include "gromacs/mdlib/md_support.h" #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdlib/mdoutf.h" @@ -96,6 +95,7 @@ #include "gromacs/mdlib/tgroup.h" #include "gromacs/mdlib/trajectory_writing.h" #include "gromacs/mdlib/update.h" +#include "gromacs/mdlib/update_constrain_cuda.h" #include "gromacs/mdlib/vcm.h" #include "gromacs/mdlib/vsite.h" #include "gromacs/mdrunutility/handlerestart.h" @@ -147,8 +147,8 @@ using gmx::SimulationSignaller; -//! Whether the GPU version of Leap-Frog integrator should be used. -static const bool c_useGpuLeapFrog = (getenv("GMX_INTEGRATE_GPU") != nullptr); +//! Whether the GPU versions of Leap-Frog integrator and LINCS and SHAKE constraints +static const bool c_useGpuUpdateConstrain = (getenv("GMX_UPDATE_CONSTRAIN_GPU") != nullptr); void gmx::Simulator::do_md() { @@ -289,6 +289,11 @@ void gmx::Simulator::do_md() std::unique_ptr stateInstance; t_state * state; + + auto mdatoms = mdAtoms->mdatoms(); + + std::unique_ptr integrator; + if (DOMAINDECOMP(cr)) { dd_init_local_top(*top_global, &top); @@ -323,9 +328,23 @@ void gmx::Simulator::do_md() &graph, mdAtoms, constr, vsite, shellfc); upd.setNumAtoms(state->natoms); - } - auto mdatoms = mdAtoms->mdatoms(); + if (c_useGpuUpdateConstrain) + { + GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU."); + GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU."); + GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU."); + GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU"); + GMX_LOG(mdlog.info).asParagraph(). + appendText("Using CUDA GPU-based update and constraints module."); + integrator = std::make_unique(state->natoms, *ir, *top_global); + integrator->set(top.idef, *mdatoms); + t_pbc pbc; + set_pbc(&pbc, epbcXYZ, state->box); + integrator->setPbc(&pbc); + } + + } // NOTE: The global state is no longer used at this point. // But state_global is still used as temporary storage space for writing @@ -624,16 +643,6 @@ void gmx::Simulator::do_md() * Loop over MD steps * ************************************************************/ - std::unique_ptr integrator; - if (c_useGpuLeapFrog) - { - GMX_RELEASE_ASSERT(ir->eI == eiMD, "Only md integrator is supported on the GPU."); - GMX_RELEASE_ASSERT(ir->etc == etcNO, "Temperature coupling is not supported on the GPU."); - GMX_RELEASE_ASSERT(ir->epc == epcNO, "Pressure coupling is not supported on the GPU."); - GMX_RELEASE_ASSERT(!mdatoms->haveVsites, "Virtual sites are not supported on the GPU"); - integrator = std::make_unique(state->natoms); - integrator->set(*mdatoms); - } bFirstStep = TRUE; /* Skip the first Nose-Hoover integration when we get the state from tpx */ @@ -1183,32 +1192,36 @@ void gmx::Simulator::do_md() copy_rvecn(as_rvec_array(state->x.data()), cbuf, 0, state->natoms); } - - if (c_useGpuLeapFrog) + if (c_useGpuUpdateConstrain) { integrator->copyCoordinatesToGpu(state->x.rvec_array()); integrator->copyVelocitiesToGpu(state->v.rvec_array()); integrator->copyForcesToGpu(as_rvec_array(f.data())); - integrator->integrate(ir->delta_t); - integrator->copyCoordinatesFromGpu(upd.xp()->rvec_array()); + + // This applies Leap-Frog, LINCS and SETTLE in a succession + integrator->integrate(ir->delta_t, true, bCalcVir, shake_vir); + + integrator->copyCoordinatesFromGpu(state->x.rvec_array()); integrator->copyVelocitiesFromGpu(state->v.rvec_array()); } else { update_coords(step, ir, mdatoms, state, f.arrayRefWithPadding(), fcd, ekind, M, &upd, etrtPOSITION, cr, constr); + + wallcycle_stop(wcycle, ewcUPDATE); + + constrain_coordinates(step, &dvdl_constr, state, + shake_vir, + &upd, constr, + bCalcVir, do_log, do_ene); + + update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, + cr, nrnb, wcycle, &upd, constr, do_log, do_ene); + finish_update(ir, mdatoms, + state, graph, + nrnb, wcycle, &upd, constr); } - wallcycle_stop(wcycle, ewcUPDATE); - - constrain_coordinates(step, &dvdl_constr, state, - shake_vir, - &upd, constr, - bCalcVir, do_log, do_ene); - update_sd_second_half(step, &dvdl_constr, ir, mdatoms, state, - cr, nrnb, wcycle, &upd, constr, do_log, do_ene); - finish_update(ir, mdatoms, - state, graph, - nrnb, wcycle, &upd, constr); if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM) { -- 2.11.4.GIT