/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief Define common implementation of nbnxm_gpu_data_mgmt.h
 *
 * \author Anca Hamuraru <anca@streamcomputing.eu>
 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
 * \author Teemu Virolainen <teemu@streamcomputing.eu>
 * \author Szilárd Páll <pall.szilard@gmail.com>
 * \author Artem Zhmurov <zhmurov@gmail.com>
 *
 * \ingroup module_nbnxm
 */
#include "gmxpre.h"

#include "config.h"

#if GMX_GPU_CUDA
#    include "cuda/nbnxm_cuda_types.h"
#endif

#if GMX_GPU_OPENCL
#    include "opencl/nbnxm_ocl_types.h"
#endif

#include "nbnxm_gpu_data_mgmt.h"

#include "gromacs/nbnxm/gpu_data_mgmt.h"
#include "gromacs/timing/gpu_timing.h"
#include "gromacs/utility/cstringutil.h"

#include "nbnxm_gpu.h"
#include "pairlistsets.h"
namespace Nbnxm
{
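
/*! \brief Initializes (or re-initializes) the Ewald Coulomb correction force table on the device.
 *
 * Any table uploaded previously is destroyed before the new tabulated forces
 * and their scale are stored in \p nbp.
 */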
void init_ewald_coulomb_force_table(const EwaldCorrectionTables& tables,
                                    NBParamGpu*                  nbp,
                                    const DeviceContext&         deviceContext)
{
    if (nbp->coulomb_tab)
    {
        destroyParamLookupTable(&nbp->coulomb_tab, nbp->coulomb_tab_texobj);
    }

    nbp->coulomb_tab_scale = tables.scale;
    initParamLookupTable(&nbp->coulomb_tab, &nbp->coulomb_tab_texobj, tables.tableF.data(),
                         tables.tableF.size(), deviceContext);
}
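
//! Prints a deprecation warning to stderr when one of the legacy GMX_CUDA_/GMX_OCL_ environment variables is set.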
void inline printEnvironmentVariableDeprecationMessage(bool               isEnvironmentVariableSet,
                                                       const std::string& environmentVariableSuffix)
{
    if (isEnvironmentVariableSet)
    {
        fprintf(stderr,
                "Environment variables GMX_CUDA_%s and GMX_OCL_%s are deprecated and will be\n"
                "removed in release 2022, please use GMX_GPU_%s instead.",
                environmentVariableSuffix.c_str(), environmentVariableSuffix.c_str(),
                environmentVariableSuffix.c_str());
    }
}
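
/*! \brief Picks which flavor of the Ewald GPU kernels to use.
 *
 * Chooses between the analytical and tabulated kernels and between the
 * single and twin cut-off variants, based on the interaction constants and
 * on the (partly deprecated) override environment variables.
 */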
int nbnxn_gpu_pick_ewald_kernel_type(const interaction_const_t& ic)
{
    bool bTwinCut = (ic.rcoulomb != ic.rvdw);
    int  kernel_type;

    /* Benchmarking/development environment variables to force the use of
       analytical or tabulated Ewald kernel. */

    // Remove these when old environment variables are deprecated
    const bool forceAnalyticalEwaldLegacy = (getenv("GMX_CUDA_NB_ANA_EWALD") != nullptr)
                                            || (getenv("GMX_OCL_NB_ANA_EWALD") != nullptr);
    const bool forceTabulatedEwaldLegacy = (getenv("GMX_CUDA_NB_TAB_EWALD") != nullptr)
                                           || (getenv("GMX_OCL_NB_TAB_EWALD") != nullptr);
    const bool forceTwinCutoffEwaldLegacy = (getenv("GMX_CUDA_NB_EWALD_TWINCUT") != nullptr)
                                            || (getenv("GMX_OCL_NB_EWALD_TWINCUT") != nullptr);

    printEnvironmentVariableDeprecationMessage(forceAnalyticalEwaldLegacy, "NB_ANA_EWALD");
    printEnvironmentVariableDeprecationMessage(forceTabulatedEwaldLegacy, "NB_TAB_EWALD");
    printEnvironmentVariableDeprecationMessage(forceTwinCutoffEwaldLegacy, "NB_EWALD_TWINCUT");

    const bool forceAnalyticalEwald =
            (getenv("GMX_GPU_NB_ANA_EWALD") != nullptr) || forceAnalyticalEwaldLegacy;
    const bool forceTabulatedEwald =
            (getenv("GMX_GPU_NB_TAB_EWALD") != nullptr) || forceTabulatedEwaldLegacy;

    if (forceAnalyticalEwald && forceTabulatedEwald)
    {
        gmx_incons(
                "Both analytical and tabulated Ewald GPU non-bonded kernels "
                "requested through environment variables.");
    }

    /* By default, use analytical Ewald.
     * TODO: tabulated does not work in OpenCL, it needs fixing, see init_nbparam() in nbnxn_ocl_data_mgmt.cpp
     */
    bool bUseAnalyticalEwald = true;
    if (forceAnalyticalEwald)
    {
        if (debug)
        {
            fprintf(debug, "Using analytical Ewald GPU kernels\n");
        }
    }
    else if (forceTabulatedEwald)
    {
        bUseAnalyticalEwald = false;

        if (debug)
        {
            fprintf(debug, "Using tabulated Ewald GPU kernels\n");
        }
    }
    /* Use twin cut-off kernels if requested by bTwinCut or one of the env. vars.
       forces it (use it for debugging/benchmarking only). */
    if (!bTwinCut && (getenv("GMX_GPU_NB_EWALD_TWINCUT") == nullptr) && !forceTwinCutoffEwaldLegacy)
    {
        kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA : eelTypeEWALD_TAB;
    }
    else
    {
        kernel_type = bUseAnalyticalEwald ? eelTypeEWALD_ANA_TWIN : eelTypeEWALD_TAB_TWIN;
    }

    return kernel_type;
}
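
//! Copies the cut-off and switch/shift parameters from the interaction constants and pair-list parameters into the device parameter struct.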
void set_cutoff_parameters(NBParamGpu* nbp, const interaction_const_t* ic, const PairlistParams& listParams)
{
    nbp->ewald_beta        = ic->ewaldcoeff_q;
    nbp->sh_ewald          = ic->sh_ewald;
    nbp->epsfac            = ic->epsfac;
    nbp->two_k_rf          = 2.0 * ic->k_rf;
    nbp->c_rf              = ic->c_rf;
    nbp->rvdw_sq           = ic->rvdw * ic->rvdw;
    nbp->rcoulomb_sq       = ic->rcoulomb * ic->rcoulomb;
    nbp->rlistOuter_sq     = listParams.rlistOuter * listParams.rlistOuter;
    nbp->rlistInner_sq     = listParams.rlistInner * listParams.rlistInner;
    nbp->useDynamicPruning = listParams.useDynamicPruning;

    nbp->sh_lj_ewald   = ic->sh_lj_ewald;
    nbp->ewaldcoeff_lj = ic->ewaldcoeff_lj;

    nbp->rvdw_switch      = ic->rvdw_switch;
    nbp->dispersion_shift = ic->dispersion_shift;
    nbp->repulsion_shift  = ic->repulsion_shift;
    nbp->vdw_switch       = ic->vdw_switch;
}
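
/*! \brief Updates the GPU non-bonded parameters after PME load balancing has changed the Coulomb cut-off.
 *
 * Re-sets the cut-off parameters, re-selects the Ewald kernel flavor and
 * re-uploads the Ewald correction force table.
 */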
void gpu_pme_loadbal_update_param(const nonbonded_verlet_t* nbv, const interaction_const_t* ic)
{
    if (!nbv || !nbv->useGpu())
    {
        return;
    }
    NbnxmGpu*   nb  = nbv->gpu_nbv;
    NBParamGpu* nbp = nb->nbparam;

    set_cutoff_parameters(nbp, ic, nbv->pairlistSets().params());

    nbp->eeltype = nbnxn_gpu_pick_ewald_kernel_type(*ic);

    GMX_RELEASE_ASSERT(ic->coulombEwaldTables, "Need valid Coulomb Ewald correction tables");
    init_ewald_coulomb_force_table(*ic->coulombEwaldTables, nbp, *nb->deviceContext_);
}
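
//! Clears the device pair-list data structure, marking all of its buffers as unallocated.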
void init_plist(gpu_plist* pl)
{
    /* initialize to nullptr pointers to data that is not allocated here and will
       need reallocation in nbnxn_gpu_init_pairlist */
    pl->sci   = nullptr;
    pl->cj4   = nullptr;
    pl->imask = nullptr;
    pl->excl  = nullptr;

    /* size -1 indicates that the respective array hasn't been initialized yet */
    pl->na_c          = -1;
    pl->nsci          = -1;
    pl->sci_nalloc    = -1;
    pl->ncj4          = -1;
    pl->cj4_nalloc    = -1;
    pl->nimask        = -1;
    pl->imask_nalloc  = -1;
    pl->nexcl         = -1;
    pl->excl_nalloc   = -1;
    pl->haveFreshList = false;
}
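
//! Zero-initializes the GPU non-bonded timing counters.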
void init_timings(gmx_wallclock_gpu_nbnxn_t* t)
{
    int i, j;

    t->nb_h2d_t = 0.0;
    t->nb_d2h_t = 0.0;
    t->nb_c     = 0;
    t->pl_h2d_t = 0.0;
    t->pl_h2d_c = 0;
    for (i = 0; i < 2; i++)
    {
        for (j = 0; j < 2; j++)
        {
            t->ktime[i][j].t = 0.0;
            t->ktime[i][j].c = 0;
        }
    }
    t->pruneTime.c        = 0;
    t->pruneTime.t        = 0.0;
    t->dynamicPruneTime.c = 0;
    t->dynamicPruneTime.t = 0.0;
}
//! This function is documented in the header file
void gpu_init_pairlist(NbnxmGpu* nb, const NbnxnPairlistGpu* h_plist, const InteractionLocality iloc)
{
    char sbuf[STRLEN];
    // Timing accumulation should happen only if there was work to do
    // because getLastRangeTime() gets skipped with empty lists later
    // which leads to the counter not being reset.
    bool                bDoTime      = (nb->bDoTime && !h_plist->sci.empty());
    const DeviceStream& deviceStream = *nb->deviceStreams[iloc];
    gpu_plist*          d_plist      = nb->plist[iloc];

    if (d_plist->na_c < 0)
    {
        d_plist->na_c = h_plist->na_ci;
    }
    else
    {
        if (d_plist->na_c != h_plist->na_ci)
        {
            sprintf(sbuf, "In init_plist: the #atoms per cell has changed (from %d to %d)",
                    d_plist->na_c, h_plist->na_ci);
            gmx_incons(sbuf);
        }
    }

    gpu_timers_t::Interaction& iTimers = nb->timers->interaction[iloc];

    if (bDoTime)
    {
        iTimers.pl_h2d.openTimingRegion(deviceStream);
        iTimers.didPairlistH2D = true;
    }

    // TODO most of this function is same in CUDA and OpenCL, move into the header
    const DeviceContext& deviceContext = *nb->deviceContext_;

    reallocateDeviceBuffer(&d_plist->sci, h_plist->sci.size(), &d_plist->nsci, &d_plist->sci_nalloc,
                           deviceContext);
    copyToDeviceBuffer(&d_plist->sci, h_plist->sci.data(), 0, h_plist->sci.size(), deviceStream,
                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);

    reallocateDeviceBuffer(&d_plist->cj4, h_plist->cj4.size(), &d_plist->ncj4, &d_plist->cj4_nalloc,
                           deviceContext);
    copyToDeviceBuffer(&d_plist->cj4, h_plist->cj4.data(), 0, h_plist->cj4.size(), deviceStream,
                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);

    reallocateDeviceBuffer(&d_plist->imask, h_plist->cj4.size() * c_nbnxnGpuClusterpairSplit,
                           &d_plist->nimask, &d_plist->imask_nalloc, deviceContext);

    reallocateDeviceBuffer(&d_plist->excl, h_plist->excl.size(), &d_plist->nexcl,
                           &d_plist->excl_nalloc, deviceContext);
    copyToDeviceBuffer(&d_plist->excl, h_plist->excl.data(), 0, h_plist->excl.size(), deviceStream,
                       GpuApiCallBehavior::Async, bDoTime ? iTimers.pl_h2d.fetchNextEvent() : nullptr);

    if (bDoTime)
    {
        iTimers.pl_h2d.closeTimingRegion(deviceStream);
    }

    /* need to prune the pair list during the next step */
    d_plist->haveFreshList = true;
}
//! This function is documented in the header file
gmx_wallclock_gpu_nbnxn_t* gpu_get_timings(NbnxmGpu* nb)
{
    return (nb != nullptr && nb->bDoTime) ? nb->timings : nullptr;
}
//! This function is documented in the header file
void gpu_reset_timings(nonbonded_verlet_t* nbv)
{
    if (nbv->gpu_nbv && nbv->gpu_nbv->bDoTime)
    {
        init_timings(nbv->gpu_nbv->timings);
    }
}
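
//! Returns whether the analytical flavor of the Ewald GPU kernels (plain or twin cut-off) is in use.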
bool gpu_is_kernel_ewald_analytical(const NbnxmGpu* nb)
{
    return ((nb->nbparam->eeltype == eelTypeEWALD_ANA) || (nb->nbparam->eeltype == eelTypeEWALD_ANA_TWIN));
}

} // namespace Nbnxm