Continue removing -nb gpu_cpu
[gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_cpu.cpp
blobbf9ba49f1fb0dd623b71f228c45503825149212b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #include "gmxpre.h"
38 #include "nbnxn_kernel_cpu.h"
40 #include "gromacs/math/vectypes.h"
41 #include "gromacs/mdlib/force_flags.h"
42 #include "gromacs/mdlib/gmx_omp_nthreads.h"
43 #include "gromacs/mdlib/nb_verlet.h"
44 #include "gromacs/mdlib/nbnxn_consts.h"
45 #include "gromacs/mdlib/nbnxn_simd.h"
46 #include "gromacs/mdtypes/interaction_const.h"
47 #include "gromacs/mdtypes/md_enums.h"
48 #include "gromacs/simd/simd.h"
49 #include "gromacs/utility/gmxassert.h"
50 #include "gromacs/utility/real.h"
52 #include "nbnxn_kernel_common.h"
53 #define INCLUDE_KERNELFUNCTION_TABLES
54 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
55 #ifdef GMX_NBNXN_SIMD_2XNN
56 #include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
57 #endif
58 #ifdef GMX_NBNXN_SIMD_4XN
59 #include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
60 #endif
61 #undef INCLUDE_FUNCTION_TABLES
63 /*! \brief Clears the energy group output buffers
65 * \param[in,out] out nbnxn kernel output struct
67 static void clearGroupEnergies(nbnxn_atomdata_output_t *out)
69 for (int i = 0; i < out->nV; i++)
71 out->Vvdw[i] = 0;
72 out->Vc[i] = 0;
75 for (int i = 0; i < out->nVS; i++)
77 out->VSvdw[i] = 0;
79 for (int i = 0; i < out->nVS; i++)
81 out->VSc[i] = 0;
85 /*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
86 * to single terms in the output buffers.
88 * The SIMD kernels produce a large number of energy buffer in SIMD registers
89 * to avoid scattered reads and writes.
91 * \tparam unrollj The unroll size for j-particles in the SIMD kernel
92 * \param[in] numGroups The number of energy groups
93 * \param[in] numGroups_2log Log2 of numGroups, rounded up
94 * \param[in] vVdwSimd SIMD Van der Waals energy buffers
95 * \param[in] vCoulombSimd SIMD Coulomb energy buffers
96 * \param[in,out] vVdw Van der Waals energy output buffer
97 * \param[in,out] vCoulomb Coulomb energy output buffer
99 template <int unrollj> static void
100 reduceGroupEnergySimdBuffers(int numGroups,
101 int numGroups_2log,
102 const real * gmx_restrict vVdwSimd,
103 const real * gmx_restrict vCoulombSimd,
104 real * gmx_restrict vVdw,
105 real * gmx_restrict vCoulomb)
107 // cppcheck-suppress duplicateExpression
108 const int unrollj_half = unrollj/2;
109 /* Energies are stored in SIMD registers with size 2^numGroups_2log */
110 const int numGroupsStorage = (1 << numGroups_2log);
112 /* The size of the SIMD energy group buffer array is:
113 * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
115 for (int i = 0; i < numGroups; i++)
117 for (int j1 = 0; j1 < numGroups; j1++)
119 for (int j0 = 0; j0 < numGroups; j0++)
121 int c = ((i*numGroups + j1)*numGroupsStorage + j0)*unrollj_half*unrollj;
122 for (int s = 0; s < unrollj_half; s++)
124 vVdw [i*numGroups + j0] += vVdwSimd [c + 0];
125 vVdw [i*numGroups + j1] += vVdwSimd [c + 1];
126 vCoulomb[i*numGroups + j0] += vCoulombSimd[c + 0];
127 vCoulomb[i*numGroups + j1] += vCoulombSimd[c + 1];
128 c += unrollj + 2;
135 void
136 nbnxn_kernel_cpu(nonbonded_verlet_group_t *nbvg,
137 const nbnxn_atomdata_t *nbat,
138 const interaction_const_t *ic,
139 rvec *shiftVectors,
140 int forceFlags,
141 int clearF,
142 real *fshift,
143 real *vCoulomb,
144 real *vVdw)
147 int coulkt;
148 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
150 coulkt = coulktRF;
152 else
154 if (nbvg->ewald_excl == ewaldexclTable)
156 if (ic->rcoulomb == ic->rvdw)
158 coulkt = coulktTAB;
160 else
162 coulkt = coulktTAB_TWIN;
165 else
167 if (ic->rcoulomb == ic->rvdw)
169 coulkt = coulktEWALD;
171 else
173 coulkt = coulktEWALD_TWIN;
178 int vdwkt = 0;
179 if (ic->vdwtype == evdwCUT)
181 switch (ic->vdw_modifier)
183 case eintmodNONE:
184 case eintmodPOTSHIFT:
185 switch (nbat->comb_rule)
187 case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
188 case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
189 case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
190 default:
191 GMX_RELEASE_ASSERT(false, "Unknown combination rule");
193 break;
194 case eintmodFORCESWITCH:
195 vdwkt = vdwktLJFORCESWITCH;
196 break;
197 case eintmodPOTSWITCH:
198 vdwkt = vdwktLJPOTSWITCH;
199 break;
200 default:
201 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
204 else if (ic->vdwtype == evdwPME)
206 if (ic->ljpme_comb_rule == eljpmeGEOM)
208 vdwkt = vdwktLJEWALDCOMBGEOM;
210 else
212 vdwkt = vdwktLJEWALDCOMBLB;
213 /* At setup we (should have) selected the C reference kernel */
214 GMX_RELEASE_ASSERT(nbvg->kernel_type == nbnxnk4x4_PlainC, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
217 else
219 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
222 int nnbl = nbvg->nbl_lists.nnbl;
223 nbnxn_pairlist_t **nbl = nbvg->nbl_lists.nbl;
225 GMX_ASSERT(nbl[0]->nci >= 0, "nci<0, which signals an invalid pair-list");
227 // cppcheck-suppress unreadVariable
228 int gmx_unused nthreads = gmx_omp_nthreads_get(emntNonbonded);
229 #pragma omp parallel for schedule(static) num_threads(nthreads)
230 for (int nb = 0; nb < nnbl; nb++)
232 // Presently, the kernels do not call C++ code that can throw,
233 // so no need for a try/catch pair in this OpenMP region.
234 nbnxn_atomdata_output_t *out = &nbat->out[nb];
236 if (clearF == enbvClearFYes)
238 clear_f(nbat, nb, out->f);
241 real *fshift_p;
242 if ((forceFlags & GMX_FORCE_VIRIAL) && nnbl == 1)
244 fshift_p = fshift;
246 else
248 fshift_p = out->fshift;
250 if (clearF == enbvClearFYes)
252 clear_fshift(fshift_p);
256 if (!(forceFlags & GMX_FORCE_ENERGY))
258 /* Don't calculate energies */
259 switch (nbvg->kernel_type)
261 case nbnxnk4x4_PlainC:
262 nbnxn_kernel_noener_ref[coulkt][vdwkt](nbl[nb], nbat,
264 shiftVectors,
265 out->f,
266 fshift_p);
267 break;
268 #ifdef GMX_NBNXN_SIMD_2XNN
269 case nbnxnk4xN_SIMD_2xNN:
270 nbnxn_kernel_noener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
272 shiftVectors,
273 out->f,
274 fshift_p);
275 break;
276 #endif
277 #ifdef GMX_NBNXN_SIMD_4XN
278 case nbnxnk4xN_SIMD_4xN:
279 nbnxn_kernel_noener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
281 shiftVectors,
282 out->f,
283 fshift_p);
284 break;
285 #endif
286 default:
287 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
290 else if (out->nV == 1)
292 /* A single energy group (pair) */
293 out->Vvdw[0] = 0;
294 out->Vc[0] = 0;
296 switch (nbvg->kernel_type)
298 case nbnxnk4x4_PlainC:
299 nbnxn_kernel_ener_ref[coulkt][vdwkt](nbl[nb], nbat,
301 shiftVectors,
302 out->f,
303 fshift_p,
304 out->Vvdw,
305 out->Vc);
306 break;
307 #ifdef GMX_NBNXN_SIMD_2XNN
308 case nbnxnk4xN_SIMD_2xNN:
309 nbnxn_kernel_ener_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
311 shiftVectors,
312 out->f,
313 fshift_p,
314 out->Vvdw,
315 out->Vc);
316 break;
317 #endif
318 #ifdef GMX_NBNXN_SIMD_4XN
319 case nbnxnk4xN_SIMD_4xN:
320 nbnxn_kernel_ener_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
322 shiftVectors,
323 out->f,
324 fshift_p,
325 out->Vvdw,
326 out->Vc);
327 break;
328 #endif
329 default:
330 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
333 else
335 /* Calculate energy group contributions */
336 clearGroupEnergies(out);
338 int unrollj = 0;
340 switch (nbvg->kernel_type)
342 case nbnxnk4x4_PlainC:
343 unrollj = NBNXN_CPU_CLUSTER_I_SIZE;
344 nbnxn_kernel_energrp_ref[coulkt][vdwkt](nbl[nb], nbat,
346 shiftVectors,
347 out->f,
348 fshift_p,
349 out->Vvdw,
350 out->Vc);
351 break;
352 #ifdef GMX_NBNXN_SIMD_2XNN
353 case nbnxnk4xN_SIMD_2xNN:
354 unrollj = GMX_SIMD_REAL_WIDTH/2;
355 nbnxn_kernel_energrp_simd_2xnn[coulkt][vdwkt](nbl[nb], nbat,
357 shiftVectors,
358 out->f,
359 fshift_p,
360 out->VSvdw,
361 out->VSc);
362 break;
363 #endif
364 #ifdef GMX_NBNXN_SIMD_4XN
365 case nbnxnk4xN_SIMD_4xN:
366 unrollj = GMX_SIMD_REAL_WIDTH;
367 nbnxn_kernel_energrp_simd_4xn[coulkt][vdwkt](nbl[nb], nbat,
369 shiftVectors,
370 out->f,
371 fshift_p,
372 out->VSvdw,
373 out->VSc);
374 break;
375 #endif
376 default:
377 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
380 if (nbvg->kernel_type != nbnxnk4x4_PlainC)
382 switch (unrollj)
384 case 2:
385 reduceGroupEnergySimdBuffers<2>(nbat->nenergrp,
386 nbat->neg_2log,
387 out->VSvdw, out->VSc,
388 out->Vvdw, out->Vc);
389 break;
390 case 4:
391 reduceGroupEnergySimdBuffers<4>(nbat->nenergrp,
392 nbat->neg_2log,
393 out->VSvdw, out->VSc,
394 out->Vvdw, out->Vc);
395 break;
396 case 8:
397 reduceGroupEnergySimdBuffers<8>(nbat->nenergrp,
398 nbat->neg_2log,
399 out->VSvdw, out->VSc,
400 out->Vvdw, out->Vc);
401 break;
402 default:
403 GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
409 if (forceFlags & GMX_FORCE_ENERGY)
411 reduce_energies_over_lists(nbat, nnbl, vVdw, vCoulomb);