2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "nbnxn_kernel_cpu.h"
40 #include "gromacs/math/vectypes.h"
41 #include "gromacs/mdlib/force_flags.h"
42 #include "gromacs/mdlib/gmx_omp_nthreads.h"
43 #include "gromacs/mdlib/nb_verlet.h"
44 #include "gromacs/mdlib/nbnxn_consts.h"
45 #include "gromacs/mdlib/nbnxn_simd.h"
46 #include "gromacs/mdtypes/interaction_const.h"
47 #include "gromacs/mdtypes/md_enums.h"
48 #include "gromacs/simd/simd.h"
49 #include "gromacs/utility/gmxassert.h"
50 #include "gromacs/utility/real.h"
52 #include "nbnxn_kernel_common.h"
53 #define INCLUDE_KERNELFUNCTION_TABLES
54 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h"
55 #ifdef GMX_NBNXN_SIMD_2XNN
56 #include "gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
58 #ifdef GMX_NBNXN_SIMD_4XN
59 #include "gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
61 #undef INCLUDE_FUNCTION_TABLES
63 /*! \brief Clears the energy group output buffers
65 * \param[in,out] out nbnxn kernel output struct
67 static void clearGroupEnergies(nbnxn_atomdata_output_t
*out
)
69 for (int i
= 0; i
< out
->nV
; i
++)
75 for (int i
= 0; i
< out
->nVS
; i
++)
79 for (int i
= 0; i
< out
->nVS
; i
++)
85 /*! \brief Reduce the group-pair energy buffers produced by a SIMD kernel
86 * to single terms in the output buffers.
88 * The SIMD kernels produce a large number of energy buffer in SIMD registers
89 * to avoid scattered reads and writes.
91 * \tparam unrollj The unroll size for j-particles in the SIMD kernel
92 * \param[in] numGroups The number of energy groups
93 * \param[in] numGroups_2log Log2 of numGroups, rounded up
94 * \param[in] vVdwSimd SIMD Van der Waals energy buffers
95 * \param[in] vCoulombSimd SIMD Coulomb energy buffers
96 * \param[in,out] vVdw Van der Waals energy output buffer
97 * \param[in,out] vCoulomb Coulomb energy output buffer
99 template <int unrollj
> static void
100 reduceGroupEnergySimdBuffers(int numGroups
,
102 const real
* gmx_restrict vVdwSimd
,
103 const real
* gmx_restrict vCoulombSimd
,
104 real
* gmx_restrict vVdw
,
105 real
* gmx_restrict vCoulomb
)
107 // cppcheck-suppress duplicateExpression
108 const int unrollj_half
= unrollj
/2;
109 /* Energies are stored in SIMD registers with size 2^numGroups_2log */
110 const int numGroupsStorage
= (1 << numGroups_2log
);
112 /* The size of the SIMD energy group buffer array is:
113 * numGroups*numGroups*numGroupsStorage*unrollj_half*simd_width
115 for (int i
= 0; i
< numGroups
; i
++)
117 for (int j1
= 0; j1
< numGroups
; j1
++)
119 for (int j0
= 0; j0
< numGroups
; j0
++)
121 int c
= ((i
*numGroups
+ j1
)*numGroupsStorage
+ j0
)*unrollj_half
*unrollj
;
122 for (int s
= 0; s
< unrollj_half
; s
++)
124 vVdw
[i
*numGroups
+ j0
] += vVdwSimd
[c
+ 0];
125 vVdw
[i
*numGroups
+ j1
] += vVdwSimd
[c
+ 1];
126 vCoulomb
[i
*numGroups
+ j0
] += vCoulombSimd
[c
+ 0];
127 vCoulomb
[i
*numGroups
+ j1
] += vCoulombSimd
[c
+ 1];
136 nbnxn_kernel_cpu(nonbonded_verlet_group_t
*nbvg
,
137 const nbnxn_atomdata_t
*nbat
,
138 const interaction_const_t
*ic
,
148 if (EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
)
154 if (nbvg
->ewald_excl
== ewaldexclTable
)
156 if (ic
->rcoulomb
== ic
->rvdw
)
162 coulkt
= coulktTAB_TWIN
;
167 if (ic
->rcoulomb
== ic
->rvdw
)
169 coulkt
= coulktEWALD
;
173 coulkt
= coulktEWALD_TWIN
;
179 if (ic
->vdwtype
== evdwCUT
)
181 switch (ic
->vdw_modifier
)
184 case eintmodPOTSHIFT
:
185 switch (nbat
->comb_rule
)
187 case ljcrGEOM
: vdwkt
= vdwktLJCUT_COMBGEOM
; break;
188 case ljcrLB
: vdwkt
= vdwktLJCUT_COMBLB
; break;
189 case ljcrNONE
: vdwkt
= vdwktLJCUT_COMBNONE
; break;
191 GMX_RELEASE_ASSERT(false, "Unknown combination rule");
194 case eintmodFORCESWITCH
:
195 vdwkt
= vdwktLJFORCESWITCH
;
197 case eintmodPOTSWITCH
:
198 vdwkt
= vdwktLJPOTSWITCH
;
201 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction modifier");
204 else if (ic
->vdwtype
== evdwPME
)
206 if (ic
->ljpme_comb_rule
== eljpmeGEOM
)
208 vdwkt
= vdwktLJEWALDCOMBGEOM
;
212 vdwkt
= vdwktLJEWALDCOMBLB
;
213 /* At setup we (should have) selected the C reference kernel */
214 GMX_RELEASE_ASSERT(nbvg
->kernel_type
== nbnxnk4x4_PlainC
, "Only the C reference nbnxn SIMD kernel supports LJ-PME with LB combination rules");
219 GMX_RELEASE_ASSERT(false, "Unsupported VdW interaction type");
222 int nnbl
= nbvg
->nbl_lists
.nnbl
;
223 nbnxn_pairlist_t
**nbl
= nbvg
->nbl_lists
.nbl
;
225 GMX_ASSERT(nbl
[0]->nci
>= 0, "nci<0, which signals an invalid pair-list");
227 // cppcheck-suppress unreadVariable
228 int gmx_unused nthreads
= gmx_omp_nthreads_get(emntNonbonded
);
229 #pragma omp parallel for schedule(static) num_threads(nthreads)
230 for (int nb
= 0; nb
< nnbl
; nb
++)
232 // Presently, the kernels do not call C++ code that can throw,
233 // so no need for a try/catch pair in this OpenMP region.
234 nbnxn_atomdata_output_t
*out
= &nbat
->out
[nb
];
236 if (clearF
== enbvClearFYes
)
238 clear_f(nbat
, nb
, out
->f
);
242 if ((forceFlags
& GMX_FORCE_VIRIAL
) && nnbl
== 1)
248 fshift_p
= out
->fshift
;
250 if (clearF
== enbvClearFYes
)
252 clear_fshift(fshift_p
);
256 if (!(forceFlags
& GMX_FORCE_ENERGY
))
258 /* Don't calculate energies */
259 switch (nbvg
->kernel_type
)
261 case nbnxnk4x4_PlainC
:
262 nbnxn_kernel_noener_ref
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
268 #ifdef GMX_NBNXN_SIMD_2XNN
269 case nbnxnk4xN_SIMD_2xNN
:
270 nbnxn_kernel_noener_simd_2xnn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
277 #ifdef GMX_NBNXN_SIMD_4XN
278 case nbnxnk4xN_SIMD_4xN
:
279 nbnxn_kernel_noener_simd_4xn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
287 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
290 else if (out
->nV
== 1)
292 /* A single energy group (pair) */
296 switch (nbvg
->kernel_type
)
298 case nbnxnk4x4_PlainC
:
299 nbnxn_kernel_ener_ref
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
307 #ifdef GMX_NBNXN_SIMD_2XNN
308 case nbnxnk4xN_SIMD_2xNN
:
309 nbnxn_kernel_ener_simd_2xnn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
318 #ifdef GMX_NBNXN_SIMD_4XN
319 case nbnxnk4xN_SIMD_4xN
:
320 nbnxn_kernel_ener_simd_4xn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
330 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
335 /* Calculate energy group contributions */
336 clearGroupEnergies(out
);
340 switch (nbvg
->kernel_type
)
342 case nbnxnk4x4_PlainC
:
343 unrollj
= NBNXN_CPU_CLUSTER_I_SIZE
;
344 nbnxn_kernel_energrp_ref
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
352 #ifdef GMX_NBNXN_SIMD_2XNN
353 case nbnxnk4xN_SIMD_2xNN
:
354 unrollj
= GMX_SIMD_REAL_WIDTH
/2;
355 nbnxn_kernel_energrp_simd_2xnn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
364 #ifdef GMX_NBNXN_SIMD_4XN
365 case nbnxnk4xN_SIMD_4xN
:
366 unrollj
= GMX_SIMD_REAL_WIDTH
;
367 nbnxn_kernel_energrp_simd_4xn
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
377 GMX_RELEASE_ASSERT(false, "Unsupported kernel architecture");
380 if (nbvg
->kernel_type
!= nbnxnk4x4_PlainC
)
385 reduceGroupEnergySimdBuffers
<2>(nbat
->nenergrp
,
387 out
->VSvdw
, out
->VSc
,
391 reduceGroupEnergySimdBuffers
<4>(nbat
->nenergrp
,
393 out
->VSvdw
, out
->VSc
,
397 reduceGroupEnergySimdBuffers
<8>(nbat
->nenergrp
,
399 out
->VSvdw
, out
->VSc
,
403 GMX_RELEASE_ASSERT(false, "Unsupported j-unroll size");
409 if (forceFlags
& GMX_FORCE_ENERGY
)
411 reduce_energies_over_lists(nbat
, nnbl
, vVdw
, vCoulomb
);