2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the Verlet kernel generator for
46 #ifdef GMX_NBNXN_SIMD_2XNN
48 /* Include the full-width SIMD macros */
50 #include "gromacs/simd/macros.h"
51 #include "gromacs/simd/vector_operations.h"
53 #if !(GMX_SIMD_REAL_WIDTH == 8 || GMX_SIMD_REAL_WIDTH == 16)
54 #error "unsupported SIMD width"
57 #define GMX_SIMD_J_UNROLL_SIZE 2
58 #include "nbnxn_kernel_simd_2xnn.h"
59 #include "../nbnxn_kernel_common.h"
60 #include "gmx_omp_nthreads.h"
61 #include "types/force_flags.h"
62 #include "gmx_fatal.h"
64 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
67 coulktRF
, coulktTAB
, coulktTAB_TWIN
, coulktEWALD
, coulktEWALD_TWIN
, coulktNR
70 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
73 vdwktLJCUT_COMBGEOM
, vdwktLJCUT_COMBLB
, vdwktLJCUT_COMBNONE
, vdwktLJFORCESWITCH
, vdwktLJPOTSWITCH
, vdwktLJEWALDCOMBGEOM
, vdwktNR
76 /* Declare and define the kernel function pointer lookup tables.
77 * The minor index of the array goes over both the LJ combination rules,
78 * which is only supported by plain cut-off, and the LJ switch/PME functions.
80 static p_nbk_func_noener p_nbk_noener
[coulktNR
][vdwktNR
] =
83 nbnxn_kernel_ElecRF_VdwLJCombGeom_F_2xnn
,
84 nbnxn_kernel_ElecRF_VdwLJCombLB_F_2xnn
,
85 nbnxn_kernel_ElecRF_VdwLJ_F_2xnn
,
86 nbnxn_kernel_ElecRF_VdwLJFSw_F_2xnn
,
87 nbnxn_kernel_ElecRF_VdwLJPSw_F_2xnn
,
88 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_2xnn
,
91 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_2xnn
,
92 nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_2xnn
,
93 nbnxn_kernel_ElecQSTab_VdwLJ_F_2xnn
,
94 nbnxn_kernel_ElecQSTab_VdwLJFSw_F_2xnn
,
95 nbnxn_kernel_ElecQSTab_VdwLJPSw_F_2xnn
,
96 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_2xnn
,
99 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_2xnn
,
100 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_2xnn
,
101 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_2xnn
,
102 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_2xnn
,
103 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_2xnn
,
104 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_2xnn
,
107 nbnxn_kernel_ElecEw_VdwLJCombGeom_F_2xnn
,
108 nbnxn_kernel_ElecEw_VdwLJCombLB_F_2xnn
,
109 nbnxn_kernel_ElecEw_VdwLJ_F_2xnn
,
110 nbnxn_kernel_ElecEw_VdwLJFSw_F_2xnn
,
111 nbnxn_kernel_ElecEw_VdwLJPSw_F_2xnn
,
112 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_2xnn
,
115 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_2xnn
,
116 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_2xnn
,
117 nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_2xnn
,
118 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_2xnn
,
119 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_2xnn
,
120 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_2xnn
,
124 static p_nbk_func_ener p_nbk_ener
[coulktNR
][vdwktNR
] =
127 nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_2xnn
,
128 nbnxn_kernel_ElecRF_VdwLJCombLB_VF_2xnn
,
129 nbnxn_kernel_ElecRF_VdwLJ_VF_2xnn
,
130 nbnxn_kernel_ElecRF_VdwLJFSw_VF_2xnn
,
131 nbnxn_kernel_ElecRF_VdwLJPSw_VF_2xnn
,
132 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_2xnn
,
135 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_2xnn
,
136 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_2xnn
,
137 nbnxn_kernel_ElecQSTab_VdwLJ_VF_2xnn
,
138 nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_2xnn
,
139 nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_2xnn
,
140 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_2xnn
,
143 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_2xnn
,
144 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_2xnn
,
145 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_2xnn
,
146 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_2xnn
,
147 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_2xnn
,
148 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_2xnn
,
151 nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_2xnn
,
152 nbnxn_kernel_ElecEw_VdwLJCombLB_VF_2xnn
,
153 nbnxn_kernel_ElecEw_VdwLJ_VF_2xnn
,
154 nbnxn_kernel_ElecEw_VdwLJFSw_VF_2xnn
,
155 nbnxn_kernel_ElecEw_VdwLJPSw_VF_2xnn
,
156 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_2xnn
,
159 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_2xnn
,
160 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_2xnn
,
161 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_2xnn
,
162 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_2xnn
,
163 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_2xnn
,
164 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_2xnn
,
168 static p_nbk_func_ener p_nbk_energrp
[coulktNR
][vdwktNR
] =
171 nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_2xnn
,
172 nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_2xnn
,
173 nbnxn_kernel_ElecRF_VdwLJ_VgrpF_2xnn
,
174 nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_2xnn
,
175 nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_2xnn
,
176 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_2xnn
,
179 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_2xnn
,
180 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_2xnn
,
181 nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_2xnn
,
182 nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_2xnn
,
183 nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_2xnn
,
184 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_2xnn
,
187 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_2xnn
,
188 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_2xnn
,
189 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_2xnn
,
190 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_2xnn
,
191 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_2xnn
,
192 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_2xnn
,
195 nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_2xnn
,
196 nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_2xnn
,
197 nbnxn_kernel_ElecEw_VdwLJ_VgrpF_2xnn
,
198 nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_2xnn
,
199 nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_2xnn
,
200 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_2xnn
,
203 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_2xnn
,
204 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_2xnn
,
205 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_2xnn
,
206 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_2xnn
,
207 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_2xnn
,
208 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_2xnn
,
214 reduce_group_energies(int ng
, int ng_2log
,
215 const real
*VSvdw
, const real
*VSc
,
216 real
*Vvdw
, real
*Vc
)
218 const int unrollj
= GMX_SIMD_REAL_WIDTH
/GMX_SIMD_J_UNROLL_SIZE
;
219 const int unrollj_half
= unrollj
/2;
220 int ng_p2
, i
, j
, j0
, j1
, c
, s
;
222 ng_p2
= (1<<ng_2log
);
224 /* The size of the x86 SIMD energy group buffer array is:
225 * ng*ng*ng_p2*unrollj_half*simd_width
227 for (i
= 0; i
< ng
; i
++)
229 for (j
= 0; j
< ng
; j
++)
235 for (j1
= 0; j1
< ng
; j1
++)
237 for (j0
= 0; j0
< ng
; j0
++)
239 c
= ((i
*ng
+ j1
)*ng_p2
+ j0
)*unrollj_half
*unrollj
;
240 for (s
= 0; s
< unrollj_half
; s
++)
242 Vvdw
[i
*ng
+j0
] += VSvdw
[c
+0];
243 Vvdw
[i
*ng
+j1
] += VSvdw
[c
+1];
244 Vc
[i
*ng
+j0
] += VSc
[c
+0];
245 Vc
[i
*ng
+j1
] += VSc
[c
+1];
253 #else /* GMX_NBNXN_SIMD_2XNN */
255 #include "gmx_fatal.h"
257 #endif /* GMX_NBNXN_SIMD_2XNN */
260 nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused
*nbl_list
,
261 const nbnxn_atomdata_t gmx_unused
*nbat
,
262 const interaction_const_t gmx_unused
*ic
,
263 int gmx_unused ewald_excl
,
264 rvec gmx_unused
*shift_vec
,
265 int gmx_unused force_flags
,
266 int gmx_unused clearF
,
267 real gmx_unused
*fshift
,
269 real gmx_unused
*Vvdw
)
270 #ifdef GMX_NBNXN_SIMD_2XNN
273 nbnxn_pairlist_t
**nbl
;
274 int coulkt
, vdwkt
= 0;
277 nnbl
= nbl_list
->nnbl
;
280 if (EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
)
286 if (ewald_excl
== ewaldexclTable
)
288 if (ic
->rcoulomb
== ic
->rvdw
)
294 coulkt
= coulktTAB_TWIN
;
299 if (ic
->rcoulomb
== ic
->rvdw
)
301 coulkt
= coulktEWALD
;
305 coulkt
= coulktEWALD_TWIN
;
310 if (ic
->vdwtype
== evdwCUT
)
312 switch (ic
->vdw_modifier
)
315 case eintmodPOTSHIFT
:
316 switch (nbat
->comb_rule
)
318 case ljcrGEOM
: vdwkt
= vdwktLJCUT_COMBGEOM
; break;
319 case ljcrLB
: vdwkt
= vdwktLJCUT_COMBLB
; break;
320 case ljcrNONE
: vdwkt
= vdwktLJCUT_COMBNONE
; break;
321 default: gmx_incons("Unknown combination rule");
324 case eintmodFORCESWITCH
:
325 vdwkt
= vdwktLJFORCESWITCH
;
327 case eintmodPOTSWITCH
:
328 vdwkt
= vdwktLJPOTSWITCH
;
331 gmx_incons("Unsupported VdW interaction modifier");
334 else if (ic
->vdwtype
== evdwPME
)
336 if (ic
->ljpme_comb_rule
== eljpmeLB
)
338 gmx_incons("The nbnxn SIMD kernels don't suport LJ-PME with LB");
340 vdwkt
= vdwktLJEWALDCOMBGEOM
;
344 gmx_incons("Unsupported VdW interaction type");
347 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
348 for (nb
= 0; nb
< nnbl
; nb
++)
350 nbnxn_atomdata_output_t
*out
;
353 out
= &nbat
->out
[nb
];
355 if (clearF
== enbvClearFYes
)
357 clear_f(nbat
, nb
, out
->f
);
360 if ((force_flags
& GMX_FORCE_VIRIAL
) && nnbl
== 1)
366 fshift_p
= out
->fshift
;
368 if (clearF
== enbvClearFYes
)
370 clear_fshift(fshift_p
);
374 if (!(force_flags
& GMX_FORCE_ENERGY
))
376 /* Don't calculate energies */
377 p_nbk_noener
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
383 else if (out
->nV
== 1)
385 /* No energy groups */
389 p_nbk_ener
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
399 /* Calculate energy group contributions */
402 for (i
= 0; i
< out
->nVS
; i
++)
406 for (i
= 0; i
< out
->nVS
; i
++)
411 p_nbk_energrp
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
419 reduce_group_energies(nbat
->nenergrp
, nbat
->neg_2log
,
420 out
->VSvdw
, out
->VSc
,
425 if (force_flags
& GMX_FORCE_ENERGY
)
427 reduce_energies_over_lists(nbat
, nnbl
, Vvdw
, Vc
);
432 gmx_incons("nbnxn_kernel_simd_2xnn called when such kernels "
433 " are not enabled.");
436 #undef GMX_SIMD_J_UNROLL_SIZE