added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / mdlib / nbnxn_kernels / nbnxn_kernel_x86_simd256.c
blob2396702b6e5550b92e75db355725d16edf87fc13
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
6 * G R O M A C S
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2012, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include <math.h>
41 #include "typedefs.h"
42 #include "vec.h"
43 #include "smalloc.h"
44 #include "force.h"
45 #include "gmx_omp_nthreads.h"
46 #include "../nbnxn_consts.h"
47 #include "nbnxn_kernel_common.h"
49 #ifdef GMX_X86_AVX_256
51 #include "nbnxn_kernel_x86_simd256.h"
53 /* Include all flavors of the 256-bit AVX kernel loops */
55 #define GMX_MM256_HERE
57 /* Analytical reaction-field kernels */
58 #define CALC_COUL_RF
60 #include "nbnxn_kernel_x86_simd_includes.h"
62 #undef CALC_COUL_RF
64 /* Tabulated exclusion interaction electrostatics kernels */
65 #define CALC_COUL_TAB
67 /* Single cut-off: rcoulomb = rvdw */
68 #include "nbnxn_kernel_x86_simd_includes.h"
70 /* Twin cut-off: rcoulomb >= rvdw */
71 #define VDW_CUTOFF_CHECK
72 #include "nbnxn_kernel_x86_simd_includes.h"
73 #undef VDW_CUTOFF_CHECK
75 #undef CALC_COUL_TAB
78 typedef void (*p_nbk_func_ener)(const nbnxn_pairlist_t *nbl,
79 const nbnxn_atomdata_t *nbat,
80 const interaction_const_t *ic,
81 rvec *shift_vec,
82 real *f,
83 real *fshift,
84 real *Vvdw,
85 real *Vc);
87 typedef void (*p_nbk_func_noener)(const nbnxn_pairlist_t *nbl,
88 const nbnxn_atomdata_t *nbat,
89 const interaction_const_t *ic,
90 rvec *shift_vec,
91 real *f,
92 real *fshift);
94 enum { coultRF, coultTAB, coultTAB_TWIN, coultNR };
97 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
98 { { nbnxn_kernel_x86_simd256_rf_comb_geom_ener,
99 nbnxn_kernel_x86_simd256_rf_comb_lb_ener,
100 nbnxn_kernel_x86_simd256_rf_comb_none_ener },
101 { nbnxn_kernel_x86_simd256_tab_comb_geom_ener,
102 nbnxn_kernel_x86_simd256_tab_comb_lb_ener,
103 nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener },
104 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_ener,
105 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_ener,
106 nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener } };
108 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
109 { { nbnxn_kernel_x86_simd256_rf_comb_geom_energrp,
110 nbnxn_kernel_x86_simd256_rf_comb_lb_energrp,
111 nbnxn_kernel_x86_simd256_rf_comb_none_energrp },
112 { nbnxn_kernel_x86_simd256_tab_comb_geom_energrp,
113 nbnxn_kernel_x86_simd256_tab_comb_lb_energrp,
114 nbnxn_kernel_x86_simd256_tab_comb_none_energrp },
115 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_energrp,
116 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_energrp,
117 nbnxn_kernel_x86_simd256_tab_twin_comb_none_energrp } };
119 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
120 { { nbnxn_kernel_x86_simd256_rf_comb_geom_noener,
121 nbnxn_kernel_x86_simd256_rf_comb_lb_noener,
122 nbnxn_kernel_x86_simd256_rf_comb_none_noener },
123 { nbnxn_kernel_x86_simd256_tab_comb_geom_noener,
124 nbnxn_kernel_x86_simd256_tab_comb_lb_noener,
125 nbnxn_kernel_x86_simd256_tab_comb_none_noener },
126 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_noener,
127 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_noener,
128 nbnxn_kernel_x86_simd256_tab_twin_comb_none_noener } };
130 #endif /* SSE */
133 static void reduce_group_energies(int ng,int ng_2log,
134 const real *VSvdw,const real *VSc,
135 real *Vvdw,real *Vc)
137 int ng_p2,i,j,j0,j1,c,s;
139 #ifndef GMX_DOUBLE
140 #define SIMD_WIDTH 4
141 #define SIMD_WIDTH_2 2
142 #else
143 #define SIMD_WIDTH 2
144 #define SIMD_WIDTH_2 1
145 #endif
147 ng_p2 = (1<<ng_2log);
149 /* The size of the SSE energy group buffer array is:
150 * stride^3*SIMD_WIDTH_2*SIMD_WIDTH
152 for(i=0; i<ng; i++)
154 for(j=0; j<ng; j++)
156 Vvdw[i*ng+j] = 0;
157 Vc[i*ng+j] = 0;
160 for(j1=0; j1<ng; j1++)
162 for(j0=0; j0<ng; j0++)
164 c = ((i*ng + j1)*ng_p2 + j0)*SIMD_WIDTH_2*SIMD_WIDTH;
165 for(s=0; s<SIMD_WIDTH_2; s++)
167 Vvdw[i*ng+j0] += VSvdw[c+0];
168 Vvdw[i*ng+j1] += VSvdw[c+1];
169 Vc [i*ng+j0] += VSc [c+0];
170 Vc [i*ng+j1] += VSc [c+1];
171 c += SIMD_WIDTH + 2;
178 void
179 nbnxn_kernel_x86_simd256(nbnxn_pairlist_set_t *nbl_list,
180 const nbnxn_atomdata_t *nbat,
181 const interaction_const_t *ic,
182 rvec *shift_vec,
183 int force_flags,
184 int clearF,
185 real *fshift,
186 real *Vc,
187 real *Vvdw)
188 #ifdef GMX_X86_AVX_256
190 int nnbl;
191 nbnxn_pairlist_t **nbl;
192 int coult;
193 int nb;
195 nnbl = nbl_list->nnbl;
196 nbl = nbl_list->nbl;
198 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
200 coult = coultRF;
202 else
204 if (ic->rcoulomb == ic->rvdw)
206 coult = coultTAB;
208 else
210 coult = coultTAB_TWIN;
214 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
215 for(nb=0; nb<nnbl; nb++)
217 nbnxn_atomdata_output_t *out;
218 real *fshift_p;
220 out = &nbat->out[nb];
222 if (clearF == enbvClearFYes)
224 clear_f(nbat,out->f);
227 if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
229 fshift_p = fshift;
231 else
233 fshift_p = out->fshift;
235 if (clearF == enbvClearFYes)
237 clear_fshift(fshift_p);
241 /* With Ewald type electrostatics we the forces for excluded atom pairs
242 * should not contribute to the virial sum. The exclusion forces
243 * are not calculate in the energy kernels, but are in _noener.
245 if (!((force_flags & GMX_FORCE_ENERGY) ||
246 (EEL_FULL(ic->eeltype) && (force_flags & GMX_FORCE_VIRIAL))))
248 /* Don't calculate energies */
249 p_nbk_noener[coult][nbat->comb_rule](nbl[nb],nbat,
251 shift_vec,
252 out->f,
253 fshift_p);
255 else if (out->nV == 1 || !(force_flags & GMX_FORCE_ENERGY))
257 /* No energy groups */
258 out->Vvdw[0] = 0;
259 out->Vc[0] = 0;
261 p_nbk_ener[coult][nbat->comb_rule](nbl[nb],nbat,
263 shift_vec,
264 out->f,
265 fshift_p,
266 out->Vvdw,
267 out->Vc);
269 else
271 /* Calculate energy group contributions */
272 int i;
274 for(i=0; i<out->nVS; i++)
276 out->VSvdw[i] = 0;
278 for(i=0; i<out->nVS; i++)
280 out->VSc[i] = 0;
283 p_nbk_energrp[coult][nbat->comb_rule](nbl[nb],nbat,
285 shift_vec,
286 out->f,
287 fshift_p,
288 out->VSvdw,
289 out->VSc);
291 reduce_group_energies(nbat->nenergrp,nbat->neg_2log,
292 out->VSvdw,out->VSc,
293 out->Vvdw,out->Vc);
297 if (force_flags & GMX_FORCE_ENERGY)
299 /* Reduce the energies */
300 for(nb=0; nb<nnbl; nb++)
302 int i;
304 for(i=0; i<nbat->out[nb].nV; i++)
306 Vvdw[i] += nbat->out[nb].Vvdw[i];
307 Vc[i] += nbat->out[nb].Vc[i];
312 #else
314 gmx_incons("nbnxn_kernel_x86_simd256 called while GROMACS was configured without AVX enabled");
316 #endif