1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2012, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
45 #include "gmx_omp_nthreads.h"
46 #include "../nbnxn_consts.h"
47 #include "nbnxn_kernel_common.h"
49 #ifdef GMX_X86_AVX_256
51 #include "nbnxn_kernel_x86_simd256.h"
53 /* Include all flavors of the 256-bit AVX kernel loops */
55 #define GMX_MM256_HERE
57 /* Analytical reaction-field kernels */
60 #include "nbnxn_kernel_x86_simd_includes.h"
64 /* Tabulated exclusion interaction electrostatics kernels */
67 /* Single cut-off: rcoulomb = rvdw */
68 #include "nbnxn_kernel_x86_simd_includes.h"
70 /* Twin cut-off: rcoulomb >= rvdw */
71 #define VDW_CUTOFF_CHECK
72 #include "nbnxn_kernel_x86_simd_includes.h"
73 #undef VDW_CUTOFF_CHECK
78 typedef void (*p_nbk_func_ener
)(const nbnxn_pairlist_t
*nbl
,
79 const nbnxn_atomdata_t
*nbat
,
80 const interaction_const_t
*ic
,
87 typedef void (*p_nbk_func_noener
)(const nbnxn_pairlist_t
*nbl
,
88 const nbnxn_atomdata_t
*nbat
,
89 const interaction_const_t
*ic
,
94 enum { coultRF
, coultTAB
, coultTAB_TWIN
, coultNR
};
97 static p_nbk_func_ener p_nbk_ener
[coultNR
][ljcrNR
] =
98 { { nbnxn_kernel_x86_simd256_rf_comb_geom_ener
,
99 nbnxn_kernel_x86_simd256_rf_comb_lb_ener
,
100 nbnxn_kernel_x86_simd256_rf_comb_none_ener
},
101 { nbnxn_kernel_x86_simd256_tab_comb_geom_ener
,
102 nbnxn_kernel_x86_simd256_tab_comb_lb_ener
,
103 nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener
},
104 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_ener
,
105 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_ener
,
106 nbnxn_kernel_x86_simd256_tab_twin_comb_none_ener
} };
108 static p_nbk_func_ener p_nbk_energrp
[coultNR
][ljcrNR
] =
109 { { nbnxn_kernel_x86_simd256_rf_comb_geom_energrp
,
110 nbnxn_kernel_x86_simd256_rf_comb_lb_energrp
,
111 nbnxn_kernel_x86_simd256_rf_comb_none_energrp
},
112 { nbnxn_kernel_x86_simd256_tab_comb_geom_energrp
,
113 nbnxn_kernel_x86_simd256_tab_comb_lb_energrp
,
114 nbnxn_kernel_x86_simd256_tab_comb_none_energrp
},
115 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_energrp
,
116 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_energrp
,
117 nbnxn_kernel_x86_simd256_tab_twin_comb_none_energrp
} };
119 static p_nbk_func_noener p_nbk_noener
[coultNR
][ljcrNR
] =
120 { { nbnxn_kernel_x86_simd256_rf_comb_geom_noener
,
121 nbnxn_kernel_x86_simd256_rf_comb_lb_noener
,
122 nbnxn_kernel_x86_simd256_rf_comb_none_noener
},
123 { nbnxn_kernel_x86_simd256_tab_comb_geom_noener
,
124 nbnxn_kernel_x86_simd256_tab_comb_lb_noener
,
125 nbnxn_kernel_x86_simd256_tab_comb_none_noener
},
126 { nbnxn_kernel_x86_simd256_tab_twin_comb_geom_noener
,
127 nbnxn_kernel_x86_simd256_tab_twin_comb_lb_noener
,
128 nbnxn_kernel_x86_simd256_tab_twin_comb_none_noener
} };
133 static void reduce_group_energies(int ng
,int ng_2log
,
134 const real
*VSvdw
,const real
*VSc
,
137 int ng_p2
,i
,j
,j0
,j1
,c
,s
;
141 #define SIMD_WIDTH_2 2
144 #define SIMD_WIDTH_2 1
147 ng_p2
= (1<<ng_2log
);
149 /* The size of the SSE energy group buffer array is:
150 * stride^3*SIMD_WIDTH_2*SIMD_WIDTH
160 for(j1
=0; j1
<ng
; j1
++)
162 for(j0
=0; j0
<ng
; j0
++)
164 c
= ((i
*ng
+ j1
)*ng_p2
+ j0
)*SIMD_WIDTH_2
*SIMD_WIDTH
;
165 for(s
=0; s
<SIMD_WIDTH_2
; s
++)
167 Vvdw
[i
*ng
+j0
] += VSvdw
[c
+0];
168 Vvdw
[i
*ng
+j1
] += VSvdw
[c
+1];
169 Vc
[i
*ng
+j0
] += VSc
[c
+0];
170 Vc
[i
*ng
+j1
] += VSc
[c
+1];
179 nbnxn_kernel_x86_simd256(nbnxn_pairlist_set_t
*nbl_list
,
180 const nbnxn_atomdata_t
*nbat
,
181 const interaction_const_t
*ic
,
188 #ifdef GMX_X86_AVX_256
191 nbnxn_pairlist_t
**nbl
;
195 nnbl
= nbl_list
->nnbl
;
198 if (EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
)
204 if (ic
->rcoulomb
== ic
->rvdw
)
210 coult
= coultTAB_TWIN
;
214 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
215 for(nb
=0; nb
<nnbl
; nb
++)
217 nbnxn_atomdata_output_t
*out
;
220 out
= &nbat
->out
[nb
];
222 if (clearF
== enbvClearFYes
)
224 clear_f(nbat
,out
->f
);
227 if ((force_flags
& GMX_FORCE_VIRIAL
) && nnbl
== 1)
233 fshift_p
= out
->fshift
;
235 if (clearF
== enbvClearFYes
)
237 clear_fshift(fshift_p
);
241 /* With Ewald type electrostatics we the forces for excluded atom pairs
242 * should not contribute to the virial sum. The exclusion forces
243 * are not calculate in the energy kernels, but are in _noener.
245 if (!((force_flags
& GMX_FORCE_ENERGY
) ||
246 (EEL_FULL(ic
->eeltype
) && (force_flags
& GMX_FORCE_VIRIAL
))))
248 /* Don't calculate energies */
249 p_nbk_noener
[coult
][nbat
->comb_rule
](nbl
[nb
],nbat
,
255 else if (out
->nV
== 1 || !(force_flags
& GMX_FORCE_ENERGY
))
257 /* No energy groups */
261 p_nbk_ener
[coult
][nbat
->comb_rule
](nbl
[nb
],nbat
,
271 /* Calculate energy group contributions */
274 for(i
=0; i
<out
->nVS
; i
++)
278 for(i
=0; i
<out
->nVS
; i
++)
283 p_nbk_energrp
[coult
][nbat
->comb_rule
](nbl
[nb
],nbat
,
291 reduce_group_energies(nbat
->nenergrp
,nbat
->neg_2log
,
297 if (force_flags
& GMX_FORCE_ENERGY
)
299 /* Reduce the energies */
300 for(nb
=0; nb
<nnbl
; nb
++)
304 for(i
=0; i
<nbat
->out
[nb
].nV
; i
++)
306 Vvdw
[i
] += nbat
->out
[nb
].Vvdw
[i
];
307 Vc
[i
] += nbat
->out
[nb
].Vc
[i
];
314 gmx_incons("nbnxn_kernel_x86_simd256 called while GROMACS was configured without AVX enabled");