2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2012, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
39 /* Get the half-width SIMD stuff from the kernel utils files */
40 #include "nbnxn_kernels/nbnxn_kernel_simd_utils.h"
43 #if GMX_SIMD_WIDTH_HERE >= 2*NBNXN_CPU_CLUSTER_I_SIZE
44 #define STRIDE_S (GMX_SIMD_WIDTH_HERE/2)
46 #define STRIDE_S NBNXN_CPU_CLUSTER_I_SIZE
49 static gmx_inline gmx_mm_pr
gmx_load_hpr_hilo_pr(const real
*a
)
54 gmx_load_hpr(&a_S
, a
);
56 gmx_2hpr_to_pr(a_S
, a_S
, &a_a_S
);
61 static gmx_inline gmx_mm_pr
gmx_set_2real_shift_pr(const real
*a
, real shift
)
63 gmx_mm_hpr a0_S
, a1_S
;
66 gmx_set1_hpr(&a0_S
, a
[0] + shift
);
67 gmx_set1_hpr(&a1_S
, a
[1] + shift
);
69 gmx_2hpr_to_pr(a0_S
, a1_S
, &a0_a1_S
);
74 /* Copies PBC shifted i-cell packed atom coordinates to working array */
75 static gmx_inline
void
76 icell_set_x_simd_2xnn(int ci
,
77 real shx
, real shy
, real shz
,
79 int stride
, const real
*x
,
80 nbnxn_list_work_t
*work
)
83 nbnxn_x_ci_simd_2xnn_t
*x_ci
;
85 x_ci
= work
->x_ci_simd_2xnn
;
87 ia
= X_IND_CI_SIMD_2XNN(ci
);
89 x_ci
->ix_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 0*STRIDE_S
+ 0, shx
);
90 x_ci
->iy_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 1*STRIDE_S
+ 0, shy
);
91 x_ci
->iz_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 2*STRIDE_S
+ 0, shz
);
92 x_ci
->ix_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 0*STRIDE_S
+ 2, shx
);
93 x_ci
->iy_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 1*STRIDE_S
+ 2, shy
);
94 x_ci
->iz_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 2*STRIDE_S
+ 2, shz
);
97 /* SIMD code for making a pair list of cell ci vs cell cjf-cjl
98 * for coordinates in packed format.
99 * Checks bouding box distances and possibly atom pair distances.
100 * This is an accelerated version of make_cluster_list_simple.
102 static gmx_inline
void
103 make_cluster_list_simd_2xnn(const nbnxn_grid_t
*gridj
,
104 nbnxn_pairlist_t
*nbl
,
105 int ci
, int cjf
, int cjl
,
106 gmx_bool remove_sub_diag
,
108 real rl2
, float rbb2
,
111 const nbnxn_x_ci_simd_2xnn_t
*work
;
112 const nbnxn_bb_t
*bb_ci
;
114 gmx_mm_pr jx_S
, jy_S
, jz_S
;
116 gmx_mm_pr dx_S0
, dy_S0
, dz_S0
;
117 gmx_mm_pr dx_S2
, dy_S2
, dz_S2
;
130 int xind_f
, xind_l
, cj
;
132 cjf
= CI_TO_CJ_SIMD_2XNN(cjf
);
133 cjl
= CI_TO_CJ_SIMD_2XNN(cjl
+1) - 1;
135 work
= nbl
->work
->x_ci_simd_2xnn
;
137 bb_ci
= nbl
->work
->bb_ci
;
139 rc2_S
= gmx_set1_pr(rl2
);
142 while (!InRange
&& cjf
<= cjl
)
144 #ifdef NBNXN_SEARCH_BB_SIMD4
145 d2
= subc_bb_dist2_simd4(0, bb_ci
, cjf
, gridj
->bbj
);
147 d2
= subc_bb_dist2(0, bb_ci
, cjf
, gridj
->bbj
);
151 /* Check if the distance is within the distance where
152 * we use only the bounding box distance rbb,
153 * or within the cut-off and there is at least one atom pair
154 * within the cut-off.
162 xind_f
= X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cjf
);
164 jx_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+0*STRIDE_S
);
165 jy_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+1*STRIDE_S
);
166 jz_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+2*STRIDE_S
);
168 /* Calculate distance */
169 dx_S0
= gmx_sub_pr(work
->ix_S0
, jx_S
);
170 dy_S0
= gmx_sub_pr(work
->iy_S0
, jy_S
);
171 dz_S0
= gmx_sub_pr(work
->iz_S0
, jz_S
);
172 dx_S2
= gmx_sub_pr(work
->ix_S2
, jx_S
);
173 dy_S2
= gmx_sub_pr(work
->iy_S2
, jy_S
);
174 dz_S2
= gmx_sub_pr(work
->iz_S2
, jz_S
);
176 /* rsq = dx*dx+dy*dy+dz*dz */
177 rsq_S0
= gmx_calc_rsq_pr(dx_S0
, dy_S0
, dz_S0
);
178 rsq_S2
= gmx_calc_rsq_pr(dx_S2
, dy_S2
, dz_S2
);
180 wco_S0
= gmx_cmplt_pr(rsq_S0
, rc2_S
);
181 wco_S2
= gmx_cmplt_pr(rsq_S2
, rc2_S
);
183 wco_any_S
= gmx_or_pb(wco_S0
, wco_S2
);
185 InRange
= gmx_anytrue_pb(wco_any_S
);
187 *ndistc
+= 2*GMX_SIMD_WIDTH_HERE
;
200 while (!InRange
&& cjl
> cjf
)
202 #ifdef NBNXN_SEARCH_BB_SIMD4
203 d2
= subc_bb_dist2_simd4(0, bb_ci
, cjl
, gridj
->bbj
);
205 d2
= subc_bb_dist2(0, bb_ci
, cjl
, gridj
->bbj
);
209 /* Check if the distance is within the distance where
210 * we use only the bounding box distance rbb,
211 * or within the cut-off and there is at least one atom pair
212 * within the cut-off.
220 xind_l
= X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cjl
);
222 jx_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+0*STRIDE_S
);
223 jy_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+1*STRIDE_S
);
224 jz_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+2*STRIDE_S
);
226 /* Calculate distance */
227 dx_S0
= gmx_sub_pr(work
->ix_S0
, jx_S
);
228 dy_S0
= gmx_sub_pr(work
->iy_S0
, jy_S
);
229 dz_S0
= gmx_sub_pr(work
->iz_S0
, jz_S
);
230 dx_S2
= gmx_sub_pr(work
->ix_S2
, jx_S
);
231 dy_S2
= gmx_sub_pr(work
->iy_S2
, jy_S
);
232 dz_S2
= gmx_sub_pr(work
->iz_S2
, jz_S
);
234 /* rsq = dx*dx+dy*dy+dz*dz */
235 rsq_S0
= gmx_calc_rsq_pr(dx_S0
, dy_S0
, dz_S0
);
236 rsq_S2
= gmx_calc_rsq_pr(dx_S2
, dy_S2
, dz_S2
);
238 wco_S0
= gmx_cmplt_pr(rsq_S0
, rc2_S
);
239 wco_S2
= gmx_cmplt_pr(rsq_S2
, rc2_S
);
241 wco_any_S
= gmx_or_pb(wco_S0
, wco_S2
);
243 InRange
= gmx_anytrue_pb(wco_any_S
);
245 *ndistc
+= 2*GMX_SIMD_WIDTH_HERE
;
255 for (cj
= cjf
; cj
<= cjl
; cj
++)
257 /* Store cj and the interaction mask */
258 nbl
->cj
[nbl
->ncj
].cj
= CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cj
;
259 nbl
->cj
[nbl
->ncj
].excl
= get_imask_simd_2xnn(remove_sub_diag
, ci
, cj
);
262 /* Increase the closing index in i super-cell list */
263 nbl
->ci
[nbl
->nci
].cj_ind_end
= nbl
->ncj
;