/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*
 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
 */
44 #include "../nb_kernel.h"
45 #include "gromacs/gmxlib/nrnb.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: None
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double
58 (t_nblist
* gmx_restrict nlist
,
59 rvec
* gmx_restrict xx
,
60 rvec
* gmx_restrict ff
,
61 struct t_forcerec
* gmx_restrict fr
,
62 t_mdatoms
* gmx_restrict mdatoms
,
63 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
64 t_nrnb
* gmx_restrict nrnb
)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
72 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
74 int j_coord_offsetA
,j_coord_offsetB
;
75 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
77 real
*shiftvec
,*fshift
,*x
,*f
;
78 _fjsp_v2r8 tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
80 _fjsp_v2r8 ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
82 _fjsp_v2r8 ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
84 _fjsp_v2r8 ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
85 int vdwjidx0A
,vdwjidx0B
;
86 _fjsp_v2r8 jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
87 int vdwjidx1A
,vdwjidx1B
;
88 _fjsp_v2r8 jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
89 int vdwjidx2A
,vdwjidx2B
;
90 _fjsp_v2r8 jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
91 _fjsp_v2r8 dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
92 _fjsp_v2r8 dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
93 _fjsp_v2r8 dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
94 _fjsp_v2r8 dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
95 _fjsp_v2r8 dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
96 _fjsp_v2r8 dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
97 _fjsp_v2r8 dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
98 _fjsp_v2r8 dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
99 _fjsp_v2r8 dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
100 _fjsp_v2r8 velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
103 _fjsp_v2r8 dummy_mask
,cutoff_mask
;
104 _fjsp_v2r8 one
= gmx_fjsp_set1_v2r8(1.0);
105 _fjsp_v2r8 two
= gmx_fjsp_set1_v2r8(2.0);
106 union { _fjsp_v2r8 simd
; long long int i
[2]; } vfconv
,gbconv
,ewconv
;
113 jindex
= nlist
->jindex
;
115 shiftidx
= nlist
->shift
;
117 shiftvec
= fr
->shift_vec
[0];
118 fshift
= fr
->fshift
[0];
119 facel
= gmx_fjsp_set1_v2r8(fr
->ic
->epsfac
);
120 charge
= mdatoms
->chargeA
;
121 krf
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
);
122 krf2
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
*2.0);
123 crf
= gmx_fjsp_set1_v2r8(fr
->ic
->c_rf
);
125 /* Setup water-specific parameters */
126 inr
= nlist
->iinr
[0];
127 iq0
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+0]));
128 iq1
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+1]));
129 iq2
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+2]));
131 jq0
= gmx_fjsp_set1_v2r8(charge
[inr
+0]);
132 jq1
= gmx_fjsp_set1_v2r8(charge
[inr
+1]);
133 jq2
= gmx_fjsp_set1_v2r8(charge
[inr
+2]);
134 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
135 qq01
= _fjsp_mul_v2r8(iq0
,jq1
);
136 qq02
= _fjsp_mul_v2r8(iq0
,jq2
);
137 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
138 qq11
= _fjsp_mul_v2r8(iq1
,jq1
);
139 qq12
= _fjsp_mul_v2r8(iq1
,jq2
);
140 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
141 qq21
= _fjsp_mul_v2r8(iq2
,jq1
);
142 qq22
= _fjsp_mul_v2r8(iq2
,jq2
);
144 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
145 rcutoff_scalar
= fr
->ic
->rcoulomb
;
146 rcutoff
= gmx_fjsp_set1_v2r8(rcutoff_scalar
);
147 rcutoff2
= _fjsp_mul_v2r8(rcutoff
,rcutoff
);
149 /* Avoid stupid compiler warnings */
157 /* Start outer loop over neighborlists */
158 for(iidx
=0; iidx
<nri
; iidx
++)
160 /* Load shift vector for this list */
161 i_shift_offset
= DIM
*shiftidx
[iidx
];
163 /* Load limits for loop over neighbors */
164 j_index_start
= jindex
[iidx
];
165 j_index_end
= jindex
[iidx
+1];
167 /* Get outer coordinate index */
169 i_coord_offset
= DIM
*inr
;
171 /* Load i particle coords and add shift vector */
172 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
173 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
175 fix0
= _fjsp_setzero_v2r8();
176 fiy0
= _fjsp_setzero_v2r8();
177 fiz0
= _fjsp_setzero_v2r8();
178 fix1
= _fjsp_setzero_v2r8();
179 fiy1
= _fjsp_setzero_v2r8();
180 fiz1
= _fjsp_setzero_v2r8();
181 fix2
= _fjsp_setzero_v2r8();
182 fiy2
= _fjsp_setzero_v2r8();
183 fiz2
= _fjsp_setzero_v2r8();
185 /* Reset potential sums */
186 velecsum
= _fjsp_setzero_v2r8();
188 /* Start inner kernel loop */
189 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
192 /* Get j neighbor index, and coordinate index */
195 j_coord_offsetA
= DIM
*jnrA
;
196 j_coord_offsetB
= DIM
*jnrB
;
198 /* load j atom coordinates */
199 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
200 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
202 /* Calculate displacement vector */
203 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
204 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
205 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
206 dx01
= _fjsp_sub_v2r8(ix0
,jx1
);
207 dy01
= _fjsp_sub_v2r8(iy0
,jy1
);
208 dz01
= _fjsp_sub_v2r8(iz0
,jz1
);
209 dx02
= _fjsp_sub_v2r8(ix0
,jx2
);
210 dy02
= _fjsp_sub_v2r8(iy0
,jy2
);
211 dz02
= _fjsp_sub_v2r8(iz0
,jz2
);
212 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
213 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
214 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
215 dx11
= _fjsp_sub_v2r8(ix1
,jx1
);
216 dy11
= _fjsp_sub_v2r8(iy1
,jy1
);
217 dz11
= _fjsp_sub_v2r8(iz1
,jz1
);
218 dx12
= _fjsp_sub_v2r8(ix1
,jx2
);
219 dy12
= _fjsp_sub_v2r8(iy1
,jy2
);
220 dz12
= _fjsp_sub_v2r8(iz1
,jz2
);
221 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
222 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
223 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
224 dx21
= _fjsp_sub_v2r8(ix2
,jx1
);
225 dy21
= _fjsp_sub_v2r8(iy2
,jy1
);
226 dz21
= _fjsp_sub_v2r8(iz2
,jz1
);
227 dx22
= _fjsp_sub_v2r8(ix2
,jx2
);
228 dy22
= _fjsp_sub_v2r8(iy2
,jy2
);
229 dz22
= _fjsp_sub_v2r8(iz2
,jz2
);
231 /* Calculate squared distance and things based on it */
232 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
233 rsq01
= gmx_fjsp_calc_rsq_v2r8(dx01
,dy01
,dz01
);
234 rsq02
= gmx_fjsp_calc_rsq_v2r8(dx02
,dy02
,dz02
);
235 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
236 rsq11
= gmx_fjsp_calc_rsq_v2r8(dx11
,dy11
,dz11
);
237 rsq12
= gmx_fjsp_calc_rsq_v2r8(dx12
,dy12
,dz12
);
238 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
239 rsq21
= gmx_fjsp_calc_rsq_v2r8(dx21
,dy21
,dz21
);
240 rsq22
= gmx_fjsp_calc_rsq_v2r8(dx22
,dy22
,dz22
);
242 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
243 rinv01
= gmx_fjsp_invsqrt_v2r8(rsq01
);
244 rinv02
= gmx_fjsp_invsqrt_v2r8(rsq02
);
245 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
246 rinv11
= gmx_fjsp_invsqrt_v2r8(rsq11
);
247 rinv12
= gmx_fjsp_invsqrt_v2r8(rsq12
);
248 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
249 rinv21
= gmx_fjsp_invsqrt_v2r8(rsq21
);
250 rinv22
= gmx_fjsp_invsqrt_v2r8(rsq22
);
252 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
253 rinvsq01
= _fjsp_mul_v2r8(rinv01
,rinv01
);
254 rinvsq02
= _fjsp_mul_v2r8(rinv02
,rinv02
);
255 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
256 rinvsq11
= _fjsp_mul_v2r8(rinv11
,rinv11
);
257 rinvsq12
= _fjsp_mul_v2r8(rinv12
,rinv12
);
258 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
259 rinvsq21
= _fjsp_mul_v2r8(rinv21
,rinv21
);
260 rinvsq22
= _fjsp_mul_v2r8(rinv22
,rinv22
);
262 fjx0
= _fjsp_setzero_v2r8();
263 fjy0
= _fjsp_setzero_v2r8();
264 fjz0
= _fjsp_setzero_v2r8();
265 fjx1
= _fjsp_setzero_v2r8();
266 fjy1
= _fjsp_setzero_v2r8();
267 fjz1
= _fjsp_setzero_v2r8();
268 fjx2
= _fjsp_setzero_v2r8();
269 fjy2
= _fjsp_setzero_v2r8();
270 fjz2
= _fjsp_setzero_v2r8();
272 /**************************
273 * CALCULATE INTERACTIONS *
274 **************************/
276 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
279 /* REACTION-FIELD ELECTROSTATICS */
280 velec
= _fjsp_mul_v2r8(qq00
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq00
,rinv00
),crf
));
281 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
283 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
285 /* Update potential sum for this i atom from the interaction with this j atom. */
286 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
287 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
291 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
293 /* Update vectorial force */
294 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
295 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
296 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
298 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
299 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
300 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
304 /**************************
305 * CALCULATE INTERACTIONS *
306 **************************/
308 if (gmx_fjsp_any_lt_v2r8(rsq01
,rcutoff2
))
311 /* REACTION-FIELD ELECTROSTATICS */
312 velec
= _fjsp_mul_v2r8(qq01
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq01
,rinv01
),crf
));
313 felec
= _fjsp_mul_v2r8(qq01
,_fjsp_msub_v2r8(rinv01
,rinvsq01
,krf2
));
315 cutoff_mask
= _fjsp_cmplt_v2r8(rsq01
,rcutoff2
);
317 /* Update potential sum for this i atom from the interaction with this j atom. */
318 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
319 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
323 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
325 /* Update vectorial force */
326 fix0
= _fjsp_madd_v2r8(dx01
,fscal
,fix0
);
327 fiy0
= _fjsp_madd_v2r8(dy01
,fscal
,fiy0
);
328 fiz0
= _fjsp_madd_v2r8(dz01
,fscal
,fiz0
);
330 fjx1
= _fjsp_madd_v2r8(dx01
,fscal
,fjx1
);
331 fjy1
= _fjsp_madd_v2r8(dy01
,fscal
,fjy1
);
332 fjz1
= _fjsp_madd_v2r8(dz01
,fscal
,fjz1
);
336 /**************************
337 * CALCULATE INTERACTIONS *
338 **************************/
340 if (gmx_fjsp_any_lt_v2r8(rsq02
,rcutoff2
))
343 /* REACTION-FIELD ELECTROSTATICS */
344 velec
= _fjsp_mul_v2r8(qq02
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq02
,rinv02
),crf
));
345 felec
= _fjsp_mul_v2r8(qq02
,_fjsp_msub_v2r8(rinv02
,rinvsq02
,krf2
));
347 cutoff_mask
= _fjsp_cmplt_v2r8(rsq02
,rcutoff2
);
349 /* Update potential sum for this i atom from the interaction with this j atom. */
350 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
351 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
355 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
357 /* Update vectorial force */
358 fix0
= _fjsp_madd_v2r8(dx02
,fscal
,fix0
);
359 fiy0
= _fjsp_madd_v2r8(dy02
,fscal
,fiy0
);
360 fiz0
= _fjsp_madd_v2r8(dz02
,fscal
,fiz0
);
362 fjx2
= _fjsp_madd_v2r8(dx02
,fscal
,fjx2
);
363 fjy2
= _fjsp_madd_v2r8(dy02
,fscal
,fjy2
);
364 fjz2
= _fjsp_madd_v2r8(dz02
,fscal
,fjz2
);
368 /**************************
369 * CALCULATE INTERACTIONS *
370 **************************/
372 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
375 /* REACTION-FIELD ELECTROSTATICS */
376 velec
= _fjsp_mul_v2r8(qq10
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq10
,rinv10
),crf
));
377 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
379 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
381 /* Update potential sum for this i atom from the interaction with this j atom. */
382 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
383 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
387 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
389 /* Update vectorial force */
390 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
391 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
392 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
394 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
395 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
396 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
400 /**************************
401 * CALCULATE INTERACTIONS *
402 **************************/
404 if (gmx_fjsp_any_lt_v2r8(rsq11
,rcutoff2
))
407 /* REACTION-FIELD ELECTROSTATICS */
408 velec
= _fjsp_mul_v2r8(qq11
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq11
,rinv11
),crf
));
409 felec
= _fjsp_mul_v2r8(qq11
,_fjsp_msub_v2r8(rinv11
,rinvsq11
,krf2
));
411 cutoff_mask
= _fjsp_cmplt_v2r8(rsq11
,rcutoff2
);
413 /* Update potential sum for this i atom from the interaction with this j atom. */
414 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
415 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
419 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
421 /* Update vectorial force */
422 fix1
= _fjsp_madd_v2r8(dx11
,fscal
,fix1
);
423 fiy1
= _fjsp_madd_v2r8(dy11
,fscal
,fiy1
);
424 fiz1
= _fjsp_madd_v2r8(dz11
,fscal
,fiz1
);
426 fjx1
= _fjsp_madd_v2r8(dx11
,fscal
,fjx1
);
427 fjy1
= _fjsp_madd_v2r8(dy11
,fscal
,fjy1
);
428 fjz1
= _fjsp_madd_v2r8(dz11
,fscal
,fjz1
);
432 /**************************
433 * CALCULATE INTERACTIONS *
434 **************************/
436 if (gmx_fjsp_any_lt_v2r8(rsq12
,rcutoff2
))
439 /* REACTION-FIELD ELECTROSTATICS */
440 velec
= _fjsp_mul_v2r8(qq12
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq12
,rinv12
),crf
));
441 felec
= _fjsp_mul_v2r8(qq12
,_fjsp_msub_v2r8(rinv12
,rinvsq12
,krf2
));
443 cutoff_mask
= _fjsp_cmplt_v2r8(rsq12
,rcutoff2
);
445 /* Update potential sum for this i atom from the interaction with this j atom. */
446 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
447 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
451 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
453 /* Update vectorial force */
454 fix1
= _fjsp_madd_v2r8(dx12
,fscal
,fix1
);
455 fiy1
= _fjsp_madd_v2r8(dy12
,fscal
,fiy1
);
456 fiz1
= _fjsp_madd_v2r8(dz12
,fscal
,fiz1
);
458 fjx2
= _fjsp_madd_v2r8(dx12
,fscal
,fjx2
);
459 fjy2
= _fjsp_madd_v2r8(dy12
,fscal
,fjy2
);
460 fjz2
= _fjsp_madd_v2r8(dz12
,fscal
,fjz2
);
464 /**************************
465 * CALCULATE INTERACTIONS *
466 **************************/
468 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
471 /* REACTION-FIELD ELECTROSTATICS */
472 velec
= _fjsp_mul_v2r8(qq20
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq20
,rinv20
),crf
));
473 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
475 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
477 /* Update potential sum for this i atom from the interaction with this j atom. */
478 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
479 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
483 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
485 /* Update vectorial force */
486 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
487 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
488 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
490 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
491 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
492 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
496 /**************************
497 * CALCULATE INTERACTIONS *
498 **************************/
500 if (gmx_fjsp_any_lt_v2r8(rsq21
,rcutoff2
))
503 /* REACTION-FIELD ELECTROSTATICS */
504 velec
= _fjsp_mul_v2r8(qq21
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq21
,rinv21
),crf
));
505 felec
= _fjsp_mul_v2r8(qq21
,_fjsp_msub_v2r8(rinv21
,rinvsq21
,krf2
));
507 cutoff_mask
= _fjsp_cmplt_v2r8(rsq21
,rcutoff2
);
509 /* Update potential sum for this i atom from the interaction with this j atom. */
510 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
511 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
515 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
517 /* Update vectorial force */
518 fix2
= _fjsp_madd_v2r8(dx21
,fscal
,fix2
);
519 fiy2
= _fjsp_madd_v2r8(dy21
,fscal
,fiy2
);
520 fiz2
= _fjsp_madd_v2r8(dz21
,fscal
,fiz2
);
522 fjx1
= _fjsp_madd_v2r8(dx21
,fscal
,fjx1
);
523 fjy1
= _fjsp_madd_v2r8(dy21
,fscal
,fjy1
);
524 fjz1
= _fjsp_madd_v2r8(dz21
,fscal
,fjz1
);
528 /**************************
529 * CALCULATE INTERACTIONS *
530 **************************/
532 if (gmx_fjsp_any_lt_v2r8(rsq22
,rcutoff2
))
535 /* REACTION-FIELD ELECTROSTATICS */
536 velec
= _fjsp_mul_v2r8(qq22
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq22
,rinv22
),crf
));
537 felec
= _fjsp_mul_v2r8(qq22
,_fjsp_msub_v2r8(rinv22
,rinvsq22
,krf2
));
539 cutoff_mask
= _fjsp_cmplt_v2r8(rsq22
,rcutoff2
);
541 /* Update potential sum for this i atom from the interaction with this j atom. */
542 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
543 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
547 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
549 /* Update vectorial force */
550 fix2
= _fjsp_madd_v2r8(dx22
,fscal
,fix2
);
551 fiy2
= _fjsp_madd_v2r8(dy22
,fscal
,fiy2
);
552 fiz2
= _fjsp_madd_v2r8(dz22
,fscal
,fiz2
);
554 fjx2
= _fjsp_madd_v2r8(dx22
,fscal
,fjx2
);
555 fjy2
= _fjsp_madd_v2r8(dy22
,fscal
,fjy2
);
556 fjz2
= _fjsp_madd_v2r8(dz22
,fscal
,fjz2
);
560 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
562 /* Inner loop uses 351 flops */
569 j_coord_offsetA
= DIM
*jnrA
;
571 /* load j atom coordinates */
572 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x
+j_coord_offsetA
,
573 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
575 /* Calculate displacement vector */
576 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
577 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
578 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
579 dx01
= _fjsp_sub_v2r8(ix0
,jx1
);
580 dy01
= _fjsp_sub_v2r8(iy0
,jy1
);
581 dz01
= _fjsp_sub_v2r8(iz0
,jz1
);
582 dx02
= _fjsp_sub_v2r8(ix0
,jx2
);
583 dy02
= _fjsp_sub_v2r8(iy0
,jy2
);
584 dz02
= _fjsp_sub_v2r8(iz0
,jz2
);
585 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
586 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
587 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
588 dx11
= _fjsp_sub_v2r8(ix1
,jx1
);
589 dy11
= _fjsp_sub_v2r8(iy1
,jy1
);
590 dz11
= _fjsp_sub_v2r8(iz1
,jz1
);
591 dx12
= _fjsp_sub_v2r8(ix1
,jx2
);
592 dy12
= _fjsp_sub_v2r8(iy1
,jy2
);
593 dz12
= _fjsp_sub_v2r8(iz1
,jz2
);
594 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
595 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
596 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
597 dx21
= _fjsp_sub_v2r8(ix2
,jx1
);
598 dy21
= _fjsp_sub_v2r8(iy2
,jy1
);
599 dz21
= _fjsp_sub_v2r8(iz2
,jz1
);
600 dx22
= _fjsp_sub_v2r8(ix2
,jx2
);
601 dy22
= _fjsp_sub_v2r8(iy2
,jy2
);
602 dz22
= _fjsp_sub_v2r8(iz2
,jz2
);
604 /* Calculate squared distance and things based on it */
605 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
606 rsq01
= gmx_fjsp_calc_rsq_v2r8(dx01
,dy01
,dz01
);
607 rsq02
= gmx_fjsp_calc_rsq_v2r8(dx02
,dy02
,dz02
);
608 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
609 rsq11
= gmx_fjsp_calc_rsq_v2r8(dx11
,dy11
,dz11
);
610 rsq12
= gmx_fjsp_calc_rsq_v2r8(dx12
,dy12
,dz12
);
611 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
612 rsq21
= gmx_fjsp_calc_rsq_v2r8(dx21
,dy21
,dz21
);
613 rsq22
= gmx_fjsp_calc_rsq_v2r8(dx22
,dy22
,dz22
);
615 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
616 rinv01
= gmx_fjsp_invsqrt_v2r8(rsq01
);
617 rinv02
= gmx_fjsp_invsqrt_v2r8(rsq02
);
618 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
619 rinv11
= gmx_fjsp_invsqrt_v2r8(rsq11
);
620 rinv12
= gmx_fjsp_invsqrt_v2r8(rsq12
);
621 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
622 rinv21
= gmx_fjsp_invsqrt_v2r8(rsq21
);
623 rinv22
= gmx_fjsp_invsqrt_v2r8(rsq22
);
625 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
626 rinvsq01
= _fjsp_mul_v2r8(rinv01
,rinv01
);
627 rinvsq02
= _fjsp_mul_v2r8(rinv02
,rinv02
);
628 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
629 rinvsq11
= _fjsp_mul_v2r8(rinv11
,rinv11
);
630 rinvsq12
= _fjsp_mul_v2r8(rinv12
,rinv12
);
631 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
632 rinvsq21
= _fjsp_mul_v2r8(rinv21
,rinv21
);
633 rinvsq22
= _fjsp_mul_v2r8(rinv22
,rinv22
);
635 fjx0
= _fjsp_setzero_v2r8();
636 fjy0
= _fjsp_setzero_v2r8();
637 fjz0
= _fjsp_setzero_v2r8();
638 fjx1
= _fjsp_setzero_v2r8();
639 fjy1
= _fjsp_setzero_v2r8();
640 fjz1
= _fjsp_setzero_v2r8();
641 fjx2
= _fjsp_setzero_v2r8();
642 fjy2
= _fjsp_setzero_v2r8();
643 fjz2
= _fjsp_setzero_v2r8();
645 /**************************
646 * CALCULATE INTERACTIONS *
647 **************************/
649 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
652 /* REACTION-FIELD ELECTROSTATICS */
653 velec
= _fjsp_mul_v2r8(qq00
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq00
,rinv00
),crf
));
654 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
656 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
658 /* Update potential sum for this i atom from the interaction with this j atom. */
659 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
660 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
661 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
665 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
667 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
669 /* Update vectorial force */
670 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
671 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
672 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
674 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
675 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
676 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
680 /**************************
681 * CALCULATE INTERACTIONS *
682 **************************/
684 if (gmx_fjsp_any_lt_v2r8(rsq01
,rcutoff2
))
687 /* REACTION-FIELD ELECTROSTATICS */
688 velec
= _fjsp_mul_v2r8(qq01
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq01
,rinv01
),crf
));
689 felec
= _fjsp_mul_v2r8(qq01
,_fjsp_msub_v2r8(rinv01
,rinvsq01
,krf2
));
691 cutoff_mask
= _fjsp_cmplt_v2r8(rsq01
,rcutoff2
);
693 /* Update potential sum for this i atom from the interaction with this j atom. */
694 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
695 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
696 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
700 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
702 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
704 /* Update vectorial force */
705 fix0
= _fjsp_madd_v2r8(dx01
,fscal
,fix0
);
706 fiy0
= _fjsp_madd_v2r8(dy01
,fscal
,fiy0
);
707 fiz0
= _fjsp_madd_v2r8(dz01
,fscal
,fiz0
);
709 fjx1
= _fjsp_madd_v2r8(dx01
,fscal
,fjx1
);
710 fjy1
= _fjsp_madd_v2r8(dy01
,fscal
,fjy1
);
711 fjz1
= _fjsp_madd_v2r8(dz01
,fscal
,fjz1
);
715 /**************************
716 * CALCULATE INTERACTIONS *
717 **************************/
719 if (gmx_fjsp_any_lt_v2r8(rsq02
,rcutoff2
))
722 /* REACTION-FIELD ELECTROSTATICS */
723 velec
= _fjsp_mul_v2r8(qq02
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq02
,rinv02
),crf
));
724 felec
= _fjsp_mul_v2r8(qq02
,_fjsp_msub_v2r8(rinv02
,rinvsq02
,krf2
));
726 cutoff_mask
= _fjsp_cmplt_v2r8(rsq02
,rcutoff2
);
728 /* Update potential sum for this i atom from the interaction with this j atom. */
729 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
730 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
731 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
735 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
737 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
739 /* Update vectorial force */
740 fix0
= _fjsp_madd_v2r8(dx02
,fscal
,fix0
);
741 fiy0
= _fjsp_madd_v2r8(dy02
,fscal
,fiy0
);
742 fiz0
= _fjsp_madd_v2r8(dz02
,fscal
,fiz0
);
744 fjx2
= _fjsp_madd_v2r8(dx02
,fscal
,fjx2
);
745 fjy2
= _fjsp_madd_v2r8(dy02
,fscal
,fjy2
);
746 fjz2
= _fjsp_madd_v2r8(dz02
,fscal
,fjz2
);
750 /**************************
751 * CALCULATE INTERACTIONS *
752 **************************/
754 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
757 /* REACTION-FIELD ELECTROSTATICS */
758 velec
= _fjsp_mul_v2r8(qq10
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq10
,rinv10
),crf
));
759 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
761 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
763 /* Update potential sum for this i atom from the interaction with this j atom. */
764 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
765 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
766 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
770 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
772 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
774 /* Update vectorial force */
775 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
776 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
777 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
779 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
780 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
781 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
785 /**************************
786 * CALCULATE INTERACTIONS *
787 **************************/
789 if (gmx_fjsp_any_lt_v2r8(rsq11
,rcutoff2
))
792 /* REACTION-FIELD ELECTROSTATICS */
793 velec
= _fjsp_mul_v2r8(qq11
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq11
,rinv11
),crf
));
794 felec
= _fjsp_mul_v2r8(qq11
,_fjsp_msub_v2r8(rinv11
,rinvsq11
,krf2
));
796 cutoff_mask
= _fjsp_cmplt_v2r8(rsq11
,rcutoff2
);
798 /* Update potential sum for this i atom from the interaction with this j atom. */
799 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
800 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
801 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
805 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
807 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
809 /* Update vectorial force */
810 fix1
= _fjsp_madd_v2r8(dx11
,fscal
,fix1
);
811 fiy1
= _fjsp_madd_v2r8(dy11
,fscal
,fiy1
);
812 fiz1
= _fjsp_madd_v2r8(dz11
,fscal
,fiz1
);
814 fjx1
= _fjsp_madd_v2r8(dx11
,fscal
,fjx1
);
815 fjy1
= _fjsp_madd_v2r8(dy11
,fscal
,fjy1
);
816 fjz1
= _fjsp_madd_v2r8(dz11
,fscal
,fjz1
);
820 /**************************
821 * CALCULATE INTERACTIONS *
822 **************************/
824 if (gmx_fjsp_any_lt_v2r8(rsq12
,rcutoff2
))
827 /* REACTION-FIELD ELECTROSTATICS */
828 velec
= _fjsp_mul_v2r8(qq12
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq12
,rinv12
),crf
));
829 felec
= _fjsp_mul_v2r8(qq12
,_fjsp_msub_v2r8(rinv12
,rinvsq12
,krf2
));
831 cutoff_mask
= _fjsp_cmplt_v2r8(rsq12
,rcutoff2
);
833 /* Update potential sum for this i atom from the interaction with this j atom. */
834 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
835 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
836 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
840 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
842 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
844 /* Update vectorial force */
845 fix1
= _fjsp_madd_v2r8(dx12
,fscal
,fix1
);
846 fiy1
= _fjsp_madd_v2r8(dy12
,fscal
,fiy1
);
847 fiz1
= _fjsp_madd_v2r8(dz12
,fscal
,fiz1
);
849 fjx2
= _fjsp_madd_v2r8(dx12
,fscal
,fjx2
);
850 fjy2
= _fjsp_madd_v2r8(dy12
,fscal
,fjy2
);
851 fjz2
= _fjsp_madd_v2r8(dz12
,fscal
,fjz2
);
855 /**************************
856 * CALCULATE INTERACTIONS *
857 **************************/
859 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
862 /* REACTION-FIELD ELECTROSTATICS */
863 velec
= _fjsp_mul_v2r8(qq20
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq20
,rinv20
),crf
));
864 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
866 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
868 /* Update potential sum for this i atom from the interaction with this j atom. */
869 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
870 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
871 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
875 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
877 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
879 /* Update vectorial force */
880 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
881 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
882 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
884 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
885 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
886 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
890 /**************************
891 * CALCULATE INTERACTIONS *
892 **************************/
894 if (gmx_fjsp_any_lt_v2r8(rsq21
,rcutoff2
))
897 /* REACTION-FIELD ELECTROSTATICS */
898 velec
= _fjsp_mul_v2r8(qq21
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq21
,rinv21
),crf
));
899 felec
= _fjsp_mul_v2r8(qq21
,_fjsp_msub_v2r8(rinv21
,rinvsq21
,krf2
));
901 cutoff_mask
= _fjsp_cmplt_v2r8(rsq21
,rcutoff2
);
903 /* Update potential sum for this i atom from the interaction with this j atom. */
904 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
905 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
906 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
910 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
912 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
914 /* Update vectorial force */
915 fix2
= _fjsp_madd_v2r8(dx21
,fscal
,fix2
);
916 fiy2
= _fjsp_madd_v2r8(dy21
,fscal
,fiy2
);
917 fiz2
= _fjsp_madd_v2r8(dz21
,fscal
,fiz2
);
919 fjx1
= _fjsp_madd_v2r8(dx21
,fscal
,fjx1
);
920 fjy1
= _fjsp_madd_v2r8(dy21
,fscal
,fjy1
);
921 fjz1
= _fjsp_madd_v2r8(dz21
,fscal
,fjz1
);
925 /**************************
926 * CALCULATE INTERACTIONS *
927 **************************/
929 if (gmx_fjsp_any_lt_v2r8(rsq22
,rcutoff2
))
932 /* REACTION-FIELD ELECTROSTATICS */
933 velec
= _fjsp_mul_v2r8(qq22
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq22
,rinv22
),crf
));
934 felec
= _fjsp_mul_v2r8(qq22
,_fjsp_msub_v2r8(rinv22
,rinvsq22
,krf2
));
936 cutoff_mask
= _fjsp_cmplt_v2r8(rsq22
,rcutoff2
);
938 /* Update potential sum for this i atom from the interaction with this j atom. */
939 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
940 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
941 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
945 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
947 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
949 /* Update vectorial force */
950 fix2
= _fjsp_madd_v2r8(dx22
,fscal
,fix2
);
951 fiy2
= _fjsp_madd_v2r8(dy22
,fscal
,fiy2
);
952 fiz2
= _fjsp_madd_v2r8(dz22
,fscal
,fiz2
);
954 fjx2
= _fjsp_madd_v2r8(dx22
,fscal
,fjx2
);
955 fjy2
= _fjsp_madd_v2r8(dy22
,fscal
,fjy2
);
956 fjz2
= _fjsp_madd_v2r8(dz22
,fscal
,fjz2
);
960 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
962 /* Inner loop uses 351 flops */
965 /* End of innermost loop */
967 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
968 f
+i_coord_offset
,fshift
+i_shift_offset
);
971 /* Update potential energies */
972 gmx_fjsp_update_1pot_v2r8(velecsum
,kernel_data
->energygrp_elec
+ggid
);
974 /* Increment number of inner iterations */
975 inneriter
+= j_index_end
- j_index_start
;
977 /* Outer loop uses 19 flops */
980 /* Increment number of outer iterations */
983 /* Update outer/inner flops */
985 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_VF
,outeriter
*19 + inneriter
*351);
988 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
989 * Electrostatics interaction: ReactionField
990 * VdW interaction: None
991 * Geometry: Water3-Water3
992 * Calculate force/pot: Force
995 nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double
996 (t_nblist
* gmx_restrict nlist
,
997 rvec
* gmx_restrict xx
,
998 rvec
* gmx_restrict ff
,
999 struct t_forcerec
* gmx_restrict fr
,
1000 t_mdatoms
* gmx_restrict mdatoms
,
1001 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
1002 t_nrnb
* gmx_restrict nrnb
)
1004 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1005 * just 0 for non-waters.
1006 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
1007 * jnr indices corresponding to data put in the four positions in the SIMD register.
1009 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
1010 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
1012 int j_coord_offsetA
,j_coord_offsetB
;
1013 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
1014 real rcutoff_scalar
;
1015 real
*shiftvec
,*fshift
,*x
,*f
;
1016 _fjsp_v2r8 tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
1018 _fjsp_v2r8 ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
1020 _fjsp_v2r8 ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
1022 _fjsp_v2r8 ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
1023 int vdwjidx0A
,vdwjidx0B
;
1024 _fjsp_v2r8 jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
1025 int vdwjidx1A
,vdwjidx1B
;
1026 _fjsp_v2r8 jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
1027 int vdwjidx2A
,vdwjidx2B
;
1028 _fjsp_v2r8 jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
1029 _fjsp_v2r8 dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
1030 _fjsp_v2r8 dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
1031 _fjsp_v2r8 dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
1032 _fjsp_v2r8 dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
1033 _fjsp_v2r8 dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
1034 _fjsp_v2r8 dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
1035 _fjsp_v2r8 dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
1036 _fjsp_v2r8 dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
1037 _fjsp_v2r8 dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
1038 _fjsp_v2r8 velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
1040 _fjsp_v2r8 itab_tmp
;
1041 _fjsp_v2r8 dummy_mask
,cutoff_mask
;
1042 _fjsp_v2r8 one
= gmx_fjsp_set1_v2r8(1.0);
1043 _fjsp_v2r8 two
= gmx_fjsp_set1_v2r8(2.0);
1044 union { _fjsp_v2r8 simd
; long long int i
[2]; } vfconv
,gbconv
,ewconv
;
1051 jindex
= nlist
->jindex
;
1053 shiftidx
= nlist
->shift
;
1055 shiftvec
= fr
->shift_vec
[0];
1056 fshift
= fr
->fshift
[0];
1057 facel
= gmx_fjsp_set1_v2r8(fr
->ic
->epsfac
);
1058 charge
= mdatoms
->chargeA
;
1059 krf
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
);
1060 krf2
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
*2.0);
1061 crf
= gmx_fjsp_set1_v2r8(fr
->ic
->c_rf
);
1063 /* Setup water-specific parameters */
1064 inr
= nlist
->iinr
[0];
1065 iq0
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+0]));
1066 iq1
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+1]));
1067 iq2
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+2]));
1069 jq0
= gmx_fjsp_set1_v2r8(charge
[inr
+0]);
1070 jq1
= gmx_fjsp_set1_v2r8(charge
[inr
+1]);
1071 jq2
= gmx_fjsp_set1_v2r8(charge
[inr
+2]);
1072 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
1073 qq01
= _fjsp_mul_v2r8(iq0
,jq1
);
1074 qq02
= _fjsp_mul_v2r8(iq0
,jq2
);
1075 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
1076 qq11
= _fjsp_mul_v2r8(iq1
,jq1
);
1077 qq12
= _fjsp_mul_v2r8(iq1
,jq2
);
1078 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
1079 qq21
= _fjsp_mul_v2r8(iq2
,jq1
);
1080 qq22
= _fjsp_mul_v2r8(iq2
,jq2
);
1082 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1083 rcutoff_scalar
= fr
->ic
->rcoulomb
;
1084 rcutoff
= gmx_fjsp_set1_v2r8(rcutoff_scalar
);
1085 rcutoff2
= _fjsp_mul_v2r8(rcutoff
,rcutoff
);
1087 /* Avoid stupid compiler warnings */
1089 j_coord_offsetA
= 0;
1090 j_coord_offsetB
= 0;
1095 /* Start outer loop over neighborlists */
1096 for(iidx
=0; iidx
<nri
; iidx
++)
1098 /* Load shift vector for this list */
1099 i_shift_offset
= DIM
*shiftidx
[iidx
];
1101 /* Load limits for loop over neighbors */
1102 j_index_start
= jindex
[iidx
];
1103 j_index_end
= jindex
[iidx
+1];
1105 /* Get outer coordinate index */
1107 i_coord_offset
= DIM
*inr
;
1109 /* Load i particle coords and add shift vector */
1110 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
1111 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
1113 fix0
= _fjsp_setzero_v2r8();
1114 fiy0
= _fjsp_setzero_v2r8();
1115 fiz0
= _fjsp_setzero_v2r8();
1116 fix1
= _fjsp_setzero_v2r8();
1117 fiy1
= _fjsp_setzero_v2r8();
1118 fiz1
= _fjsp_setzero_v2r8();
1119 fix2
= _fjsp_setzero_v2r8();
1120 fiy2
= _fjsp_setzero_v2r8();
1121 fiz2
= _fjsp_setzero_v2r8();
1123 /* Start inner kernel loop */
1124 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
1127 /* Get j neighbor index, and coordinate index */
1129 jnrB
= jjnr
[jidx
+1];
1130 j_coord_offsetA
= DIM
*jnrA
;
1131 j_coord_offsetB
= DIM
*jnrB
;
1133 /* load j atom coordinates */
1134 gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
1135 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1137 /* Calculate displacement vector */
1138 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
1139 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
1140 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
1141 dx01
= _fjsp_sub_v2r8(ix0
,jx1
);
1142 dy01
= _fjsp_sub_v2r8(iy0
,jy1
);
1143 dz01
= _fjsp_sub_v2r8(iz0
,jz1
);
1144 dx02
= _fjsp_sub_v2r8(ix0
,jx2
);
1145 dy02
= _fjsp_sub_v2r8(iy0
,jy2
);
1146 dz02
= _fjsp_sub_v2r8(iz0
,jz2
);
1147 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
1148 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
1149 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
1150 dx11
= _fjsp_sub_v2r8(ix1
,jx1
);
1151 dy11
= _fjsp_sub_v2r8(iy1
,jy1
);
1152 dz11
= _fjsp_sub_v2r8(iz1
,jz1
);
1153 dx12
= _fjsp_sub_v2r8(ix1
,jx2
);
1154 dy12
= _fjsp_sub_v2r8(iy1
,jy2
);
1155 dz12
= _fjsp_sub_v2r8(iz1
,jz2
);
1156 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
1157 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
1158 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
1159 dx21
= _fjsp_sub_v2r8(ix2
,jx1
);
1160 dy21
= _fjsp_sub_v2r8(iy2
,jy1
);
1161 dz21
= _fjsp_sub_v2r8(iz2
,jz1
);
1162 dx22
= _fjsp_sub_v2r8(ix2
,jx2
);
1163 dy22
= _fjsp_sub_v2r8(iy2
,jy2
);
1164 dz22
= _fjsp_sub_v2r8(iz2
,jz2
);
1166 /* Calculate squared distance and things based on it */
1167 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
1168 rsq01
= gmx_fjsp_calc_rsq_v2r8(dx01
,dy01
,dz01
);
1169 rsq02
= gmx_fjsp_calc_rsq_v2r8(dx02
,dy02
,dz02
);
1170 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
1171 rsq11
= gmx_fjsp_calc_rsq_v2r8(dx11
,dy11
,dz11
);
1172 rsq12
= gmx_fjsp_calc_rsq_v2r8(dx12
,dy12
,dz12
);
1173 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
1174 rsq21
= gmx_fjsp_calc_rsq_v2r8(dx21
,dy21
,dz21
);
1175 rsq22
= gmx_fjsp_calc_rsq_v2r8(dx22
,dy22
,dz22
);
1177 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
1178 rinv01
= gmx_fjsp_invsqrt_v2r8(rsq01
);
1179 rinv02
= gmx_fjsp_invsqrt_v2r8(rsq02
);
1180 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
1181 rinv11
= gmx_fjsp_invsqrt_v2r8(rsq11
);
1182 rinv12
= gmx_fjsp_invsqrt_v2r8(rsq12
);
1183 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
1184 rinv21
= gmx_fjsp_invsqrt_v2r8(rsq21
);
1185 rinv22
= gmx_fjsp_invsqrt_v2r8(rsq22
);
1187 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
1188 rinvsq01
= _fjsp_mul_v2r8(rinv01
,rinv01
);
1189 rinvsq02
= _fjsp_mul_v2r8(rinv02
,rinv02
);
1190 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
1191 rinvsq11
= _fjsp_mul_v2r8(rinv11
,rinv11
);
1192 rinvsq12
= _fjsp_mul_v2r8(rinv12
,rinv12
);
1193 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
1194 rinvsq21
= _fjsp_mul_v2r8(rinv21
,rinv21
);
1195 rinvsq22
= _fjsp_mul_v2r8(rinv22
,rinv22
);
1197 fjx0
= _fjsp_setzero_v2r8();
1198 fjy0
= _fjsp_setzero_v2r8();
1199 fjz0
= _fjsp_setzero_v2r8();
1200 fjx1
= _fjsp_setzero_v2r8();
1201 fjy1
= _fjsp_setzero_v2r8();
1202 fjz1
= _fjsp_setzero_v2r8();
1203 fjx2
= _fjsp_setzero_v2r8();
1204 fjy2
= _fjsp_setzero_v2r8();
1205 fjz2
= _fjsp_setzero_v2r8();
1207 /**************************
1208 * CALCULATE INTERACTIONS *
1209 **************************/
1211 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
1214 /* REACTION-FIELD ELECTROSTATICS */
1215 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
1217 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
1221 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1223 /* Update vectorial force */
1224 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
1225 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
1226 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
1228 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
1229 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
1230 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
1234 /**************************
1235 * CALCULATE INTERACTIONS *
1236 **************************/
1238 if (gmx_fjsp_any_lt_v2r8(rsq01
,rcutoff2
))
1241 /* REACTION-FIELD ELECTROSTATICS */
1242 felec
= _fjsp_mul_v2r8(qq01
,_fjsp_msub_v2r8(rinv01
,rinvsq01
,krf2
));
1244 cutoff_mask
= _fjsp_cmplt_v2r8(rsq01
,rcutoff2
);
1248 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1250 /* Update vectorial force */
1251 fix0
= _fjsp_madd_v2r8(dx01
,fscal
,fix0
);
1252 fiy0
= _fjsp_madd_v2r8(dy01
,fscal
,fiy0
);
1253 fiz0
= _fjsp_madd_v2r8(dz01
,fscal
,fiz0
);
1255 fjx1
= _fjsp_madd_v2r8(dx01
,fscal
,fjx1
);
1256 fjy1
= _fjsp_madd_v2r8(dy01
,fscal
,fjy1
);
1257 fjz1
= _fjsp_madd_v2r8(dz01
,fscal
,fjz1
);
1261 /**************************
1262 * CALCULATE INTERACTIONS *
1263 **************************/
1265 if (gmx_fjsp_any_lt_v2r8(rsq02
,rcutoff2
))
1268 /* REACTION-FIELD ELECTROSTATICS */
1269 felec
= _fjsp_mul_v2r8(qq02
,_fjsp_msub_v2r8(rinv02
,rinvsq02
,krf2
));
1271 cutoff_mask
= _fjsp_cmplt_v2r8(rsq02
,rcutoff2
);
1275 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1277 /* Update vectorial force */
1278 fix0
= _fjsp_madd_v2r8(dx02
,fscal
,fix0
);
1279 fiy0
= _fjsp_madd_v2r8(dy02
,fscal
,fiy0
);
1280 fiz0
= _fjsp_madd_v2r8(dz02
,fscal
,fiz0
);
1282 fjx2
= _fjsp_madd_v2r8(dx02
,fscal
,fjx2
);
1283 fjy2
= _fjsp_madd_v2r8(dy02
,fscal
,fjy2
);
1284 fjz2
= _fjsp_madd_v2r8(dz02
,fscal
,fjz2
);
1288 /**************************
1289 * CALCULATE INTERACTIONS *
1290 **************************/
1292 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
1295 /* REACTION-FIELD ELECTROSTATICS */
1296 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
1298 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
1302 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1304 /* Update vectorial force */
1305 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
1306 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
1307 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
1309 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
1310 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
1311 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
1315 /**************************
1316 * CALCULATE INTERACTIONS *
1317 **************************/
1319 if (gmx_fjsp_any_lt_v2r8(rsq11
,rcutoff2
))
1322 /* REACTION-FIELD ELECTROSTATICS */
1323 felec
= _fjsp_mul_v2r8(qq11
,_fjsp_msub_v2r8(rinv11
,rinvsq11
,krf2
));
1325 cutoff_mask
= _fjsp_cmplt_v2r8(rsq11
,rcutoff2
);
1329 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1331 /* Update vectorial force */
1332 fix1
= _fjsp_madd_v2r8(dx11
,fscal
,fix1
);
1333 fiy1
= _fjsp_madd_v2r8(dy11
,fscal
,fiy1
);
1334 fiz1
= _fjsp_madd_v2r8(dz11
,fscal
,fiz1
);
1336 fjx1
= _fjsp_madd_v2r8(dx11
,fscal
,fjx1
);
1337 fjy1
= _fjsp_madd_v2r8(dy11
,fscal
,fjy1
);
1338 fjz1
= _fjsp_madd_v2r8(dz11
,fscal
,fjz1
);
1342 /**************************
1343 * CALCULATE INTERACTIONS *
1344 **************************/
1346 if (gmx_fjsp_any_lt_v2r8(rsq12
,rcutoff2
))
1349 /* REACTION-FIELD ELECTROSTATICS */
1350 felec
= _fjsp_mul_v2r8(qq12
,_fjsp_msub_v2r8(rinv12
,rinvsq12
,krf2
));
1352 cutoff_mask
= _fjsp_cmplt_v2r8(rsq12
,rcutoff2
);
1356 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1358 /* Update vectorial force */
1359 fix1
= _fjsp_madd_v2r8(dx12
,fscal
,fix1
);
1360 fiy1
= _fjsp_madd_v2r8(dy12
,fscal
,fiy1
);
1361 fiz1
= _fjsp_madd_v2r8(dz12
,fscal
,fiz1
);
1363 fjx2
= _fjsp_madd_v2r8(dx12
,fscal
,fjx2
);
1364 fjy2
= _fjsp_madd_v2r8(dy12
,fscal
,fjy2
);
1365 fjz2
= _fjsp_madd_v2r8(dz12
,fscal
,fjz2
);
1369 /**************************
1370 * CALCULATE INTERACTIONS *
1371 **************************/
1373 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
1376 /* REACTION-FIELD ELECTROSTATICS */
1377 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
1379 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
1383 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1385 /* Update vectorial force */
1386 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
1387 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
1388 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
1390 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
1391 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
1392 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
1396 /**************************
1397 * CALCULATE INTERACTIONS *
1398 **************************/
1400 if (gmx_fjsp_any_lt_v2r8(rsq21
,rcutoff2
))
1403 /* REACTION-FIELD ELECTROSTATICS */
1404 felec
= _fjsp_mul_v2r8(qq21
,_fjsp_msub_v2r8(rinv21
,rinvsq21
,krf2
));
1406 cutoff_mask
= _fjsp_cmplt_v2r8(rsq21
,rcutoff2
);
1410 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1412 /* Update vectorial force */
1413 fix2
= _fjsp_madd_v2r8(dx21
,fscal
,fix2
);
1414 fiy2
= _fjsp_madd_v2r8(dy21
,fscal
,fiy2
);
1415 fiz2
= _fjsp_madd_v2r8(dz21
,fscal
,fiz2
);
1417 fjx1
= _fjsp_madd_v2r8(dx21
,fscal
,fjx1
);
1418 fjy1
= _fjsp_madd_v2r8(dy21
,fscal
,fjy1
);
1419 fjz1
= _fjsp_madd_v2r8(dz21
,fscal
,fjz1
);
1423 /**************************
1424 * CALCULATE INTERACTIONS *
1425 **************************/
1427 if (gmx_fjsp_any_lt_v2r8(rsq22
,rcutoff2
))
1430 /* REACTION-FIELD ELECTROSTATICS */
1431 felec
= _fjsp_mul_v2r8(qq22
,_fjsp_msub_v2r8(rinv22
,rinvsq22
,krf2
));
1433 cutoff_mask
= _fjsp_cmplt_v2r8(rsq22
,rcutoff2
);
1437 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1439 /* Update vectorial force */
1440 fix2
= _fjsp_madd_v2r8(dx22
,fscal
,fix2
);
1441 fiy2
= _fjsp_madd_v2r8(dy22
,fscal
,fiy2
);
1442 fiz2
= _fjsp_madd_v2r8(dz22
,fscal
,fiz2
);
1444 fjx2
= _fjsp_madd_v2r8(dx22
,fscal
,fjx2
);
1445 fjy2
= _fjsp_madd_v2r8(dy22
,fscal
,fjy2
);
1446 fjz2
= _fjsp_madd_v2r8(dz22
,fscal
,fjz2
);
1450 gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1452 /* Inner loop uses 297 flops */
1455 if(jidx
<j_index_end
)
1459 j_coord_offsetA
= DIM
*jnrA
;
1461 /* load j atom coordinates */
1462 gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(x
+j_coord_offsetA
,
1463 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1465 /* Calculate displacement vector */
1466 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
1467 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
1468 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
1469 dx01
= _fjsp_sub_v2r8(ix0
,jx1
);
1470 dy01
= _fjsp_sub_v2r8(iy0
,jy1
);
1471 dz01
= _fjsp_sub_v2r8(iz0
,jz1
);
1472 dx02
= _fjsp_sub_v2r8(ix0
,jx2
);
1473 dy02
= _fjsp_sub_v2r8(iy0
,jy2
);
1474 dz02
= _fjsp_sub_v2r8(iz0
,jz2
);
1475 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
1476 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
1477 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
1478 dx11
= _fjsp_sub_v2r8(ix1
,jx1
);
1479 dy11
= _fjsp_sub_v2r8(iy1
,jy1
);
1480 dz11
= _fjsp_sub_v2r8(iz1
,jz1
);
1481 dx12
= _fjsp_sub_v2r8(ix1
,jx2
);
1482 dy12
= _fjsp_sub_v2r8(iy1
,jy2
);
1483 dz12
= _fjsp_sub_v2r8(iz1
,jz2
);
1484 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
1485 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
1486 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
1487 dx21
= _fjsp_sub_v2r8(ix2
,jx1
);
1488 dy21
= _fjsp_sub_v2r8(iy2
,jy1
);
1489 dz21
= _fjsp_sub_v2r8(iz2
,jz1
);
1490 dx22
= _fjsp_sub_v2r8(ix2
,jx2
);
1491 dy22
= _fjsp_sub_v2r8(iy2
,jy2
);
1492 dz22
= _fjsp_sub_v2r8(iz2
,jz2
);
1494 /* Calculate squared distance and things based on it */
1495 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
1496 rsq01
= gmx_fjsp_calc_rsq_v2r8(dx01
,dy01
,dz01
);
1497 rsq02
= gmx_fjsp_calc_rsq_v2r8(dx02
,dy02
,dz02
);
1498 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
1499 rsq11
= gmx_fjsp_calc_rsq_v2r8(dx11
,dy11
,dz11
);
1500 rsq12
= gmx_fjsp_calc_rsq_v2r8(dx12
,dy12
,dz12
);
1501 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
1502 rsq21
= gmx_fjsp_calc_rsq_v2r8(dx21
,dy21
,dz21
);
1503 rsq22
= gmx_fjsp_calc_rsq_v2r8(dx22
,dy22
,dz22
);
1505 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
1506 rinv01
= gmx_fjsp_invsqrt_v2r8(rsq01
);
1507 rinv02
= gmx_fjsp_invsqrt_v2r8(rsq02
);
1508 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
1509 rinv11
= gmx_fjsp_invsqrt_v2r8(rsq11
);
1510 rinv12
= gmx_fjsp_invsqrt_v2r8(rsq12
);
1511 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
1512 rinv21
= gmx_fjsp_invsqrt_v2r8(rsq21
);
1513 rinv22
= gmx_fjsp_invsqrt_v2r8(rsq22
);
1515 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
1516 rinvsq01
= _fjsp_mul_v2r8(rinv01
,rinv01
);
1517 rinvsq02
= _fjsp_mul_v2r8(rinv02
,rinv02
);
1518 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
1519 rinvsq11
= _fjsp_mul_v2r8(rinv11
,rinv11
);
1520 rinvsq12
= _fjsp_mul_v2r8(rinv12
,rinv12
);
1521 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
1522 rinvsq21
= _fjsp_mul_v2r8(rinv21
,rinv21
);
1523 rinvsq22
= _fjsp_mul_v2r8(rinv22
,rinv22
);
1525 fjx0
= _fjsp_setzero_v2r8();
1526 fjy0
= _fjsp_setzero_v2r8();
1527 fjz0
= _fjsp_setzero_v2r8();
1528 fjx1
= _fjsp_setzero_v2r8();
1529 fjy1
= _fjsp_setzero_v2r8();
1530 fjz1
= _fjsp_setzero_v2r8();
1531 fjx2
= _fjsp_setzero_v2r8();
1532 fjy2
= _fjsp_setzero_v2r8();
1533 fjz2
= _fjsp_setzero_v2r8();
1535 /**************************
1536 * CALCULATE INTERACTIONS *
1537 **************************/
1539 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
1542 /* REACTION-FIELD ELECTROSTATICS */
1543 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
1545 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
1549 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1551 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1553 /* Update vectorial force */
1554 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
1555 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
1556 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
1558 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
1559 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
1560 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
1564 /**************************
1565 * CALCULATE INTERACTIONS *
1566 **************************/
1568 if (gmx_fjsp_any_lt_v2r8(rsq01
,rcutoff2
))
1571 /* REACTION-FIELD ELECTROSTATICS */
1572 felec
= _fjsp_mul_v2r8(qq01
,_fjsp_msub_v2r8(rinv01
,rinvsq01
,krf2
));
1574 cutoff_mask
= _fjsp_cmplt_v2r8(rsq01
,rcutoff2
);
1578 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1580 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1582 /* Update vectorial force */
1583 fix0
= _fjsp_madd_v2r8(dx01
,fscal
,fix0
);
1584 fiy0
= _fjsp_madd_v2r8(dy01
,fscal
,fiy0
);
1585 fiz0
= _fjsp_madd_v2r8(dz01
,fscal
,fiz0
);
1587 fjx1
= _fjsp_madd_v2r8(dx01
,fscal
,fjx1
);
1588 fjy1
= _fjsp_madd_v2r8(dy01
,fscal
,fjy1
);
1589 fjz1
= _fjsp_madd_v2r8(dz01
,fscal
,fjz1
);
1593 /**************************
1594 * CALCULATE INTERACTIONS *
1595 **************************/
1597 if (gmx_fjsp_any_lt_v2r8(rsq02
,rcutoff2
))
1600 /* REACTION-FIELD ELECTROSTATICS */
1601 felec
= _fjsp_mul_v2r8(qq02
,_fjsp_msub_v2r8(rinv02
,rinvsq02
,krf2
));
1603 cutoff_mask
= _fjsp_cmplt_v2r8(rsq02
,rcutoff2
);
1607 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1609 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1611 /* Update vectorial force */
1612 fix0
= _fjsp_madd_v2r8(dx02
,fscal
,fix0
);
1613 fiy0
= _fjsp_madd_v2r8(dy02
,fscal
,fiy0
);
1614 fiz0
= _fjsp_madd_v2r8(dz02
,fscal
,fiz0
);
1616 fjx2
= _fjsp_madd_v2r8(dx02
,fscal
,fjx2
);
1617 fjy2
= _fjsp_madd_v2r8(dy02
,fscal
,fjy2
);
1618 fjz2
= _fjsp_madd_v2r8(dz02
,fscal
,fjz2
);
1622 /**************************
1623 * CALCULATE INTERACTIONS *
1624 **************************/
1626 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
1629 /* REACTION-FIELD ELECTROSTATICS */
1630 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
1632 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
1636 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1638 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1640 /* Update vectorial force */
1641 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
1642 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
1643 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
1645 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
1646 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
1647 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
1651 /**************************
1652 * CALCULATE INTERACTIONS *
1653 **************************/
1655 if (gmx_fjsp_any_lt_v2r8(rsq11
,rcutoff2
))
1658 /* REACTION-FIELD ELECTROSTATICS */
1659 felec
= _fjsp_mul_v2r8(qq11
,_fjsp_msub_v2r8(rinv11
,rinvsq11
,krf2
));
1661 cutoff_mask
= _fjsp_cmplt_v2r8(rsq11
,rcutoff2
);
1665 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1667 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1669 /* Update vectorial force */
1670 fix1
= _fjsp_madd_v2r8(dx11
,fscal
,fix1
);
1671 fiy1
= _fjsp_madd_v2r8(dy11
,fscal
,fiy1
);
1672 fiz1
= _fjsp_madd_v2r8(dz11
,fscal
,fiz1
);
1674 fjx1
= _fjsp_madd_v2r8(dx11
,fscal
,fjx1
);
1675 fjy1
= _fjsp_madd_v2r8(dy11
,fscal
,fjy1
);
1676 fjz1
= _fjsp_madd_v2r8(dz11
,fscal
,fjz1
);
1680 /**************************
1681 * CALCULATE INTERACTIONS *
1682 **************************/
1684 if (gmx_fjsp_any_lt_v2r8(rsq12
,rcutoff2
))
1687 /* REACTION-FIELD ELECTROSTATICS */
1688 felec
= _fjsp_mul_v2r8(qq12
,_fjsp_msub_v2r8(rinv12
,rinvsq12
,krf2
));
1690 cutoff_mask
= _fjsp_cmplt_v2r8(rsq12
,rcutoff2
);
1694 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1696 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1698 /* Update vectorial force */
1699 fix1
= _fjsp_madd_v2r8(dx12
,fscal
,fix1
);
1700 fiy1
= _fjsp_madd_v2r8(dy12
,fscal
,fiy1
);
1701 fiz1
= _fjsp_madd_v2r8(dz12
,fscal
,fiz1
);
1703 fjx2
= _fjsp_madd_v2r8(dx12
,fscal
,fjx2
);
1704 fjy2
= _fjsp_madd_v2r8(dy12
,fscal
,fjy2
);
1705 fjz2
= _fjsp_madd_v2r8(dz12
,fscal
,fjz2
);
1709 /**************************
1710 * CALCULATE INTERACTIONS *
1711 **************************/
1713 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
1716 /* REACTION-FIELD ELECTROSTATICS */
1717 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
1719 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
1723 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1725 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1727 /* Update vectorial force */
1728 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
1729 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
1730 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
1732 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
1733 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
1734 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
1738 /**************************
1739 * CALCULATE INTERACTIONS *
1740 **************************/
1742 if (gmx_fjsp_any_lt_v2r8(rsq21
,rcutoff2
))
1745 /* REACTION-FIELD ELECTROSTATICS */
1746 felec
= _fjsp_mul_v2r8(qq21
,_fjsp_msub_v2r8(rinv21
,rinvsq21
,krf2
));
1748 cutoff_mask
= _fjsp_cmplt_v2r8(rsq21
,rcutoff2
);
1752 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1754 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1756 /* Update vectorial force */
1757 fix2
= _fjsp_madd_v2r8(dx21
,fscal
,fix2
);
1758 fiy2
= _fjsp_madd_v2r8(dy21
,fscal
,fiy2
);
1759 fiz2
= _fjsp_madd_v2r8(dz21
,fscal
,fiz2
);
1761 fjx1
= _fjsp_madd_v2r8(dx21
,fscal
,fjx1
);
1762 fjy1
= _fjsp_madd_v2r8(dy21
,fscal
,fjy1
);
1763 fjz1
= _fjsp_madd_v2r8(dz21
,fscal
,fjz1
);
1767 /**************************
1768 * CALCULATE INTERACTIONS *
1769 **************************/
1771 if (gmx_fjsp_any_lt_v2r8(rsq22
,rcutoff2
))
1774 /* REACTION-FIELD ELECTROSTATICS */
1775 felec
= _fjsp_mul_v2r8(qq22
,_fjsp_msub_v2r8(rinv22
,rinvsq22
,krf2
));
1777 cutoff_mask
= _fjsp_cmplt_v2r8(rsq22
,rcutoff2
);
1781 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1783 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1785 /* Update vectorial force */
1786 fix2
= _fjsp_madd_v2r8(dx22
,fscal
,fix2
);
1787 fiy2
= _fjsp_madd_v2r8(dy22
,fscal
,fiy2
);
1788 fiz2
= _fjsp_madd_v2r8(dz22
,fscal
,fiz2
);
1790 fjx2
= _fjsp_madd_v2r8(dx22
,fscal
,fjx2
);
1791 fjy2
= _fjsp_madd_v2r8(dy22
,fscal
,fjy2
);
1792 fjz2
= _fjsp_madd_v2r8(dz22
,fscal
,fjz2
);
1796 gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1798 /* Inner loop uses 297 flops */
1801 /* End of innermost loop */
1803 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1804 f
+i_coord_offset
,fshift
+i_shift_offset
);
1806 /* Increment number of inner iterations */
1807 inneriter
+= j_index_end
- j_index_start
;
1809 /* Outer loop uses 18 flops */
1812 /* Increment number of outer iterations */
1815 /* Update outer/inner flops */
1817 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_F
,outeriter
*18 + inneriter
*297);