2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/gmxlib/nrnb.h"
47 #include "kernelutil_sparc64_hpc_ace_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: LennardJones
53 * Geometry: Water3-Particle
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double
58 (t_nblist
* gmx_restrict nlist
,
59 rvec
* gmx_restrict xx
,
60 rvec
* gmx_restrict ff
,
61 struct t_forcerec
* gmx_restrict fr
,
62 t_mdatoms
* gmx_restrict mdatoms
,
63 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
64 t_nrnb
* gmx_restrict nrnb
)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
72 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
74 int j_coord_offsetA
,j_coord_offsetB
;
75 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
77 real
*shiftvec
,*fshift
,*x
,*f
;
78 _fjsp_v2r8 tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
80 _fjsp_v2r8 ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
82 _fjsp_v2r8 ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
84 _fjsp_v2r8 ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
85 int vdwjidx0A
,vdwjidx0B
;
86 _fjsp_v2r8 jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
87 _fjsp_v2r8 dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
88 _fjsp_v2r8 dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
89 _fjsp_v2r8 dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
90 _fjsp_v2r8 velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
93 _fjsp_v2r8 rinvsix
,rvdw
,vvdw
,vvdw6
,vvdw12
,fvdw
,fvdw6
,fvdw12
,vvdwsum
,sh_vdw_invrcut6
;
96 _fjsp_v2r8 one_sixth
= gmx_fjsp_set1_v2r8(1.0/6.0);
97 _fjsp_v2r8 one_twelfth
= gmx_fjsp_set1_v2r8(1.0/12.0);
98 _fjsp_v2r8 rswitch
,swV3
,swV4
,swV5
,swF2
,swF3
,swF4
,d
,d2
,sw
,dsw
;
99 real rswitch_scalar
,d_scalar
;
101 _fjsp_v2r8 dummy_mask
,cutoff_mask
;
102 _fjsp_v2r8 one
= gmx_fjsp_set1_v2r8(1.0);
103 _fjsp_v2r8 two
= gmx_fjsp_set1_v2r8(2.0);
104 union { _fjsp_v2r8 simd
; long long int i
[2]; } vfconv
,gbconv
,ewconv
;
111 jindex
= nlist
->jindex
;
113 shiftidx
= nlist
->shift
;
115 shiftvec
= fr
->shift_vec
[0];
116 fshift
= fr
->fshift
[0];
117 facel
= gmx_fjsp_set1_v2r8(fr
->ic
->epsfac
);
118 charge
= mdatoms
->chargeA
;
119 krf
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
);
120 krf2
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
*2.0);
121 crf
= gmx_fjsp_set1_v2r8(fr
->ic
->c_rf
);
122 nvdwtype
= fr
->ntype
;
124 vdwtype
= mdatoms
->typeA
;
126 /* Setup water-specific parameters */
127 inr
= nlist
->iinr
[0];
128 iq0
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+0]));
129 iq1
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+1]));
130 iq2
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+2]));
131 vdwioffset0
= 2*nvdwtype
*vdwtype
[inr
+0];
133 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
134 rcutoff_scalar
= fr
->ic
->rcoulomb
;
135 rcutoff
= gmx_fjsp_set1_v2r8(rcutoff_scalar
);
136 rcutoff2
= _fjsp_mul_v2r8(rcutoff
,rcutoff
);
138 rswitch_scalar
= fr
->ic
->rvdw_switch
;
139 rswitch
= gmx_fjsp_set1_v2r8(rswitch_scalar
);
140 /* Setup switch parameters */
141 d_scalar
= rcutoff_scalar
-rswitch_scalar
;
142 d
= gmx_fjsp_set1_v2r8(d_scalar
);
143 swV3
= gmx_fjsp_set1_v2r8(-10.0/(d_scalar
*d_scalar
*d_scalar
));
144 swV4
= gmx_fjsp_set1_v2r8( 15.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
145 swV5
= gmx_fjsp_set1_v2r8( -6.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
146 swF2
= gmx_fjsp_set1_v2r8(-30.0/(d_scalar
*d_scalar
*d_scalar
));
147 swF3
= gmx_fjsp_set1_v2r8( 60.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
148 swF4
= gmx_fjsp_set1_v2r8(-30.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
150 /* Avoid stupid compiler warnings */
158 /* Start outer loop over neighborlists */
159 for(iidx
=0; iidx
<nri
; iidx
++)
161 /* Load shift vector for this list */
162 i_shift_offset
= DIM
*shiftidx
[iidx
];
164 /* Load limits for loop over neighbors */
165 j_index_start
= jindex
[iidx
];
166 j_index_end
= jindex
[iidx
+1];
168 /* Get outer coordinate index */
170 i_coord_offset
= DIM
*inr
;
172 /* Load i particle coords and add shift vector */
173 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
174 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
176 fix0
= _fjsp_setzero_v2r8();
177 fiy0
= _fjsp_setzero_v2r8();
178 fiz0
= _fjsp_setzero_v2r8();
179 fix1
= _fjsp_setzero_v2r8();
180 fiy1
= _fjsp_setzero_v2r8();
181 fiz1
= _fjsp_setzero_v2r8();
182 fix2
= _fjsp_setzero_v2r8();
183 fiy2
= _fjsp_setzero_v2r8();
184 fiz2
= _fjsp_setzero_v2r8();
186 /* Reset potential sums */
187 velecsum
= _fjsp_setzero_v2r8();
188 vvdwsum
= _fjsp_setzero_v2r8();
190 /* Start inner kernel loop */
191 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
194 /* Get j neighbor index, and coordinate index */
197 j_coord_offsetA
= DIM
*jnrA
;
198 j_coord_offsetB
= DIM
*jnrB
;
200 /* load j atom coordinates */
201 gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
204 /* Calculate displacement vector */
205 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
206 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
207 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
208 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
209 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
210 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
211 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
212 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
213 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
215 /* Calculate squared distance and things based on it */
216 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
217 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
218 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
220 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
221 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
222 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
224 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
225 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
226 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
228 /* Load parameters for j particles */
229 jq0
= gmx_fjsp_load_2real_swizzle_v2r8(charge
+jnrA
+0,charge
+jnrB
+0);
230 vdwjidx0A
= 2*vdwtype
[jnrA
+0];
231 vdwjidx0B
= 2*vdwtype
[jnrB
+0];
233 fjx0
= _fjsp_setzero_v2r8();
234 fjy0
= _fjsp_setzero_v2r8();
235 fjz0
= _fjsp_setzero_v2r8();
237 /**************************
238 * CALCULATE INTERACTIONS *
239 **************************/
241 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
244 r00
= _fjsp_mul_v2r8(rsq00
,rinv00
);
246 /* Compute parameters for interactions between i and j atoms */
247 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
248 gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam
+vdwioffset0
+vdwjidx0A
,
249 vdwparam
+vdwioffset0
+vdwjidx0B
,&c6_00
,&c12_00
);
251 /* REACTION-FIELD ELECTROSTATICS */
252 velec
= _fjsp_mul_v2r8(qq00
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq00
,rinv00
),crf
));
253 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
255 /* LENNARD-JONES DISPERSION/REPULSION */
257 rinvsix
= _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00
,rinvsq00
),rinvsq00
);
258 vvdw6
= _fjsp_mul_v2r8(c6_00
,rinvsix
);
259 vvdw12
= _fjsp_mul_v2r8(c12_00
,_fjsp_mul_v2r8(rinvsix
,rinvsix
));
260 vvdw
= _fjsp_msub_v2r8( vvdw12
,one_twelfth
, _fjsp_mul_v2r8(vvdw6
,one_sixth
) );
261 fvdw
= _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12
,vvdw6
),rinvsq00
);
263 d
= _fjsp_sub_v2r8(r00
,rswitch
);
264 d
= _fjsp_max_v2r8(d
,_fjsp_setzero_v2r8());
265 d2
= _fjsp_mul_v2r8(d
,d
);
266 sw
= _fjsp_add_v2r8(one
,_fjsp_mul_v2r8(d2
,_fjsp_mul_v2r8(d
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swV5
,swV4
),swV3
))));
268 dsw
= _fjsp_mul_v2r8(d2
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swF4
,swF3
),swF2
));
270 /* Evaluate switch function */
271 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
272 fvdw
= _fjsp_msub_v2r8( fvdw
,sw
, _fjsp_mul_v2r8(rinv00
,_fjsp_mul_v2r8(vvdw
,dsw
)) );
273 vvdw
= _fjsp_mul_v2r8(vvdw
,sw
);
274 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
276 /* Update potential sum for this i atom from the interaction with this j atom. */
277 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
278 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
279 vvdw
= _fjsp_and_v2r8(vvdw
,cutoff_mask
);
280 vvdwsum
= _fjsp_add_v2r8(vvdwsum
,vvdw
);
282 fscal
= _fjsp_add_v2r8(felec
,fvdw
);
284 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
286 /* Update vectorial force */
287 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
288 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
289 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
291 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
292 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
293 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
297 /**************************
298 * CALCULATE INTERACTIONS *
299 **************************/
301 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
304 /* Compute parameters for interactions between i and j atoms */
305 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
307 /* REACTION-FIELD ELECTROSTATICS */
308 velec
= _fjsp_mul_v2r8(qq10
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq10
,rinv10
),crf
));
309 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
311 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
313 /* Update potential sum for this i atom from the interaction with this j atom. */
314 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
315 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
319 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
321 /* Update vectorial force */
322 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
323 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
324 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
326 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
327 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
328 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
332 /**************************
333 * CALCULATE INTERACTIONS *
334 **************************/
336 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
339 /* Compute parameters for interactions between i and j atoms */
340 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
342 /* REACTION-FIELD ELECTROSTATICS */
343 velec
= _fjsp_mul_v2r8(qq20
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq20
,rinv20
),crf
));
344 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
346 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
348 /* Update potential sum for this i atom from the interaction with this j atom. */
349 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
350 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
354 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
356 /* Update vectorial force */
357 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
358 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
359 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
361 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
362 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
363 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
367 gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
);
369 /* Inner loop uses 154 flops */
376 j_coord_offsetA
= DIM
*jnrA
;
378 /* load j atom coordinates */
379 gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x
+j_coord_offsetA
,
382 /* Calculate displacement vector */
383 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
384 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
385 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
386 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
387 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
388 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
389 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
390 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
391 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
393 /* Calculate squared distance and things based on it */
394 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
395 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
396 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
398 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
399 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
400 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
402 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
403 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
404 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
406 /* Load parameters for j particles */
407 jq0
= _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge
+jnrA
+0);
408 vdwjidx0A
= 2*vdwtype
[jnrA
+0];
410 fjx0
= _fjsp_setzero_v2r8();
411 fjy0
= _fjsp_setzero_v2r8();
412 fjz0
= _fjsp_setzero_v2r8();
414 /**************************
415 * CALCULATE INTERACTIONS *
416 **************************/
418 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
421 r00
= _fjsp_mul_v2r8(rsq00
,rinv00
);
423 /* Compute parameters for interactions between i and j atoms */
424 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
425 gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam
+vdwioffset0
+vdwjidx0A
,
426 vdwparam
+vdwioffset0
+vdwjidx0B
,&c6_00
,&c12_00
);
428 /* REACTION-FIELD ELECTROSTATICS */
429 velec
= _fjsp_mul_v2r8(qq00
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq00
,rinv00
),crf
));
430 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
432 /* LENNARD-JONES DISPERSION/REPULSION */
434 rinvsix
= _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00
,rinvsq00
),rinvsq00
);
435 vvdw6
= _fjsp_mul_v2r8(c6_00
,rinvsix
);
436 vvdw12
= _fjsp_mul_v2r8(c12_00
,_fjsp_mul_v2r8(rinvsix
,rinvsix
));
437 vvdw
= _fjsp_msub_v2r8( vvdw12
,one_twelfth
, _fjsp_mul_v2r8(vvdw6
,one_sixth
) );
438 fvdw
= _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12
,vvdw6
),rinvsq00
);
440 d
= _fjsp_sub_v2r8(r00
,rswitch
);
441 d
= _fjsp_max_v2r8(d
,_fjsp_setzero_v2r8());
442 d2
= _fjsp_mul_v2r8(d
,d
);
443 sw
= _fjsp_add_v2r8(one
,_fjsp_mul_v2r8(d2
,_fjsp_mul_v2r8(d
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swV5
,swV4
),swV3
))));
445 dsw
= _fjsp_mul_v2r8(d2
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swF4
,swF3
),swF2
));
447 /* Evaluate switch function */
448 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
449 fvdw
= _fjsp_msub_v2r8( fvdw
,sw
, _fjsp_mul_v2r8(rinv00
,_fjsp_mul_v2r8(vvdw
,dsw
)) );
450 vvdw
= _fjsp_mul_v2r8(vvdw
,sw
);
451 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
453 /* Update potential sum for this i atom from the interaction with this j atom. */
454 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
455 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
456 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
457 vvdw
= _fjsp_and_v2r8(vvdw
,cutoff_mask
);
458 vvdw
= _fjsp_unpacklo_v2r8(vvdw
,_fjsp_setzero_v2r8());
459 vvdwsum
= _fjsp_add_v2r8(vvdwsum
,vvdw
);
461 fscal
= _fjsp_add_v2r8(felec
,fvdw
);
463 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
465 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
467 /* Update vectorial force */
468 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
469 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
470 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
472 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
473 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
474 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
478 /**************************
479 * CALCULATE INTERACTIONS *
480 **************************/
482 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
485 /* Compute parameters for interactions between i and j atoms */
486 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
488 /* REACTION-FIELD ELECTROSTATICS */
489 velec
= _fjsp_mul_v2r8(qq10
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq10
,rinv10
),crf
));
490 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
492 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
494 /* Update potential sum for this i atom from the interaction with this j atom. */
495 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
496 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
497 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
501 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
503 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
505 /* Update vectorial force */
506 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
507 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
508 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
510 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
511 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
512 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
516 /**************************
517 * CALCULATE INTERACTIONS *
518 **************************/
520 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
523 /* Compute parameters for interactions between i and j atoms */
524 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
526 /* REACTION-FIELD ELECTROSTATICS */
527 velec
= _fjsp_mul_v2r8(qq20
,_fjsp_sub_v2r8(_fjsp_madd_v2r8(krf
,rsq20
,rinv20
),crf
));
528 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
530 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
532 /* Update potential sum for this i atom from the interaction with this j atom. */
533 velec
= _fjsp_and_v2r8(velec
,cutoff_mask
);
534 velec
= _fjsp_unpacklo_v2r8(velec
,_fjsp_setzero_v2r8());
535 velecsum
= _fjsp_add_v2r8(velecsum
,velec
);
539 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
541 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
543 /* Update vectorial force */
544 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
545 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
546 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
548 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
549 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
550 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
554 gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
);
556 /* Inner loop uses 154 flops */
559 /* End of innermost loop */
561 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
562 f
+i_coord_offset
,fshift
+i_shift_offset
);
565 /* Update potential energies */
566 gmx_fjsp_update_1pot_v2r8(velecsum
,kernel_data
->energygrp_elec
+ggid
);
567 gmx_fjsp_update_1pot_v2r8(vvdwsum
,kernel_data
->energygrp_vdw
+ggid
);
569 /* Increment number of inner iterations */
570 inneriter
+= j_index_end
- j_index_start
;
572 /* Outer loop uses 20 flops */
575 /* Increment number of outer iterations */
578 /* Update outer/inner flops */
580 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_VDW_W3_VF
,outeriter
*20 + inneriter
*154);
583 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
584 * Electrostatics interaction: ReactionField
585 * VdW interaction: LennardJones
586 * Geometry: Water3-Particle
587 * Calculate force/pot: Force
590 nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double
591 (t_nblist
* gmx_restrict nlist
,
592 rvec
* gmx_restrict xx
,
593 rvec
* gmx_restrict ff
,
594 struct t_forcerec
* gmx_restrict fr
,
595 t_mdatoms
* gmx_restrict mdatoms
,
596 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
597 t_nrnb
* gmx_restrict nrnb
)
599 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
600 * just 0 for non-waters.
601 * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different
602 * jnr indices corresponding to data put in the four positions in the SIMD register.
604 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
605 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
607 int j_coord_offsetA
,j_coord_offsetB
;
608 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
610 real
*shiftvec
,*fshift
,*x
,*f
;
611 _fjsp_v2r8 tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
613 _fjsp_v2r8 ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
615 _fjsp_v2r8 ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
617 _fjsp_v2r8 ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
618 int vdwjidx0A
,vdwjidx0B
;
619 _fjsp_v2r8 jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
620 _fjsp_v2r8 dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
621 _fjsp_v2r8 dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
622 _fjsp_v2r8 dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
623 _fjsp_v2r8 velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
626 _fjsp_v2r8 rinvsix
,rvdw
,vvdw
,vvdw6
,vvdw12
,fvdw
,fvdw6
,fvdw12
,vvdwsum
,sh_vdw_invrcut6
;
629 _fjsp_v2r8 one_sixth
= gmx_fjsp_set1_v2r8(1.0/6.0);
630 _fjsp_v2r8 one_twelfth
= gmx_fjsp_set1_v2r8(1.0/12.0);
631 _fjsp_v2r8 rswitch
,swV3
,swV4
,swV5
,swF2
,swF3
,swF4
,d
,d2
,sw
,dsw
;
632 real rswitch_scalar
,d_scalar
;
634 _fjsp_v2r8 dummy_mask
,cutoff_mask
;
635 _fjsp_v2r8 one
= gmx_fjsp_set1_v2r8(1.0);
636 _fjsp_v2r8 two
= gmx_fjsp_set1_v2r8(2.0);
637 union { _fjsp_v2r8 simd
; long long int i
[2]; } vfconv
,gbconv
,ewconv
;
644 jindex
= nlist
->jindex
;
646 shiftidx
= nlist
->shift
;
648 shiftvec
= fr
->shift_vec
[0];
649 fshift
= fr
->fshift
[0];
650 facel
= gmx_fjsp_set1_v2r8(fr
->ic
->epsfac
);
651 charge
= mdatoms
->chargeA
;
652 krf
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
);
653 krf2
= gmx_fjsp_set1_v2r8(fr
->ic
->k_rf
*2.0);
654 crf
= gmx_fjsp_set1_v2r8(fr
->ic
->c_rf
);
655 nvdwtype
= fr
->ntype
;
657 vdwtype
= mdatoms
->typeA
;
659 /* Setup water-specific parameters */
660 inr
= nlist
->iinr
[0];
661 iq0
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+0]));
662 iq1
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+1]));
663 iq2
= _fjsp_mul_v2r8(facel
,gmx_fjsp_set1_v2r8(charge
[inr
+2]));
664 vdwioffset0
= 2*nvdwtype
*vdwtype
[inr
+0];
666 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
667 rcutoff_scalar
= fr
->ic
->rcoulomb
;
668 rcutoff
= gmx_fjsp_set1_v2r8(rcutoff_scalar
);
669 rcutoff2
= _fjsp_mul_v2r8(rcutoff
,rcutoff
);
671 rswitch_scalar
= fr
->ic
->rvdw_switch
;
672 rswitch
= gmx_fjsp_set1_v2r8(rswitch_scalar
);
673 /* Setup switch parameters */
674 d_scalar
= rcutoff_scalar
-rswitch_scalar
;
675 d
= gmx_fjsp_set1_v2r8(d_scalar
);
676 swV3
= gmx_fjsp_set1_v2r8(-10.0/(d_scalar
*d_scalar
*d_scalar
));
677 swV4
= gmx_fjsp_set1_v2r8( 15.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
678 swV5
= gmx_fjsp_set1_v2r8( -6.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
679 swF2
= gmx_fjsp_set1_v2r8(-30.0/(d_scalar
*d_scalar
*d_scalar
));
680 swF3
= gmx_fjsp_set1_v2r8( 60.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
681 swF4
= gmx_fjsp_set1_v2r8(-30.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
683 /* Avoid stupid compiler warnings */
691 /* Start outer loop over neighborlists */
692 for(iidx
=0; iidx
<nri
; iidx
++)
694 /* Load shift vector for this list */
695 i_shift_offset
= DIM
*shiftidx
[iidx
];
697 /* Load limits for loop over neighbors */
698 j_index_start
= jindex
[iidx
];
699 j_index_end
= jindex
[iidx
+1];
701 /* Get outer coordinate index */
703 i_coord_offset
= DIM
*inr
;
705 /* Load i particle coords and add shift vector */
706 gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
707 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
709 fix0
= _fjsp_setzero_v2r8();
710 fiy0
= _fjsp_setzero_v2r8();
711 fiz0
= _fjsp_setzero_v2r8();
712 fix1
= _fjsp_setzero_v2r8();
713 fiy1
= _fjsp_setzero_v2r8();
714 fiz1
= _fjsp_setzero_v2r8();
715 fix2
= _fjsp_setzero_v2r8();
716 fiy2
= _fjsp_setzero_v2r8();
717 fiz2
= _fjsp_setzero_v2r8();
719 /* Start inner kernel loop */
720 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
723 /* Get j neighbor index, and coordinate index */
726 j_coord_offsetA
= DIM
*jnrA
;
727 j_coord_offsetB
= DIM
*jnrB
;
729 /* load j atom coordinates */
730 gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
733 /* Calculate displacement vector */
734 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
735 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
736 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
737 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
738 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
739 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
740 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
741 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
742 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
744 /* Calculate squared distance and things based on it */
745 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
746 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
747 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
749 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
750 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
751 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
753 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
754 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
755 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
757 /* Load parameters for j particles */
758 jq0
= gmx_fjsp_load_2real_swizzle_v2r8(charge
+jnrA
+0,charge
+jnrB
+0);
759 vdwjidx0A
= 2*vdwtype
[jnrA
+0];
760 vdwjidx0B
= 2*vdwtype
[jnrB
+0];
762 fjx0
= _fjsp_setzero_v2r8();
763 fjy0
= _fjsp_setzero_v2r8();
764 fjz0
= _fjsp_setzero_v2r8();
766 /**************************
767 * CALCULATE INTERACTIONS *
768 **************************/
770 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
773 r00
= _fjsp_mul_v2r8(rsq00
,rinv00
);
775 /* Compute parameters for interactions between i and j atoms */
776 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
777 gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam
+vdwioffset0
+vdwjidx0A
,
778 vdwparam
+vdwioffset0
+vdwjidx0B
,&c6_00
,&c12_00
);
780 /* REACTION-FIELD ELECTROSTATICS */
781 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
783 /* LENNARD-JONES DISPERSION/REPULSION */
785 rinvsix
= _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00
,rinvsq00
),rinvsq00
);
786 vvdw6
= _fjsp_mul_v2r8(c6_00
,rinvsix
);
787 vvdw12
= _fjsp_mul_v2r8(c12_00
,_fjsp_mul_v2r8(rinvsix
,rinvsix
));
788 vvdw
= _fjsp_msub_v2r8( vvdw12
,one_twelfth
, _fjsp_mul_v2r8(vvdw6
,one_sixth
) );
789 fvdw
= _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12
,vvdw6
),rinvsq00
);
791 d
= _fjsp_sub_v2r8(r00
,rswitch
);
792 d
= _fjsp_max_v2r8(d
,_fjsp_setzero_v2r8());
793 d2
= _fjsp_mul_v2r8(d
,d
);
794 sw
= _fjsp_add_v2r8(one
,_fjsp_mul_v2r8(d2
,_fjsp_mul_v2r8(d
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swV5
,swV4
),swV3
))));
796 dsw
= _fjsp_mul_v2r8(d2
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swF4
,swF3
),swF2
));
798 /* Evaluate switch function */
799 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
800 fvdw
= _fjsp_msub_v2r8( fvdw
,sw
, _fjsp_mul_v2r8(rinv00
,_fjsp_mul_v2r8(vvdw
,dsw
)) );
801 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
803 fscal
= _fjsp_add_v2r8(felec
,fvdw
);
805 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
807 /* Update vectorial force */
808 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
809 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
810 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
812 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
813 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
814 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
818 /**************************
819 * CALCULATE INTERACTIONS *
820 **************************/
822 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
825 /* Compute parameters for interactions between i and j atoms */
826 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
828 /* REACTION-FIELD ELECTROSTATICS */
829 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
831 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
835 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
837 /* Update vectorial force */
838 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
839 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
840 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
842 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
843 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
844 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
848 /**************************
849 * CALCULATE INTERACTIONS *
850 **************************/
852 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
855 /* Compute parameters for interactions between i and j atoms */
856 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
858 /* REACTION-FIELD ELECTROSTATICS */
859 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
861 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
865 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
867 /* Update vectorial force */
868 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
869 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
870 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
872 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
873 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
874 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
878 gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
);
880 /* Inner loop uses 133 flops */
887 j_coord_offsetA
= DIM
*jnrA
;
889 /* load j atom coordinates */
890 gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(x
+j_coord_offsetA
,
893 /* Calculate displacement vector */
894 dx00
= _fjsp_sub_v2r8(ix0
,jx0
);
895 dy00
= _fjsp_sub_v2r8(iy0
,jy0
);
896 dz00
= _fjsp_sub_v2r8(iz0
,jz0
);
897 dx10
= _fjsp_sub_v2r8(ix1
,jx0
);
898 dy10
= _fjsp_sub_v2r8(iy1
,jy0
);
899 dz10
= _fjsp_sub_v2r8(iz1
,jz0
);
900 dx20
= _fjsp_sub_v2r8(ix2
,jx0
);
901 dy20
= _fjsp_sub_v2r8(iy2
,jy0
);
902 dz20
= _fjsp_sub_v2r8(iz2
,jz0
);
904 /* Calculate squared distance and things based on it */
905 rsq00
= gmx_fjsp_calc_rsq_v2r8(dx00
,dy00
,dz00
);
906 rsq10
= gmx_fjsp_calc_rsq_v2r8(dx10
,dy10
,dz10
);
907 rsq20
= gmx_fjsp_calc_rsq_v2r8(dx20
,dy20
,dz20
);
909 rinv00
= gmx_fjsp_invsqrt_v2r8(rsq00
);
910 rinv10
= gmx_fjsp_invsqrt_v2r8(rsq10
);
911 rinv20
= gmx_fjsp_invsqrt_v2r8(rsq20
);
913 rinvsq00
= _fjsp_mul_v2r8(rinv00
,rinv00
);
914 rinvsq10
= _fjsp_mul_v2r8(rinv10
,rinv10
);
915 rinvsq20
= _fjsp_mul_v2r8(rinv20
,rinv20
);
917 /* Load parameters for j particles */
918 jq0
= _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(),charge
+jnrA
+0);
919 vdwjidx0A
= 2*vdwtype
[jnrA
+0];
921 fjx0
= _fjsp_setzero_v2r8();
922 fjy0
= _fjsp_setzero_v2r8();
923 fjz0
= _fjsp_setzero_v2r8();
925 /**************************
926 * CALCULATE INTERACTIONS *
927 **************************/
929 if (gmx_fjsp_any_lt_v2r8(rsq00
,rcutoff2
))
932 r00
= _fjsp_mul_v2r8(rsq00
,rinv00
);
934 /* Compute parameters for interactions between i and j atoms */
935 qq00
= _fjsp_mul_v2r8(iq0
,jq0
);
936 gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam
+vdwioffset0
+vdwjidx0A
,
937 vdwparam
+vdwioffset0
+vdwjidx0B
,&c6_00
,&c12_00
);
939 /* REACTION-FIELD ELECTROSTATICS */
940 felec
= _fjsp_mul_v2r8(qq00
,_fjsp_msub_v2r8(rinv00
,rinvsq00
,krf2
));
942 /* LENNARD-JONES DISPERSION/REPULSION */
944 rinvsix
= _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00
,rinvsq00
),rinvsq00
);
945 vvdw6
= _fjsp_mul_v2r8(c6_00
,rinvsix
);
946 vvdw12
= _fjsp_mul_v2r8(c12_00
,_fjsp_mul_v2r8(rinvsix
,rinvsix
));
947 vvdw
= _fjsp_msub_v2r8( vvdw12
,one_twelfth
, _fjsp_mul_v2r8(vvdw6
,one_sixth
) );
948 fvdw
= _fjsp_mul_v2r8(_fjsp_sub_v2r8(vvdw12
,vvdw6
),rinvsq00
);
950 d
= _fjsp_sub_v2r8(r00
,rswitch
);
951 d
= _fjsp_max_v2r8(d
,_fjsp_setzero_v2r8());
952 d2
= _fjsp_mul_v2r8(d
,d
);
953 sw
= _fjsp_add_v2r8(one
,_fjsp_mul_v2r8(d2
,_fjsp_mul_v2r8(d
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swV5
,swV4
),swV3
))));
955 dsw
= _fjsp_mul_v2r8(d2
,_fjsp_madd_v2r8(d
,_fjsp_madd_v2r8(d
,swF4
,swF3
),swF2
));
957 /* Evaluate switch function */
958 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
959 fvdw
= _fjsp_msub_v2r8( fvdw
,sw
, _fjsp_mul_v2r8(rinv00
,_fjsp_mul_v2r8(vvdw
,dsw
)) );
960 cutoff_mask
= _fjsp_cmplt_v2r8(rsq00
,rcutoff2
);
962 fscal
= _fjsp_add_v2r8(felec
,fvdw
);
964 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
966 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
968 /* Update vectorial force */
969 fix0
= _fjsp_madd_v2r8(dx00
,fscal
,fix0
);
970 fiy0
= _fjsp_madd_v2r8(dy00
,fscal
,fiy0
);
971 fiz0
= _fjsp_madd_v2r8(dz00
,fscal
,fiz0
);
973 fjx0
= _fjsp_madd_v2r8(dx00
,fscal
,fjx0
);
974 fjy0
= _fjsp_madd_v2r8(dy00
,fscal
,fjy0
);
975 fjz0
= _fjsp_madd_v2r8(dz00
,fscal
,fjz0
);
979 /**************************
980 * CALCULATE INTERACTIONS *
981 **************************/
983 if (gmx_fjsp_any_lt_v2r8(rsq10
,rcutoff2
))
986 /* Compute parameters for interactions between i and j atoms */
987 qq10
= _fjsp_mul_v2r8(iq1
,jq0
);
989 /* REACTION-FIELD ELECTROSTATICS */
990 felec
= _fjsp_mul_v2r8(qq10
,_fjsp_msub_v2r8(rinv10
,rinvsq10
,krf2
));
992 cutoff_mask
= _fjsp_cmplt_v2r8(rsq10
,rcutoff2
);
996 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
998 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1000 /* Update vectorial force */
1001 fix1
= _fjsp_madd_v2r8(dx10
,fscal
,fix1
);
1002 fiy1
= _fjsp_madd_v2r8(dy10
,fscal
,fiy1
);
1003 fiz1
= _fjsp_madd_v2r8(dz10
,fscal
,fiz1
);
1005 fjx0
= _fjsp_madd_v2r8(dx10
,fscal
,fjx0
);
1006 fjy0
= _fjsp_madd_v2r8(dy10
,fscal
,fjy0
);
1007 fjz0
= _fjsp_madd_v2r8(dz10
,fscal
,fjz0
);
1011 /**************************
1012 * CALCULATE INTERACTIONS *
1013 **************************/
1015 if (gmx_fjsp_any_lt_v2r8(rsq20
,rcutoff2
))
1018 /* Compute parameters for interactions between i and j atoms */
1019 qq20
= _fjsp_mul_v2r8(iq2
,jq0
);
1021 /* REACTION-FIELD ELECTROSTATICS */
1022 felec
= _fjsp_mul_v2r8(qq20
,_fjsp_msub_v2r8(rinv20
,rinvsq20
,krf2
));
1024 cutoff_mask
= _fjsp_cmplt_v2r8(rsq20
,rcutoff2
);
1028 fscal
= _fjsp_and_v2r8(fscal
,cutoff_mask
);
1030 fscal
= _fjsp_unpacklo_v2r8(fscal
,_fjsp_setzero_v2r8());
1032 /* Update vectorial force */
1033 fix2
= _fjsp_madd_v2r8(dx20
,fscal
,fix2
);
1034 fiy2
= _fjsp_madd_v2r8(dy20
,fscal
,fiy2
);
1035 fiz2
= _fjsp_madd_v2r8(dz20
,fscal
,fiz2
);
1037 fjx0
= _fjsp_madd_v2r8(dx20
,fscal
,fjx0
);
1038 fjy0
= _fjsp_madd_v2r8(dy20
,fscal
,fjy0
);
1039 fjz0
= _fjsp_madd_v2r8(dz20
,fscal
,fjz0
);
1043 gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
);
1045 /* Inner loop uses 133 flops */
1048 /* End of innermost loop */
1050 gmx_fjsp_update_iforce_3atom_swizzle_v2r8(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1051 f
+i_coord_offset
,fshift
+i_shift_offset
);
1053 /* Increment number of inner iterations */
1054 inneriter
+= j_index_end
- j_index_start
;
1056 /* Outer loop uses 18 flops */
1059 /* Increment number of outer iterations */
1062 /* Update outer/inner flops */
1064 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_VDW_W3_F
,outeriter
*18 + inneriter
*133);