2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sse2_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/gmxlib/nrnb.h"
47 #include "kernelutil_x86_sse2_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: LennardJones
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sse2_double
58 (t_nblist
* gmx_restrict nlist
,
59 rvec
* gmx_restrict xx
,
60 rvec
* gmx_restrict ff
,
61 struct t_forcerec
* gmx_restrict fr
,
62 t_mdatoms
* gmx_restrict mdatoms
,
63 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
64 t_nrnb
* gmx_restrict nrnb
)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
72 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
74 int j_coord_offsetA
,j_coord_offsetB
;
75 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
77 real
*shiftvec
,*fshift
,*x
,*f
;
78 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
80 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
82 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
84 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
85 int vdwjidx0A
,vdwjidx0B
;
86 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
87 int vdwjidx1A
,vdwjidx1B
;
88 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
89 int vdwjidx2A
,vdwjidx2B
;
90 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
91 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
92 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
93 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
94 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
95 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
96 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
97 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
98 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
99 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
100 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
103 __m128d rinvsix
,rvdw
,vvdw
,vvdw6
,vvdw12
,fvdw
,fvdw6
,fvdw12
,vvdwsum
,sh_vdw_invrcut6
;
106 __m128d one_sixth
= _mm_set1_pd(1.0/6.0);
107 __m128d one_twelfth
= _mm_set1_pd(1.0/12.0);
108 __m128d rswitch
,swV3
,swV4
,swV5
,swF2
,swF3
,swF4
,d
,d2
,sw
,dsw
;
109 real rswitch_scalar
,d_scalar
;
110 __m128d dummy_mask
,cutoff_mask
;
111 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
112 __m128d one
= _mm_set1_pd(1.0);
113 __m128d two
= _mm_set1_pd(2.0);
119 jindex
= nlist
->jindex
;
121 shiftidx
= nlist
->shift
;
123 shiftvec
= fr
->shift_vec
[0];
124 fshift
= fr
->fshift
[0];
125 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
126 charge
= mdatoms
->chargeA
;
127 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
128 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
129 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
130 nvdwtype
= fr
->ntype
;
132 vdwtype
= mdatoms
->typeA
;
134 /* Setup water-specific parameters */
135 inr
= nlist
->iinr
[0];
136 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
137 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
138 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
139 vdwioffset0
= 2*nvdwtype
*vdwtype
[inr
+0];
141 jq0
= _mm_set1_pd(charge
[inr
+0]);
142 jq1
= _mm_set1_pd(charge
[inr
+1]);
143 jq2
= _mm_set1_pd(charge
[inr
+2]);
144 vdwjidx0A
= 2*vdwtype
[inr
+0];
145 qq00
= _mm_mul_pd(iq0
,jq0
);
146 c6_00
= _mm_set1_pd(vdwparam
[vdwioffset0
+vdwjidx0A
]);
147 c12_00
= _mm_set1_pd(vdwparam
[vdwioffset0
+vdwjidx0A
+1]);
148 qq01
= _mm_mul_pd(iq0
,jq1
);
149 qq02
= _mm_mul_pd(iq0
,jq2
);
150 qq10
= _mm_mul_pd(iq1
,jq0
);
151 qq11
= _mm_mul_pd(iq1
,jq1
);
152 qq12
= _mm_mul_pd(iq1
,jq2
);
153 qq20
= _mm_mul_pd(iq2
,jq0
);
154 qq21
= _mm_mul_pd(iq2
,jq1
);
155 qq22
= _mm_mul_pd(iq2
,jq2
);
157 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
158 rcutoff_scalar
= fr
->ic
->rcoulomb
;
159 rcutoff
= _mm_set1_pd(rcutoff_scalar
);
160 rcutoff2
= _mm_mul_pd(rcutoff
,rcutoff
);
162 rswitch_scalar
= fr
->ic
->rvdw_switch
;
163 rswitch
= _mm_set1_pd(rswitch_scalar
);
164 /* Setup switch parameters */
165 d_scalar
= rcutoff_scalar
-rswitch_scalar
;
166 d
= _mm_set1_pd(d_scalar
);
167 swV3
= _mm_set1_pd(-10.0/(d_scalar
*d_scalar
*d_scalar
));
168 swV4
= _mm_set1_pd( 15.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
169 swV5
= _mm_set1_pd( -6.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
170 swF2
= _mm_set1_pd(-30.0/(d_scalar
*d_scalar
*d_scalar
));
171 swF3
= _mm_set1_pd( 60.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
172 swF4
= _mm_set1_pd(-30.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
174 /* Avoid stupid compiler warnings */
182 /* Start outer loop over neighborlists */
183 for(iidx
=0; iidx
<nri
; iidx
++)
185 /* Load shift vector for this list */
186 i_shift_offset
= DIM
*shiftidx
[iidx
];
188 /* Load limits for loop over neighbors */
189 j_index_start
= jindex
[iidx
];
190 j_index_end
= jindex
[iidx
+1];
192 /* Get outer coordinate index */
194 i_coord_offset
= DIM
*inr
;
196 /* Load i particle coords and add shift vector */
197 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
198 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
200 fix0
= _mm_setzero_pd();
201 fiy0
= _mm_setzero_pd();
202 fiz0
= _mm_setzero_pd();
203 fix1
= _mm_setzero_pd();
204 fiy1
= _mm_setzero_pd();
205 fiz1
= _mm_setzero_pd();
206 fix2
= _mm_setzero_pd();
207 fiy2
= _mm_setzero_pd();
208 fiz2
= _mm_setzero_pd();
210 /* Reset potential sums */
211 velecsum
= _mm_setzero_pd();
212 vvdwsum
= _mm_setzero_pd();
214 /* Start inner kernel loop */
215 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
218 /* Get j neighbor index, and coordinate index */
221 j_coord_offsetA
= DIM
*jnrA
;
222 j_coord_offsetB
= DIM
*jnrB
;
224 /* load j atom coordinates */
225 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
226 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
228 /* Calculate displacement vector */
229 dx00
= _mm_sub_pd(ix0
,jx0
);
230 dy00
= _mm_sub_pd(iy0
,jy0
);
231 dz00
= _mm_sub_pd(iz0
,jz0
);
232 dx01
= _mm_sub_pd(ix0
,jx1
);
233 dy01
= _mm_sub_pd(iy0
,jy1
);
234 dz01
= _mm_sub_pd(iz0
,jz1
);
235 dx02
= _mm_sub_pd(ix0
,jx2
);
236 dy02
= _mm_sub_pd(iy0
,jy2
);
237 dz02
= _mm_sub_pd(iz0
,jz2
);
238 dx10
= _mm_sub_pd(ix1
,jx0
);
239 dy10
= _mm_sub_pd(iy1
,jy0
);
240 dz10
= _mm_sub_pd(iz1
,jz0
);
241 dx11
= _mm_sub_pd(ix1
,jx1
);
242 dy11
= _mm_sub_pd(iy1
,jy1
);
243 dz11
= _mm_sub_pd(iz1
,jz1
);
244 dx12
= _mm_sub_pd(ix1
,jx2
);
245 dy12
= _mm_sub_pd(iy1
,jy2
);
246 dz12
= _mm_sub_pd(iz1
,jz2
);
247 dx20
= _mm_sub_pd(ix2
,jx0
);
248 dy20
= _mm_sub_pd(iy2
,jy0
);
249 dz20
= _mm_sub_pd(iz2
,jz0
);
250 dx21
= _mm_sub_pd(ix2
,jx1
);
251 dy21
= _mm_sub_pd(iy2
,jy1
);
252 dz21
= _mm_sub_pd(iz2
,jz1
);
253 dx22
= _mm_sub_pd(ix2
,jx2
);
254 dy22
= _mm_sub_pd(iy2
,jy2
);
255 dz22
= _mm_sub_pd(iz2
,jz2
);
257 /* Calculate squared distance and things based on it */
258 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
259 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
260 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
261 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
262 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
263 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
264 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
265 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
266 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
268 rinv00
= sse2_invsqrt_d(rsq00
);
269 rinv01
= sse2_invsqrt_d(rsq01
);
270 rinv02
= sse2_invsqrt_d(rsq02
);
271 rinv10
= sse2_invsqrt_d(rsq10
);
272 rinv11
= sse2_invsqrt_d(rsq11
);
273 rinv12
= sse2_invsqrt_d(rsq12
);
274 rinv20
= sse2_invsqrt_d(rsq20
);
275 rinv21
= sse2_invsqrt_d(rsq21
);
276 rinv22
= sse2_invsqrt_d(rsq22
);
278 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
279 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
280 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
281 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
282 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
283 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
284 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
285 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
286 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
288 fjx0
= _mm_setzero_pd();
289 fjy0
= _mm_setzero_pd();
290 fjz0
= _mm_setzero_pd();
291 fjx1
= _mm_setzero_pd();
292 fjy1
= _mm_setzero_pd();
293 fjz1
= _mm_setzero_pd();
294 fjx2
= _mm_setzero_pd();
295 fjy2
= _mm_setzero_pd();
296 fjz2
= _mm_setzero_pd();
298 /**************************
299 * CALCULATE INTERACTIONS *
300 **************************/
302 if (gmx_mm_any_lt(rsq00
,rcutoff2
))
305 r00
= _mm_mul_pd(rsq00
,rinv00
);
307 /* REACTION-FIELD ELECTROSTATICS */
308 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_add_pd(rinv00
,_mm_mul_pd(krf
,rsq00
)),crf
));
309 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
311 /* LENNARD-JONES DISPERSION/REPULSION */
313 rinvsix
= _mm_mul_pd(_mm_mul_pd(rinvsq00
,rinvsq00
),rinvsq00
);
314 vvdw6
= _mm_mul_pd(c6_00
,rinvsix
);
315 vvdw12
= _mm_mul_pd(c12_00
,_mm_mul_pd(rinvsix
,rinvsix
));
316 vvdw
= _mm_sub_pd( _mm_mul_pd(vvdw12
,one_twelfth
) , _mm_mul_pd(vvdw6
,one_sixth
) );
317 fvdw
= _mm_mul_pd(_mm_sub_pd(vvdw12
,vvdw6
),rinvsq00
);
319 d
= _mm_sub_pd(r00
,rswitch
);
320 d
= _mm_max_pd(d
,_mm_setzero_pd());
321 d2
= _mm_mul_pd(d
,d
);
322 sw
= _mm_add_pd(one
,_mm_mul_pd(d2
,_mm_mul_pd(d
,_mm_add_pd(swV3
,_mm_mul_pd(d
,_mm_add_pd(swV4
,_mm_mul_pd(d
,swV5
)))))));
324 dsw
= _mm_mul_pd(d2
,_mm_add_pd(swF2
,_mm_mul_pd(d
,_mm_add_pd(swF3
,_mm_mul_pd(d
,swF4
)))));
326 /* Evaluate switch function */
327 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
328 fvdw
= _mm_sub_pd( _mm_mul_pd(fvdw
,sw
) , _mm_mul_pd(rinv00
,_mm_mul_pd(vvdw
,dsw
)) );
329 vvdw
= _mm_mul_pd(vvdw
,sw
);
330 cutoff_mask
= _mm_cmplt_pd(rsq00
,rcutoff2
);
332 /* Update potential sum for this i atom from the interaction with this j atom. */
333 velec
= _mm_and_pd(velec
,cutoff_mask
);
334 velecsum
= _mm_add_pd(velecsum
,velec
);
335 vvdw
= _mm_and_pd(vvdw
,cutoff_mask
);
336 vvdwsum
= _mm_add_pd(vvdwsum
,vvdw
);
338 fscal
= _mm_add_pd(felec
,fvdw
);
340 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
342 /* Calculate temporary vectorial force */
343 tx
= _mm_mul_pd(fscal
,dx00
);
344 ty
= _mm_mul_pd(fscal
,dy00
);
345 tz
= _mm_mul_pd(fscal
,dz00
);
347 /* Update vectorial force */
348 fix0
= _mm_add_pd(fix0
,tx
);
349 fiy0
= _mm_add_pd(fiy0
,ty
);
350 fiz0
= _mm_add_pd(fiz0
,tz
);
352 fjx0
= _mm_add_pd(fjx0
,tx
);
353 fjy0
= _mm_add_pd(fjy0
,ty
);
354 fjz0
= _mm_add_pd(fjz0
,tz
);
358 /**************************
359 * CALCULATE INTERACTIONS *
360 **************************/
362 if (gmx_mm_any_lt(rsq01
,rcutoff2
))
365 /* REACTION-FIELD ELECTROSTATICS */
366 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_add_pd(rinv01
,_mm_mul_pd(krf
,rsq01
)),crf
));
367 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
369 cutoff_mask
= _mm_cmplt_pd(rsq01
,rcutoff2
);
371 /* Update potential sum for this i atom from the interaction with this j atom. */
372 velec
= _mm_and_pd(velec
,cutoff_mask
);
373 velecsum
= _mm_add_pd(velecsum
,velec
);
377 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
379 /* Calculate temporary vectorial force */
380 tx
= _mm_mul_pd(fscal
,dx01
);
381 ty
= _mm_mul_pd(fscal
,dy01
);
382 tz
= _mm_mul_pd(fscal
,dz01
);
384 /* Update vectorial force */
385 fix0
= _mm_add_pd(fix0
,tx
);
386 fiy0
= _mm_add_pd(fiy0
,ty
);
387 fiz0
= _mm_add_pd(fiz0
,tz
);
389 fjx1
= _mm_add_pd(fjx1
,tx
);
390 fjy1
= _mm_add_pd(fjy1
,ty
);
391 fjz1
= _mm_add_pd(fjz1
,tz
);
395 /**************************
396 * CALCULATE INTERACTIONS *
397 **************************/
399 if (gmx_mm_any_lt(rsq02
,rcutoff2
))
402 /* REACTION-FIELD ELECTROSTATICS */
403 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_add_pd(rinv02
,_mm_mul_pd(krf
,rsq02
)),crf
));
404 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
406 cutoff_mask
= _mm_cmplt_pd(rsq02
,rcutoff2
);
408 /* Update potential sum for this i atom from the interaction with this j atom. */
409 velec
= _mm_and_pd(velec
,cutoff_mask
);
410 velecsum
= _mm_add_pd(velecsum
,velec
);
414 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
416 /* Calculate temporary vectorial force */
417 tx
= _mm_mul_pd(fscal
,dx02
);
418 ty
= _mm_mul_pd(fscal
,dy02
);
419 tz
= _mm_mul_pd(fscal
,dz02
);
421 /* Update vectorial force */
422 fix0
= _mm_add_pd(fix0
,tx
);
423 fiy0
= _mm_add_pd(fiy0
,ty
);
424 fiz0
= _mm_add_pd(fiz0
,tz
);
426 fjx2
= _mm_add_pd(fjx2
,tx
);
427 fjy2
= _mm_add_pd(fjy2
,ty
);
428 fjz2
= _mm_add_pd(fjz2
,tz
);
432 /**************************
433 * CALCULATE INTERACTIONS *
434 **************************/
436 if (gmx_mm_any_lt(rsq10
,rcutoff2
))
439 /* REACTION-FIELD ELECTROSTATICS */
440 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_add_pd(rinv10
,_mm_mul_pd(krf
,rsq10
)),crf
));
441 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
443 cutoff_mask
= _mm_cmplt_pd(rsq10
,rcutoff2
);
445 /* Update potential sum for this i atom from the interaction with this j atom. */
446 velec
= _mm_and_pd(velec
,cutoff_mask
);
447 velecsum
= _mm_add_pd(velecsum
,velec
);
451 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
453 /* Calculate temporary vectorial force */
454 tx
= _mm_mul_pd(fscal
,dx10
);
455 ty
= _mm_mul_pd(fscal
,dy10
);
456 tz
= _mm_mul_pd(fscal
,dz10
);
458 /* Update vectorial force */
459 fix1
= _mm_add_pd(fix1
,tx
);
460 fiy1
= _mm_add_pd(fiy1
,ty
);
461 fiz1
= _mm_add_pd(fiz1
,tz
);
463 fjx0
= _mm_add_pd(fjx0
,tx
);
464 fjy0
= _mm_add_pd(fjy0
,ty
);
465 fjz0
= _mm_add_pd(fjz0
,tz
);
469 /**************************
470 * CALCULATE INTERACTIONS *
471 **************************/
473 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
476 /* REACTION-FIELD ELECTROSTATICS */
477 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
478 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
480 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
482 /* Update potential sum for this i atom from the interaction with this j atom. */
483 velec
= _mm_and_pd(velec
,cutoff_mask
);
484 velecsum
= _mm_add_pd(velecsum
,velec
);
488 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
490 /* Calculate temporary vectorial force */
491 tx
= _mm_mul_pd(fscal
,dx11
);
492 ty
= _mm_mul_pd(fscal
,dy11
);
493 tz
= _mm_mul_pd(fscal
,dz11
);
495 /* Update vectorial force */
496 fix1
= _mm_add_pd(fix1
,tx
);
497 fiy1
= _mm_add_pd(fiy1
,ty
);
498 fiz1
= _mm_add_pd(fiz1
,tz
);
500 fjx1
= _mm_add_pd(fjx1
,tx
);
501 fjy1
= _mm_add_pd(fjy1
,ty
);
502 fjz1
= _mm_add_pd(fjz1
,tz
);
506 /**************************
507 * CALCULATE INTERACTIONS *
508 **************************/
510 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
513 /* REACTION-FIELD ELECTROSTATICS */
514 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
515 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
517 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
519 /* Update potential sum for this i atom from the interaction with this j atom. */
520 velec
= _mm_and_pd(velec
,cutoff_mask
);
521 velecsum
= _mm_add_pd(velecsum
,velec
);
525 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
527 /* Calculate temporary vectorial force */
528 tx
= _mm_mul_pd(fscal
,dx12
);
529 ty
= _mm_mul_pd(fscal
,dy12
);
530 tz
= _mm_mul_pd(fscal
,dz12
);
532 /* Update vectorial force */
533 fix1
= _mm_add_pd(fix1
,tx
);
534 fiy1
= _mm_add_pd(fiy1
,ty
);
535 fiz1
= _mm_add_pd(fiz1
,tz
);
537 fjx2
= _mm_add_pd(fjx2
,tx
);
538 fjy2
= _mm_add_pd(fjy2
,ty
);
539 fjz2
= _mm_add_pd(fjz2
,tz
);
543 /**************************
544 * CALCULATE INTERACTIONS *
545 **************************/
547 if (gmx_mm_any_lt(rsq20
,rcutoff2
))
550 /* REACTION-FIELD ELECTROSTATICS */
551 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_add_pd(rinv20
,_mm_mul_pd(krf
,rsq20
)),crf
));
552 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
554 cutoff_mask
= _mm_cmplt_pd(rsq20
,rcutoff2
);
556 /* Update potential sum for this i atom from the interaction with this j atom. */
557 velec
= _mm_and_pd(velec
,cutoff_mask
);
558 velecsum
= _mm_add_pd(velecsum
,velec
);
562 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
564 /* Calculate temporary vectorial force */
565 tx
= _mm_mul_pd(fscal
,dx20
);
566 ty
= _mm_mul_pd(fscal
,dy20
);
567 tz
= _mm_mul_pd(fscal
,dz20
);
569 /* Update vectorial force */
570 fix2
= _mm_add_pd(fix2
,tx
);
571 fiy2
= _mm_add_pd(fiy2
,ty
);
572 fiz2
= _mm_add_pd(fiz2
,tz
);
574 fjx0
= _mm_add_pd(fjx0
,tx
);
575 fjy0
= _mm_add_pd(fjy0
,ty
);
576 fjz0
= _mm_add_pd(fjz0
,tz
);
580 /**************************
581 * CALCULATE INTERACTIONS *
582 **************************/
584 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
587 /* REACTION-FIELD ELECTROSTATICS */
588 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
589 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
591 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
593 /* Update potential sum for this i atom from the interaction with this j atom. */
594 velec
= _mm_and_pd(velec
,cutoff_mask
);
595 velecsum
= _mm_add_pd(velecsum
,velec
);
599 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
601 /* Calculate temporary vectorial force */
602 tx
= _mm_mul_pd(fscal
,dx21
);
603 ty
= _mm_mul_pd(fscal
,dy21
);
604 tz
= _mm_mul_pd(fscal
,dz21
);
606 /* Update vectorial force */
607 fix2
= _mm_add_pd(fix2
,tx
);
608 fiy2
= _mm_add_pd(fiy2
,ty
);
609 fiz2
= _mm_add_pd(fiz2
,tz
);
611 fjx1
= _mm_add_pd(fjx1
,tx
);
612 fjy1
= _mm_add_pd(fjy1
,ty
);
613 fjz1
= _mm_add_pd(fjz1
,tz
);
617 /**************************
618 * CALCULATE INTERACTIONS *
619 **************************/
621 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
624 /* REACTION-FIELD ELECTROSTATICS */
625 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
626 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
628 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
630 /* Update potential sum for this i atom from the interaction with this j atom. */
631 velec
= _mm_and_pd(velec
,cutoff_mask
);
632 velecsum
= _mm_add_pd(velecsum
,velec
);
636 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
638 /* Calculate temporary vectorial force */
639 tx
= _mm_mul_pd(fscal
,dx22
);
640 ty
= _mm_mul_pd(fscal
,dy22
);
641 tz
= _mm_mul_pd(fscal
,dz22
);
643 /* Update vectorial force */
644 fix2
= _mm_add_pd(fix2
,tx
);
645 fiy2
= _mm_add_pd(fiy2
,ty
);
646 fiz2
= _mm_add_pd(fiz2
,tz
);
648 fjx2
= _mm_add_pd(fjx2
,tx
);
649 fjy2
= _mm_add_pd(fjy2
,ty
);
650 fjz2
= _mm_add_pd(fjz2
,tz
);
654 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
656 /* Inner loop uses 358 flops */
663 j_coord_offsetA
= DIM
*jnrA
;
665 /* load j atom coordinates */
666 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
667 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
669 /* Calculate displacement vector */
670 dx00
= _mm_sub_pd(ix0
,jx0
);
671 dy00
= _mm_sub_pd(iy0
,jy0
);
672 dz00
= _mm_sub_pd(iz0
,jz0
);
673 dx01
= _mm_sub_pd(ix0
,jx1
);
674 dy01
= _mm_sub_pd(iy0
,jy1
);
675 dz01
= _mm_sub_pd(iz0
,jz1
);
676 dx02
= _mm_sub_pd(ix0
,jx2
);
677 dy02
= _mm_sub_pd(iy0
,jy2
);
678 dz02
= _mm_sub_pd(iz0
,jz2
);
679 dx10
= _mm_sub_pd(ix1
,jx0
);
680 dy10
= _mm_sub_pd(iy1
,jy0
);
681 dz10
= _mm_sub_pd(iz1
,jz0
);
682 dx11
= _mm_sub_pd(ix1
,jx1
);
683 dy11
= _mm_sub_pd(iy1
,jy1
);
684 dz11
= _mm_sub_pd(iz1
,jz1
);
685 dx12
= _mm_sub_pd(ix1
,jx2
);
686 dy12
= _mm_sub_pd(iy1
,jy2
);
687 dz12
= _mm_sub_pd(iz1
,jz2
);
688 dx20
= _mm_sub_pd(ix2
,jx0
);
689 dy20
= _mm_sub_pd(iy2
,jy0
);
690 dz20
= _mm_sub_pd(iz2
,jz0
);
691 dx21
= _mm_sub_pd(ix2
,jx1
);
692 dy21
= _mm_sub_pd(iy2
,jy1
);
693 dz21
= _mm_sub_pd(iz2
,jz1
);
694 dx22
= _mm_sub_pd(ix2
,jx2
);
695 dy22
= _mm_sub_pd(iy2
,jy2
);
696 dz22
= _mm_sub_pd(iz2
,jz2
);
698 /* Calculate squared distance and things based on it */
699 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
700 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
701 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
702 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
703 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
704 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
705 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
706 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
707 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
709 rinv00
= sse2_invsqrt_d(rsq00
);
710 rinv01
= sse2_invsqrt_d(rsq01
);
711 rinv02
= sse2_invsqrt_d(rsq02
);
712 rinv10
= sse2_invsqrt_d(rsq10
);
713 rinv11
= sse2_invsqrt_d(rsq11
);
714 rinv12
= sse2_invsqrt_d(rsq12
);
715 rinv20
= sse2_invsqrt_d(rsq20
);
716 rinv21
= sse2_invsqrt_d(rsq21
);
717 rinv22
= sse2_invsqrt_d(rsq22
);
719 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
720 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
721 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
722 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
723 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
724 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
725 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
726 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
727 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
729 fjx0
= _mm_setzero_pd();
730 fjy0
= _mm_setzero_pd();
731 fjz0
= _mm_setzero_pd();
732 fjx1
= _mm_setzero_pd();
733 fjy1
= _mm_setzero_pd();
734 fjz1
= _mm_setzero_pd();
735 fjx2
= _mm_setzero_pd();
736 fjy2
= _mm_setzero_pd();
737 fjz2
= _mm_setzero_pd();
739 /**************************
740 * CALCULATE INTERACTIONS *
741 **************************/
743 if (gmx_mm_any_lt(rsq00
,rcutoff2
))
746 r00
= _mm_mul_pd(rsq00
,rinv00
);
748 /* REACTION-FIELD ELECTROSTATICS */
749 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_add_pd(rinv00
,_mm_mul_pd(krf
,rsq00
)),crf
));
750 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
752 /* LENNARD-JONES DISPERSION/REPULSION */
754 rinvsix
= _mm_mul_pd(_mm_mul_pd(rinvsq00
,rinvsq00
),rinvsq00
);
755 vvdw6
= _mm_mul_pd(c6_00
,rinvsix
);
756 vvdw12
= _mm_mul_pd(c12_00
,_mm_mul_pd(rinvsix
,rinvsix
));
757 vvdw
= _mm_sub_pd( _mm_mul_pd(vvdw12
,one_twelfth
) , _mm_mul_pd(vvdw6
,one_sixth
) );
758 fvdw
= _mm_mul_pd(_mm_sub_pd(vvdw12
,vvdw6
),rinvsq00
);
760 d
= _mm_sub_pd(r00
,rswitch
);
761 d
= _mm_max_pd(d
,_mm_setzero_pd());
762 d2
= _mm_mul_pd(d
,d
);
763 sw
= _mm_add_pd(one
,_mm_mul_pd(d2
,_mm_mul_pd(d
,_mm_add_pd(swV3
,_mm_mul_pd(d
,_mm_add_pd(swV4
,_mm_mul_pd(d
,swV5
)))))));
765 dsw
= _mm_mul_pd(d2
,_mm_add_pd(swF2
,_mm_mul_pd(d
,_mm_add_pd(swF3
,_mm_mul_pd(d
,swF4
)))));
767 /* Evaluate switch function */
768 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
769 fvdw
= _mm_sub_pd( _mm_mul_pd(fvdw
,sw
) , _mm_mul_pd(rinv00
,_mm_mul_pd(vvdw
,dsw
)) );
770 vvdw
= _mm_mul_pd(vvdw
,sw
);
771 cutoff_mask
= _mm_cmplt_pd(rsq00
,rcutoff2
);
773 /* Update potential sum for this i atom from the interaction with this j atom. */
774 velec
= _mm_and_pd(velec
,cutoff_mask
);
775 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
776 velecsum
= _mm_add_pd(velecsum
,velec
);
777 vvdw
= _mm_and_pd(vvdw
,cutoff_mask
);
778 vvdw
= _mm_unpacklo_pd(vvdw
,_mm_setzero_pd());
779 vvdwsum
= _mm_add_pd(vvdwsum
,vvdw
);
781 fscal
= _mm_add_pd(felec
,fvdw
);
783 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
785 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
787 /* Calculate temporary vectorial force */
788 tx
= _mm_mul_pd(fscal
,dx00
);
789 ty
= _mm_mul_pd(fscal
,dy00
);
790 tz
= _mm_mul_pd(fscal
,dz00
);
792 /* Update vectorial force */
793 fix0
= _mm_add_pd(fix0
,tx
);
794 fiy0
= _mm_add_pd(fiy0
,ty
);
795 fiz0
= _mm_add_pd(fiz0
,tz
);
797 fjx0
= _mm_add_pd(fjx0
,tx
);
798 fjy0
= _mm_add_pd(fjy0
,ty
);
799 fjz0
= _mm_add_pd(fjz0
,tz
);
803 /**************************
804 * CALCULATE INTERACTIONS *
805 **************************/
807 if (gmx_mm_any_lt(rsq01
,rcutoff2
))
810 /* REACTION-FIELD ELECTROSTATICS */
811 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_add_pd(rinv01
,_mm_mul_pd(krf
,rsq01
)),crf
));
812 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
814 cutoff_mask
= _mm_cmplt_pd(rsq01
,rcutoff2
);
816 /* Update potential sum for this i atom from the interaction with this j atom. */
817 velec
= _mm_and_pd(velec
,cutoff_mask
);
818 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
819 velecsum
= _mm_add_pd(velecsum
,velec
);
823 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
825 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
827 /* Calculate temporary vectorial force */
828 tx
= _mm_mul_pd(fscal
,dx01
);
829 ty
= _mm_mul_pd(fscal
,dy01
);
830 tz
= _mm_mul_pd(fscal
,dz01
);
832 /* Update vectorial force */
833 fix0
= _mm_add_pd(fix0
,tx
);
834 fiy0
= _mm_add_pd(fiy0
,ty
);
835 fiz0
= _mm_add_pd(fiz0
,tz
);
837 fjx1
= _mm_add_pd(fjx1
,tx
);
838 fjy1
= _mm_add_pd(fjy1
,ty
);
839 fjz1
= _mm_add_pd(fjz1
,tz
);
843 /**************************
844 * CALCULATE INTERACTIONS *
845 **************************/
847 if (gmx_mm_any_lt(rsq02
,rcutoff2
))
850 /* REACTION-FIELD ELECTROSTATICS */
851 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_add_pd(rinv02
,_mm_mul_pd(krf
,rsq02
)),crf
));
852 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
854 cutoff_mask
= _mm_cmplt_pd(rsq02
,rcutoff2
);
856 /* Update potential sum for this i atom from the interaction with this j atom. */
857 velec
= _mm_and_pd(velec
,cutoff_mask
);
858 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
859 velecsum
= _mm_add_pd(velecsum
,velec
);
863 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
865 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
867 /* Calculate temporary vectorial force */
868 tx
= _mm_mul_pd(fscal
,dx02
);
869 ty
= _mm_mul_pd(fscal
,dy02
);
870 tz
= _mm_mul_pd(fscal
,dz02
);
872 /* Update vectorial force */
873 fix0
= _mm_add_pd(fix0
,tx
);
874 fiy0
= _mm_add_pd(fiy0
,ty
);
875 fiz0
= _mm_add_pd(fiz0
,tz
);
877 fjx2
= _mm_add_pd(fjx2
,tx
);
878 fjy2
= _mm_add_pd(fjy2
,ty
);
879 fjz2
= _mm_add_pd(fjz2
,tz
);
883 /**************************
884 * CALCULATE INTERACTIONS *
885 **************************/
887 if (gmx_mm_any_lt(rsq10
,rcutoff2
))
890 /* REACTION-FIELD ELECTROSTATICS */
891 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_add_pd(rinv10
,_mm_mul_pd(krf
,rsq10
)),crf
));
892 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
894 cutoff_mask
= _mm_cmplt_pd(rsq10
,rcutoff2
);
896 /* Update potential sum for this i atom from the interaction with this j atom. */
897 velec
= _mm_and_pd(velec
,cutoff_mask
);
898 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
899 velecsum
= _mm_add_pd(velecsum
,velec
);
903 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
905 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
907 /* Calculate temporary vectorial force */
908 tx
= _mm_mul_pd(fscal
,dx10
);
909 ty
= _mm_mul_pd(fscal
,dy10
);
910 tz
= _mm_mul_pd(fscal
,dz10
);
912 /* Update vectorial force */
913 fix1
= _mm_add_pd(fix1
,tx
);
914 fiy1
= _mm_add_pd(fiy1
,ty
);
915 fiz1
= _mm_add_pd(fiz1
,tz
);
917 fjx0
= _mm_add_pd(fjx0
,tx
);
918 fjy0
= _mm_add_pd(fjy0
,ty
);
919 fjz0
= _mm_add_pd(fjz0
,tz
);
923 /**************************
924 * CALCULATE INTERACTIONS *
925 **************************/
927 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
930 /* REACTION-FIELD ELECTROSTATICS */
931 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
932 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
934 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
936 /* Update potential sum for this i atom from the interaction with this j atom. */
937 velec
= _mm_and_pd(velec
,cutoff_mask
);
938 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
939 velecsum
= _mm_add_pd(velecsum
,velec
);
943 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
945 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
947 /* Calculate temporary vectorial force */
948 tx
= _mm_mul_pd(fscal
,dx11
);
949 ty
= _mm_mul_pd(fscal
,dy11
);
950 tz
= _mm_mul_pd(fscal
,dz11
);
952 /* Update vectorial force */
953 fix1
= _mm_add_pd(fix1
,tx
);
954 fiy1
= _mm_add_pd(fiy1
,ty
);
955 fiz1
= _mm_add_pd(fiz1
,tz
);
957 fjx1
= _mm_add_pd(fjx1
,tx
);
958 fjy1
= _mm_add_pd(fjy1
,ty
);
959 fjz1
= _mm_add_pd(fjz1
,tz
);
963 /**************************
964 * CALCULATE INTERACTIONS *
965 **************************/
967 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
970 /* REACTION-FIELD ELECTROSTATICS */
971 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
972 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
974 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
976 /* Update potential sum for this i atom from the interaction with this j atom. */
977 velec
= _mm_and_pd(velec
,cutoff_mask
);
978 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
979 velecsum
= _mm_add_pd(velecsum
,velec
);
983 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
985 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
987 /* Calculate temporary vectorial force */
988 tx
= _mm_mul_pd(fscal
,dx12
);
989 ty
= _mm_mul_pd(fscal
,dy12
);
990 tz
= _mm_mul_pd(fscal
,dz12
);
992 /* Update vectorial force */
993 fix1
= _mm_add_pd(fix1
,tx
);
994 fiy1
= _mm_add_pd(fiy1
,ty
);
995 fiz1
= _mm_add_pd(fiz1
,tz
);
997 fjx2
= _mm_add_pd(fjx2
,tx
);
998 fjy2
= _mm_add_pd(fjy2
,ty
);
999 fjz2
= _mm_add_pd(fjz2
,tz
);
1003 /**************************
1004 * CALCULATE INTERACTIONS *
1005 **************************/
1007 if (gmx_mm_any_lt(rsq20
,rcutoff2
))
1010 /* REACTION-FIELD ELECTROSTATICS */
1011 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_add_pd(rinv20
,_mm_mul_pd(krf
,rsq20
)),crf
));
1012 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
1014 cutoff_mask
= _mm_cmplt_pd(rsq20
,rcutoff2
);
1016 /* Update potential sum for this i atom from the interaction with this j atom. */
1017 velec
= _mm_and_pd(velec
,cutoff_mask
);
1018 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
1019 velecsum
= _mm_add_pd(velecsum
,velec
);
1023 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1025 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1027 /* Calculate temporary vectorial force */
1028 tx
= _mm_mul_pd(fscal
,dx20
);
1029 ty
= _mm_mul_pd(fscal
,dy20
);
1030 tz
= _mm_mul_pd(fscal
,dz20
);
1032 /* Update vectorial force */
1033 fix2
= _mm_add_pd(fix2
,tx
);
1034 fiy2
= _mm_add_pd(fiy2
,ty
);
1035 fiz2
= _mm_add_pd(fiz2
,tz
);
1037 fjx0
= _mm_add_pd(fjx0
,tx
);
1038 fjy0
= _mm_add_pd(fjy0
,ty
);
1039 fjz0
= _mm_add_pd(fjz0
,tz
);
1043 /**************************
1044 * CALCULATE INTERACTIONS *
1045 **************************/
1047 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
1050 /* REACTION-FIELD ELECTROSTATICS */
1051 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
1052 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1054 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
1056 /* Update potential sum for this i atom from the interaction with this j atom. */
1057 velec
= _mm_and_pd(velec
,cutoff_mask
);
1058 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
1059 velecsum
= _mm_add_pd(velecsum
,velec
);
1063 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1065 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1067 /* Calculate temporary vectorial force */
1068 tx
= _mm_mul_pd(fscal
,dx21
);
1069 ty
= _mm_mul_pd(fscal
,dy21
);
1070 tz
= _mm_mul_pd(fscal
,dz21
);
1072 /* Update vectorial force */
1073 fix2
= _mm_add_pd(fix2
,tx
);
1074 fiy2
= _mm_add_pd(fiy2
,ty
);
1075 fiz2
= _mm_add_pd(fiz2
,tz
);
1077 fjx1
= _mm_add_pd(fjx1
,tx
);
1078 fjy1
= _mm_add_pd(fjy1
,ty
);
1079 fjz1
= _mm_add_pd(fjz1
,tz
);
1083 /**************************
1084 * CALCULATE INTERACTIONS *
1085 **************************/
1087 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
1090 /* REACTION-FIELD ELECTROSTATICS */
1091 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
1092 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1094 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
1096 /* Update potential sum for this i atom from the interaction with this j atom. */
1097 velec
= _mm_and_pd(velec
,cutoff_mask
);
1098 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
1099 velecsum
= _mm_add_pd(velecsum
,velec
);
1103 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1105 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1107 /* Calculate temporary vectorial force */
1108 tx
= _mm_mul_pd(fscal
,dx22
);
1109 ty
= _mm_mul_pd(fscal
,dy22
);
1110 tz
= _mm_mul_pd(fscal
,dz22
);
1112 /* Update vectorial force */
1113 fix2
= _mm_add_pd(fix2
,tx
);
1114 fiy2
= _mm_add_pd(fiy2
,ty
);
1115 fiz2
= _mm_add_pd(fiz2
,tz
);
1117 fjx2
= _mm_add_pd(fjx2
,tx
);
1118 fjy2
= _mm_add_pd(fjy2
,ty
);
1119 fjz2
= _mm_add_pd(fjz2
,tz
);
1123 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1125 /* Inner loop uses 358 flops */
1128 /* End of innermost loop */
1130 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1131 f
+i_coord_offset
,fshift
+i_shift_offset
);
1134 /* Update potential energies */
1135 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
1136 gmx_mm_update_1pot_pd(vvdwsum
,kernel_data
->energygrp_vdw
+ggid
);
1138 /* Increment number of inner iterations */
1139 inneriter
+= j_index_end
- j_index_start
;
1141 /* Outer loop uses 20 flops */
1144 /* Increment number of outer iterations */
1147 /* Update outer/inner flops */
1149 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_VDW_W3W3_VF
,outeriter
*20 + inneriter
*358);
1152 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_double
1153 * Electrostatics interaction: ReactionField
1154 * VdW interaction: LennardJones
1155 * Geometry: Water3-Water3
1156 * Calculate force/pot: Force
1159 nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sse2_double
1160 (t_nblist
* gmx_restrict nlist
,
1161 rvec
* gmx_restrict xx
,
1162 rvec
* gmx_restrict ff
,
1163 struct t_forcerec
* gmx_restrict fr
,
1164 t_mdatoms
* gmx_restrict mdatoms
,
1165 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
1166 t_nrnb
* gmx_restrict nrnb
)
1168 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1169 * just 0 for non-waters.
1170 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1171 * jnr indices corresponding to data put in the four positions in the SIMD register.
1173 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
1174 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
1176 int j_coord_offsetA
,j_coord_offsetB
;
1177 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
1178 real rcutoff_scalar
;
1179 real
*shiftvec
,*fshift
,*x
,*f
;
1180 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
1182 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
1184 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
1186 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
1187 int vdwjidx0A
,vdwjidx0B
;
1188 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
1189 int vdwjidx1A
,vdwjidx1B
;
1190 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
1191 int vdwjidx2A
,vdwjidx2B
;
1192 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
1193 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
1194 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
1195 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
1196 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
1197 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
1198 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
1199 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
1200 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
1201 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
1202 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
1205 __m128d rinvsix
,rvdw
,vvdw
,vvdw6
,vvdw12
,fvdw
,fvdw6
,fvdw12
,vvdwsum
,sh_vdw_invrcut6
;
1208 __m128d one_sixth
= _mm_set1_pd(1.0/6.0);
1209 __m128d one_twelfth
= _mm_set1_pd(1.0/12.0);
1210 __m128d rswitch
,swV3
,swV4
,swV5
,swF2
,swF3
,swF4
,d
,d2
,sw
,dsw
;
1211 real rswitch_scalar
,d_scalar
;
1212 __m128d dummy_mask
,cutoff_mask
;
1213 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1214 __m128d one
= _mm_set1_pd(1.0);
1215 __m128d two
= _mm_set1_pd(2.0);
1221 jindex
= nlist
->jindex
;
1223 shiftidx
= nlist
->shift
;
1225 shiftvec
= fr
->shift_vec
[0];
1226 fshift
= fr
->fshift
[0];
1227 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
1228 charge
= mdatoms
->chargeA
;
1229 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
1230 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
1231 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
1232 nvdwtype
= fr
->ntype
;
1233 vdwparam
= fr
->nbfp
;
1234 vdwtype
= mdatoms
->typeA
;
1236 /* Setup water-specific parameters */
1237 inr
= nlist
->iinr
[0];
1238 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
1239 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
1240 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
1241 vdwioffset0
= 2*nvdwtype
*vdwtype
[inr
+0];
1243 jq0
= _mm_set1_pd(charge
[inr
+0]);
1244 jq1
= _mm_set1_pd(charge
[inr
+1]);
1245 jq2
= _mm_set1_pd(charge
[inr
+2]);
1246 vdwjidx0A
= 2*vdwtype
[inr
+0];
1247 qq00
= _mm_mul_pd(iq0
,jq0
);
1248 c6_00
= _mm_set1_pd(vdwparam
[vdwioffset0
+vdwjidx0A
]);
1249 c12_00
= _mm_set1_pd(vdwparam
[vdwioffset0
+vdwjidx0A
+1]);
1250 qq01
= _mm_mul_pd(iq0
,jq1
);
1251 qq02
= _mm_mul_pd(iq0
,jq2
);
1252 qq10
= _mm_mul_pd(iq1
,jq0
);
1253 qq11
= _mm_mul_pd(iq1
,jq1
);
1254 qq12
= _mm_mul_pd(iq1
,jq2
);
1255 qq20
= _mm_mul_pd(iq2
,jq0
);
1256 qq21
= _mm_mul_pd(iq2
,jq1
);
1257 qq22
= _mm_mul_pd(iq2
,jq2
);
1259 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1260 rcutoff_scalar
= fr
->ic
->rcoulomb
;
1261 rcutoff
= _mm_set1_pd(rcutoff_scalar
);
1262 rcutoff2
= _mm_mul_pd(rcutoff
,rcutoff
);
1264 rswitch_scalar
= fr
->ic
->rvdw_switch
;
1265 rswitch
= _mm_set1_pd(rswitch_scalar
);
1266 /* Setup switch parameters */
1267 d_scalar
= rcutoff_scalar
-rswitch_scalar
;
1268 d
= _mm_set1_pd(d_scalar
);
1269 swV3
= _mm_set1_pd(-10.0/(d_scalar
*d_scalar
*d_scalar
));
1270 swV4
= _mm_set1_pd( 15.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
1271 swV5
= _mm_set1_pd( -6.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
1272 swF2
= _mm_set1_pd(-30.0/(d_scalar
*d_scalar
*d_scalar
));
1273 swF3
= _mm_set1_pd( 60.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
));
1274 swF4
= _mm_set1_pd(-30.0/(d_scalar
*d_scalar
*d_scalar
*d_scalar
*d_scalar
));
1276 /* Avoid stupid compiler warnings */
1278 j_coord_offsetA
= 0;
1279 j_coord_offsetB
= 0;
1284 /* Start outer loop over neighborlists */
1285 for(iidx
=0; iidx
<nri
; iidx
++)
1287 /* Load shift vector for this list */
1288 i_shift_offset
= DIM
*shiftidx
[iidx
];
1290 /* Load limits for loop over neighbors */
1291 j_index_start
= jindex
[iidx
];
1292 j_index_end
= jindex
[iidx
+1];
1294 /* Get outer coordinate index */
1296 i_coord_offset
= DIM
*inr
;
1298 /* Load i particle coords and add shift vector */
1299 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
1300 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
1302 fix0
= _mm_setzero_pd();
1303 fiy0
= _mm_setzero_pd();
1304 fiz0
= _mm_setzero_pd();
1305 fix1
= _mm_setzero_pd();
1306 fiy1
= _mm_setzero_pd();
1307 fiz1
= _mm_setzero_pd();
1308 fix2
= _mm_setzero_pd();
1309 fiy2
= _mm_setzero_pd();
1310 fiz2
= _mm_setzero_pd();
1312 /* Start inner kernel loop */
1313 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
1316 /* Get j neighbor index, and coordinate index */
1318 jnrB
= jjnr
[jidx
+1];
1319 j_coord_offsetA
= DIM
*jnrA
;
1320 j_coord_offsetB
= DIM
*jnrB
;
1322 /* load j atom coordinates */
1323 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
1324 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1326 /* Calculate displacement vector */
1327 dx00
= _mm_sub_pd(ix0
,jx0
);
1328 dy00
= _mm_sub_pd(iy0
,jy0
);
1329 dz00
= _mm_sub_pd(iz0
,jz0
);
1330 dx01
= _mm_sub_pd(ix0
,jx1
);
1331 dy01
= _mm_sub_pd(iy0
,jy1
);
1332 dz01
= _mm_sub_pd(iz0
,jz1
);
1333 dx02
= _mm_sub_pd(ix0
,jx2
);
1334 dy02
= _mm_sub_pd(iy0
,jy2
);
1335 dz02
= _mm_sub_pd(iz0
,jz2
);
1336 dx10
= _mm_sub_pd(ix1
,jx0
);
1337 dy10
= _mm_sub_pd(iy1
,jy0
);
1338 dz10
= _mm_sub_pd(iz1
,jz0
);
1339 dx11
= _mm_sub_pd(ix1
,jx1
);
1340 dy11
= _mm_sub_pd(iy1
,jy1
);
1341 dz11
= _mm_sub_pd(iz1
,jz1
);
1342 dx12
= _mm_sub_pd(ix1
,jx2
);
1343 dy12
= _mm_sub_pd(iy1
,jy2
);
1344 dz12
= _mm_sub_pd(iz1
,jz2
);
1345 dx20
= _mm_sub_pd(ix2
,jx0
);
1346 dy20
= _mm_sub_pd(iy2
,jy0
);
1347 dz20
= _mm_sub_pd(iz2
,jz0
);
1348 dx21
= _mm_sub_pd(ix2
,jx1
);
1349 dy21
= _mm_sub_pd(iy2
,jy1
);
1350 dz21
= _mm_sub_pd(iz2
,jz1
);
1351 dx22
= _mm_sub_pd(ix2
,jx2
);
1352 dy22
= _mm_sub_pd(iy2
,jy2
);
1353 dz22
= _mm_sub_pd(iz2
,jz2
);
1355 /* Calculate squared distance and things based on it */
1356 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1357 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1358 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1359 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1360 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1361 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1362 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1363 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1364 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1366 rinv00
= sse2_invsqrt_d(rsq00
);
1367 rinv01
= sse2_invsqrt_d(rsq01
);
1368 rinv02
= sse2_invsqrt_d(rsq02
);
1369 rinv10
= sse2_invsqrt_d(rsq10
);
1370 rinv11
= sse2_invsqrt_d(rsq11
);
1371 rinv12
= sse2_invsqrt_d(rsq12
);
1372 rinv20
= sse2_invsqrt_d(rsq20
);
1373 rinv21
= sse2_invsqrt_d(rsq21
);
1374 rinv22
= sse2_invsqrt_d(rsq22
);
1376 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1377 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1378 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1379 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1380 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1381 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1382 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1383 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1384 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1386 fjx0
= _mm_setzero_pd();
1387 fjy0
= _mm_setzero_pd();
1388 fjz0
= _mm_setzero_pd();
1389 fjx1
= _mm_setzero_pd();
1390 fjy1
= _mm_setzero_pd();
1391 fjz1
= _mm_setzero_pd();
1392 fjx2
= _mm_setzero_pd();
1393 fjy2
= _mm_setzero_pd();
1394 fjz2
= _mm_setzero_pd();
1396 /**************************
1397 * CALCULATE INTERACTIONS *
1398 **************************/
1400 if (gmx_mm_any_lt(rsq00
,rcutoff2
))
1403 r00
= _mm_mul_pd(rsq00
,rinv00
);
1405 /* REACTION-FIELD ELECTROSTATICS */
1406 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
1408 /* LENNARD-JONES DISPERSION/REPULSION */
1410 rinvsix
= _mm_mul_pd(_mm_mul_pd(rinvsq00
,rinvsq00
),rinvsq00
);
1411 vvdw6
= _mm_mul_pd(c6_00
,rinvsix
);
1412 vvdw12
= _mm_mul_pd(c12_00
,_mm_mul_pd(rinvsix
,rinvsix
));
1413 vvdw
= _mm_sub_pd( _mm_mul_pd(vvdw12
,one_twelfth
) , _mm_mul_pd(vvdw6
,one_sixth
) );
1414 fvdw
= _mm_mul_pd(_mm_sub_pd(vvdw12
,vvdw6
),rinvsq00
);
1416 d
= _mm_sub_pd(r00
,rswitch
);
1417 d
= _mm_max_pd(d
,_mm_setzero_pd());
1418 d2
= _mm_mul_pd(d
,d
);
1419 sw
= _mm_add_pd(one
,_mm_mul_pd(d2
,_mm_mul_pd(d
,_mm_add_pd(swV3
,_mm_mul_pd(d
,_mm_add_pd(swV4
,_mm_mul_pd(d
,swV5
)))))));
1421 dsw
= _mm_mul_pd(d2
,_mm_add_pd(swF2
,_mm_mul_pd(d
,_mm_add_pd(swF3
,_mm_mul_pd(d
,swF4
)))));
1423 /* Evaluate switch function */
1424 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1425 fvdw
= _mm_sub_pd( _mm_mul_pd(fvdw
,sw
) , _mm_mul_pd(rinv00
,_mm_mul_pd(vvdw
,dsw
)) );
1426 cutoff_mask
= _mm_cmplt_pd(rsq00
,rcutoff2
);
1428 fscal
= _mm_add_pd(felec
,fvdw
);
1430 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1432 /* Calculate temporary vectorial force */
1433 tx
= _mm_mul_pd(fscal
,dx00
);
1434 ty
= _mm_mul_pd(fscal
,dy00
);
1435 tz
= _mm_mul_pd(fscal
,dz00
);
1437 /* Update vectorial force */
1438 fix0
= _mm_add_pd(fix0
,tx
);
1439 fiy0
= _mm_add_pd(fiy0
,ty
);
1440 fiz0
= _mm_add_pd(fiz0
,tz
);
1442 fjx0
= _mm_add_pd(fjx0
,tx
);
1443 fjy0
= _mm_add_pd(fjy0
,ty
);
1444 fjz0
= _mm_add_pd(fjz0
,tz
);
1448 /**************************
1449 * CALCULATE INTERACTIONS *
1450 **************************/
1452 if (gmx_mm_any_lt(rsq01
,rcutoff2
))
1455 /* REACTION-FIELD ELECTROSTATICS */
1456 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
1458 cutoff_mask
= _mm_cmplt_pd(rsq01
,rcutoff2
);
1462 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1464 /* Calculate temporary vectorial force */
1465 tx
= _mm_mul_pd(fscal
,dx01
);
1466 ty
= _mm_mul_pd(fscal
,dy01
);
1467 tz
= _mm_mul_pd(fscal
,dz01
);
1469 /* Update vectorial force */
1470 fix0
= _mm_add_pd(fix0
,tx
);
1471 fiy0
= _mm_add_pd(fiy0
,ty
);
1472 fiz0
= _mm_add_pd(fiz0
,tz
);
1474 fjx1
= _mm_add_pd(fjx1
,tx
);
1475 fjy1
= _mm_add_pd(fjy1
,ty
);
1476 fjz1
= _mm_add_pd(fjz1
,tz
);
1480 /**************************
1481 * CALCULATE INTERACTIONS *
1482 **************************/
1484 if (gmx_mm_any_lt(rsq02
,rcutoff2
))
1487 /* REACTION-FIELD ELECTROSTATICS */
1488 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
1490 cutoff_mask
= _mm_cmplt_pd(rsq02
,rcutoff2
);
1494 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1496 /* Calculate temporary vectorial force */
1497 tx
= _mm_mul_pd(fscal
,dx02
);
1498 ty
= _mm_mul_pd(fscal
,dy02
);
1499 tz
= _mm_mul_pd(fscal
,dz02
);
1501 /* Update vectorial force */
1502 fix0
= _mm_add_pd(fix0
,tx
);
1503 fiy0
= _mm_add_pd(fiy0
,ty
);
1504 fiz0
= _mm_add_pd(fiz0
,tz
);
1506 fjx2
= _mm_add_pd(fjx2
,tx
);
1507 fjy2
= _mm_add_pd(fjy2
,ty
);
1508 fjz2
= _mm_add_pd(fjz2
,tz
);
1512 /**************************
1513 * CALCULATE INTERACTIONS *
1514 **************************/
1516 if (gmx_mm_any_lt(rsq10
,rcutoff2
))
1519 /* REACTION-FIELD ELECTROSTATICS */
1520 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
1522 cutoff_mask
= _mm_cmplt_pd(rsq10
,rcutoff2
);
1526 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1528 /* Calculate temporary vectorial force */
1529 tx
= _mm_mul_pd(fscal
,dx10
);
1530 ty
= _mm_mul_pd(fscal
,dy10
);
1531 tz
= _mm_mul_pd(fscal
,dz10
);
1533 /* Update vectorial force */
1534 fix1
= _mm_add_pd(fix1
,tx
);
1535 fiy1
= _mm_add_pd(fiy1
,ty
);
1536 fiz1
= _mm_add_pd(fiz1
,tz
);
1538 fjx0
= _mm_add_pd(fjx0
,tx
);
1539 fjy0
= _mm_add_pd(fjy0
,ty
);
1540 fjz0
= _mm_add_pd(fjz0
,tz
);
1544 /**************************
1545 * CALCULATE INTERACTIONS *
1546 **************************/
1548 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
1551 /* REACTION-FIELD ELECTROSTATICS */
1552 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1554 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
1558 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1560 /* Calculate temporary vectorial force */
1561 tx
= _mm_mul_pd(fscal
,dx11
);
1562 ty
= _mm_mul_pd(fscal
,dy11
);
1563 tz
= _mm_mul_pd(fscal
,dz11
);
1565 /* Update vectorial force */
1566 fix1
= _mm_add_pd(fix1
,tx
);
1567 fiy1
= _mm_add_pd(fiy1
,ty
);
1568 fiz1
= _mm_add_pd(fiz1
,tz
);
1570 fjx1
= _mm_add_pd(fjx1
,tx
);
1571 fjy1
= _mm_add_pd(fjy1
,ty
);
1572 fjz1
= _mm_add_pd(fjz1
,tz
);
1576 /**************************
1577 * CALCULATE INTERACTIONS *
1578 **************************/
1580 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
1583 /* REACTION-FIELD ELECTROSTATICS */
1584 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1586 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
1590 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1592 /* Calculate temporary vectorial force */
1593 tx
= _mm_mul_pd(fscal
,dx12
);
1594 ty
= _mm_mul_pd(fscal
,dy12
);
1595 tz
= _mm_mul_pd(fscal
,dz12
);
1597 /* Update vectorial force */
1598 fix1
= _mm_add_pd(fix1
,tx
);
1599 fiy1
= _mm_add_pd(fiy1
,ty
);
1600 fiz1
= _mm_add_pd(fiz1
,tz
);
1602 fjx2
= _mm_add_pd(fjx2
,tx
);
1603 fjy2
= _mm_add_pd(fjy2
,ty
);
1604 fjz2
= _mm_add_pd(fjz2
,tz
);
1608 /**************************
1609 * CALCULATE INTERACTIONS *
1610 **************************/
1612 if (gmx_mm_any_lt(rsq20
,rcutoff2
))
1615 /* REACTION-FIELD ELECTROSTATICS */
1616 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
1618 cutoff_mask
= _mm_cmplt_pd(rsq20
,rcutoff2
);
1622 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1624 /* Calculate temporary vectorial force */
1625 tx
= _mm_mul_pd(fscal
,dx20
);
1626 ty
= _mm_mul_pd(fscal
,dy20
);
1627 tz
= _mm_mul_pd(fscal
,dz20
);
1629 /* Update vectorial force */
1630 fix2
= _mm_add_pd(fix2
,tx
);
1631 fiy2
= _mm_add_pd(fiy2
,ty
);
1632 fiz2
= _mm_add_pd(fiz2
,tz
);
1634 fjx0
= _mm_add_pd(fjx0
,tx
);
1635 fjy0
= _mm_add_pd(fjy0
,ty
);
1636 fjz0
= _mm_add_pd(fjz0
,tz
);
1640 /**************************
1641 * CALCULATE INTERACTIONS *
1642 **************************/
1644 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
1647 /* REACTION-FIELD ELECTROSTATICS */
1648 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1650 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
1654 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1656 /* Calculate temporary vectorial force */
1657 tx
= _mm_mul_pd(fscal
,dx21
);
1658 ty
= _mm_mul_pd(fscal
,dy21
);
1659 tz
= _mm_mul_pd(fscal
,dz21
);
1661 /* Update vectorial force */
1662 fix2
= _mm_add_pd(fix2
,tx
);
1663 fiy2
= _mm_add_pd(fiy2
,ty
);
1664 fiz2
= _mm_add_pd(fiz2
,tz
);
1666 fjx1
= _mm_add_pd(fjx1
,tx
);
1667 fjy1
= _mm_add_pd(fjy1
,ty
);
1668 fjz1
= _mm_add_pd(fjz1
,tz
);
1672 /**************************
1673 * CALCULATE INTERACTIONS *
1674 **************************/
1676 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
1679 /* REACTION-FIELD ELECTROSTATICS */
1680 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1682 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
1686 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1688 /* Calculate temporary vectorial force */
1689 tx
= _mm_mul_pd(fscal
,dx22
);
1690 ty
= _mm_mul_pd(fscal
,dy22
);
1691 tz
= _mm_mul_pd(fscal
,dz22
);
1693 /* Update vectorial force */
1694 fix2
= _mm_add_pd(fix2
,tx
);
1695 fiy2
= _mm_add_pd(fiy2
,ty
);
1696 fiz2
= _mm_add_pd(fiz2
,tz
);
1698 fjx2
= _mm_add_pd(fjx2
,tx
);
1699 fjy2
= _mm_add_pd(fjy2
,ty
);
1700 fjz2
= _mm_add_pd(fjz2
,tz
);
1704 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1706 /* Inner loop uses 301 flops */
1709 if(jidx
<j_index_end
)
1713 j_coord_offsetA
= DIM
*jnrA
;
1715 /* load j atom coordinates */
1716 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
1717 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1719 /* Calculate displacement vector */
1720 dx00
= _mm_sub_pd(ix0
,jx0
);
1721 dy00
= _mm_sub_pd(iy0
,jy0
);
1722 dz00
= _mm_sub_pd(iz0
,jz0
);
1723 dx01
= _mm_sub_pd(ix0
,jx1
);
1724 dy01
= _mm_sub_pd(iy0
,jy1
);
1725 dz01
= _mm_sub_pd(iz0
,jz1
);
1726 dx02
= _mm_sub_pd(ix0
,jx2
);
1727 dy02
= _mm_sub_pd(iy0
,jy2
);
1728 dz02
= _mm_sub_pd(iz0
,jz2
);
1729 dx10
= _mm_sub_pd(ix1
,jx0
);
1730 dy10
= _mm_sub_pd(iy1
,jy0
);
1731 dz10
= _mm_sub_pd(iz1
,jz0
);
1732 dx11
= _mm_sub_pd(ix1
,jx1
);
1733 dy11
= _mm_sub_pd(iy1
,jy1
);
1734 dz11
= _mm_sub_pd(iz1
,jz1
);
1735 dx12
= _mm_sub_pd(ix1
,jx2
);
1736 dy12
= _mm_sub_pd(iy1
,jy2
);
1737 dz12
= _mm_sub_pd(iz1
,jz2
);
1738 dx20
= _mm_sub_pd(ix2
,jx0
);
1739 dy20
= _mm_sub_pd(iy2
,jy0
);
1740 dz20
= _mm_sub_pd(iz2
,jz0
);
1741 dx21
= _mm_sub_pd(ix2
,jx1
);
1742 dy21
= _mm_sub_pd(iy2
,jy1
);
1743 dz21
= _mm_sub_pd(iz2
,jz1
);
1744 dx22
= _mm_sub_pd(ix2
,jx2
);
1745 dy22
= _mm_sub_pd(iy2
,jy2
);
1746 dz22
= _mm_sub_pd(iz2
,jz2
);
1748 /* Calculate squared distance and things based on it */
1749 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1750 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1751 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1752 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1753 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1754 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1755 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1756 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1757 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1759 rinv00
= sse2_invsqrt_d(rsq00
);
1760 rinv01
= sse2_invsqrt_d(rsq01
);
1761 rinv02
= sse2_invsqrt_d(rsq02
);
1762 rinv10
= sse2_invsqrt_d(rsq10
);
1763 rinv11
= sse2_invsqrt_d(rsq11
);
1764 rinv12
= sse2_invsqrt_d(rsq12
);
1765 rinv20
= sse2_invsqrt_d(rsq20
);
1766 rinv21
= sse2_invsqrt_d(rsq21
);
1767 rinv22
= sse2_invsqrt_d(rsq22
);
1769 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1770 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1771 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1772 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1773 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1774 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1775 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1776 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1777 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1779 fjx0
= _mm_setzero_pd();
1780 fjy0
= _mm_setzero_pd();
1781 fjz0
= _mm_setzero_pd();
1782 fjx1
= _mm_setzero_pd();
1783 fjy1
= _mm_setzero_pd();
1784 fjz1
= _mm_setzero_pd();
1785 fjx2
= _mm_setzero_pd();
1786 fjy2
= _mm_setzero_pd();
1787 fjz2
= _mm_setzero_pd();
1789 /**************************
1790 * CALCULATE INTERACTIONS *
1791 **************************/
1793 if (gmx_mm_any_lt(rsq00
,rcutoff2
))
1796 r00
= _mm_mul_pd(rsq00
,rinv00
);
1798 /* REACTION-FIELD ELECTROSTATICS */
1799 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
1801 /* LENNARD-JONES DISPERSION/REPULSION */
1803 rinvsix
= _mm_mul_pd(_mm_mul_pd(rinvsq00
,rinvsq00
),rinvsq00
);
1804 vvdw6
= _mm_mul_pd(c6_00
,rinvsix
);
1805 vvdw12
= _mm_mul_pd(c12_00
,_mm_mul_pd(rinvsix
,rinvsix
));
1806 vvdw
= _mm_sub_pd( _mm_mul_pd(vvdw12
,one_twelfth
) , _mm_mul_pd(vvdw6
,one_sixth
) );
1807 fvdw
= _mm_mul_pd(_mm_sub_pd(vvdw12
,vvdw6
),rinvsq00
);
1809 d
= _mm_sub_pd(r00
,rswitch
);
1810 d
= _mm_max_pd(d
,_mm_setzero_pd());
1811 d2
= _mm_mul_pd(d
,d
);
1812 sw
= _mm_add_pd(one
,_mm_mul_pd(d2
,_mm_mul_pd(d
,_mm_add_pd(swV3
,_mm_mul_pd(d
,_mm_add_pd(swV4
,_mm_mul_pd(d
,swV5
)))))));
1814 dsw
= _mm_mul_pd(d2
,_mm_add_pd(swF2
,_mm_mul_pd(d
,_mm_add_pd(swF3
,_mm_mul_pd(d
,swF4
)))));
1816 /* Evaluate switch function */
1817 /* fscal'=f'/r=-(v*sw)'/r=-(v'*sw+v*dsw)/r=-v'*sw/r-v*dsw/r=fscal*sw-v*dsw/r */
1818 fvdw
= _mm_sub_pd( _mm_mul_pd(fvdw
,sw
) , _mm_mul_pd(rinv00
,_mm_mul_pd(vvdw
,dsw
)) );
1819 cutoff_mask
= _mm_cmplt_pd(rsq00
,rcutoff2
);
1821 fscal
= _mm_add_pd(felec
,fvdw
);
1823 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1825 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1827 /* Calculate temporary vectorial force */
1828 tx
= _mm_mul_pd(fscal
,dx00
);
1829 ty
= _mm_mul_pd(fscal
,dy00
);
1830 tz
= _mm_mul_pd(fscal
,dz00
);
1832 /* Update vectorial force */
1833 fix0
= _mm_add_pd(fix0
,tx
);
1834 fiy0
= _mm_add_pd(fiy0
,ty
);
1835 fiz0
= _mm_add_pd(fiz0
,tz
);
1837 fjx0
= _mm_add_pd(fjx0
,tx
);
1838 fjy0
= _mm_add_pd(fjy0
,ty
);
1839 fjz0
= _mm_add_pd(fjz0
,tz
);
1843 /**************************
1844 * CALCULATE INTERACTIONS *
1845 **************************/
1847 if (gmx_mm_any_lt(rsq01
,rcutoff2
))
1850 /* REACTION-FIELD ELECTROSTATICS */
1851 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
1853 cutoff_mask
= _mm_cmplt_pd(rsq01
,rcutoff2
);
1857 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1859 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1861 /* Calculate temporary vectorial force */
1862 tx
= _mm_mul_pd(fscal
,dx01
);
1863 ty
= _mm_mul_pd(fscal
,dy01
);
1864 tz
= _mm_mul_pd(fscal
,dz01
);
1866 /* Update vectorial force */
1867 fix0
= _mm_add_pd(fix0
,tx
);
1868 fiy0
= _mm_add_pd(fiy0
,ty
);
1869 fiz0
= _mm_add_pd(fiz0
,tz
);
1871 fjx1
= _mm_add_pd(fjx1
,tx
);
1872 fjy1
= _mm_add_pd(fjy1
,ty
);
1873 fjz1
= _mm_add_pd(fjz1
,tz
);
1877 /**************************
1878 * CALCULATE INTERACTIONS *
1879 **************************/
1881 if (gmx_mm_any_lt(rsq02
,rcutoff2
))
1884 /* REACTION-FIELD ELECTROSTATICS */
1885 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
1887 cutoff_mask
= _mm_cmplt_pd(rsq02
,rcutoff2
);
1891 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1893 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1895 /* Calculate temporary vectorial force */
1896 tx
= _mm_mul_pd(fscal
,dx02
);
1897 ty
= _mm_mul_pd(fscal
,dy02
);
1898 tz
= _mm_mul_pd(fscal
,dz02
);
1900 /* Update vectorial force */
1901 fix0
= _mm_add_pd(fix0
,tx
);
1902 fiy0
= _mm_add_pd(fiy0
,ty
);
1903 fiz0
= _mm_add_pd(fiz0
,tz
);
1905 fjx2
= _mm_add_pd(fjx2
,tx
);
1906 fjy2
= _mm_add_pd(fjy2
,ty
);
1907 fjz2
= _mm_add_pd(fjz2
,tz
);
1911 /**************************
1912 * CALCULATE INTERACTIONS *
1913 **************************/
1915 if (gmx_mm_any_lt(rsq10
,rcutoff2
))
1918 /* REACTION-FIELD ELECTROSTATICS */
1919 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
1921 cutoff_mask
= _mm_cmplt_pd(rsq10
,rcutoff2
);
1925 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1927 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1929 /* Calculate temporary vectorial force */
1930 tx
= _mm_mul_pd(fscal
,dx10
);
1931 ty
= _mm_mul_pd(fscal
,dy10
);
1932 tz
= _mm_mul_pd(fscal
,dz10
);
1934 /* Update vectorial force */
1935 fix1
= _mm_add_pd(fix1
,tx
);
1936 fiy1
= _mm_add_pd(fiy1
,ty
);
1937 fiz1
= _mm_add_pd(fiz1
,tz
);
1939 fjx0
= _mm_add_pd(fjx0
,tx
);
1940 fjy0
= _mm_add_pd(fjy0
,ty
);
1941 fjz0
= _mm_add_pd(fjz0
,tz
);
1945 /**************************
1946 * CALCULATE INTERACTIONS *
1947 **************************/
1949 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
1952 /* REACTION-FIELD ELECTROSTATICS */
1953 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1955 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
1959 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1961 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1963 /* Calculate temporary vectorial force */
1964 tx
= _mm_mul_pd(fscal
,dx11
);
1965 ty
= _mm_mul_pd(fscal
,dy11
);
1966 tz
= _mm_mul_pd(fscal
,dz11
);
1968 /* Update vectorial force */
1969 fix1
= _mm_add_pd(fix1
,tx
);
1970 fiy1
= _mm_add_pd(fiy1
,ty
);
1971 fiz1
= _mm_add_pd(fiz1
,tz
);
1973 fjx1
= _mm_add_pd(fjx1
,tx
);
1974 fjy1
= _mm_add_pd(fjy1
,ty
);
1975 fjz1
= _mm_add_pd(fjz1
,tz
);
1979 /**************************
1980 * CALCULATE INTERACTIONS *
1981 **************************/
1983 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
1986 /* REACTION-FIELD ELECTROSTATICS */
1987 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1989 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
1993 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1995 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1997 /* Calculate temporary vectorial force */
1998 tx
= _mm_mul_pd(fscal
,dx12
);
1999 ty
= _mm_mul_pd(fscal
,dy12
);
2000 tz
= _mm_mul_pd(fscal
,dz12
);
2002 /* Update vectorial force */
2003 fix1
= _mm_add_pd(fix1
,tx
);
2004 fiy1
= _mm_add_pd(fiy1
,ty
);
2005 fiz1
= _mm_add_pd(fiz1
,tz
);
2007 fjx2
= _mm_add_pd(fjx2
,tx
);
2008 fjy2
= _mm_add_pd(fjy2
,ty
);
2009 fjz2
= _mm_add_pd(fjz2
,tz
);
2013 /**************************
2014 * CALCULATE INTERACTIONS *
2015 **************************/
2017 if (gmx_mm_any_lt(rsq20
,rcutoff2
))
2020 /* REACTION-FIELD ELECTROSTATICS */
2021 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
2023 cutoff_mask
= _mm_cmplt_pd(rsq20
,rcutoff2
);
2027 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
2029 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
2031 /* Calculate temporary vectorial force */
2032 tx
= _mm_mul_pd(fscal
,dx20
);
2033 ty
= _mm_mul_pd(fscal
,dy20
);
2034 tz
= _mm_mul_pd(fscal
,dz20
);
2036 /* Update vectorial force */
2037 fix2
= _mm_add_pd(fix2
,tx
);
2038 fiy2
= _mm_add_pd(fiy2
,ty
);
2039 fiz2
= _mm_add_pd(fiz2
,tz
);
2041 fjx0
= _mm_add_pd(fjx0
,tx
);
2042 fjy0
= _mm_add_pd(fjy0
,ty
);
2043 fjz0
= _mm_add_pd(fjz0
,tz
);
2047 /**************************
2048 * CALCULATE INTERACTIONS *
2049 **************************/
2051 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
2054 /* REACTION-FIELD ELECTROSTATICS */
2055 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
2057 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
2061 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
2063 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
2065 /* Calculate temporary vectorial force */
2066 tx
= _mm_mul_pd(fscal
,dx21
);
2067 ty
= _mm_mul_pd(fscal
,dy21
);
2068 tz
= _mm_mul_pd(fscal
,dz21
);
2070 /* Update vectorial force */
2071 fix2
= _mm_add_pd(fix2
,tx
);
2072 fiy2
= _mm_add_pd(fiy2
,ty
);
2073 fiz2
= _mm_add_pd(fiz2
,tz
);
2075 fjx1
= _mm_add_pd(fjx1
,tx
);
2076 fjy1
= _mm_add_pd(fjy1
,ty
);
2077 fjz1
= _mm_add_pd(fjz1
,tz
);
2081 /**************************
2082 * CALCULATE INTERACTIONS *
2083 **************************/
2085 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
2088 /* REACTION-FIELD ELECTROSTATICS */
2089 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
2091 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
2095 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
2097 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
2099 /* Calculate temporary vectorial force */
2100 tx
= _mm_mul_pd(fscal
,dx22
);
2101 ty
= _mm_mul_pd(fscal
,dy22
);
2102 tz
= _mm_mul_pd(fscal
,dz22
);
2104 /* Update vectorial force */
2105 fix2
= _mm_add_pd(fix2
,tx
);
2106 fiy2
= _mm_add_pd(fiy2
,ty
);
2107 fiz2
= _mm_add_pd(fiz2
,tz
);
2109 fjx2
= _mm_add_pd(fjx2
,tx
);
2110 fjy2
= _mm_add_pd(fjy2
,ty
);
2111 fjz2
= _mm_add_pd(fjz2
,tz
);
2115 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
2117 /* Inner loop uses 301 flops */
2120 /* End of innermost loop */
2122 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
2123 f
+i_coord_offset
,fshift
+i_shift_offset
);
2125 /* Increment number of inner iterations */
2126 inneriter
+= j_index_end
- j_index_start
;
2128 /* Outer loop uses 18 flops */
2131 /* Increment number of outer iterations */
2134 /* Update outer/inner flops */
2136 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_VDW_W3W3_F
,outeriter
*18 + inneriter
*301);