2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS sse2_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/gmxlib/nrnb.h"
47 #include "kernelutil_x86_sse2_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: None
53 * Geometry: Water4-Water4
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sse2_double
58 (t_nblist
* gmx_restrict nlist
,
59 rvec
* gmx_restrict xx
,
60 rvec
* gmx_restrict ff
,
61 struct t_forcerec
* gmx_restrict fr
,
62 t_mdatoms
* gmx_restrict mdatoms
,
63 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
64 t_nrnb
* gmx_restrict nrnb
)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
72 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
74 int j_coord_offsetA
,j_coord_offsetB
;
75 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
77 real
*shiftvec
,*fshift
,*x
,*f
;
78 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
80 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
82 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
84 __m128d ix3
,iy3
,iz3
,fix3
,fiy3
,fiz3
,iq3
,isai3
;
85 int vdwjidx1A
,vdwjidx1B
;
86 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
87 int vdwjidx2A
,vdwjidx2B
;
88 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
89 int vdwjidx3A
,vdwjidx3B
;
90 __m128d jx3
,jy3
,jz3
,fjx3
,fjy3
,fjz3
,jq3
,isaj3
;
91 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
92 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
93 __m128d dx13
,dy13
,dz13
,rsq13
,rinv13
,rinvsq13
,r13
,qq13
,c6_13
,c12_13
;
94 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
95 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
96 __m128d dx23
,dy23
,dz23
,rsq23
,rinv23
,rinvsq23
,r23
,qq23
,c6_23
,c12_23
;
97 __m128d dx31
,dy31
,dz31
,rsq31
,rinv31
,rinvsq31
,r31
,qq31
,c6_31
,c12_31
;
98 __m128d dx32
,dy32
,dz32
,rsq32
,rinv32
,rinvsq32
,r32
,qq32
,c6_32
,c12_32
;
99 __m128d dx33
,dy33
,dz33
,rsq33
,rinv33
,rinvsq33
,r33
,qq33
,c6_33
,c12_33
;
100 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
102 __m128d dummy_mask
,cutoff_mask
;
103 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
104 __m128d one
= _mm_set1_pd(1.0);
105 __m128d two
= _mm_set1_pd(2.0);
111 jindex
= nlist
->jindex
;
113 shiftidx
= nlist
->shift
;
115 shiftvec
= fr
->shift_vec
[0];
116 fshift
= fr
->fshift
[0];
117 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
118 charge
= mdatoms
->chargeA
;
119 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
120 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
121 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
123 /* Setup water-specific parameters */
124 inr
= nlist
->iinr
[0];
125 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
126 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
127 iq3
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+3]));
129 jq1
= _mm_set1_pd(charge
[inr
+1]);
130 jq2
= _mm_set1_pd(charge
[inr
+2]);
131 jq3
= _mm_set1_pd(charge
[inr
+3]);
132 qq11
= _mm_mul_pd(iq1
,jq1
);
133 qq12
= _mm_mul_pd(iq1
,jq2
);
134 qq13
= _mm_mul_pd(iq1
,jq3
);
135 qq21
= _mm_mul_pd(iq2
,jq1
);
136 qq22
= _mm_mul_pd(iq2
,jq2
);
137 qq23
= _mm_mul_pd(iq2
,jq3
);
138 qq31
= _mm_mul_pd(iq3
,jq1
);
139 qq32
= _mm_mul_pd(iq3
,jq2
);
140 qq33
= _mm_mul_pd(iq3
,jq3
);
142 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
143 rcutoff_scalar
= fr
->ic
->rcoulomb
;
144 rcutoff
= _mm_set1_pd(rcutoff_scalar
);
145 rcutoff2
= _mm_mul_pd(rcutoff
,rcutoff
);
147 /* Avoid stupid compiler warnings */
155 /* Start outer loop over neighborlists */
156 for(iidx
=0; iidx
<nri
; iidx
++)
158 /* Load shift vector for this list */
159 i_shift_offset
= DIM
*shiftidx
[iidx
];
161 /* Load limits for loop over neighbors */
162 j_index_start
= jindex
[iidx
];
163 j_index_end
= jindex
[iidx
+1];
165 /* Get outer coordinate index */
167 i_coord_offset
= DIM
*inr
;
169 /* Load i particle coords and add shift vector */
170 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
+DIM
,
171 &ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
173 fix1
= _mm_setzero_pd();
174 fiy1
= _mm_setzero_pd();
175 fiz1
= _mm_setzero_pd();
176 fix2
= _mm_setzero_pd();
177 fiy2
= _mm_setzero_pd();
178 fiz2
= _mm_setzero_pd();
179 fix3
= _mm_setzero_pd();
180 fiy3
= _mm_setzero_pd();
181 fiz3
= _mm_setzero_pd();
183 /* Reset potential sums */
184 velecsum
= _mm_setzero_pd();
186 /* Start inner kernel loop */
187 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
190 /* Get j neighbor index, and coordinate index */
193 j_coord_offsetA
= DIM
*jnrA
;
194 j_coord_offsetB
= DIM
*jnrB
;
196 /* load j atom coordinates */
197 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
+DIM
,x
+j_coord_offsetB
+DIM
,
198 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
200 /* Calculate displacement vector */
201 dx11
= _mm_sub_pd(ix1
,jx1
);
202 dy11
= _mm_sub_pd(iy1
,jy1
);
203 dz11
= _mm_sub_pd(iz1
,jz1
);
204 dx12
= _mm_sub_pd(ix1
,jx2
);
205 dy12
= _mm_sub_pd(iy1
,jy2
);
206 dz12
= _mm_sub_pd(iz1
,jz2
);
207 dx13
= _mm_sub_pd(ix1
,jx3
);
208 dy13
= _mm_sub_pd(iy1
,jy3
);
209 dz13
= _mm_sub_pd(iz1
,jz3
);
210 dx21
= _mm_sub_pd(ix2
,jx1
);
211 dy21
= _mm_sub_pd(iy2
,jy1
);
212 dz21
= _mm_sub_pd(iz2
,jz1
);
213 dx22
= _mm_sub_pd(ix2
,jx2
);
214 dy22
= _mm_sub_pd(iy2
,jy2
);
215 dz22
= _mm_sub_pd(iz2
,jz2
);
216 dx23
= _mm_sub_pd(ix2
,jx3
);
217 dy23
= _mm_sub_pd(iy2
,jy3
);
218 dz23
= _mm_sub_pd(iz2
,jz3
);
219 dx31
= _mm_sub_pd(ix3
,jx1
);
220 dy31
= _mm_sub_pd(iy3
,jy1
);
221 dz31
= _mm_sub_pd(iz3
,jz1
);
222 dx32
= _mm_sub_pd(ix3
,jx2
);
223 dy32
= _mm_sub_pd(iy3
,jy2
);
224 dz32
= _mm_sub_pd(iz3
,jz2
);
225 dx33
= _mm_sub_pd(ix3
,jx3
);
226 dy33
= _mm_sub_pd(iy3
,jy3
);
227 dz33
= _mm_sub_pd(iz3
,jz3
);
229 /* Calculate squared distance and things based on it */
230 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
231 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
232 rsq13
= gmx_mm_calc_rsq_pd(dx13
,dy13
,dz13
);
233 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
234 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
235 rsq23
= gmx_mm_calc_rsq_pd(dx23
,dy23
,dz23
);
236 rsq31
= gmx_mm_calc_rsq_pd(dx31
,dy31
,dz31
);
237 rsq32
= gmx_mm_calc_rsq_pd(dx32
,dy32
,dz32
);
238 rsq33
= gmx_mm_calc_rsq_pd(dx33
,dy33
,dz33
);
240 rinv11
= sse2_invsqrt_d(rsq11
);
241 rinv12
= sse2_invsqrt_d(rsq12
);
242 rinv13
= sse2_invsqrt_d(rsq13
);
243 rinv21
= sse2_invsqrt_d(rsq21
);
244 rinv22
= sse2_invsqrt_d(rsq22
);
245 rinv23
= sse2_invsqrt_d(rsq23
);
246 rinv31
= sse2_invsqrt_d(rsq31
);
247 rinv32
= sse2_invsqrt_d(rsq32
);
248 rinv33
= sse2_invsqrt_d(rsq33
);
250 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
251 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
252 rinvsq13
= _mm_mul_pd(rinv13
,rinv13
);
253 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
254 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
255 rinvsq23
= _mm_mul_pd(rinv23
,rinv23
);
256 rinvsq31
= _mm_mul_pd(rinv31
,rinv31
);
257 rinvsq32
= _mm_mul_pd(rinv32
,rinv32
);
258 rinvsq33
= _mm_mul_pd(rinv33
,rinv33
);
260 fjx1
= _mm_setzero_pd();
261 fjy1
= _mm_setzero_pd();
262 fjz1
= _mm_setzero_pd();
263 fjx2
= _mm_setzero_pd();
264 fjy2
= _mm_setzero_pd();
265 fjz2
= _mm_setzero_pd();
266 fjx3
= _mm_setzero_pd();
267 fjy3
= _mm_setzero_pd();
268 fjz3
= _mm_setzero_pd();
270 /**************************
271 * CALCULATE INTERACTIONS *
272 **************************/
274 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
277 /* REACTION-FIELD ELECTROSTATICS */
278 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
279 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
281 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
283 /* Update potential sum for this i atom from the interaction with this j atom. */
284 velec
= _mm_and_pd(velec
,cutoff_mask
);
285 velecsum
= _mm_add_pd(velecsum
,velec
);
289 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
291 /* Calculate temporary vectorial force */
292 tx
= _mm_mul_pd(fscal
,dx11
);
293 ty
= _mm_mul_pd(fscal
,dy11
);
294 tz
= _mm_mul_pd(fscal
,dz11
);
296 /* Update vectorial force */
297 fix1
= _mm_add_pd(fix1
,tx
);
298 fiy1
= _mm_add_pd(fiy1
,ty
);
299 fiz1
= _mm_add_pd(fiz1
,tz
);
301 fjx1
= _mm_add_pd(fjx1
,tx
);
302 fjy1
= _mm_add_pd(fjy1
,ty
);
303 fjz1
= _mm_add_pd(fjz1
,tz
);
307 /**************************
308 * CALCULATE INTERACTIONS *
309 **************************/
311 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
314 /* REACTION-FIELD ELECTROSTATICS */
315 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
316 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
318 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
320 /* Update potential sum for this i atom from the interaction with this j atom. */
321 velec
= _mm_and_pd(velec
,cutoff_mask
);
322 velecsum
= _mm_add_pd(velecsum
,velec
);
326 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
328 /* Calculate temporary vectorial force */
329 tx
= _mm_mul_pd(fscal
,dx12
);
330 ty
= _mm_mul_pd(fscal
,dy12
);
331 tz
= _mm_mul_pd(fscal
,dz12
);
333 /* Update vectorial force */
334 fix1
= _mm_add_pd(fix1
,tx
);
335 fiy1
= _mm_add_pd(fiy1
,ty
);
336 fiz1
= _mm_add_pd(fiz1
,tz
);
338 fjx2
= _mm_add_pd(fjx2
,tx
);
339 fjy2
= _mm_add_pd(fjy2
,ty
);
340 fjz2
= _mm_add_pd(fjz2
,tz
);
344 /**************************
345 * CALCULATE INTERACTIONS *
346 **************************/
348 if (gmx_mm_any_lt(rsq13
,rcutoff2
))
351 /* REACTION-FIELD ELECTROSTATICS */
352 velec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_add_pd(rinv13
,_mm_mul_pd(krf
,rsq13
)),crf
));
353 felec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_mul_pd(rinv13
,rinvsq13
),krf2
));
355 cutoff_mask
= _mm_cmplt_pd(rsq13
,rcutoff2
);
357 /* Update potential sum for this i atom from the interaction with this j atom. */
358 velec
= _mm_and_pd(velec
,cutoff_mask
);
359 velecsum
= _mm_add_pd(velecsum
,velec
);
363 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
365 /* Calculate temporary vectorial force */
366 tx
= _mm_mul_pd(fscal
,dx13
);
367 ty
= _mm_mul_pd(fscal
,dy13
);
368 tz
= _mm_mul_pd(fscal
,dz13
);
370 /* Update vectorial force */
371 fix1
= _mm_add_pd(fix1
,tx
);
372 fiy1
= _mm_add_pd(fiy1
,ty
);
373 fiz1
= _mm_add_pd(fiz1
,tz
);
375 fjx3
= _mm_add_pd(fjx3
,tx
);
376 fjy3
= _mm_add_pd(fjy3
,ty
);
377 fjz3
= _mm_add_pd(fjz3
,tz
);
381 /**************************
382 * CALCULATE INTERACTIONS *
383 **************************/
385 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
388 /* REACTION-FIELD ELECTROSTATICS */
389 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
390 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
392 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
394 /* Update potential sum for this i atom from the interaction with this j atom. */
395 velec
= _mm_and_pd(velec
,cutoff_mask
);
396 velecsum
= _mm_add_pd(velecsum
,velec
);
400 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
402 /* Calculate temporary vectorial force */
403 tx
= _mm_mul_pd(fscal
,dx21
);
404 ty
= _mm_mul_pd(fscal
,dy21
);
405 tz
= _mm_mul_pd(fscal
,dz21
);
407 /* Update vectorial force */
408 fix2
= _mm_add_pd(fix2
,tx
);
409 fiy2
= _mm_add_pd(fiy2
,ty
);
410 fiz2
= _mm_add_pd(fiz2
,tz
);
412 fjx1
= _mm_add_pd(fjx1
,tx
);
413 fjy1
= _mm_add_pd(fjy1
,ty
);
414 fjz1
= _mm_add_pd(fjz1
,tz
);
418 /**************************
419 * CALCULATE INTERACTIONS *
420 **************************/
422 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
425 /* REACTION-FIELD ELECTROSTATICS */
426 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
427 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
429 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
431 /* Update potential sum for this i atom from the interaction with this j atom. */
432 velec
= _mm_and_pd(velec
,cutoff_mask
);
433 velecsum
= _mm_add_pd(velecsum
,velec
);
437 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
439 /* Calculate temporary vectorial force */
440 tx
= _mm_mul_pd(fscal
,dx22
);
441 ty
= _mm_mul_pd(fscal
,dy22
);
442 tz
= _mm_mul_pd(fscal
,dz22
);
444 /* Update vectorial force */
445 fix2
= _mm_add_pd(fix2
,tx
);
446 fiy2
= _mm_add_pd(fiy2
,ty
);
447 fiz2
= _mm_add_pd(fiz2
,tz
);
449 fjx2
= _mm_add_pd(fjx2
,tx
);
450 fjy2
= _mm_add_pd(fjy2
,ty
);
451 fjz2
= _mm_add_pd(fjz2
,tz
);
455 /**************************
456 * CALCULATE INTERACTIONS *
457 **************************/
459 if (gmx_mm_any_lt(rsq23
,rcutoff2
))
462 /* REACTION-FIELD ELECTROSTATICS */
463 velec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_add_pd(rinv23
,_mm_mul_pd(krf
,rsq23
)),crf
));
464 felec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_mul_pd(rinv23
,rinvsq23
),krf2
));
466 cutoff_mask
= _mm_cmplt_pd(rsq23
,rcutoff2
);
468 /* Update potential sum for this i atom from the interaction with this j atom. */
469 velec
= _mm_and_pd(velec
,cutoff_mask
);
470 velecsum
= _mm_add_pd(velecsum
,velec
);
474 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
476 /* Calculate temporary vectorial force */
477 tx
= _mm_mul_pd(fscal
,dx23
);
478 ty
= _mm_mul_pd(fscal
,dy23
);
479 tz
= _mm_mul_pd(fscal
,dz23
);
481 /* Update vectorial force */
482 fix2
= _mm_add_pd(fix2
,tx
);
483 fiy2
= _mm_add_pd(fiy2
,ty
);
484 fiz2
= _mm_add_pd(fiz2
,tz
);
486 fjx3
= _mm_add_pd(fjx3
,tx
);
487 fjy3
= _mm_add_pd(fjy3
,ty
);
488 fjz3
= _mm_add_pd(fjz3
,tz
);
492 /**************************
493 * CALCULATE INTERACTIONS *
494 **************************/
496 if (gmx_mm_any_lt(rsq31
,rcutoff2
))
499 /* REACTION-FIELD ELECTROSTATICS */
500 velec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_add_pd(rinv31
,_mm_mul_pd(krf
,rsq31
)),crf
));
501 felec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_mul_pd(rinv31
,rinvsq31
),krf2
));
503 cutoff_mask
= _mm_cmplt_pd(rsq31
,rcutoff2
);
505 /* Update potential sum for this i atom from the interaction with this j atom. */
506 velec
= _mm_and_pd(velec
,cutoff_mask
);
507 velecsum
= _mm_add_pd(velecsum
,velec
);
511 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
513 /* Calculate temporary vectorial force */
514 tx
= _mm_mul_pd(fscal
,dx31
);
515 ty
= _mm_mul_pd(fscal
,dy31
);
516 tz
= _mm_mul_pd(fscal
,dz31
);
518 /* Update vectorial force */
519 fix3
= _mm_add_pd(fix3
,tx
);
520 fiy3
= _mm_add_pd(fiy3
,ty
);
521 fiz3
= _mm_add_pd(fiz3
,tz
);
523 fjx1
= _mm_add_pd(fjx1
,tx
);
524 fjy1
= _mm_add_pd(fjy1
,ty
);
525 fjz1
= _mm_add_pd(fjz1
,tz
);
529 /**************************
530 * CALCULATE INTERACTIONS *
531 **************************/
533 if (gmx_mm_any_lt(rsq32
,rcutoff2
))
536 /* REACTION-FIELD ELECTROSTATICS */
537 velec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_add_pd(rinv32
,_mm_mul_pd(krf
,rsq32
)),crf
));
538 felec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_mul_pd(rinv32
,rinvsq32
),krf2
));
540 cutoff_mask
= _mm_cmplt_pd(rsq32
,rcutoff2
);
542 /* Update potential sum for this i atom from the interaction with this j atom. */
543 velec
= _mm_and_pd(velec
,cutoff_mask
);
544 velecsum
= _mm_add_pd(velecsum
,velec
);
548 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
550 /* Calculate temporary vectorial force */
551 tx
= _mm_mul_pd(fscal
,dx32
);
552 ty
= _mm_mul_pd(fscal
,dy32
);
553 tz
= _mm_mul_pd(fscal
,dz32
);
555 /* Update vectorial force */
556 fix3
= _mm_add_pd(fix3
,tx
);
557 fiy3
= _mm_add_pd(fiy3
,ty
);
558 fiz3
= _mm_add_pd(fiz3
,tz
);
560 fjx2
= _mm_add_pd(fjx2
,tx
);
561 fjy2
= _mm_add_pd(fjy2
,ty
);
562 fjz2
= _mm_add_pd(fjz2
,tz
);
566 /**************************
567 * CALCULATE INTERACTIONS *
568 **************************/
570 if (gmx_mm_any_lt(rsq33
,rcutoff2
))
573 /* REACTION-FIELD ELECTROSTATICS */
574 velec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_add_pd(rinv33
,_mm_mul_pd(krf
,rsq33
)),crf
));
575 felec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_mul_pd(rinv33
,rinvsq33
),krf2
));
577 cutoff_mask
= _mm_cmplt_pd(rsq33
,rcutoff2
);
579 /* Update potential sum for this i atom from the interaction with this j atom. */
580 velec
= _mm_and_pd(velec
,cutoff_mask
);
581 velecsum
= _mm_add_pd(velecsum
,velec
);
585 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
587 /* Calculate temporary vectorial force */
588 tx
= _mm_mul_pd(fscal
,dx33
);
589 ty
= _mm_mul_pd(fscal
,dy33
);
590 tz
= _mm_mul_pd(fscal
,dz33
);
592 /* Update vectorial force */
593 fix3
= _mm_add_pd(fix3
,tx
);
594 fiy3
= _mm_add_pd(fiy3
,ty
);
595 fiz3
= _mm_add_pd(fiz3
,tz
);
597 fjx3
= _mm_add_pd(fjx3
,tx
);
598 fjy3
= _mm_add_pd(fjy3
,ty
);
599 fjz3
= _mm_add_pd(fjz3
,tz
);
603 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
+DIM
,f
+j_coord_offsetB
+DIM
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
605 /* Inner loop uses 324 flops */
612 j_coord_offsetA
= DIM
*jnrA
;
614 /* load j atom coordinates */
615 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
+DIM
,
616 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
618 /* Calculate displacement vector */
619 dx11
= _mm_sub_pd(ix1
,jx1
);
620 dy11
= _mm_sub_pd(iy1
,jy1
);
621 dz11
= _mm_sub_pd(iz1
,jz1
);
622 dx12
= _mm_sub_pd(ix1
,jx2
);
623 dy12
= _mm_sub_pd(iy1
,jy2
);
624 dz12
= _mm_sub_pd(iz1
,jz2
);
625 dx13
= _mm_sub_pd(ix1
,jx3
);
626 dy13
= _mm_sub_pd(iy1
,jy3
);
627 dz13
= _mm_sub_pd(iz1
,jz3
);
628 dx21
= _mm_sub_pd(ix2
,jx1
);
629 dy21
= _mm_sub_pd(iy2
,jy1
);
630 dz21
= _mm_sub_pd(iz2
,jz1
);
631 dx22
= _mm_sub_pd(ix2
,jx2
);
632 dy22
= _mm_sub_pd(iy2
,jy2
);
633 dz22
= _mm_sub_pd(iz2
,jz2
);
634 dx23
= _mm_sub_pd(ix2
,jx3
);
635 dy23
= _mm_sub_pd(iy2
,jy3
);
636 dz23
= _mm_sub_pd(iz2
,jz3
);
637 dx31
= _mm_sub_pd(ix3
,jx1
);
638 dy31
= _mm_sub_pd(iy3
,jy1
);
639 dz31
= _mm_sub_pd(iz3
,jz1
);
640 dx32
= _mm_sub_pd(ix3
,jx2
);
641 dy32
= _mm_sub_pd(iy3
,jy2
);
642 dz32
= _mm_sub_pd(iz3
,jz2
);
643 dx33
= _mm_sub_pd(ix3
,jx3
);
644 dy33
= _mm_sub_pd(iy3
,jy3
);
645 dz33
= _mm_sub_pd(iz3
,jz3
);
647 /* Calculate squared distance and things based on it */
648 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
649 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
650 rsq13
= gmx_mm_calc_rsq_pd(dx13
,dy13
,dz13
);
651 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
652 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
653 rsq23
= gmx_mm_calc_rsq_pd(dx23
,dy23
,dz23
);
654 rsq31
= gmx_mm_calc_rsq_pd(dx31
,dy31
,dz31
);
655 rsq32
= gmx_mm_calc_rsq_pd(dx32
,dy32
,dz32
);
656 rsq33
= gmx_mm_calc_rsq_pd(dx33
,dy33
,dz33
);
658 rinv11
= sse2_invsqrt_d(rsq11
);
659 rinv12
= sse2_invsqrt_d(rsq12
);
660 rinv13
= sse2_invsqrt_d(rsq13
);
661 rinv21
= sse2_invsqrt_d(rsq21
);
662 rinv22
= sse2_invsqrt_d(rsq22
);
663 rinv23
= sse2_invsqrt_d(rsq23
);
664 rinv31
= sse2_invsqrt_d(rsq31
);
665 rinv32
= sse2_invsqrt_d(rsq32
);
666 rinv33
= sse2_invsqrt_d(rsq33
);
668 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
669 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
670 rinvsq13
= _mm_mul_pd(rinv13
,rinv13
);
671 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
672 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
673 rinvsq23
= _mm_mul_pd(rinv23
,rinv23
);
674 rinvsq31
= _mm_mul_pd(rinv31
,rinv31
);
675 rinvsq32
= _mm_mul_pd(rinv32
,rinv32
);
676 rinvsq33
= _mm_mul_pd(rinv33
,rinv33
);
678 fjx1
= _mm_setzero_pd();
679 fjy1
= _mm_setzero_pd();
680 fjz1
= _mm_setzero_pd();
681 fjx2
= _mm_setzero_pd();
682 fjy2
= _mm_setzero_pd();
683 fjz2
= _mm_setzero_pd();
684 fjx3
= _mm_setzero_pd();
685 fjy3
= _mm_setzero_pd();
686 fjz3
= _mm_setzero_pd();
688 /**************************
689 * CALCULATE INTERACTIONS *
690 **************************/
692 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
695 /* REACTION-FIELD ELECTROSTATICS */
696 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
697 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
699 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
701 /* Update potential sum for this i atom from the interaction with this j atom. */
702 velec
= _mm_and_pd(velec
,cutoff_mask
);
703 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
704 velecsum
= _mm_add_pd(velecsum
,velec
);
708 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
710 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
712 /* Calculate temporary vectorial force */
713 tx
= _mm_mul_pd(fscal
,dx11
);
714 ty
= _mm_mul_pd(fscal
,dy11
);
715 tz
= _mm_mul_pd(fscal
,dz11
);
717 /* Update vectorial force */
718 fix1
= _mm_add_pd(fix1
,tx
);
719 fiy1
= _mm_add_pd(fiy1
,ty
);
720 fiz1
= _mm_add_pd(fiz1
,tz
);
722 fjx1
= _mm_add_pd(fjx1
,tx
);
723 fjy1
= _mm_add_pd(fjy1
,ty
);
724 fjz1
= _mm_add_pd(fjz1
,tz
);
728 /**************************
729 * CALCULATE INTERACTIONS *
730 **************************/
732 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
735 /* REACTION-FIELD ELECTROSTATICS */
736 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
737 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
739 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
741 /* Update potential sum for this i atom from the interaction with this j atom. */
742 velec
= _mm_and_pd(velec
,cutoff_mask
);
743 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
744 velecsum
= _mm_add_pd(velecsum
,velec
);
748 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
750 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
752 /* Calculate temporary vectorial force */
753 tx
= _mm_mul_pd(fscal
,dx12
);
754 ty
= _mm_mul_pd(fscal
,dy12
);
755 tz
= _mm_mul_pd(fscal
,dz12
);
757 /* Update vectorial force */
758 fix1
= _mm_add_pd(fix1
,tx
);
759 fiy1
= _mm_add_pd(fiy1
,ty
);
760 fiz1
= _mm_add_pd(fiz1
,tz
);
762 fjx2
= _mm_add_pd(fjx2
,tx
);
763 fjy2
= _mm_add_pd(fjy2
,ty
);
764 fjz2
= _mm_add_pd(fjz2
,tz
);
768 /**************************
769 * CALCULATE INTERACTIONS *
770 **************************/
772 if (gmx_mm_any_lt(rsq13
,rcutoff2
))
775 /* REACTION-FIELD ELECTROSTATICS */
776 velec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_add_pd(rinv13
,_mm_mul_pd(krf
,rsq13
)),crf
));
777 felec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_mul_pd(rinv13
,rinvsq13
),krf2
));
779 cutoff_mask
= _mm_cmplt_pd(rsq13
,rcutoff2
);
781 /* Update potential sum for this i atom from the interaction with this j atom. */
782 velec
= _mm_and_pd(velec
,cutoff_mask
);
783 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
784 velecsum
= _mm_add_pd(velecsum
,velec
);
788 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
790 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
792 /* Calculate temporary vectorial force */
793 tx
= _mm_mul_pd(fscal
,dx13
);
794 ty
= _mm_mul_pd(fscal
,dy13
);
795 tz
= _mm_mul_pd(fscal
,dz13
);
797 /* Update vectorial force */
798 fix1
= _mm_add_pd(fix1
,tx
);
799 fiy1
= _mm_add_pd(fiy1
,ty
);
800 fiz1
= _mm_add_pd(fiz1
,tz
);
802 fjx3
= _mm_add_pd(fjx3
,tx
);
803 fjy3
= _mm_add_pd(fjy3
,ty
);
804 fjz3
= _mm_add_pd(fjz3
,tz
);
808 /**************************
809 * CALCULATE INTERACTIONS *
810 **************************/
812 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
815 /* REACTION-FIELD ELECTROSTATICS */
816 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
817 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
819 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
821 /* Update potential sum for this i atom from the interaction with this j atom. */
822 velec
= _mm_and_pd(velec
,cutoff_mask
);
823 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
824 velecsum
= _mm_add_pd(velecsum
,velec
);
828 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
830 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
832 /* Calculate temporary vectorial force */
833 tx
= _mm_mul_pd(fscal
,dx21
);
834 ty
= _mm_mul_pd(fscal
,dy21
);
835 tz
= _mm_mul_pd(fscal
,dz21
);
837 /* Update vectorial force */
838 fix2
= _mm_add_pd(fix2
,tx
);
839 fiy2
= _mm_add_pd(fiy2
,ty
);
840 fiz2
= _mm_add_pd(fiz2
,tz
);
842 fjx1
= _mm_add_pd(fjx1
,tx
);
843 fjy1
= _mm_add_pd(fjy1
,ty
);
844 fjz1
= _mm_add_pd(fjz1
,tz
);
848 /**************************
849 * CALCULATE INTERACTIONS *
850 **************************/
852 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
855 /* REACTION-FIELD ELECTROSTATICS */
856 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
857 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
859 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
861 /* Update potential sum for this i atom from the interaction with this j atom. */
862 velec
= _mm_and_pd(velec
,cutoff_mask
);
863 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
864 velecsum
= _mm_add_pd(velecsum
,velec
);
868 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
870 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
872 /* Calculate temporary vectorial force */
873 tx
= _mm_mul_pd(fscal
,dx22
);
874 ty
= _mm_mul_pd(fscal
,dy22
);
875 tz
= _mm_mul_pd(fscal
,dz22
);
877 /* Update vectorial force */
878 fix2
= _mm_add_pd(fix2
,tx
);
879 fiy2
= _mm_add_pd(fiy2
,ty
);
880 fiz2
= _mm_add_pd(fiz2
,tz
);
882 fjx2
= _mm_add_pd(fjx2
,tx
);
883 fjy2
= _mm_add_pd(fjy2
,ty
);
884 fjz2
= _mm_add_pd(fjz2
,tz
);
888 /**************************
889 * CALCULATE INTERACTIONS *
890 **************************/
892 if (gmx_mm_any_lt(rsq23
,rcutoff2
))
895 /* REACTION-FIELD ELECTROSTATICS */
896 velec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_add_pd(rinv23
,_mm_mul_pd(krf
,rsq23
)),crf
));
897 felec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_mul_pd(rinv23
,rinvsq23
),krf2
));
899 cutoff_mask
= _mm_cmplt_pd(rsq23
,rcutoff2
);
901 /* Update potential sum for this i atom from the interaction with this j atom. */
902 velec
= _mm_and_pd(velec
,cutoff_mask
);
903 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
904 velecsum
= _mm_add_pd(velecsum
,velec
);
908 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
910 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
912 /* Calculate temporary vectorial force */
913 tx
= _mm_mul_pd(fscal
,dx23
);
914 ty
= _mm_mul_pd(fscal
,dy23
);
915 tz
= _mm_mul_pd(fscal
,dz23
);
917 /* Update vectorial force */
918 fix2
= _mm_add_pd(fix2
,tx
);
919 fiy2
= _mm_add_pd(fiy2
,ty
);
920 fiz2
= _mm_add_pd(fiz2
,tz
);
922 fjx3
= _mm_add_pd(fjx3
,tx
);
923 fjy3
= _mm_add_pd(fjy3
,ty
);
924 fjz3
= _mm_add_pd(fjz3
,tz
);
928 /**************************
929 * CALCULATE INTERACTIONS *
930 **************************/
932 if (gmx_mm_any_lt(rsq31
,rcutoff2
))
935 /* REACTION-FIELD ELECTROSTATICS */
936 velec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_add_pd(rinv31
,_mm_mul_pd(krf
,rsq31
)),crf
));
937 felec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_mul_pd(rinv31
,rinvsq31
),krf2
));
939 cutoff_mask
= _mm_cmplt_pd(rsq31
,rcutoff2
);
941 /* Update potential sum for this i atom from the interaction with this j atom. */
942 velec
= _mm_and_pd(velec
,cutoff_mask
);
943 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
944 velecsum
= _mm_add_pd(velecsum
,velec
);
948 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
950 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
952 /* Calculate temporary vectorial force */
953 tx
= _mm_mul_pd(fscal
,dx31
);
954 ty
= _mm_mul_pd(fscal
,dy31
);
955 tz
= _mm_mul_pd(fscal
,dz31
);
957 /* Update vectorial force */
958 fix3
= _mm_add_pd(fix3
,tx
);
959 fiy3
= _mm_add_pd(fiy3
,ty
);
960 fiz3
= _mm_add_pd(fiz3
,tz
);
962 fjx1
= _mm_add_pd(fjx1
,tx
);
963 fjy1
= _mm_add_pd(fjy1
,ty
);
964 fjz1
= _mm_add_pd(fjz1
,tz
);
968 /**************************
969 * CALCULATE INTERACTIONS *
970 **************************/
972 if (gmx_mm_any_lt(rsq32
,rcutoff2
))
975 /* REACTION-FIELD ELECTROSTATICS */
976 velec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_add_pd(rinv32
,_mm_mul_pd(krf
,rsq32
)),crf
));
977 felec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_mul_pd(rinv32
,rinvsq32
),krf2
));
979 cutoff_mask
= _mm_cmplt_pd(rsq32
,rcutoff2
);
981 /* Update potential sum for this i atom from the interaction with this j atom. */
982 velec
= _mm_and_pd(velec
,cutoff_mask
);
983 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
984 velecsum
= _mm_add_pd(velecsum
,velec
);
988 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
990 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
992 /* Calculate temporary vectorial force */
993 tx
= _mm_mul_pd(fscal
,dx32
);
994 ty
= _mm_mul_pd(fscal
,dy32
);
995 tz
= _mm_mul_pd(fscal
,dz32
);
997 /* Update vectorial force */
998 fix3
= _mm_add_pd(fix3
,tx
);
999 fiy3
= _mm_add_pd(fiy3
,ty
);
1000 fiz3
= _mm_add_pd(fiz3
,tz
);
1002 fjx2
= _mm_add_pd(fjx2
,tx
);
1003 fjy2
= _mm_add_pd(fjy2
,ty
);
1004 fjz2
= _mm_add_pd(fjz2
,tz
);
1008 /**************************
1009 * CALCULATE INTERACTIONS *
1010 **************************/
1012 if (gmx_mm_any_lt(rsq33
,rcutoff2
))
1015 /* REACTION-FIELD ELECTROSTATICS */
1016 velec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_add_pd(rinv33
,_mm_mul_pd(krf
,rsq33
)),crf
));
1017 felec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_mul_pd(rinv33
,rinvsq33
),krf2
));
1019 cutoff_mask
= _mm_cmplt_pd(rsq33
,rcutoff2
);
1021 /* Update potential sum for this i atom from the interaction with this j atom. */
1022 velec
= _mm_and_pd(velec
,cutoff_mask
);
1023 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
1024 velecsum
= _mm_add_pd(velecsum
,velec
);
1028 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1030 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1032 /* Calculate temporary vectorial force */
1033 tx
= _mm_mul_pd(fscal
,dx33
);
1034 ty
= _mm_mul_pd(fscal
,dy33
);
1035 tz
= _mm_mul_pd(fscal
,dz33
);
1037 /* Update vectorial force */
1038 fix3
= _mm_add_pd(fix3
,tx
);
1039 fiy3
= _mm_add_pd(fiy3
,ty
);
1040 fiz3
= _mm_add_pd(fiz3
,tz
);
1042 fjx3
= _mm_add_pd(fjx3
,tx
);
1043 fjy3
= _mm_add_pd(fjy3
,ty
);
1044 fjz3
= _mm_add_pd(fjz3
,tz
);
1048 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
+DIM
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
1050 /* Inner loop uses 324 flops */
1053 /* End of innermost loop */
1055 gmx_mm_update_iforce_3atom_swizzle_pd(fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
,
1056 f
+i_coord_offset
+DIM
,fshift
+i_shift_offset
);
1059 /* Update potential energies */
1060 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
1062 /* Increment number of inner iterations */
1063 inneriter
+= j_index_end
- j_index_start
;
1065 /* Outer loop uses 19 flops */
1068 /* Increment number of outer iterations */
1071 /* Update outer/inner flops */
1073 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W4W4_VF
,outeriter
*19 + inneriter
*324);
1076 * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_double
1077 * Electrostatics interaction: ReactionField
1078 * VdW interaction: None
1079 * Geometry: Water4-Water4
1080 * Calculate force/pot: Force
1083 nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sse2_double
1084 (t_nblist
* gmx_restrict nlist
,
1085 rvec
* gmx_restrict xx
,
1086 rvec
* gmx_restrict ff
,
1087 struct t_forcerec
* gmx_restrict fr
,
1088 t_mdatoms
* gmx_restrict mdatoms
,
1089 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
1090 t_nrnb
* gmx_restrict nrnb
)
1092 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
1093 * just 0 for non-waters.
1094 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
1095 * jnr indices corresponding to data put in the four positions in the SIMD register.
1097 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
1098 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
1100 int j_coord_offsetA
,j_coord_offsetB
;
1101 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
1102 real rcutoff_scalar
;
1103 real
*shiftvec
,*fshift
,*x
,*f
;
1104 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
1106 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
1108 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
1110 __m128d ix3
,iy3
,iz3
,fix3
,fiy3
,fiz3
,iq3
,isai3
;
1111 int vdwjidx1A
,vdwjidx1B
;
1112 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
1113 int vdwjidx2A
,vdwjidx2B
;
1114 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
1115 int vdwjidx3A
,vdwjidx3B
;
1116 __m128d jx3
,jy3
,jz3
,fjx3
,fjy3
,fjz3
,jq3
,isaj3
;
1117 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
1118 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
1119 __m128d dx13
,dy13
,dz13
,rsq13
,rinv13
,rinvsq13
,r13
,qq13
,c6_13
,c12_13
;
1120 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
1121 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
1122 __m128d dx23
,dy23
,dz23
,rsq23
,rinv23
,rinvsq23
,r23
,qq23
,c6_23
,c12_23
;
1123 __m128d dx31
,dy31
,dz31
,rsq31
,rinv31
,rinvsq31
,r31
,qq31
,c6_31
,c12_31
;
1124 __m128d dx32
,dy32
,dz32
,rsq32
,rinv32
,rinvsq32
,r32
,qq32
,c6_32
,c12_32
;
1125 __m128d dx33
,dy33
,dz33
,rsq33
,rinv33
,rinvsq33
,r33
,qq33
,c6_33
,c12_33
;
1126 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
1128 __m128d dummy_mask
,cutoff_mask
;
1129 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
1130 __m128d one
= _mm_set1_pd(1.0);
1131 __m128d two
= _mm_set1_pd(2.0);
1137 jindex
= nlist
->jindex
;
1139 shiftidx
= nlist
->shift
;
1141 shiftvec
= fr
->shift_vec
[0];
1142 fshift
= fr
->fshift
[0];
1143 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
1144 charge
= mdatoms
->chargeA
;
1145 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
1146 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
1147 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
1149 /* Setup water-specific parameters */
1150 inr
= nlist
->iinr
[0];
1151 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
1152 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
1153 iq3
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+3]));
1155 jq1
= _mm_set1_pd(charge
[inr
+1]);
1156 jq2
= _mm_set1_pd(charge
[inr
+2]);
1157 jq3
= _mm_set1_pd(charge
[inr
+3]);
1158 qq11
= _mm_mul_pd(iq1
,jq1
);
1159 qq12
= _mm_mul_pd(iq1
,jq2
);
1160 qq13
= _mm_mul_pd(iq1
,jq3
);
1161 qq21
= _mm_mul_pd(iq2
,jq1
);
1162 qq22
= _mm_mul_pd(iq2
,jq2
);
1163 qq23
= _mm_mul_pd(iq2
,jq3
);
1164 qq31
= _mm_mul_pd(iq3
,jq1
);
1165 qq32
= _mm_mul_pd(iq3
,jq2
);
1166 qq33
= _mm_mul_pd(iq3
,jq3
);
1168 /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
1169 rcutoff_scalar
= fr
->ic
->rcoulomb
;
1170 rcutoff
= _mm_set1_pd(rcutoff_scalar
);
1171 rcutoff2
= _mm_mul_pd(rcutoff
,rcutoff
);
1173 /* Avoid stupid compiler warnings */
1175 j_coord_offsetA
= 0;
1176 j_coord_offsetB
= 0;
1181 /* Start outer loop over neighborlists */
1182 for(iidx
=0; iidx
<nri
; iidx
++)
1184 /* Load shift vector for this list */
1185 i_shift_offset
= DIM
*shiftidx
[iidx
];
1187 /* Load limits for loop over neighbors */
1188 j_index_start
= jindex
[iidx
];
1189 j_index_end
= jindex
[iidx
+1];
1191 /* Get outer coordinate index */
1193 i_coord_offset
= DIM
*inr
;
1195 /* Load i particle coords and add shift vector */
1196 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
+DIM
,
1197 &ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
1199 fix1
= _mm_setzero_pd();
1200 fiy1
= _mm_setzero_pd();
1201 fiz1
= _mm_setzero_pd();
1202 fix2
= _mm_setzero_pd();
1203 fiy2
= _mm_setzero_pd();
1204 fiz2
= _mm_setzero_pd();
1205 fix3
= _mm_setzero_pd();
1206 fiy3
= _mm_setzero_pd();
1207 fiz3
= _mm_setzero_pd();
1209 /* Start inner kernel loop */
1210 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
1213 /* Get j neighbor index, and coordinate index */
1215 jnrB
= jjnr
[jidx
+1];
1216 j_coord_offsetA
= DIM
*jnrA
;
1217 j_coord_offsetB
= DIM
*jnrB
;
1219 /* load j atom coordinates */
1220 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
+DIM
,x
+j_coord_offsetB
+DIM
,
1221 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
1223 /* Calculate displacement vector */
1224 dx11
= _mm_sub_pd(ix1
,jx1
);
1225 dy11
= _mm_sub_pd(iy1
,jy1
);
1226 dz11
= _mm_sub_pd(iz1
,jz1
);
1227 dx12
= _mm_sub_pd(ix1
,jx2
);
1228 dy12
= _mm_sub_pd(iy1
,jy2
);
1229 dz12
= _mm_sub_pd(iz1
,jz2
);
1230 dx13
= _mm_sub_pd(ix1
,jx3
);
1231 dy13
= _mm_sub_pd(iy1
,jy3
);
1232 dz13
= _mm_sub_pd(iz1
,jz3
);
1233 dx21
= _mm_sub_pd(ix2
,jx1
);
1234 dy21
= _mm_sub_pd(iy2
,jy1
);
1235 dz21
= _mm_sub_pd(iz2
,jz1
);
1236 dx22
= _mm_sub_pd(ix2
,jx2
);
1237 dy22
= _mm_sub_pd(iy2
,jy2
);
1238 dz22
= _mm_sub_pd(iz2
,jz2
);
1239 dx23
= _mm_sub_pd(ix2
,jx3
);
1240 dy23
= _mm_sub_pd(iy2
,jy3
);
1241 dz23
= _mm_sub_pd(iz2
,jz3
);
1242 dx31
= _mm_sub_pd(ix3
,jx1
);
1243 dy31
= _mm_sub_pd(iy3
,jy1
);
1244 dz31
= _mm_sub_pd(iz3
,jz1
);
1245 dx32
= _mm_sub_pd(ix3
,jx2
);
1246 dy32
= _mm_sub_pd(iy3
,jy2
);
1247 dz32
= _mm_sub_pd(iz3
,jz2
);
1248 dx33
= _mm_sub_pd(ix3
,jx3
);
1249 dy33
= _mm_sub_pd(iy3
,jy3
);
1250 dz33
= _mm_sub_pd(iz3
,jz3
);
1252 /* Calculate squared distance and things based on it */
1253 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1254 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1255 rsq13
= gmx_mm_calc_rsq_pd(dx13
,dy13
,dz13
);
1256 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1257 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1258 rsq23
= gmx_mm_calc_rsq_pd(dx23
,dy23
,dz23
);
1259 rsq31
= gmx_mm_calc_rsq_pd(dx31
,dy31
,dz31
);
1260 rsq32
= gmx_mm_calc_rsq_pd(dx32
,dy32
,dz32
);
1261 rsq33
= gmx_mm_calc_rsq_pd(dx33
,dy33
,dz33
);
1263 rinv11
= sse2_invsqrt_d(rsq11
);
1264 rinv12
= sse2_invsqrt_d(rsq12
);
1265 rinv13
= sse2_invsqrt_d(rsq13
);
1266 rinv21
= sse2_invsqrt_d(rsq21
);
1267 rinv22
= sse2_invsqrt_d(rsq22
);
1268 rinv23
= sse2_invsqrt_d(rsq23
);
1269 rinv31
= sse2_invsqrt_d(rsq31
);
1270 rinv32
= sse2_invsqrt_d(rsq32
);
1271 rinv33
= sse2_invsqrt_d(rsq33
);
1273 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1274 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1275 rinvsq13
= _mm_mul_pd(rinv13
,rinv13
);
1276 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1277 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1278 rinvsq23
= _mm_mul_pd(rinv23
,rinv23
);
1279 rinvsq31
= _mm_mul_pd(rinv31
,rinv31
);
1280 rinvsq32
= _mm_mul_pd(rinv32
,rinv32
);
1281 rinvsq33
= _mm_mul_pd(rinv33
,rinv33
);
1283 fjx1
= _mm_setzero_pd();
1284 fjy1
= _mm_setzero_pd();
1285 fjz1
= _mm_setzero_pd();
1286 fjx2
= _mm_setzero_pd();
1287 fjy2
= _mm_setzero_pd();
1288 fjz2
= _mm_setzero_pd();
1289 fjx3
= _mm_setzero_pd();
1290 fjy3
= _mm_setzero_pd();
1291 fjz3
= _mm_setzero_pd();
1293 /**************************
1294 * CALCULATE INTERACTIONS *
1295 **************************/
1297 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
1300 /* REACTION-FIELD ELECTROSTATICS */
1301 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1303 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
1307 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1309 /* Calculate temporary vectorial force */
1310 tx
= _mm_mul_pd(fscal
,dx11
);
1311 ty
= _mm_mul_pd(fscal
,dy11
);
1312 tz
= _mm_mul_pd(fscal
,dz11
);
1314 /* Update vectorial force */
1315 fix1
= _mm_add_pd(fix1
,tx
);
1316 fiy1
= _mm_add_pd(fiy1
,ty
);
1317 fiz1
= _mm_add_pd(fiz1
,tz
);
1319 fjx1
= _mm_add_pd(fjx1
,tx
);
1320 fjy1
= _mm_add_pd(fjy1
,ty
);
1321 fjz1
= _mm_add_pd(fjz1
,tz
);
1325 /**************************
1326 * CALCULATE INTERACTIONS *
1327 **************************/
1329 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
1332 /* REACTION-FIELD ELECTROSTATICS */
1333 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1335 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
1339 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1341 /* Calculate temporary vectorial force */
1342 tx
= _mm_mul_pd(fscal
,dx12
);
1343 ty
= _mm_mul_pd(fscal
,dy12
);
1344 tz
= _mm_mul_pd(fscal
,dz12
);
1346 /* Update vectorial force */
1347 fix1
= _mm_add_pd(fix1
,tx
);
1348 fiy1
= _mm_add_pd(fiy1
,ty
);
1349 fiz1
= _mm_add_pd(fiz1
,tz
);
1351 fjx2
= _mm_add_pd(fjx2
,tx
);
1352 fjy2
= _mm_add_pd(fjy2
,ty
);
1353 fjz2
= _mm_add_pd(fjz2
,tz
);
1357 /**************************
1358 * CALCULATE INTERACTIONS *
1359 **************************/
1361 if (gmx_mm_any_lt(rsq13
,rcutoff2
))
1364 /* REACTION-FIELD ELECTROSTATICS */
1365 felec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_mul_pd(rinv13
,rinvsq13
),krf2
));
1367 cutoff_mask
= _mm_cmplt_pd(rsq13
,rcutoff2
);
1371 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1373 /* Calculate temporary vectorial force */
1374 tx
= _mm_mul_pd(fscal
,dx13
);
1375 ty
= _mm_mul_pd(fscal
,dy13
);
1376 tz
= _mm_mul_pd(fscal
,dz13
);
1378 /* Update vectorial force */
1379 fix1
= _mm_add_pd(fix1
,tx
);
1380 fiy1
= _mm_add_pd(fiy1
,ty
);
1381 fiz1
= _mm_add_pd(fiz1
,tz
);
1383 fjx3
= _mm_add_pd(fjx3
,tx
);
1384 fjy3
= _mm_add_pd(fjy3
,ty
);
1385 fjz3
= _mm_add_pd(fjz3
,tz
);
1389 /**************************
1390 * CALCULATE INTERACTIONS *
1391 **************************/
1393 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
1396 /* REACTION-FIELD ELECTROSTATICS */
1397 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1399 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
1403 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1405 /* Calculate temporary vectorial force */
1406 tx
= _mm_mul_pd(fscal
,dx21
);
1407 ty
= _mm_mul_pd(fscal
,dy21
);
1408 tz
= _mm_mul_pd(fscal
,dz21
);
1410 /* Update vectorial force */
1411 fix2
= _mm_add_pd(fix2
,tx
);
1412 fiy2
= _mm_add_pd(fiy2
,ty
);
1413 fiz2
= _mm_add_pd(fiz2
,tz
);
1415 fjx1
= _mm_add_pd(fjx1
,tx
);
1416 fjy1
= _mm_add_pd(fjy1
,ty
);
1417 fjz1
= _mm_add_pd(fjz1
,tz
);
1421 /**************************
1422 * CALCULATE INTERACTIONS *
1423 **************************/
1425 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
1428 /* REACTION-FIELD ELECTROSTATICS */
1429 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1431 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
1435 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1437 /* Calculate temporary vectorial force */
1438 tx
= _mm_mul_pd(fscal
,dx22
);
1439 ty
= _mm_mul_pd(fscal
,dy22
);
1440 tz
= _mm_mul_pd(fscal
,dz22
);
1442 /* Update vectorial force */
1443 fix2
= _mm_add_pd(fix2
,tx
);
1444 fiy2
= _mm_add_pd(fiy2
,ty
);
1445 fiz2
= _mm_add_pd(fiz2
,tz
);
1447 fjx2
= _mm_add_pd(fjx2
,tx
);
1448 fjy2
= _mm_add_pd(fjy2
,ty
);
1449 fjz2
= _mm_add_pd(fjz2
,tz
);
1453 /**************************
1454 * CALCULATE INTERACTIONS *
1455 **************************/
1457 if (gmx_mm_any_lt(rsq23
,rcutoff2
))
1460 /* REACTION-FIELD ELECTROSTATICS */
1461 felec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_mul_pd(rinv23
,rinvsq23
),krf2
));
1463 cutoff_mask
= _mm_cmplt_pd(rsq23
,rcutoff2
);
1467 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1469 /* Calculate temporary vectorial force */
1470 tx
= _mm_mul_pd(fscal
,dx23
);
1471 ty
= _mm_mul_pd(fscal
,dy23
);
1472 tz
= _mm_mul_pd(fscal
,dz23
);
1474 /* Update vectorial force */
1475 fix2
= _mm_add_pd(fix2
,tx
);
1476 fiy2
= _mm_add_pd(fiy2
,ty
);
1477 fiz2
= _mm_add_pd(fiz2
,tz
);
1479 fjx3
= _mm_add_pd(fjx3
,tx
);
1480 fjy3
= _mm_add_pd(fjy3
,ty
);
1481 fjz3
= _mm_add_pd(fjz3
,tz
);
1485 /**************************
1486 * CALCULATE INTERACTIONS *
1487 **************************/
1489 if (gmx_mm_any_lt(rsq31
,rcutoff2
))
1492 /* REACTION-FIELD ELECTROSTATICS */
1493 felec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_mul_pd(rinv31
,rinvsq31
),krf2
));
1495 cutoff_mask
= _mm_cmplt_pd(rsq31
,rcutoff2
);
1499 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1501 /* Calculate temporary vectorial force */
1502 tx
= _mm_mul_pd(fscal
,dx31
);
1503 ty
= _mm_mul_pd(fscal
,dy31
);
1504 tz
= _mm_mul_pd(fscal
,dz31
);
1506 /* Update vectorial force */
1507 fix3
= _mm_add_pd(fix3
,tx
);
1508 fiy3
= _mm_add_pd(fiy3
,ty
);
1509 fiz3
= _mm_add_pd(fiz3
,tz
);
1511 fjx1
= _mm_add_pd(fjx1
,tx
);
1512 fjy1
= _mm_add_pd(fjy1
,ty
);
1513 fjz1
= _mm_add_pd(fjz1
,tz
);
1517 /**************************
1518 * CALCULATE INTERACTIONS *
1519 **************************/
1521 if (gmx_mm_any_lt(rsq32
,rcutoff2
))
1524 /* REACTION-FIELD ELECTROSTATICS */
1525 felec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_mul_pd(rinv32
,rinvsq32
),krf2
));
1527 cutoff_mask
= _mm_cmplt_pd(rsq32
,rcutoff2
);
1531 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1533 /* Calculate temporary vectorial force */
1534 tx
= _mm_mul_pd(fscal
,dx32
);
1535 ty
= _mm_mul_pd(fscal
,dy32
);
1536 tz
= _mm_mul_pd(fscal
,dz32
);
1538 /* Update vectorial force */
1539 fix3
= _mm_add_pd(fix3
,tx
);
1540 fiy3
= _mm_add_pd(fiy3
,ty
);
1541 fiz3
= _mm_add_pd(fiz3
,tz
);
1543 fjx2
= _mm_add_pd(fjx2
,tx
);
1544 fjy2
= _mm_add_pd(fjy2
,ty
);
1545 fjz2
= _mm_add_pd(fjz2
,tz
);
1549 /**************************
1550 * CALCULATE INTERACTIONS *
1551 **************************/
1553 if (gmx_mm_any_lt(rsq33
,rcutoff2
))
1556 /* REACTION-FIELD ELECTROSTATICS */
1557 felec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_mul_pd(rinv33
,rinvsq33
),krf2
));
1559 cutoff_mask
= _mm_cmplt_pd(rsq33
,rcutoff2
);
1563 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1565 /* Calculate temporary vectorial force */
1566 tx
= _mm_mul_pd(fscal
,dx33
);
1567 ty
= _mm_mul_pd(fscal
,dy33
);
1568 tz
= _mm_mul_pd(fscal
,dz33
);
1570 /* Update vectorial force */
1571 fix3
= _mm_add_pd(fix3
,tx
);
1572 fiy3
= _mm_add_pd(fiy3
,ty
);
1573 fiz3
= _mm_add_pd(fiz3
,tz
);
1575 fjx3
= _mm_add_pd(fjx3
,tx
);
1576 fjy3
= _mm_add_pd(fjy3
,ty
);
1577 fjz3
= _mm_add_pd(fjz3
,tz
);
1581 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
+DIM
,f
+j_coord_offsetB
+DIM
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
1583 /* Inner loop uses 270 flops */
1586 if(jidx
<j_index_end
)
1590 j_coord_offsetA
= DIM
*jnrA
;
1592 /* load j atom coordinates */
1593 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
+DIM
,
1594 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
1596 /* Calculate displacement vector */
1597 dx11
= _mm_sub_pd(ix1
,jx1
);
1598 dy11
= _mm_sub_pd(iy1
,jy1
);
1599 dz11
= _mm_sub_pd(iz1
,jz1
);
1600 dx12
= _mm_sub_pd(ix1
,jx2
);
1601 dy12
= _mm_sub_pd(iy1
,jy2
);
1602 dz12
= _mm_sub_pd(iz1
,jz2
);
1603 dx13
= _mm_sub_pd(ix1
,jx3
);
1604 dy13
= _mm_sub_pd(iy1
,jy3
);
1605 dz13
= _mm_sub_pd(iz1
,jz3
);
1606 dx21
= _mm_sub_pd(ix2
,jx1
);
1607 dy21
= _mm_sub_pd(iy2
,jy1
);
1608 dz21
= _mm_sub_pd(iz2
,jz1
);
1609 dx22
= _mm_sub_pd(ix2
,jx2
);
1610 dy22
= _mm_sub_pd(iy2
,jy2
);
1611 dz22
= _mm_sub_pd(iz2
,jz2
);
1612 dx23
= _mm_sub_pd(ix2
,jx3
);
1613 dy23
= _mm_sub_pd(iy2
,jy3
);
1614 dz23
= _mm_sub_pd(iz2
,jz3
);
1615 dx31
= _mm_sub_pd(ix3
,jx1
);
1616 dy31
= _mm_sub_pd(iy3
,jy1
);
1617 dz31
= _mm_sub_pd(iz3
,jz1
);
1618 dx32
= _mm_sub_pd(ix3
,jx2
);
1619 dy32
= _mm_sub_pd(iy3
,jy2
);
1620 dz32
= _mm_sub_pd(iz3
,jz2
);
1621 dx33
= _mm_sub_pd(ix3
,jx3
);
1622 dy33
= _mm_sub_pd(iy3
,jy3
);
1623 dz33
= _mm_sub_pd(iz3
,jz3
);
1625 /* Calculate squared distance and things based on it */
1626 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1627 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1628 rsq13
= gmx_mm_calc_rsq_pd(dx13
,dy13
,dz13
);
1629 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1630 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1631 rsq23
= gmx_mm_calc_rsq_pd(dx23
,dy23
,dz23
);
1632 rsq31
= gmx_mm_calc_rsq_pd(dx31
,dy31
,dz31
);
1633 rsq32
= gmx_mm_calc_rsq_pd(dx32
,dy32
,dz32
);
1634 rsq33
= gmx_mm_calc_rsq_pd(dx33
,dy33
,dz33
);
1636 rinv11
= sse2_invsqrt_d(rsq11
);
1637 rinv12
= sse2_invsqrt_d(rsq12
);
1638 rinv13
= sse2_invsqrt_d(rsq13
);
1639 rinv21
= sse2_invsqrt_d(rsq21
);
1640 rinv22
= sse2_invsqrt_d(rsq22
);
1641 rinv23
= sse2_invsqrt_d(rsq23
);
1642 rinv31
= sse2_invsqrt_d(rsq31
);
1643 rinv32
= sse2_invsqrt_d(rsq32
);
1644 rinv33
= sse2_invsqrt_d(rsq33
);
1646 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1647 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1648 rinvsq13
= _mm_mul_pd(rinv13
,rinv13
);
1649 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1650 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1651 rinvsq23
= _mm_mul_pd(rinv23
,rinv23
);
1652 rinvsq31
= _mm_mul_pd(rinv31
,rinv31
);
1653 rinvsq32
= _mm_mul_pd(rinv32
,rinv32
);
1654 rinvsq33
= _mm_mul_pd(rinv33
,rinv33
);
1656 fjx1
= _mm_setzero_pd();
1657 fjy1
= _mm_setzero_pd();
1658 fjz1
= _mm_setzero_pd();
1659 fjx2
= _mm_setzero_pd();
1660 fjy2
= _mm_setzero_pd();
1661 fjz2
= _mm_setzero_pd();
1662 fjx3
= _mm_setzero_pd();
1663 fjy3
= _mm_setzero_pd();
1664 fjz3
= _mm_setzero_pd();
1666 /**************************
1667 * CALCULATE INTERACTIONS *
1668 **************************/
1670 if (gmx_mm_any_lt(rsq11
,rcutoff2
))
1673 /* REACTION-FIELD ELECTROSTATICS */
1674 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1676 cutoff_mask
= _mm_cmplt_pd(rsq11
,rcutoff2
);
1680 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1682 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1684 /* Calculate temporary vectorial force */
1685 tx
= _mm_mul_pd(fscal
,dx11
);
1686 ty
= _mm_mul_pd(fscal
,dy11
);
1687 tz
= _mm_mul_pd(fscal
,dz11
);
1689 /* Update vectorial force */
1690 fix1
= _mm_add_pd(fix1
,tx
);
1691 fiy1
= _mm_add_pd(fiy1
,ty
);
1692 fiz1
= _mm_add_pd(fiz1
,tz
);
1694 fjx1
= _mm_add_pd(fjx1
,tx
);
1695 fjy1
= _mm_add_pd(fjy1
,ty
);
1696 fjz1
= _mm_add_pd(fjz1
,tz
);
1700 /**************************
1701 * CALCULATE INTERACTIONS *
1702 **************************/
1704 if (gmx_mm_any_lt(rsq12
,rcutoff2
))
1707 /* REACTION-FIELD ELECTROSTATICS */
1708 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1710 cutoff_mask
= _mm_cmplt_pd(rsq12
,rcutoff2
);
1714 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1716 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1718 /* Calculate temporary vectorial force */
1719 tx
= _mm_mul_pd(fscal
,dx12
);
1720 ty
= _mm_mul_pd(fscal
,dy12
);
1721 tz
= _mm_mul_pd(fscal
,dz12
);
1723 /* Update vectorial force */
1724 fix1
= _mm_add_pd(fix1
,tx
);
1725 fiy1
= _mm_add_pd(fiy1
,ty
);
1726 fiz1
= _mm_add_pd(fiz1
,tz
);
1728 fjx2
= _mm_add_pd(fjx2
,tx
);
1729 fjy2
= _mm_add_pd(fjy2
,ty
);
1730 fjz2
= _mm_add_pd(fjz2
,tz
);
1734 /**************************
1735 * CALCULATE INTERACTIONS *
1736 **************************/
1738 if (gmx_mm_any_lt(rsq13
,rcutoff2
))
1741 /* REACTION-FIELD ELECTROSTATICS */
1742 felec
= _mm_mul_pd(qq13
,_mm_sub_pd(_mm_mul_pd(rinv13
,rinvsq13
),krf2
));
1744 cutoff_mask
= _mm_cmplt_pd(rsq13
,rcutoff2
);
1748 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1750 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1752 /* Calculate temporary vectorial force */
1753 tx
= _mm_mul_pd(fscal
,dx13
);
1754 ty
= _mm_mul_pd(fscal
,dy13
);
1755 tz
= _mm_mul_pd(fscal
,dz13
);
1757 /* Update vectorial force */
1758 fix1
= _mm_add_pd(fix1
,tx
);
1759 fiy1
= _mm_add_pd(fiy1
,ty
);
1760 fiz1
= _mm_add_pd(fiz1
,tz
);
1762 fjx3
= _mm_add_pd(fjx3
,tx
);
1763 fjy3
= _mm_add_pd(fjy3
,ty
);
1764 fjz3
= _mm_add_pd(fjz3
,tz
);
1768 /**************************
1769 * CALCULATE INTERACTIONS *
1770 **************************/
1772 if (gmx_mm_any_lt(rsq21
,rcutoff2
))
1775 /* REACTION-FIELD ELECTROSTATICS */
1776 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1778 cutoff_mask
= _mm_cmplt_pd(rsq21
,rcutoff2
);
1782 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1784 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1786 /* Calculate temporary vectorial force */
1787 tx
= _mm_mul_pd(fscal
,dx21
);
1788 ty
= _mm_mul_pd(fscal
,dy21
);
1789 tz
= _mm_mul_pd(fscal
,dz21
);
1791 /* Update vectorial force */
1792 fix2
= _mm_add_pd(fix2
,tx
);
1793 fiy2
= _mm_add_pd(fiy2
,ty
);
1794 fiz2
= _mm_add_pd(fiz2
,tz
);
1796 fjx1
= _mm_add_pd(fjx1
,tx
);
1797 fjy1
= _mm_add_pd(fjy1
,ty
);
1798 fjz1
= _mm_add_pd(fjz1
,tz
);
1802 /**************************
1803 * CALCULATE INTERACTIONS *
1804 **************************/
1806 if (gmx_mm_any_lt(rsq22
,rcutoff2
))
1809 /* REACTION-FIELD ELECTROSTATICS */
1810 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1812 cutoff_mask
= _mm_cmplt_pd(rsq22
,rcutoff2
);
1816 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1818 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1820 /* Calculate temporary vectorial force */
1821 tx
= _mm_mul_pd(fscal
,dx22
);
1822 ty
= _mm_mul_pd(fscal
,dy22
);
1823 tz
= _mm_mul_pd(fscal
,dz22
);
1825 /* Update vectorial force */
1826 fix2
= _mm_add_pd(fix2
,tx
);
1827 fiy2
= _mm_add_pd(fiy2
,ty
);
1828 fiz2
= _mm_add_pd(fiz2
,tz
);
1830 fjx2
= _mm_add_pd(fjx2
,tx
);
1831 fjy2
= _mm_add_pd(fjy2
,ty
);
1832 fjz2
= _mm_add_pd(fjz2
,tz
);
1836 /**************************
1837 * CALCULATE INTERACTIONS *
1838 **************************/
1840 if (gmx_mm_any_lt(rsq23
,rcutoff2
))
1843 /* REACTION-FIELD ELECTROSTATICS */
1844 felec
= _mm_mul_pd(qq23
,_mm_sub_pd(_mm_mul_pd(rinv23
,rinvsq23
),krf2
));
1846 cutoff_mask
= _mm_cmplt_pd(rsq23
,rcutoff2
);
1850 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1852 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1854 /* Calculate temporary vectorial force */
1855 tx
= _mm_mul_pd(fscal
,dx23
);
1856 ty
= _mm_mul_pd(fscal
,dy23
);
1857 tz
= _mm_mul_pd(fscal
,dz23
);
1859 /* Update vectorial force */
1860 fix2
= _mm_add_pd(fix2
,tx
);
1861 fiy2
= _mm_add_pd(fiy2
,ty
);
1862 fiz2
= _mm_add_pd(fiz2
,tz
);
1864 fjx3
= _mm_add_pd(fjx3
,tx
);
1865 fjy3
= _mm_add_pd(fjy3
,ty
);
1866 fjz3
= _mm_add_pd(fjz3
,tz
);
1870 /**************************
1871 * CALCULATE INTERACTIONS *
1872 **************************/
1874 if (gmx_mm_any_lt(rsq31
,rcutoff2
))
1877 /* REACTION-FIELD ELECTROSTATICS */
1878 felec
= _mm_mul_pd(qq31
,_mm_sub_pd(_mm_mul_pd(rinv31
,rinvsq31
),krf2
));
1880 cutoff_mask
= _mm_cmplt_pd(rsq31
,rcutoff2
);
1884 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1886 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1888 /* Calculate temporary vectorial force */
1889 tx
= _mm_mul_pd(fscal
,dx31
);
1890 ty
= _mm_mul_pd(fscal
,dy31
);
1891 tz
= _mm_mul_pd(fscal
,dz31
);
1893 /* Update vectorial force */
1894 fix3
= _mm_add_pd(fix3
,tx
);
1895 fiy3
= _mm_add_pd(fiy3
,ty
);
1896 fiz3
= _mm_add_pd(fiz3
,tz
);
1898 fjx1
= _mm_add_pd(fjx1
,tx
);
1899 fjy1
= _mm_add_pd(fjy1
,ty
);
1900 fjz1
= _mm_add_pd(fjz1
,tz
);
1904 /**************************
1905 * CALCULATE INTERACTIONS *
1906 **************************/
1908 if (gmx_mm_any_lt(rsq32
,rcutoff2
))
1911 /* REACTION-FIELD ELECTROSTATICS */
1912 felec
= _mm_mul_pd(qq32
,_mm_sub_pd(_mm_mul_pd(rinv32
,rinvsq32
),krf2
));
1914 cutoff_mask
= _mm_cmplt_pd(rsq32
,rcutoff2
);
1918 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1920 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1922 /* Calculate temporary vectorial force */
1923 tx
= _mm_mul_pd(fscal
,dx32
);
1924 ty
= _mm_mul_pd(fscal
,dy32
);
1925 tz
= _mm_mul_pd(fscal
,dz32
);
1927 /* Update vectorial force */
1928 fix3
= _mm_add_pd(fix3
,tx
);
1929 fiy3
= _mm_add_pd(fiy3
,ty
);
1930 fiz3
= _mm_add_pd(fiz3
,tz
);
1932 fjx2
= _mm_add_pd(fjx2
,tx
);
1933 fjy2
= _mm_add_pd(fjy2
,ty
);
1934 fjz2
= _mm_add_pd(fjz2
,tz
);
1938 /**************************
1939 * CALCULATE INTERACTIONS *
1940 **************************/
1942 if (gmx_mm_any_lt(rsq33
,rcutoff2
))
1945 /* REACTION-FIELD ELECTROSTATICS */
1946 felec
= _mm_mul_pd(qq33
,_mm_sub_pd(_mm_mul_pd(rinv33
,rinvsq33
),krf2
));
1948 cutoff_mask
= _mm_cmplt_pd(rsq33
,rcutoff2
);
1952 fscal
= _mm_and_pd(fscal
,cutoff_mask
);
1954 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1956 /* Calculate temporary vectorial force */
1957 tx
= _mm_mul_pd(fscal
,dx33
);
1958 ty
= _mm_mul_pd(fscal
,dy33
);
1959 tz
= _mm_mul_pd(fscal
,dz33
);
1961 /* Update vectorial force */
1962 fix3
= _mm_add_pd(fix3
,tx
);
1963 fiy3
= _mm_add_pd(fiy3
,ty
);
1964 fiz3
= _mm_add_pd(fiz3
,tz
);
1966 fjx3
= _mm_add_pd(fjx3
,tx
);
1967 fjy3
= _mm_add_pd(fjy3
,ty
);
1968 fjz3
= _mm_add_pd(fjz3
,tz
);
1972 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
+DIM
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
1974 /* Inner loop uses 270 flops */
1977 /* End of innermost loop */
1979 gmx_mm_update_iforce_3atom_swizzle_pd(fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
,
1980 f
+i_coord_offset
+DIM
,fshift
+i_shift_offset
);
1982 /* Increment number of inner iterations */
1983 inneriter
+= j_index_end
- j_index_start
;
1985 /* Outer loop uses 18 flops */
1988 /* Increment number of outer iterations */
1991 /* Update outer/inner flops */
1993 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W4W4_F
,outeriter
*18 + inneriter
*270);