/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*
 * Note: this file was generated by the GROMACS sse2_double kernel generator.
 */
44 #include "../nb_kernel.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/legacyheaders/nrnb.h"
48 #include "gromacs/simd/math_x86_sse2_double.h"
49 #include "kernelutil_x86_sse2_double.h"
52 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_double
53 * Electrostatics interaction: ReactionField
54 * VdW interaction: None
55 * Geometry: Water3-Water3
56 * Calculate force/pot: PotentialAndForce
59 nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sse2_double
60 (t_nblist
* gmx_restrict nlist
,
61 rvec
* gmx_restrict xx
,
62 rvec
* gmx_restrict ff
,
63 t_forcerec
* gmx_restrict fr
,
64 t_mdatoms
* gmx_restrict mdatoms
,
65 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
66 t_nrnb
* gmx_restrict nrnb
)
68 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
69 * just 0 for non-waters.
70 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
71 * jnr indices corresponding to data put in the four positions in the SIMD register.
73 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
74 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
76 int j_coord_offsetA
,j_coord_offsetB
;
77 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
79 real
*shiftvec
,*fshift
,*x
,*f
;
80 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
82 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
84 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
86 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
87 int vdwjidx0A
,vdwjidx0B
;
88 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
89 int vdwjidx1A
,vdwjidx1B
;
90 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
91 int vdwjidx2A
,vdwjidx2B
;
92 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
93 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
94 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
95 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
96 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
97 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
98 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
99 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
100 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
101 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
102 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
104 __m128d dummy_mask
,cutoff_mask
;
105 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
106 __m128d one
= _mm_set1_pd(1.0);
107 __m128d two
= _mm_set1_pd(2.0);
113 jindex
= nlist
->jindex
;
115 shiftidx
= nlist
->shift
;
117 shiftvec
= fr
->shift_vec
[0];
118 fshift
= fr
->fshift
[0];
119 facel
= _mm_set1_pd(fr
->epsfac
);
120 charge
= mdatoms
->chargeA
;
121 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
122 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
123 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
125 /* Setup water-specific parameters */
126 inr
= nlist
->iinr
[0];
127 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
128 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
129 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
131 jq0
= _mm_set1_pd(charge
[inr
+0]);
132 jq1
= _mm_set1_pd(charge
[inr
+1]);
133 jq2
= _mm_set1_pd(charge
[inr
+2]);
134 qq00
= _mm_mul_pd(iq0
,jq0
);
135 qq01
= _mm_mul_pd(iq0
,jq1
);
136 qq02
= _mm_mul_pd(iq0
,jq2
);
137 qq10
= _mm_mul_pd(iq1
,jq0
);
138 qq11
= _mm_mul_pd(iq1
,jq1
);
139 qq12
= _mm_mul_pd(iq1
,jq2
);
140 qq20
= _mm_mul_pd(iq2
,jq0
);
141 qq21
= _mm_mul_pd(iq2
,jq1
);
142 qq22
= _mm_mul_pd(iq2
,jq2
);
144 /* Avoid stupid compiler warnings */
152 /* Start outer loop over neighborlists */
153 for(iidx
=0; iidx
<nri
; iidx
++)
155 /* Load shift vector for this list */
156 i_shift_offset
= DIM
*shiftidx
[iidx
];
158 /* Load limits for loop over neighbors */
159 j_index_start
= jindex
[iidx
];
160 j_index_end
= jindex
[iidx
+1];
162 /* Get outer coordinate index */
164 i_coord_offset
= DIM
*inr
;
166 /* Load i particle coords and add shift vector */
167 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
168 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
170 fix0
= _mm_setzero_pd();
171 fiy0
= _mm_setzero_pd();
172 fiz0
= _mm_setzero_pd();
173 fix1
= _mm_setzero_pd();
174 fiy1
= _mm_setzero_pd();
175 fiz1
= _mm_setzero_pd();
176 fix2
= _mm_setzero_pd();
177 fiy2
= _mm_setzero_pd();
178 fiz2
= _mm_setzero_pd();
180 /* Reset potential sums */
181 velecsum
= _mm_setzero_pd();
183 /* Start inner kernel loop */
184 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
187 /* Get j neighbor index, and coordinate index */
190 j_coord_offsetA
= DIM
*jnrA
;
191 j_coord_offsetB
= DIM
*jnrB
;
193 /* load j atom coordinates */
194 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
195 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
197 /* Calculate displacement vector */
198 dx00
= _mm_sub_pd(ix0
,jx0
);
199 dy00
= _mm_sub_pd(iy0
,jy0
);
200 dz00
= _mm_sub_pd(iz0
,jz0
);
201 dx01
= _mm_sub_pd(ix0
,jx1
);
202 dy01
= _mm_sub_pd(iy0
,jy1
);
203 dz01
= _mm_sub_pd(iz0
,jz1
);
204 dx02
= _mm_sub_pd(ix0
,jx2
);
205 dy02
= _mm_sub_pd(iy0
,jy2
);
206 dz02
= _mm_sub_pd(iz0
,jz2
);
207 dx10
= _mm_sub_pd(ix1
,jx0
);
208 dy10
= _mm_sub_pd(iy1
,jy0
);
209 dz10
= _mm_sub_pd(iz1
,jz0
);
210 dx11
= _mm_sub_pd(ix1
,jx1
);
211 dy11
= _mm_sub_pd(iy1
,jy1
);
212 dz11
= _mm_sub_pd(iz1
,jz1
);
213 dx12
= _mm_sub_pd(ix1
,jx2
);
214 dy12
= _mm_sub_pd(iy1
,jy2
);
215 dz12
= _mm_sub_pd(iz1
,jz2
);
216 dx20
= _mm_sub_pd(ix2
,jx0
);
217 dy20
= _mm_sub_pd(iy2
,jy0
);
218 dz20
= _mm_sub_pd(iz2
,jz0
);
219 dx21
= _mm_sub_pd(ix2
,jx1
);
220 dy21
= _mm_sub_pd(iy2
,jy1
);
221 dz21
= _mm_sub_pd(iz2
,jz1
);
222 dx22
= _mm_sub_pd(ix2
,jx2
);
223 dy22
= _mm_sub_pd(iy2
,jy2
);
224 dz22
= _mm_sub_pd(iz2
,jz2
);
226 /* Calculate squared distance and things based on it */
227 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
228 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
229 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
230 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
231 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
232 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
233 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
234 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
235 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
237 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
238 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
239 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
240 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
241 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
242 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
243 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
244 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
245 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
247 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
248 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
249 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
250 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
251 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
252 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
253 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
254 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
255 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
257 fjx0
= _mm_setzero_pd();
258 fjy0
= _mm_setzero_pd();
259 fjz0
= _mm_setzero_pd();
260 fjx1
= _mm_setzero_pd();
261 fjy1
= _mm_setzero_pd();
262 fjz1
= _mm_setzero_pd();
263 fjx2
= _mm_setzero_pd();
264 fjy2
= _mm_setzero_pd();
265 fjz2
= _mm_setzero_pd();
267 /**************************
268 * CALCULATE INTERACTIONS *
269 **************************/
271 /* REACTION-FIELD ELECTROSTATICS */
272 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_add_pd(rinv00
,_mm_mul_pd(krf
,rsq00
)),crf
));
273 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
275 /* Update potential sum for this i atom from the interaction with this j atom. */
276 velecsum
= _mm_add_pd(velecsum
,velec
);
280 /* Calculate temporary vectorial force */
281 tx
= _mm_mul_pd(fscal
,dx00
);
282 ty
= _mm_mul_pd(fscal
,dy00
);
283 tz
= _mm_mul_pd(fscal
,dz00
);
285 /* Update vectorial force */
286 fix0
= _mm_add_pd(fix0
,tx
);
287 fiy0
= _mm_add_pd(fiy0
,ty
);
288 fiz0
= _mm_add_pd(fiz0
,tz
);
290 fjx0
= _mm_add_pd(fjx0
,tx
);
291 fjy0
= _mm_add_pd(fjy0
,ty
);
292 fjz0
= _mm_add_pd(fjz0
,tz
);
294 /**************************
295 * CALCULATE INTERACTIONS *
296 **************************/
298 /* REACTION-FIELD ELECTROSTATICS */
299 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_add_pd(rinv01
,_mm_mul_pd(krf
,rsq01
)),crf
));
300 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
302 /* Update potential sum for this i atom from the interaction with this j atom. */
303 velecsum
= _mm_add_pd(velecsum
,velec
);
307 /* Calculate temporary vectorial force */
308 tx
= _mm_mul_pd(fscal
,dx01
);
309 ty
= _mm_mul_pd(fscal
,dy01
);
310 tz
= _mm_mul_pd(fscal
,dz01
);
312 /* Update vectorial force */
313 fix0
= _mm_add_pd(fix0
,tx
);
314 fiy0
= _mm_add_pd(fiy0
,ty
);
315 fiz0
= _mm_add_pd(fiz0
,tz
);
317 fjx1
= _mm_add_pd(fjx1
,tx
);
318 fjy1
= _mm_add_pd(fjy1
,ty
);
319 fjz1
= _mm_add_pd(fjz1
,tz
);
321 /**************************
322 * CALCULATE INTERACTIONS *
323 **************************/
325 /* REACTION-FIELD ELECTROSTATICS */
326 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_add_pd(rinv02
,_mm_mul_pd(krf
,rsq02
)),crf
));
327 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
329 /* Update potential sum for this i atom from the interaction with this j atom. */
330 velecsum
= _mm_add_pd(velecsum
,velec
);
334 /* Calculate temporary vectorial force */
335 tx
= _mm_mul_pd(fscal
,dx02
);
336 ty
= _mm_mul_pd(fscal
,dy02
);
337 tz
= _mm_mul_pd(fscal
,dz02
);
339 /* Update vectorial force */
340 fix0
= _mm_add_pd(fix0
,tx
);
341 fiy0
= _mm_add_pd(fiy0
,ty
);
342 fiz0
= _mm_add_pd(fiz0
,tz
);
344 fjx2
= _mm_add_pd(fjx2
,tx
);
345 fjy2
= _mm_add_pd(fjy2
,ty
);
346 fjz2
= _mm_add_pd(fjz2
,tz
);
348 /**************************
349 * CALCULATE INTERACTIONS *
350 **************************/
352 /* REACTION-FIELD ELECTROSTATICS */
353 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_add_pd(rinv10
,_mm_mul_pd(krf
,rsq10
)),crf
));
354 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
356 /* Update potential sum for this i atom from the interaction with this j atom. */
357 velecsum
= _mm_add_pd(velecsum
,velec
);
361 /* Calculate temporary vectorial force */
362 tx
= _mm_mul_pd(fscal
,dx10
);
363 ty
= _mm_mul_pd(fscal
,dy10
);
364 tz
= _mm_mul_pd(fscal
,dz10
);
366 /* Update vectorial force */
367 fix1
= _mm_add_pd(fix1
,tx
);
368 fiy1
= _mm_add_pd(fiy1
,ty
);
369 fiz1
= _mm_add_pd(fiz1
,tz
);
371 fjx0
= _mm_add_pd(fjx0
,tx
);
372 fjy0
= _mm_add_pd(fjy0
,ty
);
373 fjz0
= _mm_add_pd(fjz0
,tz
);
375 /**************************
376 * CALCULATE INTERACTIONS *
377 **************************/
379 /* REACTION-FIELD ELECTROSTATICS */
380 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
381 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
383 /* Update potential sum for this i atom from the interaction with this j atom. */
384 velecsum
= _mm_add_pd(velecsum
,velec
);
388 /* Calculate temporary vectorial force */
389 tx
= _mm_mul_pd(fscal
,dx11
);
390 ty
= _mm_mul_pd(fscal
,dy11
);
391 tz
= _mm_mul_pd(fscal
,dz11
);
393 /* Update vectorial force */
394 fix1
= _mm_add_pd(fix1
,tx
);
395 fiy1
= _mm_add_pd(fiy1
,ty
);
396 fiz1
= _mm_add_pd(fiz1
,tz
);
398 fjx1
= _mm_add_pd(fjx1
,tx
);
399 fjy1
= _mm_add_pd(fjy1
,ty
);
400 fjz1
= _mm_add_pd(fjz1
,tz
);
402 /**************************
403 * CALCULATE INTERACTIONS *
404 **************************/
406 /* REACTION-FIELD ELECTROSTATICS */
407 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
408 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
410 /* Update potential sum for this i atom from the interaction with this j atom. */
411 velecsum
= _mm_add_pd(velecsum
,velec
);
415 /* Calculate temporary vectorial force */
416 tx
= _mm_mul_pd(fscal
,dx12
);
417 ty
= _mm_mul_pd(fscal
,dy12
);
418 tz
= _mm_mul_pd(fscal
,dz12
);
420 /* Update vectorial force */
421 fix1
= _mm_add_pd(fix1
,tx
);
422 fiy1
= _mm_add_pd(fiy1
,ty
);
423 fiz1
= _mm_add_pd(fiz1
,tz
);
425 fjx2
= _mm_add_pd(fjx2
,tx
);
426 fjy2
= _mm_add_pd(fjy2
,ty
);
427 fjz2
= _mm_add_pd(fjz2
,tz
);
429 /**************************
430 * CALCULATE INTERACTIONS *
431 **************************/
433 /* REACTION-FIELD ELECTROSTATICS */
434 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_add_pd(rinv20
,_mm_mul_pd(krf
,rsq20
)),crf
));
435 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
437 /* Update potential sum for this i atom from the interaction with this j atom. */
438 velecsum
= _mm_add_pd(velecsum
,velec
);
442 /* Calculate temporary vectorial force */
443 tx
= _mm_mul_pd(fscal
,dx20
);
444 ty
= _mm_mul_pd(fscal
,dy20
);
445 tz
= _mm_mul_pd(fscal
,dz20
);
447 /* Update vectorial force */
448 fix2
= _mm_add_pd(fix2
,tx
);
449 fiy2
= _mm_add_pd(fiy2
,ty
);
450 fiz2
= _mm_add_pd(fiz2
,tz
);
452 fjx0
= _mm_add_pd(fjx0
,tx
);
453 fjy0
= _mm_add_pd(fjy0
,ty
);
454 fjz0
= _mm_add_pd(fjz0
,tz
);
456 /**************************
457 * CALCULATE INTERACTIONS *
458 **************************/
460 /* REACTION-FIELD ELECTROSTATICS */
461 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
462 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
464 /* Update potential sum for this i atom from the interaction with this j atom. */
465 velecsum
= _mm_add_pd(velecsum
,velec
);
469 /* Calculate temporary vectorial force */
470 tx
= _mm_mul_pd(fscal
,dx21
);
471 ty
= _mm_mul_pd(fscal
,dy21
);
472 tz
= _mm_mul_pd(fscal
,dz21
);
474 /* Update vectorial force */
475 fix2
= _mm_add_pd(fix2
,tx
);
476 fiy2
= _mm_add_pd(fiy2
,ty
);
477 fiz2
= _mm_add_pd(fiz2
,tz
);
479 fjx1
= _mm_add_pd(fjx1
,tx
);
480 fjy1
= _mm_add_pd(fjy1
,ty
);
481 fjz1
= _mm_add_pd(fjz1
,tz
);
483 /**************************
484 * CALCULATE INTERACTIONS *
485 **************************/
487 /* REACTION-FIELD ELECTROSTATICS */
488 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
489 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
491 /* Update potential sum for this i atom from the interaction with this j atom. */
492 velecsum
= _mm_add_pd(velecsum
,velec
);
496 /* Calculate temporary vectorial force */
497 tx
= _mm_mul_pd(fscal
,dx22
);
498 ty
= _mm_mul_pd(fscal
,dy22
);
499 tz
= _mm_mul_pd(fscal
,dz22
);
501 /* Update vectorial force */
502 fix2
= _mm_add_pd(fix2
,tx
);
503 fiy2
= _mm_add_pd(fiy2
,ty
);
504 fiz2
= _mm_add_pd(fiz2
,tz
);
506 fjx2
= _mm_add_pd(fjx2
,tx
);
507 fjy2
= _mm_add_pd(fjy2
,ty
);
508 fjz2
= _mm_add_pd(fjz2
,tz
);
510 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
512 /* Inner loop uses 288 flops */
519 j_coord_offsetA
= DIM
*jnrA
;
521 /* load j atom coordinates */
522 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
523 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
525 /* Calculate displacement vector */
526 dx00
= _mm_sub_pd(ix0
,jx0
);
527 dy00
= _mm_sub_pd(iy0
,jy0
);
528 dz00
= _mm_sub_pd(iz0
,jz0
);
529 dx01
= _mm_sub_pd(ix0
,jx1
);
530 dy01
= _mm_sub_pd(iy0
,jy1
);
531 dz01
= _mm_sub_pd(iz0
,jz1
);
532 dx02
= _mm_sub_pd(ix0
,jx2
);
533 dy02
= _mm_sub_pd(iy0
,jy2
);
534 dz02
= _mm_sub_pd(iz0
,jz2
);
535 dx10
= _mm_sub_pd(ix1
,jx0
);
536 dy10
= _mm_sub_pd(iy1
,jy0
);
537 dz10
= _mm_sub_pd(iz1
,jz0
);
538 dx11
= _mm_sub_pd(ix1
,jx1
);
539 dy11
= _mm_sub_pd(iy1
,jy1
);
540 dz11
= _mm_sub_pd(iz1
,jz1
);
541 dx12
= _mm_sub_pd(ix1
,jx2
);
542 dy12
= _mm_sub_pd(iy1
,jy2
);
543 dz12
= _mm_sub_pd(iz1
,jz2
);
544 dx20
= _mm_sub_pd(ix2
,jx0
);
545 dy20
= _mm_sub_pd(iy2
,jy0
);
546 dz20
= _mm_sub_pd(iz2
,jz0
);
547 dx21
= _mm_sub_pd(ix2
,jx1
);
548 dy21
= _mm_sub_pd(iy2
,jy1
);
549 dz21
= _mm_sub_pd(iz2
,jz1
);
550 dx22
= _mm_sub_pd(ix2
,jx2
);
551 dy22
= _mm_sub_pd(iy2
,jy2
);
552 dz22
= _mm_sub_pd(iz2
,jz2
);
554 /* Calculate squared distance and things based on it */
555 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
556 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
557 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
558 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
559 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
560 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
561 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
562 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
563 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
565 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
566 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
567 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
568 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
569 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
570 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
571 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
572 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
573 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
575 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
576 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
577 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
578 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
579 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
580 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
581 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
582 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
583 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
585 fjx0
= _mm_setzero_pd();
586 fjy0
= _mm_setzero_pd();
587 fjz0
= _mm_setzero_pd();
588 fjx1
= _mm_setzero_pd();
589 fjy1
= _mm_setzero_pd();
590 fjz1
= _mm_setzero_pd();
591 fjx2
= _mm_setzero_pd();
592 fjy2
= _mm_setzero_pd();
593 fjz2
= _mm_setzero_pd();
595 /**************************
596 * CALCULATE INTERACTIONS *
597 **************************/
599 /* REACTION-FIELD ELECTROSTATICS */
600 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_add_pd(rinv00
,_mm_mul_pd(krf
,rsq00
)),crf
));
601 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
603 /* Update potential sum for this i atom from the interaction with this j atom. */
604 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
605 velecsum
= _mm_add_pd(velecsum
,velec
);
609 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
611 /* Calculate temporary vectorial force */
612 tx
= _mm_mul_pd(fscal
,dx00
);
613 ty
= _mm_mul_pd(fscal
,dy00
);
614 tz
= _mm_mul_pd(fscal
,dz00
);
616 /* Update vectorial force */
617 fix0
= _mm_add_pd(fix0
,tx
);
618 fiy0
= _mm_add_pd(fiy0
,ty
);
619 fiz0
= _mm_add_pd(fiz0
,tz
);
621 fjx0
= _mm_add_pd(fjx0
,tx
);
622 fjy0
= _mm_add_pd(fjy0
,ty
);
623 fjz0
= _mm_add_pd(fjz0
,tz
);
625 /**************************
626 * CALCULATE INTERACTIONS *
627 **************************/
629 /* REACTION-FIELD ELECTROSTATICS */
630 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_add_pd(rinv01
,_mm_mul_pd(krf
,rsq01
)),crf
));
631 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
633 /* Update potential sum for this i atom from the interaction with this j atom. */
634 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
635 velecsum
= _mm_add_pd(velecsum
,velec
);
639 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
641 /* Calculate temporary vectorial force */
642 tx
= _mm_mul_pd(fscal
,dx01
);
643 ty
= _mm_mul_pd(fscal
,dy01
);
644 tz
= _mm_mul_pd(fscal
,dz01
);
646 /* Update vectorial force */
647 fix0
= _mm_add_pd(fix0
,tx
);
648 fiy0
= _mm_add_pd(fiy0
,ty
);
649 fiz0
= _mm_add_pd(fiz0
,tz
);
651 fjx1
= _mm_add_pd(fjx1
,tx
);
652 fjy1
= _mm_add_pd(fjy1
,ty
);
653 fjz1
= _mm_add_pd(fjz1
,tz
);
655 /**************************
656 * CALCULATE INTERACTIONS *
657 **************************/
659 /* REACTION-FIELD ELECTROSTATICS */
660 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_add_pd(rinv02
,_mm_mul_pd(krf
,rsq02
)),crf
));
661 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
663 /* Update potential sum for this i atom from the interaction with this j atom. */
664 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
665 velecsum
= _mm_add_pd(velecsum
,velec
);
669 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
671 /* Calculate temporary vectorial force */
672 tx
= _mm_mul_pd(fscal
,dx02
);
673 ty
= _mm_mul_pd(fscal
,dy02
);
674 tz
= _mm_mul_pd(fscal
,dz02
);
676 /* Update vectorial force */
677 fix0
= _mm_add_pd(fix0
,tx
);
678 fiy0
= _mm_add_pd(fiy0
,ty
);
679 fiz0
= _mm_add_pd(fiz0
,tz
);
681 fjx2
= _mm_add_pd(fjx2
,tx
);
682 fjy2
= _mm_add_pd(fjy2
,ty
);
683 fjz2
= _mm_add_pd(fjz2
,tz
);
685 /**************************
686 * CALCULATE INTERACTIONS *
687 **************************/
689 /* REACTION-FIELD ELECTROSTATICS */
690 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_add_pd(rinv10
,_mm_mul_pd(krf
,rsq10
)),crf
));
691 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
693 /* Update potential sum for this i atom from the interaction with this j atom. */
694 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
695 velecsum
= _mm_add_pd(velecsum
,velec
);
699 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
701 /* Calculate temporary vectorial force */
702 tx
= _mm_mul_pd(fscal
,dx10
);
703 ty
= _mm_mul_pd(fscal
,dy10
);
704 tz
= _mm_mul_pd(fscal
,dz10
);
706 /* Update vectorial force */
707 fix1
= _mm_add_pd(fix1
,tx
);
708 fiy1
= _mm_add_pd(fiy1
,ty
);
709 fiz1
= _mm_add_pd(fiz1
,tz
);
711 fjx0
= _mm_add_pd(fjx0
,tx
);
712 fjy0
= _mm_add_pd(fjy0
,ty
);
713 fjz0
= _mm_add_pd(fjz0
,tz
);
715 /**************************
716 * CALCULATE INTERACTIONS *
717 **************************/
719 /* REACTION-FIELD ELECTROSTATICS */
720 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_add_pd(rinv11
,_mm_mul_pd(krf
,rsq11
)),crf
));
721 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
723 /* Update potential sum for this i atom from the interaction with this j atom. */
724 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
725 velecsum
= _mm_add_pd(velecsum
,velec
);
729 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
731 /* Calculate temporary vectorial force */
732 tx
= _mm_mul_pd(fscal
,dx11
);
733 ty
= _mm_mul_pd(fscal
,dy11
);
734 tz
= _mm_mul_pd(fscal
,dz11
);
736 /* Update vectorial force */
737 fix1
= _mm_add_pd(fix1
,tx
);
738 fiy1
= _mm_add_pd(fiy1
,ty
);
739 fiz1
= _mm_add_pd(fiz1
,tz
);
741 fjx1
= _mm_add_pd(fjx1
,tx
);
742 fjy1
= _mm_add_pd(fjy1
,ty
);
743 fjz1
= _mm_add_pd(fjz1
,tz
);
745 /**************************
746 * CALCULATE INTERACTIONS *
747 **************************/
749 /* REACTION-FIELD ELECTROSTATICS */
750 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_add_pd(rinv12
,_mm_mul_pd(krf
,rsq12
)),crf
));
751 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
753 /* Update potential sum for this i atom from the interaction with this j atom. */
754 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
755 velecsum
= _mm_add_pd(velecsum
,velec
);
759 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
761 /* Calculate temporary vectorial force */
762 tx
= _mm_mul_pd(fscal
,dx12
);
763 ty
= _mm_mul_pd(fscal
,dy12
);
764 tz
= _mm_mul_pd(fscal
,dz12
);
766 /* Update vectorial force */
767 fix1
= _mm_add_pd(fix1
,tx
);
768 fiy1
= _mm_add_pd(fiy1
,ty
);
769 fiz1
= _mm_add_pd(fiz1
,tz
);
771 fjx2
= _mm_add_pd(fjx2
,tx
);
772 fjy2
= _mm_add_pd(fjy2
,ty
);
773 fjz2
= _mm_add_pd(fjz2
,tz
);
775 /**************************
776 * CALCULATE INTERACTIONS *
777 **************************/
779 /* REACTION-FIELD ELECTROSTATICS */
780 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_add_pd(rinv20
,_mm_mul_pd(krf
,rsq20
)),crf
));
781 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
783 /* Update potential sum for this i atom from the interaction with this j atom. */
784 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
785 velecsum
= _mm_add_pd(velecsum
,velec
);
789 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
791 /* Calculate temporary vectorial force */
792 tx
= _mm_mul_pd(fscal
,dx20
);
793 ty
= _mm_mul_pd(fscal
,dy20
);
794 tz
= _mm_mul_pd(fscal
,dz20
);
796 /* Update vectorial force */
797 fix2
= _mm_add_pd(fix2
,tx
);
798 fiy2
= _mm_add_pd(fiy2
,ty
);
799 fiz2
= _mm_add_pd(fiz2
,tz
);
801 fjx0
= _mm_add_pd(fjx0
,tx
);
802 fjy0
= _mm_add_pd(fjy0
,ty
);
803 fjz0
= _mm_add_pd(fjz0
,tz
);
805 /**************************
806 * CALCULATE INTERACTIONS *
807 **************************/
809 /* REACTION-FIELD ELECTROSTATICS */
810 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_add_pd(rinv21
,_mm_mul_pd(krf
,rsq21
)),crf
));
811 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
813 /* Update potential sum for this i atom from the interaction with this j atom. */
814 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
815 velecsum
= _mm_add_pd(velecsum
,velec
);
819 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
821 /* Calculate temporary vectorial force */
822 tx
= _mm_mul_pd(fscal
,dx21
);
823 ty
= _mm_mul_pd(fscal
,dy21
);
824 tz
= _mm_mul_pd(fscal
,dz21
);
826 /* Update vectorial force */
827 fix2
= _mm_add_pd(fix2
,tx
);
828 fiy2
= _mm_add_pd(fiy2
,ty
);
829 fiz2
= _mm_add_pd(fiz2
,tz
);
831 fjx1
= _mm_add_pd(fjx1
,tx
);
832 fjy1
= _mm_add_pd(fjy1
,ty
);
833 fjz1
= _mm_add_pd(fjz1
,tz
);
835 /**************************
836 * CALCULATE INTERACTIONS *
837 **************************/
839 /* REACTION-FIELD ELECTROSTATICS */
840 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_add_pd(rinv22
,_mm_mul_pd(krf
,rsq22
)),crf
));
841 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
843 /* Update potential sum for this i atom from the interaction with this j atom. */
844 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
845 velecsum
= _mm_add_pd(velecsum
,velec
);
849 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
851 /* Calculate temporary vectorial force */
852 tx
= _mm_mul_pd(fscal
,dx22
);
853 ty
= _mm_mul_pd(fscal
,dy22
);
854 tz
= _mm_mul_pd(fscal
,dz22
);
856 /* Update vectorial force */
857 fix2
= _mm_add_pd(fix2
,tx
);
858 fiy2
= _mm_add_pd(fiy2
,ty
);
859 fiz2
= _mm_add_pd(fiz2
,tz
);
861 fjx2
= _mm_add_pd(fjx2
,tx
);
862 fjy2
= _mm_add_pd(fjy2
,ty
);
863 fjz2
= _mm_add_pd(fjz2
,tz
);
865 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
867 /* Inner loop uses 288 flops */
870 /* End of innermost loop */
872 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
873 f
+i_coord_offset
,fshift
+i_shift_offset
);
876 /* Update potential energies */
877 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
879 /* Increment number of inner iterations */
880 inneriter
+= j_index_end
- j_index_start
;
882 /* Outer loop uses 19 flops */
885 /* Increment number of outer iterations */
888 /* Update outer/inner flops */
890 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_VF
,outeriter
*19 + inneriter
*288);
/*
 * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_double
 * Electrostatics interaction: ReactionField
 * VdW interaction:            None
 * Geometry:                   Water3-Water3
 * Calculate force/pot:        Force
 */
900 nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sse2_double
901 (t_nblist
* gmx_restrict nlist
,
902 rvec
* gmx_restrict xx
,
903 rvec
* gmx_restrict ff
,
904 t_forcerec
* gmx_restrict fr
,
905 t_mdatoms
* gmx_restrict mdatoms
,
906 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
907 t_nrnb
* gmx_restrict nrnb
)
909 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
910 * just 0 for non-waters.
911 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
912 * jnr indices corresponding to data put in the four positions in the SIMD register.
914 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
915 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
917 int j_coord_offsetA
,j_coord_offsetB
;
918 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
920 real
*shiftvec
,*fshift
,*x
,*f
;
921 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
923 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
925 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
927 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
928 int vdwjidx0A
,vdwjidx0B
;
929 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
930 int vdwjidx1A
,vdwjidx1B
;
931 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
932 int vdwjidx2A
,vdwjidx2B
;
933 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
934 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
935 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
936 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
937 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
938 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
939 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
940 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
941 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
942 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
943 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
945 __m128d dummy_mask
,cutoff_mask
;
946 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
947 __m128d one
= _mm_set1_pd(1.0);
948 __m128d two
= _mm_set1_pd(2.0);
954 jindex
= nlist
->jindex
;
956 shiftidx
= nlist
->shift
;
958 shiftvec
= fr
->shift_vec
[0];
959 fshift
= fr
->fshift
[0];
960 facel
= _mm_set1_pd(fr
->epsfac
);
961 charge
= mdatoms
->chargeA
;
962 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
963 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
964 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
966 /* Setup water-specific parameters */
967 inr
= nlist
->iinr
[0];
968 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
969 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
970 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
972 jq0
= _mm_set1_pd(charge
[inr
+0]);
973 jq1
= _mm_set1_pd(charge
[inr
+1]);
974 jq2
= _mm_set1_pd(charge
[inr
+2]);
975 qq00
= _mm_mul_pd(iq0
,jq0
);
976 qq01
= _mm_mul_pd(iq0
,jq1
);
977 qq02
= _mm_mul_pd(iq0
,jq2
);
978 qq10
= _mm_mul_pd(iq1
,jq0
);
979 qq11
= _mm_mul_pd(iq1
,jq1
);
980 qq12
= _mm_mul_pd(iq1
,jq2
);
981 qq20
= _mm_mul_pd(iq2
,jq0
);
982 qq21
= _mm_mul_pd(iq2
,jq1
);
983 qq22
= _mm_mul_pd(iq2
,jq2
);
985 /* Avoid stupid compiler warnings */
993 /* Start outer loop over neighborlists */
994 for(iidx
=0; iidx
<nri
; iidx
++)
996 /* Load shift vector for this list */
997 i_shift_offset
= DIM
*shiftidx
[iidx
];
999 /* Load limits for loop over neighbors */
1000 j_index_start
= jindex
[iidx
];
1001 j_index_end
= jindex
[iidx
+1];
1003 /* Get outer coordinate index */
1005 i_coord_offset
= DIM
*inr
;
1007 /* Load i particle coords and add shift vector */
1008 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
1009 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
1011 fix0
= _mm_setzero_pd();
1012 fiy0
= _mm_setzero_pd();
1013 fiz0
= _mm_setzero_pd();
1014 fix1
= _mm_setzero_pd();
1015 fiy1
= _mm_setzero_pd();
1016 fiz1
= _mm_setzero_pd();
1017 fix2
= _mm_setzero_pd();
1018 fiy2
= _mm_setzero_pd();
1019 fiz2
= _mm_setzero_pd();
1021 /* Start inner kernel loop */
1022 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
1025 /* Get j neighbor index, and coordinate index */
1027 jnrB
= jjnr
[jidx
+1];
1028 j_coord_offsetA
= DIM
*jnrA
;
1029 j_coord_offsetB
= DIM
*jnrB
;
1031 /* load j atom coordinates */
1032 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
1033 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1035 /* Calculate displacement vector */
1036 dx00
= _mm_sub_pd(ix0
,jx0
);
1037 dy00
= _mm_sub_pd(iy0
,jy0
);
1038 dz00
= _mm_sub_pd(iz0
,jz0
);
1039 dx01
= _mm_sub_pd(ix0
,jx1
);
1040 dy01
= _mm_sub_pd(iy0
,jy1
);
1041 dz01
= _mm_sub_pd(iz0
,jz1
);
1042 dx02
= _mm_sub_pd(ix0
,jx2
);
1043 dy02
= _mm_sub_pd(iy0
,jy2
);
1044 dz02
= _mm_sub_pd(iz0
,jz2
);
1045 dx10
= _mm_sub_pd(ix1
,jx0
);
1046 dy10
= _mm_sub_pd(iy1
,jy0
);
1047 dz10
= _mm_sub_pd(iz1
,jz0
);
1048 dx11
= _mm_sub_pd(ix1
,jx1
);
1049 dy11
= _mm_sub_pd(iy1
,jy1
);
1050 dz11
= _mm_sub_pd(iz1
,jz1
);
1051 dx12
= _mm_sub_pd(ix1
,jx2
);
1052 dy12
= _mm_sub_pd(iy1
,jy2
);
1053 dz12
= _mm_sub_pd(iz1
,jz2
);
1054 dx20
= _mm_sub_pd(ix2
,jx0
);
1055 dy20
= _mm_sub_pd(iy2
,jy0
);
1056 dz20
= _mm_sub_pd(iz2
,jz0
);
1057 dx21
= _mm_sub_pd(ix2
,jx1
);
1058 dy21
= _mm_sub_pd(iy2
,jy1
);
1059 dz21
= _mm_sub_pd(iz2
,jz1
);
1060 dx22
= _mm_sub_pd(ix2
,jx2
);
1061 dy22
= _mm_sub_pd(iy2
,jy2
);
1062 dz22
= _mm_sub_pd(iz2
,jz2
);
1064 /* Calculate squared distance and things based on it */
1065 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1066 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1067 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1068 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1069 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1070 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1071 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1072 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1073 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1075 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
1076 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
1077 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
1078 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
1079 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
1080 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
1081 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
1082 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
1083 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
1085 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1086 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1087 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1088 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1089 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1090 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1091 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1092 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1093 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1095 fjx0
= _mm_setzero_pd();
1096 fjy0
= _mm_setzero_pd();
1097 fjz0
= _mm_setzero_pd();
1098 fjx1
= _mm_setzero_pd();
1099 fjy1
= _mm_setzero_pd();
1100 fjz1
= _mm_setzero_pd();
1101 fjx2
= _mm_setzero_pd();
1102 fjy2
= _mm_setzero_pd();
1103 fjz2
= _mm_setzero_pd();
1105 /**************************
1106 * CALCULATE INTERACTIONS *
1107 **************************/
1109 /* REACTION-FIELD ELECTROSTATICS */
1110 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
1114 /* Calculate temporary vectorial force */
1115 tx
= _mm_mul_pd(fscal
,dx00
);
1116 ty
= _mm_mul_pd(fscal
,dy00
);
1117 tz
= _mm_mul_pd(fscal
,dz00
);
1119 /* Update vectorial force */
1120 fix0
= _mm_add_pd(fix0
,tx
);
1121 fiy0
= _mm_add_pd(fiy0
,ty
);
1122 fiz0
= _mm_add_pd(fiz0
,tz
);
1124 fjx0
= _mm_add_pd(fjx0
,tx
);
1125 fjy0
= _mm_add_pd(fjy0
,ty
);
1126 fjz0
= _mm_add_pd(fjz0
,tz
);
1128 /**************************
1129 * CALCULATE INTERACTIONS *
1130 **************************/
1132 /* REACTION-FIELD ELECTROSTATICS */
1133 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
1137 /* Calculate temporary vectorial force */
1138 tx
= _mm_mul_pd(fscal
,dx01
);
1139 ty
= _mm_mul_pd(fscal
,dy01
);
1140 tz
= _mm_mul_pd(fscal
,dz01
);
1142 /* Update vectorial force */
1143 fix0
= _mm_add_pd(fix0
,tx
);
1144 fiy0
= _mm_add_pd(fiy0
,ty
);
1145 fiz0
= _mm_add_pd(fiz0
,tz
);
1147 fjx1
= _mm_add_pd(fjx1
,tx
);
1148 fjy1
= _mm_add_pd(fjy1
,ty
);
1149 fjz1
= _mm_add_pd(fjz1
,tz
);
1151 /**************************
1152 * CALCULATE INTERACTIONS *
1153 **************************/
1155 /* REACTION-FIELD ELECTROSTATICS */
1156 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
1160 /* Calculate temporary vectorial force */
1161 tx
= _mm_mul_pd(fscal
,dx02
);
1162 ty
= _mm_mul_pd(fscal
,dy02
);
1163 tz
= _mm_mul_pd(fscal
,dz02
);
1165 /* Update vectorial force */
1166 fix0
= _mm_add_pd(fix0
,tx
);
1167 fiy0
= _mm_add_pd(fiy0
,ty
);
1168 fiz0
= _mm_add_pd(fiz0
,tz
);
1170 fjx2
= _mm_add_pd(fjx2
,tx
);
1171 fjy2
= _mm_add_pd(fjy2
,ty
);
1172 fjz2
= _mm_add_pd(fjz2
,tz
);
1174 /**************************
1175 * CALCULATE INTERACTIONS *
1176 **************************/
1178 /* REACTION-FIELD ELECTROSTATICS */
1179 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
1183 /* Calculate temporary vectorial force */
1184 tx
= _mm_mul_pd(fscal
,dx10
);
1185 ty
= _mm_mul_pd(fscal
,dy10
);
1186 tz
= _mm_mul_pd(fscal
,dz10
);
1188 /* Update vectorial force */
1189 fix1
= _mm_add_pd(fix1
,tx
);
1190 fiy1
= _mm_add_pd(fiy1
,ty
);
1191 fiz1
= _mm_add_pd(fiz1
,tz
);
1193 fjx0
= _mm_add_pd(fjx0
,tx
);
1194 fjy0
= _mm_add_pd(fjy0
,ty
);
1195 fjz0
= _mm_add_pd(fjz0
,tz
);
1197 /**************************
1198 * CALCULATE INTERACTIONS *
1199 **************************/
1201 /* REACTION-FIELD ELECTROSTATICS */
1202 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1206 /* Calculate temporary vectorial force */
1207 tx
= _mm_mul_pd(fscal
,dx11
);
1208 ty
= _mm_mul_pd(fscal
,dy11
);
1209 tz
= _mm_mul_pd(fscal
,dz11
);
1211 /* Update vectorial force */
1212 fix1
= _mm_add_pd(fix1
,tx
);
1213 fiy1
= _mm_add_pd(fiy1
,ty
);
1214 fiz1
= _mm_add_pd(fiz1
,tz
);
1216 fjx1
= _mm_add_pd(fjx1
,tx
);
1217 fjy1
= _mm_add_pd(fjy1
,ty
);
1218 fjz1
= _mm_add_pd(fjz1
,tz
);
1220 /**************************
1221 * CALCULATE INTERACTIONS *
1222 **************************/
1224 /* REACTION-FIELD ELECTROSTATICS */
1225 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1229 /* Calculate temporary vectorial force */
1230 tx
= _mm_mul_pd(fscal
,dx12
);
1231 ty
= _mm_mul_pd(fscal
,dy12
);
1232 tz
= _mm_mul_pd(fscal
,dz12
);
1234 /* Update vectorial force */
1235 fix1
= _mm_add_pd(fix1
,tx
);
1236 fiy1
= _mm_add_pd(fiy1
,ty
);
1237 fiz1
= _mm_add_pd(fiz1
,tz
);
1239 fjx2
= _mm_add_pd(fjx2
,tx
);
1240 fjy2
= _mm_add_pd(fjy2
,ty
);
1241 fjz2
= _mm_add_pd(fjz2
,tz
);
1243 /**************************
1244 * CALCULATE INTERACTIONS *
1245 **************************/
1247 /* REACTION-FIELD ELECTROSTATICS */
1248 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
1252 /* Calculate temporary vectorial force */
1253 tx
= _mm_mul_pd(fscal
,dx20
);
1254 ty
= _mm_mul_pd(fscal
,dy20
);
1255 tz
= _mm_mul_pd(fscal
,dz20
);
1257 /* Update vectorial force */
1258 fix2
= _mm_add_pd(fix2
,tx
);
1259 fiy2
= _mm_add_pd(fiy2
,ty
);
1260 fiz2
= _mm_add_pd(fiz2
,tz
);
1262 fjx0
= _mm_add_pd(fjx0
,tx
);
1263 fjy0
= _mm_add_pd(fjy0
,ty
);
1264 fjz0
= _mm_add_pd(fjz0
,tz
);
1266 /**************************
1267 * CALCULATE INTERACTIONS *
1268 **************************/
1270 /* REACTION-FIELD ELECTROSTATICS */
1271 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1275 /* Calculate temporary vectorial force */
1276 tx
= _mm_mul_pd(fscal
,dx21
);
1277 ty
= _mm_mul_pd(fscal
,dy21
);
1278 tz
= _mm_mul_pd(fscal
,dz21
);
1280 /* Update vectorial force */
1281 fix2
= _mm_add_pd(fix2
,tx
);
1282 fiy2
= _mm_add_pd(fiy2
,ty
);
1283 fiz2
= _mm_add_pd(fiz2
,tz
);
1285 fjx1
= _mm_add_pd(fjx1
,tx
);
1286 fjy1
= _mm_add_pd(fjy1
,ty
);
1287 fjz1
= _mm_add_pd(fjz1
,tz
);
1289 /**************************
1290 * CALCULATE INTERACTIONS *
1291 **************************/
1293 /* REACTION-FIELD ELECTROSTATICS */
1294 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1298 /* Calculate temporary vectorial force */
1299 tx
= _mm_mul_pd(fscal
,dx22
);
1300 ty
= _mm_mul_pd(fscal
,dy22
);
1301 tz
= _mm_mul_pd(fscal
,dz22
);
1303 /* Update vectorial force */
1304 fix2
= _mm_add_pd(fix2
,tx
);
1305 fiy2
= _mm_add_pd(fiy2
,ty
);
1306 fiz2
= _mm_add_pd(fiz2
,tz
);
1308 fjx2
= _mm_add_pd(fjx2
,tx
);
1309 fjy2
= _mm_add_pd(fjy2
,ty
);
1310 fjz2
= _mm_add_pd(fjz2
,tz
);
1312 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1314 /* Inner loop uses 243 flops */
1317 if(jidx
<j_index_end
)
1321 j_coord_offsetA
= DIM
*jnrA
;
1323 /* load j atom coordinates */
1324 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
1325 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1327 /* Calculate displacement vector */
1328 dx00
= _mm_sub_pd(ix0
,jx0
);
1329 dy00
= _mm_sub_pd(iy0
,jy0
);
1330 dz00
= _mm_sub_pd(iz0
,jz0
);
1331 dx01
= _mm_sub_pd(ix0
,jx1
);
1332 dy01
= _mm_sub_pd(iy0
,jy1
);
1333 dz01
= _mm_sub_pd(iz0
,jz1
);
1334 dx02
= _mm_sub_pd(ix0
,jx2
);
1335 dy02
= _mm_sub_pd(iy0
,jy2
);
1336 dz02
= _mm_sub_pd(iz0
,jz2
);
1337 dx10
= _mm_sub_pd(ix1
,jx0
);
1338 dy10
= _mm_sub_pd(iy1
,jy0
);
1339 dz10
= _mm_sub_pd(iz1
,jz0
);
1340 dx11
= _mm_sub_pd(ix1
,jx1
);
1341 dy11
= _mm_sub_pd(iy1
,jy1
);
1342 dz11
= _mm_sub_pd(iz1
,jz1
);
1343 dx12
= _mm_sub_pd(ix1
,jx2
);
1344 dy12
= _mm_sub_pd(iy1
,jy2
);
1345 dz12
= _mm_sub_pd(iz1
,jz2
);
1346 dx20
= _mm_sub_pd(ix2
,jx0
);
1347 dy20
= _mm_sub_pd(iy2
,jy0
);
1348 dz20
= _mm_sub_pd(iz2
,jz0
);
1349 dx21
= _mm_sub_pd(ix2
,jx1
);
1350 dy21
= _mm_sub_pd(iy2
,jy1
);
1351 dz21
= _mm_sub_pd(iz2
,jz1
);
1352 dx22
= _mm_sub_pd(ix2
,jx2
);
1353 dy22
= _mm_sub_pd(iy2
,jy2
);
1354 dz22
= _mm_sub_pd(iz2
,jz2
);
1356 /* Calculate squared distance and things based on it */
1357 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1358 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1359 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1360 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1361 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1362 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1363 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1364 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1365 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1367 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
1368 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
1369 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
1370 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
1371 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
1372 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
1373 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
1374 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
1375 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
1377 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1378 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1379 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1380 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1381 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1382 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1383 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1384 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1385 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1387 fjx0
= _mm_setzero_pd();
1388 fjy0
= _mm_setzero_pd();
1389 fjz0
= _mm_setzero_pd();
1390 fjx1
= _mm_setzero_pd();
1391 fjy1
= _mm_setzero_pd();
1392 fjz1
= _mm_setzero_pd();
1393 fjx2
= _mm_setzero_pd();
1394 fjy2
= _mm_setzero_pd();
1395 fjz2
= _mm_setzero_pd();
1397 /**************************
1398 * CALCULATE INTERACTIONS *
1399 **************************/
1401 /* REACTION-FIELD ELECTROSTATICS */
1402 felec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_mul_pd(rinv00
,rinvsq00
),krf2
));
1406 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1408 /* Calculate temporary vectorial force */
1409 tx
= _mm_mul_pd(fscal
,dx00
);
1410 ty
= _mm_mul_pd(fscal
,dy00
);
1411 tz
= _mm_mul_pd(fscal
,dz00
);
1413 /* Update vectorial force */
1414 fix0
= _mm_add_pd(fix0
,tx
);
1415 fiy0
= _mm_add_pd(fiy0
,ty
);
1416 fiz0
= _mm_add_pd(fiz0
,tz
);
1418 fjx0
= _mm_add_pd(fjx0
,tx
);
1419 fjy0
= _mm_add_pd(fjy0
,ty
);
1420 fjz0
= _mm_add_pd(fjz0
,tz
);
1422 /**************************
1423 * CALCULATE INTERACTIONS *
1424 **************************/
1426 /* REACTION-FIELD ELECTROSTATICS */
1427 felec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_mul_pd(rinv01
,rinvsq01
),krf2
));
1431 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1433 /* Calculate temporary vectorial force */
1434 tx
= _mm_mul_pd(fscal
,dx01
);
1435 ty
= _mm_mul_pd(fscal
,dy01
);
1436 tz
= _mm_mul_pd(fscal
,dz01
);
1438 /* Update vectorial force */
1439 fix0
= _mm_add_pd(fix0
,tx
);
1440 fiy0
= _mm_add_pd(fiy0
,ty
);
1441 fiz0
= _mm_add_pd(fiz0
,tz
);
1443 fjx1
= _mm_add_pd(fjx1
,tx
);
1444 fjy1
= _mm_add_pd(fjy1
,ty
);
1445 fjz1
= _mm_add_pd(fjz1
,tz
);
1447 /**************************
1448 * CALCULATE INTERACTIONS *
1449 **************************/
1451 /* REACTION-FIELD ELECTROSTATICS */
1452 felec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_mul_pd(rinv02
,rinvsq02
),krf2
));
1456 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1458 /* Calculate temporary vectorial force */
1459 tx
= _mm_mul_pd(fscal
,dx02
);
1460 ty
= _mm_mul_pd(fscal
,dy02
);
1461 tz
= _mm_mul_pd(fscal
,dz02
);
1463 /* Update vectorial force */
1464 fix0
= _mm_add_pd(fix0
,tx
);
1465 fiy0
= _mm_add_pd(fiy0
,ty
);
1466 fiz0
= _mm_add_pd(fiz0
,tz
);
1468 fjx2
= _mm_add_pd(fjx2
,tx
);
1469 fjy2
= _mm_add_pd(fjy2
,ty
);
1470 fjz2
= _mm_add_pd(fjz2
,tz
);
1472 /**************************
1473 * CALCULATE INTERACTIONS *
1474 **************************/
1476 /* REACTION-FIELD ELECTROSTATICS */
1477 felec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_mul_pd(rinv10
,rinvsq10
),krf2
));
1481 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1483 /* Calculate temporary vectorial force */
1484 tx
= _mm_mul_pd(fscal
,dx10
);
1485 ty
= _mm_mul_pd(fscal
,dy10
);
1486 tz
= _mm_mul_pd(fscal
,dz10
);
1488 /* Update vectorial force */
1489 fix1
= _mm_add_pd(fix1
,tx
);
1490 fiy1
= _mm_add_pd(fiy1
,ty
);
1491 fiz1
= _mm_add_pd(fiz1
,tz
);
1493 fjx0
= _mm_add_pd(fjx0
,tx
);
1494 fjy0
= _mm_add_pd(fjy0
,ty
);
1495 fjz0
= _mm_add_pd(fjz0
,tz
);
1497 /**************************
1498 * CALCULATE INTERACTIONS *
1499 **************************/
1501 /* REACTION-FIELD ELECTROSTATICS */
1502 felec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_mul_pd(rinv11
,rinvsq11
),krf2
));
1506 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1508 /* Calculate temporary vectorial force */
1509 tx
= _mm_mul_pd(fscal
,dx11
);
1510 ty
= _mm_mul_pd(fscal
,dy11
);
1511 tz
= _mm_mul_pd(fscal
,dz11
);
1513 /* Update vectorial force */
1514 fix1
= _mm_add_pd(fix1
,tx
);
1515 fiy1
= _mm_add_pd(fiy1
,ty
);
1516 fiz1
= _mm_add_pd(fiz1
,tz
);
1518 fjx1
= _mm_add_pd(fjx1
,tx
);
1519 fjy1
= _mm_add_pd(fjy1
,ty
);
1520 fjz1
= _mm_add_pd(fjz1
,tz
);
1522 /**************************
1523 * CALCULATE INTERACTIONS *
1524 **************************/
1526 /* REACTION-FIELD ELECTROSTATICS */
1527 felec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_mul_pd(rinv12
,rinvsq12
),krf2
));
1531 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1533 /* Calculate temporary vectorial force */
1534 tx
= _mm_mul_pd(fscal
,dx12
);
1535 ty
= _mm_mul_pd(fscal
,dy12
);
1536 tz
= _mm_mul_pd(fscal
,dz12
);
1538 /* Update vectorial force */
1539 fix1
= _mm_add_pd(fix1
,tx
);
1540 fiy1
= _mm_add_pd(fiy1
,ty
);
1541 fiz1
= _mm_add_pd(fiz1
,tz
);
1543 fjx2
= _mm_add_pd(fjx2
,tx
);
1544 fjy2
= _mm_add_pd(fjy2
,ty
);
1545 fjz2
= _mm_add_pd(fjz2
,tz
);
1547 /**************************
1548 * CALCULATE INTERACTIONS *
1549 **************************/
1551 /* REACTION-FIELD ELECTROSTATICS */
1552 felec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_mul_pd(rinv20
,rinvsq20
),krf2
));
1556 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1558 /* Calculate temporary vectorial force */
1559 tx
= _mm_mul_pd(fscal
,dx20
);
1560 ty
= _mm_mul_pd(fscal
,dy20
);
1561 tz
= _mm_mul_pd(fscal
,dz20
);
1563 /* Update vectorial force */
1564 fix2
= _mm_add_pd(fix2
,tx
);
1565 fiy2
= _mm_add_pd(fiy2
,ty
);
1566 fiz2
= _mm_add_pd(fiz2
,tz
);
1568 fjx0
= _mm_add_pd(fjx0
,tx
);
1569 fjy0
= _mm_add_pd(fjy0
,ty
);
1570 fjz0
= _mm_add_pd(fjz0
,tz
);
1572 /**************************
1573 * CALCULATE INTERACTIONS *
1574 **************************/
1576 /* REACTION-FIELD ELECTROSTATICS */
1577 felec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_mul_pd(rinv21
,rinvsq21
),krf2
));
1581 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1583 /* Calculate temporary vectorial force */
1584 tx
= _mm_mul_pd(fscal
,dx21
);
1585 ty
= _mm_mul_pd(fscal
,dy21
);
1586 tz
= _mm_mul_pd(fscal
,dz21
);
1588 /* Update vectorial force */
1589 fix2
= _mm_add_pd(fix2
,tx
);
1590 fiy2
= _mm_add_pd(fiy2
,ty
);
1591 fiz2
= _mm_add_pd(fiz2
,tz
);
1593 fjx1
= _mm_add_pd(fjx1
,tx
);
1594 fjy1
= _mm_add_pd(fjy1
,ty
);
1595 fjz1
= _mm_add_pd(fjz1
,tz
);
1597 /**************************
1598 * CALCULATE INTERACTIONS *
1599 **************************/
1601 /* REACTION-FIELD ELECTROSTATICS */
1602 felec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_mul_pd(rinv22
,rinvsq22
),krf2
));
1606 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1608 /* Calculate temporary vectorial force */
1609 tx
= _mm_mul_pd(fscal
,dx22
);
1610 ty
= _mm_mul_pd(fscal
,dy22
);
1611 tz
= _mm_mul_pd(fscal
,dz22
);
1613 /* Update vectorial force */
1614 fix2
= _mm_add_pd(fix2
,tx
);
1615 fiy2
= _mm_add_pd(fiy2
,ty
);
1616 fiz2
= _mm_add_pd(fiz2
,tz
);
1618 fjx2
= _mm_add_pd(fjx2
,tx
);
1619 fjy2
= _mm_add_pd(fjy2
,ty
);
1620 fjz2
= _mm_add_pd(fjz2
,tz
);
1622 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1624 /* Inner loop uses 243 flops */
1627 /* End of innermost loop */
1629 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1630 f
+i_coord_offset
,fshift
+i_shift_offset
);
1632 /* Increment number of inner iterations */
1633 inneriter
+= j_index_end
- j_index_start
;
1635 /* Outer loop uses 18 flops */
1638 /* Increment number of outer iterations */
1641 /* Update outer/inner flops */
1643 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_F
,outeriter
*18 + inneriter
*243);