/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*
 * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator.
 */
42 #include "../nb_kernel.h"
43 #include "types/simple.h"
44 #include "gromacs/math/vec.h"
47 #include "gromacs/simd/math_x86_avx_128_fma_double.h"
48 #include "kernelutil_x86_avx_128_fma_double.h"
51 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double
52 * Electrostatics interaction: ReactionField
53 * VdW interaction: None
54 * Geometry: Water3-Water3
55 * Calculate force/pot: PotentialAndForce
58 nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double
59 (t_nblist
* gmx_restrict nlist
,
60 rvec
* gmx_restrict xx
,
61 rvec
* gmx_restrict ff
,
62 t_forcerec
* gmx_restrict fr
,
63 t_mdatoms
* gmx_restrict mdatoms
,
64 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
65 t_nrnb
* gmx_restrict nrnb
)
67 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
68 * just 0 for non-waters.
69 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
70 * jnr indices corresponding to data put in the four positions in the SIMD register.
72 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
73 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
75 int j_coord_offsetA
,j_coord_offsetB
;
76 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
78 real
*shiftvec
,*fshift
,*x
,*f
;
79 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
81 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
83 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
85 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
86 int vdwjidx0A
,vdwjidx0B
;
87 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
88 int vdwjidx1A
,vdwjidx1B
;
89 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
90 int vdwjidx2A
,vdwjidx2B
;
91 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
92 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
93 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
94 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
95 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
96 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
97 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
98 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
99 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
100 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
101 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
103 __m128d dummy_mask
,cutoff_mask
;
104 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
105 __m128d one
= _mm_set1_pd(1.0);
106 __m128d two
= _mm_set1_pd(2.0);
112 jindex
= nlist
->jindex
;
114 shiftidx
= nlist
->shift
;
116 shiftvec
= fr
->shift_vec
[0];
117 fshift
= fr
->fshift
[0];
118 facel
= _mm_set1_pd(fr
->epsfac
);
119 charge
= mdatoms
->chargeA
;
120 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
121 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
122 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
124 /* Setup water-specific parameters */
125 inr
= nlist
->iinr
[0];
126 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
127 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
128 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
130 jq0
= _mm_set1_pd(charge
[inr
+0]);
131 jq1
= _mm_set1_pd(charge
[inr
+1]);
132 jq2
= _mm_set1_pd(charge
[inr
+2]);
133 qq00
= _mm_mul_pd(iq0
,jq0
);
134 qq01
= _mm_mul_pd(iq0
,jq1
);
135 qq02
= _mm_mul_pd(iq0
,jq2
);
136 qq10
= _mm_mul_pd(iq1
,jq0
);
137 qq11
= _mm_mul_pd(iq1
,jq1
);
138 qq12
= _mm_mul_pd(iq1
,jq2
);
139 qq20
= _mm_mul_pd(iq2
,jq0
);
140 qq21
= _mm_mul_pd(iq2
,jq1
);
141 qq22
= _mm_mul_pd(iq2
,jq2
);
143 /* Avoid stupid compiler warnings */
151 /* Start outer loop over neighborlists */
152 for(iidx
=0; iidx
<nri
; iidx
++)
154 /* Load shift vector for this list */
155 i_shift_offset
= DIM
*shiftidx
[iidx
];
157 /* Load limits for loop over neighbors */
158 j_index_start
= jindex
[iidx
];
159 j_index_end
= jindex
[iidx
+1];
161 /* Get outer coordinate index */
163 i_coord_offset
= DIM
*inr
;
165 /* Load i particle coords and add shift vector */
166 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
167 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
169 fix0
= _mm_setzero_pd();
170 fiy0
= _mm_setzero_pd();
171 fiz0
= _mm_setzero_pd();
172 fix1
= _mm_setzero_pd();
173 fiy1
= _mm_setzero_pd();
174 fiz1
= _mm_setzero_pd();
175 fix2
= _mm_setzero_pd();
176 fiy2
= _mm_setzero_pd();
177 fiz2
= _mm_setzero_pd();
179 /* Reset potential sums */
180 velecsum
= _mm_setzero_pd();
182 /* Start inner kernel loop */
183 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
186 /* Get j neighbor index, and coordinate index */
189 j_coord_offsetA
= DIM
*jnrA
;
190 j_coord_offsetB
= DIM
*jnrB
;
192 /* load j atom coordinates */
193 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
194 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
196 /* Calculate displacement vector */
197 dx00
= _mm_sub_pd(ix0
,jx0
);
198 dy00
= _mm_sub_pd(iy0
,jy0
);
199 dz00
= _mm_sub_pd(iz0
,jz0
);
200 dx01
= _mm_sub_pd(ix0
,jx1
);
201 dy01
= _mm_sub_pd(iy0
,jy1
);
202 dz01
= _mm_sub_pd(iz0
,jz1
);
203 dx02
= _mm_sub_pd(ix0
,jx2
);
204 dy02
= _mm_sub_pd(iy0
,jy2
);
205 dz02
= _mm_sub_pd(iz0
,jz2
);
206 dx10
= _mm_sub_pd(ix1
,jx0
);
207 dy10
= _mm_sub_pd(iy1
,jy0
);
208 dz10
= _mm_sub_pd(iz1
,jz0
);
209 dx11
= _mm_sub_pd(ix1
,jx1
);
210 dy11
= _mm_sub_pd(iy1
,jy1
);
211 dz11
= _mm_sub_pd(iz1
,jz1
);
212 dx12
= _mm_sub_pd(ix1
,jx2
);
213 dy12
= _mm_sub_pd(iy1
,jy2
);
214 dz12
= _mm_sub_pd(iz1
,jz2
);
215 dx20
= _mm_sub_pd(ix2
,jx0
);
216 dy20
= _mm_sub_pd(iy2
,jy0
);
217 dz20
= _mm_sub_pd(iz2
,jz0
);
218 dx21
= _mm_sub_pd(ix2
,jx1
);
219 dy21
= _mm_sub_pd(iy2
,jy1
);
220 dz21
= _mm_sub_pd(iz2
,jz1
);
221 dx22
= _mm_sub_pd(ix2
,jx2
);
222 dy22
= _mm_sub_pd(iy2
,jy2
);
223 dz22
= _mm_sub_pd(iz2
,jz2
);
225 /* Calculate squared distance and things based on it */
226 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
227 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
228 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
229 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
230 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
231 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
232 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
233 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
234 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
236 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
237 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
238 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
239 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
240 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
241 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
242 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
243 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
244 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
246 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
247 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
248 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
249 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
250 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
251 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
252 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
253 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
254 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
256 fjx0
= _mm_setzero_pd();
257 fjy0
= _mm_setzero_pd();
258 fjz0
= _mm_setzero_pd();
259 fjx1
= _mm_setzero_pd();
260 fjy1
= _mm_setzero_pd();
261 fjz1
= _mm_setzero_pd();
262 fjx2
= _mm_setzero_pd();
263 fjy2
= _mm_setzero_pd();
264 fjz2
= _mm_setzero_pd();
266 /**************************
267 * CALCULATE INTERACTIONS *
268 **************************/
270 /* REACTION-FIELD ELECTROSTATICS */
271 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_macc_pd(krf
,rsq00
,rinv00
),crf
));
272 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
274 /* Update potential sum for this i atom from the interaction with this j atom. */
275 velecsum
= _mm_add_pd(velecsum
,velec
);
279 /* Update vectorial force */
280 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
281 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
282 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
284 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
285 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
286 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
288 /**************************
289 * CALCULATE INTERACTIONS *
290 **************************/
292 /* REACTION-FIELD ELECTROSTATICS */
293 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_macc_pd(krf
,rsq01
,rinv01
),crf
));
294 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
296 /* Update potential sum for this i atom from the interaction with this j atom. */
297 velecsum
= _mm_add_pd(velecsum
,velec
);
301 /* Update vectorial force */
302 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
303 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
304 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
306 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
307 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
308 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
310 /**************************
311 * CALCULATE INTERACTIONS *
312 **************************/
314 /* REACTION-FIELD ELECTROSTATICS */
315 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_macc_pd(krf
,rsq02
,rinv02
),crf
));
316 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
318 /* Update potential sum for this i atom from the interaction with this j atom. */
319 velecsum
= _mm_add_pd(velecsum
,velec
);
323 /* Update vectorial force */
324 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
325 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
326 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
328 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
329 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
330 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
332 /**************************
333 * CALCULATE INTERACTIONS *
334 **************************/
336 /* REACTION-FIELD ELECTROSTATICS */
337 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_macc_pd(krf
,rsq10
,rinv10
),crf
));
338 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
340 /* Update potential sum for this i atom from the interaction with this j atom. */
341 velecsum
= _mm_add_pd(velecsum
,velec
);
345 /* Update vectorial force */
346 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
347 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
348 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
350 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
351 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
352 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
354 /**************************
355 * CALCULATE INTERACTIONS *
356 **************************/
358 /* REACTION-FIELD ELECTROSTATICS */
359 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_macc_pd(krf
,rsq11
,rinv11
),crf
));
360 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
362 /* Update potential sum for this i atom from the interaction with this j atom. */
363 velecsum
= _mm_add_pd(velecsum
,velec
);
367 /* Update vectorial force */
368 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
369 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
370 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
372 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
373 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
374 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
376 /**************************
377 * CALCULATE INTERACTIONS *
378 **************************/
380 /* REACTION-FIELD ELECTROSTATICS */
381 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_macc_pd(krf
,rsq12
,rinv12
),crf
));
382 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
384 /* Update potential sum for this i atom from the interaction with this j atom. */
385 velecsum
= _mm_add_pd(velecsum
,velec
);
389 /* Update vectorial force */
390 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
391 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
392 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
394 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
395 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
396 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
398 /**************************
399 * CALCULATE INTERACTIONS *
400 **************************/
402 /* REACTION-FIELD ELECTROSTATICS */
403 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_macc_pd(krf
,rsq20
,rinv20
),crf
));
404 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
406 /* Update potential sum for this i atom from the interaction with this j atom. */
407 velecsum
= _mm_add_pd(velecsum
,velec
);
411 /* Update vectorial force */
412 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
413 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
414 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
416 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
417 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
418 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
420 /**************************
421 * CALCULATE INTERACTIONS *
422 **************************/
424 /* REACTION-FIELD ELECTROSTATICS */
425 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_macc_pd(krf
,rsq21
,rinv21
),crf
));
426 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
428 /* Update potential sum for this i atom from the interaction with this j atom. */
429 velecsum
= _mm_add_pd(velecsum
,velec
);
433 /* Update vectorial force */
434 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
435 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
436 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
438 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
439 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
440 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
442 /**************************
443 * CALCULATE INTERACTIONS *
444 **************************/
446 /* REACTION-FIELD ELECTROSTATICS */
447 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_macc_pd(krf
,rsq22
,rinv22
),crf
));
448 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
450 /* Update potential sum for this i atom from the interaction with this j atom. */
451 velecsum
= _mm_add_pd(velecsum
,velec
);
455 /* Update vectorial force */
456 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
457 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
458 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
460 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
461 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
462 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
464 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
466 /* Inner loop uses 315 flops */
473 j_coord_offsetA
= DIM
*jnrA
;
475 /* load j atom coordinates */
476 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
477 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
479 /* Calculate displacement vector */
480 dx00
= _mm_sub_pd(ix0
,jx0
);
481 dy00
= _mm_sub_pd(iy0
,jy0
);
482 dz00
= _mm_sub_pd(iz0
,jz0
);
483 dx01
= _mm_sub_pd(ix0
,jx1
);
484 dy01
= _mm_sub_pd(iy0
,jy1
);
485 dz01
= _mm_sub_pd(iz0
,jz1
);
486 dx02
= _mm_sub_pd(ix0
,jx2
);
487 dy02
= _mm_sub_pd(iy0
,jy2
);
488 dz02
= _mm_sub_pd(iz0
,jz2
);
489 dx10
= _mm_sub_pd(ix1
,jx0
);
490 dy10
= _mm_sub_pd(iy1
,jy0
);
491 dz10
= _mm_sub_pd(iz1
,jz0
);
492 dx11
= _mm_sub_pd(ix1
,jx1
);
493 dy11
= _mm_sub_pd(iy1
,jy1
);
494 dz11
= _mm_sub_pd(iz1
,jz1
);
495 dx12
= _mm_sub_pd(ix1
,jx2
);
496 dy12
= _mm_sub_pd(iy1
,jy2
);
497 dz12
= _mm_sub_pd(iz1
,jz2
);
498 dx20
= _mm_sub_pd(ix2
,jx0
);
499 dy20
= _mm_sub_pd(iy2
,jy0
);
500 dz20
= _mm_sub_pd(iz2
,jz0
);
501 dx21
= _mm_sub_pd(ix2
,jx1
);
502 dy21
= _mm_sub_pd(iy2
,jy1
);
503 dz21
= _mm_sub_pd(iz2
,jz1
);
504 dx22
= _mm_sub_pd(ix2
,jx2
);
505 dy22
= _mm_sub_pd(iy2
,jy2
);
506 dz22
= _mm_sub_pd(iz2
,jz2
);
508 /* Calculate squared distance and things based on it */
509 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
510 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
511 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
512 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
513 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
514 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
515 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
516 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
517 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
519 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
520 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
521 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
522 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
523 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
524 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
525 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
526 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
527 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
529 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
530 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
531 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
532 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
533 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
534 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
535 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
536 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
537 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
539 fjx0
= _mm_setzero_pd();
540 fjy0
= _mm_setzero_pd();
541 fjz0
= _mm_setzero_pd();
542 fjx1
= _mm_setzero_pd();
543 fjy1
= _mm_setzero_pd();
544 fjz1
= _mm_setzero_pd();
545 fjx2
= _mm_setzero_pd();
546 fjy2
= _mm_setzero_pd();
547 fjz2
= _mm_setzero_pd();
549 /**************************
550 * CALCULATE INTERACTIONS *
551 **************************/
553 /* REACTION-FIELD ELECTROSTATICS */
554 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_macc_pd(krf
,rsq00
,rinv00
),crf
));
555 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
557 /* Update potential sum for this i atom from the interaction with this j atom. */
558 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
559 velecsum
= _mm_add_pd(velecsum
,velec
);
563 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
565 /* Update vectorial force */
566 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
567 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
568 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
570 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
571 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
572 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
574 /**************************
575 * CALCULATE INTERACTIONS *
576 **************************/
578 /* REACTION-FIELD ELECTROSTATICS */
579 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_macc_pd(krf
,rsq01
,rinv01
),crf
));
580 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
582 /* Update potential sum for this i atom from the interaction with this j atom. */
583 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
584 velecsum
= _mm_add_pd(velecsum
,velec
);
588 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
590 /* Update vectorial force */
591 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
592 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
593 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
595 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
596 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
597 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
599 /**************************
600 * CALCULATE INTERACTIONS *
601 **************************/
603 /* REACTION-FIELD ELECTROSTATICS */
604 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_macc_pd(krf
,rsq02
,rinv02
),crf
));
605 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
607 /* Update potential sum for this i atom from the interaction with this j atom. */
608 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
609 velecsum
= _mm_add_pd(velecsum
,velec
);
613 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
615 /* Update vectorial force */
616 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
617 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
618 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
620 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
621 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
622 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
624 /**************************
625 * CALCULATE INTERACTIONS *
626 **************************/
628 /* REACTION-FIELD ELECTROSTATICS */
629 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_macc_pd(krf
,rsq10
,rinv10
),crf
));
630 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
632 /* Update potential sum for this i atom from the interaction with this j atom. */
633 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
634 velecsum
= _mm_add_pd(velecsum
,velec
);
638 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
640 /* Update vectorial force */
641 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
642 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
643 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
645 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
646 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
647 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
649 /**************************
650 * CALCULATE INTERACTIONS *
651 **************************/
653 /* REACTION-FIELD ELECTROSTATICS */
654 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_macc_pd(krf
,rsq11
,rinv11
),crf
));
655 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
657 /* Update potential sum for this i atom from the interaction with this j atom. */
658 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
659 velecsum
= _mm_add_pd(velecsum
,velec
);
663 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
665 /* Update vectorial force */
666 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
667 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
668 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
670 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
671 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
672 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
674 /**************************
675 * CALCULATE INTERACTIONS *
676 **************************/
678 /* REACTION-FIELD ELECTROSTATICS */
679 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_macc_pd(krf
,rsq12
,rinv12
),crf
));
680 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
682 /* Update potential sum for this i atom from the interaction with this j atom. */
683 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
684 velecsum
= _mm_add_pd(velecsum
,velec
);
688 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
690 /* Update vectorial force */
691 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
692 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
693 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
695 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
696 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
697 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
699 /**************************
700 * CALCULATE INTERACTIONS *
701 **************************/
703 /* REACTION-FIELD ELECTROSTATICS */
704 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_macc_pd(krf
,rsq20
,rinv20
),crf
));
705 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
707 /* Update potential sum for this i atom from the interaction with this j atom. */
708 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
709 velecsum
= _mm_add_pd(velecsum
,velec
);
713 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
715 /* Update vectorial force */
716 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
717 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
718 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
720 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
721 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
722 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
724 /**************************
725 * CALCULATE INTERACTIONS *
726 **************************/
728 /* REACTION-FIELD ELECTROSTATICS */
729 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_macc_pd(krf
,rsq21
,rinv21
),crf
));
730 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
732 /* Update potential sum for this i atom from the interaction with this j atom. */
733 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
734 velecsum
= _mm_add_pd(velecsum
,velec
);
738 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
740 /* Update vectorial force */
741 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
742 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
743 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
745 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
746 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
747 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
749 /**************************
750 * CALCULATE INTERACTIONS *
751 **************************/
753 /* REACTION-FIELD ELECTROSTATICS */
754 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_macc_pd(krf
,rsq22
,rinv22
),crf
));
755 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
757 /* Update potential sum for this i atom from the interaction with this j atom. */
758 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
759 velecsum
= _mm_add_pd(velecsum
,velec
);
763 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
765 /* Update vectorial force */
766 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
767 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
768 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
770 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
771 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
772 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
774 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
776 /* Inner loop uses 315 flops */
779 /* End of innermost loop */
781 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
782 f
+i_coord_offset
,fshift
+i_shift_offset
);
785 /* Update potential energies */
786 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
788 /* Increment number of inner iterations */
789 inneriter
+= j_index_end
- j_index_start
;
791 /* Outer loop uses 19 flops */
794 /* Increment number of outer iterations */
797 /* Update outer/inner flops */
799 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_VF
,outeriter
*19 + inneriter
*315);
/*
 * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double
 * Electrostatics interaction: ReactionField
 * VdW interaction:            None
 * Geometry:                   Water3-Water3
 * Calculate force/pot:        Force
 */
809 nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double
810 (t_nblist
* gmx_restrict nlist
,
811 rvec
* gmx_restrict xx
,
812 rvec
* gmx_restrict ff
,
813 t_forcerec
* gmx_restrict fr
,
814 t_mdatoms
* gmx_restrict mdatoms
,
815 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
816 t_nrnb
* gmx_restrict nrnb
)
    /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
     * just 0 for non-waters.
     * Suffixes A,B refer to j loop unrolling done with SSE double precision, i.e. the two different
     * jnr indices corresponding to data put in the two positions in the SIMD register.
     */
823 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
824 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
826 int j_coord_offsetA
,j_coord_offsetB
;
827 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
829 real
*shiftvec
,*fshift
,*x
,*f
;
830 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
832 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
834 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
836 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
837 int vdwjidx0A
,vdwjidx0B
;
838 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
839 int vdwjidx1A
,vdwjidx1B
;
840 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
841 int vdwjidx2A
,vdwjidx2B
;
842 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
843 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
844 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
845 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
846 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
847 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
848 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
849 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
850 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
851 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
852 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
854 __m128d dummy_mask
,cutoff_mask
;
855 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
856 __m128d one
= _mm_set1_pd(1.0);
857 __m128d two
= _mm_set1_pd(2.0);
863 jindex
= nlist
->jindex
;
865 shiftidx
= nlist
->shift
;
867 shiftvec
= fr
->shift_vec
[0];
868 fshift
= fr
->fshift
[0];
869 facel
= _mm_set1_pd(fr
->epsfac
);
870 charge
= mdatoms
->chargeA
;
871 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
872 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
873 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
875 /* Setup water-specific parameters */
876 inr
= nlist
->iinr
[0];
877 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
878 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
879 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
881 jq0
= _mm_set1_pd(charge
[inr
+0]);
882 jq1
= _mm_set1_pd(charge
[inr
+1]);
883 jq2
= _mm_set1_pd(charge
[inr
+2]);
884 qq00
= _mm_mul_pd(iq0
,jq0
);
885 qq01
= _mm_mul_pd(iq0
,jq1
);
886 qq02
= _mm_mul_pd(iq0
,jq2
);
887 qq10
= _mm_mul_pd(iq1
,jq0
);
888 qq11
= _mm_mul_pd(iq1
,jq1
);
889 qq12
= _mm_mul_pd(iq1
,jq2
);
890 qq20
= _mm_mul_pd(iq2
,jq0
);
891 qq21
= _mm_mul_pd(iq2
,jq1
);
892 qq22
= _mm_mul_pd(iq2
,jq2
);
894 /* Avoid stupid compiler warnings */
902 /* Start outer loop over neighborlists */
903 for(iidx
=0; iidx
<nri
; iidx
++)
905 /* Load shift vector for this list */
906 i_shift_offset
= DIM
*shiftidx
[iidx
];
908 /* Load limits for loop over neighbors */
909 j_index_start
= jindex
[iidx
];
910 j_index_end
= jindex
[iidx
+1];
912 /* Get outer coordinate index */
914 i_coord_offset
= DIM
*inr
;
916 /* Load i particle coords and add shift vector */
917 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
918 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
920 fix0
= _mm_setzero_pd();
921 fiy0
= _mm_setzero_pd();
922 fiz0
= _mm_setzero_pd();
923 fix1
= _mm_setzero_pd();
924 fiy1
= _mm_setzero_pd();
925 fiz1
= _mm_setzero_pd();
926 fix2
= _mm_setzero_pd();
927 fiy2
= _mm_setzero_pd();
928 fiz2
= _mm_setzero_pd();
930 /* Start inner kernel loop */
931 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
934 /* Get j neighbor index, and coordinate index */
937 j_coord_offsetA
= DIM
*jnrA
;
938 j_coord_offsetB
= DIM
*jnrB
;
940 /* load j atom coordinates */
941 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
942 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
944 /* Calculate displacement vector */
945 dx00
= _mm_sub_pd(ix0
,jx0
);
946 dy00
= _mm_sub_pd(iy0
,jy0
);
947 dz00
= _mm_sub_pd(iz0
,jz0
);
948 dx01
= _mm_sub_pd(ix0
,jx1
);
949 dy01
= _mm_sub_pd(iy0
,jy1
);
950 dz01
= _mm_sub_pd(iz0
,jz1
);
951 dx02
= _mm_sub_pd(ix0
,jx2
);
952 dy02
= _mm_sub_pd(iy0
,jy2
);
953 dz02
= _mm_sub_pd(iz0
,jz2
);
954 dx10
= _mm_sub_pd(ix1
,jx0
);
955 dy10
= _mm_sub_pd(iy1
,jy0
);
956 dz10
= _mm_sub_pd(iz1
,jz0
);
957 dx11
= _mm_sub_pd(ix1
,jx1
);
958 dy11
= _mm_sub_pd(iy1
,jy1
);
959 dz11
= _mm_sub_pd(iz1
,jz1
);
960 dx12
= _mm_sub_pd(ix1
,jx2
);
961 dy12
= _mm_sub_pd(iy1
,jy2
);
962 dz12
= _mm_sub_pd(iz1
,jz2
);
963 dx20
= _mm_sub_pd(ix2
,jx0
);
964 dy20
= _mm_sub_pd(iy2
,jy0
);
965 dz20
= _mm_sub_pd(iz2
,jz0
);
966 dx21
= _mm_sub_pd(ix2
,jx1
);
967 dy21
= _mm_sub_pd(iy2
,jy1
);
968 dz21
= _mm_sub_pd(iz2
,jz1
);
969 dx22
= _mm_sub_pd(ix2
,jx2
);
970 dy22
= _mm_sub_pd(iy2
,jy2
);
971 dz22
= _mm_sub_pd(iz2
,jz2
);
973 /* Calculate squared distance and things based on it */
974 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
975 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
976 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
977 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
978 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
979 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
980 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
981 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
982 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
984 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
985 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
986 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
987 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
988 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
989 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
990 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
991 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
992 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
994 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
995 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
996 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
997 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
998 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
999 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1000 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1001 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1002 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1004 fjx0
= _mm_setzero_pd();
1005 fjy0
= _mm_setzero_pd();
1006 fjz0
= _mm_setzero_pd();
1007 fjx1
= _mm_setzero_pd();
1008 fjy1
= _mm_setzero_pd();
1009 fjz1
= _mm_setzero_pd();
1010 fjx2
= _mm_setzero_pd();
1011 fjy2
= _mm_setzero_pd();
1012 fjz2
= _mm_setzero_pd();
1014 /**************************
1015 * CALCULATE INTERACTIONS *
1016 **************************/
1018 /* REACTION-FIELD ELECTROSTATICS */
1019 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
1023 /* Update vectorial force */
1024 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
1025 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
1026 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
1028 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
1029 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
1030 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
1032 /**************************
1033 * CALCULATE INTERACTIONS *
1034 **************************/
1036 /* REACTION-FIELD ELECTROSTATICS */
1037 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
1041 /* Update vectorial force */
1042 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
1043 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
1044 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
1046 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
1047 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
1048 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
1050 /**************************
1051 * CALCULATE INTERACTIONS *
1052 **************************/
1054 /* REACTION-FIELD ELECTROSTATICS */
1055 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
1059 /* Update vectorial force */
1060 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
1061 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
1062 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
1064 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
1065 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
1066 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
1068 /**************************
1069 * CALCULATE INTERACTIONS *
1070 **************************/
1072 /* REACTION-FIELD ELECTROSTATICS */
1073 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
1077 /* Update vectorial force */
1078 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
1079 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
1080 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
1082 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
1083 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
1084 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
1086 /**************************
1087 * CALCULATE INTERACTIONS *
1088 **************************/
1090 /* REACTION-FIELD ELECTROSTATICS */
1091 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
1095 /* Update vectorial force */
1096 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
1097 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
1098 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
1100 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
1101 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
1102 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
1104 /**************************
1105 * CALCULATE INTERACTIONS *
1106 **************************/
1108 /* REACTION-FIELD ELECTROSTATICS */
1109 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
1113 /* Update vectorial force */
1114 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
1115 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
1116 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
1118 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
1119 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
1120 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
1122 /**************************
1123 * CALCULATE INTERACTIONS *
1124 **************************/
1126 /* REACTION-FIELD ELECTROSTATICS */
1127 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
1131 /* Update vectorial force */
1132 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
1133 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
1134 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
1136 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
1137 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
1138 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
1140 /**************************
1141 * CALCULATE INTERACTIONS *
1142 **************************/
1144 /* REACTION-FIELD ELECTROSTATICS */
1145 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
1149 /* Update vectorial force */
1150 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
1151 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
1152 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
1154 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
1155 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
1156 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
1158 /**************************
1159 * CALCULATE INTERACTIONS *
1160 **************************/
1162 /* REACTION-FIELD ELECTROSTATICS */
1163 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
1167 /* Update vectorial force */
1168 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
1169 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
1170 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
1172 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
1173 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
1174 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
1176 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1178 /* Inner loop uses 270 flops */
1181 if(jidx
<j_index_end
)
1185 j_coord_offsetA
= DIM
*jnrA
;
1187 /* load j atom coordinates */
1188 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
1189 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1191 /* Calculate displacement vector */
1192 dx00
= _mm_sub_pd(ix0
,jx0
);
1193 dy00
= _mm_sub_pd(iy0
,jy0
);
1194 dz00
= _mm_sub_pd(iz0
,jz0
);
1195 dx01
= _mm_sub_pd(ix0
,jx1
);
1196 dy01
= _mm_sub_pd(iy0
,jy1
);
1197 dz01
= _mm_sub_pd(iz0
,jz1
);
1198 dx02
= _mm_sub_pd(ix0
,jx2
);
1199 dy02
= _mm_sub_pd(iy0
,jy2
);
1200 dz02
= _mm_sub_pd(iz0
,jz2
);
1201 dx10
= _mm_sub_pd(ix1
,jx0
);
1202 dy10
= _mm_sub_pd(iy1
,jy0
);
1203 dz10
= _mm_sub_pd(iz1
,jz0
);
1204 dx11
= _mm_sub_pd(ix1
,jx1
);
1205 dy11
= _mm_sub_pd(iy1
,jy1
);
1206 dz11
= _mm_sub_pd(iz1
,jz1
);
1207 dx12
= _mm_sub_pd(ix1
,jx2
);
1208 dy12
= _mm_sub_pd(iy1
,jy2
);
1209 dz12
= _mm_sub_pd(iz1
,jz2
);
1210 dx20
= _mm_sub_pd(ix2
,jx0
);
1211 dy20
= _mm_sub_pd(iy2
,jy0
);
1212 dz20
= _mm_sub_pd(iz2
,jz0
);
1213 dx21
= _mm_sub_pd(ix2
,jx1
);
1214 dy21
= _mm_sub_pd(iy2
,jy1
);
1215 dz21
= _mm_sub_pd(iz2
,jz1
);
1216 dx22
= _mm_sub_pd(ix2
,jx2
);
1217 dy22
= _mm_sub_pd(iy2
,jy2
);
1218 dz22
= _mm_sub_pd(iz2
,jz2
);
1220 /* Calculate squared distance and things based on it */
1221 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1222 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1223 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1224 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1225 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1226 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1227 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1228 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1229 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1231 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
1232 rinv01
= gmx_mm_invsqrt_pd(rsq01
);
1233 rinv02
= gmx_mm_invsqrt_pd(rsq02
);
1234 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
1235 rinv11
= gmx_mm_invsqrt_pd(rsq11
);
1236 rinv12
= gmx_mm_invsqrt_pd(rsq12
);
1237 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
1238 rinv21
= gmx_mm_invsqrt_pd(rsq21
);
1239 rinv22
= gmx_mm_invsqrt_pd(rsq22
);
1241 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1242 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1243 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1244 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1245 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1246 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1247 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1248 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1249 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1251 fjx0
= _mm_setzero_pd();
1252 fjy0
= _mm_setzero_pd();
1253 fjz0
= _mm_setzero_pd();
1254 fjx1
= _mm_setzero_pd();
1255 fjy1
= _mm_setzero_pd();
1256 fjz1
= _mm_setzero_pd();
1257 fjx2
= _mm_setzero_pd();
1258 fjy2
= _mm_setzero_pd();
1259 fjz2
= _mm_setzero_pd();
1261 /**************************
1262 * CALCULATE INTERACTIONS *
1263 **************************/
1265 /* REACTION-FIELD ELECTROSTATICS */
1266 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
1270 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1272 /* Update vectorial force */
1273 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
1274 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
1275 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
1277 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
1278 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
1279 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
1281 /**************************
1282 * CALCULATE INTERACTIONS *
1283 **************************/
1285 /* REACTION-FIELD ELECTROSTATICS */
1286 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
1290 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1292 /* Update vectorial force */
1293 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
1294 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
1295 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
1297 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
1298 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
1299 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
1301 /**************************
1302 * CALCULATE INTERACTIONS *
1303 **************************/
1305 /* REACTION-FIELD ELECTROSTATICS */
1306 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
1310 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1312 /* Update vectorial force */
1313 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
1314 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
1315 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
1317 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
1318 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
1319 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
1321 /**************************
1322 * CALCULATE INTERACTIONS *
1323 **************************/
1325 /* REACTION-FIELD ELECTROSTATICS */
1326 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
1330 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1332 /* Update vectorial force */
1333 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
1334 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
1335 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
1337 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
1338 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
1339 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
1341 /**************************
1342 * CALCULATE INTERACTIONS *
1343 **************************/
1345 /* REACTION-FIELD ELECTROSTATICS */
1346 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
1350 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1352 /* Update vectorial force */
1353 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
1354 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
1355 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
1357 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
1358 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
1359 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
1361 /**************************
1362 * CALCULATE INTERACTIONS *
1363 **************************/
1365 /* REACTION-FIELD ELECTROSTATICS */
1366 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
1370 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1372 /* Update vectorial force */
1373 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
1374 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
1375 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
1377 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
1378 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
1379 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
1381 /**************************
1382 * CALCULATE INTERACTIONS *
1383 **************************/
1385 /* REACTION-FIELD ELECTROSTATICS */
1386 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
1390 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1392 /* Update vectorial force */
1393 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
1394 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
1395 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
1397 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
1398 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
1399 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
1401 /**************************
1402 * CALCULATE INTERACTIONS *
1403 **************************/
1405 /* REACTION-FIELD ELECTROSTATICS */
1406 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
1410 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1412 /* Update vectorial force */
1413 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
1414 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
1415 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
1417 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
1418 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
1419 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
1421 /**************************
1422 * CALCULATE INTERACTIONS *
1423 **************************/
1425 /* REACTION-FIELD ELECTROSTATICS */
1426 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
1430 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1432 /* Update vectorial force */
1433 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
1434 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
1435 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
1437 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
1438 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
1439 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
1441 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1443 /* Inner loop uses 270 flops */
1446 /* End of innermost loop */
1448 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1449 f
+i_coord_offset
,fshift
+i_shift_offset
);
1451 /* Increment number of inner iterations */
1452 inneriter
+= j_index_end
- j_index_start
;
1454 /* Outer loop uses 18 flops */
1457 /* Increment number of outer iterations */
1460 /* Update outer/inner flops */
1462 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_F
,outeriter
*18 + inneriter
*270);