2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the GROMACS avx_128_fma_double kernel generator.
44 #include "../nb_kernel.h"
45 #include "gromacs/gmxlib/nrnb.h"
47 #include "kernelutil_x86_avx_128_fma_double.h"
50 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double
51 * Electrostatics interaction: ReactionField
52 * VdW interaction: None
53 * Geometry: Water3-Water3
54 * Calculate force/pot: PotentialAndForce
57 nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_avx_128_fma_double
58 (t_nblist
* gmx_restrict nlist
,
59 rvec
* gmx_restrict xx
,
60 rvec
* gmx_restrict ff
,
61 struct t_forcerec
* gmx_restrict fr
,
62 t_mdatoms
* gmx_restrict mdatoms
,
63 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
64 t_nrnb
* gmx_restrict nrnb
)
66 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
67 * just 0 for non-waters.
68 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
69 * jnr indices corresponding to data put in the four positions in the SIMD register.
71 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
72 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
74 int j_coord_offsetA
,j_coord_offsetB
;
75 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
77 real
*shiftvec
,*fshift
,*x
,*f
;
78 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
80 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
82 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
84 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
85 int vdwjidx0A
,vdwjidx0B
;
86 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
87 int vdwjidx1A
,vdwjidx1B
;
88 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
89 int vdwjidx2A
,vdwjidx2B
;
90 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
91 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
92 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
93 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
94 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
95 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
96 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
97 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
98 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
99 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
100 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
102 __m128d dummy_mask
,cutoff_mask
;
103 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
104 __m128d one
= _mm_set1_pd(1.0);
105 __m128d two
= _mm_set1_pd(2.0);
111 jindex
= nlist
->jindex
;
113 shiftidx
= nlist
->shift
;
115 shiftvec
= fr
->shift_vec
[0];
116 fshift
= fr
->fshift
[0];
117 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
118 charge
= mdatoms
->chargeA
;
119 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
120 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
121 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
123 /* Setup water-specific parameters */
124 inr
= nlist
->iinr
[0];
125 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
126 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
127 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
129 jq0
= _mm_set1_pd(charge
[inr
+0]);
130 jq1
= _mm_set1_pd(charge
[inr
+1]);
131 jq2
= _mm_set1_pd(charge
[inr
+2]);
132 qq00
= _mm_mul_pd(iq0
,jq0
);
133 qq01
= _mm_mul_pd(iq0
,jq1
);
134 qq02
= _mm_mul_pd(iq0
,jq2
);
135 qq10
= _mm_mul_pd(iq1
,jq0
);
136 qq11
= _mm_mul_pd(iq1
,jq1
);
137 qq12
= _mm_mul_pd(iq1
,jq2
);
138 qq20
= _mm_mul_pd(iq2
,jq0
);
139 qq21
= _mm_mul_pd(iq2
,jq1
);
140 qq22
= _mm_mul_pd(iq2
,jq2
);
142 /* Avoid stupid compiler warnings */
150 /* Start outer loop over neighborlists */
151 for(iidx
=0; iidx
<nri
; iidx
++)
153 /* Load shift vector for this list */
154 i_shift_offset
= DIM
*shiftidx
[iidx
];
156 /* Load limits for loop over neighbors */
157 j_index_start
= jindex
[iidx
];
158 j_index_end
= jindex
[iidx
+1];
160 /* Get outer coordinate index */
162 i_coord_offset
= DIM
*inr
;
164 /* Load i particle coords and add shift vector */
165 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
166 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
168 fix0
= _mm_setzero_pd();
169 fiy0
= _mm_setzero_pd();
170 fiz0
= _mm_setzero_pd();
171 fix1
= _mm_setzero_pd();
172 fiy1
= _mm_setzero_pd();
173 fiz1
= _mm_setzero_pd();
174 fix2
= _mm_setzero_pd();
175 fiy2
= _mm_setzero_pd();
176 fiz2
= _mm_setzero_pd();
178 /* Reset potential sums */
179 velecsum
= _mm_setzero_pd();
181 /* Start inner kernel loop */
182 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
185 /* Get j neighbor index, and coordinate index */
188 j_coord_offsetA
= DIM
*jnrA
;
189 j_coord_offsetB
= DIM
*jnrB
;
191 /* load j atom coordinates */
192 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
193 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
195 /* Calculate displacement vector */
196 dx00
= _mm_sub_pd(ix0
,jx0
);
197 dy00
= _mm_sub_pd(iy0
,jy0
);
198 dz00
= _mm_sub_pd(iz0
,jz0
);
199 dx01
= _mm_sub_pd(ix0
,jx1
);
200 dy01
= _mm_sub_pd(iy0
,jy1
);
201 dz01
= _mm_sub_pd(iz0
,jz1
);
202 dx02
= _mm_sub_pd(ix0
,jx2
);
203 dy02
= _mm_sub_pd(iy0
,jy2
);
204 dz02
= _mm_sub_pd(iz0
,jz2
);
205 dx10
= _mm_sub_pd(ix1
,jx0
);
206 dy10
= _mm_sub_pd(iy1
,jy0
);
207 dz10
= _mm_sub_pd(iz1
,jz0
);
208 dx11
= _mm_sub_pd(ix1
,jx1
);
209 dy11
= _mm_sub_pd(iy1
,jy1
);
210 dz11
= _mm_sub_pd(iz1
,jz1
);
211 dx12
= _mm_sub_pd(ix1
,jx2
);
212 dy12
= _mm_sub_pd(iy1
,jy2
);
213 dz12
= _mm_sub_pd(iz1
,jz2
);
214 dx20
= _mm_sub_pd(ix2
,jx0
);
215 dy20
= _mm_sub_pd(iy2
,jy0
);
216 dz20
= _mm_sub_pd(iz2
,jz0
);
217 dx21
= _mm_sub_pd(ix2
,jx1
);
218 dy21
= _mm_sub_pd(iy2
,jy1
);
219 dz21
= _mm_sub_pd(iz2
,jz1
);
220 dx22
= _mm_sub_pd(ix2
,jx2
);
221 dy22
= _mm_sub_pd(iy2
,jy2
);
222 dz22
= _mm_sub_pd(iz2
,jz2
);
224 /* Calculate squared distance and things based on it */
225 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
226 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
227 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
228 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
229 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
230 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
231 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
232 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
233 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
235 rinv00
= avx128fma_invsqrt_d(rsq00
);
236 rinv01
= avx128fma_invsqrt_d(rsq01
);
237 rinv02
= avx128fma_invsqrt_d(rsq02
);
238 rinv10
= avx128fma_invsqrt_d(rsq10
);
239 rinv11
= avx128fma_invsqrt_d(rsq11
);
240 rinv12
= avx128fma_invsqrt_d(rsq12
);
241 rinv20
= avx128fma_invsqrt_d(rsq20
);
242 rinv21
= avx128fma_invsqrt_d(rsq21
);
243 rinv22
= avx128fma_invsqrt_d(rsq22
);
245 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
246 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
247 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
248 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
249 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
250 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
251 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
252 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
253 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
255 fjx0
= _mm_setzero_pd();
256 fjy0
= _mm_setzero_pd();
257 fjz0
= _mm_setzero_pd();
258 fjx1
= _mm_setzero_pd();
259 fjy1
= _mm_setzero_pd();
260 fjz1
= _mm_setzero_pd();
261 fjx2
= _mm_setzero_pd();
262 fjy2
= _mm_setzero_pd();
263 fjz2
= _mm_setzero_pd();
265 /**************************
266 * CALCULATE INTERACTIONS *
267 **************************/
269 /* REACTION-FIELD ELECTROSTATICS */
270 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_macc_pd(krf
,rsq00
,rinv00
),crf
));
271 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
273 /* Update potential sum for this i atom from the interaction with this j atom. */
274 velecsum
= _mm_add_pd(velecsum
,velec
);
278 /* Update vectorial force */
279 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
280 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
281 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
283 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
284 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
285 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
287 /**************************
288 * CALCULATE INTERACTIONS *
289 **************************/
291 /* REACTION-FIELD ELECTROSTATICS */
292 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_macc_pd(krf
,rsq01
,rinv01
),crf
));
293 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
295 /* Update potential sum for this i atom from the interaction with this j atom. */
296 velecsum
= _mm_add_pd(velecsum
,velec
);
300 /* Update vectorial force */
301 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
302 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
303 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
305 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
306 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
307 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
309 /**************************
310 * CALCULATE INTERACTIONS *
311 **************************/
313 /* REACTION-FIELD ELECTROSTATICS */
314 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_macc_pd(krf
,rsq02
,rinv02
),crf
));
315 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
317 /* Update potential sum for this i atom from the interaction with this j atom. */
318 velecsum
= _mm_add_pd(velecsum
,velec
);
322 /* Update vectorial force */
323 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
324 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
325 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
327 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
328 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
329 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
331 /**************************
332 * CALCULATE INTERACTIONS *
333 **************************/
335 /* REACTION-FIELD ELECTROSTATICS */
336 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_macc_pd(krf
,rsq10
,rinv10
),crf
));
337 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
339 /* Update potential sum for this i atom from the interaction with this j atom. */
340 velecsum
= _mm_add_pd(velecsum
,velec
);
344 /* Update vectorial force */
345 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
346 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
347 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
349 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
350 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
351 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
353 /**************************
354 * CALCULATE INTERACTIONS *
355 **************************/
357 /* REACTION-FIELD ELECTROSTATICS */
358 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_macc_pd(krf
,rsq11
,rinv11
),crf
));
359 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
361 /* Update potential sum for this i atom from the interaction with this j atom. */
362 velecsum
= _mm_add_pd(velecsum
,velec
);
366 /* Update vectorial force */
367 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
368 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
369 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
371 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
372 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
373 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
375 /**************************
376 * CALCULATE INTERACTIONS *
377 **************************/
379 /* REACTION-FIELD ELECTROSTATICS */
380 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_macc_pd(krf
,rsq12
,rinv12
),crf
));
381 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
383 /* Update potential sum for this i atom from the interaction with this j atom. */
384 velecsum
= _mm_add_pd(velecsum
,velec
);
388 /* Update vectorial force */
389 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
390 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
391 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
393 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
394 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
395 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
397 /**************************
398 * CALCULATE INTERACTIONS *
399 **************************/
401 /* REACTION-FIELD ELECTROSTATICS */
402 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_macc_pd(krf
,rsq20
,rinv20
),crf
));
403 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
405 /* Update potential sum for this i atom from the interaction with this j atom. */
406 velecsum
= _mm_add_pd(velecsum
,velec
);
410 /* Update vectorial force */
411 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
412 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
413 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
415 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
416 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
417 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
419 /**************************
420 * CALCULATE INTERACTIONS *
421 **************************/
423 /* REACTION-FIELD ELECTROSTATICS */
424 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_macc_pd(krf
,rsq21
,rinv21
),crf
));
425 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
427 /* Update potential sum for this i atom from the interaction with this j atom. */
428 velecsum
= _mm_add_pd(velecsum
,velec
);
432 /* Update vectorial force */
433 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
434 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
435 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
437 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
438 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
439 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
441 /**************************
442 * CALCULATE INTERACTIONS *
443 **************************/
445 /* REACTION-FIELD ELECTROSTATICS */
446 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_macc_pd(krf
,rsq22
,rinv22
),crf
));
447 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
449 /* Update potential sum for this i atom from the interaction with this j atom. */
450 velecsum
= _mm_add_pd(velecsum
,velec
);
454 /* Update vectorial force */
455 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
456 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
457 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
459 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
460 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
461 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
463 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
465 /* Inner loop uses 315 flops */
472 j_coord_offsetA
= DIM
*jnrA
;
474 /* load j atom coordinates */
475 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
476 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
478 /* Calculate displacement vector */
479 dx00
= _mm_sub_pd(ix0
,jx0
);
480 dy00
= _mm_sub_pd(iy0
,jy0
);
481 dz00
= _mm_sub_pd(iz0
,jz0
);
482 dx01
= _mm_sub_pd(ix0
,jx1
);
483 dy01
= _mm_sub_pd(iy0
,jy1
);
484 dz01
= _mm_sub_pd(iz0
,jz1
);
485 dx02
= _mm_sub_pd(ix0
,jx2
);
486 dy02
= _mm_sub_pd(iy0
,jy2
);
487 dz02
= _mm_sub_pd(iz0
,jz2
);
488 dx10
= _mm_sub_pd(ix1
,jx0
);
489 dy10
= _mm_sub_pd(iy1
,jy0
);
490 dz10
= _mm_sub_pd(iz1
,jz0
);
491 dx11
= _mm_sub_pd(ix1
,jx1
);
492 dy11
= _mm_sub_pd(iy1
,jy1
);
493 dz11
= _mm_sub_pd(iz1
,jz1
);
494 dx12
= _mm_sub_pd(ix1
,jx2
);
495 dy12
= _mm_sub_pd(iy1
,jy2
);
496 dz12
= _mm_sub_pd(iz1
,jz2
);
497 dx20
= _mm_sub_pd(ix2
,jx0
);
498 dy20
= _mm_sub_pd(iy2
,jy0
);
499 dz20
= _mm_sub_pd(iz2
,jz0
);
500 dx21
= _mm_sub_pd(ix2
,jx1
);
501 dy21
= _mm_sub_pd(iy2
,jy1
);
502 dz21
= _mm_sub_pd(iz2
,jz1
);
503 dx22
= _mm_sub_pd(ix2
,jx2
);
504 dy22
= _mm_sub_pd(iy2
,jy2
);
505 dz22
= _mm_sub_pd(iz2
,jz2
);
507 /* Calculate squared distance and things based on it */
508 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
509 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
510 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
511 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
512 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
513 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
514 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
515 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
516 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
518 rinv00
= avx128fma_invsqrt_d(rsq00
);
519 rinv01
= avx128fma_invsqrt_d(rsq01
);
520 rinv02
= avx128fma_invsqrt_d(rsq02
);
521 rinv10
= avx128fma_invsqrt_d(rsq10
);
522 rinv11
= avx128fma_invsqrt_d(rsq11
);
523 rinv12
= avx128fma_invsqrt_d(rsq12
);
524 rinv20
= avx128fma_invsqrt_d(rsq20
);
525 rinv21
= avx128fma_invsqrt_d(rsq21
);
526 rinv22
= avx128fma_invsqrt_d(rsq22
);
528 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
529 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
530 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
531 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
532 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
533 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
534 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
535 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
536 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
538 fjx0
= _mm_setzero_pd();
539 fjy0
= _mm_setzero_pd();
540 fjz0
= _mm_setzero_pd();
541 fjx1
= _mm_setzero_pd();
542 fjy1
= _mm_setzero_pd();
543 fjz1
= _mm_setzero_pd();
544 fjx2
= _mm_setzero_pd();
545 fjy2
= _mm_setzero_pd();
546 fjz2
= _mm_setzero_pd();
548 /**************************
549 * CALCULATE INTERACTIONS *
550 **************************/
552 /* REACTION-FIELD ELECTROSTATICS */
553 velec
= _mm_mul_pd(qq00
,_mm_sub_pd(_mm_macc_pd(krf
,rsq00
,rinv00
),crf
));
554 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
556 /* Update potential sum for this i atom from the interaction with this j atom. */
557 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
558 velecsum
= _mm_add_pd(velecsum
,velec
);
562 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
564 /* Update vectorial force */
565 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
566 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
567 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
569 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
570 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
571 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
573 /**************************
574 * CALCULATE INTERACTIONS *
575 **************************/
577 /* REACTION-FIELD ELECTROSTATICS */
578 velec
= _mm_mul_pd(qq01
,_mm_sub_pd(_mm_macc_pd(krf
,rsq01
,rinv01
),crf
));
579 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
581 /* Update potential sum for this i atom from the interaction with this j atom. */
582 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
583 velecsum
= _mm_add_pd(velecsum
,velec
);
587 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
589 /* Update vectorial force */
590 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
591 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
592 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
594 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
595 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
596 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
598 /**************************
599 * CALCULATE INTERACTIONS *
600 **************************/
602 /* REACTION-FIELD ELECTROSTATICS */
603 velec
= _mm_mul_pd(qq02
,_mm_sub_pd(_mm_macc_pd(krf
,rsq02
,rinv02
),crf
));
604 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
606 /* Update potential sum for this i atom from the interaction with this j atom. */
607 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
608 velecsum
= _mm_add_pd(velecsum
,velec
);
612 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
614 /* Update vectorial force */
615 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
616 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
617 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
619 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
620 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
621 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
623 /**************************
624 * CALCULATE INTERACTIONS *
625 **************************/
627 /* REACTION-FIELD ELECTROSTATICS */
628 velec
= _mm_mul_pd(qq10
,_mm_sub_pd(_mm_macc_pd(krf
,rsq10
,rinv10
),crf
));
629 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
631 /* Update potential sum for this i atom from the interaction with this j atom. */
632 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
633 velecsum
= _mm_add_pd(velecsum
,velec
);
637 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
639 /* Update vectorial force */
640 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
641 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
642 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
644 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
645 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
646 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
648 /**************************
649 * CALCULATE INTERACTIONS *
650 **************************/
652 /* REACTION-FIELD ELECTROSTATICS */
653 velec
= _mm_mul_pd(qq11
,_mm_sub_pd(_mm_macc_pd(krf
,rsq11
,rinv11
),crf
));
654 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
656 /* Update potential sum for this i atom from the interaction with this j atom. */
657 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
658 velecsum
= _mm_add_pd(velecsum
,velec
);
662 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
664 /* Update vectorial force */
665 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
666 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
667 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
669 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
670 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
671 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
673 /**************************
674 * CALCULATE INTERACTIONS *
675 **************************/
677 /* REACTION-FIELD ELECTROSTATICS */
678 velec
= _mm_mul_pd(qq12
,_mm_sub_pd(_mm_macc_pd(krf
,rsq12
,rinv12
),crf
));
679 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
681 /* Update potential sum for this i atom from the interaction with this j atom. */
682 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
683 velecsum
= _mm_add_pd(velecsum
,velec
);
687 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
689 /* Update vectorial force */
690 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
691 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
692 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
694 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
695 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
696 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
698 /**************************
699 * CALCULATE INTERACTIONS *
700 **************************/
702 /* REACTION-FIELD ELECTROSTATICS */
703 velec
= _mm_mul_pd(qq20
,_mm_sub_pd(_mm_macc_pd(krf
,rsq20
,rinv20
),crf
));
704 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
706 /* Update potential sum for this i atom from the interaction with this j atom. */
707 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
708 velecsum
= _mm_add_pd(velecsum
,velec
);
712 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
714 /* Update vectorial force */
715 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
716 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
717 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
719 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
720 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
721 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
723 /**************************
724 * CALCULATE INTERACTIONS *
725 **************************/
727 /* REACTION-FIELD ELECTROSTATICS */
728 velec
= _mm_mul_pd(qq21
,_mm_sub_pd(_mm_macc_pd(krf
,rsq21
,rinv21
),crf
));
729 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
731 /* Update potential sum for this i atom from the interaction with this j atom. */
732 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
733 velecsum
= _mm_add_pd(velecsum
,velec
);
737 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
739 /* Update vectorial force */
740 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
741 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
742 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
744 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
745 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
746 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
748 /**************************
749 * CALCULATE INTERACTIONS *
750 **************************/
752 /* REACTION-FIELD ELECTROSTATICS */
753 velec
= _mm_mul_pd(qq22
,_mm_sub_pd(_mm_macc_pd(krf
,rsq22
,rinv22
),crf
));
754 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
756 /* Update potential sum for this i atom from the interaction with this j atom. */
757 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
758 velecsum
= _mm_add_pd(velecsum
,velec
);
762 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
764 /* Update vectorial force */
765 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
766 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
767 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
769 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
770 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
771 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
773 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
775 /* Inner loop uses 315 flops */
778 /* End of innermost loop */
780 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
781 f
+i_coord_offset
,fshift
+i_shift_offset
);
784 /* Update potential energies */
785 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
787 /* Increment number of inner iterations */
788 inneriter
+= j_index_end
- j_index_start
;
790 /* Outer loop uses 19 flops */
793 /* Increment number of outer iterations */
796 /* Update outer/inner flops */
798 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_VF
,outeriter
*19 + inneriter
*315);
801 * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double
802 * Electrostatics interaction: ReactionField
803 * VdW interaction: None
804 * Geometry: Water3-Water3
805 * Calculate force/pot: Force
808 nb_kernel_ElecRF_VdwNone_GeomW3W3_F_avx_128_fma_double
809 (t_nblist
* gmx_restrict nlist
,
810 rvec
* gmx_restrict xx
,
811 rvec
* gmx_restrict ff
,
812 struct t_forcerec
* gmx_restrict fr
,
813 t_mdatoms
* gmx_restrict mdatoms
,
814 nb_kernel_data_t gmx_unused
* gmx_restrict kernel_data
,
815 t_nrnb
* gmx_restrict nrnb
)
817 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
818 * just 0 for non-waters.
819 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
820 * jnr indices corresponding to data put in the four positions in the SIMD register.
822 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
823 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
825 int j_coord_offsetA
,j_coord_offsetB
;
826 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
828 real
*shiftvec
,*fshift
,*x
,*f
;
829 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
831 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
833 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
835 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
836 int vdwjidx0A
,vdwjidx0B
;
837 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
838 int vdwjidx1A
,vdwjidx1B
;
839 __m128d jx1
,jy1
,jz1
,fjx1
,fjy1
,fjz1
,jq1
,isaj1
;
840 int vdwjidx2A
,vdwjidx2B
;
841 __m128d jx2
,jy2
,jz2
,fjx2
,fjy2
,fjz2
,jq2
,isaj2
;
842 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
843 __m128d dx01
,dy01
,dz01
,rsq01
,rinv01
,rinvsq01
,r01
,qq01
,c6_01
,c12_01
;
844 __m128d dx02
,dy02
,dz02
,rsq02
,rinv02
,rinvsq02
,r02
,qq02
,c6_02
,c12_02
;
845 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
846 __m128d dx11
,dy11
,dz11
,rsq11
,rinv11
,rinvsq11
,r11
,qq11
,c6_11
,c12_11
;
847 __m128d dx12
,dy12
,dz12
,rsq12
,rinv12
,rinvsq12
,r12
,qq12
,c6_12
,c12_12
;
848 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
849 __m128d dx21
,dy21
,dz21
,rsq21
,rinv21
,rinvsq21
,r21
,qq21
,c6_21
,c12_21
;
850 __m128d dx22
,dy22
,dz22
,rsq22
,rinv22
,rinvsq22
,r22
,qq22
,c6_22
,c12_22
;
851 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
853 __m128d dummy_mask
,cutoff_mask
;
854 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
855 __m128d one
= _mm_set1_pd(1.0);
856 __m128d two
= _mm_set1_pd(2.0);
862 jindex
= nlist
->jindex
;
864 shiftidx
= nlist
->shift
;
866 shiftvec
= fr
->shift_vec
[0];
867 fshift
= fr
->fshift
[0];
868 facel
= _mm_set1_pd(fr
->ic
->epsfac
);
869 charge
= mdatoms
->chargeA
;
870 krf
= _mm_set1_pd(fr
->ic
->k_rf
);
871 krf2
= _mm_set1_pd(fr
->ic
->k_rf
*2.0);
872 crf
= _mm_set1_pd(fr
->ic
->c_rf
);
874 /* Setup water-specific parameters */
875 inr
= nlist
->iinr
[0];
876 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
877 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
878 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
880 jq0
= _mm_set1_pd(charge
[inr
+0]);
881 jq1
= _mm_set1_pd(charge
[inr
+1]);
882 jq2
= _mm_set1_pd(charge
[inr
+2]);
883 qq00
= _mm_mul_pd(iq0
,jq0
);
884 qq01
= _mm_mul_pd(iq0
,jq1
);
885 qq02
= _mm_mul_pd(iq0
,jq2
);
886 qq10
= _mm_mul_pd(iq1
,jq0
);
887 qq11
= _mm_mul_pd(iq1
,jq1
);
888 qq12
= _mm_mul_pd(iq1
,jq2
);
889 qq20
= _mm_mul_pd(iq2
,jq0
);
890 qq21
= _mm_mul_pd(iq2
,jq1
);
891 qq22
= _mm_mul_pd(iq2
,jq2
);
893 /* Avoid stupid compiler warnings */
901 /* Start outer loop over neighborlists */
902 for(iidx
=0; iidx
<nri
; iidx
++)
904 /* Load shift vector for this list */
905 i_shift_offset
= DIM
*shiftidx
[iidx
];
907 /* Load limits for loop over neighbors */
908 j_index_start
= jindex
[iidx
];
909 j_index_end
= jindex
[iidx
+1];
911 /* Get outer coordinate index */
913 i_coord_offset
= DIM
*inr
;
915 /* Load i particle coords and add shift vector */
916 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
917 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
919 fix0
= _mm_setzero_pd();
920 fiy0
= _mm_setzero_pd();
921 fiz0
= _mm_setzero_pd();
922 fix1
= _mm_setzero_pd();
923 fiy1
= _mm_setzero_pd();
924 fiz1
= _mm_setzero_pd();
925 fix2
= _mm_setzero_pd();
926 fiy2
= _mm_setzero_pd();
927 fiz2
= _mm_setzero_pd();
929 /* Start inner kernel loop */
930 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
933 /* Get j neighbor index, and coordinate index */
936 j_coord_offsetA
= DIM
*jnrA
;
937 j_coord_offsetB
= DIM
*jnrB
;
939 /* load j atom coordinates */
940 gmx_mm_load_3rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
941 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
943 /* Calculate displacement vector */
944 dx00
= _mm_sub_pd(ix0
,jx0
);
945 dy00
= _mm_sub_pd(iy0
,jy0
);
946 dz00
= _mm_sub_pd(iz0
,jz0
);
947 dx01
= _mm_sub_pd(ix0
,jx1
);
948 dy01
= _mm_sub_pd(iy0
,jy1
);
949 dz01
= _mm_sub_pd(iz0
,jz1
);
950 dx02
= _mm_sub_pd(ix0
,jx2
);
951 dy02
= _mm_sub_pd(iy0
,jy2
);
952 dz02
= _mm_sub_pd(iz0
,jz2
);
953 dx10
= _mm_sub_pd(ix1
,jx0
);
954 dy10
= _mm_sub_pd(iy1
,jy0
);
955 dz10
= _mm_sub_pd(iz1
,jz0
);
956 dx11
= _mm_sub_pd(ix1
,jx1
);
957 dy11
= _mm_sub_pd(iy1
,jy1
);
958 dz11
= _mm_sub_pd(iz1
,jz1
);
959 dx12
= _mm_sub_pd(ix1
,jx2
);
960 dy12
= _mm_sub_pd(iy1
,jy2
);
961 dz12
= _mm_sub_pd(iz1
,jz2
);
962 dx20
= _mm_sub_pd(ix2
,jx0
);
963 dy20
= _mm_sub_pd(iy2
,jy0
);
964 dz20
= _mm_sub_pd(iz2
,jz0
);
965 dx21
= _mm_sub_pd(ix2
,jx1
);
966 dy21
= _mm_sub_pd(iy2
,jy1
);
967 dz21
= _mm_sub_pd(iz2
,jz1
);
968 dx22
= _mm_sub_pd(ix2
,jx2
);
969 dy22
= _mm_sub_pd(iy2
,jy2
);
970 dz22
= _mm_sub_pd(iz2
,jz2
);
972 /* Calculate squared distance and things based on it */
973 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
974 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
975 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
976 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
977 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
978 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
979 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
980 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
981 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
983 rinv00
= avx128fma_invsqrt_d(rsq00
);
984 rinv01
= avx128fma_invsqrt_d(rsq01
);
985 rinv02
= avx128fma_invsqrt_d(rsq02
);
986 rinv10
= avx128fma_invsqrt_d(rsq10
);
987 rinv11
= avx128fma_invsqrt_d(rsq11
);
988 rinv12
= avx128fma_invsqrt_d(rsq12
);
989 rinv20
= avx128fma_invsqrt_d(rsq20
);
990 rinv21
= avx128fma_invsqrt_d(rsq21
);
991 rinv22
= avx128fma_invsqrt_d(rsq22
);
993 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
994 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
995 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
996 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
997 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
998 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
999 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1000 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1001 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1003 fjx0
= _mm_setzero_pd();
1004 fjy0
= _mm_setzero_pd();
1005 fjz0
= _mm_setzero_pd();
1006 fjx1
= _mm_setzero_pd();
1007 fjy1
= _mm_setzero_pd();
1008 fjz1
= _mm_setzero_pd();
1009 fjx2
= _mm_setzero_pd();
1010 fjy2
= _mm_setzero_pd();
1011 fjz2
= _mm_setzero_pd();
1013 /**************************
1014 * CALCULATE INTERACTIONS *
1015 **************************/
1017 /* REACTION-FIELD ELECTROSTATICS */
1018 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
1022 /* Update vectorial force */
1023 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
1024 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
1025 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
1027 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
1028 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
1029 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
1031 /**************************
1032 * CALCULATE INTERACTIONS *
1033 **************************/
1035 /* REACTION-FIELD ELECTROSTATICS */
1036 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
1040 /* Update vectorial force */
1041 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
1042 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
1043 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
1045 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
1046 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
1047 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
1049 /**************************
1050 * CALCULATE INTERACTIONS *
1051 **************************/
1053 /* REACTION-FIELD ELECTROSTATICS */
1054 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
1058 /* Update vectorial force */
1059 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
1060 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
1061 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
1063 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
1064 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
1065 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
1067 /**************************
1068 * CALCULATE INTERACTIONS *
1069 **************************/
1071 /* REACTION-FIELD ELECTROSTATICS */
1072 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
1076 /* Update vectorial force */
1077 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
1078 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
1079 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
1081 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
1082 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
1083 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
1085 /**************************
1086 * CALCULATE INTERACTIONS *
1087 **************************/
1089 /* REACTION-FIELD ELECTROSTATICS */
1090 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
1094 /* Update vectorial force */
1095 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
1096 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
1097 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
1099 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
1100 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
1101 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
1103 /**************************
1104 * CALCULATE INTERACTIONS *
1105 **************************/
1107 /* REACTION-FIELD ELECTROSTATICS */
1108 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
1112 /* Update vectorial force */
1113 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
1114 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
1115 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
1117 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
1118 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
1119 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
1121 /**************************
1122 * CALCULATE INTERACTIONS *
1123 **************************/
1125 /* REACTION-FIELD ELECTROSTATICS */
1126 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
1130 /* Update vectorial force */
1131 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
1132 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
1133 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
1135 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
1136 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
1137 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
1139 /**************************
1140 * CALCULATE INTERACTIONS *
1141 **************************/
1143 /* REACTION-FIELD ELECTROSTATICS */
1144 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
1148 /* Update vectorial force */
1149 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
1150 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
1151 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
1153 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
1154 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
1155 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
1157 /**************************
1158 * CALCULATE INTERACTIONS *
1159 **************************/
1161 /* REACTION-FIELD ELECTROSTATICS */
1162 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
1166 /* Update vectorial force */
1167 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
1168 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
1169 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
1171 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
1172 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
1173 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
1175 gmx_mm_decrement_3rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1177 /* Inner loop uses 270 flops */
1180 if(jidx
<j_index_end
)
1184 j_coord_offsetA
= DIM
*jnrA
;
1186 /* load j atom coordinates */
1187 gmx_mm_load_3rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
1188 &jx0
,&jy0
,&jz0
,&jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
);
1190 /* Calculate displacement vector */
1191 dx00
= _mm_sub_pd(ix0
,jx0
);
1192 dy00
= _mm_sub_pd(iy0
,jy0
);
1193 dz00
= _mm_sub_pd(iz0
,jz0
);
1194 dx01
= _mm_sub_pd(ix0
,jx1
);
1195 dy01
= _mm_sub_pd(iy0
,jy1
);
1196 dz01
= _mm_sub_pd(iz0
,jz1
);
1197 dx02
= _mm_sub_pd(ix0
,jx2
);
1198 dy02
= _mm_sub_pd(iy0
,jy2
);
1199 dz02
= _mm_sub_pd(iz0
,jz2
);
1200 dx10
= _mm_sub_pd(ix1
,jx0
);
1201 dy10
= _mm_sub_pd(iy1
,jy0
);
1202 dz10
= _mm_sub_pd(iz1
,jz0
);
1203 dx11
= _mm_sub_pd(ix1
,jx1
);
1204 dy11
= _mm_sub_pd(iy1
,jy1
);
1205 dz11
= _mm_sub_pd(iz1
,jz1
);
1206 dx12
= _mm_sub_pd(ix1
,jx2
);
1207 dy12
= _mm_sub_pd(iy1
,jy2
);
1208 dz12
= _mm_sub_pd(iz1
,jz2
);
1209 dx20
= _mm_sub_pd(ix2
,jx0
);
1210 dy20
= _mm_sub_pd(iy2
,jy0
);
1211 dz20
= _mm_sub_pd(iz2
,jz0
);
1212 dx21
= _mm_sub_pd(ix2
,jx1
);
1213 dy21
= _mm_sub_pd(iy2
,jy1
);
1214 dz21
= _mm_sub_pd(iz2
,jz1
);
1215 dx22
= _mm_sub_pd(ix2
,jx2
);
1216 dy22
= _mm_sub_pd(iy2
,jy2
);
1217 dz22
= _mm_sub_pd(iz2
,jz2
);
1219 /* Calculate squared distance and things based on it */
1220 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
1221 rsq01
= gmx_mm_calc_rsq_pd(dx01
,dy01
,dz01
);
1222 rsq02
= gmx_mm_calc_rsq_pd(dx02
,dy02
,dz02
);
1223 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
1224 rsq11
= gmx_mm_calc_rsq_pd(dx11
,dy11
,dz11
);
1225 rsq12
= gmx_mm_calc_rsq_pd(dx12
,dy12
,dz12
);
1226 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
1227 rsq21
= gmx_mm_calc_rsq_pd(dx21
,dy21
,dz21
);
1228 rsq22
= gmx_mm_calc_rsq_pd(dx22
,dy22
,dz22
);
1230 rinv00
= avx128fma_invsqrt_d(rsq00
);
1231 rinv01
= avx128fma_invsqrt_d(rsq01
);
1232 rinv02
= avx128fma_invsqrt_d(rsq02
);
1233 rinv10
= avx128fma_invsqrt_d(rsq10
);
1234 rinv11
= avx128fma_invsqrt_d(rsq11
);
1235 rinv12
= avx128fma_invsqrt_d(rsq12
);
1236 rinv20
= avx128fma_invsqrt_d(rsq20
);
1237 rinv21
= avx128fma_invsqrt_d(rsq21
);
1238 rinv22
= avx128fma_invsqrt_d(rsq22
);
1240 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
1241 rinvsq01
= _mm_mul_pd(rinv01
,rinv01
);
1242 rinvsq02
= _mm_mul_pd(rinv02
,rinv02
);
1243 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
1244 rinvsq11
= _mm_mul_pd(rinv11
,rinv11
);
1245 rinvsq12
= _mm_mul_pd(rinv12
,rinv12
);
1246 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
1247 rinvsq21
= _mm_mul_pd(rinv21
,rinv21
);
1248 rinvsq22
= _mm_mul_pd(rinv22
,rinv22
);
1250 fjx0
= _mm_setzero_pd();
1251 fjy0
= _mm_setzero_pd();
1252 fjz0
= _mm_setzero_pd();
1253 fjx1
= _mm_setzero_pd();
1254 fjy1
= _mm_setzero_pd();
1255 fjz1
= _mm_setzero_pd();
1256 fjx2
= _mm_setzero_pd();
1257 fjy2
= _mm_setzero_pd();
1258 fjz2
= _mm_setzero_pd();
1260 /**************************
1261 * CALCULATE INTERACTIONS *
1262 **************************/
1264 /* REACTION-FIELD ELECTROSTATICS */
1265 felec
= _mm_mul_pd(qq00
,_mm_msub_pd(rinv00
,rinvsq00
,krf2
));
1269 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1271 /* Update vectorial force */
1272 fix0
= _mm_macc_pd(dx00
,fscal
,fix0
);
1273 fiy0
= _mm_macc_pd(dy00
,fscal
,fiy0
);
1274 fiz0
= _mm_macc_pd(dz00
,fscal
,fiz0
);
1276 fjx0
= _mm_macc_pd(dx00
,fscal
,fjx0
);
1277 fjy0
= _mm_macc_pd(dy00
,fscal
,fjy0
);
1278 fjz0
= _mm_macc_pd(dz00
,fscal
,fjz0
);
1280 /**************************
1281 * CALCULATE INTERACTIONS *
1282 **************************/
1284 /* REACTION-FIELD ELECTROSTATICS */
1285 felec
= _mm_mul_pd(qq01
,_mm_msub_pd(rinv01
,rinvsq01
,krf2
));
1289 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1291 /* Update vectorial force */
1292 fix0
= _mm_macc_pd(dx01
,fscal
,fix0
);
1293 fiy0
= _mm_macc_pd(dy01
,fscal
,fiy0
);
1294 fiz0
= _mm_macc_pd(dz01
,fscal
,fiz0
);
1296 fjx1
= _mm_macc_pd(dx01
,fscal
,fjx1
);
1297 fjy1
= _mm_macc_pd(dy01
,fscal
,fjy1
);
1298 fjz1
= _mm_macc_pd(dz01
,fscal
,fjz1
);
1300 /**************************
1301 * CALCULATE INTERACTIONS *
1302 **************************/
1304 /* REACTION-FIELD ELECTROSTATICS */
1305 felec
= _mm_mul_pd(qq02
,_mm_msub_pd(rinv02
,rinvsq02
,krf2
));
1309 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1311 /* Update vectorial force */
1312 fix0
= _mm_macc_pd(dx02
,fscal
,fix0
);
1313 fiy0
= _mm_macc_pd(dy02
,fscal
,fiy0
);
1314 fiz0
= _mm_macc_pd(dz02
,fscal
,fiz0
);
1316 fjx2
= _mm_macc_pd(dx02
,fscal
,fjx2
);
1317 fjy2
= _mm_macc_pd(dy02
,fscal
,fjy2
);
1318 fjz2
= _mm_macc_pd(dz02
,fscal
,fjz2
);
1320 /**************************
1321 * CALCULATE INTERACTIONS *
1322 **************************/
1324 /* REACTION-FIELD ELECTROSTATICS */
1325 felec
= _mm_mul_pd(qq10
,_mm_msub_pd(rinv10
,rinvsq10
,krf2
));
1329 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1331 /* Update vectorial force */
1332 fix1
= _mm_macc_pd(dx10
,fscal
,fix1
);
1333 fiy1
= _mm_macc_pd(dy10
,fscal
,fiy1
);
1334 fiz1
= _mm_macc_pd(dz10
,fscal
,fiz1
);
1336 fjx0
= _mm_macc_pd(dx10
,fscal
,fjx0
);
1337 fjy0
= _mm_macc_pd(dy10
,fscal
,fjy0
);
1338 fjz0
= _mm_macc_pd(dz10
,fscal
,fjz0
);
1340 /**************************
1341 * CALCULATE INTERACTIONS *
1342 **************************/
1344 /* REACTION-FIELD ELECTROSTATICS */
1345 felec
= _mm_mul_pd(qq11
,_mm_msub_pd(rinv11
,rinvsq11
,krf2
));
1349 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1351 /* Update vectorial force */
1352 fix1
= _mm_macc_pd(dx11
,fscal
,fix1
);
1353 fiy1
= _mm_macc_pd(dy11
,fscal
,fiy1
);
1354 fiz1
= _mm_macc_pd(dz11
,fscal
,fiz1
);
1356 fjx1
= _mm_macc_pd(dx11
,fscal
,fjx1
);
1357 fjy1
= _mm_macc_pd(dy11
,fscal
,fjy1
);
1358 fjz1
= _mm_macc_pd(dz11
,fscal
,fjz1
);
1360 /**************************
1361 * CALCULATE INTERACTIONS *
1362 **************************/
1364 /* REACTION-FIELD ELECTROSTATICS */
1365 felec
= _mm_mul_pd(qq12
,_mm_msub_pd(rinv12
,rinvsq12
,krf2
));
1369 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1371 /* Update vectorial force */
1372 fix1
= _mm_macc_pd(dx12
,fscal
,fix1
);
1373 fiy1
= _mm_macc_pd(dy12
,fscal
,fiy1
);
1374 fiz1
= _mm_macc_pd(dz12
,fscal
,fiz1
);
1376 fjx2
= _mm_macc_pd(dx12
,fscal
,fjx2
);
1377 fjy2
= _mm_macc_pd(dy12
,fscal
,fjy2
);
1378 fjz2
= _mm_macc_pd(dz12
,fscal
,fjz2
);
1380 /**************************
1381 * CALCULATE INTERACTIONS *
1382 **************************/
1384 /* REACTION-FIELD ELECTROSTATICS */
1385 felec
= _mm_mul_pd(qq20
,_mm_msub_pd(rinv20
,rinvsq20
,krf2
));
1389 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1391 /* Update vectorial force */
1392 fix2
= _mm_macc_pd(dx20
,fscal
,fix2
);
1393 fiy2
= _mm_macc_pd(dy20
,fscal
,fiy2
);
1394 fiz2
= _mm_macc_pd(dz20
,fscal
,fiz2
);
1396 fjx0
= _mm_macc_pd(dx20
,fscal
,fjx0
);
1397 fjy0
= _mm_macc_pd(dy20
,fscal
,fjy0
);
1398 fjz0
= _mm_macc_pd(dz20
,fscal
,fjz0
);
1400 /**************************
1401 * CALCULATE INTERACTIONS *
1402 **************************/
1404 /* REACTION-FIELD ELECTROSTATICS */
1405 felec
= _mm_mul_pd(qq21
,_mm_msub_pd(rinv21
,rinvsq21
,krf2
));
1409 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1411 /* Update vectorial force */
1412 fix2
= _mm_macc_pd(dx21
,fscal
,fix2
);
1413 fiy2
= _mm_macc_pd(dy21
,fscal
,fiy2
);
1414 fiz2
= _mm_macc_pd(dz21
,fscal
,fiz2
);
1416 fjx1
= _mm_macc_pd(dx21
,fscal
,fjx1
);
1417 fjy1
= _mm_macc_pd(dy21
,fscal
,fjy1
);
1418 fjz1
= _mm_macc_pd(dz21
,fscal
,fjz1
);
1420 /**************************
1421 * CALCULATE INTERACTIONS *
1422 **************************/
1424 /* REACTION-FIELD ELECTROSTATICS */
1425 felec
= _mm_mul_pd(qq22
,_mm_msub_pd(rinv22
,rinvsq22
,krf2
));
1429 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
1431 /* Update vectorial force */
1432 fix2
= _mm_macc_pd(dx22
,fscal
,fix2
);
1433 fiy2
= _mm_macc_pd(dy22
,fscal
,fiy2
);
1434 fiz2
= _mm_macc_pd(dz22
,fscal
,fiz2
);
1436 fjx2
= _mm_macc_pd(dx22
,fscal
,fjx2
);
1437 fjy2
= _mm_macc_pd(dy22
,fscal
,fjy2
);
1438 fjz2
= _mm_macc_pd(dz22
,fscal
,fjz2
);
1440 gmx_mm_decrement_3rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
,fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
);
1442 /* Inner loop uses 270 flops */
1445 /* End of innermost loop */
1447 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
1448 f
+i_coord_offset
,fshift
+i_shift_offset
);
1450 /* Increment number of inner iterations */
1451 inneriter
+= j_index_end
- j_index_start
;
1453 /* Outer loop uses 18 flops */
1456 /* Increment number of outer iterations */
1459 /* Update outer/inner flops */
1461 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3W3_F
,outeriter
*18 + inneriter
*270);