2 * Note: this file was generated by the Gromacs sse2_double kernel generator.
4 * This source code is part of
8 * Copyright (c) 2001-2012, The GROMACS Development Team
10 * Gromacs is a library for molecular simulation and trajectory analysis,
11 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
12 * a full list of developers and information, check out http://www.gromacs.org
14 * This program is free software; you can redistribute it and/or modify it under
15 * the terms of the GNU Lesser General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option) any
19 * To help fund GROMACS development, we humbly ask that you cite
20 * the papers people have written on it - you can find them on the website.
28 #include "../nb_kernel.h"
29 #include "types/simple.h"
33 #include "gmx_math_x86_sse2_double.h"
34 #include "kernelutil_x86_sse2_double.h"
37 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double
38 * Electrostatics interaction: Coulomb
39 * VdW interaction: None
40 * Geometry: Water3-Particle
41 * Calculate force/pot: PotentialAndForce
44 nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sse2_double
45 (t_nblist
* gmx_restrict nlist
,
46 rvec
* gmx_restrict xx
,
47 rvec
* gmx_restrict ff
,
48 t_forcerec
* gmx_restrict fr
,
49 t_mdatoms
* gmx_restrict mdatoms
,
50 nb_kernel_data_t
* gmx_restrict kernel_data
,
51 t_nrnb
* gmx_restrict nrnb
)
53 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
54 * just 0 for non-waters.
55 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
56 * jnr indices corresponding to data put in the four positions in the SIMD register.
58 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
59 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
61 int j_coord_offsetA
,j_coord_offsetB
;
62 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
64 real
*shiftvec
,*fshift
,*x
,*f
;
65 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
67 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
69 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
71 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
72 int vdwjidx0A
,vdwjidx0B
;
73 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
74 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
75 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
76 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
77 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
79 __m128d dummy_mask
,cutoff_mask
;
80 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
81 __m128d one
= _mm_set1_pd(1.0);
82 __m128d two
= _mm_set1_pd(2.0);
88 jindex
= nlist
->jindex
;
90 shiftidx
= nlist
->shift
;
92 shiftvec
= fr
->shift_vec
[0];
93 fshift
= fr
->fshift
[0];
94 facel
= _mm_set1_pd(fr
->epsfac
);
95 charge
= mdatoms
->chargeA
;
97 /* Setup water-specific parameters */
99 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
100 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
101 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
103 /* Avoid stupid compiler warnings */
111 /* Start outer loop over neighborlists */
112 for(iidx
=0; iidx
<nri
; iidx
++)
114 /* Load shift vector for this list */
115 i_shift_offset
= DIM
*shiftidx
[iidx
];
117 /* Load limits for loop over neighbors */
118 j_index_start
= jindex
[iidx
];
119 j_index_end
= jindex
[iidx
+1];
121 /* Get outer coordinate index */
123 i_coord_offset
= DIM
*inr
;
125 /* Load i particle coords and add shift vector */
126 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
127 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
129 fix0
= _mm_setzero_pd();
130 fiy0
= _mm_setzero_pd();
131 fiz0
= _mm_setzero_pd();
132 fix1
= _mm_setzero_pd();
133 fiy1
= _mm_setzero_pd();
134 fiz1
= _mm_setzero_pd();
135 fix2
= _mm_setzero_pd();
136 fiy2
= _mm_setzero_pd();
137 fiz2
= _mm_setzero_pd();
139 /* Reset potential sums */
140 velecsum
= _mm_setzero_pd();
142 /* Start inner kernel loop */
143 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
146 /* Get j neighbor index, and coordinate index */
149 j_coord_offsetA
= DIM
*jnrA
;
150 j_coord_offsetB
= DIM
*jnrB
;
152 /* load j atom coordinates */
153 gmx_mm_load_1rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
156 /* Calculate displacement vector */
157 dx00
= _mm_sub_pd(ix0
,jx0
);
158 dy00
= _mm_sub_pd(iy0
,jy0
);
159 dz00
= _mm_sub_pd(iz0
,jz0
);
160 dx10
= _mm_sub_pd(ix1
,jx0
);
161 dy10
= _mm_sub_pd(iy1
,jy0
);
162 dz10
= _mm_sub_pd(iz1
,jz0
);
163 dx20
= _mm_sub_pd(ix2
,jx0
);
164 dy20
= _mm_sub_pd(iy2
,jy0
);
165 dz20
= _mm_sub_pd(iz2
,jz0
);
167 /* Calculate squared distance and things based on it */
168 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
169 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
170 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
172 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
173 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
174 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
176 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
177 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
178 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
180 /* Load parameters for j particles */
181 jq0
= gmx_mm_load_2real_swizzle_pd(charge
+jnrA
+0,charge
+jnrB
+0);
183 fjx0
= _mm_setzero_pd();
184 fjy0
= _mm_setzero_pd();
185 fjz0
= _mm_setzero_pd();
187 /**************************
188 * CALCULATE INTERACTIONS *
189 **************************/
191 /* Compute parameters for interactions between i and j atoms */
192 qq00
= _mm_mul_pd(iq0
,jq0
);
194 /* COULOMB ELECTROSTATICS */
195 velec
= _mm_mul_pd(qq00
,rinv00
);
196 felec
= _mm_mul_pd(velec
,rinvsq00
);
198 /* Update potential sum for this i atom from the interaction with this j atom. */
199 velecsum
= _mm_add_pd(velecsum
,velec
);
203 /* Calculate temporary vectorial force */
204 tx
= _mm_mul_pd(fscal
,dx00
);
205 ty
= _mm_mul_pd(fscal
,dy00
);
206 tz
= _mm_mul_pd(fscal
,dz00
);
208 /* Update vectorial force */
209 fix0
= _mm_add_pd(fix0
,tx
);
210 fiy0
= _mm_add_pd(fiy0
,ty
);
211 fiz0
= _mm_add_pd(fiz0
,tz
);
213 fjx0
= _mm_add_pd(fjx0
,tx
);
214 fjy0
= _mm_add_pd(fjy0
,ty
);
215 fjz0
= _mm_add_pd(fjz0
,tz
);
217 /**************************
218 * CALCULATE INTERACTIONS *
219 **************************/
221 /* Compute parameters for interactions between i and j atoms */
222 qq10
= _mm_mul_pd(iq1
,jq0
);
224 /* COULOMB ELECTROSTATICS */
225 velec
= _mm_mul_pd(qq10
,rinv10
);
226 felec
= _mm_mul_pd(velec
,rinvsq10
);
228 /* Update potential sum for this i atom from the interaction with this j atom. */
229 velecsum
= _mm_add_pd(velecsum
,velec
);
233 /* Calculate temporary vectorial force */
234 tx
= _mm_mul_pd(fscal
,dx10
);
235 ty
= _mm_mul_pd(fscal
,dy10
);
236 tz
= _mm_mul_pd(fscal
,dz10
);
238 /* Update vectorial force */
239 fix1
= _mm_add_pd(fix1
,tx
);
240 fiy1
= _mm_add_pd(fiy1
,ty
);
241 fiz1
= _mm_add_pd(fiz1
,tz
);
243 fjx0
= _mm_add_pd(fjx0
,tx
);
244 fjy0
= _mm_add_pd(fjy0
,ty
);
245 fjz0
= _mm_add_pd(fjz0
,tz
);
247 /**************************
248 * CALCULATE INTERACTIONS *
249 **************************/
251 /* Compute parameters for interactions between i and j atoms */
252 qq20
= _mm_mul_pd(iq2
,jq0
);
254 /* COULOMB ELECTROSTATICS */
255 velec
= _mm_mul_pd(qq20
,rinv20
);
256 felec
= _mm_mul_pd(velec
,rinvsq20
);
258 /* Update potential sum for this i atom from the interaction with this j atom. */
259 velecsum
= _mm_add_pd(velecsum
,velec
);
263 /* Calculate temporary vectorial force */
264 tx
= _mm_mul_pd(fscal
,dx20
);
265 ty
= _mm_mul_pd(fscal
,dy20
);
266 tz
= _mm_mul_pd(fscal
,dz20
);
268 /* Update vectorial force */
269 fix2
= _mm_add_pd(fix2
,tx
);
270 fiy2
= _mm_add_pd(fiy2
,ty
);
271 fiz2
= _mm_add_pd(fiz2
,tz
);
273 fjx0
= _mm_add_pd(fjx0
,tx
);
274 fjy0
= _mm_add_pd(fjy0
,ty
);
275 fjz0
= _mm_add_pd(fjz0
,tz
);
277 gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
);
279 /* Inner loop uses 87 flops */
286 j_coord_offsetA
= DIM
*jnrA
;
288 /* load j atom coordinates */
289 gmx_mm_load_1rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
292 /* Calculate displacement vector */
293 dx00
= _mm_sub_pd(ix0
,jx0
);
294 dy00
= _mm_sub_pd(iy0
,jy0
);
295 dz00
= _mm_sub_pd(iz0
,jz0
);
296 dx10
= _mm_sub_pd(ix1
,jx0
);
297 dy10
= _mm_sub_pd(iy1
,jy0
);
298 dz10
= _mm_sub_pd(iz1
,jz0
);
299 dx20
= _mm_sub_pd(ix2
,jx0
);
300 dy20
= _mm_sub_pd(iy2
,jy0
);
301 dz20
= _mm_sub_pd(iz2
,jz0
);
303 /* Calculate squared distance and things based on it */
304 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
305 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
306 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
308 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
309 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
310 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
312 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
313 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
314 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
316 /* Load parameters for j particles */
317 jq0
= _mm_load_sd(charge
+jnrA
+0);
319 fjx0
= _mm_setzero_pd();
320 fjy0
= _mm_setzero_pd();
321 fjz0
= _mm_setzero_pd();
323 /**************************
324 * CALCULATE INTERACTIONS *
325 **************************/
327 /* Compute parameters for interactions between i and j atoms */
328 qq00
= _mm_mul_pd(iq0
,jq0
);
330 /* COULOMB ELECTROSTATICS */
331 velec
= _mm_mul_pd(qq00
,rinv00
);
332 felec
= _mm_mul_pd(velec
,rinvsq00
);
334 /* Update potential sum for this i atom from the interaction with this j atom. */
335 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
336 velecsum
= _mm_add_pd(velecsum
,velec
);
340 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
342 /* Calculate temporary vectorial force */
343 tx
= _mm_mul_pd(fscal
,dx00
);
344 ty
= _mm_mul_pd(fscal
,dy00
);
345 tz
= _mm_mul_pd(fscal
,dz00
);
347 /* Update vectorial force */
348 fix0
= _mm_add_pd(fix0
,tx
);
349 fiy0
= _mm_add_pd(fiy0
,ty
);
350 fiz0
= _mm_add_pd(fiz0
,tz
);
352 fjx0
= _mm_add_pd(fjx0
,tx
);
353 fjy0
= _mm_add_pd(fjy0
,ty
);
354 fjz0
= _mm_add_pd(fjz0
,tz
);
356 /**************************
357 * CALCULATE INTERACTIONS *
358 **************************/
360 /* Compute parameters for interactions between i and j atoms */
361 qq10
= _mm_mul_pd(iq1
,jq0
);
363 /* COULOMB ELECTROSTATICS */
364 velec
= _mm_mul_pd(qq10
,rinv10
);
365 felec
= _mm_mul_pd(velec
,rinvsq10
);
367 /* Update potential sum for this i atom from the interaction with this j atom. */
368 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
369 velecsum
= _mm_add_pd(velecsum
,velec
);
373 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
375 /* Calculate temporary vectorial force */
376 tx
= _mm_mul_pd(fscal
,dx10
);
377 ty
= _mm_mul_pd(fscal
,dy10
);
378 tz
= _mm_mul_pd(fscal
,dz10
);
380 /* Update vectorial force */
381 fix1
= _mm_add_pd(fix1
,tx
);
382 fiy1
= _mm_add_pd(fiy1
,ty
);
383 fiz1
= _mm_add_pd(fiz1
,tz
);
385 fjx0
= _mm_add_pd(fjx0
,tx
);
386 fjy0
= _mm_add_pd(fjy0
,ty
);
387 fjz0
= _mm_add_pd(fjz0
,tz
);
389 /**************************
390 * CALCULATE INTERACTIONS *
391 **************************/
393 /* Compute parameters for interactions between i and j atoms */
394 qq20
= _mm_mul_pd(iq2
,jq0
);
396 /* COULOMB ELECTROSTATICS */
397 velec
= _mm_mul_pd(qq20
,rinv20
);
398 felec
= _mm_mul_pd(velec
,rinvsq20
);
400 /* Update potential sum for this i atom from the interaction with this j atom. */
401 velec
= _mm_unpacklo_pd(velec
,_mm_setzero_pd());
402 velecsum
= _mm_add_pd(velecsum
,velec
);
406 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
408 /* Calculate temporary vectorial force */
409 tx
= _mm_mul_pd(fscal
,dx20
);
410 ty
= _mm_mul_pd(fscal
,dy20
);
411 tz
= _mm_mul_pd(fscal
,dz20
);
413 /* Update vectorial force */
414 fix2
= _mm_add_pd(fix2
,tx
);
415 fiy2
= _mm_add_pd(fiy2
,ty
);
416 fiz2
= _mm_add_pd(fiz2
,tz
);
418 fjx0
= _mm_add_pd(fjx0
,tx
);
419 fjy0
= _mm_add_pd(fjy0
,ty
);
420 fjz0
= _mm_add_pd(fjz0
,tz
);
422 gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
);
424 /* Inner loop uses 87 flops */
427 /* End of innermost loop */
429 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
430 f
+i_coord_offset
,fshift
+i_shift_offset
);
433 /* Update potential energies */
434 gmx_mm_update_1pot_pd(velecsum
,kernel_data
->energygrp_elec
+ggid
);
436 /* Increment number of inner iterations */
437 inneriter
+= j_index_end
- j_index_start
;
439 /* Outer loop uses 19 flops */
442 /* Increment number of outer iterations */
445 /* Update outer/inner flops */
447 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3_VF
,outeriter
*19 + inneriter
*87);
450 * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double
451 * Electrostatics interaction: Coulomb
452 * VdW interaction: None
453 * Geometry: Water3-Particle
454 * Calculate force/pot: Force
457 nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sse2_double
458 (t_nblist
* gmx_restrict nlist
,
459 rvec
* gmx_restrict xx
,
460 rvec
* gmx_restrict ff
,
461 t_forcerec
* gmx_restrict fr
,
462 t_mdatoms
* gmx_restrict mdatoms
,
463 nb_kernel_data_t
* gmx_restrict kernel_data
,
464 t_nrnb
* gmx_restrict nrnb
)
466 /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or
467 * just 0 for non-waters.
468 * Suffixes A,B refer to j loop unrolling done with SSE double precision, e.g. for the two different
469 * jnr indices corresponding to data put in the four positions in the SIMD register.
471 int i_shift_offset
,i_coord_offset
,outeriter
,inneriter
;
472 int j_index_start
,j_index_end
,jidx
,nri
,inr
,ggid
,iidx
;
474 int j_coord_offsetA
,j_coord_offsetB
;
475 int *iinr
,*jindex
,*jjnr
,*shiftidx
,*gid
;
477 real
*shiftvec
,*fshift
,*x
,*f
;
478 __m128d tx
,ty
,tz
,fscal
,rcutoff
,rcutoff2
,jidxall
;
480 __m128d ix0
,iy0
,iz0
,fix0
,fiy0
,fiz0
,iq0
,isai0
;
482 __m128d ix1
,iy1
,iz1
,fix1
,fiy1
,fiz1
,iq1
,isai1
;
484 __m128d ix2
,iy2
,iz2
,fix2
,fiy2
,fiz2
,iq2
,isai2
;
485 int vdwjidx0A
,vdwjidx0B
;
486 __m128d jx0
,jy0
,jz0
,fjx0
,fjy0
,fjz0
,jq0
,isaj0
;
487 __m128d dx00
,dy00
,dz00
,rsq00
,rinv00
,rinvsq00
,r00
,qq00
,c6_00
,c12_00
;
488 __m128d dx10
,dy10
,dz10
,rsq10
,rinv10
,rinvsq10
,r10
,qq10
,c6_10
,c12_10
;
489 __m128d dx20
,dy20
,dz20
,rsq20
,rinv20
,rinvsq20
,r20
,qq20
,c6_20
,c12_20
;
490 __m128d velec
,felec
,velecsum
,facel
,crf
,krf
,krf2
;
492 __m128d dummy_mask
,cutoff_mask
;
493 __m128d signbit
= gmx_mm_castsi128_pd( _mm_set_epi32(0x80000000,0x00000000,0x80000000,0x00000000) );
494 __m128d one
= _mm_set1_pd(1.0);
495 __m128d two
= _mm_set1_pd(2.0);
501 jindex
= nlist
->jindex
;
503 shiftidx
= nlist
->shift
;
505 shiftvec
= fr
->shift_vec
[0];
506 fshift
= fr
->fshift
[0];
507 facel
= _mm_set1_pd(fr
->epsfac
);
508 charge
= mdatoms
->chargeA
;
510 /* Setup water-specific parameters */
511 inr
= nlist
->iinr
[0];
512 iq0
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+0]));
513 iq1
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+1]));
514 iq2
= _mm_mul_pd(facel
,_mm_set1_pd(charge
[inr
+2]));
516 /* Avoid stupid compiler warnings */
524 /* Start outer loop over neighborlists */
525 for(iidx
=0; iidx
<nri
; iidx
++)
527 /* Load shift vector for this list */
528 i_shift_offset
= DIM
*shiftidx
[iidx
];
530 /* Load limits for loop over neighbors */
531 j_index_start
= jindex
[iidx
];
532 j_index_end
= jindex
[iidx
+1];
534 /* Get outer coordinate index */
536 i_coord_offset
= DIM
*inr
;
538 /* Load i particle coords and add shift vector */
539 gmx_mm_load_shift_and_3rvec_broadcast_pd(shiftvec
+i_shift_offset
,x
+i_coord_offset
,
540 &ix0
,&iy0
,&iz0
,&ix1
,&iy1
,&iz1
,&ix2
,&iy2
,&iz2
);
542 fix0
= _mm_setzero_pd();
543 fiy0
= _mm_setzero_pd();
544 fiz0
= _mm_setzero_pd();
545 fix1
= _mm_setzero_pd();
546 fiy1
= _mm_setzero_pd();
547 fiz1
= _mm_setzero_pd();
548 fix2
= _mm_setzero_pd();
549 fiy2
= _mm_setzero_pd();
550 fiz2
= _mm_setzero_pd();
552 /* Start inner kernel loop */
553 for(jidx
=j_index_start
; jidx
<j_index_end
-1; jidx
+=2)
556 /* Get j neighbor index, and coordinate index */
559 j_coord_offsetA
= DIM
*jnrA
;
560 j_coord_offsetB
= DIM
*jnrB
;
562 /* load j atom coordinates */
563 gmx_mm_load_1rvec_2ptr_swizzle_pd(x
+j_coord_offsetA
,x
+j_coord_offsetB
,
566 /* Calculate displacement vector */
567 dx00
= _mm_sub_pd(ix0
,jx0
);
568 dy00
= _mm_sub_pd(iy0
,jy0
);
569 dz00
= _mm_sub_pd(iz0
,jz0
);
570 dx10
= _mm_sub_pd(ix1
,jx0
);
571 dy10
= _mm_sub_pd(iy1
,jy0
);
572 dz10
= _mm_sub_pd(iz1
,jz0
);
573 dx20
= _mm_sub_pd(ix2
,jx0
);
574 dy20
= _mm_sub_pd(iy2
,jy0
);
575 dz20
= _mm_sub_pd(iz2
,jz0
);
577 /* Calculate squared distance and things based on it */
578 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
579 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
580 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
582 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
583 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
584 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
586 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
587 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
588 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
590 /* Load parameters for j particles */
591 jq0
= gmx_mm_load_2real_swizzle_pd(charge
+jnrA
+0,charge
+jnrB
+0);
593 fjx0
= _mm_setzero_pd();
594 fjy0
= _mm_setzero_pd();
595 fjz0
= _mm_setzero_pd();
597 /**************************
598 * CALCULATE INTERACTIONS *
599 **************************/
601 /* Compute parameters for interactions between i and j atoms */
602 qq00
= _mm_mul_pd(iq0
,jq0
);
604 /* COULOMB ELECTROSTATICS */
605 velec
= _mm_mul_pd(qq00
,rinv00
);
606 felec
= _mm_mul_pd(velec
,rinvsq00
);
610 /* Calculate temporary vectorial force */
611 tx
= _mm_mul_pd(fscal
,dx00
);
612 ty
= _mm_mul_pd(fscal
,dy00
);
613 tz
= _mm_mul_pd(fscal
,dz00
);
615 /* Update vectorial force */
616 fix0
= _mm_add_pd(fix0
,tx
);
617 fiy0
= _mm_add_pd(fiy0
,ty
);
618 fiz0
= _mm_add_pd(fiz0
,tz
);
620 fjx0
= _mm_add_pd(fjx0
,tx
);
621 fjy0
= _mm_add_pd(fjy0
,ty
);
622 fjz0
= _mm_add_pd(fjz0
,tz
);
624 /**************************
625 * CALCULATE INTERACTIONS *
626 **************************/
628 /* Compute parameters for interactions between i and j atoms */
629 qq10
= _mm_mul_pd(iq1
,jq0
);
631 /* COULOMB ELECTROSTATICS */
632 velec
= _mm_mul_pd(qq10
,rinv10
);
633 felec
= _mm_mul_pd(velec
,rinvsq10
);
637 /* Calculate temporary vectorial force */
638 tx
= _mm_mul_pd(fscal
,dx10
);
639 ty
= _mm_mul_pd(fscal
,dy10
);
640 tz
= _mm_mul_pd(fscal
,dz10
);
642 /* Update vectorial force */
643 fix1
= _mm_add_pd(fix1
,tx
);
644 fiy1
= _mm_add_pd(fiy1
,ty
);
645 fiz1
= _mm_add_pd(fiz1
,tz
);
647 fjx0
= _mm_add_pd(fjx0
,tx
);
648 fjy0
= _mm_add_pd(fjy0
,ty
);
649 fjz0
= _mm_add_pd(fjz0
,tz
);
651 /**************************
652 * CALCULATE INTERACTIONS *
653 **************************/
655 /* Compute parameters for interactions between i and j atoms */
656 qq20
= _mm_mul_pd(iq2
,jq0
);
658 /* COULOMB ELECTROSTATICS */
659 velec
= _mm_mul_pd(qq20
,rinv20
);
660 felec
= _mm_mul_pd(velec
,rinvsq20
);
664 /* Calculate temporary vectorial force */
665 tx
= _mm_mul_pd(fscal
,dx20
);
666 ty
= _mm_mul_pd(fscal
,dy20
);
667 tz
= _mm_mul_pd(fscal
,dz20
);
669 /* Update vectorial force */
670 fix2
= _mm_add_pd(fix2
,tx
);
671 fiy2
= _mm_add_pd(fiy2
,ty
);
672 fiz2
= _mm_add_pd(fiz2
,tz
);
674 fjx0
= _mm_add_pd(fjx0
,tx
);
675 fjy0
= _mm_add_pd(fjy0
,ty
);
676 fjz0
= _mm_add_pd(fjz0
,tz
);
678 gmx_mm_decrement_1rvec_2ptr_swizzle_pd(f
+j_coord_offsetA
,f
+j_coord_offsetB
,fjx0
,fjy0
,fjz0
);
680 /* Inner loop uses 84 flops */
687 j_coord_offsetA
= DIM
*jnrA
;
689 /* load j atom coordinates */
690 gmx_mm_load_1rvec_1ptr_swizzle_pd(x
+j_coord_offsetA
,
693 /* Calculate displacement vector */
694 dx00
= _mm_sub_pd(ix0
,jx0
);
695 dy00
= _mm_sub_pd(iy0
,jy0
);
696 dz00
= _mm_sub_pd(iz0
,jz0
);
697 dx10
= _mm_sub_pd(ix1
,jx0
);
698 dy10
= _mm_sub_pd(iy1
,jy0
);
699 dz10
= _mm_sub_pd(iz1
,jz0
);
700 dx20
= _mm_sub_pd(ix2
,jx0
);
701 dy20
= _mm_sub_pd(iy2
,jy0
);
702 dz20
= _mm_sub_pd(iz2
,jz0
);
704 /* Calculate squared distance and things based on it */
705 rsq00
= gmx_mm_calc_rsq_pd(dx00
,dy00
,dz00
);
706 rsq10
= gmx_mm_calc_rsq_pd(dx10
,dy10
,dz10
);
707 rsq20
= gmx_mm_calc_rsq_pd(dx20
,dy20
,dz20
);
709 rinv00
= gmx_mm_invsqrt_pd(rsq00
);
710 rinv10
= gmx_mm_invsqrt_pd(rsq10
);
711 rinv20
= gmx_mm_invsqrt_pd(rsq20
);
713 rinvsq00
= _mm_mul_pd(rinv00
,rinv00
);
714 rinvsq10
= _mm_mul_pd(rinv10
,rinv10
);
715 rinvsq20
= _mm_mul_pd(rinv20
,rinv20
);
717 /* Load parameters for j particles */
718 jq0
= _mm_load_sd(charge
+jnrA
+0);
720 fjx0
= _mm_setzero_pd();
721 fjy0
= _mm_setzero_pd();
722 fjz0
= _mm_setzero_pd();
724 /**************************
725 * CALCULATE INTERACTIONS *
726 **************************/
728 /* Compute parameters for interactions between i and j atoms */
729 qq00
= _mm_mul_pd(iq0
,jq0
);
731 /* COULOMB ELECTROSTATICS */
732 velec
= _mm_mul_pd(qq00
,rinv00
);
733 felec
= _mm_mul_pd(velec
,rinvsq00
);
737 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
739 /* Calculate temporary vectorial force */
740 tx
= _mm_mul_pd(fscal
,dx00
);
741 ty
= _mm_mul_pd(fscal
,dy00
);
742 tz
= _mm_mul_pd(fscal
,dz00
);
744 /* Update vectorial force */
745 fix0
= _mm_add_pd(fix0
,tx
);
746 fiy0
= _mm_add_pd(fiy0
,ty
);
747 fiz0
= _mm_add_pd(fiz0
,tz
);
749 fjx0
= _mm_add_pd(fjx0
,tx
);
750 fjy0
= _mm_add_pd(fjy0
,ty
);
751 fjz0
= _mm_add_pd(fjz0
,tz
);
753 /**************************
754 * CALCULATE INTERACTIONS *
755 **************************/
757 /* Compute parameters for interactions between i and j atoms */
758 qq10
= _mm_mul_pd(iq1
,jq0
);
760 /* COULOMB ELECTROSTATICS */
761 velec
= _mm_mul_pd(qq10
,rinv10
);
762 felec
= _mm_mul_pd(velec
,rinvsq10
);
766 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
768 /* Calculate temporary vectorial force */
769 tx
= _mm_mul_pd(fscal
,dx10
);
770 ty
= _mm_mul_pd(fscal
,dy10
);
771 tz
= _mm_mul_pd(fscal
,dz10
);
773 /* Update vectorial force */
774 fix1
= _mm_add_pd(fix1
,tx
);
775 fiy1
= _mm_add_pd(fiy1
,ty
);
776 fiz1
= _mm_add_pd(fiz1
,tz
);
778 fjx0
= _mm_add_pd(fjx0
,tx
);
779 fjy0
= _mm_add_pd(fjy0
,ty
);
780 fjz0
= _mm_add_pd(fjz0
,tz
);
782 /**************************
783 * CALCULATE INTERACTIONS *
784 **************************/
786 /* Compute parameters for interactions between i and j atoms */
787 qq20
= _mm_mul_pd(iq2
,jq0
);
789 /* COULOMB ELECTROSTATICS */
790 velec
= _mm_mul_pd(qq20
,rinv20
);
791 felec
= _mm_mul_pd(velec
,rinvsq20
);
795 fscal
= _mm_unpacklo_pd(fscal
,_mm_setzero_pd());
797 /* Calculate temporary vectorial force */
798 tx
= _mm_mul_pd(fscal
,dx20
);
799 ty
= _mm_mul_pd(fscal
,dy20
);
800 tz
= _mm_mul_pd(fscal
,dz20
);
802 /* Update vectorial force */
803 fix2
= _mm_add_pd(fix2
,tx
);
804 fiy2
= _mm_add_pd(fiy2
,ty
);
805 fiz2
= _mm_add_pd(fiz2
,tz
);
807 fjx0
= _mm_add_pd(fjx0
,tx
);
808 fjy0
= _mm_add_pd(fjy0
,ty
);
809 fjz0
= _mm_add_pd(fjz0
,tz
);
811 gmx_mm_decrement_1rvec_1ptr_swizzle_pd(f
+j_coord_offsetA
,fjx0
,fjy0
,fjz0
);
813 /* Inner loop uses 84 flops */
816 /* End of innermost loop */
818 gmx_mm_update_iforce_3atom_swizzle_pd(fix0
,fiy0
,fiz0
,fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,
819 f
+i_coord_offset
,fshift
+i_shift_offset
);
821 /* Increment number of inner iterations */
822 inneriter
+= j_index_end
- j_index_start
;
824 /* Outer loop uses 18 flops */
827 /* Increment number of outer iterations */
830 /* Update outer/inner flops */
832 inc_nrnb(nrnb
,eNR_NBKERNEL_ELEC_W3_F
,outeriter
*18 + inneriter
*84);