4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Copyright (c) 1991-2001, University of Groningen, The Netherlands
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * If you want to redistribute modifications, please consider that
18 * scientific software is very special. Version control is crucial -
19 * bugs must be traceable. We will be happy to consider code for
20 * inclusion in the official distribution, but derived work must not
21 * be called official GROMACS. Details are found in the README & COPYING
22 * files - if they are missing, get the official version at www.gromacs.org.
24 * To help us fund GROMACS development, we humbly ask that you cite
25 * the papers on the package - you can find them in the top README file.
27 * For more info, check our website at http://www.gromacs.org
30 * Great Red Owns Many ACres of Sand
33 #include <ppc_altivec.h>
/*
 * check_altivec: builds a per-element bit mask with AltiVec shifts and ORs
 * it into vsr1.
 * NOTE(review): this listing skips some original lines (numbering jumps
 * 38->40, 41->44 and 46->68), so where vsr1 gets its initial value and what
 * is done with the result afterwards are not visible here - confirm against
 * the full file.
 */
38 void check_altivec(void)
40 vector
unsigned short vsr1
,vsr2
;
41 vector
unsigned int tmp
;
/* tmp = 1 << 8 in each 32-bit element */
44 tmp
=vec_sl(vec_splat_u32(1),vec_splat_u32(8));
/* shift left by 8 more bits (1 << 16 per element), viewed as unsigned shorts */
45 vsr2
=(vector
unsigned short)vec_sl(tmp
,vec_splat_u32(8));
/* set that bit in vsr1; presumably a VSCR control bit (non-Java mode?) - TODO confirm */
46 vsr1
=vec_or(vsr1
,vsr2
);
68 vector
float ix
,iy
,iz
,shvec
;
70 vector
float dx
,dy
,dz
;
71 vector
float vnbtot
,c6
,c12
;
72 vector
float fix
,fiy
,fiz
;
73 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
74 vector
float rinvsq
,rsq
,rinvsix
,vnb6
,vnb12
;
76 int n
,k
,k0
,ii
,is3
,ii3
,nj0
,nj1
;
77 int jnra
,jnrb
,jnrc
,jnrd
;
79 int ntiA
,tja
,tjb
,tjc
,tjd
;
85 shvec
= load_xyz(shiftvec
+is3
);
88 ix
= load_xyz(pos
+ii3
);
93 ix
= vec_add(ix
,shvec
);
96 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
97 ntiA
= 2*ntype
*type
[ii
];
98 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
107 transpose_4_to_3(load_xyz(pos
+j3a
),
110 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
114 rsq
= vec_madd(dx
,dx
,nul
);
115 rsq
= vec_madd(dy
,dy
,rsq
);
116 rsq
= vec_madd(dz
,dz
,rsq
);
117 rinvsq
= do_recip(rsq
);
118 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
119 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
120 tja
= ntiA
+2*type
[jnra
];
121 tjb
= ntiA
+2*type
[jnrb
];
122 tjc
= ntiA
+2*type
[jnrc
];
123 tjd
= ntiA
+2*type
[jnrd
];
124 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
125 vnb6
= vec_madd(c6
,rinvsix
,nul
);
126 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
127 vnbtot
= vec_add(vnbtot
,vnb12
);
128 vnbtot
= vec_sub(vnbtot
,vnb6
);
129 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
130 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
131 fs
= vec_madd(fs
,rinvsq
,nul
);
132 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
133 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
134 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
135 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
136 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
137 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
138 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
139 add_xyz_to_mem(faction
+j3a
,tmp1
);
140 add_xyz_to_mem(faction
+j3b
,tmp2
);
141 add_xyz_to_mem(faction
+j3c
,tmp3
);
142 add_xyz_to_mem(faction
+j3d
,tmp4
);
149 transpose_2_to_3(load_xyz(pos
+j3a
),
150 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
154 rsq
= vec_madd(dx
,dx
,nul
);
155 rsq
= vec_madd(dy
,dy
,rsq
);
156 rsq
= vec_madd(dz
,dz
,rsq
);
157 rinvsq
= do_recip(rsq
);
158 zero_highest_2_elements_in_vector(&rinvsq
);
159 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
160 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
161 tja
= ntiA
+2*type
[jnra
];
162 tjb
= ntiA
+2*type
[jnrb
];
163 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
164 vnb6
= vec_madd(c6
,rinvsix
,nul
);
165 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
166 vnbtot
= vec_add(vnbtot
,vnb12
);
167 vnbtot
= vec_sub(vnbtot
,vnb6
);
168 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
169 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
170 fs
= vec_madd(fs
,rinvsq
,nul
);
171 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
172 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
173 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
174 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
175 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
176 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
177 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
178 add_xyz_to_mem(faction
+j3a
,tmp1
);
179 add_xyz_to_mem(faction
+j3b
,tmp2
);
185 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
189 rsq
= vec_madd(dx
,dx
,nul
);
190 rsq
= vec_madd(dy
,dy
,rsq
);
191 rsq
= vec_madd(dz
,dz
,rsq
);
192 rinvsq
= do_recip(rsq
);
193 zero_highest_3_elements_in_vector(&rinvsq
);
194 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
195 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
196 tja
= ntiA
+2*type
[jnra
];
197 load_1_pair(nbfp
+tja
,&c6
,&c12
);
198 vnb6
= vec_madd(c6
,rinvsix
,nul
);
199 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
200 vnbtot
= vec_add(vnbtot
,vnb12
);
201 vnbtot
= vec_sub(vnbtot
,vnb6
);
202 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
203 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
204 fs
= vec_madd(fs
,rinvsq
,nul
);
205 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
206 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
207 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
208 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
209 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
210 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
211 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
212 add_xyz_to_mem(faction
+j3a
,tmp1
);
214 /* update outer data */
215 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
216 tmp1
= vec_add(tmp1
,tmp3
);
217 tmp2
= vec_add(tmp2
,tmp4
);
218 tmp1
= vec_add(tmp1
,tmp2
);
219 add_xyz_to_mem(faction
+ii3
,tmp1
);
220 add_xyz_to_mem(fshift
+is3
,tmp1
);
222 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
227 void inl0300_altivec(
245 vector
float ix
,iy
,iz
,shvec
;
246 vector
float fs
,nul
,tsc
;
247 vector
float dx
,dy
,dz
;
248 vector
float vnbtot
,c6
,c12
;
249 vector
float fix
,fiy
,fiz
;
250 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
251 vector
float rinv
,r
,rsq
;
252 vector
float VVd
,FFd
,VVr
,FFr
;
254 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
255 int jnra
,jnrb
,jnrc
,jnrd
;
260 tsc
=load_float_and_splat(&tabscale
);
264 shvec
= load_xyz(shiftvec
+is3
);
267 ix
= load_xyz(pos
+ii3
);
272 ix
= vec_add(ix
,shvec
);
275 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
276 ntiA
= 2*ntype
*type
[ii
];
278 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
287 transpose_4_to_3(load_xyz(pos
+j3a
),
290 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
294 rsq
= vec_madd(dx
,dx
,nul
);
295 rsq
= vec_madd(dy
,dy
,rsq
);
296 rsq
= vec_madd(dz
,dz
,rsq
);
297 rinv
= do_invsqrt(rsq
);
298 r
= vec_madd(rinv
,rsq
,nul
);
299 tja
= ntiA
+2*type
[jnra
];
300 tjb
= ntiA
+2*type
[jnrb
];
301 tjc
= ntiA
+2*type
[jnrc
];
302 tjd
= ntiA
+2*type
[jnrd
];
303 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
304 do_4_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&FFd
,&VVr
,&FFr
);
305 fs
= vec_nmsub(c6
,FFd
,nul
);
306 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
307 fs
= vec_nmsub(c12
,FFr
,fs
);
308 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
309 fs
= vec_madd(vec_madd(fs
,tsc
,nul
),rinv
,nul
);
310 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
311 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
312 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
313 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
314 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
315 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
316 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
317 add_xyz_to_mem(faction
+j3a
,tmp1
);
318 add_xyz_to_mem(faction
+j3b
,tmp2
);
319 add_xyz_to_mem(faction
+j3c
,tmp3
);
320 add_xyz_to_mem(faction
+j3d
,tmp4
);
327 transpose_2_to_3(load_xyz(pos
+j3a
),
328 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
332 rsq
= vec_madd(dx
,dx
,nul
);
333 rsq
= vec_madd(dy
,dy
,rsq
);
334 rsq
= vec_madd(dz
,dz
,rsq
);
335 zero_highest_2_elements_in_vector(&rsq
);
336 rinv
= do_invsqrt(rsq
);
337 zero_highest_2_elements_in_vector(&rinv
);
338 r
= vec_madd(rinv
,rsq
,nul
);
339 tja
= ntiA
+2*type
[jnra
];
340 tjb
= ntiA
+2*type
[jnrb
];
341 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
342 do_2_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&FFd
,&VVr
,&FFr
);
343 fs
= vec_nmsub(c6
,FFd
,nul
);
344 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
345 fs
= vec_nmsub(c12
,FFr
,fs
);
346 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
347 fs
= vec_madd(vec_madd(fs
,tsc
,nul
),rinv
,nul
);
348 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
349 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
350 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
351 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
352 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
353 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
354 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
355 add_xyz_to_mem(faction
+j3a
,tmp1
);
356 add_xyz_to_mem(faction
+j3b
,tmp2
);
362 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
366 rsq
= vec_madd(dx
,dx
,nul
);
367 rsq
= vec_madd(dy
,dy
,rsq
);
368 rsq
= vec_madd(dz
,dz
,rsq
);
369 zero_highest_3_elements_in_vector(&rsq
);
370 rinv
= do_invsqrt(rsq
);
371 zero_highest_3_elements_in_vector(&rinv
);
372 r
= vec_madd(rinv
,rsq
,nul
);
373 tja
= ntiA
+2*type
[jnra
];
374 load_1_pair(nbfp
+tja
,&c6
,&c12
);
375 do_1_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&FFd
,&VVr
,&FFr
);
376 fs
= vec_nmsub(c6
,FFd
,nul
);
377 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
378 fs
= vec_nmsub(c12
,FFr
,fs
);
379 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
380 fs
= vec_madd(vec_madd(fs
,tsc
,nul
),rinv
,nul
);
381 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
382 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
383 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
384 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
385 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
386 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
387 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
388 add_xyz_to_mem(faction
+j3a
,tmp1
);
390 /* update outer data */
391 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
392 tmp1
= vec_add(tmp1
,tmp3
);
393 tmp2
= vec_add(tmp2
,tmp4
);
394 tmp1
= vec_add(tmp1
,tmp2
);
395 add_xyz_to_mem(faction
+ii3
,tmp1
);
396 add_xyz_to_mem(fshift
+is3
,tmp1
);
398 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
404 void inl1000_altivec(
419 vector
float ix
,iy
,iz
,shvec
;
420 vector
float vfacel
,vcoul
,fs
,nul
;
421 vector
float dx
,dy
,dz
;
422 vector
float vctot
,qq
,iq
;
423 vector
float fix
,fiy
,fiz
;
424 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
425 vector
float rinv
,rinvsq
,rsq
;
427 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
428 int jnra
,jnrb
,jnrc
,jnrd
;
432 vfacel
=load_float_and_splat(&facel
);
436 shvec
= load_xyz(shiftvec
+is3
);
439 ix
= load_xyz(pos
+ii3
);
444 ix
= vec_add(ix
,shvec
);
447 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
448 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
450 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
459 transpose_4_to_3(load_xyz(pos
+j3a
),
462 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
466 rsq
= vec_madd(dx
,dx
,nul
);
467 rsq
= vec_madd(dy
,dy
,rsq
);
468 rsq
= vec_madd(dz
,dz
,rsq
);
469 rinv
= do_invsqrt(rsq
);
470 rinvsq
= vec_madd(rinv
,rinv
,nul
);
471 /* load 4 j charges and multiply by iq */
472 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
473 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
474 vcoul
= vec_madd(qq
,rinv
,nul
);
475 fs
= vec_madd(vcoul
,rinvsq
,nul
);
476 vctot
= vec_add(vctot
,vcoul
);
477 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
478 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
479 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
480 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
481 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
482 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
483 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
484 add_xyz_to_mem(faction
+j3a
,tmp1
);
485 add_xyz_to_mem(faction
+j3b
,tmp2
);
486 add_xyz_to_mem(faction
+j3c
,tmp3
);
487 add_xyz_to_mem(faction
+j3d
,tmp4
);
494 transpose_2_to_3(load_xyz(pos
+j3a
),
495 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
499 rsq
= vec_madd(dx
,dx
,nul
);
500 rsq
= vec_madd(dy
,dy
,rsq
);
501 rsq
= vec_madd(dz
,dz
,rsq
);
502 rinv
= do_invsqrt(rsq
);
503 zero_highest_2_elements_in_vector(&rinv
);
504 rinvsq
= vec_madd(rinv
,rinv
,nul
);
505 /* load 2 j charges and multiply by iq */
506 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
507 vcoul
= vec_madd(qq
,rinv
,nul
);
508 fs
= vec_madd(vcoul
,rinvsq
,nul
);
509 vctot
= vec_add(vctot
,vcoul
);
510 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
511 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
512 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
513 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
514 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
515 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
516 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
517 add_xyz_to_mem(faction
+j3a
,tmp1
);
518 add_xyz_to_mem(faction
+j3b
,tmp2
);
524 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
528 rsq
= vec_madd(dx
,dx
,nul
);
529 rsq
= vec_madd(dy
,dy
,rsq
);
530 rsq
= vec_madd(dz
,dz
,rsq
);
531 rinv
= do_invsqrt(rsq
);
532 zero_highest_3_elements_in_vector(&rinv
);
533 rinvsq
= vec_madd(rinv
,rinv
,nul
);
534 /* load 1 j charge and multiply by iq */
535 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
536 vcoul
= vec_madd(qq
,rinv
,nul
);
537 fs
= vec_madd(vcoul
,rinvsq
,nul
);
538 vctot
= vec_add(vctot
,vcoul
);
539 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
540 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
541 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
542 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
543 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
544 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
545 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
546 add_xyz_to_mem(faction
+j3a
,tmp1
);
548 /* update outer data */
549 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
550 tmp1
= vec_add(tmp1
,tmp3
);
551 tmp2
= vec_add(tmp2
,tmp4
);
552 tmp1
= vec_add(tmp1
,tmp2
);
553 add_xyz_to_mem(faction
+ii3
,tmp1
);
554 add_xyz_to_mem(fshift
+is3
,tmp1
);
556 add_vector_to_float(Vc
+gid
[n
],vctot
);
562 void inl1100_altivec(
581 vector
float ix
,iy
,iz
,shvec
;
582 vector
float vfacel
,vcoul
,fs
,nul
;
583 vector
float dx
,dy
,dz
;
584 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
;
585 vector
float fix
,fiy
,fiz
;
586 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
587 vector
float rinv
,rinvsq
,rsq
,rinvsix
,vnb6
,vnb12
;
589 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
590 int jnra
,jnrb
,jnrc
,jnrd
;
595 vfacel
=load_float_and_splat(&facel
);
599 shvec
= load_xyz(shiftvec
+is3
);
602 ix
= load_xyz(pos
+ii3
);
608 ix
= vec_add(ix
,shvec
);
611 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
612 ntiA
= 2*ntype
*type
[ii
];
613 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
615 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
624 transpose_4_to_3(load_xyz(pos
+j3a
),
627 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
631 rsq
= vec_madd(dx
,dx
,nul
);
632 rsq
= vec_madd(dy
,dy
,rsq
);
633 rsq
= vec_madd(dz
,dz
,rsq
);
634 rinv
= do_invsqrt(rsq
);
635 rinvsq
= vec_madd(rinv
,rinv
,nul
);
636 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
637 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
638 tja
= ntiA
+2*type
[jnra
];
639 tjb
= ntiA
+2*type
[jnrb
];
640 tjc
= ntiA
+2*type
[jnrc
];
641 tjd
= ntiA
+2*type
[jnrd
];
642 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
643 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
644 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
645 vnb6
= vec_madd(c6
,rinvsix
,nul
);
646 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
647 vcoul
= vec_madd(qq
,rinv
,nul
);
648 vnbtot
= vec_add(vnbtot
,vnb12
);
649 vnbtot
= vec_sub(vnbtot
,vnb6
);
650 vctot
= vec_add(vctot
,vcoul
);
651 fs
= vec_madd(vec_twelve(),vnb12
,vcoul
);
652 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
653 fs
= vec_madd(fs
,rinvsq
,nul
);
654 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
655 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
656 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
657 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
658 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
659 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
660 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
661 add_xyz_to_mem(faction
+j3a
,tmp1
);
662 add_xyz_to_mem(faction
+j3b
,tmp2
);
663 add_xyz_to_mem(faction
+j3c
,tmp3
);
664 add_xyz_to_mem(faction
+j3d
,tmp4
);
671 transpose_2_to_3(load_xyz(pos
+j3a
),
672 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
676 rsq
= vec_madd(dx
,dx
,nul
);
677 rsq
= vec_madd(dy
,dy
,rsq
);
678 rsq
= vec_madd(dz
,dz
,rsq
);
679 rinv
= do_invsqrt(rsq
);
680 zero_highest_2_elements_in_vector(&rinv
);
681 rinvsq
= vec_madd(rinv
,rinv
,nul
);
682 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
683 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
684 tja
= ntiA
+2*type
[jnra
];
685 tjb
= ntiA
+2*type
[jnrb
];
686 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
687 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
688 vnb6
= vec_madd(c6
,rinvsix
,nul
);
689 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
690 vcoul
= vec_madd(qq
,rinv
,nul
);
691 vnbtot
= vec_add(vnbtot
,vnb12
);
692 vnbtot
= vec_sub(vnbtot
,vnb6
);
693 vctot
= vec_add(vctot
,vcoul
);
694 fs
= vec_madd(vec_twelve(),vnb12
,vcoul
);
695 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
696 fs
= vec_madd(fs
,rinvsq
,nul
);
697 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
698 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
699 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
700 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
701 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
702 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
703 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
704 add_xyz_to_mem(faction
+j3a
,tmp1
);
705 add_xyz_to_mem(faction
+j3b
,tmp2
);
711 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
715 rsq
= vec_madd(dx
,dx
,nul
);
716 rsq
= vec_madd(dy
,dy
,rsq
);
717 rsq
= vec_madd(dz
,dz
,rsq
);
/* Single-j remainder: assign rinv first, then clear its upper three elements.
 * Masking before the assignment had no effect because the assignment
 * overwrote it; the 2-j remainder of this kernel uses this same order. */
718 rinv
= do_invsqrt(rsq
);
719 zero_highest_3_elements_in_vector(&rinv
);
720 rinvsq
= vec_madd(rinv
,rinv
,nul
);
721 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
722 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
723 tja
= ntiA
+2*type
[jnra
];
724 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
725 load_1_pair(nbfp
+tja
,&c6
,&c12
);
726 vnb6
= vec_madd(c6
,rinvsix
,nul
);
727 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
728 vcoul
= vec_madd(qq
,rinv
,nul
);
729 vnbtot
= vec_add(vnbtot
,vnb12
);
730 vnbtot
= vec_sub(vnbtot
,vnb6
);
731 vctot
= vec_add(vctot
,vcoul
);
732 fs
= vec_madd(vec_twelve(),vnb12
,vcoul
);
733 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
734 fs
= vec_madd(fs
,rinvsq
,nul
);
735 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
736 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
737 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
738 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
739 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
740 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
741 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
742 add_xyz_to_mem(faction
+j3a
,tmp1
);
744 /* update outer data */
745 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
746 tmp1
= vec_add(tmp1
,tmp3
);
747 tmp2
= vec_add(tmp2
,tmp4
);
748 tmp1
= vec_add(tmp1
,tmp2
);
750 add_xyz_to_mem(faction
+ii3
,tmp1
);
751 add_xyz_to_mem(fshift
+is3
,tmp1
);
753 add_vector_to_float(Vc
+gid
[n
],vctot
);
754 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
761 void inl2000_altivec(
778 vector
float ix
,iy
,iz
,shvec
;
779 vector
float vfacel
,vkrf
,vcrf
,krsq
,vcoul
,fs
,nul
;
780 vector
float dx
,dy
,dz
;
781 vector
float vctot
,qq
,iq
;
782 vector
float fix
,fiy
,fiz
;
783 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
784 vector
float rinv
,rinvsq
,rsq
;
786 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
787 int jnra
,jnrb
,jnrc
,jnrd
;
791 vfacel
=load_float_and_splat(&facel
);
792 vkrf
=load_float_and_splat(&krf
);
793 vcrf
=load_float_and_splat(&crf
);
797 shvec
= load_xyz(shiftvec
+is3
);
800 ix
= load_xyz(pos
+ii3
);
805 ix
= vec_add(ix
,shvec
);
808 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
809 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
811 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
820 transpose_4_to_3(load_xyz(pos
+j3a
),
823 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
827 rsq
= vec_madd(dx
,dx
,nul
);
828 rsq
= vec_madd(dy
,dy
,rsq
);
829 rsq
= vec_madd(dz
,dz
,rsq
);
830 rinv
= do_invsqrt(rsq
);
831 rinvsq
= vec_madd(rinv
,rinv
,nul
);
832 /* load 4 j charges and multiply by iq */
833 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
834 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
835 krsq
= vec_madd(vkrf
,rsq
,nul
);
836 vcoul
= vec_add(rinv
,krsq
);
837 vcoul
= vec_sub(vcoul
,vcrf
);
839 fs
= vec_nmsub(vec_two(),krsq
,rinv
);
840 vctot
= vec_madd(qq
,vcoul
,vctot
);
841 fs
= vec_madd(fs
,qq
,nul
);
842 fs
= vec_madd(fs
,rinvsq
,nul
);
844 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
845 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
846 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
847 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
848 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
849 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
850 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
851 add_xyz_to_mem(faction
+j3a
,tmp1
);
852 add_xyz_to_mem(faction
+j3b
,tmp2
);
853 add_xyz_to_mem(faction
+j3c
,tmp3
);
854 add_xyz_to_mem(faction
+j3d
,tmp4
);
861 transpose_2_to_3(load_xyz(pos
+j3a
),
862 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
866 rsq
= vec_madd(dx
,dx
,nul
);
867 rsq
= vec_madd(dy
,dy
,rsq
);
868 rsq
= vec_madd(dz
,dz
,rsq
);
869 zero_highest_2_elements_in_vector(&rsq
);
870 rinv
= do_invsqrt(rsq
);
871 zero_highest_2_elements_in_vector(&rinv
);
872 rinvsq
= vec_madd(rinv
,rinv
,nul
);
873 /* load 2 j charges and multiply by iq */
874 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
875 krsq
= vec_madd(vkrf
,rsq
,nul
);
876 vcoul
= vec_add(rinv
,krsq
);
877 vcoul
= vec_sub(vcoul
,vcrf
);
878 fs
= vec_nmsub(vec_two(),krsq
,rinv
);
879 vctot
= vec_madd(qq
,vcoul
,vctot
);
880 fs
= vec_madd(fs
,qq
,nul
);
881 fs
= vec_madd(fs
,rinvsq
,nul
);
882 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
883 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
884 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
885 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
886 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
887 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
888 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
889 add_xyz_to_mem(faction
+j3a
,tmp1
);
890 add_xyz_to_mem(faction
+j3b
,tmp2
);
896 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
900 rsq
= vec_madd(dx
,dx
,nul
);
901 rsq
= vec_madd(dy
,dy
,rsq
);
902 rsq
= vec_madd(dz
,dz
,rsq
);
903 zero_highest_3_elements_in_vector(&rsq
);
904 rinv
= do_invsqrt(rsq
);
905 zero_highest_3_elements_in_vector(&rinv
);
906 rinvsq
= vec_madd(rinv
,rinv
,nul
);
907 /* load 1 j charge and multiply by iq */
908 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
909 krsq
= vec_madd(vkrf
,rsq
,nul
);
910 vcoul
= vec_add(rinv
,krsq
);
911 vcoul
= vec_sub(vcoul
,vcrf
);
912 fs
= vec_nmsub(vec_two(),krsq
,rinv
);
913 vctot
= vec_madd(qq
,vcoul
,vctot
);
914 fs
= vec_madd(fs
,qq
,nul
);
915 fs
= vec_madd(fs
,rinvsq
,nul
);
916 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
917 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
918 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
919 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
920 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
921 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
922 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
923 add_xyz_to_mem(faction
+j3a
,tmp1
);
925 /* update outer data */
926 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
927 tmp1
= vec_add(tmp1
,tmp3
);
928 tmp2
= vec_add(tmp2
,tmp4
);
929 tmp1
= vec_add(tmp1
,tmp2
);
930 add_xyz_to_mem(faction
+ii3
,tmp1
);
931 add_xyz_to_mem(fshift
+is3
,tmp1
);
933 add_vector_to_float(Vc
+gid
[n
],vctot
);
939 void inl2100_altivec(
960 vector
float ix
,iy
,iz
,shvec
;
961 vector
float vfacel
,vkrf
,vcrf
,krsq
,vcoul
,fs
,nul
;
962 vector
float dx
,dy
,dz
;
963 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
;
964 vector
float fix
,fiy
,fiz
;
965 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
966 vector
float rinv
,rinvsq
,rsq
,rinvsix
,vnb6
,vnb12
;
968 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
969 int jnra
,jnrb
,jnrc
,jnrd
;
974 vfacel
=load_float_and_splat(&facel
);
975 vkrf
=load_float_and_splat(&krf
);
976 vcrf
=load_float_and_splat(&crf
);
980 shvec
= load_xyz(shiftvec
+is3
);
983 ix
= load_xyz(pos
+ii3
);
989 ix
= vec_add(ix
,shvec
);
992 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
993 ntiA
= 2*ntype
*type
[ii
];
994 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
996 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
1005 transpose_4_to_3(load_xyz(pos
+j3a
),
1008 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
1009 dx
= vec_sub(ix
,dx
);
1010 dy
= vec_sub(iy
,dy
);
1011 dz
= vec_sub(iz
,dz
);
1012 rsq
= vec_madd(dx
,dx
,nul
);
1013 rsq
= vec_madd(dy
,dy
,rsq
);
1014 rsq
= vec_madd(dz
,dz
,rsq
);
1015 rinv
= do_invsqrt(rsq
);
1016 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1017 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1018 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1019 tja
= ntiA
+2*type
[jnra
];
1020 tjb
= ntiA
+2*type
[jnrb
];
1021 tjc
= ntiA
+2*type
[jnrc
];
1022 tjd
= ntiA
+2*type
[jnrd
];
1023 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
1024 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
1025 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
1026 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1027 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
1028 krsq
= vec_madd(vkrf
,rsq
,nul
);
1029 vcoul
= vec_add(rinv
,krsq
);
1030 vcoul
= vec_sub(vcoul
,vcrf
);
1031 vctot
= vec_madd(qq
,vcoul
,vctot
);
1032 vnbtot
= vec_add(vnbtot
,vnb12
);
1033 vnbtot
= vec_sub(vnbtot
,vnb6
);
1034 fs
= vec_nmsub(vec_two(),krsq
,rinv
); /* rinv-2*krsq */
1035 fs
= vec_madd(qq
,fs
,nul
); /* qq*(rinv-2*krsq) */
1036 fs
= vec_madd(vec_twelve(),vnb12
,fs
);
1037 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1038 fs
= vec_madd(fs
,rinvsq
,nul
);
1039 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1040 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1041 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1042 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1043 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1044 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1045 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1046 add_xyz_to_mem(faction
+j3a
,tmp1
);
1047 add_xyz_to_mem(faction
+j3b
,tmp2
);
1048 add_xyz_to_mem(faction
+j3c
,tmp3
);
1049 add_xyz_to_mem(faction
+j3d
,tmp4
);
1056 transpose_2_to_3(load_xyz(pos
+j3a
),
1057 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
1058 dx
= vec_sub(ix
,dx
);
1059 dy
= vec_sub(iy
,dy
);
1060 dz
= vec_sub(iz
,dz
);
1061 rsq
= vec_madd(dx
,dx
,nul
);
1062 rsq
= vec_madd(dy
,dy
,rsq
);
1063 rsq
= vec_madd(dz
,dz
,rsq
);
1064 zero_highest_2_elements_in_vector(&rsq
);
1065 rinv
= do_invsqrt(rsq
);
1066 zero_highest_2_elements_in_vector(&rinv
);
1067 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1068 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1069 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1070 tja
= ntiA
+2*type
[jnra
];
1071 tjb
= ntiA
+2*type
[jnrb
];
1072 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
1073 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
1074 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1075 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
1076 krsq
= vec_madd(vkrf
,rsq
,nul
);
1077 vcoul
= vec_add(rinv
,krsq
);
1078 vcoul
= vec_sub(vcoul
,vcrf
);
1079 vctot
= vec_madd(qq
,vcoul
,vctot
);
1080 vnbtot
= vec_add(vnbtot
,vnb12
);
1081 vnbtot
= vec_sub(vnbtot
,vnb6
);
1082 fs
= vec_nmsub(vec_two(),krsq
,rinv
); /* rinv-2*krsq */
1083 fs
= vec_madd(qq
,fs
,nul
); /* qq*(rinv-2*krsq) */
1084 fs
= vec_madd(vec_twelve(),vnb12
,fs
);
1085 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1086 fs
= vec_madd(fs
,rinvsq
,nul
);
1087 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1088 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1089 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1090 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1091 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1092 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1093 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
1094 add_xyz_to_mem(faction
+j3a
,tmp1
);
1095 add_xyz_to_mem(faction
+j3b
,tmp2
);
1101 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
1102 dx
= vec_sub(ix
,dx
);
1103 dy
= vec_sub(iy
,dy
);
1104 dz
= vec_sub(iz
,dz
);
1105 rsq
= vec_madd(dx
,dx
,nul
);
1106 rsq
= vec_madd(dy
,dy
,rsq
);
1107 rsq
= vec_madd(dz
,dz
,rsq
);
1108 zero_highest_3_elements_in_vector(&rsq
);
1109 rinv
= do_invsqrt(rsq
);
1110 zero_highest_3_elements_in_vector(&rinv
);
1111 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1112 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1113 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1114 tja
= ntiA
+2*type
[jnra
];
1115 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
1116 load_1_pair(nbfp
+tja
,&c6
,&c12
);
1117 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1118 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
1119 krsq
= vec_madd(vkrf
,rsq
,nul
);
1120 vcoul
= vec_add(rinv
,krsq
);
1121 vcoul
= vec_sub(vcoul
,vcrf
);
1122 vctot
= vec_madd(qq
,vcoul
,vctot
);
1123 vnbtot
= vec_add(vnbtot
,vnb12
);
1124 vnbtot
= vec_sub(vnbtot
,vnb6
);
1125 fs
= vec_nmsub(vec_two(),krsq
,rinv
); /* rinv-2*krsq */
1126 fs
= vec_madd(qq
,fs
,nul
); /* qq*(rinv-2*krsq) */
1127 fs
= vec_madd(vec_twelve(),vnb12
,fs
);
1128 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1129 fs
= vec_madd(fs
,rinvsq
,nul
);
1130 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1131 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1132 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1133 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1134 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1135 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1136 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
1137 add_xyz_to_mem(faction
+j3a
,tmp1
);
1139 /* update outer data */
1140 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1141 tmp1
= vec_add(tmp1
,tmp3
);
1142 tmp2
= vec_add(tmp2
,tmp4
);
1143 tmp1
= vec_add(tmp1
,tmp2
);
1145 add_xyz_to_mem(faction
+ii3
,tmp1
);
1146 add_xyz_to_mem(fshift
+is3
,tmp1
);
1148 add_vector_to_float(Vc
+gid
[n
],vctot
);
1149 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3000_altivec: AltiVec inner loop that evaluates Coulomb interactions
 * through a table lookup (do_N_ctable_coul on VFtab).
 *
 * For each of the nri outer entries the visible code:
 *   - loads the shift vector and the i position, adds them and splats
 *     x/y/z into ix/iy/iz;
 *   - scales the i charge by facel (iq);
 *   - walks the j entries four at a time, then handles leftovers with
 *     2-wide and 1-wide variants of the same arithmetic;
 *   - adds the accumulated i force to faction+ii3 and fshift+is3 and hands
 *     vctot to add_vector_to_float for Vc+gid[n].
 *
 * NOTE(review): the parameter list, the index setup (is3, ii3, nj0, nj1,
 * jnr*, j3*) and the initialisation of nul/vctot/fix/fiy/fiz are not
 * visible here; nul is presumably the zero vector - confirm.
 * NOTE(review): vcoul, ntiA and k0 are declared but not used in the
 * visible statements.
 */
1158 void inl3000_altivec(
1175 vector
float ix
,iy
,iz
,shvec
;
1176 vector
float vfacel
,tsc
,vcoul
,fs
,nul
;
1177 vector
float dx
,dy
,dz
;
1178 vector
float vctot
,qq
,iq
;
1179 vector
float fix
,fiy
,fiz
;
1180 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
1181 vector
float rinv
,r
,rsq
,VVc
,FFc
;
1183 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
1184 int jnra
,jnrb
,jnrc
,jnrd
;
1185 int j3a
,j3b
,j3c
,j3d
;
/* Broadcast the scalar facel and tabscale into all vector lanes. */
1188 vfacel
=load_float_and_splat(&facel
);
1189 tsc
=load_float_and_splat(&tabscale
);
/* Outer loop: one iteration per entry 0..nri-1. */
1191 for(n
=0;n
<nri
;n
++) {
1193 shvec
= load_xyz(shiftvec
+is3
);
1196 ix
= load_xyz(pos
+ii3
);
1201 ix
= vec_add(ix
,shvec
);
/* ix is both input and output: x, y, z are spread into ix, iy, iz. */
1204 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
/* iq = charge[ii]*facel + nul */
1205 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
/* 4-wide inner loop: runs while at least four j entries remain. */
1207 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
1216 transpose_4_to_3(load_xyz(pos
+j3a
),
1219 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
/* d = i - j, rsq = dx*dx + dy*dy + dz*dz (starting from nul). */
1220 dx
= vec_sub(ix
,dx
);
1221 dy
= vec_sub(iy
,dy
);
1222 dz
= vec_sub(iz
,dz
);
1223 rsq
= vec_madd(dx
,dx
,nul
);
1224 rsq
= vec_madd(dy
,dy
,rsq
);
1225 rsq
= vec_madd(dz
,dz
,rsq
);
1226 rinv
= do_invsqrt(rsq
);
/* r = rinv*rsq; it is scaled by tsc below to form the table argument. */
1227 r
= vec_madd(rinv
,rsq
,nul
);
1228 /* load 4 j charges and multiply by iq */
1229 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
1230 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
1231 do_4_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
/* vec_nmsub(a,b,c) = -(a*b - c): fs = -(qq*FFc - nul), then *tsc, *rinv. */
1232 fs
= vec_nmsub(qq
,FFc
,nul
);
1233 vctot
= vec_madd(qq
,VVc
,vctot
);
1234 fs
= vec_madd(fs
,tsc
,nul
);
1235 fs
= vec_madd(fs
,rinv
,nul
);
1236 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1237 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1238 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1239 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1240 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1241 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
/* Regroup the negated components into four vectors, one per j slot. */
1242 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1243 add_xyz_to_mem(faction
+j3a
,tmp1
);
1244 add_xyz_to_mem(faction
+j3b
,tmp2
);
1245 add_xyz_to_mem(faction
+j3c
,tmp3
);
1246 add_xyz_to_mem(faction
+j3d
,tmp4
);
/*
 * 2-wide variant (j3a, j3b). rsq and rinv are passed through
 * zero_highest_2_elements_in_vector - presumably to clear the two lanes
 * that carry no j entry; confirm against the helper.
 */
1253 transpose_2_to_3(load_xyz(pos
+j3a
),
1254 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
1255 dx
= vec_sub(ix
,dx
);
1256 dy
= vec_sub(iy
,dy
);
1257 dz
= vec_sub(iz
,dz
);
1258 rsq
= vec_madd(dx
,dx
,nul
);
1259 rsq
= vec_madd(dy
,dy
,rsq
);
1260 rsq
= vec_madd(dz
,dz
,rsq
);
1261 zero_highest_2_elements_in_vector(&rsq
);
1262 rinv
= do_invsqrt(rsq
);
1263 zero_highest_2_elements_in_vector(&rinv
);
1264 r
= vec_madd(rinv
,rsq
,nul
);
1265 /* load 2 j charges and multiply by iq */
1266 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
1267 do_2_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
1268 fs
= vec_nmsub(qq
,FFc
,nul
);
1269 vctot
= vec_madd(qq
,VVc
,vctot
);
1270 fs
= vec_madd(fs
,tsc
,nul
);
1271 fs
= vec_madd(fs
,rinv
,nul
);
1272 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1273 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1274 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1275 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1276 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1277 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1278 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
1279 add_xyz_to_mem(faction
+j3a
,tmp1
);
1280 add_xyz_to_mem(faction
+j3b
,tmp2
);
/*
 * 1-wide variant (j3a only); rsq and rinv go through
 * zero_highest_3_elements_in_vector before use.
 */
1286 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
1287 dx
= vec_sub(ix
,dx
);
1288 dy
= vec_sub(iy
,dy
);
1289 dz
= vec_sub(iz
,dz
);
1290 rsq
= vec_madd(dx
,dx
,nul
);
1291 rsq
= vec_madd(dy
,dy
,rsq
);
1292 rsq
= vec_madd(dz
,dz
,rsq
);
1293 zero_highest_3_elements_in_vector(&rsq
);
1294 rinv
= do_invsqrt(rsq
);
1295 zero_highest_3_elements_in_vector(&rinv
);
1296 r
= vec_madd(rinv
,rsq
,nul
);
1297 /* load 1 j charge and multiply by iq */
1298 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
1299 do_1_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
1300 fs
= vec_nmsub(qq
,FFc
,nul
);
1301 vctot
= vec_madd(qq
,VVc
,vctot
);
1302 fs
= vec_madd(fs
,tsc
,nul
);
1303 fs
= vec_madd(fs
,rinv
,nul
);
1304 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1305 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1306 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1307 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1308 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1309 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1310 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
1311 add_xyz_to_mem(faction
+j3a
,tmp1
);
1313 /* update outer data */
/* The four transposed vectors are summed into tmp1 before being stored. */
1314 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1315 tmp1
= vec_add(tmp1
,tmp3
);
1316 tmp2
= vec_add(tmp2
,tmp4
);
1317 tmp1
= vec_add(tmp1
,tmp2
);
1318 add_xyz_to_mem(faction
+ii3
,tmp1
);
1319 add_xyz_to_mem(fshift
+is3
,tmp1
);
1321 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl3100_altivec: AltiVec inner loop combining a table-lookup Coulomb
 * term (do_N_ctable_coul on VFtab) with an analytical c6/c12 term built
 * from rinvsix.
 *
 * Visible per-pair arithmetic (identical in the 4-, 2- and 1-wide paths):
 *   rinvsix = rinvsq^3
 *   vnb6    = c6*rinvsix,  vnb12 = c12*rinvsix*rinvsix
 *   vnbtot += vnb12 - vnb6
 *   vctot  += qq*VVc
 *   fs      = ((12*vnb12 - 6*vnb6)*rinv - qq*FFc*tsc) * rinv
 * c6/c12 are loaded from nbfp at offset ntiA + 2*type[jnr], where
 * ntiA = 2*ntype*type[ii].
 *
 * NOTE(review): the parameter list, index setup and the initialisation of
 * nul/vctot/vnbtot/fix/fiy/fiz are not visible here; nul is presumably the
 * zero vector - confirm.
 * NOTE(review): vcoul and k0 are declared but not used in the visible
 * statements.
 */
1327 void inl3100_altivec(
1348 vector
float ix
,iy
,iz
,shvec
;
1349 vector
float vfacel
,vcoul
,tsc
,fs
,fs2
,nul
;
1350 vector
float dx
,dy
,dz
;
1351 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
,VVc
,FFc
;
1352 vector
float fix
,fiy
,fiz
;
1353 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
1354 vector
float rinv
,r
,rinvsq
,rsq
,rinvsix
,vnb6
,vnb12
;
1356 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
1357 int jnra
,jnrb
,jnrc
,jnrd
;
1358 int j3a
,j3b
,j3c
,j3d
;
1359 int tja
,tjb
,tjc
,tjd
;
/* Broadcast the scalar facel and tabscale into all vector lanes. */
1362 vfacel
=load_float_and_splat(&facel
);
1363 tsc
=load_float_and_splat(&tabscale
);
/* Outer loop: one iteration per entry 0..nri-1. */
1365 for(n
=0;n
<nri
;n
++) {
1367 shvec
= load_xyz(shiftvec
+is3
);
1370 ix
= load_xyz(pos
+ii3
);
1376 ix
= vec_add(ix
,shvec
);
1379 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
/* Base offset into nbfp for the type of atom ii. */
1380 ntiA
= 2*ntype
*type
[ii
];
1381 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
/* 4-wide inner loop: runs while at least four j entries remain. */
1383 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
1392 transpose_4_to_3(load_xyz(pos
+j3a
),
1395 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
1396 dx
= vec_sub(ix
,dx
);
1397 dy
= vec_sub(iy
,dy
);
1398 dz
= vec_sub(iz
,dz
);
1399 rsq
= vec_madd(dx
,dx
,nul
);
1400 rsq
= vec_madd(dy
,dy
,rsq
);
1401 rsq
= vec_madd(dz
,dz
,rsq
);
1402 rinv
= do_invsqrt(rsq
);
1403 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1404 r
= vec_madd(rinv
,rsq
,nul
);
1405 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1406 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1407 tja
= ntiA
+2*type
[jnra
];
1408 tjb
= ntiA
+2*type
[jnrb
];
1409 tjc
= ntiA
+2*type
[jnrc
];
1410 tjd
= ntiA
+2*type
[jnrd
];
1411 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
1412 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
1413 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
1414 do_4_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
1415 fs2
= vec_madd(qq
,FFc
,nul
); /* fijC */
1416 vctot
= vec_madd(qq
,VVc
,vctot
);
1417 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1418 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
/* vec_nmsub(a,b,c) = -(a*b - c): fs = 12*vnb12 - 6*vnb6, then *rinv. */
1419 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
1420 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1421 fs
= vec_madd(fs
,rinv
,nul
);
1422 vnbtot
= vec_add(vnbtot
,vnb12
);
/* Subtract the tabulated Coulomb part fs2*tsc, then scale by rinv again. */
1423 fs
= vec_nmsub(fs2
,tsc
,fs
);
1424 fs
= vec_madd(fs
,rinv
,nul
);
1425 vnbtot
= vec_sub(vnbtot
,vnb6
);
1426 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1427 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1428 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1429 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1430 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1431 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1432 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1433 add_xyz_to_mem(faction
+j3a
,tmp1
);
1434 add_xyz_to_mem(faction
+j3b
,tmp2
);
1435 add_xyz_to_mem(faction
+j3c
,tmp3
);
1436 add_xyz_to_mem(faction
+j3d
,tmp4
);
/*
 * 2-wide variant (j3a, j3b). rsq and rinv are passed through
 * zero_highest_2_elements_in_vector - presumably to clear the two lanes
 * that carry no j entry; confirm against the helper.
 */
1443 transpose_2_to_3(load_xyz(pos
+j3a
),
1444 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
1445 dx
= vec_sub(ix
,dx
);
1446 dy
= vec_sub(iy
,dy
);
1447 dz
= vec_sub(iz
,dz
);
1448 rsq
= vec_madd(dx
,dx
,nul
);
1449 rsq
= vec_madd(dy
,dy
,rsq
);
1450 rsq
= vec_madd(dz
,dz
,rsq
);
1451 zero_highest_2_elements_in_vector(&rsq
);
1452 rinv
= do_invsqrt(rsq
);
1453 zero_highest_2_elements_in_vector(&rinv
);
1454 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1455 r
= vec_madd(rinv
,rsq
,nul
);
1456 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1457 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1458 tja
= ntiA
+2*type
[jnra
];
1459 tjb
= ntiA
+2*type
[jnrb
];
1460 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
1461 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
1462 do_2_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
1463 fs2
= vec_madd(qq
,FFc
,nul
); /* fijC */
1464 vctot
= vec_madd(qq
,VVc
,vctot
);
1465 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1466 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
1467 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
1468 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1469 vnbtot
= vec_add(vnbtot
,vnb12
);
1470 fs
= vec_madd(fs
,rinv
,nul
);
1471 fs
= vec_nmsub(fs2
,tsc
,fs
);
1472 fs
= vec_madd(fs
,rinv
,nul
);
1473 vnbtot
= vec_sub(vnbtot
,vnb6
);
1474 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1475 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1476 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1477 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1478 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1479 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1480 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
1481 add_xyz_to_mem(faction
+j3a
,tmp1
);
1482 add_xyz_to_mem(faction
+j3b
,tmp2
);
/*
 * 1-wide variant (j3a only); rsq and rinv go through
 * zero_highest_3_elements_in_vector before use.
 */
1488 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
1489 dx
= vec_sub(ix
,dx
);
1490 dy
= vec_sub(iy
,dy
);
1491 dz
= vec_sub(iz
,dz
);
1492 rsq
= vec_madd(dx
,dx
,nul
);
1493 rsq
= vec_madd(dy
,dy
,rsq
);
1494 rsq
= vec_madd(dz
,dz
,rsq
);
1495 zero_highest_3_elements_in_vector(&rsq
);
1496 rinv
= do_invsqrt(rsq
);
1497 zero_highest_3_elements_in_vector(&rinv
);
1498 rinvsq
= vec_madd(rinv
,rinv
,nul
);
1499 r
= vec_madd(rinv
,rsq
,nul
);
1500 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
1501 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
1502 tja
= ntiA
+2*type
[jnra
];
1503 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
1504 load_1_pair(nbfp
+tja
,&c6
,&c12
);
1505 do_1_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
);
1506 fs2
= vec_madd(qq
,FFc
,nul
); /* fijC */
1507 vctot
= vec_madd(qq
,VVc
,vctot
);
1508 vnb6
= vec_madd(c6
,rinvsix
,nul
);
1509 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
1510 fs
= vec_madd(vec_twelve(),vnb12
,nul
);
1511 fs
= vec_nmsub(vec_six(),vnb6
,fs
);
1512 fs
= vec_madd(fs
,rinv
,nul
);
1513 vnbtot
= vec_add(vnbtot
,vnb12
);
1514 fs
= vec_nmsub(fs2
,tsc
,fs
);
1515 fs
= vec_madd(fs
,rinv
,nul
);
1516 vnbtot
= vec_sub(vnbtot
,vnb6
);
1517 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1518 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1519 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1520 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1521 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1522 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1523 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
1524 add_xyz_to_mem(faction
+j3a
,tmp1
);
1526 /* update outer data */
/* The four transposed vectors are summed into tmp1 before being stored. */
1527 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1528 tmp1
= vec_add(tmp1
,tmp3
);
1529 tmp2
= vec_add(tmp2
,tmp4
);
1530 tmp1
= vec_add(tmp1
,tmp2
);
1532 add_xyz_to_mem(faction
+ii3
,tmp1
);
1533 add_xyz_to_mem(fshift
+is3
,tmp1
);
1535 add_vector_to_float(Vc
+gid
[n
],vctot
);
1536 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3300_altivec: AltiVec inner loop in which one table lookup
 * (do_N_ljctable_coul_and_lj on VFtab) returns three potential/force
 * pairs: VVc/FFc, VVd/FFd and VVr/FFr.
 *
 * Visible per-pair arithmetic (identical in the 4-, 2- and 1-wide paths):
 *   vctot  += qq*VVc
 *   vnbtot += c6*VVd + c12*VVr
 *   fs      = (nul - qq*FFc - c6*FFd - c12*FFr) * tsc * rinv
 * c6/c12 are loaded from nbfp at offset ntiA + 2*type[jnr], where
 * ntiA = 2*ntype*type[ii].
 *
 * NOTE(review): the parameter list, index setup and the initialisation of
 * nul/vctot/vnbtot/fix/fiy/fiz are not visible here; nul is presumably the
 * zero vector - confirm.
 * NOTE(review): vcoul and k0 are declared but not used in the visible
 * statements.
 */
1541 void inl3300_altivec(
1562 vector
float ix
,iy
,iz
,shvec
;
1563 vector
float fs
,nul
,tsc
;
1564 vector
float dx
,dy
,dz
,vfacel
,vcoul
,vctot
;
1565 vector
float vnbtot
,c6
,c12
,iq
,qq
;
1566 vector
float fix
,fiy
,fiz
;
1567 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
1568 vector
float rinv
,r
,rsq
;
1569 vector
float VVc
,FFc
,VVd
,FFd
,VVr
,FFr
;
1571 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
1572 int jnra
,jnrb
,jnrc
,jnrd
;
1573 int j3a
,j3b
,j3c
,j3d
;
1574 int tja
,tjb
,tjc
,tjd
;
/* Broadcast the scalar tabscale and facel into all vector lanes. */
1577 tsc
=load_float_and_splat(&tabscale
);
1578 vfacel
=load_float_and_splat(&facel
);
/* Outer loop: one iteration per entry 0..nri-1. */
1580 for(n
=0;n
<nri
;n
++) {
1582 shvec
= load_xyz(shiftvec
+is3
);
1585 ix
= load_xyz(pos
+ii3
);
1591 ix
= vec_add(ix
,shvec
);
1594 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
/* Base offset into nbfp for the type of atom ii. */
1595 ntiA
= 2*ntype
*type
[ii
];
1596 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
/* 4-wide inner loop: runs while at least four j entries remain. */
1598 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
1607 transpose_4_to_3(load_xyz(pos
+j3a
),
1610 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
1611 dx
= vec_sub(ix
,dx
);
1612 dy
= vec_sub(iy
,dy
);
1613 dz
= vec_sub(iz
,dz
);
1614 rsq
= vec_madd(dx
,dx
,nul
);
1615 rsq
= vec_madd(dy
,dy
,rsq
);
1616 rsq
= vec_madd(dz
,dz
,rsq
);
1617 rinv
= do_invsqrt(rsq
);
/* r = rinv*rsq; it is scaled by tsc below to form the table argument. */
1618 r
= vec_madd(rinv
,rsq
,nul
);
1619 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
1620 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
1621 tja
= ntiA
+2*type
[jnra
];
1622 tjb
= ntiA
+2*type
[jnrb
];
1623 tjc
= ntiA
+2*type
[jnrc
];
1624 tjd
= ntiA
+2*type
[jnrd
];
1625 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
1626 do_4_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
,&VVd
,&FFd
,&VVr
,&FFr
);
/* vec_nmsub(a,b,c) = -(a*b - c): each call subtracts one product from fs. */
1627 vctot
= vec_madd(qq
,VVc
,vctot
);
1628 fs
= vec_nmsub(qq
,FFc
,nul
);
1629 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
1630 fs
= vec_nmsub(c6
,FFd
,fs
);
1631 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
1632 fs
= vec_nmsub(c12
,FFr
,fs
);
1633 fs
= vec_madd(fs
,tsc
,nul
);
1634 fs
= vec_madd(fs
,rinv
,nul
);
1635 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1636 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1637 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1638 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1639 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1640 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1641 transpose_3_to_4(dx
,dy
,dz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1642 add_xyz_to_mem(faction
+j3a
,tmp1
);
1643 add_xyz_to_mem(faction
+j3b
,tmp2
);
1644 add_xyz_to_mem(faction
+j3c
,tmp3
);
1645 add_xyz_to_mem(faction
+j3d
,tmp4
);
/*
 * 2-wide variant (j3a, j3b). rsq and rinv are passed through
 * zero_highest_2_elements_in_vector - presumably to clear the two lanes
 * that carry no j entry; confirm against the helper.
 */
1652 transpose_2_to_3(load_xyz(pos
+j3a
),
1653 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
1654 dx
= vec_sub(ix
,dx
);
1655 dy
= vec_sub(iy
,dy
);
1656 dz
= vec_sub(iz
,dz
);
1657 rsq
= vec_madd(dx
,dx
,nul
);
1658 rsq
= vec_madd(dy
,dy
,rsq
);
1659 rsq
= vec_madd(dz
,dz
,rsq
);
1660 zero_highest_2_elements_in_vector(&rsq
);
1661 rinv
= do_invsqrt(rsq
);
1662 zero_highest_2_elements_in_vector(&rinv
);
1663 r
= vec_madd(rinv
,rsq
,nul
);
1664 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
1665 tja
= ntiA
+2*type
[jnra
];
1666 tjb
= ntiA
+2*type
[jnrb
];
1667 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
1668 do_2_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
,&VVd
,&FFd
,&VVr
,&FFr
);
1669 vctot
= vec_madd(qq
,VVc
,vctot
);
1670 fs
= vec_nmsub(qq
,FFc
,nul
);
1671 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
1672 fs
= vec_nmsub(c6
,FFd
,fs
);
1673 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
1674 fs
= vec_nmsub(c12
,FFr
,fs
);
1675 fs
= vec_madd(fs
,tsc
,nul
);
1676 fs
= vec_madd(fs
,rinv
,nul
);
1677 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1678 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1679 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1680 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1681 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1682 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1683 transpose_3_to_2(dx
,dy
,dz
,&tmp1
,&tmp2
);
1684 add_xyz_to_mem(faction
+j3a
,tmp1
);
1685 add_xyz_to_mem(faction
+j3b
,tmp2
);
/*
 * 1-wide variant (j3a only); rsq and rinv go through
 * zero_highest_3_elements_in_vector before use.
 */
1691 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
1692 dx
= vec_sub(ix
,dx
);
1693 dy
= vec_sub(iy
,dy
);
1694 dz
= vec_sub(iz
,dz
);
1695 rsq
= vec_madd(dx
,dx
,nul
);
1696 rsq
= vec_madd(dy
,dy
,rsq
);
1697 rsq
= vec_madd(dz
,dz
,rsq
);
1698 zero_highest_3_elements_in_vector(&rsq
);
1699 rinv
= do_invsqrt(rsq
);
1700 zero_highest_3_elements_in_vector(&rinv
);
1701 r
= vec_madd(rinv
,rsq
,nul
);
1702 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
1703 tja
= ntiA
+2*type
[jnra
];
1704 load_1_pair(nbfp
+tja
,&c6
,&c12
);
1705 do_1_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&FFc
,&VVd
,&FFd
,&VVr
,&FFr
);
1706 vctot
= vec_madd(qq
,VVc
,vctot
);
1707 fs
= vec_nmsub(qq
,FFc
,nul
);
1708 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
1709 fs
= vec_nmsub(c6
,FFd
,fs
);
1710 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
1711 fs
= vec_nmsub(c12
,FFr
,fs
);
1712 fs
= vec_madd(fs
,tsc
,nul
);
1713 fs
= vec_madd(fs
,rinv
,nul
);
1714 fix
= vec_madd(fs
,dx
,fix
); /* +=fx */
1715 fiy
= vec_madd(fs
,dy
,fiy
); /* +=fy */
1716 fiz
= vec_madd(fs
,dz
,fiz
); /* +=fz */
1717 dx
= vec_nmsub(dx
,fs
,nul
); /* -fx */
1718 dy
= vec_nmsub(dy
,fs
,nul
); /* -fy */
1719 dz
= vec_nmsub(dz
,fs
,nul
); /* -fz */
1720 transpose_3_to_1(dx
,dy
,dz
,&tmp1
);
1721 add_xyz_to_mem(faction
+j3a
,tmp1
);
1723 /* update outer data */
/* The four transposed vectors are summed into tmp1 before being stored. */
1724 transpose_3_to_4(fix
,fiy
,fiz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1725 tmp1
= vec_add(tmp1
,tmp3
);
1726 tmp2
= vec_add(tmp2
,tmp4
);
1727 tmp1
= vec_add(tmp1
,tmp2
);
1728 add_xyz_to_mem(faction
+ii3
,tmp1
);
1729 add_xyz_to_mem(fshift
+is3
,tmp1
);
1731 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
1732 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl1020_altivec: AltiVec inner loop for a three-site i molecule
 * (sites named O, H1, H2 in the code) interacting with single j atoms
 * through a plain qq*rinv Coulomb term.
 *
 * Visible structure:
 *   - iqO and iqH are computed once, from charge[iinr[0]] and
 *     charge[iinr[0]+1], each scaled by facel;
 *   - per outer entry, load_1_water_shift_and_splat fills the nine
 *     iO/iH1/iH2 coordinate vectors from pos+ii3 and shiftvec+is3;
 *   - j atoms are processed 4 at a time, with 3-, 2- and 1-wide variants
 *     for leftovers;
 *   - per site: vcoul = qq*rinv, fs = vcoul*rinvsq, vctot += vcoul;
 *   - the j force is accumulated in dOx/dOy/dOz as
 *     nul - fsO*dO - fsH1*dH1 - fsH2*dH2 and added to faction+j3*;
 *   - update_i_water_forces receives the nine fi* accumulators together
 *     with faction+ii3 and fshift+is3.
 *
 * NOTE(review): the parameter list, index setup and the initialisation of
 * nul/vctot/fi* are not visible here; nul is presumably the zero vector -
 * confirm.
 * NOTE(review): ntiA and k0 are declared but not used in the visible
 * statements.
 */
1737 void inl1020_altivec(
1752 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
1753 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
1754 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
1755 vector
float fsO
,fsH1
,fsH2
;
1756 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
1757 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
1758 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
1759 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rinvsqH1
,rinvsqH2
,rsqO
,rsqH1
,rsqH2
;
1762 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
1763 int jnra
,jnrb
,jnrc
,jnrd
;
1764 int j3a
,j3b
,j3c
,j3d
;
/* i charges are taken from the first iinr entry only and reused for all n. */
1767 vfacel
=load_float_and_splat(&facel
);
1768 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
1769 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
/* Outer loop: one iteration per entry 0..nri-1. */
1771 for(n
=0;n
<nri
;n
++) {
1775 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
1776 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* 4-wide inner loop: runs while at least four j entries remain. */
1790 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* j coordinates land in dH2x/y/z and are overwritten last, after use. */
1799 transpose_4_to_3(load_xyz(pos
+j3a
),
1802 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
1803 dOx
= vec_sub(iOx
,dH2x
);
1804 dOy
= vec_sub(iOy
,dH2y
);
1805 dOz
= vec_sub(iOz
,dH2z
);
1806 dH1x
= vec_sub(iH1x
,dH2x
);
1807 dH1y
= vec_sub(iH1y
,dH2y
);
1808 dH1z
= vec_sub(iH1z
,dH2z
);
1809 dH2x
= vec_sub(iH2x
,dH2x
);
1810 dH2y
= vec_sub(iH2y
,dH2y
);
1811 dH2z
= vec_sub(iH2z
,dH2z
);
1813 rsqO
= vec_madd(dOx
,dOx
,nul
);
1814 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
1815 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
1816 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
1817 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
1818 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
1819 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
1820 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
1821 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
1822 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
1823 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
1824 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
1825 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
1826 /* load 4 j charges and multiply by iq */
1827 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
1828 qqO
= vec_madd(iqO
,jq
,nul
);
1829 qqH
= vec_madd(iqH
,jq
,nul
);
1830 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
1831 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
1832 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
1833 fsO
= vec_madd(vcoulO
,rinvsqO
,nul
);
1834 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
1835 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
1836 vctot
= vec_add(vctot
,vcoulO
);
1837 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
1838 vctot
= vec_add(vctot
,vcoulH1
);
/* vec_nmsub(a,b,c) = -(a*b - c): dO* collects the summed j force. */
1840 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
1841 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
1842 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
1843 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
1844 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
1845 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
1846 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
1847 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
1848 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
1849 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
1850 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
1851 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
1852 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
1853 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
1854 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
1855 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
1856 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
1857 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
1859 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
1860 add_xyz_to_mem(faction
+j3a
,tmp1
);
1861 add_xyz_to_mem(faction
+j3b
,tmp2
);
1862 add_xyz_to_mem(faction
+j3c
,tmp3
);
1863 add_xyz_to_mem(faction
+j3d
,tmp4
);
/*
 * 3-wide variant (j3a, j3b, j3c): nul is passed as the fourth input of
 * transpose_4_to_3, and the three rinv vectors go through
 * zero_highest_element_in_3_vectors - presumably to clear the lane that
 * carries no j entry; confirm against the helper.
 */
1872 transpose_4_to_3(load_xyz(pos
+j3a
),
1874 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
1875 dOx
= vec_sub(iOx
,dH2x
);
1876 dOy
= vec_sub(iOy
,dH2y
);
1877 dOz
= vec_sub(iOz
,dH2z
);
1878 dH1x
= vec_sub(iH1x
,dH2x
);
1879 dH1y
= vec_sub(iH1y
,dH2y
);
1880 dH1z
= vec_sub(iH1z
,dH2z
);
1881 dH2x
= vec_sub(iH2x
,dH2x
);
1882 dH2y
= vec_sub(iH2y
,dH2y
);
1883 dH2z
= vec_sub(iH2z
,dH2z
);
1885 rsqO
= vec_madd(dOx
,dOx
,nul
);
1886 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
1887 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
1888 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
1889 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
1890 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
1891 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
1892 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
1893 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
1894 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
1895 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
1897 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
1898 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
1899 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
1900 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
1902 qqO
= vec_madd(iqO
,jq
,nul
);
1903 qqH
= vec_madd(iqH
,jq
,nul
);
1904 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
1905 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
1906 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
1907 fsO
= vec_madd(vcoulO
,rinvsqO
,nul
);
1908 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
1909 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
1910 vctot
= vec_add(vctot
,vcoulO
);
1911 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
1912 vctot
= vec_add(vctot
,vcoulH1
);
1914 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
1915 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
1916 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
1917 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
1918 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
1919 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
1920 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
1921 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
1922 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
1923 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
1924 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
1925 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
1926 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
1927 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
1928 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
1929 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
1930 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
1931 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* transpose_4_to_3 is used here, again with nul as the fourth input. */
1933 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
1934 add_xyz_to_mem(faction
+j3a
,tmp1
);
1935 add_xyz_to_mem(faction
+j3b
,tmp2
);
1936 add_xyz_to_mem(faction
+j3c
,tmp3
);
/* 2-wide variant (j3a, j3b), taken when k < nj1-1. */
1937 } else if(k
<(nj1
-1)) {
1942 transpose_2_to_3(load_xyz(pos
+j3a
),
1943 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
1944 dOx
= vec_sub(iOx
,dH2x
);
1945 dOy
= vec_sub(iOy
,dH2y
);
1946 dOz
= vec_sub(iOz
,dH2z
);
1947 dH1x
= vec_sub(iH1x
,dH2x
);
1948 dH1y
= vec_sub(iH1y
,dH2y
);
1949 dH1z
= vec_sub(iH1z
,dH2z
);
1950 dH2x
= vec_sub(iH2x
,dH2x
);
1951 dH2y
= vec_sub(iH2y
,dH2y
);
1952 dH2z
= vec_sub(iH2z
,dH2z
);
1954 rsqO
= vec_madd(dOx
,dOx
,nul
);
1955 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
1956 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
1957 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
1958 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
1959 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
1960 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
1961 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
1962 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
1963 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
1964 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
1966 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
1967 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
1968 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
1969 /* load 2 j charges and multiply by iq */
1970 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
1971 qqO
= vec_madd(iqO
,jq
,nul
);
1972 qqH
= vec_madd(iqH
,jq
,nul
);
1973 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
1974 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
1975 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
1976 fsO
= vec_madd(vcoulO
,rinvsqO
,nul
);
1977 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
1978 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
1979 vctot
= vec_add(vctot
,vcoulO
);
1980 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
1981 vctot
= vec_add(vctot
,vcoulH1
);
1983 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
1984 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
1985 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
1986 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
1987 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
1988 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
1989 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
1990 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
1991 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
1992 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
1993 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
1994 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
1995 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
1996 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
1997 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
1998 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
1999 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2000 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2002 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
2003 add_xyz_to_mem(faction
+j3a
,tmp1
);
2004 add_xyz_to_mem(faction
+j3b
,tmp2
);
/*
 * 1-wide variant (j3a only); the three rinv vectors go through
 * zero_highest_3_elements_in_3_vectors before use.
 */
2008 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
2009 dOx
= vec_sub(iOx
,dH2x
);
2010 dOy
= vec_sub(iOy
,dH2y
);
2011 dOz
= vec_sub(iOz
,dH2z
);
2012 dH1x
= vec_sub(iH1x
,dH2x
);
2013 dH1y
= vec_sub(iH1y
,dH2y
);
2014 dH1z
= vec_sub(iH1z
,dH2z
);
2015 dH2x
= vec_sub(iH2x
,dH2x
);
2016 dH2y
= vec_sub(iH2y
,dH2y
);
2017 dH2z
= vec_sub(iH2z
,dH2z
);
2019 rsqO
= vec_madd(dOx
,dOx
,nul
);
2020 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2021 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2022 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2023 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2024 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2025 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2026 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2027 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2028 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2029 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2031 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2032 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2033 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2034 /* load 1 j charges and multiply by iq */
2035 jq
=load_1_float(charge
+jnra
);
2036 qqO
= vec_madd(iqO
,jq
,nul
);
2037 qqH
= vec_madd(iqH
,jq
,nul
);
2038 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
2039 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
2040 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
2041 fsO
= vec_madd(vcoulO
,rinvsqO
,nul
);
2042 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
2043 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
2044 vctot
= vec_add(vctot
,vcoulO
);
2045 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
2046 vctot
= vec_add(vctot
,vcoulH1
);
2048 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2049 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2050 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2051 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2052 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2053 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2054 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2055 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2056 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2057 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2058 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2059 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2060 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2061 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2062 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2063 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2064 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2065 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2067 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
2068 add_xyz_to_mem(faction
+j3a
,tmp1
);
2070 /* update outer data */
2071 update_i_water_forces(faction
+ii3
,fshift
+is3
,
2072 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
2074 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl1120_altivec - AltiVec inner loop: Coulomb on all three sites of an
 * i water (O, H1, H2) plus a c6/c12 term on the O site only, evaluated
 * against single j atoms.
 *
 * For each of the nri outer entries the j atoms are handled four per pass
 * in the main inner loop; the sections after it handle three, two or one
 * leftover j atoms and call zero_highest_*_in_3_vectors() on the inverse
 * distances. Per-site forces are accumulated in fiO*, fiH1*, fiH2* and
 * passed to update_i_water_forces(); the negated sum is added to faction
 * for every j atom. Energies are added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): the parameter list and a number of statements (index
 * set-up, accumulator resets, some branch conditions) are not visible in
 * this listing; nul is presumably the all-zero vector - confirm.
 */
2079 void inl1120_altivec(
2098 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
2099 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
2100 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
2101 vector
float vnbtot
,c6
,c12
,rinvsix
,vnb6
,vnb12
;
2102 vector
float fsO
,fsH1
,fsH2
;
2103 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
2104 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
2105 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
2106 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rinvsqH1
,rinvsqH2
,rsqO
,rsqH1
,rsqH2
;
2109 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
2110 int jnra
,jnrb
,jnrc
,jnrd
;
2111 int j3a
,j3b
,j3c
,j3d
;
2112 int tja
,tjb
,tjc
,tjd
;
/* Broadcast facel, then pre-scale the two i charges by it:
 * charge[ii] is used for the O site, charge[ii+1] for both H sites. */
2115 vfacel
=load_float_and_splat(&facel
);
2117 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
2118 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* ntiA is the base offset added to 2*type[j] when indexing nbfp below;
 * presumably two entries (c6,c12) per type pair - confirm. */
2119 ntiA
= 2*ntype
*type
[ii
];
/* Outer loop over the nri i entries. */
2121 for(n
=0;n
<nri
;n
++) {
2125 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
2126 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main inner loop: four j atoms per pass. */
2141 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
2150 transpose_4_to_3(load_xyz(pos
+j3a
),
2153 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* i site minus j position. dH2x/y/z hold the j coordinates coming out of
 * the transpose, so they are overwritten last. */
2154 dOx
= vec_sub(iOx
,dH2x
);
2155 dOy
= vec_sub(iOy
,dH2y
);
2156 dOz
= vec_sub(iOz
,dH2z
);
2157 dH1x
= vec_sub(iH1x
,dH2x
);
2158 dH1y
= vec_sub(iH1y
,dH2y
);
2159 dH1z
= vec_sub(iH1z
,dH2z
);
2160 dH2x
= vec_sub(iH2x
,dH2x
);
2161 dH2y
= vec_sub(iH2y
,dH2y
);
2162 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances, built up one component at a time. */
2164 rsqO
= vec_madd(dOx
,dOx
,nul
);
2165 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2166 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2167 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2168 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2169 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2170 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2171 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2172 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2173 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2174 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2175 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2176 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
/* rinvsix = rinvsqO^3; only the O site gets the c6/c12 term. */
2177 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
2178 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
2179 tja
= ntiA
+2*type
[jnra
];
2180 tjb
= ntiA
+2*type
[jnrb
];
2181 tjc
= ntiA
+2*type
[jnrc
];
2182 tjd
= ntiA
+2*type
[jnrd
];
2183 /* load 4 j charges and multiply by iq */
2184 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
2185 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
2186 qqO
= vec_madd(iqO
,jq
,nul
);
2187 qqH
= vec_madd(iqH
,jq
,nul
);
/* vnb6 = c6*rinvsix, vnb12 = c12*rinvsix^2, vcoul = qq*rinv.
 * fsO  = (vec_twelve()*vnb12 - vec_six()*vnb6 + vcoulO)*rinvsqO,
 * fsH1 and fsH2 = vcoul*rinvsq. The statements are interleaved. */
2188 vnb6
= vec_madd(c6
,rinvsix
,nul
);
2189 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
2190 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
2191 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
2192 vnbtot
= vec_add(vnbtot
,vnb12
);
2193 fsO
= vec_madd(vec_twelve(),vnb12
,vcoulO
);
2194 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
2195 vnbtot
= vec_sub(vnbtot
,vnb6
);
2196 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
2197 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
2198 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
2199 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2200 vctot
= vec_add(vctot
,vcoulO
);
2201 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
2202 vctot
= vec_add(vctot
,vcoulH1
);
/* fi* accumulate fs*d for each i site. dOx/dOy/dOz are reused to collect
 * the negated sum over the three sites, which is added to the j atoms. */
2203 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2204 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2205 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2206 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2207 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2208 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2209 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2210 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2211 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2212 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2213 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2214 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2215 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2216 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2217 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2218 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2219 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2220 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2222 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
2223 add_xyz_to_mem(faction
+j3a
,tmp1
);
2224 add_xyz_to_mem(faction
+j3b
,tmp2
);
2225 add_xyz_to_mem(faction
+j3c
,tmp3
);
2226 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Three leftover j atoms (the guarding condition is not visible in this
 * listing): nul fills the fourth transpose input and the highest element
 * of each rinv is zeroed after the inverse square root. */
2235 transpose_4_to_3(load_xyz(pos
+j3a
),
2237 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
2238 dOx
= vec_sub(iOx
,dH2x
);
2239 dOy
= vec_sub(iOy
,dH2y
);
2240 dOz
= vec_sub(iOz
,dH2z
);
2241 dH1x
= vec_sub(iH1x
,dH2x
);
2242 dH1y
= vec_sub(iH1y
,dH2y
);
2243 dH1z
= vec_sub(iH1z
,dH2z
);
2244 dH2x
= vec_sub(iH2x
,dH2x
);
2245 dH2y
= vec_sub(iH2y
,dH2y
);
2246 dH2z
= vec_sub(iH2z
,dH2z
);
2248 rsqO
= vec_madd(dOx
,dOx
,nul
);
2249 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2250 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2251 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2252 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2253 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2254 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2255 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2256 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2257 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2258 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2260 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2261 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2262 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2263 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
2264 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
2265 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
2266 tja
= ntiA
+2*type
[jnra
];
2267 tjb
= ntiA
+2*type
[jnrb
];
2268 tjc
= ntiA
+2*type
[jnrc
];
2269 /* load the c6/c12 pairs for the 3 j atoms, then multiply the j charges (jq, loaded above) by iq */
2270 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
2271 qqO
= vec_madd(iqO
,jq
,nul
);
2272 qqH
= vec_madd(iqH
,jq
,nul
);
2273 vnb6
= vec_madd(c6
,rinvsix
,nul
);
2274 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
2275 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
2276 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
2277 vnbtot
= vec_add(vnbtot
,vnb12
);
2278 fsO
= vec_madd(vec_twelve(),vnb12
,vcoulO
);
2279 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
2280 vnbtot
= vec_sub(vnbtot
,vnb6
);
2281 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
2282 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
2283 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
2284 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2285 vctot
= vec_add(vctot
,vcoulO
);
2286 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
2287 vctot
= vec_add(vctot
,vcoulH1
);
2289 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2290 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2291 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2292 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2293 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2294 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2295 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2296 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2297 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2298 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2299 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2300 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2301 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2302 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2303 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2304 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2305 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2306 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2308 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
2309 add_xyz_to_mem(faction
+j3a
,tmp1
);
2310 add_xyz_to_mem(faction
+j3b
,tmp2
);
2311 add_xyz_to_mem(faction
+j3c
,tmp3
);
2312 } else if(k
<(nj1
-1)) {
/* Two leftover j atoms: the highest two elements of each rinv are zeroed. */
2317 transpose_2_to_3(load_xyz(pos
+j3a
),
2318 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
2319 dOx
= vec_sub(iOx
,dH2x
);
2320 dOy
= vec_sub(iOy
,dH2y
);
2321 dOz
= vec_sub(iOz
,dH2z
);
2322 dH1x
= vec_sub(iH1x
,dH2x
);
2323 dH1y
= vec_sub(iH1y
,dH2y
);
2324 dH1z
= vec_sub(iH1z
,dH2z
);
2325 dH2x
= vec_sub(iH2x
,dH2x
);
2326 dH2y
= vec_sub(iH2y
,dH2y
);
2327 dH2z
= vec_sub(iH2z
,dH2z
);
2329 rsqO
= vec_madd(dOx
,dOx
,nul
);
2330 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2331 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2332 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2333 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2334 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2335 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2336 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2337 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2338 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2339 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2341 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2342 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2343 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2344 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
2345 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
2346 tja
= ntiA
+2*type
[jnra
];
2347 tjb
= ntiA
+2*type
[jnrb
];
2348 /* load 2 j charges and multiply by iq */
2349 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
2350 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
2351 qqO
= vec_madd(iqO
,jq
,nul
);
2352 qqH
= vec_madd(iqH
,jq
,nul
);
2353 vnb6
= vec_madd(c6
,rinvsix
,nul
);
2354 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
2355 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
2356 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
2357 vnbtot
= vec_add(vnbtot
,vnb12
);
2358 fsO
= vec_madd(vec_twelve(),vnb12
,vcoulO
);
2359 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
2360 vnbtot
= vec_sub(vnbtot
,vnb6
);
2361 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
2362 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
2363 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
2364 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2365 vctot
= vec_add(vctot
,vcoulO
);
2366 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
2367 vctot
= vec_add(vctot
,vcoulH1
);
2369 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2370 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2371 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2372 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2373 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2374 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2375 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2376 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2377 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2378 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2379 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2380 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2381 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2382 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2383 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2384 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2385 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2386 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2388 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
2389 add_xyz_to_mem(faction
+j3a
,tmp1
);
2390 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* One leftover j atom (the guarding condition is not visible in this
 * listing): the highest three elements of each rinv are zeroed. */
2394 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
2395 dOx
= vec_sub(iOx
,dH2x
);
2396 dOy
= vec_sub(iOy
,dH2y
);
2397 dOz
= vec_sub(iOz
,dH2z
);
2398 dH1x
= vec_sub(iH1x
,dH2x
);
2399 dH1y
= vec_sub(iH1y
,dH2y
);
2400 dH1z
= vec_sub(iH1z
,dH2z
);
2401 dH2x
= vec_sub(iH2x
,dH2x
);
2402 dH2y
= vec_sub(iH2y
,dH2y
);
2403 dH2z
= vec_sub(iH2z
,dH2z
);
2405 rsqO
= vec_madd(dOx
,dOx
,nul
);
2406 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2407 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2408 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2409 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2410 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2411 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2412 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2413 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2414 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2415 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2417 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2418 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2419 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2420 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
2421 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
2422 tja
= ntiA
+2*type
[jnra
];
2423 /* load 1 j charges and multiply by iq */
2424 jq
=load_1_float(charge
+jnra
);
2425 load_1_pair(nbfp
+tja
,&c6
,&c12
);
2426 qqO
= vec_madd(iqO
,jq
,nul
);
2427 qqH
= vec_madd(iqH
,jq
,nul
);
2428 vnb6
= vec_madd(c6
,rinvsix
,nul
);
2429 vcoulO
= vec_madd(qqO
,rinvO
,nul
);
2430 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
2431 vcoulH1
= vec_madd(qqH
,rinvH1
,nul
);
2432 vnbtot
= vec_add(vnbtot
,vnb12
);
2433 fsO
= vec_madd(vec_twelve(),vnb12
,vcoulO
);
2434 vcoulH2
= vec_madd(qqH
,rinvH2
,nul
);
2435 vnbtot
= vec_sub(vnbtot
,vnb6
);
2436 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
2437 fsH1
= vec_madd(vcoulH1
,rinvsqH1
,nul
);
2438 fsH2
= vec_madd(vcoulH2
,rinvsqH2
,nul
);
2439 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2440 vctot
= vec_add(vctot
,vcoulO
);
2441 vcoulH1
= vec_add(vcoulH1
,vcoulH2
);
2442 vctot
= vec_add(vctot
,vcoulH1
);
2444 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2445 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2446 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2447 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2448 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2449 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2450 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2451 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2452 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2453 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2454 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2455 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2456 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2457 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2458 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2459 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2460 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2461 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2463 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
2464 add_xyz_to_mem(faction
+j3a
,tmp1
);
2466 /* update outer data */
2467 update_i_water_forces(faction
+ii3
,fshift
+is3
,
2468 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Add this entry's vctot and vnbtot to Vc and Vnb at index gid[n]. */
2470 add_vector_to_float(Vc
+gid
[n
],vctot
);
2471 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl2020_altivec - AltiVec inner loop: Coulomb interaction with krf/crf
 * correction terms between the three sites of an i water (O, H1, H2) and
 * single j atoms. No c6/c12 term is evaluated here.
 *
 * Per site and j atom: krsq = krf*rsq, the energy added to vctot is
 * qq*(rinv + krsq - crf), and the scalar force factor is
 * qq*(rinv - vec_two()*krsq)*rinvsq.
 *
 * The j atoms are handled four per pass in the main inner loop; the
 * sections after it handle three, two or one leftover j atoms and zero
 * the unused elements of both rsq and rinv. Per-site forces go to
 * update_i_water_forces(), the negated sum is added to faction for every
 * j atom, and vctot is added to Vc[gid[n]].
 *
 * NOTE(review): the parameter list and a number of statements (index
 * set-up, accumulator resets, some branch conditions) are not visible in
 * this listing; nul is presumably the all-zero vector - confirm.
 */
2477 void inl2020_altivec(
2494 vector
float vkrf
,vcrf
;
2495 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
2496 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
2497 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
2498 vector
float fsO
,fsH1
,fsH2
,krsqO
,krsqH1
,krsqH2
;
2499 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
2500 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
2501 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
2502 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rinvsqH1
,rinvsqH2
,rsqO
,rsqH1
,rsqH2
;
2505 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
2506 int jnra
,jnrb
,jnrc
,jnrd
;
2507 int j3a
,j3b
,j3c
,j3d
;
/* Broadcast the scalar constants facel, krf and crf to vectors. */
2510 vfacel
=load_float_and_splat(&facel
);
2511 vkrf
=load_float_and_splat(&krf
);
2512 vcrf
=load_float_and_splat(&crf
);
/* Pre-scale the i charges by facel: charge[iinr[0]] is used for the O
 * site, charge[iinr[0]+1] for both H sites. */
2514 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
2515 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
/* Outer loop over the nri i entries. */
2517 for(n
=0;n
<nri
;n
++) {
2521 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
2522 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main inner loop: four j atoms per pass. */
2536 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
2545 transpose_4_to_3(load_xyz(pos
+j3a
),
2548 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* i site minus j position. dH2x/y/z hold the j coordinates coming out of
 * the transpose, so they are overwritten last. */
2549 dOx
= vec_sub(iOx
,dH2x
);
2550 dOy
= vec_sub(iOy
,dH2y
);
2551 dOz
= vec_sub(iOz
,dH2z
);
2552 dH1x
= vec_sub(iH1x
,dH2x
);
2553 dH1y
= vec_sub(iH1y
,dH2y
);
2554 dH1z
= vec_sub(iH1z
,dH2z
);
2555 dH2x
= vec_sub(iH2x
,dH2x
);
2556 dH2y
= vec_sub(iH2y
,dH2y
);
2557 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances, built up one component at a time. */
2559 rsqO
= vec_madd(dOx
,dOx
,nul
);
2560 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2561 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2562 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2563 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2564 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2565 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2566 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2567 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2568 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2569 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2570 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2571 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2572 /* load 4 j charges and multiply by iq */
2573 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
2574 qqO
= vec_madd(iqO
,jq
,nul
);
2575 qqH
= vec_madd(iqH
,jq
,nul
);
/* krsq = krf*rsq; vcoul = rinv + krsq - crf; vctot += qq*vcoul;
 * fs = (rinv - vec_two()*krsq)*qq*rinvsq. The statements are interleaved. */
2576 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
2577 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
2578 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
2579 vcoulO
= vec_add(rinvO
,krsqO
);
2580 vcoulH1
= vec_add(rinvH1
,krsqH1
);
2581 vcoulH2
= vec_add(rinvH2
,krsqH2
);
2582 vcoulO
= vec_sub(vcoulO
,vcrf
);
2583 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
2584 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
2585 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
2586 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
2587 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
2588 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
2589 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
2590 fsO
= vec_madd(fsO
,qqO
,nul
);
2591 fsH1
= vec_madd(fsH1
,qqH
,nul
);
2592 fsH2
= vec_madd(fsH2
,qqH
,nul
);
2593 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
2594 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2595 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
2596 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
/* fi* accumulate fs*d for each i site. dOx/dOy/dOz are reused to collect
 * the negated sum over the three sites, which is added to the j atoms. */
2597 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2598 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2599 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2600 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2601 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2602 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2603 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2604 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2605 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2606 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2607 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2608 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2609 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2610 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2611 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2612 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2613 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2614 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2616 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
2617 add_xyz_to_mem(faction
+j3a
,tmp1
);
2618 add_xyz_to_mem(faction
+j3b
,tmp2
);
2619 add_xyz_to_mem(faction
+j3c
,tmp3
);
2620 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Three leftover j atoms (the guarding condition is not visible in this
 * listing): nul fills the fourth transpose input, and the highest element
 * is zeroed in rsq before and in rinv after the inverse square root. */
2629 transpose_4_to_3(load_xyz(pos
+j3a
),
2631 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
2632 dOx
= vec_sub(iOx
,dH2x
);
2633 dOy
= vec_sub(iOy
,dH2y
);
2634 dOz
= vec_sub(iOz
,dH2z
);
2635 dH1x
= vec_sub(iH1x
,dH2x
);
2636 dH1y
= vec_sub(iH1y
,dH2y
);
2637 dH1z
= vec_sub(iH1z
,dH2z
);
2638 dH2x
= vec_sub(iH2x
,dH2x
);
2639 dH2y
= vec_sub(iH2y
,dH2y
);
2640 dH2z
= vec_sub(iH2z
,dH2z
);
2642 rsqO
= vec_madd(dOx
,dOx
,nul
);
2643 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2644 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2645 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2646 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2647 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2648 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2649 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2650 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2652 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
2653 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2654 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2656 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
2657 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2658 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2659 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2660 /* load 3 j charges and multiply by iq */
2661 qqO
= vec_madd(iqO
,jq
,nul
);
2662 qqH
= vec_madd(iqH
,jq
,nul
);
2663 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
2664 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
2665 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
2666 vcoulO
= vec_add(rinvO
,krsqO
);
2667 vcoulH1
= vec_add(rinvH1
,krsqH1
);
2668 vcoulH2
= vec_add(rinvH2
,krsqH2
);
2669 vcoulO
= vec_sub(vcoulO
,vcrf
);
2670 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
2671 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
2672 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
2673 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
2674 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
2675 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
2676 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
2677 fsO
= vec_madd(fsO
,qqO
,nul
);
2678 fsH1
= vec_madd(fsH1
,qqH
,nul
);
2679 fsH2
= vec_madd(fsH2
,qqH
,nul
);
2680 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
2681 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2682 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
2683 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
2685 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2686 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2687 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2688 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2689 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2690 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2691 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2692 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2693 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2694 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2695 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2696 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2697 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2698 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2699 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2700 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2701 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2702 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2704 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
2705 add_xyz_to_mem(faction
+j3a
,tmp1
);
2706 add_xyz_to_mem(faction
+j3b
,tmp2
);
2707 add_xyz_to_mem(faction
+j3c
,tmp3
);
2708 } else if(k
<(nj1
-1)) {
/* Two leftover j atoms: the highest two elements are zeroed in rsq before
 * and in rinv after the inverse square root. */
2713 transpose_2_to_3(load_xyz(pos
+j3a
),
2714 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
2715 dOx
= vec_sub(iOx
,dH2x
);
2716 dOy
= vec_sub(iOy
,dH2y
);
2717 dOz
= vec_sub(iOz
,dH2z
);
2718 dH1x
= vec_sub(iH1x
,dH2x
);
2719 dH1y
= vec_sub(iH1y
,dH2y
);
2720 dH1z
= vec_sub(iH1z
,dH2z
);
2721 dH2x
= vec_sub(iH2x
,dH2x
);
2722 dH2y
= vec_sub(iH2y
,dH2y
);
2723 dH2z
= vec_sub(iH2z
,dH2z
);
2725 rsqO
= vec_madd(dOx
,dOx
,nul
);
2726 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2727 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2728 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2729 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2730 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2731 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2732 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2733 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2735 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
2736 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2737 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2739 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2740 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2741 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2742 /* load 2 j charges and multiply by iq */
2743 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
2744 qqO
= vec_madd(iqO
,jq
,nul
);
2745 qqH
= vec_madd(iqH
,jq
,nul
);
2746 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
2747 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
2748 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
2749 vcoulO
= vec_add(rinvO
,krsqO
);
2750 vcoulH1
= vec_add(rinvH1
,krsqH1
);
2751 vcoulH2
= vec_add(rinvH2
,krsqH2
);
2752 vcoulO
= vec_sub(vcoulO
,vcrf
);
2753 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
2754 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
2755 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
2756 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
2757 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
2758 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
2759 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
2760 fsO
= vec_madd(fsO
,qqO
,nul
);
2761 fsH1
= vec_madd(fsH1
,qqH
,nul
);
2762 fsH2
= vec_madd(fsH2
,qqH
,nul
);
2763 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
2764 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2765 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
2766 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
2768 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2769 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2770 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2771 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2772 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2773 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2774 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2775 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2776 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2777 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2778 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2779 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2780 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2781 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2782 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2783 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2784 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2785 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2787 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
2788 add_xyz_to_mem(faction
+j3a
,tmp1
);
2789 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* One leftover j atom (the guarding condition is not visible in this
 * listing): the highest three elements are zeroed in rsq before and in
 * rinv after the inverse square root. */
2793 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
2794 dOx
= vec_sub(iOx
,dH2x
);
2795 dOy
= vec_sub(iOy
,dH2y
);
2796 dOz
= vec_sub(iOz
,dH2z
);
2797 dH1x
= vec_sub(iH1x
,dH2x
);
2798 dH1y
= vec_sub(iH1y
,dH2y
);
2799 dH1z
= vec_sub(iH1z
,dH2z
);
2800 dH2x
= vec_sub(iH2x
,dH2x
);
2801 dH2y
= vec_sub(iH2y
,dH2y
);
2802 dH2z
= vec_sub(iH2z
,dH2z
);
2804 rsqO
= vec_madd(dOx
,dOx
,nul
);
2805 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2806 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2807 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2808 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2809 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2810 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2811 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2812 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2814 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
2815 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2816 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
2818 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2819 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2820 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
2821 /* load 1 j charges and multiply by iq */
2822 jq
=load_1_float(charge
+jnra
);
2823 qqO
= vec_madd(iqO
,jq
,nul
);
2824 qqH
= vec_madd(iqH
,jq
,nul
);
2825 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
2826 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
2827 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
2828 vcoulO
= vec_add(rinvO
,krsqO
);
2829 vcoulH1
= vec_add(rinvH1
,krsqH1
);
2830 vcoulH2
= vec_add(rinvH2
,krsqH2
);
2831 vcoulO
= vec_sub(vcoulO
,vcrf
);
2832 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
2833 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
2834 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
2835 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
2836 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
2837 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
2838 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
2839 fsO
= vec_madd(fsO
,qqO
,nul
);
2840 fsH1
= vec_madd(fsH1
,qqH
,nul
);
2841 fsH2
= vec_madd(fsH2
,qqH
,nul
);
2842 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
2843 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
2844 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
2845 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
2847 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
2848 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
2849 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
2850 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
2851 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
2852 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
2853 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
2854 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
2855 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
2856 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
2857 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
2858 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
2859 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
2860 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
2861 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
2862 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
2863 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
2864 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
2866 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
2867 add_xyz_to_mem(faction
+j3a
,tmp1
);
2869 /* update outer data */
2870 update_i_water_forces(faction
+ii3
,fshift
+is3
,
2871 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Add this entry's vctot to Vc at index gid[n]. */
2873 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl2120_altivec
 *
 * AltiVec inner loop for a three-site i-molecule (sites O, H1, H2) interacting
 * with lists of single j atoms, processed four j atoms per vector.
 *
 * Per the visible arithmetic:
 *   - every site gets a Coulomb term with a quadratic correction:
 *       potential = qq * (1/r + krf*r^2 - crf)
 *       scalar force = qq * (1/r - 2*krf*r^2) / r^2
 *   - only the O site gets a Lennard-Jones term (rinvsix is built from
 *     rinvsqO alone): vnb6 = c6/r^6, vnb12 = c12/r^12, adding
 *     (12*vnb12 - 6*vnb6)/r^2 to the O scalar force.
 *
 * NOTE(review): the parameter list, the braces, the per-iteration index
 * set-up and the initialisation of nul/vctot/vnbtot/fi* sit on lines that
 * are not visible in this listing; nul is presumably the zero vector and
 * vec_two/vec_six/vec_twelve presumably splat 2, 6 and 12 - confirm against
 * the full file.
 */
2879 void inl2120_altivec(
2900 vector
float vkrf
,vcrf
;
2901 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
2902 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
2903 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
2904 vector
float vnbtot
,c6
,c12
,rinvsix
,vnb6
,vnb12
;
2905 vector
float fsO
,fsH1
,fsH2
,krsqO
,krsqH1
,krsqH2
;
2906 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
2907 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
2908 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
2909 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rinvsqH1
,rinvsqH2
,rsqO
,rsqH1
,rsqH2
;
2912 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
2913 int jnra
,jnrb
,jnrc
,jnrd
;
2914 int j3a
,j3b
,j3c
,j3d
;
2915 int tja
,tjb
,tjc
,tjd
;
/* Bring the scalars facel, krf and crf into vector form (by its name the
 * helper splats the value to all lanes - helper body not visible here). */
2918 vfacel
=load_float_and_splat(&facel
);
2919 vkrf
=load_float_and_splat(&krf
);
2920 vcrf
=load_float_and_splat(&crf
);
/* i-site charges charge[ii] (O) and charge[ii+1] (H) pre-multiplied by facel.
 * These are computed once, before the loop over n. */
2922 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
2923 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* Row offset into nbfp for the i atom type; j offsets 2*type[j] are added
 * to it below to locate each (c6,c12) pair. */
2924 ntiA
= 2*ntype
*type
[ii
];
/* Outer loop over the nri i-entries. */
2926 for(n
=0;n
<nri
;n
++) {
/* Load the i-molecule sites from pos+ii3 using shiftvec+is3 into nine
 * per-component vectors (helper body not visible here). */
2930 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
2931 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main j loop: four j atoms (jnra..jnrd) per iteration. */
2946 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* Gather the j coordinates into x/y/z vectors; dH2x/y/z temporarily hold
 * the j positions and are overwritten last in the subtraction block. */
2955 transpose_4_to_3(load_xyz(pos
+j3a
),
2958 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
2959 dOx
= vec_sub(iOx
,dH2x
);
2960 dOy
= vec_sub(iOy
,dH2y
);
2961 dOz
= vec_sub(iOz
,dH2z
);
2962 dH1x
= vec_sub(iH1x
,dH2x
);
2963 dH1y
= vec_sub(iH1y
,dH2y
);
2964 dH1z
= vec_sub(iH1z
,dH2z
);
2965 dH2x
= vec_sub(iH2x
,dH2x
);
2966 dH2y
= vec_sub(iH2y
,dH2y
);
2967 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances per site: dx*dx + dy*dy + dz*dz. */
2969 rsqO
= vec_madd(dOx
,dOx
,nul
);
2970 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
2971 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
2972 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
2973 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
2974 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
2975 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
2976 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
2977 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
2978 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
2979 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
2980 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
2981 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
/* rinvsix = rinvsqO^3: the Lennard-Jones part uses the O site only. */
2982 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
2983 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
2984 tja
= ntiA
+2*type
[jnra
];
2985 tjb
= ntiA
+2*type
[jnrb
];
2986 tjc
= ntiA
+2*type
[jnrc
];
2987 tjd
= ntiA
+2*type
[jnrd
];
2988 /* load 4 j charges and multiply by iq */
2989 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
2990 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
2991 qqO
= vec_madd(iqO
,jq
,nul
);
2992 qqH
= vec_madd(iqH
,jq
,nul
);
2993 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
2994 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
2995 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
2996 vnb6
= vec_madd(c6
,rinvsix
,nul
);
2997 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
/* Energy and scalar-force evaluation; the O and H statements are interleaved.
 * vec_nmsub(a,b,c) yields c - a*b, so fs starts as rinv - 2*krsq.
 * vcoul = rinv + krsq - crf is weighted by qq into vctot;
 * vnbtot accumulates vnb12 - vnb6. */
2998 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
2999 vcoulO
= vec_add(rinvO
,krsqO
);
3000 vcoulH1
= vec_add(rinvH1
,krsqH1
);
3001 vnbtot
= vec_add(vnbtot
,vnb12
);
3002 fsO
= vec_madd(qqO
,fsO
,nul
);
3003 vcoulH2
= vec_add(rinvH2
,krsqH2
);
3004 vcoulO
= vec_sub(vcoulO
,vcrf
);
3005 vnbtot
= vec_sub(vnbtot
,vnb6
);
3006 fsO
= vec_madd(vec_twelve(),vnb12
,fsO
);
3007 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
3008 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
3009 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
3010 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3011 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
3012 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
3013 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
3014 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
3015 fsH1
= vec_madd(fsH1
,qqH
,nul
);
3016 fsH2
= vec_madd(fsH2
,qqH
,nul
);
3017 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
3018 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
3019 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
/* Accumulate the i-site forces (fi* += fs*d) and reuse dOx/dOy/dOz to build
 * the j-atom force as the negated sum of the three site contributions. */
3021 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3022 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3023 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3024 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3025 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3026 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3027 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3028 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3029 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3030 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3031 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3032 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3033 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3034 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3035 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3036 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3037 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3038 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* Regroup the x/y/z force vectors into one xyz vector per j atom and add
 * each to faction at that atom's offset. */
3040 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
3041 add_xyz_to_mem(faction
+j3a
,tmp1
);
3042 add_xyz_to_mem(faction
+j3b
,tmp2
);
3043 add_xyz_to_mem(faction
+j3c
,tmp3
);
3044 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Remainder path for three j atoms (its branch condition is on a line not
 * visible in this listing); the fourth input slot is filled with nul. */
3053 transpose_4_to_3(load_xyz(pos
+j3a
),
3055 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
3056 dOx
= vec_sub(iOx
,dH2x
);
3057 dOy
= vec_sub(iOy
,dH2y
);
3058 dOz
= vec_sub(iOz
,dH2z
);
3059 dH1x
= vec_sub(iH1x
,dH2x
);
3060 dH1y
= vec_sub(iH1y
,dH2y
);
3061 dH1z
= vec_sub(iH1z
,dH2z
);
3062 dH2x
= vec_sub(iH2x
,dH2x
);
3063 dH2y
= vec_sub(iH2y
,dH2y
);
3064 dH2z
= vec_sub(iH2z
,dH2z
);
3066 rsqO
= vec_madd(dOx
,dOx
,nul
);
3067 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3068 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3069 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3070 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3071 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3072 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3073 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3074 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The unused highest lane is cleared in rsq before, and in rinv after, the
 * inverse square root - presumably so the padded lane contributes nothing
 * to energies or forces (helper bodies not visible; confirm). */
3076 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3077 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3078 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3080 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
3081 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
3082 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
3083 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
3084 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
3085 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
3086 tja
= ntiA
+2*type
[jnra
];
3087 tjb
= ntiA
+2*type
[jnrb
];
3088 tjc
= ntiA
+2*type
[jnrc
];
3089 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
3090 qqO
= vec_madd(iqO
,jq
,nul
);
3091 qqH
= vec_madd(iqH
,jq
,nul
);
3092 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
3093 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
3094 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
3095 vnb6
= vec_madd(c6
,rinvsix
,nul
);
3096 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
3097 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
3098 vcoulO
= vec_add(rinvO
,krsqO
);
3099 vcoulH1
= vec_add(rinvH1
,krsqH1
);
3100 vnbtot
= vec_add(vnbtot
,vnb12
);
3101 fsO
= vec_madd(qqO
,fsO
,nul
);
3102 vcoulH2
= vec_add(rinvH2
,krsqH2
);
3103 vcoulO
= vec_sub(vcoulO
,vcrf
);
3104 vnbtot
= vec_sub(vnbtot
,vnb6
);
3105 fsO
= vec_madd(vec_twelve(),vnb12
,fsO
);
3106 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
3107 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
3108 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
3109 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3110 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
3111 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
3112 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
3113 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
3114 fsH1
= vec_madd(fsH1
,qqH
,nul
);
3115 fsH2
= vec_madd(fsH2
,qqH
,nul
);
3116 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
3117 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
3118 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
3120 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3121 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3122 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3123 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3124 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3125 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3126 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3127 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3128 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3129 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3130 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3131 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3132 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3133 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3134 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3135 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3136 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3137 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* With nul as the fourth input, the first three outputs are the xyz force
 * vectors of the three j atoms. */
3139 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
3140 add_xyz_to_mem(faction
+j3a
,tmp1
);
3141 add_xyz_to_mem(faction
+j3b
,tmp2
);
3142 add_xyz_to_mem(faction
+j3c
,tmp3
);
3143 } else if(k
<(nj1
-1)) {
/* Remainder path for two j atoms. */
3148 transpose_2_to_3(load_xyz(pos
+j3a
),
3149 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
3150 dOx
= vec_sub(iOx
,dH2x
);
3151 dOy
= vec_sub(iOy
,dH2y
);
3152 dOz
= vec_sub(iOz
,dH2z
);
3153 dH1x
= vec_sub(iH1x
,dH2x
);
3154 dH1y
= vec_sub(iH1y
,dH2y
);
3155 dH1z
= vec_sub(iH1z
,dH2z
);
3156 dH2x
= vec_sub(iH2x
,dH2x
);
3157 dH2y
= vec_sub(iH2y
,dH2y
);
3158 dH2z
= vec_sub(iH2z
,dH2z
);
3160 rsqO
= vec_madd(dOx
,dOx
,nul
);
3161 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3162 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3163 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3164 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3165 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3166 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3167 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3168 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* Same lane clearing as in the three-atom path, here for the two unused
 * highest lanes. */
3170 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3171 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3172 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3174 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
3175 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
3176 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
3177 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
3178 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
3179 tja
= ntiA
+2*type
[jnra
];
3180 tjb
= ntiA
+2*type
[jnrb
];
3181 /* load 2 j charges and multiply by iq */
3182 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
3183 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
3184 qqO
= vec_madd(iqO
,jq
,nul
);
3185 qqH
= vec_madd(iqH
,jq
,nul
);
3186 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
3187 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
3188 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
3189 vnb6
= vec_madd(c6
,rinvsix
,nul
);
3190 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
3191 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
3192 vcoulO
= vec_add(rinvO
,krsqO
);
3193 vcoulH1
= vec_add(rinvH1
,krsqH1
);
3194 vnbtot
= vec_add(vnbtot
,vnb12
);
3195 fsO
= vec_madd(qqO
,fsO
,nul
);
3196 vcoulH2
= vec_add(rinvH2
,krsqH2
);
3197 vcoulO
= vec_sub(vcoulO
,vcrf
);
3198 vnbtot
= vec_sub(vnbtot
,vnb6
);
3199 fsO
= vec_madd(vec_twelve(),vnb12
,fsO
);
3200 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
3201 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
3202 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
3203 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3204 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
3205 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
3206 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
3207 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
3208 fsH1
= vec_madd(fsH1
,qqH
,nul
);
3209 fsH2
= vec_madd(fsH2
,qqH
,nul
);
3210 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
3211 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
3212 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
3214 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3215 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3216 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3217 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3218 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3219 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3220 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3221 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3222 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3223 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3224 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3225 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3226 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3227 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3228 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3229 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3230 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3231 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
3233 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
3234 add_xyz_to_mem(faction
+j3a
,tmp1
);
3235 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* Remainder path for a single j atom (its branch keyword is on a line not
 * visible in this listing). */
3239 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
3240 dOx
= vec_sub(iOx
,dH2x
);
3241 dOy
= vec_sub(iOy
,dH2y
);
3242 dOz
= vec_sub(iOz
,dH2z
);
3243 dH1x
= vec_sub(iH1x
,dH2x
);
3244 dH1y
= vec_sub(iH1y
,dH2y
);
3245 dH1z
= vec_sub(iH1z
,dH2z
);
3246 dH2x
= vec_sub(iH2x
,dH2x
);
3247 dH2y
= vec_sub(iH2y
,dH2y
);
3248 dH2z
= vec_sub(iH2z
,dH2z
);
3250 rsqO
= vec_madd(dOx
,dOx
,nul
);
3251 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3252 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3253 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3254 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3255 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3256 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3257 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3258 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* Same lane clearing, here for the three unused highest lanes. */
3260 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3261 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3262 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3264 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
3265 rinvsqH1
= vec_madd(rinvH1
,rinvH1
,nul
);
3266 rinvsqH2
= vec_madd(rinvH2
,rinvH2
,nul
);
3267 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
3268 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
3269 tja
= ntiA
+2*type
[jnra
];
3270 /* load 1 j charges and multiply by iq */
3271 jq
=load_1_float(charge
+jnra
);
3272 load_1_pair(nbfp
+tja
,&c6
,&c12
);
3273 qqO
= vec_madd(iqO
,jq
,nul
);
3274 qqH
= vec_madd(iqH
,jq
,nul
);
3275 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
3276 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
3277 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
3278 vnb6
= vec_madd(c6
,rinvsix
,nul
);
3279 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
3280 fsO
= vec_nmsub(vec_two(),krsqO
,rinvO
);
3281 vcoulO
= vec_add(rinvO
,krsqO
);
3282 vcoulH1
= vec_add(rinvH1
,krsqH1
);
3283 vnbtot
= vec_add(vnbtot
,vnb12
);
3284 fsO
= vec_madd(qqO
,fsO
,nul
);
3285 vcoulH2
= vec_add(rinvH2
,krsqH2
);
3286 vcoulO
= vec_sub(vcoulO
,vcrf
);
3287 vnbtot
= vec_sub(vnbtot
,vnb6
);
3288 fsO
= vec_madd(vec_twelve(),vnb12
,fsO
);
3289 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
3290 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
3291 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
3292 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3293 fsH1
= vec_nmsub(vec_two(),krsqH1
,rinvH1
);
3294 fsH2
= vec_nmsub(vec_two(),krsqH2
,rinvH2
);
3295 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
3296 fsO
= vec_madd(fsO
,rinvsqO
,nul
);
3297 fsH1
= vec_madd(fsH1
,qqH
,nul
);
3298 fsH2
= vec_madd(fsH2
,qqH
,nul
);
3299 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
3300 fsH1
= vec_madd(fsH1
,rinvsqH1
,nul
);
3301 fsH2
= vec_madd(fsH2
,rinvsqH2
,nul
);
3303 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3304 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3305 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3306 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3307 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3308 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3309 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3310 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3311 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3312 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3313 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3314 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3315 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3316 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3317 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3318 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3319 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3320 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
3322 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
3323 add_xyz_to_mem(faction
+j3a
,tmp1
);
3325 /* update outer data */
3326 update_i_water_forces(faction
+ii3
,fshift
+is3
,
3327 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Fold the vector energy accumulators into the scalar totals of this
 * entry's energy group: Vc[gid[n]] and Vnb[gid[n]]. */
3329 add_vector_to_float(Vc
+gid
[n
],vctot
);
3330 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3020_altivec
 *
 * AltiVec inner loop for a three-site i-molecule (sites O, H1, H2) interacting
 * with lists of single j atoms, using a tabulated Coulomb interaction only;
 * no Lennard-Jones term appears in the visible code.
 *
 * Per the visible arithmetic, for each site:
 *   r = rsq * rinv is scaled by tabscale and passed to do_N_ctable_coul with
 *   VFtab, which returns VVc and FFc (helper bodies not visible here);
 *   potential accumulates qq*VVc into vctot;
 *   scalar force is -(qq*FFc) * tabscale * rinv.
 *
 * NOTE(review): the parameter list, the braces, the per-iteration index
 * set-up and the initialisation of nul/vctot/fi* sit on lines that are not
 * visible in this listing; nul is presumably the zero vector - confirm
 * against the full file.
 */
3336 void inl3020_altivec(
3353 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
3354 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
3355 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
3356 vector
float fsO
,fsH1
,fsH2
,tsc
,VVcO
,FFcO
,VVcH1
,FFcH1
,VVcH2
,FFcH2
;
3357 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
3358 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
3359 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
3360 vector
float rinvO
,rinvH1
,rinvH2
,rO
,rH1
,rH2
,rsqO
,rsqH1
,rsqH2
;
3363 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
3364 int jnra
,jnrb
,jnrc
,jnrd
;
3365 int j3a
,j3b
,j3c
,j3d
;
/* Bring the scalars facel and tabscale into vector form (by its name the
 * helper splats the value to all lanes - helper body not visible here). */
3368 vfacel
=load_float_and_splat(&facel
);
3369 tsc
=load_float_and_splat(&tabscale
);
/* i-site charges taken from the first i entry, charge[iinr[0]] (O) and
 * charge[iinr[0]+1] (H), pre-multiplied by facel; computed once before the
 * loop over n. */
3370 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
3371 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
/* Outer loop over the nri i-entries. */
3373 for(n
=0;n
<nri
;n
++) {
/* Load the i-molecule sites from pos+ii3 using shiftvec+is3 into nine
 * per-component vectors (helper body not visible here). */
3377 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
3378 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main j loop: four j atoms (jnra..jnrd) per iteration. */
3392 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* Gather the j coordinates into x/y/z vectors; dH2x/y/z temporarily hold
 * the j positions and are overwritten last in the subtraction block. */
3401 transpose_4_to_3(load_xyz(pos
+j3a
),
3404 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
3405 dOx
= vec_sub(iOx
,dH2x
);
3406 dOy
= vec_sub(iOy
,dH2y
);
3407 dOz
= vec_sub(iOz
,dH2z
);
3408 dH1x
= vec_sub(iH1x
,dH2x
);
3409 dH1y
= vec_sub(iH1y
,dH2y
);
3410 dH1z
= vec_sub(iH1z
,dH2z
);
3411 dH2x
= vec_sub(iH2x
,dH2x
);
3412 dH2y
= vec_sub(iH2y
,dH2y
);
3413 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances per site: dx*dx + dy*dy + dz*dz. */
3415 rsqO
= vec_madd(dOx
,dOx
,nul
);
3416 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3417 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3418 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3419 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3420 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3421 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3422 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3423 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
3424 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* r = rsq * (1/r): the distance itself, used as the table argument. */
3425 rO
= vec_madd(rsqO
,rinvO
,nul
);
3426 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3427 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3429 /* load 4 j charges and multiply by iq */
3430 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
/* Table lookups at r*tabscale yield VVc and FFc for each site. */
3431 do_4_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3432 do_4_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3433 do_4_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
3434 qqO
= vec_madd(iqO
,jq
,nul
);
3435 qqH
= vec_madd(iqH
,jq
,nul
);
/* vctot += qq*VVc; fs = (nul - qq*FFc) * tsc * rinv, since
 * vec_nmsub(a,b,c) yields c - a*b. */
3436 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3437 fsO
= vec_nmsub(qqO
,FFcO
,nul
);
3438 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3439 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3440 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3441 fsO
= vec_madd(fsO
,tsc
,nul
);
3442 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3443 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3444 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3445 fsO
= vec_madd(fsO
,rinvO
,nul
);
3446 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3447 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
/* Accumulate the i-site forces (fi* += fs*d) and reuse dOx/dOy/dOz to build
 * the j-atom force as the negated sum of the three site contributions. */
3449 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3450 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3451 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3452 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3453 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3454 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3455 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3456 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3457 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3458 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3459 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3460 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3461 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3462 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3463 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3464 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3465 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3466 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* Regroup the x/y/z force vectors into one xyz vector per j atom and add
 * each to faction at that atom's offset. */
3468 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
3469 add_xyz_to_mem(faction
+j3a
,tmp1
);
3470 add_xyz_to_mem(faction
+j3b
,tmp2
);
3471 add_xyz_to_mem(faction
+j3c
,tmp3
);
3472 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Remainder path for three j atoms (its branch condition is on a line not
 * visible in this listing); the fourth input slot is filled with nul. */
3481 transpose_4_to_3(load_xyz(pos
+j3a
),
3483 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
3484 dOx
= vec_sub(iOx
,dH2x
);
3485 dOy
= vec_sub(iOy
,dH2y
);
3486 dOz
= vec_sub(iOz
,dH2z
);
3487 dH1x
= vec_sub(iH1x
,dH2x
);
3488 dH1y
= vec_sub(iH1y
,dH2y
);
3489 dH1z
= vec_sub(iH1z
,dH2z
);
3490 dH2x
= vec_sub(iH2x
,dH2x
);
3491 dH2y
= vec_sub(iH2y
,dH2y
);
3492 dH2z
= vec_sub(iH2z
,dH2z
);
3494 rsqO
= vec_madd(dOx
,dOx
,nul
);
3495 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3496 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3497 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3498 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3499 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3500 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3501 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3502 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The unused highest lane is cleared in rsq before, and in rinv after, the
 * inverse square root - presumably so the padded lane contributes nothing
 * to energies or forces (helper bodies not visible; confirm). */
3504 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3505 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3506 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3508 rO
= vec_madd(rsqO
,rinvO
,nul
);
3509 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3510 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3512 /* load 3 j charges and multiply by iq */
3513 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
3514 do_3_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3515 do_3_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3516 do_3_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
3517 qqO
= vec_madd(iqO
,jq
,nul
);
3518 qqH
= vec_madd(iqH
,jq
,nul
);
3519 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3520 fsO
= vec_nmsub(qqO
,FFcO
,nul
);
3521 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3522 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3523 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3524 fsO
= vec_madd(fsO
,tsc
,nul
);
3525 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3526 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3527 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3528 fsO
= vec_madd(fsO
,rinvO
,nul
);
3529 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3530 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
3532 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3533 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3534 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3535 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3536 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3537 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3538 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3539 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3540 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3541 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3542 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3543 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3544 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3545 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3546 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3547 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3548 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3549 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* With nul as the fourth input, the first three outputs are the xyz force
 * vectors of the three j atoms. */
3551 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
3552 add_xyz_to_mem(faction
+j3a
,tmp1
);
3553 add_xyz_to_mem(faction
+j3b
,tmp2
);
3554 add_xyz_to_mem(faction
+j3c
,tmp3
);
3555 } else if(k
<(nj1
-1)) {
/* Remainder path for two j atoms. */
3560 transpose_2_to_3(load_xyz(pos
+j3a
),
3561 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
3562 dOx
= vec_sub(iOx
,dH2x
);
3563 dOy
= vec_sub(iOy
,dH2y
);
3564 dOz
= vec_sub(iOz
,dH2z
);
3565 dH1x
= vec_sub(iH1x
,dH2x
);
3566 dH1y
= vec_sub(iH1y
,dH2y
);
3567 dH1z
= vec_sub(iH1z
,dH2z
);
3568 dH2x
= vec_sub(iH2x
,dH2x
);
3569 dH2y
= vec_sub(iH2y
,dH2y
);
3570 dH2z
= vec_sub(iH2z
,dH2z
);
3572 rsqO
= vec_madd(dOx
,dOx
,nul
);
3573 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3574 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3575 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3576 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3577 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3578 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3579 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3580 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* Same lane clearing as in the three-atom path, here for the two unused
 * highest lanes. */
3582 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3583 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3584 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3586 rO
= vec_madd(rsqO
,rinvO
,nul
);
3587 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3588 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3590 /* load 2 j charges and multiply by iq */
3591 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
3592 do_2_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3593 do_2_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3594 do_2_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
3595 qqO
= vec_madd(iqO
,jq
,nul
);
3596 qqH
= vec_madd(iqH
,jq
,nul
);
3597 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3598 fsO
= vec_nmsub(qqO
,FFcO
,nul
);
3599 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3600 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3601 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3602 fsO
= vec_madd(fsO
,tsc
,nul
);
3603 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3604 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3605 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3606 fsO
= vec_madd(fsO
,rinvO
,nul
);
3607 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3608 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
3610 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3611 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3612 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3613 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3614 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3615 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3616 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3617 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3618 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3619 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3620 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3621 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3622 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3623 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3624 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3625 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3626 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3627 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
3629 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
3630 add_xyz_to_mem(faction
+j3a
,tmp1
);
3631 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* Remainder path for a single j atom (its branch keyword is on a line not
 * visible in this listing). */
3635 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
3636 dOx
= vec_sub(iOx
,dH2x
);
3637 dOy
= vec_sub(iOy
,dH2y
);
3638 dOz
= vec_sub(iOz
,dH2z
);
3639 dH1x
= vec_sub(iH1x
,dH2x
);
3640 dH1y
= vec_sub(iH1y
,dH2y
);
3641 dH1z
= vec_sub(iH1z
,dH2z
);
3642 dH2x
= vec_sub(iH2x
,dH2x
);
3643 dH2y
= vec_sub(iH2y
,dH2y
);
3644 dH2z
= vec_sub(iH2z
,dH2z
);
3646 rsqO
= vec_madd(dOx
,dOx
,nul
);
3647 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3648 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3649 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3650 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3651 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3652 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3653 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3654 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* Same lane clearing, here for the three unused highest lanes. */
3656 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3657 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3658 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3660 rO
= vec_madd(rsqO
,rinvO
,nul
);
3661 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3662 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3664 /* load 1 j charges and multiply by iq */
3665 jq
=load_1_float(charge
+jnra
);
3666 do_1_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3667 do_1_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3668 do_1_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
3669 qqO
= vec_madd(iqO
,jq
,nul
);
3670 qqH
= vec_madd(iqH
,jq
,nul
);
3671 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3672 fsO
= vec_nmsub(qqO
,FFcO
,nul
);
3673 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3674 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3675 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3676 fsO
= vec_madd(fsO
,tsc
,nul
);
3677 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3678 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3679 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3680 fsO
= vec_madd(fsO
,rinvO
,nul
);
3681 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3682 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
3684 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3685 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3686 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3687 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3688 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3689 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3690 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3691 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3692 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3693 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3694 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3695 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3696 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3697 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3698 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3699 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3700 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3701 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
3703 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
3704 add_xyz_to_mem(faction
+j3a
,tmp1
);
3706 /* update outer data */
3707 update_i_water_forces(faction
+ii3
,fshift
+is3
,
3708 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Fold the vector Coulomb energy accumulator into the scalar total of this
 * entry's energy group, Vc[gid[n]]. */
3710 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl3120_altivec: inner loop for one i water (sites O, H1, H2) against
 * single j atoms, written with AltiVec vector operations.
 *
 * As far as the statements shown here go:
 *  - Coulomb terms for all three sites come from table lookups
 *    (do_N_ctable_coul evaluated at r*tsc);
 *  - the c6/c12 terms are evaluated from rinvsix for the O site only;
 *  - j atoms are processed four at a time, followed by branches that
 *    handle three, two or one leftover j atoms.
 *
 * NOTE(review): this listing is missing lines (parameter list, index
 * loads, some branch conditions, closing braces). The comments below
 * describe only the statements that are visible.
 */
3716 void inl3120_altivec(
/* Coordinates of the i-water sites, filled by load_1_water_shift_and_splat below. */
3737 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
/* i-j difference vectors. dH2x/y/z first receive the j coordinates, and
 * dOx/y/z are reused later to build the j-atom force. */
3738 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
/* NOTE(review): vcoulO, vcoulH1 and vcoulH2 are not referenced by any
 * statement shown in this listing. */
3739 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
3740 vector
float vnbtot
,c6
,c12
,rinvsix
,rinvsqO
,vnb6
,vnb12
;
3741 vector
float fsO
,fsH1
,fsH2
,tsc
,VVcO
,FFcO
,VVcH1
,FFcH1
,VVcH2
,FFcH2
;
3742 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
/* Force sums for the three i sites, handed to update_i_water_forces at the end. */
3743 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
3744 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
3745 vector
float rinvO
,rinvH1
,rinvH2
,rO
,rH1
,rH2
,rsqO
,rsqH1
,rsqH2
;
/* NOTE(review): k0 is not referenced by any statement shown in this listing. */
3747 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
3748 int jnra
,jnrb
,jnrc
,jnrd
;
3749 int j3a
,j3b
,j3c
,j3d
;
3750 int tja
,tjb
,tjc
,tjd
;
/* Vector copies of the scalars facel and tabscale (splat, judging by the helper name). */
3753 vfacel
=load_float_and_splat(&facel
);
3754 tsc
=load_float_and_splat(&tabscale
);
/* i charges multiplied by facel: O from charge[ii], H from charge[ii+1].
 * Computed once, ahead of the loop over n. nul is presumably a zero
 * vector; its initialisation is not among the lines shown. */
3756 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
3757 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* Base offset for the i atom type, combined with the j type to index nbfp below. */
3758 ntiA
= 2*ntype
*type
[ii
];
/* Outer loop over the nri i entries. */
3760 for(n
=0;n
<nri
;n
++) {
/* Load the i-water coordinates together with the shift vector into the
 * nine i-site vectors. */
3764 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
3765 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main inner loop: four j atoms per iteration. */
3780 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* j coordinates rearranged into x/y/z vectors, parked in dH2x/y/z. */
3789 transpose_4_to_3(load_xyz(pos
+j3a
),
3792 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* Difference vectors for the three i sites. dH2x/y/z are overwritten
 * last because they still hold the j coordinates. */
3793 dOx
= vec_sub(iOx
,dH2x
);
3794 dOy
= vec_sub(iOy
,dH2y
);
3795 dOz
= vec_sub(iOz
,dH2z
);
3796 dH1x
= vec_sub(iH1x
,dH2x
);
3797 dH1y
= vec_sub(iH1y
,dH2y
);
3798 dH1z
= vec_sub(iH1z
,dH2z
);
3799 dH2x
= vec_sub(iH2x
,dH2x
);
3800 dH2y
= vec_sub(iH2y
,dH2y
);
3801 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances per site: dx*dx + dy*dy + dz*dz. */
3803 rsqO
= vec_madd(dOx
,dOx
,nul
);
3804 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3805 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3806 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3807 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3808 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3809 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3810 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3811 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* rinv per site, rinvsqO = rinvO*rinvO, r = rsq*rinv, and
 * rinvsix = rinvsqO*rinvsqO*rinvsqO for the O-site c6/c12 terms. */
3812 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3813 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
3814 rO
= vec_madd(rsqO
,rinvO
,nul
);
3815 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3816 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3817 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
3818 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
/* nbfp offsets for the four j atom types. */
3819 tja
= ntiA
+2*type
[jnra
];
3820 tjb
= ntiA
+2*type
[jnrb
];
3821 tjc
= ntiA
+2*type
[jnrc
];
3822 tjd
= ntiA
+2*type
[jnrd
];
3823 /* load 4 j charges and multiply by iq */
3824 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
3825 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
/* Coulomb table lookups at r*tsc, one per site; outputs go to the VVc
 * and FFc variables of that site. */
3826 do_4_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3827 do_4_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3828 do_4_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Energies and scalar forces, interleaved:
 *   vnb6 = c6*rinvsix, vnb12 = c12*rinvsix*rinvsix
 *   fsO  = ((vec_twelve()*vnb12 - vec_six()*vnb6)*rinvO - qqO*FFcO*tsc)*rinvO
 *   fsH1 = -qqH*FFcH1*tsc*rinvH1, fsH2 likewise with the H2 values
 *   vctot  += qqO*VVcO + qqH*VVcH1 + qqH*VVcH2
 *   vnbtot += vnb12 - vnb6
 */
3829 vnb6
= vec_madd(c6
,rinvsix
,nul
);
3830 qqO
= vec_madd(iqO
,jq
,nul
);
3831 qqH
= vec_madd(iqH
,jq
,nul
);
3832 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
3833 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3834 fsO
= vec_madd(vec_twelve(),vnb12
,nul
);
3835 tmp1
= vec_madd(qqO
,FFcO
,nul
);
3836 vnbtot
= vec_add(vnbtot
,vnb12
);
3837 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3838 vnbtot
= vec_sub(vnbtot
,vnb6
);
3839 fsO
= vec_madd(fsO
,rinvO
,nul
);
3840 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3841 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3842 fsO
= vec_nmsub(tmp1
,tsc
,fsO
);
3843 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3844 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3845 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3846 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3847 fsO
= vec_madd(fsO
,rinvO
,nul
);
3848 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3849 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
/* Add fs*d to the i-site force sums. dOx/dOy/dOz are reused to collect
 * minus the summed force of the three sites, which is what gets added
 * to the j atoms. */
3851 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3852 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3853 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3854 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3855 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3856 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3857 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3858 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3859 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3860 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3861 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3862 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3863 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3864 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3865 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3866 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3867 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3868 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* Rearrange to one vector per j atom and add each to faction. */
3870 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
3871 add_xyz_to_mem(faction
+j3a
,tmp1
);
3872 add_xyz_to_mem(faction
+j3b
,tmp2
);
3873 add_xyz_to_mem(faction
+j3c
,tmp3
);
3874 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Leftover case with three j atoms (the branch condition line is missing
 * from this listing). nul stands in for the fourth coordinate vector. */
3883 transpose_4_to_3(load_xyz(pos
+j3a
),
3885 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
3886 dOx
= vec_sub(iOx
,dH2x
);
3887 dOy
= vec_sub(iOy
,dH2y
);
3888 dOz
= vec_sub(iOz
,dH2z
);
3889 dH1x
= vec_sub(iH1x
,dH2x
);
3890 dH1y
= vec_sub(iH1y
,dH2y
);
3891 dH1z
= vec_sub(iH1z
,dH2z
);
3892 dH2x
= vec_sub(iH2x
,dH2x
);
3893 dH2y
= vec_sub(iH2y
,dH2y
);
3894 dH2z
= vec_sub(iH2z
,dH2z
);
3896 rsqO
= vec_madd(dOx
,dOx
,nul
);
3897 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3898 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3899 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3900 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3901 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3902 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3903 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3904 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The unused element is cleared both before and after the inverse
 * square root (going by the helper name). */
3906 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3907 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3908 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
3910 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
3911 rO
= vec_madd(rsqO
,rinvO
,nul
);
3912 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
3913 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
3914 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
3915 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
3916 tja
= ntiA
+2*type
[jnra
];
3917 tjb
= ntiA
+2*type
[jnrb
];
3918 tjc
= ntiA
+2*type
[jnrc
];
3919 /* load 3 j charges and multiply by iq */
3920 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
3921 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
3922 do_3_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
3923 do_3_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
3924 do_3_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
3925 vnb6
= vec_madd(c6
,rinvsix
,nul
);
3926 qqO
= vec_madd(iqO
,jq
,nul
);
3927 qqH
= vec_madd(iqH
,jq
,nul
);
3928 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
3929 vctot
= vec_madd(qqO
,VVcO
,vctot
);
3930 fsO
= vec_madd(vec_twelve(),vnb12
,nul
);
3931 tmp1
= vec_madd(qqO
,FFcO
,nul
);
3932 vnbtot
= vec_add(vnbtot
,vnb12
);
3933 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
3934 vnbtot
= vec_sub(vnbtot
,vnb6
);
3935 fsO
= vec_madd(fsO
,rinvO
,nul
);
3936 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
3937 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
3938 fsO
= vec_nmsub(tmp1
,tsc
,fsO
);
3939 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
3940 fsH1
= vec_madd(fsH1
,tsc
,nul
);
3941 fsH2
= vec_madd(fsH2
,tsc
,nul
);
3942 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
3943 fsO
= vec_madd(fsO
,rinvO
,nul
);
3944 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
3945 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
3947 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
3948 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
3949 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
3950 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
3951 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
3952 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
3953 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
3954 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
3955 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
3956 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
3957 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
3958 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
3959 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
3960 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
3961 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
3962 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
3963 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
3964 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* The x/y/z vectors plus nul go through transpose_4_to_3; the three
 * outputs are added to the three j atoms. */
3966 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
3967 add_xyz_to_mem(faction
+j3a
,tmp1
);
3968 add_xyz_to_mem(faction
+j3b
,tmp2
);
3969 add_xyz_to_mem(faction
+j3c
,tmp3
);
/* Leftover case with two j atoms. */
3970 } else if(k
<(nj1
-1)) {
3975 transpose_2_to_3(load_xyz(pos
+j3a
),
3976 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
3977 dOx
= vec_sub(iOx
,dH2x
);
3978 dOy
= vec_sub(iOy
,dH2y
);
3979 dOz
= vec_sub(iOz
,dH2z
);
3980 dH1x
= vec_sub(iH1x
,dH2x
);
3981 dH1y
= vec_sub(iH1y
,dH2y
);
3982 dH1z
= vec_sub(iH1z
,dH2z
);
3983 dH2x
= vec_sub(iH2x
,dH2x
);
3984 dH2y
= vec_sub(iH2y
,dH2y
);
3985 dH2z
= vec_sub(iH2z
,dH2z
);
3987 rsqO
= vec_madd(dOx
,dOx
,nul
);
3988 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
3989 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
3990 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
3991 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
3992 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
3993 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
3994 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
3995 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The two unused elements are cleared before and after the inverse
 * square root (going by the helper name). */
3997 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
3998 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
3999 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
4001 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
4002 rO
= vec_madd(rsqO
,rinvO
,nul
);
4003 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4004 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
4005 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
4006 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
4007 tja
= ntiA
+2*type
[jnra
];
4008 tjb
= ntiA
+2*type
[jnrb
];
4009 /* load 2 j charges and multiply by iq */
4010 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
4011 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
4012 do_2_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
4013 do_2_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4014 do_2_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
4015 vnb6
= vec_madd(c6
,rinvsix
,nul
);
4016 qqO
= vec_madd(iqO
,jq
,nul
);
4017 qqH
= vec_madd(iqH
,jq
,nul
);
4018 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
4019 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4020 fsO
= vec_madd(vec_twelve(),vnb12
,nul
);
4021 tmp1
= vec_madd(qqO
,FFcO
,nul
);
4022 vnbtot
= vec_add(vnbtot
,vnb12
);
4023 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
4024 vnbtot
= vec_sub(vnbtot
,vnb6
);
4025 fsO
= vec_madd(fsO
,rinvO
,nul
);
4026 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
4027 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
4028 fsO
= vec_nmsub(tmp1
,tsc
,fsO
);
4029 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4030 fsH1
= vec_madd(fsH1
,tsc
,nul
);
4031 fsH2
= vec_madd(fsH2
,tsc
,nul
);
4032 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4033 fsO
= vec_madd(fsO
,rinvO
,nul
);
4034 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4035 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
4037 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4038 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4039 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4040 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4041 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4042 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4043 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4044 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4045 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4046 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4047 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4048 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4049 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4050 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4051 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4052 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4053 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4054 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
4056 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
4057 add_xyz_to_mem(faction
+j3a
,tmp1
);
4058 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* Leftover case with a single j atom (the branch line is missing from
 * this listing). */
4062 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
4063 dOx
= vec_sub(iOx
,dH2x
);
4064 dOy
= vec_sub(iOy
,dH2y
);
4065 dOz
= vec_sub(iOz
,dH2z
);
4066 dH1x
= vec_sub(iH1x
,dH2x
);
4067 dH1y
= vec_sub(iH1y
,dH2y
);
4068 dH1z
= vec_sub(iH1z
,dH2z
);
4069 dH2x
= vec_sub(iH2x
,dH2x
);
4070 dH2y
= vec_sub(iH2y
,dH2y
);
4071 dH2z
= vec_sub(iH2z
,dH2z
);
4073 rsqO
= vec_madd(dOx
,dOx
,nul
);
4074 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
4075 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
4076 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
4077 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
4078 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
4079 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
4080 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
4081 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The three unused elements are cleared before and after the inverse
 * square root (going by the helper name). */
4083 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
4084 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
4085 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
4087 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
4088 rO
= vec_madd(rsqO
,rinvO
,nul
);
4089 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4090 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
4091 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
4092 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
4093 tja
= ntiA
+2*type
[jnra
];
4094 /* load 1 j charges and multiply by iq */
4095 jq
=load_1_float(charge
+jnra
);
4096 load_1_pair(nbfp
+tja
,&c6
,&c12
);
4097 do_1_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
);
4098 do_1_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4099 do_1_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
4100 vnb6
= vec_madd(c6
,rinvsix
,nul
);
4101 qqO
= vec_madd(iqO
,jq
,nul
);
4102 qqH
= vec_madd(iqH
,jq
,nul
);
4103 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
4104 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4105 fsO
= vec_madd(vec_twelve(),vnb12
,nul
);
4106 tmp1
= vec_madd(qqO
,FFcO
,nul
);
4107 vnbtot
= vec_add(vnbtot
,vnb12
);
4108 fsO
= vec_nmsub(vec_six(),vnb6
,fsO
);
4109 vnbtot
= vec_sub(vnbtot
,vnb6
);
4110 fsO
= vec_madd(fsO
,rinvO
,nul
);
4111 fsH1
= vec_nmsub(qqH
,FFcH1
,nul
);
4112 fsH2
= vec_nmsub(qqH
,FFcH2
,nul
);
4113 fsO
= vec_nmsub(tmp1
,tsc
,fsO
);
4114 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4115 fsH1
= vec_madd(fsH1
,tsc
,nul
);
4116 fsH2
= vec_madd(fsH2
,tsc
,nul
);
4117 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4118 fsO
= vec_madd(fsO
,rinvO
,nul
);
4119 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4120 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
4122 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4123 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4124 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4125 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4126 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4127 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4128 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4129 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4130 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4131 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4132 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4133 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4134 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4135 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4136 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4137 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4138 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4139 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
4141 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
4142 add_xyz_to_mem(faction
+j3a
,tmp1
);
4144 /* update outer data */
4145 update_i_water_forces(faction
+ii3
,fshift
+is3
,
4146 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Add the accumulated vctot and vnbtot to the Vc and Vnb entries
 * selected by gid[n]. */
4148 add_vector_to_float(Vc
+gid
[n
],vctot
);
4149 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3320_altivec: inner loop for one i water (sites O, H1, H2) against
 * single j atoms, written with AltiVec vector operations.
 *
 * As far as the statements shown here go:
 *  - the O site uses do_N_ljctable_coul_and_lj at rO*tsc, which yields
 *    the Coulomb pair (VVcO, FFcO) plus two further pairs (VVd, FFd) and
 *    (VVr, FFr) that are weighted by c6 and c12 respectively;
 *  - the H1 and H2 sites use do_N_ljctable_coul for the Coulomb pair only;
 *  - j atoms are processed four at a time, followed by branches that
 *    handle three, two or one leftover j atoms.
 *
 * NOTE(review): this listing is missing lines (parameter list, index
 * loads, some branch conditions, closing braces). The comments below
 * describe only the statements that are visible.
 */
4154 void inl3320_altivec(
/* Coordinates of the i-water sites, filled by load_1_water_shift_and_splat below. */
4176 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
/* i-j difference vectors. dH2x/y/z first receive the j coordinates, and
 * dOx/y/z are reused later to build the j-atom force. */
4177 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
/* NOTE(review): vcoulO, vcoulH1 and vcoulH2 are not referenced by any
 * statement shown in this listing. */
4178 vector
float vfacel
,vcoulO
,vcoulH1
,vcoulH2
,nul
;
4179 vector
float vnbtot
,c6
,c12
;
4180 vector
float fsO
,fsH1
,fsH2
;
4181 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
/* Force sums for the three i sites, handed to update_i_water_forces at the end. */
4182 vector
float fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
;
4183 vector
float tmp1
,tmp2
,tmp3
,tmp4
;
4184 vector
float rinvO
,rinvH1
,rinvH2
,rsqO
,rsqH1
,rsqH2
;
4185 vector
float rO
,rH1
,rH2
,VVcO
,FFcO
,VVcH1
,FFcH1
,VVcH2
,FFcH2
,VVd
,FFd
,VVr
,FFr
;
/* NOTE(review): k0 is not referenced by any statement shown in this listing. */
4187 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
4188 int jnra
,jnrb
,jnrc
,jnrd
;
4189 int j3a
,j3b
,j3c
,j3d
;
4190 int tja
,tjb
,tjc
,tjd
;
/* Vector copies of the scalars tabscale and facel (splat, judging by the
 * helper name).
 * NOTE(review): no declaration of tsc appears among the lines shown for
 * this function - confirm it is declared on a line missing from this
 * listing. */
4193 tsc
=load_float_and_splat(&tabscale
);
4194 vfacel
=load_float_and_splat(&facel
);
/* i charges multiplied by facel: O from charge[ii], H from charge[ii+1].
 * Computed once, ahead of the loop over n. nul is presumably a zero
 * vector; its initialisation is not among the lines shown. */
4197 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
4198 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* Base offset for the i atom type, combined with the j type to index nbfp below. */
4199 ntiA
= 2*ntype
*type
[ii
];
/* Outer loop over the nri i entries. */
4201 for(n
=0;n
<nri
;n
++) {
/* Load the i-water coordinates together with the shift vector into the
 * nine i-site vectors. */
4205 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
4206 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* Main inner loop: four j atoms per iteration. */
4221 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* j coordinates rearranged into x/y/z vectors, parked in dH2x/y/z. */
4230 transpose_4_to_3(load_xyz(pos
+j3a
),
4233 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* Difference vectors for the three i sites. dH2x/y/z are overwritten
 * last because they still hold the j coordinates. */
4234 dOx
= vec_sub(iOx
,dH2x
);
4235 dOy
= vec_sub(iOy
,dH2y
);
4236 dOz
= vec_sub(iOz
,dH2z
);
4237 dH1x
= vec_sub(iH1x
,dH2x
);
4238 dH1y
= vec_sub(iH1y
,dH2y
);
4239 dH1z
= vec_sub(iH1z
,dH2z
);
4240 dH2x
= vec_sub(iH2x
,dH2x
);
4241 dH2y
= vec_sub(iH2y
,dH2y
);
4242 dH2z
= vec_sub(iH2z
,dH2z
);
/* Squared distances per site: dx*dx + dy*dy + dz*dz. */
4244 rsqO
= vec_madd(dOx
,dOx
,nul
);
4245 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
4246 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
4247 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
4248 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
4249 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
4250 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
4251 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
4252 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* rinv per site and r = rsq*rinv, used as the table argument below. */
4253 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
4254 rO
= vec_madd(rsqO
,rinvO
,nul
);
4255 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4256 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
/* nbfp offsets for the four j atom types. */
4257 tja
= ntiA
+2*type
[jnra
];
4258 tjb
= ntiA
+2*type
[jnrb
];
4259 tjc
= ntiA
+2*type
[jnrc
];
4260 tjd
= ntiA
+2*type
[jnrd
];
4261 /* load 4 j charges and multiply by iq */
4262 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
4263 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
/* Table lookups at r*tsc: the O site fetches the Coulomb pair and the
 * two pairs weighted by c6 and c12; H1 and H2 fetch the Coulomb pair only. */
4264 do_4_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
,&VVd
,&FFd
,&VVr
,&FFr
);
4265 do_4_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4266 do_4_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Energies and scalar forces, interleaved:
 *   fsO  = -(qqO*FFcO + c6*FFd + c12*FFr)*tsc*rinvO
 *   fsH1 = -(qqH*FFcH1)*tsc*rinvH1, fsH2 likewise with the H2 values
 *   vctot  += qqO*VVcO + qqH*VVcH1 + qqH*VVcH2
 *   vnbtot += c6*VVd + c12*VVr
 */
4267 qqO
= vec_madd(iqO
,jq
,nul
);
4268 qqH
= vec_madd(iqH
,jq
,nul
);
4269 fsO
= vec_madd(qqO
,FFcO
,nul
);
4270 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4271 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
4272 fsO
= vec_madd(c6
,FFd
,fsO
);
4273 fsH1
= vec_madd(qqH
,FFcH1
,nul
);
4274 fsH2
= vec_madd(qqH
,FFcH2
,nul
);
4275 fsO
= vec_madd(c12
,FFr
,fsO
);
4276 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4277 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
4278 fsO
= vec_nmsub(fsO
,tsc
,nul
);
4279 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4280 fsH1
= vec_nmsub(fsH1
,tsc
,nul
);
4281 fsH2
= vec_nmsub(fsH2
,tsc
,nul
);
4282 fsO
= vec_madd(fsO
,rinvO
,nul
);
4283 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4284 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
/* Add fs*d to the i-site force sums. dOx/dOy/dOz are reused to collect
 * minus the summed force of the three sites, which is what gets added
 * to the j atoms. */
4286 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4287 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4288 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4289 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4290 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4291 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4292 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4293 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4294 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4295 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4296 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4297 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4298 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4299 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4300 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4301 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4302 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4303 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* Rearrange to one vector per j atom and add each to faction. */
4305 transpose_3_to_4(dOx
,dOy
,dOz
,&tmp1
,&tmp2
,&tmp3
,&tmp4
);
4306 add_xyz_to_mem(faction
+j3a
,tmp1
);
4307 add_xyz_to_mem(faction
+j3b
,tmp2
);
4308 add_xyz_to_mem(faction
+j3c
,tmp3
);
4309 add_xyz_to_mem(faction
+j3d
,tmp4
);
/* Leftover case with three j atoms (the branch condition line is missing
 * from this listing). nul stands in for the fourth coordinate vector. */
4318 transpose_4_to_3(load_xyz(pos
+j3a
),
4320 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
4321 dOx
= vec_sub(iOx
,dH2x
);
4322 dOy
= vec_sub(iOy
,dH2y
);
4323 dOz
= vec_sub(iOz
,dH2z
);
4324 dH1x
= vec_sub(iH1x
,dH2x
);
4325 dH1y
= vec_sub(iH1y
,dH2y
);
4326 dH1z
= vec_sub(iH1z
,dH2z
);
4327 dH2x
= vec_sub(iH2x
,dH2x
);
4328 dH2y
= vec_sub(iH2y
,dH2y
);
4329 dH2z
= vec_sub(iH2z
,dH2z
);
4331 rsqO
= vec_madd(dOx
,dOx
,nul
);
4332 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
4333 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
4334 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
4335 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
4336 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
4337 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
4338 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
4339 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The unused element is cleared both before and after the inverse
 * square root (going by the helper name). */
4341 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
4342 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
4343 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
4345 rO
= vec_madd(rsqO
,rinvO
,nul
);
4346 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4347 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
4348 tja
= ntiA
+2*type
[jnra
];
4349 tjb
= ntiA
+2*type
[jnrb
];
4350 tjc
= ntiA
+2*type
[jnrc
];
/* c6/c12 pairs and charges for the three j atoms, then the table lookups. */
4352 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
4353 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
4354 do_3_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
,&VVd
,&FFd
,&VVr
,&FFr
);
4355 do_3_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4356 do_3_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
4357 qqO
= vec_madd(iqO
,jq
,nul
);
4358 qqH
= vec_madd(iqH
,jq
,nul
);
4359 fsO
= vec_madd(qqO
,FFcO
,nul
);
4360 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4361 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
4362 fsO
= vec_madd(c6
,FFd
,fsO
);
4363 fsH1
= vec_madd(qqH
,FFcH1
,nul
);
4364 fsH2
= vec_madd(qqH
,FFcH2
,nul
);
4365 fsO
= vec_madd(c12
,FFr
,fsO
);
4366 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4367 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
4368 fsO
= vec_nmsub(fsO
,tsc
,nul
);
4369 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4370 fsH1
= vec_nmsub(fsH1
,tsc
,nul
);
4371 fsH2
= vec_nmsub(fsH2
,tsc
,nul
);
4372 fsO
= vec_madd(fsO
,rinvO
,nul
);
4373 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4374 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
4376 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4377 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4378 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4379 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4380 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4381 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4382 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4383 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4384 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4385 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4386 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4387 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4388 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4389 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4390 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4391 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4392 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4393 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
/* The x/y/z vectors plus nul go through transpose_4_to_3; the three
 * outputs are added to the three j atoms. */
4395 transpose_4_to_3(dOx
,dOy
,dOz
,nul
,&tmp1
,&tmp2
,&tmp3
);
4396 add_xyz_to_mem(faction
+j3a
,tmp1
);
4397 add_xyz_to_mem(faction
+j3b
,tmp2
);
4398 add_xyz_to_mem(faction
+j3c
,tmp3
);
/* Leftover case with two j atoms. */
4399 } else if(k
<(nj1
-1)) {
4404 transpose_2_to_3(load_xyz(pos
+j3a
),
4405 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
4406 dOx
= vec_sub(iOx
,dH2x
);
4407 dOy
= vec_sub(iOy
,dH2y
);
4408 dOz
= vec_sub(iOz
,dH2z
);
4409 dH1x
= vec_sub(iH1x
,dH2x
);
4410 dH1y
= vec_sub(iH1y
,dH2y
);
4411 dH1z
= vec_sub(iH1z
,dH2z
);
4412 dH2x
= vec_sub(iH2x
,dH2x
);
4413 dH2y
= vec_sub(iH2y
,dH2y
);
4414 dH2z
= vec_sub(iH2z
,dH2z
);
4416 rsqO
= vec_madd(dOx
,dOx
,nul
);
4417 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
4418 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
4419 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
4420 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
4421 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
4422 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
4423 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
4424 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The two unused elements are cleared before and after the inverse
 * square root (going by the helper name). */
4426 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
4427 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
4428 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
4430 rO
= vec_madd(rsqO
,rinvO
,nul
);
4431 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4432 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
4433 tja
= ntiA
+2*type
[jnra
];
4434 tjb
= ntiA
+2*type
[jnrb
];
4435 /* load 2 j charges and multiply by iq */
4436 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
4437 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
4438 do_2_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
,&VVd
,&FFd
,&VVr
,&FFr
);
4439 do_2_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4440 do_2_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
4441 qqO
= vec_madd(iqO
,jq
,nul
);
4442 qqH
= vec_madd(iqH
,jq
,nul
);
4443 fsO
= vec_madd(qqO
,FFcO
,nul
);
4444 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4445 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
4446 fsO
= vec_madd(c6
,FFd
,fsO
);
4447 fsH1
= vec_madd(qqH
,FFcH1
,nul
);
4448 fsH2
= vec_madd(qqH
,FFcH2
,nul
);
4449 fsO
= vec_madd(c12
,FFr
,fsO
);
4450 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4451 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
4452 fsO
= vec_nmsub(fsO
,tsc
,nul
);
4453 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4454 fsH1
= vec_nmsub(fsH1
,tsc
,nul
);
4455 fsH2
= vec_nmsub(fsH2
,tsc
,nul
);
4456 fsO
= vec_madd(fsO
,rinvO
,nul
);
4457 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4458 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
4460 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4461 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4462 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4463 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4464 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4465 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4466 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4467 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4468 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4469 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4470 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4471 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4472 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4473 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4474 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4475 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4476 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4477 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
4479 transpose_3_to_2(dOx
,dOy
,dOz
,&tmp1
,&tmp2
);
4480 add_xyz_to_mem(faction
+j3a
,tmp1
);
4481 add_xyz_to_mem(faction
+j3b
,tmp2
);
/* Leftover case with a single j atom (the branch line is missing from
 * this listing). */
4485 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
4486 dOx
= vec_sub(iOx
,dH2x
);
4487 dOy
= vec_sub(iOy
,dH2y
);
4488 dOz
= vec_sub(iOz
,dH2z
);
4489 dH1x
= vec_sub(iH1x
,dH2x
);
4490 dH1y
= vec_sub(iH1y
,dH2y
);
4491 dH1z
= vec_sub(iH1z
,dH2z
);
4492 dH2x
= vec_sub(iH2x
,dH2x
);
4493 dH2y
= vec_sub(iH2y
,dH2y
);
4494 dH2z
= vec_sub(iH2z
,dH2z
);
4496 rsqO
= vec_madd(dOx
,dOx
,nul
);
4497 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
4498 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
4499 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
4500 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
4501 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
4502 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
4503 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
4504 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* The three unused elements are cleared before and after the inverse
 * square root (going by the helper name). */
4506 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
4507 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
4508 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
4510 rO
= vec_madd(rsqO
,rinvO
,nul
);
4511 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
4512 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
4513 tja
= ntiA
+2*type
[jnra
];
4514 /* load 1 j charges and multiply by iq */
4515 jq
=load_1_float(charge
+jnra
);
4516 load_1_pair(nbfp
+tja
,&c6
,&c12
);
4517 do_1_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&FFcO
,&VVd
,&FFd
,&VVr
,&FFr
);
4518 do_1_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
,&FFcH1
);
4519 do_1_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
,&FFcH2
);
/* Same energy/force arithmetic as in the four-atom loop above. */
4520 qqO
= vec_madd(iqO
,jq
,nul
);
4521 qqH
= vec_madd(iqH
,jq
,nul
);
4522 fsO
= vec_madd(qqO
,FFcO
,nul
);
4523 vctot
= vec_madd(qqO
,VVcO
,vctot
);
4524 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
4525 fsO
= vec_madd(c6
,FFd
,fsO
);
4526 fsH1
= vec_madd(qqH
,FFcH1
,nul
);
4527 fsH2
= vec_madd(qqH
,FFcH2
,nul
);
4528 fsO
= vec_madd(c12
,FFr
,fsO
);
4529 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
4530 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
4531 fsO
= vec_nmsub(fsO
,tsc
,nul
);
4532 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
4533 fsH1
= vec_nmsub(fsH1
,tsc
,nul
);
4534 fsH2
= vec_nmsub(fsH2
,tsc
,nul
);
4535 fsO
= vec_madd(fsO
,rinvO
,nul
);
4536 fsH1
= vec_madd(fsH1
,rinvH1
,nul
);
4537 fsH2
= vec_madd(fsH2
,rinvH2
,nul
);
4539 fiOx
= vec_madd(fsO
,dOx
,fiOx
); /* +=fx */
4540 dOx
= vec_nmsub(fsO
,dOx
,nul
); /* -fx */
4541 fiOy
= vec_madd(fsO
,dOy
,fiOy
); /* +=fy */
4542 dOy
= vec_nmsub(fsO
,dOy
,nul
); /* -fy */
4543 fiOz
= vec_madd(fsO
,dOz
,fiOz
); /* +=fz */
4544 dOz
= vec_nmsub(fsO
,dOz
,nul
); /* -fz */
4545 fiH1x
= vec_madd(fsH1
,dH1x
,fiH1x
); /* +=fx */
4546 dOx
= vec_nmsub(fsH1
,dH1x
,dOx
); /* -fx */
4547 fiH1y
= vec_madd(fsH1
,dH1y
,fiH1y
); /* +=fy */
4548 dOy
= vec_nmsub(fsH1
,dH1y
,dOy
); /* -fy */
4549 fiH1z
= vec_madd(fsH1
,dH1z
,fiH1z
); /* +=fz */
4550 dOz
= vec_nmsub(fsH1
,dH1z
,dOz
); /* -fz */
4551 fiH2x
= vec_madd(fsH2
,dH2x
,fiH2x
); /* +=fx */
4552 dOx
= vec_nmsub(fsH2
,dH2x
,dOx
); /* -fx */
4553 fiH2y
= vec_madd(fsH2
,dH2y
,fiH2y
); /* +=fy */
4554 dOy
= vec_nmsub(fsH2
,dH2y
,dOy
); /* -fy */
4555 fiH2z
= vec_madd(fsH2
,dH2z
,fiH2z
); /* +=fz */
4556 dOz
= vec_nmsub(fsH2
,dH2z
,dOz
); /* -fz */
4558 transpose_3_to_1(dOx
,dOy
,dOz
,&tmp1
);
4559 add_xyz_to_mem(faction
+j3a
,tmp1
);
4561 /* update outer data */
4562 update_i_water_forces(faction
+ii3
,fshift
+is3
,
4563 fiOx
,fiOy
,fiOz
,fiH1x
,fiH1y
,fiH1z
,fiH2x
,fiH2y
,fiH2z
);
/* Add the accumulated vctot and vnbtot to the Vc and Vnb entries
 * selected by gid[n]. */
4565 add_vector_to_float(Vc
+gid
[n
],vctot
);
4566 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl1030_altivec
 *
 * AltiVec inner loop for water-water interactions with a plain Coulomb
 * term only: for every i water (outer loop over n) all 3x3 site pairs with
 * its j waters are evaluated, four j waters per pass (one per vector
 * lane), followed by remainder cases for three, two and one j water.
 *
 * Naming used below: the first digit of a two-digit suffix is the i-water
 * site, the second digit the j-water site (dx12 = ix1 - jx2). Site 1 is
 * paired with the charge read at charge[iinr[0]] (qO) and sites 2 and 3
 * with the charge at charge[iinr[0]+1] (qH).
 *
 * Results: the forces on the j waters are handed to add_force_to_N_water(),
 * the i-water forces to update_i_water_forces() together with fshift+is3,
 * and the accumulated potential vctot to add_vector_to_float() at
 * Vc+gid[n].
 *
 * NOTE(review): the parameter list, the braces, the assignments of
 * is3/ii/ii3/nj0/nj1/jnr?/j3? and the initialisation of nul, vctot and the
 * fi accumulators are not visible in this excerpt; nul is used everywhere
 * as the additive term of vec_madd/vec_nmsub, so it is presumably an
 * all-zero vector - confirm against the full file. The leading integers on
 * some lines look like line numbers left over from extraction.
 */
4573 void inl1030_altivec(
/* Splatted coordinates of the three i-water sites. */
4588 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
/* Coordinates of the three sites of the j waters (one j water per lane). */
4589 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
/* Distance vector components for the nine site pairs (i site, j site). */
4591 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
4592 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
4593 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
/* Squared distances and the outputs of do_9_invsqrt() for the nine pairs. */
4595 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
4596 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
/* rinv*rinv for each pair. */
4597 vector
float rinvsq11
,rinvsq12
,rinvsq13
;
4598 vector
float rinvsq21
,rinvsq22
,rinvsq23
;
4599 vector
float rinvsq31
,rinvsq32
,rinvsq33
;
/* Per-pair Coulomb energies (rinv * charge product). */
4600 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
;
/* NOTE(review): vcoul1..vcoul3 are not referenced in the visible code. */
4602 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
/* Per-pair scalar force factors (vc * rinvsq). */
4603 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
;
/* Force accumulators for the i water sites. */
4604 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
/* Forces on the j water sites for the current batch. */
4605 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
/* Energy accumulator, charge products, and their lane-masked copies
 * (suffix t) used in the remainder cases. */
4606 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,qqOOt
,qqOHt
,qqHHt
;
4610 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
4611 int jnra
,jnrb
,jnrc
,jnrd
;
4612 int j3a
,j3b
,j3c
,j3d
;
/* Charge products are computed once, before the outer loop, from the
 * charges at iinr[0] and iinr[0]+1, and then scaled by facel. */
4615 vfacel
=load_float_and_splat(&facel
);
4616 qO
= load_float_and_splat(charge
+iinr
[0]);
4617 qH
= load_float_and_splat(charge
+iinr
[0]+1);
4618 qqOO
= vec_madd(qO
,qO
,nul
);
4619 qqOH
= vec_madd(qO
,qH
,nul
);
4620 qqHH
= vec_madd(qH
,qH
,nul
);
4621 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
4622 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
4623 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* Outer loop: one i water per iteration. */
4625 for(n
=0;n
<nri
;n
++) {
/* Fetch the i water at pos+ii3 together with the shift vector at
 * shiftvec+is3; the helper (defined elsewhere) fills the nine splatted
 * i coordinates - presumably already shifted, per its name. */
4629 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
4630 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: four j waters per pass while at least four remain. */
4644 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
4653 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
4654 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Distance components i - j for all nine site pairs. */
4656 dx11
= vec_sub(ix1
,jx1
);
4657 dx12
= vec_sub(ix1
,jx2
);
4658 dx13
= vec_sub(ix1
,jx3
);
4659 dy11
= vec_sub(iy1
,jy1
);
4660 dy12
= vec_sub(iy1
,jy2
);
4661 dy13
= vec_sub(iy1
,jy3
);
4662 dz11
= vec_sub(iz1
,jz1
);
4663 dz12
= vec_sub(iz1
,jz2
);
4664 dz13
= vec_sub(iz1
,jz3
);
4665 dx21
= vec_sub(ix2
,jx1
);
4666 dx22
= vec_sub(ix2
,jx2
);
4667 dx23
= vec_sub(ix2
,jx3
);
4668 dy21
= vec_sub(iy2
,jy1
);
4669 dy22
= vec_sub(iy2
,jy2
);
4670 dy23
= vec_sub(iy2
,jy3
);
4671 dz21
= vec_sub(iz2
,jz1
);
4672 dz22
= vec_sub(iz2
,jz2
);
4673 dz23
= vec_sub(iz2
,jz3
);
4674 dx31
= vec_sub(ix3
,jx1
);
4675 dx32
= vec_sub(ix3
,jx2
);
4676 dx33
= vec_sub(ix3
,jx3
);
4677 dy31
= vec_sub(iy3
,jy1
);
4678 dy32
= vec_sub(iy3
,jy2
);
4679 dy33
= vec_sub(iy3
,jy3
);
4680 dz31
= vec_sub(iz3
,jz1
);
4681 dz32
= vec_sub(iz3
,jz2
);
4682 dz33
= vec_sub(iz3
,jz3
);
/* rsq = dx*dx + dy*dy + dz*dz, built up with three multiply-adds. */
4684 rsq11
= vec_madd(dx11
,dx11
,nul
);
4685 rsq12
= vec_madd(dx12
,dx12
,nul
);
4686 rsq13
= vec_madd(dx13
,dx13
,nul
);
4687 rsq21
= vec_madd(dx21
,dx21
,nul
);
4688 rsq22
= vec_madd(dx22
,dx22
,nul
);
4689 rsq23
= vec_madd(dx23
,dx23
,nul
);
4690 rsq31
= vec_madd(dx31
,dx31
,nul
);
4691 rsq32
= vec_madd(dx32
,dx32
,nul
);
4692 rsq33
= vec_madd(dx33
,dx33
,nul
);
4693 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
4694 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
4695 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
4696 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
4697 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
4698 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
4699 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
4700 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
4701 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
4702 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
4703 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
4704 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
4705 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
4706 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
4707 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
4708 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
4709 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
4710 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Helper defined elsewhere; by its name it returns 1/sqrt(rsq) for the
 * nine pairs in the rinv vectors. Part of its argument list is not
 * visible in this excerpt. */
4712 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
4715 &rinv11
,&rinv12
,&rinv13
,
4716 &rinv21
,&rinv22
,&rinv23
,
4717 &rinv31
,&rinv32
,&rinv33
);
4719 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
4720 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
4721 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
4722 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
4723 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
4724 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
4725 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
4726 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
4727 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
/* Pair energies: rinv times the matching charge product (OO for pair 11,
 * OH for pairs involving site 1 once, HH otherwise). */
4729 vc11
= vec_madd(rinv11
,qqOO
,nul
);
4730 vc12
= vec_madd(rinv12
,qqOH
,nul
);
4731 vc13
= vec_madd(rinv13
,qqOH
,nul
);
4732 vc21
= vec_madd(rinv21
,qqOH
,nul
);
4733 vc22
= vec_madd(rinv22
,qqHH
,nul
);
4734 vc23
= vec_madd(rinv23
,qqHH
,nul
);
4735 vc31
= vec_madd(rinv31
,qqOH
,nul
);
4736 vc32
= vec_madd(rinv32
,qqHH
,nul
);
4737 vc33
= vec_madd(rinv33
,qqHH
,nul
);
/* Scalar force factor per pair: vc * rinvsq. */
4739 fs11
= vec_madd(vc11
,rinvsq11
,nul
);
4740 fs12
= vec_madd(vc12
,rinvsq12
,nul
);
4741 fs13
= vec_madd(vc13
,rinvsq13
,nul
);
4742 fs21
= vec_madd(vc21
,rinvsq21
,nul
);
4743 fs22
= vec_madd(vc22
,rinvsq22
,nul
);
4744 fs23
= vec_madd(vc23
,rinvsq23
,nul
);
4745 fs31
= vec_madd(vc31
,rinvsq31
,nul
);
4746 fs32
= vec_madd(vc32
,rinvsq32
,nul
);
4747 fs33
= vec_madd(vc33
,rinvsq33
,nul
);
/* Add all nine pair energies to vctot; partial sums are formed pairwise
 * in the vc temporaries (which are overwritten) before being added. */
4749 vctot
= vec_add(vctot
,vc11
);
4750 vc12
= vec_add(vc12
,vc13
);
4751 vc21
= vec_add(vc21
,vc22
);
4752 vc23
= vec_add(vc23
,vc31
);
4753 vc32
= vec_add(vc32
,vc33
);
4754 vctot
= vec_add(vctot
,vc12
);
4755 vc21
= vec_add(vc21
,vc23
);
4756 vctot
= vec_add(vctot
,vc32
);
4757 vctot
= vec_add(vctot
,vc21
);
/* i-water forces: fi += fs * d, first for j site 1, then 2, then 3. */
4759 fix1
= vec_madd(fs11
,dx11
,fix1
);
4760 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
4761 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
4762 fix2
= vec_madd(fs21
,dx21
,fix2
);
4763 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
4764 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
4765 fix3
= vec_madd(fs31
,dx31
,fix3
);
4766 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
4767 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
4769 fix1
= vec_madd(fs12
,dx12
,fix1
);
4770 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
4771 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
4772 fix2
= vec_madd(fs22
,dx22
,fix2
);
4773 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
4774 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
4775 fix3
= vec_madd(fs32
,dx32
,fix3
);
4776 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
4777 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
4779 fix1
= vec_madd(fs13
,dx13
,fix1
);
4780 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
4781 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
4782 fix2
= vec_madd(fs23
,dx23
,fix2
);
4783 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
4784 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
4785 fix3
= vec_madd(fs33
,dx33
,fix3
);
4786 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
4787 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-water forces: the negated sum over the three i sites, started from
 * nul with the i site 1 terms and extended with sites 2 and 3. */
4789 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
4790 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
4791 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
4792 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
4793 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
4794 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
4795 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
4796 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
4797 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
4799 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
4800 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
4801 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
4802 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
4803 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
4804 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
4805 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
4806 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
4807 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
4809 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
4810 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
4811 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
4812 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
4813 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
4814 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
4815 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
4816 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
4817 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to the helper for the four j waters in faction. */
4819 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
4820 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder case with three j waters left.
 * NOTE(review): the end of the four-water loop and the `if` that opens
 * this case are not visible in this excerpt. */
4829 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
4830 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* vec_sld by 4 bytes keeps three lanes of the splatted charge product and
 * takes the last lane from nul, so the unused fourth lane gets nul's value. */
4831 qqOOt
= vec_sld(qqOO
,nul
,4);
4832 qqOHt
= vec_sld(qqOH
,nul
,4);
4833 qqHHt
= vec_sld(qqHH
,nul
,4);
4835 dx11
= vec_sub(ix1
,jx1
);
4836 dx12
= vec_sub(ix1
,jx2
);
4837 dx13
= vec_sub(ix1
,jx3
);
4838 dy11
= vec_sub(iy1
,jy1
);
4839 dy12
= vec_sub(iy1
,jy2
);
4840 dy13
= vec_sub(iy1
,jy3
);
4841 dz11
= vec_sub(iz1
,jz1
);
4842 dz12
= vec_sub(iz1
,jz2
);
4843 dz13
= vec_sub(iz1
,jz3
);
4844 dx21
= vec_sub(ix2
,jx1
);
4845 dx22
= vec_sub(ix2
,jx2
);
4846 dx23
= vec_sub(ix2
,jx3
);
4847 dy21
= vec_sub(iy2
,jy1
);
4848 dy22
= vec_sub(iy2
,jy2
);
4849 dy23
= vec_sub(iy2
,jy3
);
4850 dz21
= vec_sub(iz2
,jz1
);
4851 dz22
= vec_sub(iz2
,jz2
);
4852 dz23
= vec_sub(iz2
,jz3
);
4853 dx31
= vec_sub(ix3
,jx1
);
4854 dx32
= vec_sub(ix3
,jx2
);
4855 dx33
= vec_sub(ix3
,jx3
);
4856 dy31
= vec_sub(iy3
,jy1
);
4857 dy32
= vec_sub(iy3
,jy2
);
4858 dy33
= vec_sub(iy3
,jy3
);
4859 dz31
= vec_sub(iz3
,jz1
);
4860 dz32
= vec_sub(iz3
,jz2
);
4861 dz33
= vec_sub(iz3
,jz3
);
4863 rsq11
= vec_madd(dx11
,dx11
,nul
);
4864 rsq12
= vec_madd(dx12
,dx12
,nul
);
4865 rsq13
= vec_madd(dx13
,dx13
,nul
);
4866 rsq21
= vec_madd(dx21
,dx21
,nul
);
4867 rsq22
= vec_madd(dx22
,dx22
,nul
);
4868 rsq23
= vec_madd(dx23
,dx23
,nul
);
4869 rsq31
= vec_madd(dx31
,dx31
,nul
);
4870 rsq32
= vec_madd(dx32
,dx32
,nul
);
4871 rsq33
= vec_madd(dx33
,dx33
,nul
);
4872 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
4873 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
4874 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
4875 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
4876 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
4877 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
4878 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
4879 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
4880 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
4881 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
4882 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
4883 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
4884 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
4885 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
4886 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
4887 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
4888 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
4889 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
4891 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
4894 &rinv11
,&rinv12
,&rinv13
,
4895 &rinv21
,&rinv22
,&rinv23
,
4896 &rinv31
,&rinv32
,&rinv33
);
/* Helper defined elsewhere; by its name it clears the last lane of the
 * nine rinv vectors, i.e. the lane that holds no j water here - confirm. */
4898 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
4899 &rinv21
,&rinv22
,&rinv23
,
4900 &rinv31
,&rinv32
,&rinv33
);
4902 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
4903 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
4904 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
4905 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
4906 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
4907 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
4908 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
4909 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
4910 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
/* Same energy/force arithmetic as the four-water pass, but with the
 * lane-masked charge products. */
4912 vc11
= vec_madd(rinv11
,qqOOt
,nul
);
4913 vc12
= vec_madd(rinv12
,qqOHt
,nul
);
4914 vc13
= vec_madd(rinv13
,qqOHt
,nul
);
4915 vc21
= vec_madd(rinv21
,qqOHt
,nul
);
4916 vc22
= vec_madd(rinv22
,qqHHt
,nul
);
4917 vc23
= vec_madd(rinv23
,qqHHt
,nul
);
4918 vc31
= vec_madd(rinv31
,qqOHt
,nul
);
4919 vc32
= vec_madd(rinv32
,qqHHt
,nul
);
4920 vc33
= vec_madd(rinv33
,qqHHt
,nul
);
4922 fs11
= vec_madd(vc11
,rinvsq11
,nul
);
4923 fs12
= vec_madd(vc12
,rinvsq12
,nul
);
4924 fs13
= vec_madd(vc13
,rinvsq13
,nul
);
4925 fs21
= vec_madd(vc21
,rinvsq21
,nul
);
4926 fs22
= vec_madd(vc22
,rinvsq22
,nul
);
4927 fs23
= vec_madd(vc23
,rinvsq23
,nul
);
4928 fs31
= vec_madd(vc31
,rinvsq31
,nul
);
4929 fs32
= vec_madd(vc32
,rinvsq32
,nul
);
4930 fs33
= vec_madd(vc33
,rinvsq33
,nul
);
4932 vctot
= vec_add(vctot
,vc11
);
4933 vc12
= vec_add(vc12
,vc13
);
4934 vc21
= vec_add(vc21
,vc22
);
4935 vc23
= vec_add(vc23
,vc31
);
4936 vc32
= vec_add(vc32
,vc33
);
4937 vctot
= vec_add(vctot
,vc12
);
4938 vc21
= vec_add(vc21
,vc23
);
4939 vctot
= vec_add(vctot
,vc32
);
4940 vctot
= vec_add(vctot
,vc21
);
4942 fix1
= vec_madd(fs11
,dx11
,fix1
);
4943 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
4944 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
4945 fix2
= vec_madd(fs21
,dx21
,fix2
);
4946 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
4947 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
4948 fix3
= vec_madd(fs31
,dx31
,fix3
);
4949 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
4950 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
4952 fix1
= vec_madd(fs12
,dx12
,fix1
);
4953 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
4954 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
4955 fix2
= vec_madd(fs22
,dx22
,fix2
);
4956 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
4957 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
4958 fix3
= vec_madd(fs32
,dx32
,fix3
);
4959 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
4960 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
4962 fix1
= vec_madd(fs13
,dx13
,fix1
);
4963 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
4964 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
4965 fix2
= vec_madd(fs23
,dx23
,fix2
);
4966 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
4967 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
4968 fix3
= vec_madd(fs33
,dx33
,fix3
);
4969 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
4970 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
4972 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
4973 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
4974 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
4975 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
4976 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
4977 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
4978 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
4979 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
4980 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
4982 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
4983 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
4984 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
4985 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
4986 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
4987 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
4988 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
4989 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
4990 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
4992 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
4993 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
4994 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
4995 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
4996 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
4997 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
4998 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
4999 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
5000 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
5002 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
5003 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder case with two j waters left (taken when k < nj1-1). */
5004 } else if(k
<(nj1
-1)) {
5009 load_2_water(pos
+j3a
,pos
+j3b
,
5010 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 8 bytes: the two upper lanes are taken from nul. */
5011 qqOOt
= vec_sld(qqOO
,nul
,8);
5012 qqOHt
= vec_sld(qqOH
,nul
,8);
5013 qqHHt
= vec_sld(qqHH
,nul
,8);
5015 dx11
= vec_sub(ix1
,jx1
);
5016 dx12
= vec_sub(ix1
,jx2
);
5017 dx13
= vec_sub(ix1
,jx3
);
5018 dy11
= vec_sub(iy1
,jy1
);
5019 dy12
= vec_sub(iy1
,jy2
);
5020 dy13
= vec_sub(iy1
,jy3
);
5021 dz11
= vec_sub(iz1
,jz1
);
5022 dz12
= vec_sub(iz1
,jz2
);
5023 dz13
= vec_sub(iz1
,jz3
);
5024 dx21
= vec_sub(ix2
,jx1
);
5025 dx22
= vec_sub(ix2
,jx2
);
5026 dx23
= vec_sub(ix2
,jx3
);
5027 dy21
= vec_sub(iy2
,jy1
);
5028 dy22
= vec_sub(iy2
,jy2
);
5029 dy23
= vec_sub(iy2
,jy3
);
5030 dz21
= vec_sub(iz2
,jz1
);
5031 dz22
= vec_sub(iz2
,jz2
);
5032 dz23
= vec_sub(iz2
,jz3
);
5033 dx31
= vec_sub(ix3
,jx1
);
5034 dx32
= vec_sub(ix3
,jx2
);
5035 dx33
= vec_sub(ix3
,jx3
);
5036 dy31
= vec_sub(iy3
,jy1
);
5037 dy32
= vec_sub(iy3
,jy2
);
5038 dy33
= vec_sub(iy3
,jy3
);
5039 dz31
= vec_sub(iz3
,jz1
);
5040 dz32
= vec_sub(iz3
,jz2
);
5041 dz33
= vec_sub(iz3
,jz3
);
5043 rsq11
= vec_madd(dx11
,dx11
,nul
);
5044 rsq12
= vec_madd(dx12
,dx12
,nul
);
5045 rsq13
= vec_madd(dx13
,dx13
,nul
);
5046 rsq21
= vec_madd(dx21
,dx21
,nul
);
5047 rsq22
= vec_madd(dx22
,dx22
,nul
);
5048 rsq23
= vec_madd(dx23
,dx23
,nul
);
5049 rsq31
= vec_madd(dx31
,dx31
,nul
);
5050 rsq32
= vec_madd(dx32
,dx32
,nul
);
5051 rsq33
= vec_madd(dx33
,dx33
,nul
);
5052 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
5053 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
5054 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
5055 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
5056 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
5057 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
5058 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
5059 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
5060 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
5061 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
5062 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
5063 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
5064 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
5065 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
5066 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
5067 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
5068 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
5069 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
5071 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
5074 &rinv11
,&rinv12
,&rinv13
,
5075 &rinv21
,&rinv22
,&rinv23
,
5076 &rinv31
,&rinv32
,&rinv33
);
/* Helper defined elsewhere; by its name it clears the two lanes that hold
 * no j water in this case - confirm. */
5078 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
5079 &rinv21
,&rinv22
,&rinv23
,
5080 &rinv31
,&rinv32
,&rinv33
);
5082 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
5083 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
5084 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
5085 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
5086 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
5087 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
5088 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
5089 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
5090 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
5092 vc11
= vec_madd(rinv11
,qqOOt
,nul
);
5093 vc12
= vec_madd(rinv12
,qqOHt
,nul
);
5094 vc13
= vec_madd(rinv13
,qqOHt
,nul
);
5095 vc21
= vec_madd(rinv21
,qqOHt
,nul
);
5096 vc22
= vec_madd(rinv22
,qqHHt
,nul
);
5097 vc23
= vec_madd(rinv23
,qqHHt
,nul
);
5098 vc31
= vec_madd(rinv31
,qqOHt
,nul
);
5099 vc32
= vec_madd(rinv32
,qqHHt
,nul
);
5100 vc33
= vec_madd(rinv33
,qqHHt
,nul
);
5102 fs11
= vec_madd(vc11
,rinvsq11
,nul
);
5103 fs12
= vec_madd(vc12
,rinvsq12
,nul
);
5104 fs13
= vec_madd(vc13
,rinvsq13
,nul
);
5105 fs21
= vec_madd(vc21
,rinvsq21
,nul
);
5106 fs22
= vec_madd(vc22
,rinvsq22
,nul
);
5107 fs23
= vec_madd(vc23
,rinvsq23
,nul
);
5108 fs31
= vec_madd(vc31
,rinvsq31
,nul
);
5109 fs32
= vec_madd(vc32
,rinvsq32
,nul
);
5110 fs33
= vec_madd(vc33
,rinvsq33
,nul
);
5112 vctot
= vec_add(vctot
,vc11
);
5113 vc12
= vec_add(vc12
,vc13
);
5114 vc21
= vec_add(vc21
,vc22
);
5115 vc23
= vec_add(vc23
,vc31
);
5116 vc32
= vec_add(vc32
,vc33
);
5117 vctot
= vec_add(vctot
,vc12
);
5118 vc21
= vec_add(vc21
,vc23
);
5119 vctot
= vec_add(vctot
,vc32
);
5120 vctot
= vec_add(vctot
,vc21
);
5122 fix1
= vec_madd(fs11
,dx11
,fix1
);
5123 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
5124 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
5125 fix2
= vec_madd(fs21
,dx21
,fix2
);
5126 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
5127 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
5128 fix3
= vec_madd(fs31
,dx31
,fix3
);
5129 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
5130 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
5132 fix1
= vec_madd(fs12
,dx12
,fix1
);
5133 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
5134 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
5135 fix2
= vec_madd(fs22
,dx22
,fix2
);
5136 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
5137 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
5138 fix3
= vec_madd(fs32
,dx32
,fix3
);
5139 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
5140 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
5142 fix1
= vec_madd(fs13
,dx13
,fix1
);
5143 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
5144 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
5145 fix2
= vec_madd(fs23
,dx23
,fix2
);
5146 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
5147 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
5148 fix3
= vec_madd(fs33
,dx33
,fix3
);
5149 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
5150 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
5152 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
5153 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
5154 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
5155 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
5156 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
5157 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
5158 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
5159 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
5160 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
5162 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
5163 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
5164 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
5165 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
5166 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
5167 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
5168 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
5169 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
5170 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
5172 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
5173 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
5174 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
5175 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
5176 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
5177 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
5178 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
5179 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
5180 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
5182 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
5183 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder case with a single j water left.
 * NOTE(review): the branch header that opens this case is not visible in
 * this excerpt. */
5187 load_1_water(pos
+j3a
,
5188 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 12 bytes: only the first lane keeps the charge product, the
 * other three lanes are taken from nul. */
5189 qqOOt
= vec_sld(qqOO
,nul
,12);
5190 qqOHt
= vec_sld(qqOH
,nul
,12);
5191 qqHHt
= vec_sld(qqHH
,nul
,12);
5193 dx11
= vec_sub(ix1
,jx1
);
5194 dx12
= vec_sub(ix1
,jx2
);
5195 dx13
= vec_sub(ix1
,jx3
);
5196 dy11
= vec_sub(iy1
,jy1
);
5197 dy12
= vec_sub(iy1
,jy2
);
5198 dy13
= vec_sub(iy1
,jy3
);
5199 dz11
= vec_sub(iz1
,jz1
);
5200 dz12
= vec_sub(iz1
,jz2
);
5201 dz13
= vec_sub(iz1
,jz3
);
5202 dx21
= vec_sub(ix2
,jx1
);
5203 dx22
= vec_sub(ix2
,jx2
);
5204 dx23
= vec_sub(ix2
,jx3
);
5205 dy21
= vec_sub(iy2
,jy1
);
5206 dy22
= vec_sub(iy2
,jy2
);
5207 dy23
= vec_sub(iy2
,jy3
);
5208 dz21
= vec_sub(iz2
,jz1
);
5209 dz22
= vec_sub(iz2
,jz2
);
5210 dz23
= vec_sub(iz2
,jz3
);
5211 dx31
= vec_sub(ix3
,jx1
);
5212 dx32
= vec_sub(ix3
,jx2
);
5213 dx33
= vec_sub(ix3
,jx3
);
5214 dy31
= vec_sub(iy3
,jy1
);
5215 dy32
= vec_sub(iy3
,jy2
);
5216 dy33
= vec_sub(iy3
,jy3
);
5217 dz31
= vec_sub(iz3
,jz1
);
5218 dz32
= vec_sub(iz3
,jz2
);
5219 dz33
= vec_sub(iz3
,jz3
);
5221 rsq11
= vec_madd(dx11
,dx11
,nul
);
5222 rsq12
= vec_madd(dx12
,dx12
,nul
);
5223 rsq13
= vec_madd(dx13
,dx13
,nul
);
5224 rsq21
= vec_madd(dx21
,dx21
,nul
);
5225 rsq22
= vec_madd(dx22
,dx22
,nul
);
5226 rsq23
= vec_madd(dx23
,dx23
,nul
);
5227 rsq31
= vec_madd(dx31
,dx31
,nul
);
5228 rsq32
= vec_madd(dx32
,dx32
,nul
);
5229 rsq33
= vec_madd(dx33
,dx33
,nul
);
5230 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
5231 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
5232 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
5233 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
5234 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
5235 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
5236 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
5237 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
5238 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
5239 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
5240 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
5241 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
5242 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
5243 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
5244 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
5245 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
5246 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
5247 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
5249 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
5252 &rinv11
,&rinv12
,&rinv13
,
5253 &rinv21
,&rinv22
,&rinv23
,
5254 &rinv31
,&rinv32
,&rinv33
);
/* Helper defined elsewhere; by its name it clears the three lanes that
 * hold no j water in this case - confirm. */
5256 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
5257 &rinv21
,&rinv22
,&rinv23
,
5258 &rinv31
,&rinv32
,&rinv33
);
5260 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
5261 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
5262 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
5263 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
5264 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
5265 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
5266 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
5267 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
5268 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
5270 vc11
= vec_madd(rinv11
,qqOOt
,nul
);
5271 vc12
= vec_madd(rinv12
,qqOHt
,nul
);
5272 vc13
= vec_madd(rinv13
,qqOHt
,nul
);
5273 vc21
= vec_madd(rinv21
,qqOHt
,nul
);
5274 vc22
= vec_madd(rinv22
,qqHHt
,nul
);
5275 vc23
= vec_madd(rinv23
,qqHHt
,nul
);
5276 vc31
= vec_madd(rinv31
,qqOHt
,nul
);
5277 vc32
= vec_madd(rinv32
,qqHHt
,nul
);
5278 vc33
= vec_madd(rinv33
,qqHHt
,nul
);
5280 fs11
= vec_madd(vc11
,rinvsq11
,nul
);
5281 fs12
= vec_madd(vc12
,rinvsq12
,nul
);
5282 fs13
= vec_madd(vc13
,rinvsq13
,nul
);
5283 fs21
= vec_madd(vc21
,rinvsq21
,nul
);
5284 fs22
= vec_madd(vc22
,rinvsq22
,nul
);
5285 fs23
= vec_madd(vc23
,rinvsq23
,nul
);
5286 fs31
= vec_madd(vc31
,rinvsq31
,nul
);
5287 fs32
= vec_madd(vc32
,rinvsq32
,nul
);
5288 fs33
= vec_madd(vc33
,rinvsq33
,nul
);
5290 vctot
= vec_add(vctot
,vc11
);
5291 vc12
= vec_add(vc12
,vc13
);
5292 vc21
= vec_add(vc21
,vc22
);
5293 vc23
= vec_add(vc23
,vc31
);
5294 vc32
= vec_add(vc32
,vc33
);
5295 vctot
= vec_add(vctot
,vc12
);
5296 vc21
= vec_add(vc21
,vc23
);
5297 vctot
= vec_add(vctot
,vc32
);
5298 vctot
= vec_add(vctot
,vc21
);
5300 fix1
= vec_madd(fs11
,dx11
,fix1
);
5301 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
5302 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
5303 fix2
= vec_madd(fs21
,dx21
,fix2
);
5304 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
5305 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
5306 fix3
= vec_madd(fs31
,dx31
,fix3
);
5307 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
5308 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
5310 fix1
= vec_madd(fs12
,dx12
,fix1
);
5311 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
5312 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
5313 fix2
= vec_madd(fs22
,dx22
,fix2
);
5314 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
5315 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
5316 fix3
= vec_madd(fs32
,dx32
,fix3
);
5317 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
5318 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
5320 fix1
= vec_madd(fs13
,dx13
,fix1
);
5321 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
5322 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
5323 fix2
= vec_madd(fs23
,dx23
,fix2
);
5324 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
5325 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
5326 fix3
= vec_madd(fs33
,dx33
,fix3
);
5327 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
5328 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
5330 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
5331 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
5332 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
5333 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
5334 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
5335 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
5336 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
5337 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
5338 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
5340 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
5341 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
5342 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
5343 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
5344 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
5345 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
5346 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
5347 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
5348 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
5350 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
5351 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
5352 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
5353 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
5354 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
5355 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
5356 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
5357 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
5358 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
5360 add_force_to_1_water(faction
+j3a
,
5361 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* After all j waters of this i water: pass the nine i-force accumulators
 * to the helper with the force and shift-force locations, then add vctot
 * to the potential entry selected by gid[n]. */
5363 /* update outer data */
5364 update_i_water_forces(faction
+ii3
,fshift
+is3
,
5365 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
5367 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * vfloat: union type used as the element type of the stackdata[52] scratch
 * array in inl1130_altivec, which is written with vec_st through a float
 * pointer cast.
 * NOTE(review): the union members are not visible in this excerpt.
 */
5371 typedef union vfloat
{
5376 void inl1130_altivec(
5395 register vector
float v0
;
5396 register vector
float v1
;
5397 register vector
float v2
;
5398 register vector
float v3
;
5399 register vector
float v4
;
5400 register vector
float v5
;
5401 register vector
float v6
;
5402 register vector
float v7
;
5403 register vector
float v8
;
5404 register vector
float v9
;
5405 register vector
float v10
;
5406 register vector
float v11
;
5407 register vector
float v12
;
5408 register vector
float v13
;
5409 register vector
float v14
;
5410 register vector
float v15
;
5411 register vector
float v16
;
5412 register vector
float v17
;
5413 register vector
float v18
;
5414 register vector
float v19
;
5415 register vector
float v20
;
5416 register vector
float v21
;
5417 register vector
float v22
;
5418 register vector
float v23
;
5419 register vector
float v24
;
5420 register vector
float v25
;
5421 register vector
float v26
;
5422 register vector
float v27
;
5423 register vector
float v28
;
5424 register vector
float v29
;
5425 register vector
float v30
;
5426 register vector
float v31
;
5428 vfloat stackdata
[52];
5430 int n
,k
,k0
,ii
,is3
,ii3
,nj0
,nj1
;
5431 int jnra
,jnrb
,jnrc
,jnrd
;
5433 int j3a
,j3b
,j3c
,j3d
;
5435 /* set non java mode */
5436 v10
= (vector
float)vec_mfvscr();
5437 v11
= (vector
float)vec_sl(vec_splat_u32(1),vec_splat_u32(8));
5438 v12
= (vector
float)vec_sl((vector
unsigned int)v11
,vec_splat_u32(8));
5439 v10
= (vector
float)vec_or((vector
unsigned short)v10
,(vector
unsigned short)v12
);
5440 vec_mtvscr((vector
unsigned short)v10
);
5442 v0
= (vector
float)vec_splat_u32(0);
5443 v0
= vec_ctf((vector
unsigned int)v0
,0); /* load 0 to v0 */
5444 v1
= vec_lde(0,&facel
); /* load facel float to a vector */
5445 v2
= (vector
float) vec_lvsl(0,&facel
);
5446 v1
= vec_perm(v1
,v1
,(vector
unsigned char) v2
); /* move it to elem 0 */
5447 v1
= vec_splat(v1
,0); /* splat it to all elem */
5451 v3
= vec_lde(0,charge
+ii
); /* load qO float to a vector */
5452 v4
= (vector
float) vec_lvsl(0,charge
+ii
);
5453 v3
= vec_perm(v3
,v3
,(vector
unsigned char) v4
); /* move it to elem 0 */
5454 v3
= vec_splat(v3
,0); /* splat it to all elem */
5456 v5
= vec_lde(0,charge
+ii
+1); /* load qH float to a vector */
5457 v6
= (vector
float) vec_lvsl(0,charge
+ii
+1);
5458 v5
= vec_perm(v5
,v5
,(vector
unsigned char) v6
); /* move it to elem 0 */
5459 v5
= vec_splat(v5
,0); /* splat it to all elem */
5461 v4
= vec_madd(v3
,v5
,v0
); /* qqOH */
5462 v3
= vec_madd(v3
,v3
,v0
); /* qqOO */
5463 v5
= vec_madd(v5
,v5
,v0
); /* qqHH */
5464 v4
= vec_madd(v4
,v1
,v0
); /* qqOH * facel */
5465 v3
= vec_madd(v3
,v1
,v0
); /* qqOO * facel */
5466 v5
= vec_madd(v5
,v1
,v0
); /* qqHH * facel */
5471 v1
= vec_ld( 0,nbfp
+n
); /* c6a c12a - this works since the nbfp array
5472 * is always at least 8-byte aligned and n is even here.
5474 v2
= (vector
float) vec_lvsl(0,nbfp
+n
);
5475 v1
= vec_perm(v1
,v1
,(vector
unsigned char)v2
); /* c6 c12 moved to positions 0,1 */
5476 v2
= vec_splat(v1
,1); /* c12 in all elements */
5477 v1
= vec_splat(v1
,0); /* c6 in all elements */
5479 /* store things to stack before starting outer loop */
5480 vec_st(v3
, 0, (float *) stackdata
); /* qqOO*facel is in stack pos 0 */
5481 vec_st(v4
, 16, (float *) stackdata
); /* qqOH*facel is in stack pos 1 */
5482 vec_st(v5
, 32, (float *) stackdata
); /* qqHH*facel is in stack pos 2 */
5483 vec_st(v1
, 48, (float *) stackdata
); /* c6 is in stack pos 3 */
5484 vec_st(v2
, 64, (float *) stackdata
); /* c12 is in stack pos 4 */
5486 for(n
=0;n
<nri
;n
++) {
5491 /* load three consecutive shiftvector floats. We never access the fourth element,
5492 * so this is safe even at the end of an array.
5495 v4
= (vector
float)vec_lvsl(0, shiftvec
+is3
);
5496 v1
= vec_lde(0, shiftvec
+is3
);
5497 v2
= vec_lde(4, shiftvec
+is3
);
5498 v3
= vec_lde(8, shiftvec
+is3
);
5499 v1
= vec_perm(v1
,v1
,(vector
unsigned char)v4
); /* shX in elem 0 */
5500 v2
= vec_perm(v2
,v2
,(vector
unsigned char)v4
); /* shY in elem 1 */
5501 v3
= vec_perm(v3
,v3
,(vector
unsigned char)v4
); /* shZ in elem 2 */
5502 v2
= vec_sld(v2
,v2
,4);
5503 v3
= vec_sld(v3
,v3
,8);
5504 v1
= vec_mergeh(v1
,v3
);
5505 v1
= vec_mergeh(v1
,v2
); /* [ shX shY shZ - ] */
5506 /* load i coordinates */
5507 v2
= (vector
float)vec_lvsl(0, pos
+ii3
);
5508 v3
= vec_ld(0, pos
+ii3
); /* load water coords into three vectors. */
5509 v4
= vec_ld(16, pos
+ii3
);/* we do not yet know how it is aligned. */
5510 v5
= vec_ld(32, pos
+ii3
);
5511 v6
= vec_sld(v1
,v1
,12); /* - shX shY shZ */
5512 v7
= vec_sld(v6
,v1
,4); /* shX shY shZ shX */
5513 v8
= vec_sld(v6
,v1
,8); /* shY shZ shX shY */
5514 v9
= vec_sld(v6
,v1
,12); /* shZ shX shY shZ */
5515 v3
= vec_perm(v3
,v4
,(vector
unsigned char)v2
); /* Ox Oy Oz H1x */
5516 v4
= vec_perm(v4
,v5
,(vector
unsigned char)v2
); /* H1y H1z H2x H2y */
5517 v5
= vec_perm(v5
,v5
,(vector
unsigned char)v2
); /* H2z - - - */
5518 v3
= vec_add(v3
,v7
);
5519 v4
= vec_add(v4
,v8
);
5520 v5
= vec_add(v5
,v9
);
5521 v6
= vec_splat(v3
,0); /* Ox Ox Ox Ox */
5522 v7
= vec_splat(v3
,1); /* Oy Oy Oy Oy */
5523 v8
= vec_splat(v3
,2); /* Oz Oz Oz Oz */
5524 v9
= vec_splat(v3
,3); /* H1x H1x H1x H1x */
5525 v10
= vec_splat(v4
,0); /* H1y H1y H1y H1y */
5526 v11
= vec_splat(v4
,1); /* H1z H1z H1z H1z */
5527 v12
= vec_splat(v4
,2); /* H2x H2x H2x H2x */
5528 v13
= vec_splat(v4
,3); /* H2y H2y H2y H2y */
5529 v14
= vec_splat(v5
,0); /* H2z H2z H2z H2z */
5530 /* Store i water coordinates to stack */
5531 vec_st(v6
, 80, (float *)stackdata
); /* i Ox is in stack pos 5 */
5532 vec_st(v7
, 96, (float *)stackdata
); /* i Oy is in stack pos 6 */
5533 vec_st(v8
, 112, (float *)stackdata
); /* i Oz is in stack pos 7 */
5534 vec_st(v9
, 128, (float *)stackdata
); /* i H1x is in stack pos 8 */
5535 vec_st(v10
,144, (float *)stackdata
); /* i H1y is in stack pos 9 */
5536 vec_st(v11
,160, (float *)stackdata
); /* i H1z is in stack pos 10 */
5537 vec_st(v12
,176, (float *)stackdata
); /* i H2x is in stack pos 11 */
5538 vec_st(v13
,192, (float *)stackdata
); /* i H2y is in stack pos 12 */
5539 vec_st(v14
,208, (float *)stackdata
); /* i H2z is in stack pos 13 */
5543 vec_dst( jjnr
+ nj1
, 0x10010100, 0 );
5544 vec_st(v0
, 224, (float *)stackdata
); /* zero vctot, in stack pos 14 */
5545 vec_st(v0
, 240, (float *)stackdata
); /* zero vnbtot, in stack pos 15 */
5546 vec_st(v0
, 256, (float *)stackdata
); /* zero fiOx, in stack pos 16 */
5547 vec_st(v0
, 272, (float *)stackdata
); /* zero fiOy, in stack pos 17 */
5548 vec_st(v0
, 288, (float *)stackdata
); /* zero fiOz, in stack pos 18 */
5550 vec_st(v0
, 304, (float *)stackdata
); /* zero fiH1x, in stack pos 19 */
5551 vec_st(v0
, 320, (float *)stackdata
); /* zero fiH1y, in stack pos 20 */
5552 vec_st(v0
, 336, (float *)stackdata
); /* zero fiH1z, in stack pos 21 */
5553 vec_st(v0
, 352, (float *)stackdata
); /* zero fiH2x, in stack pos 22 */
5554 vec_st(v0
, 368, (float *)stackdata
); /* zero fiH2y, in stack pos 23 */
5555 vec_st(v0
, 384, (float *)stackdata
); /* zero fiH2z, in stack pos 24 */
5557 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
5563 vec_dst( jjnr
+ k
+ 4, 0x02020020, 0 );
5570 vec_dst( pos
+j3a
, 0x10010100, 1 );
5572 v1
= (vector
float)vec_lvsl(0, pos
+j3a
);
5573 v8
= (vector
float)vec_lvsl(0, pos
+j3b
);
5574 v15
= (vector
float)vec_lvsl(0, pos
+j3c
);
5575 v22
= (vector
float)vec_lvsl(0, pos
+j3d
);
5576 v2
= vec_ld(0, pos
+j3a
);
5577 v9
= vec_ld(0, pos
+j3b
);
5578 v16
= vec_ld(0, pos
+j3c
);
5579 v23
= vec_ld(0, pos
+j3d
);
5581 v3
= vec_ld(16, pos
+j3a
);
5582 v10
= vec_ld(16, pos
+j3b
);
5583 v17
= vec_ld(16, pos
+j3c
);
5584 v24
= vec_ld(16, pos
+j3d
);
5585 v4
= vec_ld(32, pos
+j3a
);
5586 v11
= vec_ld(32, pos
+j3b
);
5587 v18
= vec_ld(32, pos
+j3c
);
5588 v25
= vec_ld(32, pos
+j3d
);
5589 v5
= vec_perm(v2
,v3
,(vector
unsigned char)v1
); /* Oxa Oya Oza H1xa */
5590 v12
= vec_perm(v9
,v10
,(vector
unsigned char)v8
); /* Oxb Oyb Ozb H1xb */
5591 v19
= vec_perm(v16
,v17
,(vector
unsigned char)v15
); /* Oxc Oyc Ozc H1xc */
5592 v26
= vec_perm(v23
,v24
,(vector
unsigned char)v22
); /* Oxd Oyd Ozd H1xd */
5594 v6
= vec_perm(v3
,v4
,(vector
unsigned char)v1
); /* H1ya H1za H2xa H2ya */
5595 v13
= vec_perm(v10
,v11
,(vector
unsigned char)v8
); /* H1yb H1zb H2xb H2yb */
5596 v20
= vec_perm(v17
,v18
,(vector
unsigned char)v15
); /* H1yc H1zc H2xc H2yc */
5597 v27
= vec_perm(v24
,v25
,(vector
unsigned char)v22
); /* H1yd H1zd H2xd H2yd */
5599 v7
= vec_perm(v4
,v4
,(vector
unsigned char)v1
); /* H2za - - - */
5600 v14
= vec_perm(v11
,v11
,(vector
unsigned char)v8
); /* H2zb - - - */
5601 v21
= vec_perm(v18
,v18
,(vector
unsigned char)v15
); /* H2zc - - - */
5602 v28
= vec_perm(v25
,v25
,(vector
unsigned char)v22
); /* H2zd - - - */
5604 /* permute water coordinates */
5605 v3
= vec_mergeh(v5
,v19
); /* Oxa Oxc Oya Oyc */
5606 v5
= vec_mergel(v5
,v19
); /* Oza Ozc H1xa H1xc */
5607 v19
= vec_mergeh(v12
,v26
); /* Oxb Oxd Oyb Oyd */
5608 v12
= vec_mergel(v12
,v26
); /* Ozb Ozd H1xb H1xd */
5610 v26
= vec_mergeh(v6
,v20
); /* H1ya H1yc H1za H1zc */
5611 v16
= vec_mergel(v6
,v20
); /* H2xa H2xc H2ya H2yc */
5612 v20
= vec_mergeh(v13
,v27
); /* H1yb H1yd H1zb H1zd */
5613 v13
= vec_mergel(v13
,v27
); /* H2xb H2xd H2yb H2yd */
5615 v15
= vec_mergeh(v7
,v21
); /* H2za H2zc - - */
5616 v14
= vec_mergeh(v14
,v28
); /* H2zb H2zd - - */
5618 v1
= vec_mergeh(v3
,v19
); /* Oxa Oxb Oxc Oxd */
5619 v29
= vec_ld(128, (float *) stackdata
); /* load i H1x */
5620 v2
= vec_mergel(v3
,v19
); /* Oya Oyb Oyc Oyd */
5621 v30
= vec_ld(144, (float *) stackdata
); /* load i H1y */
5622 v3
= vec_mergeh(v5
,v12
); /* Oza Ozb Ozc Ozd */
5623 v31
= vec_ld(160, (float *) stackdata
); /* load i H1z */
5624 v4
= vec_mergel(v5
,v12
); /* H1xa H1xb H1xc H1xd */
5625 v5
= vec_mergeh(v26
,v20
); /* H1ya H1yb H1yc H1yd */
5626 v6
= vec_mergel(v26
,v20
); /* H1za H1zb H1zc H1zd */
5627 v7
= vec_mergeh(v16
,v13
); /* H2xa H2xb H2xc H2xd */
5628 v8
= vec_mergel(v16
,v13
); /* H2ya H2yb H2yc H2yd */
5629 v9
= vec_mergeh(v15
,v14
); /* H2za H2zb H2zc H2zd */
5631 v10
= vec_sub(v29
,v1
); /* iH1x - jOx */
5632 v13
= vec_sub(v29
,v4
); /* iH1x - jH1x */
5633 v16
= vec_sub(v29
,v7
); /* iH1x - jH2x */
5634 v29
= vec_ld(176, (float *) stackdata
); /* load i H2x */
5635 v11
= vec_sub(v30
,v2
); /* iH1y - jOy */
5636 v14
= vec_sub(v30
,v5
); /* iH1y - jH1y */
5637 v17
= vec_sub(v30
,v8
); /* iH1y - jH2y */
5638 v30
= vec_ld(192, (float *) stackdata
); /* load i H2y */
5639 vec_st(v10
, 544, (float *)stackdata
); /* dx21 */
5640 vec_st(v13
, 592, (float *)stackdata
); /* dx22 */
5641 vec_st(v16
, 640, (float *)stackdata
); /* dx23 */
5642 v12
= vec_sub(v31
,v3
); /* iH1z - jOz */
5643 v15
= vec_sub(v31
,v6
); /* iH1z - jH1z */
5644 v18
= vec_sub(v31
,v9
); /* iH1z - jH2z */
5645 v31
= vec_ld(208, (float *) stackdata
); /* load i H2z */
5646 /* v10-v18 now contain iH1-jO, iH1-jH1 and iH1-jH2 distances */
5647 vec_st(v11
, 560, (float *)stackdata
); /* dy21 */
5648 vec_st(v14
, 608, (float *)stackdata
); /* dy22 */
5649 vec_st(v17
, 656, (float *)stackdata
); /* dy23 */
5650 v19
= vec_sub(v29
,v1
); /* iH2x - jOx */
5651 v22
= vec_sub(v29
,v4
); /* iH2x - jH1x */
5652 v25
= vec_sub(v29
,v7
); /* iH2x - jH2x */
5653 vec_st(v12
, 576, (float *)stackdata
); /* dz21 */
5654 vec_st(v15
, 624, (float *)stackdata
); /* dz22 */
5655 vec_st(v18
, 672, (float *)stackdata
); /* dz23 */
5656 v29
= vec_ld(80, (float *) stackdata
); /* load i Ox */
5657 v20
= vec_sub(v30
,v2
); /* iH2y - jOy */
5658 v23
= vec_sub(v30
,v5
); /* iH2y - jH1y */
5659 v26
= vec_sub(v30
,v8
); /* iH2y - jH2y */
5660 vec_st(v19
, 688, (float *)stackdata
); /* dx31 */
5661 vec_st(v22
, 736, (float *)stackdata
); /* dx32 */
5662 vec_st(v25
, 784, (float *)stackdata
); /* dx33 */
5663 v30
= vec_ld(96, (float *) stackdata
); /* load i Oy */
5664 v21
= vec_sub(v31
,v3
); /* iH2z - jOz */
5665 v24
= vec_sub(v31
,v6
); /* iH2z - jH1z */
5666 v27
= vec_sub(v31
,v9
); /* iH2z - jH2z */
5667 v31
= vec_ld(112, (float *) stackdata
); /* load i Oz */
5668 vec_st(v20
, 704, (float *)stackdata
); /* dy31 */
5669 vec_st(v23
, 752, (float *)stackdata
); /* dy32 */
5670 vec_st(v26
, 800, (float *)stackdata
); /* dy33 */
5672 v1
= vec_sub(v29
,v1
); /* iOx - jOx */
5673 v4
= vec_sub(v29
,v4
); /* iOx - jH1x */
5674 v7
= vec_sub(v29
,v7
); /* iOx - jH2x */
5675 vec_st(v21
, 720, (float *)stackdata
); /* dz31 */
5676 vec_st(v24
, 768, (float *)stackdata
); /* dz32 */
5677 vec_st(v27
, 816, (float *)stackdata
); /* dz33 */
5678 v2
= vec_sub(v30
,v2
); /* iOy - jOy */
5679 v5
= vec_sub(v30
,v5
); /* iOy - jH1y */
5680 v8
= vec_sub(v30
,v8
); /* iOy - jH2y */
5681 vec_st(v1
, 400, (float *)stackdata
); /* dx11 */
5682 vec_st(v4
, 448, (float *)stackdata
); /* dx12 */
5683 vec_st(v7
, 496, (float *)stackdata
); /* dx13 */
5684 v3
= vec_sub(v31
,v3
); /* iOz - jOz */
5685 v6
= vec_sub(v31
,v6
); /* iOz - jH1z */
5686 v9
= vec_sub(v31
,v9
); /* iOz - jH2z */
5687 vec_st(v2
, 416, (float *)stackdata
); /* dy11 */
5688 vec_st(v5
, 464, (float *)stackdata
); /* dy12 */
5689 vec_st(v8
, 512, (float *)stackdata
); /* dy13 */
5691 v1
= vec_madd(v1
,v1
,v0
);
5692 v4
= vec_madd(v4
,v4
,v0
);
5693 v7
= vec_madd(v7
,v7
,v0
);
5694 vec_st(v3
, 432, (float *)stackdata
); /* dz11 */
5695 vec_st(v6
, 480, (float *)stackdata
); /* dz12 */
5696 vec_st(v9
, 528, (float *)stackdata
); /* dz13 */
5697 v10
= vec_madd(v10
,v10
,v0
);
5698 v13
= vec_madd(v13
,v13
,v0
);
5699 v16
= vec_madd(v16
,v16
,v0
);
5700 v19
= vec_madd(v19
,v19
,v0
);
5701 v22
= vec_madd(v22
,v22
,v0
);
5702 v25
= vec_madd(v25
,v25
,v0
);
5703 v1
= vec_madd(v2
,v2
,v1
);
5704 v4
= vec_madd(v5
,v5
,v4
);
5705 v7
= vec_madd(v8
,v8
,v7
);
5706 v10
= vec_madd(v11
,v11
,v10
);
5707 v13
= vec_madd(v14
,v14
,v13
);
5708 v16
= vec_madd(v17
,v17
,v16
);
5709 v19
= vec_madd(v20
,v20
,v19
);
5710 v22
= vec_madd(v23
,v23
,v22
);
5711 v25
= vec_madd(v26
,v26
,v25
);
5712 v1
= vec_madd(v3
,v3
,v1
);
5713 v2
= vec_madd(v6
,v6
,v4
);
5714 v3
= vec_madd(v9
,v9
,v7
);
5715 v4
= vec_madd(v12
,v12
,v10
);
5716 v5
= vec_madd(v15
,v15
,v13
);
5717 v6
= vec_madd(v18
,v18
,v16
);
5718 v7
= vec_madd(v21
,v21
,v19
);
5719 v8
= vec_madd(v24
,v24
,v22
);
5720 v9
= vec_madd(v27
,v27
,v25
);
5733 v10
= vec_rsqrte(v1
);
5734 v11
= vec_rsqrte(v2
);
5735 v12
= vec_rsqrte(v3
);
5736 v13
= vec_rsqrte(v4
);
5737 v14
= vec_rsqrte(v5
);
5738 v15
= vec_rsqrte(v6
);
5739 v16
= vec_rsqrte(v7
);
5740 v17
= vec_rsqrte(v8
);
5741 v18
= vec_rsqrte(v9
);
5742 /* create constants 0.5 (in v31) and 1.0 (in v30) */
5743 v30
= (vector
float) vec_splat_u32(1);
5744 v31
= vec_ctf((vector
unsigned int)v30
,1); /* 0.5 */
5745 v30
= vec_ctf((vector
unsigned int)v30
,0); /* 1.0 */
5747 v19
= vec_madd(v10
,v10
,v0
); /* lu*lu */
5748 v20
= vec_madd(v11
,v11
,v0
);
5749 v21
= vec_madd(v12
,v12
,v0
);
5750 v22
= vec_madd(v13
,v13
,v0
);
5751 v23
= vec_madd(v14
,v14
,v0
);
5752 v24
= vec_madd(v15
,v15
,v0
);
5753 v25
= vec_madd(v16
,v16
,v0
);
5754 v26
= vec_madd(v17
,v17
,v0
);
5755 v27
= vec_madd(v18
,v18
,v0
);
5757 v19
= vec_nmsub(v1
,v19
,v30
); /* 1.0 - rsq*lu*lu */
5758 v20
= vec_nmsub(v2
,v20
,v30
);
5759 v21
= vec_nmsub(v3
,v21
,v30
);
5760 v22
= vec_nmsub(v4
,v22
,v30
);
5761 v23
= vec_nmsub(v5
,v23
,v30
);
5762 v24
= vec_nmsub(v6
,v24
,v30
);
5763 v25
= vec_nmsub(v7
,v25
,v30
);
5764 v26
= vec_nmsub(v8
,v26
,v30
);
5765 v27
= vec_nmsub(v9
,v27
,v30
);
5767 v1
= vec_madd(v10
,v31
,v0
);/* lu*0.5*/
5768 v2
= vec_madd(v11
,v31
,v0
);
5769 v3
= vec_madd(v12
,v31
,v0
);
5770 v4
= vec_madd(v13
,v31
,v0
);
5771 v5
= vec_madd(v14
,v31
,v0
);
5772 v6
= vec_madd(v15
,v31
,v0
);
5773 v7
= vec_madd(v16
,v31
,v0
);
5774 v8
= vec_madd(v17
,v31
,v0
);
5775 v9
= vec_madd(v18
,v31
,v0
);
5777 /* The rinv values */
5778 v1
= vec_madd(v1
,v19
,v10
);
5779 v2
= vec_madd(v2
,v20
,v11
);
5780 v3
= vec_madd(v3
,v21
,v12
);
5781 v4
= vec_madd(v4
,v22
,v13
);
5782 v5
= vec_madd(v5
,v23
,v14
);
5783 v6
= vec_madd(v6
,v24
,v15
);
5784 v7
= vec_madd(v7
,v25
,v16
);
5785 v8
= vec_madd(v8
,v26
,v17
);
5786 v9
= vec_madd(v9
,v27
,v18
);
5788 /* load qqOO, qqOH and qqHH to v27,v28,v29 */
5789 v27
= vec_ld(0, (float *) stackdata
);
5790 v28
= vec_ld(16, (float *) stackdata
);
5791 v29
= vec_ld(32, (float *) stackdata
);
5793 vec_dstst( faction
+j3a
, 0x10010100, 2 );
5795 /* put rinvsq in v10-v18, rinv6_OO in v30 and rinv12_OO in v31 */
5796 /* load c6 to v25 and c12 to v26 */
5797 v25
= vec_ld(48, (float *) stackdata
);
5798 v26
= vec_ld(64, (float *) stackdata
);
5800 v10
= vec_madd(v1
,v1
,v0
);
5801 v1
= vec_madd(v1
,v27
,v0
); /* rinv11*qqOO */
5802 v11
= vec_madd(v2
,v2
,v0
);
5803 /* load vctot to v23 and vnbtot to v24 */
5804 v23
= vec_ld(224,(float *) stackdata
);
5805 v24
= vec_ld(240,(float *) stackdata
);
5807 v2
= vec_madd(v2
,v28
,v0
); /* rinv12*qqOH */
5808 v12
= vec_madd(v3
,v3
,v0
);
5809 v30
= vec_madd(v10
,v10
,v0
); /* rinv4 */
5810 v3
= vec_madd(v3
,v28
,v0
); /* rinv13*qqOH */
5811 v13
= vec_madd(v4
,v4
,v0
);
5812 v4
= vec_madd(v4
,v28
,v0
); /* rinv21*qqOH */
5813 v14
= vec_madd(v5
,v5
,v0
);
5815 v23
= vec_add(v23
,v1
);
5817 v30
= vec_madd(v30
,v10
,v0
); /* rinv6 */
5818 v5
= vec_madd(v5
,v29
,v0
); /* rinv22*qqHH */
5819 v15
= vec_madd(v6
,v6
,v0
);
5820 v6
= vec_madd(v6
,v29
,v0
); /* rinv23*qqHH */
5821 v23
= vec_add(v23
,v2
);
5822 v16
= vec_madd(v7
,v7
,v0
);
5823 v31
= vec_madd(v30
,v30
,v0
); /* rinv12 */
5824 v25
= vec_madd(v25
,v30
,v0
); /* c6*rinv6 */
5825 /* load 6.0 to v30 */
5826 v30
= (vector
float)vec_splat_u32(6);
5827 v30
= vec_ctf((vector
unsigned int)v30
,0);
5828 v23
= vec_add(v23
,v3
);
5830 v7
= vec_madd(v7
,v28
,v0
); /* rinv31*qqOH */
5831 v17
= vec_madd(v8
,v8
,v0
);
5832 v8
= vec_madd(v8
,v29
,v0
); /* rinv32*qqHH */
5833 v26
= vec_madd(v26
,v31
,v0
); /* c12*rinv12 */
5834 v23
= vec_add(v23
,v4
);
5835 /* load 12.0 to v31 */
5836 v31
= (vector
float)vec_splat_u32(12);
5837 v31
= vec_ctf((vector
unsigned int)v31
,0);
5839 v24
= vec_sub(v24
,v25
); /* subtract vnb6 (c6*rinv6) from vnbtot */
5840 v18
= vec_madd(v9
,v9
,v0
);
5841 v23
= vec_add(v23
,v5
);
5842 v9
= vec_madd(v9
,v29
,v0
); /* rinv33*qqHH */
5844 v24
= vec_add(v24
,v26
);/* add vnb12 to vnbtot */
5846 v31
= vec_madd(v31
,v26
,v0
);
5847 v11
= vec_madd(v11
,v2
,v0
); /* fs12 */
5848 v23
= vec_add(v23
,v6
);
5849 v12
= vec_madd(v12
,v3
,v0
); /* fs13 */
5850 v13
= vec_madd(v13
,v4
,v0
); /* fs21 */
5851 v31
= vec_nmsub(v30
,v25
,v31
);
5853 v14
= vec_madd(v14
,v5
,v0
); /* fs22 */
5854 v23
= vec_add(v23
,v7
);
5855 v15
= vec_madd(v15
,v6
,v0
); /* fs23 */
5856 v16
= vec_madd(v16
,v7
,v0
); /* fs31 */
5857 v1
= vec_add(v31
,v1
);
5858 v17
= vec_madd(v17
,v8
,v0
); /* fs32 */
5859 v23
= vec_add(v23
,v8
);
5860 v18
= vec_madd(v18
,v9
,v0
); /* fs33 */
5861 v10
= vec_madd(v10
,v1
,v0
);
5863 vec_st(v24
,240,(float *)stackdata
); /* store vnbtot */
5864 /* calculate vectorial forces and accumulate fj. v10-v18 has fs11-fs33 now. */
5865 /* First load iO-* dx,dy,dz vectors to v1-v9 */
5866 /* and load iO forces to v28,v29,v30 */
5867 /* use v19-v27 to accumulate j water forces */
5868 v28
= vec_ld(256, (float *) stackdata
);
5869 v29
= vec_ld(272, (float *) stackdata
);
5870 v30
= vec_ld(288, (float *) stackdata
);
5872 v1
= vec_ld(400, (float *) stackdata
);
5873 v2
= vec_ld(416, (float *) stackdata
);
5874 v23
= vec_add(v23
,v9
); /* incr. vctot */
5875 v3
= vec_ld(432, (float *) stackdata
);
5876 v4
= vec_ld(448, (float *) stackdata
);
5877 v5
= vec_ld(464, (float *) stackdata
);
5878 v6
= vec_ld(480, (float *) stackdata
);
5879 vec_st(v23
,224,(float *)stackdata
); /* store vctot back to stack */
5880 v7
= vec_ld(496, (float *) stackdata
);
5881 v8
= vec_ld(512, (float *) stackdata
);
5882 v9
= vec_ld(528, (float *) stackdata
);
5884 v28
= vec_madd(v10
,v1
,v28
);
5885 v19
= vec_nmsub(v10
,v1
,v0
);
5886 v29
= vec_madd(v10
,v2
,v29
);
5887 v20
= vec_nmsub(v10
,v2
,v0
);
5888 v30
= vec_madd(v10
,v3
,v30
);
5889 v21
= vec_nmsub(v10
,v3
,v0
);
5891 v28
= vec_madd(v11
,v4
,v28
);
5892 v22
= vec_nmsub(v11
,v4
,v0
);
5893 v29
= vec_madd(v11
,v5
,v29
);
5894 v23
= vec_nmsub(v11
,v5
,v0
);
5895 v30
= vec_madd(v11
,v6
,v30
);
5896 v24
= vec_nmsub(v11
,v6
,v0
);
5898 v28
= vec_madd(v12
,v7
,v28
);
5899 v25
= vec_nmsub(v12
,v7
,v0
);
5900 v29
= vec_madd(v12
,v8
,v29
);
5901 v26
= vec_nmsub(v12
,v8
,v0
);
5902 v30
= vec_madd(v12
,v9
,v30
);
5903 v27
= vec_nmsub(v12
,v9
,v0
);
5905 /* store these i forces, and repeat the procedure for the iH1-* force */
5906 vec_st(v28
,256,(float *)stackdata
);
5907 vec_st(v29
,272,(float *)stackdata
);
5908 vec_st(v30
,288,(float *)stackdata
);
5910 v28
= vec_ld(304,(float *) stackdata
);
5911 v29
= vec_ld(320,(float *) stackdata
);
5912 v30
= vec_ld(336,(float *) stackdata
);
5913 /* load new vectorial distances */
5914 v1
= vec_ld(544, (float *) stackdata
);
5915 v2
= vec_ld(560, (float *) stackdata
);
5916 v3
= vec_ld(576, (float *) stackdata
);
5917 v4
= vec_ld(592, (float *) stackdata
);
5918 v5
= vec_ld(608, (float *) stackdata
);
5919 v6
= vec_ld(624, (float *) stackdata
);
5920 v7
= vec_ld(640, (float *) stackdata
);
5921 v8
= vec_ld(656, (float *) stackdata
);
5922 v9
= vec_ld(672, (float *) stackdata
);
5924 v28
= vec_madd(v13
,v1
,v28
);
5925 v19
= vec_nmsub(v13
,v1
,v19
);
5926 v29
= vec_madd(v13
,v2
,v29
);
5927 v20
= vec_nmsub(v13
,v2
,v20
);
5928 v30
= vec_madd(v13
,v3
,v30
);
5929 v21
= vec_nmsub(v13
,v3
,v21
);
5931 v28
= vec_madd(v14
,v4
,v28
);
5932 v22
= vec_nmsub(v14
,v4
,v22
);
5933 v29
= vec_madd(v14
,v5
,v29
);
5934 v23
= vec_nmsub(v14
,v5
,v23
);
5935 v30
= vec_madd(v14
,v6
,v30
);
5936 v24
= vec_nmsub(v14
,v6
,v24
);
5938 v28
= vec_madd(v15
,v7
,v28
);
5939 v25
= vec_nmsub(v15
,v7
,v25
);
5940 v29
= vec_madd(v15
,v8
,v29
);
5941 v26
= vec_nmsub(v15
,v8
,v26
);
5942 v30
= vec_madd(v15
,v9
,v30
);
5943 v27
= vec_nmsub(v15
,v9
,v27
);
5945 /* store these i forces, and repeat the procedure for the iH2-* force */
5946 vec_st(v28
,304,(float *)stackdata
);
5947 vec_st(v29
,320,(float *)stackdata
);
5948 vec_st(v30
,336,(float *)stackdata
);
5949 v28
= vec_ld(352,(float *) stackdata
);
5950 v29
= vec_ld(368,(float *) stackdata
);
5951 v30
= vec_ld(384,(float *) stackdata
);
5952 /* load new vectorial distances */
5953 v1
= vec_ld(688, (float *) stackdata
);
5954 v2
= vec_ld(704, (float *) stackdata
);
5955 v3
= vec_ld(720, (float *) stackdata
);
5956 v4
= vec_ld(736, (float *) stackdata
);
5957 v5
= vec_ld(752, (float *) stackdata
);
5958 v6
= vec_ld(768, (float *) stackdata
);
5959 v7
= vec_ld(784, (float *) stackdata
);
5960 v8
= vec_ld(800, (float *) stackdata
);
5961 v9
= vec_ld(816, (float *) stackdata
);
5963 v28
= vec_madd(v16
,v1
,v28
);
5964 v19
= vec_nmsub(v16
,v1
,v19
);
5965 v29
= vec_madd(v16
,v2
,v29
);
5966 v20
= vec_nmsub(v16
,v2
,v20
);
5967 v30
= vec_madd(v16
,v3
,v30
);
5968 v21
= vec_nmsub(v16
,v3
,v21
);
5970 v28
= vec_madd(v17
,v4
,v28
);
5971 v22
= vec_nmsub(v17
,v4
,v22
);
5972 v29
= vec_madd(v17
,v5
,v29
);
5973 v23
= vec_nmsub(v17
,v5
,v23
);
5974 v30
= vec_madd(v17
,v6
,v30
);
5975 v24
= vec_nmsub(v17
,v6
,v24
);
5977 v28
= vec_madd(v18
,v7
,v28
);
5978 v25
= vec_nmsub(v18
,v7
,v25
);
5979 v29
= vec_madd(v18
,v8
,v29
);
5980 v26
= vec_nmsub(v18
,v8
,v26
);
5981 v30
= vec_madd(v18
,v9
,v30
);
5982 v27
= vec_nmsub(v18
,v9
,v27
);
5984 /* store these i forces */
5985 vec_st(v28
,352,(float *)stackdata
);
5986 vec_st(v29
,368,(float *)stackdata
);
5987 vec_st(v30
,384,(float *)stackdata
);
5989 /* j forces present in v19-v27 */
5991 v1
= vec_mergeh(v19
,v21
); /* Oxa Oza Oxb Ozb */
5992 v19
= vec_mergel(v19
,v21
); /* Oxc Ozc Oxd Ozd */
5993 v21
= vec_mergeh(v20
,v22
); /* Oya H1xa Oyb H1xb */
5994 v20
= vec_mergel(v20
,v22
); /* Oyc H1xc Oyd H1xd */
5995 v22
= vec_mergeh(v23
,v25
); /* H1ya H2xa H1yb H2xb */
5996 v23
= vec_mergel(v23
,v25
); /* H1yc H2xc H1yd H2xd */
5997 v25
= vec_mergeh(v24
,v26
); /* H1za H2ya H1zb H2yb */
5998 v24
= vec_mergel(v24
,v26
); /* H1zc H2yc H1zd H2yd */
6000 v26
= vec_mergeh(v27
,v0
); /* H2za 0 H2zb 0 */
6001 v27
= vec_mergel(v27
,v0
); /* H2zc 0 H2zd 0 */
6003 v2
= vec_mergeh(v1
,v21
); /* Oxa Oya Oza H1xa */
6004 v21
= vec_mergel(v1
,v21
); /* Oxb Oyb Ozb H1xb */
6005 v1
= vec_mergeh(v19
,v20
); /* Oxc Oyc Ozc H1xc */
6006 v19
= vec_mergel(v19
,v20
); /* Oxd Oyd Ozd H1xd */
6007 v20
= vec_mergeh(v22
,v25
); /* H1ya H1za H2xa H2ya */
6008 v22
= vec_mergel(v22
,v25
); /* H1yb H1zb H2xb H2yb */
6009 v25
= vec_mergeh(v23
,v24
); /* H1yc H1zc H2xc H2yc */
6010 v23
= vec_mergel(v23
,v24
); /* H1yd H1zd H2xd H2yd */
6011 v24
= vec_mergeh(v26
,v0
); /* H2za 0 0 0 */
6012 v26
= vec_mergel(v26
,v0
); /* H2zb 0 0 0 */
6013 v3
= vec_mergeh(v27
,v0
); /* H2zc 0 0 0 */
6014 v27
= vec_mergel(v27
,v0
); /* H2zd 0 0 0 */
6016 v29
= (vector
float)vec_splat_s32(-1);
6017 /* move into position, load and add */
6018 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3a
);
6019 v31
= (vector
float)vec_lvsr( 0, (int *) faction
+j3c
);
6020 v4
= vec_ld( 0, faction
+j3a
);
6021 v5
= vec_ld( 0, faction
+j3c
);
6023 v6
= vec_ld( 16, faction
+j3a
);
6024 v7
= vec_ld( 16, faction
+j3c
);
6025 v8
= vec_ld( 32, faction
+j3a
);
6026 v9
= vec_ld( 32, faction
+j3c
);
6027 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
6028 v11
= vec_perm(v0
,v29
,(vector
unsigned char)v31
);
6030 v12
= vec_perm(v0
,v2
,(vector
unsigned char)v30
);
6031 v13
= vec_perm(v0
,v1
,(vector
unsigned char)v31
);
6032 v4
= vec_add(v12
,v4
);
6033 v5
= vec_add(v13
,v5
);
6035 v14
= vec_perm(v2
,v20
,(vector
unsigned char)v30
);
6036 v15
= vec_perm(v1
,v25
,(vector
unsigned char)v31
);
6037 v2
= vec_add(v14
,v6
);
6038 v1
= vec_add(v15
,v7
);
6040 v16
= vec_perm(v20
,v24
,(vector
unsigned char)v30
);
6041 v17
= vec_perm(v25
,v3
,(vector
unsigned char)v31
);
6042 v20
= vec_add(v16
,v8
);
6043 v25
= vec_add(v17
,v9
);
6045 v12
= vec_sel(v4
,v4
,(vector
unsigned int)v10
);
6046 v13
= vec_sel(v5
,v5
,(vector
unsigned int)v11
);
6047 vec_st(v12
, 0, faction
+j3a
);
6048 vec_st(v13
, 0, faction
+j3c
);
6050 v10
= vec_sld(v0
,v10
,12);
6051 v11
= vec_sld(v0
,v11
,12);
6053 vec_st(v2
, 16, faction
+j3a
);
6054 vec_st(v1
, 16, faction
+j3c
);
6056 v12
= vec_sel(v20
,v8
,(vector
unsigned int)v10
);
6057 v13
= vec_sel(v25
,v9
,(vector
unsigned int)v11
);
6059 vec_st(v12
, 32, faction
+j3a
);
6060 vec_st(v13
, 32, faction
+j3c
);
6062 /* Finished 1 & 3 - now do 2 & 4 */
6064 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3b
);
6065 v31
= (vector
float)vec_lvsr( 0, (int *) faction
+j3d
);
6067 v4
= vec_ld( 0, faction
+j3b
);
6068 v5
= vec_ld( 0, faction
+j3d
);
6069 v6
= vec_ld( 16, faction
+j3b
);
6070 v7
= vec_ld( 16, faction
+j3d
);
6071 v8
= vec_ld( 32, faction
+j3b
);
6072 v9
= vec_ld( 32, faction
+j3d
);
6073 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
6074 v11
= vec_perm(v0
,v29
,(vector
unsigned char)v31
);
6076 v12
= vec_perm(v0
,v21
,(vector
unsigned char)v30
);
6077 v13
= vec_perm(v0
,v19
,(vector
unsigned char)v31
);
6078 v24
= vec_add(v12
,v4
);
6079 v25
= vec_add(v13
,v5
);
6081 v12
= vec_perm(v21
,v22
,(vector
unsigned char)v30
);
6082 v13
= vec_perm(v19
,v23
,(vector
unsigned char)v31
);
6083 v21
= vec_add(v12
,v6
);
6084 v19
= vec_add(v13
,v7
);
6086 v12
= vec_perm(v22
,v26
,(vector
unsigned char)v30
);
6087 v13
= vec_perm(v23
,v27
,(vector
unsigned char)v31
);
6088 v22
= vec_add(v12
,v8
);
6089 v23
= vec_add(v13
,v9
);
6091 v12
= vec_sel(v4
,v24
,(vector
unsigned int)v10
);
6092 v13
= vec_sel(v5
,v25
,(vector
unsigned int)v11
);
6093 vec_st(v12
, 0, faction
+j3b
);
6094 vec_st(v13
, 0, faction
+j3d
);
6095 v10
= vec_sld(v0
,v10
,12);
6096 v11
= vec_sld(v0
,v11
,12);
6098 vec_st(v21
, 16, faction
+j3b
);
6099 vec_st(v19
, 16, faction
+j3d
);
6101 v12
= vec_sel(v22
,v8
,(vector
unsigned int)v10
);
6102 v13
= vec_sel(v23
,v9
,(vector
unsigned int)v11
);
6103 vec_st(v12
, 32, faction
+j3b
);
6104 vec_st(v13
, 32, faction
+j3d
);
6114 v1
= (vector
float)vec_lvsl(0, pos
+j3a
);
6115 v8
= (vector
float)vec_lvsl(0, pos
+j3b
);
6116 v15
= (vector
float)vec_lvsl(0, pos
+j3c
);
6118 v2
= vec_ld(0, pos
+j3a
);
6119 v9
= vec_ld(0, pos
+j3b
);
6120 v16
= vec_ld(0, pos
+j3c
);
6121 v3
= vec_ld(16, pos
+j3a
);
6122 v10
= vec_ld(16, pos
+j3b
);
6123 v17
= vec_ld(16, pos
+j3c
);
6124 v4
= vec_ld(32, pos
+j3a
);
6125 v11
= vec_ld(32, pos
+j3b
);
6126 v18
= vec_ld(32, pos
+j3c
);
6127 v5
= vec_perm(v2
,v3
,(vector
unsigned char)v1
); /* Oxa Oya Oza H1xa */
6128 v12
= vec_perm(v9
,v10
,(vector
unsigned char)v8
); /* Oxb Oyb Ozb H1xb */
6129 v19
= vec_perm(v16
,v17
,(vector
unsigned char)v15
); /* Oxc Oyc Ozc H1xc */
6131 v6
= vec_perm(v3
,v4
,(vector
unsigned char)v1
); /* H1ya H1za H2xa H2ya */
6132 v13
= vec_perm(v10
,v11
,(vector
unsigned char)v8
); /* H1yb H1zb H2xb H2yb */
6133 v20
= vec_perm(v17
,v18
,(vector
unsigned char)v15
); /* H1yc H1zc H2xc H2yc */
6135 v7
= vec_perm(v4
,v4
,(vector
unsigned char)v1
); /* H2za - - - */
6136 v14
= vec_perm(v11
,v11
,(vector
unsigned char)v8
); /* H2zb - - - */
6137 v21
= vec_perm(v18
,v18
,(vector
unsigned char)v15
); /* H2zc - - - */
6139 /* permute water coordinates */
6140 v3
= vec_mergeh(v5
,v19
); /* Oxa Oxc Oya Oyc */
6141 v5
= vec_mergel(v5
,v19
); /* Oza Ozc H1xa H1xc */
6142 v19
= vec_mergeh(v12
,v0
); /* Oxb - Oyb - */
6143 v12
= vec_mergel(v12
,v0
); /* Ozb - H1xb - */
6145 v26
= vec_mergeh(v6
,v20
); /* H1ya H1yc H1za H1zc */
6146 v16
= vec_mergel(v6
,v20
); /* H2xa H2xc H2ya H2yc */
6147 v20
= vec_mergeh(v13
,v0
); /* H1yb - H1zb - */
6148 v13
= vec_mergel(v13
,v0
); /* H2xb - H2yb - */
6150 v15
= vec_mergeh(v7
,v21
); /* H2za H2zc - - */
6152 v1
= vec_mergeh(v3
,v19
); /* Oxa Oxb Oxc - */
6153 v29
= vec_ld(128, (float *) stackdata
); /* load i H1x */
6154 v2
= vec_mergel(v3
,v19
); /* Oya Oyb Oyc - */
6155 v30
= vec_ld(144, (float *) stackdata
); /* load i H1y */
6156 v3
= vec_mergeh(v5
,v12
); /* Oza Ozb Ozc - */
6157 v31
= vec_ld(160, (float *) stackdata
); /* load i H1z */
6158 v4
= vec_mergel(v5
,v12
); /* H1xa H1xb H1xc - */
6159 v5
= vec_mergeh(v26
,v20
); /* H1ya H1yb H1yc - */
6160 v6
= vec_mergel(v26
,v20
); /* H1za H1zb H1zc - */
6161 v7
= vec_mergeh(v16
,v13
); /* H2xa H2xb H2xc - */
6162 v8
= vec_mergel(v16
,v13
); /* H2ya H2yb H2yc - */
6163 v9
= vec_mergeh(v15
,v14
); /* H2za H2zb H2zc - */
6165 v10
= vec_sub(v29
,v1
); /* iH1x - jOx */
6166 v13
= vec_sub(v29
,v4
); /* iH1x - jH1x */
6167 v16
= vec_sub(v29
,v7
); /* iH1x - jH2x */
6168 v29
= vec_ld(176, (float *) stackdata
); /* load i H2x */
6169 v11
= vec_sub(v30
,v2
); /* iH1y - jOy */
6170 v14
= vec_sub(v30
,v5
); /* iH1y - jH1y */
6171 v17
= vec_sub(v30
,v8
); /* iH1y - jH2y */
6172 v30
= vec_ld(192, (float *) stackdata
); /* load i H2y */
6173 vec_st(v10
, 544, (float *)stackdata
); /* dx21 */
6174 vec_st(v13
, 592, (float *)stackdata
); /* dx22 */
6175 vec_st(v16
, 640, (float *)stackdata
); /* dx23 */
6176 v12
= vec_sub(v31
,v3
); /* iH1z - jOz */
6177 v15
= vec_sub(v31
,v6
); /* iH1z - jH1z */
6178 v18
= vec_sub(v31
,v9
); /* iH1z - jH2z */
6179 v31
= vec_ld(208, (float *) stackdata
); /* load i H2z */
6180 /* v10-v18 now contain iH1-jO, iH1-jH1 and iH1-jH2 distances */
6181 vec_st(v11
, 560, (float *)stackdata
); /* dy21 */
6182 vec_st(v14
, 608, (float *)stackdata
); /* dy22 */
6183 vec_st(v17
, 656, (float *)stackdata
); /* dy23 */
6184 v19
= vec_sub(v29
,v1
); /* iH2x - jOx */
6185 v22
= vec_sub(v29
,v4
); /* iH2x - jH1x */
6186 v25
= vec_sub(v29
,v7
); /* iH2x - jH2x */
6187 vec_st(v12
, 576, (float *)stackdata
); /* dz21 */
6188 vec_st(v15
, 624, (float *)stackdata
); /* dz22 */
6189 vec_st(v18
, 672, (float *)stackdata
); /* dz23 */
6190 v29
= vec_ld(80, (float *) stackdata
); /* load i Ox */
6191 v20
= vec_sub(v30
,v2
); /* iH2y - jOy */
6192 v23
= vec_sub(v30
,v5
); /* iH2y - jH1y */
6193 v26
= vec_sub(v30
,v8
); /* iH2y - jH2y */
6194 vec_st(v19
, 688, (float *)stackdata
); /* dx31 */
6195 vec_st(v22
, 736, (float *)stackdata
); /* dx32 */
6196 vec_st(v25
, 784, (float *)stackdata
); /* dx33 */
6197 v30
= vec_ld(96, (float *) stackdata
); /* load i Oy */
6198 v21
= vec_sub(v31
,v3
); /* iH2z - jOz */
6199 v24
= vec_sub(v31
,v6
); /* iH2z - jH1z */
6200 v27
= vec_sub(v31
,v9
); /* iH2z - jH2z */
6201 v31
= vec_ld(112, (float *) stackdata
); /* load i Oz */
6202 vec_st(v20
, 704, (float *)stackdata
); /* dy31 */
6203 vec_st(v23
, 752, (float *)stackdata
); /* dy32 */
6204 vec_st(v26
, 800, (float *)stackdata
); /* dy33 */
6206 v1
= vec_sub(v29
,v1
); /* iOx - jOx */
6207 v4
= vec_sub(v29
,v4
); /* iOx - jH1x */
6208 v7
= vec_sub(v29
,v7
); /* iOx - jH2x */
6209 vec_st(v21
, 720, (float *)stackdata
); /* dz31 */
6210 vec_st(v24
, 768, (float *)stackdata
); /* dz32 */
6211 vec_st(v27
, 816, (float *)stackdata
); /* dz33 */
6212 v2
= vec_sub(v30
,v2
); /* iOy - jOy */
6213 v5
= vec_sub(v30
,v5
); /* iOy - jH1y */
6214 v8
= vec_sub(v30
,v8
); /* iOy - jH2y */
6215 vec_st(v1
, 400, (float *)stackdata
); /* dx11 */
6216 vec_st(v4
, 448, (float *)stackdata
); /* dx12 */
6217 vec_st(v7
, 496, (float *)stackdata
); /* dx13 */
6218 v3
= vec_sub(v31
,v3
); /* iOz - jOz */
6219 v6
= vec_sub(v31
,v6
); /* iOz - jH1z */
6220 v9
= vec_sub(v31
,v9
); /* iOz - jH2z */
6221 vec_st(v2
, 416, (float *)stackdata
); /* dy11 */
6222 vec_st(v5
, 464, (float *)stackdata
); /* dy12 */
6223 vec_st(v8
, 512, (float *)stackdata
); /* dy13 */
6225 v1
= vec_madd(v1
,v1
,v0
);
6226 v4
= vec_madd(v4
,v4
,v0
);
6227 v7
= vec_madd(v7
,v7
,v0
);
6228 vec_st(v3
, 432, (float *)stackdata
); /* dz11 */
6229 vec_st(v6
, 480, (float *)stackdata
); /* dz12 */
6230 vec_st(v9
, 528, (float *)stackdata
); /* dz13 */
6231 v10
= vec_madd(v10
,v10
,v0
);
6232 v13
= vec_madd(v13
,v13
,v0
);
6233 v16
= vec_madd(v16
,v16
,v0
);
6234 v19
= vec_madd(v19
,v19
,v0
);
6235 v22
= vec_madd(v22
,v22
,v0
);
6236 v25
= vec_madd(v25
,v25
,v0
);
6237 v1
= vec_madd(v2
,v2
,v1
);
6238 v4
= vec_madd(v5
,v5
,v4
);
6239 v7
= vec_madd(v8
,v8
,v7
);
6240 v10
= vec_madd(v11
,v11
,v10
);
6241 v13
= vec_madd(v14
,v14
,v13
);
6242 v16
= vec_madd(v17
,v17
,v16
);
6243 v19
= vec_madd(v20
,v20
,v19
);
6244 v22
= vec_madd(v23
,v23
,v22
);
6245 v25
= vec_madd(v26
,v26
,v25
);
6246 v1
= vec_madd(v3
,v3
,v1
);
6247 v2
= vec_madd(v6
,v6
,v4
);
6248 v3
= vec_madd(v9
,v9
,v7
);
6249 v4
= vec_madd(v12
,v12
,v10
);
6250 v5
= vec_madd(v15
,v15
,v13
);
6251 v6
= vec_madd(v18
,v18
,v16
);
6252 v7
= vec_madd(v21
,v21
,v19
);
6253 v8
= vec_madd(v24
,v24
,v22
);
6254 v9
= vec_madd(v27
,v27
,v25
);
6267 v10
= vec_rsqrte(v1
);
6268 v11
= vec_rsqrte(v2
);
6269 v12
= vec_rsqrte(v3
);
6270 v13
= vec_rsqrte(v4
);
6271 v14
= vec_rsqrte(v5
);
6272 v15
= vec_rsqrte(v6
);
6273 v16
= vec_rsqrte(v7
);
6274 v17
= vec_rsqrte(v8
);
6275 v18
= vec_rsqrte(v9
);
6277 /* create constants 0.5 (in v31) and 1.0 (in v30) */
6278 v30
= (vector
float) vec_splat_u32(1);
6279 v31
= vec_ctf((vector
unsigned int)v30
,1); /* 0.5 */
6280 v30
= vec_ctf((vector
unsigned int)v30
,0); /* 1.0 */
6282 v19
= vec_madd(v10
,v10
,v0
); /* lu*lu */
6283 v20
= vec_madd(v11
,v11
,v0
);
6284 v21
= vec_madd(v12
,v12
,v0
);
6285 v22
= vec_madd(v13
,v13
,v0
);
6286 v23
= vec_madd(v14
,v14
,v0
);
6287 v24
= vec_madd(v15
,v15
,v0
);
6288 v25
= vec_madd(v16
,v16
,v0
);
6289 v26
= vec_madd(v17
,v17
,v0
);
6290 v27
= vec_madd(v18
,v18
,v0
);
6292 v19
= vec_nmsub(v1
,v19
,v30
); /* 1.0 - rsq*lu*lu */
6293 v20
= vec_nmsub(v2
,v20
,v30
);
6294 v21
= vec_nmsub(v3
,v21
,v30
);
6295 v22
= vec_nmsub(v4
,v22
,v30
);
6296 v23
= vec_nmsub(v5
,v23
,v30
);
6297 v24
= vec_nmsub(v6
,v24
,v30
);
6298 v25
= vec_nmsub(v7
,v25
,v30
);
6299 v26
= vec_nmsub(v8
,v26
,v30
);
6300 v27
= vec_nmsub(v9
,v27
,v30
);
6302 v1
= vec_madd(v10
,v31
,v0
);/* lu*0.5*/
6303 v2
= vec_madd(v11
,v31
,v0
);
6304 v3
= vec_madd(v12
,v31
,v0
);
6305 v4
= vec_madd(v13
,v31
,v0
);
6306 v5
= vec_madd(v14
,v31
,v0
);
6307 v6
= vec_madd(v15
,v31
,v0
);
6308 v7
= vec_madd(v16
,v31
,v0
);
6309 v8
= vec_madd(v17
,v31
,v0
);
6310 v9
= vec_madd(v18
,v31
,v0
);
6312 /* The rinv values */
6313 v1
= vec_madd(v1
,v19
,v10
);
6314 v2
= vec_madd(v2
,v20
,v11
);
6315 v3
= vec_madd(v3
,v21
,v12
);
6316 v4
= vec_madd(v4
,v22
,v13
);
6317 v5
= vec_madd(v5
,v23
,v14
);
6318 v6
= vec_madd(v6
,v24
,v15
);
6319 v7
= vec_madd(v7
,v25
,v16
);
6320 v8
= vec_madd(v8
,v26
,v17
);
6321 v9
= vec_madd(v9
,v27
,v18
);
6323 v10
= (vector
float)vec_splat_s32(-1);
6324 v10
= vec_sld(v0
,v10
,4);
6326 v1
= (vector
float)vec_sel((vector
unsigned int)v1
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6327 v2
= (vector
float)vec_sel((vector
unsigned int)v2
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6328 v3
= (vector
float)vec_sel((vector
unsigned int)v3
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6329 v4
= (vector
float)vec_sel((vector
unsigned int)v4
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6330 v5
= (vector
float)vec_sel((vector
unsigned int)v5
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6331 v6
= (vector
float)vec_sel((vector
unsigned int)v6
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6332 v7
= (vector
float)vec_sel((vector
unsigned int)v7
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6333 v8
= (vector
float)vec_sel((vector
unsigned int)v8
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6334 v9
= (vector
float)vec_sel((vector
unsigned int)v9
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6336 /* load qqOO, qqOH and qqHH to v27,v28,v29 */
6337 v27
= vec_ld(0, (float *) stackdata
);
6338 v28
= vec_ld(16, (float *) stackdata
);
6339 v29
= vec_ld(32, (float *) stackdata
);
6342 vec_dstst( faction
+j3a
, 0x10010100, 2 );
6344 v27
= vec_sld(v27
,v0
,4);
6345 v28
= vec_sld(v28
,v0
,4);
6346 v29
= vec_sld(v29
,v0
,4);
6348 /* put rinvsq in v10-v18, rinv6_OO in v30 and rinv12_OO in v31 */
6349 /* load c6 to v25 and c12 to v26 */
6350 v25
= vec_ld(48, (float *) stackdata
);
6351 v26
= vec_ld(64, (float *) stackdata
);
6353 v10
= vec_madd(v1
,v1
,v0
);
6354 v1
= vec_madd(v1
,v27
,v0
); /* rinv11*qqOO */
6355 v11
= vec_madd(v2
,v2
,v0
);
6356 /* load vctot to v23 and vnbtot to v24 */
6357 v23
= vec_ld(224,(float *) stackdata
);
6358 v24
= vec_ld(240,(float *) stackdata
);
6360 v25
= vec_sld(v25
,v0
,4);
6361 v26
= vec_sld(v26
,v0
,4);
6363 v2
= vec_madd(v2
,v28
,v0
); /* rinv12*qqOH */
6364 v12
= vec_madd(v3
,v3
,v0
);
6365 v30
= vec_madd(v10
,v10
,v0
); /* rinv4 */
6366 v3
= vec_madd(v3
,v28
,v0
); /* rinv13*qqOH */
6367 v13
= vec_madd(v4
,v4
,v0
);
6368 v4
= vec_madd(v4
,v28
,v0
); /* rinv21*qqOH */
6369 v14
= vec_madd(v5
,v5
,v0
);
6371 v23
= vec_add(v23
,v1
);
6373 v30
= vec_madd(v30
,v10
,v0
); /* rinv6 */
6374 v5
= vec_madd(v5
,v29
,v0
); /* rinv22*qqHH */
6375 v15
= vec_madd(v6
,v6
,v0
);
6376 v6
= vec_madd(v6
,v29
,v0
); /* rinv23*qqHH */
6377 v23
= vec_add(v23
,v2
);
6378 v16
= vec_madd(v7
,v7
,v0
);
6379 v31
= vec_madd(v30
,v30
,v0
); /* rinv12 */
6380 v25
= vec_madd(v25
,v30
,v0
); /* c6*rinv6 */
6381 /* load 6.0 to v30 */
6382 v30
= (vector
float)vec_splat_u32(6);
6383 v30
= vec_ctf((vector
unsigned int)v30
,0);
6384 v23
= vec_add(v23
,v3
);
6386 v7
= vec_madd(v7
,v28
,v0
); /* rinv31*qqOH */
6387 v17
= vec_madd(v8
,v8
,v0
);
6388 v8
= vec_madd(v8
,v29
,v0
); /* rinv32*qqHH */
6389 v26
= vec_madd(v26
,v31
,v0
); /* c12*rinv12 */
6390 v23
= vec_add(v23
,v4
);
6391 /* load 12.0 to v31 */
6392 v31
= (vector
float)vec_splat_u32(12);
6393 v31
= vec_ctf((vector
unsigned int)v31
,0);
6396 v24
= vec_sub(v24
,v25
); /* add vnb6 to vnbtot */
6397 v18
= vec_madd(v9
,v9
,v0
);
6398 v23
= vec_add(v23
,v5
);
6399 v9
= vec_madd(v9
,v29
,v0
); /* rinv33*qqHH */
6400 v24
= vec_add(v24
,v26
);/* add vnb12 to vnbtot */
6402 v31
= vec_madd(v31
,v26
,v0
);
6403 v11
= vec_madd(v11
,v2
,v0
); /* fs12 */
6404 v23
= vec_add(v23
,v6
);
6405 v12
= vec_madd(v12
,v3
,v0
); /* fs13 */
6406 v13
= vec_madd(v13
,v4
,v0
); /* fs21 */
6407 v31
= vec_nmsub(v30
,v25
,v31
);
6409 v14
= vec_madd(v14
,v5
,v0
); /* fs22 */
6410 v23
= vec_add(v23
,v7
);
6411 v15
= vec_madd(v15
,v6
,v0
); /* fs23 */
6412 v16
= vec_madd(v16
,v7
,v0
); /* fs31 */
6413 v1
= vec_add(v31
,v1
);
6414 v17
= vec_madd(v17
,v8
,v0
); /* fs32 */
6415 v23
= vec_add(v23
,v8
);
6416 v18
= vec_madd(v18
,v9
,v0
); /* fs33 */
6417 v10
= vec_madd(v10
,v1
,v0
);
6419 vec_st(v24
,240,(float *)stackdata
); /* store vnbtot */
6420 /* calculate vectorial forces and accumulate fj. v10-v18 has fs11-fs33 now. */
6421 /* First load iO-* dx,dy,dz vectors to v1-v9 */
6422 /* and load iO forces to v28,v29,v30 */
6423 /* use v19-v27 to accumulate j water forces */
6424 v28
= vec_ld(256, (float *) stackdata
);
6425 v29
= vec_ld(272, (float *) stackdata
);
6426 v30
= vec_ld(288, (float *) stackdata
);
6428 v1
= vec_ld(400, (float *) stackdata
);
6429 v2
= vec_ld(416, (float *) stackdata
);
6430 v23
= vec_add(v23
,v9
); /* incr. vctot */
6431 v3
= vec_ld(432, (float *) stackdata
);
6432 v4
= vec_ld(448, (float *) stackdata
);
6433 v5
= vec_ld(464, (float *) stackdata
);
6434 v6
= vec_ld(480, (float *) stackdata
);
6435 vec_st(v23
,224,(float *)stackdata
); /* store vctot back to stack */
6436 v7
= vec_ld(496, (float *) stackdata
);
6437 v8
= vec_ld(512, (float *) stackdata
);
6438 v9
= vec_ld(528, (float *) stackdata
);
6440 v28
= vec_madd(v10
,v1
,v28
);
6441 v19
= vec_nmsub(v10
,v1
,v0
);
6442 v29
= vec_madd(v10
,v2
,v29
);
6443 v20
= vec_nmsub(v10
,v2
,v0
);
6444 v30
= vec_madd(v10
,v3
,v30
);
6445 v21
= vec_nmsub(v10
,v3
,v0
);
6447 v28
= vec_madd(v11
,v4
,v28
);
6448 v22
= vec_nmsub(v11
,v4
,v0
);
6449 v29
= vec_madd(v11
,v5
,v29
);
6450 v23
= vec_nmsub(v11
,v5
,v0
);
6451 v30
= vec_madd(v11
,v6
,v30
);
6452 v24
= vec_nmsub(v11
,v6
,v0
);
6454 v28
= vec_madd(v12
,v7
,v28
);
6455 v25
= vec_nmsub(v12
,v7
,v0
);
6456 v29
= vec_madd(v12
,v8
,v29
);
6457 v26
= vec_nmsub(v12
,v8
,v0
);
6458 v30
= vec_madd(v12
,v9
,v30
);
6459 v27
= vec_nmsub(v12
,v9
,v0
);
6461 /* store these i forces, and repeat the procedure for the iH1-* force */
6462 vec_st(v28
,256,(float *)stackdata
);
6463 vec_st(v29
,272,(float *)stackdata
);
6464 vec_st(v30
,288,(float *)stackdata
);
6466 v28
= vec_ld(304,(float *) stackdata
);
6467 v29
= vec_ld(320,(float *) stackdata
);
6468 v30
= vec_ld(336,(float *) stackdata
);
6469 /* load new vectorial distances */
6470 v1
= vec_ld(544, (float *) stackdata
);
6471 v2
= vec_ld(560, (float *) stackdata
);
6472 v3
= vec_ld(576, (float *) stackdata
);
6473 v4
= vec_ld(592, (float *) stackdata
);
6474 v5
= vec_ld(608, (float *) stackdata
);
6475 v6
= vec_ld(624, (float *) stackdata
);
6476 v7
= vec_ld(640, (float *) stackdata
);
6477 v8
= vec_ld(656, (float *) stackdata
);
6478 v9
= vec_ld(672, (float *) stackdata
);
6480 v28
= vec_madd(v13
,v1
,v28
);
6481 v19
= vec_nmsub(v13
,v1
,v19
);
6482 v29
= vec_madd(v13
,v2
,v29
);
6483 v20
= vec_nmsub(v13
,v2
,v20
);
6484 v30
= vec_madd(v13
,v3
,v30
);
6485 v21
= vec_nmsub(v13
,v3
,v21
);
6487 v28
= vec_madd(v14
,v4
,v28
);
6488 v22
= vec_nmsub(v14
,v4
,v22
);
6489 v29
= vec_madd(v14
,v5
,v29
);
6490 v23
= vec_nmsub(v14
,v5
,v23
);
6491 v30
= vec_madd(v14
,v6
,v30
);
6492 v24
= vec_nmsub(v14
,v6
,v24
);
6494 v28
= vec_madd(v15
,v7
,v28
);
6495 v25
= vec_nmsub(v15
,v7
,v25
);
6496 v29
= vec_madd(v15
,v8
,v29
);
6497 v26
= vec_nmsub(v15
,v8
,v26
);
6498 v30
= vec_madd(v15
,v9
,v30
);
6499 v27
= vec_nmsub(v15
,v9
,v27
);
6501 /* store these i forces, and repeat the procedure for the iH2-* force */
6502 vec_st(v28
,304,(float *)stackdata
);
6503 vec_st(v29
,320,(float *)stackdata
);
6504 vec_st(v30
,336,(float *)stackdata
);
6505 v28
= vec_ld(352,(float *) stackdata
);
6506 v29
= vec_ld(368,(float *) stackdata
);
6507 v30
= vec_ld(384,(float *) stackdata
);
6508 /* load new vectorial distances */
6509 v1
= vec_ld(688, (float *) stackdata
);
6510 v2
= vec_ld(704, (float *) stackdata
);
6511 v3
= vec_ld(720, (float *) stackdata
);
6512 v4
= vec_ld(736, (float *) stackdata
);
6513 v5
= vec_ld(752, (float *) stackdata
);
6514 v6
= vec_ld(768, (float *) stackdata
);
6515 v7
= vec_ld(784, (float *) stackdata
);
6516 v8
= vec_ld(800, (float *) stackdata
);
6517 v9
= vec_ld(816, (float *) stackdata
);
6519 v28
= vec_madd(v16
,v1
,v28
);
6520 v19
= vec_nmsub(v16
,v1
,v19
);
6521 v29
= vec_madd(v16
,v2
,v29
);
6522 v20
= vec_nmsub(v16
,v2
,v20
);
6523 v30
= vec_madd(v16
,v3
,v30
);
6524 v21
= vec_nmsub(v16
,v3
,v21
);
6526 v28
= vec_madd(v17
,v4
,v28
);
6527 v22
= vec_nmsub(v17
,v4
,v22
);
6528 v29
= vec_madd(v17
,v5
,v29
);
6529 v23
= vec_nmsub(v17
,v5
,v23
);
6530 v30
= vec_madd(v17
,v6
,v30
);
6531 v24
= vec_nmsub(v17
,v6
,v24
);
6533 v28
= vec_madd(v18
,v7
,v28
);
6534 v25
= vec_nmsub(v18
,v7
,v25
);
6535 v29
= vec_madd(v18
,v8
,v29
);
6536 v26
= vec_nmsub(v18
,v8
,v26
);
6537 v30
= vec_madd(v18
,v9
,v30
);
6538 v27
= vec_nmsub(v18
,v9
,v27
);
6540 /* store these i forces */
6541 vec_st(v28
,352,(float *)stackdata
);
6542 vec_st(v29
,368,(float *)stackdata
);
6543 vec_st(v30
,384,(float *)stackdata
);
6545 /* j forces present in v19-v27 */
6547 v1
= vec_mergeh(v19
,v21
); /* Oxa Oza Oxb Ozb */
6548 v19
= vec_mergel(v19
,v21
); /* Oxc Ozc - - */
6549 v21
= vec_mergeh(v20
,v22
); /* Oya H1xa Oyb H1xb */
6550 v20
= vec_mergel(v20
,v22
); /* Oyc H1xc - - */
6551 v22
= vec_mergeh(v23
,v25
); /* H1ya H2xa H1yb H2xb */
6552 v23
= vec_mergel(v23
,v25
); /* H1yc H2xc - - */
6553 v25
= vec_mergeh(v24
,v26
); /* H1za H2ya H1zb H2yb */
6554 v24
= vec_mergel(v24
,v26
); /* H1zc H2yc - - */
6556 v26
= vec_mergeh(v27
,v0
); /* H2za 0 H2zb 0 */
6557 v27
= vec_mergel(v27
,v0
); /* H2zc 0 - 0 */
6559 v2
= vec_mergeh(v1
,v21
); /* Oxa Oya Oza H1xa */
6560 v21
= vec_mergel(v1
,v21
); /* Oxb Oyb Ozb H1xb */
6561 v1
= vec_mergeh(v19
,v20
); /* Oxc Oyc Ozc H1xc */
6562 v20
= vec_mergeh(v22
,v25
); /* H1ya H1za H2xa H2ya */
6563 v22
= vec_mergel(v22
,v25
); /* H1yb H1zb H2xb H2yb */
6564 v25
= vec_mergeh(v23
,v24
); /* H1yc H1zc H2xc H2yc */
6565 v24
= vec_mergeh(v26
,v0
); /* H2za 0 0 0 */
6566 v26
= vec_mergel(v26
,v0
); /* H2zb 0 0 0 */
6567 v3
= vec_mergeh(v27
,v0
); /* H2zc 0 0 0 */
6569 v29
= (vector
float)vec_splat_s32(-1);
6570 /* move into position, load and add */
6571 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3a
);
6572 v31
= (vector
float)vec_lvsr( 0, (int *) faction
+j3c
);
6573 v4
= vec_ld( 0, faction
+j3a
);
6574 v5
= vec_ld( 0, faction
+j3c
);
6576 v6
= vec_ld( 16, faction
+j3a
);
6577 v7
= vec_ld( 16, faction
+j3c
);
6578 v8
= vec_ld( 32, faction
+j3a
);
6579 v9
= vec_ld( 32, faction
+j3c
);
6580 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
6581 v11
= vec_perm(v0
,v29
,(vector
unsigned char)v31
);
6583 v12
= vec_perm(v0
,v2
,(vector
unsigned char)v30
);
6584 v13
= vec_perm(v0
,v1
,(vector
unsigned char)v31
);
6585 v4
= vec_add(v12
,v4
);
6586 v5
= vec_add(v13
,v5
);
6588 v14
= vec_perm(v2
,v20
,(vector
unsigned char)v30
);
6589 v15
= vec_perm(v1
,v25
,(vector
unsigned char)v31
);
6590 v2
= vec_add(v14
,v6
);
6591 v1
= vec_add(v15
,v7
);
6593 v16
= vec_perm(v20
,v24
,(vector
unsigned char)v30
);
6594 v17
= vec_perm(v25
,v3
,(vector
unsigned char)v31
);
6595 v20
= vec_add(v16
,v8
);
6596 v25
= vec_add(v17
,v9
);
6598 v12
= vec_sel(v4
,v4
,(vector
unsigned int)v10
);
6599 v13
= vec_sel(v5
,v5
,(vector
unsigned int)v11
);
6600 vec_st(v12
, 0, faction
+j3a
);
6601 vec_st(v13
, 0, faction
+j3c
);
6603 v10
= vec_sld(v0
,v10
,12);
6604 v11
= vec_sld(v0
,v11
,12);
6606 vec_st(v2
, 16, faction
+j3a
);
6607 vec_st(v1
, 16, faction
+j3c
);
6609 v12
= vec_sel(v20
,v8
,(vector
unsigned int)v10
);
6610 v13
= vec_sel(v25
,v9
,(vector
unsigned int)v11
);
6612 vec_st(v12
, 32, faction
+j3a
);
6613 vec_st(v13
, 32, faction
+j3c
);
6615 /* Finished 1 & 3 - now do 2 */
6617 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3b
);
6619 v4
= vec_ld( 0, faction
+j3b
);
6620 v6
= vec_ld( 16, faction
+j3b
);
6621 v8
= vec_ld( 32, faction
+j3b
);
6622 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
6624 v12
= vec_perm(v0
,v21
,(vector
unsigned char)v30
);
6625 v24
= vec_add(v12
,v4
);
6627 v12
= vec_perm(v21
,v22
,(vector
unsigned char)v30
);
6628 v21
= vec_add(v12
,v6
);
6630 v12
= vec_perm(v22
,v26
,(vector
unsigned char)v30
);
6631 v22
= vec_add(v12
,v8
);
6633 v12
= vec_sel(v4
,v24
,(vector
unsigned int)v10
);
6634 vec_st(v12
, 0, faction
+j3b
);
6635 v10
= vec_sld(v0
,v10
,12);
6637 vec_st(v21
, 16, faction
+j3b
);
6639 v12
= vec_sel(v22
,v8
,(vector
unsigned int)v10
);
6640 vec_st(v12
, 32, faction
+j3b
);
6642 } else if(k
<(nj1
-1)) {
6648 v1
= (vector
float)vec_lvsl(0, pos
+j3a
);
6649 v8
= (vector
float)vec_lvsl(0, pos
+j3b
);
6651 v2
= vec_ld(0, pos
+j3a
);
6652 v9
= vec_ld(0, pos
+j3b
);
6653 v3
= vec_ld(16, pos
+j3a
);
6654 v10
= vec_ld(16, pos
+j3b
);
6655 v4
= vec_ld(32, pos
+j3a
);
6656 v11
= vec_ld(32, pos
+j3b
);
6657 v5
= vec_perm(v2
,v3
,(vector
unsigned char)v1
); /* Oxa Oya Oza H1xa */
6658 v12
= vec_perm(v9
,v10
,(vector
unsigned char)v8
); /* Oxb Oyb Ozb H1xb */
6660 v6
= vec_perm(v3
,v4
,(vector
unsigned char)v1
); /* H1ya H1za H2xa H2ya */
6661 v13
= vec_perm(v10
,v11
,(vector
unsigned char)v8
); /* H1yb H1zb H2xb H2yb */
6663 v7
= vec_perm(v4
,v4
,(vector
unsigned char)v1
); /* H2za - - - */
6664 v14
= vec_perm(v11
,v11
,(vector
unsigned char)v8
); /* H2zb - - - */
6666 /* permute water coordinates */
6667 v1
= vec_mergeh(v5
,v12
); /* Oxa Oxb Oya Oyb */
6668 v3
= vec_mergel(v5
,v12
); /* Oza Ozb H1xa H1xb */
6669 v5
= vec_mergeh(v6
,v13
); /* H1ya H1yb H1za H1zb */
6670 v9
= vec_mergeh(v7
,v14
); /* H2za H2zb - - */
6671 v7
= vec_mergel(v6
,v13
); /* H2xa H2xb H2ya H2yb */
6673 v29
= vec_ld(128, (float *) stackdata
); /* load i H1x */
6674 v2
= vec_sld(v1
,v1
,8); /* Oya Oyb - - */
6675 v30
= vec_ld(144, (float *) stackdata
); /* load i H1y */
6676 v4
= vec_sld(v3
,v3
,8); /* H1xa H1xb - - */
6677 v31
= vec_ld(160, (float *) stackdata
); /* load i H1z */
6678 v6
= vec_sld(v5
,v5
,8); /* H1za H1zb - - */
6679 v8
= vec_sld(v7
,v7
,8); /* H2ya H2yb - - */
6682 v10
= vec_sub(v29
,v1
); /* iH1x - jOx */
6683 v13
= vec_sub(v29
,v4
); /* iH1x - jH1x */
6684 v16
= vec_sub(v29
,v7
); /* iH1x - jH2x */
6685 v29
= vec_ld(176, (float *) stackdata
); /* load i H2x */
6686 v11
= vec_sub(v30
,v2
); /* iH1y - jOy */
6687 v14
= vec_sub(v30
,v5
); /* iH1y - jH1y */
6688 v17
= vec_sub(v30
,v8
); /* iH1y - jH2y */
6689 v30
= vec_ld(192, (float *) stackdata
); /* load i H2y */
6690 vec_st(v10
, 544, (float *)stackdata
); /* dx21 */
6691 vec_st(v13
, 592, (float *)stackdata
); /* dx22 */
6692 vec_st(v16
, 640, (float *)stackdata
); /* dx23 */
6693 v12
= vec_sub(v31
,v3
); /* iH1z - jOz */
6694 v15
= vec_sub(v31
,v6
); /* iH1z - jH1z */
6695 v18
= vec_sub(v31
,v9
); /* iH1z - jH2z */
6696 v31
= vec_ld(208, (float *) stackdata
); /* load i H2z */
6697 /* v10-v18 now contain iH1-jO, iH1-jH1 and iH1-jH2 distances */
6698 vec_st(v11
, 560, (float *)stackdata
); /* dy21 */
6699 vec_st(v14
, 608, (float *)stackdata
); /* dy22 */
6700 vec_st(v17
, 656, (float *)stackdata
); /* dy23 */
6701 v19
= vec_sub(v29
,v1
); /* iH2x - jOx */
6702 v22
= vec_sub(v29
,v4
); /* iH2x - jH1x */
6703 v25
= vec_sub(v29
,v7
); /* iH2x - jH2x */
6704 vec_st(v12
, 576, (float *)stackdata
); /* dz21 */
6705 vec_st(v15
, 624, (float *)stackdata
); /* dz22 */
6706 vec_st(v18
, 672, (float *)stackdata
); /* dz23 */
6707 v29
= vec_ld(80, (float *) stackdata
); /* load i Ox */
6708 v20
= vec_sub(v30
,v2
); /* iH2y - jOy */
6709 v23
= vec_sub(v30
,v5
); /* iH2y - jH1y */
6710 v26
= vec_sub(v30
,v8
); /* iH2y - jH2y */
6711 vec_st(v19
, 688, (float *)stackdata
); /* dx31 */
6712 vec_st(v22
, 736, (float *)stackdata
); /* dx32 */
6713 vec_st(v25
, 784, (float *)stackdata
); /* dx33 */
6714 v30
= vec_ld(96, (float *) stackdata
); /* load i Oy */
6715 v21
= vec_sub(v31
,v3
); /* iH2z - jOz */
6716 v24
= vec_sub(v31
,v6
); /* iH2z - jH1z */
6717 v27
= vec_sub(v31
,v9
); /* iH2z - jH2z */
6718 v31
= vec_ld(112, (float *) stackdata
); /* load i Oz */
6719 vec_st(v20
, 704, (float *)stackdata
); /* dy31 */
6720 vec_st(v23
, 752, (float *)stackdata
); /* dy32 */
6721 vec_st(v26
, 800, (float *)stackdata
); /* dy33 */
6723 v1
= vec_sub(v29
,v1
); /* iOx - jOx */
6724 v4
= vec_sub(v29
,v4
); /* iOx - jH1x */
6725 v7
= vec_sub(v29
,v7
); /* iOx - jH2x */
6726 vec_st(v21
, 720, (float *)stackdata
); /* dz31 */
6727 vec_st(v24
, 768, (float *)stackdata
); /* dz32 */
6728 vec_st(v27
, 816, (float *)stackdata
); /* dz33 */
6729 v2
= vec_sub(v30
,v2
); /* iOy - jOy */
6730 v5
= vec_sub(v30
,v5
); /* iOy - jH1y */
6731 v8
= vec_sub(v30
,v8
); /* iOy - jH2y */
6732 vec_st(v1
, 400, (float *)stackdata
); /* dx11 */
6733 vec_st(v4
, 448, (float *)stackdata
); /* dx12 */
6734 vec_st(v7
, 496, (float *)stackdata
); /* dx13 */
6735 v3
= vec_sub(v31
,v3
); /* iOz - jOz */
6736 v6
= vec_sub(v31
,v6
); /* iOz - jH1z */
6737 v9
= vec_sub(v31
,v9
); /* iOz - jH2z */
6738 vec_st(v2
, 416, (float *)stackdata
); /* dy11 */
6739 vec_st(v5
, 464, (float *)stackdata
); /* dy12 */
6740 vec_st(v8
, 512, (float *)stackdata
); /* dy13 */
6742 v1
= vec_madd(v1
,v1
,v0
);
6743 v4
= vec_madd(v4
,v4
,v0
);
6744 v7
= vec_madd(v7
,v7
,v0
);
6745 vec_st(v3
, 432, (float *)stackdata
); /* dz11 */
6746 vec_st(v6
, 480, (float *)stackdata
); /* dz12 */
6747 vec_st(v9
, 528, (float *)stackdata
); /* dz13 */
6748 v10
= vec_madd(v10
,v10
,v0
);
6749 v13
= vec_madd(v13
,v13
,v0
);
6750 v16
= vec_madd(v16
,v16
,v0
);
6751 v19
= vec_madd(v19
,v19
,v0
);
6752 v22
= vec_madd(v22
,v22
,v0
);
6753 v25
= vec_madd(v25
,v25
,v0
);
6754 v1
= vec_madd(v2
,v2
,v1
);
6755 v4
= vec_madd(v5
,v5
,v4
);
6756 v7
= vec_madd(v8
,v8
,v7
);
6757 v10
= vec_madd(v11
,v11
,v10
);
6758 v13
= vec_madd(v14
,v14
,v13
);
6759 v16
= vec_madd(v17
,v17
,v16
);
6760 v19
= vec_madd(v20
,v20
,v19
);
6761 v22
= vec_madd(v23
,v23
,v22
);
6762 v25
= vec_madd(v26
,v26
,v25
);
6763 v1
= vec_madd(v3
,v3
,v1
);
6764 v2
= vec_madd(v6
,v6
,v4
);
6765 v3
= vec_madd(v9
,v9
,v7
);
6766 v4
= vec_madd(v12
,v12
,v10
);
6767 v5
= vec_madd(v15
,v15
,v13
);
6768 v6
= vec_madd(v18
,v18
,v16
);
6769 v7
= vec_madd(v21
,v21
,v19
);
6770 v8
= vec_madd(v24
,v24
,v22
);
6771 v9
= vec_madd(v27
,v27
,v25
);
6784 v10
= vec_rsqrte(v1
);
6785 v11
= vec_rsqrte(v2
);
6786 v12
= vec_rsqrte(v3
);
6787 v13
= vec_rsqrte(v4
);
6788 v14
= vec_rsqrte(v5
);
6789 v15
= vec_rsqrte(v6
);
6790 v16
= vec_rsqrte(v7
);
6791 v17
= vec_rsqrte(v8
);
6792 v18
= vec_rsqrte(v9
);
6793 /* create constant 0.5 */
6794 v30
= (vector
float) vec_splat_u32(1);
6795 v31
= vec_ctf((vector
unsigned int)v30
,1); /* 0.5 */
6796 v30
= vec_ctf((vector
unsigned int)v30
,0); /* 1.0 */
6798 v19
= vec_madd(v10
,v10
,v0
); /* lu*lu */
6799 v20
= vec_madd(v11
,v11
,v0
);
6800 v21
= vec_madd(v12
,v12
,v0
);
6801 v22
= vec_madd(v13
,v13
,v0
);
6802 v23
= vec_madd(v14
,v14
,v0
);
6803 v24
= vec_madd(v15
,v15
,v0
);
6804 v25
= vec_madd(v16
,v16
,v0
);
6805 v26
= vec_madd(v17
,v17
,v0
);
6806 v27
= vec_madd(v18
,v18
,v0
);
6808 v19
= vec_nmsub(v1
,v19
,v30
); /* 1.0 - rsq*lu*lu */
6809 v20
= vec_nmsub(v2
,v20
,v30
);
6810 v21
= vec_nmsub(v3
,v21
,v30
);
6811 v22
= vec_nmsub(v4
,v22
,v30
);
6812 v23
= vec_nmsub(v5
,v23
,v30
);
6813 v24
= vec_nmsub(v6
,v24
,v30
);
6814 v25
= vec_nmsub(v7
,v25
,v30
);
6815 v26
= vec_nmsub(v8
,v26
,v30
);
6816 v27
= vec_nmsub(v9
,v27
,v30
);
6818 v1
= vec_madd(v10
,v31
,v0
);/* lu*0.5*/
6819 v2
= vec_madd(v11
,v31
,v0
);
6820 v3
= vec_madd(v12
,v31
,v0
);
6821 v4
= vec_madd(v13
,v31
,v0
);
6822 v5
= vec_madd(v14
,v31
,v0
);
6823 v6
= vec_madd(v15
,v31
,v0
);
6824 v7
= vec_madd(v16
,v31
,v0
);
6825 v8
= vec_madd(v17
,v31
,v0
);
6826 v9
= vec_madd(v18
,v31
,v0
);
6828 /* The rinv values */
6829 v1
= vec_madd(v1
,v19
,v10
);
6830 v2
= vec_madd(v2
,v20
,v11
);
6831 v3
= vec_madd(v3
,v21
,v12
);
6832 v4
= vec_madd(v4
,v22
,v13
);
6833 v5
= vec_madd(v5
,v23
,v14
);
6834 v6
= vec_madd(v6
,v24
,v15
);
6835 v7
= vec_madd(v7
,v25
,v16
);
6836 v8
= vec_madd(v8
,v26
,v17
);
6837 v9
= vec_madd(v9
,v27
,v18
);
6839 v10
= (vector
float)vec_splat_s32(-1);
6840 v10
= vec_sld(v0
,v10
,8);
6842 v1
= (vector
float)vec_sel((vector
unsigned int)v1
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6843 v2
= (vector
float)vec_sel((vector
unsigned int)v2
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6844 v3
= (vector
float)vec_sel((vector
unsigned int)v3
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6845 v4
= (vector
float)vec_sel((vector
unsigned int)v4
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6846 v5
= (vector
float)vec_sel((vector
unsigned int)v5
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6847 v6
= (vector
float)vec_sel((vector
unsigned int)v6
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6848 v7
= (vector
float)vec_sel((vector
unsigned int)v7
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6849 v8
= (vector
float)vec_sel((vector
unsigned int)v8
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6850 v9
= (vector
float)vec_sel((vector
unsigned int)v9
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
6852 /* load qqOO, qqOH and qqHH to v27,v28,v29 */
6853 v27
= vec_ld(0, (float *) stackdata
);
6854 v28
= vec_ld(16, (float *) stackdata
);
6855 v29
= vec_ld(32, (float *) stackdata
);
6857 vec_dstst( faction
+j3a
, 0x10010100, 2 );
6859 /* put rinvsq in v10-v18, rinv6_OO in v30 and rinv12_OO in v31 */
6860 /* load c6 to v25 and c12 to v26 */
6861 v25
= vec_ld(48, (float *) stackdata
);
6862 v26
= vec_ld(64, (float *) stackdata
);
6864 v10
= vec_madd(v1
,v1
,v0
);
6865 v1
= vec_madd(v1
,v27
,v0
); /* rinv11*qqOO */
6866 v11
= vec_madd(v2
,v2
,v0
);
6867 /* load vctot to v23 and vnbtot to v24 */
6868 v23
= vec_ld(224,(float *) stackdata
);
6869 v24
= vec_ld(240,(float *) stackdata
);
6871 v2
= vec_madd(v2
,v28
,v0
); /* rinv12*qqOH */
6872 v12
= vec_madd(v3
,v3
,v0
);
6873 v30
= vec_madd(v10
,v10
,v0
); /* rinv4 */
6874 v3
= vec_madd(v3
,v28
,v0
); /* rinv13*qqOH */
6875 v13
= vec_madd(v4
,v4
,v0
);
6876 v4
= vec_madd(v4
,v28
,v0
); /* rinv21*qqOH */
6877 v14
= vec_madd(v5
,v5
,v0
);
6879 v23
= vec_add(v23
,v1
);
6881 v30
= vec_madd(v30
,v10
,v0
); /* rinv6 */
6882 v5
= vec_madd(v5
,v29
,v0
); /* rinv22*qqHH */
6883 v15
= vec_madd(v6
,v6
,v0
);
6884 v6
= vec_madd(v6
,v29
,v0
); /* rinv23*qqHH */
6885 v23
= vec_add(v23
,v2
);
6886 v16
= vec_madd(v7
,v7
,v0
);
6887 v31
= vec_madd(v30
,v30
,v0
); /* rinv12 */
6888 v25
= vec_madd(v25
,v30
,v0
); /* c6*rinv6 */
6889 /* load 6.0 to v30 */
6890 v30
= (vector
float)vec_splat_u32(6);
6891 v30
= vec_ctf((vector
unsigned int)v30
,0);
6892 v23
= vec_add(v23
,v3
);
6894 v7
= vec_madd(v7
,v28
,v0
); /* rinv31*qqOH */
6895 v17
= vec_madd(v8
,v8
,v0
);
6896 v8
= vec_madd(v8
,v29
,v0
); /* rinv32*qqHH */
6897 v26
= vec_madd(v26
,v31
,v0
); /* c12*rinv12 */
6898 v23
= vec_add(v23
,v4
);
6899 /* load 12.0 to v31 */
6900 v31
= (vector
float)vec_splat_u32(12);
6901 v31
= vec_ctf((vector
unsigned int)v31
,0);
6903 v24
= vec_sub(v24
,v25
); /* add vnb6 to vnbtot */
6904 v18
= vec_madd(v9
,v9
,v0
);
6905 v23
= vec_add(v23
,v5
);
6906 v9
= vec_madd(v9
,v29
,v0
); /* rinv33*qqHH */
6908 v24
= vec_add(v24
,v26
);/* add vnb12 to vnbtot */
6910 v31
= vec_madd(v31
,v26
,v0
);
6911 v11
= vec_madd(v11
,v2
,v0
); /* fs12 */
6912 v23
= vec_add(v23
,v6
);
6913 v12
= vec_madd(v12
,v3
,v0
); /* fs13 */
6914 v13
= vec_madd(v13
,v4
,v0
); /* fs21 */
6915 v31
= vec_nmsub(v30
,v25
,v31
);
6917 v14
= vec_madd(v14
,v5
,v0
); /* fs22 */
6918 v23
= vec_add(v23
,v7
);
6919 v15
= vec_madd(v15
,v6
,v0
); /* fs23 */
6920 v16
= vec_madd(v16
,v7
,v0
); /* fs31 */
6921 v1
= vec_add(v31
,v1
);
6922 v17
= vec_madd(v17
,v8
,v0
); /* fs32 */
6923 v23
= vec_add(v23
,v8
);
6924 v18
= vec_madd(v18
,v9
,v0
); /* fs33 */
6925 v10
= vec_madd(v10
,v1
,v0
);
6927 vec_st(v24
,240,(float *)stackdata
); /* store vnbtot */
6928 /* calculate vectorial forces and accumulate fj. v10-v18 has fs11-fs33 now. */
6929 /* First load iO-* dx,dy,dz vectors to v1-v9 */
6930 /* and load iO forces to v28,v29,v30 */
6931 /* use v19-v27 to accumulate j water forces */
6932 v28
= vec_ld(256, (float *) stackdata
);
6933 v29
= vec_ld(272, (float *) stackdata
);
6934 v30
= vec_ld(288, (float *) stackdata
);
6936 v1
= vec_ld(400, (float *) stackdata
);
6937 v2
= vec_ld(416, (float *) stackdata
);
6938 v23
= vec_add(v23
,v9
); /* incr. vctot */
6939 v3
= vec_ld(432, (float *) stackdata
);
6940 v4
= vec_ld(448, (float *) stackdata
);
6941 v5
= vec_ld(464, (float *) stackdata
);
6942 v6
= vec_ld(480, (float *) stackdata
);
6943 vec_st(v23
,224,(float *)stackdata
); /* store vctot back to stack */
6944 v7
= vec_ld(496, (float *) stackdata
);
6945 v8
= vec_ld(512, (float *) stackdata
);
6946 v9
= vec_ld(528, (float *) stackdata
);
6948 v28
= vec_madd(v10
,v1
,v28
);
6949 v19
= vec_nmsub(v10
,v1
,v0
);
6950 v29
= vec_madd(v10
,v2
,v29
);
6951 v20
= vec_nmsub(v10
,v2
,v0
);
6952 v30
= vec_madd(v10
,v3
,v30
);
6953 v21
= vec_nmsub(v10
,v3
,v0
);
6955 v28
= vec_madd(v11
,v4
,v28
);
6956 v22
= vec_nmsub(v11
,v4
,v0
);
6957 v29
= vec_madd(v11
,v5
,v29
);
6958 v23
= vec_nmsub(v11
,v5
,v0
);
6959 v30
= vec_madd(v11
,v6
,v30
);
6960 v24
= vec_nmsub(v11
,v6
,v0
);
6962 v28
= vec_madd(v12
,v7
,v28
);
6963 v25
= vec_nmsub(v12
,v7
,v0
);
6964 v29
= vec_madd(v12
,v8
,v29
);
6965 v26
= vec_nmsub(v12
,v8
,v0
);
6966 v30
= vec_madd(v12
,v9
,v30
);
6967 v27
= vec_nmsub(v12
,v9
,v0
);
6969 /* store these i forces, and repeat the procedure for the iH1-* force */
6970 vec_st(v28
,256,(float *)stackdata
);
6971 vec_st(v29
,272,(float *)stackdata
);
6972 vec_st(v30
,288,(float *)stackdata
);
6974 v28
= vec_ld(304,(float *) stackdata
);
6975 v29
= vec_ld(320,(float *) stackdata
);
6976 v30
= vec_ld(336,(float *) stackdata
);
6977 /* load new vectorial distances */
6978 v1
= vec_ld(544, (float *) stackdata
);
6979 v2
= vec_ld(560, (float *) stackdata
);
6980 v3
= vec_ld(576, (float *) stackdata
);
6981 v4
= vec_ld(592, (float *) stackdata
);
6982 v5
= vec_ld(608, (float *) stackdata
);
6983 v6
= vec_ld(624, (float *) stackdata
);
6984 v7
= vec_ld(640, (float *) stackdata
);
6985 v8
= vec_ld(656, (float *) stackdata
);
6986 v9
= vec_ld(672, (float *) stackdata
);
6988 v28
= vec_madd(v13
,v1
,v28
);
6989 v19
= vec_nmsub(v13
,v1
,v19
);
6990 v29
= vec_madd(v13
,v2
,v29
);
6991 v20
= vec_nmsub(v13
,v2
,v20
);
6992 v30
= vec_madd(v13
,v3
,v30
);
6993 v21
= vec_nmsub(v13
,v3
,v21
);
6995 v28
= vec_madd(v14
,v4
,v28
);
6996 v22
= vec_nmsub(v14
,v4
,v22
);
6997 v29
= vec_madd(v14
,v5
,v29
);
6998 v23
= vec_nmsub(v14
,v5
,v23
);
6999 v30
= vec_madd(v14
,v6
,v30
);
7000 v24
= vec_nmsub(v14
,v6
,v24
);
7002 v28
= vec_madd(v15
,v7
,v28
);
7003 v25
= vec_nmsub(v15
,v7
,v25
);
7004 v29
= vec_madd(v15
,v8
,v29
);
7005 v26
= vec_nmsub(v15
,v8
,v26
);
7006 v30
= vec_madd(v15
,v9
,v30
);
7007 v27
= vec_nmsub(v15
,v9
,v27
);
7009 /* store these i forces, and repeat the procedure for the iH2-* force */
7010 vec_st(v28
,304,(float *)stackdata
);
7011 vec_st(v29
,320,(float *)stackdata
);
7012 vec_st(v30
,336,(float *)stackdata
);
7013 v28
= vec_ld(352,(float *) stackdata
);
7014 v29
= vec_ld(368,(float *) stackdata
);
7015 v30
= vec_ld(384,(float *) stackdata
);
7016 /* load new vectorial distances */
7017 v1
= vec_ld(688, (float *) stackdata
);
7018 v2
= vec_ld(704, (float *) stackdata
);
7019 v3
= vec_ld(720, (float *) stackdata
);
7020 v4
= vec_ld(736, (float *) stackdata
);
7021 v5
= vec_ld(752, (float *) stackdata
);
7022 v6
= vec_ld(768, (float *) stackdata
);
7023 v7
= vec_ld(784, (float *) stackdata
);
7024 v8
= vec_ld(800, (float *) stackdata
);
7025 v9
= vec_ld(816, (float *) stackdata
);
7027 v28
= vec_madd(v16
,v1
,v28
);
7028 v19
= vec_nmsub(v16
,v1
,v19
);
7029 v29
= vec_madd(v16
,v2
,v29
);
7030 v20
= vec_nmsub(v16
,v2
,v20
);
7031 v30
= vec_madd(v16
,v3
,v30
);
7032 v21
= vec_nmsub(v16
,v3
,v21
);
7034 v28
= vec_madd(v17
,v4
,v28
);
7035 v22
= vec_nmsub(v17
,v4
,v22
);
7036 v29
= vec_madd(v17
,v5
,v29
);
7037 v23
= vec_nmsub(v17
,v5
,v23
);
7038 v30
= vec_madd(v17
,v6
,v30
);
7039 v24
= vec_nmsub(v17
,v6
,v24
);
7041 v28
= vec_madd(v18
,v7
,v28
);
7042 v25
= vec_nmsub(v18
,v7
,v25
);
7043 v29
= vec_madd(v18
,v8
,v29
);
7044 v26
= vec_nmsub(v18
,v8
,v26
);
7045 v30
= vec_madd(v18
,v9
,v30
);
7046 v27
= vec_nmsub(v18
,v9
,v27
);
7048 /* store these i forces */
7049 vec_st(v28
,352,(float *)stackdata
);
7050 vec_st(v29
,368,(float *)stackdata
);
7051 vec_st(v30
,384,(float *)stackdata
);
7053 /* j forces present in v19-v27 */
7055 v1
= vec_mergeh(v19
,v21
); /* Oxa Oza Oxb Ozb */
7056 v21
= vec_mergeh(v20
,v22
); /* Oya H1xa Oyb H1xb */
7057 v22
= vec_mergeh(v23
,v25
); /* H1ya H2xa H1yb H2xb */
7058 v25
= vec_mergeh(v24
,v26
); /* H1za H2ya H1zb H2yb */
7060 v26
= vec_mergeh(v27
,v0
); /* H2za 0 H2zb 0 */
7062 v2
= vec_mergeh(v1
,v21
); /* Oxa Oya Oza H1xa */
7063 v21
= vec_mergel(v1
,v21
); /* Oxb Oyb Ozb H1xb */
7064 v20
= vec_mergeh(v22
,v25
); /* H1ya H1za H2xa H2ya */
7065 v22
= vec_mergel(v22
,v25
); /* H1yb H1zb H2xb H2yb */
7066 v24
= vec_mergeh(v26
,v0
); /* H2za 0 0 0 */
7067 v26
= vec_mergel(v26
,v0
); /* H2zb 0 0 0 */
7069 v29
= (vector
float)vec_splat_s32(-1);
7070 /* move into position, load and add */
7071 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3a
);
7072 v4
= vec_ld( 0, faction
+j3a
);
7074 v6
= vec_ld( 16, faction
+j3a
);
7075 v8
= vec_ld( 32, faction
+j3a
);
7076 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
7078 v12
= vec_perm(v0
,v2
,(vector
unsigned char)v30
);
7079 v4
= vec_add(v12
,v4
);
7081 v14
= vec_perm(v2
,v20
,(vector
unsigned char)v30
);
7082 v2
= vec_add(v14
,v6
);
7084 v16
= vec_perm(v20
,v24
,(vector
unsigned char)v30
);
7085 v20
= vec_add(v16
,v8
);
7087 v12
= vec_sel(v4
,v4
,(vector
unsigned int)v10
);
7088 vec_st(v12
, 0, faction
+j3a
);
7090 v10
= vec_sld(v0
,v10
,12);
7092 vec_st(v2
, 16, faction
+j3a
);
7094 v12
= vec_sel(v20
,v8
,(vector
unsigned int)v10
);
7096 vec_st(v12
, 32, faction
+j3a
);
7098 /* Finished 1 - now do 2 */
7100 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3b
);
7101 v4
= vec_ld( 0, faction
+j3b
);
7102 v6
= vec_ld( 16, faction
+j3b
);
7103 v8
= vec_ld( 32, faction
+j3b
);
7104 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
7106 v12
= vec_perm(v0
,v21
,(vector
unsigned char)v30
);
7107 v24
= vec_add(v12
,v4
);
7109 v12
= vec_perm(v21
,v22
,(vector
unsigned char)v30
);
7110 v21
= vec_add(v12
,v6
);
7112 v12
= vec_perm(v22
,v26
,(vector
unsigned char)v30
);
7113 v22
= vec_add(v12
,v8
);
7115 v12
= vec_sel(v4
,v24
,(vector
unsigned int)v10
);
7116 vec_st(v12
, 0, faction
+j3b
);
7117 v10
= vec_sld(v0
,v10
,12);
7119 vec_st(v21
, 16, faction
+j3b
);
7121 v12
= vec_sel(v22
,v8
,(vector
unsigned int)v10
);
7122 vec_st(v12
, 32, faction
+j3b
);
7128 v10
= (vector
float)vec_lvsl(0, pos
+j3a
);
7130 v2
= vec_ld(0, pos
+j3a
);
7131 v3
= vec_ld(16, pos
+j3a
);
7132 v4
= vec_ld(32, pos
+j3a
);
7133 v1
= vec_perm(v2
,v3
,(vector
unsigned char)v10
); /* Oxa Oya Oza H1xa */
7134 v5
= vec_perm(v3
,v4
,(vector
unsigned char)v10
); /* H1ya H1za H2xa H2ya */
7135 v9
= vec_perm(v4
,v4
,(vector
unsigned char)v10
); /* H2za - - - */
7137 /* permute water coordinates */
7138 /* just splat things... never mind that we fill all cells :-) */
7139 v29
= vec_ld(128, (float *) stackdata
); /* load i H1x */
7140 v2
= vec_splat(v1
,1);
7141 v30
= vec_ld(144, (float *) stackdata
); /* load i H1y */
7142 v3
= vec_splat(v1
,2);
7143 v31
= vec_ld(160, (float *) stackdata
); /* load i H1z */
7144 v4
= vec_splat(v1
,3);
7145 v6
= vec_splat(v5
,1);
7146 v7
= vec_splat(v5
,2);
7147 v8
= vec_splat(v5
,3);
7149 v10
= vec_sub(v29
,v1
); /* iH1x - jOx */
7150 v13
= vec_sub(v29
,v4
); /* iH1x - jH1x */
7151 v16
= vec_sub(v29
,v7
); /* iH1x - jH2x */
7152 v29
= vec_ld(176, (float *) stackdata
); /* load i H2x */
7153 v11
= vec_sub(v30
,v2
); /* iH1y - jOy */
7154 v14
= vec_sub(v30
,v5
); /* iH1y - jH1y */
7155 v17
= vec_sub(v30
,v8
); /* iH1y - jH2y */
7156 v30
= vec_ld(192, (float *) stackdata
); /* load i H2y */
7157 vec_st(v10
, 544, (float *)stackdata
); /* dx21 */
7158 vec_st(v13
, 592, (float *)stackdata
); /* dx22 */
7159 vec_st(v16
, 640, (float *)stackdata
); /* dx23 */
7160 v12
= vec_sub(v31
,v3
); /* iH1z - jOz */
7161 v15
= vec_sub(v31
,v6
); /* iH1z - jH1z */
7162 v18
= vec_sub(v31
,v9
); /* iH1z - jH2z */
7163 v31
= vec_ld(208, (float *) stackdata
); /* load i H2z */
7164 /* v10-v18 now contain the iH1-jO, iH1-jH1 and iH1-jH2 distances */
7165 vec_st(v11
, 560, (float *)stackdata
); /* dy21 */
7166 vec_st(v14
, 608, (float *)stackdata
); /* dy22 */
7167 vec_st(v17
, 656, (float *)stackdata
); /* dy23 */
7168 v19
= vec_sub(v29
,v1
); /* iH2x - jOx */
7169 v22
= vec_sub(v29
,v4
); /* iH2x - jH1x */
7170 v25
= vec_sub(v29
,v7
); /* iH2x - jH2x */
7171 vec_st(v12
, 576, (float *)stackdata
); /* dz21 */
7172 vec_st(v15
, 624, (float *)stackdata
); /* dz22 */
7173 vec_st(v18
, 672, (float *)stackdata
); /* dz23 */
7174 v29
= vec_ld(80, (float *) stackdata
); /* load i Ox */
7175 v20
= vec_sub(v30
,v2
); /* iH2y - jOy */
7176 v23
= vec_sub(v30
,v5
); /* iH2y - jH1y */
7177 v26
= vec_sub(v30
,v8
); /* iH2y - jH2y */
7178 vec_st(v19
, 688, (float *)stackdata
); /* dx31 */
7179 vec_st(v22
, 736, (float *)stackdata
); /* dx32 */
7180 vec_st(v25
, 784, (float *)stackdata
); /* dx33 */
7181 v30
= vec_ld(96, (float *) stackdata
); /* load i Oy */
7182 v21
= vec_sub(v31
,v3
); /* iH2z - jOz */
7183 v24
= vec_sub(v31
,v6
); /* iH2z - jH1z */
7184 v27
= vec_sub(v31
,v9
); /* iH2z - jH2z */
7185 v31
= vec_ld(112, (float *) stackdata
); /* load i Oz */
7186 vec_st(v20
, 704, (float *)stackdata
); /* dy31 */
7187 vec_st(v23
, 752, (float *)stackdata
); /* dy32 */
7188 vec_st(v26
, 800, (float *)stackdata
); /* dy33 */
7190 v1
= vec_sub(v29
,v1
); /* iOx - jOx */
7191 v4
= vec_sub(v29
,v4
); /* iOx - jH1x */
7192 v7
= vec_sub(v29
,v7
); /* iOx - jH2x */
7193 vec_st(v21
, 720, (float *)stackdata
); /* dz31 */
7194 vec_st(v24
, 768, (float *)stackdata
); /* dz32 */
7195 vec_st(v27
, 816, (float *)stackdata
); /* dz33 */
7196 v2
= vec_sub(v30
,v2
); /* iOy - jOy */
7197 v5
= vec_sub(v30
,v5
); /* iOy - jH1y */
7198 v8
= vec_sub(v30
,v8
); /* iOy - jH2y */
7199 vec_st(v1
, 400, (float *)stackdata
); /* dx11 */
7200 vec_st(v4
, 448, (float *)stackdata
); /* dx12 */
7201 vec_st(v7
, 496, (float *)stackdata
); /* dx13 */
7202 v3
= vec_sub(v31
,v3
); /* iOz - jOz */
7203 v6
= vec_sub(v31
,v6
); /* iOz - jH1z */
7204 v9
= vec_sub(v31
,v9
); /* iOz - jH2z */
7205 vec_st(v2
, 416, (float *)stackdata
); /* dy11 */
7206 vec_st(v5
, 464, (float *)stackdata
); /* dy12 */
7207 vec_st(v8
, 512, (float *)stackdata
); /* dy13 */
7209 v1
= vec_madd(v1
,v1
,v0
);
7210 v4
= vec_madd(v4
,v4
,v0
);
7211 v7
= vec_madd(v7
,v7
,v0
);
7212 vec_st(v3
, 432, (float *)stackdata
); /* dz11 */
7213 vec_st(v6
, 480, (float *)stackdata
); /* dz12 */
7214 vec_st(v9
, 528, (float *)stackdata
); /* dz13 */
7215 v10
= vec_madd(v10
,v10
,v0
);
7216 v13
= vec_madd(v13
,v13
,v0
);
7217 v16
= vec_madd(v16
,v16
,v0
);
7218 v19
= vec_madd(v19
,v19
,v0
);
7219 v22
= vec_madd(v22
,v22
,v0
);
7220 v25
= vec_madd(v25
,v25
,v0
);
7221 v1
= vec_madd(v2
,v2
,v1
);
7222 v4
= vec_madd(v5
,v5
,v4
);
7223 v7
= vec_madd(v8
,v8
,v7
);
7224 v10
= vec_madd(v11
,v11
,v10
);
7225 v13
= vec_madd(v14
,v14
,v13
);
7226 v16
= vec_madd(v17
,v17
,v16
);
7227 v19
= vec_madd(v20
,v20
,v19
);
7228 v22
= vec_madd(v23
,v23
,v22
);
7229 v25
= vec_madd(v26
,v26
,v25
);
7230 v1
= vec_madd(v3
,v3
,v1
);
7231 v2
= vec_madd(v6
,v6
,v4
);
7232 v3
= vec_madd(v9
,v9
,v7
);
7233 v4
= vec_madd(v12
,v12
,v10
);
7234 v5
= vec_madd(v15
,v15
,v13
);
7235 v6
= vec_madd(v18
,v18
,v16
);
7236 v7
= vec_madd(v21
,v21
,v19
);
7237 v8
= vec_madd(v24
,v24
,v22
);
7238 v9
= vec_madd(v27
,v27
,v25
);
7251 v10
= vec_rsqrte(v1
);
7252 v11
= vec_rsqrte(v2
);
7253 v12
= vec_rsqrte(v3
);
7254 v13
= vec_rsqrte(v4
);
7255 v14
= vec_rsqrte(v5
);
7256 v15
= vec_rsqrte(v6
);
7257 v16
= vec_rsqrte(v7
);
7258 v17
= vec_rsqrte(v8
);
7259 v18
= vec_rsqrte(v9
);
7260 /* create constant 0.5 */
7261 v30
= (vector
float) vec_splat_u32(1);
7262 v31
= vec_ctf((vector
unsigned int)v30
,1); /* 0.5 */
7263 v30
= vec_ctf((vector
unsigned int)v30
,0); /* 1.0 */
7265 v19
= vec_madd(v10
,v10
,v0
); /* lu*lu */
7266 v20
= vec_madd(v11
,v11
,v0
);
7267 v21
= vec_madd(v12
,v12
,v0
);
7268 v22
= vec_madd(v13
,v13
,v0
);
7269 v23
= vec_madd(v14
,v14
,v0
);
7270 v24
= vec_madd(v15
,v15
,v0
);
7271 v25
= vec_madd(v16
,v16
,v0
);
7272 v26
= vec_madd(v17
,v17
,v0
);
7273 v27
= vec_madd(v18
,v18
,v0
);
7275 v19
= vec_nmsub(v1
,v19
,v30
); /* 1.0 - rsq*lu*lu */
7276 v20
= vec_nmsub(v2
,v20
,v30
);
7277 v21
= vec_nmsub(v3
,v21
,v30
);
7278 v22
= vec_nmsub(v4
,v22
,v30
);
7279 v23
= vec_nmsub(v5
,v23
,v30
);
7280 v24
= vec_nmsub(v6
,v24
,v30
);
7281 v25
= vec_nmsub(v7
,v25
,v30
);
7282 v26
= vec_nmsub(v8
,v26
,v30
);
7283 v27
= vec_nmsub(v9
,v27
,v30
);
7285 v1
= vec_madd(v10
,v31
,v0
);/* lu*0.5*/
7286 v2
= vec_madd(v11
,v31
,v0
);
7287 v3
= vec_madd(v12
,v31
,v0
);
7288 v4
= vec_madd(v13
,v31
,v0
);
7289 v5
= vec_madd(v14
,v31
,v0
);
7290 v6
= vec_madd(v15
,v31
,v0
);
7291 v7
= vec_madd(v16
,v31
,v0
);
7292 v8
= vec_madd(v17
,v31
,v0
);
7293 v9
= vec_madd(v18
,v31
,v0
);
7295 /* The rinv values */
7296 v1
= vec_madd(v1
,v19
,v10
);
7297 v2
= vec_madd(v2
,v20
,v11
);
7298 v3
= vec_madd(v3
,v21
,v12
);
7299 v4
= vec_madd(v4
,v22
,v13
);
7300 v5
= vec_madd(v5
,v23
,v14
);
7301 v6
= vec_madd(v6
,v24
,v15
);
7302 v7
= vec_madd(v7
,v25
,v16
);
7303 v8
= vec_madd(v8
,v26
,v17
);
7304 v9
= vec_madd(v9
,v27
,v18
);
7306 v10
= (vector
float)vec_splat_s32(-1);
7307 v10
= vec_sld(v0
,v10
,12);
7309 v1
= (vector
float)vec_sel((vector
unsigned int)v1
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7310 v2
= (vector
float)vec_sel((vector
unsigned int)v2
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7311 v3
= (vector
float)vec_sel((vector
unsigned int)v3
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7312 v4
= (vector
float)vec_sel((vector
unsigned int)v4
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7313 v5
= (vector
float)vec_sel((vector
unsigned int)v5
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7314 v6
= (vector
float)vec_sel((vector
unsigned int)v6
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7315 v7
= (vector
float)vec_sel((vector
unsigned int)v7
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7316 v8
= (vector
float)vec_sel((vector
unsigned int)v8
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7317 v9
= (vector
float)vec_sel((vector
unsigned int)v9
,(vector
unsigned int)v0
,(vector
unsigned int)v10
);
7319 /* load qqOO, qqOH and qqHH to v27,v28,v29 */
7320 v27
= vec_ld(0, (float *) stackdata
);
7321 v28
= vec_ld(16, (float *) stackdata
);
7322 v29
= vec_ld(32, (float *) stackdata
);
7323 vec_dstst( faction
+j3a
, 0x10010100, 2 );
7325 /* put rinvsq in v10-v18, rinv6_OO in v30 and rinv12_OO in v31 */
7326 /* load c6 to v25 and c12 to v26 */
7327 v25
= vec_ld(48, (float *) stackdata
);
7328 v26
= vec_ld(64, (float *) stackdata
);
7330 v10
= vec_madd(v1
,v1
,v0
);
7331 v1
= vec_madd(v1
,v27
,v0
); /* rinv11*qqOO */
7332 v11
= vec_madd(v2
,v2
,v0
);
7333 /* load vctot to v23 and vnbtot to v24 */
7334 v23
= vec_ld(224,(float *) stackdata
);
7335 v24
= vec_ld(240,(float *) stackdata
);
7337 v2
= vec_madd(v2
,v28
,v0
); /* rinv12*qqOH */
7338 v12
= vec_madd(v3
,v3
,v0
);
7339 v30
= vec_madd(v10
,v10
,v0
); /* rinv4 */
7340 v3
= vec_madd(v3
,v28
,v0
); /* rinv13*qqOH */
7341 v13
= vec_madd(v4
,v4
,v0
);
7342 v4
= vec_madd(v4
,v28
,v0
); /* rinv21*qqOH */
7343 v14
= vec_madd(v5
,v5
,v0
);
7345 v23
= vec_add(v23
,v1
);
7347 v30
= vec_madd(v30
,v10
,v0
); /* rinv6 */
7348 v5
= vec_madd(v5
,v29
,v0
); /* rinv22*qqHH */
7349 v15
= vec_madd(v6
,v6
,v0
);
7350 v6
= vec_madd(v6
,v29
,v0
); /* rinv23*qqHH */
7351 v23
= vec_add(v23
,v2
);
7352 v16
= vec_madd(v7
,v7
,v0
);
7353 v31
= vec_madd(v30
,v30
,v0
); /* rinv12 */
7354 v25
= vec_madd(v25
,v30
,v0
); /* c6*rinv6 */
7355 /* load 6.0 to v30 */
7356 v30
= (vector
float)vec_splat_u32(6);
7357 v30
= vec_ctf((vector
unsigned int)v30
,0);
7358 v23
= vec_add(v23
,v3
);
7360 v7
= vec_madd(v7
,v28
,v0
); /* rinv31*qqOH */
7361 v17
= vec_madd(v8
,v8
,v0
);
7362 v8
= vec_madd(v8
,v29
,v0
); /* rinv32*qqHH */
7363 v26
= vec_madd(v26
,v31
,v0
); /* c12*rinv12 */
7364 v23
= vec_add(v23
,v4
);
7365 /* load 12.0 to v31 */
7366 v31
= (vector
float)vec_splat_u32(12);
7367 v31
= vec_ctf((vector
unsigned int)v31
,0);
7369 v24
= vec_sub(v24
,v25
); /* add vnb6 to vnbtot */
7370 v18
= vec_madd(v9
,v9
,v0
);
7371 v23
= vec_add(v23
,v5
);
7372 v9
= vec_madd(v9
,v29
,v0
); /* rinv33*qqHH */
7374 v24
= vec_add(v24
,v26
);/* add vnb12 to vnbtot */
7376 v31
= vec_madd(v31
,v26
,v0
);
7377 v11
= vec_madd(v11
,v2
,v0
); /* fs12 */
7378 v23
= vec_add(v23
,v6
);
7379 v12
= vec_madd(v12
,v3
,v0
); /* fs13 */
7380 v13
= vec_madd(v13
,v4
,v0
); /* fs21 */
7381 v31
= vec_nmsub(v30
,v25
,v31
);
7383 v14
= vec_madd(v14
,v5
,v0
); /* fs22 */
7384 v23
= vec_add(v23
,v7
);
7385 v15
= vec_madd(v15
,v6
,v0
); /* fs23 */
7386 v16
= vec_madd(v16
,v7
,v0
); /* fs31 */
7387 v1
= vec_add(v31
,v1
);
7388 v17
= vec_madd(v17
,v8
,v0
); /* fs32 */
7389 v23
= vec_add(v23
,v8
);
7390 v18
= vec_madd(v18
,v9
,v0
); /* fs33 */
7391 v10
= vec_madd(v10
,v1
,v0
);
7393 vec_st(v24
,240,(float *)stackdata
); /* store vnbtot */
7394 /* calculate vectorial forces and accumulate fj. v10-v18 has fs11-fs33 now. */
7395 /* First load iO-* dx,dy,dz vectors to v1-v9 */
7396 /* and load iO forces to v28,v29,v30 */
7397 /* use v19-v27 to accumulate j water forces */
7398 v28
= vec_ld(256, (float *) stackdata
);
7399 v29
= vec_ld(272, (float *) stackdata
);
7400 v30
= vec_ld(288, (float *) stackdata
);
7402 v1
= vec_ld(400, (float *) stackdata
);
7403 v2
= vec_ld(416, (float *) stackdata
);
7404 v23
= vec_add(v23
,v9
); /* incr. vctot */
7405 v3
= vec_ld(432, (float *) stackdata
);
7406 v4
= vec_ld(448, (float *) stackdata
);
7407 v5
= vec_ld(464, (float *) stackdata
);
7408 v6
= vec_ld(480, (float *) stackdata
);
7409 vec_st(v23
,224,(float *)stackdata
); /* store vctot back to stack */
7410 v7
= vec_ld(496, (float *) stackdata
);
7411 v8
= vec_ld(512, (float *) stackdata
);
7412 v9
= vec_ld(528, (float *) stackdata
);
7414 v28
= vec_madd(v10
,v1
,v28
);
7415 v19
= vec_nmsub(v10
,v1
,v0
);
7416 v29
= vec_madd(v10
,v2
,v29
);
7417 v20
= vec_nmsub(v10
,v2
,v0
);
7418 v30
= vec_madd(v10
,v3
,v30
);
7419 v21
= vec_nmsub(v10
,v3
,v0
);
7421 v28
= vec_madd(v11
,v4
,v28
);
7422 v22
= vec_nmsub(v11
,v4
,v0
);
7423 v29
= vec_madd(v11
,v5
,v29
);
7424 v23
= vec_nmsub(v11
,v5
,v0
);
7425 v30
= vec_madd(v11
,v6
,v30
);
7426 v24
= vec_nmsub(v11
,v6
,v0
);
7428 v28
= vec_madd(v12
,v7
,v28
);
7429 v25
= vec_nmsub(v12
,v7
,v0
);
7430 v29
= vec_madd(v12
,v8
,v29
);
7431 v26
= vec_nmsub(v12
,v8
,v0
);
7432 v30
= vec_madd(v12
,v9
,v30
);
7433 v27
= vec_nmsub(v12
,v9
,v0
);
7435 /* store these i forces, and repeat the procedure for the iH1-* force */
7436 vec_st(v28
,256,(float *)stackdata
);
7437 vec_st(v29
,272,(float *)stackdata
);
7438 vec_st(v30
,288,(float *)stackdata
);
7440 v28
= vec_ld(304,(float *) stackdata
);
7441 v29
= vec_ld(320,(float *) stackdata
);
7442 v30
= vec_ld(336,(float *) stackdata
);
7443 /* load new vectorial distances */
7444 v1
= vec_ld(544, (float *) stackdata
);
7445 v2
= vec_ld(560, (float *) stackdata
);
7446 v3
= vec_ld(576, (float *) stackdata
);
7447 v4
= vec_ld(592, (float *) stackdata
);
7448 v5
= vec_ld(608, (float *) stackdata
);
7449 v6
= vec_ld(624, (float *) stackdata
);
7450 v7
= vec_ld(640, (float *) stackdata
);
7451 v8
= vec_ld(656, (float *) stackdata
);
7452 v9
= vec_ld(672, (float *) stackdata
);
7454 v28
= vec_madd(v13
,v1
,v28
);
7455 v19
= vec_nmsub(v13
,v1
,v19
);
7456 v29
= vec_madd(v13
,v2
,v29
);
7457 v20
= vec_nmsub(v13
,v2
,v20
);
7458 v30
= vec_madd(v13
,v3
,v30
);
7459 v21
= vec_nmsub(v13
,v3
,v21
);
7461 v28
= vec_madd(v14
,v4
,v28
);
7462 v22
= vec_nmsub(v14
,v4
,v22
);
7463 v29
= vec_madd(v14
,v5
,v29
);
7464 v23
= vec_nmsub(v14
,v5
,v23
);
7465 v30
= vec_madd(v14
,v6
,v30
);
7466 v24
= vec_nmsub(v14
,v6
,v24
);
7468 v28
= vec_madd(v15
,v7
,v28
);
7469 v25
= vec_nmsub(v15
,v7
,v25
);
7470 v29
= vec_madd(v15
,v8
,v29
);
7471 v26
= vec_nmsub(v15
,v8
,v26
);
7472 v30
= vec_madd(v15
,v9
,v30
);
7473 v27
= vec_nmsub(v15
,v9
,v27
);
7475 /* store these i forces, and repeat the procedure for the iH2-* force */
7476 vec_st(v28
,304,(float *)stackdata
);
7477 vec_st(v29
,320,(float *)stackdata
);
7478 vec_st(v30
,336,(float *)stackdata
);
7479 v28
= vec_ld(352,(float *) stackdata
);
7480 v29
= vec_ld(368,(float *) stackdata
);
7481 v30
= vec_ld(384,(float *) stackdata
);
7482 /* load new vectorial distances */
7483 v1
= vec_ld(688, (float *) stackdata
);
7484 v2
= vec_ld(704, (float *) stackdata
);
7485 v3
= vec_ld(720, (float *) stackdata
);
7486 v4
= vec_ld(736, (float *) stackdata
);
7487 v5
= vec_ld(752, (float *) stackdata
);
7488 v6
= vec_ld(768, (float *) stackdata
);
7489 v7
= vec_ld(784, (float *) stackdata
);
7490 v8
= vec_ld(800, (float *) stackdata
);
7491 v9
= vec_ld(816, (float *) stackdata
);
7493 v28
= vec_madd(v16
,v1
,v28
);
7494 v19
= vec_nmsub(v16
,v1
,v19
);
7495 v29
= vec_madd(v16
,v2
,v29
);
7496 v20
= vec_nmsub(v16
,v2
,v20
);
7497 v30
= vec_madd(v16
,v3
,v30
);
7498 v21
= vec_nmsub(v16
,v3
,v21
);
7500 v28
= vec_madd(v17
,v4
,v28
);
7501 v22
= vec_nmsub(v17
,v4
,v22
);
7502 v29
= vec_madd(v17
,v5
,v29
);
7503 v23
= vec_nmsub(v17
,v5
,v23
);
7504 v30
= vec_madd(v17
,v6
,v30
);
7505 v24
= vec_nmsub(v17
,v6
,v24
);
7507 v28
= vec_madd(v18
,v7
,v28
);
7508 v25
= vec_nmsub(v18
,v7
,v25
);
7509 v29
= vec_madd(v18
,v8
,v29
);
7510 v26
= vec_nmsub(v18
,v8
,v26
);
7511 v30
= vec_madd(v18
,v9
,v30
);
7512 v27
= vec_nmsub(v18
,v9
,v27
);
7514 /* store these i forces */
7515 vec_st(v28
,352,(float *)stackdata
);
7516 vec_st(v29
,368,(float *)stackdata
);
7517 vec_st(v30
,384,(float *)stackdata
);
7519 /* j forces present in v19-v27 */
7521 v1
= vec_mergeh(v19
,v21
); /* Oxa Oza - - */
7522 v21
= vec_mergeh(v20
,v22
); /* Oya H1xa - - */
7523 v22
= vec_mergeh(v23
,v25
); /* H1ya H2xa - - */
7524 v25
= vec_mergeh(v24
,v26
); /* H1za H2ya - - */
7526 v26
= vec_mergeh(v27
,v0
); /* H2za 0 - 0 */
7528 v2
= vec_mergeh(v1
,v21
); /* Oxa Oya Oza H1xa */
7529 v20
= vec_mergeh(v22
,v25
); /* H1ya H1za H2xa H2ya */
7530 v24
= vec_mergeh(v26
,v0
); /* H2za 0 0 0 */
7532 v29
= (vector
float)vec_splat_s32(-1);
7534 /* move into position, load and add */
7535 v30
= (vector
float)vec_lvsr( 0, (int *) faction
+j3a
);
7536 v4
= vec_ld( 0, faction
+j3a
);
7538 v6
= vec_ld( 16, faction
+j3a
);
7539 v8
= vec_ld( 32, faction
+j3a
);
7540 v10
= vec_perm(v0
,v29
,(vector
unsigned char)v30
);
7542 v12
= vec_perm(v0
,v2
,(vector
unsigned char)v30
);
7543 v4
= vec_add(v12
,v4
);
7545 v14
= vec_perm(v2
,v20
,(vector
unsigned char)v30
);
7546 v2
= vec_add(v14
,v6
);
7548 v16
= vec_perm(v20
,v24
,(vector
unsigned char)v30
);
7549 v20
= vec_add(v16
,v8
);
7551 v12
= vec_sel(v4
,v4
,(vector
unsigned int)v10
);
7552 vec_st(v12
, 0, faction
+j3a
);
7554 v10
= vec_sld(v0
,v10
,12);
7556 vec_st(v2
, 16, faction
+j3a
);
7558 v12
= vec_sel(v20
,v8
,(vector
unsigned int)v10
);
7560 vec_st(v12
, 32, faction
+j3a
);
7564 v1
= (vector
float)vec_lvsr(0,faction
+ii3
);
7565 v5
= (vector
float)vec_splat_s32(-1);
7566 v2
= vec_ld( 0, faction
+ii3
);
7567 v3
= vec_ld(16, faction
+ii3
);
7568 v4
= vec_ld(32, faction
+ii3
);
7569 v5
= vec_perm(v0
, v5
,(vector
unsigned char)v1
); /* mask */
7570 /* load forces from stack */
7571 v6
= vec_ld(256, (float *) stackdata
); /* Ox */
7572 v7
= vec_ld(272, (float *) stackdata
); /* Oy */
7573 v8
= vec_ld(288, (float *) stackdata
); /* Oz */
7574 v9
= vec_ld(304, (float *) stackdata
); /* H1x */
7575 v10
= vec_ld(320, (float *) stackdata
); /* H1y */
7576 v11
= vec_ld(336, (float *) stackdata
); /* H1z */
7577 v12
= vec_ld(352, (float *) stackdata
); /* H2x */
7578 v13
= vec_ld(368, (float *) stackdata
); /* H2y */
7579 v14
= vec_ld(384, (float *) stackdata
); /* H2z */
7581 /* accumulate the forces */
7582 v15
= vec_sld(v6
,v6
,8);
7583 v16
= vec_sld(v7
,v7
,8);
7584 v17
= vec_sld(v8
,v8
,8);
7585 v18
= vec_sld(v9
,v9
,8);
7586 v19
= vec_sld(v10
,v10
,8);
7587 v20
= vec_sld(v11
,v11
,8);
7588 v21
= vec_sld(v12
,v12
,8);
7589 v22
= vec_sld(v13
,v13
,8);
7590 v23
= vec_sld(v14
,v14
,8);
7592 v6
= vec_add(v6
,v15
); /* Ox Ox' - - */
7593 v7
= vec_add(v7
,v16
); /* Oy Oy' - - */
7594 v8
= vec_add(v8
,v17
); /* Oz Oz' - - */
7595 v9
= vec_add(v9
,v18
); /* H1x H1x' - - */
7596 v10
= vec_add(v10
,v19
); /* H1y H1y' - - */
7597 v11
= vec_add(v11
,v20
); /* H1z H1z' - - */
7598 v12
= vec_add(v12
,v21
); /* H2x H2x' - - */
7599 v13
= vec_add(v13
,v22
); /* H2y H2y' - - */
7600 v14
= vec_add(v14
,v23
); /* H2z H2z' - - */
7602 v6
= vec_mergeh(v6
,v8
); /* Ox Oz Ox' Oz' */
7603 v7
= vec_mergeh(v7
,v9
); /* Oy H1x Oy' H1x' */
7604 v10
= vec_mergeh(v10
,v12
); /* H1y H2x H1y' H2x' */
7605 v11
= vec_mergeh(v11
,v13
); /* H1z H2y H1z' H2y' */
7606 v14
= vec_mergeh(v14
,v0
); /* H2z 0 H2z' 0 */
7608 v15
= vec_sld(v6
,v6
,8);
7609 v16
= vec_sld(v7
,v7
,8);
7610 v17
= vec_sld(v10
,v10
,8);
7611 v18
= vec_sld(v11
,v11
,8);
7612 v19
= vec_sld(v14
,v14
,8);
7614 v6
= vec_add(v6
,v15
); /* Ox Oz - - */
7615 v7
= vec_add(v7
,v16
); /* Oy H1x - - */
7616 v10
= vec_add(v10
,v17
);/* H1y H2x - - */
7617 v11
= vec_add(v11
,v18
);/* H1z H2y - - */
7618 v14
= vec_add(v14
,v19
);/* H2z 0 - 0 */
7620 v6
= vec_mergeh(v6
,v7
); /* Ox Oy Oz H1x */
7621 v10
= vec_mergeh(v10
,v11
); /* H1y H1z H2x H2y */
7622 v14
= vec_mergeh(v14
,v0
); /* H2z 0 0 0 */
7624 v7
= vec_sld(v0
,v6
,12); /* 0 Ox Oy Oz */
7625 v8
= vec_sld(v6
,v10
,8); /* - H1x H1y H1z */
7626 v9
= vec_sld(v10
,v14
,4); /* - H2x H2y H2z */
7628 v12
= vec_perm(v0
,v6
,(vector
unsigned char)v1
); /* The part to add to v2 */
7629 v13
= vec_perm(v6
,v10
,(vector
unsigned char)v1
); /* The part to add to v3 */
7630 v14
= vec_perm(v10
,v14
,(vector
unsigned char)v1
); /* The part to add to v4 */
7632 v12
= vec_add(v2
,v12
);
7633 v13
= vec_add(v3
,v13
);
7634 v14
= vec_add(v4
,v14
);
7636 v12
= vec_sel(v2
,v12
,(vector
unsigned int)v5
);
7637 v5
= vec_sld(v0
,v5
,12);
7638 v14
= vec_sel(v14
,v4
,(vector
unsigned int)v5
);
7641 vec_st(v12
, 0, faction
+ii3
);
7642 vec_st(v13
,16, faction
+ii3
);
7643 vec_st(v14
,32, faction
+ii3
);
7645 /* accumulate for shift */
7646 v7
= vec_add(v7
,v8
);
7647 v7
= vec_add(v7
,v9
);
7648 v7
= vec_sld(v7
,v0
,4); /* x y z 0 */
7650 /* add v7 to the memory location fshift+is3 */
7651 v15
= vec_lde(0, fshift
+is3
);
7652 v16
= vec_lde(4, fshift
+is3
);
7653 v17
= vec_lde(8, fshift
+is3
);
7654 v18
= (vector
float)vec_splat(v7
,0);
7655 v19
= (vector
float)vec_splat(v7
,1);
7656 v20
= (vector
float)vec_splat(v7
,2);
7657 v15
= vec_add(v15
,v18
);
7658 v16
= vec_add(v16
,v19
);
7659 v17
= vec_add(v17
,v20
);
7660 vec_ste(v15
,0,fshift
+is3
);
7661 vec_ste(v16
,4,fshift
+is3
);
7662 vec_ste(v17
,8,fshift
+is3
);
7664 /* update potential energies */
7665 v1
= vec_ld(224,(float *) stackdata
); /* load vctot */
7666 v2
= vec_ld(240,(float *) stackdata
); /* load vnbtot */
7667 v3
= vec_sld(v1
,v1
,8);
7668 v4
= vec_sld(v2
,v2
,8);
7669 v1
= vec_add(v1
,v3
);
7670 v2
= vec_add(v2
,v4
);
7671 v3
= vec_sld(v1
,v1
,4);
7672 v4
= vec_sld(v2
,v2
,4);
7673 v1
= vec_add(v1
,v3
);
7674 v2
= vec_add(v2
,v4
);
7675 /* all 4 positions in v1, v2 contain the sum now */
7676 v3
= vec_lde(0, Vc
+gid
[n
]);
7677 v4
= vec_lde(0, Vnb
+gid
[n
]);
7678 v3
= vec_add(v1
,v3
);
7679 v4
= vec_add(v2
,v4
);
7680 vec_ste(v3
,0,Vc
+gid
[n
]);
7681 vec_ste(v4
,0,Vnb
+gid
[n
]);
7688 void inl2030_altivec(
7705 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
7706 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
7708 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
7709 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
7710 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
7712 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
7713 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
7714 vector
float rinvsq11
,rinvsq12
,rinvsq13
;
7715 vector
float rinvsq21
,rinvsq22
,rinvsq23
;
7716 vector
float rinvsq31
,rinvsq32
,rinvsq33
;
7717 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
;
7719 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
7720 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
;
7721 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
7722 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
7723 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,vkrf
,vcrf
;
7724 vector
float krsq11
,krsq12
,krsq13
,krsq21
,krsq22
,krsq23
,krsq31
,krsq32
,krsq33
;
7725 vector
float qqOOt
,qqOHt
,qqHHt
;
7727 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
7728 int jnra
,jnrb
,jnrc
,jnrd
;
7729 int j3a
,j3b
,j3c
,j3d
;
7732 vfacel
=load_float_and_splat(&facel
);
7733 vkrf
=load_float_and_splat(&krf
);
7734 vcrf
=load_float_and_splat(&crf
);
7735 qO
= load_float_and_splat(charge
+iinr
[0]);
7736 qH
= load_float_and_splat(charge
+iinr
[0]+1);
7737 qqOO
= vec_madd(qO
,qO
,nul
);
7738 qqOH
= vec_madd(qO
,qH
,nul
);
7739 qqHH
= vec_madd(qH
,qH
,nul
);
7740 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
7741 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
7742 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
7744 for(n
=0;n
<nri
;n
++) {
7748 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
7749 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
7763 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
7772 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
7773 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
7774 dx11
= vec_sub(ix1
,jx1
);
7775 dx12
= vec_sub(ix1
,jx2
);
7776 dx13
= vec_sub(ix1
,jx3
);
7777 dy11
= vec_sub(iy1
,jy1
);
7778 dy12
= vec_sub(iy1
,jy2
);
7779 dy13
= vec_sub(iy1
,jy3
);
7780 dz11
= vec_sub(iz1
,jz1
);
7781 dz12
= vec_sub(iz1
,jz2
);
7782 dz13
= vec_sub(iz1
,jz3
);
7783 dx21
= vec_sub(ix2
,jx1
);
7784 dx22
= vec_sub(ix2
,jx2
);
7785 dx23
= vec_sub(ix2
,jx3
);
7786 dy21
= vec_sub(iy2
,jy1
);
7787 dy22
= vec_sub(iy2
,jy2
);
7788 dy23
= vec_sub(iy2
,jy3
);
7789 dz21
= vec_sub(iz2
,jz1
);
7790 dz22
= vec_sub(iz2
,jz2
);
7791 dz23
= vec_sub(iz2
,jz3
);
7792 dx31
= vec_sub(ix3
,jx1
);
7793 dx32
= vec_sub(ix3
,jx2
);
7794 dx33
= vec_sub(ix3
,jx3
);
7795 dy31
= vec_sub(iy3
,jy1
);
7796 dy32
= vec_sub(iy3
,jy2
);
7797 dy33
= vec_sub(iy3
,jy3
);
7798 dz31
= vec_sub(iz3
,jz1
);
7799 dz32
= vec_sub(iz3
,jz2
);
7800 dz33
= vec_sub(iz3
,jz3
);
7802 rsq11
= vec_madd(dx11
,dx11
,nul
);
7803 rsq12
= vec_madd(dx12
,dx12
,nul
);
7804 rsq13
= vec_madd(dx13
,dx13
,nul
);
7805 rsq21
= vec_madd(dx21
,dx21
,nul
);
7806 rsq22
= vec_madd(dx22
,dx22
,nul
);
7807 rsq23
= vec_madd(dx23
,dx23
,nul
);
7808 rsq31
= vec_madd(dx31
,dx31
,nul
);
7809 rsq32
= vec_madd(dx32
,dx32
,nul
);
7810 rsq33
= vec_madd(dx33
,dx33
,nul
);
7811 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
7812 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
7813 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
7814 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
7815 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
7816 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
7817 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
7818 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
7819 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
7820 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
7821 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
7822 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
7823 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
7824 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
7825 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
7826 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
7827 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
7828 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
7830 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
7833 &rinv11
,&rinv12
,&rinv13
,
7834 &rinv21
,&rinv22
,&rinv23
,
7835 &rinv31
,&rinv32
,&rinv33
);
7837 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
7838 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
7839 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
7840 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
7841 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
7842 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
7843 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
7844 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
7845 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
7847 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
7848 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
7849 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
7850 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
7851 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
7852 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
7853 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
7854 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
7855 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
7857 vc11
= vec_add(rinv11
,krsq11
);
7858 vc12
= vec_add(rinv12
,krsq12
);
7859 vc13
= vec_add(rinv13
,krsq13
);
7860 vc21
= vec_add(rinv21
,krsq21
);
7861 vc22
= vec_add(rinv22
,krsq22
);
7862 vc23
= vec_add(rinv23
,krsq23
);
7863 vc31
= vec_add(rinv31
,krsq31
);
7864 vc32
= vec_add(rinv32
,krsq32
);
7865 vc33
= vec_add(rinv33
,krsq33
);
7867 vc11
= vec_sub(vc11
,vcrf
);
7868 vc12
= vec_sub(vc12
,vcrf
);
7869 vc13
= vec_sub(vc13
,vcrf
);
7870 vc21
= vec_sub(vc21
,vcrf
);
7871 vc22
= vec_sub(vc22
,vcrf
);
7872 vc23
= vec_sub(vc23
,vcrf
);
7873 vc31
= vec_sub(vc31
,vcrf
);
7874 vc32
= vec_sub(vc32
,vcrf
);
7875 vc33
= vec_sub(vc33
,vcrf
);
7877 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
7878 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
7879 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
7880 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
7881 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
7882 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
7883 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
7884 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
7885 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
7887 fs11
= vec_madd(fs11
,qqOO
,nul
);
7888 fs12
= vec_madd(fs12
,qqOH
,nul
);
7889 fs13
= vec_madd(fs13
,qqOH
,nul
);
7890 fs21
= vec_madd(fs21
,qqOH
,nul
);
7891 fs22
= vec_madd(fs22
,qqHH
,nul
);
7892 fs23
= vec_madd(fs23
,qqHH
,nul
);
7893 fs31
= vec_madd(fs31
,qqOH
,nul
);
7894 fs32
= vec_madd(fs32
,qqHH
,nul
);
7895 fs33
= vec_madd(fs33
,qqHH
,nul
);
7897 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
7898 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
7899 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
7900 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
7901 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
7902 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
7903 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
7904 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
7905 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
7907 vctot
= vec_madd(qqOO
,vc11
,vctot
);
7908 vctot
= vec_madd(qqOH
,vc12
,vctot
);
7909 vctot
= vec_madd(qqOH
,vc13
,vctot
);
7910 vctot
= vec_madd(qqOH
,vc21
,vctot
);
7911 vctot
= vec_madd(qqHH
,vc22
,vctot
);
7912 vctot
= vec_madd(qqHH
,vc23
,vctot
);
7913 vctot
= vec_madd(qqOH
,vc31
,vctot
);
7914 vctot
= vec_madd(qqHH
,vc32
,vctot
);
7915 vctot
= vec_madd(qqHH
,vc33
,vctot
);
7917 fix1
= vec_madd(fs11
,dx11
,fix1
);
7918 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
7919 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
7920 fix2
= vec_madd(fs21
,dx21
,fix2
);
7921 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
7922 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
7923 fix3
= vec_madd(fs31
,dx31
,fix3
);
7924 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
7925 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
7927 fix1
= vec_madd(fs12
,dx12
,fix1
);
7928 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
7929 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
7930 fix2
= vec_madd(fs22
,dx22
,fix2
);
7931 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
7932 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
7933 fix3
= vec_madd(fs32
,dx32
,fix3
);
7934 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
7935 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
7937 fix1
= vec_madd(fs13
,dx13
,fix1
);
7938 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
7939 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
7940 fix2
= vec_madd(fs23
,dx23
,fix2
);
7941 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
7942 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
7943 fix3
= vec_madd(fs33
,dx33
,fix3
);
7944 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
7945 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
7947 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
7948 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
7949 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
7950 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
7951 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
7952 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
7953 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
7954 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
7955 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
7957 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
7958 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
7959 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
7960 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
7961 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
7962 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
7963 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
7964 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
7965 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
7967 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
7968 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
7969 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
7970 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
7971 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
7972 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
7973 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
7974 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
7975 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
7977 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
7978 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
7987 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
7988 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
7989 qqOOt
= vec_sld(qqOO
,nul
,4);
7990 qqOHt
= vec_sld(qqOH
,nul
,4);
7991 qqHHt
= vec_sld(qqHH
,nul
,4);
7993 dx11
= vec_sub(ix1
,jx1
);
7994 dx12
= vec_sub(ix1
,jx2
);
7995 dx13
= vec_sub(ix1
,jx3
);
7996 dy11
= vec_sub(iy1
,jy1
);
7997 dy12
= vec_sub(iy1
,jy2
);
7998 dy13
= vec_sub(iy1
,jy3
);
7999 dz11
= vec_sub(iz1
,jz1
);
8000 dz12
= vec_sub(iz1
,jz2
);
8001 dz13
= vec_sub(iz1
,jz3
);
8002 dx21
= vec_sub(ix2
,jx1
);
8003 dx22
= vec_sub(ix2
,jx2
);
8004 dx23
= vec_sub(ix2
,jx3
);
8005 dy21
= vec_sub(iy2
,jy1
);
8006 dy22
= vec_sub(iy2
,jy2
);
8007 dy23
= vec_sub(iy2
,jy3
);
8008 dz21
= vec_sub(iz2
,jz1
);
8009 dz22
= vec_sub(iz2
,jz2
);
8010 dz23
= vec_sub(iz2
,jz3
);
8011 dx31
= vec_sub(ix3
,jx1
);
8012 dx32
= vec_sub(ix3
,jx2
);
8013 dx33
= vec_sub(ix3
,jx3
);
8014 dy31
= vec_sub(iy3
,jy1
);
8015 dy32
= vec_sub(iy3
,jy2
);
8016 dy33
= vec_sub(iy3
,jy3
);
8017 dz31
= vec_sub(iz3
,jz1
);
8018 dz32
= vec_sub(iz3
,jz2
);
8019 dz33
= vec_sub(iz3
,jz3
);
8021 rsq11
= vec_madd(dx11
,dx11
,nul
);
8022 rsq12
= vec_madd(dx12
,dx12
,nul
);
8023 rsq13
= vec_madd(dx13
,dx13
,nul
);
8024 rsq21
= vec_madd(dx21
,dx21
,nul
);
8025 rsq22
= vec_madd(dx22
,dx22
,nul
);
8026 rsq23
= vec_madd(dx23
,dx23
,nul
);
8027 rsq31
= vec_madd(dx31
,dx31
,nul
);
8028 rsq32
= vec_madd(dx32
,dx32
,nul
);
8029 rsq33
= vec_madd(dx33
,dx33
,nul
);
8030 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
8031 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
8032 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
8033 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
8034 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
8035 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
8036 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
8037 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
8038 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
8039 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
8040 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
8041 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
8042 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
8043 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
8044 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
8045 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
8046 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
8047 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
8049 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
8050 &rsq21
,&rsq22
,&rsq23
,
8051 &rsq31
,&rsq32
,&rsq33
);
8053 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
8056 &rinv11
,&rinv12
,&rinv13
,
8057 &rinv21
,&rinv22
,&rinv23
,
8058 &rinv31
,&rinv32
,&rinv33
);
8060 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
8061 &rinv21
,&rinv22
,&rinv23
,
8062 &rinv31
,&rinv32
,&rinv33
);
8064 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
8065 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
8066 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
8067 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
8068 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
8069 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
8070 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
8071 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
8072 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
8074 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
8075 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
8076 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
8077 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
8078 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
8079 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
8080 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
8081 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
8082 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
8084 vc11
= vec_add(rinv11
,krsq11
);
8085 vc12
= vec_add(rinv12
,krsq12
);
8086 vc13
= vec_add(rinv13
,krsq13
);
8087 vc21
= vec_add(rinv21
,krsq21
);
8088 vc22
= vec_add(rinv22
,krsq22
);
8089 vc23
= vec_add(rinv23
,krsq23
);
8090 vc31
= vec_add(rinv31
,krsq31
);
8091 vc32
= vec_add(rinv32
,krsq32
);
8092 vc33
= vec_add(rinv33
,krsq33
);
8094 vc11
= vec_sub(vc11
,vcrf
);
8095 vc12
= vec_sub(vc12
,vcrf
);
8096 vc13
= vec_sub(vc13
,vcrf
);
8097 vc21
= vec_sub(vc21
,vcrf
);
8098 vc22
= vec_sub(vc22
,vcrf
);
8099 vc23
= vec_sub(vc23
,vcrf
);
8100 vc31
= vec_sub(vc31
,vcrf
);
8101 vc32
= vec_sub(vc32
,vcrf
);
8102 vc33
= vec_sub(vc33
,vcrf
);
8104 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
8105 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
8106 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
8107 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
8108 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
8109 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
8110 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
8111 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
8112 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
8114 fs11
= vec_madd(fs11
,qqOOt
,nul
);
8115 fs12
= vec_madd(fs12
,qqOHt
,nul
);
8116 fs13
= vec_madd(fs13
,qqOHt
,nul
);
8117 fs21
= vec_madd(fs21
,qqOHt
,nul
);
8118 fs22
= vec_madd(fs22
,qqHHt
,nul
);
8119 fs23
= vec_madd(fs23
,qqHHt
,nul
);
8120 fs31
= vec_madd(fs31
,qqOHt
,nul
);
8121 fs32
= vec_madd(fs32
,qqHHt
,nul
);
8122 fs33
= vec_madd(fs33
,qqHHt
,nul
);
8124 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
8125 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
8126 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
8127 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
8128 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
8129 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
8130 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
8131 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
8132 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
8134 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
8135 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
8136 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
8137 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
8138 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
8139 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
8140 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
8141 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
8142 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
8144 fix1
= vec_madd(fs11
,dx11
,fix1
);
8145 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
8146 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
8147 fix2
= vec_madd(fs21
,dx21
,fix2
);
8148 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
8149 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
8150 fix3
= vec_madd(fs31
,dx31
,fix3
);
8151 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
8152 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
8154 fix1
= vec_madd(fs12
,dx12
,fix1
);
8155 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
8156 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
8157 fix2
= vec_madd(fs22
,dx22
,fix2
);
8158 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
8159 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
8160 fix3
= vec_madd(fs32
,dx32
,fix3
);
8161 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
8162 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
8164 fix1
= vec_madd(fs13
,dx13
,fix1
);
8165 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
8166 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
8167 fix2
= vec_madd(fs23
,dx23
,fix2
);
8168 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
8169 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
8170 fix3
= vec_madd(fs33
,dx33
,fix3
);
8171 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
8172 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
8174 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
8175 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
8176 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
8177 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
8178 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
8179 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
8180 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
8181 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
8182 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
8184 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
8185 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
8186 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
8187 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
8188 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
8189 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
8190 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
8191 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
8192 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
8194 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
8195 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
8196 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
8197 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
8198 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
8199 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
8200 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
8201 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
8202 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
8204 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
8205 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
8206 } else if(k
<(nj1
-1)) {
8211 load_2_water(pos
+j3a
,pos
+j3b
,
8212 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
8213 qqOOt
= vec_sld(qqOO
,nul
,8);
8214 qqOHt
= vec_sld(qqOH
,nul
,8);
8215 qqHHt
= vec_sld(qqHH
,nul
,8);
8217 dx11
= vec_sub(ix1
,jx1
);
8218 dx12
= vec_sub(ix1
,jx2
);
8219 dx13
= vec_sub(ix1
,jx3
);
8220 dy11
= vec_sub(iy1
,jy1
);
8221 dy12
= vec_sub(iy1
,jy2
);
8222 dy13
= vec_sub(iy1
,jy3
);
8223 dz11
= vec_sub(iz1
,jz1
);
8224 dz12
= vec_sub(iz1
,jz2
);
8225 dz13
= vec_sub(iz1
,jz3
);
8226 dx21
= vec_sub(ix2
,jx1
);
8227 dx22
= vec_sub(ix2
,jx2
);
8228 dx23
= vec_sub(ix2
,jx3
);
8229 dy21
= vec_sub(iy2
,jy1
);
8230 dy22
= vec_sub(iy2
,jy2
);
8231 dy23
= vec_sub(iy2
,jy3
);
8232 dz21
= vec_sub(iz2
,jz1
);
8233 dz22
= vec_sub(iz2
,jz2
);
8234 dz23
= vec_sub(iz2
,jz3
);
8235 dx31
= vec_sub(ix3
,jx1
);
8236 dx32
= vec_sub(ix3
,jx2
);
8237 dx33
= vec_sub(ix3
,jx3
);
8238 dy31
= vec_sub(iy3
,jy1
);
8239 dy32
= vec_sub(iy3
,jy2
);
8240 dy33
= vec_sub(iy3
,jy3
);
8241 dz31
= vec_sub(iz3
,jz1
);
8242 dz32
= vec_sub(iz3
,jz2
);
8243 dz33
= vec_sub(iz3
,jz3
);
8245 rsq11
= vec_madd(dx11
,dx11
,nul
);
8246 rsq12
= vec_madd(dx12
,dx12
,nul
);
8247 rsq13
= vec_madd(dx13
,dx13
,nul
);
8248 rsq21
= vec_madd(dx21
,dx21
,nul
);
8249 rsq22
= vec_madd(dx22
,dx22
,nul
);
8250 rsq23
= vec_madd(dx23
,dx23
,nul
);
8251 rsq31
= vec_madd(dx31
,dx31
,nul
);
8252 rsq32
= vec_madd(dx32
,dx32
,nul
);
8253 rsq33
= vec_madd(dx33
,dx33
,nul
);
8254 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
8255 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
8256 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
8257 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
8258 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
8259 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
8260 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
8261 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
8262 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
8263 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
8264 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
8265 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
8266 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
8267 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
8268 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
8269 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
8270 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
8271 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
8273 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
8274 &rsq21
,&rsq22
,&rsq23
,
8275 &rsq31
,&rsq32
,&rsq33
);
8277 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
8280 &rinv11
,&rinv12
,&rinv13
,
8281 &rinv21
,&rinv22
,&rinv23
,
8282 &rinv31
,&rinv32
,&rinv33
);
8284 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
8285 &rinv21
,&rinv22
,&rinv23
,
8286 &rinv31
,&rinv32
,&rinv33
);
8288 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
8289 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
8290 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
8291 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
8292 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
8293 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
8294 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
8295 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
8296 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
8298 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
8299 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
8300 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
8301 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
8302 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
8303 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
8304 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
8305 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
8306 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
8308 vc11
= vec_add(rinv11
,krsq11
);
8309 vc12
= vec_add(rinv12
,krsq12
);
8310 vc13
= vec_add(rinv13
,krsq13
);
8311 vc21
= vec_add(rinv21
,krsq21
);
8312 vc22
= vec_add(rinv22
,krsq22
);
8313 vc23
= vec_add(rinv23
,krsq23
);
8314 vc31
= vec_add(rinv31
,krsq31
);
8315 vc32
= vec_add(rinv32
,krsq32
);
8316 vc33
= vec_add(rinv33
,krsq33
);
8318 vc11
= vec_sub(vc11
,vcrf
);
8319 vc12
= vec_sub(vc12
,vcrf
);
8320 vc13
= vec_sub(vc13
,vcrf
);
8321 vc21
= vec_sub(vc21
,vcrf
);
8322 vc22
= vec_sub(vc22
,vcrf
);
8323 vc23
= vec_sub(vc23
,vcrf
);
8324 vc31
= vec_sub(vc31
,vcrf
);
8325 vc32
= vec_sub(vc32
,vcrf
);
8326 vc33
= vec_sub(vc33
,vcrf
);
8328 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
8329 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
8330 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
8331 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
8332 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
8333 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
8334 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
8335 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
8336 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
8338 fs11
= vec_madd(fs11
,qqOOt
,nul
);
8339 fs12
= vec_madd(fs12
,qqOHt
,nul
);
8340 fs13
= vec_madd(fs13
,qqOHt
,nul
);
8341 fs21
= vec_madd(fs21
,qqOHt
,nul
);
8342 fs22
= vec_madd(fs22
,qqHHt
,nul
);
8343 fs23
= vec_madd(fs23
,qqHHt
,nul
);
8344 fs31
= vec_madd(fs31
,qqOHt
,nul
);
8345 fs32
= vec_madd(fs32
,qqHHt
,nul
);
8346 fs33
= vec_madd(fs33
,qqHHt
,nul
);
8348 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
8349 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
8350 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
8351 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
8352 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
8353 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
8354 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
8355 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
8356 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
8358 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
8359 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
8360 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
8361 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
8362 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
8363 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
8364 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
8365 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
8366 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
8368 fix1
= vec_madd(fs11
,dx11
,fix1
);
8369 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
8370 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
8371 fix2
= vec_madd(fs21
,dx21
,fix2
);
8372 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
8373 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
8374 fix3
= vec_madd(fs31
,dx31
,fix3
);
8375 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
8376 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
8378 fix1
= vec_madd(fs12
,dx12
,fix1
);
8379 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
8380 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
8381 fix2
= vec_madd(fs22
,dx22
,fix2
);
8382 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
8383 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
8384 fix3
= vec_madd(fs32
,dx32
,fix3
);
8385 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
8386 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
8388 fix1
= vec_madd(fs13
,dx13
,fix1
);
8389 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
8390 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
8391 fix2
= vec_madd(fs23
,dx23
,fix2
);
8392 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
8393 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
8394 fix3
= vec_madd(fs33
,dx33
,fix3
);
8395 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
8396 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
8398 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
8399 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
8400 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
8401 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
8402 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
8403 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
8404 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
8405 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
8406 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
8408 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
8409 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
8410 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
8411 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
8412 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
8413 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
8414 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
8415 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
8416 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
8418 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
8419 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
8420 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
8421 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
8422 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
8423 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
8424 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
8425 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
8426 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
8428 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
8429 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
8433 load_1_water(pos
+j3a
,
8434 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
8435 qqOOt
= vec_sld(qqOO
,nul
,12);
8436 qqOHt
= vec_sld(qqOH
,nul
,12);
8437 qqHHt
= vec_sld(qqHH
,nul
,12);
8439 dx11
= vec_sub(ix1
,jx1
);
8440 dx12
= vec_sub(ix1
,jx2
);
8441 dx13
= vec_sub(ix1
,jx3
);
8442 dy11
= vec_sub(iy1
,jy1
);
8443 dy12
= vec_sub(iy1
,jy2
);
8444 dy13
= vec_sub(iy1
,jy3
);
8445 dz11
= vec_sub(iz1
,jz1
);
8446 dz12
= vec_sub(iz1
,jz2
);
8447 dz13
= vec_sub(iz1
,jz3
);
8448 dx21
= vec_sub(ix2
,jx1
);
8449 dx22
= vec_sub(ix2
,jx2
);
8450 dx23
= vec_sub(ix2
,jx3
);
8451 dy21
= vec_sub(iy2
,jy1
);
8452 dy22
= vec_sub(iy2
,jy2
);
8453 dy23
= vec_sub(iy2
,jy3
);
8454 dz21
= vec_sub(iz2
,jz1
);
8455 dz22
= vec_sub(iz2
,jz2
);
8456 dz23
= vec_sub(iz2
,jz3
);
8457 dx31
= vec_sub(ix3
,jx1
);
8458 dx32
= vec_sub(ix3
,jx2
);
8459 dx33
= vec_sub(ix3
,jx3
);
8460 dy31
= vec_sub(iy3
,jy1
);
8461 dy32
= vec_sub(iy3
,jy2
);
8462 dy33
= vec_sub(iy3
,jy3
);
8463 dz31
= vec_sub(iz3
,jz1
);
8464 dz32
= vec_sub(iz3
,jz2
);
8465 dz33
= vec_sub(iz3
,jz3
);
8467 rsq11
= vec_madd(dx11
,dx11
,nul
);
8468 rsq12
= vec_madd(dx12
,dx12
,nul
);
8469 rsq13
= vec_madd(dx13
,dx13
,nul
);
8470 rsq21
= vec_madd(dx21
,dx21
,nul
);
8471 rsq22
= vec_madd(dx22
,dx22
,nul
);
8472 rsq23
= vec_madd(dx23
,dx23
,nul
);
8473 rsq31
= vec_madd(dx31
,dx31
,nul
);
8474 rsq32
= vec_madd(dx32
,dx32
,nul
);
8475 rsq33
= vec_madd(dx33
,dx33
,nul
);
8476 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
8477 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
8478 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
8479 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
8480 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
8481 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
8482 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
8483 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
8484 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
8485 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
8486 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
8487 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
8488 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
8489 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
8490 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
8491 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
8492 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
8493 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
8495 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
8496 &rsq21
,&rsq22
,&rsq23
,
8497 &rsq31
,&rsq32
,&rsq33
);
8499 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
8502 &rinv11
,&rinv12
,&rinv13
,
8503 &rinv21
,&rinv22
,&rinv23
,
8504 &rinv31
,&rinv32
,&rinv33
);
8506 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
8507 &rinv21
,&rinv22
,&rinv23
,
8508 &rinv31
,&rinv32
,&rinv33
);
8510 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
8511 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
8512 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
8513 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
8514 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
8515 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
8516 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
8517 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
8518 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
8520 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
8521 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
8522 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
8523 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
8524 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
8525 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
8526 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
8527 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
8528 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
8530 vc11
= vec_add(rinv11
,krsq11
);
8531 vc12
= vec_add(rinv12
,krsq12
);
8532 vc13
= vec_add(rinv13
,krsq13
);
8533 vc21
= vec_add(rinv21
,krsq21
);
8534 vc22
= vec_add(rinv22
,krsq22
);
8535 vc23
= vec_add(rinv23
,krsq23
);
8536 vc31
= vec_add(rinv31
,krsq31
);
8537 vc32
= vec_add(rinv32
,krsq32
);
8538 vc33
= vec_add(rinv33
,krsq33
);
8540 vc11
= vec_sub(vc11
,vcrf
);
8541 vc12
= vec_sub(vc12
,vcrf
);
8542 vc13
= vec_sub(vc13
,vcrf
);
8543 vc21
= vec_sub(vc21
,vcrf
);
8544 vc22
= vec_sub(vc22
,vcrf
);
8545 vc23
= vec_sub(vc23
,vcrf
);
8546 vc31
= vec_sub(vc31
,vcrf
);
8547 vc32
= vec_sub(vc32
,vcrf
);
8548 vc33
= vec_sub(vc33
,vcrf
);
8550 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
8551 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
8552 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
8553 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
8554 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
8555 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
8556 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
8557 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
8558 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
8560 fs11
= vec_madd(fs11
,qqOOt
,nul
);
8561 fs12
= vec_madd(fs12
,qqOHt
,nul
);
8562 fs13
= vec_madd(fs13
,qqOHt
,nul
);
8563 fs21
= vec_madd(fs21
,qqOHt
,nul
);
8564 fs22
= vec_madd(fs22
,qqHHt
,nul
);
8565 fs23
= vec_madd(fs23
,qqHHt
,nul
);
8566 fs31
= vec_madd(fs31
,qqOHt
,nul
);
8567 fs32
= vec_madd(fs32
,qqHHt
,nul
);
8568 fs33
= vec_madd(fs33
,qqHHt
,nul
);
8570 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
8571 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
8572 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
8573 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
8574 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
8575 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
8576 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
8577 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
8578 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
8580 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
8581 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
8582 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
8583 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
8584 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
8585 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
8586 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
8587 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
8588 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
8590 fix1
= vec_madd(fs11
,dx11
,fix1
);
8591 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
8592 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
8593 fix2
= vec_madd(fs21
,dx21
,fix2
);
8594 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
8595 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
8596 fix3
= vec_madd(fs31
,dx31
,fix3
);
8597 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
8598 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
8600 fix1
= vec_madd(fs12
,dx12
,fix1
);
8601 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
8602 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
8603 fix2
= vec_madd(fs22
,dx22
,fix2
);
8604 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
8605 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
8606 fix3
= vec_madd(fs32
,dx32
,fix3
);
8607 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
8608 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
8610 fix1
= vec_madd(fs13
,dx13
,fix1
);
8611 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
8612 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
8613 fix2
= vec_madd(fs23
,dx23
,fix2
);
8614 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
8615 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
8616 fix3
= vec_madd(fs33
,dx33
,fix3
);
8617 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
8618 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
8620 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
8621 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
8622 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
8623 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
8624 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
8625 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
8626 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
8627 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
8628 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
8630 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
8631 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
8632 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
8633 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
8634 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
8635 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
8636 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
8637 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
8638 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
8640 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
8641 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
8642 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
8643 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
8644 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
8645 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
8646 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
8647 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
8648 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
8650 add_force_to_1_water(faction
+j3a
,
8651 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
8653 /* update outer data */
8654 update_i_water_forces(faction
+ii3
,fshift
+is3
,
8655 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
8657 add_vector_to_float(Vc
+gid
[n
],vctot
);
8663 void inl2130_altivec(
8684 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
8685 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
8687 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
8688 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
8689 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
8691 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
8692 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
8693 vector
float rinvsq11
,rinvsq12
,rinvsq13
;
8694 vector
float rinvsq21
,rinvsq22
,rinvsq23
;
8695 vector
float rinvsq31
,rinvsq32
,rinvsq33
;
8696 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
,vkrf
,vcrf
;
8697 vector
float krsq11
,krsq12
,krsq13
,krsq21
,krsq22
,krsq23
,krsq31
,krsq32
,krsq33
;
8699 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
8700 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
;
8701 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
8702 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
8703 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
,rinvsix
;
8704 vector
float vnb6
,vnb12
,vnbtot
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
8706 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
8707 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
8708 int j3a
,j3b
,j3c
,j3d
;
8711 vfacel
=load_float_and_splat(&facel
);
8712 vkrf
=load_float_and_splat(&krf
);
8713 vcrf
=load_float_and_splat(&crf
);
8715 qO
= load_float_and_splat(charge
+ii
);
8716 qH
= load_float_and_splat(charge
+ii
+1);
8717 qqOO
= vec_madd(qO
,qO
,nul
);
8718 qqOH
= vec_madd(qO
,qH
,nul
);
8719 qqHH
= vec_madd(qH
,qH
,nul
);
8720 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
8721 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
8722 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
8725 load_1_pair(nbfp
+tj
,&c6
,&c12
);
8726 c6
= vec_splat(c6
,0);
8727 c12
= vec_splat(c12
,0);
8729 for(n
=0;n
<nri
;n
++) {
8733 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
8734 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
8749 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
8758 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
8759 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
8760 dx11
= vec_sub(ix1
,jx1
);
8761 dx12
= vec_sub(ix1
,jx2
);
8762 dx13
= vec_sub(ix1
,jx3
);
8763 dy11
= vec_sub(iy1
,jy1
);
8764 dy12
= vec_sub(iy1
,jy2
);
8765 dy13
= vec_sub(iy1
,jy3
);
8766 dz11
= vec_sub(iz1
,jz1
);
8767 dz12
= vec_sub(iz1
,jz2
);
8768 dz13
= vec_sub(iz1
,jz3
);
8769 dx21
= vec_sub(ix2
,jx1
);
8770 dx22
= vec_sub(ix2
,jx2
);
8771 dx23
= vec_sub(ix2
,jx3
);
8772 dy21
= vec_sub(iy2
,jy1
);
8773 dy22
= vec_sub(iy2
,jy2
);
8774 dy23
= vec_sub(iy2
,jy3
);
8775 dz21
= vec_sub(iz2
,jz1
);
8776 dz22
= vec_sub(iz2
,jz2
);
8777 dz23
= vec_sub(iz2
,jz3
);
8778 dx31
= vec_sub(ix3
,jx1
);
8779 dx32
= vec_sub(ix3
,jx2
);
8780 dx33
= vec_sub(ix3
,jx3
);
8781 dy31
= vec_sub(iy3
,jy1
);
8782 dy32
= vec_sub(iy3
,jy2
);
8783 dy33
= vec_sub(iy3
,jy3
);
8784 dz31
= vec_sub(iz3
,jz1
);
8785 dz32
= vec_sub(iz3
,jz2
);
8786 dz33
= vec_sub(iz3
,jz3
);
8788 rsq11
= vec_madd(dx11
,dx11
,nul
);
8789 rsq12
= vec_madd(dx12
,dx12
,nul
);
8790 rsq13
= vec_madd(dx13
,dx13
,nul
);
8791 rsq21
= vec_madd(dx21
,dx21
,nul
);
8792 rsq22
= vec_madd(dx22
,dx22
,nul
);
8793 rsq23
= vec_madd(dx23
,dx23
,nul
);
8794 rsq31
= vec_madd(dx31
,dx31
,nul
);
8795 rsq32
= vec_madd(dx32
,dx32
,nul
);
8796 rsq33
= vec_madd(dx33
,dx33
,nul
);
8797 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
8798 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
8799 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
8800 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
8801 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
8802 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
8803 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
8804 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
8805 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
8806 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
8807 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
8808 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
8809 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
8810 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
8811 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
8812 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
8813 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
8814 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
8816 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
8819 &rinv11
,&rinv12
,&rinv13
,
8820 &rinv21
,&rinv22
,&rinv23
,
8821 &rinv31
,&rinv32
,&rinv33
);
8823 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
8824 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
8825 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
8826 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
8827 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
8828 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
8829 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
8830 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
8831 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
8833 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
8834 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
8835 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
8836 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
8837 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
8838 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
8839 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
8840 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
8841 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
8842 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
8843 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
8845 vnb6
= vec_madd(c6
,rinvsix
,nul
);
8846 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
8847 vnbtot
= vec_add(vnbtot
,vnb12
);
8848 vnbtot
= vec_sub(vnbtot
,vnb6
);
8850 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
8851 vc11
= vec_add(rinv11
,krsq11
);
8852 vc12
= vec_add(rinv12
,krsq12
);
8853 vc13
= vec_add(rinv13
,krsq13
);
8854 vc21
= vec_add(rinv21
,krsq21
);
8855 vc22
= vec_add(rinv22
,krsq22
);
8856 vc23
= vec_add(rinv23
,krsq23
);
8857 vc31
= vec_add(rinv31
,krsq31
);
8858 vc32
= vec_add(rinv32
,krsq32
);
8859 vc33
= vec_add(rinv33
,krsq33
);
8861 fs11
= vec_madd(qqOO
,fs11
,nul
);
8862 vc11
= vec_sub(vc11
,vcrf
);
8863 vc12
= vec_sub(vc12
,vcrf
);
8864 vc13
= vec_sub(vc13
,vcrf
);
8865 vc21
= vec_sub(vc21
,vcrf
);
8866 vc22
= vec_sub(vc22
,vcrf
);
8867 vc23
= vec_sub(vc23
,vcrf
);
8868 vc31
= vec_sub(vc31
,vcrf
);
8869 vc32
= vec_sub(vc32
,vcrf
);
8870 vc33
= vec_sub(vc33
,vcrf
);
8872 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
8873 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
8874 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
8875 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
8876 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
8877 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
8878 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
8879 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
8880 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
8882 fs11
= vec_madd(vec_twelve(),vnb12
,fs11
);
8883 fs12
= vec_madd(fs12
,qqOH
,nul
);
8884 fs13
= vec_madd(fs13
,qqOH
,nul
);
8885 fs21
= vec_madd(fs21
,qqOH
,nul
);
8886 fs22
= vec_madd(fs22
,qqHH
,nul
);
8887 fs23
= vec_madd(fs23
,qqHH
,nul
);
8888 fs31
= vec_madd(fs31
,qqOH
,nul
);
8889 fs32
= vec_madd(fs32
,qqHH
,nul
);
8890 fs33
= vec_madd(fs33
,qqHH
,nul
);
8892 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
8893 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
8894 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
8895 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
8896 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
8897 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
8898 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
8899 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
8900 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
8902 vctot
= vec_madd(qqOO
,vc11
,vctot
);
8903 vctot
= vec_madd(qqOH
,vc12
,vctot
);
8904 vctot
= vec_madd(qqOH
,vc13
,vctot
);
8905 vctot
= vec_madd(qqOH
,vc21
,vctot
);
8906 vctot
= vec_madd(qqHH
,vc22
,vctot
);
8907 vctot
= vec_madd(qqHH
,vc23
,vctot
);
8908 vctot
= vec_madd(qqOH
,vc31
,vctot
);
8909 vctot
= vec_madd(qqHH
,vc32
,vctot
);
8910 vctot
= vec_madd(qqHH
,vc33
,vctot
);
8912 fix1
= vec_madd(fs11
,dx11
,fix1
);
8913 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
8914 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
8915 fix2
= vec_madd(fs21
,dx21
,fix2
);
8916 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
8917 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
8918 fix3
= vec_madd(fs31
,dx31
,fix3
);
8919 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
8920 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
8922 fix1
= vec_madd(fs12
,dx12
,fix1
);
8923 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
8924 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
8925 fix2
= vec_madd(fs22
,dx22
,fix2
);
8926 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
8927 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
8928 fix3
= vec_madd(fs32
,dx32
,fix3
);
8929 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
8930 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
8932 fix1
= vec_madd(fs13
,dx13
,fix1
);
8933 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
8934 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
8935 fix2
= vec_madd(fs23
,dx23
,fix2
);
8936 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
8937 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
8938 fix3
= vec_madd(fs33
,dx33
,fix3
);
8939 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
8940 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
8942 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
8943 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
8944 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
8945 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
8946 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
8947 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
8948 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
8949 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
8950 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
8952 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
8953 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
8954 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
8955 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
8956 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
8957 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
8958 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
8959 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
8960 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
8962 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
8963 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
8964 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
8965 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
8966 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
8967 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
8968 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
8969 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
8970 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
8972 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
8973 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
8982 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
8983 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
8984 qqOOt
= vec_sld(qqOO
,nul
,4);
8985 qqOHt
= vec_sld(qqOH
,nul
,4);
8986 qqHHt
= vec_sld(qqHH
,nul
,4);
8987 c6t
= vec_sld(c6
,nul
,4);
8988 c12t
= vec_sld(c12
,nul
,4);
8990 dx11
= vec_sub(ix1
,jx1
);
8991 dx12
= vec_sub(ix1
,jx2
);
8992 dx13
= vec_sub(ix1
,jx3
);
8993 dy11
= vec_sub(iy1
,jy1
);
8994 dy12
= vec_sub(iy1
,jy2
);
8995 dy13
= vec_sub(iy1
,jy3
);
8996 dz11
= vec_sub(iz1
,jz1
);
8997 dz12
= vec_sub(iz1
,jz2
);
8998 dz13
= vec_sub(iz1
,jz3
);
8999 dx21
= vec_sub(ix2
,jx1
);
9000 dx22
= vec_sub(ix2
,jx2
);
9001 dx23
= vec_sub(ix2
,jx3
);
9002 dy21
= vec_sub(iy2
,jy1
);
9003 dy22
= vec_sub(iy2
,jy2
);
9004 dy23
= vec_sub(iy2
,jy3
);
9005 dz21
= vec_sub(iz2
,jz1
);
9006 dz22
= vec_sub(iz2
,jz2
);
9007 dz23
= vec_sub(iz2
,jz3
);
9008 dx31
= vec_sub(ix3
,jx1
);
9009 dx32
= vec_sub(ix3
,jx2
);
9010 dx33
= vec_sub(ix3
,jx3
);
9011 dy31
= vec_sub(iy3
,jy1
);
9012 dy32
= vec_sub(iy3
,jy2
);
9013 dy33
= vec_sub(iy3
,jy3
);
9014 dz31
= vec_sub(iz3
,jz1
);
9015 dz32
= vec_sub(iz3
,jz2
);
9016 dz33
= vec_sub(iz3
,jz3
);
9018 rsq11
= vec_madd(dx11
,dx11
,nul
);
9019 rsq12
= vec_madd(dx12
,dx12
,nul
);
9020 rsq13
= vec_madd(dx13
,dx13
,nul
);
9021 rsq21
= vec_madd(dx21
,dx21
,nul
);
9022 rsq22
= vec_madd(dx22
,dx22
,nul
);
9023 rsq23
= vec_madd(dx23
,dx23
,nul
);
9024 rsq31
= vec_madd(dx31
,dx31
,nul
);
9025 rsq32
= vec_madd(dx32
,dx32
,nul
);
9026 rsq33
= vec_madd(dx33
,dx33
,nul
);
9027 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
9028 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
9029 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
9030 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
9031 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
9032 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
9033 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
9034 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
9035 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
9036 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
9037 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
9038 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
9039 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
9040 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
9041 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
9042 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
9043 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
9044 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
9046 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
9047 &rsq21
,&rsq22
,&rsq23
,
9048 &rsq31
,&rsq32
,&rsq33
);
9050 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
9053 &rinv11
,&rinv12
,&rinv13
,
9054 &rinv21
,&rinv22
,&rinv23
,
9055 &rinv31
,&rinv32
,&rinv33
);
9057 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
9058 &rinv21
,&rinv22
,&rinv23
,
9059 &rinv31
,&rinv32
,&rinv33
);
9061 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
9062 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
9063 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
9064 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
9065 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
9066 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
9067 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
9068 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
9069 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
9071 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
9072 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
9073 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
9074 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
9075 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
9076 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
9077 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
9078 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
9079 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
9080 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
9081 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
9083 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
9084 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
9085 vnbtot
= vec_add(vnbtot
,vnb12
);
9086 vnbtot
= vec_sub(vnbtot
,vnb6
);
9088 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
9089 vc11
= vec_add(rinv11
,krsq11
);
9090 vc12
= vec_add(rinv12
,krsq12
);
9091 vc13
= vec_add(rinv13
,krsq13
);
9092 vc21
= vec_add(rinv21
,krsq21
);
9093 vc22
= vec_add(rinv22
,krsq22
);
9094 vc23
= vec_add(rinv23
,krsq23
);
9095 vc31
= vec_add(rinv31
,krsq31
);
9096 vc32
= vec_add(rinv32
,krsq32
);
9097 vc33
= vec_add(rinv33
,krsq33
);
9099 fs11
= vec_madd(qqOOt
,fs11
,nul
);
9100 vc11
= vec_sub(vc11
,vcrf
);
9101 vc12
= vec_sub(vc12
,vcrf
);
9102 vc13
= vec_sub(vc13
,vcrf
);
9103 vc21
= vec_sub(vc21
,vcrf
);
9104 vc22
= vec_sub(vc22
,vcrf
);
9105 vc23
= vec_sub(vc23
,vcrf
);
9106 vc31
= vec_sub(vc31
,vcrf
);
9107 vc32
= vec_sub(vc32
,vcrf
);
9108 vc33
= vec_sub(vc33
,vcrf
);
9110 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
9111 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
9112 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
9113 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
9114 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
9115 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
9116 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
9117 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
9118 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
9120 fs11
= vec_madd(vec_twelve(),vnb12
,fs11
);
9121 fs12
= vec_madd(fs12
,qqOHt
,nul
);
9122 fs13
= vec_madd(fs13
,qqOHt
,nul
);
9123 fs21
= vec_madd(fs21
,qqOHt
,nul
);
9124 fs22
= vec_madd(fs22
,qqHHt
,nul
);
9125 fs23
= vec_madd(fs23
,qqHHt
,nul
);
9126 fs31
= vec_madd(fs31
,qqOHt
,nul
);
9127 fs32
= vec_madd(fs32
,qqHHt
,nul
);
9128 fs33
= vec_madd(fs33
,qqHHt
,nul
);
9130 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
9131 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
9132 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
9133 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
9134 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
9135 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
9136 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
9137 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
9138 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
9140 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
9141 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
9142 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
9143 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
9144 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
9145 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
9146 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
9147 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
9148 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
9150 fix1
= vec_madd(fs11
,dx11
,fix1
);
9151 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
9152 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
9153 fix2
= vec_madd(fs21
,dx21
,fix2
);
9154 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
9155 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
9156 fix3
= vec_madd(fs31
,dx31
,fix3
);
9157 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
9158 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
9160 fix1
= vec_madd(fs12
,dx12
,fix1
);
9161 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
9162 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
9163 fix2
= vec_madd(fs22
,dx22
,fix2
);
9164 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
9165 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
9166 fix3
= vec_madd(fs32
,dx32
,fix3
);
9167 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
9168 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
9170 fix1
= vec_madd(fs13
,dx13
,fix1
);
9171 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
9172 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
9173 fix2
= vec_madd(fs23
,dx23
,fix2
);
9174 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
9175 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
9176 fix3
= vec_madd(fs33
,dx33
,fix3
);
9177 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
9178 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
9180 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
9181 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
9182 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
9183 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
9184 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
9185 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
9186 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
9187 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
9188 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
9190 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
9191 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
9192 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
9193 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
9194 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
9195 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
9196 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
9197 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
9198 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
9200 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
9201 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
9202 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
9203 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
9204 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
9205 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
9206 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
9207 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
9208 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
9210 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
9211 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
9212 } else if(k
<(nj1
-1)) {
9217 load_2_water(pos
+j3a
,pos
+j3b
,
9218 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
9219 qqOOt
= vec_sld(qqOO
,nul
,8);
9220 qqOHt
= vec_sld(qqOH
,nul
,8);
9221 qqHHt
= vec_sld(qqHH
,nul
,8);
9222 c6t
= vec_sld(c6
,nul
,8);
9223 c12t
= vec_sld(c12
,nul
,8);
9225 dx11
= vec_sub(ix1
,jx1
);
9226 dx12
= vec_sub(ix1
,jx2
);
9227 dx13
= vec_sub(ix1
,jx3
);
9228 dy11
= vec_sub(iy1
,jy1
);
9229 dy12
= vec_sub(iy1
,jy2
);
9230 dy13
= vec_sub(iy1
,jy3
);
9231 dz11
= vec_sub(iz1
,jz1
);
9232 dz12
= vec_sub(iz1
,jz2
);
9233 dz13
= vec_sub(iz1
,jz3
);
9234 dx21
= vec_sub(ix2
,jx1
);
9235 dx22
= vec_sub(ix2
,jx2
);
9236 dx23
= vec_sub(ix2
,jx3
);
9237 dy21
= vec_sub(iy2
,jy1
);
9238 dy22
= vec_sub(iy2
,jy2
);
9239 dy23
= vec_sub(iy2
,jy3
);
9240 dz21
= vec_sub(iz2
,jz1
);
9241 dz22
= vec_sub(iz2
,jz2
);
9242 dz23
= vec_sub(iz2
,jz3
);
9243 dx31
= vec_sub(ix3
,jx1
);
9244 dx32
= vec_sub(ix3
,jx2
);
9245 dx33
= vec_sub(ix3
,jx3
);
9246 dy31
= vec_sub(iy3
,jy1
);
9247 dy32
= vec_sub(iy3
,jy2
);
9248 dy33
= vec_sub(iy3
,jy3
);
9249 dz31
= vec_sub(iz3
,jz1
);
9250 dz32
= vec_sub(iz3
,jz2
);
9251 dz33
= vec_sub(iz3
,jz3
);
9253 rsq11
= vec_madd(dx11
,dx11
,nul
);
9254 rsq12
= vec_madd(dx12
,dx12
,nul
);
9255 rsq13
= vec_madd(dx13
,dx13
,nul
);
9256 rsq21
= vec_madd(dx21
,dx21
,nul
);
9257 rsq22
= vec_madd(dx22
,dx22
,nul
);
9258 rsq23
= vec_madd(dx23
,dx23
,nul
);
9259 rsq31
= vec_madd(dx31
,dx31
,nul
);
9260 rsq32
= vec_madd(dx32
,dx32
,nul
);
9261 rsq33
= vec_madd(dx33
,dx33
,nul
);
9262 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
9263 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
9264 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
9265 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
9266 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
9267 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
9268 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
9269 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
9270 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
9271 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
9272 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
9273 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
9274 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
9275 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
9276 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
9277 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
9278 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
9279 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
9281 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
9282 &rsq21
,&rsq22
,&rsq23
,
9283 &rsq31
,&rsq32
,&rsq33
);
9285 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
9288 &rinv11
,&rinv12
,&rinv13
,
9289 &rinv21
,&rinv22
,&rinv23
,
9290 &rinv31
,&rinv32
,&rinv33
);
9292 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
9293 &rinv21
,&rinv22
,&rinv23
,
9294 &rinv31
,&rinv32
,&rinv33
);
9296 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
9297 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
9298 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
9299 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
9300 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
9301 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
9302 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
9303 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
9304 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
9306 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
9307 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
9308 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
9309 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
9310 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
9311 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
9312 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
9313 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
9314 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
9315 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
9316 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
9318 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
9319 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
9320 vnbtot
= vec_add(vnbtot
,vnb12
);
9321 vnbtot
= vec_sub(vnbtot
,vnb6
);
9323 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
9324 vc11
= vec_add(rinv11
,krsq11
);
9325 vc12
= vec_add(rinv12
,krsq12
);
9326 vc13
= vec_add(rinv13
,krsq13
);
9327 vc21
= vec_add(rinv21
,krsq21
);
9328 vc22
= vec_add(rinv22
,krsq22
);
9329 vc23
= vec_add(rinv23
,krsq23
);
9330 vc31
= vec_add(rinv31
,krsq31
);
9331 vc32
= vec_add(rinv32
,krsq32
);
9332 vc33
= vec_add(rinv33
,krsq33
);
9334 fs11
= vec_madd(qqOOt
,fs11
,nul
);
9335 vc11
= vec_sub(vc11
,vcrf
);
9336 vc12
= vec_sub(vc12
,vcrf
);
9337 vc13
= vec_sub(vc13
,vcrf
);
9338 vc21
= vec_sub(vc21
,vcrf
);
9339 vc22
= vec_sub(vc22
,vcrf
);
9340 vc23
= vec_sub(vc23
,vcrf
);
9341 vc31
= vec_sub(vc31
,vcrf
);
9342 vc32
= vec_sub(vc32
,vcrf
);
9343 vc33
= vec_sub(vc33
,vcrf
);
9345 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
9346 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
9347 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
9348 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
9349 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
9350 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
9351 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
9352 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
9353 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
9355 fs11
= vec_madd(vec_twelve(),vnb12
,fs11
);
9356 fs12
= vec_madd(fs12
,qqOHt
,nul
);
9357 fs13
= vec_madd(fs13
,qqOHt
,nul
);
9358 fs21
= vec_madd(fs21
,qqOHt
,nul
);
9359 fs22
= vec_madd(fs22
,qqHHt
,nul
);
9360 fs23
= vec_madd(fs23
,qqHHt
,nul
);
9361 fs31
= vec_madd(fs31
,qqOHt
,nul
);
9362 fs32
= vec_madd(fs32
,qqHHt
,nul
);
9363 fs33
= vec_madd(fs33
,qqHHt
,nul
);
9365 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
9366 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
9367 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
9368 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
9369 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
9370 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
9371 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
9372 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
9373 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
9375 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
9376 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
9377 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
9378 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
9379 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
9380 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
9381 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
9382 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
9383 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
9385 fix1
= vec_madd(fs11
,dx11
,fix1
);
9386 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
9387 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
9388 fix2
= vec_madd(fs21
,dx21
,fix2
);
9389 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
9390 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
9391 fix3
= vec_madd(fs31
,dx31
,fix3
);
9392 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
9393 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
9395 fix1
= vec_madd(fs12
,dx12
,fix1
);
9396 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
9397 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
9398 fix2
= vec_madd(fs22
,dx22
,fix2
);
9399 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
9400 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
9401 fix3
= vec_madd(fs32
,dx32
,fix3
);
9402 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
9403 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
9405 fix1
= vec_madd(fs13
,dx13
,fix1
);
9406 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
9407 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
9408 fix2
= vec_madd(fs23
,dx23
,fix2
);
9409 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
9410 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
9411 fix3
= vec_madd(fs33
,dx33
,fix3
);
9412 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
9413 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
9415 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
9416 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
9417 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
9418 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
9419 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
9420 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
9421 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
9422 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
9423 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
9425 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
9426 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
9427 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
9428 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
9429 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
9430 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
9431 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
9432 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
9433 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
9435 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
9436 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
9437 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
9438 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
9439 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
9440 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
9441 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
9442 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
9443 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
9445 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
9446 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
9450 load_1_water(pos
+j3a
,
9451 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
9452 qqOOt
= vec_sld(qqOO
,nul
,12);
9453 qqOHt
= vec_sld(qqOH
,nul
,12);
9454 qqHHt
= vec_sld(qqHH
,nul
,12);
9455 c6t
= vec_sld(c6
,nul
,12);
9456 c12t
= vec_sld(c12
,nul
,12);
9458 dx11
= vec_sub(ix1
,jx1
);
9459 dx12
= vec_sub(ix1
,jx2
);
9460 dx13
= vec_sub(ix1
,jx3
);
9461 dy11
= vec_sub(iy1
,jy1
);
9462 dy12
= vec_sub(iy1
,jy2
);
9463 dy13
= vec_sub(iy1
,jy3
);
9464 dz11
= vec_sub(iz1
,jz1
);
9465 dz12
= vec_sub(iz1
,jz2
);
9466 dz13
= vec_sub(iz1
,jz3
);
9467 dx21
= vec_sub(ix2
,jx1
);
9468 dx22
= vec_sub(ix2
,jx2
);
9469 dx23
= vec_sub(ix2
,jx3
);
9470 dy21
= vec_sub(iy2
,jy1
);
9471 dy22
= vec_sub(iy2
,jy2
);
9472 dy23
= vec_sub(iy2
,jy3
);
9473 dz21
= vec_sub(iz2
,jz1
);
9474 dz22
= vec_sub(iz2
,jz2
);
9475 dz23
= vec_sub(iz2
,jz3
);
9476 dx31
= vec_sub(ix3
,jx1
);
9477 dx32
= vec_sub(ix3
,jx2
);
9478 dx33
= vec_sub(ix3
,jx3
);
9479 dy31
= vec_sub(iy3
,jy1
);
9480 dy32
= vec_sub(iy3
,jy2
);
9481 dy33
= vec_sub(iy3
,jy3
);
9482 dz31
= vec_sub(iz3
,jz1
);
9483 dz32
= vec_sub(iz3
,jz2
);
9484 dz33
= vec_sub(iz3
,jz3
);
9486 rsq11
= vec_madd(dx11
,dx11
,nul
);
9487 rsq12
= vec_madd(dx12
,dx12
,nul
);
9488 rsq13
= vec_madd(dx13
,dx13
,nul
);
9489 rsq21
= vec_madd(dx21
,dx21
,nul
);
9490 rsq22
= vec_madd(dx22
,dx22
,nul
);
9491 rsq23
= vec_madd(dx23
,dx23
,nul
);
9492 rsq31
= vec_madd(dx31
,dx31
,nul
);
9493 rsq32
= vec_madd(dx32
,dx32
,nul
);
9494 rsq33
= vec_madd(dx33
,dx33
,nul
);
9495 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
9496 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
9497 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
9498 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
9499 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
9500 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
9501 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
9502 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
9503 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
9504 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
9505 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
9506 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
9507 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
9508 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
9509 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
9510 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
9511 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
9512 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
9514 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
9515 &rsq21
,&rsq22
,&rsq23
,
9516 &rsq31
,&rsq32
,&rsq33
);
9518 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
9521 &rinv11
,&rinv12
,&rinv13
,
9522 &rinv21
,&rinv22
,&rinv23
,
9523 &rinv31
,&rinv32
,&rinv33
);
9525 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
9526 &rinv21
,&rinv22
,&rinv23
,
9527 &rinv31
,&rinv32
,&rinv33
);
9529 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
9530 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
9531 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
9532 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
9533 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
9534 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
9535 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
9536 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
9537 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
9539 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
9540 rinvsq12
= vec_madd(rinv12
,rinv12
,nul
);
9541 rinvsq13
= vec_madd(rinv13
,rinv13
,nul
);
9542 rinvsq21
= vec_madd(rinv21
,rinv21
,nul
);
9543 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
9544 rinvsq22
= vec_madd(rinv22
,rinv22
,nul
);
9545 rinvsq23
= vec_madd(rinv23
,rinv23
,nul
);
9546 rinvsq31
= vec_madd(rinv31
,rinv31
,nul
);
9547 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
9548 rinvsq32
= vec_madd(rinv32
,rinv32
,nul
);
9549 rinvsq33
= vec_madd(rinv33
,rinv33
,nul
);
9551 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
9552 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
9553 vnbtot
= vec_add(vnbtot
,vnb12
);
9554 vnbtot
= vec_sub(vnbtot
,vnb6
);
9556 fs11
= vec_nmsub(vec_two(),krsq11
,rinv11
);
9557 vc11
= vec_add(rinv11
,krsq11
);
9558 vc12
= vec_add(rinv12
,krsq12
);
9559 vc13
= vec_add(rinv13
,krsq13
);
9560 vc21
= vec_add(rinv21
,krsq21
);
9561 vc22
= vec_add(rinv22
,krsq22
);
9562 vc23
= vec_add(rinv23
,krsq23
);
9563 vc31
= vec_add(rinv31
,krsq31
);
9564 vc32
= vec_add(rinv32
,krsq32
);
9565 vc33
= vec_add(rinv33
,krsq33
);
9567 fs11
= vec_madd(qqOOt
,fs11
,nul
);
9568 vc11
= vec_sub(vc11
,vcrf
);
9569 vc12
= vec_sub(vc12
,vcrf
);
9570 vc13
= vec_sub(vc13
,vcrf
);
9571 vc21
= vec_sub(vc21
,vcrf
);
9572 vc22
= vec_sub(vc22
,vcrf
);
9573 vc23
= vec_sub(vc23
,vcrf
);
9574 vc31
= vec_sub(vc31
,vcrf
);
9575 vc32
= vec_sub(vc32
,vcrf
);
9576 vc33
= vec_sub(vc33
,vcrf
);
9578 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
9579 fs12
= vec_nmsub(vec_two(),krsq12
,rinv12
);
9580 fs13
= vec_nmsub(vec_two(),krsq13
,rinv13
);
9581 fs21
= vec_nmsub(vec_two(),krsq21
,rinv21
);
9582 fs22
= vec_nmsub(vec_two(),krsq22
,rinv22
);
9583 fs23
= vec_nmsub(vec_two(),krsq23
,rinv23
);
9584 fs31
= vec_nmsub(vec_two(),krsq31
,rinv31
);
9585 fs32
= vec_nmsub(vec_two(),krsq32
,rinv32
);
9586 fs33
= vec_nmsub(vec_two(),krsq33
,rinv33
);
9588 fs11
= vec_madd(vec_twelve(),vnb12
,fs11
);
9589 fs12
= vec_madd(fs12
,qqOHt
,nul
);
9590 fs13
= vec_madd(fs13
,qqOHt
,nul
);
9591 fs21
= vec_madd(fs21
,qqOHt
,nul
);
9592 fs22
= vec_madd(fs22
,qqHHt
,nul
);
9593 fs23
= vec_madd(fs23
,qqHHt
,nul
);
9594 fs31
= vec_madd(fs31
,qqOHt
,nul
);
9595 fs32
= vec_madd(fs32
,qqHHt
,nul
);
9596 fs33
= vec_madd(fs33
,qqHHt
,nul
);
9598 fs11
= vec_madd(fs11
,rinvsq11
,nul
);
9599 fs12
= vec_madd(fs12
,rinvsq12
,nul
);
9600 fs13
= vec_madd(fs13
,rinvsq13
,nul
);
9601 fs21
= vec_madd(fs21
,rinvsq21
,nul
);
9602 fs22
= vec_madd(fs22
,rinvsq22
,nul
);
9603 fs23
= vec_madd(fs23
,rinvsq23
,nul
);
9604 fs31
= vec_madd(fs31
,rinvsq31
,nul
);
9605 fs32
= vec_madd(fs32
,rinvsq32
,nul
);
9606 fs33
= vec_madd(fs33
,rinvsq33
,nul
);
9608 vctot
= vec_madd(qqOOt
,vc11
,vctot
);
9609 vctot
= vec_madd(qqOHt
,vc12
,vctot
);
9610 vctot
= vec_madd(qqOHt
,vc13
,vctot
);
9611 vctot
= vec_madd(qqOHt
,vc21
,vctot
);
9612 vctot
= vec_madd(qqHHt
,vc22
,vctot
);
9613 vctot
= vec_madd(qqHHt
,vc23
,vctot
);
9614 vctot
= vec_madd(qqOHt
,vc31
,vctot
);
9615 vctot
= vec_madd(qqHHt
,vc32
,vctot
);
9616 vctot
= vec_madd(qqHHt
,vc33
,vctot
);
9618 fix1
= vec_madd(fs11
,dx11
,fix1
);
9619 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
9620 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
9621 fix2
= vec_madd(fs21
,dx21
,fix2
);
9622 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
9623 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
9624 fix3
= vec_madd(fs31
,dx31
,fix3
);
9625 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
9626 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
9628 fix1
= vec_madd(fs12
,dx12
,fix1
);
9629 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
9630 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
9631 fix2
= vec_madd(fs22
,dx22
,fix2
);
9632 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
9633 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
9634 fix3
= vec_madd(fs32
,dx32
,fix3
);
9635 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
9636 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
9638 fix1
= vec_madd(fs13
,dx13
,fix1
);
9639 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
9640 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
9641 fix2
= vec_madd(fs23
,dx23
,fix2
);
9642 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
9643 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
9644 fix3
= vec_madd(fs33
,dx33
,fix3
);
9645 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
9646 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
9648 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
9649 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
9650 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
9651 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
9652 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
9653 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
9654 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
9655 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
9656 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
9658 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
9659 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
9660 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
9661 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
9662 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
9663 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
9664 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
9665 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
9666 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
9668 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
9669 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
9670 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
9671 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
9672 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
9673 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
9674 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
9675 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
9676 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
9678 add_force_to_1_water(faction
+j3a
,
9679 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
9681 /* update outer data */
9682 update_i_water_forces(faction
+ii3
,fshift
+is3
,
9683 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
9685 add_vector_to_float(Vc
+gid
[n
],vctot
);
9686 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3030_altivec: AltiVec inner loop for water-water pairs with a
 * tabulated Coulomb interaction only (no Lennard-Jones terms appear here).
 *
 * For each of the nri outer entries the three sites of the i-water are
 * combined with the three sites of each j-water (3x3 site pairs). The
 * pair energies are accumulated in vctot and added to Vc[gid[n]]; the
 * forces go to faction for the j-waters (add_force_to_N_water) and to
 * faction/fshift for the i-water (update_i_water_forces).
 *
 * j-waters are processed four at a time; separate code paths handle a
 * remainder of three, two or one j-waters.
 *
 * NOTE(review): in this listing the parameter list, the brace-only lines,
 * the index setup (ii, is3, ii3, nj0, nj1, jnr*, j3*), the initialisation
 * of nul and of the force/energy accumulators, and some branch headers are
 * not visible. Comments below that depend on them are hedged - confirm
 * against the complete source.
 */
9692 void inl3030_altivec(
/* Work vectors. Suffix "ij" means i-site i (1..3) paired with j-site j (1..3). */
9709 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
9710 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
9712 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
9713 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
9714 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
9716 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
9717 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
9718 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
9719 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
;
9721 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
9722 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
;
9723 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
9724 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
9725 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,tsc
;
9726 vector
float VV11c
,FF11c
,VV12c
,FF12c
,VV13c
,FF13c
;
9727 vector
float VV21c
,FF21c
,VV22c
,FF22c
,VV23c
,FF23c
;
9728 vector
float VV31c
,FF31c
,VV32c
,FF32c
,VV33c
,FF33c
;
9729 vector
float qqOOt
,qqOHt
,qqHHt
;
9731 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
9732 int jnra
,jnrb
,jnrc
,jnrd
;
9733 int j3a
,j3b
,j3c
,j3d
;
/* Load facel and tabscale into vectors (by the helper's name the value is
 * splatted across all elements - helper defined elsewhere). */
9736 vfacel
=load_float_and_splat(&facel
);
9737 tsc
=load_float_and_splat(&tabscale
);
/* Charges are read once, from the first outer entry: qO from
 * charge[iinr[0]] and qH from the next float. Presumably every water in
 * the list shares these charges - TODO confirm against the caller. */
9738 qO
= load_float_and_splat(charge
+iinr
[0]);
9739 qH
= load_float_and_splat(charge
+iinr
[0]+1);
/* Charge products for the site pairs, then scaled by facel.
 * vec_madd(a,b,nul) acts as a plain multiply, assuming nul is a zero
 * vector (its initialisation is not visible in this listing). */
9740 qqOO
= vec_madd(qO
,qO
,nul
);
9741 qqOH
= vec_madd(qO
,qH
,nul
);
9742 qqHH
= vec_madd(qH
,qH
,nul
);
9743 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
9744 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
9745 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* Outer loop over the nri i-entries. */
9747 for(n
=0;n
<nri
;n
++) {
/* Fetch the i-water coordinates from pos+ii3 together with the shift
 * vector at shiftvec+is3 into ix1..iz3 (by its name the helper applies the
 * shift and splats each coordinate - helper defined elsewhere). */
9751 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
9752 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: four j-waters per iteration, while at least four remain. */
9766 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
9775 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
9776 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Displacements for all 3x3 site pairs: d(ij) = i-site i minus j-site j. */
9777 dx11
= vec_sub(ix1
,jx1
);
9778 dx12
= vec_sub(ix1
,jx2
);
9779 dx13
= vec_sub(ix1
,jx3
);
9780 dy11
= vec_sub(iy1
,jy1
);
9781 dy12
= vec_sub(iy1
,jy2
);
9782 dy13
= vec_sub(iy1
,jy3
);
9783 dz11
= vec_sub(iz1
,jz1
);
9784 dz12
= vec_sub(iz1
,jz2
);
9785 dz13
= vec_sub(iz1
,jz3
);
9786 dx21
= vec_sub(ix2
,jx1
);
9787 dx22
= vec_sub(ix2
,jx2
);
9788 dx23
= vec_sub(ix2
,jx3
);
9789 dy21
= vec_sub(iy2
,jy1
);
9790 dy22
= vec_sub(iy2
,jy2
);
9791 dy23
= vec_sub(iy2
,jy3
);
9792 dz21
= vec_sub(iz2
,jz1
);
9793 dz22
= vec_sub(iz2
,jz2
);
9794 dz23
= vec_sub(iz2
,jz3
);
9795 dx31
= vec_sub(ix3
,jx1
);
9796 dx32
= vec_sub(ix3
,jx2
);
9797 dx33
= vec_sub(ix3
,jx3
);
9798 dy31
= vec_sub(iy3
,jy1
);
9799 dy32
= vec_sub(iy3
,jy2
);
9800 dy33
= vec_sub(iy3
,jy3
);
9801 dz31
= vec_sub(iz3
,jz1
);
9802 dz32
= vec_sub(iz3
,jz2
);
9803 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz, built with three
 * multiply-adds per pair (the first adds nul). */
9805 rsq11
= vec_madd(dx11
,dx11
,nul
);
9806 rsq12
= vec_madd(dx12
,dx12
,nul
);
9807 rsq13
= vec_madd(dx13
,dx13
,nul
);
9808 rsq21
= vec_madd(dx21
,dx21
,nul
);
9809 rsq22
= vec_madd(dx22
,dx22
,nul
);
9810 rsq23
= vec_madd(dx23
,dx23
,nul
);
9811 rsq31
= vec_madd(dx31
,dx31
,nul
);
9812 rsq32
= vec_madd(dx32
,dx32
,nul
);
9813 rsq33
= vec_madd(dx33
,dx33
,nul
);
9814 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
9815 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
9816 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
9817 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
9818 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
9819 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
9820 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
9821 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
9822 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
9823 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
9824 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
9825 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
9826 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
9827 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
9828 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
9829 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
9830 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
9831 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Presumably rinv(ij) = 1/sqrt(rsq(ij)) for all nine pairs (helper defined
 * elsewhere). NOTE(review): only the first three input arguments of this
 * call are visible in this listing. */
9833 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
9836 &rinv11
,&rinv12
,&rinv13
,
9837 &rinv21
,&rinv22
,&rinv23
,
9838 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
9840 r11
= vec_madd(rsq11
,rinv11
,nul
);
9841 r12
= vec_madd(rsq12
,rinv12
,nul
);
9842 r13
= vec_madd(rsq13
,rinv13
,nul
);
9843 r21
= vec_madd(rsq21
,rinv21
,nul
);
9844 r22
= vec_madd(rsq22
,rinv22
,nul
);
9845 r23
= vec_madd(rsq23
,rinv23
,nul
);
9846 r31
= vec_madd(rsq31
,rinv31
,nul
);
9847 r32
= vec_madd(rsq32
,rinv32
,nul
);
9848 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookup in VFtab at r*tsc for each pair; VVc/FFc presumably receive
 * the tabulated potential and force values (helper defined elsewhere). */
9850 do_4_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
9851 do_4_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
9852 do_4_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
9853 do_4_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
9854 do_4_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
9855 do_4_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
9856 do_4_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
9857 do_4_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
9858 do_4_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* fs = nul - qq*FF (vec_nmsub negates the product). Site 1 uses the O
 * charge and sites 2,3 the H charge, hence the qqOO/qqOH/qqHH pattern. */
9860 fs11
= vec_nmsub(qqOO
,FF11c
,nul
);
9861 fs12
= vec_nmsub(qqOH
,FF12c
,nul
);
9862 fs13
= vec_nmsub(qqOH
,FF13c
,nul
);
9863 fs21
= vec_nmsub(qqOH
,FF21c
,nul
);
9864 fs22
= vec_nmsub(qqHH
,FF22c
,nul
);
9865 fs23
= vec_nmsub(qqHH
,FF23c
,nul
);
9866 fs31
= vec_nmsub(qqOH
,FF31c
,nul
);
9867 fs32
= vec_nmsub(qqHH
,FF32c
,nul
);
9868 fs33
= vec_nmsub(qqHH
,FF33c
,nul
);
/* Accumulate the energy: vctot += qq*VV for each of the nine pairs. */
9870 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
9871 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
9872 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
9873 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
9874 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
9875 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
9876 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
9877 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
9878 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
/* Scale fs by tsc ... */
9880 fs11
= vec_madd(fs11
,tsc
,nul
);
9881 fs12
= vec_madd(fs12
,tsc
,nul
);
9882 fs13
= vec_madd(fs13
,tsc
,nul
);
9883 fs21
= vec_madd(fs21
,tsc
,nul
);
9884 fs22
= vec_madd(fs22
,tsc
,nul
);
9885 fs23
= vec_madd(fs23
,tsc
,nul
);
9886 fs31
= vec_madd(fs31
,tsc
,nul
);
9887 fs32
= vec_madd(fs32
,tsc
,nul
);
9888 fs33
= vec_madd(fs33
,tsc
,nul
);
/* ... and by rinv; fs is multiplied by the displacement components below. */
9890 fs11
= vec_madd(fs11
,rinv11
,nul
);
9891 fs12
= vec_madd(fs12
,rinv12
,nul
);
9892 fs13
= vec_madd(fs13
,rinv13
,nul
);
9893 fs21
= vec_madd(fs21
,rinv21
,nul
);
9894 fs22
= vec_madd(fs22
,rinv22
,nul
);
9895 fs23
= vec_madd(fs23
,rinv23
,nul
);
9896 fs31
= vec_madd(fs31
,rinv31
,nul
);
9897 fs32
= vec_madd(fs32
,rinv32
,nul
);
9898 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* Accumulate forces on the i-sites: fi(i) += fs(ij)*d(ij), first for
 * j-site 1, then j-site 2, then j-site 3. */
9900 fix1
= vec_madd(fs11
,dx11
,fix1
);
9901 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
9902 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
9903 fix2
= vec_madd(fs21
,dx21
,fix2
);
9904 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
9905 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
9906 fix3
= vec_madd(fs31
,dx31
,fix3
);
9907 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
9908 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
9910 fix1
= vec_madd(fs12
,dx12
,fix1
);
9911 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
9912 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
9913 fix2
= vec_madd(fs22
,dx22
,fix2
);
9914 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
9915 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
9916 fix3
= vec_madd(fs32
,dx32
,fix3
);
9917 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
9918 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
9920 fix1
= vec_madd(fs13
,dx13
,fix1
);
9921 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
9922 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
9923 fix2
= vec_madd(fs23
,dx23
,fix2
);
9924 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
9925 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
9926 fix3
= vec_madd(fs33
,dx33
,fix3
);
9927 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
9928 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* Forces on the j-sites: fj(j) = nul - sum over i of fs(ij)*d(ij), started
 * from i-site 1 and then extended with i-sites 2 and 3. */
9930 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
9931 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
9932 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
9933 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
9934 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
9935 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
9936 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
9937 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
9938 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
9940 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
9941 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
9942 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
9943 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
9944 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
9945 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
9946 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
9947 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
9948 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
9950 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
9951 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
9952 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
9953 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
9954 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
9955 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
9956 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
9957 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
9958 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to faction for the four j-waters (by the helper's name
 * they are added to the existing values - helper defined elsewhere). */
9960 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
9961 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder path for three j-waters (the branch header is not visible in
 * this listing). Same sequence as the main loop, with the unused fourth
 * vector element blanked. */
9970 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
9971 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* vec_sld(qq,nul,4) shifts the qq:nul pair left by 4 bytes, so the result
 * holds qq in elements 0..2 and the first element of nul in element 3. */
9972 qqOOt
= vec_sld(qqOO
,nul
,4);
9973 qqOHt
= vec_sld(qqOH
,nul
,4);
9974 qqHHt
= vec_sld(qqHH
,nul
,4);
/* Displacements d(ij) = i-site i minus j-site j. */
9976 dx11
= vec_sub(ix1
,jx1
);
9977 dx12
= vec_sub(ix1
,jx2
);
9978 dx13
= vec_sub(ix1
,jx3
);
9979 dy11
= vec_sub(iy1
,jy1
);
9980 dy12
= vec_sub(iy1
,jy2
);
9981 dy13
= vec_sub(iy1
,jy3
);
9982 dz11
= vec_sub(iz1
,jz1
);
9983 dz12
= vec_sub(iz1
,jz2
);
9984 dz13
= vec_sub(iz1
,jz3
);
9985 dx21
= vec_sub(ix2
,jx1
);
9986 dx22
= vec_sub(ix2
,jx2
);
9987 dx23
= vec_sub(ix2
,jx3
);
9988 dy21
= vec_sub(iy2
,jy1
);
9989 dy22
= vec_sub(iy2
,jy2
);
9990 dy23
= vec_sub(iy2
,jy3
);
9991 dz21
= vec_sub(iz2
,jz1
);
9992 dz22
= vec_sub(iz2
,jz2
);
9993 dz23
= vec_sub(iz2
,jz3
);
9994 dx31
= vec_sub(ix3
,jx1
);
9995 dx32
= vec_sub(ix3
,jx2
);
9996 dx33
= vec_sub(ix3
,jx3
);
9997 dy31
= vec_sub(iy3
,jy1
);
9998 dy32
= vec_sub(iy3
,jy2
);
9999 dy33
= vec_sub(iy3
,jy3
);
10000 dz31
= vec_sub(iz3
,jz1
);
10001 dz32
= vec_sub(iz3
,jz2
);
10002 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances rsq = dx*dx + dy*dy + dz*dz. */
10004 rsq11
= vec_madd(dx11
,dx11
,nul
);
10005 rsq12
= vec_madd(dx12
,dx12
,nul
);
10006 rsq13
= vec_madd(dx13
,dx13
,nul
);
10007 rsq21
= vec_madd(dx21
,dx21
,nul
);
10008 rsq22
= vec_madd(dx22
,dx22
,nul
);
10009 rsq23
= vec_madd(dx23
,dx23
,nul
);
10010 rsq31
= vec_madd(dx31
,dx31
,nul
);
10011 rsq32
= vec_madd(dx32
,dx32
,nul
);
10012 rsq33
= vec_madd(dx33
,dx33
,nul
);
10013 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
10014 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
10015 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
10016 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
10017 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
10018 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
10019 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
10020 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
10021 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
10022 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
10023 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
10024 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
10025 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
10026 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
10027 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
10028 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
10029 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
10030 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* By its name, clears the unused highest element of each rsq vector before
 * the inverse square root (helper defined elsewhere - TODO confirm). */
10032 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
10033 &rsq21
,&rsq22
,&rsq23
,
10034 &rsq31
,&rsq32
,&rsq33
);
/* Presumably rinv = 1/sqrt(rsq); only the first three input arguments are
 * visible in this listing. */
10036 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
10039 &rinv11
,&rinv12
,&rinv13
,
10040 &rinv21
,&rinv22
,&rinv23
,
10041 &rinv31
,&rinv32
,&rinv33
);
/* The same clearing is applied to the rinv results. */
10043 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
10044 &rinv21
,&rinv22
,&rinv23
,
10045 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
10047 r11
= vec_madd(rsq11
,rinv11
,nul
);
10048 r12
= vec_madd(rsq12
,rinv12
,nul
);
10049 r13
= vec_madd(rsq13
,rinv13
,nul
);
10050 r21
= vec_madd(rsq21
,rinv21
,nul
);
10051 r22
= vec_madd(rsq22
,rinv22
,nul
);
10052 r23
= vec_madd(rsq23
,rinv23
,nul
);
10053 r31
= vec_madd(rsq31
,rinv31
,nul
);
10054 r32
= vec_madd(rsq32
,rinv32
,nul
);
10055 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookup at r*tsc using the three-element variant of the helper. */
10057 do_3_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
10058 do_3_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
10059 do_3_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
10060 do_3_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
10061 do_3_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
10062 do_3_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
10063 do_3_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
10064 do_3_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
10065 do_3_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* fs = nul - qq_t*FF, using the shifted charge products. */
10067 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
10068 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
10069 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
10070 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
10071 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
10072 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
10073 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
10074 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
10075 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
/* vctot += qq_t*VV. */
10077 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
10078 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
10079 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
10080 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
10081 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
10082 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
10083 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
10084 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
10085 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Scale fs by tsc, then by rinv. */
10087 fs11
= vec_madd(fs11
,tsc
,nul
);
10088 fs12
= vec_madd(fs12
,tsc
,nul
);
10089 fs13
= vec_madd(fs13
,tsc
,nul
);
10090 fs21
= vec_madd(fs21
,tsc
,nul
);
10091 fs22
= vec_madd(fs22
,tsc
,nul
);
10092 fs23
= vec_madd(fs23
,tsc
,nul
);
10093 fs31
= vec_madd(fs31
,tsc
,nul
);
10094 fs32
= vec_madd(fs32
,tsc
,nul
);
10095 fs33
= vec_madd(fs33
,tsc
,nul
);
10097 fs11
= vec_madd(fs11
,rinv11
,nul
);
10098 fs12
= vec_madd(fs12
,rinv12
,nul
);
10099 fs13
= vec_madd(fs13
,rinv13
,nul
);
10100 fs21
= vec_madd(fs21
,rinv21
,nul
);
10101 fs22
= vec_madd(fs22
,rinv22
,nul
);
10102 fs23
= vec_madd(fs23
,rinv23
,nul
);
10103 fs31
= vec_madd(fs31
,rinv31
,nul
);
10104 fs32
= vec_madd(fs32
,rinv32
,nul
);
10105 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-site forces: fi(i) += fs(ij)*d(ij). */
10107 fix1
= vec_madd(fs11
,dx11
,fix1
);
10108 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
10109 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
10110 fix2
= vec_madd(fs21
,dx21
,fix2
);
10111 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
10112 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
10113 fix3
= vec_madd(fs31
,dx31
,fix3
);
10114 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
10115 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
10117 fix1
= vec_madd(fs12
,dx12
,fix1
);
10118 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
10119 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
10120 fix2
= vec_madd(fs22
,dx22
,fix2
);
10121 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
10122 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
10123 fix3
= vec_madd(fs32
,dx32
,fix3
);
10124 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
10125 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
10127 fix1
= vec_madd(fs13
,dx13
,fix1
);
10128 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
10129 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
10130 fix2
= vec_madd(fs23
,dx23
,fix2
);
10131 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
10132 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
10133 fix3
= vec_madd(fs33
,dx33
,fix3
);
10134 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
10135 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-site forces: fj(j) = nul - sum over i of fs(ij)*d(ij). */
10137 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
10138 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
10139 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
10140 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
10141 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
10142 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
10143 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
10144 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
10145 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
10147 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
10148 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
10149 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
10150 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
10151 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
10152 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
10153 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
10154 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
10155 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
10157 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
10158 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
10159 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
10160 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
10161 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
10162 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
10163 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
10164 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
10165 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to faction for the three j-waters. */
10167 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
10168 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder path for two j-waters: taken when k < nj1-1 and the preceding
 * branch was not. Same sequence, with the two unused elements blanked. */
10169 } else if(k
<(nj1
-1)) {
10174 load_2_water(pos
+j3a
,pos
+j3b
,
10175 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 8 bytes: qq in elements 0..1, the first two elements of nul in 2..3. */
10176 qqOOt
= vec_sld(qqOO
,nul
,8);
10177 qqOHt
= vec_sld(qqOH
,nul
,8);
10178 qqHHt
= vec_sld(qqHH
,nul
,8);
/* Displacements d(ij) = i-site i minus j-site j. */
10180 dx11
= vec_sub(ix1
,jx1
);
10181 dx12
= vec_sub(ix1
,jx2
);
10182 dx13
= vec_sub(ix1
,jx3
);
10183 dy11
= vec_sub(iy1
,jy1
);
10184 dy12
= vec_sub(iy1
,jy2
);
10185 dy13
= vec_sub(iy1
,jy3
);
10186 dz11
= vec_sub(iz1
,jz1
);
10187 dz12
= vec_sub(iz1
,jz2
);
10188 dz13
= vec_sub(iz1
,jz3
);
10189 dx21
= vec_sub(ix2
,jx1
);
10190 dx22
= vec_sub(ix2
,jx2
);
10191 dx23
= vec_sub(ix2
,jx3
);
10192 dy21
= vec_sub(iy2
,jy1
);
10193 dy22
= vec_sub(iy2
,jy2
);
10194 dy23
= vec_sub(iy2
,jy3
);
10195 dz21
= vec_sub(iz2
,jz1
);
10196 dz22
= vec_sub(iz2
,jz2
);
10197 dz23
= vec_sub(iz2
,jz3
);
10198 dx31
= vec_sub(ix3
,jx1
);
10199 dx32
= vec_sub(ix3
,jx2
);
10200 dx33
= vec_sub(ix3
,jx3
);
10201 dy31
= vec_sub(iy3
,jy1
);
10202 dy32
= vec_sub(iy3
,jy2
);
10203 dy33
= vec_sub(iy3
,jy3
);
10204 dz31
= vec_sub(iz3
,jz1
);
10205 dz32
= vec_sub(iz3
,jz2
);
10206 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances rsq = dx*dx + dy*dy + dz*dz. */
10208 rsq11
= vec_madd(dx11
,dx11
,nul
);
10209 rsq12
= vec_madd(dx12
,dx12
,nul
);
10210 rsq13
= vec_madd(dx13
,dx13
,nul
);
10211 rsq21
= vec_madd(dx21
,dx21
,nul
);
10212 rsq22
= vec_madd(dx22
,dx22
,nul
);
10213 rsq23
= vec_madd(dx23
,dx23
,nul
);
10214 rsq31
= vec_madd(dx31
,dx31
,nul
);
10215 rsq32
= vec_madd(dx32
,dx32
,nul
);
10216 rsq33
= vec_madd(dx33
,dx33
,nul
);
10217 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
10218 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
10219 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
10220 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
10221 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
10222 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
10223 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
10224 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
10225 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
10226 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
10227 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
10228 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
10229 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
10230 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
10231 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
10232 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
10233 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
10234 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* By its name, clears the two unused highest elements of each rsq vector
 * (helper defined elsewhere - TODO confirm). */
10236 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
10237 &rsq21
,&rsq22
,&rsq23
,
10238 &rsq31
,&rsq32
,&rsq33
);
/* Presumably rinv = 1/sqrt(rsq); only the first three input arguments are
 * visible in this listing. */
10240 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
10243 &rinv11
,&rinv12
,&rinv13
,
10244 &rinv21
,&rinv22
,&rinv23
,
10245 &rinv31
,&rinv32
,&rinv33
);
/* The same clearing is applied to the rinv results. */
10247 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
10248 &rinv21
,&rinv22
,&rinv23
,
10249 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
10251 r11
= vec_madd(rsq11
,rinv11
,nul
);
10252 r12
= vec_madd(rsq12
,rinv12
,nul
);
10253 r13
= vec_madd(rsq13
,rinv13
,nul
);
10254 r21
= vec_madd(rsq21
,rinv21
,nul
);
10255 r22
= vec_madd(rsq22
,rinv22
,nul
);
10256 r23
= vec_madd(rsq23
,rinv23
,nul
);
10257 r31
= vec_madd(rsq31
,rinv31
,nul
);
10258 r32
= vec_madd(rsq32
,rinv32
,nul
);
10259 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookup at r*tsc using the two-element variant of the helper. */
10261 do_2_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
10262 do_2_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
10263 do_2_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
10264 do_2_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
10265 do_2_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
10266 do_2_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
10267 do_2_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
10268 do_2_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
10269 do_2_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* fs = nul - qq_t*FF, using the shifted charge products. */
10271 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
10272 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
10273 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
10274 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
10275 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
10276 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
10277 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
10278 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
10279 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
/* vctot += qq_t*VV. */
10281 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
10282 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
10283 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
10284 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
10285 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
10286 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
10287 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
10288 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
10289 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Scale fs by tsc, then by rinv. */
10291 fs11
= vec_madd(fs11
,tsc
,nul
);
10292 fs12
= vec_madd(fs12
,tsc
,nul
);
10293 fs13
= vec_madd(fs13
,tsc
,nul
);
10294 fs21
= vec_madd(fs21
,tsc
,nul
);
10295 fs22
= vec_madd(fs22
,tsc
,nul
);
10296 fs23
= vec_madd(fs23
,tsc
,nul
);
10297 fs31
= vec_madd(fs31
,tsc
,nul
);
10298 fs32
= vec_madd(fs32
,tsc
,nul
);
10299 fs33
= vec_madd(fs33
,tsc
,nul
);
10301 fs11
= vec_madd(fs11
,rinv11
,nul
);
10302 fs12
= vec_madd(fs12
,rinv12
,nul
);
10303 fs13
= vec_madd(fs13
,rinv13
,nul
);
10304 fs21
= vec_madd(fs21
,rinv21
,nul
);
10305 fs22
= vec_madd(fs22
,rinv22
,nul
);
10306 fs23
= vec_madd(fs23
,rinv23
,nul
);
10307 fs31
= vec_madd(fs31
,rinv31
,nul
);
10308 fs32
= vec_madd(fs32
,rinv32
,nul
);
10309 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-site forces: fi(i) += fs(ij)*d(ij). */
10311 fix1
= vec_madd(fs11
,dx11
,fix1
);
10312 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
10313 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
10314 fix2
= vec_madd(fs21
,dx21
,fix2
);
10315 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
10316 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
10317 fix3
= vec_madd(fs31
,dx31
,fix3
);
10318 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
10319 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
10321 fix1
= vec_madd(fs12
,dx12
,fix1
);
10322 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
10323 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
10324 fix2
= vec_madd(fs22
,dx22
,fix2
);
10325 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
10326 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
10327 fix3
= vec_madd(fs32
,dx32
,fix3
);
10328 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
10329 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
10331 fix1
= vec_madd(fs13
,dx13
,fix1
);
10332 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
10333 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
10334 fix2
= vec_madd(fs23
,dx23
,fix2
);
10335 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
10336 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
10337 fix3
= vec_madd(fs33
,dx33
,fix3
);
10338 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
10339 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-site forces: fj(j) = nul - sum over i of fs(ij)*d(ij). */
10341 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
10342 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
10343 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
10344 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
10345 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
10346 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
10347 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
10348 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
10349 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
10351 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
10352 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
10353 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
10354 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
10355 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
10356 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
10357 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
10358 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
10359 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
10361 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
10362 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
10363 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
10364 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
10365 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
10366 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
10367 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
10368 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
10369 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to faction for the two j-waters. */
10371 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
10372 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder path for a single j-water (the branch header is not visible in
 * this listing). Same sequence, with the three unused elements blanked. */
10376 load_1_water(pos
+j3a
,
10377 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 12 bytes: qq in element 0, the first three elements of nul in 1..3. */
10378 qqOOt
= vec_sld(qqOO
,nul
,12);
10379 qqOHt
= vec_sld(qqOH
,nul
,12);
10380 qqHHt
= vec_sld(qqHH
,nul
,12);
/* Displacements d(ij) = i-site i minus j-site j. */
10382 dx11
= vec_sub(ix1
,jx1
);
10383 dx12
= vec_sub(ix1
,jx2
);
10384 dx13
= vec_sub(ix1
,jx3
);
10385 dy11
= vec_sub(iy1
,jy1
);
10386 dy12
= vec_sub(iy1
,jy2
);
10387 dy13
= vec_sub(iy1
,jy3
);
10388 dz11
= vec_sub(iz1
,jz1
);
10389 dz12
= vec_sub(iz1
,jz2
);
10390 dz13
= vec_sub(iz1
,jz3
);
10391 dx21
= vec_sub(ix2
,jx1
);
10392 dx22
= vec_sub(ix2
,jx2
);
10393 dx23
= vec_sub(ix2
,jx3
);
10394 dy21
= vec_sub(iy2
,jy1
);
10395 dy22
= vec_sub(iy2
,jy2
);
10396 dy23
= vec_sub(iy2
,jy3
);
10397 dz21
= vec_sub(iz2
,jz1
);
10398 dz22
= vec_sub(iz2
,jz2
);
10399 dz23
= vec_sub(iz2
,jz3
);
10400 dx31
= vec_sub(ix3
,jx1
);
10401 dx32
= vec_sub(ix3
,jx2
);
10402 dx33
= vec_sub(ix3
,jx3
);
10403 dy31
= vec_sub(iy3
,jy1
);
10404 dy32
= vec_sub(iy3
,jy2
);
10405 dy33
= vec_sub(iy3
,jy3
);
10406 dz31
= vec_sub(iz3
,jz1
);
10407 dz32
= vec_sub(iz3
,jz2
);
10408 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances rsq = dx*dx + dy*dy + dz*dz. */
10410 rsq11
= vec_madd(dx11
,dx11
,nul
);
10411 rsq12
= vec_madd(dx12
,dx12
,nul
);
10412 rsq13
= vec_madd(dx13
,dx13
,nul
);
10413 rsq21
= vec_madd(dx21
,dx21
,nul
);
10414 rsq22
= vec_madd(dx22
,dx22
,nul
);
10415 rsq23
= vec_madd(dx23
,dx23
,nul
);
10416 rsq31
= vec_madd(dx31
,dx31
,nul
);
10417 rsq32
= vec_madd(dx32
,dx32
,nul
);
10418 rsq33
= vec_madd(dx33
,dx33
,nul
);
10419 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
10420 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
10421 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
10422 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
10423 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
10424 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
10425 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
10426 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
10427 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
10428 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
10429 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
10430 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
10431 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
10432 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
10433 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
10434 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
10435 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
10436 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* By its name, clears the three unused highest elements of each rsq vector
 * (helper defined elsewhere - TODO confirm). */
10438 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
10439 &rsq21
,&rsq22
,&rsq23
,
10440 &rsq31
,&rsq32
,&rsq33
);
/* Presumably rinv = 1/sqrt(rsq); only the first three input arguments are
 * visible in this listing. */
10442 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
10445 &rinv11
,&rinv12
,&rinv13
,
10446 &rinv21
,&rinv22
,&rinv23
,
10447 &rinv31
,&rinv32
,&rinv33
);
/* The same clearing is applied to the rinv results. */
10449 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
10450 &rinv21
,&rinv22
,&rinv23
,
10451 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
10453 r11
= vec_madd(rsq11
,rinv11
,nul
);
10454 r12
= vec_madd(rsq12
,rinv12
,nul
);
10455 r13
= vec_madd(rsq13
,rinv13
,nul
);
10456 r21
= vec_madd(rsq21
,rinv21
,nul
);
10457 r22
= vec_madd(rsq22
,rinv22
,nul
);
10458 r23
= vec_madd(rsq23
,rinv23
,nul
);
10459 r31
= vec_madd(rsq31
,rinv31
,nul
);
10460 r32
= vec_madd(rsq32
,rinv32
,nul
);
10461 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookup at r*tsc using the one-element variant of the helper. */
10463 do_1_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
10464 do_1_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
10465 do_1_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
10466 do_1_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
10467 do_1_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
10468 do_1_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
10469 do_1_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
10470 do_1_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
10471 do_1_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* fs = nul - qq_t*FF, using the shifted charge products. */
10473 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
10474 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
10475 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
10476 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
10477 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
10478 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
10479 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
10480 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
10481 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
/* vctot += qq_t*VV. */
10483 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
10484 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
10485 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
10486 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
10487 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
10488 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
10489 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
10490 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
10491 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Scale fs by tsc, then by rinv. */
10493 fs11
= vec_madd(fs11
,tsc
,nul
);
10494 fs12
= vec_madd(fs12
,tsc
,nul
);
10495 fs13
= vec_madd(fs13
,tsc
,nul
);
10496 fs21
= vec_madd(fs21
,tsc
,nul
);
10497 fs22
= vec_madd(fs22
,tsc
,nul
);
10498 fs23
= vec_madd(fs23
,tsc
,nul
);
10499 fs31
= vec_madd(fs31
,tsc
,nul
);
10500 fs32
= vec_madd(fs32
,tsc
,nul
);
10501 fs33
= vec_madd(fs33
,tsc
,nul
);
10503 fs11
= vec_madd(fs11
,rinv11
,nul
);
10504 fs12
= vec_madd(fs12
,rinv12
,nul
);
10505 fs13
= vec_madd(fs13
,rinv13
,nul
);
10506 fs21
= vec_madd(fs21
,rinv21
,nul
);
10507 fs22
= vec_madd(fs22
,rinv22
,nul
);
10508 fs23
= vec_madd(fs23
,rinv23
,nul
);
10509 fs31
= vec_madd(fs31
,rinv31
,nul
);
10510 fs32
= vec_madd(fs32
,rinv32
,nul
);
10511 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-site forces: fi(i) += fs(ij)*d(ij). */
10513 fix1
= vec_madd(fs11
,dx11
,fix1
);
10514 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
10515 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
10516 fix2
= vec_madd(fs21
,dx21
,fix2
);
10517 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
10518 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
10519 fix3
= vec_madd(fs31
,dx31
,fix3
);
10520 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
10521 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
10523 fix1
= vec_madd(fs12
,dx12
,fix1
);
10524 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
10525 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
10526 fix2
= vec_madd(fs22
,dx22
,fix2
);
10527 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
10528 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
10529 fix3
= vec_madd(fs32
,dx32
,fix3
);
10530 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
10531 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
10533 fix1
= vec_madd(fs13
,dx13
,fix1
);
10534 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
10535 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
10536 fix2
= vec_madd(fs23
,dx23
,fix2
);
10537 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
10538 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
10539 fix3
= vec_madd(fs33
,dx33
,fix3
);
10540 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
10541 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-site forces: fj(j) = nul - sum over i of fs(ij)*d(ij). */
10543 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
10544 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
10545 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
10546 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
10547 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
10548 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
10549 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
10550 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
10551 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
10553 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
10554 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
10555 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
10556 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
10557 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
10558 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
10559 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
10560 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
10561 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
10563 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
10564 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
10565 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
10566 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
10567 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
10568 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
10569 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
10570 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
10571 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to faction for the single j-water. */
10573 add_force_to_1_water(faction
+j3a
,
10574 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
10576 /* update outer data */
/* Pass the accumulated i-water forces to faction+ii3 and fshift+is3
 * (how the vector elements are reduced is up to the helper, defined elsewhere). */
10577 update_i_water_forces(faction
+ii3
,fshift
+is3
,
10578 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
/* Add the Coulomb energy in vctot to Vc[gid[n]] (presumably summing the
 * vector elements - helper defined elsewhere). */
10580 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * inl3130_altivec: AltiVec inner loop for pairs of three-site molecules
 * (the helpers it calls are named load_N_water / add_force_to_N_water).
 * For each of the 3x3 site pairs the visible body looks up a tabulated
 * Coulomb term (do_N_ctable_coul on r*tabscale); for the 1-1 pair only it
 * also evaluates a c12*rinv^12 - c6*rinv^6 term.
 * NOTE(review): the parameter list and several statements are absent from
 * this listing (the embedded source line numbers jump).  Names used below
 * without a visible declaration (facel, tabscale, charge, nbfp, pos,
 * shiftvec, faction, fshift, Vc, Vnb, gid, VFtab, nri) are presumably
 * parameters - confirm against the complete file.
 */
10586 void inl3130_altivec(
/* Coordinates of the three i sites and of the three j sites. */
10607 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
10608 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
/* dAB = (i site A) - (j site B), one vector per Cartesian component. */
10610 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
10611 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
10612 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
/* Squared distance, distance (rsq*rinv) and do_9_invsqrt output per pair. */
10614 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
10615 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
10616 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
/* Only the 1-1 pair needs rinv^2 (it feeds rinvsix). */
10617 vector
float rinvsq11
;
/* NOTE(review): vc11..vc33, VVc and FFc are not referenced in the lines visible here. */
10618 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
,tsc
,VVc
,FFc
;
/* NOTE(review): vcoul1..vcoul3 are not referenced in the visible lines; nul is
 * used as the additive term of every vec_madd/vec_nmsub but its initialisation
 * is not visible in this listing - presumably a zero vector, confirm. */
10620 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
/* Scalar force factor per site pair; fs11c holds the Coulomb part of pair 1-1. */
10621 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
,fs11c
;
/* Force accumulators for the i sites and per-iteration forces on the j sites. */
10622 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
10623 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
/* Energy accumulators, charge products and the c6/c12 pair parameters. */
10624 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
,rinvsix
;
/* The ...t variants are the vec_sld-shifted copies used in the remainder branches. */
10625 vector
float vnb6
,vnb12
,vnbtot
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
/* Outputs of the do_N_ctable_coul lookups, one VV/FF pair per site pair. */
10626 vector
float VV11c
,FF11c
,VV12c
,FF12c
,VV13c
,FF13c
;
10627 vector
float VV21c
,FF21c
,VV22c
,FF22c
,VV23c
,FF23c
;
10628 vector
float VV31c
,FF31c
,VV32c
,FF32c
,VV33c
,FF33c
;
/* Loop counters and array offsets.  NOTE(review): the assignments to most of
 * these (ii, is3, ii3, nj0, nj1, tj, jnr*, j3*) are not present in this listing. */
10630 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
10631 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
10632 int j3a
,j3b
,j3c
,j3d
;
/* Load the scalars facel and tabscale into vectors (helper name says "splat",
 * i.e. presumably the value is replicated in every element). */
10635 vfacel
=load_float_and_splat(&facel
);
10636 tsc
=load_float_and_splat(&tabscale
);
/* Charges are read from charge[ii] and charge[ii+1]; the assignment of ii is
 * not visible in this listing. */
10638 qO
= load_float_and_splat(charge
+ii
);
10639 qH
= load_float_and_splat(charge
+ii
+1);
/* Charge products (vec_madd(a,b,c) = a*b + c) ... */
10640 qqOO
= vec_madd(qO
,qO
,nul
);
10641 qqOH
= vec_madd(qO
,qH
,nul
);
10642 qqHH
= vec_madd(qH
,qH
,nul
);
/* ... each then multiplied by facel. */
10643 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
10644 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
10645 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* Fetch c6/c12 from nbfp+tj (tj is set on a line not shown here) and
 * replicate element 0 of each across the whole vector. */
10648 load_1_pair(nbfp
+tj
,&c6
,&c12
);
10649 c6
= vec_splat(c6
,0);
10650 c12
= vec_splat(c12
,0);
/* Outer loop: one pass per entry n in [0,nri). */
10652 for(n
=0;n
<nri
;n
++) {
/* Load the i molecule from pos+ii3 together with shiftvec+is3 into the nine
 * i-site coordinate vectors (helper not shown; by its name it applies the
 * shift and splats each coordinate - confirm). */
10656 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
10657 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: handles four j molecules per iteration while at least
 * four remain (k < nj1-3).  The lines that derive j3a..j3d are not present
 * in this listing. */
10672 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
/* Load the site coordinates of the four j molecules (helper not shown). */
10681 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
10682 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Displacements: dAB = (i site A) - (j site B) for all nine site pairs. */
10683 dx11
= vec_sub(ix1
,jx1
);
10684 dx12
= vec_sub(ix1
,jx2
);
10685 dx13
= vec_sub(ix1
,jx3
);
10686 dy11
= vec_sub(iy1
,jy1
);
10687 dy12
= vec_sub(iy1
,jy2
);
10688 dy13
= vec_sub(iy1
,jy3
);
10689 dz11
= vec_sub(iz1
,jz1
);
10690 dz12
= vec_sub(iz1
,jz2
);
10691 dz13
= vec_sub(iz1
,jz3
);
10692 dx21
= vec_sub(ix2
,jx1
);
10693 dx22
= vec_sub(ix2
,jx2
);
10694 dx23
= vec_sub(ix2
,jx3
);
10695 dy21
= vec_sub(iy2
,jy1
);
10696 dy22
= vec_sub(iy2
,jy2
);
10697 dy23
= vec_sub(iy2
,jy3
);
10698 dz21
= vec_sub(iz2
,jz1
);
10699 dz22
= vec_sub(iz2
,jz2
);
10700 dz23
= vec_sub(iz2
,jz3
);
10701 dx31
= vec_sub(ix3
,jx1
);
10702 dx32
= vec_sub(ix3
,jx2
);
10703 dx33
= vec_sub(ix3
,jx3
);
10704 dy31
= vec_sub(iy3
,jy1
);
10705 dy32
= vec_sub(iy3
,jy2
);
10706 dy33
= vec_sub(iy3
,jy3
);
10707 dz31
= vec_sub(iz3
,jz1
);
10708 dz32
= vec_sub(iz3
,jz2
);
10709 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz, built with three
 * multiply-adds per pair (vec_madd(a,b,c) = a*b + c). */
10711 rsq11
= vec_madd(dx11
,dx11
,nul
);
10712 rsq12
= vec_madd(dx12
,dx12
,nul
);
10713 rsq13
= vec_madd(dx13
,dx13
,nul
);
10714 rsq21
= vec_madd(dx21
,dx21
,nul
);
10715 rsq22
= vec_madd(dx22
,dx22
,nul
);
10716 rsq23
= vec_madd(dx23
,dx23
,nul
);
10717 rsq31
= vec_madd(dx31
,dx31
,nul
);
10718 rsq32
= vec_madd(dx32
,dx32
,nul
);
10719 rsq33
= vec_madd(dx33
,dx33
,nul
);
10720 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
10721 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
10722 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
10723 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
10724 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
10725 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
10726 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
10727 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
10728 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
10729 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
10730 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
10731 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
10732 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
10733 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
10734 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
10735 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
10736 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
10737 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Fill rinvAB from rsqAB (helper not shown; by its name an inverse square
 * root).  NOTE(review): two argument lines of this call are absent from the
 * listing (embedded numbers jump 10739 -> 10742). */
10739 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
10742 &rinv11
,&rinv12
,&rinv13
,
10743 &rinv21
,&rinv22
,&rinv23
,
10744 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq*rinv for every pair; interleaved with it, rinvsix is built as
 * rinvsq11*rinvsq11*rinvsq11 for the 1-1 pair only. */
10746 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
10747 r11
= vec_madd(rsq11
,rinv11
,nul
);
10748 r12
= vec_madd(rsq12
,rinv12
,nul
);
10749 r13
= vec_madd(rsq13
,rinv13
,nul
);
10750 r21
= vec_madd(rsq21
,rinv21
,nul
);
10751 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
10752 r22
= vec_madd(rsq22
,rinv22
,nul
);
10753 r23
= vec_madd(rsq23
,rinv23
,nul
);
10754 r31
= vec_madd(rsq31
,rinv31
,nul
);
10755 r32
= vec_madd(rsq32
,rinv32
,nul
);
10756 r33
= vec_madd(rsq33
,rinv33
,nul
);
10757 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
/* Table lookup per pair at r*tsc; VVxx is later multiplied into vctot and
 * FFxx into the force factor (helper not shown). */
10759 do_4_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
10760 do_4_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
10761 do_4_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
10762 do_4_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
10763 do_4_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
10764 do_4_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
10765 do_4_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
10766 do_4_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
10767 do_4_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* 1-1 pair: vnb6 = c6*rinvsix, vnb12 = c12*rinvsix^2 and
 * fs11 = (12*vnb12 - 6*vnb6)*rinv11, interleaved with the Coulomb factors
 * fsAB = nul - qq*FFAB (vec_nmsub(a,b,c) = c - a*b).  Both energy sums are
 * updated here as well. */
10769 vnb6
= vec_madd(c6
,rinvsix
,nul
);
10770 vnb12
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
10771 fs11
= vec_madd(vec_twelve(),vnb12
,nul
);
10772 fs11c
= vec_nmsub(qqOO
,FF11c
,nul
);
10773 fs12
= vec_nmsub(qqOH
,FF12c
,nul
);
10774 fs13
= vec_nmsub(qqOH
,FF13c
,nul
);
10775 fs21
= vec_nmsub(qqOH
,FF21c
,nul
);
10776 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
10777 fs22
= vec_nmsub(qqHH
,FF22c
,nul
);
10778 fs23
= vec_nmsub(qqHH
,FF23c
,nul
);
10779 fs31
= vec_nmsub(qqOH
,FF31c
,nul
);
10780 fs32
= vec_nmsub(qqHH
,FF32c
,nul
);
10781 fs11
= vec_madd(fs11
,rinv11
,nul
);
10782 fs33
= vec_nmsub(qqHH
,FF33c
,nul
);
10783 vnbtot
= vec_add(vnbtot
,vnb12
);
10784 vnbtot
= vec_sub(vnbtot
,vnb6
);
10785 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
10786 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
10787 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
10788 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
10789 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
10790 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
10791 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
10792 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
10793 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
/* Multiply the Coulomb factors by tsc; for pair 1-1 the scaled Coulomb
 * part is added onto the term already held in fs11. */
10795 fs11
= vec_madd(fs11c
,tsc
,fs11
);
10796 fs12
= vec_madd(fs12
,tsc
,nul
);
10797 fs13
= vec_madd(fs13
,tsc
,nul
);
10798 fs21
= vec_madd(fs21
,tsc
,nul
);
10799 fs22
= vec_madd(fs22
,tsc
,nul
);
10800 fs23
= vec_madd(fs23
,tsc
,nul
);
10801 fs31
= vec_madd(fs31
,tsc
,nul
);
10802 fs32
= vec_madd(fs32
,tsc
,nul
);
10803 fs33
= vec_madd(fs33
,tsc
,nul
);
/* Multiply every factor by rinv so that fs*d below is a force component. */
10805 fs11
= vec_madd(fs11
,rinv11
,nul
);
10806 fs12
= vec_madd(fs12
,rinv12
,nul
);
10807 fs13
= vec_madd(fs13
,rinv13
,nul
);
10808 fs21
= vec_madd(fs21
,rinv21
,nul
);
10809 fs22
= vec_madd(fs22
,rinv22
,nul
);
10810 fs23
= vec_madd(fs23
,rinv23
,nul
);
10811 fs31
= vec_madd(fs31
,rinv31
,nul
);
10812 fs32
= vec_madd(fs32
,rinv32
,nul
);
10813 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* Accumulate forces on the i sites: fiA += fsAB * dAB for B = 1,2,3. */
10815 fix1
= vec_madd(fs11
,dx11
,fix1
);
10816 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
10817 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
10818 fix2
= vec_madd(fs21
,dx21
,fix2
);
10819 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
10820 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
10821 fix3
= vec_madd(fs31
,dx31
,fix3
);
10822 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
10823 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
10825 fix1
= vec_madd(fs12
,dx12
,fix1
);
10826 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
10827 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
10828 fix2
= vec_madd(fs22
,dx22
,fix2
);
10829 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
10830 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
10831 fix3
= vec_madd(fs32
,dx32
,fix3
);
10832 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
10833 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
10835 fix1
= vec_madd(fs13
,dx13
,fix1
);
10836 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
10837 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
10838 fix2
= vec_madd(fs23
,dx23
,fix2
);
10839 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
10840 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
10841 fix3
= vec_madd(fs33
,dx33
,fix3
);
10842 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
10843 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* Forces on the j sites: fjB = -(fs1B*d1B + fs2B*d2B + fs3B*d3B); the
 * first group starts from nul, the next two subtract from the running value. */
10845 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
10846 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
10847 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
10848 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
10849 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
10850 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
10851 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
10852 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
10853 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
10855 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
10856 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
10857 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
10858 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
10859 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
10860 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
10861 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
10862 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
10863 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
10865 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
10866 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
10867 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
10868 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
10869 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
10870 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
10871 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
10872 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
10873 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Add the j forces into faction at the four j molecule offsets (helper not shown). */
10875 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
10876 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder branch for three j molecules.  NOTE(review): the condition that
 * guards this branch and the j3a..j3c setup are absent from this listing
 * (embedded numbers jump 10876 -> 10885).  Same arithmetic as the 4-wide
 * loop, with the fourth vector element masked out. */
10885 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
10886 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift each parameter vector left by 4 bytes (one float) against nul, so
 * the last element is taken from nul (presumably zero - confirm) and the
 * unused lane adds nothing to energies or forces. */
10887 qqOOt
= vec_sld(qqOO
,nul
,4);
10888 qqOHt
= vec_sld(qqOH
,nul
,4);
10889 qqHHt
= vec_sld(qqHH
,nul
,4);
10890 c6t
= vec_sld(c6
,nul
,4);
10891 c12t
= vec_sld(c12
,nul
,4);
/* Displacements: dAB = (i site A) - (j site B). */
10893 dx11
= vec_sub(ix1
,jx1
);
10894 dx12
= vec_sub(ix1
,jx2
);
10895 dx13
= vec_sub(ix1
,jx3
);
10896 dy11
= vec_sub(iy1
,jy1
);
10897 dy12
= vec_sub(iy1
,jy2
);
10898 dy13
= vec_sub(iy1
,jy3
);
10899 dz11
= vec_sub(iz1
,jz1
);
10900 dz12
= vec_sub(iz1
,jz2
);
10901 dz13
= vec_sub(iz1
,jz3
);
10902 dx21
= vec_sub(ix2
,jx1
);
10903 dx22
= vec_sub(ix2
,jx2
);
10904 dx23
= vec_sub(ix2
,jx3
);
10905 dy21
= vec_sub(iy2
,jy1
);
10906 dy22
= vec_sub(iy2
,jy2
);
10907 dy23
= vec_sub(iy2
,jy3
);
10908 dz21
= vec_sub(iz2
,jz1
);
10909 dz22
= vec_sub(iz2
,jz2
);
10910 dz23
= vec_sub(iz2
,jz3
);
10911 dx31
= vec_sub(ix3
,jx1
);
10912 dx32
= vec_sub(ix3
,jx2
);
10913 dx33
= vec_sub(ix3
,jx3
);
10914 dy31
= vec_sub(iy3
,jy1
);
10915 dy32
= vec_sub(iy3
,jy2
);
10916 dy33
= vec_sub(iy3
,jy3
);
10917 dz31
= vec_sub(iz3
,jz1
);
10918 dz32
= vec_sub(iz3
,jz2
);
10919 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz. */
10921 rsq11
= vec_madd(dx11
,dx11
,nul
);
10922 rsq12
= vec_madd(dx12
,dx12
,nul
);
10923 rsq13
= vec_madd(dx13
,dx13
,nul
);
10924 rsq21
= vec_madd(dx21
,dx21
,nul
);
10925 rsq22
= vec_madd(dx22
,dx22
,nul
);
10926 rsq23
= vec_madd(dx23
,dx23
,nul
);
10927 rsq31
= vec_madd(dx31
,dx31
,nul
);
10928 rsq32
= vec_madd(dx32
,dx32
,nul
);
10929 rsq33
= vec_madd(dx33
,dx33
,nul
);
10930 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
10931 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
10932 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
10933 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
10934 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
10935 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
10936 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
10937 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
10938 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
10939 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
10940 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
10941 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
10942 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
10943 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
10944 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
10945 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
10946 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
10947 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Mask the unused last lane of every rsq before the inverse square root
 * (helper not shown; behaviour taken from its name). */
10949 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
10950 &rsq21
,&rsq22
,&rsq23
,
10951 &rsq31
,&rsq32
,&rsq33
);
/* NOTE(review): two argument lines of this call are absent from the listing
 * (embedded numbers jump 10953 -> 10956). */
10953 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
10956 &rinv11
,&rinv12
,&rinv13
,
10957 &rinv21
,&rinv22
,&rinv23
,
10958 &rinv31
,&rinv32
,&rinv33
);
/* Mask the same lane again in the results - presumably because the inverse
 * square root of the zeroed lane is not a usable value; confirm. */
10960 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
10961 &rinv21
,&rinv22
,&rinv23
,
10962 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq*rinv per pair; rinvsix = rinvsq11^3 for the 1-1 pair. */
10964 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
10965 r11
= vec_madd(rsq11
,rinv11
,nul
);
10966 r12
= vec_madd(rsq12
,rinv12
,nul
);
10967 r13
= vec_madd(rsq13
,rinv13
,nul
);
10968 r21
= vec_madd(rsq21
,rinv21
,nul
);
10969 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
10970 r22
= vec_madd(rsq22
,rinv22
,nul
);
10971 r23
= vec_madd(rsq23
,rinv23
,nul
);
10972 r31
= vec_madd(rsq31
,rinv31
,nul
);
10973 r32
= vec_madd(rsq32
,rinv32
,nul
);
10974 r33
= vec_madd(rsq33
,rinv33
,nul
);
10975 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
/* Table lookup per pair at r*tsc, three-lane variant of the helper. */
10977 do_3_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
10978 do_3_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
10979 do_3_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
10980 do_3_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
10981 do_3_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
10982 do_3_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
10983 do_3_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
10984 do_3_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
10985 do_3_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Force factors and energy sums as in the 4-wide loop, but using the
 * shifted parameters (c6t, c12t, qq..t). */
10987 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
10988 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
10989 fs11
= vec_madd(vec_twelve(),vnb12
,nul
);
10990 fs11c
= vec_nmsub(qqOOt
,FF11c
,nul
);
10991 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
10992 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
10993 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
10994 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
10995 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
10996 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
10997 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
10998 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
10999 fs11
= vec_madd(fs11
,rinv11
,nul
);
11000 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
11001 vnbtot
= vec_add(vnbtot
,vnb12
);
11002 vnbtot
= vec_sub(vnbtot
,vnb6
);
11003 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
11004 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
11005 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
11006 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
11007 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
11008 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
11009 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
11010 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
11011 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Multiply the Coulomb factors by tsc (pair 1-1 adds onto fs11). */
11013 fs11
= vec_madd(fs11c
,tsc
,fs11
);
11014 fs12
= vec_madd(fs12
,tsc
,nul
);
11015 fs13
= vec_madd(fs13
,tsc
,nul
);
11016 fs21
= vec_madd(fs21
,tsc
,nul
);
11017 fs22
= vec_madd(fs22
,tsc
,nul
);
11018 fs23
= vec_madd(fs23
,tsc
,nul
);
11019 fs31
= vec_madd(fs31
,tsc
,nul
);
11020 fs32
= vec_madd(fs32
,tsc
,nul
);
11021 fs33
= vec_madd(fs33
,tsc
,nul
);
/* Multiply every factor by rinv. */
11023 fs11
= vec_madd(fs11
,rinv11
,nul
);
11024 fs12
= vec_madd(fs12
,rinv12
,nul
);
11025 fs13
= vec_madd(fs13
,rinv13
,nul
);
11026 fs21
= vec_madd(fs21
,rinv21
,nul
);
11027 fs22
= vec_madd(fs22
,rinv22
,nul
);
11028 fs23
= vec_madd(fs23
,rinv23
,nul
);
11029 fs31
= vec_madd(fs31
,rinv31
,nul
);
11030 fs32
= vec_madd(fs32
,rinv32
,nul
);
11031 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* Accumulate forces on the i sites: fiA += fsAB * dAB. */
11033 fix1
= vec_madd(fs11
,dx11
,fix1
);
11034 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
11035 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
11036 fix2
= vec_madd(fs21
,dx21
,fix2
);
11037 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
11038 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
11039 fix3
= vec_madd(fs31
,dx31
,fix3
);
11040 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
11041 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
11043 fix1
= vec_madd(fs12
,dx12
,fix1
);
11044 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
11045 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
11046 fix2
= vec_madd(fs22
,dx22
,fix2
);
11047 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
11048 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
11049 fix3
= vec_madd(fs32
,dx32
,fix3
);
11050 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
11051 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
11053 fix1
= vec_madd(fs13
,dx13
,fix1
);
11054 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
11055 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
11056 fix2
= vec_madd(fs23
,dx23
,fix2
);
11057 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
11058 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
11059 fix3
= vec_madd(fs33
,dx33
,fix3
);
11060 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
11061 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* Forces on the j sites: fjB = -(fs1B*d1B + fs2B*d2B + fs3B*d3B). */
11063 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
11064 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
11065 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
11066 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
11067 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
11068 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
11069 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
11070 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
11071 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
11073 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
11074 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
11075 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
11076 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
11077 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
11078 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
11079 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
11080 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
11081 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
11083 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
11084 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
11085 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
11086 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
11087 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
11088 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
11089 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
11090 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
11091 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Add the j forces into faction at the three j molecule offsets (helper not shown). */
11093 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
11094 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder branch taken when k < nj1-1: two j molecules.  Same arithmetic
 * as the 4-wide loop with the last two vector elements masked out.  The
 * j3a/j3b setup lines are absent from this listing. */
11095 } else if(k
<(nj1
-1)) {
11100 load_2_water(pos
+j3a
,pos
+j3b
,
11101 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift left by 8 bytes (two floats) against nul: the last two elements are
 * taken from nul (presumably zero - confirm). */
11102 qqOOt
= vec_sld(qqOO
,nul
,8);
11103 qqOHt
= vec_sld(qqOH
,nul
,8);
11104 qqHHt
= vec_sld(qqHH
,nul
,8);
11105 c6t
= vec_sld(c6
,nul
,8);
11106 c12t
= vec_sld(c12
,nul
,8);
/* Displacements: dAB = (i site A) - (j site B). */
11108 dx11
= vec_sub(ix1
,jx1
);
11109 dx12
= vec_sub(ix1
,jx2
);
11110 dx13
= vec_sub(ix1
,jx3
);
11111 dy11
= vec_sub(iy1
,jy1
);
11112 dy12
= vec_sub(iy1
,jy2
);
11113 dy13
= vec_sub(iy1
,jy3
);
11114 dz11
= vec_sub(iz1
,jz1
);
11115 dz12
= vec_sub(iz1
,jz2
);
11116 dz13
= vec_sub(iz1
,jz3
);
11117 dx21
= vec_sub(ix2
,jx1
);
11118 dx22
= vec_sub(ix2
,jx2
);
11119 dx23
= vec_sub(ix2
,jx3
);
11120 dy21
= vec_sub(iy2
,jy1
);
11121 dy22
= vec_sub(iy2
,jy2
);
11122 dy23
= vec_sub(iy2
,jy3
);
11123 dz21
= vec_sub(iz2
,jz1
);
11124 dz22
= vec_sub(iz2
,jz2
);
11125 dz23
= vec_sub(iz2
,jz3
);
11126 dx31
= vec_sub(ix3
,jx1
);
11127 dx32
= vec_sub(ix3
,jx2
);
11128 dx33
= vec_sub(ix3
,jx3
);
11129 dy31
= vec_sub(iy3
,jy1
);
11130 dy32
= vec_sub(iy3
,jy2
);
11131 dy33
= vec_sub(iy3
,jy3
);
11132 dz31
= vec_sub(iz3
,jz1
);
11133 dz32
= vec_sub(iz3
,jz2
);
11134 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz. */
11136 rsq11
= vec_madd(dx11
,dx11
,nul
);
11137 rsq12
= vec_madd(dx12
,dx12
,nul
);
11138 rsq13
= vec_madd(dx13
,dx13
,nul
);
11139 rsq21
= vec_madd(dx21
,dx21
,nul
);
11140 rsq22
= vec_madd(dx22
,dx22
,nul
);
11141 rsq23
= vec_madd(dx23
,dx23
,nul
);
11142 rsq31
= vec_madd(dx31
,dx31
,nul
);
11143 rsq32
= vec_madd(dx32
,dx32
,nul
);
11144 rsq33
= vec_madd(dx33
,dx33
,nul
);
11145 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
11146 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
11147 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
11148 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
11149 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
11150 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
11151 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
11152 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
11153 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
11154 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
11155 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
11156 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
11157 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
11158 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
11159 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
11160 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
11161 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
11162 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Mask the two unused lanes of every rsq before the inverse square root
 * (helper not shown; behaviour taken from its name). */
11164 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
11165 &rsq21
,&rsq22
,&rsq23
,
11166 &rsq31
,&rsq32
,&rsq33
);
/* NOTE(review): two argument lines of this call are absent from the listing
 * (embedded numbers jump 11168 -> 11171). */
11168 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
11171 &rinv11
,&rinv12
,&rinv13
,
11172 &rinv21
,&rinv22
,&rinv23
,
11173 &rinv31
,&rinv32
,&rinv33
);
/* Mask the same lanes again in the results. */
11175 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
11176 &rinv21
,&rinv22
,&rinv23
,
11177 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq*rinv per pair; rinvsix = rinvsq11^3 for the 1-1 pair. */
11179 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
11180 r11
= vec_madd(rsq11
,rinv11
,nul
);
11181 r12
= vec_madd(rsq12
,rinv12
,nul
);
11182 r13
= vec_madd(rsq13
,rinv13
,nul
);
11183 r21
= vec_madd(rsq21
,rinv21
,nul
);
11184 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
11185 r22
= vec_madd(rsq22
,rinv22
,nul
);
11186 r23
= vec_madd(rsq23
,rinv23
,nul
);
11187 r31
= vec_madd(rsq31
,rinv31
,nul
);
11188 r32
= vec_madd(rsq32
,rinv32
,nul
);
11189 r33
= vec_madd(rsq33
,rinv33
,nul
);
11190 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
/* Table lookup per pair at r*tsc, two-lane variant of the helper. */
11192 do_2_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
11193 do_2_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
11194 do_2_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
11195 do_2_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
11196 do_2_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
11197 do_2_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
11198 do_2_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
11199 do_2_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
11200 do_2_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Force factors and energy sums, using the shifted parameters. */
11202 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
11203 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
11204 fs11
= vec_madd(vec_twelve(),vnb12
,nul
);
11205 fs11c
= vec_nmsub(qqOOt
,FF11c
,nul
);
11206 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
11207 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
11208 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
11209 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
11210 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
11211 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
11212 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
11213 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
11214 fs11
= vec_madd(fs11
,rinv11
,nul
);
11215 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
11216 vnbtot
= vec_add(vnbtot
,vnb12
);
11217 vnbtot
= vec_sub(vnbtot
,vnb6
);
11218 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
11219 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
11220 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
11221 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
11222 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
11223 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
11224 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
11225 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
11226 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Multiply the Coulomb factors by tsc (pair 1-1 adds onto fs11). */
11228 fs11
= vec_madd(fs11c
,tsc
,fs11
);
11229 fs12
= vec_madd(fs12
,tsc
,nul
);
11230 fs13
= vec_madd(fs13
,tsc
,nul
);
11231 fs21
= vec_madd(fs21
,tsc
,nul
);
11232 fs22
= vec_madd(fs22
,tsc
,nul
);
11233 fs23
= vec_madd(fs23
,tsc
,nul
);
11234 fs31
= vec_madd(fs31
,tsc
,nul
);
11235 fs32
= vec_madd(fs32
,tsc
,nul
);
11236 fs33
= vec_madd(fs33
,tsc
,nul
);
/* Multiply every factor by rinv. */
11238 fs11
= vec_madd(fs11
,rinv11
,nul
);
11239 fs12
= vec_madd(fs12
,rinv12
,nul
);
11240 fs13
= vec_madd(fs13
,rinv13
,nul
);
11241 fs21
= vec_madd(fs21
,rinv21
,nul
);
11242 fs22
= vec_madd(fs22
,rinv22
,nul
);
11243 fs23
= vec_madd(fs23
,rinv23
,nul
);
11244 fs31
= vec_madd(fs31
,rinv31
,nul
);
11245 fs32
= vec_madd(fs32
,rinv32
,nul
);
11246 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* Accumulate forces on the i sites: fiA += fsAB * dAB. */
11248 fix1
= vec_madd(fs11
,dx11
,fix1
);
11249 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
11250 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
11251 fix2
= vec_madd(fs21
,dx21
,fix2
);
11252 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
11253 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
11254 fix3
= vec_madd(fs31
,dx31
,fix3
);
11255 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
11256 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
11258 fix1
= vec_madd(fs12
,dx12
,fix1
);
11259 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
11260 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
11261 fix2
= vec_madd(fs22
,dx22
,fix2
);
11262 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
11263 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
11264 fix3
= vec_madd(fs32
,dx32
,fix3
);
11265 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
11266 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
11268 fix1
= vec_madd(fs13
,dx13
,fix1
);
11269 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
11270 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
11271 fix2
= vec_madd(fs23
,dx23
,fix2
);
11272 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
11273 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
11274 fix3
= vec_madd(fs33
,dx33
,fix3
);
11275 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
11276 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* Forces on the j sites: fjB = -(fs1B*d1B + fs2B*d2B + fs3B*d3B). */
11278 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
11279 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
11280 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
11281 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
11282 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
11283 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
11284 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
11285 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
11286 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
11288 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
11289 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
11290 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
11291 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
11292 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
11293 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
11294 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
11295 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
11296 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
11298 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
11299 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
11300 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
11301 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
11302 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
11303 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
11304 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
11305 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
11306 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Add the j forces into faction at the two j molecule offsets (helper not shown). */
11308 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
11309 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder branch for a single j molecule.  NOTE(review): the condition
 * that guards this branch and the j3a setup are absent from this listing
 * (embedded numbers jump 11309 -> 11313).  Same arithmetic as the 4-wide
 * loop with the last three vector elements masked out. */
11313 load_1_water(pos
+j3a
,
11314 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift left by 12 bytes (three floats) against nul: only the first element
 * keeps a parameter value, the rest come from nul (presumably zero - confirm). */
11315 qqOOt
= vec_sld(qqOO
,nul
,12);
11316 qqOHt
= vec_sld(qqOH
,nul
,12);
11317 qqHHt
= vec_sld(qqHH
,nul
,12);
11318 c6t
= vec_sld(c6
,nul
,12);
11319 c12t
= vec_sld(c12
,nul
,12);
/* Displacements: dAB = (i site A) - (j site B). */
11321 dx11
= vec_sub(ix1
,jx1
);
11322 dx12
= vec_sub(ix1
,jx2
);
11323 dx13
= vec_sub(ix1
,jx3
);
11324 dy11
= vec_sub(iy1
,jy1
);
11325 dy12
= vec_sub(iy1
,jy2
);
11326 dy13
= vec_sub(iy1
,jy3
);
11327 dz11
= vec_sub(iz1
,jz1
);
11328 dz12
= vec_sub(iz1
,jz2
);
11329 dz13
= vec_sub(iz1
,jz3
);
11330 dx21
= vec_sub(ix2
,jx1
);
11331 dx22
= vec_sub(ix2
,jx2
);
11332 dx23
= vec_sub(ix2
,jx3
);
11333 dy21
= vec_sub(iy2
,jy1
);
11334 dy22
= vec_sub(iy2
,jy2
);
11335 dy23
= vec_sub(iy2
,jy3
);
11336 dz21
= vec_sub(iz2
,jz1
);
11337 dz22
= vec_sub(iz2
,jz2
);
11338 dz23
= vec_sub(iz2
,jz3
);
11339 dx31
= vec_sub(ix3
,jx1
);
11340 dx32
= vec_sub(ix3
,jx2
);
11341 dx33
= vec_sub(ix3
,jx3
);
11342 dy31
= vec_sub(iy3
,jy1
);
11343 dy32
= vec_sub(iy3
,jy2
);
11344 dy33
= vec_sub(iy3
,jy3
);
11345 dz31
= vec_sub(iz3
,jz1
);
11346 dz32
= vec_sub(iz3
,jz2
);
11347 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz. */
11349 rsq11
= vec_madd(dx11
,dx11
,nul
);
11350 rsq12
= vec_madd(dx12
,dx12
,nul
);
11351 rsq13
= vec_madd(dx13
,dx13
,nul
);
11352 rsq21
= vec_madd(dx21
,dx21
,nul
);
11353 rsq22
= vec_madd(dx22
,dx22
,nul
);
11354 rsq23
= vec_madd(dx23
,dx23
,nul
);
11355 rsq31
= vec_madd(dx31
,dx31
,nul
);
11356 rsq32
= vec_madd(dx32
,dx32
,nul
);
11357 rsq33
= vec_madd(dx33
,dx33
,nul
);
11358 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
11359 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
11360 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
11361 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
11362 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
11363 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
11364 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
11365 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
11366 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
11367 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
11368 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
11369 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
11370 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
11371 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
11372 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
11373 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
11374 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
11375 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Mask the three unused lanes of every rsq before the inverse square root
 * (helper not shown; behaviour taken from its name). */
11377 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
11378 &rsq21
,&rsq22
,&rsq23
,
11379 &rsq31
,&rsq32
,&rsq33
);
/* NOTE(review): two argument lines of this call are absent from the listing
 * (embedded numbers jump 11381 -> 11384). */
11381 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
11384 &rinv11
,&rinv12
,&rinv13
,
11385 &rinv21
,&rinv22
,&rinv23
,
11386 &rinv31
,&rinv32
,&rinv33
);
/* Mask the same lanes again in the results. */
11388 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
11389 &rinv21
,&rinv22
,&rinv23
,
11390 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq*rinv per pair; rinvsix = rinvsq11^3 for the 1-1 pair. */
11392 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
11393 r11
= vec_madd(rsq11
,rinv11
,nul
);
11394 r12
= vec_madd(rsq12
,rinv12
,nul
);
11395 r13
= vec_madd(rsq13
,rinv13
,nul
);
11396 r21
= vec_madd(rsq21
,rinv21
,nul
);
11397 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
11398 r22
= vec_madd(rsq22
,rinv22
,nul
);
11399 r23
= vec_madd(rsq23
,rinv23
,nul
);
11400 r31
= vec_madd(rsq31
,rinv31
,nul
);
11401 r32
= vec_madd(rsq32
,rinv32
,nul
);
11402 r33
= vec_madd(rsq33
,rinv33
,nul
);
11403 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
/* Table lookup per pair at r*tsc, one-lane variant of the helper. */
11405 do_1_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
,&FF11c
);
11406 do_1_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
11407 do_1_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
11408 do_1_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
11409 do_1_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
11410 do_1_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
11411 do_1_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
11412 do_1_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
11413 do_1_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Force factors and energy sums, using the shifted parameters. */
11415 vnb6
= vec_madd(c6t
,rinvsix
,nul
);
11416 vnb12
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),nul
);
11417 fs11
= vec_madd(vec_twelve(),vnb12
,nul
);
11418 fs11c
= vec_nmsub(qqOOt
,FF11c
,nul
);
11419 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
11420 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
11421 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
11422 fs11
= vec_nmsub(vec_six(),vnb6
,fs11
);
11423 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
11424 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
11425 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
11426 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
11427 fs11
= vec_madd(fs11
,rinv11
,nul
);
11428 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
11429 vnbtot
= vec_add(vnbtot
,vnb12
);
11430 vnbtot
= vec_sub(vnbtot
,vnb6
);
11431 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
11432 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
11433 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
11434 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
11435 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
11436 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
11437 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
11438 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
11439 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Multiply the Coulomb factors by tsc (pair 1-1 adds onto fs11). */
11441 fs11
= vec_madd(fs11c
,tsc
,fs11
);
11442 fs12
= vec_madd(fs12
,tsc
,nul
);
11443 fs13
= vec_madd(fs13
,tsc
,nul
);
11444 fs21
= vec_madd(fs21
,tsc
,nul
);
11445 fs22
= vec_madd(fs22
,tsc
,nul
);
11446 fs23
= vec_madd(fs23
,tsc
,nul
);
11447 fs31
= vec_madd(fs31
,tsc
,nul
);
11448 fs32
= vec_madd(fs32
,tsc
,nul
);
11449 fs33
= vec_madd(fs33
,tsc
,nul
);
/* Multiply every factor by rinv. */
11451 fs11
= vec_madd(fs11
,rinv11
,nul
);
11452 fs12
= vec_madd(fs12
,rinv12
,nul
);
11453 fs13
= vec_madd(fs13
,rinv13
,nul
);
11454 fs21
= vec_madd(fs21
,rinv21
,nul
);
11455 fs22
= vec_madd(fs22
,rinv22
,nul
);
11456 fs23
= vec_madd(fs23
,rinv23
,nul
);
11457 fs31
= vec_madd(fs31
,rinv31
,nul
);
11458 fs32
= vec_madd(fs32
,rinv32
,nul
);
11459 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* Accumulate forces on the i sites: fiA += fsAB * dAB. */
11461 fix1
= vec_madd(fs11
,dx11
,fix1
);
11462 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
11463 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
11464 fix2
= vec_madd(fs21
,dx21
,fix2
);
11465 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
11466 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
11467 fix3
= vec_madd(fs31
,dx31
,fix3
);
11468 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
11469 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
11471 fix1
= vec_madd(fs12
,dx12
,fix1
);
11472 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
11473 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
11474 fix2
= vec_madd(fs22
,dx22
,fix2
);
11475 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
11476 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
11477 fix3
= vec_madd(fs32
,dx32
,fix3
);
11478 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
11479 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
11481 fix1
= vec_madd(fs13
,dx13
,fix1
);
11482 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
11483 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
11484 fix2
= vec_madd(fs23
,dx23
,fix2
);
11485 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
11486 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
11487 fix3
= vec_madd(fs33
,dx33
,fix3
);
11488 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
11489 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* Forces on the j sites: fjB = -(fs1B*d1B + fs2B*d2B + fs3B*d3B). */
11491 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
11492 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
11493 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
11494 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
11495 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
11496 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
11497 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
11498 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
11499 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
11501 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
11502 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
11503 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
11504 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
11505 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
11506 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
11507 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
11508 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
11509 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
11511 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
11512 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
11513 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
11514 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
11515 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
11516 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
11517 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
11518 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
11519 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Add the j forces into faction at the single j molecule offset (helper not shown). */
11521 add_force_to_1_water(faction
+j3a
,
11522 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Per-i write-back: hand the accumulated i-site forces to
 * update_i_water_forces together with faction+ii3 and fshift+is3 (helper
 * not shown), then add the two energy vectors into Vc[gid[n]] and
 * Vnb[gid[n]].  NOTE(review): the closing braces of the loops and of the
 * function are absent from this listing. */
11524 /* update outer data */
11525 update_i_water_forces(faction
+ii3
,fshift
+is3
,
11526 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
11528 add_vector_to_float(Vc
+gid
[n
],vctot
);
11529 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * inl3330_altivec - AltiVec water-water inner loop using table lookups.
 *
 * What the visible statements do:
 *  - facel and tabscale are broadcast to all vector lanes (vfacel, tsc).
 *  - Two charges are read from charge[ii] and charge[ii+1] (named qO/qH)
 *    and combined into the pair products qqOO, qqOH and qqHH, each scaled
 *    by vfacel.
 *  - One c6/c12 pair is read from nbfp+tj and broadcast from element 0.
 *  - For each n in [0,nri) the i water is loaded from pos+ii3 together
 *    with shiftvec+is3 into nine vectors (ix1..iz3), then j waters are
 *    processed four at a time while k < nj1-3, with separate paths for
 *    three, two or one remaining j water(s).
 *  - All nine site-site pairs use the Coulomb outputs of the VFtab table;
 *    only pair 1-1 also uses the VVd/FFd and VVr/FFr outputs, weighted by
 *    c6 and c12 (presumably LJ dispersion and repulsion).
 *  - Forces are accumulated for the i water (fix1..fiz3) and handed to
 *    add_force_to_N_water for the j waters; the energy sums are finally
 *    added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): this text is an extract in which the original line
 * numbers are fused into the statements and several original lines are
 * absent (the numbering jumps, e.g. from 11534 to 11555). The parameter
 * list, the opening brace, the initialisation of nul, ii, tj, ii3, is3,
 * nj0, nj1, j3a..j3d and of the accumulators, some branch heads and the
 * closing braces are not visible here - confirm against the full file.
 */
11534 void inl3330_altivec(
11555 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
11556 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
11558 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
11559 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
11560 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
11562 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
11563 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
11564 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
11565 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
;
11567 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
11568 vector
float fs11
,fs12
,fs13
,fs21
,fs22
,fs23
,fs31
,fs32
,fs33
;
11569 vector
float fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
;
11570 vector
float fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
;
11571 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
;
11572 vector
float vnb6
,vnb12
,vnbtot
,tsc
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
11573 vector
float VV11c
,FF11c
,VV12c
,FF12c
,VV13c
,FF13c
;
11574 vector
float VV21c
,FF21c
,VV22c
,FF22c
,VV23c
,FF23c
;
11575 vector
float VV31c
,FF31c
,VV32c
,FF32c
,VV33c
,FF33c
;
11576 vector
float VVd
,FFd
,VVr
,FFr
;
11578 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
11579 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
11580 int j3a
,j3b
,j3c
,j3d
;
/* Broadcast the scalar arguments facel and tabscale to vectors. */
11583 vfacel
=load_float_and_splat(&facel
);
11584 tsc
=load_float_and_splat(&tabscale
);
/* Charges at charge[ii] and charge[ii+1]; the pair products below are
 * computed once, before the loop over n, and scaled by vfacel.
 * NOTE(review): the statement that sets ii is not visible in this extract. */
11586 qO
= load_float_and_splat(charge
+ii
);
11587 qH
= load_float_and_splat(charge
+ii
+1);
11588 qqOO
= vec_madd(qO
,qO
,nul
);
11589 qqOH
= vec_madd(qO
,qH
,nul
);
11590 qqHH
= vec_madd(qH
,qH
,nul
);
11591 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
11592 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
11593 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* One c6/c12 pair from nbfp+tj, element 0 replicated to every lane.
 * NOTE(review): the statement that sets tj is not visible in this extract. */
11596 load_1_pair(nbfp
+tj
,&c6
,&c12
);
11597 c6
= vec_splat(c6
,0);
11598 c12
= vec_splat(c12
,0);
/* Loop over the nri outer entries. */
11600 for(n
=0;n
<nri
;n
++) {
/* Load the i water from pos+ii3 with shiftvec+is3 into ix1..iz3
 * (by the helper's name: shifted and splatted - helper not shown here). */
11604 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
11605 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main path: four j waters per iteration while k < nj1-3. */
11620 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
11629 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
11630 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Displacements dAB = (i site A) - (j site B) for all nine site pairs. */
11631 dx11
= vec_sub(ix1
,jx1
);
11632 dx12
= vec_sub(ix1
,jx2
);
11633 dx13
= vec_sub(ix1
,jx3
);
11634 dy11
= vec_sub(iy1
,jy1
);
11635 dy12
= vec_sub(iy1
,jy2
);
11636 dy13
= vec_sub(iy1
,jy3
);
11637 dz11
= vec_sub(iz1
,jz1
);
11638 dz12
= vec_sub(iz1
,jz2
);
11639 dz13
= vec_sub(iz1
,jz3
);
11640 dx21
= vec_sub(ix2
,jx1
);
11641 dx22
= vec_sub(ix2
,jx2
);
11642 dx23
= vec_sub(ix2
,jx3
);
11643 dy21
= vec_sub(iy2
,jy1
);
11644 dy22
= vec_sub(iy2
,jy2
);
11645 dy23
= vec_sub(iy2
,jy3
);
11646 dz21
= vec_sub(iz2
,jz1
);
11647 dz22
= vec_sub(iz2
,jz2
);
11648 dz23
= vec_sub(iz2
,jz3
);
11649 dx31
= vec_sub(ix3
,jx1
);
11650 dx32
= vec_sub(ix3
,jx2
);
11651 dx33
= vec_sub(ix3
,jx3
);
11652 dy31
= vec_sub(iy3
,jy1
);
11653 dy32
= vec_sub(iy3
,jy2
);
11654 dy33
= vec_sub(iy3
,jy3
);
11655 dz31
= vec_sub(iz3
,jz1
);
11656 dz32
= vec_sub(iz3
,jz2
);
11657 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances: rsq = dx*dx + dy*dy + dz*dz, built with three
 * multiply-add passes (nul is the addend of the first pass). */
11659 rsq11
= vec_madd(dx11
,dx11
,nul
);
11660 rsq12
= vec_madd(dx12
,dx12
,nul
);
11661 rsq13
= vec_madd(dx13
,dx13
,nul
);
11662 rsq21
= vec_madd(dx21
,dx21
,nul
);
11663 rsq22
= vec_madd(dx22
,dx22
,nul
);
11664 rsq23
= vec_madd(dx23
,dx23
,nul
);
11665 rsq31
= vec_madd(dx31
,dx31
,nul
);
11666 rsq32
= vec_madd(dx32
,dx32
,nul
);
11667 rsq33
= vec_madd(dx33
,dx33
,nul
);
11668 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
11669 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
11670 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
11671 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
11672 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
11673 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
11674 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
11675 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
11676 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
11677 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
11678 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
11679 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
11680 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
11681 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
11682 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
11683 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
11684 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
11685 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* rinv for all nine pairs (helper presumably computes 1/sqrt(rsq)).
 * NOTE(review): two argument lines of this call are missing in the extract. */
11687 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
11690 &rinv11
,&rinv12
,&rinv13
,
11691 &rinv21
,&rinv22
,&rinv23
,
11692 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
11694 r11
= vec_madd(rsq11
,rinv11
,nul
);
11695 r12
= vec_madd(rsq12
,rinv12
,nul
);
11696 r13
= vec_madd(rsq13
,rinv13
,nul
);
11697 r21
= vec_madd(rsq21
,rinv21
,nul
);
11698 r22
= vec_madd(rsq22
,rinv22
,nul
);
11699 r23
= vec_madd(rsq23
,rinv23
,nul
);
11700 r31
= vec_madd(rsq31
,rinv31
,nul
);
11701 r32
= vec_madd(rsq32
,rinv32
,nul
);
11702 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookups at r*tsc: pair 1-1 gets Coulomb plus the VVd/FFd and
 * VVr/FFr outputs, the other eight pairs get the Coulomb outputs only. */
11704 do_4_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
11705 &VV11c
,&FF11c
,&VVd
,&FFd
,&VVr
,&FFr
);
11706 do_4_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
11707 do_4_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
11708 do_4_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
11709 do_4_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
11710 do_4_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
11711 do_4_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
11712 do_4_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
11713 do_4_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* vnbtot += c6*VVd + c12*VVr. */
11715 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
11716 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
/* fsAB = nul - qq*FF (vec_nmsub(a,b,c) yields c - a*b); fs11 additionally
 * subtracts c6*FFd and c12*FFr. */
11718 fs11
= vec_nmsub(qqOO
,FF11c
,nul
);
11719 fs12
= vec_nmsub(qqOH
,FF12c
,nul
);
11720 fs13
= vec_nmsub(qqOH
,FF13c
,nul
);
11721 fs21
= vec_nmsub(qqOH
,FF21c
,nul
);
11722 fs11
= vec_nmsub(c6
,FFd
,fs11
);
11723 fs22
= vec_nmsub(qqHH
,FF22c
,nul
);
11724 fs23
= vec_nmsub(qqHH
,FF23c
,nul
);
11725 fs31
= vec_nmsub(qqOH
,FF31c
,nul
);
11726 fs32
= vec_nmsub(qqHH
,FF32c
,nul
);
11727 fs33
= vec_nmsub(qqHH
,FF33c
,nul
);
11728 fs11
= vec_nmsub(c12
,FFr
,fs11
);
/* vctot += qq*VV for each of the nine pairs. */
11730 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
11731 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
11732 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
11733 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
11734 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
11735 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
11736 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
11737 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
11738 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
/* Scale fs by tsc ... */
11740 fs11
= vec_madd(fs11
,tsc
,nul
);
11741 fs12
= vec_madd(fs12
,tsc
,nul
);
11742 fs13
= vec_madd(fs13
,tsc
,nul
);
11743 fs21
= vec_madd(fs21
,tsc
,nul
);
11744 fs22
= vec_madd(fs22
,tsc
,nul
);
11745 fs23
= vec_madd(fs23
,tsc
,nul
);
11746 fs31
= vec_madd(fs31
,tsc
,nul
);
11747 fs32
= vec_madd(fs32
,tsc
,nul
);
11748 fs33
= vec_madd(fs33
,tsc
,nul
);
/* ... and by rinv, so that fs times a displacement component gives the
 * corresponding force component. */
11750 fs11
= vec_madd(fs11
,rinv11
,nul
);
11751 fs12
= vec_madd(fs12
,rinv12
,nul
);
11752 fs13
= vec_madd(fs13
,rinv13
,nul
);
11753 fs21
= vec_madd(fs21
,rinv21
,nul
);
11754 fs22
= vec_madd(fs22
,rinv22
,nul
);
11755 fs23
= vec_madd(fs23
,rinv23
,nul
);
11756 fs31
= vec_madd(fs31
,rinv31
,nul
);
11757 fs32
= vec_madd(fs32
,rinv32
,nul
);
11758 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-water force accumulators: fiA += fsAB * dAB, summed over B = 1,2,3. */
11760 fix1
= vec_madd(fs11
,dx11
,fix1
);
11761 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
11762 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
11763 fix2
= vec_madd(fs21
,dx21
,fix2
);
11764 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
11765 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
11766 fix3
= vec_madd(fs31
,dx31
,fix3
);
11767 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
11768 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
11770 fix1
= vec_madd(fs12
,dx12
,fix1
);
11771 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
11772 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
11773 fix2
= vec_madd(fs22
,dx22
,fix2
);
11774 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
11775 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
11776 fix3
= vec_madd(fs32
,dx32
,fix3
);
11777 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
11778 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
11780 fix1
= vec_madd(fs13
,dx13
,fix1
);
11781 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
11782 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
11783 fix2
= vec_madd(fs23
,dx23
,fix2
);
11784 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
11785 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
11786 fix3
= vec_madd(fs33
,dx33
,fix3
);
11787 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
11788 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-water forces: fjB = nul - sum over A of fsAB * dAB (opposite sign to
 * the i-water contribution), started from nul and built in three passes. */
11790 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
11791 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
11792 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
11793 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
11794 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
11795 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
11796 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
11797 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
11798 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
11800 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
11801 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
11802 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
11803 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
11804 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
11805 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
11806 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
11807 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
11808 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
11810 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
11811 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
11812 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
11813 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
11814 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
11815 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
11816 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
11817 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
11818 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
/* Hand the j forces to faction at the four j-water offsets. */
11820 add_force_to_4_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,faction
+j3d
,
11821 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder path for three j waters.
 * NOTE(review): the end of the k loop and the branch head that selects
 * this path are not visible in this extract. */
11830 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
11831 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* vec_sld(x,nul,4) shifts each broadcast coefficient by one float and
 * fills the last lane from nul, so the lane without a j water is weighted
 * by nul's value (presumably zero - its initialisation is not visible). */
11832 qqOOt
= vec_sld(qqOO
,nul
,4);
11833 qqOHt
= vec_sld(qqOH
,nul
,4);
11834 qqHHt
= vec_sld(qqHH
,nul
,4);
11835 c6t
= vec_sld(c6
,nul
,4);
11836 c12t
= vec_sld(c12
,nul
,4);
/* Displacements for the nine site pairs. */
11838 dx11
= vec_sub(ix1
,jx1
);
11839 dx12
= vec_sub(ix1
,jx2
);
11840 dx13
= vec_sub(ix1
,jx3
);
11841 dy11
= vec_sub(iy1
,jy1
);
11842 dy12
= vec_sub(iy1
,jy2
);
11843 dy13
= vec_sub(iy1
,jy3
);
11844 dz11
= vec_sub(iz1
,jz1
);
11845 dz12
= vec_sub(iz1
,jz2
);
11846 dz13
= vec_sub(iz1
,jz3
);
11847 dx21
= vec_sub(ix2
,jx1
);
11848 dx22
= vec_sub(ix2
,jx2
);
11849 dx23
= vec_sub(ix2
,jx3
);
11850 dy21
= vec_sub(iy2
,jy1
);
11851 dy22
= vec_sub(iy2
,jy2
);
11852 dy23
= vec_sub(iy2
,jy3
);
11853 dz21
= vec_sub(iz2
,jz1
);
11854 dz22
= vec_sub(iz2
,jz2
);
11855 dz23
= vec_sub(iz2
,jz3
);
11856 dx31
= vec_sub(ix3
,jx1
);
11857 dx32
= vec_sub(ix3
,jx2
);
11858 dx33
= vec_sub(ix3
,jx3
);
11859 dy31
= vec_sub(iy3
,jy1
);
11860 dy32
= vec_sub(iy3
,jy2
);
11861 dy33
= vec_sub(iy3
,jy3
);
11862 dz31
= vec_sub(iz3
,jz1
);
11863 dz32
= vec_sub(iz3
,jz2
);
11864 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances. */
11866 rsq11
= vec_madd(dx11
,dx11
,nul
);
11867 rsq12
= vec_madd(dx12
,dx12
,nul
);
11868 rsq13
= vec_madd(dx13
,dx13
,nul
);
11869 rsq21
= vec_madd(dx21
,dx21
,nul
);
11870 rsq22
= vec_madd(dx22
,dx22
,nul
);
11871 rsq23
= vec_madd(dx23
,dx23
,nul
);
11872 rsq31
= vec_madd(dx31
,dx31
,nul
);
11873 rsq32
= vec_madd(dx32
,dx32
,nul
);
11874 rsq33
= vec_madd(dx33
,dx33
,nul
);
11875 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
11876 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
11877 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
11878 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
11879 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
11880 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
11881 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
11882 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
11883 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
11884 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
11885 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
11886 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
11887 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
11888 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
11889 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
11890 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
11891 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
11892 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* The unused lane is cleared both before and after the inverse square
 * root (going by the helper names; helpers are not shown here). */
11894 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
11895 &rsq21
,&rsq22
,&rsq23
,
11896 &rsq31
,&rsq32
,&rsq33
);
11898 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
11901 &rinv11
,&rinv12
,&rinv13
,
11902 &rinv21
,&rinv22
,&rinv23
,
11903 &rinv31
,&rinv32
,&rinv33
);
11905 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
11906 &rinv21
,&rinv22
,&rinv23
,
11907 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
11909 r11
= vec_madd(rsq11
,rinv11
,nul
);
11910 r12
= vec_madd(rsq12
,rinv12
,nul
);
11911 r13
= vec_madd(rsq13
,rinv13
,nul
);
11912 r21
= vec_madd(rsq21
,rinv21
,nul
);
11913 r22
= vec_madd(rsq22
,rinv22
,nul
);
11914 r23
= vec_madd(rsq23
,rinv23
,nul
);
11915 r31
= vec_madd(rsq31
,rinv31
,nul
);
11916 r32
= vec_madd(rsq32
,rinv32
,nul
);
11917 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookups at r*tsc using the three-water helpers. */
11919 do_3_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
11920 &VV11c
,&FF11c
,&VVd
,&FFd
,&VVr
,&FFr
);
11921 do_3_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
11922 do_3_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
11923 do_3_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
11924 do_3_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
11925 do_3_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
11926 do_3_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
11927 do_3_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
11928 do_3_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Energies and scalar forces as in the four-water path, but with the
 * shifted coefficients (qqOOt, qqOHt, qqHHt, c6t, c12t). */
11930 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
11931 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
11933 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
11934 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
11935 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
11936 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
11937 fs11
= vec_nmsub(c6t
,FFd
,fs11
);
11938 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
11939 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
11940 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
11941 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
11942 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
11943 fs11
= vec_nmsub(c12t
,FFr
,fs11
);
11945 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
11946 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
11947 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
11948 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
11949 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
11950 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
11951 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
11952 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
11953 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
11955 fs11
= vec_madd(fs11
,tsc
,nul
);
11956 fs12
= vec_madd(fs12
,tsc
,nul
);
11957 fs13
= vec_madd(fs13
,tsc
,nul
);
11958 fs21
= vec_madd(fs21
,tsc
,nul
);
11959 fs22
= vec_madd(fs22
,tsc
,nul
);
11960 fs23
= vec_madd(fs23
,tsc
,nul
);
11961 fs31
= vec_madd(fs31
,tsc
,nul
);
11962 fs32
= vec_madd(fs32
,tsc
,nul
);
11963 fs33
= vec_madd(fs33
,tsc
,nul
);
11965 fs11
= vec_madd(fs11
,rinv11
,nul
);
11966 fs12
= vec_madd(fs12
,rinv12
,nul
);
11967 fs13
= vec_madd(fs13
,rinv13
,nul
);
11968 fs21
= vec_madd(fs21
,rinv21
,nul
);
11969 fs22
= vec_madd(fs22
,rinv22
,nul
);
11970 fs23
= vec_madd(fs23
,rinv23
,nul
);
11971 fs31
= vec_madd(fs31
,rinv31
,nul
);
11972 fs32
= vec_madd(fs32
,rinv32
,nul
);
11973 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-water force accumulation. */
11975 fix1
= vec_madd(fs11
,dx11
,fix1
);
11976 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
11977 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
11978 fix2
= vec_madd(fs21
,dx21
,fix2
);
11979 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
11980 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
11981 fix3
= vec_madd(fs31
,dx31
,fix3
);
11982 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
11983 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
11985 fix1
= vec_madd(fs12
,dx12
,fix1
);
11986 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
11987 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
11988 fix2
= vec_madd(fs22
,dx22
,fix2
);
11989 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
11990 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
11991 fix3
= vec_madd(fs32
,dx32
,fix3
);
11992 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
11993 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
11995 fix1
= vec_madd(fs13
,dx13
,fix1
);
11996 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
11997 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
11998 fix2
= vec_madd(fs23
,dx23
,fix2
);
11999 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
12000 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
12001 fix3
= vec_madd(fs33
,dx33
,fix3
);
12002 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
12003 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-water forces, opposite sign. */
12005 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
12006 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
12007 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
12008 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
12009 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
12010 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
12011 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
12012 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
12013 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
12015 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
12016 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
12017 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
12018 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
12019 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
12020 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
12021 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
12022 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
12023 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
12025 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
12026 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
12027 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
12028 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
12029 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
12030 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
12031 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
12032 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
12033 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
12035 add_force_to_3_water(faction
+j3a
,faction
+j3b
,faction
+j3c
,
12036 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
12037 } else if(k
<(nj1
-1)) {
/* Remainder path for two j waters (taken when k < nj1-1). */
12042 load_2_water(pos
+j3a
,pos
+j3b
,
12043 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 8 bytes: the last two lanes of each coefficient come from nul. */
12044 qqOOt
= vec_sld(qqOO
,nul
,8);
12045 qqOHt
= vec_sld(qqOH
,nul
,8);
12046 qqHHt
= vec_sld(qqHH
,nul
,8);
12047 c6t
= vec_sld(c6
,nul
,8);
12048 c12t
= vec_sld(c12
,nul
,8);
/* Displacements for the nine site pairs. */
12050 dx11
= vec_sub(ix1
,jx1
);
12051 dx12
= vec_sub(ix1
,jx2
);
12052 dx13
= vec_sub(ix1
,jx3
);
12053 dy11
= vec_sub(iy1
,jy1
);
12054 dy12
= vec_sub(iy1
,jy2
);
12055 dy13
= vec_sub(iy1
,jy3
);
12056 dz11
= vec_sub(iz1
,jz1
);
12057 dz12
= vec_sub(iz1
,jz2
);
12058 dz13
= vec_sub(iz1
,jz3
);
12059 dx21
= vec_sub(ix2
,jx1
);
12060 dx22
= vec_sub(ix2
,jx2
);
12061 dx23
= vec_sub(ix2
,jx3
);
12062 dy21
= vec_sub(iy2
,jy1
);
12063 dy22
= vec_sub(iy2
,jy2
);
12064 dy23
= vec_sub(iy2
,jy3
);
12065 dz21
= vec_sub(iz2
,jz1
);
12066 dz22
= vec_sub(iz2
,jz2
);
12067 dz23
= vec_sub(iz2
,jz3
);
12068 dx31
= vec_sub(ix3
,jx1
);
12069 dx32
= vec_sub(ix3
,jx2
);
12070 dx33
= vec_sub(ix3
,jx3
);
12071 dy31
= vec_sub(iy3
,jy1
);
12072 dy32
= vec_sub(iy3
,jy2
);
12073 dy33
= vec_sub(iy3
,jy3
);
12074 dz31
= vec_sub(iz3
,jz1
);
12075 dz32
= vec_sub(iz3
,jz2
);
12076 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances. */
12078 rsq11
= vec_madd(dx11
,dx11
,nul
);
12079 rsq12
= vec_madd(dx12
,dx12
,nul
);
12080 rsq13
= vec_madd(dx13
,dx13
,nul
);
12081 rsq21
= vec_madd(dx21
,dx21
,nul
);
12082 rsq22
= vec_madd(dx22
,dx22
,nul
);
12083 rsq23
= vec_madd(dx23
,dx23
,nul
);
12084 rsq31
= vec_madd(dx31
,dx31
,nul
);
12085 rsq32
= vec_madd(dx32
,dx32
,nul
);
12086 rsq33
= vec_madd(dx33
,dx33
,nul
);
12087 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
12088 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
12089 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
12090 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
12091 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
12092 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
12093 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
12094 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
12095 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
12096 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
12097 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
12098 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
12099 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
12100 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
12101 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
12102 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
12103 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
12104 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* The two unused lanes are cleared before and after the inverse square
 * root (going by the helper names). */
12106 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
12107 &rsq21
,&rsq22
,&rsq23
,
12108 &rsq31
,&rsq32
,&rsq33
);
12110 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
12113 &rinv11
,&rinv12
,&rinv13
,
12114 &rinv21
,&rinv22
,&rinv23
,
12115 &rinv31
,&rinv32
,&rinv33
);
12117 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
12118 &rinv21
,&rinv22
,&rinv23
,
12119 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
12121 r11
= vec_madd(rsq11
,rinv11
,nul
);
12122 r12
= vec_madd(rsq12
,rinv12
,nul
);
12123 r13
= vec_madd(rsq13
,rinv13
,nul
);
12124 r21
= vec_madd(rsq21
,rinv21
,nul
);
12125 r22
= vec_madd(rsq22
,rinv22
,nul
);
12126 r23
= vec_madd(rsq23
,rinv23
,nul
);
12127 r31
= vec_madd(rsq31
,rinv31
,nul
);
12128 r32
= vec_madd(rsq32
,rinv32
,nul
);
12129 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookups at r*tsc using the two-water helpers. */
12131 do_2_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
12132 &VV11c
,&FF11c
,&VVd
,&FFd
,&VVr
,&FFr
);
12133 do_2_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
12134 do_2_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
12135 do_2_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
12136 do_2_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
12137 do_2_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
12138 do_2_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
12139 do_2_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
12140 do_2_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Energies and scalar forces with the shifted coefficients. */
12142 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
12143 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
12145 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
12146 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
12147 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
12148 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
12149 fs11
= vec_nmsub(c6t
,FFd
,fs11
);
12150 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
12151 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
12152 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
12153 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
12154 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
12155 fs11
= vec_nmsub(c12t
,FFr
,fs11
);
12157 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
12158 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
12159 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
12160 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
12161 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
12162 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
12163 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
12164 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
12165 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
12167 fs11
= vec_madd(fs11
,tsc
,nul
);
12168 fs12
= vec_madd(fs12
,tsc
,nul
);
12169 fs13
= vec_madd(fs13
,tsc
,nul
);
12170 fs21
= vec_madd(fs21
,tsc
,nul
);
12171 fs22
= vec_madd(fs22
,tsc
,nul
);
12172 fs23
= vec_madd(fs23
,tsc
,nul
);
12173 fs31
= vec_madd(fs31
,tsc
,nul
);
12174 fs32
= vec_madd(fs32
,tsc
,nul
);
12175 fs33
= vec_madd(fs33
,tsc
,nul
);
12177 fs11
= vec_madd(fs11
,rinv11
,nul
);
12178 fs12
= vec_madd(fs12
,rinv12
,nul
);
12179 fs13
= vec_madd(fs13
,rinv13
,nul
);
12180 fs21
= vec_madd(fs21
,rinv21
,nul
);
12181 fs22
= vec_madd(fs22
,rinv22
,nul
);
12182 fs23
= vec_madd(fs23
,rinv23
,nul
);
12183 fs31
= vec_madd(fs31
,rinv31
,nul
);
12184 fs32
= vec_madd(fs32
,rinv32
,nul
);
12185 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-water force accumulation. */
12187 fix1
= vec_madd(fs11
,dx11
,fix1
);
12188 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
12189 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
12190 fix2
= vec_madd(fs21
,dx21
,fix2
);
12191 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
12192 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
12193 fix3
= vec_madd(fs31
,dx31
,fix3
);
12194 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
12195 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
12197 fix1
= vec_madd(fs12
,dx12
,fix1
);
12198 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
12199 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
12200 fix2
= vec_madd(fs22
,dx22
,fix2
);
12201 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
12202 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
12203 fix3
= vec_madd(fs32
,dx32
,fix3
);
12204 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
12205 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
12207 fix1
= vec_madd(fs13
,dx13
,fix1
);
12208 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
12209 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
12210 fix2
= vec_madd(fs23
,dx23
,fix2
);
12211 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
12212 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
12213 fix3
= vec_madd(fs33
,dx33
,fix3
);
12214 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
12215 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-water forces, opposite sign. */
12217 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
12218 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
12219 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
12220 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
12221 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
12222 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
12223 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
12224 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
12225 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
12227 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
12228 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
12229 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
12230 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
12231 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
12232 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
12233 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
12234 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
12235 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
12237 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
12238 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
12239 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
12240 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
12241 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
12242 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
12243 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
12244 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
12245 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
12247 add_force_to_2_water(faction
+j3a
,faction
+j3b
,
12248 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
/* Remainder path for a single j water.
 * NOTE(review): the branch head that selects this path is not visible. */
12252 load_1_water(pos
+j3a
,
12253 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Shift by 12 bytes: the last three lanes of each coefficient come from nul. */
12254 qqOOt
= vec_sld(qqOO
,nul
,12);
12255 qqOHt
= vec_sld(qqOH
,nul
,12);
12256 qqHHt
= vec_sld(qqHH
,nul
,12);
12257 c6t
= vec_sld(c6
,nul
,12);
12258 c12t
= vec_sld(c12
,nul
,12);
/* Displacements for the nine site pairs. */
12260 dx11
= vec_sub(ix1
,jx1
);
12261 dx12
= vec_sub(ix1
,jx2
);
12262 dx13
= vec_sub(ix1
,jx3
);
12263 dy11
= vec_sub(iy1
,jy1
);
12264 dy12
= vec_sub(iy1
,jy2
);
12265 dy13
= vec_sub(iy1
,jy3
);
12266 dz11
= vec_sub(iz1
,jz1
);
12267 dz12
= vec_sub(iz1
,jz2
);
12268 dz13
= vec_sub(iz1
,jz3
);
12269 dx21
= vec_sub(ix2
,jx1
);
12270 dx22
= vec_sub(ix2
,jx2
);
12271 dx23
= vec_sub(ix2
,jx3
);
12272 dy21
= vec_sub(iy2
,jy1
);
12273 dy22
= vec_sub(iy2
,jy2
);
12274 dy23
= vec_sub(iy2
,jy3
);
12275 dz21
= vec_sub(iz2
,jz1
);
12276 dz22
= vec_sub(iz2
,jz2
);
12277 dz23
= vec_sub(iz2
,jz3
);
12278 dx31
= vec_sub(ix3
,jx1
);
12279 dx32
= vec_sub(ix3
,jx2
);
12280 dx33
= vec_sub(ix3
,jx3
);
12281 dy31
= vec_sub(iy3
,jy1
);
12282 dy32
= vec_sub(iy3
,jy2
);
12283 dy33
= vec_sub(iy3
,jy3
);
12284 dz31
= vec_sub(iz3
,jz1
);
12285 dz32
= vec_sub(iz3
,jz2
);
12286 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances. */
12288 rsq11
= vec_madd(dx11
,dx11
,nul
);
12289 rsq12
= vec_madd(dx12
,dx12
,nul
);
12290 rsq13
= vec_madd(dx13
,dx13
,nul
);
12291 rsq21
= vec_madd(dx21
,dx21
,nul
);
12292 rsq22
= vec_madd(dx22
,dx22
,nul
);
12293 rsq23
= vec_madd(dx23
,dx23
,nul
);
12294 rsq31
= vec_madd(dx31
,dx31
,nul
);
12295 rsq32
= vec_madd(dx32
,dx32
,nul
);
12296 rsq33
= vec_madd(dx33
,dx33
,nul
);
12297 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
12298 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
12299 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
12300 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
12301 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
12302 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
12303 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
12304 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
12305 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
12306 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
12307 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
12308 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
12309 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
12310 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
12311 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
12312 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
12313 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
12314 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* The three unused lanes are cleared before and after the inverse square
 * root (going by the helper names). */
12316 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
12317 &rsq21
,&rsq22
,&rsq23
,
12318 &rsq31
,&rsq32
,&rsq33
);
12320 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
12323 &rinv11
,&rinv12
,&rinv13
,
12324 &rinv21
,&rinv22
,&rinv23
,
12325 &rinv31
,&rinv32
,&rinv33
);
12327 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
12328 &rinv21
,&rinv22
,&rinv23
,
12329 &rinv31
,&rinv32
,&rinv33
);
/* r = rsq * rinv. */
12331 r11
= vec_madd(rsq11
,rinv11
,nul
);
12332 r12
= vec_madd(rsq12
,rinv12
,nul
);
12333 r13
= vec_madd(rsq13
,rinv13
,nul
);
12334 r21
= vec_madd(rsq21
,rinv21
,nul
);
12335 r22
= vec_madd(rsq22
,rinv22
,nul
);
12336 r23
= vec_madd(rsq23
,rinv23
,nul
);
12337 r31
= vec_madd(rsq31
,rinv31
,nul
);
12338 r32
= vec_madd(rsq32
,rinv32
,nul
);
12339 r33
= vec_madd(rsq33
,rinv33
,nul
);
/* Table lookups at r*tsc using the one-water helpers. */
12341 do_1_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
12342 &VV11c
,&FF11c
,&VVd
,&FFd
,&VVr
,&FFr
);
12343 do_1_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
,&FF12c
);
12344 do_1_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
,&FF13c
);
12345 do_1_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
,&FF21c
);
12346 do_1_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
,&FF22c
);
12347 do_1_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
,&FF23c
);
12348 do_1_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
,&FF31c
);
12349 do_1_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
,&FF32c
);
12350 do_1_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
,&FF33c
);
/* Energies and scalar forces with the shifted coefficients. */
12352 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
12353 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
12355 fs11
= vec_nmsub(qqOOt
,FF11c
,nul
);
12356 fs12
= vec_nmsub(qqOHt
,FF12c
,nul
);
12357 fs13
= vec_nmsub(qqOHt
,FF13c
,nul
);
12358 fs21
= vec_nmsub(qqOHt
,FF21c
,nul
);
12359 fs11
= vec_nmsub(c6t
,FFd
,fs11
);
12360 fs22
= vec_nmsub(qqHHt
,FF22c
,nul
);
12361 fs23
= vec_nmsub(qqHHt
,FF23c
,nul
);
12362 fs31
= vec_nmsub(qqOHt
,FF31c
,nul
);
12363 fs32
= vec_nmsub(qqHHt
,FF32c
,nul
);
12364 fs33
= vec_nmsub(qqHHt
,FF33c
,nul
);
12365 fs11
= vec_nmsub(c12t
,FFr
,fs11
);
12367 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
12368 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
12369 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
12370 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
12371 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
12372 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
12373 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
12374 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
12375 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
12377 fs11
= vec_madd(fs11
,tsc
,nul
);
12378 fs12
= vec_madd(fs12
,tsc
,nul
);
12379 fs13
= vec_madd(fs13
,tsc
,nul
);
12380 fs21
= vec_madd(fs21
,tsc
,nul
);
12381 fs22
= vec_madd(fs22
,tsc
,nul
);
12382 fs23
= vec_madd(fs23
,tsc
,nul
);
12383 fs31
= vec_madd(fs31
,tsc
,nul
);
12384 fs32
= vec_madd(fs32
,tsc
,nul
);
12385 fs33
= vec_madd(fs33
,tsc
,nul
);
12387 fs11
= vec_madd(fs11
,rinv11
,nul
);
12388 fs12
= vec_madd(fs12
,rinv12
,nul
);
12389 fs13
= vec_madd(fs13
,rinv13
,nul
);
12390 fs21
= vec_madd(fs21
,rinv21
,nul
);
12391 fs22
= vec_madd(fs22
,rinv22
,nul
);
12392 fs23
= vec_madd(fs23
,rinv23
,nul
);
12393 fs31
= vec_madd(fs31
,rinv31
,nul
);
12394 fs32
= vec_madd(fs32
,rinv32
,nul
);
12395 fs33
= vec_madd(fs33
,rinv33
,nul
);
/* i-water force accumulation. */
12397 fix1
= vec_madd(fs11
,dx11
,fix1
);
12398 fiy1
= vec_madd(fs11
,dy11
,fiy1
);
12399 fiz1
= vec_madd(fs11
,dz11
,fiz1
);
12400 fix2
= vec_madd(fs21
,dx21
,fix2
);
12401 fiy2
= vec_madd(fs21
,dy21
,fiy2
);
12402 fiz2
= vec_madd(fs21
,dz21
,fiz2
);
12403 fix3
= vec_madd(fs31
,dx31
,fix3
);
12404 fiy3
= vec_madd(fs31
,dy31
,fiy3
);
12405 fiz3
= vec_madd(fs31
,dz31
,fiz3
);
12407 fix1
= vec_madd(fs12
,dx12
,fix1
);
12408 fiy1
= vec_madd(fs12
,dy12
,fiy1
);
12409 fiz1
= vec_madd(fs12
,dz12
,fiz1
);
12410 fix2
= vec_madd(fs22
,dx22
,fix2
);
12411 fiy2
= vec_madd(fs22
,dy22
,fiy2
);
12412 fiz2
= vec_madd(fs22
,dz22
,fiz2
);
12413 fix3
= vec_madd(fs32
,dx32
,fix3
);
12414 fiy3
= vec_madd(fs32
,dy32
,fiy3
);
12415 fiz3
= vec_madd(fs32
,dz32
,fiz3
);
12417 fix1
= vec_madd(fs13
,dx13
,fix1
);
12418 fiy1
= vec_madd(fs13
,dy13
,fiy1
);
12419 fiz1
= vec_madd(fs13
,dz13
,fiz1
);
12420 fix2
= vec_madd(fs23
,dx23
,fix2
);
12421 fiy2
= vec_madd(fs23
,dy23
,fiy2
);
12422 fiz2
= vec_madd(fs23
,dz23
,fiz2
);
12423 fix3
= vec_madd(fs33
,dx33
,fix3
);
12424 fiy3
= vec_madd(fs33
,dy33
,fiy3
);
12425 fiz3
= vec_madd(fs33
,dz33
,fiz3
);
/* j-water forces, opposite sign. */
12427 fjx1
= vec_nmsub(fs11
,dx11
,nul
);
12428 fjy1
= vec_nmsub(fs11
,dy11
,nul
);
12429 fjz1
= vec_nmsub(fs11
,dz11
,nul
);
12430 fjx2
= vec_nmsub(fs12
,dx12
,nul
);
12431 fjy2
= vec_nmsub(fs12
,dy12
,nul
);
12432 fjz2
= vec_nmsub(fs12
,dz12
,nul
);
12433 fjx3
= vec_nmsub(fs13
,dx13
,nul
);
12434 fjy3
= vec_nmsub(fs13
,dy13
,nul
);
12435 fjz3
= vec_nmsub(fs13
,dz13
,nul
);
12437 fjx1
= vec_nmsub(fs21
,dx21
,fjx1
);
12438 fjy1
= vec_nmsub(fs21
,dy21
,fjy1
);
12439 fjz1
= vec_nmsub(fs21
,dz21
,fjz1
);
12440 fjx2
= vec_nmsub(fs22
,dx22
,fjx2
);
12441 fjy2
= vec_nmsub(fs22
,dy22
,fjy2
);
12442 fjz2
= vec_nmsub(fs22
,dz22
,fjz2
);
12443 fjx3
= vec_nmsub(fs23
,dx23
,fjx3
);
12444 fjy3
= vec_nmsub(fs23
,dy23
,fjy3
);
12445 fjz3
= vec_nmsub(fs23
,dz23
,fjz3
);
12447 fjx1
= vec_nmsub(fs31
,dx31
,fjx1
);
12448 fjy1
= vec_nmsub(fs31
,dy31
,fjy1
);
12449 fjz1
= vec_nmsub(fs31
,dz31
,fjz1
);
12450 fjx2
= vec_nmsub(fs32
,dx32
,fjx2
);
12451 fjy2
= vec_nmsub(fs32
,dy32
,fjy2
);
12452 fjz2
= vec_nmsub(fs32
,dz32
,fjz2
);
12453 fjx3
= vec_nmsub(fs33
,dx33
,fjx3
);
12454 fjy3
= vec_nmsub(fs33
,dy33
,fjy3
);
12455 fjz3
= vec_nmsub(fs33
,dz33
,fjz3
);
12457 add_force_to_1_water(faction
+j3a
,
12458 fjx1
,fjy1
,fjz1
,fjx2
,fjy2
,fjz2
,fjx3
,fjy3
,fjz3
);
12460 /* update outer data */
/* The accumulated i-water forces go to faction+ii3 and fshift+is3. */
12461 update_i_water_forces(faction
+ii3
,fshift
+is3
,
12462 fix1
,fiy1
,fiz1
,fix2
,fiy2
,fiz2
,fix3
,fiy3
,fiz3
);
/* Add the vector energy sums to the scalar slots selected by gid[n]. */
12464 add_vector_to_float(Vc
+gid
[n
],vctot
);
12465 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
12471 void mcinl0100_altivec(
12485 vector
float ix
,iy
,iz
,shvec
;
12487 vector
float dx
,dy
,dz
;
12488 vector
float vnbtot
,c6
,c12
;
12489 vector
float rinvsq
,rsq
,rinvsix
;
12491 int n
,k
,k0
,ii
,is3
,ii3
,nj0
,nj1
;
12492 int jnra
,jnrb
,jnrc
,jnrd
;
12493 int j3a
,j3b
,j3c
,j3d
;
12494 int ntiA
,tja
,tjb
,tjc
,tjd
;
12498 for(n
=0;n
<nri
;n
++) {
12500 shvec
= load_xyz(shiftvec
+is3
);
12503 ix
= load_xyz(pos
+ii3
);
12505 ix
= vec_add(ix
,shvec
);
12508 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
12509 ntiA
= 2*ntype
*type
[ii
];
12510 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
12519 transpose_4_to_3(load_xyz(pos
+j3a
),
12522 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
12523 dx
= vec_sub(ix
,dx
);
12524 dy
= vec_sub(iy
,dy
);
12525 dz
= vec_sub(iz
,dz
);
12526 rsq
= vec_madd(dx
,dx
,nul
);
12527 rsq
= vec_madd(dy
,dy
,rsq
);
12528 rsq
= vec_madd(dz
,dz
,rsq
);
12529 rinvsq
= do_recip(rsq
);
12530 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12531 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12532 tja
= ntiA
+2*type
[jnra
];
12533 tjb
= ntiA
+2*type
[jnrb
];
12534 tjc
= ntiA
+2*type
[jnrc
];
12535 tjd
= ntiA
+2*type
[jnrd
];
12536 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
12537 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12538 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12545 transpose_2_to_3(load_xyz(pos
+j3a
),
12546 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
12547 dx
= vec_sub(ix
,dx
);
12548 dy
= vec_sub(iy
,dy
);
12549 dz
= vec_sub(iz
,dz
);
12550 rsq
= vec_madd(dx
,dx
,nul
);
12551 rsq
= vec_madd(dy
,dy
,rsq
);
12552 rsq
= vec_madd(dz
,dz
,rsq
);
12553 rinvsq
= do_recip(rsq
);
12554 zero_highest_2_elements_in_vector(&rinvsq
);
12555 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12556 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12557 tja
= ntiA
+2*type
[jnra
];
12558 tjb
= ntiA
+2*type
[jnrb
];
12559 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
12560 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12561 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12567 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
12568 dx
= vec_sub(ix
,dx
);
12569 dy
= vec_sub(iy
,dy
);
12570 dz
= vec_sub(iz
,dz
);
12571 rsq
= vec_madd(dx
,dx
,nul
);
12572 rsq
= vec_madd(dy
,dy
,rsq
);
12573 rsq
= vec_madd(dz
,dz
,rsq
);
12574 rinvsq
= do_recip(rsq
);
12575 zero_highest_3_elements_in_vector(&rinvsq
);
12576 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12577 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12578 tja
= ntiA
+2*type
[jnra
];
12579 load_1_pair(nbfp
+tja
,&c6
,&c12
);
12580 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12581 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12583 /* update outer data */
12584 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
12589 void mcinl0300_altivec(
12605 vector
float ix
,iy
,iz
,shvec
;
12606 vector
float nul
,tsc
;
12607 vector
float dx
,dy
,dz
;
12608 vector
float vnbtot
,c6
,c12
;
12609 vector
float rinv
,r
,rsq
;
12610 vector
float VVd
,VVr
;
12612 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
12613 int jnra
,jnrb
,jnrc
,jnrd
;
12614 int j3a
,j3b
,j3c
,j3d
;
12615 int tja
,tjb
,tjc
,tjd
;
12618 tsc
=load_float_and_splat(&tabscale
);
12620 for(n
=0;n
<nri
;n
++) {
12622 shvec
= load_xyz(shiftvec
+is3
);
12625 ix
= load_xyz(pos
+ii3
);
12627 ix
= vec_add(ix
,shvec
);
12630 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
12631 ntiA
= 2*ntype
*type
[ii
];
12633 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
12642 transpose_4_to_3(load_xyz(pos
+j3a
),
12645 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
12646 dx
= vec_sub(ix
,dx
);
12647 dy
= vec_sub(iy
,dy
);
12648 dz
= vec_sub(iz
,dz
);
12649 rsq
= vec_madd(dx
,dx
,nul
);
12650 rsq
= vec_madd(dy
,dy
,rsq
);
12651 rsq
= vec_madd(dz
,dz
,rsq
);
12652 rinv
= do_invsqrt(rsq
);
12653 r
= vec_madd(rinv
,rsq
,nul
);
12654 tja
= ntiA
+2*type
[jnra
];
12655 tjb
= ntiA
+2*type
[jnrb
];
12656 tjc
= ntiA
+2*type
[jnrc
];
12657 tjd
= ntiA
+2*type
[jnrd
];
12658 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
12659 do_vonly_4_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&VVr
);
12660 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
12661 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
12668 transpose_2_to_3(load_xyz(pos
+j3a
),
12669 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
12670 dx
= vec_sub(ix
,dx
);
12671 dy
= vec_sub(iy
,dy
);
12672 dz
= vec_sub(iz
,dz
);
12673 rsq
= vec_madd(dx
,dx
,nul
);
12674 rsq
= vec_madd(dy
,dy
,rsq
);
12675 rsq
= vec_madd(dz
,dz
,rsq
);
12676 zero_highest_2_elements_in_vector(&rsq
);
12677 rinv
= do_invsqrt(rsq
);
12678 zero_highest_2_elements_in_vector(&rinv
);
12679 r
= vec_madd(rinv
,rsq
,nul
);
12680 tja
= ntiA
+2*type
[jnra
];
12681 tjb
= ntiA
+2*type
[jnrb
];
12682 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
12683 do_vonly_2_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&VVr
);
12684 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
12685 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
12691 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
12692 dx
= vec_sub(ix
,dx
);
12693 dy
= vec_sub(iy
,dy
);
12694 dz
= vec_sub(iz
,dz
);
12695 rsq
= vec_madd(dx
,dx
,nul
);
12696 rsq
= vec_madd(dy
,dy
,rsq
);
12697 rsq
= vec_madd(dz
,dz
,rsq
);
12698 zero_highest_3_elements_in_vector(&rsq
);
12699 rinv
= do_invsqrt(rsq
);
12700 zero_highest_3_elements_in_vector(&rinv
);
12701 r
= vec_madd(rinv
,rsq
,nul
);
12702 tja
= ntiA
+2*type
[jnra
];
12703 load_1_pair(nbfp
+tja
,&c6
,&c12
);
12704 do_vonly_1_ljtable_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVd
,&VVr
);
12705 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
12706 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
12708 /* update outer data */
12709 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
12715 void mcinl1000_altivec(
12728 vector
float ix
,iy
,iz
,shvec
;
12729 vector
float vfacel
,nul
;
12730 vector
float dx
,dy
,dz
;
12731 vector
float vctot
,qq
,iq
;
12732 vector
float rinv
,rsq
;
12734 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
12735 int jnra
,jnrb
,jnrc
,jnrd
;
12736 int j3a
,j3b
,j3c
,j3d
;
12739 vfacel
=load_float_and_splat(&facel
);
12741 for(n
=0;n
<nri
;n
++) {
12743 shvec
= load_xyz(shiftvec
+is3
);
12746 ix
= load_xyz(pos
+ii3
);
12748 ix
= vec_add(ix
,shvec
);
12751 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
12752 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
12754 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
12763 transpose_4_to_3(load_xyz(pos
+j3a
),
12766 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
12767 dx
= vec_sub(ix
,dx
);
12768 dy
= vec_sub(iy
,dy
);
12769 dz
= vec_sub(iz
,dz
);
12770 rsq
= vec_madd(dx
,dx
,nul
);
12771 rsq
= vec_madd(dy
,dy
,rsq
);
12772 rsq
= vec_madd(dz
,dz
,rsq
);
12773 rinv
= do_invsqrt(rsq
);
12774 /* load 4 j charges and multiply by iq */
12775 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
12776 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
12777 vctot
= vec_madd(qq
,rinv
,vctot
);
12784 transpose_2_to_3(load_xyz(pos
+j3a
),
12785 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
12786 dx
= vec_sub(ix
,dx
);
12787 dy
= vec_sub(iy
,dy
);
12788 dz
= vec_sub(iz
,dz
);
12789 rsq
= vec_madd(dx
,dx
,nul
);
12790 rsq
= vec_madd(dy
,dy
,rsq
);
12791 rsq
= vec_madd(dz
,dz
,rsq
);
12792 rinv
= do_invsqrt(rsq
);
12793 zero_highest_2_elements_in_vector(&rinv
);
12794 /* load 2 j charges and multiply by iq */
12795 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
12796 vctot
= vec_madd(qq
,rinv
,vctot
);
12802 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
12803 dx
= vec_sub(ix
,dx
);
12804 dy
= vec_sub(iy
,dy
);
12805 dz
= vec_sub(iz
,dz
);
12806 rsq
= vec_madd(dx
,dx
,nul
);
12807 rsq
= vec_madd(dy
,dy
,rsq
);
12808 rsq
= vec_madd(dz
,dz
,rsq
);
12809 rinv
= do_invsqrt(rsq
);
12810 zero_highest_3_elements_in_vector(&rinv
);
12811 /* load 1 j charge and multiply by iq */
12812 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
12813 vctot
= vec_madd(qq
,rinv
,vctot
);
12815 /* update outer data */
12816 add_vector_to_float(Vc
+gid
[n
],vctot
);
12822 void mcinl1100_altivec(
12839 vector
float ix
,iy
,iz
,shvec
;
12840 vector
float vfacel
,nul
;
12841 vector
float dx
,dy
,dz
;
12842 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
;
12843 vector
float rinv
,rinvsq
,rsq
,rinvsix
;
12845 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
12846 int jnra
,jnrb
,jnrc
,jnrd
;
12847 int j3a
,j3b
,j3c
,j3d
;
12848 int tja
,tjb
,tjc
,tjd
;
12851 vfacel
=load_float_and_splat(&facel
);
12853 for(n
=0;n
<nri
;n
++) {
12855 shvec
= load_xyz(shiftvec
+is3
);
12858 ix
= load_xyz(pos
+ii3
);
12861 ix
= vec_add(ix
,shvec
);
12864 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
12865 ntiA
= 2*ntype
*type
[ii
];
12866 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
12868 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
12877 transpose_4_to_3(load_xyz(pos
+j3a
),
12880 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
12881 dx
= vec_sub(ix
,dx
);
12882 dy
= vec_sub(iy
,dy
);
12883 dz
= vec_sub(iz
,dz
);
12884 rsq
= vec_madd(dx
,dx
,nul
);
12885 rsq
= vec_madd(dy
,dy
,rsq
);
12886 rsq
= vec_madd(dz
,dz
,rsq
);
12887 rinv
= do_invsqrt(rsq
);
12888 rinvsq
= vec_madd(rinv
,rinv
,nul
);
12889 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12890 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12891 tja
= ntiA
+2*type
[jnra
];
12892 tjb
= ntiA
+2*type
[jnrb
];
12893 tjc
= ntiA
+2*type
[jnrc
];
12894 tjd
= ntiA
+2*type
[jnrd
];
12895 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
12896 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
12897 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
12898 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12899 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12900 vctot
= vec_madd(qq
,rinv
,vctot
);
12907 transpose_2_to_3(load_xyz(pos
+j3a
),
12908 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
12909 dx
= vec_sub(ix
,dx
);
12910 dy
= vec_sub(iy
,dy
);
12911 dz
= vec_sub(iz
,dz
);
12912 rsq
= vec_madd(dx
,dx
,nul
);
12913 rsq
= vec_madd(dy
,dy
,rsq
);
12914 rsq
= vec_madd(dz
,dz
,rsq
);
12915 rinv
= do_invsqrt(rsq
);
12916 zero_highest_2_elements_in_vector(&rinv
);
12917 rinvsq
= vec_madd(rinv
,rinv
,nul
);
12918 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12919 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12920 tja
= ntiA
+2*type
[jnra
];
12921 tjb
= ntiA
+2*type
[jnrb
];
12922 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
12923 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
12924 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12925 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12926 vctot
= vec_madd(qq
,rinv
,vctot
);
12932 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
12933 dx
= vec_sub(ix
,dx
);
12934 dy
= vec_sub(iy
,dy
);
12935 dz
= vec_sub(iz
,dz
);
12936 rsq
= vec_madd(dx
,dx
,nul
);
12937 rsq
= vec_madd(dy
,dy
,rsq
);
12938 rsq
= vec_madd(dz
,dz
,rsq
);
12939 rinv
= do_invsqrt(rsq
);
12940 zero_highest_3_elements_in_vector(&rinv
);
12941 rinvsq
= vec_madd(rinv
,rinv
,nul
);
12942 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
12943 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
12944 tja
= ntiA
+2*type
[jnra
];
12945 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
12946 load_1_pair(nbfp
+tja
,&c6
,&c12
);
12947 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
12948 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
12949 vctot
= vec_madd(qq
,rinv
,vctot
);
12951 /* update outer data */
12952 add_vector_to_float(Vc
+gid
[n
],vctot
);
12953 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
12960 void mcinl2000_altivec(
12975 vector
float ix
,iy
,iz
,shvec
;
12976 vector
float vfacel
,vkrf
,vcrf
,krsq
,nul
,vcoul
;
12977 vector
float dx
,dy
,dz
;
12978 vector
float vctot
,qq
,iq
;
12979 vector
float rinv
,rsq
;
12981 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
12982 int jnra
,jnrb
,jnrc
,jnrd
;
12983 int j3a
,j3b
,j3c
,j3d
;
12986 vfacel
=load_float_and_splat(&facel
);
12987 vkrf
=load_float_and_splat(&krf
);
12988 vcrf
=load_float_and_splat(&crf
);
12990 for(n
=0;n
<nri
;n
++) {
12992 shvec
= load_xyz(shiftvec
+is3
);
12995 ix
= load_xyz(pos
+ii3
);
12997 ix
= vec_add(ix
,shvec
);
13000 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
13001 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13003 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13012 transpose_4_to_3(load_xyz(pos
+j3a
),
13015 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
13016 dx
= vec_sub(ix
,dx
);
13017 dy
= vec_sub(iy
,dy
);
13018 dz
= vec_sub(iz
,dz
);
13019 rsq
= vec_madd(dx
,dx
,nul
);
13020 rsq
= vec_madd(dy
,dy
,rsq
);
13021 rsq
= vec_madd(dz
,dz
,rsq
);
13022 rinv
= do_invsqrt(rsq
);
13023 /* load 4 j charges and multiply by iq */
13024 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
13025 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
13026 krsq
= vec_madd(vkrf
,rsq
,nul
);
13027 vcoul
= vec_add(rinv
,krsq
);
13028 vcoul
= vec_sub(vcoul
,vcrf
);
13029 vctot
= vec_madd(qq
,vcoul
,vctot
);
13036 transpose_2_to_3(load_xyz(pos
+j3a
),
13037 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
13038 dx
= vec_sub(ix
,dx
);
13039 dy
= vec_sub(iy
,dy
);
13040 dz
= vec_sub(iz
,dz
);
13041 rsq
= vec_madd(dx
,dx
,nul
);
13042 rsq
= vec_madd(dy
,dy
,rsq
);
13043 rsq
= vec_madd(dz
,dz
,rsq
);
13044 zero_highest_2_elements_in_vector(&rsq
);
13045 rinv
= do_invsqrt(rsq
);
13046 zero_highest_2_elements_in_vector(&rinv
);
13047 /* load 2 j charges and multiply by iq */
13048 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
13049 krsq
= vec_madd(vkrf
,rsq
,nul
);
13050 vcoul
= vec_add(rinv
,krsq
);
13051 vcoul
= vec_sub(vcoul
,vcrf
);
13052 vctot
= vec_madd(qq
,vcoul
,vctot
);
13058 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
13059 dx
= vec_sub(ix
,dx
);
13060 dy
= vec_sub(iy
,dy
);
13061 dz
= vec_sub(iz
,dz
);
13062 rsq
= vec_madd(dx
,dx
,nul
);
13063 rsq
= vec_madd(dy
,dy
,rsq
);
13064 rsq
= vec_madd(dz
,dz
,rsq
);
13065 zero_highest_3_elements_in_vector(&rsq
);
13066 rinv
= do_invsqrt(rsq
);
13067 zero_highest_3_elements_in_vector(&rinv
);
13068 /* load 1 j charge and multiply by iq */
13069 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
13070 krsq
= vec_madd(vkrf
,rsq
,nul
);
13071 vcoul
= vec_add(rinv
,krsq
);
13072 vcoul
= vec_sub(vcoul
,vcrf
);
13073 vctot
= vec_madd(qq
,vcoul
,vctot
);
13075 /* update outer data */
13076 add_vector_to_float(Vc
+gid
[n
],vctot
);
13082 void mcinl2100_altivec(
13101 vector
float ix
,iy
,iz
,shvec
;
13102 vector
float vfacel
,vkrf
,vcrf
,krsq
,vcoul
,nul
;
13103 vector
float dx
,dy
,dz
;
13104 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
;
13105 vector
float rinv
,rinvsq
,rsq
,rinvsix
;
13107 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13108 int jnra
,jnrb
,jnrc
,jnrd
;
13109 int j3a
,j3b
,j3c
,j3d
;
13110 int tja
,tjb
,tjc
,tjd
;
13113 vfacel
=load_float_and_splat(&facel
);
13114 vkrf
=load_float_and_splat(&krf
);
13115 vcrf
=load_float_and_splat(&crf
);
13117 for(n
=0;n
<nri
;n
++) {
13119 shvec
= load_xyz(shiftvec
+is3
);
13122 ix
= load_xyz(pos
+ii3
);
13125 ix
= vec_add(ix
,shvec
);
13128 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
13129 ntiA
= 2*ntype
*type
[ii
];
13130 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13132 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13141 transpose_4_to_3(load_xyz(pos
+j3a
),
13144 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
13145 dx
= vec_sub(ix
,dx
);
13146 dy
= vec_sub(iy
,dy
);
13147 dz
= vec_sub(iz
,dz
);
13148 rsq
= vec_madd(dx
,dx
,nul
);
13149 rsq
= vec_madd(dy
,dy
,rsq
);
13150 rsq
= vec_madd(dz
,dz
,rsq
);
13151 rinv
= do_invsqrt(rsq
);
13152 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13153 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13154 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13155 tja
= ntiA
+2*type
[jnra
];
13156 tjb
= ntiA
+2*type
[jnrb
];
13157 tjc
= ntiA
+2*type
[jnrc
];
13158 tjd
= ntiA
+2*type
[jnrd
];
13159 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
13160 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
13161 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
13162 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13163 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13164 krsq
= vec_madd(vkrf
,rsq
,nul
);
13165 vcoul
= vec_add(rinv
,krsq
);
13166 vcoul
= vec_sub(vcoul
,vcrf
);
13167 vctot
= vec_madd(qq
,vcoul
,vctot
);
13174 transpose_2_to_3(load_xyz(pos
+j3a
),
13175 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
13176 dx
= vec_sub(ix
,dx
);
13177 dy
= vec_sub(iy
,dy
);
13178 dz
= vec_sub(iz
,dz
);
13179 rsq
= vec_madd(dx
,dx
,nul
);
13180 rsq
= vec_madd(dy
,dy
,rsq
);
13181 rsq
= vec_madd(dz
,dz
,rsq
);
13182 zero_highest_2_elements_in_vector(&rsq
);
13183 rinv
= do_invsqrt(rsq
);
13184 zero_highest_2_elements_in_vector(&rinv
);
13185 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13186 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13187 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13188 tja
= ntiA
+2*type
[jnra
];
13189 tjb
= ntiA
+2*type
[jnrb
];
13190 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
13191 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
13192 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13193 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13194 krsq
= vec_madd(vkrf
,rsq
,nul
);
13195 vcoul
= vec_add(rinv
,krsq
);
13196 vcoul
= vec_sub(vcoul
,vcrf
);
13197 vctot
= vec_madd(qq
,vcoul
,vctot
);
13203 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
13204 dx
= vec_sub(ix
,dx
);
13205 dy
= vec_sub(iy
,dy
);
13206 dz
= vec_sub(iz
,dz
);
13207 rsq
= vec_madd(dx
,dx
,nul
);
13208 rsq
= vec_madd(dy
,dy
,rsq
);
13209 rsq
= vec_madd(dz
,dz
,rsq
);
13210 zero_highest_3_elements_in_vector(&rsq
);
13211 rinv
= do_invsqrt(rsq
);
13212 zero_highest_3_elements_in_vector(&rinv
);
13213 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13214 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13215 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13216 tja
= ntiA
+2*type
[jnra
];
13217 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
13218 load_1_pair(nbfp
+tja
,&c6
,&c12
);
13219 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13220 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13221 krsq
= vec_madd(vkrf
,rsq
,nul
);
13222 vcoul
= vec_add(rinv
,krsq
);
13223 vcoul
= vec_sub(vcoul
,vcrf
);
13224 vctot
= vec_madd(qq
,vcoul
,vctot
);
13226 /* update outer data */
13227 add_vector_to_float(Vc
+gid
[n
],vctot
);
13228 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
13237 void mcinl3000_altivec(
13252 vector
float ix
,iy
,iz
,shvec
;
13253 vector
float vfacel
,tsc
,nul
;
13254 vector
float dx
,dy
,dz
;
13255 vector
float vctot
,qq
,iq
;
13256 vector
float rinv
,r
,rsq
,VVc
;
13258 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13259 int jnra
,jnrb
,jnrc
,jnrd
;
13260 int j3a
,j3b
,j3c
,j3d
;
13263 vfacel
=load_float_and_splat(&facel
);
13264 tsc
=load_float_and_splat(&tabscale
);
13266 for(n
=0;n
<nri
;n
++) {
13268 shvec
= load_xyz(shiftvec
+is3
);
13271 ix
= load_xyz(pos
+ii3
);
13273 ix
= vec_add(ix
,shvec
);
13276 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
13277 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13279 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13288 transpose_4_to_3(load_xyz(pos
+j3a
),
13291 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
13292 dx
= vec_sub(ix
,dx
);
13293 dy
= vec_sub(iy
,dy
);
13294 dz
= vec_sub(iz
,dz
);
13295 rsq
= vec_madd(dx
,dx
,nul
);
13296 rsq
= vec_madd(dy
,dy
,rsq
);
13297 rsq
= vec_madd(dz
,dz
,rsq
);
13298 rinv
= do_invsqrt(rsq
);
13299 r
= vec_madd(rinv
,rsq
,nul
);
13300 /* load 4 j charges and multiply by iq */
13301 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
13302 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
13303 do_vonly_4_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13304 vctot
= vec_madd(qq
,VVc
,vctot
);
13311 transpose_2_to_3(load_xyz(pos
+j3a
),
13312 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
13313 dx
= vec_sub(ix
,dx
);
13314 dy
= vec_sub(iy
,dy
);
13315 dz
= vec_sub(iz
,dz
);
13316 rsq
= vec_madd(dx
,dx
,nul
);
13317 rsq
= vec_madd(dy
,dy
,rsq
);
13318 rsq
= vec_madd(dz
,dz
,rsq
);
13319 zero_highest_2_elements_in_vector(&rsq
);
13320 rinv
= do_invsqrt(rsq
);
13321 zero_highest_2_elements_in_vector(&rinv
);
13322 r
= vec_madd(rinv
,rsq
,nul
);
13323 /* load 2 j charges and multiply by iq */
13324 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
13325 do_vonly_2_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13326 vctot
= vec_madd(qq
,VVc
,vctot
);
13332 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
13333 dx
= vec_sub(ix
,dx
);
13334 dy
= vec_sub(iy
,dy
);
13335 dz
= vec_sub(iz
,dz
);
13336 rsq
= vec_madd(dx
,dx
,nul
);
13337 rsq
= vec_madd(dy
,dy
,rsq
);
13338 rsq
= vec_madd(dz
,dz
,rsq
);
13339 zero_highest_3_elements_in_vector(&rsq
);
13340 rinv
= do_invsqrt(rsq
);
13341 zero_highest_3_elements_in_vector(&rinv
);
13342 r
= vec_madd(rinv
,rsq
,nul
);
13343 /* load 1 j charge and multiply by iq */
13344 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
13345 do_vonly_1_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13346 vctot
= vec_madd(qq
,VVc
,vctot
);
13348 /* update outer data */
13349 add_vector_to_float(Vc
+gid
[n
],vctot
);
13355 void mcinl3100_altivec(
13374 vector
float ix
,iy
,iz
,shvec
;
13375 vector
float vfacel
,tsc
,nul
;
13376 vector
float dx
,dy
,dz
;
13377 vector
float vnbtot
,vctot
,qq
,iq
,c6
,c12
,VVc
;
13378 vector
float rinv
,r
,rinvsq
,rsq
,rinvsix
;
13380 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13381 int jnra
,jnrb
,jnrc
,jnrd
;
13382 int j3a
,j3b
,j3c
,j3d
;
13383 int tja
,tjb
,tjc
,tjd
;
13386 vfacel
=load_float_and_splat(&facel
);
13387 tsc
=load_float_and_splat(&tabscale
);
13389 for(n
=0;n
<nri
;n
++) {
13391 shvec
= load_xyz(shiftvec
+is3
);
13394 ix
= load_xyz(pos
+ii3
);
13397 ix
= vec_add(ix
,shvec
);
13400 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
13401 ntiA
= 2*ntype
*type
[ii
];
13402 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13404 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13413 transpose_4_to_3(load_xyz(pos
+j3a
),
13416 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
13417 dx
= vec_sub(ix
,dx
);
13418 dy
= vec_sub(iy
,dy
);
13419 dz
= vec_sub(iz
,dz
);
13420 rsq
= vec_madd(dx
,dx
,nul
);
13421 rsq
= vec_madd(dy
,dy
,rsq
);
13422 rsq
= vec_madd(dz
,dz
,rsq
);
13423 rinv
= do_invsqrt(rsq
);
13424 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13425 r
= vec_madd(rinv
,rsq
,nul
);
13426 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13427 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13428 tja
= ntiA
+2*type
[jnra
];
13429 tjb
= ntiA
+2*type
[jnrb
];
13430 tjc
= ntiA
+2*type
[jnrc
];
13431 tjd
= ntiA
+2*type
[jnrd
];
13432 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
13433 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
13434 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
13435 do_vonly_4_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13436 vctot
= vec_madd(qq
,VVc
,vctot
);
13437 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13438 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13445 transpose_2_to_3(load_xyz(pos
+j3a
),
13446 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
13447 dx
= vec_sub(ix
,dx
);
13448 dy
= vec_sub(iy
,dy
);
13449 dz
= vec_sub(iz
,dz
);
13450 rsq
= vec_madd(dx
,dx
,nul
);
13451 rsq
= vec_madd(dy
,dy
,rsq
);
13452 rsq
= vec_madd(dz
,dz
,rsq
);
13453 zero_highest_2_elements_in_vector(&rsq
);
13454 rinv
= do_invsqrt(rsq
);
13455 zero_highest_2_elements_in_vector(&rinv
);
13456 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13457 r
= vec_madd(rinv
,rsq
,nul
);
13458 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13459 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13460 tja
= ntiA
+2*type
[jnra
];
13461 tjb
= ntiA
+2*type
[jnrb
];
13462 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
13463 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
13464 do_vonly_2_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13465 vctot
= vec_madd(qq
,VVc
,vctot
);
13466 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13467 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13473 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
13474 dx
= vec_sub(ix
,dx
);
13475 dy
= vec_sub(iy
,dy
);
13476 dz
= vec_sub(iz
,dz
);
13477 rsq
= vec_madd(dx
,dx
,nul
);
13478 rsq
= vec_madd(dy
,dy
,rsq
);
13479 rsq
= vec_madd(dz
,dz
,rsq
);
13480 zero_highest_3_elements_in_vector(&rsq
);
13481 rinv
= do_invsqrt(rsq
);
13482 zero_highest_3_elements_in_vector(&rinv
);
13483 rinvsq
= vec_madd(rinv
,rinv
,nul
);
13484 r
= vec_madd(rinv
,rsq
,nul
);
13485 rinvsix
= vec_madd(rinvsq
,rinvsq
,nul
);
13486 rinvsix
= vec_madd(rinvsix
,rinvsq
,nul
);
13487 tja
= ntiA
+2*type
[jnra
];
13488 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
13489 load_1_pair(nbfp
+tja
,&c6
,&c12
);
13490 do_vonly_1_ctable_coul(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
);
13491 vctot
= vec_madd(qq
,VVc
,vctot
);
13492 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13493 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13495 /* update outer data */
13496 add_vector_to_float(Vc
+gid
[n
],vctot
);
13497 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
13502 void mcinl3300_altivec(
13521 vector
float ix
,iy
,iz
,shvec
;
13522 vector
float fs
,nul
,tsc
;
13523 vector
float dx
,dy
,dz
,vfacel
,vctot
;
13524 vector
float vnbtot
,c6
,c12
,iq
,qq
;
13525 vector
float rinv
,r
,rsq
;
13526 vector
float VVc
,VVd
,VVr
;
13528 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13529 int jnra
,jnrb
,jnrc
,jnrd
;
13530 int j3a
,j3b
,j3c
,j3d
;
13531 int tja
,tjb
,tjc
,tjd
;
13534 tsc
=load_float_and_splat(&tabscale
);
13535 vfacel
=load_float_and_splat(&facel
);
13537 for(n
=0;n
<nri
;n
++) {
13539 shvec
= load_xyz(shiftvec
+is3
);
13542 ix
= load_xyz(pos
+ii3
);
13545 ix
= vec_add(ix
,shvec
);
13548 splat_xyz_to_vectors(ix
,&ix
,&iy
,&iz
);
13549 ntiA
= 2*ntype
*type
[ii
];
13550 iq
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13552 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13561 transpose_4_to_3(load_xyz(pos
+j3a
),
13564 load_xyz(pos
+j3d
),&dx
,&dy
,&dz
);
13565 dx
= vec_sub(ix
,dx
);
13566 dy
= vec_sub(iy
,dy
);
13567 dz
= vec_sub(iz
,dz
);
13568 rsq
= vec_madd(dx
,dx
,nul
);
13569 rsq
= vec_madd(dy
,dy
,rsq
);
13570 rsq
= vec_madd(dz
,dz
,rsq
);
13571 rinv
= do_invsqrt(rsq
);
13572 r
= vec_madd(rinv
,rsq
,nul
);
13573 qq
= vec_madd(load_4_float(charge
+jnra
,charge
+jnrb
,
13574 charge
+jnrc
,charge
+jnrd
),iq
,nul
);
13575 tja
= ntiA
+2*type
[jnra
];
13576 tjb
= ntiA
+2*type
[jnrb
];
13577 tjc
= ntiA
+2*type
[jnrc
];
13578 tjd
= ntiA
+2*type
[jnrd
];
13579 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
13580 do_vonly_4_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&VVd
,&VVr
);
13581 vctot
= vec_madd(qq
,VVc
,vctot
);
13582 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
13583 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
13590 transpose_2_to_3(load_xyz(pos
+j3a
),
13591 load_xyz(pos
+j3b
),&dx
,&dy
,&dz
);
13592 dx
= vec_sub(ix
,dx
);
13593 dy
= vec_sub(iy
,dy
);
13594 dz
= vec_sub(iz
,dz
);
13595 rsq
= vec_madd(dx
,dx
,nul
);
13596 rsq
= vec_madd(dy
,dy
,rsq
);
13597 rsq
= vec_madd(dz
,dz
,rsq
);
13598 zero_highest_2_elements_in_vector(&rsq
);
13599 rinv
= do_invsqrt(rsq
);
13600 zero_highest_2_elements_in_vector(&rinv
);
13601 r
= vec_madd(rinv
,rsq
,nul
);
13602 qq
= vec_madd(load_2_float(charge
+jnra
,charge
+jnrb
),iq
,nul
);
13603 tja
= ntiA
+2*type
[jnra
];
13604 tjb
= ntiA
+2*type
[jnrb
];
13605 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
13606 do_vonly_2_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&VVd
,&VVr
);
13607 vctot
= vec_madd(qq
,VVc
,vctot
);
13608 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
13609 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
13615 transpose_1_to_3(load_xyz(pos
+j3a
),&dx
,&dy
,&dz
);
13616 dx
= vec_sub(ix
,dx
);
13617 dy
= vec_sub(iy
,dy
);
13618 dz
= vec_sub(iz
,dz
);
13619 rsq
= vec_madd(dx
,dx
,nul
);
13620 rsq
= vec_madd(dy
,dy
,rsq
);
13621 rsq
= vec_madd(dz
,dz
,rsq
);
13622 zero_highest_3_elements_in_vector(&rsq
);
13623 rinv
= do_invsqrt(rsq
);
13624 zero_highest_3_elements_in_vector(&rinv
);
13625 r
= vec_madd(rinv
,rsq
,nul
);
13626 qq
= vec_madd(load_1_float(charge
+jnra
),iq
,nul
);
13627 tja
= ntiA
+2*type
[jnra
];
13628 load_1_pair(nbfp
+tja
,&c6
,&c12
);
13629 do_vonly_1_ljctable_coul_and_lj(VFtab
,vec_madd(r
,tsc
,nul
),&VVc
,&VVd
,&VVr
);
13630 vctot
= vec_madd(qq
,VVc
,vctot
);
13631 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
13632 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
13634 /* update outer data */
13635 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
13636 add_vector_to_float(Vc
+gid
[n
],vctot
);
13641 void mcinl1020_altivec(
13654 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
13655 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
13656 vector
float vfacel
,nul
;
13657 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
13658 vector
float rinvO
,rinvH1
,rinvH2
,rsqO
,rsqH1
,rsqH2
;
13661 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13662 int jnra
,jnrb
,jnrc
,jnrd
;
13663 int j3a
,j3b
,j3c
,j3d
;
13666 vfacel
=load_float_and_splat(&facel
);
13667 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
13668 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
13670 for(n
=0;n
<nri
;n
++) {
13674 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
13675 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
13680 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13689 transpose_4_to_3(load_xyz(pos
+j3a
),
13692 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
13693 dOx
= vec_sub(iOx
,dH2x
);
13694 dOy
= vec_sub(iOy
,dH2y
);
13695 dOz
= vec_sub(iOz
,dH2z
);
13696 dH1x
= vec_sub(iH1x
,dH2x
);
13697 dH1y
= vec_sub(iH1y
,dH2y
);
13698 dH1z
= vec_sub(iH1z
,dH2z
);
13699 dH2x
= vec_sub(iH2x
,dH2x
);
13700 dH2y
= vec_sub(iH2y
,dH2y
);
13701 dH2z
= vec_sub(iH2z
,dH2z
);
13703 rsqO
= vec_madd(dOx
,dOx
,nul
);
13704 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13705 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13706 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13707 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13708 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13709 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13710 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13711 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13712 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
13713 /* load 4 j charges and multiply by iq */
13714 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
13715 qqO
= vec_madd(iqO
,jq
,nul
);
13716 qqH
= vec_madd(iqH
,jq
,nul
);
13717 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13718 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13719 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
13728 transpose_4_to_3(load_xyz(pos
+j3a
),
13730 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
13731 dOx
= vec_sub(iOx
,dH2x
);
13732 dOy
= vec_sub(iOy
,dH2y
);
13733 dOz
= vec_sub(iOz
,dH2z
);
13734 dH1x
= vec_sub(iH1x
,dH2x
);
13735 dH1y
= vec_sub(iH1y
,dH2y
);
13736 dH1z
= vec_sub(iH1z
,dH2z
);
13737 dH2x
= vec_sub(iH2x
,dH2x
);
13738 dH2y
= vec_sub(iH2y
,dH2y
);
13739 dH2z
= vec_sub(iH2z
,dH2z
);
13741 rsqO
= vec_madd(dOx
,dOx
,nul
);
13742 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13743 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13744 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13745 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13746 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13747 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13748 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13749 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13750 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
13751 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
13753 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
13754 qqO
= vec_madd(iqO
,jq
,nul
);
13755 qqH
= vec_madd(iqH
,jq
,nul
);
13756 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13757 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13758 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
13759 } else if(k
<(nj1
-1)) {
13764 transpose_2_to_3(load_xyz(pos
+j3a
),
13765 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
13766 dOx
= vec_sub(iOx
,dH2x
);
13767 dOy
= vec_sub(iOy
,dH2y
);
13768 dOz
= vec_sub(iOz
,dH2z
);
13769 dH1x
= vec_sub(iH1x
,dH2x
);
13770 dH1y
= vec_sub(iH1y
,dH2y
);
13771 dH1z
= vec_sub(iH1z
,dH2z
);
13772 dH2x
= vec_sub(iH2x
,dH2x
);
13773 dH2y
= vec_sub(iH2y
,dH2y
);
13774 dH2z
= vec_sub(iH2z
,dH2z
);
13776 rsqO
= vec_madd(dOx
,dOx
,nul
);
13777 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13778 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13779 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13780 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13781 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13782 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13783 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13784 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13785 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
13786 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
13788 /* load 2 j charges and multiply by iq */
13789 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
13790 qqO
= vec_madd(iqO
,jq
,nul
);
13791 qqH
= vec_madd(iqH
,jq
,nul
);
13792 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13793 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13794 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
13798 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
13799 dOx
= vec_sub(iOx
,dH2x
);
13800 dOy
= vec_sub(iOy
,dH2y
);
13801 dOz
= vec_sub(iOz
,dH2z
);
13802 dH1x
= vec_sub(iH1x
,dH2x
);
13803 dH1y
= vec_sub(iH1y
,dH2y
);
13804 dH1z
= vec_sub(iH1z
,dH2z
);
13805 dH2x
= vec_sub(iH2x
,dH2x
);
13806 dH2y
= vec_sub(iH2y
,dH2y
);
13807 dH2z
= vec_sub(iH2z
,dH2z
);
13809 rsqO
= vec_madd(dOx
,dOx
,nul
);
13810 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13811 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13812 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13813 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13814 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13815 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13816 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13817 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13818 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
13819 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
13820 /* load 1 j charges and multiply by iq */
13821 jq
=load_1_float(charge
+jnra
);
13822 qqO
= vec_madd(iqO
,jq
,nul
);
13823 qqH
= vec_madd(iqH
,jq
,nul
);
13824 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13825 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13826 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
13828 /* update outer data */
13829 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl1120_altivec
 *
 * Energy-only AltiVec inner loop (no force terms appear in the visible code).
 * For each of the nri outer iterations it loads one three-site i molecule
 * (sites O, H1, H2, via load_1_water_shift_and_splat) and loops over j atoms:
 *   - Coulomb:       vctot  += qq * rinv           for the O, H1 and H2 sites
 *   - Lennard-Jones: vnbtot += c12*rinvsix^2 - c6*rinvsix, O site only
 * The totals are added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): this copy of the file does not show the parameter list, the
 * index loads (ii, is3, ii3, nj0, nj1, jnr*, j3*), the initialisation of
 * nul/vctot/vnbtot, some branch headers or the closing braces - confirm
 * against the complete source before relying on these comments.
 */
13834 void mcinl1120_altivec(
13851 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
13852 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
13853 vector
float vfacel
,nul
;
13854 vector
float vnbtot
,c6
,c12
,rinvsix
;
13855 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
13856 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rsqO
,rsqH1
,rsqH2
;
13858 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
13859 int jnra
,jnrb
,jnrc
,jnrd
;
13860 int j3a
,j3b
,j3c
,j3d
;
13861 int tja
,tjb
,tjc
,tjd
;
13864 vfacel
=load_float_and_splat(&facel
);
/* i-site charges times facel (plus nul): O uses charge[ii], both H sites share charge[ii+1] */
13866 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
13867 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* base offset into nbfp for the i atom type; each j atom adds 2*type[j] below */
13868 ntiA
= 2*ntype
*type
[ii
];
13870 for(n
=0;n
<nri
;n
++) {
13874 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
13875 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per iteration */
13881 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
13890 transpose_4_to_3(load_xyz(pos
+j3a
),
13893 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates here; they are overwritten last with the H2 differences */
13894 dOx
= vec_sub(iOx
,dH2x
);
13895 dOy
= vec_sub(iOy
,dH2y
);
13896 dOz
= vec_sub(iOz
,dH2z
);
13897 dH1x
= vec_sub(iH1x
,dH2x
);
13898 dH1y
= vec_sub(iH1y
,dH2y
);
13899 dH1z
= vec_sub(iH1z
,dH2z
);
13900 dH2x
= vec_sub(iH2x
,dH2x
);
13901 dH2y
= vec_sub(iH2y
,dH2y
);
13902 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz, accumulated with multiply-add starting from nul */
13904 rsqO
= vec_madd(dOx
,dOx
,nul
);
13905 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13906 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13907 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13908 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13909 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13910 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13911 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13912 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13913 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* rinvsix = (rinvO*rinvO)^3 - only the O site feeds the Lennard-Jones term */
13914 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
13915 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
13916 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
13917 tja
= ntiA
+2*type
[jnra
];
13918 tjb
= ntiA
+2*type
[jnrb
];
13919 tjc
= ntiA
+2*type
[jnrc
];
13920 tjd
= ntiA
+2*type
[jnrd
];
13921 /* load 4 j charges and multiply by iq */
13922 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
13923 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
13924 qqO
= vec_madd(iqO
,jq
,nul
);
13925 qqH
= vec_madd(iqH
,jq
,nul
);
/* vec_nmsub(a,b,c) yields c - a*b, so this subtracts c6*rinvsix from vnbtot */
13926 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13927 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13928 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13929 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13930 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
/* remainder with three j atoms: nul is passed as the fourth transpose input.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
13939 transpose_4_to_3(load_xyz(pos
+j3a
),
13941 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
13942 dOx
= vec_sub(iOx
,dH2x
);
13943 dOy
= vec_sub(iOy
,dH2y
);
13944 dOz
= vec_sub(iOz
,dH2z
);
13945 dH1x
= vec_sub(iH1x
,dH2x
);
13946 dH1y
= vec_sub(iH1y
,dH2y
);
13947 dH1z
= vec_sub(iH1z
,dH2z
);
13948 dH2x
= vec_sub(iH2x
,dH2x
);
13949 dH2y
= vec_sub(iH2y
,dH2y
);
13950 dH2z
= vec_sub(iH2z
,dH2z
);
13952 rsqO
= vec_madd(dOx
,dOx
,nul
);
13953 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13954 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
13955 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
13956 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
13957 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
13958 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
13959 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
13960 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
13961 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* zero the highest element of each rinv; rinvsix is derived from rinvO, so that
 * lane then adds nothing to vctot or vnbtot (presumably the lane without a j atom) */
13962 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
13964 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
13965 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
13966 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
13967 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
13968 tja
= ntiA
+2*type
[jnra
];
13969 tjb
= ntiA
+2*type
[jnrb
];
13970 tjc
= ntiA
+2*type
[jnrc
];
13971 /* load c6/c12 for the 3 j atoms, then multiply the j charges loaded above by iq */
13972 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
13973 qqO
= vec_madd(iqO
,jq
,nul
);
13974 qqH
= vec_madd(iqH
,jq
,nul
);
13975 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
13976 vctot
= vec_madd(qqO
,rinvO
,vctot
);
13977 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
13978 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
13979 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
13980 } else if(k
<(nj1
-1)) {
/* remainder with two j atoms */
13985 transpose_2_to_3(load_xyz(pos
+j3a
),
13986 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
13987 dOx
= vec_sub(iOx
,dH2x
);
13988 dOy
= vec_sub(iOy
,dH2y
);
13989 dOz
= vec_sub(iOz
,dH2z
);
13990 dH1x
= vec_sub(iH1x
,dH2x
);
13991 dH1y
= vec_sub(iH1y
,dH2y
);
13992 dH1z
= vec_sub(iH1z
,dH2z
);
13993 dH2x
= vec_sub(iH2x
,dH2x
);
13994 dH2y
= vec_sub(iH2y
,dH2y
);
13995 dH2z
= vec_sub(iH2z
,dH2z
);
13997 rsqO
= vec_madd(dOx
,dOx
,nul
);
13998 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
13999 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14000 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14001 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14002 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14003 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14004 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14005 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14006 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14007 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14009 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14010 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14011 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14012 tja
= ntiA
+2*type
[jnra
];
14013 tjb
= ntiA
+2*type
[jnrb
];
14014 /* load 2 j charges and multiply by iq */
14015 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
14016 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
14017 qqO
= vec_madd(iqO
,jq
,nul
);
14018 qqH
= vec_madd(iqH
,jq
,nul
);
14019 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14020 vctot
= vec_madd(qqO
,rinvO
,vctot
);
14021 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14022 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
14023 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
/* remainder with a single j atom.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14027 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
14028 dOx
= vec_sub(iOx
,dH2x
);
14029 dOy
= vec_sub(iOy
,dH2y
);
14030 dOz
= vec_sub(iOz
,dH2z
);
14031 dH1x
= vec_sub(iH1x
,dH2x
);
14032 dH1y
= vec_sub(iH1y
,dH2y
);
14033 dH1z
= vec_sub(iH1z
,dH2z
);
14034 dH2x
= vec_sub(iH2x
,dH2x
);
14035 dH2y
= vec_sub(iH2y
,dH2y
);
14036 dH2z
= vec_sub(iH2z
,dH2z
);
14038 rsqO
= vec_madd(dOx
,dOx
,nul
);
14039 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14040 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14041 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14042 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14043 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14044 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14045 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14046 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14047 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14048 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14050 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14051 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14052 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14053 tja
= ntiA
+2*type
[jnra
];
14054 /* load 1 j charges and multiply by iq */
14055 jq
=load_1_float(charge
+jnra
);
14056 load_1_pair(nbfp
+tja
,&c6
,&c12
);
14057 qqO
= vec_madd(iqO
,jq
,nul
);
14058 qqH
= vec_madd(iqH
,jq
,nul
);
14059 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14060 vctot
= vec_madd(qqO
,rinvO
,vctot
);
14061 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14062 vctot
= vec_madd(qqH
,rinvH1
,vctot
);
14063 vctot
= vec_madd(qqH
,rinvH2
,vctot
);
14065 /* update outer data: add the vector totals into Vc[gid[n]] and Vnb[gid[n]] */
14066 add_vector_to_float(Vc
+gid
[n
],vctot
);
14067 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * mcinl2020_altivec
 *
 * Energy-only AltiVec inner loop (no force terms appear in the visible code).
 * For each of the nri outer iterations it loads one three-site i molecule
 * (sites O, H1, H2, via load_1_water_shift_and_splat) and accumulates, for
 * every j atom and each i site,
 *     vctot += qq * (rinv + krf*rsq - crf)
 * The total is added to Vc[gid[n]]. No Lennard-Jones term is computed here.
 *
 * NOTE(review): this copy of the file does not show the parameter list, the
 * index loads (is3, ii3, nj0, nj1, jnr*, j3*), the initialisation of
 * nul/vctot, some branch headers or the closing braces - confirm against the
 * complete source before relying on these comments.
 */
14073 void mcinl2020_altivec(
14088 vector
float vkrf
,vcrf
;
14089 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
14090 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
14091 vector
float vfacel
,nul
;
14092 vector
float krsqO
,krsqH1
,krsqH2
;
14093 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
,vcoulO
,vcoulH1
,vcoulH2
;
14094 vector
float rinvO
,rinvH1
,rinvH2
,rsqO
,rsqH1
,rsqH2
;
14096 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
14097 int jnra
,jnrb
,jnrc
,jnrd
;
14098 int j3a
,j3b
,j3c
,j3d
;
14101 vfacel
=load_float_and_splat(&facel
);
14102 vkrf
=load_float_and_splat(&krf
);
14103 vcrf
=load_float_and_splat(&crf
);
/* i-site charges times facel (plus nul): O uses charge[iinr[0]], both H sites share charge[iinr[0]+1] */
14105 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
14106 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
14108 for(n
=0;n
<nri
;n
++) {
14112 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
14113 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per iteration */
14118 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
14127 transpose_4_to_3(load_xyz(pos
+j3a
),
14130 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates here; they are overwritten last with the H2 differences */
14131 dOx
= vec_sub(iOx
,dH2x
);
14132 dOy
= vec_sub(iOy
,dH2y
);
14133 dOz
= vec_sub(iOz
,dH2z
);
14134 dH1x
= vec_sub(iH1x
,dH2x
);
14135 dH1y
= vec_sub(iH1y
,dH2y
);
14136 dH1z
= vec_sub(iH1z
,dH2z
);
14137 dH2x
= vec_sub(iH2x
,dH2x
);
14138 dH2y
= vec_sub(iH2y
,dH2y
);
14139 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz, accumulated with multiply-add starting from nul */
14141 rsqO
= vec_madd(dOx
,dOx
,nul
);
14142 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14143 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14144 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14145 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14146 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14147 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14148 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14149 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14150 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14151 /* load 4 j charges and multiply by iq */
14152 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
14153 qqO
= vec_madd(iqO
,jq
,nul
);
14154 qqH
= vec_madd(iqH
,jq
,nul
);
/* per-site potential factor: vcoul = rinv + krf*rsq - crf */
14155 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14156 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14157 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14158 vcoulO
= vec_add(rinvO
,krsqO
);
14159 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14160 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14161 vcoulO
= vec_sub(vcoulO
,vcrf
);
14162 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14163 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14164 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14165 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14166 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
/* remainder with three j atoms: nul is passed as the fourth transpose input.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14175 transpose_4_to_3(load_xyz(pos
+j3a
),
14177 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
14178 dOx
= vec_sub(iOx
,dH2x
);
14179 dOy
= vec_sub(iOy
,dH2y
);
14180 dOz
= vec_sub(iOz
,dH2z
);
14181 dH1x
= vec_sub(iH1x
,dH2x
);
14182 dH1y
= vec_sub(iH1y
,dH2y
);
14183 dH1z
= vec_sub(iH1z
,dH2z
);
14184 dH2x
= vec_sub(iH2x
,dH2x
);
14185 dH2y
= vec_sub(iH2y
,dH2y
);
14186 dH2z
= vec_sub(iH2z
,dH2z
);
14188 rsqO
= vec_madd(dOx
,dOx
,nul
);
14189 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14190 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14191 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14192 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14193 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14194 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14195 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14196 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* rsq is reused in the krf*rsq term, so its highest element is zeroed as well as
 * that of rinv. In that lane vcoul becomes -crf, so it only drops out of vctot if
 * jq is zero there (presumably load_3_float zero-fills it - confirm helper). */
14197 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14198 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14199 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14201 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
14202 /* multiply the 3 j charges loaded above by iq */
14203 qqO
= vec_madd(iqO
,jq
,nul
);
14204 qqH
= vec_madd(iqH
,jq
,nul
);
14205 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14206 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14207 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14208 vcoulO
= vec_add(rinvO
,krsqO
);
14209 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14210 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14211 vcoulO
= vec_sub(vcoulO
,vcrf
);
14212 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14213 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14214 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14215 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14216 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
14217 } else if(k
<(nj1
-1)) {
/* remainder with two j atoms */
14222 transpose_2_to_3(load_xyz(pos
+j3a
),
14223 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
14224 dOx
= vec_sub(iOx
,dH2x
);
14225 dOy
= vec_sub(iOy
,dH2y
);
14226 dOz
= vec_sub(iOz
,dH2z
);
14227 dH1x
= vec_sub(iH1x
,dH2x
);
14228 dH1y
= vec_sub(iH1y
,dH2y
);
14229 dH1z
= vec_sub(iH1z
,dH2z
);
14230 dH2x
= vec_sub(iH2x
,dH2x
);
14231 dH2y
= vec_sub(iH2y
,dH2y
);
14232 dH2z
= vec_sub(iH2z
,dH2z
);
14234 rsqO
= vec_madd(dOx
,dOx
,nul
);
14235 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14236 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14237 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14238 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14239 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14240 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14241 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14242 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14243 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14244 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14245 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14246 /* load 2 j charges and multiply by iq */
14247 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
14248 qqO
= vec_madd(iqO
,jq
,nul
);
14249 qqH
= vec_madd(iqH
,jq
,nul
);
14250 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14251 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14252 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14253 vcoulO
= vec_add(rinvO
,krsqO
);
14254 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14255 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14256 vcoulO
= vec_sub(vcoulO
,vcrf
);
14257 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14258 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14259 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14260 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14261 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
/* remainder with a single j atom.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14265 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
14266 dOx
= vec_sub(iOx
,dH2x
);
14267 dOy
= vec_sub(iOy
,dH2y
);
14268 dOz
= vec_sub(iOz
,dH2z
);
14269 dH1x
= vec_sub(iH1x
,dH2x
);
14270 dH1y
= vec_sub(iH1y
,dH2y
);
14271 dH1z
= vec_sub(iH1z
,dH2z
);
14272 dH2x
= vec_sub(iH2x
,dH2x
);
14273 dH2y
= vec_sub(iH2y
,dH2y
);
14274 dH2z
= vec_sub(iH2z
,dH2z
);
14276 rsqO
= vec_madd(dOx
,dOx
,nul
);
14277 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14278 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14279 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14280 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14281 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14282 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14283 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14284 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14285 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14286 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14287 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14288 /* load 1 j charges and multiply by iq */
14289 jq
=load_1_float(charge
+jnra
);
14290 qqO
= vec_madd(iqO
,jq
,nul
);
14291 qqH
= vec_madd(iqH
,jq
,nul
);
14292 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14293 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14294 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14295 vcoulO
= vec_add(rinvO
,krsqO
);
14296 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14297 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14298 vcoulO
= vec_sub(vcoulO
,vcrf
);
14299 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14300 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14301 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14302 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14303 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
14305 /* update outer data: add the vector total into Vc[gid[n]] */
14306 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl2120_altivec
 *
 * Energy-only AltiVec inner loop (no force terms appear in the visible code).
 * For each of the nri outer iterations it loads one three-site i molecule
 * (sites O, H1, H2, via load_1_water_shift_and_splat) and accumulates, for
 * every j atom,
 *   - Coulomb:       vctot  += qq * (rinv + krf*rsq - crf)   for O, H1, H2
 *   - Lennard-Jones: vnbtot += c12*rinvsix^2 - c6*rinvsix,    O site only
 * The totals are added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): this copy of the file does not show the parameter list, the
 * index loads (ii, is3, ii3, nj0, nj1, jnr*, j3*), the initialisation of
 * nul/vctot/vnbtot, some branch headers or the closing braces - confirm
 * against the complete source before relying on these comments.
 */
14312 void mcinl2120_altivec(
14331 vector
float vkrf
,vcrf
;
14332 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
14333 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
14334 vector
float vfacel
,nul
,vcoulO
,vcoulH1
,vcoulH2
;
14335 vector
float vnbtot
,c6
,c12
,rinvsix
;
14336 vector
float krsqO
,krsqH1
,krsqH2
;
14337 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
14338 vector
float rinvO
,rinvH1
,rinvH2
,rinvsqO
,rsqO
,rsqH1
,rsqH2
;
14340 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
14341 int jnra
,jnrb
,jnrc
,jnrd
;
14342 int j3a
,j3b
,j3c
,j3d
;
14343 int tja
,tjb
,tjc
,tjd
;
14346 vfacel
=load_float_and_splat(&facel
);
14347 vkrf
=load_float_and_splat(&krf
);
14348 vcrf
=load_float_and_splat(&crf
);
/* i-site charges times facel (plus nul): O uses charge[ii], both H sites share charge[ii+1] */
14350 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
14351 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
/* base offset into nbfp for the i atom type; each j atom adds 2*type[j] below */
14352 ntiA
= 2*ntype
*type
[ii
];
14354 for(n
=0;n
<nri
;n
++) {
14358 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
14359 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per iteration */
14365 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
14374 transpose_4_to_3(load_xyz(pos
+j3a
),
14377 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates here; they are overwritten last with the H2 differences */
14378 dOx
= vec_sub(iOx
,dH2x
);
14379 dOy
= vec_sub(iOy
,dH2y
);
14380 dOz
= vec_sub(iOz
,dH2z
);
14381 dH1x
= vec_sub(iH1x
,dH2x
);
14382 dH1y
= vec_sub(iH1y
,dH2y
);
14383 dH1z
= vec_sub(iH1z
,dH2z
);
14384 dH2x
= vec_sub(iH2x
,dH2x
);
14385 dH2y
= vec_sub(iH2y
,dH2y
);
14386 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz, accumulated with multiply-add starting from nul */
14388 rsqO
= vec_madd(dOx
,dOx
,nul
);
14389 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14390 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14391 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14392 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14393 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14394 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14395 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14396 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14397 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* rinvsix = (rinvO*rinvO)^3 - only the O site feeds the Lennard-Jones term */
14398 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14399 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14400 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14401 tja
= ntiA
+2*type
[jnra
];
14402 tjb
= ntiA
+2*type
[jnrb
];
14403 tjc
= ntiA
+2*type
[jnrc
];
14404 tjd
= ntiA
+2*type
[jnrd
];
14405 /* load 4 j charges and multiply by iq */
14406 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
14407 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
14408 qqO
= vec_madd(iqO
,jq
,nul
);
14409 qqH
= vec_madd(iqH
,jq
,nul
);
14410 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14411 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14412 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
/* vec_nmsub(a,b,c) yields c - a*b, so this subtracts c6*rinvsix; the next line adds c12*rinvsix^2 */
14413 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14414 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
/* per-site potential factor: vcoul = rinv + krf*rsq - crf */
14415 vcoulO
= vec_add(rinvO
,krsqO
);
14416 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14417 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14418 vcoulO
= vec_sub(vcoulO
,vcrf
);
14419 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14420 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14421 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14422 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14423 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
/* remainder with three j atoms: nul is passed as the fourth transpose input.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14432 transpose_4_to_3(load_xyz(pos
+j3a
),
14434 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
14435 dOx
= vec_sub(iOx
,dH2x
);
14436 dOy
= vec_sub(iOy
,dH2y
);
14437 dOz
= vec_sub(iOz
,dH2z
);
14438 dH1x
= vec_sub(iH1x
,dH2x
);
14439 dH1y
= vec_sub(iH1y
,dH2y
);
14440 dH1z
= vec_sub(iH1z
,dH2z
);
14441 dH2x
= vec_sub(iH2x
,dH2x
);
14442 dH2y
= vec_sub(iH2y
,dH2y
);
14443 dH2z
= vec_sub(iH2z
,dH2z
);
14445 rsqO
= vec_madd(dOx
,dOx
,nul
);
14446 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14447 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14448 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14449 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14450 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14451 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14452 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14453 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* rsq is reused in the krf*rsq term, so its highest element is zeroed as well as
 * that of rinv. In that lane vcoul becomes -crf, so it only drops out of vctot if
 * jq is zero there (presumably load_3_float zero-fills it - confirm helper). */
14454 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14455 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14456 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14457 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14458 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14459 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14460 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
14461 tja
= ntiA
+2*type
[jnra
];
14462 tjb
= ntiA
+2*type
[jnrb
];
14463 tjc
= ntiA
+2*type
[jnrc
];
14464 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
14465 qqO
= vec_madd(iqO
,jq
,nul
);
14466 qqH
= vec_madd(iqH
,jq
,nul
);
14467 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14468 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14469 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14470 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14471 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14472 vcoulO
= vec_add(rinvO
,krsqO
);
14473 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14474 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14475 vcoulO
= vec_sub(vcoulO
,vcrf
);
14476 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14477 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14478 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14479 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14480 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
14481 } else if(k
<(nj1
-1)) {
/* remainder with two j atoms */
14486 transpose_2_to_3(load_xyz(pos
+j3a
),
14487 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
14488 dOx
= vec_sub(iOx
,dH2x
);
14489 dOy
= vec_sub(iOy
,dH2y
);
14490 dOz
= vec_sub(iOz
,dH2z
);
14491 dH1x
= vec_sub(iH1x
,dH2x
);
14492 dH1y
= vec_sub(iH1y
,dH2y
);
14493 dH1z
= vec_sub(iH1z
,dH2z
);
14494 dH2x
= vec_sub(iH2x
,dH2x
);
14495 dH2y
= vec_sub(iH2y
,dH2y
);
14496 dH2z
= vec_sub(iH2z
,dH2z
);
14498 rsqO
= vec_madd(dOx
,dOx
,nul
);
14499 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14500 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14501 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14502 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14503 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14504 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14505 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14506 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14507 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14508 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14509 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14510 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14511 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14512 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14513 tja
= ntiA
+2*type
[jnra
];
14514 tjb
= ntiA
+2*type
[jnrb
];
14515 /* load 2 j charges and multiply by iq */
14516 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
14517 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
14518 qqO
= vec_madd(iqO
,jq
,nul
);
14519 qqH
= vec_madd(iqH
,jq
,nul
);
14520 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14521 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14522 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14523 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14524 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14525 vcoulO
= vec_add(rinvO
,krsqO
);
14526 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14527 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14528 vcoulO
= vec_sub(vcoulO
,vcrf
);
14529 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14530 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14531 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14532 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14533 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
/* remainder with a single j atom.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14537 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
14538 dOx
= vec_sub(iOx
,dH2x
);
14539 dOy
= vec_sub(iOy
,dH2y
);
14540 dOz
= vec_sub(iOz
,dH2z
);
14541 dH1x
= vec_sub(iH1x
,dH2x
);
14542 dH1y
= vec_sub(iH1y
,dH2y
);
14543 dH1z
= vec_sub(iH1z
,dH2z
);
14544 dH2x
= vec_sub(iH2x
,dH2x
);
14545 dH2y
= vec_sub(iH2y
,dH2y
);
14546 dH2z
= vec_sub(iH2z
,dH2z
);
14548 rsqO
= vec_madd(dOx
,dOx
,nul
);
14549 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14550 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14551 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14552 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14553 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14554 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14555 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14556 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14557 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14558 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14559 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14560 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14561 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14562 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14563 tja
= ntiA
+2*type
[jnra
];
14564 /* load 1 j charges and multiply by iq */
14565 jq
=load_1_float(charge
+jnra
);
14566 load_1_pair(nbfp
+tja
,&c6
,&c12
);
14567 qqO
= vec_madd(iqO
,jq
,nul
);
14568 qqH
= vec_madd(iqH
,jq
,nul
);
14569 krsqO
= vec_madd(vkrf
,rsqO
,nul
);
14570 krsqH1
= vec_madd(vkrf
,rsqH1
,nul
);
14571 krsqH2
= vec_madd(vkrf
,rsqH2
,nul
);
14572 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14573 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14574 vcoulO
= vec_add(rinvO
,krsqO
);
14575 vcoulH1
= vec_add(rinvH1
,krsqH1
);
14576 vcoulH2
= vec_add(rinvH2
,krsqH2
);
14577 vcoulO
= vec_sub(vcoulO
,vcrf
);
14578 vcoulH1
= vec_sub(vcoulH1
,vcrf
);
14579 vcoulH2
= vec_sub(vcoulH2
,vcrf
);
14580 vctot
= vec_madd(qqO
,vcoulO
,vctot
);
14581 vctot
= vec_madd(qqH
,vcoulH1
,vctot
);
14582 vctot
= vec_madd(qqH
,vcoulH2
,vctot
);
14584 /* update outer data: add the vector totals into Vc[gid[n]] and Vnb[gid[n]] */
14585 add_vector_to_float(Vc
+gid
[n
],vctot
);
14586 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * mcinl3020_altivec
 *
 * Energy-only AltiVec inner loop (no force terms appear in the visible code).
 * For each of the nri outer iterations it loads one three-site i molecule
 * (sites O, H1, H2, via load_1_water_shift_and_splat). For every j atom and
 * each i site it forms r = rsq*rinv, passes r*tabscale together with VFtab to
 * do_vonly_N_ctable_coul to obtain VVc, and accumulates
 *     vctot += qq * VVc
 * The total is added to Vc[gid[n]]. No Lennard-Jones term is computed here.
 *
 * NOTE(review): this copy of the file does not show the parameter list, the
 * index loads (is3, ii3, nj0, nj1, jnr*, j3*), the initialisation of
 * nul/vctot, some branch headers or the closing braces - confirm against the
 * complete source before relying on these comments.
 */
14592 void mcinl3020_altivec(
14607 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
14608 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
14609 vector
float vfacel
,nul
;
14610 vector
float tsc
,VVcO
,VVcH1
,VVcH2
;
14611 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
14612 vector
float rinvO
,rinvH1
,rinvH2
,rO
,rH1
,rH2
,rsqO
,rsqH1
,rsqH2
;
14614 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
14615 int jnra
,jnrb
,jnrc
,jnrd
;
14616 int j3a
,j3b
,j3c
,j3d
;
14619 vfacel
=load_float_and_splat(&facel
);
14620 tsc
=load_float_and_splat(&tabscale
);
/* i-site charges times facel (plus nul): O uses charge[iinr[0]], both H sites share charge[iinr[0]+1] */
14621 iqO
= vec_madd(load_float_and_splat(charge
+iinr
[0]),vfacel
,nul
);
14622 iqH
= vec_madd(load_float_and_splat(charge
+iinr
[0]+1),vfacel
,nul
);
14624 for(n
=0;n
<nri
;n
++) {
14628 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
14629 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per iteration */
14634 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
14643 transpose_4_to_3(load_xyz(pos
+j3a
),
14646 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates here; they are overwritten last with the H2 differences */
14647 dOx
= vec_sub(iOx
,dH2x
);
14648 dOy
= vec_sub(iOy
,dH2y
);
14649 dOz
= vec_sub(iOz
,dH2z
);
14650 dH1x
= vec_sub(iH1x
,dH2x
);
14651 dH1y
= vec_sub(iH1y
,dH2y
);
14652 dH1z
= vec_sub(iH1z
,dH2z
);
14653 dH2x
= vec_sub(iH2x
,dH2x
);
14654 dH2y
= vec_sub(iH2y
,dH2y
);
14655 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz, accumulated with multiply-add starting from nul */
14657 rsqO
= vec_madd(dOx
,dOx
,nul
);
14658 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14659 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14660 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14661 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14662 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14663 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14664 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14665 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14666 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* distance r = rsq * rinv, used (scaled by tsc) as the table lookup argument below */
14667 rO
= vec_madd(rsqO
,rinvO
,nul
);
14668 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14669 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
14671 /* load 4 j charges and multiply by iq */
14672 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
14673 do_vonly_4_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14674 do_vonly_4_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14675 do_vonly_4_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
14676 qqO
= vec_madd(iqO
,jq
,nul
);
14677 qqH
= vec_madd(iqH
,jq
,nul
);
14678 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14679 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14680 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* remainder with three j atoms: nul is passed as the fourth transpose input.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14689 transpose_4_to_3(load_xyz(pos
+j3a
),
14691 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
14692 dOx
= vec_sub(iOx
,dH2x
);
14693 dOy
= vec_sub(iOy
,dH2y
);
14694 dOz
= vec_sub(iOz
,dH2z
);
14695 dH1x
= vec_sub(iH1x
,dH2x
);
14696 dH1y
= vec_sub(iH1y
,dH2y
);
14697 dH1z
= vec_sub(iH1z
,dH2z
);
14698 dH2x
= vec_sub(iH2x
,dH2x
);
14699 dH2y
= vec_sub(iH2y
,dH2y
);
14700 dH2z
= vec_sub(iH2z
,dH2z
);
14702 rsqO
= vec_madd(dOx
,dOx
,nul
);
14703 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14704 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14705 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14706 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14707 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14708 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14709 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14710 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
/* the highest element is zeroed in rsq before the inverse square root and in
 * rinv after it, so r = rsq*rinv is zero in that lane */
14711 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14712 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14713 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14714 rO
= vec_madd(rsqO
,rinvO
,nul
);
14715 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14716 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
14718 /* load 3 j charges and multiply by iq */
14719 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
14720 do_vonly_3_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14721 do_vonly_3_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14722 do_vonly_3_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
14723 qqO
= vec_madd(iqO
,jq
,nul
);
14724 qqH
= vec_madd(iqH
,jq
,nul
);
14725 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14726 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14727 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
14728 } else if(k
<(nj1
-1)) {
/* remainder with two j atoms */
14733 transpose_2_to_3(load_xyz(pos
+j3a
),
14734 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
14735 dOx
= vec_sub(iOx
,dH2x
);
14736 dOy
= vec_sub(iOy
,dH2y
);
14737 dOz
= vec_sub(iOz
,dH2z
);
14738 dH1x
= vec_sub(iH1x
,dH2x
);
14739 dH1y
= vec_sub(iH1y
,dH2y
);
14740 dH1z
= vec_sub(iH1z
,dH2z
);
14741 dH2x
= vec_sub(iH2x
,dH2x
);
14742 dH2y
= vec_sub(iH2y
,dH2y
);
14743 dH2z
= vec_sub(iH2z
,dH2z
);
14745 rsqO
= vec_madd(dOx
,dOx
,nul
);
14746 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14747 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14748 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14749 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14750 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14751 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14752 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14753 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14754 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14755 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14756 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14757 rO
= vec_madd(rsqO
,rinvO
,nul
);
14758 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14759 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
14761 /* load 2 j charges and multiply by iq */
14762 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
14763 do_vonly_2_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14764 do_vonly_2_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14765 do_vonly_2_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
14766 qqO
= vec_madd(iqO
,jq
,nul
);
14767 qqH
= vec_madd(iqH
,jq
,nul
);
14768 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14769 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14770 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* remainder with a single j atom.
 * NOTE(review): the branch condition for this case is not visible in this copy. */
14774 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
14775 dOx
= vec_sub(iOx
,dH2x
);
14776 dOy
= vec_sub(iOy
,dH2y
);
14777 dOz
= vec_sub(iOz
,dH2z
);
14778 dH1x
= vec_sub(iH1x
,dH2x
);
14779 dH1y
= vec_sub(iH1y
,dH2y
);
14780 dH1z
= vec_sub(iH1z
,dH2z
);
14781 dH2x
= vec_sub(iH2x
,dH2x
);
14782 dH2y
= vec_sub(iH2y
,dH2y
);
14783 dH2z
= vec_sub(iH2z
,dH2z
);
14785 rsqO
= vec_madd(dOx
,dOx
,nul
);
14786 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14787 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14788 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14789 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14790 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14791 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14792 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14793 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14794 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14795 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14796 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14797 rO
= vec_madd(rsqO
,rinvO
,nul
);
14798 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14799 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
14801 /* load 1 j charges and multiply by iq */
14802 jq
=load_1_float(charge
+jnra
);
14803 do_vonly_1_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14804 do_vonly_1_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14805 do_vonly_1_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
14806 qqO
= vec_madd(iqO
,jq
,nul
);
14807 qqH
= vec_madd(iqH
,jq
,nul
);
14808 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14809 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14810 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
14812 /* update outer data: add the vector total into Vc[gid[n]] */
14813 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl3120_altivec
 *
 * AltiVec loop for a three-site i molecule (sites O, H1, H2) against
 * single j atoms.  In the code visible here only potential sums are
 * produced; nothing is written to a force array.
 *
 * Per outer entry n (0..nri-1):
 *  - the i coordinates are fetched with load_1_water_shift_and_splat()
 *    from pos+ii3 and shiftvec+is3;
 *  - j atoms are handled four per vector in the main k loop, followed by
 *    separate variants for three, two and one j atoms that call
 *    zero_highest_*_in_3_vectors() on rsq and rinv (presumably to clear
 *    the lanes that carry no j atom - confirm against the helper);
 *  - Coulomb: each site looks up a value with do_vonly_N_ctable_coul()
 *    at r*tsc and vctot accumulates (iq*jq)*VV for O, H1 and H2;
 *  - Lennard-Jones: O site only, vnbtot accumulates
 *    c12*rinvsix*rinvsix - c6*rinvsix with rinvsix = (rinvO*rinvO)^3;
 *  - vctot and vnbtot are added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): the parameter list, the initialisation of nul, ii, vctot
 * and vnbtot, and the per-iteration index setup (is3, ii3, nj0, nj1,
 * jnr*, j3*) are not visible in this view - verify against the full file.
 */
14819 void mcinl3120_altivec(
14838 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
14839 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
14840 vector
float vfacel
,nul
;
14841 vector
float vnbtot
,c6
,c12
,rinvsix
,rinvsqO
;
14842 vector
float tsc
,VVcO
,VVcH1
,VVcH2
;
14843 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
14844 vector
float rinvO
,rinvH1
,rinvH2
,rO
,rH1
,rH2
,rsqO
,rsqH1
,rsqH2
;
14846 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
14847 int jnra
,jnrb
,jnrc
,jnrd
;
14848 int j3a
,j3b
,j3c
,j3d
;
14849 int tja
,tjb
,tjc
,tjd
;
14852 vfacel
=load_float_and_splat(&facel
);
14853 tsc
=load_float_and_splat(&tabscale
);
/* i-site charges times facel: O from charge[ii], H from charge[ii+1] */
14855 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
14856 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
14857 ntiA
= 2*ntype
*type
[ii
];
14859 for(n
=0;n
<nri
;n
++) {
14863 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
14864 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per pass */
14870 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
14879 transpose_4_to_3(load_xyz(pos
+j3a
),
14882 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates at this point; they are
 * consumed by the O and H1 differences first and overwritten last. */
14883 dOx
= vec_sub(iOx
,dH2x
);
14884 dOy
= vec_sub(iOy
,dH2y
);
14885 dOz
= vec_sub(iOz
,dH2z
);
14886 dH1x
= vec_sub(iH1x
,dH2x
);
14887 dH1y
= vec_sub(iH1y
,dH2y
);
14888 dH1z
= vec_sub(iH1z
,dH2z
);
14889 dH2x
= vec_sub(iH2x
,dH2x
);
14890 dH2y
= vec_sub(iH2y
,dH2y
);
14891 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz */
14893 rsqO
= vec_madd(dOx
,dOx
,nul
);
14894 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14895 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14896 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14897 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14898 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14899 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14900 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14901 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14902 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14903 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
/* r = rsq * rinv, used below (times tsc) as the table lookup argument */
14904 rO
= vec_madd(rsqO
,rinvO
,nul
);
14905 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14906 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
/* rinvsix = rinvsqO^3, built from the O-site distance only */
14907 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14908 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
/* offsets into nbfp selected by the i type (ntiA) and each j atom type */
14909 tja
= ntiA
+2*type
[jnra
];
14910 tjb
= ntiA
+2*type
[jnrb
];
14911 tjc
= ntiA
+2*type
[jnrc
];
14912 tjd
= ntiA
+2*type
[jnrd
];
14913 /* load 4 j charges and multiply by iq */
14914 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
14915 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
14916 do_vonly_4_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14917 do_vonly_4_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14918 do_vonly_4_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
/* vec_nmsub(a,b,c) yields c - a*b: subtracts the c6*rinvsix term */
14919 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14920 qqO
= vec_madd(iqO
,jq
,nul
);
14921 qqH
= vec_madd(iqH
,jq
,nul
);
/* adds the c12*rinvsix*rinvsix term */
14922 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14923 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14924 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14925 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* three-j-atom variant: the fourth transpose input is nul */
14934 transpose_4_to_3(load_xyz(pos
+j3a
),
14936 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
14937 dOx
= vec_sub(iOx
,dH2x
);
14938 dOy
= vec_sub(iOy
,dH2y
);
14939 dOz
= vec_sub(iOz
,dH2z
);
14940 dH1x
= vec_sub(iH1x
,dH2x
);
14941 dH1y
= vec_sub(iH1y
,dH2y
);
14942 dH1z
= vec_sub(iH1z
,dH2z
);
14943 dH2x
= vec_sub(iH2x
,dH2x
);
14944 dH2y
= vec_sub(iH2y
,dH2y
);
14945 dH2z
= vec_sub(iH2z
,dH2z
);
14947 rsqO
= vec_madd(dOx
,dOx
,nul
);
14948 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
14949 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
14950 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
14951 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
14952 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
14953 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
14954 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
14955 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
14956 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
14957 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
14958 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
14959 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
14960 rO
= vec_madd(rsqO
,rinvO
,nul
);
14961 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
14962 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
14963 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
14964 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
14965 tja
= ntiA
+2*type
[jnra
];
14966 tjb
= ntiA
+2*type
[jnrb
];
14967 tjc
= ntiA
+2*type
[jnrc
];
14968 /* load 3 j charges and multiply by iq */
14969 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
14970 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
14971 do_vonly_3_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
14972 do_vonly_3_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
14973 do_vonly_3_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
14974 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
14975 qqO
= vec_madd(iqO
,jq
,nul
);
14976 qqH
= vec_madd(iqH
,jq
,nul
);
14977 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
14978 vctot
= vec_madd(qqO
,VVcO
,vctot
);
14979 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
14980 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
14981 } else if(k
<(nj1
-1)) {
/* two-j-atom variant */
14986 transpose_2_to_3(load_xyz(pos
+j3a
),
14987 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
14988 dOx
= vec_sub(iOx
,dH2x
);
14989 dOy
= vec_sub(iOy
,dH2y
);
14990 dOz
= vec_sub(iOz
,dH2z
);
14991 dH1x
= vec_sub(iH1x
,dH2x
);
14992 dH1y
= vec_sub(iH1y
,dH2y
);
14993 dH1z
= vec_sub(iH1z
,dH2z
);
14994 dH2x
= vec_sub(iH2x
,dH2x
);
14995 dH2y
= vec_sub(iH2y
,dH2y
);
14996 dH2z
= vec_sub(iH2z
,dH2z
);
14998 rsqO
= vec_madd(dOx
,dOx
,nul
);
14999 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15000 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15001 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15002 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15003 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15004 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15005 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15006 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15007 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
15008 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
15009 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
15010 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
15011 rO
= vec_madd(rsqO
,rinvO
,nul
);
15012 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15013 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
15014 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
15015 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
15016 tja
= ntiA
+2*type
[jnra
];
15017 tjb
= ntiA
+2*type
[jnrb
];
15018 /* load 2 j charges and multiply by iq */
15019 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
15020 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
15021 do_vonly_2_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
15022 do_vonly_2_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15023 do_vonly_2_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15024 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
15025 qqO
= vec_madd(iqO
,jq
,nul
);
15026 qqH
= vec_madd(iqH
,jq
,nul
);
15027 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
15028 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15029 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15030 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* single-j-atom variant */
15034 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
15035 dOx
= vec_sub(iOx
,dH2x
);
15036 dOy
= vec_sub(iOy
,dH2y
);
15037 dOz
= vec_sub(iOz
,dH2z
);
15038 dH1x
= vec_sub(iH1x
,dH2x
);
15039 dH1y
= vec_sub(iH1y
,dH2y
);
15040 dH1z
= vec_sub(iH1z
,dH2z
);
15041 dH2x
= vec_sub(iH2x
,dH2x
);
15042 dH2y
= vec_sub(iH2y
,dH2y
);
15043 dH2z
= vec_sub(iH2z
,dH2z
);
15045 rsqO
= vec_madd(dOx
,dOx
,nul
);
15046 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15047 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15048 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15049 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15050 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15051 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15052 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15053 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15054 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
15055 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
15056 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
15057 rinvsqO
= vec_madd(rinvO
,rinvO
,nul
);
15058 rO
= vec_madd(rsqO
,rinvO
,nul
);
15059 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15060 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
15061 rinvsix
= vec_madd(rinvsqO
,rinvsqO
,nul
);
15062 rinvsix
= vec_madd(rinvsix
,rinvsqO
,nul
);
15063 tja
= ntiA
+2*type
[jnra
];
15064 /* load 1 j charges and multiply by iq */
15065 jq
=load_1_float(charge
+jnra
);
15066 load_1_pair(nbfp
+tja
,&c6
,&c12
);
15067 do_vonly_1_ctable_coul(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
);
15068 do_vonly_1_ctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15069 do_vonly_1_ctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15070 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
15071 qqO
= vec_madd(iqO
,jq
,nul
);
15072 qqH
= vec_madd(iqH
,jq
,nul
);
15073 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
15074 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15075 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15076 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
15078 /* update outer data */
15079 add_vector_to_float(Vc
+gid
[n
],vctot
);
15080 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * mcinl3320_altivec
 *
 * AltiVec loop for a three-site i molecule (sites O, H1, H2) against
 * single j atoms, with both Coulomb and Lennard-Jones terms taken from
 * the VFtab table.  In the code visible here only potential sums are
 * produced; nothing is written to a force array.
 *
 * Per outer entry n (0..nri-1):
 *  - the i coordinates are fetched with load_1_water_shift_and_splat();
 *  - j atoms are handled four per vector in the main k loop, followed by
 *    variants for three, two and one j atoms that call
 *    zero_highest_*_in_3_vectors() on rsq and rinv (presumably to clear
 *    the lanes that carry no j atom - confirm against the helper);
 *  - O site: do_vonly_N_ljctable_coul_and_lj() at rO*tsc returns VVcO,
 *    VVd and VVr; vnbtot accumulates c6*VVd + c12*VVr;
 *  - H1/H2 sites: do_vonly_N_ljctable_coul() returns the Coulomb value
 *    only;
 *  - vctot accumulates (iq*jq)*VV for all three sites;
 *  - vctot and vnbtot are added to Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): the parameter list, the declaration of tsc, the
 * initialisation of nul, ii, vctot and vnbtot, and the per-iteration
 * index setup are not visible in this view - verify against the full
 * file.
 */
15085 void mcinl3320_altivec(
15105 vector
float iOx
,iOy
,iOz
,iH1x
,iH1y
,iH1z
,iH2x
,iH2y
,iH2z
;
15106 vector
float dOx
,dOy
,dOz
,dH1x
,dH1y
,dH1z
,dH2x
,dH2y
,dH2z
;
15107 vector
float vfacel
,nul
;
15108 vector
float vnbtot
,c6
,c12
;
15109 vector
float vctot
,qqO
,qqH
,iqO
,iqH
,jq
;
15110 vector
float rinvO
,rinvH1
,rinvH2
,rsqO
,rsqH1
,rsqH2
;
15111 vector
float rO
,rH1
,rH2
,VVcO
,VVcH1
,VVcH2
,VVd
,VVr
;
15113 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
15114 int jnra
,jnrb
,jnrc
,jnrd
;
15115 int j3a
,j3b
,j3c
,j3d
;
15116 int tja
,tjb
,tjc
,tjd
;
15119 tsc
=load_float_and_splat(&tabscale
);
15120 vfacel
=load_float_and_splat(&facel
);
/* i-site charges times facel: O from charge[ii], H from charge[ii+1] */
15123 iqO
= vec_madd(load_float_and_splat(charge
+ii
),vfacel
,nul
);
15124 iqH
= vec_madd(load_float_and_splat(charge
+ii
+1),vfacel
,nul
);
15125 ntiA
= 2*ntype
*type
[ii
];
15127 for(n
=0;n
<nri
;n
++) {
15131 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&iOx
,&iOy
,&iOz
,
15132 &iH1x
,&iH1y
,&iH1z
,&iH2x
,&iH2y
,&iH2z
);
/* main j loop: four j atoms per pass */
15138 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
15147 transpose_4_to_3(load_xyz(pos
+j3a
),
15150 load_xyz(pos
+j3d
),&dH2x
,&dH2y
,&dH2z
);
/* dH2x/y/z hold the transposed j coordinates at this point; they are
 * consumed by the O and H1 differences first and overwritten last. */
15151 dOx
= vec_sub(iOx
,dH2x
);
15152 dOy
= vec_sub(iOy
,dH2y
);
15153 dOz
= vec_sub(iOz
,dH2z
);
15154 dH1x
= vec_sub(iH1x
,dH2x
);
15155 dH1y
= vec_sub(iH1y
,dH2y
);
15156 dH1z
= vec_sub(iH1z
,dH2z
);
15157 dH2x
= vec_sub(iH2x
,dH2x
);
15158 dH2y
= vec_sub(iH2y
,dH2y
);
15159 dH2z
= vec_sub(iH2z
,dH2z
);
/* squared distances per site: dx*dx + dy*dy + dz*dz */
15161 rsqO
= vec_madd(dOx
,dOx
,nul
);
15162 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15163 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15164 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15165 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15166 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15167 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15168 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15169 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15170 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
/* r = rsq * rinv, used below (times tsc) as the table lookup argument */
15171 rO
= vec_madd(rsqO
,rinvO
,nul
);
15172 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15173 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
/* offsets into nbfp selected by the i type (ntiA) and each j atom type */
15174 tja
= ntiA
+2*type
[jnra
];
15175 tjb
= ntiA
+2*type
[jnrb
];
15176 tjc
= ntiA
+2*type
[jnrc
];
15177 tjd
= ntiA
+2*type
[jnrd
];
15178 /* load 4 j charges and multiply by iq */
15179 jq
=load_4_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
,charge
+jnrd
);
15180 load_4_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,nbfp
+tjd
,&c6
,&c12
);
/* O site gets Coulomb plus the two LJ table values; H sites Coulomb only */
15181 do_vonly_4_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&VVd
,&VVr
);
15182 do_vonly_4_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15183 do_vonly_4_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15184 qqO
= vec_madd(iqO
,jq
,nul
);
15185 qqH
= vec_madd(iqH
,jq
,nul
);
15186 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15187 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
15188 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15189 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
15190 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* three-j-atom variant: the fourth transpose input is nul */
15199 transpose_4_to_3(load_xyz(pos
+j3a
),
15201 load_xyz(pos
+j3c
),nul
,&dH2x
,&dH2y
,&dH2z
);
15202 dOx
= vec_sub(iOx
,dH2x
);
15203 dOy
= vec_sub(iOy
,dH2y
);
15204 dOz
= vec_sub(iOz
,dH2z
);
15205 dH1x
= vec_sub(iH1x
,dH2x
);
15206 dH1y
= vec_sub(iH1y
,dH2y
);
15207 dH1z
= vec_sub(iH1z
,dH2z
);
15208 dH2x
= vec_sub(iH2x
,dH2x
);
15209 dH2y
= vec_sub(iH2y
,dH2y
);
15210 dH2z
= vec_sub(iH2z
,dH2z
);
15212 rsqO
= vec_madd(dOx
,dOx
,nul
);
15213 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15214 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15215 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15216 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15217 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15218 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15219 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15220 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15221 zero_highest_element_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
15222 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
15223 zero_highest_element_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
15224 rO
= vec_madd(rsqO
,rinvO
,nul
);
15225 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15226 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
15227 tja
= ntiA
+2*type
[jnra
];
15228 tjb
= ntiA
+2*type
[jnrb
];
15229 tjc
= ntiA
+2*type
[jnrc
];
15231 load_3_pair(nbfp
+tja
,nbfp
+tjb
,nbfp
+tjc
,&c6
,&c12
);
15232 jq
=load_3_float(charge
+jnra
,charge
+jnrb
,charge
+jnrc
);
15233 do_vonly_3_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&VVd
,&VVr
);
15234 do_vonly_3_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15235 do_vonly_3_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15236 qqO
= vec_madd(iqO
,jq
,nul
);
15237 qqH
= vec_madd(iqH
,jq
,nul
);
15238 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15239 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
15240 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15241 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
15242 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
15243 } else if(k
<(nj1
-1)) {
/* two-j-atom variant */
15248 transpose_2_to_3(load_xyz(pos
+j3a
),
15249 load_xyz(pos
+j3b
),&dH2x
,&dH2y
,&dH2z
);
15250 dOx
= vec_sub(iOx
,dH2x
);
15251 dOy
= vec_sub(iOy
,dH2y
);
15252 dOz
= vec_sub(iOz
,dH2z
);
15253 dH1x
= vec_sub(iH1x
,dH2x
);
15254 dH1y
= vec_sub(iH1y
,dH2y
);
15255 dH1z
= vec_sub(iH1z
,dH2z
);
15256 dH2x
= vec_sub(iH2x
,dH2x
);
15257 dH2y
= vec_sub(iH2y
,dH2y
);
15258 dH2z
= vec_sub(iH2z
,dH2z
);
15260 rsqO
= vec_madd(dOx
,dOx
,nul
);
15261 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15262 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15263 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15264 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15265 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15266 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15267 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15268 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15269 zero_highest_2_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
15270 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
15271 zero_highest_2_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
15272 rO
= vec_madd(rsqO
,rinvO
,nul
);
15273 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15274 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
15275 tja
= ntiA
+2*type
[jnra
];
15276 tjb
= ntiA
+2*type
[jnrb
];
15277 /* load 2 j charges and multiply by iq */
15278 jq
=load_2_float(charge
+jnra
,charge
+jnrb
);
15279 load_2_pair(nbfp
+tja
,nbfp
+tjb
,&c6
,&c12
);
15280 do_vonly_2_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&VVd
,&VVr
);
15281 do_vonly_2_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15282 do_vonly_2_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15283 qqO
= vec_madd(iqO
,jq
,nul
);
15284 qqH
= vec_madd(iqH
,jq
,nul
);
15285 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15286 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
15287 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15288 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
15289 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
/* single-j-atom variant */
15293 transpose_1_to_3(load_xyz(pos
+j3a
),&dH2x
,&dH2y
,&dH2z
);
15294 dOx
= vec_sub(iOx
,dH2x
);
15295 dOy
= vec_sub(iOy
,dH2y
);
15296 dOz
= vec_sub(iOz
,dH2z
);
15297 dH1x
= vec_sub(iH1x
,dH2x
);
15298 dH1y
= vec_sub(iH1y
,dH2y
);
15299 dH1z
= vec_sub(iH1z
,dH2z
);
15300 dH2x
= vec_sub(iH2x
,dH2x
);
15301 dH2y
= vec_sub(iH2y
,dH2y
);
15302 dH2z
= vec_sub(iH2z
,dH2z
);
15304 rsqO
= vec_madd(dOx
,dOx
,nul
);
15305 rsqH1
= vec_madd(dH1x
,dH1x
,nul
);
15306 rsqH2
= vec_madd(dH2x
,dH2x
,nul
);
15307 rsqO
= vec_madd(dOy
,dOy
,rsqO
);
15308 rsqH1
= vec_madd(dH1y
,dH1y
,rsqH1
);
15309 rsqH2
= vec_madd(dH2y
,dH2y
,rsqH2
);
15310 rsqO
= vec_madd(dOz
,dOz
,rsqO
);
15311 rsqH1
= vec_madd(dH1z
,dH1z
,rsqH1
);
15312 rsqH2
= vec_madd(dH2z
,dH2z
,rsqH2
);
15313 zero_highest_3_elements_in_3_vectors(&rsqO
,&rsqH1
,&rsqH2
);
15314 do_3_invsqrt(rsqO
,rsqH1
,rsqH2
,&rinvO
,&rinvH1
,&rinvH2
);
15315 zero_highest_3_elements_in_3_vectors(&rinvO
,&rinvH1
,&rinvH2
);
15316 rO
= vec_madd(rsqO
,rinvO
,nul
);
15317 rH1
= vec_madd(rsqH1
,rinvH1
,nul
);
15318 rH2
= vec_madd(rsqH2
,rinvH2
,nul
);
15319 tja
= ntiA
+2*type
[jnra
];
15320 /* load 1 j charges and multiply by iq */
15321 jq
=load_1_float(charge
+jnra
);
15322 load_1_pair(nbfp
+tja
,&c6
,&c12
);
15323 do_vonly_1_ljctable_coul_and_lj(VFtab
,vec_madd(rO
,tsc
,nul
),&VVcO
,&VVd
,&VVr
);
15324 do_vonly_1_ljctable_coul(VFtab
,vec_madd(rH1
,tsc
,nul
),&VVcH1
);
15325 do_vonly_1_ljctable_coul(VFtab
,vec_madd(rH2
,tsc
,nul
),&VVcH2
);
15326 qqO
= vec_madd(iqO
,jq
,nul
);
15327 qqH
= vec_madd(iqH
,jq
,nul
);
15328 vctot
= vec_madd(qqO
,VVcO
,vctot
);
15329 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
15330 vctot
= vec_madd(qqH
,VVcH1
,vctot
);
15331 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
15332 vctot
= vec_madd(qqH
,VVcH2
,vctot
);
15334 /* update outer data */
15335 add_vector_to_float(Vc
+gid
[n
],vctot
);
15336 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * mcinl1030_altivec
 *
 * AltiVec loop for three-site i molecules against three-site j
 * molecules, plain (non-tabulated) Coulomb.  In the code visible here
 * only the potential sum is produced; nothing is written to a force
 * array.
 *
 * Setup: the O and H charges are read once from charge[iinr[0]] and
 * charge[iinr[0]+1]; the products qqOO, qqOH and qqHH are formed and
 * multiplied by facel.
 *
 * Per outer entry n (0..nri-1):
 *  - the i coordinates are fetched with load_1_water_shift_and_splat()
 *    into (ix1,iy1,iz1), (ix2,iy2,iz2), (ix3,iy3,iz3);
 *  - j molecules are loaded four per pass with load_4_water(), with
 *    variants using load_3_water(), load_2_water() and load_1_water()
 *    for fewer j molecules;
 *  - for all nine site pairs, d = i - j, rsq = dx*dx+dy*dy+dz*dz,
 *    rinv comes from do_9_invsqrt(), and vctot accumulates rinv*qq with
 *    qqOO for pair 11, qqOH for pairs 12, 13, 21, 31 and qqHH for the
 *    rest;
 *  - in the 3/2/1 variants the charge products are first passed through
 *    vec_sld(qq,nul,4|8|12) and rinv through
 *    zero_highest_*_in_9_vectors();
 *  - vctot is added to Vc[gid[n]].
 *
 * NOTE(review): the parameter list, the initialisation of nul and vctot,
 * part of the do_9_invsqrt() argument lists, and the per-iteration index
 * setup are not visible in this view - verify against the full file.
 */
15343 void mcinl1030_altivec(
15356 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
15357 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
15359 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
15360 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
15361 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
15363 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
15364 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
15366 vector
float vfacel
,vcoul1
,vcoul2
,vcoul3
,nul
;
15367 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,qqOOt
,qqOHt
,qqHHt
;
15371 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
15372 int jnra
,jnrb
,jnrc
,jnrd
;
15373 int j3a
,j3b
,j3c
,j3d
;
15376 vfacel
=load_float_and_splat(&facel
);
/* O and H charges are read once, at charge[iinr[0]] and charge[iinr[0]+1] */
15377 qO
= load_float_and_splat(charge
+iinr
[0]);
15378 qH
= load_float_and_splat(charge
+iinr
[0]+1);
/* pairwise charge products, then multiplied by facel */
15379 qqOO
= vec_madd(qO
,qO
,nul
);
15380 qqOH
= vec_madd(qO
,qH
,nul
);
15381 qqHH
= vec_madd(qH
,qH
,nul
);
15382 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
15383 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
15384 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
15386 for(n
=0;n
<nri
;n
++) {
15390 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
15391 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* main j loop: four j molecules per pass */
15396 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
15405 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
15406 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* dAB = i site A minus j site B, for all nine site pairs */
15408 dx11
= vec_sub(ix1
,jx1
);
15409 dx12
= vec_sub(ix1
,jx2
);
15410 dx13
= vec_sub(ix1
,jx3
);
15411 dy11
= vec_sub(iy1
,jy1
);
15412 dy12
= vec_sub(iy1
,jy2
);
15413 dy13
= vec_sub(iy1
,jy3
);
15414 dz11
= vec_sub(iz1
,jz1
);
15415 dz12
= vec_sub(iz1
,jz2
);
15416 dz13
= vec_sub(iz1
,jz3
);
15417 dx21
= vec_sub(ix2
,jx1
);
15418 dx22
= vec_sub(ix2
,jx2
);
15419 dx23
= vec_sub(ix2
,jx3
);
15420 dy21
= vec_sub(iy2
,jy1
);
15421 dy22
= vec_sub(iy2
,jy2
);
15422 dy23
= vec_sub(iy2
,jy3
);
15423 dz21
= vec_sub(iz2
,jz1
);
15424 dz22
= vec_sub(iz2
,jz2
);
15425 dz23
= vec_sub(iz2
,jz3
);
15426 dx31
= vec_sub(ix3
,jx1
);
15427 dx32
= vec_sub(ix3
,jx2
);
15428 dx33
= vec_sub(ix3
,jx3
);
15429 dy31
= vec_sub(iy3
,jy1
);
15430 dy32
= vec_sub(iy3
,jy2
);
15431 dy33
= vec_sub(iy3
,jy3
);
15432 dz31
= vec_sub(iz3
,jz1
);
15433 dz32
= vec_sub(iz3
,jz2
);
15434 dz33
= vec_sub(iz3
,jz3
);
/* squared distances per pair: dx*dx + dy*dy + dz*dz */
15436 rsq11
= vec_madd(dx11
,dx11
,nul
);
15437 rsq12
= vec_madd(dx12
,dx12
,nul
);
15438 rsq13
= vec_madd(dx13
,dx13
,nul
);
15439 rsq21
= vec_madd(dx21
,dx21
,nul
);
15440 rsq22
= vec_madd(dx22
,dx22
,nul
);
15441 rsq23
= vec_madd(dx23
,dx23
,nul
);
15442 rsq31
= vec_madd(dx31
,dx31
,nul
);
15443 rsq32
= vec_madd(dx32
,dx32
,nul
);
15444 rsq33
= vec_madd(dx33
,dx33
,nul
);
15445 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15446 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15447 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15448 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15449 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15450 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15451 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15452 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15453 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15454 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15455 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15456 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15457 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15458 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15459 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15460 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15461 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15462 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
15464 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15467 &rinv11
,&rinv12
,&rinv13
,
15468 &rinv21
,&rinv22
,&rinv23
,
15469 &rinv31
,&rinv32
,&rinv33
);
/* Coulomb sum: rinv times the facel-scaled charge product of each pair */
15471 vctot
= vec_madd(rinv11
,qqOO
,vctot
);
15472 vctot
= vec_madd(rinv12
,qqOH
,vctot
);
15473 vctot
= vec_madd(rinv13
,qqOH
,vctot
);
15474 vctot
= vec_madd(rinv21
,qqOH
,vctot
);
15475 vctot
= vec_madd(rinv22
,qqHH
,vctot
);
15476 vctot
= vec_madd(rinv23
,qqHH
,vctot
);
15477 vctot
= vec_madd(rinv31
,qqOH
,vctot
);
15478 vctot
= vec_madd(rinv32
,qqHH
,vctot
);
15479 vctot
= vec_madd(rinv33
,qqHH
,vctot
);
/* three-j-molecule variant */
15488 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
15489 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* vec_sld(q,nul,4) shifts q left by 4 bytes and fills the last element
 * from nul (assumes nul is the zero vector - its initialisation is not
 * visible here). */
15490 qqOOt
= vec_sld(qqOO
,nul
,4);
15491 qqOHt
= vec_sld(qqOH
,nul
,4);
15492 qqHHt
= vec_sld(qqHH
,nul
,4);
15494 dx11
= vec_sub(ix1
,jx1
);
15495 dx12
= vec_sub(ix1
,jx2
);
15496 dx13
= vec_sub(ix1
,jx3
);
15497 dy11
= vec_sub(iy1
,jy1
);
15498 dy12
= vec_sub(iy1
,jy2
);
15499 dy13
= vec_sub(iy1
,jy3
);
15500 dz11
= vec_sub(iz1
,jz1
);
15501 dz12
= vec_sub(iz1
,jz2
);
15502 dz13
= vec_sub(iz1
,jz3
);
15503 dx21
= vec_sub(ix2
,jx1
);
15504 dx22
= vec_sub(ix2
,jx2
);
15505 dx23
= vec_sub(ix2
,jx3
);
15506 dy21
= vec_sub(iy2
,jy1
);
15507 dy22
= vec_sub(iy2
,jy2
);
15508 dy23
= vec_sub(iy2
,jy3
);
15509 dz21
= vec_sub(iz2
,jz1
);
15510 dz22
= vec_sub(iz2
,jz2
);
15511 dz23
= vec_sub(iz2
,jz3
);
15512 dx31
= vec_sub(ix3
,jx1
);
15513 dx32
= vec_sub(ix3
,jx2
);
15514 dx33
= vec_sub(ix3
,jx3
);
15515 dy31
= vec_sub(iy3
,jy1
);
15516 dy32
= vec_sub(iy3
,jy2
);
15517 dy33
= vec_sub(iy3
,jy3
);
15518 dz31
= vec_sub(iz3
,jz1
);
15519 dz32
= vec_sub(iz3
,jz2
);
15520 dz33
= vec_sub(iz3
,jz3
);
15522 rsq11
= vec_madd(dx11
,dx11
,nul
);
15523 rsq12
= vec_madd(dx12
,dx12
,nul
);
15524 rsq13
= vec_madd(dx13
,dx13
,nul
);
15525 rsq21
= vec_madd(dx21
,dx21
,nul
);
15526 rsq22
= vec_madd(dx22
,dx22
,nul
);
15527 rsq23
= vec_madd(dx23
,dx23
,nul
);
15528 rsq31
= vec_madd(dx31
,dx31
,nul
);
15529 rsq32
= vec_madd(dx32
,dx32
,nul
);
15530 rsq33
= vec_madd(dx33
,dx33
,nul
);
15531 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15532 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15533 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15534 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15535 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15536 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15537 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15538 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15539 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15540 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15541 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15542 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15543 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15544 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15545 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15546 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15547 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15548 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
15550 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15553 &rinv11
,&rinv12
,&rinv13
,
15554 &rinv21
,&rinv22
,&rinv23
,
15555 &rinv31
,&rinv32
,&rinv33
);
15557 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
15558 &rinv21
,&rinv22
,&rinv23
,
15559 &rinv31
,&rinv32
,&rinv33
);
15561 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
15562 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
15563 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
15564 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
15565 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
15566 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
15567 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
15568 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
15569 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
15570 } else if(k
<(nj1
-1)) {
/* two-j-molecule variant: charge products shifted by 8 bytes */
15575 load_2_water(pos
+j3a
,pos
+j3b
,
15576 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
15577 qqOOt
= vec_sld(qqOO
,nul
,8);
15578 qqOHt
= vec_sld(qqOH
,nul
,8);
15579 qqHHt
= vec_sld(qqHH
,nul
,8);
15581 dx11
= vec_sub(ix1
,jx1
);
15582 dx12
= vec_sub(ix1
,jx2
);
15583 dx13
= vec_sub(ix1
,jx3
);
15584 dy11
= vec_sub(iy1
,jy1
);
15585 dy12
= vec_sub(iy1
,jy2
);
15586 dy13
= vec_sub(iy1
,jy3
);
15587 dz11
= vec_sub(iz1
,jz1
);
15588 dz12
= vec_sub(iz1
,jz2
);
15589 dz13
= vec_sub(iz1
,jz3
);
15590 dx21
= vec_sub(ix2
,jx1
);
15591 dx22
= vec_sub(ix2
,jx2
);
15592 dx23
= vec_sub(ix2
,jx3
);
15593 dy21
= vec_sub(iy2
,jy1
);
15594 dy22
= vec_sub(iy2
,jy2
);
15595 dy23
= vec_sub(iy2
,jy3
);
15596 dz21
= vec_sub(iz2
,jz1
);
15597 dz22
= vec_sub(iz2
,jz2
);
15598 dz23
= vec_sub(iz2
,jz3
);
15599 dx31
= vec_sub(ix3
,jx1
);
15600 dx32
= vec_sub(ix3
,jx2
);
15601 dx33
= vec_sub(ix3
,jx3
);
15602 dy31
= vec_sub(iy3
,jy1
);
15603 dy32
= vec_sub(iy3
,jy2
);
15604 dy33
= vec_sub(iy3
,jy3
);
15605 dz31
= vec_sub(iz3
,jz1
);
15606 dz32
= vec_sub(iz3
,jz2
);
15607 dz33
= vec_sub(iz3
,jz3
);
15609 rsq11
= vec_madd(dx11
,dx11
,nul
);
15610 rsq12
= vec_madd(dx12
,dx12
,nul
);
15611 rsq13
= vec_madd(dx13
,dx13
,nul
);
15612 rsq21
= vec_madd(dx21
,dx21
,nul
);
15613 rsq22
= vec_madd(dx22
,dx22
,nul
);
15614 rsq23
= vec_madd(dx23
,dx23
,nul
);
15615 rsq31
= vec_madd(dx31
,dx31
,nul
);
15616 rsq32
= vec_madd(dx32
,dx32
,nul
);
15617 rsq33
= vec_madd(dx33
,dx33
,nul
);
15618 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15619 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15620 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15621 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15622 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15623 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15624 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15625 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15626 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15627 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15628 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15629 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15630 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15631 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15632 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15633 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15634 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15635 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
15637 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15640 &rinv11
,&rinv12
,&rinv13
,
15641 &rinv21
,&rinv22
,&rinv23
,
15642 &rinv31
,&rinv32
,&rinv33
);
15644 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
15645 &rinv21
,&rinv22
,&rinv23
,
15646 &rinv31
,&rinv32
,&rinv33
);
15648 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
15649 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
15650 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
15651 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
15652 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
15653 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
15654 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
15655 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
15656 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
/* single-j-molecule variant: charge products shifted by 12 bytes */
15660 load_1_water(pos
+j3a
,
15661 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
15662 qqOOt
= vec_sld(qqOO
,nul
,12);
15663 qqOHt
= vec_sld(qqOH
,nul
,12);
15664 qqHHt
= vec_sld(qqHH
,nul
,12);
15666 dx11
= vec_sub(ix1
,jx1
);
15667 dx12
= vec_sub(ix1
,jx2
);
15668 dx13
= vec_sub(ix1
,jx3
);
15669 dy11
= vec_sub(iy1
,jy1
);
15670 dy12
= vec_sub(iy1
,jy2
);
15671 dy13
= vec_sub(iy1
,jy3
);
15672 dz11
= vec_sub(iz1
,jz1
);
15673 dz12
= vec_sub(iz1
,jz2
);
15674 dz13
= vec_sub(iz1
,jz3
);
15675 dx21
= vec_sub(ix2
,jx1
);
15676 dx22
= vec_sub(ix2
,jx2
);
15677 dx23
= vec_sub(ix2
,jx3
);
15678 dy21
= vec_sub(iy2
,jy1
);
15679 dy22
= vec_sub(iy2
,jy2
);
15680 dy23
= vec_sub(iy2
,jy3
);
15681 dz21
= vec_sub(iz2
,jz1
);
15682 dz22
= vec_sub(iz2
,jz2
);
15683 dz23
= vec_sub(iz2
,jz3
);
15684 dx31
= vec_sub(ix3
,jx1
);
15685 dx32
= vec_sub(ix3
,jx2
);
15686 dx33
= vec_sub(ix3
,jx3
);
15687 dy31
= vec_sub(iy3
,jy1
);
15688 dy32
= vec_sub(iy3
,jy2
);
15689 dy33
= vec_sub(iy3
,jy3
);
15690 dz31
= vec_sub(iz3
,jz1
);
15691 dz32
= vec_sub(iz3
,jz2
);
15692 dz33
= vec_sub(iz3
,jz3
);
15694 rsq11
= vec_madd(dx11
,dx11
,nul
);
15695 rsq12
= vec_madd(dx12
,dx12
,nul
);
15696 rsq13
= vec_madd(dx13
,dx13
,nul
);
15697 rsq21
= vec_madd(dx21
,dx21
,nul
);
15698 rsq22
= vec_madd(dx22
,dx22
,nul
);
15699 rsq23
= vec_madd(dx23
,dx23
,nul
);
15700 rsq31
= vec_madd(dx31
,dx31
,nul
);
15701 rsq32
= vec_madd(dx32
,dx32
,nul
);
15702 rsq33
= vec_madd(dx33
,dx33
,nul
);
15703 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15704 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15705 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15706 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15707 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15708 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15709 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15710 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15711 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15712 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15713 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15714 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15715 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15716 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15717 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15718 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15719 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15720 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
15722 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15725 &rinv11
,&rinv12
,&rinv13
,
15726 &rinv21
,&rinv22
,&rinv23
,
15727 &rinv31
,&rinv32
,&rinv33
);
15729 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
15730 &rinv21
,&rinv22
,&rinv23
,
15731 &rinv31
,&rinv32
,&rinv33
);
15733 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
15734 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
15735 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
15736 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
15737 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
15738 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
15739 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
15740 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
15741 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
15743 /* update outer data */
15744 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl1130_altivec
 *
 * Energy-only inner loop for pairs of three-site molecules (the load helpers
 * call them "water").  For every i molecule it accumulates
 *   - vctot : sum over all 9 site pairs of qq * rinv, and
 *   - vnbtot: a 12-6 term, c12*rinv11^12 - c6*rinv11^6, for the site 1-1 pair only,
 * handling four j molecules per vector iteration, and finally adds the totals
 * to Vc[gid[n]] and Vnb[gid[n]].  No forces are computed in the visible code.
 *
 * NOTE(review): this listing omits several original lines (parameter list,
 * initialisation of nul / ii / tj / vctot / vnbtot, the j-index set-up, some
 * "if" headers and the closing braces).  Comments below describe only what
 * the visible statements show; nul is presumably the zero vector - confirm.
 * vnb6 and vnb12 are declared but not referenced in the visible lines.
 */
15749 void mcinl1130_altivec(
15766 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
15767 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
15769 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
15770 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
15771 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
15773 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
15774 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
15775 vector
float rinvsq11
;
15777 vector
float vfacel
,nul
;
15778 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
,rinvsix
;
15779 vector
float vnb6
,vnb12
,vnbtot
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
15781 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
15782 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
15783 int j3a
,j3b
,j3c
,j3d
;
/* Loop-invariant charge products: qqXY = qX * qY * facel, splatted over the vector. */
15786 vfacel
=load_float_and_splat(&facel
);
15788 qO
= load_float_and_splat(charge
+ii
);
15789 qH
= load_float_and_splat(charge
+ii
+1);
15790 qqOO
= vec_madd(qO
,qO
,nul
);
15791 qqOH
= vec_madd(qO
,qH
,nul
);
15792 qqHH
= vec_madd(qH
,qH
,nul
);
15793 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
15794 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
15795 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* One c6/c12 pair is read from nbfp+tj and element 0 of each is broadcast. */
15798 load_1_pair(nbfp
+tj
,&c6
,&c12
);
15799 c6
= vec_splat(c6
,0);
15800 c12
= vec_splat(c12
,0);
/* Outer loop over the nri i entries. */
15802 for(n
=0;n
<nri
;n
++) {
15806 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
15807 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: four j molecules per iteration while at least four remain. */
15813 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
15822 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
15823 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Displacements dAB = (i site A) - (j site B) for all 9 site pairs. */
15824 dx11
= vec_sub(ix1
,jx1
);
15825 dx12
= vec_sub(ix1
,jx2
);
15826 dx13
= vec_sub(ix1
,jx3
);
15827 dy11
= vec_sub(iy1
,jy1
);
15828 dy12
= vec_sub(iy1
,jy2
);
15829 dy13
= vec_sub(iy1
,jy3
);
15830 dz11
= vec_sub(iz1
,jz1
);
15831 dz12
= vec_sub(iz1
,jz2
);
15832 dz13
= vec_sub(iz1
,jz3
);
15833 dx21
= vec_sub(ix2
,jx1
);
15834 dx22
= vec_sub(ix2
,jx2
);
15835 dx23
= vec_sub(ix2
,jx3
);
15836 dy21
= vec_sub(iy2
,jy1
);
15837 dy22
= vec_sub(iy2
,jy2
);
15838 dy23
= vec_sub(iy2
,jy3
);
15839 dz21
= vec_sub(iz2
,jz1
);
15840 dz22
= vec_sub(iz2
,jz2
);
15841 dz23
= vec_sub(iz2
,jz3
);
15842 dx31
= vec_sub(ix3
,jx1
);
15843 dx32
= vec_sub(ix3
,jx2
);
15844 dx33
= vec_sub(ix3
,jx3
);
15845 dy31
= vec_sub(iy3
,jy1
);
15846 dy32
= vec_sub(iy3
,jy2
);
15847 dy33
= vec_sub(iy3
,jy3
);
15848 dz31
= vec_sub(iz3
,jz1
);
15849 dz32
= vec_sub(iz3
,jz2
);
15850 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances rsqAB = dx*dx + dy*dy + dz*dz, built with three fused multiply-adds. */
15852 rsq11
= vec_madd(dx11
,dx11
,nul
);
15853 rsq12
= vec_madd(dx12
,dx12
,nul
);
15854 rsq13
= vec_madd(dx13
,dx13
,nul
);
15855 rsq21
= vec_madd(dx21
,dx21
,nul
);
15856 rsq22
= vec_madd(dx22
,dx22
,nul
);
15857 rsq23
= vec_madd(dx23
,dx23
,nul
);
15858 rsq31
= vec_madd(dx31
,dx31
,nul
);
15859 rsq32
= vec_madd(dx32
,dx32
,nul
);
15860 rsq33
= vec_madd(dx33
,dx33
,nul
);
15861 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15862 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15863 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15864 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15865 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15866 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15867 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15868 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15869 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15870 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15871 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15872 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15873 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15874 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15875 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15876 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15877 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15878 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Presumably rinvAB = 1/sqrt(rsqAB); the rsq21..rsq33 argument lines are missing from this listing. */
15880 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15883 &rinv11
,&rinv12
,&rinv13
,
15884 &rinv21
,&rinv22
,&rinv23
,
15885 &rinv31
,&rinv32
,&rinv33
);
/*
 * Site 1-1 only: rinvsix = rinv11^6, vnbtot -= c6*rinvsix (vec_nmsub),
 * then rinvsix is squared and vnbtot += c12*rinv11^12.
 * All 9 pairs: vctot += rinvAB * qqXY.
 */
15887 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
15888 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
15889 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
15890 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
15891 rinvsix
= vec_madd(rinvsix
,rinvsix
,nul
);
15892 vctot
= vec_madd(rinv11
,qqOO
,vctot
);
15893 vctot
= vec_madd(rinv12
,qqOH
,vctot
);
15894 vctot
= vec_madd(rinv13
,qqOH
,vctot
);
15895 vnbtot
= vec_madd(c12
,rinvsix
,vnbtot
);
15896 vctot
= vec_madd(rinv21
,qqOH
,vctot
);
15897 vctot
= vec_madd(rinv22
,qqHH
,vctot
);
15898 vctot
= vec_madd(rinv23
,qqHH
,vctot
);
15899 vctot
= vec_madd(rinv31
,qqOH
,vctot
);
15900 vctot
= vec_madd(rinv32
,qqHH
,vctot
);
15901 vctot
= vec_madd(rinv33
,qqHH
,vctot
);
/*
 * Remainder, three j molecules left (the branch header is not in this listing).
 * vec_sld(x,nul,4) shifts each splatted constant left by 4 bytes, so one
 * float's worth of nul enters at the end and the unused element contributes
 * nothing (assuming nul is zero).
 */
15910 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
15911 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
15912 qqOOt
= vec_sld(qqOO
,nul
,4);
15913 qqOHt
= vec_sld(qqOH
,nul
,4);
15914 qqHHt
= vec_sld(qqHH
,nul
,4);
15915 c6t
= vec_sld(c6
,nul
,4);
15916 c12t
= vec_sld(c12
,nul
,4);
15918 dx11
= vec_sub(ix1
,jx1
);
15919 dx12
= vec_sub(ix1
,jx2
);
15920 dx13
= vec_sub(ix1
,jx3
);
15921 dy11
= vec_sub(iy1
,jy1
);
15922 dy12
= vec_sub(iy1
,jy2
);
15923 dy13
= vec_sub(iy1
,jy3
);
15924 dz11
= vec_sub(iz1
,jz1
);
15925 dz12
= vec_sub(iz1
,jz2
);
15926 dz13
= vec_sub(iz1
,jz3
);
15927 dx21
= vec_sub(ix2
,jx1
);
15928 dx22
= vec_sub(ix2
,jx2
);
15929 dx23
= vec_sub(ix2
,jx3
);
15930 dy21
= vec_sub(iy2
,jy1
);
15931 dy22
= vec_sub(iy2
,jy2
);
15932 dy23
= vec_sub(iy2
,jy3
);
15933 dz21
= vec_sub(iz2
,jz1
);
15934 dz22
= vec_sub(iz2
,jz2
);
15935 dz23
= vec_sub(iz2
,jz3
);
15936 dx31
= vec_sub(ix3
,jx1
);
15937 dx32
= vec_sub(ix3
,jx2
);
15938 dx33
= vec_sub(ix3
,jx3
);
15939 dy31
= vec_sub(iy3
,jy1
);
15940 dy32
= vec_sub(iy3
,jy2
);
15941 dy33
= vec_sub(iy3
,jy3
);
15942 dz31
= vec_sub(iz3
,jz1
);
15943 dz32
= vec_sub(iz3
,jz2
);
15944 dz33
= vec_sub(iz3
,jz3
);
15946 rsq11
= vec_madd(dx11
,dx11
,nul
);
15947 rsq12
= vec_madd(dx12
,dx12
,nul
);
15948 rsq13
= vec_madd(dx13
,dx13
,nul
);
15949 rsq21
= vec_madd(dx21
,dx21
,nul
);
15950 rsq22
= vec_madd(dx22
,dx22
,nul
);
15951 rsq23
= vec_madd(dx23
,dx23
,nul
);
15952 rsq31
= vec_madd(dx31
,dx31
,nul
);
15953 rsq32
= vec_madd(dx32
,dx32
,nul
);
15954 rsq33
= vec_madd(dx33
,dx33
,nul
);
15955 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
15956 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
15957 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
15958 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
15959 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
15960 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
15961 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
15962 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
15963 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
15964 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
15965 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
15966 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
15967 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
15968 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
15969 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
15970 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
15971 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
15972 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
15974 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
15977 &rinv11
,&rinv12
,&rinv13
,
15978 &rinv21
,&rinv22
,&rinv23
,
15979 &rinv31
,&rinv32
,&rinv33
);
/* Clear the element that has no j molecule so it cannot leak into the sums. */
15981 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
15982 &rinv21
,&rinv22
,&rinv23
,
15983 &rinv31
,&rinv32
,&rinv33
);
/* Same accumulation as the main loop, using the shifted (masked) constants. */
15985 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
15986 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
15987 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
15988 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
15989 rinvsix
= vec_madd(rinvsix
,rinvsix
,nul
);
15990 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
15991 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
15992 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
15993 vnbtot
= vec_madd(c12t
,rinvsix
,vnbtot
);
15994 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
15995 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
15996 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
15997 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
15998 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
15999 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
16000 } else if(k
<(nj1
-1)) {
/* Remainder, two j molecules left: constants are shifted by 8 bytes (two floats of nul). */
16005 load_2_water(pos
+j3a
,pos
+j3b
,
16006 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16007 qqOOt
= vec_sld(qqOO
,nul
,8);
16008 qqOHt
= vec_sld(qqOH
,nul
,8);
16009 qqHHt
= vec_sld(qqHH
,nul
,8);
16010 c6t
= vec_sld(c6
,nul
,8);
16011 c12t
= vec_sld(c12
,nul
,8);
16013 dx11
= vec_sub(ix1
,jx1
);
16014 dx12
= vec_sub(ix1
,jx2
);
16015 dx13
= vec_sub(ix1
,jx3
);
16016 dy11
= vec_sub(iy1
,jy1
);
16017 dy12
= vec_sub(iy1
,jy2
);
16018 dy13
= vec_sub(iy1
,jy3
);
16019 dz11
= vec_sub(iz1
,jz1
);
16020 dz12
= vec_sub(iz1
,jz2
);
16021 dz13
= vec_sub(iz1
,jz3
);
16022 dx21
= vec_sub(ix2
,jx1
);
16023 dx22
= vec_sub(ix2
,jx2
);
16024 dx23
= vec_sub(ix2
,jx3
);
16025 dy21
= vec_sub(iy2
,jy1
);
16026 dy22
= vec_sub(iy2
,jy2
);
16027 dy23
= vec_sub(iy2
,jy3
);
16028 dz21
= vec_sub(iz2
,jz1
);
16029 dz22
= vec_sub(iz2
,jz2
);
16030 dz23
= vec_sub(iz2
,jz3
);
16031 dx31
= vec_sub(ix3
,jx1
);
16032 dx32
= vec_sub(ix3
,jx2
);
16033 dx33
= vec_sub(ix3
,jx3
);
16034 dy31
= vec_sub(iy3
,jy1
);
16035 dy32
= vec_sub(iy3
,jy2
);
16036 dy33
= vec_sub(iy3
,jy3
);
16037 dz31
= vec_sub(iz3
,jz1
);
16038 dz32
= vec_sub(iz3
,jz2
);
16039 dz33
= vec_sub(iz3
,jz3
);
16041 rsq11
= vec_madd(dx11
,dx11
,nul
);
16042 rsq12
= vec_madd(dx12
,dx12
,nul
);
16043 rsq13
= vec_madd(dx13
,dx13
,nul
);
16044 rsq21
= vec_madd(dx21
,dx21
,nul
);
16045 rsq22
= vec_madd(dx22
,dx22
,nul
);
16046 rsq23
= vec_madd(dx23
,dx23
,nul
);
16047 rsq31
= vec_madd(dx31
,dx31
,nul
);
16048 rsq32
= vec_madd(dx32
,dx32
,nul
);
16049 rsq33
= vec_madd(dx33
,dx33
,nul
);
16050 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16051 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16052 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16053 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16054 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16055 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16056 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16057 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16058 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16059 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16060 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16061 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16062 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16063 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16064 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16065 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16066 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16067 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
16069 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16072 &rinv11
,&rinv12
,&rinv13
,
16073 &rinv21
,&rinv22
,&rinv23
,
16074 &rinv31
,&rinv32
,&rinv33
);
16076 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
16077 &rinv21
,&rinv22
,&rinv23
,
16078 &rinv31
,&rinv32
,&rinv33
);
16080 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
16081 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
16082 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
16083 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
16084 rinvsix
= vec_madd(rinvsix
,rinvsix
,nul
);
16085 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
16086 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
16087 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
16088 vnbtot
= vec_madd(c12t
,rinvsix
,vnbtot
);
16089 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
16090 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
16091 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
16092 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
16093 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
16094 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
/* Remainder, one j molecule left (branch header not in this listing): 12-byte shift, three elements masked. */
16098 load_1_water(pos
+j3a
,
16099 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16100 qqOOt
= vec_sld(qqOO
,nul
,12);
16101 qqOHt
= vec_sld(qqOH
,nul
,12);
16102 qqHHt
= vec_sld(qqHH
,nul
,12);
16103 c6t
= vec_sld(c6
,nul
,12);
16104 c12t
= vec_sld(c12
,nul
,12);
16106 dx11
= vec_sub(ix1
,jx1
);
16107 dx12
= vec_sub(ix1
,jx2
);
16108 dx13
= vec_sub(ix1
,jx3
);
16109 dy11
= vec_sub(iy1
,jy1
);
16110 dy12
= vec_sub(iy1
,jy2
);
16111 dy13
= vec_sub(iy1
,jy3
);
16112 dz11
= vec_sub(iz1
,jz1
);
16113 dz12
= vec_sub(iz1
,jz2
);
16114 dz13
= vec_sub(iz1
,jz3
);
16115 dx21
= vec_sub(ix2
,jx1
);
16116 dx22
= vec_sub(ix2
,jx2
);
16117 dx23
= vec_sub(ix2
,jx3
);
16118 dy21
= vec_sub(iy2
,jy1
);
16119 dy22
= vec_sub(iy2
,jy2
);
16120 dy23
= vec_sub(iy2
,jy3
);
16121 dz21
= vec_sub(iz2
,jz1
);
16122 dz22
= vec_sub(iz2
,jz2
);
16123 dz23
= vec_sub(iz2
,jz3
);
16124 dx31
= vec_sub(ix3
,jx1
);
16125 dx32
= vec_sub(ix3
,jx2
);
16126 dx33
= vec_sub(ix3
,jx3
);
16127 dy31
= vec_sub(iy3
,jy1
);
16128 dy32
= vec_sub(iy3
,jy2
);
16129 dy33
= vec_sub(iy3
,jy3
);
16130 dz31
= vec_sub(iz3
,jz1
);
16131 dz32
= vec_sub(iz3
,jz2
);
16132 dz33
= vec_sub(iz3
,jz3
);
16134 rsq11
= vec_madd(dx11
,dx11
,nul
);
16135 rsq12
= vec_madd(dx12
,dx12
,nul
);
16136 rsq13
= vec_madd(dx13
,dx13
,nul
);
16137 rsq21
= vec_madd(dx21
,dx21
,nul
);
16138 rsq22
= vec_madd(dx22
,dx22
,nul
);
16139 rsq23
= vec_madd(dx23
,dx23
,nul
);
16140 rsq31
= vec_madd(dx31
,dx31
,nul
);
16141 rsq32
= vec_madd(dx32
,dx32
,nul
);
16142 rsq33
= vec_madd(dx33
,dx33
,nul
);
16143 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16144 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16145 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16146 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16147 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16148 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16149 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16150 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16151 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16152 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16153 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16154 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16155 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16156 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16157 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16158 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16159 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16160 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
16162 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16165 &rinv11
,&rinv12
,&rinv13
,
16166 &rinv21
,&rinv22
,&rinv23
,
16167 &rinv31
,&rinv32
,&rinv33
);
16169 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
16170 &rinv21
,&rinv22
,&rinv23
,
16171 &rinv31
,&rinv32
,&rinv33
);
16173 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
16174 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
16175 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
16176 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
16177 rinvsix
= vec_madd(rinvsix
,rinvsix
,nul
);
16178 vctot
= vec_madd(rinv11
,qqOOt
,vctot
);
16179 vctot
= vec_madd(rinv12
,qqOHt
,vctot
);
16180 vctot
= vec_madd(rinv13
,qqOHt
,vctot
);
16181 vnbtot
= vec_madd(c12t
,rinvsix
,vnbtot
);
16182 vctot
= vec_madd(rinv21
,qqOHt
,vctot
);
16183 vctot
= vec_madd(rinv22
,qqHHt
,vctot
);
16184 vctot
= vec_madd(rinv23
,qqHHt
,vctot
);
16185 vctot
= vec_madd(rinv31
,qqOHt
,vctot
);
16186 vctot
= vec_madd(rinv32
,qqHHt
,vctot
);
16187 vctot
= vec_madd(rinv33
,qqHHt
,vctot
);
/* Add both vector accumulators into the scalar totals for energy group gid[n]. */
16189 add_vector_to_float(Vc
+gid
[n
],vctot
);
16190 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
/*
 * mcinl2030_altivec
 *
 * Energy-only inner loop for pairs of three-site molecules (the load helpers
 * call them "water") with a krf/crf-corrected Coulomb term.  For each of the
 * 9 site pairs it accumulates
 *   vctot += qqXY * (rinv + krf*rsq - crf)
 * four j molecules per vector iteration, and adds the total to Vc[gid[n]].
 * No 12-6 term and no forces are computed in the visible code.
 *
 * NOTE(review): this listing omits several original lines (parameter list,
 * initialisation of nul / vctot, the j-index set-up, some "if" headers and
 * the closing braces).  Comments below describe only what the visible
 * statements show; nul is presumably the zero vector - confirm.
 */
16198 void mcinl2030_altivec(
16213 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
16214 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
16216 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
16217 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
16218 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
16220 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
16221 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
16223 vector
float vfacel
,nul
;
16224 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,vkrf
,vcrf
;
16225 vector
float krsq11
,krsq12
,krsq13
,krsq21
,krsq22
,krsq23
,krsq31
,krsq32
,krsq33
;
16226 vector
float qqOOt
,qqOHt
,qqHHt
;
16228 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
16229 int jnra
,jnrb
,jnrc
,jnrd
;
16230 int j3a
,j3b
,j3c
,j3d
;
/*
 * Loop-invariant constants: facel, krf and crf are splatted; the two site
 * charges are read at iinr[0] and iinr[0]+1, and qqXY = qX * qY * facel.
 */
16233 vfacel
=load_float_and_splat(&facel
);
16234 vkrf
=load_float_and_splat(&krf
);
16235 vcrf
=load_float_and_splat(&crf
);
16236 qO
= load_float_and_splat(charge
+iinr
[0]);
16237 qH
= load_float_and_splat(charge
+iinr
[0]+1);
16238 qqOO
= vec_madd(qO
,qO
,nul
);
16239 qqOH
= vec_madd(qO
,qH
,nul
);
16240 qqHH
= vec_madd(qH
,qH
,nul
);
16241 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
16242 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
16243 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* Outer loop over the nri i entries. */
16245 for(n
=0;n
<nri
;n
++) {
16249 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
16250 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: four j molecules per iteration while at least four remain. */
16255 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
16264 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
16265 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Displacements dAB = (i site A) - (j site B) for all 9 site pairs. */
16266 dx11
= vec_sub(ix1
,jx1
);
16267 dx12
= vec_sub(ix1
,jx2
);
16268 dx13
= vec_sub(ix1
,jx3
);
16269 dy11
= vec_sub(iy1
,jy1
);
16270 dy12
= vec_sub(iy1
,jy2
);
16271 dy13
= vec_sub(iy1
,jy3
);
16272 dz11
= vec_sub(iz1
,jz1
);
16273 dz12
= vec_sub(iz1
,jz2
);
16274 dz13
= vec_sub(iz1
,jz3
);
16275 dx21
= vec_sub(ix2
,jx1
);
16276 dx22
= vec_sub(ix2
,jx2
);
16277 dx23
= vec_sub(ix2
,jx3
);
16278 dy21
= vec_sub(iy2
,jy1
);
16279 dy22
= vec_sub(iy2
,jy2
);
16280 dy23
= vec_sub(iy2
,jy3
);
16281 dz21
= vec_sub(iz2
,jz1
);
16282 dz22
= vec_sub(iz2
,jz2
);
16283 dz23
= vec_sub(iz2
,jz3
);
16284 dx31
= vec_sub(ix3
,jx1
);
16285 dx32
= vec_sub(ix3
,jx2
);
16286 dx33
= vec_sub(ix3
,jx3
);
16287 dy31
= vec_sub(iy3
,jy1
);
16288 dy32
= vec_sub(iy3
,jy2
);
16289 dy33
= vec_sub(iy3
,jy3
);
16290 dz31
= vec_sub(iz3
,jz1
);
16291 dz32
= vec_sub(iz3
,jz2
);
16292 dz33
= vec_sub(iz3
,jz3
);
/* Squared distances rsqAB = dx*dx + dy*dy + dz*dz, built with three fused multiply-adds. */
16294 rsq11
= vec_madd(dx11
,dx11
,nul
);
16295 rsq12
= vec_madd(dx12
,dx12
,nul
);
16296 rsq13
= vec_madd(dx13
,dx13
,nul
);
16297 rsq21
= vec_madd(dx21
,dx21
,nul
);
16298 rsq22
= vec_madd(dx22
,dx22
,nul
);
16299 rsq23
= vec_madd(dx23
,dx23
,nul
);
16300 rsq31
= vec_madd(dx31
,dx31
,nul
);
16301 rsq32
= vec_madd(dx32
,dx32
,nul
);
16302 rsq33
= vec_madd(dx33
,dx33
,nul
);
16303 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16304 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16305 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16306 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16307 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16308 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16309 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16310 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16311 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16312 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16313 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16314 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16315 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16316 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16317 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16318 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16319 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16320 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Presumably rinvAB = 1/sqrt(rsqAB); the rsq21..rsq33 argument lines are missing from this listing. */
16322 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16325 &rinv11
,&rinv12
,&rinv13
,
16326 &rinv21
,&rinv22
,&rinv23
,
16327 &rinv31
,&rinv32
,&rinv33
);
/* krsqAB = krf * rsqAB */
16329 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
16330 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
16331 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
16332 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
16333 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
16334 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
16335 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
16336 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
16337 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
/* rinvAB is reused as scratch from here on: rinvAB += krsqAB ... */
16339 rinv11
= vec_add(rinv11
,krsq11
);
16340 rinv12
= vec_add(rinv12
,krsq12
);
16341 rinv13
= vec_add(rinv13
,krsq13
);
16342 rinv21
= vec_add(rinv21
,krsq21
);
16343 rinv22
= vec_add(rinv22
,krsq22
);
16344 rinv23
= vec_add(rinv23
,krsq23
);
16345 rinv31
= vec_add(rinv31
,krsq31
);
16346 rinv32
= vec_add(rinv32
,krsq32
);
16347 rinv33
= vec_add(rinv33
,krsq33
);
/* ... then rinvAB -= crf, giving (rinv + krf*rsq - crf). */
16349 rinv11
= vec_sub(rinv11
,vcrf
);
16350 rinv12
= vec_sub(rinv12
,vcrf
);
16351 rinv13
= vec_sub(rinv13
,vcrf
);
16352 rinv21
= vec_sub(rinv21
,vcrf
);
16353 rinv22
= vec_sub(rinv22
,vcrf
);
16354 rinv23
= vec_sub(rinv23
,vcrf
);
16355 rinv31
= vec_sub(rinv31
,vcrf
);
16356 rinv32
= vec_sub(rinv32
,vcrf
);
16357 rinv33
= vec_sub(rinv33
,vcrf
);
/* vctot += qqXY * (rinv + krf*rsq - crf) for all 9 pairs. */
16359 vctot
= vec_madd(qqOO
,rinv11
,vctot
);
16360 vctot
= vec_madd(qqOH
,rinv12
,vctot
);
16361 vctot
= vec_madd(qqOH
,rinv13
,vctot
);
16362 vctot
= vec_madd(qqOH
,rinv21
,vctot
);
16363 vctot
= vec_madd(qqHH
,rinv22
,vctot
);
16364 vctot
= vec_madd(qqHH
,rinv23
,vctot
);
16365 vctot
= vec_madd(qqOH
,rinv31
,vctot
);
16366 vctot
= vec_madd(qqHH
,rinv32
,vctot
);
16367 vctot
= vec_madd(qqHH
,rinv33
,vctot
);
/*
 * Remainder, three j molecules left (the branch header is not in this listing).
 * vec_sld(x,nul,4) shifts each splatted charge product left by 4 bytes, so
 * one float's worth of nul enters at the end.  This masking matters here:
 * after the "- vcrf" step the unused element of rinvAB is -crf rather than
 * zero, and only the nul element of qqXYt keeps it out of vctot
 * (assuming nul is zero).
 */
16376 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
16377 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16378 qqOOt
= vec_sld(qqOO
,nul
,4);
16379 qqOHt
= vec_sld(qqOH
,nul
,4);
16380 qqHHt
= vec_sld(qqHH
,nul
,4);
16382 dx11
= vec_sub(ix1
,jx1
);
16383 dx12
= vec_sub(ix1
,jx2
);
16384 dx13
= vec_sub(ix1
,jx3
);
16385 dy11
= vec_sub(iy1
,jy1
);
16386 dy12
= vec_sub(iy1
,jy2
);
16387 dy13
= vec_sub(iy1
,jy3
);
16388 dz11
= vec_sub(iz1
,jz1
);
16389 dz12
= vec_sub(iz1
,jz2
);
16390 dz13
= vec_sub(iz1
,jz3
);
16391 dx21
= vec_sub(ix2
,jx1
);
16392 dx22
= vec_sub(ix2
,jx2
);
16393 dx23
= vec_sub(ix2
,jx3
);
16394 dy21
= vec_sub(iy2
,jy1
);
16395 dy22
= vec_sub(iy2
,jy2
);
16396 dy23
= vec_sub(iy2
,jy3
);
16397 dz21
= vec_sub(iz2
,jz1
);
16398 dz22
= vec_sub(iz2
,jz2
);
16399 dz23
= vec_sub(iz2
,jz3
);
16400 dx31
= vec_sub(ix3
,jx1
);
16401 dx32
= vec_sub(ix3
,jx2
);
16402 dx33
= vec_sub(ix3
,jx3
);
16403 dy31
= vec_sub(iy3
,jy1
);
16404 dy32
= vec_sub(iy3
,jy2
);
16405 dy33
= vec_sub(iy3
,jy3
);
16406 dz31
= vec_sub(iz3
,jz1
);
16407 dz32
= vec_sub(iz3
,jz2
);
16408 dz33
= vec_sub(iz3
,jz3
);
16410 rsq11
= vec_madd(dx11
,dx11
,nul
);
16411 rsq12
= vec_madd(dx12
,dx12
,nul
);
16412 rsq13
= vec_madd(dx13
,dx13
,nul
);
16413 rsq21
= vec_madd(dx21
,dx21
,nul
);
16414 rsq22
= vec_madd(dx22
,dx22
,nul
);
16415 rsq23
= vec_madd(dx23
,dx23
,nul
);
16416 rsq31
= vec_madd(dx31
,dx31
,nul
);
16417 rsq32
= vec_madd(dx32
,dx32
,nul
);
16418 rsq33
= vec_madd(dx33
,dx33
,nul
);
16419 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16420 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16421 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16422 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16423 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16424 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16425 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16426 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16427 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16428 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16429 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16430 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16431 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16432 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16433 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16434 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16435 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16436 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* The unused element is cleared in rsq before the inverse square root, and in rinv again after it. */
16438 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
16439 &rsq21
,&rsq22
,&rsq23
,
16440 &rsq31
,&rsq32
,&rsq33
);
16442 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16445 &rinv11
,&rinv12
,&rinv13
,
16446 &rinv21
,&rinv22
,&rinv23
,
16447 &rinv31
,&rinv32
,&rinv33
);
16449 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
16450 &rinv21
,&rinv22
,&rinv23
,
16451 &rinv31
,&rinv32
,&rinv33
);
16453 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
16454 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
16455 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
16456 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
16457 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
16458 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
16459 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
16460 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
16461 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
16463 rinv11
= vec_add(rinv11
,krsq11
);
16464 rinv12
= vec_add(rinv12
,krsq12
);
16465 rinv13
= vec_add(rinv13
,krsq13
);
16466 rinv21
= vec_add(rinv21
,krsq21
);
16467 rinv22
= vec_add(rinv22
,krsq22
);
16468 rinv23
= vec_add(rinv23
,krsq23
);
16469 rinv31
= vec_add(rinv31
,krsq31
);
16470 rinv32
= vec_add(rinv32
,krsq32
);
16471 rinv33
= vec_add(rinv33
,krsq33
);
16473 rinv11
= vec_sub(rinv11
,vcrf
);
16474 rinv12
= vec_sub(rinv12
,vcrf
);
16475 rinv13
= vec_sub(rinv13
,vcrf
);
16476 rinv21
= vec_sub(rinv21
,vcrf
);
16477 rinv22
= vec_sub(rinv22
,vcrf
);
16478 rinv23
= vec_sub(rinv23
,vcrf
);
16479 rinv31
= vec_sub(rinv31
,vcrf
);
16480 rinv32
= vec_sub(rinv32
,vcrf
);
16481 rinv33
= vec_sub(rinv33
,vcrf
);
16483 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
16484 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
16485 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
16486 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
16487 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
16488 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
16489 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
16490 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
16491 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
16492 } else if(k
<(nj1
-1)) {
/* Remainder, two j molecules left: 8-byte shift of the charge products, two elements cleared. */
16497 load_2_water(pos
+j3a
,pos
+j3b
,
16498 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16499 qqOOt
= vec_sld(qqOO
,nul
,8);
16500 qqOHt
= vec_sld(qqOH
,nul
,8);
16501 qqHHt
= vec_sld(qqHH
,nul
,8);
16503 dx11
= vec_sub(ix1
,jx1
);
16504 dx12
= vec_sub(ix1
,jx2
);
16505 dx13
= vec_sub(ix1
,jx3
);
16506 dy11
= vec_sub(iy1
,jy1
);
16507 dy12
= vec_sub(iy1
,jy2
);
16508 dy13
= vec_sub(iy1
,jy3
);
16509 dz11
= vec_sub(iz1
,jz1
);
16510 dz12
= vec_sub(iz1
,jz2
);
16511 dz13
= vec_sub(iz1
,jz3
);
16512 dx21
= vec_sub(ix2
,jx1
);
16513 dx22
= vec_sub(ix2
,jx2
);
16514 dx23
= vec_sub(ix2
,jx3
);
16515 dy21
= vec_sub(iy2
,jy1
);
16516 dy22
= vec_sub(iy2
,jy2
);
16517 dy23
= vec_sub(iy2
,jy3
);
16518 dz21
= vec_sub(iz2
,jz1
);
16519 dz22
= vec_sub(iz2
,jz2
);
16520 dz23
= vec_sub(iz2
,jz3
);
16521 dx31
= vec_sub(ix3
,jx1
);
16522 dx32
= vec_sub(ix3
,jx2
);
16523 dx33
= vec_sub(ix3
,jx3
);
16524 dy31
= vec_sub(iy3
,jy1
);
16525 dy32
= vec_sub(iy3
,jy2
);
16526 dy33
= vec_sub(iy3
,jy3
);
16527 dz31
= vec_sub(iz3
,jz1
);
16528 dz32
= vec_sub(iz3
,jz2
);
16529 dz33
= vec_sub(iz3
,jz3
);
16531 rsq11
= vec_madd(dx11
,dx11
,nul
);
16532 rsq12
= vec_madd(dx12
,dx12
,nul
);
16533 rsq13
= vec_madd(dx13
,dx13
,nul
);
16534 rsq21
= vec_madd(dx21
,dx21
,nul
);
16535 rsq22
= vec_madd(dx22
,dx22
,nul
);
16536 rsq23
= vec_madd(dx23
,dx23
,nul
);
16537 rsq31
= vec_madd(dx31
,dx31
,nul
);
16538 rsq32
= vec_madd(dx32
,dx32
,nul
);
16539 rsq33
= vec_madd(dx33
,dx33
,nul
);
16540 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16541 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16542 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16543 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16544 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16545 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16546 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16547 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16548 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16549 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16550 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16551 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16552 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16553 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16554 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16555 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16556 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16557 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
16559 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
16560 &rsq21
,&rsq22
,&rsq23
,
16561 &rsq31
,&rsq32
,&rsq33
);
16563 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16566 &rinv11
,&rinv12
,&rinv13
,
16567 &rinv21
,&rinv22
,&rinv23
,
16568 &rinv31
,&rinv32
,&rinv33
);
16570 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
16571 &rinv21
,&rinv22
,&rinv23
,
16572 &rinv31
,&rinv32
,&rinv33
);
16574 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
16575 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
16576 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
16577 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
16578 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
16579 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
16580 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
16581 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
16582 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
16584 rinv11
= vec_add(rinv11
,krsq11
);
16585 rinv12
= vec_add(rinv12
,krsq12
);
16586 rinv13
= vec_add(rinv13
,krsq13
);
16587 rinv21
= vec_add(rinv21
,krsq21
);
16588 rinv22
= vec_add(rinv22
,krsq22
);
16589 rinv23
= vec_add(rinv23
,krsq23
);
16590 rinv31
= vec_add(rinv31
,krsq31
);
16591 rinv32
= vec_add(rinv32
,krsq32
);
16592 rinv33
= vec_add(rinv33
,krsq33
);
16594 rinv11
= vec_sub(rinv11
,vcrf
);
16595 rinv12
= vec_sub(rinv12
,vcrf
);
16596 rinv13
= vec_sub(rinv13
,vcrf
);
16597 rinv21
= vec_sub(rinv21
,vcrf
);
16598 rinv22
= vec_sub(rinv22
,vcrf
);
16599 rinv23
= vec_sub(rinv23
,vcrf
);
16600 rinv31
= vec_sub(rinv31
,vcrf
);
16601 rinv32
= vec_sub(rinv32
,vcrf
);
16602 rinv33
= vec_sub(rinv33
,vcrf
);
16604 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
16605 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
16606 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
16607 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
16608 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
16609 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
16610 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
16611 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
16612 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
/* Remainder, one j molecule left (branch header not in this listing): 12-byte shift, three elements cleared. */
16616 load_1_water(pos
+j3a
,
16617 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16618 qqOOt
= vec_sld(qqOO
,nul
,12);
16619 qqOHt
= vec_sld(qqOH
,nul
,12);
16620 qqHHt
= vec_sld(qqHH
,nul
,12);
16622 dx11
= vec_sub(ix1
,jx1
);
16623 dx12
= vec_sub(ix1
,jx2
);
16624 dx13
= vec_sub(ix1
,jx3
);
16625 dy11
= vec_sub(iy1
,jy1
);
16626 dy12
= vec_sub(iy1
,jy2
);
16627 dy13
= vec_sub(iy1
,jy3
);
16628 dz11
= vec_sub(iz1
,jz1
);
16629 dz12
= vec_sub(iz1
,jz2
);
16630 dz13
= vec_sub(iz1
,jz3
);
16631 dx21
= vec_sub(ix2
,jx1
);
16632 dx22
= vec_sub(ix2
,jx2
);
16633 dx23
= vec_sub(ix2
,jx3
);
16634 dy21
= vec_sub(iy2
,jy1
);
16635 dy22
= vec_sub(iy2
,jy2
);
16636 dy23
= vec_sub(iy2
,jy3
);
16637 dz21
= vec_sub(iz2
,jz1
);
16638 dz22
= vec_sub(iz2
,jz2
);
16639 dz23
= vec_sub(iz2
,jz3
);
16640 dx31
= vec_sub(ix3
,jx1
);
16641 dx32
= vec_sub(ix3
,jx2
);
16642 dx33
= vec_sub(ix3
,jx3
);
16643 dy31
= vec_sub(iy3
,jy1
);
16644 dy32
= vec_sub(iy3
,jy2
);
16645 dy33
= vec_sub(iy3
,jy3
);
16646 dz31
= vec_sub(iz3
,jz1
);
16647 dz32
= vec_sub(iz3
,jz2
);
16648 dz33
= vec_sub(iz3
,jz3
);
16650 rsq11
= vec_madd(dx11
,dx11
,nul
);
16651 rsq12
= vec_madd(dx12
,dx12
,nul
);
16652 rsq13
= vec_madd(dx13
,dx13
,nul
);
16653 rsq21
= vec_madd(dx21
,dx21
,nul
);
16654 rsq22
= vec_madd(dx22
,dx22
,nul
);
16655 rsq23
= vec_madd(dx23
,dx23
,nul
);
16656 rsq31
= vec_madd(dx31
,dx31
,nul
);
16657 rsq32
= vec_madd(dx32
,dx32
,nul
);
16658 rsq33
= vec_madd(dx33
,dx33
,nul
);
16659 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16660 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16661 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16662 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16663 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16664 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16665 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16666 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16667 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16668 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16669 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16670 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16671 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16672 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16673 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16674 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16675 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16676 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
16678 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
16679 &rsq21
,&rsq22
,&rsq23
,
16680 &rsq31
,&rsq32
,&rsq33
);
16682 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16685 &rinv11
,&rinv12
,&rinv13
,
16686 &rinv21
,&rinv22
,&rinv23
,
16687 &rinv31
,&rinv32
,&rinv33
);
16689 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
16690 &rinv21
,&rinv22
,&rinv23
,
16691 &rinv31
,&rinv32
,&rinv33
);
16693 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
16694 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
16695 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
16696 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
16697 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
16698 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
16699 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
16700 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
16701 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
16703 rinv11
= vec_add(rinv11
,krsq11
);
16704 rinv12
= vec_add(rinv12
,krsq12
);
16705 rinv13
= vec_add(rinv13
,krsq13
);
16706 rinv21
= vec_add(rinv21
,krsq21
);
16707 rinv22
= vec_add(rinv22
,krsq22
);
16708 rinv23
= vec_add(rinv23
,krsq23
);
16709 rinv31
= vec_add(rinv31
,krsq31
);
16710 rinv32
= vec_add(rinv32
,krsq32
);
16711 rinv33
= vec_add(rinv33
,krsq33
);
16713 rinv11
= vec_sub(rinv11
,vcrf
);
16714 rinv12
= vec_sub(rinv12
,vcrf
);
16715 rinv13
= vec_sub(rinv13
,vcrf
);
16716 rinv21
= vec_sub(rinv21
,vcrf
);
16717 rinv22
= vec_sub(rinv22
,vcrf
);
16718 rinv23
= vec_sub(rinv23
,vcrf
);
16719 rinv31
= vec_sub(rinv31
,vcrf
);
16720 rinv32
= vec_sub(rinv32
,vcrf
);
16721 rinv33
= vec_sub(rinv33
,vcrf
);
16723 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
16724 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
16725 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
16726 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
16727 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
16728 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
16729 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
16730 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
16731 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
16733 /* update outer data */
/* Adds the vector accumulator into the scalar total for energy group gid[n]. */
16734 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl2130_altivec -- AltiVec inner loop that accumulates potential
 * energies only (no force terms appear in the visible code) between one
 * three-site i-molecule per outer iteration and its three-site
 * j-molecules.
 *
 * For each of the nine site pairs the loop forms the squared distance
 * rsq = dx*dx + dy*dy + dz*dz, obtains rinv through do_9_invsqrt(), and
 * adds qq * (rinv + krf*rsq - crf) to vctot, where qq is the
 * facel-scaled charge product: qqOO for pair 11, qqOH for pairs
 * 12, 13, 21, 31 and qqHH for pairs 22, 23, 32, 33.  Pair 11
 * additionally contributes c12*rinvsix*rinvsix - c6*rinvsix to vnbtot,
 * with rinvsix = (rinv11*rinv11)^3.  After the j-loop the two
 * accumulators are added into Vc[gid[n]] and Vnb[gid[n]].
 *
 * NOTE(review): in this copy of the file the parameter list, the braces
 * and a number of scalar statements (for example the assignments of ii,
 * tj, is3, ii3, nj0, nj1, j3a..j3d, and the initialisation of nul, vctot
 * and vnbtot) are not present, so they are not described here.  nul is
 * presumably the zero vector -- confirm against the complete source.
 */
16740 void mcinl2130_altivec(
16759 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
16760 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
16762 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
16763 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
16764 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
16766 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
16767 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
16768 vector
float rinvsq11
,vkrf
,vcrf
;
16769 vector
float krsq11
,krsq12
,krsq13
,krsq21
,krsq22
,krsq23
,krsq31
,krsq32
,krsq33
;
16771 vector
float vfacel
,nul
;
16772 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
,rinvsix
;
16773 vector
float vnbtot
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
16775 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
16776 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
16777 int j3a
,j3b
,j3c
,j3d
;
/* Broadcast the scalar parameters and build the facel-scaled charge products. */
16780 vfacel
=load_float_and_splat(&facel
);
16781 vkrf
=load_float_and_splat(&krf
);
16782 vcrf
=load_float_and_splat(&crf
);
16784 qO
= load_float_and_splat(charge
+ii
);
16785 qH
= load_float_and_splat(charge
+ii
+1);
16786 qqOO
= vec_madd(qO
,qO
,nul
);
16787 qqOH
= vec_madd(qO
,qH
,nul
);
16788 qqHH
= vec_madd(qH
,qH
,nul
);
16789 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
16790 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
16791 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* One c6/c12 pair is read from nbfp+tj and element 0 is replicated to all elements. */
16794 load_1_pair(nbfp
+tj
,&c6
,&c12
);
16795 c6
= vec_splat(c6
,0);
16796 c12
= vec_splat(c12
,0);
16798 for(n
=0;n
<nri
;n
++) {
16802 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
16803 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main j-loop: advances k by four and loads four j-molecules per pass. */
16809 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
16818 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
16819 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16820 dx11
= vec_sub(ix1
,jx1
);
16821 dx12
= vec_sub(ix1
,jx2
);
16822 dx13
= vec_sub(ix1
,jx3
);
16823 dy11
= vec_sub(iy1
,jy1
);
16824 dy12
= vec_sub(iy1
,jy2
);
16825 dy13
= vec_sub(iy1
,jy3
);
16826 dz11
= vec_sub(iz1
,jz1
);
16827 dz12
= vec_sub(iz1
,jz2
);
16828 dz13
= vec_sub(iz1
,jz3
);
16829 dx21
= vec_sub(ix2
,jx1
);
16830 dx22
= vec_sub(ix2
,jx2
);
16831 dx23
= vec_sub(ix2
,jx3
);
16832 dy21
= vec_sub(iy2
,jy1
);
16833 dy22
= vec_sub(iy2
,jy2
);
16834 dy23
= vec_sub(iy2
,jy3
);
16835 dz21
= vec_sub(iz2
,jz1
);
16836 dz22
= vec_sub(iz2
,jz2
);
16837 dz23
= vec_sub(iz2
,jz3
);
16838 dx31
= vec_sub(ix3
,jx1
);
16839 dx32
= vec_sub(ix3
,jx2
);
16840 dx33
= vec_sub(ix3
,jx3
);
16841 dy31
= vec_sub(iy3
,jy1
);
16842 dy32
= vec_sub(iy3
,jy2
);
16843 dy33
= vec_sub(iy3
,jy3
);
16844 dz31
= vec_sub(iz3
,jz1
);
16845 dz32
= vec_sub(iz3
,jz2
);
16846 dz33
= vec_sub(iz3
,jz3
);
16848 rsq11
= vec_madd(dx11
,dx11
,nul
);
16849 rsq12
= vec_madd(dx12
,dx12
,nul
);
16850 rsq13
= vec_madd(dx13
,dx13
,nul
);
16851 rsq21
= vec_madd(dx21
,dx21
,nul
);
16852 rsq22
= vec_madd(dx22
,dx22
,nul
);
16853 rsq23
= vec_madd(dx23
,dx23
,nul
);
16854 rsq31
= vec_madd(dx31
,dx31
,nul
);
16855 rsq32
= vec_madd(dx32
,dx32
,nul
);
16856 rsq33
= vec_madd(dx33
,dx33
,nul
);
16857 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16858 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16859 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16860 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16861 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16862 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16863 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16864 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16865 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16866 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16867 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16868 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16869 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16870 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16871 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16872 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16873 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16874 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
16876 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
16879 &rinv11
,&rinv12
,&rinv13
,
16880 &rinv21
,&rinv22
,&rinv23
,
16881 &rinv31
,&rinv32
,&rinv33
);
16883 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
16884 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
16885 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
16886 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
16887 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
16888 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
16889 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
16890 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
16891 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
16892 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
/* vec_nmsub(a,b,c) yields c - a*b, so the c6 term is subtracted and the c12 term added. */
16894 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
16895 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
16896 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
16897 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
16899 rinv11
= vec_add(rinv11
,krsq11
);
16900 rinv12
= vec_add(rinv12
,krsq12
);
16901 rinv13
= vec_add(rinv13
,krsq13
);
16902 rinv21
= vec_add(rinv21
,krsq21
);
16903 rinv22
= vec_add(rinv22
,krsq22
);
16904 rinv23
= vec_add(rinv23
,krsq23
);
16905 rinv31
= vec_add(rinv31
,krsq31
);
16906 rinv32
= vec_add(rinv32
,krsq32
);
16907 rinv33
= vec_add(rinv33
,krsq33
);
16909 rinv11
= vec_sub(rinv11
,vcrf
);
16910 rinv12
= vec_sub(rinv12
,vcrf
);
16911 rinv13
= vec_sub(rinv13
,vcrf
);
16912 rinv21
= vec_sub(rinv21
,vcrf
);
16913 rinv22
= vec_sub(rinv22
,vcrf
);
16914 rinv23
= vec_sub(rinv23
,vcrf
);
16915 rinv31
= vec_sub(rinv31
,vcrf
);
16916 rinv32
= vec_sub(rinv32
,vcrf
);
16917 rinv33
= vec_sub(rinv33
,vcrf
);
16919 vctot
= vec_madd(qqOO
,rinv11
,vctot
);
16920 vctot
= vec_madd(qqOH
,rinv12
,vctot
);
16921 vctot
= vec_madd(qqOH
,rinv13
,vctot
);
16922 vctot
= vec_madd(qqOH
,rinv21
,vctot
);
16923 vctot
= vec_madd(qqHH
,rinv22
,vctot
);
16924 vctot
= vec_madd(qqHH
,rinv23
,vctot
);
16925 vctot
= vec_madd(qqOH
,rinv31
,vctot
);
16926 vctot
= vec_madd(qqHH
,rinv32
,vctot
);
16927 vctot
= vec_madd(qqHH
,rinv33
,vctot
);
/*
 * Remainder with three j-molecules.  vec_sld(x,nul,4) shifts x left by
 * four bytes and fills the vacated last element from nul, so the *t
 * coefficient vectors carry nul in that element.  The
 * zero_highest_element_in_9_vectors() calls presumably clear the same
 * unused element of rsq and rinv -- confirm against the helper.
 */
16936 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
16937 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
16938 qqOOt
= vec_sld(qqOO
,nul
,4);
16939 qqOHt
= vec_sld(qqOH
,nul
,4);
16940 qqHHt
= vec_sld(qqHH
,nul
,4);
16941 c6t
= vec_sld(c6
,nul
,4);
16942 c12t
= vec_sld(c12
,nul
,4);
16944 dx11
= vec_sub(ix1
,jx1
);
16945 dx12
= vec_sub(ix1
,jx2
);
16946 dx13
= vec_sub(ix1
,jx3
);
16947 dy11
= vec_sub(iy1
,jy1
);
16948 dy12
= vec_sub(iy1
,jy2
);
16949 dy13
= vec_sub(iy1
,jy3
);
16950 dz11
= vec_sub(iz1
,jz1
);
16951 dz12
= vec_sub(iz1
,jz2
);
16952 dz13
= vec_sub(iz1
,jz3
);
16953 dx21
= vec_sub(ix2
,jx1
);
16954 dx22
= vec_sub(ix2
,jx2
);
16955 dx23
= vec_sub(ix2
,jx3
);
16956 dy21
= vec_sub(iy2
,jy1
);
16957 dy22
= vec_sub(iy2
,jy2
);
16958 dy23
= vec_sub(iy2
,jy3
);
16959 dz21
= vec_sub(iz2
,jz1
);
16960 dz22
= vec_sub(iz2
,jz2
);
16961 dz23
= vec_sub(iz2
,jz3
);
16962 dx31
= vec_sub(ix3
,jx1
);
16963 dx32
= vec_sub(ix3
,jx2
);
16964 dx33
= vec_sub(ix3
,jx3
);
16965 dy31
= vec_sub(iy3
,jy1
);
16966 dy32
= vec_sub(iy3
,jy2
);
16967 dy33
= vec_sub(iy3
,jy3
);
16968 dz31
= vec_sub(iz3
,jz1
);
16969 dz32
= vec_sub(iz3
,jz2
);
16970 dz33
= vec_sub(iz3
,jz3
);
16972 rsq11
= vec_madd(dx11
,dx11
,nul
);
16973 rsq12
= vec_madd(dx12
,dx12
,nul
);
16974 rsq13
= vec_madd(dx13
,dx13
,nul
);
16975 rsq21
= vec_madd(dx21
,dx21
,nul
);
16976 rsq22
= vec_madd(dx22
,dx22
,nul
);
16977 rsq23
= vec_madd(dx23
,dx23
,nul
);
16978 rsq31
= vec_madd(dx31
,dx31
,nul
);
16979 rsq32
= vec_madd(dx32
,dx32
,nul
);
16980 rsq33
= vec_madd(dx33
,dx33
,nul
);
16981 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
16982 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
16983 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
16984 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
16985 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
16986 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
16987 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
16988 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
16989 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
16990 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
16991 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
16992 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
16993 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
16994 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
16995 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
16996 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
16997 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
16998 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17000 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17001 &rsq21
,&rsq22
,&rsq23
,
17002 &rsq31
,&rsq32
,&rsq33
);
17004 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17007 &rinv11
,&rinv12
,&rinv13
,
17008 &rinv21
,&rinv22
,&rinv23
,
17009 &rinv31
,&rinv32
,&rinv33
);
17011 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17012 &rinv21
,&rinv22
,&rinv23
,
17013 &rinv31
,&rinv32
,&rinv33
);
17015 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
17016 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
17017 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
17018 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
17019 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
17020 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
17021 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
17022 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
17023 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
17024 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
17026 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
17027 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
17028 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
17029 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
17031 rinv11
= vec_add(rinv11
,krsq11
);
17032 rinv12
= vec_add(rinv12
,krsq12
);
17033 rinv13
= vec_add(rinv13
,krsq13
);
17034 rinv21
= vec_add(rinv21
,krsq21
);
17035 rinv22
= vec_add(rinv22
,krsq22
);
17036 rinv23
= vec_add(rinv23
,krsq23
);
17037 rinv31
= vec_add(rinv31
,krsq31
);
17038 rinv32
= vec_add(rinv32
,krsq32
);
17039 rinv33
= vec_add(rinv33
,krsq33
);
17041 rinv11
= vec_sub(rinv11
,vcrf
);
17042 rinv12
= vec_sub(rinv12
,vcrf
);
17043 rinv13
= vec_sub(rinv13
,vcrf
);
17044 rinv21
= vec_sub(rinv21
,vcrf
);
17045 rinv22
= vec_sub(rinv22
,vcrf
);
17046 rinv23
= vec_sub(rinv23
,vcrf
);
17047 rinv31
= vec_sub(rinv31
,vcrf
);
17048 rinv32
= vec_sub(rinv32
,vcrf
);
17049 rinv33
= vec_sub(rinv33
,vcrf
);
17051 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
17052 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
17053 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
17054 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
17055 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
17056 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
17057 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
17058 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
17059 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
17060 } else if(k
<(nj1
-1)) {
/*
 * Remainder with two j-molecules.  The shift count of 8 bytes makes the
 * last two elements of the *t coefficient vectors come from nul.
 */
17065 load_2_water(pos
+j3a
,pos
+j3b
,
17066 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17067 qqOOt
= vec_sld(qqOO
,nul
,8);
17068 qqOHt
= vec_sld(qqOH
,nul
,8);
17069 qqHHt
= vec_sld(qqHH
,nul
,8);
17070 c6t
= vec_sld(c6
,nul
,8);
17071 c12t
= vec_sld(c12
,nul
,8);
17073 dx11
= vec_sub(ix1
,jx1
);
17074 dx12
= vec_sub(ix1
,jx2
);
17075 dx13
= vec_sub(ix1
,jx3
);
17076 dy11
= vec_sub(iy1
,jy1
);
17077 dy12
= vec_sub(iy1
,jy2
);
17078 dy13
= vec_sub(iy1
,jy3
);
17079 dz11
= vec_sub(iz1
,jz1
);
17080 dz12
= vec_sub(iz1
,jz2
);
17081 dz13
= vec_sub(iz1
,jz3
);
17082 dx21
= vec_sub(ix2
,jx1
);
17083 dx22
= vec_sub(ix2
,jx2
);
17084 dx23
= vec_sub(ix2
,jx3
);
17085 dy21
= vec_sub(iy2
,jy1
);
17086 dy22
= vec_sub(iy2
,jy2
);
17087 dy23
= vec_sub(iy2
,jy3
);
17088 dz21
= vec_sub(iz2
,jz1
);
17089 dz22
= vec_sub(iz2
,jz2
);
17090 dz23
= vec_sub(iz2
,jz3
);
17091 dx31
= vec_sub(ix3
,jx1
);
17092 dx32
= vec_sub(ix3
,jx2
);
17093 dx33
= vec_sub(ix3
,jx3
);
17094 dy31
= vec_sub(iy3
,jy1
);
17095 dy32
= vec_sub(iy3
,jy2
);
17096 dy33
= vec_sub(iy3
,jy3
);
17097 dz31
= vec_sub(iz3
,jz1
);
17098 dz32
= vec_sub(iz3
,jz2
);
17099 dz33
= vec_sub(iz3
,jz3
);
17101 rsq11
= vec_madd(dx11
,dx11
,nul
);
17102 rsq12
= vec_madd(dx12
,dx12
,nul
);
17103 rsq13
= vec_madd(dx13
,dx13
,nul
);
17104 rsq21
= vec_madd(dx21
,dx21
,nul
);
17105 rsq22
= vec_madd(dx22
,dx22
,nul
);
17106 rsq23
= vec_madd(dx23
,dx23
,nul
);
17107 rsq31
= vec_madd(dx31
,dx31
,nul
);
17108 rsq32
= vec_madd(dx32
,dx32
,nul
);
17109 rsq33
= vec_madd(dx33
,dx33
,nul
);
17110 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17111 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17112 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17113 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17114 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17115 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17116 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17117 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17118 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17119 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17120 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17121 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17122 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17123 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17124 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17125 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17126 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17127 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17129 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17130 &rsq21
,&rsq22
,&rsq23
,
17131 &rsq31
,&rsq32
,&rsq33
);
17133 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17136 &rinv11
,&rinv12
,&rinv13
,
17137 &rinv21
,&rinv22
,&rinv23
,
17138 &rinv31
,&rinv32
,&rinv33
);
17140 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17141 &rinv21
,&rinv22
,&rinv23
,
17142 &rinv31
,&rinv32
,&rinv33
);
17144 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
17145 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
17146 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
17147 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
17148 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
17149 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
17150 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
17151 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
17152 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
17153 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
17155 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
17156 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
17157 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
17158 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
17160 rinv11
= vec_add(rinv11
,krsq11
);
17161 rinv12
= vec_add(rinv12
,krsq12
);
17162 rinv13
= vec_add(rinv13
,krsq13
);
17163 rinv21
= vec_add(rinv21
,krsq21
);
17164 rinv22
= vec_add(rinv22
,krsq22
);
17165 rinv23
= vec_add(rinv23
,krsq23
);
17166 rinv31
= vec_add(rinv31
,krsq31
);
17167 rinv32
= vec_add(rinv32
,krsq32
);
17168 rinv33
= vec_add(rinv33
,krsq33
);
17170 rinv11
= vec_sub(rinv11
,vcrf
);
17171 rinv12
= vec_sub(rinv12
,vcrf
);
17172 rinv13
= vec_sub(rinv13
,vcrf
);
17173 rinv21
= vec_sub(rinv21
,vcrf
);
17174 rinv22
= vec_sub(rinv22
,vcrf
);
17175 rinv23
= vec_sub(rinv23
,vcrf
);
17176 rinv31
= vec_sub(rinv31
,vcrf
);
17177 rinv32
= vec_sub(rinv32
,vcrf
);
17178 rinv33
= vec_sub(rinv33
,vcrf
);
17180 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
17181 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
17182 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
17183 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
17184 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
17185 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
17186 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
17187 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
17188 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
/*
 * Remainder with a single j-molecule.  The shift count of 12 bytes makes
 * the last three elements of the *t coefficient vectors come from nul.
 */
17192 load_1_water(pos
+j3a
,
17193 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17194 qqOOt
= vec_sld(qqOO
,nul
,12);
17195 qqOHt
= vec_sld(qqOH
,nul
,12);
17196 qqHHt
= vec_sld(qqHH
,nul
,12);
17197 c6t
= vec_sld(c6
,nul
,12);
17198 c12t
= vec_sld(c12
,nul
,12);
17200 dx11
= vec_sub(ix1
,jx1
);
17201 dx12
= vec_sub(ix1
,jx2
);
17202 dx13
= vec_sub(ix1
,jx3
);
17203 dy11
= vec_sub(iy1
,jy1
);
17204 dy12
= vec_sub(iy1
,jy2
);
17205 dy13
= vec_sub(iy1
,jy3
);
17206 dz11
= vec_sub(iz1
,jz1
);
17207 dz12
= vec_sub(iz1
,jz2
);
17208 dz13
= vec_sub(iz1
,jz3
);
17209 dx21
= vec_sub(ix2
,jx1
);
17210 dx22
= vec_sub(ix2
,jx2
);
17211 dx23
= vec_sub(ix2
,jx3
);
17212 dy21
= vec_sub(iy2
,jy1
);
17213 dy22
= vec_sub(iy2
,jy2
);
17214 dy23
= vec_sub(iy2
,jy3
);
17215 dz21
= vec_sub(iz2
,jz1
);
17216 dz22
= vec_sub(iz2
,jz2
);
17217 dz23
= vec_sub(iz2
,jz3
);
17218 dx31
= vec_sub(ix3
,jx1
);
17219 dx32
= vec_sub(ix3
,jx2
);
17220 dx33
= vec_sub(ix3
,jx3
);
17221 dy31
= vec_sub(iy3
,jy1
);
17222 dy32
= vec_sub(iy3
,jy2
);
17223 dy33
= vec_sub(iy3
,jy3
);
17224 dz31
= vec_sub(iz3
,jz1
);
17225 dz32
= vec_sub(iz3
,jz2
);
17226 dz33
= vec_sub(iz3
,jz3
);
17228 rsq11
= vec_madd(dx11
,dx11
,nul
);
17229 rsq12
= vec_madd(dx12
,dx12
,nul
);
17230 rsq13
= vec_madd(dx13
,dx13
,nul
);
17231 rsq21
= vec_madd(dx21
,dx21
,nul
);
17232 rsq22
= vec_madd(dx22
,dx22
,nul
);
17233 rsq23
= vec_madd(dx23
,dx23
,nul
);
17234 rsq31
= vec_madd(dx31
,dx31
,nul
);
17235 rsq32
= vec_madd(dx32
,dx32
,nul
);
17236 rsq33
= vec_madd(dx33
,dx33
,nul
);
17237 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17238 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17239 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17240 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17241 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17242 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17243 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17244 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17245 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17246 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17247 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17248 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17249 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17250 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17251 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17252 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17253 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17254 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17256 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17257 &rsq21
,&rsq22
,&rsq23
,
17258 &rsq31
,&rsq32
,&rsq33
);
17260 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17263 &rinv11
,&rinv12
,&rinv13
,
17264 &rinv21
,&rinv22
,&rinv23
,
17265 &rinv31
,&rinv32
,&rinv33
);
17267 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17268 &rinv21
,&rinv22
,&rinv23
,
17269 &rinv31
,&rinv32
,&rinv33
);
17271 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
17272 krsq11
= vec_madd(vkrf
,rsq11
,nul
);
17273 krsq12
= vec_madd(vkrf
,rsq12
,nul
);
17274 krsq13
= vec_madd(vkrf
,rsq13
,nul
);
17275 krsq21
= vec_madd(vkrf
,rsq21
,nul
);
17276 krsq22
= vec_madd(vkrf
,rsq22
,nul
);
17277 krsq23
= vec_madd(vkrf
,rsq23
,nul
);
17278 krsq31
= vec_madd(vkrf
,rsq31
,nul
);
17279 krsq32
= vec_madd(vkrf
,rsq32
,nul
);
17280 krsq33
= vec_madd(vkrf
,rsq33
,nul
);
17282 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
17283 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
17284 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
17285 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
17287 rinv11
= vec_add(rinv11
,krsq11
);
17288 rinv12
= vec_add(rinv12
,krsq12
);
17289 rinv13
= vec_add(rinv13
,krsq13
);
17290 rinv21
= vec_add(rinv21
,krsq21
);
17291 rinv22
= vec_add(rinv22
,krsq22
);
17292 rinv23
= vec_add(rinv23
,krsq23
);
17293 rinv31
= vec_add(rinv31
,krsq31
);
17294 rinv32
= vec_add(rinv32
,krsq32
);
17295 rinv33
= vec_add(rinv33
,krsq33
);
17297 rinv11
= vec_sub(rinv11
,vcrf
);
17298 rinv12
= vec_sub(rinv12
,vcrf
);
17299 rinv13
= vec_sub(rinv13
,vcrf
);
17300 rinv21
= vec_sub(rinv21
,vcrf
);
17301 rinv22
= vec_sub(rinv22
,vcrf
);
17302 rinv23
= vec_sub(rinv23
,vcrf
);
17303 rinv31
= vec_sub(rinv31
,vcrf
);
17304 rinv32
= vec_sub(rinv32
,vcrf
);
17305 rinv33
= vec_sub(rinv33
,vcrf
);
17307 vctot
= vec_madd(qqOOt
,rinv11
,vctot
);
17308 vctot
= vec_madd(qqOHt
,rinv12
,vctot
);
17309 vctot
= vec_madd(qqOHt
,rinv13
,vctot
);
17310 vctot
= vec_madd(qqOHt
,rinv21
,vctot
);
17311 vctot
= vec_madd(qqHHt
,rinv22
,vctot
);
17312 vctot
= vec_madd(qqHHt
,rinv23
,vctot
);
17313 vctot
= vec_madd(qqOHt
,rinv31
,vctot
);
17314 vctot
= vec_madd(qqHHt
,rinv32
,vctot
);
17315 vctot
= vec_madd(qqHHt
,rinv33
,vctot
);
17317 /* update outer data */
17318 add_vector_to_float(Vc
+gid
[n
],vctot
);
17319 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
17325 void mcinl3030_altivec(
17340 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
17341 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
17343 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
17344 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
17345 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
17347 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
17348 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
17349 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
17351 vector
float vfacel
,nul
;
17352 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,tsc
;
17353 vector
float VV11c
,VV12c
,VV13c
;
17354 vector
float VV21c
,VV22c
,VV23c
;
17355 vector
float VV31c
,VV32c
,VV33c
;
17356 vector
float qqOOt
,qqOHt
,qqHHt
;
17358 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
17359 int jnra
,jnrb
,jnrc
,jnrd
;
17360 int j3a
,j3b
,j3c
,j3d
;
17363 vfacel
=load_float_and_splat(&facel
);
17364 tsc
=load_float_and_splat(&tabscale
);
17365 qO
= load_float_and_splat(charge
+iinr
[0]);
17366 qH
= load_float_and_splat(charge
+iinr
[0]+1);
17367 qqOO
= vec_madd(qO
,qO
,nul
);
17368 qqOH
= vec_madd(qO
,qH
,nul
);
17369 qqHH
= vec_madd(qH
,qH
,nul
);
17370 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
17371 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
17372 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
17374 for(n
=0;n
<nri
;n
++) {
17378 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
17379 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
17384 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
17393 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
17394 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17395 dx11
= vec_sub(ix1
,jx1
);
17396 dx12
= vec_sub(ix1
,jx2
);
17397 dx13
= vec_sub(ix1
,jx3
);
17398 dy11
= vec_sub(iy1
,jy1
);
17399 dy12
= vec_sub(iy1
,jy2
);
17400 dy13
= vec_sub(iy1
,jy3
);
17401 dz11
= vec_sub(iz1
,jz1
);
17402 dz12
= vec_sub(iz1
,jz2
);
17403 dz13
= vec_sub(iz1
,jz3
);
17404 dx21
= vec_sub(ix2
,jx1
);
17405 dx22
= vec_sub(ix2
,jx2
);
17406 dx23
= vec_sub(ix2
,jx3
);
17407 dy21
= vec_sub(iy2
,jy1
);
17408 dy22
= vec_sub(iy2
,jy2
);
17409 dy23
= vec_sub(iy2
,jy3
);
17410 dz21
= vec_sub(iz2
,jz1
);
17411 dz22
= vec_sub(iz2
,jz2
);
17412 dz23
= vec_sub(iz2
,jz3
);
17413 dx31
= vec_sub(ix3
,jx1
);
17414 dx32
= vec_sub(ix3
,jx2
);
17415 dx33
= vec_sub(ix3
,jx3
);
17416 dy31
= vec_sub(iy3
,jy1
);
17417 dy32
= vec_sub(iy3
,jy2
);
17418 dy33
= vec_sub(iy3
,jy3
);
17419 dz31
= vec_sub(iz3
,jz1
);
17420 dz32
= vec_sub(iz3
,jz2
);
17421 dz33
= vec_sub(iz3
,jz3
);
17423 rsq11
= vec_madd(dx11
,dx11
,nul
);
17424 rsq12
= vec_madd(dx12
,dx12
,nul
);
17425 rsq13
= vec_madd(dx13
,dx13
,nul
);
17426 rsq21
= vec_madd(dx21
,dx21
,nul
);
17427 rsq22
= vec_madd(dx22
,dx22
,nul
);
17428 rsq23
= vec_madd(dx23
,dx23
,nul
);
17429 rsq31
= vec_madd(dx31
,dx31
,nul
);
17430 rsq32
= vec_madd(dx32
,dx32
,nul
);
17431 rsq33
= vec_madd(dx33
,dx33
,nul
);
17432 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17433 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17434 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17435 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17436 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17437 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17438 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17439 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17440 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17441 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17442 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17443 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17444 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17445 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17446 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17447 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17448 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17449 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17451 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17454 &rinv11
,&rinv12
,&rinv13
,
17455 &rinv21
,&rinv22
,&rinv23
,
17456 &rinv31
,&rinv32
,&rinv33
);
17458 r11
= vec_madd(rsq11
,rinv11
,nul
);
17459 r12
= vec_madd(rsq12
,rinv12
,nul
);
17460 r13
= vec_madd(rsq13
,rinv13
,nul
);
17461 r21
= vec_madd(rsq21
,rinv21
,nul
);
17462 r22
= vec_madd(rsq22
,rinv22
,nul
);
17463 r23
= vec_madd(rsq23
,rinv23
,nul
);
17464 r31
= vec_madd(rsq31
,rinv31
,nul
);
17465 r32
= vec_madd(rsq32
,rinv32
,nul
);
17466 r33
= vec_madd(rsq33
,rinv33
,nul
);
17468 do_vonly_4_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
17469 do_vonly_4_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
17470 do_vonly_4_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
17471 do_vonly_4_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
17472 do_vonly_4_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
17473 do_vonly_4_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
17474 do_vonly_4_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
17475 do_vonly_4_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
17476 do_vonly_4_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
17478 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
17479 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
17480 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
17481 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
17482 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
17483 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
17484 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
17485 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
17486 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
17495 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
17496 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17497 qqOOt
= vec_sld(qqOO
,nul
,4);
17498 qqOHt
= vec_sld(qqOH
,nul
,4);
17499 qqHHt
= vec_sld(qqHH
,nul
,4);
17501 dx11
= vec_sub(ix1
,jx1
);
17502 dx12
= vec_sub(ix1
,jx2
);
17503 dx13
= vec_sub(ix1
,jx3
);
17504 dy11
= vec_sub(iy1
,jy1
);
17505 dy12
= vec_sub(iy1
,jy2
);
17506 dy13
= vec_sub(iy1
,jy3
);
17507 dz11
= vec_sub(iz1
,jz1
);
17508 dz12
= vec_sub(iz1
,jz2
);
17509 dz13
= vec_sub(iz1
,jz3
);
17510 dx21
= vec_sub(ix2
,jx1
);
17511 dx22
= vec_sub(ix2
,jx2
);
17512 dx23
= vec_sub(ix2
,jx3
);
17513 dy21
= vec_sub(iy2
,jy1
);
17514 dy22
= vec_sub(iy2
,jy2
);
17515 dy23
= vec_sub(iy2
,jy3
);
17516 dz21
= vec_sub(iz2
,jz1
);
17517 dz22
= vec_sub(iz2
,jz2
);
17518 dz23
= vec_sub(iz2
,jz3
);
17519 dx31
= vec_sub(ix3
,jx1
);
17520 dx32
= vec_sub(ix3
,jx2
);
17521 dx33
= vec_sub(ix3
,jx3
);
17522 dy31
= vec_sub(iy3
,jy1
);
17523 dy32
= vec_sub(iy3
,jy2
);
17524 dy33
= vec_sub(iy3
,jy3
);
17525 dz31
= vec_sub(iz3
,jz1
);
17526 dz32
= vec_sub(iz3
,jz2
);
17527 dz33
= vec_sub(iz3
,jz3
);
17529 rsq11
= vec_madd(dx11
,dx11
,nul
);
17530 rsq12
= vec_madd(dx12
,dx12
,nul
);
17531 rsq13
= vec_madd(dx13
,dx13
,nul
);
17532 rsq21
= vec_madd(dx21
,dx21
,nul
);
17533 rsq22
= vec_madd(dx22
,dx22
,nul
);
17534 rsq23
= vec_madd(dx23
,dx23
,nul
);
17535 rsq31
= vec_madd(dx31
,dx31
,nul
);
17536 rsq32
= vec_madd(dx32
,dx32
,nul
);
17537 rsq33
= vec_madd(dx33
,dx33
,nul
);
17538 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17539 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17540 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17541 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17542 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17543 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17544 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17545 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17546 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17547 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17548 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17549 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17550 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17551 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17552 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17553 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17554 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17555 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17557 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17558 &rsq21
,&rsq22
,&rsq23
,
17559 &rsq31
,&rsq32
,&rsq33
);
17561 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17564 &rinv11
,&rinv12
,&rinv13
,
17565 &rinv21
,&rinv22
,&rinv23
,
17566 &rinv31
,&rinv32
,&rinv33
);
17568 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17569 &rinv21
,&rinv22
,&rinv23
,
17570 &rinv31
,&rinv32
,&rinv33
);
17572 r11
= vec_madd(rsq11
,rinv11
,nul
);
17573 r12
= vec_madd(rsq12
,rinv12
,nul
);
17574 r13
= vec_madd(rsq13
,rinv13
,nul
);
17575 r21
= vec_madd(rsq21
,rinv21
,nul
);
17576 r22
= vec_madd(rsq22
,rinv22
,nul
);
17577 r23
= vec_madd(rsq23
,rinv23
,nul
);
17578 r31
= vec_madd(rsq31
,rinv31
,nul
);
17579 r32
= vec_madd(rsq32
,rinv32
,nul
);
17580 r33
= vec_madd(rsq33
,rinv33
,nul
);
17582 do_vonly_3_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
17583 do_vonly_3_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
17584 do_vonly_3_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
17585 do_vonly_3_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
17586 do_vonly_3_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
17587 do_vonly_3_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
17588 do_vonly_3_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
17589 do_vonly_3_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
17590 do_vonly_3_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
17592 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
17593 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
17594 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
17595 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
17596 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
17597 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
17598 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
17599 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
17600 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
17601 } else if(k
<(nj1
-1)) {
17606 load_2_water(pos
+j3a
,pos
+j3b
,
17607 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17608 qqOOt
= vec_sld(qqOO
,nul
,8);
17609 qqOHt
= vec_sld(qqOH
,nul
,8);
17610 qqHHt
= vec_sld(qqHH
,nul
,8);
17612 dx11
= vec_sub(ix1
,jx1
);
17613 dx12
= vec_sub(ix1
,jx2
);
17614 dx13
= vec_sub(ix1
,jx3
);
17615 dy11
= vec_sub(iy1
,jy1
);
17616 dy12
= vec_sub(iy1
,jy2
);
17617 dy13
= vec_sub(iy1
,jy3
);
17618 dz11
= vec_sub(iz1
,jz1
);
17619 dz12
= vec_sub(iz1
,jz2
);
17620 dz13
= vec_sub(iz1
,jz3
);
17621 dx21
= vec_sub(ix2
,jx1
);
17622 dx22
= vec_sub(ix2
,jx2
);
17623 dx23
= vec_sub(ix2
,jx3
);
17624 dy21
= vec_sub(iy2
,jy1
);
17625 dy22
= vec_sub(iy2
,jy2
);
17626 dy23
= vec_sub(iy2
,jy3
);
17627 dz21
= vec_sub(iz2
,jz1
);
17628 dz22
= vec_sub(iz2
,jz2
);
17629 dz23
= vec_sub(iz2
,jz3
);
17630 dx31
= vec_sub(ix3
,jx1
);
17631 dx32
= vec_sub(ix3
,jx2
);
17632 dx33
= vec_sub(ix3
,jx3
);
17633 dy31
= vec_sub(iy3
,jy1
);
17634 dy32
= vec_sub(iy3
,jy2
);
17635 dy33
= vec_sub(iy3
,jy3
);
17636 dz31
= vec_sub(iz3
,jz1
);
17637 dz32
= vec_sub(iz3
,jz2
);
17638 dz33
= vec_sub(iz3
,jz3
);
17640 rsq11
= vec_madd(dx11
,dx11
,nul
);
17641 rsq12
= vec_madd(dx12
,dx12
,nul
);
17642 rsq13
= vec_madd(dx13
,dx13
,nul
);
17643 rsq21
= vec_madd(dx21
,dx21
,nul
);
17644 rsq22
= vec_madd(dx22
,dx22
,nul
);
17645 rsq23
= vec_madd(dx23
,dx23
,nul
);
17646 rsq31
= vec_madd(dx31
,dx31
,nul
);
17647 rsq32
= vec_madd(dx32
,dx32
,nul
);
17648 rsq33
= vec_madd(dx33
,dx33
,nul
);
17649 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17650 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17651 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17652 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17653 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17654 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17655 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17656 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17657 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17658 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17659 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17660 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17661 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17662 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17663 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17664 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17665 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17666 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17668 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17669 &rsq21
,&rsq22
,&rsq23
,
17670 &rsq31
,&rsq32
,&rsq33
);
17672 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17675 &rinv11
,&rinv12
,&rinv13
,
17676 &rinv21
,&rinv22
,&rinv23
,
17677 &rinv31
,&rinv32
,&rinv33
);
17679 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17680 &rinv21
,&rinv22
,&rinv23
,
17681 &rinv31
,&rinv32
,&rinv33
);
17683 r11
= vec_madd(rsq11
,rinv11
,nul
);
17684 r12
= vec_madd(rsq12
,rinv12
,nul
);
17685 r13
= vec_madd(rsq13
,rinv13
,nul
);
17686 r21
= vec_madd(rsq21
,rinv21
,nul
);
17687 r22
= vec_madd(rsq22
,rinv22
,nul
);
17688 r23
= vec_madd(rsq23
,rinv23
,nul
);
17689 r31
= vec_madd(rsq31
,rinv31
,nul
);
17690 r32
= vec_madd(rsq32
,rinv32
,nul
);
17691 r33
= vec_madd(rsq33
,rinv33
,nul
);
17693 do_vonly_2_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
17694 do_vonly_2_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
17695 do_vonly_2_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
17696 do_vonly_2_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
17697 do_vonly_2_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
17698 do_vonly_2_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
17699 do_vonly_2_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
17700 do_vonly_2_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
17701 do_vonly_2_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
17703 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
17704 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
17705 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
17706 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
17707 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
17708 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
17709 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
17710 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
17711 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
17715 load_1_water(pos
+j3a
,
17716 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
17717 qqOOt
= vec_sld(qqOO
,nul
,12);
17718 qqOHt
= vec_sld(qqOH
,nul
,12);
17719 qqHHt
= vec_sld(qqHH
,nul
,12);
17721 dx11
= vec_sub(ix1
,jx1
);
17722 dx12
= vec_sub(ix1
,jx2
);
17723 dx13
= vec_sub(ix1
,jx3
);
17724 dy11
= vec_sub(iy1
,jy1
);
17725 dy12
= vec_sub(iy1
,jy2
);
17726 dy13
= vec_sub(iy1
,jy3
);
17727 dz11
= vec_sub(iz1
,jz1
);
17728 dz12
= vec_sub(iz1
,jz2
);
17729 dz13
= vec_sub(iz1
,jz3
);
17730 dx21
= vec_sub(ix2
,jx1
);
17731 dx22
= vec_sub(ix2
,jx2
);
17732 dx23
= vec_sub(ix2
,jx3
);
17733 dy21
= vec_sub(iy2
,jy1
);
17734 dy22
= vec_sub(iy2
,jy2
);
17735 dy23
= vec_sub(iy2
,jy3
);
17736 dz21
= vec_sub(iz2
,jz1
);
17737 dz22
= vec_sub(iz2
,jz2
);
17738 dz23
= vec_sub(iz2
,jz3
);
17739 dx31
= vec_sub(ix3
,jx1
);
17740 dx32
= vec_sub(ix3
,jx2
);
17741 dx33
= vec_sub(ix3
,jx3
);
17742 dy31
= vec_sub(iy3
,jy1
);
17743 dy32
= vec_sub(iy3
,jy2
);
17744 dy33
= vec_sub(iy3
,jy3
);
17745 dz31
= vec_sub(iz3
,jz1
);
17746 dz32
= vec_sub(iz3
,jz2
);
17747 dz33
= vec_sub(iz3
,jz3
);
17749 rsq11
= vec_madd(dx11
,dx11
,nul
);
17750 rsq12
= vec_madd(dx12
,dx12
,nul
);
17751 rsq13
= vec_madd(dx13
,dx13
,nul
);
17752 rsq21
= vec_madd(dx21
,dx21
,nul
);
17753 rsq22
= vec_madd(dx22
,dx22
,nul
);
17754 rsq23
= vec_madd(dx23
,dx23
,nul
);
17755 rsq31
= vec_madd(dx31
,dx31
,nul
);
17756 rsq32
= vec_madd(dx32
,dx32
,nul
);
17757 rsq33
= vec_madd(dx33
,dx33
,nul
);
17758 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17759 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17760 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17761 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17762 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17763 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17764 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17765 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17766 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17767 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17768 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17769 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17770 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17771 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17772 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17773 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17774 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17775 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
17777 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
17778 &rsq21
,&rsq22
,&rsq23
,
17779 &rsq31
,&rsq32
,&rsq33
);
17781 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17784 &rinv11
,&rinv12
,&rinv13
,
17785 &rinv21
,&rinv22
,&rinv23
,
17786 &rinv31
,&rinv32
,&rinv33
);
17788 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
17789 &rinv21
,&rinv22
,&rinv23
,
17790 &rinv31
,&rinv32
,&rinv33
);
17792 r11
= vec_madd(rsq11
,rinv11
,nul
);
17793 r12
= vec_madd(rsq12
,rinv12
,nul
);
17794 r13
= vec_madd(rsq13
,rinv13
,nul
);
17795 r21
= vec_madd(rsq21
,rinv21
,nul
);
17796 r22
= vec_madd(rsq22
,rinv22
,nul
);
17797 r23
= vec_madd(rsq23
,rinv23
,nul
);
17798 r31
= vec_madd(rsq31
,rinv31
,nul
);
17799 r32
= vec_madd(rsq32
,rinv32
,nul
);
17800 r33
= vec_madd(rsq33
,rinv33
,nul
);
17802 do_vonly_1_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
17803 do_vonly_1_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
17804 do_vonly_1_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
17805 do_vonly_1_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
17806 do_vonly_1_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
17807 do_vonly_1_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
17808 do_vonly_1_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
17809 do_vonly_1_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
17810 do_vonly_1_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
17812 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
17813 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
17814 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
17815 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
17816 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
17817 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
17818 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
17819 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
17820 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
17822 /* update outer data */
17823 add_vector_to_float(Vc
+gid
[n
],vctot
);
/*
 * mcinl3130_altivec
 *
 * AltiVec kernel that accumulates two energies for a three-site i molecule
 * interacting with three-site j molecules (loaded through the load_N_water
 * helpers):
 *   - vctot : sum over all nine site pairs of qq * VV, where VV comes from
 *             do_vonly_N_ctable_coul(VFtab, r*tabscale, ...);
 *   - vnbtot: c12*rinvsix^2 - c6*rinvsix, using only the 1-1 site pair
 *             (rinvsix is built from rinv11).
 * The totals are added to Vc[gid[n]] and Vnb[gid[n]] at the end.
 * No force accumulation is visible in this routine.
 *
 * j molecules are handled four at a time; remainders of three, two and one
 * use the matching load_N_water / do_vonly_N_ctable_coul helpers with the
 * unused vector lanes masked.
 *
 * NOTE(review): the numeric prefix on each statement looks like a line number
 * left over from the original file, and the gaps between them suggest that
 * the parameter list, the initialisation of nul/ii/tj/ii3/is3/nj0/nj1/j3a..j3d
 * and vctot/vnbtot, some branch headers and the closing braces are not shown
 * alongside this text - confirm against the full source before editing.
 */
17829 void mcinl3130_altivec(
/* Coordinates of the three i sites (ix1..iz3) and the three j sites. */
17848 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
17849 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
/* Distance components dXab = (i site a) - (j site b), for all nine pairs. */
17851 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
17852 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
17853 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
/* Squared distance, distance and inverse distance for each site pair. */
17855 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
17856 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
17857 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
17858 vector
float rinvsq11
;
/* NOTE(review): vc11..vc33 and VVc are not referenced in the visible statements. */
17859 vector
float vc11
,vc12
,vc13
,vc21
,vc22
,vc23
,vc31
,vc32
,vc33
,tsc
,VVc
;
17861 vector
float vfacel
,nul
;
17862 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
,rinvsix
;
/* The ...t variants hold lane-masked copies used by the remainder cases. */
17863 vector
float vnbtot
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
/* Per-pair table results written by do_vonly_N_ctable_coul. */
17864 vector
float VV11c
,VV12c
,VV13c
;
17865 vector
float VV21c
,VV22c
,VV23c
;
17866 vector
float VV31c
,VV32c
,VV33c
;
17868 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
17869 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
17870 int j3a
,j3b
,j3c
,j3d
;
/* Broadcast the scalars facel and tabscale into all vector lanes. */
17873 vfacel
=load_float_and_splat(&facel
);
17874 tsc
=load_float_and_splat(&tabscale
);
/*
 * Charges are read from charge[ii] (qO) and charge[ii+1] (qH); the three
 * charge products are formed once and scaled by vfacel.
 * vec_madd(a,b,c) computes a*b+c, so with nul as the addend these are plain
 * products (assumes nul is the zero vector - TODO confirm where it is set).
 */
17876 qO
= load_float_and_splat(charge
+ii
);
17877 qH
= load_float_and_splat(charge
+ii
+1);
17878 qqOO
= vec_madd(qO
,qO
,nul
);
17879 qqOH
= vec_madd(qO
,qH
,nul
);
17880 qqHH
= vec_madd(qH
,qH
,nul
);
17881 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
17882 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
17883 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
/* Load one c6/c12 pair from nbfp+tj and replicate element 0 across all lanes. */
17886 load_1_pair(nbfp
+tj
,&c6
,&c12
);
17887 c6
= vec_splat(c6
,0);
17888 c12
= vec_splat(c12
,0);
/* Outer loop over the nri entries. */
17890 for(n
=0;n
<nri
;n
++) {
/* Load the i molecule from pos+ii3 together with shiftvec+is3 into ix1..iz3. */
17894 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
17895 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
/* Main inner loop: four j molecules per iteration while at least four remain. */
17901 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
17910 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
17911 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
/* Distance components for all nine site pairs. */
17912 dx11
= vec_sub(ix1
,jx1
);
17913 dx12
= vec_sub(ix1
,jx2
);
17914 dx13
= vec_sub(ix1
,jx3
);
17915 dy11
= vec_sub(iy1
,jy1
);
17916 dy12
= vec_sub(iy1
,jy2
);
17917 dy13
= vec_sub(iy1
,jy3
);
17918 dz11
= vec_sub(iz1
,jz1
);
17919 dz12
= vec_sub(iz1
,jz2
);
17920 dz13
= vec_sub(iz1
,jz3
);
17921 dx21
= vec_sub(ix2
,jx1
);
17922 dx22
= vec_sub(ix2
,jx2
);
17923 dx23
= vec_sub(ix2
,jx3
);
17924 dy21
= vec_sub(iy2
,jy1
);
17925 dy22
= vec_sub(iy2
,jy2
);
17926 dy23
= vec_sub(iy2
,jy3
);
17927 dz21
= vec_sub(iz2
,jz1
);
17928 dz22
= vec_sub(iz2
,jz2
);
17929 dz23
= vec_sub(iz2
,jz3
);
17930 dx31
= vec_sub(ix3
,jx1
);
17931 dx32
= vec_sub(ix3
,jx2
);
17932 dx33
= vec_sub(ix3
,jx3
);
17933 dy31
= vec_sub(iy3
,jy1
);
17934 dy32
= vec_sub(iy3
,jy2
);
17935 dy33
= vec_sub(iy3
,jy3
);
17936 dz31
= vec_sub(iz3
,jz1
);
17937 dz32
= vec_sub(iz3
,jz2
);
17938 dz33
= vec_sub(iz3
,jz3
);
/* rsq = dx*dx + dy*dy + dz*dz, built up in three multiply-add passes. */
17940 rsq11
= vec_madd(dx11
,dx11
,nul
);
17941 rsq12
= vec_madd(dx12
,dx12
,nul
);
17942 rsq13
= vec_madd(dx13
,dx13
,nul
);
17943 rsq21
= vec_madd(dx21
,dx21
,nul
);
17944 rsq22
= vec_madd(dx22
,dx22
,nul
);
17945 rsq23
= vec_madd(dx23
,dx23
,nul
);
17946 rsq31
= vec_madd(dx31
,dx31
,nul
);
17947 rsq32
= vec_madd(dx32
,dx32
,nul
);
17948 rsq33
= vec_madd(dx33
,dx33
,nul
);
17949 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
17950 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
17951 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
17952 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
17953 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
17954 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
17955 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
17956 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
17957 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
17958 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
17959 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
17960 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
17961 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
17962 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
17963 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
17964 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
17965 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
17966 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Inverse distances for the nine pairs (presumably 1/sqrt(rsq) - helper not shown). */
17968 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
17971 &rinv11
,&rinv12
,&rinv13
,
17972 &rinv21
,&rinv22
,&rinv23
,
17973 &rinv31
,&rinv32
,&rinv33
);
/*
 * r = rsq*rinv for every pair. Interleaved with that, rinvsix is built as
 * rinvsq11*rinvsq11*rinvsq11, i.e. from the 1-1 pair only.
 */
17975 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
17976 r11
= vec_madd(rsq11
,rinv11
,nul
);
17977 r12
= vec_madd(rsq12
,rinv12
,nul
);
17978 r13
= vec_madd(rsq13
,rinv13
,nul
);
17979 r21
= vec_madd(rsq21
,rinv21
,nul
);
17980 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
17981 r22
= vec_madd(rsq22
,rinv22
,nul
);
17982 r23
= vec_madd(rsq23
,rinv23
,nul
);
17983 r31
= vec_madd(rsq31
,rinv31
,nul
);
17984 r32
= vec_madd(rsq32
,rinv32
,nul
);
17985 r33
= vec_madd(rsq33
,rinv33
,nul
);
17986 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
/* Table lookups at r*tsc; each call writes its result into the matching VVxyc. */
17988 do_vonly_4_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
17989 do_vonly_4_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
17990 do_vonly_4_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
17991 do_vonly_4_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
17992 do_vonly_4_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
17993 do_vonly_4_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
17994 do_vonly_4_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
17995 do_vonly_4_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
17996 do_vonly_4_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
/*
 * vec_nmsub(a,b,c) computes c - a*b, so the two statements below give
 * vnbtot += c12*rinvsix*rinvsix - c6*rinvsix.
 */
17998 vnbtot
= vec_nmsub(c6
,rinvsix
,vnbtot
);
17999 vnbtot
= vec_madd(c12
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
/* vctot += qq*VV: qqOO for pair 11, qqOH for 12/13/21/31, qqHH for the rest. */
18000 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
18001 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
18002 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
18003 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
18004 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
18005 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
18006 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
18007 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
18008 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
/*
 * Remainder case: three j molecules.
 * vec_sld(x,nul,4) shifts x left by 4 bytes and fills the last element from
 * nul, so the coefficients for the unused lane come from nul (presumably
 * zero - TODO confirm). NOTE(review): the branch header guarding this case
 * is not shown alongside this text.
 */
18017 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
18018 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18019 qqOOt
= vec_sld(qqOO
,nul
,4);
18020 qqOHt
= vec_sld(qqOH
,nul
,4);
18021 qqHHt
= vec_sld(qqHH
,nul
,4);
18022 c6t
= vec_sld(c6
,nul
,4);
18023 c12t
= vec_sld(c12
,nul
,4);
18025 dx11
= vec_sub(ix1
,jx1
);
18026 dx12
= vec_sub(ix1
,jx2
);
18027 dx13
= vec_sub(ix1
,jx3
);
18028 dy11
= vec_sub(iy1
,jy1
);
18029 dy12
= vec_sub(iy1
,jy2
);
18030 dy13
= vec_sub(iy1
,jy3
);
18031 dz11
= vec_sub(iz1
,jz1
);
18032 dz12
= vec_sub(iz1
,jz2
);
18033 dz13
= vec_sub(iz1
,jz3
);
18034 dx21
= vec_sub(ix2
,jx1
);
18035 dx22
= vec_sub(ix2
,jx2
);
18036 dx23
= vec_sub(ix2
,jx3
);
18037 dy21
= vec_sub(iy2
,jy1
);
18038 dy22
= vec_sub(iy2
,jy2
);
18039 dy23
= vec_sub(iy2
,jy3
);
18040 dz21
= vec_sub(iz2
,jz1
);
18041 dz22
= vec_sub(iz2
,jz2
);
18042 dz23
= vec_sub(iz2
,jz3
);
18043 dx31
= vec_sub(ix3
,jx1
);
18044 dx32
= vec_sub(ix3
,jx2
);
18045 dx33
= vec_sub(ix3
,jx3
);
18046 dy31
= vec_sub(iy3
,jy1
);
18047 dy32
= vec_sub(iy3
,jy2
);
18048 dy33
= vec_sub(iy3
,jy3
);
18049 dz31
= vec_sub(iz3
,jz1
);
18050 dz32
= vec_sub(iz3
,jz2
);
18051 dz33
= vec_sub(iz3
,jz3
);
18053 rsq11
= vec_madd(dx11
,dx11
,nul
);
18054 rsq12
= vec_madd(dx12
,dx12
,nul
);
18055 rsq13
= vec_madd(dx13
,dx13
,nul
);
18056 rsq21
= vec_madd(dx21
,dx21
,nul
);
18057 rsq22
= vec_madd(dx22
,dx22
,nul
);
18058 rsq23
= vec_madd(dx23
,dx23
,nul
);
18059 rsq31
= vec_madd(dx31
,dx31
,nul
);
18060 rsq32
= vec_madd(dx32
,dx32
,nul
);
18061 rsq33
= vec_madd(dx33
,dx33
,nul
);
18062 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18063 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18064 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18065 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18066 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18067 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18068 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18069 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18070 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18071 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18072 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18073 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18074 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18075 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18076 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18077 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18078 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18079 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/*
 * The unused lane is cleared both before and after the inverse square root;
 * presumably the second pass removes whatever do_9_invsqrt yields for a zero
 * input - TODO confirm against the helper.
 */
18081 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18082 &rsq21
,&rsq22
,&rsq23
,
18083 &rsq31
,&rsq32
,&rsq33
);
18085 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18088 &rinv11
,&rinv12
,&rinv13
,
18089 &rinv21
,&rinv22
,&rinv23
,
18090 &rinv31
,&rinv32
,&rinv33
);
18092 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18093 &rinv21
,&rinv22
,&rinv23
,
18094 &rinv31
,&rinv32
,&rinv33
);
18096 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
18097 r11
= vec_madd(rsq11
,rinv11
,nul
);
18098 r12
= vec_madd(rsq12
,rinv12
,nul
);
18099 r13
= vec_madd(rsq13
,rinv13
,nul
);
18100 r21
= vec_madd(rsq21
,rinv21
,nul
);
18101 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
18102 r22
= vec_madd(rsq22
,rinv22
,nul
);
18103 r23
= vec_madd(rsq23
,rinv23
,nul
);
18104 r31
= vec_madd(rsq31
,rinv31
,nul
);
18105 r32
= vec_madd(rsq32
,rinv32
,nul
);
18106 r33
= vec_madd(rsq33
,rinv33
,nul
);
18107 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
18109 do_vonly_3_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
18110 do_vonly_3_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18111 do_vonly_3_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18112 do_vonly_3_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18113 do_vonly_3_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18114 do_vonly_3_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18115 do_vonly_3_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18116 do_vonly_3_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18117 do_vonly_3_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
/* Same accumulation as the four-wide case, using the lane-masked coefficients. */
18119 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
18120 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
18121 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18122 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18123 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18124 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18125 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18126 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18127 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18128 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18129 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/* Remainder case: two j molecules (k < nj1-1); coefficients are shifted by 8 bytes. */
18130 } else if(k
<(nj1
-1)) {
18135 load_2_water(pos
+j3a
,pos
+j3b
,
18136 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18137 qqOOt
= vec_sld(qqOO
,nul
,8);
18138 qqOHt
= vec_sld(qqOH
,nul
,8);
18139 qqHHt
= vec_sld(qqHH
,nul
,8);
18140 c6t
= vec_sld(c6
,nul
,8);
18141 c12t
= vec_sld(c12
,nul
,8);
18143 dx11
= vec_sub(ix1
,jx1
);
18144 dx12
= vec_sub(ix1
,jx2
);
18145 dx13
= vec_sub(ix1
,jx3
);
18146 dy11
= vec_sub(iy1
,jy1
);
18147 dy12
= vec_sub(iy1
,jy2
);
18148 dy13
= vec_sub(iy1
,jy3
);
18149 dz11
= vec_sub(iz1
,jz1
);
18150 dz12
= vec_sub(iz1
,jz2
);
18151 dz13
= vec_sub(iz1
,jz3
);
18152 dx21
= vec_sub(ix2
,jx1
);
18153 dx22
= vec_sub(ix2
,jx2
);
18154 dx23
= vec_sub(ix2
,jx3
);
18155 dy21
= vec_sub(iy2
,jy1
);
18156 dy22
= vec_sub(iy2
,jy2
);
18157 dy23
= vec_sub(iy2
,jy3
);
18158 dz21
= vec_sub(iz2
,jz1
);
18159 dz22
= vec_sub(iz2
,jz2
);
18160 dz23
= vec_sub(iz2
,jz3
);
18161 dx31
= vec_sub(ix3
,jx1
);
18162 dx32
= vec_sub(ix3
,jx2
);
18163 dx33
= vec_sub(ix3
,jx3
);
18164 dy31
= vec_sub(iy3
,jy1
);
18165 dy32
= vec_sub(iy3
,jy2
);
18166 dy33
= vec_sub(iy3
,jy3
);
18167 dz31
= vec_sub(iz3
,jz1
);
18168 dz32
= vec_sub(iz3
,jz2
);
18169 dz33
= vec_sub(iz3
,jz3
);
18171 rsq11
= vec_madd(dx11
,dx11
,nul
);
18172 rsq12
= vec_madd(dx12
,dx12
,nul
);
18173 rsq13
= vec_madd(dx13
,dx13
,nul
);
18174 rsq21
= vec_madd(dx21
,dx21
,nul
);
18175 rsq22
= vec_madd(dx22
,dx22
,nul
);
18176 rsq23
= vec_madd(dx23
,dx23
,nul
);
18177 rsq31
= vec_madd(dx31
,dx31
,nul
);
18178 rsq32
= vec_madd(dx32
,dx32
,nul
);
18179 rsq33
= vec_madd(dx33
,dx33
,nul
);
18180 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18181 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18182 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18183 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18184 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18185 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18186 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18187 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18188 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18189 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18190 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18191 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18192 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18193 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18194 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18195 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18196 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18197 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
/* Clear the two unused lanes before and after the inverse square root. */
18199 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18200 &rsq21
,&rsq22
,&rsq23
,
18201 &rsq31
,&rsq32
,&rsq33
);
18203 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18206 &rinv11
,&rinv12
,&rinv13
,
18207 &rinv21
,&rinv22
,&rinv23
,
18208 &rinv31
,&rinv32
,&rinv33
);
18210 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18211 &rinv21
,&rinv22
,&rinv23
,
18212 &rinv31
,&rinv32
,&rinv33
);
18214 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
18215 r11
= vec_madd(rsq11
,rinv11
,nul
);
18216 r12
= vec_madd(rsq12
,rinv12
,nul
);
18217 r13
= vec_madd(rsq13
,rinv13
,nul
);
18218 r21
= vec_madd(rsq21
,rinv21
,nul
);
18219 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
18220 r22
= vec_madd(rsq22
,rinv22
,nul
);
18221 r23
= vec_madd(rsq23
,rinv23
,nul
);
18222 r31
= vec_madd(rsq31
,rinv31
,nul
);
18223 r32
= vec_madd(rsq32
,rinv32
,nul
);
18224 r33
= vec_madd(rsq33
,rinv33
,nul
);
18225 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
18227 do_vonly_2_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
18228 do_vonly_2_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18229 do_vonly_2_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18230 do_vonly_2_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18231 do_vonly_2_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18232 do_vonly_2_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18233 do_vonly_2_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18234 do_vonly_2_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18235 do_vonly_2_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18237 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
18238 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
18239 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18240 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18241 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18242 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18243 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18244 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18245 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18246 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18247 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/*
 * Remainder case: one j molecule; coefficients are shifted by 12 bytes and
 * three lanes are cleared. NOTE(review): the branch header guarding this
 * case is not shown alongside this text.
 */
18251 load_1_water(pos
+j3a
,
18252 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18253 qqOOt
= vec_sld(qqOO
,nul
,12);
18254 qqOHt
= vec_sld(qqOH
,nul
,12);
18255 qqHHt
= vec_sld(qqHH
,nul
,12);
18256 c6t
= vec_sld(c6
,nul
,12);
18257 c12t
= vec_sld(c12
,nul
,12);
18259 dx11
= vec_sub(ix1
,jx1
);
18260 dx12
= vec_sub(ix1
,jx2
);
18261 dx13
= vec_sub(ix1
,jx3
);
18262 dy11
= vec_sub(iy1
,jy1
);
18263 dy12
= vec_sub(iy1
,jy2
);
18264 dy13
= vec_sub(iy1
,jy3
);
18265 dz11
= vec_sub(iz1
,jz1
);
18266 dz12
= vec_sub(iz1
,jz2
);
18267 dz13
= vec_sub(iz1
,jz3
);
18268 dx21
= vec_sub(ix2
,jx1
);
18269 dx22
= vec_sub(ix2
,jx2
);
18270 dx23
= vec_sub(ix2
,jx3
);
18271 dy21
= vec_sub(iy2
,jy1
);
18272 dy22
= vec_sub(iy2
,jy2
);
18273 dy23
= vec_sub(iy2
,jy3
);
18274 dz21
= vec_sub(iz2
,jz1
);
18275 dz22
= vec_sub(iz2
,jz2
);
18276 dz23
= vec_sub(iz2
,jz3
);
18277 dx31
= vec_sub(ix3
,jx1
);
18278 dx32
= vec_sub(ix3
,jx2
);
18279 dx33
= vec_sub(ix3
,jx3
);
18280 dy31
= vec_sub(iy3
,jy1
);
18281 dy32
= vec_sub(iy3
,jy2
);
18282 dy33
= vec_sub(iy3
,jy3
);
18283 dz31
= vec_sub(iz3
,jz1
);
18284 dz32
= vec_sub(iz3
,jz2
);
18285 dz33
= vec_sub(iz3
,jz3
);
18287 rsq11
= vec_madd(dx11
,dx11
,nul
);
18288 rsq12
= vec_madd(dx12
,dx12
,nul
);
18289 rsq13
= vec_madd(dx13
,dx13
,nul
);
18290 rsq21
= vec_madd(dx21
,dx21
,nul
);
18291 rsq22
= vec_madd(dx22
,dx22
,nul
);
18292 rsq23
= vec_madd(dx23
,dx23
,nul
);
18293 rsq31
= vec_madd(dx31
,dx31
,nul
);
18294 rsq32
= vec_madd(dx32
,dx32
,nul
);
18295 rsq33
= vec_madd(dx33
,dx33
,nul
);
18296 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18297 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18298 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18299 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18300 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18301 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18302 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18303 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18304 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18305 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18306 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18307 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18308 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18309 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18310 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18311 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18312 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18313 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
18315 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18316 &rsq21
,&rsq22
,&rsq23
,
18317 &rsq31
,&rsq32
,&rsq33
);
18319 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18322 &rinv11
,&rinv12
,&rinv13
,
18323 &rinv21
,&rinv22
,&rinv23
,
18324 &rinv31
,&rinv32
,&rinv33
);
18326 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18327 &rinv21
,&rinv22
,&rinv23
,
18328 &rinv31
,&rinv32
,&rinv33
);
18330 rinvsq11
= vec_madd(rinv11
,rinv11
,nul
);
18331 r11
= vec_madd(rsq11
,rinv11
,nul
);
18332 r12
= vec_madd(rsq12
,rinv12
,nul
);
18333 r13
= vec_madd(rsq13
,rinv13
,nul
);
18334 r21
= vec_madd(rsq21
,rinv21
,nul
);
18335 rinvsix
= vec_madd(rinvsq11
,rinvsq11
,nul
);
18336 r22
= vec_madd(rsq22
,rinv22
,nul
);
18337 r23
= vec_madd(rsq23
,rinv23
,nul
);
18338 r31
= vec_madd(rsq31
,rinv31
,nul
);
18339 r32
= vec_madd(rsq32
,rinv32
,nul
);
18340 r33
= vec_madd(rsq33
,rinv33
,nul
);
18341 rinvsix
= vec_madd(rinvsix
,rinvsq11
,nul
);
18343 do_vonly_1_ctable_coul(VFtab
,vec_madd(r11
,tsc
,nul
),&VV11c
);
18344 do_vonly_1_ctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18345 do_vonly_1_ctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18346 do_vonly_1_ctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18347 do_vonly_1_ctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18348 do_vonly_1_ctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18349 do_vonly_1_ctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18350 do_vonly_1_ctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18351 do_vonly_1_ctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18353 vnbtot
= vec_nmsub(c6t
,rinvsix
,vnbtot
);
18354 vnbtot
= vec_madd(c12t
,vec_madd(rinvsix
,rinvsix
,nul
),vnbtot
);
18355 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18356 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18357 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18358 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18359 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18360 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18361 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18362 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18363 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
/*
 * Add the accumulated vectors into the scalar slots selected by gid[n]
 * (add_vector_to_float presumably sums the four lanes - helper not shown).
 */
18365 /* update outer data */
18366 add_vector_to_float(Vc
+gid
[n
],vctot
);
18367 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);
18372 void mcinl3330_altivec(
18391 vector
float ix1
,iy1
,iz1
,ix2
,iy2
,iz2
,ix3
,iy3
,iz3
;
18392 vector
float jx1
,jy1
,jz1
,jx2
,jy2
,jz2
,jx3
,jy3
,jz3
;
18394 vector
float dx11
,dy11
,dz11
,dx12
,dy12
,dz12
,dx13
,dy13
,dz13
;
18395 vector
float dx21
,dy21
,dz21
,dx22
,dy22
,dz22
,dx23
,dy23
,dz23
;
18396 vector
float dx31
,dy31
,dz31
,dx32
,dy32
,dz32
,dx33
,dy33
,dz33
;
18398 vector
float rsq11
,rsq12
,rsq13
,rsq21
,rsq22
,rsq23
,rsq31
,rsq32
,rsq33
;
18399 vector
float r11
,r12
,r13
,r21
,r22
,r23
,r31
,r32
,r33
;
18400 vector
float rinv11
,rinv12
,rinv13
,rinv21
,rinv22
,rinv23
,rinv31
,rinv32
,rinv33
;
18402 vector
float vfacel
,nul
;
18403 vector
float vctot
,qqOO
,qqOH
,qqHH
,qO
,qH
,c6
,c12
;
18404 vector
float vnbtot
,tsc
,qqOOt
,qqOHt
,qqHHt
,c6t
,c12t
;
18405 vector
float VV11c
,VV12c
,VV13c
;
18406 vector
float VV21c
,VV22c
,VV23c
;
18407 vector
float VV31c
,VV32c
,VV33c
;
18408 vector
float VVd
,VVr
;
18410 int n
,k
,k0
,ii
,is3
,ii3
,ntiA
,nj0
,nj1
;
18411 int jnra
,jnrb
,jnrc
,jnrd
,tp
,tj
;
18412 int j3a
,j3b
,j3c
,j3d
;
18415 vfacel
=load_float_and_splat(&facel
);
18416 tsc
=load_float_and_splat(&tabscale
);
18418 qO
= load_float_and_splat(charge
+ii
);
18419 qH
= load_float_and_splat(charge
+ii
+1);
18420 qqOO
= vec_madd(qO
,qO
,nul
);
18421 qqOH
= vec_madd(qO
,qH
,nul
);
18422 qqHH
= vec_madd(qH
,qH
,nul
);
18423 qqOO
= vec_madd(qqOO
,vfacel
,nul
);
18424 qqOH
= vec_madd(qqOH
,vfacel
,nul
);
18425 qqHH
= vec_madd(qqHH
,vfacel
,nul
);
18428 load_1_pair(nbfp
+tj
,&c6
,&c12
);
18429 c6
= vec_splat(c6
,0);
18430 c12
= vec_splat(c12
,0);
18432 for(n
=0;n
<nri
;n
++) {
18436 load_1_water_shift_and_splat(pos
+ii3
,shiftvec
+is3
,&ix1
,&iy1
,&iz1
,
18437 &ix2
,&iy2
,&iz2
,&ix3
,&iy3
,&iz3
);
18443 for(k
=nj0
; k
<(nj1
-3); k
+=4) {
18452 load_4_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,pos
+j3d
,
18453 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18454 dx11
= vec_sub(ix1
,jx1
);
18455 dx12
= vec_sub(ix1
,jx2
);
18456 dx13
= vec_sub(ix1
,jx3
);
18457 dy11
= vec_sub(iy1
,jy1
);
18458 dy12
= vec_sub(iy1
,jy2
);
18459 dy13
= vec_sub(iy1
,jy3
);
18460 dz11
= vec_sub(iz1
,jz1
);
18461 dz12
= vec_sub(iz1
,jz2
);
18462 dz13
= vec_sub(iz1
,jz3
);
18463 dx21
= vec_sub(ix2
,jx1
);
18464 dx22
= vec_sub(ix2
,jx2
);
18465 dx23
= vec_sub(ix2
,jx3
);
18466 dy21
= vec_sub(iy2
,jy1
);
18467 dy22
= vec_sub(iy2
,jy2
);
18468 dy23
= vec_sub(iy2
,jy3
);
18469 dz21
= vec_sub(iz2
,jz1
);
18470 dz22
= vec_sub(iz2
,jz2
);
18471 dz23
= vec_sub(iz2
,jz3
);
18472 dx31
= vec_sub(ix3
,jx1
);
18473 dx32
= vec_sub(ix3
,jx2
);
18474 dx33
= vec_sub(ix3
,jx3
);
18475 dy31
= vec_sub(iy3
,jy1
);
18476 dy32
= vec_sub(iy3
,jy2
);
18477 dy33
= vec_sub(iy3
,jy3
);
18478 dz31
= vec_sub(iz3
,jz1
);
18479 dz32
= vec_sub(iz3
,jz2
);
18480 dz33
= vec_sub(iz3
,jz3
);
18482 rsq11
= vec_madd(dx11
,dx11
,nul
);
18483 rsq12
= vec_madd(dx12
,dx12
,nul
);
18484 rsq13
= vec_madd(dx13
,dx13
,nul
);
18485 rsq21
= vec_madd(dx21
,dx21
,nul
);
18486 rsq22
= vec_madd(dx22
,dx22
,nul
);
18487 rsq23
= vec_madd(dx23
,dx23
,nul
);
18488 rsq31
= vec_madd(dx31
,dx31
,nul
);
18489 rsq32
= vec_madd(dx32
,dx32
,nul
);
18490 rsq33
= vec_madd(dx33
,dx33
,nul
);
18491 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18492 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18493 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18494 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18495 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18496 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18497 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18498 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18499 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18500 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18501 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18502 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18503 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18504 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18505 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18506 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18507 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18508 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
18510 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18513 &rinv11
,&rinv12
,&rinv13
,
18514 &rinv21
,&rinv22
,&rinv23
,
18515 &rinv31
,&rinv32
,&rinv33
);
18517 r11
= vec_madd(rsq11
,rinv11
,nul
);
18518 r12
= vec_madd(rsq12
,rinv12
,nul
);
18519 r13
= vec_madd(rsq13
,rinv13
,nul
);
18520 r21
= vec_madd(rsq21
,rinv21
,nul
);
18521 r22
= vec_madd(rsq22
,rinv22
,nul
);
18522 r23
= vec_madd(rsq23
,rinv23
,nul
);
18523 r31
= vec_madd(rsq31
,rinv31
,nul
);
18524 r32
= vec_madd(rsq32
,rinv32
,nul
);
18525 r33
= vec_madd(rsq33
,rinv33
,nul
);
18527 do_vonly_4_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
18529 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18530 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18531 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18532 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18533 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18534 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18535 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18536 do_vonly_4_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18538 vnbtot
= vec_madd(c6
,VVd
,vnbtot
);
18539 vnbtot
= vec_madd(c12
,VVr
,vnbtot
);
18540 vctot
= vec_madd(qqOO
,VV11c
,vctot
);
18541 vctot
= vec_madd(qqOH
,VV12c
,vctot
);
18542 vctot
= vec_madd(qqOH
,VV13c
,vctot
);
18543 vctot
= vec_madd(qqOH
,VV21c
,vctot
);
18544 vctot
= vec_madd(qqHH
,VV22c
,vctot
);
18545 vctot
= vec_madd(qqHH
,VV23c
,vctot
);
18546 vctot
= vec_madd(qqOH
,VV31c
,vctot
);
18547 vctot
= vec_madd(qqHH
,VV32c
,vctot
);
18548 vctot
= vec_madd(qqHH
,VV33c
,vctot
);
18557 load_3_water(pos
+j3a
,pos
+j3b
,pos
+j3c
,
18558 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18559 qqOOt
= vec_sld(qqOO
,nul
,4);
18560 qqOHt
= vec_sld(qqOH
,nul
,4);
18561 qqHHt
= vec_sld(qqHH
,nul
,4);
18562 c6t
= vec_sld(c6
,nul
,4);
18563 c12t
= vec_sld(c12
,nul
,4);
18565 dx11
= vec_sub(ix1
,jx1
);
18566 dx12
= vec_sub(ix1
,jx2
);
18567 dx13
= vec_sub(ix1
,jx3
);
18568 dy11
= vec_sub(iy1
,jy1
);
18569 dy12
= vec_sub(iy1
,jy2
);
18570 dy13
= vec_sub(iy1
,jy3
);
18571 dz11
= vec_sub(iz1
,jz1
);
18572 dz12
= vec_sub(iz1
,jz2
);
18573 dz13
= vec_sub(iz1
,jz3
);
18574 dx21
= vec_sub(ix2
,jx1
);
18575 dx22
= vec_sub(ix2
,jx2
);
18576 dx23
= vec_sub(ix2
,jx3
);
18577 dy21
= vec_sub(iy2
,jy1
);
18578 dy22
= vec_sub(iy2
,jy2
);
18579 dy23
= vec_sub(iy2
,jy3
);
18580 dz21
= vec_sub(iz2
,jz1
);
18581 dz22
= vec_sub(iz2
,jz2
);
18582 dz23
= vec_sub(iz2
,jz3
);
18583 dx31
= vec_sub(ix3
,jx1
);
18584 dx32
= vec_sub(ix3
,jx2
);
18585 dx33
= vec_sub(ix3
,jx3
);
18586 dy31
= vec_sub(iy3
,jy1
);
18587 dy32
= vec_sub(iy3
,jy2
);
18588 dy33
= vec_sub(iy3
,jy3
);
18589 dz31
= vec_sub(iz3
,jz1
);
18590 dz32
= vec_sub(iz3
,jz2
);
18591 dz33
= vec_sub(iz3
,jz3
);
18593 rsq11
= vec_madd(dx11
,dx11
,nul
);
18594 rsq12
= vec_madd(dx12
,dx12
,nul
);
18595 rsq13
= vec_madd(dx13
,dx13
,nul
);
18596 rsq21
= vec_madd(dx21
,dx21
,nul
);
18597 rsq22
= vec_madd(dx22
,dx22
,nul
);
18598 rsq23
= vec_madd(dx23
,dx23
,nul
);
18599 rsq31
= vec_madd(dx31
,dx31
,nul
);
18600 rsq32
= vec_madd(dx32
,dx32
,nul
);
18601 rsq33
= vec_madd(dx33
,dx33
,nul
);
18602 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18603 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18604 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18605 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18606 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18607 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18608 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18609 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18610 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18611 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18612 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18613 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18614 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18615 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18616 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18617 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18618 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18619 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
18621 zero_highest_element_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18622 &rsq21
,&rsq22
,&rsq23
,
18623 &rsq31
,&rsq32
,&rsq33
);
18625 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18628 &rinv11
,&rinv12
,&rinv13
,
18629 &rinv21
,&rinv22
,&rinv23
,
18630 &rinv31
,&rinv32
,&rinv33
);
18632 zero_highest_element_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18633 &rinv21
,&rinv22
,&rinv23
,
18634 &rinv31
,&rinv32
,&rinv33
);
18636 r11
= vec_madd(rsq11
,rinv11
,nul
);
18637 r12
= vec_madd(rsq12
,rinv12
,nul
);
18638 r13
= vec_madd(rsq13
,rinv13
,nul
);
18639 r21
= vec_madd(rsq21
,rinv21
,nul
);
18640 r22
= vec_madd(rsq22
,rinv22
,nul
);
18641 r23
= vec_madd(rsq23
,rinv23
,nul
);
18642 r31
= vec_madd(rsq31
,rinv31
,nul
);
18643 r32
= vec_madd(rsq32
,rinv32
,nul
);
18644 r33
= vec_madd(rsq33
,rinv33
,nul
);
18646 do_vonly_3_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
18648 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18649 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18650 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18651 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18652 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18653 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18654 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18655 do_vonly_3_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18657 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
18658 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
18659 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18660 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18661 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18662 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18663 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18664 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18665 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18666 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18667 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
18668 } else if(k
<(nj1
-1)) {
18673 load_2_water(pos
+j3a
,pos
+j3b
,
18674 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18675 qqOOt
= vec_sld(qqOO
,nul
,8);
18676 qqOHt
= vec_sld(qqOH
,nul
,8);
18677 qqHHt
= vec_sld(qqHH
,nul
,8);
18678 c6t
= vec_sld(c6
,nul
,8);
18679 c12t
= vec_sld(c12
,nul
,8);
18681 dx11
= vec_sub(ix1
,jx1
);
18682 dx12
= vec_sub(ix1
,jx2
);
18683 dx13
= vec_sub(ix1
,jx3
);
18684 dy11
= vec_sub(iy1
,jy1
);
18685 dy12
= vec_sub(iy1
,jy2
);
18686 dy13
= vec_sub(iy1
,jy3
);
18687 dz11
= vec_sub(iz1
,jz1
);
18688 dz12
= vec_sub(iz1
,jz2
);
18689 dz13
= vec_sub(iz1
,jz3
);
18690 dx21
= vec_sub(ix2
,jx1
);
18691 dx22
= vec_sub(ix2
,jx2
);
18692 dx23
= vec_sub(ix2
,jx3
);
18693 dy21
= vec_sub(iy2
,jy1
);
18694 dy22
= vec_sub(iy2
,jy2
);
18695 dy23
= vec_sub(iy2
,jy3
);
18696 dz21
= vec_sub(iz2
,jz1
);
18697 dz22
= vec_sub(iz2
,jz2
);
18698 dz23
= vec_sub(iz2
,jz3
);
18699 dx31
= vec_sub(ix3
,jx1
);
18700 dx32
= vec_sub(ix3
,jx2
);
18701 dx33
= vec_sub(ix3
,jx3
);
18702 dy31
= vec_sub(iy3
,jy1
);
18703 dy32
= vec_sub(iy3
,jy2
);
18704 dy33
= vec_sub(iy3
,jy3
);
18705 dz31
= vec_sub(iz3
,jz1
);
18706 dz32
= vec_sub(iz3
,jz2
);
18707 dz33
= vec_sub(iz3
,jz3
);
18709 rsq11
= vec_madd(dx11
,dx11
,nul
);
18710 rsq12
= vec_madd(dx12
,dx12
,nul
);
18711 rsq13
= vec_madd(dx13
,dx13
,nul
);
18712 rsq21
= vec_madd(dx21
,dx21
,nul
);
18713 rsq22
= vec_madd(dx22
,dx22
,nul
);
18714 rsq23
= vec_madd(dx23
,dx23
,nul
);
18715 rsq31
= vec_madd(dx31
,dx31
,nul
);
18716 rsq32
= vec_madd(dx32
,dx32
,nul
);
18717 rsq33
= vec_madd(dx33
,dx33
,nul
);
18718 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18719 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18720 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18721 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18722 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18723 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18724 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18725 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18726 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18727 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18728 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18729 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18730 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18731 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18732 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18733 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18734 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18735 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
18737 zero_highest_2_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18738 &rsq21
,&rsq22
,&rsq23
,
18739 &rsq31
,&rsq32
,&rsq33
);
18741 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18744 &rinv11
,&rinv12
,&rinv13
,
18745 &rinv21
,&rinv22
,&rinv23
,
18746 &rinv31
,&rinv32
,&rinv33
);
18748 zero_highest_2_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18749 &rinv21
,&rinv22
,&rinv23
,
18750 &rinv31
,&rinv32
,&rinv33
);
18752 r11
= vec_madd(rsq11
,rinv11
,nul
);
18753 r12
= vec_madd(rsq12
,rinv12
,nul
);
18754 r13
= vec_madd(rsq13
,rinv13
,nul
);
18755 r21
= vec_madd(rsq21
,rinv21
,nul
);
18756 r22
= vec_madd(rsq22
,rinv22
,nul
);
18757 r23
= vec_madd(rsq23
,rinv23
,nul
);
18758 r31
= vec_madd(rsq31
,rinv31
,nul
);
18759 r32
= vec_madd(rsq32
,rinv32
,nul
);
18760 r33
= vec_madd(rsq33
,rinv33
,nul
);
18762 do_vonly_2_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
18764 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18765 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18766 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18767 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18768 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18769 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18770 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18771 do_vonly_2_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18773 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
18774 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
18775 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18776 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18777 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18778 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18779 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18780 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18781 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18782 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18783 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
18787 load_1_water(pos
+j3a
,
18788 &jx1
,&jy1
,&jz1
,&jx2
,&jy2
,&jz2
,&jx3
,&jy3
,&jz3
);
18789 qqOOt
= vec_sld(qqOO
,nul
,12);
18790 qqOHt
= vec_sld(qqOH
,nul
,12);
18791 qqHHt
= vec_sld(qqHH
,nul
,12);
18792 c6t
= vec_sld(c6
,nul
,12);
18793 c12t
= vec_sld(c12
,nul
,12);
18795 dx11
= vec_sub(ix1
,jx1
);
18796 dx12
= vec_sub(ix1
,jx2
);
18797 dx13
= vec_sub(ix1
,jx3
);
18798 dy11
= vec_sub(iy1
,jy1
);
18799 dy12
= vec_sub(iy1
,jy2
);
18800 dy13
= vec_sub(iy1
,jy3
);
18801 dz11
= vec_sub(iz1
,jz1
);
18802 dz12
= vec_sub(iz1
,jz2
);
18803 dz13
= vec_sub(iz1
,jz3
);
18804 dx21
= vec_sub(ix2
,jx1
);
18805 dx22
= vec_sub(ix2
,jx2
);
18806 dx23
= vec_sub(ix2
,jx3
);
18807 dy21
= vec_sub(iy2
,jy1
);
18808 dy22
= vec_sub(iy2
,jy2
);
18809 dy23
= vec_sub(iy2
,jy3
);
18810 dz21
= vec_sub(iz2
,jz1
);
18811 dz22
= vec_sub(iz2
,jz2
);
18812 dz23
= vec_sub(iz2
,jz3
);
18813 dx31
= vec_sub(ix3
,jx1
);
18814 dx32
= vec_sub(ix3
,jx2
);
18815 dx33
= vec_sub(ix3
,jx3
);
18816 dy31
= vec_sub(iy3
,jy1
);
18817 dy32
= vec_sub(iy3
,jy2
);
18818 dy33
= vec_sub(iy3
,jy3
);
18819 dz31
= vec_sub(iz3
,jz1
);
18820 dz32
= vec_sub(iz3
,jz2
);
18821 dz33
= vec_sub(iz3
,jz3
);
18823 rsq11
= vec_madd(dx11
,dx11
,nul
);
18824 rsq12
= vec_madd(dx12
,dx12
,nul
);
18825 rsq13
= vec_madd(dx13
,dx13
,nul
);
18826 rsq21
= vec_madd(dx21
,dx21
,nul
);
18827 rsq22
= vec_madd(dx22
,dx22
,nul
);
18828 rsq23
= vec_madd(dx23
,dx23
,nul
);
18829 rsq31
= vec_madd(dx31
,dx31
,nul
);
18830 rsq32
= vec_madd(dx32
,dx32
,nul
);
18831 rsq33
= vec_madd(dx33
,dx33
,nul
);
18832 rsq11
= vec_madd(dy11
,dy11
,rsq11
);
18833 rsq12
= vec_madd(dy12
,dy12
,rsq12
);
18834 rsq13
= vec_madd(dy13
,dy13
,rsq13
);
18835 rsq21
= vec_madd(dy21
,dy21
,rsq21
);
18836 rsq22
= vec_madd(dy22
,dy22
,rsq22
);
18837 rsq23
= vec_madd(dy23
,dy23
,rsq23
);
18838 rsq31
= vec_madd(dy31
,dy31
,rsq31
);
18839 rsq32
= vec_madd(dy32
,dy32
,rsq32
);
18840 rsq33
= vec_madd(dy33
,dy33
,rsq33
);
18841 rsq11
= vec_madd(dz11
,dz11
,rsq11
);
18842 rsq12
= vec_madd(dz12
,dz12
,rsq12
);
18843 rsq13
= vec_madd(dz13
,dz13
,rsq13
);
18844 rsq21
= vec_madd(dz21
,dz21
,rsq21
);
18845 rsq22
= vec_madd(dz22
,dz22
,rsq22
);
18846 rsq23
= vec_madd(dz23
,dz23
,rsq23
);
18847 rsq31
= vec_madd(dz31
,dz31
,rsq31
);
18848 rsq32
= vec_madd(dz32
,dz32
,rsq32
);
18849 rsq33
= vec_madd(dz33
,dz33
,rsq33
);
18851 zero_highest_3_elements_in_9_vectors(&rsq11
,&rsq12
,&rsq13
,
18852 &rsq21
,&rsq22
,&rsq23
,
18853 &rsq31
,&rsq32
,&rsq33
);
18855 do_9_invsqrt(rsq11
,rsq12
,rsq13
,
18858 &rinv11
,&rinv12
,&rinv13
,
18859 &rinv21
,&rinv22
,&rinv23
,
18860 &rinv31
,&rinv32
,&rinv33
);
18862 zero_highest_3_elements_in_9_vectors(&rinv11
,&rinv12
,&rinv13
,
18863 &rinv21
,&rinv22
,&rinv23
,
18864 &rinv31
,&rinv32
,&rinv33
);
18866 r11
= vec_madd(rsq11
,rinv11
,nul
);
18867 r12
= vec_madd(rsq12
,rinv12
,nul
);
18868 r13
= vec_madd(rsq13
,rinv13
,nul
);
18869 r21
= vec_madd(rsq21
,rinv21
,nul
);
18870 r22
= vec_madd(rsq22
,rinv22
,nul
);
18871 r23
= vec_madd(rsq23
,rinv23
,nul
);
18872 r31
= vec_madd(rsq31
,rinv31
,nul
);
18873 r32
= vec_madd(rsq32
,rinv32
,nul
);
18874 r33
= vec_madd(rsq33
,rinv33
,nul
);
18876 do_vonly_1_ljctable_coul_and_lj(VFtab
,vec_madd(r11
,tsc
,nul
),
18878 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r12
,tsc
,nul
),&VV12c
);
18879 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r13
,tsc
,nul
),&VV13c
);
18880 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r21
,tsc
,nul
),&VV21c
);
18881 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r22
,tsc
,nul
),&VV22c
);
18882 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r23
,tsc
,nul
),&VV23c
);
18883 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r31
,tsc
,nul
),&VV31c
);
18884 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r32
,tsc
,nul
),&VV32c
);
18885 do_vonly_1_ljctable_coul(VFtab
,vec_madd(r33
,tsc
,nul
),&VV33c
);
18887 vnbtot
= vec_madd(c6t
,VVd
,vnbtot
);
18888 vnbtot
= vec_madd(c12t
,VVr
,vnbtot
);
18889 vctot
= vec_madd(qqOOt
,VV11c
,vctot
);
18890 vctot
= vec_madd(qqOHt
,VV12c
,vctot
);
18891 vctot
= vec_madd(qqOHt
,VV13c
,vctot
);
18892 vctot
= vec_madd(qqOHt
,VV21c
,vctot
);
18893 vctot
= vec_madd(qqHHt
,VV22c
,vctot
);
18894 vctot
= vec_madd(qqHHt
,VV23c
,vctot
);
18895 vctot
= vec_madd(qqOHt
,VV31c
,vctot
);
18896 vctot
= vec_madd(qqHHt
,VV32c
,vctot
);
18897 vctot
= vec_madd(qqHHt
,VV33c
,vctot
);
18899 /* update outer data */
18900 add_vector_to_float(Vc
+gid
[n
],vctot
);
18901 add_vector_to_float(Vnb
+gid
[n
],vnbtot
);