2 C This source code is part of
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
36 # define gmxreal real*8
38 # define gmxreal real*4
44 C Gromacs nonbonded kernel pwr6kernel131
45 C Coulomb interaction: Normal Coulomb
46 C VdW interaction: Tabulated
47 C water optimization: SPC/TIP3P - other atoms
48 C Calculate forces: yes
! Purpose (from the visible statements): for three i sites (indices 1-3)
! against one j atom per inner iteration, form squared and reciprocal
! distances, evaluate Coulomb and table-based VdW terms, accumulate
! forces into faction and fshift and energies into Vc and Vvdw.
! NOTE(review): this listing has gaps (the embedded original line numbers
! skip), so the argument list, loop statements, continuation lines and
! several assignments referred to by the headings below are not visible.
50 subroutine pwr6kernel131(
! Dummy arguments: index/neighbor-list arrays are integer*4, coordinate,
! force, parameter and table arrays are gmxreal, all assumed-size.
83 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
84 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
85 integer*4 gid(*),type(*),ntype
86 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87 gmxreal Vvdw(*),tabscale,VFtab(*)
88 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89 integer*4 nthreads,count,mtx,outeriter,inneriter
! Local loop indices and work-unit bounds.
92 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93 integer*4 nn0,nn1,nouter,ninner
! Scalar force factor and the temporary force vector (tx,ty,tz).
95 gmxreal fscal,tx,ty,tz
98 gmxreal qq,vcoul,vctot
101 gmxreal Vvdw6,Vvdwtot
! Table lookup work variables.
103 gmxreal r,rt,eps,eps2
105 gmxreal Y,F,Geps,Heps2,Fp,VV
! Per-site i coordinates and force accumulators, j coordinates/forces,
! and per-pair displacement, squared distance and reciprocal distance.
108 gmxreal ix1,iy1,iz1,fix1,fiy1,fiz1
109 gmxreal ix2,iy2,iz2,fix2,fiy2,fiz2
110 gmxreal ix3,iy3,iz3,fix3,fiy3,fiz3
111 gmxreal jx1,jy1,jz1,fjx1,fjy1,fjz1
112 gmxreal dx11,dy11,dz11,rsq11,rinv11
113 gmxreal dx21,dy21,dz21,rsq21,rinv21
114 gmxreal dx31,dy31,dz31,rsq31,rinv31
119 C Initialize water data
! qO and qH are the charges of atoms ii and ii+1 scaled by facel
! (presumably oxygen and hydrogen; the assignment of ii is not visible).
121 qO = facel*charge(ii)
122 qH = facel*charge(ii+1)
! nti is the i-type base that is combined with the j type to form tj below.
123 nti = 2*ntype*type(ii)
126 C Reset outer and inner iteration counters
130 C Loop over thread workunits
! Label 10 is the re-entry point of the goto near the end of the routine.
! nn0 and nn1 are passed to pwr6kernelsync, presumably to receive the
! next range of neighbor lists - confirm against that routine.
131 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
! Clamp the upper bound so it never exceeds the number of lists.
132 if(nn1.gt.nri) nn1=nri
134 C Start outer loop over neighborlists
138 C Load shift vector for this list
! Y and Z shift components; the X component load is not visible here.
141 shY = shiftvec(is3+1)
142 shZ = shiftvec(is3+2)
144 C Load limits for loop over neighbors
148 C Get outer coordinate index
152 C Load i atom data, add shift vector
! pos(ii3+0..8) supplies x,y,z for three consecutive i sites; the same
! shift vector is added to each site.
153 ix1 = shX + pos(ii3+0)
154 iy1 = shY + pos(ii3+1)
155 iz1 = shZ + pos(ii3+2)
156 ix2 = shX + pos(ii3+3)
157 iy2 = shY + pos(ii3+4)
158 iz2 = shZ + pos(ii3+5)
159 ix3 = shX + pos(ii3+6)
160 iy3 = shY + pos(ii3+7)
161 iz3 = shZ + pos(ii3+8)
163 C Zero the potential energy for this list
167 C Clear i atom forces
180 C Get j neighbor index, and coordinate index
184 C load j atom coordinates
! Squared distance between each i site (1,2,3) and the j atom.
193 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
197 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
201 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
203 C Calculate 1/r and 1/r2
205 C PowerPC intrinsics 1/sqrt lookup table
! Two alternative estimate calls appear (frsqrtes on the gmxreal value,
! frsqrte on a dble conversion); presumably selected by preprocessor
! conditionals that are not visible - confirm.
207 rinv11 = frsqrtes(rsq11)
209 rinv11 = frsqrte(dble(rsq11))
! The statements below are truncated (continuation lines not visible).
! The visible part 0.5*rinv*(3.0-rsq*rinv... looks like the start of a
! Newton-Raphson refinement of the reciprocal square root - TODO confirm.
211 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
214 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
218 C PowerPC intrinsics 1/sqrt lookup table
! Same estimate-and-refine sequence for the site 2 pair.
220 rinv21 = frsqrtes(rsq21)
222 rinv21 = frsqrte(dble(rsq21))
224 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
227 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
231 C PowerPC intrinsics 1/sqrt lookup table
! Same estimate-and-refine sequence for the site 3 pair.
233 rinv31 = frsqrtes(rsq31)
235 rinv31 = frsqrte(dble(rsq31))
237 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
240 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
244 C Load parameters for j atom
! tj combines the i-type base nti with the type of j atom jnr;
! presumably an index into vdwparam - the use is not visible here.
247 tj = nti+2*type(jnr)+1
! Square of the reciprocal distance for the site 1 pair.
250 rinvsq = rinv11*rinv11
252 C Coulomb interaction
256 C Calculate table index
259 C Calculate table index
266 C Tabulated VdW interaction - dispersion
! Table entries nnn+2 and nnn+3 are scaled by eps and eps2.
269 Geps = eps*VFtab(nnn+2)
270 Heps2 = eps2*VFtab(nnn+3)
! FF is formed from Fp, Geps and twice Heps2; presumably the table
! derivative used for the force - confirm against the missing lines.
273 FF = Fp+Geps+2.0*Heps2
277 C Tabulated VdW interaction - repulsion
! Same table evaluation pattern, repeated for the repulsion term.
281 Geps = eps*VFtab(nnn+2)
282 Heps2 = eps2*VFtab(nnn+3)
285 FF = Fp+Geps+2.0*Heps2
! Accumulate both VdW energy contributions for this list.
288 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
! Truncated statement: the continuation line is not visible. The visible
! part combines vcoul*rinvsq with a term built from fijD+fijR.
289 fscal = (vcoul)*rinvsq-((fijD+fijR)
292 C Calculate temporary vectorial force
297 C Increment i atom force
302 C Decrement j atom force
! The j force is read from faction once here and then carried in the
! locals fjx1,fjy1,fjz1; it is written back after the site 3 pair below.
303 fjx1 = faction(j3+0) - tx
304 fjy1 = faction(j3+1) - ty
305 fjz1 = faction(j3+2) - tz
307 C Load parameters for j atom
309 rinvsq = rinv21*rinv21
311 C Coulomb interaction
! Site 2 pair: only the Coulomb term enters fscal.
314 fscal = (vcoul)*rinvsq
316 C Calculate temporary vectorial force
321 C Increment i atom force
326 C Decrement j atom force
331 C Load parameters for j atom
332 rinvsq = rinv31*rinv31
334 C Coulomb interaction
! Site 3 pair: only the Coulomb term enters fscal.
337 fscal = (vcoul)*rinvsq
339 C Calculate temporary vectorial force
344 C Increment i atom force
349 C Decrement j atom force
! Final decrement for this j atom, stored back into faction.
350 faction(j3+0) = fjx1 - tx
351 faction(j3+1) = fjy1 - ty
352 faction(j3+2) = fjz1 - tz
354 C Inner loop uses 115 flops/iteration
358 C Add i forces to mem and shifted force list
! Add the accumulated force of each i site to its nine faction slots.
359 faction(ii3+0) = faction(ii3+0) + fix1
360 faction(ii3+1) = faction(ii3+1) + fiy1
361 faction(ii3+2) = faction(ii3+2) + fiz1
362 faction(ii3+3) = faction(ii3+3) + fix2
363 faction(ii3+4) = faction(ii3+4) + fiy2
364 faction(ii3+5) = faction(ii3+5) + fiz2
365 faction(ii3+6) = faction(ii3+6) + fix3
366 faction(ii3+7) = faction(ii3+7) + fiy3
367 faction(ii3+8) = faction(ii3+8) + fiz3
! The per-axis sum over the three sites goes to the shift force entry.
368 fshift(is3) = fshift(is3)+fix1+fix2+fix3
369 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3
370 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3
372 C Add potential energies to the group for this list
! ggid selects the energy group slot; its assignment is not visible here.
374 Vc(ggid) = Vc(ggid) + vctot
375 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
377 C Increment number of inner iterations
378 ninner = ninner + nj1 - nj0
380 C Outer loop uses 29 flops/iteration
384 C Increment number of outer iterations
385 nouter = nouter + nn1 - nn0
! Fetch another work unit (label 10) until nn1 has reached nri.
386 if(nn1.lt.nri) goto 10
388 C Write outer/inner iteration count to pointers
400 C Gromacs nonbonded kernel pwr6kernel131nf
401 C Coulomb interaction: Normal Coulomb
402 C VdW interaction: Tabulated
403 C water optimization: SPC/TIP3P - other atoms
404 C Calculate forces: no
! Purpose (from the visible statements): energy-only counterpart of
! pwr6kernel131. It forms the same three squared/reciprocal distances
! and table-based VdW terms, and accumulates only Vc and Vvdw.
! NOTE(review): this listing has gaps (the embedded original line numbers
! skip), so the argument list, loop statements, continuation lines and
! several assignments referred to by the headings below are not visible.
406 subroutine pwr6kernel131nf(
! The dummy argument declarations match those of the force kernel; no
! visible statement in this listing writes faction or fshift.
439 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
440 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
441 integer*4 gid(*),type(*),ntype
442 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
443 gmxreal Vvdw(*),tabscale,VFtab(*)
444 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
445 integer*4 nthreads,count,mtx,outeriter,inneriter
! Local loop indices and work-unit bounds.
448 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
449 integer*4 nn0,nn1,nouter,ninner
452 gmxreal qq,vcoul,vctot
455 gmxreal Vvdw6,Vvdwtot
! Table lookup work variables.
457 gmxreal r,rt,eps,eps2
459 gmxreal Y,F,Geps,Heps2,Fp,VV
! Per-pair displacement, squared distance and reciprocal distance.
464 gmxreal dx11,dy11,dz11,rsq11,rinv11
465 gmxreal dx21,dy21,dz21,rsq21,rinv21
466 gmxreal dx31,dy31,dz31,rsq31,rinv31
471 C Initialize water data
! qO and qH are the charges of atoms ii and ii+1 scaled by facel
! (presumably oxygen and hydrogen; the assignment of ii is not visible).
473 qO = facel*charge(ii)
474 qH = facel*charge(ii+1)
! nti is the i-type base that is combined with the j type to form tj below.
475 nti = 2*ntype*type(ii)
478 C Reset outer and inner iteration counters
482 C Loop over thread workunits
! Label 10 is the re-entry point of the goto near the end of the routine.
! nn0 and nn1 are passed to pwr6kernelsync, presumably to receive the
! next range of neighbor lists - confirm against that routine.
483 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
! Clamp the upper bound so it never exceeds the number of lists.
484 if(nn1.gt.nri) nn1=nri
486 C Start outer loop over neighborlists
490 C Load shift vector for this list
! Y and Z shift components; the X component load is not visible here.
493 shY = shiftvec(is3+1)
494 shZ = shiftvec(is3+2)
496 C Load limits for loop over neighbors
500 C Get outer coordinate index
504 C Load i atom data, add shift vector
! pos(ii3+0..8) supplies x,y,z for three consecutive i sites; the same
! shift vector is added to each site.
505 ix1 = shX + pos(ii3+0)
506 iy1 = shY + pos(ii3+1)
507 iz1 = shZ + pos(ii3+2)
508 ix2 = shX + pos(ii3+3)
509 iy2 = shY + pos(ii3+4)
510 iz2 = shZ + pos(ii3+5)
511 ix3 = shX + pos(ii3+6)
512 iy3 = shY + pos(ii3+7)
513 iz3 = shZ + pos(ii3+8)
515 C Zero the potential energy for this list
! NOTE(review): no force variables are declared or updated in the visible
! part of this kernel, so the next heading is presumably a leftover from
! the force variant of the generator - confirm.
519 C Clear i atom forces
523 C Get j neighbor index, and coordinate index
527 C load j atom coordinates
! Squared distance between each i site (1,2,3) and the j atom.
536 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
540 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
544 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
546 C Calculate 1/r and 1/r2
548 C PowerPC intrinsics 1/sqrt lookup table
! Two alternative estimate calls appear (frsqrtes on the gmxreal value,
! frsqrte on a dble conversion); presumably selected by preprocessor
! conditionals that are not visible - confirm.
550 rinv11 = frsqrtes(rsq11)
552 rinv11 = frsqrte(dble(rsq11))
! The statements below are truncated (continuation lines not visible).
! The visible part 0.5*rinv*(3.0-rsq*rinv... looks like the start of a
! Newton-Raphson refinement of the reciprocal square root - TODO confirm.
554 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
557 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
561 C PowerPC intrinsics 1/sqrt lookup table
! Same estimate-and-refine sequence for the site 2 pair.
563 rinv21 = frsqrtes(rsq21)
565 rinv21 = frsqrte(dble(rsq21))
567 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
570 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
574 C PowerPC intrinsics 1/sqrt lookup table
! Same estimate-and-refine sequence for the site 3 pair.
576 rinv31 = frsqrtes(rsq31)
578 rinv31 = frsqrte(dble(rsq31))
580 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
583 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
587 C Load parameters for j atom
! tj combines the i-type base nti with the type of j atom jnr;
! presumably an index into vdwparam - the use is not visible here.
590 tj = nti+2*type(jnr)+1
594 C Coulomb interaction
598 C Calculate table index
601 C Calculate table index
608 C Tabulated VdW interaction - dispersion
! Table entries nnn+2 and nnn+3 are scaled by eps and eps2.
611 Geps = eps*VFtab(nnn+2)
612 Heps2 = eps2*VFtab(nnn+3)
617 C Tabulated VdW interaction - repulsion
! Same table evaluation pattern, repeated for the repulsion term.
621 Geps = eps*VFtab(nnn+2)
622 Heps2 = eps2*VFtab(nnn+3)
! Accumulate both VdW energy contributions for this list.
626 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
628 C Load parameters for j atom
631 C Coulomb interaction
635 C Load parameters for j atom
637 C Coulomb interaction
641 C Inner loop uses 70 flops/iteration
! NOTE(review): no write to faction or fshift is visible after the next
! heading in this listing; presumably a leftover from the force variant
! of the generator - confirm.
645 C Add i forces to mem and shifted force list
647 C Add potential energies to the group for this list
! ggid selects the energy group slot; its assignment is not visible here.
649 Vc(ggid) = Vc(ggid) + vctot
650 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
652 C Increment number of inner iterations
653 ninner = ninner + nj1 - nj0
655 C Outer loop uses 11 flops/iteration
659 C Increment number of outer iterations
660 nouter = nouter + nn1 - nn0
! Fetch another work unit (label 10) until nn1 has reached nri.
661 if(nn1.lt.nri) goto 10
663 C Write outer/inner iteration count to pointers