2 C This source code is part of
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
36 # define gmxreal real*8
38 # define gmxreal real*4
44 C Gromacs nonbonded kernel pwr6kernel114
45 C Coulomb interaction: Normal Coulomb
46 C VdW interaction: Lennard-Jones
47 C water optimization: pairs of TIP4P interactions
48 C Calculate forces: yes
50 subroutine pwr6kernel114(
83 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
84 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
85 integer*4 gid(*),type(*),ntype
86 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87 gmxreal Vvdw(*),tabscale,VFtab(*)
88 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89 integer*4 nthreads,count,mtx,outeriter,inneriter
92 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93 integer*4 nn0,nn1,nouter,ninner
95 gmxreal fscal,tx,ty,tz
97 gmxreal qq,vcoul,vctot
100 gmxreal Vvdw6,Vvdwtot
102 gmxreal ix1,iy1,iz1,fix1,fiy1,fiz1
103 gmxreal ix2,iy2,iz2,fix2,fiy2,fiz2
104 gmxreal ix3,iy3,iz3,fix3,fiy3,fiz3
105 gmxreal ix4,iy4,iz4,fix4,fiy4,fiz4
107 gmxreal jx2,jy2,jz2,fjx2,fjy2,fjz2
108 gmxreal jx3,jy3,jz3,fjx3,fjy3,fjz3
109 gmxreal jx4,jy4,jz4,fjx4,fjy4,fjz4
110 gmxreal dx11,dy11,dz11,rsq11
111 gmxreal dx22,dy22,dz22,rsq22,rinv22
112 gmxreal dx23,dy23,dz23,rsq23,rinv23
113 gmxreal dx24,dy24,dz24,rsq24,rinv24
114 gmxreal dx32,dy32,dz32,rsq32,rinv32
115 gmxreal dx33,dy33,dz33,rsq33,rinv33
116 gmxreal dx34,dy34,dz34,rsq34,rinv34
117 gmxreal dx42,dy42,dz42,rsq42,rinv42
118 gmxreal dx43,dy43,dz43,rsq43,rinv43
119 gmxreal dx44,dy44,dz44,rsq44,rinv44
120 gmxreal qH,qM,qqMM,qqMH,qqHH
124 C Initialize water data
131 tj = 2*(ntype+1)*type(ii)+1
136 C Reset outer and inner iteration counters
140 C Loop over thread workunits
141 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
142 if(nn1.gt.nri) nn1=nri
144 C Start outer loop over neighborlists
148 C Load shift vector for this list
151 shY = shiftvec(is3+1)
152 shZ = shiftvec(is3+2)
154 C Load limits for loop over neighbors
158 C Get outer coordinate index
162 C Load i atom data, add shift vector
163 ix1 = shX + pos(ii3+0)
164 iy1 = shY + pos(ii3+1)
165 iz1 = shZ + pos(ii3+2)
166 ix2 = shX + pos(ii3+3)
167 iy2 = shY + pos(ii3+4)
168 iz2 = shZ + pos(ii3+5)
169 ix3 = shX + pos(ii3+6)
170 iy3 = shY + pos(ii3+7)
171 iz3 = shZ + pos(ii3+8)
172 ix4 = shX + pos(ii3+9)
173 iy4 = shY + pos(ii3+10)
174 iz4 = shZ + pos(ii3+11)
176 C Zero the potential energy for this list
180 C Clear i atom forces
196 C Get j neighbor index, and coordinate index
200 C load j atom coordinates
218 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
222 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
226 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
230 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24
234 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
238 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
242 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34
246 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42
250 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43
254 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44
256 C Calculate 1/r and 1/r2
259 C PowerPC intrinsics 1/sqrt lookup table
261 rinv22 = frsqrtes(rsq22)
263 rinv22 = frsqrte(dble(rsq22))
265 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
268 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
272 C PowerPC intrinsics 1/sqrt lookup table
274 rinv23 = frsqrtes(rsq23)
276 rinv23 = frsqrte(dble(rsq23))
278 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
281 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
285 C PowerPC intrinsics 1/sqrt lookup table
287 rinv24 = frsqrtes(rsq24)
289 rinv24 = frsqrte(dble(rsq24))
291 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
294 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
298 C PowerPC intrinsics 1/sqrt lookup table
300 rinv32 = frsqrtes(rsq32)
302 rinv32 = frsqrte(dble(rsq32))
304 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
307 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
311 C PowerPC intrinsics 1/sqrt lookup table
313 rinv33 = frsqrtes(rsq33)
315 rinv33 = frsqrte(dble(rsq33))
317 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
320 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
324 C PowerPC intrinsics 1/sqrt lookup table
326 rinv34 = frsqrtes(rsq34)
328 rinv34 = frsqrte(dble(rsq34))
330 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
333 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
337 C PowerPC intrinsics 1/sqrt lookup table
339 rinv42 = frsqrtes(rsq42)
341 rinv42 = frsqrte(dble(rsq42))
343 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
346 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
350 C PowerPC intrinsics 1/sqrt lookup table
352 rinv43 = frsqrtes(rsq43)
354 rinv43 = frsqrte(dble(rsq43))
356 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
359 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
363 C PowerPC intrinsics 1/sqrt lookup table
365 rinv44 = frsqrtes(rsq44)
367 rinv44 = frsqrte(dble(rsq44))
369 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
372 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
376 C Load parameters for j atom
378 C Lennard-Jones interaction
379 rinvsix = rinvsq*rinvsq*rinvsq
381 Vvdw12 = c12*rinvsix*rinvsix
382 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
383 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq
385 C Calculate temporary vectorial force
390 C Increment i atom force
395 C Decrement j atom force
396 faction(j3+0) = faction(j3+0) - tx
397 faction(j3+1) = faction(j3+1) - ty
398 faction(j3+2) = faction(j3+2) - tz
400 C Load parameters for j atom
402 rinvsq = rinv22*rinv22
404 C Coulomb interaction
407 fscal = (vcoul)*rinvsq
409 C Calculate temporary vectorial force
414 C Increment i atom force
419 C Decrement j atom force
420 fjx2 = faction(j3+3) - tx
421 fjy2 = faction(j3+4) - ty
422 fjz2 = faction(j3+5) - tz
424 C Load parameters for j atom
426 rinvsq = rinv23*rinv23
428 C Coulomb interaction
431 fscal = (vcoul)*rinvsq
433 C Calculate temporary vectorial force
438 C Increment i atom force
443 C Decrement j atom force
444 fjx3 = faction(j3+6) - tx
445 fjy3 = faction(j3+7) - ty
446 fjz3 = faction(j3+8) - tz
448 C Load parameters for j atom
450 rinvsq = rinv24*rinv24
452 C Coulomb interaction
455 fscal = (vcoul)*rinvsq
457 C Calculate temporary vectorial force
462 C Increment i atom force
467 C Decrement j atom force
468 fjx4 = faction(j3+9) - tx
469 fjy4 = faction(j3+10) - ty
470 fjz4 = faction(j3+11) - tz
472 C Load parameters for j atom
474 rinvsq = rinv32*rinv32
476 C Coulomb interaction
479 fscal = (vcoul)*rinvsq
481 C Calculate temporary vectorial force
486 C Increment i atom force
491 C Decrement j atom force
496 C Load parameters for j atom
498 rinvsq = rinv33*rinv33
500 C Coulomb interaction
503 fscal = (vcoul)*rinvsq
505 C Calculate temporary vectorial force
510 C Increment i atom force
515 C Decrement j atom force
520 C Load parameters for j atom
522 rinvsq = rinv34*rinv34
524 C Coulomb interaction
527 fscal = (vcoul)*rinvsq
529 C Calculate temporary vectorial force
534 C Increment i atom force
539 C Decrement j atom force
544 C Load parameters for j atom
546 rinvsq = rinv42*rinv42
548 C Coulomb interaction
551 fscal = (vcoul)*rinvsq
553 C Calculate temporary vectorial force
558 C Increment i atom force
563 C Decrement j atom force
564 faction(j3+3) = fjx2 - tx
565 faction(j3+4) = fjy2 - ty
566 faction(j3+5) = fjz2 - tz
568 C Load parameters for j atom
570 rinvsq = rinv43*rinv43
572 C Coulomb interaction
575 fscal = (vcoul)*rinvsq
577 C Calculate temporary vectorial force
582 C Increment i atom force
587 C Decrement j atom force
588 faction(j3+6) = fjx3 - tx
589 faction(j3+7) = fjy3 - ty
590 faction(j3+8) = fjz3 - tz
592 C Load parameters for j atom
594 rinvsq = rinv44*rinv44
596 C Coulomb interaction
599 fscal = (vcoul)*rinvsq
601 C Calculate temporary vectorial force
606 C Increment i atom force
611 C Decrement j atom force
612 faction(j3+9) = fjx4 - tx
613 faction(j3+10) = fjy4 - ty
614 faction(j3+11) = fjz4 - tz
616 C Inner loop uses 276 flops/iteration
620 C Add i forces to mem and shifted force list
621 faction(ii3+0) = faction(ii3+0) + fix1
622 faction(ii3+1) = faction(ii3+1) + fiy1
623 faction(ii3+2) = faction(ii3+2) + fiz1
624 faction(ii3+3) = faction(ii3+3) + fix2
625 faction(ii3+4) = faction(ii3+4) + fiy2
626 faction(ii3+5) = faction(ii3+5) + fiz2
627 faction(ii3+6) = faction(ii3+6) + fix3
628 faction(ii3+7) = faction(ii3+7) + fiy3
629 faction(ii3+8) = faction(ii3+8) + fiz3
630 faction(ii3+9) = faction(ii3+9) + fix4
631 faction(ii3+10) = faction(ii3+10) + fiy4
632 faction(ii3+11) = faction(ii3+11) + fiz4
633 fshift(is3) = fshift(is3)+fix1+fix2+fix3+fix4
634 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3+fiy4
635 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3+fiz4
637 C Add potential energies to the group for this list
639 Vc(ggid) = Vc(ggid) + vctot
640 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
642 C Increment number of inner iterations
643 ninner = ninner + nj1 - nj0
645 C Outer loop uses 38 flops/iteration
649 C Increment number of outer iterations
650 nouter = nouter + nn1 - nn0
651 if(nn1.lt.nri) goto 10
653 C Write outer/inner iteration count to pointers
665 C Gromacs nonbonded kernel pwr6kernel114nf
666 C Coulomb interaction: Normal Coulomb
667 C VdW interaction: Lennard-Jones
668 C water optimization: pairs of TIP4P interactions
669 C Calculate forces: no
671 subroutine pwr6kernel114nf(
704 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
705 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
706 integer*4 gid(*),type(*),ntype
707 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
708 gmxreal Vvdw(*),tabscale,VFtab(*)
709 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
710 integer*4 nthreads,count,mtx,outeriter,inneriter
713 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
714 integer*4 nn0,nn1,nouter,ninner
717 gmxreal qq,vcoul,vctot
720 gmxreal Vvdw6,Vvdwtot
730 gmxreal dx11,dy11,dz11,rsq11
731 gmxreal dx22,dy22,dz22,rsq22,rinv22
732 gmxreal dx23,dy23,dz23,rsq23,rinv23
733 gmxreal dx24,dy24,dz24,rsq24,rinv24
734 gmxreal dx32,dy32,dz32,rsq32,rinv32
735 gmxreal dx33,dy33,dz33,rsq33,rinv33
736 gmxreal dx34,dy34,dz34,rsq34,rinv34
737 gmxreal dx42,dy42,dz42,rsq42,rinv42
738 gmxreal dx43,dy43,dz43,rsq43,rinv43
739 gmxreal dx44,dy44,dz44,rsq44,rinv44
740 gmxreal qH,qM,qqMM,qqMH,qqHH
744 C Initialize water data
751 tj = 2*(ntype+1)*type(ii)+1
756 C Reset outer and inner iteration counters
760 C Loop over thread workunits
761 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
762 if(nn1.gt.nri) nn1=nri
764 C Start outer loop over neighborlists
768 C Load shift vector for this list
771 shY = shiftvec(is3+1)
772 shZ = shiftvec(is3+2)
774 C Load limits for loop over neighbors
778 C Get outer coordinate index
782 C Load i atom data, add shift vector
783 ix1 = shX + pos(ii3+0)
784 iy1 = shY + pos(ii3+1)
785 iz1 = shZ + pos(ii3+2)
786 ix2 = shX + pos(ii3+3)
787 iy2 = shY + pos(ii3+4)
788 iz2 = shZ + pos(ii3+5)
789 ix3 = shX + pos(ii3+6)
790 iy3 = shY + pos(ii3+7)
791 iz3 = shZ + pos(ii3+8)
792 ix4 = shX + pos(ii3+9)
793 iy4 = shY + pos(ii3+10)
794 iz4 = shZ + pos(ii3+11)
796 C Zero the potential energy for this list
800 C Clear i atom forces
804 C Get j neighbor index, and coordinate index
808 C load j atom coordinates
826 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
830 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
834 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
838 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24
842 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
846 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
850 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34
854 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42
858 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43
862 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44
864 C Calculate 1/r and 1/r2
867 C PowerPC intrinsics 1/sqrt lookup table
869 rinv22 = frsqrtes(rsq22)
871 rinv22 = frsqrte(dble(rsq22))
873 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
876 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
880 C PowerPC intrinsics 1/sqrt lookup table
882 rinv23 = frsqrtes(rsq23)
884 rinv23 = frsqrte(dble(rsq23))
886 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
889 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
893 C PowerPC intrinsics 1/sqrt lookup table
895 rinv24 = frsqrtes(rsq24)
897 rinv24 = frsqrte(dble(rsq24))
899 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
902 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
906 C PowerPC intrinsics 1/sqrt lookup table
908 rinv32 = frsqrtes(rsq32)
910 rinv32 = frsqrte(dble(rsq32))
912 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
915 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
919 C PowerPC intrinsics 1/sqrt lookup table
921 rinv33 = frsqrtes(rsq33)
923 rinv33 = frsqrte(dble(rsq33))
925 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
928 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
932 C PowerPC intrinsics 1/sqrt lookup table
934 rinv34 = frsqrtes(rsq34)
936 rinv34 = frsqrte(dble(rsq34))
938 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
941 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
945 C PowerPC intrinsics 1/sqrt lookup table
947 rinv42 = frsqrtes(rsq42)
949 rinv42 = frsqrte(dble(rsq42))
951 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
954 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
958 C PowerPC intrinsics 1/sqrt lookup table
960 rinv43 = frsqrtes(rsq43)
962 rinv43 = frsqrte(dble(rsq43))
964 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
967 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
971 C PowerPC intrinsics 1/sqrt lookup table
973 rinv44 = frsqrtes(rsq44)
975 rinv44 = frsqrte(dble(rsq44))
977 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
980 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
984 C Load parameters for j atom
986 C Lennard-Jones interaction
987 rinvsix = rinvsq*rinvsq*rinvsq
989 Vvdw12 = c12*rinvsix*rinvsix
990 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
992 C Load parameters for j atom
995 C Coulomb interaction
999 C Load parameters for j atom
1002 C Coulomb interaction
1006 C Load parameters for j atom
1009 C Coulomb interaction
1013 C Load parameters for j atom
1016 C Coulomb interaction
1020 C Load parameters for j atom
1023 C Coulomb interaction
1027 C Load parameters for j atom
1030 C Coulomb interaction
1034 C Load parameters for j atom
1037 C Coulomb interaction
1041 C Load parameters for j atom
1044 C Coulomb interaction
1048 C Load parameters for j atom
1051 C Coulomb interaction
1055 C Inner loop uses 163 flops/iteration
1059 C Add i forces to mem and shifted force list
1061 C Add potential energies to the group for this list
1063 Vc(ggid) = Vc(ggid) + vctot
1064 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
1066 C Increment number of inner iterations
1067 ninner = ninner + nj1 - nj0
1069 C Outer loop uses 14 flops/iteration
1073 C Increment number of outer iterations
1074 nouter = nouter + nn1 - nn0
1075 if(nn1.lt.nri) goto 10
1077 C Write outer/inner iteration count to pointers