2 C This source code is part of
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
36 # define gmxreal real*8
38 # define gmxreal real*4
44 C Gromacs nonbonded kernel pwr6kernel104
45 C Coulomb interaction: Normal Coulomb
46 C VdW interaction: Not calculated
47 C water optimization: pairs of TIP4P interactions
48 C Calculate forces: yes
50 subroutine pwr6kernel104(
83 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
84 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
85 integer*4 gid(*),type(*),ntype
86 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87 gmxreal Vvdw(*),tabscale,VFtab(*)
88 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89 integer*4 nthreads,count,mtx,outeriter,inneriter
92 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93 integer*4 nn0,nn1,nouter,ninner
95 gmxreal fscal,tx,ty,tz
97 gmxreal qq,vcoul,vctot
98 gmxreal ix2,iy2,iz2,fix2,fiy2,fiz2
99 gmxreal ix3,iy3,iz3,fix3,fiy3,fiz3
100 gmxreal ix4,iy4,iz4,fix4,fiy4,fiz4
101 gmxreal jx2,jy2,jz2,fjx2,fjy2,fjz2
102 gmxreal jx3,jy3,jz3,fjx3,fjy3,fjz3
103 gmxreal jx4,jy4,jz4,fjx4,fjy4,fjz4
104 gmxreal dx22,dy22,dz22,rsq22,rinv22
105 gmxreal dx23,dy23,dz23,rsq23,rinv23
106 gmxreal dx24,dy24,dz24,rsq24,rinv24
107 gmxreal dx32,dy32,dz32,rsq32,rinv32
108 gmxreal dx33,dy33,dz33,rsq33,rinv33
109 gmxreal dx34,dy34,dz34,rsq34,rinv34
110 gmxreal dx42,dy42,dz42,rsq42,rinv42
111 gmxreal dx43,dy43,dz43,rsq43,rinv43
112 gmxreal dx44,dy44,dz44,rsq44,rinv44
113 gmxreal qH,qM,qqMM,qqMH,qqHH
116 C Initialize water data
125 C Reset outer and inner iteration counters
129 C Loop over thread workunits
130 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
131 if(nn1.gt.nri) nn1=nri
133 C Start outer loop over neighborlists
137 C Load shift vector for this list
140 shY = shiftvec(is3+1)
141 shZ = shiftvec(is3+2)
143 C Load limits for loop over neighbors
147 C Get outer coordinate index
151 C Load i atom data, add shift vector
152 ix2 = shX + pos(ii3+3)
153 iy2 = shY + pos(ii3+4)
154 iz2 = shZ + pos(ii3+5)
155 ix3 = shX + pos(ii3+6)
156 iy3 = shY + pos(ii3+7)
157 iz3 = shZ + pos(ii3+8)
158 ix4 = shX + pos(ii3+9)
159 iy4 = shY + pos(ii3+10)
160 iz4 = shZ + pos(ii3+11)
162 C Zero the potential energy for this list
165 C Clear i atom forces
178 C Get j neighbor index, and coordinate index
182 C load j atom coordinates
197 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
201 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
205 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24
209 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
213 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
217 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34
221 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42
225 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43
229 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44
231 C Calculate 1/r and 1/r2
233 C PowerPC intrinsics 1/sqrt lookup table
235 rinv22 = frsqrtes(rsq22)
237 rinv22 = frsqrte(dble(rsq22))
239 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
242 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
246 C PowerPC intrinsics 1/sqrt lookup table
248 rinv23 = frsqrtes(rsq23)
250 rinv23 = frsqrte(dble(rsq23))
252 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
255 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
259 C PowerPC intrinsics 1/sqrt lookup table
261 rinv24 = frsqrtes(rsq24)
263 rinv24 = frsqrte(dble(rsq24))
265 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
268 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
272 C PowerPC intrinsics 1/sqrt lookup table
274 rinv32 = frsqrtes(rsq32)
276 rinv32 = frsqrte(dble(rsq32))
278 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
281 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
285 C PowerPC intrinsics 1/sqrt lookup table
287 rinv33 = frsqrtes(rsq33)
289 rinv33 = frsqrte(dble(rsq33))
291 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
294 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
298 C PowerPC intrinsics 1/sqrt lookup table
300 rinv34 = frsqrtes(rsq34)
302 rinv34 = frsqrte(dble(rsq34))
304 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
307 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
311 C PowerPC intrinsics 1/sqrt lookup table
313 rinv42 = frsqrtes(rsq42)
315 rinv42 = frsqrte(dble(rsq42))
317 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
320 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
324 C PowerPC intrinsics 1/sqrt lookup table
326 rinv43 = frsqrtes(rsq43)
328 rinv43 = frsqrte(dble(rsq43))
330 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
333 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
337 C PowerPC intrinsics 1/sqrt lookup table
339 rinv44 = frsqrtes(rsq44)
341 rinv44 = frsqrte(dble(rsq44))
343 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
346 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
350 C Load parameters for j atom
352 rinvsq = rinv22*rinv22
354 C Coulomb interaction
357 fscal = (vcoul)*rinvsq
359 C Calculate temporary vectorial force
364 C Increment i atom force
369 C Decrement j atom force
370 fjx2 = faction(j3+3) - tx
371 fjy2 = faction(j3+4) - ty
372 fjz2 = faction(j3+5) - tz
374 C Load parameters for j atom
376 rinvsq = rinv23*rinv23
378 C Coulomb interaction
381 fscal = (vcoul)*rinvsq
383 C Calculate temporary vectorial force
388 C Increment i atom force
393 C Decrement j atom force
394 fjx3 = faction(j3+6) - tx
395 fjy3 = faction(j3+7) - ty
396 fjz3 = faction(j3+8) - tz
398 C Load parameters for j atom
400 rinvsq = rinv24*rinv24
402 C Coulomb interaction
405 fscal = (vcoul)*rinvsq
407 C Calculate temporary vectorial force
412 C Increment i atom force
417 C Decrement j atom force
418 fjx4 = faction(j3+9) - tx
419 fjy4 = faction(j3+10) - ty
420 fjz4 = faction(j3+11) - tz
422 C Load parameters for j atom
424 rinvsq = rinv32*rinv32
426 C Coulomb interaction
429 fscal = (vcoul)*rinvsq
431 C Calculate temporary vectorial force
436 C Increment i atom force
441 C Decrement j atom force
446 C Load parameters for j atom
448 rinvsq = rinv33*rinv33
450 C Coulomb interaction
453 fscal = (vcoul)*rinvsq
455 C Calculate temporary vectorial force
460 C Increment i atom force
465 C Decrement j atom force
470 C Load parameters for j atom
472 rinvsq = rinv34*rinv34
474 C Coulomb interaction
477 fscal = (vcoul)*rinvsq
479 C Calculate temporary vectorial force
484 C Increment i atom force
489 C Decrement j atom force
494 C Load parameters for j atom
496 rinvsq = rinv42*rinv42
498 C Coulomb interaction
501 fscal = (vcoul)*rinvsq
503 C Calculate temporary vectorial force
508 C Increment i atom force
513 C Decrement j atom force
514 faction(j3+3) = fjx2 - tx
515 faction(j3+4) = fjy2 - ty
516 faction(j3+5) = fjz2 - tz
518 C Load parameters for j atom
520 rinvsq = rinv43*rinv43
522 C Coulomb interaction
525 fscal = (vcoul)*rinvsq
527 C Calculate temporary vectorial force
532 C Increment i atom force
537 C Decrement j atom force
538 faction(j3+6) = fjx3 - tx
539 faction(j3+7) = fjy3 - ty
540 faction(j3+8) = fjz3 - tz
542 C Load parameters for j atom
544 rinvsq = rinv44*rinv44
546 C Coulomb interaction
549 fscal = (vcoul)*rinvsq
551 C Calculate temporary vectorial force
556 C Increment i atom force
561 C Decrement j atom force
562 faction(j3+9) = fjx4 - tx
563 faction(j3+10) = fjy4 - ty
564 faction(j3+11) = fjz4 - tz
566 C Inner loop uses 243 flops/iteration
570 C Add i forces to mem and shifted force list
571 faction(ii3+3) = faction(ii3+3) + fix2
572 faction(ii3+4) = faction(ii3+4) + fiy2
573 faction(ii3+5) = faction(ii3+5) + fiz2
574 faction(ii3+6) = faction(ii3+6) + fix3
575 faction(ii3+7) = faction(ii3+7) + fiy3
576 faction(ii3+8) = faction(ii3+8) + fiz3
577 faction(ii3+9) = faction(ii3+9) + fix4
578 faction(ii3+10) = faction(ii3+10) + fiy4
579 faction(ii3+11) = faction(ii3+11) + fiz4
580 fshift(is3) = fshift(is3)+fix2+fix3+fix4
581 fshift(is3+1) = fshift(is3+1)+fiy2+fiy3+fiy4
582 fshift(is3+2) = fshift(is3+2)+fiz2+fiz3+fiz4
584 C Add potential energies to the group for this list
586 Vc(ggid) = Vc(ggid) + vctot
588 C Increment number of inner iterations
589 ninner = ninner + nj1 - nj0
591 C Outer loop uses 28 flops/iteration
595 C Increment number of outer iterations
596 nouter = nouter + nn1 - nn0
597 if(nn1.lt.nri) goto 10
599 C Write outer/inner iteration count to pointers
611 C Gromacs nonbonded kernel pwr6kernel104nf
612 C Coulomb interaction: Normal Coulomb
613 C VdW interaction: Not calculated
614 C water optimization: pairs of TIP4P interactions
615 C Calculate forces: no
617 subroutine pwr6kernel104nf(
650 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
651 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
652 integer*4 gid(*),type(*),ntype
653 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
654 gmxreal Vvdw(*),tabscale,VFtab(*)
655 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
656 integer*4 nthreads,count,mtx,outeriter,inneriter
659 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
660 integer*4 nn0,nn1,nouter,ninner
662 gmxreal qq,vcoul,vctot
669 gmxreal dx22,dy22,dz22,rsq22,rinv22
670 gmxreal dx23,dy23,dz23,rsq23,rinv23
671 gmxreal dx24,dy24,dz24,rsq24,rinv24
672 gmxreal dx32,dy32,dz32,rsq32,rinv32
673 gmxreal dx33,dy33,dz33,rsq33,rinv33
674 gmxreal dx34,dy34,dz34,rsq34,rinv34
675 gmxreal dx42,dy42,dz42,rsq42,rinv42
676 gmxreal dx43,dy43,dz43,rsq43,rinv43
677 gmxreal dx44,dy44,dz44,rsq44,rinv44
678 gmxreal qH,qM,qqMM,qqMH,qqHH
681 C Initialize water data
690 C Reset outer and inner iteration counters
694 C Loop over thread workunits
695 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
696 if(nn1.gt.nri) nn1=nri
698 C Start outer loop over neighborlists
702 C Load shift vector for this list
705 shY = shiftvec(is3+1)
706 shZ = shiftvec(is3+2)
708 C Load limits for loop over neighbors
712 C Get outer coordinate index
716 C Load i atom data, add shift vector
717 ix2 = shX + pos(ii3+3)
718 iy2 = shY + pos(ii3+4)
719 iz2 = shZ + pos(ii3+5)
720 ix3 = shX + pos(ii3+6)
721 iy3 = shY + pos(ii3+7)
722 iz3 = shZ + pos(ii3+8)
723 ix4 = shX + pos(ii3+9)
724 iy4 = shY + pos(ii3+10)
725 iz4 = shZ + pos(ii3+11)
727 C Zero the potential energy for this list
730 C Clear i atom forces
734 C Get j neighbor index, and coordinate index
738 C load j atom coordinates
753 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
757 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
761 rsq24 = dx24*dx24+dy24*dy24+dz24*dz24
765 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
769 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
773 rsq34 = dx34*dx34+dy34*dy34+dz34*dz34
777 rsq42 = dx42*dx42+dy42*dy42+dz42*dz42
781 rsq43 = dx43*dx43+dy43*dy43+dz43*dz43
785 rsq44 = dx44*dx44+dy44*dy44+dz44*dz44
787 C Calculate 1/r and 1/r2
789 C PowerPC intrinsics 1/sqrt lookup table
791 rinv22 = frsqrtes(rsq22)
793 rinv22 = frsqrte(dble(rsq22))
795 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
798 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
802 C PowerPC intrinsics 1/sqrt lookup table
804 rinv23 = frsqrtes(rsq23)
806 rinv23 = frsqrte(dble(rsq23))
808 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
811 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
815 C PowerPC intrinsics 1/sqrt lookup table
817 rinv24 = frsqrtes(rsq24)
819 rinv24 = frsqrte(dble(rsq24))
821 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
824 rinv24 = (0.5*rinv24*(3.0-((rsq24*rinv24)
828 C PowerPC intrinsics 1/sqrt lookup table
830 rinv32 = frsqrtes(rsq32)
832 rinv32 = frsqrte(dble(rsq32))
834 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
837 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
841 C PowerPC intrinsics 1/sqrt lookup table
843 rinv33 = frsqrtes(rsq33)
845 rinv33 = frsqrte(dble(rsq33))
847 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
850 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
854 C PowerPC intrinsics 1/sqrt lookup table
856 rinv34 = frsqrtes(rsq34)
858 rinv34 = frsqrte(dble(rsq34))
860 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
863 rinv34 = (0.5*rinv34*(3.0-((rsq34*rinv34)
867 C PowerPC intrinsics 1/sqrt lookup table
869 rinv42 = frsqrtes(rsq42)
871 rinv42 = frsqrte(dble(rsq42))
873 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
876 rinv42 = (0.5*rinv42*(3.0-((rsq42*rinv42)
880 C PowerPC intrinsics 1/sqrt lookup table
882 rinv43 = frsqrtes(rsq43)
884 rinv43 = frsqrte(dble(rsq43))
886 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
889 rinv43 = (0.5*rinv43*(3.0-((rsq43*rinv43)
893 C PowerPC intrinsics 1/sqrt lookup table
895 rinv44 = frsqrtes(rsq44)
897 rinv44 = frsqrte(dble(rsq44))
899 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
902 rinv44 = (0.5*rinv44*(3.0-((rsq44*rinv44)
906 C Load parameters for j atom
909 C Coulomb interaction
913 C Load parameters for j atom
916 C Coulomb interaction
920 C Load parameters for j atom
923 C Coulomb interaction
927 C Load parameters for j atom
930 C Coulomb interaction
934 C Load parameters for j atom
937 C Coulomb interaction
941 C Load parameters for j atom
944 C Coulomb interaction
948 C Load parameters for j atom
951 C Coulomb interaction
955 C Load parameters for j atom
958 C Coulomb interaction
962 C Load parameters for j atom
965 C Coulomb interaction
969 C Inner loop uses 144 flops/iteration
973 C Add i forces to mem and shifted force list
975 C Add potential energies to the group for this list
977 Vc(ggid) = Vc(ggid) + vctot
979 C Increment number of inner iterations
980 ninner = ninner + nj1 - nj0
982 C Outer loop uses 10 flops/iteration
986 C Increment number of outer iterations
987 nouter = nouter + nn1 - nn0
988 if(nn1.lt.nri) goto 10
990 C Write outer/inner iteration count to pointers