2 C This source code is part of
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
36 # define gmxreal real*8
38 # define gmxreal real*4
44 C Gromacs nonbonded kernel pwr6kernel202
45 C Coulomb interaction: Reaction field
46 C VdW interaction: Not calculated
47 C water optimization: pairs of SPC/TIP3P interactions
48 C Calculate forces: yes
50 subroutine pwr6kernel202(
83 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
84 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
85 integer*4 gid(*),type(*),ntype
86 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87 gmxreal Vvdw(*),tabscale,VFtab(*)
88 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89 integer*4 nthreads,count,mtx,outeriter,inneriter
92 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93 integer*4 nn0,nn1,nouter,ninner
95 gmxreal fscal,tx,ty,tz
97 gmxreal qq,vcoul,vctot
99 gmxreal ix1,iy1,iz1,fix1,fiy1,fiz1
100 gmxreal ix2,iy2,iz2,fix2,fiy2,fiz2
101 gmxreal ix3,iy3,iz3,fix3,fiy3,fiz3
102 gmxreal jx1,jy1,jz1,fjx1,fjy1,fjz1
103 gmxreal jx2,jy2,jz2,fjx2,fjy2,fjz2
104 gmxreal jx3,jy3,jz3,fjx3,fjy3,fjz3
105 gmxreal dx11,dy11,dz11,rsq11,rinv11
106 gmxreal dx12,dy12,dz12,rsq12,rinv12
107 gmxreal dx13,dy13,dz13,rsq13,rinv13
108 gmxreal dx21,dy21,dz21,rsq21,rinv21
109 gmxreal dx22,dy22,dz22,rsq22,rinv22
110 gmxreal dx23,dy23,dz23,rsq23,rinv23
111 gmxreal dx31,dy31,dz31,rsq31,rinv31
112 gmxreal dx32,dy32,dz32,rsq32,rinv32
113 gmxreal dx33,dy33,dz33,rsq33,rinv33
114 gmxreal qO,qH,qqOO,qqOH,qqHH
117 C Initialize water data
126 C Reset outer and inner iteration counters
130 C Loop over thread workunits
131 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
132 if(nn1.gt.nri) nn1=nri
134 C Start outer loop over neighborlists
138 C Load shift vector for this list
141 shY = shiftvec(is3+1)
142 shZ = shiftvec(is3+2)
144 C Load limits for loop over neighbors
148 C Get outer coordinate index
152 C Load i atom data, add shift vector
153 ix1 = shX + pos(ii3+0)
154 iy1 = shY + pos(ii3+1)
155 iz1 = shZ + pos(ii3+2)
156 ix2 = shX + pos(ii3+3)
157 iy2 = shY + pos(ii3+4)
158 iz2 = shZ + pos(ii3+5)
159 ix3 = shX + pos(ii3+6)
160 iy3 = shY + pos(ii3+7)
161 iz3 = shZ + pos(ii3+8)
163 C Zero the potential energy for this list
166 C Clear i atom forces
179 C Get j neighbor index, and coordinate index
183 C load j atom coordinates
198 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
202 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12
206 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13
210 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
214 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
218 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
222 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
226 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
230 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
232 C Calculate 1/r and 1/r2
234 C PowerPC intrinsics 1/sqrt lookup table
236 rinv11 = frsqrtes(rsq11)
238 rinv11 = frsqrte(dble(rsq11))
240 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
243 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
247 C PowerPC intrinsics 1/sqrt lookup table
249 rinv12 = frsqrtes(rsq12)
251 rinv12 = frsqrte(dble(rsq12))
253 rinv12 = (0.5*rinv12*(3.0-((rsq12*rinv12)
256 rinv12 = (0.5*rinv12*(3.0-((rsq12*rinv12)
260 C PowerPC intrinsics 1/sqrt lookup table
262 rinv13 = frsqrtes(rsq13)
264 rinv13 = frsqrte(dble(rsq13))
266 rinv13 = (0.5*rinv13*(3.0-((rsq13*rinv13)
269 rinv13 = (0.5*rinv13*(3.0-((rsq13*rinv13)
273 C PowerPC intrinsics 1/sqrt lookup table
275 rinv21 = frsqrtes(rsq21)
277 rinv21 = frsqrte(dble(rsq21))
279 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
282 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
286 C PowerPC intrinsics 1/sqrt lookup table
288 rinv22 = frsqrtes(rsq22)
290 rinv22 = frsqrte(dble(rsq22))
292 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
295 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
299 C PowerPC intrinsics 1/sqrt lookup table
301 rinv23 = frsqrtes(rsq23)
303 rinv23 = frsqrte(dble(rsq23))
305 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
308 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
312 C PowerPC intrinsics 1/sqrt lookup table
314 rinv31 = frsqrtes(rsq31)
316 rinv31 = frsqrte(dble(rsq31))
318 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
321 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
325 C PowerPC intrinsics 1/sqrt lookup table
327 rinv32 = frsqrtes(rsq32)
329 rinv32 = frsqrte(dble(rsq32))
331 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
334 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
338 C PowerPC intrinsics 1/sqrt lookup table
340 rinv33 = frsqrtes(rsq33)
342 rinv33 = frsqrte(dble(rsq33))
344 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
347 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
351 C Load parameters for j atom
353 rinvsq = rinv11*rinv11
355 C Coulomb reaction-field interaction
357 vcoul = qq*(rinv11+krsq-crf)
359 fscal = (qq*(rinv11-2.0*krsq))*rinvsq
361 C Calculate temporary vectorial force
366 C Increment i atom force
371 C Decrement j atom force
372 fjx1 = faction(j3+0) - tx
373 fjy1 = faction(j3+1) - ty
374 fjz1 = faction(j3+2) - tz
376 C Load parameters for j atom
378 rinvsq = rinv12*rinv12
380 C Coulomb reaction-field interaction
382 vcoul = qq*(rinv12+krsq-crf)
384 fscal = (qq*(rinv12-2.0*krsq))*rinvsq
386 C Calculate temporary vectorial force
391 C Increment i atom force
396 C Decrement j atom force
397 fjx2 = faction(j3+3) - tx
398 fjy2 = faction(j3+4) - ty
399 fjz2 = faction(j3+5) - tz
401 C Load parameters for j atom
403 rinvsq = rinv13*rinv13
405 C Coulomb reaction-field interaction
407 vcoul = qq*(rinv13+krsq-crf)
409 fscal = (qq*(rinv13-2.0*krsq))*rinvsq
411 C Calculate temporary vectorial force
416 C Increment i atom force
421 C Decrement j atom force
422 fjx3 = faction(j3+6) - tx
423 fjy3 = faction(j3+7) - ty
424 fjz3 = faction(j3+8) - tz
426 C Load parameters for j atom
428 rinvsq = rinv21*rinv21
430 C Coulomb reaction-field interaction
432 vcoul = qq*(rinv21+krsq-crf)
434 fscal = (qq*(rinv21-2.0*krsq))*rinvsq
436 C Calculate temporary vectorial force
441 C Increment i atom force
446 C Decrement j atom force
451 C Load parameters for j atom
453 rinvsq = rinv22*rinv22
455 C Coulomb reaction-field interaction
457 vcoul = qq*(rinv22+krsq-crf)
459 fscal = (qq*(rinv22-2.0*krsq))*rinvsq
461 C Calculate temporary vectorial force
466 C Increment i atom force
471 C Decrement j atom force
476 C Load parameters for j atom
478 rinvsq = rinv23*rinv23
480 C Coulomb reaction-field interaction
482 vcoul = qq*(rinv23+krsq-crf)
484 fscal = (qq*(rinv23-2.0*krsq))*rinvsq
486 C Calculate temporary vectorial force
491 C Increment i atom force
496 C Decrement j atom force
501 C Load parameters for j atom
503 rinvsq = rinv31*rinv31
505 C Coulomb reaction-field interaction
507 vcoul = qq*(rinv31+krsq-crf)
509 fscal = (qq*(rinv31-2.0*krsq))*rinvsq
511 C Calculate temporary vectorial force
516 C Increment i atom force
521 C Decrement j atom force
522 faction(j3+0) = fjx1 - tx
523 faction(j3+1) = fjy1 - ty
524 faction(j3+2) = fjz1 - tz
526 C Load parameters for j atom
528 rinvsq = rinv32*rinv32
530 C Coulomb reaction-field interaction
532 vcoul = qq*(rinv32+krsq-crf)
534 fscal = (qq*(rinv32-2.0*krsq))*rinvsq
536 C Calculate temporary vectorial force
541 C Increment i atom force
546 C Decrement j atom force
547 faction(j3+3) = fjx2 - tx
548 faction(j3+4) = fjy2 - ty
549 faction(j3+5) = fjz2 - tz
551 C Load parameters for j atom
553 rinvsq = rinv33*rinv33
555 C Coulomb reaction-field interaction
557 vcoul = qq*(rinv33+krsq-crf)
559 fscal = (qq*(rinv33-2.0*krsq))*rinvsq
561 C Calculate temporary vectorial force
566 C Increment i atom force
571 C Decrement j atom force
572 faction(j3+6) = fjx3 - tx
573 faction(j3+7) = fjy3 - ty
574 faction(j3+8) = fjz3 - tz
576 C Inner loop uses 297 flops/iteration
580 C Add i forces to mem and shifted force list
581 faction(ii3+0) = faction(ii3+0) + fix1
582 faction(ii3+1) = faction(ii3+1) + fiy1
583 faction(ii3+2) = faction(ii3+2) + fiz1
584 faction(ii3+3) = faction(ii3+3) + fix2
585 faction(ii3+4) = faction(ii3+4) + fiy2
586 faction(ii3+5) = faction(ii3+5) + fiz2
587 faction(ii3+6) = faction(ii3+6) + fix3
588 faction(ii3+7) = faction(ii3+7) + fiy3
589 faction(ii3+8) = faction(ii3+8) + fiz3
590 fshift(is3) = fshift(is3)+fix1+fix2+fix3
591 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3
592 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3
594 C Add potential energies to the group for this list
596 Vc(ggid) = Vc(ggid) + vctot
598 C Increment number of inner iterations
599 ninner = ninner + nj1 - nj0
601 C Outer loop uses 28 flops/iteration
605 C Increment number of outer iterations
606 nouter = nouter + nn1 - nn0
607 if(nn1.lt.nri) goto 10
609 C Write outer/inner iteration count to pointers
621 C Gromacs nonbonded kernel pwr6kernel202nf
622 C Coulomb interaction: Reaction field
623 C VdW interaction: Not calculated
624 C water optimization: pairs of SPC/TIP3P interactions
625 C Calculate forces: no
627 subroutine pwr6kernel202nf(
660 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
661 gmxreal shiftvec(*),fshift(*),pos(*),faction(*)
662 integer*4 gid(*),type(*),ntype
663 gmxreal charge(*),facel,krf,crf,Vc(*),vdwparam(*)
664 gmxreal Vvdw(*),tabscale,VFtab(*)
665 gmxreal invsqrta(*),dvda(*),gbtabscale,GBtab(*)
666 integer*4 nthreads,count,mtx,outeriter,inneriter
669 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
670 integer*4 nn0,nn1,nouter,ninner
672 gmxreal qq,vcoul,vctot
680 gmxreal dx11,dy11,dz11,rsq11,rinv11
681 gmxreal dx12,dy12,dz12,rsq12,rinv12
682 gmxreal dx13,dy13,dz13,rsq13,rinv13
683 gmxreal dx21,dy21,dz21,rsq21,rinv21
684 gmxreal dx22,dy22,dz22,rsq22,rinv22
685 gmxreal dx23,dy23,dz23,rsq23,rinv23
686 gmxreal dx31,dy31,dz31,rsq31,rinv31
687 gmxreal dx32,dy32,dz32,rsq32,rinv32
688 gmxreal dx33,dy33,dz33,rsq33,rinv33
689 gmxreal qO,qH,qqOO,qqOH,qqHH
692 C Initialize water data
701 C Reset outer and inner iteration counters
705 C Loop over thread workunits
706 10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
707 if(nn1.gt.nri) nn1=nri
709 C Start outer loop over neighborlists
713 C Load shift vector for this list
716 shY = shiftvec(is3+1)
717 shZ = shiftvec(is3+2)
719 C Load limits for loop over neighbors
723 C Get outer coordinate index
727 C Load i atom data, add shift vector
728 ix1 = shX + pos(ii3+0)
729 iy1 = shY + pos(ii3+1)
730 iz1 = shZ + pos(ii3+2)
731 ix2 = shX + pos(ii3+3)
732 iy2 = shY + pos(ii3+4)
733 iz2 = shZ + pos(ii3+5)
734 ix3 = shX + pos(ii3+6)
735 iy3 = shY + pos(ii3+7)
736 iz3 = shZ + pos(ii3+8)
738 C Zero the potential energy for this list
741 C Clear i atom forces
745 C Get j neighbor index, and coordinate index
749 C load j atom coordinates
764 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
768 rsq12 = dx12*dx12+dy12*dy12+dz12*dz12
772 rsq13 = dx13*dx13+dy13*dy13+dz13*dz13
776 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
780 rsq22 = dx22*dx22+dy22*dy22+dz22*dz22
784 rsq23 = dx23*dx23+dy23*dy23+dz23*dz23
788 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
792 rsq32 = dx32*dx32+dy32*dy32+dz32*dz32
796 rsq33 = dx33*dx33+dy33*dy33+dz33*dz33
798 C Calculate 1/r and 1/r2
800 C PowerPC intrinsics 1/sqrt lookup table
802 rinv11 = frsqrtes(rsq11)
804 rinv11 = frsqrte(dble(rsq11))
806 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
809 rinv11 = (0.5*rinv11*(3.0-((rsq11*rinv11)
813 C PowerPC intrinsics 1/sqrt lookup table
815 rinv12 = frsqrtes(rsq12)
817 rinv12 = frsqrte(dble(rsq12))
819 rinv12 = (0.5*rinv12*(3.0-((rsq12*rinv12)
822 rinv12 = (0.5*rinv12*(3.0-((rsq12*rinv12)
826 C PowerPC intrinsics 1/sqrt lookup table
828 rinv13 = frsqrtes(rsq13)
830 rinv13 = frsqrte(dble(rsq13))
832 rinv13 = (0.5*rinv13*(3.0-((rsq13*rinv13)
835 rinv13 = (0.5*rinv13*(3.0-((rsq13*rinv13)
839 C PowerPC intrinsics 1/sqrt lookup table
841 rinv21 = frsqrtes(rsq21)
843 rinv21 = frsqrte(dble(rsq21))
845 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
848 rinv21 = (0.5*rinv21*(3.0-((rsq21*rinv21)
852 C PowerPC intrinsics 1/sqrt lookup table
854 rinv22 = frsqrtes(rsq22)
856 rinv22 = frsqrte(dble(rsq22))
858 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
861 rinv22 = (0.5*rinv22*(3.0-((rsq22*rinv22)
865 C PowerPC intrinsics 1/sqrt lookup table
867 rinv23 = frsqrtes(rsq23)
869 rinv23 = frsqrte(dble(rsq23))
871 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
874 rinv23 = (0.5*rinv23*(3.0-((rsq23*rinv23)
878 C PowerPC intrinsics 1/sqrt lookup table
880 rinv31 = frsqrtes(rsq31)
882 rinv31 = frsqrte(dble(rsq31))
884 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
887 rinv31 = (0.5*rinv31*(3.0-((rsq31*rinv31)
891 C PowerPC intrinsics 1/sqrt lookup table
893 rinv32 = frsqrtes(rsq32)
895 rinv32 = frsqrte(dble(rsq32))
897 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
900 rinv32 = (0.5*rinv32*(3.0-((rsq32*rinv32)
904 C PowerPC intrinsics 1/sqrt lookup table
906 rinv33 = frsqrtes(rsq33)
908 rinv33 = frsqrte(dble(rsq33))
910 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
913 rinv33 = (0.5*rinv33*(3.0-((rsq33*rinv33)
917 C Load parameters for j atom
920 C Coulomb reaction-field interaction
922 vcoul = qq*(rinv11+krsq-crf)
925 C Load parameters for j atom
928 C Coulomb reaction-field interaction
930 vcoul = qq*(rinv12+krsq-crf)
933 C Load parameters for j atom
936 C Coulomb reaction-field interaction
938 vcoul = qq*(rinv13+krsq-crf)
941 C Load parameters for j atom
944 C Coulomb reaction-field interaction
946 vcoul = qq*(rinv21+krsq-crf)
949 C Load parameters for j atom
952 C Coulomb reaction-field interaction
954 vcoul = qq*(rinv22+krsq-crf)
957 C Load parameters for j atom
960 C Coulomb reaction-field interaction
962 vcoul = qq*(rinv23+krsq-crf)
965 C Load parameters for j atom
968 C Coulomb reaction-field interaction
970 vcoul = qq*(rinv31+krsq-crf)
973 C Load parameters for j atom
976 C Coulomb reaction-field interaction
978 vcoul = qq*(rinv32+krsq-crf)
981 C Load parameters for j atom
984 C Coulomb reaction-field interaction
986 vcoul = qq*(rinv33+krsq-crf)
989 C Inner loop uses 171 flops/iteration
993 C Add i forces to mem and shifted force list
995 C Add potential energies to the group for this list
997 Vc(ggid) = Vc(ggid) + vctot
999 C Increment number of inner iterations
1000 ninner = ninner + nj1 - nj0
1002 C Outer loop uses 10 flops/iteration
1006 C Increment number of outer iterations
1007 nouter = nouter + nn1 - nn0
1008 if(nn1.lt.nri) goto 10
1010 C Write outer/inner iteration count to pointers