Fixed synchronization counter update for pwr6 kernels
[gromacs/adressmacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel131.F
blobc0dbe2f80c3117fa6079107ddf3888c42c3404e0
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel131
45 C Coulomb interaction:     Normal Coulomb
46 C VdW interaction:         Tabulated
47 C water optimization:      SPC/TIP3P - other atoms
48 C Calculate forces:        yes
50       subroutine pwr6kernel131(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       jq
98       gmxreal       qq,vcoul,vctot
99       integer*4     nti
100       integer*4     tj
101       gmxreal       Vvdw6,Vvdwtot
102       gmxreal       Vvdw12
103       gmxreal       r,rt,eps,eps2
104       integer*4     n0,nnn
105       gmxreal       Y,F,Geps,Heps2,Fp,VV
106       gmxreal       FF
107       gmxreal       fijD,fijR
108       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
109       gmxreal       ix2,iy2,iz2,fix2,fiy2,fiz2
110       gmxreal       ix3,iy3,iz3,fix3,fiy3,fiz3
111       gmxreal       jx1,jy1,jz1,fjx1,fjy1,fjz1
112       gmxreal       dx11,dy11,dz11,rsq11,rinv11
113       gmxreal       dx21,dy21,dz21,rsq21,rinv21
114       gmxreal       dx31,dy31,dz31,rsq31,rinv31
115       gmxreal       qO,qH
116       gmxreal       c6,c12
119 C    Initialize water data
120       ii               = iinr(1)+1       
121       qO               = facel*charge(ii)
122       qH               = facel*charge(ii+1)
123       nti              = 2*ntype*type(ii)
126 C    Reset outer and inner iteration counters
127       nouter           = 0               
128       ninner           = 0               
130 C    Loop over thread workunits
131    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
132         if(nn1.gt.nri) nn1=nri
134 C      Start outer loop over neighborlists
135         
136         do n=nn0+1,nn1
138 C        Load shift vector for this list
139           is3              = 3*shift(n)+1    
140           shX              = shiftvec(is3)   
141           shY              = shiftvec(is3+1) 
142           shZ              = shiftvec(is3+2) 
144 C        Load limits for loop over neighbors
145           nj0              = jindex(n)+1     
146           nj1              = jindex(n+1)     
148 C        Get outer coordinate index
149           ii               = iinr(n)+1       
150           ii3              = 3*ii-2          
152 C        Load i atom data, add shift vector
153           ix1              = shX + pos(ii3+0)
154           iy1              = shY + pos(ii3+1)
155           iz1              = shZ + pos(ii3+2)
156           ix2              = shX + pos(ii3+3)
157           iy2              = shY + pos(ii3+4)
158           iz2              = shZ + pos(ii3+5)
159           ix3              = shX + pos(ii3+6)
160           iy3              = shY + pos(ii3+7)
161           iz3              = shZ + pos(ii3+8)
163 C        Zero the potential energy for this list
164           vctot            = 0               
165           Vvdwtot          = 0               
167 C        Clear i atom forces
168           fix1             = 0               
169           fiy1             = 0               
170           fiz1             = 0               
171           fix2             = 0               
172           fiy2             = 0               
173           fiz2             = 0               
174           fix3             = 0               
175           fiy3             = 0               
176           fiz3             = 0               
177           
178           do k=nj0,nj1
180 C          Get j neighbor index, and coordinate index
181             jnr              = jjnr(k)+1       
182             j3               = 3*jnr-2         
184 C          load j atom coordinates
185             jx1              = pos(j3+0)       
186             jy1              = pos(j3+1)       
187             jz1              = pos(j3+2)       
189 C          Calculate distance
190             dx11             = ix1 - jx1       
191             dy11             = iy1 - jy1       
192             dz11             = iz1 - jz1       
193             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
194             dx21             = ix2 - jx1       
195             dy21             = iy2 - jy1       
196             dz21             = iz2 - jz1       
197             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21
198             dx31             = ix3 - jx1       
199             dy31             = iy3 - jy1       
200             dz31             = iz3 - jz1       
201             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31
203 C          Calculate 1/r and 1/r2
205 C          PowerPC intrinsics 1/sqrt lookup table
206 #ifndef GMX_BLUEGENE
207             rinv11           = frsqrtes(rsq11) 
208 #else
209             rinv11           = frsqrte(dble(rsq11)) 
210 #endif
211             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
212      &  *rinv11)))
213 #ifdef GMX_DOUBLE
214             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
215      &  *rinv11)))
216 #endif
218 C          PowerPC intrinsics 1/sqrt lookup table
219 #ifndef GMX_BLUEGENE
220             rinv21           = frsqrtes(rsq21) 
221 #else
222             rinv21           = frsqrte(dble(rsq21)) 
223 #endif
224             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
225      &  *rinv21)))
226 #ifdef GMX_DOUBLE
227             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
228      &  *rinv21)))
229 #endif
231 C          PowerPC intrinsics 1/sqrt lookup table
232 #ifndef GMX_BLUEGENE
233             rinv31           = frsqrtes(rsq31) 
234 #else
235             rinv31           = frsqrte(dble(rsq31)) 
236 #endif
237             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
238      &  *rinv31)))
239 #ifdef GMX_DOUBLE
240             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
241      &  *rinv31)))
242 #endif
244 C          Load parameters for j atom
245             jq               = charge(jnr+0)   
246             qq               = qO*jq           
247             tj               = nti+2*type(jnr)+1
248             c6               = vdwparam(tj)    
249             c12              = vdwparam(tj+1)  
250             rinvsq           = rinv11*rinv11   
252 C          Coulomb interaction
253             vcoul            = qq*rinv11       
254             vctot            = vctot+vcoul     
256 C          Calculate table index
257             r                = rsq11*rinv11    
259 C          Calculate table index
260             rt               = r*tabscale      
261             n0               = rt              
262             eps              = rt-n0           
263             eps2             = eps*eps         
264             nnn              = 8*n0+1          
266 C          Tabulated VdW interaction - dispersion
267             Y                = VFtab(nnn)      
268             F                = VFtab(nnn+1)    
269             Geps             = eps*VFtab(nnn+2)
270             Heps2            = eps2*VFtab(nnn+3)
271             Fp               = F+Geps+Heps2    
272             VV               = Y+eps*Fp        
273             FF               = Fp+Geps+2.0*Heps2
274             Vvdw6            = c6*VV           
275             fijD             = c6*FF           
277 C          Tabulated VdW interaction - repulsion
278             nnn              = nnn+4           
279             Y                = VFtab(nnn)      
280             F                = VFtab(nnn+1)    
281             Geps             = eps*VFtab(nnn+2)
282             Heps2            = eps2*VFtab(nnn+3)
283             Fp               = F+Geps+Heps2    
284             VV               = Y+eps*Fp        
285             FF               = Fp+Geps+2.0*Heps2
286             Vvdw12           = c12*VV          
287             fijR             = c12*FF          
288             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
289             fscal            = (vcoul)*rinvsq-((fijD+fijR)
290      &  *tabscale)*rinv11
292 C          Calculate temporary vectorial force
293             tx               = fscal*dx11      
294             ty               = fscal*dy11      
295             tz               = fscal*dz11      
297 C          Increment i atom force
298             fix1             = fix1 + tx       
299             fiy1             = fiy1 + ty       
300             fiz1             = fiz1 + tz       
302 C          Decrement j atom force
303             fjx1             = faction(j3+0) - tx
304             fjy1             = faction(j3+1) - ty
305             fjz1             = faction(j3+2) - tz
307 C          Load parameters for j atom
308             qq               = qH*jq           
309             rinvsq           = rinv21*rinv21   
311 C          Coulomb interaction
312             vcoul            = qq*rinv21       
313             vctot            = vctot+vcoul     
314             fscal            = (vcoul)*rinvsq  
316 C          Calculate temporary vectorial force
317             tx               = fscal*dx21      
318             ty               = fscal*dy21      
319             tz               = fscal*dz21      
321 C          Increment i atom force
322             fix2             = fix2 + tx       
323             fiy2             = fiy2 + ty       
324             fiz2             = fiz2 + tz       
326 C          Decrement j atom force
327             fjx1             = fjx1 - tx       
328             fjy1             = fjy1 - ty       
329             fjz1             = fjz1 - tz       
331 C          Load parameters for j atom
332             rinvsq           = rinv31*rinv31   
334 C          Coulomb interaction
335             vcoul            = qq*rinv31       
336             vctot            = vctot+vcoul     
337             fscal            = (vcoul)*rinvsq  
339 C          Calculate temporary vectorial force
340             tx               = fscal*dx31      
341             ty               = fscal*dy31      
342             tz               = fscal*dz31      
344 C          Increment i atom force
345             fix3             = fix3 + tx       
346             fiy3             = fiy3 + ty       
347             fiz3             = fiz3 + tz       
349 C          Decrement j atom force
350             faction(j3+0)    = fjx1 - tx       
351             faction(j3+1)    = fjy1 - ty       
352             faction(j3+2)    = fjz1 - tz       
354 C          Inner loop uses 115 flops/iteration
355           end do
356           
358 C        Add i forces to mem and shifted force list
359           faction(ii3+0)   = faction(ii3+0) + fix1
360           faction(ii3+1)   = faction(ii3+1) + fiy1
361           faction(ii3+2)   = faction(ii3+2) + fiz1
362           faction(ii3+3)   = faction(ii3+3) + fix2
363           faction(ii3+4)   = faction(ii3+4) + fiy2
364           faction(ii3+5)   = faction(ii3+5) + fiz2
365           faction(ii3+6)   = faction(ii3+6) + fix3
366           faction(ii3+7)   = faction(ii3+7) + fiy3
367           faction(ii3+8)   = faction(ii3+8) + fiz3
368           fshift(is3)      = fshift(is3)+fix1+fix2+fix3
369           fshift(is3+1)    = fshift(is3+1)+fiy1+fiy2+fiy3
370           fshift(is3+2)    = fshift(is3+2)+fiz1+fiz2+fiz3
372 C        Add potential energies to the group for this list
373           ggid             = gid(n)+1        
374           Vc(ggid)         = Vc(ggid) + vctot
375           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
377 C        Increment number of inner iterations
378           ninner           = ninner + nj1 - nj0
380 C        Outer loop uses 29 flops/iteration
381         end do
382         
384 C      Increment number of outer iterations
385         nouter           = nouter + nn1 - nn0
386       if(nn1.lt.nri) goto 10
388 C    Write outer/inner iteration count to pointers
389       outeriter        = nouter          
390       inneriter        = ninner          
391       return
392       end
400 C Gromacs nonbonded kernel pwr6kernel131nf
401 C Coulomb interaction:     Normal Coulomb
402 C VdW interaction:         Tabulated
403 C water optimization:      SPC/TIP3P - other atoms
404 C Calculate forces:        no
406       subroutine pwr6kernel131nf(
407      &                          nri,
408      &                          iinr,
409      &                          jindex,
410      &                          jjnr,
411      &                          shift,
412      &                          shiftvec,
413      &                          fshift,
414      &                          gid,
415      &                          pos,
416      &                          faction,
417      &                          charge,
418      &                          facel,
419      &                          krf,
420      &                          crf,
421      &                          Vc,
422      &                          type,
423      &                          ntype,
424      &                          vdwparam,
425      &                          Vvdw,
426      &                          tabscale,
427      &                          VFtab,
428      &                          invsqrta,
429      &                          dvda,
430      &                          gbtabscale,
431      &                          GBtab,
432      &                          nthreads,
433      &                          count,
434      &                          mtx,
435      &                          outeriter,
436      &                          inneriter,
437      &                          work)
438       implicit      none
439       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
440       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
441       integer*4     gid(*),type(*),ntype
442       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
443       gmxreal       Vvdw(*),tabscale,VFtab(*)
444       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
445       integer*4     nthreads,count,mtx,outeriter,inneriter
446       gmxreal       work(*)
448       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
449       integer*4     nn0,nn1,nouter,ninner
450       gmxreal       shX,shY,shZ
451       gmxreal       jq
452       gmxreal       qq,vcoul,vctot
453       integer*4     nti
454       integer*4     tj
455       gmxreal       Vvdw6,Vvdwtot
456       gmxreal       Vvdw12
457       gmxreal       r,rt,eps,eps2
458       integer*4     n0,nnn
459       gmxreal       Y,F,Geps,Heps2,Fp,VV
460       gmxreal       ix1,iy1,iz1
461       gmxreal       ix2,iy2,iz2
462       gmxreal       ix3,iy3,iz3
463       gmxreal       jx1,jy1,jz1
464       gmxreal       dx11,dy11,dz11,rsq11,rinv11
465       gmxreal       dx21,dy21,dz21,rsq21,rinv21
466       gmxreal       dx31,dy31,dz31,rsq31,rinv31
467       gmxreal       qO,qH
468       gmxreal       c6,c12
471 C    Initialize water data
472       ii               = iinr(1)+1       
473       qO               = facel*charge(ii)
474       qH               = facel*charge(ii+1)
475       nti              = 2*ntype*type(ii)
478 C    Reset outer and inner iteration counters
479       nouter           = 0               
480       ninner           = 0               
482 C    Loop over thread workunits
483    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
484         if(nn1.gt.nri) nn1=nri
486 C      Start outer loop over neighborlists
487         
488         do n=nn0+1,nn1
490 C        Load shift vector for this list
491           is3              = 3*shift(n)+1    
492           shX              = shiftvec(is3)   
493           shY              = shiftvec(is3+1) 
494           shZ              = shiftvec(is3+2) 
496 C        Load limits for loop over neighbors
497           nj0              = jindex(n)+1     
498           nj1              = jindex(n+1)     
500 C        Get outer coordinate index
501           ii               = iinr(n)+1       
502           ii3              = 3*ii-2          
504 C        Load i atom data, add shift vector
505           ix1              = shX + pos(ii3+0)
506           iy1              = shY + pos(ii3+1)
507           iz1              = shZ + pos(ii3+2)
508           ix2              = shX + pos(ii3+3)
509           iy2              = shY + pos(ii3+4)
510           iz2              = shZ + pos(ii3+5)
511           ix3              = shX + pos(ii3+6)
512           iy3              = shY + pos(ii3+7)
513           iz3              = shZ + pos(ii3+8)
515 C        Zero the potential energy for this list
516           vctot            = 0               
517           Vvdwtot          = 0               
519 C        Clear i atom forces
520           
521           do k=nj0,nj1
523 C          Get j neighbor index, and coordinate index
524             jnr              = jjnr(k)+1       
525             j3               = 3*jnr-2         
527 C          load j atom coordinates
528             jx1              = pos(j3+0)       
529             jy1              = pos(j3+1)       
530             jz1              = pos(j3+2)       
532 C          Calculate distance
533             dx11             = ix1 - jx1       
534             dy11             = iy1 - jy1       
535             dz11             = iz1 - jz1       
536             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
537             dx21             = ix2 - jx1       
538             dy21             = iy2 - jy1       
539             dz21             = iz2 - jz1       
540             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21
541             dx31             = ix3 - jx1       
542             dy31             = iy3 - jy1       
543             dz31             = iz3 - jz1       
544             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31
546 C          Calculate 1/r and 1/r2
548 C          PowerPC intrinsics 1/sqrt lookup table
549 #ifndef GMX_BLUEGENE
550             rinv11           = frsqrtes(rsq11) 
551 #else
552             rinv11           = frsqrte(dble(rsq11)) 
553 #endif
554             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
555      &  *rinv11)))
556 #ifdef GMX_DOUBLE
557             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
558      &  *rinv11)))
559 #endif
561 C          PowerPC intrinsics 1/sqrt lookup table
562 #ifndef GMX_BLUEGENE
563             rinv21           = frsqrtes(rsq21) 
564 #else
565             rinv21           = frsqrte(dble(rsq21)) 
566 #endif
567             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
568      &  *rinv21)))
569 #ifdef GMX_DOUBLE
570             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
571      &  *rinv21)))
572 #endif
574 C          PowerPC intrinsics 1/sqrt lookup table
575 #ifndef GMX_BLUEGENE
576             rinv31           = frsqrtes(rsq31) 
577 #else
578             rinv31           = frsqrte(dble(rsq31)) 
579 #endif
580             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
581      &  *rinv31)))
582 #ifdef GMX_DOUBLE
583             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
584      &  *rinv31)))
585 #endif
587 C          Load parameters for j atom
588             jq               = charge(jnr+0)   
589             qq               = qO*jq           
590             tj               = nti+2*type(jnr)+1
591             c6               = vdwparam(tj)    
592             c12              = vdwparam(tj+1)  
594 C          Coulomb interaction
595             vcoul            = qq*rinv11       
596             vctot            = vctot+vcoul     
598 C          Calculate table index
599             r                = rsq11*rinv11    
601 C          Calculate table index
602             rt               = r*tabscale      
603             n0               = rt              
604             eps              = rt-n0           
605             eps2             = eps*eps         
606             nnn              = 8*n0+1          
608 C          Tabulated VdW interaction - dispersion
609             Y                = VFtab(nnn)      
610             F                = VFtab(nnn+1)    
611             Geps             = eps*VFtab(nnn+2)
612             Heps2            = eps2*VFtab(nnn+3)
613             Fp               = F+Geps+Heps2    
614             VV               = Y+eps*Fp        
615             Vvdw6            = c6*VV           
617 C          Tabulated VdW interaction - repulsion
618             nnn              = nnn+4           
619             Y                = VFtab(nnn)      
620             F                = VFtab(nnn+1)    
621             Geps             = eps*VFtab(nnn+2)
622             Heps2            = eps2*VFtab(nnn+3)
623             Fp               = F+Geps+Heps2    
624             VV               = Y+eps*Fp        
625             Vvdw12           = c12*VV          
626             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
628 C          Load parameters for j atom
629             qq               = qH*jq           
631 C          Coulomb interaction
632             vcoul            = qq*rinv21       
633             vctot            = vctot+vcoul     
635 C          Load parameters for j atom
637 C          Coulomb interaction
638             vcoul            = qq*rinv31       
639             vctot            = vctot+vcoul     
641 C          Inner loop uses 70 flops/iteration
642           end do
643           
645 C        Add i forces to mem and shifted force list
647 C        Add potential energies to the group for this list
648           ggid             = gid(n)+1        
649           Vc(ggid)         = Vc(ggid) + vctot
650           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
652 C        Increment number of inner iterations
653           ninner           = ninner + nj1 - nj0
655 C        Outer loop uses 11 flops/iteration
656         end do
657         
659 C      Increment number of outer iterations
660         nouter           = nouter + nn1 - nn0
661       if(nn1.lt.nri) goto 10
663 C    Write outer/inner iteration count to pointers
664       outeriter        = nouter          
665       inneriter        = ninner          
666       return
667       end