Fixed synchronization counter update for pwr6 kernels
[gromacs/adressmacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel020.F
blob84d0a4eed95a867b06a3272f16be6897ed61eaa3
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel020
45 C Coulomb interaction:     Not calculated
46 C VdW interaction:         Buckingham
47 C water optimization:      No
48 C Calculate forces:        yes
50       subroutine pwr6kernel020(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       integer*4     nti
98       integer*4     tj
99       gmxreal       rinvsix
100       gmxreal       Vvdw6,Vvdwtot
101       gmxreal       Vvdwexp,br
102       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
103       gmxreal       jx1,jy1,jz1
104       gmxreal       dx11,dy11,dz11,rsq11,rinv11
105       gmxreal       c6,cexp1,cexp2
108 C    Reset outer and inner iteration counters
109       nouter           = 0               
110       ninner           = 0               
112 C    Loop over thread workunits
113    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
114         if(nn1.gt.nri) nn1=nri
116 C      Start outer loop over neighborlists
117         
118         do n=nn0+1,nn1
120 C        Load shift vector for this list
121           is3              = 3*shift(n)+1    
122           shX              = shiftvec(is3)   
123           shY              = shiftvec(is3+1) 
124           shZ              = shiftvec(is3+2) 
126 C        Load limits for loop over neighbors
127           nj0              = jindex(n)+1     
128           nj1              = jindex(n+1)     
130 C        Get outer coordinate index
131           ii               = iinr(n)+1       
132           ii3              = 3*ii-2          
134 C        Load i atom data, add shift vector
135           ix1              = shX + pos(ii3+0)
136           iy1              = shY + pos(ii3+1)
137           iz1              = shZ + pos(ii3+2)
139 C        Load parameters for i atom
140           nti              = 3*ntype*type(ii)
142 C        Zero the potential energy for this list
143           Vvdwtot          = 0               
145 C        Clear i atom forces
146           fix1             = 0               
147           fiy1             = 0               
148           fiz1             = 0               
149           
150           do k=nj0,nj1
152 C          Get j neighbor index, and coordinate index
153             jnr              = jjnr(k)+1       
154             j3               = 3*jnr-2         
156 C          load j atom coordinates
157             jx1              = pos(j3+0)       
158             jy1              = pos(j3+1)       
159             jz1              = pos(j3+2)       
161 C          Calculate distance
162             dx11             = ix1 - jx1       
163             dy11             = iy1 - jy1       
164             dz11             = iz1 - jz1       
165             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
167 C          Calculate 1/r and 1/r2
169 C          PowerPC intrinsics 1/sqrt lookup table
170 #ifndef GMX_BLUEGENE
171             rinv11           = frsqrtes(rsq11) 
172 #else
173             rinv11           = frsqrte(dble(rsq11)) 
174 #endif
175             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
176      &  *rinv11)))
177 #ifdef GMX_DOUBLE
178             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
179      &  *rinv11)))
180 #endif
182 C          Load parameters for j atom
183             tj               = nti+3*type(jnr)+1
184             c6               = vdwparam(tj)    
185             cexp1            = vdwparam(tj+1)  
186             cexp2            = vdwparam(tj+2)  
187             rinvsq           = rinv11*rinv11   
189 C          Buckingham interaction
190             rinvsix          = rinvsq*rinvsq*rinvsq
191             Vvdw6            = c6*rinvsix      
192             br               = cexp2*rsq11*rinv11
193             Vvdwexp          = cexp1*exp(-br)  
194             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
195             fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq
197 C          Calculate temporary vectorial force
198             tx               = fscal*dx11      
199             ty               = fscal*dy11      
200             tz               = fscal*dz11      
202 C          Increment i atom force
203             fix1             = fix1 + tx       
204             fiy1             = fiy1 + ty       
205             fiz1             = fiz1 + tz       
207 C          Decrement j atom force
208             faction(j3+0)    = faction(j3+0) - tx
209             faction(j3+1)    = faction(j3+1) - ty
210             faction(j3+2)    = faction(j3+2) - tz
212 C          Inner loop uses 62 flops/iteration
213           end do
214           
216 C        Add i forces to mem and shifted force list
217           faction(ii3+0)   = faction(ii3+0) + fix1
218           faction(ii3+1)   = faction(ii3+1) + fiy1
219           faction(ii3+2)   = faction(ii3+2) + fiz1
220           fshift(is3)      = fshift(is3)+fix1
221           fshift(is3+1)    = fshift(is3+1)+fiy1
222           fshift(is3+2)    = fshift(is3+2)+fiz1
224 C        Add potential energies to the group for this list
225           ggid             = gid(n)+1        
226           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
228 C        Increment number of inner iterations
229           ninner           = ninner + nj1 - nj0
231 C        Outer loop uses 10 flops/iteration
232         end do
233         
235 C      Increment number of outer iterations
236         nouter           = nouter + nn1 - nn0
237       if(nn1.lt.nri) goto 10
239 C    Write outer/inner iteration count to pointers
240       outeriter        = nouter          
241       inneriter        = ninner          
242       return
243       end
251 C Gromacs nonbonded kernel pwr6kernel020nf
252 C Coulomb interaction:     Not calculated
253 C VdW interaction:         Buckingham
254 C water optimization:      No
255 C Calculate forces:        no
257       subroutine pwr6kernel020nf(
258      &                          nri,
259      &                          iinr,
260      &                          jindex,
261      &                          jjnr,
262      &                          shift,
263      &                          shiftvec,
264      &                          fshift,
265      &                          gid,
266      &                          pos,
267      &                          faction,
268      &                          charge,
269      &                          facel,
270      &                          krf,
271      &                          crf,
272      &                          Vc,
273      &                          type,
274      &                          ntype,
275      &                          vdwparam,
276      &                          Vvdw,
277      &                          tabscale,
278      &                          VFtab,
279      &                          invsqrta,
280      &                          dvda,
281      &                          gbtabscale,
282      &                          GBtab,
283      &                          nthreads,
284      &                          count,
285      &                          mtx,
286      &                          outeriter,
287      &                          inneriter,
288      &                          work)
289       implicit      none
290       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
291       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
292       integer*4     gid(*),type(*),ntype
293       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
294       gmxreal       Vvdw(*),tabscale,VFtab(*)
295       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
296       integer*4     nthreads,count,mtx,outeriter,inneriter
297       gmxreal       work(*)
299       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
300       integer*4     nn0,nn1,nouter,ninner
301       gmxreal       shX,shY,shZ
302       gmxreal       rinvsq
303       integer*4     nti
304       integer*4     tj
305       gmxreal       rinvsix
306       gmxreal       Vvdw6,Vvdwtot
307       gmxreal       Vvdwexp,br
308       gmxreal       ix1,iy1,iz1
309       gmxreal       jx1,jy1,jz1
310       gmxreal       dx11,dy11,dz11,rsq11,rinv11
311       gmxreal       c6,cexp1,cexp2
314 C    Reset outer and inner iteration counters
315       nouter           = 0               
316       ninner           = 0               
318 C    Loop over thread workunits
319    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
320         if(nn1.gt.nri) nn1=nri
322 C      Start outer loop over neighborlists
323         
324         do n=nn0+1,nn1
326 C        Load shift vector for this list
327           is3              = 3*shift(n)+1    
328           shX              = shiftvec(is3)   
329           shY              = shiftvec(is3+1) 
330           shZ              = shiftvec(is3+2) 
332 C        Load limits for loop over neighbors
333           nj0              = jindex(n)+1     
334           nj1              = jindex(n+1)     
336 C        Get outer coordinate index
337           ii               = iinr(n)+1       
338           ii3              = 3*ii-2          
340 C        Load i atom data, add shift vector
341           ix1              = shX + pos(ii3+0)
342           iy1              = shY + pos(ii3+1)
343           iz1              = shZ + pos(ii3+2)
345 C        Load parameters for i atom
346           nti              = 3*ntype*type(ii)
348 C        Zero the potential energy for this list
349           Vvdwtot          = 0               
351 C        Clear i atom forces
352           
353           do k=nj0,nj1
355 C          Get j neighbor index, and coordinate index
356             jnr              = jjnr(k)+1       
357             j3               = 3*jnr-2         
359 C          load j atom coordinates
360             jx1              = pos(j3+0)       
361             jy1              = pos(j3+1)       
362             jz1              = pos(j3+2)       
364 C          Calculate distance
365             dx11             = ix1 - jx1       
366             dy11             = iy1 - jy1       
367             dz11             = iz1 - jz1       
368             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
370 C          Calculate 1/r and 1/r2
372 C          PowerPC intrinsics 1/sqrt lookup table
373 #ifndef GMX_BLUEGENE
374             rinv11           = frsqrtes(rsq11) 
375 #else
376             rinv11           = frsqrte(dble(rsq11)) 
377 #endif
378             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
379      &  *rinv11)))
380 #ifdef GMX_DOUBLE
381             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
382      &  *rinv11)))
383 #endif
385 C          Load parameters for j atom
386             tj               = nti+3*type(jnr)+1
387             c6               = vdwparam(tj)    
388             cexp1            = vdwparam(tj+1)  
389             cexp2            = vdwparam(tj+2)  
390             rinvsq           = rinv11*rinv11   
392 C          Buckingham interaction
393             rinvsix          = rinvsq*rinvsq*rinvsq
394             Vvdw6            = c6*rinvsix      
395             br               = cexp2*rsq11*rinv11
396             Vvdwexp          = cexp1*exp(-br)  
397             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
399 C          Inner loop uses 49 flops/iteration
400           end do
401           
403 C        Add i forces to mem and shifted force list
405 C        Add potential energies to the group for this list
406           ggid             = gid(n)+1        
407           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
409 C        Increment number of inner iterations
410           ninner           = ninner + nj1 - nj0
412 C        Outer loop uses 4 flops/iteration
413         end do
414         
416 C      Increment number of outer iterations
417         nouter           = nouter + nn1 - nn0
418       if(nn1.lt.nri) goto 10
420 C    Write outer/inner iteration count to pointers
421       outeriter        = nouter          
422       inneriter        = ninner          
423       return
424       end