Fixed synchronization counter update for pwr6 kernels
[gromacs/adressmacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel220.F
blobb13ce6710ad23d40ff25d34b1eb8ca02694c38a8
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel220
45 C Coulomb interaction:     Reaction field
46 C VdW interaction:         Buckingham
47 C water optimization:      No
48 C Calculate forces:        yes
50       subroutine pwr6kernel220(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       iq
98       gmxreal       qq,vcoul,vctot
99       integer*4     nti
100       integer*4     tj
101       gmxreal       rinvsix
102       gmxreal       Vvdw6,Vvdwtot
103       gmxreal       krsq
104       gmxreal       Vvdwexp,br
105       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
106       gmxreal       jx1,jy1,jz1
107       gmxreal       dx11,dy11,dz11,rsq11,rinv11
108       gmxreal       c6,cexp1,cexp2
111 C    Reset outer and inner iteration counters
112       nouter           = 0               
113       ninner           = 0               
115 C    Loop over thread workunits
116    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
117         if(nn1.gt.nri) nn1=nri
119 C      Start outer loop over neighborlists
120         
121         do n=nn0+1,nn1
123 C        Load shift vector for this list
124           is3              = 3*shift(n)+1    
125           shX              = shiftvec(is3)   
126           shY              = shiftvec(is3+1) 
127           shZ              = shiftvec(is3+2) 
129 C        Load limits for loop over neighbors
130           nj0              = jindex(n)+1     
131           nj1              = jindex(n+1)     
133 C        Get outer coordinate index
134           ii               = iinr(n)+1       
135           ii3              = 3*ii-2          
137 C        Load i atom data, add shift vector
138           ix1              = shX + pos(ii3+0)
139           iy1              = shY + pos(ii3+1)
140           iz1              = shZ + pos(ii3+2)
142 C        Load parameters for i atom
143           iq               = facel*charge(ii)
144           nti              = 3*ntype*type(ii)
146 C        Zero the potential energy for this list
147           vctot            = 0               
148           Vvdwtot          = 0               
150 C        Clear i atom forces
151           fix1             = 0               
152           fiy1             = 0               
153           fiz1             = 0               
154           
155           do k=nj0,nj1
157 C          Get j neighbor index, and coordinate index
158             jnr              = jjnr(k)+1       
159             j3               = 3*jnr-2         
161 C          load j atom coordinates
162             jx1              = pos(j3+0)       
163             jy1              = pos(j3+1)       
164             jz1              = pos(j3+2)       
166 C          Calculate distance
167             dx11             = ix1 - jx1       
168             dy11             = iy1 - jy1       
169             dz11             = iz1 - jz1       
170             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
172 C          Calculate 1/r and 1/r2
174 C          PowerPC intrinsics 1/sqrt lookup table
175 #ifndef GMX_BLUEGENE
176             rinv11           = frsqrtes(rsq11) 
177 #else
178             rinv11           = frsqrte(dble(rsq11)) 
179 #endif
180             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
181      &  *rinv11)))
182 #ifdef GMX_DOUBLE
183             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
184      &  *rinv11)))
185 #endif
187 C          Load parameters for j atom
188             qq               = iq*charge(jnr)  
189             tj               = nti+3*type(jnr)+1
190             c6               = vdwparam(tj)    
191             cexp1            = vdwparam(tj+1)  
192             cexp2            = vdwparam(tj+2)  
193             rinvsq           = rinv11*rinv11   
195 C          Coulomb reaction-field interaction
196             krsq             = krf*rsq11       
197             vcoul            = qq*(rinv11+krsq-crf)
198             vctot            = vctot+vcoul     
200 C          Buckingham interaction
201             rinvsix          = rinvsq*rinvsq*rinvsq
202             Vvdw6            = c6*rinvsix      
203             br               = cexp2*rsq11*rinv11
204             Vvdwexp          = cexp1*exp(-br)  
205             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
206             fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp
207      &  -6.0*Vvdw6)*rinvsq
209 C          Calculate temporary vectorial force
210             tx               = fscal*dx11      
211             ty               = fscal*dy11      
212             tz               = fscal*dz11      
214 C          Increment i atom force
215             fix1             = fix1 + tx       
216             fiy1             = fiy1 + ty       
217             fiz1             = fiz1 + tz       
219 C          Decrement j atom force
220             faction(j3+0)    = faction(j3+0) - tx
221             faction(j3+1)    = faction(j3+1) - ty
222             faction(j3+2)    = faction(j3+2) - tz
224 C          Inner loop uses 71 flops/iteration
225           end do
226           
228 C        Add i forces to mem and shifted force list
229           faction(ii3+0)   = faction(ii3+0) + fix1
230           faction(ii3+1)   = faction(ii3+1) + fiy1
231           faction(ii3+2)   = faction(ii3+2) + fiz1
232           fshift(is3)      = fshift(is3)+fix1
233           fshift(is3+1)    = fshift(is3+1)+fiy1
234           fshift(is3+2)    = fshift(is3+2)+fiz1
236 C        Add potential energies to the group for this list
237           ggid             = gid(n)+1        
238           Vc(ggid)         = Vc(ggid) + vctot
239           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
241 C        Increment number of inner iterations
242           ninner           = ninner + nj1 - nj0
244 C        Outer loop uses 12 flops/iteration
245         end do
246         
248 C      Increment number of outer iterations
249         nouter           = nouter + nn1 - nn0
250       if(nn1.lt.nri) goto 10
252 C    Write outer/inner iteration count to pointers
253       outeriter        = nouter          
254       inneriter        = ninner          
255       return
256       end
264 C Gromacs nonbonded kernel pwr6kernel220nf
265 C Coulomb interaction:     Reaction field
266 C VdW interaction:         Buckingham
267 C water optimization:      No
268 C Calculate forces:        no
270       subroutine pwr6kernel220nf(
271      &                          nri,
272      &                          iinr,
273      &                          jindex,
274      &                          jjnr,
275      &                          shift,
276      &                          shiftvec,
277      &                          fshift,
278      &                          gid,
279      &                          pos,
280      &                          faction,
281      &                          charge,
282      &                          facel,
283      &                          krf,
284      &                          crf,
285      &                          Vc,
286      &                          type,
287      &                          ntype,
288      &                          vdwparam,
289      &                          Vvdw,
290      &                          tabscale,
291      &                          VFtab,
292      &                          invsqrta,
293      &                          dvda,
294      &                          gbtabscale,
295      &                          GBtab,
296      &                          nthreads,
297      &                          count,
298      &                          mtx,
299      &                          outeriter,
300      &                          inneriter,
301      &                          work)
302       implicit      none
303       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
304       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
305       integer*4     gid(*),type(*),ntype
306       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
307       gmxreal       Vvdw(*),tabscale,VFtab(*)
308       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
309       integer*4     nthreads,count,mtx,outeriter,inneriter
310       gmxreal       work(*)
312       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
313       integer*4     nn0,nn1,nouter,ninner
314       gmxreal       shX,shY,shZ
315       gmxreal       rinvsq
316       gmxreal       iq
317       gmxreal       qq,vcoul,vctot
318       integer*4     nti
319       integer*4     tj
320       gmxreal       rinvsix
321       gmxreal       Vvdw6,Vvdwtot
322       gmxreal       krsq
323       gmxreal       Vvdwexp,br
324       gmxreal       ix1,iy1,iz1
325       gmxreal       jx1,jy1,jz1
326       gmxreal       dx11,dy11,dz11,rsq11,rinv11
327       gmxreal       c6,cexp1,cexp2
330 C    Reset outer and inner iteration counters
331       nouter           = 0               
332       ninner           = 0               
334 C    Loop over thread workunits
335    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
336         if(nn1.gt.nri) nn1=nri
338 C      Start outer loop over neighborlists
339         
340         do n=nn0+1,nn1
342 C        Load shift vector for this list
343           is3              = 3*shift(n)+1    
344           shX              = shiftvec(is3)   
345           shY              = shiftvec(is3+1) 
346           shZ              = shiftvec(is3+2) 
348 C        Load limits for loop over neighbors
349           nj0              = jindex(n)+1     
350           nj1              = jindex(n+1)     
352 C        Get outer coordinate index
353           ii               = iinr(n)+1       
354           ii3              = 3*ii-2          
356 C        Load i atom data, add shift vector
357           ix1              = shX + pos(ii3+0)
358           iy1              = shY + pos(ii3+1)
359           iz1              = shZ + pos(ii3+2)
361 C        Load parameters for i atom
362           iq               = facel*charge(ii)
363           nti              = 3*ntype*type(ii)
365 C        Zero the potential energy for this list
366           vctot            = 0               
367           Vvdwtot          = 0               
369 C        Clear i atom forces
370           
371           do k=nj0,nj1
373 C          Get j neighbor index, and coordinate index
374             jnr              = jjnr(k)+1       
375             j3               = 3*jnr-2         
377 C          load j atom coordinates
378             jx1              = pos(j3+0)       
379             jy1              = pos(j3+1)       
380             jz1              = pos(j3+2)       
382 C          Calculate distance
383             dx11             = ix1 - jx1       
384             dy11             = iy1 - jy1       
385             dz11             = iz1 - jz1       
386             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
388 C          Calculate 1/r and 1/r2
390 C          PowerPC intrinsics 1/sqrt lookup table
391 #ifndef GMX_BLUEGENE
392             rinv11           = frsqrtes(rsq11) 
393 #else
394             rinv11           = frsqrte(dble(rsq11)) 
395 #endif
396             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
397      &  *rinv11)))
398 #ifdef GMX_DOUBLE
399             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
400      &  *rinv11)))
401 #endif
403 C          Load parameters for j atom
404             qq               = iq*charge(jnr)  
405             tj               = nti+3*type(jnr)+1
406             c6               = vdwparam(tj)    
407             cexp1            = vdwparam(tj+1)  
408             cexp2            = vdwparam(tj+2)  
409             rinvsq           = rinv11*rinv11   
411 C          Coulomb reaction-field interaction
412             krsq             = krf*rsq11       
413             vcoul            = qq*(rinv11+krsq-crf)
414             vctot            = vctot+vcoul     
416 C          Buckingham interaction
417             rinvsix          = rinvsq*rinvsq*rinvsq
418             Vvdw6            = c6*rinvsix      
419             br               = cexp2*rsq11*rinv11
420             Vvdwexp          = cexp1*exp(-br)  
421             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
423 C          Inner loop uses 55 flops/iteration
424           end do
425           
427 C        Add i forces to mem and shifted force list
429 C        Add potential energies to the group for this list
430           ggid             = gid(n)+1        
431           Vc(ggid)         = Vc(ggid) + vctot
432           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
434 C        Increment number of inner iterations
435           ninner           = ninner + nj1 - nj0
437 C        Outer loop uses 6 flops/iteration
438         end do
439         
441 C      Increment number of outer iterations
442         nouter           = nouter + nn1 - nn0
443       if(nn1.lt.nri) goto 10
445 C    Write outer/inner iteration count to pointers
446       outeriter        = nouter          
447       inneriter        = ninner          
448       return
449       end