Removal of stale Fortran kernels
[gromacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel410.F
blobc9e07b7bc776d76844b6e38e3ed15415c9d6dae9
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel410
45 C Coulomb interaction:     Generalized-Born
46 C VdW interaction:         Lennard-Jones
47 C water optimization:      No
48 C Calculate forces:        yes
50       subroutine pwr6kernel410(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       iq
98       gmxreal       qq,vcoul,vctot
99       integer*4     nti
100       integer*4     tj
101       gmxreal       rinvsix
102       gmxreal       Vvdw6,Vvdwtot
103       gmxreal       Vvdw12
104       gmxreal       r,rt,eps,eps2
105       integer*4     n0,nnn
106       gmxreal       Y,F,Geps,Heps2,Fp,VV
107       gmxreal       FF
108       gmxreal       fijC
109       gmxreal       isai,isaj,isaprod,gbscale,vgb
110       gmxreal       dvdasum,dvdatmp,dvdaj,fgb
111       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
112       gmxreal       jx1,jy1,jz1
113       gmxreal       dx11,dy11,dz11,rsq11,rinv11
114       gmxreal       c6,c12
117 C    Reset outer and inner iteration counters
118       nouter           = 0               
119       ninner           = 0               
121 C    Loop over thread workunits
122    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
123         if(nn1.gt.nri) nn1=nri
125 C      Start outer loop over neighborlists
126         
127         do n=nn0+1,nn1
129 C        Load shift vector for this list
130           is3              = 3*shift(n)+1    
131           shX              = shiftvec(is3)   
132           shY              = shiftvec(is3+1) 
133           shZ              = shiftvec(is3+2) 
135 C        Load limits for loop over neighbors
136           nj0              = jindex(n)+1     
137           nj1              = jindex(n+1)     
139 C        Get outer coordinate index
140           ii               = iinr(n)+1       
141           ii3              = 3*ii-2          
143 C        Load i atom data, add shift vector
144           ix1              = shX + pos(ii3+0)
145           iy1              = shY + pos(ii3+1)
146           iz1              = shZ + pos(ii3+2)
148 C        Load parameters for i atom
149           iq               = facel*charge(ii)
150           isai             = invsqrta(ii)    
151           nti              = 2*ntype*type(ii)
153 C        Zero the potential energy for this list
154           vctot            = 0               
155           Vvdwtot          = 0               
156           dvdasum          = 0               
158 C        Clear i atom forces
159           fix1             = 0               
160           fiy1             = 0               
161           fiz1             = 0               
162           
163           do k=nj0,nj1
165 C          Get j neighbor index, and coordinate index
166             jnr              = jjnr(k)+1       
167             j3               = 3*jnr-2         
169 C          load j atom coordinates
170             jx1              = pos(j3+0)       
171             jy1              = pos(j3+1)       
172             jz1              = pos(j3+2)       
174 C          Calculate distance
175             dx11             = ix1 - jx1       
176             dy11             = iy1 - jy1       
177             dz11             = iz1 - jz1       
178             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
180 C          Calculate 1/r and 1/r2
182 C          PowerPC intrinsics 1/sqrt lookup table
183 #ifndef GMX_BLUEGENE
184             rinv11           = frsqrtes(rsq11) 
185 #else
186             rinv11           = frsqrte(dble(rsq11)) 
187 #endif
188             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
189      &  *rinv11)))
190 #ifdef GMX_DOUBLE
191             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
192      &  *rinv11)))
193 #endif
195 C          Load parameters for j atom
196             isaj             = invsqrta(jnr)   
197             isaprod          = isai*isaj       
198             qq               = iq*charge(jnr)  
199             vcoul            = qq*rinv11       
200             fscal            = vcoul*rinv11    
201             qq               = isaprod*(-qq)   
202             gbscale          = isaprod*gbtabscale
203             tj               = nti+2*type(jnr)+1
204             c6               = vdwparam(tj)    
205             c12              = vdwparam(tj+1)  
206             rinvsq           = rinv11*rinv11   
208 C          Tabulated Generalized-Born interaction
209             dvdaj            = dvda(jnr)       
210             r                = rsq11*rinv11    
212 C          Calculate table index
213             rt               = r*gbscale       
214             n0               = rt              
215             eps              = rt-n0           
216             eps2             = eps*eps         
217             nnn              = 4*n0+1          
218             Y                = GBtab(nnn)      
219             F                = GBtab(nnn+1)    
220             Geps             = eps*GBtab(nnn+2)
221             Heps2            = eps2*GBtab(nnn+3)
222             Fp               = F+Geps+Heps2    
223             VV               = Y+eps*Fp        
224             FF               = Fp+Geps+2.0*Heps2
225             vgb              = qq*VV           
226             fijC             = qq*FF*gbscale   
227             dvdatmp          = -0.5*(vgb+fijC*r)
228             dvdasum          = dvdasum + dvdatmp
229             dvda(jnr)        = dvdaj+dvdatmp*isaj*isaj
230             vctot            = vctot + vcoul   
232 C          Lennard-Jones interaction
233             rinvsix          = rinvsq*rinvsq*rinvsq
234             Vvdw6            = c6*rinvsix      
235             Vvdw12           = c12*rinvsix*rinvsix
236             Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6
237             fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq
238      &  -(fijC-fscal)*rinv11
240 C          Calculate temporary vectorial force
241             tx               = fscal*dx11      
242             ty               = fscal*dy11      
243             tz               = fscal*dz11      
245 C          Increment i atom force
246             fix1             = fix1 + tx       
247             fiy1             = fiy1 + ty       
248             fiz1             = fiz1 + tz       
250 C          Decrement j atom force
251             faction(j3+0)    = faction(j3+0) - tx
252             faction(j3+1)    = faction(j3+1) - ty
253             faction(j3+2)    = faction(j3+2) - tz
255 C          Inner loop uses 63 flops/iteration
256           end do
257           
259 C        Add i forces to mem and shifted force list
260           faction(ii3+0)   = faction(ii3+0) + fix1
261           faction(ii3+1)   = faction(ii3+1) + fiy1
262           faction(ii3+2)   = faction(ii3+2) + fiz1
263           fshift(is3)      = fshift(is3)+fix1
264           fshift(is3+1)    = fshift(is3+1)+fiy1
265           fshift(is3+2)    = fshift(is3+2)+fiz1
267 C        Add potential energies to the group for this list
268           ggid             = gid(n)+1        
269           Vc(ggid)         = Vc(ggid) + vctot
270           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
271           dvda(ii)         = dvda(ii) + dvdasum*isai*isai
273 C        Increment number of inner iterations
274           ninner           = ninner + nj1 - nj0
276 C        Outer loop uses 13 flops/iteration
277         end do
278         
280 C      Increment number of outer iterations
281         nouter           = nouter + nn1 - nn0
282       if(nn1.lt.nri) goto 10
284 C    Write outer/inner iteration count to pointers
285       outeriter        = nouter          
286       inneriter        = ninner          
287       return
288       end
296 C Gromacs nonbonded kernel pwr6kernel410nf
297 C Coulomb interaction:     Generalized-Born
298 C VdW interaction:         Lennard-Jones
299 C water optimization:      No
300 C Calculate forces:        no
302       subroutine pwr6kernel410nf(
303      &                          nri,
304      &                          iinr,
305      &                          jindex,
306      &                          jjnr,
307      &                          shift,
308      &                          shiftvec,
309      &                          fshift,
310      &                          gid,
311      &                          pos,
312      &                          faction,
313      &                          charge,
314      &                          facel,
315      &                          krf,
316      &                          crf,
317      &                          Vc,
318      &                          type,
319      &                          ntype,
320      &                          vdwparam,
321      &                          Vvdw,
322      &                          tabscale,
323      &                          VFtab,
324      &                          invsqrta,
325      &                          dvda,
326      &                          gbtabscale,
327      &                          GBtab,
328      &                          nthreads,
329      &                          count,
330      &                          mtx,
331      &                          outeriter,
332      &                          inneriter,
333      &                          work)
334       implicit      none
335       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
336       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
337       integer*4     gid(*),type(*),ntype
338       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
339       gmxreal       Vvdw(*),tabscale,VFtab(*)
340       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
341       integer*4     nthreads,count,mtx,outeriter,inneriter
342       gmxreal       work(*)
344       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
345       integer*4     nn0,nn1,nouter,ninner
346       gmxreal       shX,shY,shZ
347       gmxreal       rinvsq
348       gmxreal       iq
349       gmxreal       qq,vcoul,vctot
350       integer*4     nti
351       integer*4     tj
352       gmxreal       rinvsix
353       gmxreal       Vvdw6,Vvdwtot
354       gmxreal       Vvdw12
355       gmxreal       r,rt,eps,eps2
356       integer*4     n0,nnn
357       gmxreal       Y,F,Geps,Heps2,Fp,VV
358       gmxreal       isai,isaj,isaprod,gbscale,vgb
359       gmxreal       ix1,iy1,iz1
360       gmxreal       jx1,jy1,jz1
361       gmxreal       dx11,dy11,dz11,rsq11,rinv11
362       gmxreal       c6,c12
365 C    Reset outer and inner iteration counters
366       nouter           = 0               
367       ninner           = 0               
369 C    Loop over thread workunits
370    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
371         if(nn1.gt.nri) nn1=nri
373 C      Start outer loop over neighborlists
374         
375         do n=nn0+1,nn1
377 C        Load shift vector for this list
378           is3              = 3*shift(n)+1    
379           shX              = shiftvec(is3)   
380           shY              = shiftvec(is3+1) 
381           shZ              = shiftvec(is3+2) 
383 C        Load limits for loop over neighbors
384           nj0              = jindex(n)+1     
385           nj1              = jindex(n+1)     
387 C        Get outer coordinate index
388           ii               = iinr(n)+1       
389           ii3              = 3*ii-2          
391 C        Load i atom data, add shift vector
392           ix1              = shX + pos(ii3+0)
393           iy1              = shY + pos(ii3+1)
394           iz1              = shZ + pos(ii3+2)
396 C        Load parameters for i atom
397           iq               = facel*charge(ii)
398           isai             = invsqrta(ii)    
399           nti              = 2*ntype*type(ii)
401 C        Zero the potential energy for this list
402           vctot            = 0               
403           Vvdwtot          = 0               
405 C        Clear i atom forces
406           
407           do k=nj0,nj1
409 C          Get j neighbor index, and coordinate index
410             jnr              = jjnr(k)+1       
411             j3               = 3*jnr-2         
413 C          load j atom coordinates
414             jx1              = pos(j3+0)       
415             jy1              = pos(j3+1)       
416             jz1              = pos(j3+2)       
418 C          Calculate distance
419             dx11             = ix1 - jx1       
420             dy11             = iy1 - jy1       
421             dz11             = iz1 - jz1       
422             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
424 C          Calculate 1/r and 1/r2
426 C          PowerPC intrinsics 1/sqrt lookup table
427 #ifndef GMX_BLUEGENE
428             rinv11           = frsqrtes(rsq11) 
429 #else
430             rinv11           = frsqrte(dble(rsq11)) 
431 #endif
432             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
433      &  *rinv11)))
434 #ifdef GMX_DOUBLE
435             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
436      &  *rinv11)))
437 #endif
439 C          Load parameters for j atom
440             isaj             = invsqrta(jnr)   
441             isaprod          = isai*isaj       
442             qq               = iq*charge(jnr)  
443             vcoul            = qq*rinv11       
444             qq               = isaprod*(-qq)   
445             gbscale          = isaprod*gbtabscale
446             tj               = nti+2*type(jnr)+1
447             c6               = vdwparam(tj)    
448             c12              = vdwparam(tj+1)  
449             rinvsq           = rinv11*rinv11   
451 C          Tabulated Generalized-Born interaction
452             r                = rsq11*rinv11    
454 C          Calculate table index
455             rt               = r*gbscale       
456             n0               = rt              
457             eps              = rt-n0           
458             eps2             = eps*eps         
459             nnn              = 4*n0+1          
460             Y                = GBtab(nnn)      
461             F                = GBtab(nnn+1)    
462             Geps             = eps*GBtab(nnn+2)
463             Heps2            = eps2*GBtab(nnn+3)
464             Fp               = F+Geps+Heps2    
465             VV               = Y+eps*Fp        
466             vgb              = qq*VV           
467             vctot            = vctot + vcoul   
469 C          Lennard-Jones interaction
470             rinvsix          = rinvsq*rinvsq*rinvsq
471             Vvdw6            = c6*rinvsix      
472             Vvdw12           = c12*rinvsix*rinvsix
473             Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6
475 C          Inner loop uses 38 flops/iteration
476           end do
477           
479 C        Add i forces to mem and shifted force list
481 C        Add potential energies to the group for this list
482           ggid             = gid(n)+1        
483           Vc(ggid)         = Vc(ggid) + vctot
484           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
486 C        Increment number of inner iterations
487           ninner           = ninner + nj1 - nj0
489 C        Outer loop uses 6 flops/iteration
490         end do
491         
493 C      Increment number of outer iterations
494         nouter           = nouter + nn1 - nn0
495       if(nn1.lt.nri) goto 10
497 C    Write outer/inner iteration count to pointers
498       outeriter        = nouter          
499       inneriter        = ninner          
500       return
501       end