Removal of stale Fortran kernels
[gromacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel230.F
blob004f622b9e03e0c654d0ba7b2207de322e722dc1
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel230
45 C Coulomb interaction:     Reaction field
46 C VdW interaction:         Tabulated
47 C water optimization:      No
48 C Calculate forces:        yes
50       subroutine pwr6kernel230(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       iq
98       gmxreal       qq,vcoul,vctot
99       integer*4     nti
100       integer*4     tj
101       gmxreal       Vvdw6,Vvdwtot
102       gmxreal       Vvdw12
103       gmxreal       r,rt,eps,eps2
104       integer*4     n0,nnn
105       gmxreal       Y,F,Geps,Heps2,Fp,VV
106       gmxreal       FF
107       gmxreal       fijD,fijR
108       gmxreal       krsq
109       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
110       gmxreal       jx1,jy1,jz1
111       gmxreal       dx11,dy11,dz11,rsq11,rinv11
112       gmxreal       c6,c12
115 C    Reset outer and inner iteration counters
116       nouter           = 0               
117       ninner           = 0               
119 C    Loop over thread workunits
120    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
121         if(nn1.gt.nri) nn1=nri
123 C      Start outer loop over neighborlists
124         
125         do n=nn0+1,nn1
127 C        Load shift vector for this list
128           is3              = 3*shift(n)+1    
129           shX              = shiftvec(is3)   
130           shY              = shiftvec(is3+1) 
131           shZ              = shiftvec(is3+2) 
133 C        Load limits for loop over neighbors
134           nj0              = jindex(n)+1     
135           nj1              = jindex(n+1)     
137 C        Get outer coordinate index
138           ii               = iinr(n)+1       
139           ii3              = 3*ii-2          
141 C        Load i atom data, add shift vector
142           ix1              = shX + pos(ii3+0)
143           iy1              = shY + pos(ii3+1)
144           iz1              = shZ + pos(ii3+2)
146 C        Load parameters for i atom
147           iq               = facel*charge(ii)
148           nti              = 2*ntype*type(ii)
150 C        Zero the potential energy for this list
151           vctot            = 0               
152           Vvdwtot          = 0               
154 C        Clear i atom forces
155           fix1             = 0               
156           fiy1             = 0               
157           fiz1             = 0               
158           
159           do k=nj0,nj1
161 C          Get j neighbor index, and coordinate index
162             jnr              = jjnr(k)+1       
163             j3               = 3*jnr-2         
165 C          load j atom coordinates
166             jx1              = pos(j3+0)       
167             jy1              = pos(j3+1)       
168             jz1              = pos(j3+2)       
170 C          Calculate distance
171             dx11             = ix1 - jx1       
172             dy11             = iy1 - jy1       
173             dz11             = iz1 - jz1       
174             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
176 C          Calculate 1/r and 1/r2
178 C          PowerPC intrinsics 1/sqrt lookup table
179 #ifndef GMX_BLUEGENE
180             rinv11           = frsqrtes(rsq11) 
181 #else
182             rinv11           = frsqrte(dble(rsq11)) 
183 #endif
184             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
185      &  *rinv11)))
186 #ifdef GMX_DOUBLE
187             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
188      &  *rinv11)))
189 #endif
191 C          Load parameters for j atom
192             qq               = iq*charge(jnr)  
193             tj               = nti+2*type(jnr)+1
194             c6               = vdwparam(tj)    
195             c12              = vdwparam(tj+1)  
196             rinvsq           = rinv11*rinv11   
198 C          Coulomb reaction-field interaction
199             krsq             = krf*rsq11       
200             vcoul            = qq*(rinv11+krsq-crf)
201             vctot            = vctot+vcoul     
203 C          Calculate table index
204             r                = rsq11*rinv11    
206 C          Calculate table index
207             rt               = r*tabscale      
208             n0               = rt              
209             eps              = rt-n0           
210             eps2             = eps*eps         
211             nnn              = 8*n0+1          
213 C          Tabulated VdW interaction - dispersion
214             Y                = VFtab(nnn)      
215             F                = VFtab(nnn+1)    
216             Geps             = eps*VFtab(nnn+2)
217             Heps2            = eps2*VFtab(nnn+3)
218             Fp               = F+Geps+Heps2    
219             VV               = Y+eps*Fp        
220             FF               = Fp+Geps+2.0*Heps2
221             Vvdw6            = c6*VV           
222             fijD             = c6*FF           
224 C          Tabulated VdW interaction - repulsion
225             nnn              = nnn+4           
226             Y                = VFtab(nnn)      
227             F                = VFtab(nnn+1)    
228             Geps             = eps*VFtab(nnn+2)
229             Heps2            = eps2*VFtab(nnn+3)
230             Fp               = F+Geps+Heps2    
231             VV               = Y+eps*Fp        
232             FF               = Fp+Geps+2.0*Heps2
233             Vvdw12           = c12*VV          
234             fijR             = c12*FF          
235             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
236             fscal            = (qq*(rinv11-2.0*krsq))*rinvsq
237      &  -((fijD+fijR)*tabscale)*rinv11
239 C          Calculate temporary vectorial force
240             tx               = fscal*dx11      
241             ty               = fscal*dy11      
242             tz               = fscal*dz11      
244 C          Increment i atom force
245             fix1             = fix1 + tx       
246             fiy1             = fiy1 + ty       
247             fiz1             = fiz1 + tz       
249 C          Decrement j atom force
250             faction(j3+0)    = faction(j3+0) - tx
251             faction(j3+1)    = faction(j3+1) - ty
252             faction(j3+2)    = faction(j3+2) - tz
254 C          Inner loop uses 66 flops/iteration
255           end do
256           
258 C        Add i forces to mem and shifted force list
259           faction(ii3+0)   = faction(ii3+0) + fix1
260           faction(ii3+1)   = faction(ii3+1) + fiy1
261           faction(ii3+2)   = faction(ii3+2) + fiz1
262           fshift(is3)      = fshift(is3)+fix1
263           fshift(is3+1)    = fshift(is3+1)+fiy1
264           fshift(is3+2)    = fshift(is3+2)+fiz1
266 C        Add potential energies to the group for this list
267           ggid             = gid(n)+1        
268           Vc(ggid)         = Vc(ggid) + vctot
269           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
271 C        Increment number of inner iterations
272           ninner           = ninner + nj1 - nj0
274 C        Outer loop uses 12 flops/iteration
275         end do
276         
278 C      Increment number of outer iterations
279         nouter           = nouter + nn1 - nn0
280       if(nn1.lt.nri) goto 10
282 C    Write outer/inner iteration count to pointers
283       outeriter        = nouter          
284       inneriter        = ninner          
285       return
286       end
294 C Gromacs nonbonded kernel pwr6kernel230nf
295 C Coulomb interaction:     Reaction field
296 C VdW interaction:         Tabulated
297 C water optimization:      No
298 C Calculate forces:        no
300       subroutine pwr6kernel230nf(
301      &                          nri,
302      &                          iinr,
303      &                          jindex,
304      &                          jjnr,
305      &                          shift,
306      &                          shiftvec,
307      &                          fshift,
308      &                          gid,
309      &                          pos,
310      &                          faction,
311      &                          charge,
312      &                          facel,
313      &                          krf,
314      &                          crf,
315      &                          Vc,
316      &                          type,
317      &                          ntype,
318      &                          vdwparam,
319      &                          Vvdw,
320      &                          tabscale,
321      &                          VFtab,
322      &                          invsqrta,
323      &                          dvda,
324      &                          gbtabscale,
325      &                          GBtab,
326      &                          nthreads,
327      &                          count,
328      &                          mtx,
329      &                          outeriter,
330      &                          inneriter,
331      &                          work)
332       implicit      none
333       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
334       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
335       integer*4     gid(*),type(*),ntype
336       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
337       gmxreal       Vvdw(*),tabscale,VFtab(*)
338       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
339       integer*4     nthreads,count,mtx,outeriter,inneriter
340       gmxreal       work(*)
342       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
343       integer*4     nn0,nn1,nouter,ninner
344       gmxreal       shX,shY,shZ
345       gmxreal       iq
346       gmxreal       qq,vcoul,vctot
347       integer*4     nti
348       integer*4     tj
349       gmxreal       Vvdw6,Vvdwtot
350       gmxreal       Vvdw12
351       gmxreal       r,rt,eps,eps2
352       integer*4     n0,nnn
353       gmxreal       Y,F,Geps,Heps2,Fp,VV
354       gmxreal       krsq
355       gmxreal       ix1,iy1,iz1
356       gmxreal       jx1,jy1,jz1
357       gmxreal       dx11,dy11,dz11,rsq11,rinv11
358       gmxreal       c6,c12
361 C    Reset outer and inner iteration counters
362       nouter           = 0               
363       ninner           = 0               
365 C    Loop over thread workunits
366    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
367         if(nn1.gt.nri) nn1=nri
369 C      Start outer loop over neighborlists
370         
371         do n=nn0+1,nn1
373 C        Load shift vector for this list
374           is3              = 3*shift(n)+1    
375           shX              = shiftvec(is3)   
376           shY              = shiftvec(is3+1) 
377           shZ              = shiftvec(is3+2) 
379 C        Load limits for loop over neighbors
380           nj0              = jindex(n)+1     
381           nj1              = jindex(n+1)     
383 C        Get outer coordinate index
384           ii               = iinr(n)+1       
385           ii3              = 3*ii-2          
387 C        Load i atom data, add shift vector
388           ix1              = shX + pos(ii3+0)
389           iy1              = shY + pos(ii3+1)
390           iz1              = shZ + pos(ii3+2)
392 C        Load parameters for i atom
393           iq               = facel*charge(ii)
394           nti              = 2*ntype*type(ii)
396 C        Zero the potential energy for this list
397           vctot            = 0               
398           Vvdwtot          = 0               
400 C        Clear i atom forces
401           
402           do k=nj0,nj1
404 C          Get j neighbor index, and coordinate index
405             jnr              = jjnr(k)+1       
406             j3               = 3*jnr-2         
408 C          load j atom coordinates
409             jx1              = pos(j3+0)       
410             jy1              = pos(j3+1)       
411             jz1              = pos(j3+2)       
413 C          Calculate distance
414             dx11             = ix1 - jx1       
415             dy11             = iy1 - jy1       
416             dz11             = iz1 - jz1       
417             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
419 C          Calculate 1/r and 1/r2
421 C          PowerPC intrinsics 1/sqrt lookup table
422 #ifndef GMX_BLUEGENE
423             rinv11           = frsqrtes(rsq11) 
424 #else
425             rinv11           = frsqrte(dble(rsq11)) 
426 #endif
427             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
428      &  *rinv11)))
429 #ifdef GMX_DOUBLE
430             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
431      &  *rinv11)))
432 #endif
434 C          Load parameters for j atom
435             qq               = iq*charge(jnr)  
436             tj               = nti+2*type(jnr)+1
437             c6               = vdwparam(tj)    
438             c12              = vdwparam(tj+1)  
440 C          Coulomb reaction-field interaction
441             krsq             = krf*rsq11       
442             vcoul            = qq*(rinv11+krsq-crf)
443             vctot            = vctot+vcoul     
445 C          Calculate table index
446             r                = rsq11*rinv11    
448 C          Calculate table index
449             rt               = r*tabscale      
450             n0               = rt              
451             eps              = rt-n0           
452             eps2             = eps*eps         
453             nnn              = 8*n0+1          
455 C          Tabulated VdW interaction - dispersion
456             Y                = VFtab(nnn)      
457             F                = VFtab(nnn+1)    
458             Geps             = eps*VFtab(nnn+2)
459             Heps2            = eps2*VFtab(nnn+3)
460             Fp               = F+Geps+Heps2    
461             VV               = Y+eps*Fp        
462             Vvdw6            = c6*VV           
464 C          Tabulated VdW interaction - repulsion
465             nnn              = nnn+4           
466             Y                = VFtab(nnn)      
467             F                = VFtab(nnn+1)    
468             Geps             = eps*VFtab(nnn+2)
469             Heps2            = eps2*VFtab(nnn+3)
470             Fp               = F+Geps+Heps2    
471             VV               = Y+eps*Fp        
472             Vvdw12           = c12*VV          
473             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
475 C          Inner loop uses 40 flops/iteration
476           end do
477           
479 C        Add i forces to mem and shifted force list
481 C        Add potential energies to the group for this list
482           ggid             = gid(n)+1        
483           Vc(ggid)         = Vc(ggid) + vctot
484           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
486 C        Increment number of inner iterations
487           ninner           = ninner + nj1 - nj0
489 C        Outer loop uses 6 flops/iteration
490         end do
491         
493 C      Increment number of outer iterations
494         nouter           = nouter + nn1 - nn0
495       if(nn1.lt.nri) goto 10
497 C    Write outer/inner iteration count to pointers
498       outeriter        = nouter          
499       inneriter        = ninner          
500       return
501       end