Removal of stale Fortran kernels
[gromacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel430.F
blobee2c5d9b8744f35f8ba015b565e716fd1c1afc5a
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel430
45 C Coulomb interaction:     Generalized-Born
46 C VdW interaction:         Tabulated
47 C water optimization:      No
48 C Calculate forces:        yes
50       subroutine pwr6kernel430(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       iq
97       gmxreal       qq,vcoul,vctot
98       integer*4     nti
99       integer*4     tj
100       gmxreal       Vvdw6,Vvdwtot
101       gmxreal       Vvdw12
102       gmxreal       r,rt,eps,eps2
103       integer*4     n0,nnn
104       gmxreal       Y,F,Geps,Heps2,Fp,VV
105       gmxreal       FF
106       gmxreal       fijC
107       gmxreal       fijD,fijR
108       gmxreal       isai,isaj,isaprod,gbscale,vgb
109       gmxreal       dvdasum,dvdatmp,dvdaj,fgb
110       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
111       gmxreal       jx1,jy1,jz1
112       gmxreal       dx11,dy11,dz11,rsq11,rinv11
113       gmxreal       c6,c12
116 C    Reset outer and inner iteration counters
117       nouter           = 0               
118       ninner           = 0               
120 C    Loop over thread workunits
121    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
122         if(nn1.gt.nri) nn1=nri
124 C      Start outer loop over neighborlists
125         
126         do n=nn0+1,nn1
128 C        Load shift vector for this list
129           is3              = 3*shift(n)+1    
130           shX              = shiftvec(is3)   
131           shY              = shiftvec(is3+1) 
132           shZ              = shiftvec(is3+2) 
134 C        Load limits for loop over neighbors
135           nj0              = jindex(n)+1     
136           nj1              = jindex(n+1)     
138 C        Get outer coordinate index
139           ii               = iinr(n)+1       
140           ii3              = 3*ii-2          
142 C        Load i atom data, add shift vector
143           ix1              = shX + pos(ii3+0)
144           iy1              = shY + pos(ii3+1)
145           iz1              = shZ + pos(ii3+2)
147 C        Load parameters for i atom
148           iq               = facel*charge(ii)
149           isai             = invsqrta(ii)    
150           nti              = 2*ntype*type(ii)
152 C        Zero the potential energy for this list
153           vctot            = 0               
154           Vvdwtot          = 0               
155           dvdasum          = 0               
157 C        Clear i atom forces
158           fix1             = 0               
159           fiy1             = 0               
160           fiz1             = 0               
161           
162           do k=nj0,nj1
164 C          Get j neighbor index, and coordinate index
165             jnr              = jjnr(k)+1       
166             j3               = 3*jnr-2         
168 C          load j atom coordinates
169             jx1              = pos(j3+0)       
170             jy1              = pos(j3+1)       
171             jz1              = pos(j3+2)       
173 C          Calculate distance
174             dx11             = ix1 - jx1       
175             dy11             = iy1 - jy1       
176             dz11             = iz1 - jz1       
177             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
179 C          Calculate 1/r and 1/r2
181 C          PowerPC intrinsics 1/sqrt lookup table
182 #ifndef GMX_BLUEGENE
183             rinv11           = frsqrtes(rsq11) 
184 #else
185             rinv11           = frsqrte(dble(rsq11)) 
186 #endif
187             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
188      &  *rinv11)))
189 #ifdef GMX_DOUBLE
190             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
191      &  *rinv11)))
192 #endif
194 C          Load parameters for j atom
195             isaj             = invsqrta(jnr)   
196             isaprod          = isai*isaj       
197             qq               = iq*charge(jnr)  
198             vcoul            = qq*rinv11       
199             fscal            = vcoul*rinv11    
200             qq               = isaprod*(-qq)   
201             gbscale          = isaprod*gbtabscale
202             tj               = nti+2*type(jnr)+1
203             c6               = vdwparam(tj)    
204             c12              = vdwparam(tj+1)  
206 C          Tabulated Generalized-Born interaction
207             dvdaj            = dvda(jnr)       
208             r                = rsq11*rinv11    
210 C          Calculate table index
211             rt               = r*gbscale       
212             n0               = rt              
213             eps              = rt-n0           
214             eps2             = eps*eps         
215             nnn              = 4*n0+1          
216             Y                = GBtab(nnn)      
217             F                = GBtab(nnn+1)    
218             Geps             = eps*GBtab(nnn+2)
219             Heps2            = eps2*GBtab(nnn+3)
220             Fp               = F+Geps+Heps2    
221             VV               = Y+eps*Fp        
222             FF               = Fp+Geps+2.0*Heps2
223             vgb              = qq*VV           
224             fijC             = qq*FF*gbscale   
225             dvdatmp          = -0.5*(vgb+fijC*r)
226             dvdasum          = dvdasum + dvdatmp
227             dvda(jnr)        = dvdaj+dvdatmp*isaj*isaj
228             vctot            = vctot + vcoul   
230 C          Calculate table index
231             r                = rsq11*rinv11    
233 C          Calculate table index
234             rt               = r*tabscale      
235             n0               = rt              
236             eps              = rt-n0           
237             eps2             = eps*eps         
238             nnn              = 8*n0+1          
240 C          Tabulated VdW interaction - dispersion
241             Y                = VFtab(nnn)      
242             F                = VFtab(nnn+1)    
243             Geps             = eps*VFtab(nnn+2)
244             Heps2            = eps2*VFtab(nnn+3)
245             Fp               = F+Geps+Heps2    
246             VV               = Y+eps*Fp        
247             FF               = Fp+Geps+2.0*Heps2
248             Vvdw6            = c6*VV           
249             fijD             = c6*FF           
251 C          Tabulated VdW interaction - repulsion
252             nnn              = nnn+4           
253             Y                = VFtab(nnn)      
254             F                = VFtab(nnn+1)    
255             Geps             = eps*VFtab(nnn+2)
256             Heps2            = eps2*VFtab(nnn+3)
257             Fp               = F+Geps+Heps2    
258             VV               = Y+eps*Fp        
259             FF               = Fp+Geps+2.0*Heps2
260             Vvdw12           = c12*VV          
261             fijR             = c12*FF          
262             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
263             fscal            = -((fijD+fijR)*tabscale+fijC-fscal)
264      &  *rinv11
266 C          Calculate temporary vectorial force
267             tx               = fscal*dx11      
268             ty               = fscal*dy11      
269             tz               = fscal*dz11      
271 C          Increment i atom force
272             fix1             = fix1 + tx       
273             fiy1             = fiy1 + ty       
274             fiz1             = fiz1 + tz       
276 C          Decrement j atom force
277             faction(j3+0)    = faction(j3+0) - tx
278             faction(j3+1)    = faction(j3+1) - ty
279             faction(j3+2)    = faction(j3+2) - tz
281 C          Inner loop uses 81 flops/iteration
282           end do
283           
285 C        Add i forces to mem and shifted force list
286           faction(ii3+0)   = faction(ii3+0) + fix1
287           faction(ii3+1)   = faction(ii3+1) + fiy1
288           faction(ii3+2)   = faction(ii3+2) + fiz1
289           fshift(is3)      = fshift(is3)+fix1
290           fshift(is3+1)    = fshift(is3+1)+fiy1
291           fshift(is3+2)    = fshift(is3+2)+fiz1
293 C        Add potential energies to the group for this list
294           ggid             = gid(n)+1        
295           Vc(ggid)         = Vc(ggid) + vctot
296           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
297           dvda(ii)         = dvda(ii) + dvdasum*isai*isai
299 C        Increment number of inner iterations
300           ninner           = ninner + nj1 - nj0
302 C        Outer loop uses 13 flops/iteration
303         end do
304         
306 C      Increment number of outer iterations
307         nouter           = nouter + nn1 - nn0
308       if(nn1.lt.nri) goto 10
310 C    Write outer/inner iteration count to pointers
311       outeriter        = nouter          
312       inneriter        = ninner          
313       return
314       end
322 C Gromacs nonbonded kernel pwr6kernel430nf
323 C Coulomb interaction:     Generalized-Born
324 C VdW interaction:         Tabulated
325 C water optimization:      No
326 C Calculate forces:        no
328       subroutine pwr6kernel430nf(
329      &                          nri,
330      &                          iinr,
331      &                          jindex,
332      &                          jjnr,
333      &                          shift,
334      &                          shiftvec,
335      &                          fshift,
336      &                          gid,
337      &                          pos,
338      &                          faction,
339      &                          charge,
340      &                          facel,
341      &                          krf,
342      &                          crf,
343      &                          Vc,
344      &                          type,
345      &                          ntype,
346      &                          vdwparam,
347      &                          Vvdw,
348      &                          tabscale,
349      &                          VFtab,
350      &                          invsqrta,
351      &                          dvda,
352      &                          gbtabscale,
353      &                          GBtab,
354      &                          nthreads,
355      &                          count,
356      &                          mtx,
357      &                          outeriter,
358      &                          inneriter,
359      &                          work)
360       implicit      none
361       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
362       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
363       integer*4     gid(*),type(*),ntype
364       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
365       gmxreal       Vvdw(*),tabscale,VFtab(*)
366       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
367       integer*4     nthreads,count,mtx,outeriter,inneriter
368       gmxreal       work(*)
370       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
371       integer*4     nn0,nn1,nouter,ninner
372       gmxreal       shX,shY,shZ
373       gmxreal       iq
374       gmxreal       qq,vcoul,vctot
375       integer*4     nti
376       integer*4     tj
377       gmxreal       Vvdw6,Vvdwtot
378       gmxreal       Vvdw12
379       gmxreal       r,rt,eps,eps2
380       integer*4     n0,nnn
381       gmxreal       Y,F,Geps,Heps2,Fp,VV
382       gmxreal       isai,isaj,isaprod,gbscale,vgb
383       gmxreal       ix1,iy1,iz1
384       gmxreal       jx1,jy1,jz1
385       gmxreal       dx11,dy11,dz11,rsq11,rinv11
386       gmxreal       c6,c12
389 C    Reset outer and inner iteration counters
390       nouter           = 0               
391       ninner           = 0               
393 C    Loop over thread workunits
394    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
395         if(nn1.gt.nri) nn1=nri
397 C      Start outer loop over neighborlists
398         
399         do n=nn0+1,nn1
401 C        Load shift vector for this list
402           is3              = 3*shift(n)+1    
403           shX              = shiftvec(is3)   
404           shY              = shiftvec(is3+1) 
405           shZ              = shiftvec(is3+2) 
407 C        Load limits for loop over neighbors
408           nj0              = jindex(n)+1     
409           nj1              = jindex(n+1)     
411 C        Get outer coordinate index
412           ii               = iinr(n)+1       
413           ii3              = 3*ii-2          
415 C        Load i atom data, add shift vector
416           ix1              = shX + pos(ii3+0)
417           iy1              = shY + pos(ii3+1)
418           iz1              = shZ + pos(ii3+2)
420 C        Load parameters for i atom
421           iq               = facel*charge(ii)
422           isai             = invsqrta(ii)    
423           nti              = 2*ntype*type(ii)
425 C        Zero the potential energy for this list
426           vctot            = 0               
427           Vvdwtot          = 0               
429 C        Clear i atom forces
430           
431           do k=nj0,nj1
433 C          Get j neighbor index, and coordinate index
434             jnr              = jjnr(k)+1       
435             j3               = 3*jnr-2         
437 C          load j atom coordinates
438             jx1              = pos(j3+0)       
439             jy1              = pos(j3+1)       
440             jz1              = pos(j3+2)       
442 C          Calculate distance
443             dx11             = ix1 - jx1       
444             dy11             = iy1 - jy1       
445             dz11             = iz1 - jz1       
446             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
448 C          Calculate 1/r and 1/r2
450 C          PowerPC intrinsics 1/sqrt lookup table
451 #ifndef GMX_BLUEGENE
452             rinv11           = frsqrtes(rsq11) 
453 #else
454             rinv11           = frsqrte(dble(rsq11)) 
455 #endif
456             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
457      &  *rinv11)))
458 #ifdef GMX_DOUBLE
459             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
460      &  *rinv11)))
461 #endif
463 C          Load parameters for j atom
464             isaj             = invsqrta(jnr)   
465             isaprod          = isai*isaj       
466             qq               = iq*charge(jnr)  
467             vcoul            = qq*rinv11       
468             qq               = isaprod*(-qq)   
469             gbscale          = isaprod*gbtabscale
470             tj               = nti+2*type(jnr)+1
471             c6               = vdwparam(tj)    
472             c12              = vdwparam(tj+1)  
474 C          Tabulated Generalized-Born interaction
475             r                = rsq11*rinv11    
477 C          Calculate table index
478             rt               = r*gbscale       
479             n0               = rt              
480             eps              = rt-n0           
481             eps2             = eps*eps         
482             nnn              = 4*n0+1          
483             Y                = GBtab(nnn)      
484             F                = GBtab(nnn+1)    
485             Geps             = eps*GBtab(nnn+2)
486             Heps2            = eps2*GBtab(nnn+3)
487             Fp               = F+Geps+Heps2    
488             VV               = Y+eps*Fp        
489             vgb              = qq*VV           
490             vctot            = vctot + vcoul   
492 C          Calculate table index
493             r                = rsq11*rinv11    
495 C          Calculate table index
496             rt               = r*tabscale      
497             n0               = rt              
498             eps              = rt-n0           
499             eps2             = eps*eps         
500             nnn              = 8*n0+1          
502 C          Tabulated VdW interaction - dispersion
503             Y                = VFtab(nnn)      
504             F                = VFtab(nnn+1)    
505             Geps             = eps*VFtab(nnn+2)
506             Heps2            = eps2*VFtab(nnn+3)
507             Fp               = F+Geps+Heps2    
508             VV               = Y+eps*Fp        
509             Vvdw6            = c6*VV           
511 C          Tabulated VdW interaction - repulsion
512             nnn              = nnn+4           
513             Y                = VFtab(nnn)      
514             F                = VFtab(nnn+1)    
515             Geps             = eps*VFtab(nnn+2)
516             Heps2            = eps2*VFtab(nnn+3)
517             Fp               = F+Geps+Heps2    
518             VV               = Y+eps*Fp        
519             Vvdw12           = c12*VV          
520             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
522 C          Inner loop uses 50 flops/iteration
523           end do
524           
526 C        Add i forces to mem and shifted force list
528 C        Add potential energies to the group for this list
529           ggid             = gid(n)+1        
530           Vc(ggid)         = Vc(ggid) + vctot
531           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
533 C        Increment number of inner iterations
534           ninner           = ninner + nj1 - nj0
536 C        Outer loop uses 6 flops/iteration
537         end do
538         
540 C      Increment number of outer iterations
541         nouter           = nouter + nn1 - nn0
542       if(nn1.lt.nri) goto 10
544 C    Write outer/inner iteration count to pointers
545       outeriter        = nouter          
546       inneriter        = ninner          
547       return
548       end