Fixed synchronization counter update for pwr6 kernels
[gromacs/adressmacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel234.F
bloba2669f43c58ab44a6d044a635cf75415d7b01c31
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel234
45 C Coulomb interaction:     Reaction field
46 C VdW interaction:         Tabulated
47 C water optimization:      pairs of TIP4P interactions
48 C Calculate forces:        yes
50       subroutine pwr6kernel234(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       qq,vcoul,vctot
98       integer*4     tj
99       gmxreal       Vvdw6,Vvdwtot
100       gmxreal       Vvdw12
101       gmxreal       r,rt,eps,eps2
102       integer*4     n0,nnn
103       gmxreal       Y,F,Geps,Heps2,Fp,VV
104       gmxreal       FF
105       gmxreal       fijD,fijR
106       gmxreal       krsq
107       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
108       gmxreal       ix2,iy2,iz2,fix2,fiy2,fiz2
109       gmxreal       ix3,iy3,iz3,fix3,fiy3,fiz3
110       gmxreal       ix4,iy4,iz4,fix4,fiy4,fiz4
111       gmxreal       jx1,jy1,jz1
112       gmxreal       jx2,jy2,jz2,fjx2,fjy2,fjz2
113       gmxreal       jx3,jy3,jz3,fjx3,fjy3,fjz3
114       gmxreal       jx4,jy4,jz4,fjx4,fjy4,fjz4
115       gmxreal       dx11,dy11,dz11,rsq11,rinv11
116       gmxreal       dx22,dy22,dz22,rsq22,rinv22
117       gmxreal       dx23,dy23,dz23,rsq23,rinv23
118       gmxreal       dx24,dy24,dz24,rsq24,rinv24
119       gmxreal       dx32,dy32,dz32,rsq32,rinv32
120       gmxreal       dx33,dy33,dz33,rsq33,rinv33
121       gmxreal       dx34,dy34,dz34,rsq34,rinv34
122       gmxreal       dx42,dy42,dz42,rsq42,rinv42
123       gmxreal       dx43,dy43,dz43,rsq43,rinv43
124       gmxreal       dx44,dy44,dz44,rsq44,rinv44
125       gmxreal       qH,qM,qqMM,qqMH,qqHH
126       gmxreal       c6,c12
129 C    Initialize water data
130       ii               = iinr(1)+1       
131       qH               = charge(ii+1)    
132       qM               = charge(ii+3)    
133       qqMM             = facel*qM*qM     
134       qqMH             = facel*qM*qH     
135       qqHH             = facel*qH*qH     
136       tj               = 2*(ntype+1)*type(ii)+1
137       c6               = vdwparam(tj)    
138       c12              = vdwparam(tj+1)  
141 C    Reset outer and inner iteration counters
142       nouter           = 0               
143       ninner           = 0               
145 C    Loop over thread workunits
146    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
147         if(nn1.gt.nri) nn1=nri
149 C      Start outer loop over neighborlists
150         
151         do n=nn0+1,nn1
153 C        Load shift vector for this list
154           is3              = 3*shift(n)+1    
155           shX              = shiftvec(is3)   
156           shY              = shiftvec(is3+1) 
157           shZ              = shiftvec(is3+2) 
159 C        Load limits for loop over neighbors
160           nj0              = jindex(n)+1     
161           nj1              = jindex(n+1)     
163 C        Get outer coordinate index
164           ii               = iinr(n)+1       
165           ii3              = 3*ii-2          
167 C        Load i atom data, add shift vector
168           ix1              = shX + pos(ii3+0)
169           iy1              = shY + pos(ii3+1)
170           iz1              = shZ + pos(ii3+2)
171           ix2              = shX + pos(ii3+3)
172           iy2              = shY + pos(ii3+4)
173           iz2              = shZ + pos(ii3+5)
174           ix3              = shX + pos(ii3+6)
175           iy3              = shY + pos(ii3+7)
176           iz3              = shZ + pos(ii3+8)
177           ix4              = shX + pos(ii3+9)
178           iy4              = shY + pos(ii3+10)
179           iz4              = shZ + pos(ii3+11)
181 C        Zero the potential energy for this list
182           vctot            = 0               
183           Vvdwtot          = 0               
185 C        Clear i atom forces
186           fix1             = 0               
187           fiy1             = 0               
188           fiz1             = 0               
189           fix2             = 0               
190           fiy2             = 0               
191           fiz2             = 0               
192           fix3             = 0               
193           fiy3             = 0               
194           fiz3             = 0               
195           fix4             = 0               
196           fiy4             = 0               
197           fiz4             = 0               
198           
199           do k=nj0,nj1
201 C          Get j neighbor index, and coordinate index
202             jnr              = jjnr(k)+1       
203             j3               = 3*jnr-2         
205 C          load j atom coordinates
206             jx1              = pos(j3+0)       
207             jy1              = pos(j3+1)       
208             jz1              = pos(j3+2)       
209             jx2              = pos(j3+3)       
210             jy2              = pos(j3+4)       
211             jz2              = pos(j3+5)       
212             jx3              = pos(j3+6)       
213             jy3              = pos(j3+7)       
214             jz3              = pos(j3+8)       
215             jx4              = pos(j3+9)       
216             jy4              = pos(j3+10)      
217             jz4              = pos(j3+11)      
219 C          Calculate distance
220             dx11             = ix1 - jx1       
221             dy11             = iy1 - jy1       
222             dz11             = iz1 - jz1       
223             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
224             dx22             = ix2 - jx2       
225             dy22             = iy2 - jy2       
226             dz22             = iz2 - jz2       
227             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
228             dx23             = ix2 - jx3       
229             dy23             = iy2 - jy3       
230             dz23             = iz2 - jz3       
231             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
232             dx24             = ix2 - jx4       
233             dy24             = iy2 - jy4       
234             dz24             = iz2 - jz4       
235             rsq24            = dx24*dx24+dy24*dy24+dz24*dz24
236             dx32             = ix3 - jx2       
237             dy32             = iy3 - jy2       
238             dz32             = iz3 - jz2       
239             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
240             dx33             = ix3 - jx3       
241             dy33             = iy3 - jy3       
242             dz33             = iz3 - jz3       
243             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
244             dx34             = ix3 - jx4       
245             dy34             = iy3 - jy4       
246             dz34             = iz3 - jz4       
247             rsq34            = dx34*dx34+dy34*dy34+dz34*dz34
248             dx42             = ix4 - jx2       
249             dy42             = iy4 - jy2       
250             dz42             = iz4 - jz2       
251             rsq42            = dx42*dx42+dy42*dy42+dz42*dz42
252             dx43             = ix4 - jx3       
253             dy43             = iy4 - jy3       
254             dz43             = iz4 - jz3       
255             rsq43            = dx43*dx43+dy43*dy43+dz43*dz43
256             dx44             = ix4 - jx4       
257             dy44             = iy4 - jy4       
258             dz44             = iz4 - jz4       
259             rsq44            = dx44*dx44+dy44*dy44+dz44*dz44
261 C          Calculate 1/r and 1/r2
263 C          PowerPC intrinsics 1/sqrt lookup table
264 #ifndef GMX_BLUEGENE
265             rinv11           = frsqrtes(rsq11) 
266 #else
267             rinv11           = frsqrte(dble(rsq11)) 
268 #endif
269             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
270      &  *rinv11)))
271 #ifdef GMX_DOUBLE
272             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
273      &  *rinv11)))
274 #endif
276 C          PowerPC intrinsics 1/sqrt lookup table
277 #ifndef GMX_BLUEGENE
278             rinv22           = frsqrtes(rsq22) 
279 #else
280             rinv22           = frsqrte(dble(rsq22)) 
281 #endif
282             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
283      &  *rinv22)))
284 #ifdef GMX_DOUBLE
285             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
286      &  *rinv22)))
287 #endif
289 C          PowerPC intrinsics 1/sqrt lookup table
290 #ifndef GMX_BLUEGENE
291             rinv23           = frsqrtes(rsq23) 
292 #else
293             rinv23           = frsqrte(dble(rsq23)) 
294 #endif
295             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
296      &  *rinv23)))
297 #ifdef GMX_DOUBLE
298             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
299      &  *rinv23)))
300 #endif
302 C          PowerPC intrinsics 1/sqrt lookup table
303 #ifndef GMX_BLUEGENE
304             rinv24           = frsqrtes(rsq24) 
305 #else
306             rinv24           = frsqrte(dble(rsq24)) 
307 #endif
308             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
309      &  *rinv24)))
310 #ifdef GMX_DOUBLE
311             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
312      &  *rinv24)))
313 #endif
315 C          PowerPC intrinsics 1/sqrt lookup table
316 #ifndef GMX_BLUEGENE
317             rinv32           = frsqrtes(rsq32) 
318 #else
319             rinv32           = frsqrte(dble(rsq32)) 
320 #endif
321             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
322      &  *rinv32)))
323 #ifdef GMX_DOUBLE
324             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
325      &  *rinv32)))
326 #endif
328 C          PowerPC intrinsics 1/sqrt lookup table
329 #ifndef GMX_BLUEGENE
330             rinv33           = frsqrtes(rsq33) 
331 #else
332             rinv33           = frsqrte(dble(rsq33)) 
333 #endif
334             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
335      &  *rinv33)))
336 #ifdef GMX_DOUBLE
337             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
338      &  *rinv33)))
339 #endif
341 C          PowerPC intrinsics 1/sqrt lookup table
342 #ifndef GMX_BLUEGENE
343             rinv34           = frsqrtes(rsq34) 
344 #else
345             rinv34           = frsqrte(dble(rsq34)) 
346 #endif
347             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
348      &  *rinv34)))
349 #ifdef GMX_DOUBLE
350             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
351      &  *rinv34)))
352 #endif
354 C          PowerPC intrinsics 1/sqrt lookup table
355 #ifndef GMX_BLUEGENE
356             rinv42           = frsqrtes(rsq42) 
357 #else
358             rinv42           = frsqrte(dble(rsq42)) 
359 #endif
360             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
361      &  *rinv42)))
362 #ifdef GMX_DOUBLE
363             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
364      &  *rinv42)))
365 #endif
367 C          PowerPC intrinsics 1/sqrt lookup table
368 #ifndef GMX_BLUEGENE
369             rinv43           = frsqrtes(rsq43) 
370 #else
371             rinv43           = frsqrte(dble(rsq43)) 
372 #endif
373             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
374      &  *rinv43)))
375 #ifdef GMX_DOUBLE
376             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
377      &  *rinv43)))
378 #endif
380 C          PowerPC intrinsics 1/sqrt lookup table
381 #ifndef GMX_BLUEGENE
382             rinv44           = frsqrtes(rsq44) 
383 #else
384             rinv44           = frsqrte(dble(rsq44)) 
385 #endif
386             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
387      &  *rinv44)))
388 #ifdef GMX_DOUBLE
389             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
390      &  *rinv44)))
391 #endif
393 C          Load parameters for j atom
395 C          Calculate table index
396             r                = rsq11*rinv11    
398 C          Calculate table index
399             rt               = r*tabscale      
400             n0               = rt              
401             eps              = rt-n0           
402             eps2             = eps*eps         
403             nnn              = 8*n0+1          
405 C          Tabulated VdW interaction - dispersion
406             Y                = VFtab(nnn)      
407             F                = VFtab(nnn+1)    
408             Geps             = eps*VFtab(nnn+2)
409             Heps2            = eps2*VFtab(nnn+3)
410             Fp               = F+Geps+Heps2    
411             VV               = Y+eps*Fp        
412             FF               = Fp+Geps+2.0*Heps2
413             Vvdw6            = c6*VV           
414             fijD             = c6*FF           
416 C          Tabulated VdW interaction - repulsion
417             nnn              = nnn+4           
418             Y                = VFtab(nnn)      
419             F                = VFtab(nnn+1)    
420             Geps             = eps*VFtab(nnn+2)
421             Heps2            = eps2*VFtab(nnn+3)
422             Fp               = F+Geps+Heps2    
423             VV               = Y+eps*Fp        
424             FF               = Fp+Geps+2.0*Heps2
425             Vvdw12           = c12*VV          
426             fijR             = c12*FF          
427             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
428             fscal            = -((fijD+fijR)*tabscale)*rinv11
430 C          Calculate temporary vectorial force
431             tx               = fscal*dx11      
432             ty               = fscal*dy11      
433             tz               = fscal*dz11      
435 C          Increment i atom force
436             fix1             = fix1 + tx       
437             fiy1             = fiy1 + ty       
438             fiz1             = fiz1 + tz       
440 C          Decrement j atom force
441             faction(j3+0)    = faction(j3+0) - tx
442             faction(j3+1)    = faction(j3+1) - ty
443             faction(j3+2)    = faction(j3+2) - tz
445 C          Load parameters for j atom
446             qq               = qqHH            
447             rinvsq           = rinv22*rinv22   
449 C          Coulomb reaction-field interaction
450             krsq             = krf*rsq22       
451             vcoul            = qq*(rinv22+krsq-crf)
452             vctot            = vctot+vcoul     
453             fscal            = (qq*(rinv22-2.0*krsq))*rinvsq
455 C          Calculate temporary vectorial force
456             tx               = fscal*dx22      
457             ty               = fscal*dy22      
458             tz               = fscal*dz22      
460 C          Increment i atom force
461             fix2             = fix2 + tx       
462             fiy2             = fiy2 + ty       
463             fiz2             = fiz2 + tz       
465 C          Decrement j atom force
466             fjx2             = faction(j3+3) - tx
467             fjy2             = faction(j3+4) - ty
468             fjz2             = faction(j3+5) - tz
470 C          Load parameters for j atom
471             qq               = qqHH            
472             rinvsq           = rinv23*rinv23   
474 C          Coulomb reaction-field interaction
475             krsq             = krf*rsq23       
476             vcoul            = qq*(rinv23+krsq-crf)
477             vctot            = vctot+vcoul     
478             fscal            = (qq*(rinv23-2.0*krsq))*rinvsq
480 C          Calculate temporary vectorial force
481             tx               = fscal*dx23      
482             ty               = fscal*dy23      
483             tz               = fscal*dz23      
485 C          Increment i atom force
486             fix2             = fix2 + tx       
487             fiy2             = fiy2 + ty       
488             fiz2             = fiz2 + tz       
490 C          Decrement j atom force
491             fjx3             = faction(j3+6) - tx
492             fjy3             = faction(j3+7) - ty
493             fjz3             = faction(j3+8) - tz
495 C          Load parameters for j atom
496             qq               = qqMH            
497             rinvsq           = rinv24*rinv24   
499 C          Coulomb reaction-field interaction
500             krsq             = krf*rsq24       
501             vcoul            = qq*(rinv24+krsq-crf)
502             vctot            = vctot+vcoul     
503             fscal            = (qq*(rinv24-2.0*krsq))*rinvsq
505 C          Calculate temporary vectorial force
506             tx               = fscal*dx24      
507             ty               = fscal*dy24      
508             tz               = fscal*dz24      
510 C          Increment i atom force
511             fix2             = fix2 + tx       
512             fiy2             = fiy2 + ty       
513             fiz2             = fiz2 + tz       
515 C          Decrement j atom force
516             fjx4             = faction(j3+9) - tx
517             fjy4             = faction(j3+10) - ty
518             fjz4             = faction(j3+11) - tz
520 C          Load parameters for j atom
521             qq               = qqHH            
522             rinvsq           = rinv32*rinv32   
524 C          Coulomb reaction-field interaction
525             krsq             = krf*rsq32       
526             vcoul            = qq*(rinv32+krsq-crf)
527             vctot            = vctot+vcoul     
528             fscal            = (qq*(rinv32-2.0*krsq))*rinvsq
530 C          Calculate temporary vectorial force
531             tx               = fscal*dx32      
532             ty               = fscal*dy32      
533             tz               = fscal*dz32      
535 C          Increment i atom force
536             fix3             = fix3 + tx       
537             fiy3             = fiy3 + ty       
538             fiz3             = fiz3 + tz       
540 C          Decrement j atom force
541             fjx2             = fjx2 - tx       
542             fjy2             = fjy2 - ty       
543             fjz2             = fjz2 - tz       
545 C          Load parameters for j atom
546             qq               = qqHH            
547             rinvsq           = rinv33*rinv33   
549 C          Coulomb reaction-field interaction
550             krsq             = krf*rsq33       
551             vcoul            = qq*(rinv33+krsq-crf)
552             vctot            = vctot+vcoul     
553             fscal            = (qq*(rinv33-2.0*krsq))*rinvsq
555 C          Calculate temporary vectorial force
556             tx               = fscal*dx33      
557             ty               = fscal*dy33      
558             tz               = fscal*dz33      
560 C          Increment i atom force
561             fix3             = fix3 + tx       
562             fiy3             = fiy3 + ty       
563             fiz3             = fiz3 + tz       
565 C          Decrement j atom force
566             fjx3             = fjx3 - tx       
567             fjy3             = fjy3 - ty       
568             fjz3             = fjz3 - tz       
570 C          Load parameters for j atom
571             qq               = qqMH            
572             rinvsq           = rinv34*rinv34   
574 C          Coulomb reaction-field interaction
575             krsq             = krf*rsq34       
576             vcoul            = qq*(rinv34+krsq-crf)
577             vctot            = vctot+vcoul     
578             fscal            = (qq*(rinv34-2.0*krsq))*rinvsq
580 C          Calculate temporary vectorial force
581             tx               = fscal*dx34      
582             ty               = fscal*dy34      
583             tz               = fscal*dz34      
585 C          Increment i atom force
586             fix3             = fix3 + tx       
587             fiy3             = fiy3 + ty       
588             fiz3             = fiz3 + tz       
590 C          Decrement j atom force
591             fjx4             = fjx4 - tx       
592             fjy4             = fjy4 - ty       
593             fjz4             = fjz4 - tz       
595 C          Load parameters for j atom
596             qq               = qqMH            
597             rinvsq           = rinv42*rinv42   
599 C          Coulomb reaction-field interaction
600             krsq             = krf*rsq42       
601             vcoul            = qq*(rinv42+krsq-crf)
602             vctot            = vctot+vcoul     
603             fscal            = (qq*(rinv42-2.0*krsq))*rinvsq
605 C          Calculate temporary vectorial force
606             tx               = fscal*dx42      
607             ty               = fscal*dy42      
608             tz               = fscal*dz42      
610 C          Increment i atom force
611             fix4             = fix4 + tx       
612             fiy4             = fiy4 + ty       
613             fiz4             = fiz4 + tz       
615 C          Decrement j atom force
616             faction(j3+3)    = fjx2 - tx       
617             faction(j3+4)    = fjy2 - ty       
618             faction(j3+5)    = fjz2 - tz       
620 C          Load parameters for j atom
621             qq               = qqMH            
622             rinvsq           = rinv43*rinv43   
624 C          Coulomb reaction-field interaction
625             krsq             = krf*rsq43       
626             vcoul            = qq*(rinv43+krsq-crf)
627             vctot            = vctot+vcoul     
628             fscal            = (qq*(rinv43-2.0*krsq))*rinvsq
630 C          Calculate temporary vectorial force
631             tx               = fscal*dx43      
632             ty               = fscal*dy43      
633             tz               = fscal*dz43      
635 C          Increment i atom force
636             fix4             = fix4 + tx       
637             fiy4             = fiy4 + ty       
638             fiz4             = fiz4 + tz       
640 C          Decrement j atom force
641             faction(j3+6)    = fjx3 - tx       
642             faction(j3+7)    = fjy3 - ty       
643             faction(j3+8)    = fjz3 - tz       
645 C          Load parameters for j atom
646             qq               = qqMM            
647             rinvsq           = rinv44*rinv44   
649 C          Coulomb reaction-field interaction
650             krsq             = krf*rsq44       
651             vcoul            = qq*(rinv44+krsq-crf)
652             vctot            = vctot+vcoul     
653             fscal            = (qq*(rinv44-2.0*krsq))*rinvsq
655 C          Calculate temporary vectorial force
656             tx               = fscal*dx44      
657             ty               = fscal*dy44      
658             tz               = fscal*dz44      
660 C          Increment i atom force
661             fix4             = fix4 + tx       
662             fiy4             = fiy4 + ty       
663             fiz4             = fiz4 + tz       
665 C          Decrement j atom force
666             faction(j3+9)    = fjx4 - tx       
667             faction(j3+10)   = fjy4 - ty       
668             faction(j3+11)   = fjz4 - tz       
670 C          Inner loop uses 352 flops/iteration
671           end do
672           
674 C        Add i forces to mem and shifted force list
675           faction(ii3+0)   = faction(ii3+0) + fix1
676           faction(ii3+1)   = faction(ii3+1) + fiy1
677           faction(ii3+2)   = faction(ii3+2) + fiz1
678           faction(ii3+3)   = faction(ii3+3) + fix2
679           faction(ii3+4)   = faction(ii3+4) + fiy2
680           faction(ii3+5)   = faction(ii3+5) + fiz2
681           faction(ii3+6)   = faction(ii3+6) + fix3
682           faction(ii3+7)   = faction(ii3+7) + fiy3
683           faction(ii3+8)   = faction(ii3+8) + fiz3
684           faction(ii3+9)   = faction(ii3+9) + fix4
685           faction(ii3+10)  = faction(ii3+10) + fiy4
686           faction(ii3+11)  = faction(ii3+11) + fiz4
687           fshift(is3)      = fshift(is3)+fix1+fix2+fix3+fix4
688           fshift(is3+1)    = fshift(is3+1)+fiy1+fiy2+fiy3+fiy4
689           fshift(is3+2)    = fshift(is3+2)+fiz1+fiz2+fiz3+fiz4
691 C        Add potential energies to the group for this list
692           ggid             = gid(n)+1        
693           Vc(ggid)         = Vc(ggid) + vctot
694           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
696 C        Increment number of inner iterations
697           ninner           = ninner + nj1 - nj0
699 C        Outer loop uses 38 flops/iteration
700         end do
701         
703 C      Increment number of outer iterations
704         nouter           = nouter + nn1 - nn0
705       if(nn1.lt.nri) goto 10
707 C    Write outer/inner iteration count to pointers
708       outeriter        = nouter          
709       inneriter        = ninner          
710       return
711       end
719 C Gromacs nonbonded kernel pwr6kernel234nf
720 C Coulomb interaction:     Reaction field
721 C VdW interaction:         Tabulated
722 C water optimization:      pairs of TIP4P interactions
723 C Calculate forces:        no
725       subroutine pwr6kernel234nf(
726      &                          nri,
727      &                          iinr,
728      &                          jindex,
729      &                          jjnr,
730      &                          shift,
731      &                          shiftvec,
732      &                          fshift,
733      &                          gid,
734      &                          pos,
735      &                          faction,
736      &                          charge,
737      &                          facel,
738      &                          krf,
739      &                          crf,
740      &                          Vc,
741      &                          type,
742      &                          ntype,
743      &                          vdwparam,
744      &                          Vvdw,
745      &                          tabscale,
746      &                          VFtab,
747      &                          invsqrta,
748      &                          dvda,
749      &                          gbtabscale,
750      &                          GBtab,
751      &                          nthreads,
752      &                          count,
753      &                          mtx,
754      &                          outeriter,
755      &                          inneriter,
756      &                          work)
757       implicit      none
758       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
759       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
760       integer*4     gid(*),type(*),ntype
761       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
762       gmxreal       Vvdw(*),tabscale,VFtab(*)
763       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
764       integer*4     nthreads,count,mtx,outeriter,inneriter
765       gmxreal       work(*)
767       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
768       integer*4     nn0,nn1,nouter,ninner
769       gmxreal       shX,shY,shZ
770       gmxreal       qq,vcoul,vctot
771       integer*4     tj
772       gmxreal       Vvdw6,Vvdwtot
773       gmxreal       Vvdw12
774       gmxreal       r,rt,eps,eps2
775       integer*4     n0,nnn
776       gmxreal       Y,F,Geps,Heps2,Fp,VV
777       gmxreal       krsq
778       gmxreal       ix1,iy1,iz1
779       gmxreal       ix2,iy2,iz2
780       gmxreal       ix3,iy3,iz3
781       gmxreal       ix4,iy4,iz4
782       gmxreal       jx1,jy1,jz1
783       gmxreal       jx2,jy2,jz2
784       gmxreal       jx3,jy3,jz3
785       gmxreal       jx4,jy4,jz4
786       gmxreal       dx11,dy11,dz11,rsq11,rinv11
787       gmxreal       dx22,dy22,dz22,rsq22,rinv22
788       gmxreal       dx23,dy23,dz23,rsq23,rinv23
789       gmxreal       dx24,dy24,dz24,rsq24,rinv24
790       gmxreal       dx32,dy32,dz32,rsq32,rinv32
791       gmxreal       dx33,dy33,dz33,rsq33,rinv33
792       gmxreal       dx34,dy34,dz34,rsq34,rinv34
793       gmxreal       dx42,dy42,dz42,rsq42,rinv42
794       gmxreal       dx43,dy43,dz43,rsq43,rinv43
795       gmxreal       dx44,dy44,dz44,rsq44,rinv44
796       gmxreal       qH,qM,qqMM,qqMH,qqHH
797       gmxreal       c6,c12
800 C    Initialize water data
801       ii               = iinr(1)+1       
802       qH               = charge(ii+1)    
803       qM               = charge(ii+3)    
804       qqMM             = facel*qM*qM     
805       qqMH             = facel*qM*qH     
806       qqHH             = facel*qH*qH     
807       tj               = 2*(ntype+1)*type(ii)+1
808       c6               = vdwparam(tj)    
809       c12              = vdwparam(tj+1)  
812 C    Reset outer and inner iteration counters
813       nouter           = 0               
814       ninner           = 0               
816 C    Loop over thread workunits
817    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
818         if(nn1.gt.nri) nn1=nri
820 C      Start outer loop over neighborlists
821         
822         do n=nn0+1,nn1
824 C        Load shift vector for this list
825           is3              = 3*shift(n)+1    
826           shX              = shiftvec(is3)   
827           shY              = shiftvec(is3+1) 
828           shZ              = shiftvec(is3+2) 
830 C        Load limits for loop over neighbors
831           nj0              = jindex(n)+1     
832           nj1              = jindex(n+1)     
834 C        Get outer coordinate index
835           ii               = iinr(n)+1       
836           ii3              = 3*ii-2          
838 C        Load i atom data, add shift vector
839           ix1              = shX + pos(ii3+0)
840           iy1              = shY + pos(ii3+1)
841           iz1              = shZ + pos(ii3+2)
842           ix2              = shX + pos(ii3+3)
843           iy2              = shY + pos(ii3+4)
844           iz2              = shZ + pos(ii3+5)
845           ix3              = shX + pos(ii3+6)
846           iy3              = shY + pos(ii3+7)
847           iz3              = shZ + pos(ii3+8)
848           ix4              = shX + pos(ii3+9)
849           iy4              = shY + pos(ii3+10)
850           iz4              = shZ + pos(ii3+11)
852 C        Zero the potential energy for this list
853           vctot            = 0               
854           Vvdwtot          = 0               
856 C        Clear i atom forces
857           
858           do k=nj0,nj1
860 C          Get j neighbor index, and coordinate index
861             jnr              = jjnr(k)+1       
862             j3               = 3*jnr-2         
864 C          load j atom coordinates
865             jx1              = pos(j3+0)       
866             jy1              = pos(j3+1)       
867             jz1              = pos(j3+2)       
868             jx2              = pos(j3+3)       
869             jy2              = pos(j3+4)       
870             jz2              = pos(j3+5)       
871             jx3              = pos(j3+6)       
872             jy3              = pos(j3+7)       
873             jz3              = pos(j3+8)       
874             jx4              = pos(j3+9)       
875             jy4              = pos(j3+10)      
876             jz4              = pos(j3+11)      
878 C          Calculate distance
879             dx11             = ix1 - jx1       
880             dy11             = iy1 - jy1       
881             dz11             = iz1 - jz1       
882             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
883             dx22             = ix2 - jx2       
884             dy22             = iy2 - jy2       
885             dz22             = iz2 - jz2       
886             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
887             dx23             = ix2 - jx3       
888             dy23             = iy2 - jy3       
889             dz23             = iz2 - jz3       
890             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
891             dx24             = ix2 - jx4       
892             dy24             = iy2 - jy4       
893             dz24             = iz2 - jz4       
894             rsq24            = dx24*dx24+dy24*dy24+dz24*dz24
895             dx32             = ix3 - jx2       
896             dy32             = iy3 - jy2       
897             dz32             = iz3 - jz2       
898             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
899             dx33             = ix3 - jx3       
900             dy33             = iy3 - jy3       
901             dz33             = iz3 - jz3       
902             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
903             dx34             = ix3 - jx4       
904             dy34             = iy3 - jy4       
905             dz34             = iz3 - jz4       
906             rsq34            = dx34*dx34+dy34*dy34+dz34*dz34
907             dx42             = ix4 - jx2       
908             dy42             = iy4 - jy2       
909             dz42             = iz4 - jz2       
910             rsq42            = dx42*dx42+dy42*dy42+dz42*dz42
911             dx43             = ix4 - jx3       
912             dy43             = iy4 - jy3       
913             dz43             = iz4 - jz3       
914             rsq43            = dx43*dx43+dy43*dy43+dz43*dz43
915             dx44             = ix4 - jx4       
916             dy44             = iy4 - jy4       
917             dz44             = iz4 - jz4       
918             rsq44            = dx44*dx44+dy44*dy44+dz44*dz44
920 C          Calculate 1/r and 1/r2
922 C          PowerPC intrinsics 1/sqrt lookup table
923 #ifndef GMX_BLUEGENE
924             rinv11           = frsqrtes(rsq11) 
925 #else
926             rinv11           = frsqrte(dble(rsq11)) 
927 #endif
928             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
929      &  *rinv11)))
930 #ifdef GMX_DOUBLE
931             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
932      &  *rinv11)))
933 #endif
935 C          PowerPC intrinsics 1/sqrt lookup table
936 #ifndef GMX_BLUEGENE
937             rinv22           = frsqrtes(rsq22) 
938 #else
939             rinv22           = frsqrte(dble(rsq22)) 
940 #endif
941             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
942      &  *rinv22)))
943 #ifdef GMX_DOUBLE
944             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
945      &  *rinv22)))
946 #endif
948 C          PowerPC intrinsics 1/sqrt lookup table
949 #ifndef GMX_BLUEGENE
950             rinv23           = frsqrtes(rsq23) 
951 #else
952             rinv23           = frsqrte(dble(rsq23)) 
953 #endif
954             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
955      &  *rinv23)))
956 #ifdef GMX_DOUBLE
957             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
958      &  *rinv23)))
959 #endif
961 C          PowerPC intrinsics 1/sqrt lookup table
962 #ifndef GMX_BLUEGENE
963             rinv24           = frsqrtes(rsq24) 
964 #else
965             rinv24           = frsqrte(dble(rsq24)) 
966 #endif
967             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
968      &  *rinv24)))
969 #ifdef GMX_DOUBLE
970             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
971      &  *rinv24)))
972 #endif
974 C          PowerPC intrinsics 1/sqrt lookup table
975 #ifndef GMX_BLUEGENE
976             rinv32           = frsqrtes(rsq32) 
977 #else
978             rinv32           = frsqrte(dble(rsq32)) 
979 #endif
980             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
981      &  *rinv32)))
982 #ifdef GMX_DOUBLE
983             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
984      &  *rinv32)))
985 #endif
987 C          PowerPC intrinsics 1/sqrt lookup table
988 #ifndef GMX_BLUEGENE
989             rinv33           = frsqrtes(rsq33) 
990 #else
991             rinv33           = frsqrte(dble(rsq33)) 
992 #endif
993             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
994      &  *rinv33)))
995 #ifdef GMX_DOUBLE
996             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
997      &  *rinv33)))
998 #endif
1000 C          PowerPC intrinsics 1/sqrt lookup table
1001 #ifndef GMX_BLUEGENE
1002             rinv34           = frsqrtes(rsq34) 
1003 #else
1004             rinv34           = frsqrte(dble(rsq34)) 
1005 #endif
1006             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
1007      &  *rinv34)))
1008 #ifdef GMX_DOUBLE
1009             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
1010      &  *rinv34)))
1011 #endif
1013 C          PowerPC intrinsics 1/sqrt lookup table
1014 #ifndef GMX_BLUEGENE
1015             rinv42           = frsqrtes(rsq42) 
1016 #else
1017             rinv42           = frsqrte(dble(rsq42)) 
1018 #endif
1019             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
1020      &  *rinv42)))
1021 #ifdef GMX_DOUBLE
1022             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
1023      &  *rinv42)))
1024 #endif
1026 C          PowerPC intrinsics 1/sqrt lookup table
1027 #ifndef GMX_BLUEGENE
1028             rinv43           = frsqrtes(rsq43) 
1029 #else
1030             rinv43           = frsqrte(dble(rsq43)) 
1031 #endif
1032             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
1033      &  *rinv43)))
1034 #ifdef GMX_DOUBLE
1035             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
1036      &  *rinv43)))
1037 #endif
1039 C          PowerPC intrinsics 1/sqrt lookup table
1040 #ifndef GMX_BLUEGENE
1041             rinv44           = frsqrtes(rsq44) 
1042 #else
1043             rinv44           = frsqrte(dble(rsq44)) 
1044 #endif
1045             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
1046      &  *rinv44)))
1047 #ifdef GMX_DOUBLE
1048             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
1049      &  *rinv44)))
1050 #endif
1052 C          Load parameters for j atom
1054 C          Calculate table index
1055             r                = rsq11*rinv11    
1057 C          Calculate table index
1058             rt               = r*tabscale      
1059             n0               = rt              
1060             eps              = rt-n0           
1061             eps2             = eps*eps         
1062             nnn              = 8*n0+1          
1064 C          Tabulated VdW interaction - dispersion
1065             Y                = VFtab(nnn)      
1066             F                = VFtab(nnn+1)    
1067             Geps             = eps*VFtab(nnn+2)
1068             Heps2            = eps2*VFtab(nnn+3)
1069             Fp               = F+Geps+Heps2    
1070             VV               = Y+eps*Fp        
1071             Vvdw6            = c6*VV           
1073 C          Tabulated VdW interaction - repulsion
1074             nnn              = nnn+4           
1075             Y                = VFtab(nnn)      
1076             F                = VFtab(nnn+1)    
1077             Geps             = eps*VFtab(nnn+2)
1078             Heps2            = eps2*VFtab(nnn+3)
1079             Fp               = F+Geps+Heps2    
1080             VV               = Y+eps*Fp        
1081             Vvdw12           = c12*VV          
1082             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
1084 C          Load parameters for j atom
1085             qq               = qqHH            
1087 C          Coulomb reaction-field interaction
1088             krsq             = krf*rsq22       
1089             vcoul            = qq*(rinv22+krsq-crf)
1090             vctot            = vctot+vcoul     
1092 C          Load parameters for j atom
1093             qq               = qqHH            
1095 C          Coulomb reaction-field interaction
1096             krsq             = krf*rsq23       
1097             vcoul            = qq*(rinv23+krsq-crf)
1098             vctot            = vctot+vcoul     
1100 C          Load parameters for j atom
1101             qq               = qqMH            
1103 C          Coulomb reaction-field interaction
1104             krsq             = krf*rsq24       
1105             vcoul            = qq*(rinv24+krsq-crf)
1106             vctot            = vctot+vcoul     
1108 C          Load parameters for j atom
1109             qq               = qqHH            
1111 C          Coulomb reaction-field interaction
1112             krsq             = krf*rsq32       
1113             vcoul            = qq*(rinv32+krsq-crf)
1114             vctot            = vctot+vcoul     
1116 C          Load parameters for j atom
1117             qq               = qqHH            
1119 C          Coulomb reaction-field interaction
1120             krsq             = krf*rsq33       
1121             vcoul            = qq*(rinv33+krsq-crf)
1122             vctot            = vctot+vcoul     
1124 C          Load parameters for j atom
1125             qq               = qqMH            
1127 C          Coulomb reaction-field interaction
1128             krsq             = krf*rsq34       
1129             vcoul            = qq*(rinv34+krsq-crf)
1130             vctot            = vctot+vcoul     
1132 C          Load parameters for j atom
1133             qq               = qqMH            
1135 C          Coulomb reaction-field interaction
1136             krsq             = krf*rsq42       
1137             vcoul            = qq*(rinv42+krsq-crf)
1138             vctot            = vctot+vcoul     
1140 C          Load parameters for j atom
1141             qq               = qqMH            
1143 C          Coulomb reaction-field interaction
1144             krsq             = krf*rsq43       
1145             vcoul            = qq*(rinv43+krsq-crf)
1146             vctot            = vctot+vcoul     
1148 C          Load parameters for j atom
1149             qq               = qqMM            
1151 C          Coulomb reaction-field interaction
1152             krsq             = krf*rsq44       
1153             vcoul            = qq*(rinv44+krsq-crf)
1154             vctot            = vctot+vcoul     
1156 C          Inner loop uses 205 flops/iteration
1157           end do
1158           
1160 C        Add i forces to mem and shifted force list
1162 C        Add potential energies to the group for this list
1163           ggid             = gid(n)+1        
1164           Vc(ggid)         = Vc(ggid) + vctot
1165           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
1167 C        Increment number of inner iterations
1168           ninner           = ninner + nj1 - nj0
1170 C        Outer loop uses 14 flops/iteration
1171         end do
1172         
1174 C      Increment number of outer iterations
1175         nouter           = nouter + nn1 - nn0
1176       if(nn1.lt.nri) goto 10
1178 C    Write outer/inner iteration count to pointers
1179       outeriter        = nouter          
1180       inneriter        = ninner          
1181       return
1182       end