Fixed synchronization counter update for pwr6 kernels
[gromacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel224.F
blob88c7170bd61746e414f4958c0da95a8801cc8da5
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel224
45 C Coulomb interaction:     Reaction field
46 C VdW interaction:         Buckingham
47 C water optimization:      pairs of TIP4P interactions
48 C Calculate forces:        yes
50       subroutine pwr6kernel224(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       qq,vcoul,vctot
98       integer*4     tj
99       gmxreal       rinvsix
100       gmxreal       Vvdw6,Vvdwtot
101       gmxreal       krsq
102       gmxreal       Vvdwexp,br
103       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
104       gmxreal       ix2,iy2,iz2,fix2,fiy2,fiz2
105       gmxreal       ix3,iy3,iz3,fix3,fiy3,fiz3
106       gmxreal       ix4,iy4,iz4,fix4,fiy4,fiz4
107       gmxreal       jx1,jy1,jz1
108       gmxreal       jx2,jy2,jz2,fjx2,fjy2,fjz2
109       gmxreal       jx3,jy3,jz3,fjx3,fjy3,fjz3
110       gmxreal       jx4,jy4,jz4,fjx4,fjy4,fjz4
111       gmxreal       dx11,dy11,dz11,rsq11,rinv11
112       gmxreal       dx22,dy22,dz22,rsq22,rinv22
113       gmxreal       dx23,dy23,dz23,rsq23,rinv23
114       gmxreal       dx24,dy24,dz24,rsq24,rinv24
115       gmxreal       dx32,dy32,dz32,rsq32,rinv32
116       gmxreal       dx33,dy33,dz33,rsq33,rinv33
117       gmxreal       dx34,dy34,dz34,rsq34,rinv34
118       gmxreal       dx42,dy42,dz42,rsq42,rinv42
119       gmxreal       dx43,dy43,dz43,rsq43,rinv43
120       gmxreal       dx44,dy44,dz44,rsq44,rinv44
121       gmxreal       qH,qM,qqMM,qqMH,qqHH
122       gmxreal       c6,cexp1,cexp2
125 C    Initialize water data
126       ii               = iinr(1)+1       
127       qH               = charge(ii+1)    
128       qM               = charge(ii+3)    
129       qqMM             = facel*qM*qM     
130       qqMH             = facel*qM*qH     
131       qqHH             = facel*qH*qH     
132       tj               = 3*(ntype+1)*type(ii)+1
133       c6               = vdwparam(tj)    
134       cexp1            = vdwparam(tj+1)  
135       cexp2            = vdwparam(tj+2)  
138 C    Reset outer and inner iteration counters
139       nouter           = 0               
140       ninner           = 0               
142 C    Loop over thread workunits
143    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
144         if(nn1.gt.nri) nn1=nri
146 C      Start outer loop over neighborlists
147         
148         do n=nn0+1,nn1
150 C        Load shift vector for this list
151           is3              = 3*shift(n)+1    
152           shX              = shiftvec(is3)   
153           shY              = shiftvec(is3+1) 
154           shZ              = shiftvec(is3+2) 
156 C        Load limits for loop over neighbors
157           nj0              = jindex(n)+1     
158           nj1              = jindex(n+1)     
160 C        Get outer coordinate index
161           ii               = iinr(n)+1       
162           ii3              = 3*ii-2          
164 C        Load i atom data, add shift vector
165           ix1              = shX + pos(ii3+0)
166           iy1              = shY + pos(ii3+1)
167           iz1              = shZ + pos(ii3+2)
168           ix2              = shX + pos(ii3+3)
169           iy2              = shY + pos(ii3+4)
170           iz2              = shZ + pos(ii3+5)
171           ix3              = shX + pos(ii3+6)
172           iy3              = shY + pos(ii3+7)
173           iz3              = shZ + pos(ii3+8)
174           ix4              = shX + pos(ii3+9)
175           iy4              = shY + pos(ii3+10)
176           iz4              = shZ + pos(ii3+11)
178 C        Zero the potential energy for this list
179           vctot            = 0               
180           Vvdwtot          = 0               
182 C        Clear i atom forces
183           fix1             = 0               
184           fiy1             = 0               
185           fiz1             = 0               
186           fix2             = 0               
187           fiy2             = 0               
188           fiz2             = 0               
189           fix3             = 0               
190           fiy3             = 0               
191           fiz3             = 0               
192           fix4             = 0               
193           fiy4             = 0               
194           fiz4             = 0               
195           
196           do k=nj0,nj1
198 C          Get j neighbor index, and coordinate index
199             jnr              = jjnr(k)+1       
200             j3               = 3*jnr-2         
202 C          load j atom coordinates
203             jx1              = pos(j3+0)       
204             jy1              = pos(j3+1)       
205             jz1              = pos(j3+2)       
206             jx2              = pos(j3+3)       
207             jy2              = pos(j3+4)       
208             jz2              = pos(j3+5)       
209             jx3              = pos(j3+6)       
210             jy3              = pos(j3+7)       
211             jz3              = pos(j3+8)       
212             jx4              = pos(j3+9)       
213             jy4              = pos(j3+10)      
214             jz4              = pos(j3+11)      
216 C          Calculate distance
217             dx11             = ix1 - jx1       
218             dy11             = iy1 - jy1       
219             dz11             = iz1 - jz1       
220             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
221             dx22             = ix2 - jx2       
222             dy22             = iy2 - jy2       
223             dz22             = iz2 - jz2       
224             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
225             dx23             = ix2 - jx3       
226             dy23             = iy2 - jy3       
227             dz23             = iz2 - jz3       
228             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
229             dx24             = ix2 - jx4       
230             dy24             = iy2 - jy4       
231             dz24             = iz2 - jz4       
232             rsq24            = dx24*dx24+dy24*dy24+dz24*dz24
233             dx32             = ix3 - jx2       
234             dy32             = iy3 - jy2       
235             dz32             = iz3 - jz2       
236             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
237             dx33             = ix3 - jx3       
238             dy33             = iy3 - jy3       
239             dz33             = iz3 - jz3       
240             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
241             dx34             = ix3 - jx4       
242             dy34             = iy3 - jy4       
243             dz34             = iz3 - jz4       
244             rsq34            = dx34*dx34+dy34*dy34+dz34*dz34
245             dx42             = ix4 - jx2       
246             dy42             = iy4 - jy2       
247             dz42             = iz4 - jz2       
248             rsq42            = dx42*dx42+dy42*dy42+dz42*dz42
249             dx43             = ix4 - jx3       
250             dy43             = iy4 - jy3       
251             dz43             = iz4 - jz3       
252             rsq43            = dx43*dx43+dy43*dy43+dz43*dz43
253             dx44             = ix4 - jx4       
254             dy44             = iy4 - jy4       
255             dz44             = iz4 - jz4       
256             rsq44            = dx44*dx44+dy44*dy44+dz44*dz44
258 C          Calculate 1/r and 1/r2
260 C          PowerPC intrinsics 1/sqrt lookup table
261 #ifndef GMX_BLUEGENE
262             rinv11           = frsqrtes(rsq11) 
263 #else
264             rinv11           = frsqrte(dble(rsq11)) 
265 #endif
266             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
267      &  *rinv11)))
268 #ifdef GMX_DOUBLE
269             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
270      &  *rinv11)))
271 #endif
273 C          PowerPC intrinsics 1/sqrt lookup table
274 #ifndef GMX_BLUEGENE
275             rinv22           = frsqrtes(rsq22) 
276 #else
277             rinv22           = frsqrte(dble(rsq22)) 
278 #endif
279             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
280      &  *rinv22)))
281 #ifdef GMX_DOUBLE
282             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
283      &  *rinv22)))
284 #endif
286 C          PowerPC intrinsics 1/sqrt lookup table
287 #ifndef GMX_BLUEGENE
288             rinv23           = frsqrtes(rsq23) 
289 #else
290             rinv23           = frsqrte(dble(rsq23)) 
291 #endif
292             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
293      &  *rinv23)))
294 #ifdef GMX_DOUBLE
295             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
296      &  *rinv23)))
297 #endif
299 C          PowerPC intrinsics 1/sqrt lookup table
300 #ifndef GMX_BLUEGENE
301             rinv24           = frsqrtes(rsq24) 
302 #else
303             rinv24           = frsqrte(dble(rsq24)) 
304 #endif
305             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
306      &  *rinv24)))
307 #ifdef GMX_DOUBLE
308             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
309      &  *rinv24)))
310 #endif
312 C          PowerPC intrinsics 1/sqrt lookup table
313 #ifndef GMX_BLUEGENE
314             rinv32           = frsqrtes(rsq32) 
315 #else
316             rinv32           = frsqrte(dble(rsq32)) 
317 #endif
318             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
319      &  *rinv32)))
320 #ifdef GMX_DOUBLE
321             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
322      &  *rinv32)))
323 #endif
325 C          PowerPC intrinsics 1/sqrt lookup table
326 #ifndef GMX_BLUEGENE
327             rinv33           = frsqrtes(rsq33) 
328 #else
329             rinv33           = frsqrte(dble(rsq33)) 
330 #endif
331             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
332      &  *rinv33)))
333 #ifdef GMX_DOUBLE
334             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
335      &  *rinv33)))
336 #endif
338 C          PowerPC intrinsics 1/sqrt lookup table
339 #ifndef GMX_BLUEGENE
340             rinv34           = frsqrtes(rsq34) 
341 #else
342             rinv34           = frsqrte(dble(rsq34)) 
343 #endif
344             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
345      &  *rinv34)))
346 #ifdef GMX_DOUBLE
347             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
348      &  *rinv34)))
349 #endif
351 C          PowerPC intrinsics 1/sqrt lookup table
352 #ifndef GMX_BLUEGENE
353             rinv42           = frsqrtes(rsq42) 
354 #else
355             rinv42           = frsqrte(dble(rsq42)) 
356 #endif
357             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
358      &  *rinv42)))
359 #ifdef GMX_DOUBLE
360             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
361      &  *rinv42)))
362 #endif
364 C          PowerPC intrinsics 1/sqrt lookup table
365 #ifndef GMX_BLUEGENE
366             rinv43           = frsqrtes(rsq43) 
367 #else
368             rinv43           = frsqrte(dble(rsq43)) 
369 #endif
370             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
371      &  *rinv43)))
372 #ifdef GMX_DOUBLE
373             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
374      &  *rinv43)))
375 #endif
377 C          PowerPC intrinsics 1/sqrt lookup table
378 #ifndef GMX_BLUEGENE
379             rinv44           = frsqrtes(rsq44) 
380 #else
381             rinv44           = frsqrte(dble(rsq44)) 
382 #endif
383             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
384      &  *rinv44)))
385 #ifdef GMX_DOUBLE
386             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
387      &  *rinv44)))
388 #endif
390 C          Load parameters for j atom
391             rinvsq           = rinv11*rinv11   
393 C          Buckingham interaction
394             rinvsix          = rinvsq*rinvsq*rinvsq
395             Vvdw6            = c6*rinvsix      
396             br               = cexp2*rsq11*rinv11
397             Vvdwexp          = cexp1*exp(-br)  
398             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
399             fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq
401 C          Calculate temporary vectorial force
402             tx               = fscal*dx11      
403             ty               = fscal*dy11      
404             tz               = fscal*dz11      
406 C          Increment i atom force
407             fix1             = fix1 + tx       
408             fiy1             = fiy1 + ty       
409             fiz1             = fiz1 + tz       
411 C          Decrement j atom force
412             faction(j3+0)    = faction(j3+0) - tx
413             faction(j3+1)    = faction(j3+1) - ty
414             faction(j3+2)    = faction(j3+2) - tz
416 C          Load parameters for j atom
417             qq               = qqHH            
418             rinvsq           = rinv22*rinv22   
420 C          Coulomb reaction-field interaction
421             krsq             = krf*rsq22       
422             vcoul            = qq*(rinv22+krsq-crf)
423             vctot            = vctot+vcoul     
424             fscal            = (qq*(rinv22-2.0*krsq))*rinvsq
426 C          Calculate temporary vectorial force
427             tx               = fscal*dx22      
428             ty               = fscal*dy22      
429             tz               = fscal*dz22      
431 C          Increment i atom force
432             fix2             = fix2 + tx       
433             fiy2             = fiy2 + ty       
434             fiz2             = fiz2 + tz       
436 C          Decrement j atom force
437             fjx2             = faction(j3+3) - tx
438             fjy2             = faction(j3+4) - ty
439             fjz2             = faction(j3+5) - tz
441 C          Load parameters for j atom
442             qq               = qqHH            
443             rinvsq           = rinv23*rinv23   
445 C          Coulomb reaction-field interaction
446             krsq             = krf*rsq23       
447             vcoul            = qq*(rinv23+krsq-crf)
448             vctot            = vctot+vcoul     
449             fscal            = (qq*(rinv23-2.0*krsq))*rinvsq
451 C          Calculate temporary vectorial force
452             tx               = fscal*dx23      
453             ty               = fscal*dy23      
454             tz               = fscal*dz23      
456 C          Increment i atom force
457             fix2             = fix2 + tx       
458             fiy2             = fiy2 + ty       
459             fiz2             = fiz2 + tz       
461 C          Decrement j atom force
462             fjx3             = faction(j3+6) - tx
463             fjy3             = faction(j3+7) - ty
464             fjz3             = faction(j3+8) - tz
466 C          Load parameters for j atom
467             qq               = qqMH            
468             rinvsq           = rinv24*rinv24   
470 C          Coulomb reaction-field interaction
471             krsq             = krf*rsq24       
472             vcoul            = qq*(rinv24+krsq-crf)
473             vctot            = vctot+vcoul     
474             fscal            = (qq*(rinv24-2.0*krsq))*rinvsq
476 C          Calculate temporary vectorial force
477             tx               = fscal*dx24      
478             ty               = fscal*dy24      
479             tz               = fscal*dz24      
481 C          Increment i atom force
482             fix2             = fix2 + tx       
483             fiy2             = fiy2 + ty       
484             fiz2             = fiz2 + tz       
486 C          Decrement j atom force
487             fjx4             = faction(j3+9) - tx
488             fjy4             = faction(j3+10) - ty
489             fjz4             = faction(j3+11) - tz
491 C          Load parameters for j atom
492             qq               = qqHH            
493             rinvsq           = rinv32*rinv32   
495 C          Coulomb reaction-field interaction
496             krsq             = krf*rsq32       
497             vcoul            = qq*(rinv32+krsq-crf)
498             vctot            = vctot+vcoul     
499             fscal            = (qq*(rinv32-2.0*krsq))*rinvsq
501 C          Calculate temporary vectorial force
502             tx               = fscal*dx32      
503             ty               = fscal*dy32      
504             tz               = fscal*dz32      
506 C          Increment i atom force
507             fix3             = fix3 + tx       
508             fiy3             = fiy3 + ty       
509             fiz3             = fiz3 + tz       
511 C          Decrement j atom force
512             fjx2             = fjx2 - tx       
513             fjy2             = fjy2 - ty       
514             fjz2             = fjz2 - tz       
516 C          Load parameters for j atom
517             qq               = qqHH            
518             rinvsq           = rinv33*rinv33   
520 C          Coulomb reaction-field interaction
521             krsq             = krf*rsq33       
522             vcoul            = qq*(rinv33+krsq-crf)
523             vctot            = vctot+vcoul     
524             fscal            = (qq*(rinv33-2.0*krsq))*rinvsq
526 C          Calculate temporary vectorial force
527             tx               = fscal*dx33      
528             ty               = fscal*dy33      
529             tz               = fscal*dz33      
531 C          Increment i atom force
532             fix3             = fix3 + tx       
533             fiy3             = fiy3 + ty       
534             fiz3             = fiz3 + tz       
536 C          Decrement j atom force
537             fjx3             = fjx3 - tx       
538             fjy3             = fjy3 - ty       
539             fjz3             = fjz3 - tz       
541 C          Load parameters for j atom
542             qq               = qqMH            
543             rinvsq           = rinv34*rinv34   
545 C          Coulomb reaction-field interaction
546             krsq             = krf*rsq34       
547             vcoul            = qq*(rinv34+krsq-crf)
548             vctot            = vctot+vcoul     
549             fscal            = (qq*(rinv34-2.0*krsq))*rinvsq
551 C          Calculate temporary vectorial force
552             tx               = fscal*dx34      
553             ty               = fscal*dy34      
554             tz               = fscal*dz34      
556 C          Increment i atom force
557             fix3             = fix3 + tx       
558             fiy3             = fiy3 + ty       
559             fiz3             = fiz3 + tz       
561 C          Decrement j atom force
562             fjx4             = fjx4 - tx       
563             fjy4             = fjy4 - ty       
564             fjz4             = fjz4 - tz       
566 C          Load parameters for j atom
567             qq               = qqMH            
568             rinvsq           = rinv42*rinv42   
570 C          Coulomb reaction-field interaction
571             krsq             = krf*rsq42       
572             vcoul            = qq*(rinv42+krsq-crf)
573             vctot            = vctot+vcoul     
574             fscal            = (qq*(rinv42-2.0*krsq))*rinvsq
576 C          Calculate temporary vectorial force
577             tx               = fscal*dx42      
578             ty               = fscal*dy42      
579             tz               = fscal*dz42      
581 C          Increment i atom force
582             fix4             = fix4 + tx       
583             fiy4             = fiy4 + ty       
584             fiz4             = fiz4 + tz       
586 C          Decrement j atom force
587             faction(j3+3)    = fjx2 - tx       
588             faction(j3+4)    = fjy2 - ty       
589             faction(j3+5)    = fjz2 - tz       
591 C          Load parameters for j atom
592             qq               = qqMH            
593             rinvsq           = rinv43*rinv43   
595 C          Coulomb reaction-field interaction
596             krsq             = krf*rsq43       
597             vcoul            = qq*(rinv43+krsq-crf)
598             vctot            = vctot+vcoul     
599             fscal            = (qq*(rinv43-2.0*krsq))*rinvsq
601 C          Calculate temporary vectorial force
602             tx               = fscal*dx43      
603             ty               = fscal*dy43      
604             tz               = fscal*dz43      
606 C          Increment i atom force
607             fix4             = fix4 + tx       
608             fiy4             = fiy4 + ty       
609             fiz4             = fiz4 + tz       
611 C          Decrement j atom force
612             faction(j3+6)    = fjx3 - tx       
613             faction(j3+7)    = fjy3 - ty       
614             faction(j3+8)    = fjz3 - tz       
616 C          Load parameters for j atom
617             qq               = qqMM            
618             rinvsq           = rinv44*rinv44   
620 C          Coulomb reaction-field interaction
621             krsq             = krf*rsq44       
622             vcoul            = qq*(rinv44+krsq-crf)
623             vctot            = vctot+vcoul     
624             fscal            = (qq*(rinv44-2.0*krsq))*rinvsq
626 C          Calculate temporary vectorial force
627             tx               = fscal*dx44      
628             ty               = fscal*dy44      
629             tz               = fscal*dz44      
631 C          Increment i atom force
632             fix4             = fix4 + tx       
633             fiy4             = fiy4 + ty       
634             fiz4             = fiz4 + tz       
636 C          Decrement j atom force
637             faction(j3+9)    = fjx4 - tx       
638             faction(j3+10)   = fjy4 - ty       
639             faction(j3+11)   = fjz4 - tz       
641 C          Inner loop uses 359 flops/iteration
642           end do
643           
645 C        Add i forces to mem and shifted force list
646           faction(ii3+0)   = faction(ii3+0) + fix1
647           faction(ii3+1)   = faction(ii3+1) + fiy1
648           faction(ii3+2)   = faction(ii3+2) + fiz1
649           faction(ii3+3)   = faction(ii3+3) + fix2
650           faction(ii3+4)   = faction(ii3+4) + fiy2
651           faction(ii3+5)   = faction(ii3+5) + fiz2
652           faction(ii3+6)   = faction(ii3+6) + fix3
653           faction(ii3+7)   = faction(ii3+7) + fiy3
654           faction(ii3+8)   = faction(ii3+8) + fiz3
655           faction(ii3+9)   = faction(ii3+9) + fix4
656           faction(ii3+10)  = faction(ii3+10) + fiy4
657           faction(ii3+11)  = faction(ii3+11) + fiz4
658           fshift(is3)      = fshift(is3)+fix1+fix2+fix3+fix4
659           fshift(is3+1)    = fshift(is3+1)+fiy1+fiy2+fiy3+fiy4
660           fshift(is3+2)    = fshift(is3+2)+fiz1+fiz2+fiz3+fiz4
662 C        Add potential energies to the group for this list
663           ggid             = gid(n)+1        
664           Vc(ggid)         = Vc(ggid) + vctot
665           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
667 C        Increment number of inner iterations
668           ninner           = ninner + nj1 - nj0
670 C        Outer loop uses 38 flops/iteration
671         end do
672         
674 C      Increment number of outer iterations
675         nouter           = nouter + nn1 - nn0
676       if(nn1.lt.nri) goto 10
678 C    Write outer/inner iteration count to pointers
679       outeriter        = nouter          
680       inneriter        = ninner          
681       return
682       end
690 C Gromacs nonbonded kernel pwr6kernel224nf
691 C Coulomb interaction:     Reaction field
692 C VdW interaction:         Buckingham
693 C water optimization:      pairs of TIP4P interactions
694 C Calculate forces:        no
696       subroutine pwr6kernel224nf(
697      &                          nri,
698      &                          iinr,
699      &                          jindex,
700      &                          jjnr,
701      &                          shift,
702      &                          shiftvec,
703      &                          fshift,
704      &                          gid,
705      &                          pos,
706      &                          faction,
707      &                          charge,
708      &                          facel,
709      &                          krf,
710      &                          crf,
711      &                          Vc,
712      &                          type,
713      &                          ntype,
714      &                          vdwparam,
715      &                          Vvdw,
716      &                          tabscale,
717      &                          VFtab,
718      &                          invsqrta,
719      &                          dvda,
720      &                          gbtabscale,
721      &                          GBtab,
722      &                          nthreads,
723      &                          count,
724      &                          mtx,
725      &                          outeriter,
726      &                          inneriter,
727      &                          work)
728       implicit      none
729       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
730       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
731       integer*4     gid(*),type(*),ntype
732       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
733       gmxreal       Vvdw(*),tabscale,VFtab(*)
734       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
735       integer*4     nthreads,count,mtx,outeriter,inneriter
736       gmxreal       work(*)
738       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
739       integer*4     nn0,nn1,nouter,ninner
740       gmxreal       shX,shY,shZ
741       gmxreal       rinvsq
742       gmxreal       qq,vcoul,vctot
743       integer*4     tj
744       gmxreal       rinvsix
745       gmxreal       Vvdw6,Vvdwtot
746       gmxreal       krsq
747       gmxreal       Vvdwexp,br
748       gmxreal       ix1,iy1,iz1
749       gmxreal       ix2,iy2,iz2
750       gmxreal       ix3,iy3,iz3
751       gmxreal       ix4,iy4,iz4
752       gmxreal       jx1,jy1,jz1
753       gmxreal       jx2,jy2,jz2
754       gmxreal       jx3,jy3,jz3
755       gmxreal       jx4,jy4,jz4
756       gmxreal       dx11,dy11,dz11,rsq11,rinv11
757       gmxreal       dx22,dy22,dz22,rsq22,rinv22
758       gmxreal       dx23,dy23,dz23,rsq23,rinv23
759       gmxreal       dx24,dy24,dz24,rsq24,rinv24
760       gmxreal       dx32,dy32,dz32,rsq32,rinv32
761       gmxreal       dx33,dy33,dz33,rsq33,rinv33
762       gmxreal       dx34,dy34,dz34,rsq34,rinv34
763       gmxreal       dx42,dy42,dz42,rsq42,rinv42
764       gmxreal       dx43,dy43,dz43,rsq43,rinv43
765       gmxreal       dx44,dy44,dz44,rsq44,rinv44
766       gmxreal       qH,qM,qqMM,qqMH,qqHH
767       gmxreal       c6,cexp1,cexp2
770 C    Initialize water data
771       ii               = iinr(1)+1       
772       qH               = charge(ii+1)    
773       qM               = charge(ii+3)    
774       qqMM             = facel*qM*qM     
775       qqMH             = facel*qM*qH     
776       qqHH             = facel*qH*qH     
777       tj               = 3*(ntype+1)*type(ii)+1
778       c6               = vdwparam(tj)    
779       cexp1            = vdwparam(tj+1)  
780       cexp2            = vdwparam(tj+2)  
783 C    Reset outer and inner iteration counters
784       nouter           = 0               
785       ninner           = 0               
787 C    Loop over thread workunits
788    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
789         if(nn1.gt.nri) nn1=nri
791 C      Start outer loop over neighborlists
792         
793         do n=nn0+1,nn1
795 C        Load shift vector for this list
796           is3              = 3*shift(n)+1    
797           shX              = shiftvec(is3)   
798           shY              = shiftvec(is3+1) 
799           shZ              = shiftvec(is3+2) 
801 C        Load limits for loop over neighbors
802           nj0              = jindex(n)+1     
803           nj1              = jindex(n+1)     
805 C        Get outer coordinate index
806           ii               = iinr(n)+1       
807           ii3              = 3*ii-2          
809 C        Load i atom data, add shift vector
810           ix1              = shX + pos(ii3+0)
811           iy1              = shY + pos(ii3+1)
812           iz1              = shZ + pos(ii3+2)
813           ix2              = shX + pos(ii3+3)
814           iy2              = shY + pos(ii3+4)
815           iz2              = shZ + pos(ii3+5)
816           ix3              = shX + pos(ii3+6)
817           iy3              = shY + pos(ii3+7)
818           iz3              = shZ + pos(ii3+8)
819           ix4              = shX + pos(ii3+9)
820           iy4              = shY + pos(ii3+10)
821           iz4              = shZ + pos(ii3+11)
823 C        Zero the potential energy for this list
824           vctot            = 0               
825           Vvdwtot          = 0               
827 C        Clear i atom forces
828           
829           do k=nj0,nj1
831 C          Get j neighbor index, and coordinate index
832             jnr              = jjnr(k)+1       
833             j3               = 3*jnr-2         
835 C          load j atom coordinates
836             jx1              = pos(j3+0)       
837             jy1              = pos(j3+1)       
838             jz1              = pos(j3+2)       
839             jx2              = pos(j3+3)       
840             jy2              = pos(j3+4)       
841             jz2              = pos(j3+5)       
842             jx3              = pos(j3+6)       
843             jy3              = pos(j3+7)       
844             jz3              = pos(j3+8)       
845             jx4              = pos(j3+9)       
846             jy4              = pos(j3+10)      
847             jz4              = pos(j3+11)      
849 C          Calculate distance
850             dx11             = ix1 - jx1       
851             dy11             = iy1 - jy1       
852             dz11             = iz1 - jz1       
853             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
854             dx22             = ix2 - jx2       
855             dy22             = iy2 - jy2       
856             dz22             = iz2 - jz2       
857             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
858             dx23             = ix2 - jx3       
859             dy23             = iy2 - jy3       
860             dz23             = iz2 - jz3       
861             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
862             dx24             = ix2 - jx4       
863             dy24             = iy2 - jy4       
864             dz24             = iz2 - jz4       
865             rsq24            = dx24*dx24+dy24*dy24+dz24*dz24
866             dx32             = ix3 - jx2       
867             dy32             = iy3 - jy2       
868             dz32             = iz3 - jz2       
869             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
870             dx33             = ix3 - jx3       
871             dy33             = iy3 - jy3       
872             dz33             = iz3 - jz3       
873             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
874             dx34             = ix3 - jx4       
875             dy34             = iy3 - jy4       
876             dz34             = iz3 - jz4       
877             rsq34            = dx34*dx34+dy34*dy34+dz34*dz34
878             dx42             = ix4 - jx2       
879             dy42             = iy4 - jy2       
880             dz42             = iz4 - jz2       
881             rsq42            = dx42*dx42+dy42*dy42+dz42*dz42
882             dx43             = ix4 - jx3       
883             dy43             = iy4 - jy3       
884             dz43             = iz4 - jz3       
885             rsq43            = dx43*dx43+dy43*dy43+dz43*dz43
886             dx44             = ix4 - jx4       
887             dy44             = iy4 - jy4       
888             dz44             = iz4 - jz4       
889             rsq44            = dx44*dx44+dy44*dy44+dz44*dz44
891 C          Calculate 1/r and 1/r2
893 C          PowerPC intrinsics 1/sqrt lookup table
894 #ifndef GMX_BLUEGENE
895             rinv11           = frsqrtes(rsq11) 
896 #else
897             rinv11           = frsqrte(dble(rsq11)) 
898 #endif
899             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
900      &  *rinv11)))
901 #ifdef GMX_DOUBLE
902             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
903      &  *rinv11)))
904 #endif
906 C          PowerPC intrinsics 1/sqrt lookup table
907 #ifndef GMX_BLUEGENE
908             rinv22           = frsqrtes(rsq22) 
909 #else
910             rinv22           = frsqrte(dble(rsq22)) 
911 #endif
912             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
913      &  *rinv22)))
914 #ifdef GMX_DOUBLE
915             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
916      &  *rinv22)))
917 #endif
919 C          PowerPC intrinsics 1/sqrt lookup table
920 #ifndef GMX_BLUEGENE
921             rinv23           = frsqrtes(rsq23) 
922 #else
923             rinv23           = frsqrte(dble(rsq23)) 
924 #endif
925             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
926      &  *rinv23)))
927 #ifdef GMX_DOUBLE
928             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
929      &  *rinv23)))
930 #endif
932 C          PowerPC intrinsics 1/sqrt lookup table
933 #ifndef GMX_BLUEGENE
934             rinv24           = frsqrtes(rsq24) 
935 #else
936             rinv24           = frsqrte(dble(rsq24)) 
937 #endif
938             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
939      &  *rinv24)))
940 #ifdef GMX_DOUBLE
941             rinv24           = (0.5*rinv24*(3.0-((rsq24*rinv24)
942      &  *rinv24)))
943 #endif
945 C          PowerPC intrinsics 1/sqrt lookup table
946 #ifndef GMX_BLUEGENE
947             rinv32           = frsqrtes(rsq32) 
948 #else
949             rinv32           = frsqrte(dble(rsq32)) 
950 #endif
951             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
952      &  *rinv32)))
953 #ifdef GMX_DOUBLE
954             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
955      &  *rinv32)))
956 #endif
958 C          PowerPC intrinsics 1/sqrt lookup table
959 #ifndef GMX_BLUEGENE
960             rinv33           = frsqrtes(rsq33) 
961 #else
962             rinv33           = frsqrte(dble(rsq33)) 
963 #endif
964             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
965      &  *rinv33)))
966 #ifdef GMX_DOUBLE
967             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
968      &  *rinv33)))
969 #endif
971 C          PowerPC intrinsics 1/sqrt lookup table
972 #ifndef GMX_BLUEGENE
973             rinv34           = frsqrtes(rsq34) 
974 #else
975             rinv34           = frsqrte(dble(rsq34)) 
976 #endif
977             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
978      &  *rinv34)))
979 #ifdef GMX_DOUBLE
980             rinv34           = (0.5*rinv34*(3.0-((rsq34*rinv34)
981      &  *rinv34)))
982 #endif
984 C          PowerPC intrinsics 1/sqrt lookup table
985 #ifndef GMX_BLUEGENE
986             rinv42           = frsqrtes(rsq42) 
987 #else
988             rinv42           = frsqrte(dble(rsq42)) 
989 #endif
990             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
991      &  *rinv42)))
992 #ifdef GMX_DOUBLE
993             rinv42           = (0.5*rinv42*(3.0-((rsq42*rinv42)
994      &  *rinv42)))
995 #endif
997 C          PowerPC intrinsics 1/sqrt lookup table
998 #ifndef GMX_BLUEGENE
999             rinv43           = frsqrtes(rsq43) 
1000 #else
1001             rinv43           = frsqrte(dble(rsq43)) 
1002 #endif
1003             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
1004      &  *rinv43)))
1005 #ifdef GMX_DOUBLE
1006             rinv43           = (0.5*rinv43*(3.0-((rsq43*rinv43)
1007      &  *rinv43)))
1008 #endif
1010 C          PowerPC intrinsics 1/sqrt lookup table
1011 #ifndef GMX_BLUEGENE
1012             rinv44           = frsqrtes(rsq44) 
1013 #else
1014             rinv44           = frsqrte(dble(rsq44)) 
1015 #endif
1016             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
1017      &  *rinv44)))
1018 #ifdef GMX_DOUBLE
1019             rinv44           = (0.5*rinv44*(3.0-((rsq44*rinv44)
1020      &  *rinv44)))
1021 #endif
1023 C          Load parameters for j atom
1024             rinvsq           = rinv11*rinv11   
1026 C          Buckingham interaction
1027             rinvsix          = rinvsq*rinvsq*rinvsq
1028             Vvdw6            = c6*rinvsix      
1029             br               = cexp2*rsq11*rinv11
1030             Vvdwexp          = cexp1*exp(-br)  
1031             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
1033 C          Load parameters for j atom
1034             qq               = qqHH            
1036 C          Coulomb reaction-field interaction
1037             krsq             = krf*rsq22       
1038             vcoul            = qq*(rinv22+krsq-crf)
1039             vctot            = vctot+vcoul     
1041 C          Load parameters for j atom
1042             qq               = qqHH            
1044 C          Coulomb reaction-field interaction
1045             krsq             = krf*rsq23       
1046             vcoul            = qq*(rinv23+krsq-crf)
1047             vctot            = vctot+vcoul     
1049 C          Load parameters for j atom
1050             qq               = qqMH            
1052 C          Coulomb reaction-field interaction
1053             krsq             = krf*rsq24       
1054             vcoul            = qq*(rinv24+krsq-crf)
1055             vctot            = vctot+vcoul     
1057 C          Load parameters for j atom
1058             qq               = qqHH            
1060 C          Coulomb reaction-field interaction
1061             krsq             = krf*rsq32       
1062             vcoul            = qq*(rinv32+krsq-crf)
1063             vctot            = vctot+vcoul     
1065 C          Load parameters for j atom
1066             qq               = qqHH            
1068 C          Coulomb reaction-field interaction
1069             krsq             = krf*rsq33       
1070             vcoul            = qq*(rinv33+krsq-crf)
1071             vctot            = vctot+vcoul     
1073 C          Load parameters for j atom
1074             qq               = qqMH            
1076 C          Coulomb reaction-field interaction
1077             krsq             = krf*rsq34       
1078             vcoul            = qq*(rinv34+krsq-crf)
1079             vctot            = vctot+vcoul     
1081 C          Load parameters for j atom
1082             qq               = qqMH            
1084 C          Coulomb reaction-field interaction
1085             krsq             = krf*rsq42       
1086             vcoul            = qq*(rinv42+krsq-crf)
1087             vctot            = vctot+vcoul     
1089 C          Load parameters for j atom
1090             qq               = qqMH            
1092 C          Coulomb reaction-field interaction
1093             krsq             = krf*rsq43       
1094             vcoul            = qq*(rinv43+krsq-crf)
1095             vctot            = vctot+vcoul     
1097 C          Load parameters for j atom
1098             qq               = qqMM            
1100 C          Coulomb reaction-field interaction
1101             krsq             = krf*rsq44       
1102             vcoul            = qq*(rinv44+krsq-crf)
1103             vctot            = vctot+vcoul     
1105 C          Inner loop uses 220 flops/iteration
1106           end do
1107           
1109 C        Add i forces to mem and shifted force list
1111 C        Add potential energies to the group for this list
1112           ggid             = gid(n)+1        
1113           Vc(ggid)         = Vc(ggid) + vctot
1114           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
1116 C        Increment number of inner iterations
1117           ninner           = ninner + nj1 - nj0
1119 C        Outer loop uses 14 flops/iteration
1120         end do
1121         
1123 C      Increment number of outer iterations
1124         nouter           = nouter + nn1 - nn0
1125       if(nn1.lt.nri) goto 10
1127 C    Write outer/inner iteration count to pointers
1128       outeriter        = nouter          
1129       inneriter        = ninner          
1130       return
1131       end