Fixed synchronization counter update for pwr6 kernels
[gromacs/adressmacs.git] / src / gmxlib / nonbonded / nb_kernel_power6 / pwr6kernel202.F
blobd58d76afe57bd26c0ca52b85ee073a42c38eaaa4
2 C                This source code is part of
4 C                 G   R   O   M   A   C   S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction.  Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
31 #ifdef HAVE_CONFIG_H
32 #  include<config.h>
33 #endif
35 #ifdef GMX_DOUBLE
36 #  define gmxreal real*8
37 #else
38 #  define gmxreal real*4
39 #endif
44 C Gromacs nonbonded kernel pwr6kernel202
45 C Coulomb interaction:     Reaction field
46 C VdW interaction:         Not calculated
47 C water optimization:      pairs of SPC/TIP3P interactions
48 C Calculate forces:        yes
50       subroutine pwr6kernel202(
51      &                          nri,
52      &                          iinr,
53      &                          jindex,
54      &                          jjnr,
55      &                          shift,
56      &                          shiftvec,
57      &                          fshift,
58      &                          gid,
59      &                          pos,
60      &                          faction,
61      &                          charge,
62      &                          facel,
63      &                          krf,
64      &                          crf,
65      &                          Vc,
66      &                          type,
67      &                          ntype,
68      &                          vdwparam,
69      &                          Vvdw,
70      &                          tabscale,
71      &                          VFtab,
72      &                          invsqrta,
73      &                          dvda,
74      &                          gbtabscale,
75      &                          GBtab,
76      &                          nthreads,
77      &                          count,
78      &                          mtx,
79      &                          outeriter,
80      &                          inneriter,
81      &                          work)
82       implicit      none
83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
85       integer*4     gid(*),type(*),ntype
86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
87       gmxreal       Vvdw(*),tabscale,VFtab(*)
88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
89       integer*4     nthreads,count,mtx,outeriter,inneriter
90       gmxreal       work(*)
92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
93       integer*4     nn0,nn1,nouter,ninner
94       gmxreal       shX,shY,shZ
95       gmxreal       fscal,tx,ty,tz
96       gmxreal       rinvsq
97       gmxreal       qq,vcoul,vctot
98       gmxreal       krsq
99       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
100       gmxreal       ix2,iy2,iz2,fix2,fiy2,fiz2
101       gmxreal       ix3,iy3,iz3,fix3,fiy3,fiz3
102       gmxreal       jx1,jy1,jz1,fjx1,fjy1,fjz1
103       gmxreal       jx2,jy2,jz2,fjx2,fjy2,fjz2
104       gmxreal       jx3,jy3,jz3,fjx3,fjy3,fjz3
105       gmxreal       dx11,dy11,dz11,rsq11,rinv11
106       gmxreal       dx12,dy12,dz12,rsq12,rinv12
107       gmxreal       dx13,dy13,dz13,rsq13,rinv13
108       gmxreal       dx21,dy21,dz21,rsq21,rinv21
109       gmxreal       dx22,dy22,dz22,rsq22,rinv22
110       gmxreal       dx23,dy23,dz23,rsq23,rinv23
111       gmxreal       dx31,dy31,dz31,rsq31,rinv31
112       gmxreal       dx32,dy32,dz32,rsq32,rinv32
113       gmxreal       dx33,dy33,dz33,rsq33,rinv33
114       gmxreal       qO,qH,qqOO,qqOH,qqHH
117 C    Initialize water data
118       ii               = iinr(1)+1       
119       qO               = charge(ii)      
120       qH               = charge(ii+1)    
121       qqOO             = facel*qO*qO     
122       qqOH             = facel*qO*qH     
123       qqHH             = facel*qH*qH     
126 C    Reset outer and inner iteration counters
127       nouter           = 0               
128       ninner           = 0               
130 C    Loop over thread workunits
131    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
132         if(nn1.gt.nri) nn1=nri
134 C      Start outer loop over neighborlists
135         
136         do n=nn0+1,nn1
138 C        Load shift vector for this list
139           is3              = 3*shift(n)+1    
140           shX              = shiftvec(is3)   
141           shY              = shiftvec(is3+1) 
142           shZ              = shiftvec(is3+2) 
144 C        Load limits for loop over neighbors
145           nj0              = jindex(n)+1     
146           nj1              = jindex(n+1)     
148 C        Get outer coordinate index
149           ii               = iinr(n)+1       
150           ii3              = 3*ii-2          
152 C        Load i atom data, add shift vector
153           ix1              = shX + pos(ii3+0)
154           iy1              = shY + pos(ii3+1)
155           iz1              = shZ + pos(ii3+2)
156           ix2              = shX + pos(ii3+3)
157           iy2              = shY + pos(ii3+4)
158           iz2              = shZ + pos(ii3+5)
159           ix3              = shX + pos(ii3+6)
160           iy3              = shY + pos(ii3+7)
161           iz3              = shZ + pos(ii3+8)
163 C        Zero the potential energy for this list
164           vctot            = 0               
166 C        Clear i atom forces
167           fix1             = 0               
168           fiy1             = 0               
169           fiz1             = 0               
170           fix2             = 0               
171           fiy2             = 0               
172           fiz2             = 0               
173           fix3             = 0               
174           fiy3             = 0               
175           fiz3             = 0               
176           
177           do k=nj0,nj1
179 C          Get j neighbor index, and coordinate index
180             jnr              = jjnr(k)+1       
181             j3               = 3*jnr-2         
183 C          load j atom coordinates
184             jx1              = pos(j3+0)       
185             jy1              = pos(j3+1)       
186             jz1              = pos(j3+2)       
187             jx2              = pos(j3+3)       
188             jy2              = pos(j3+4)       
189             jz2              = pos(j3+5)       
190             jx3              = pos(j3+6)       
191             jy3              = pos(j3+7)       
192             jz3              = pos(j3+8)       
194 C          Calculate distance
195             dx11             = ix1 - jx1       
196             dy11             = iy1 - jy1       
197             dz11             = iz1 - jz1       
198             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
199             dx12             = ix1 - jx2       
200             dy12             = iy1 - jy2       
201             dz12             = iz1 - jz2       
202             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12
203             dx13             = ix1 - jx3       
204             dy13             = iy1 - jy3       
205             dz13             = iz1 - jz3       
206             rsq13            = dx13*dx13+dy13*dy13+dz13*dz13
207             dx21             = ix2 - jx1       
208             dy21             = iy2 - jy1       
209             dz21             = iz2 - jz1       
210             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21
211             dx22             = ix2 - jx2       
212             dy22             = iy2 - jy2       
213             dz22             = iz2 - jz2       
214             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
215             dx23             = ix2 - jx3       
216             dy23             = iy2 - jy3       
217             dz23             = iz2 - jz3       
218             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
219             dx31             = ix3 - jx1       
220             dy31             = iy3 - jy1       
221             dz31             = iz3 - jz1       
222             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31
223             dx32             = ix3 - jx2       
224             dy32             = iy3 - jy2       
225             dz32             = iz3 - jz2       
226             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
227             dx33             = ix3 - jx3       
228             dy33             = iy3 - jy3       
229             dz33             = iz3 - jz3       
230             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
232 C          Calculate 1/r and 1/r2
234 C          PowerPC intrinsics 1/sqrt lookup table
235 #ifndef GMX_BLUEGENE
236             rinv11           = frsqrtes(rsq11) 
237 #else
238             rinv11           = frsqrte(dble(rsq11)) 
239 #endif
240             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
241      &  *rinv11)))
242 #ifdef GMX_DOUBLE
243             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
244      &  *rinv11)))
245 #endif
247 C          PowerPC intrinsics 1/sqrt lookup table
248 #ifndef GMX_BLUEGENE
249             rinv12           = frsqrtes(rsq12) 
250 #else
251             rinv12           = frsqrte(dble(rsq12)) 
252 #endif
253             rinv12           = (0.5*rinv12*(3.0-((rsq12*rinv12)
254      &  *rinv12)))
255 #ifdef GMX_DOUBLE
256             rinv12           = (0.5*rinv12*(3.0-((rsq12*rinv12)
257      &  *rinv12)))
258 #endif
260 C          PowerPC intrinsics 1/sqrt lookup table
261 #ifndef GMX_BLUEGENE
262             rinv13           = frsqrtes(rsq13) 
263 #else
264             rinv13           = frsqrte(dble(rsq13)) 
265 #endif
266             rinv13           = (0.5*rinv13*(3.0-((rsq13*rinv13)
267      &  *rinv13)))
268 #ifdef GMX_DOUBLE
269             rinv13           = (0.5*rinv13*(3.0-((rsq13*rinv13)
270      &  *rinv13)))
271 #endif
273 C          PowerPC intrinsics 1/sqrt lookup table
274 #ifndef GMX_BLUEGENE
275             rinv21           = frsqrtes(rsq21) 
276 #else
277             rinv21           = frsqrte(dble(rsq21)) 
278 #endif
279             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
280      &  *rinv21)))
281 #ifdef GMX_DOUBLE
282             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
283      &  *rinv21)))
284 #endif
286 C          PowerPC intrinsics 1/sqrt lookup table
287 #ifndef GMX_BLUEGENE
288             rinv22           = frsqrtes(rsq22) 
289 #else
290             rinv22           = frsqrte(dble(rsq22)) 
291 #endif
292             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
293      &  *rinv22)))
294 #ifdef GMX_DOUBLE
295             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
296      &  *rinv22)))
297 #endif
299 C          PowerPC intrinsics 1/sqrt lookup table
300 #ifndef GMX_BLUEGENE
301             rinv23           = frsqrtes(rsq23) 
302 #else
303             rinv23           = frsqrte(dble(rsq23)) 
304 #endif
305             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
306      &  *rinv23)))
307 #ifdef GMX_DOUBLE
308             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
309      &  *rinv23)))
310 #endif
312 C          PowerPC intrinsics 1/sqrt lookup table
313 #ifndef GMX_BLUEGENE
314             rinv31           = frsqrtes(rsq31) 
315 #else
316             rinv31           = frsqrte(dble(rsq31)) 
317 #endif
318             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
319      &  *rinv31)))
320 #ifdef GMX_DOUBLE
321             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
322      &  *rinv31)))
323 #endif
325 C          PowerPC intrinsics 1/sqrt lookup table
326 #ifndef GMX_BLUEGENE
327             rinv32           = frsqrtes(rsq32) 
328 #else
329             rinv32           = frsqrte(dble(rsq32)) 
330 #endif
331             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
332      &  *rinv32)))
333 #ifdef GMX_DOUBLE
334             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
335      &  *rinv32)))
336 #endif
338 C          PowerPC intrinsics 1/sqrt lookup table
339 #ifndef GMX_BLUEGENE
340             rinv33           = frsqrtes(rsq33) 
341 #else
342             rinv33           = frsqrte(dble(rsq33)) 
343 #endif
344             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
345      &  *rinv33)))
346 #ifdef GMX_DOUBLE
347             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
348      &  *rinv33)))
349 #endif
351 C          Load parameters for j atom
352             qq               = qqOO            
353             rinvsq           = rinv11*rinv11   
355 C          Coulomb reaction-field interaction
356             krsq             = krf*rsq11       
357             vcoul            = qq*(rinv11+krsq-crf)
358             vctot            = vctot+vcoul     
359             fscal            = (qq*(rinv11-2.0*krsq))*rinvsq
361 C          Calculate temporary vectorial force
362             tx               = fscal*dx11      
363             ty               = fscal*dy11      
364             tz               = fscal*dz11      
366 C          Increment i atom force
367             fix1             = fix1 + tx       
368             fiy1             = fiy1 + ty       
369             fiz1             = fiz1 + tz       
371 C          Decrement j atom force
372             fjx1             = faction(j3+0) - tx
373             fjy1             = faction(j3+1) - ty
374             fjz1             = faction(j3+2) - tz
376 C          Load parameters for j atom
377             qq               = qqOH            
378             rinvsq           = rinv12*rinv12   
380 C          Coulomb reaction-field interaction
381             krsq             = krf*rsq12       
382             vcoul            = qq*(rinv12+krsq-crf)
383             vctot            = vctot+vcoul     
384             fscal            = (qq*(rinv12-2.0*krsq))*rinvsq
386 C          Calculate temporary vectorial force
387             tx               = fscal*dx12      
388             ty               = fscal*dy12      
389             tz               = fscal*dz12      
391 C          Increment i atom force
392             fix1             = fix1 + tx       
393             fiy1             = fiy1 + ty       
394             fiz1             = fiz1 + tz       
396 C          Decrement j atom force
397             fjx2             = faction(j3+3) - tx
398             fjy2             = faction(j3+4) - ty
399             fjz2             = faction(j3+5) - tz
401 C          Load parameters for j atom
402             qq               = qqOH            
403             rinvsq           = rinv13*rinv13   
405 C          Coulomb reaction-field interaction
406             krsq             = krf*rsq13       
407             vcoul            = qq*(rinv13+krsq-crf)
408             vctot            = vctot+vcoul     
409             fscal            = (qq*(rinv13-2.0*krsq))*rinvsq
411 C          Calculate temporary vectorial force
412             tx               = fscal*dx13      
413             ty               = fscal*dy13      
414             tz               = fscal*dz13      
416 C          Increment i atom force
417             fix1             = fix1 + tx       
418             fiy1             = fiy1 + ty       
419             fiz1             = fiz1 + tz       
421 C          Decrement j atom force
422             fjx3             = faction(j3+6) - tx
423             fjy3             = faction(j3+7) - ty
424             fjz3             = faction(j3+8) - tz
426 C          Load parameters for j atom
427             qq               = qqOH            
428             rinvsq           = rinv21*rinv21   
430 C          Coulomb reaction-field interaction
431             krsq             = krf*rsq21       
432             vcoul            = qq*(rinv21+krsq-crf)
433             vctot            = vctot+vcoul     
434             fscal            = (qq*(rinv21-2.0*krsq))*rinvsq
436 C          Calculate temporary vectorial force
437             tx               = fscal*dx21      
438             ty               = fscal*dy21      
439             tz               = fscal*dz21      
441 C          Increment i atom force
442             fix2             = fix2 + tx       
443             fiy2             = fiy2 + ty       
444             fiz2             = fiz2 + tz       
446 C          Decrement j atom force
447             fjx1             = fjx1 - tx       
448             fjy1             = fjy1 - ty       
449             fjz1             = fjz1 - tz       
451 C          Load parameters for j atom
452             qq               = qqHH            
453             rinvsq           = rinv22*rinv22   
455 C          Coulomb reaction-field interaction
456             krsq             = krf*rsq22       
457             vcoul            = qq*(rinv22+krsq-crf)
458             vctot            = vctot+vcoul     
459             fscal            = (qq*(rinv22-2.0*krsq))*rinvsq
461 C          Calculate temporary vectorial force
462             tx               = fscal*dx22      
463             ty               = fscal*dy22      
464             tz               = fscal*dz22      
466 C          Increment i atom force
467             fix2             = fix2 + tx       
468             fiy2             = fiy2 + ty       
469             fiz2             = fiz2 + tz       
471 C          Decrement j atom force
472             fjx2             = fjx2 - tx       
473             fjy2             = fjy2 - ty       
474             fjz2             = fjz2 - tz       
476 C          Load parameters for j atom
477             qq               = qqHH            
478             rinvsq           = rinv23*rinv23   
480 C          Coulomb reaction-field interaction
481             krsq             = krf*rsq23       
482             vcoul            = qq*(rinv23+krsq-crf)
483             vctot            = vctot+vcoul     
484             fscal            = (qq*(rinv23-2.0*krsq))*rinvsq
486 C          Calculate temporary vectorial force
487             tx               = fscal*dx23      
488             ty               = fscal*dy23      
489             tz               = fscal*dz23      
491 C          Increment i atom force
492             fix2             = fix2 + tx       
493             fiy2             = fiy2 + ty       
494             fiz2             = fiz2 + tz       
496 C          Decrement j atom force
497             fjx3             = fjx3 - tx       
498             fjy3             = fjy3 - ty       
499             fjz3             = fjz3 - tz       
501 C          Load parameters for j atom
502             qq               = qqOH            
503             rinvsq           = rinv31*rinv31   
505 C          Coulomb reaction-field interaction
506             krsq             = krf*rsq31       
507             vcoul            = qq*(rinv31+krsq-crf)
508             vctot            = vctot+vcoul     
509             fscal            = (qq*(rinv31-2.0*krsq))*rinvsq
511 C          Calculate temporary vectorial force
512             tx               = fscal*dx31      
513             ty               = fscal*dy31      
514             tz               = fscal*dz31      
516 C          Increment i atom force
517             fix3             = fix3 + tx       
518             fiy3             = fiy3 + ty       
519             fiz3             = fiz3 + tz       
521 C          Decrement j atom force
522             faction(j3+0)    = fjx1 - tx       
523             faction(j3+1)    = fjy1 - ty       
524             faction(j3+2)    = fjz1 - tz       
526 C          Load parameters for j atom
527             qq               = qqHH            
528             rinvsq           = rinv32*rinv32   
530 C          Coulomb reaction-field interaction
531             krsq             = krf*rsq32       
532             vcoul            = qq*(rinv32+krsq-crf)
533             vctot            = vctot+vcoul     
534             fscal            = (qq*(rinv32-2.0*krsq))*rinvsq
536 C          Calculate temporary vectorial force
537             tx               = fscal*dx32      
538             ty               = fscal*dy32      
539             tz               = fscal*dz32      
541 C          Increment i atom force
542             fix3             = fix3 + tx       
543             fiy3             = fiy3 + ty       
544             fiz3             = fiz3 + tz       
546 C          Decrement j atom force
547             faction(j3+3)    = fjx2 - tx       
548             faction(j3+4)    = fjy2 - ty       
549             faction(j3+5)    = fjz2 - tz       
551 C          Load parameters for j atom
552             qq               = qqHH            
553             rinvsq           = rinv33*rinv33   
555 C          Coulomb reaction-field interaction
556             krsq             = krf*rsq33       
557             vcoul            = qq*(rinv33+krsq-crf)
558             vctot            = vctot+vcoul     
559             fscal            = (qq*(rinv33-2.0*krsq))*rinvsq
561 C          Calculate temporary vectorial force
562             tx               = fscal*dx33      
563             ty               = fscal*dy33      
564             tz               = fscal*dz33      
566 C          Increment i atom force
567             fix3             = fix3 + tx       
568             fiy3             = fiy3 + ty       
569             fiz3             = fiz3 + tz       
571 C          Decrement j atom force
572             faction(j3+6)    = fjx3 - tx       
573             faction(j3+7)    = fjy3 - ty       
574             faction(j3+8)    = fjz3 - tz       
576 C          Inner loop uses 297 flops/iteration
577           end do
578           
580 C        Add i forces to mem and shifted force list
581           faction(ii3+0)   = faction(ii3+0) + fix1
582           faction(ii3+1)   = faction(ii3+1) + fiy1
583           faction(ii3+2)   = faction(ii3+2) + fiz1
584           faction(ii3+3)   = faction(ii3+3) + fix2
585           faction(ii3+4)   = faction(ii3+4) + fiy2
586           faction(ii3+5)   = faction(ii3+5) + fiz2
587           faction(ii3+6)   = faction(ii3+6) + fix3
588           faction(ii3+7)   = faction(ii3+7) + fiy3
589           faction(ii3+8)   = faction(ii3+8) + fiz3
590           fshift(is3)      = fshift(is3)+fix1+fix2+fix3
591           fshift(is3+1)    = fshift(is3+1)+fiy1+fiy2+fiy3
592           fshift(is3+2)    = fshift(is3+2)+fiz1+fiz2+fiz3
594 C        Add potential energies to the group for this list
595           ggid             = gid(n)+1        
596           Vc(ggid)         = Vc(ggid) + vctot
598 C        Increment number of inner iterations
599           ninner           = ninner + nj1 - nj0
601 C        Outer loop uses 28 flops/iteration
602         end do
603         
605 C      Increment number of outer iterations
606         nouter           = nouter + nn1 - nn0
607       if(nn1.lt.nri) goto 10
609 C    Write outer/inner iteration count to pointers
610       outeriter        = nouter          
611       inneriter        = ninner          
612       return
613       end
621 C Gromacs nonbonded kernel pwr6kernel202nf
622 C Coulomb interaction:     Reaction field
623 C VdW interaction:         Not calculated
624 C water optimization:      pairs of SPC/TIP3P interactions
625 C Calculate forces:        no
627       subroutine pwr6kernel202nf(
628      &                          nri,
629      &                          iinr,
630      &                          jindex,
631      &                          jjnr,
632      &                          shift,
633      &                          shiftvec,
634      &                          fshift,
635      &                          gid,
636      &                          pos,
637      &                          faction,
638      &                          charge,
639      &                          facel,
640      &                          krf,
641      &                          crf,
642      &                          Vc,
643      &                          type,
644      &                          ntype,
645      &                          vdwparam,
646      &                          Vvdw,
647      &                          tabscale,
648      &                          VFtab,
649      &                          invsqrta,
650      &                          dvda,
651      &                          gbtabscale,
652      &                          GBtab,
653      &                          nthreads,
654      &                          count,
655      &                          mtx,
656      &                          outeriter,
657      &                          inneriter,
658      &                          work)
659       implicit      none
660       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
661       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
662       integer*4     gid(*),type(*),ntype
663       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
664       gmxreal       Vvdw(*),tabscale,VFtab(*)
665       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
666       integer*4     nthreads,count,mtx,outeriter,inneriter
667       gmxreal       work(*)
669       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
670       integer*4     nn0,nn1,nouter,ninner
671       gmxreal       shX,shY,shZ
672       gmxreal       qq,vcoul,vctot
673       gmxreal       krsq
674       gmxreal       ix1,iy1,iz1
675       gmxreal       ix2,iy2,iz2
676       gmxreal       ix3,iy3,iz3
677       gmxreal       jx1,jy1,jz1
678       gmxreal       jx2,jy2,jz2
679       gmxreal       jx3,jy3,jz3
680       gmxreal       dx11,dy11,dz11,rsq11,rinv11
681       gmxreal       dx12,dy12,dz12,rsq12,rinv12
682       gmxreal       dx13,dy13,dz13,rsq13,rinv13
683       gmxreal       dx21,dy21,dz21,rsq21,rinv21
684       gmxreal       dx22,dy22,dz22,rsq22,rinv22
685       gmxreal       dx23,dy23,dz23,rsq23,rinv23
686       gmxreal       dx31,dy31,dz31,rsq31,rinv31
687       gmxreal       dx32,dy32,dz32,rsq32,rinv32
688       gmxreal       dx33,dy33,dz33,rsq33,rinv33
689       gmxreal       qO,qH,qqOO,qqOH,qqHH
692 C    Initialize water data
693       ii               = iinr(1)+1       
694       qO               = charge(ii)      
695       qH               = charge(ii+1)    
696       qqOO             = facel*qO*qO     
697       qqOH             = facel*qO*qH     
698       qqHH             = facel*qH*qH     
701 C    Reset outer and inner iteration counters
702       nouter           = 0               
703       ninner           = 0               
705 C    Loop over thread workunits
706    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
707         if(nn1.gt.nri) nn1=nri
709 C      Start outer loop over neighborlists
710         
711         do n=nn0+1,nn1
713 C        Load shift vector for this list
714           is3              = 3*shift(n)+1    
715           shX              = shiftvec(is3)   
716           shY              = shiftvec(is3+1) 
717           shZ              = shiftvec(is3+2) 
719 C        Load limits for loop over neighbors
720           nj0              = jindex(n)+1     
721           nj1              = jindex(n+1)     
723 C        Get outer coordinate index
724           ii               = iinr(n)+1       
725           ii3              = 3*ii-2          
727 C        Load i atom data, add shift vector
728           ix1              = shX + pos(ii3+0)
729           iy1              = shY + pos(ii3+1)
730           iz1              = shZ + pos(ii3+2)
731           ix2              = shX + pos(ii3+3)
732           iy2              = shY + pos(ii3+4)
733           iz2              = shZ + pos(ii3+5)
734           ix3              = shX + pos(ii3+6)
735           iy3              = shY + pos(ii3+7)
736           iz3              = shZ + pos(ii3+8)
738 C        Zero the potential energy for this list
739           vctot            = 0               
741 C        Clear i atom forces
742           
743           do k=nj0,nj1
745 C          Get j neighbor index, and coordinate index
746             jnr              = jjnr(k)+1       
747             j3               = 3*jnr-2         
749 C          load j atom coordinates
750             jx1              = pos(j3+0)       
751             jy1              = pos(j3+1)       
752             jz1              = pos(j3+2)       
753             jx2              = pos(j3+3)       
754             jy2              = pos(j3+4)       
755             jz2              = pos(j3+5)       
756             jx3              = pos(j3+6)       
757             jy3              = pos(j3+7)       
758             jz3              = pos(j3+8)       
760 C          Calculate distance
761             dx11             = ix1 - jx1       
762             dy11             = iy1 - jy1       
763             dz11             = iz1 - jz1       
764             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
765             dx12             = ix1 - jx2       
766             dy12             = iy1 - jy2       
767             dz12             = iz1 - jz2       
768             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12
769             dx13             = ix1 - jx3       
770             dy13             = iy1 - jy3       
771             dz13             = iz1 - jz3       
772             rsq13            = dx13*dx13+dy13*dy13+dz13*dz13
773             dx21             = ix2 - jx1       
774             dy21             = iy2 - jy1       
775             dz21             = iz2 - jz1       
776             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21
777             dx22             = ix2 - jx2       
778             dy22             = iy2 - jy2       
779             dz22             = iz2 - jz2       
780             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22
781             dx23             = ix2 - jx3       
782             dy23             = iy2 - jy3       
783             dz23             = iz2 - jz3       
784             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23
785             dx31             = ix3 - jx1       
786             dy31             = iy3 - jy1       
787             dz31             = iz3 - jz1       
788             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31
789             dx32             = ix3 - jx2       
790             dy32             = iy3 - jy2       
791             dz32             = iz3 - jz2       
792             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32
793             dx33             = ix3 - jx3       
794             dy33             = iy3 - jy3       
795             dz33             = iz3 - jz3       
796             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33
798 C          Calculate 1/r and 1/r2
800 C          PowerPC intrinsics 1/sqrt lookup table
801 #ifndef GMX_BLUEGENE
802             rinv11           = frsqrtes(rsq11) 
803 #else
804             rinv11           = frsqrte(dble(rsq11)) 
805 #endif
806             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
807      &  *rinv11)))
808 #ifdef GMX_DOUBLE
809             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
810      &  *rinv11)))
811 #endif
813 C          PowerPC intrinsics 1/sqrt lookup table
814 #ifndef GMX_BLUEGENE
815             rinv12           = frsqrtes(rsq12) 
816 #else
817             rinv12           = frsqrte(dble(rsq12)) 
818 #endif
819             rinv12           = (0.5*rinv12*(3.0-((rsq12*rinv12)
820      &  *rinv12)))
821 #ifdef GMX_DOUBLE
822             rinv12           = (0.5*rinv12*(3.0-((rsq12*rinv12)
823      &  *rinv12)))
824 #endif
826 C          PowerPC intrinsics 1/sqrt lookup table
827 #ifndef GMX_BLUEGENE
828             rinv13           = frsqrtes(rsq13) 
829 #else
830             rinv13           = frsqrte(dble(rsq13)) 
831 #endif
832             rinv13           = (0.5*rinv13*(3.0-((rsq13*rinv13)
833      &  *rinv13)))
834 #ifdef GMX_DOUBLE
835             rinv13           = (0.5*rinv13*(3.0-((rsq13*rinv13)
836      &  *rinv13)))
837 #endif
839 C          PowerPC intrinsics 1/sqrt lookup table
840 #ifndef GMX_BLUEGENE
841             rinv21           = frsqrtes(rsq21) 
842 #else
843             rinv21           = frsqrte(dble(rsq21)) 
844 #endif
845             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
846      &  *rinv21)))
847 #ifdef GMX_DOUBLE
848             rinv21           = (0.5*rinv21*(3.0-((rsq21*rinv21)
849      &  *rinv21)))
850 #endif
852 C          PowerPC intrinsics 1/sqrt lookup table
853 #ifndef GMX_BLUEGENE
854             rinv22           = frsqrtes(rsq22) 
855 #else
856             rinv22           = frsqrte(dble(rsq22)) 
857 #endif
858             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
859      &  *rinv22)))
860 #ifdef GMX_DOUBLE
861             rinv22           = (0.5*rinv22*(3.0-((rsq22*rinv22)
862      &  *rinv22)))
863 #endif
865 C          PowerPC intrinsics 1/sqrt lookup table
866 #ifndef GMX_BLUEGENE
867             rinv23           = frsqrtes(rsq23) 
868 #else
869             rinv23           = frsqrte(dble(rsq23)) 
870 #endif
871             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
872      &  *rinv23)))
873 #ifdef GMX_DOUBLE
874             rinv23           = (0.5*rinv23*(3.0-((rsq23*rinv23)
875      &  *rinv23)))
876 #endif
878 C          PowerPC intrinsics 1/sqrt lookup table
879 #ifndef GMX_BLUEGENE
880             rinv31           = frsqrtes(rsq31) 
881 #else
882             rinv31           = frsqrte(dble(rsq31)) 
883 #endif
884             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
885      &  *rinv31)))
886 #ifdef GMX_DOUBLE
887             rinv31           = (0.5*rinv31*(3.0-((rsq31*rinv31)
888      &  *rinv31)))
889 #endif
891 C          PowerPC intrinsics 1/sqrt lookup table
892 #ifndef GMX_BLUEGENE
893             rinv32           = frsqrtes(rsq32) 
894 #else
895             rinv32           = frsqrte(dble(rsq32)) 
896 #endif
897             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
898      &  *rinv32)))
899 #ifdef GMX_DOUBLE
900             rinv32           = (0.5*rinv32*(3.0-((rsq32*rinv32)
901      &  *rinv32)))
902 #endif
904 C          PowerPC intrinsics 1/sqrt lookup table
905 #ifndef GMX_BLUEGENE
906             rinv33           = frsqrtes(rsq33) 
907 #else
908             rinv33           = frsqrte(dble(rsq33)) 
909 #endif
910             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
911      &  *rinv33)))
912 #ifdef GMX_DOUBLE
913             rinv33           = (0.5*rinv33*(3.0-((rsq33*rinv33)
914      &  *rinv33)))
915 #endif
917 C          Load parameters for j atom
918             qq               = qqOO            
920 C          Coulomb reaction-field interaction
921             krsq             = krf*rsq11       
922             vcoul            = qq*(rinv11+krsq-crf)
923             vctot            = vctot+vcoul     
925 C          Load parameters for j atom
926             qq               = qqOH            
928 C          Coulomb reaction-field interaction
929             krsq             = krf*rsq12       
930             vcoul            = qq*(rinv12+krsq-crf)
931             vctot            = vctot+vcoul     
933 C          Load parameters for j atom
934             qq               = qqOH            
936 C          Coulomb reaction-field interaction
937             krsq             = krf*rsq13       
938             vcoul            = qq*(rinv13+krsq-crf)
939             vctot            = vctot+vcoul     
941 C          Load parameters for j atom
942             qq               = qqOH            
944 C          Coulomb reaction-field interaction
945             krsq             = krf*rsq21       
946             vcoul            = qq*(rinv21+krsq-crf)
947             vctot            = vctot+vcoul     
949 C          Load parameters for j atom
950             qq               = qqHH            
952 C          Coulomb reaction-field interaction
953             krsq             = krf*rsq22       
954             vcoul            = qq*(rinv22+krsq-crf)
955             vctot            = vctot+vcoul     
957 C          Load parameters for j atom
958             qq               = qqHH            
960 C          Coulomb reaction-field interaction
961             krsq             = krf*rsq23       
962             vcoul            = qq*(rinv23+krsq-crf)
963             vctot            = vctot+vcoul     
965 C          Load parameters for j atom
966             qq               = qqOH            
968 C          Coulomb reaction-field interaction
969             krsq             = krf*rsq31       
970             vcoul            = qq*(rinv31+krsq-crf)
971             vctot            = vctot+vcoul     
973 C          Load parameters for j atom
974             qq               = qqHH            
976 C          Coulomb reaction-field interaction
977             krsq             = krf*rsq32       
978             vcoul            = qq*(rinv32+krsq-crf)
979             vctot            = vctot+vcoul     
981 C          Load parameters for j atom
982             qq               = qqHH            
984 C          Coulomb reaction-field interaction
985             krsq             = krf*rsq33       
986             vcoul            = qq*(rinv33+krsq-crf)
987             vctot            = vctot+vcoul     
989 C          Inner loop uses 171 flops/iteration
990           end do
991           
993 C        Add i forces to mem and shifted force list
995 C        Add potential energies to the group for this list
996           ggid             = gid(n)+1        
997           Vc(ggid)         = Vc(ggid) + vctot
999 C        Increment number of inner iterations
1000           ninner           = ninner + nj1 - nj0
1002 C        Outer loop uses 10 flops/iteration
1003         end do
1004         
1006 C      Increment number of outer iterations
1007         nouter           = nouter + nn1 - nn0
1008       if(nn1.lt.nri) goto 10
1010 C    Write outer/inner iteration count to pointers
1011       outeriter        = nouter          
1012       inneriter        = ninner          
1013       return
1014       end