src/gmxlib/nonbonded/nb_kernel_power6/pwr6kernel200.F

   1 C
   2 C                This source code is part of
   3 C
   4 C                 G   R   O   M   A   C   S
   5 C
   6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7 C Copyright (c) 2001-2009, The GROMACS Development Team
   8 C
   9 C Gromacs is a library for molecular simulation and trajectory analysis,
  10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11 C a full list of developers and information, check out http://www.gromacs.org
  12 C
  13 C This program is free software; you can redistribute it and/or modify it under
  14 C the terms of the GNU Lesser General Public License as published by the Free
  15 C Software Foundation; either version 2 of the License, or (at your option) any
  16 C later version.
  17 C As a special exception, you may use this file as part of a free software
  18 C library without restriction.  Specifically, if other files instantiate
  19 C templates or use macros or inline functions from this file, or you compile
  20 C this file and link it with other files to produce an executable, this
  21 C file does not by itself cause the resulting executable to be covered by
  22 C the GNU Lesser General Public License.
  23 C
  24 C In plain-speak: do not worry about classes/macros/templates either - only
  25 C changes to the library have to be LGPL, not an application linking with it.
  26 C
  27 C To help fund GROMACS development, we humbly ask that you cite
  28 C the papers people have written on it - you can find them on the website!
  29 C
  30
  31 #ifdef HAVE_CONFIG_H
  32 #  include<config.h>
  33 #endif
  34
  35 #ifdef GMX_DOUBLE
  36 #  define gmxreal real*8
  37 #else
  38 #  define gmxreal real*4
  39 #endif
  40
  41
  42
  43 C
  44 C Gromacs nonbonded kernel pwr6kernel200
  45 C Coulomb interaction:     Reaction field
  46 C VdW interaction:         Not calculated
  47 C water optimization:      No
  48 C Calculate forces:        yes
  49 C
  50       subroutine pwr6kernel200(
  51      &                          nri,
  52      &                          iinr,
  53      &                          jindex,
  54      &                          jjnr,
  55      &                          shift,
  56      &                          shiftvec,
  57      &                          fshift,
  58      &                          gid,
  59      &                          pos,
  60      &                          faction,
  61      &                          charge,
  62      &                          facel,
  63      &                          krf,
  64      &                          crf,
  65      &                          Vc,
  66      &                          type,
  67      &                          ntype,
  68      &                          vdwparam,
  69      &                          Vvdw,
  70      &                          tabscale,
  71      &                          VFtab,
  72      &                          invsqrta,
  73      &                          dvda,
  74      &                          gbtabscale,
  75      &                          GBtab,
  76      &                          nthreads,
  77      &                          count,
  78      &                          mtx,
  79      &                          outeriter,
  80      &                          inneriter,
  81      &                          work)
  82       implicit      none
  83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
  84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
  85       integer*4     gid(*),type(*),ntype
  86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
  87       gmxreal       Vvdw(*),tabscale,VFtab(*)
  88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
  89       integer*4     nthreads,count,mtx,outeriter,inneriter
  90       gmxreal       work(*)
  91
  92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
  93       integer*4     nn0,nn1,nouter,ninner
  94       gmxreal       shX,shY,shZ
  95       gmxreal       fscal,tx,ty,tz
  96       gmxreal       rinvsq
  97       gmxreal       iq
  98       gmxreal       qq,vcoul,vctot
  99       gmxreal       krsq
 100       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
 101       gmxreal       jx1,jy1,jz1
 102       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 103
 104
 105 C    Reset outer and inner iteration counters
 106       nouter           = 0
 107       ninner           = 0
 108
 109 C    Loop over thread workunits
 110    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 111         if(nn1.gt.nri) nn1=nri
 112
 113 C      Start outer loop over neighborlists
 114
 115         do n=nn0+1,nn1
 116
 117 C        Load shift vector for this list
 118           is3              = 3*shift(n)+1
 119           shX              = shiftvec(is3)
 120           shY              = shiftvec(is3+1)
 121           shZ              = shiftvec(is3+2)
 122
 123 C        Load limits for loop over neighbors
 124           nj0              = jindex(n)+1
 125           nj1              = jindex(n+1)
 126
 127 C        Get outer coordinate index
 128           ii               = iinr(n)+1
 129           ii3              = 3*ii-2
 130
 131 C        Load i atom data, add shift vector
 132           ix1              = shX + pos(ii3+0)
 133           iy1              = shY + pos(ii3+1)
 134           iz1              = shZ + pos(ii3+2)
 135
 136 C        Load parameters for i atom
 137           iq               = facel*charge(ii)
 138
 139 C        Zero the potential energy for this list
 140           vctot            = 0
 141
 142 C        Clear i atom forces
 143           fix1             = 0
 144           fiy1             = 0
 145           fiz1             = 0
 146
 147           do k=nj0,nj1
 148
 149 C          Get j neighbor index, and coordinate index
 150             jnr              = jjnr(k)+1
 151             j3               = 3*jnr-2
 152
 153 C          load j atom coordinates
 154             jx1              = pos(j3+0)
 155             jy1              = pos(j3+1)
 156             jz1              = pos(j3+2)
 157
 158 C          Calculate distance
 159             dx11             = ix1 - jx1
 160             dy11             = iy1 - jy1
 161             dz11             = iz1 - jz1
 162             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 163
 164 C          Calculate 1/r and 1/r2
 165
 166 C          PowerPC intrinsics 1/sqrt lookup table
 167 #ifndef GMX_BLUEGENE
 168             rinv11           = frsqrtes(rsq11)
 169 #else
 170             rinv11           = frsqrte(dble(rsq11))
 171 #endif
 172             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 173      &  *rinv11)))
 174 #ifdef GMX_DOUBLE
 175             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 176      &  *rinv11)))
 177 #endif
 178
 179 C          Load parameters for j atom
 180             qq               = iq*charge(jnr)
 181             rinvsq           = rinv11*rinv11
 182
 183 C          Coulomb reaction-field interaction
 184             krsq             = krf*rsq11
 185             vcoul            = qq*(rinv11+krsq-crf)
 186             vctot            = vctot+vcoul
 187             fscal            = (qq*(rinv11-2.0*krsq))*rinvsq
 188
 189 C          Calculate temporary vectorial force
 190             tx               = fscal*dx11
 191             ty               = fscal*dy11
 192             tz               = fscal*dz11
 193
 194 C          Increment i atom force
 195             fix1             = fix1 + tx
 196             fiy1             = fiy1 + ty
 197             fiz1             = fiz1 + tz
 198
 199 C          Decrement j atom force
 200             faction(j3+0)    = faction(j3+0) - tx
 201             faction(j3+1)    = faction(j3+1) - ty
 202             faction(j3+2)    = faction(j3+2) - tz
 203
 204 C          Inner loop uses 34 flops/iteration
 205           end do
 206
 207
 208 C        Add i forces to mem and shifted force list
 209           faction(ii3+0)   = faction(ii3+0) + fix1
 210           faction(ii3+1)   = faction(ii3+1) + fiy1
 211           faction(ii3+2)   = faction(ii3+2) + fiz1
 212           fshift(is3)      = fshift(is3)+fix1
 213           fshift(is3+1)    = fshift(is3+1)+fiy1
 214           fshift(is3+2)    = fshift(is3+2)+fiz1
 215
 216 C        Add potential energies to the group for this list
 217           ggid             = gid(n)+1
 218           Vc(ggid)         = Vc(ggid) + vctot
 219
 220 C        Increment number of inner iterations
 221           ninner           = ninner + nj1 - nj0
 222
 223 C        Outer loop uses 11 flops/iteration
 224         end do
 225
 226
 227 C      Increment number of outer iterations
 228         nouter           = nouter + nn1 - nn0
 229       if(nn1.lt.nri) goto 10
 230
 231 C    Write outer/inner iteration count to pointers
 232       outeriter        = nouter
 233       inneriter        = ninner
 234       return
 235       end
 236
 237
 238
 239
 240
 241
 242 C
 243 C Gromacs nonbonded kernel pwr6kernel200nf
 244 C Coulomb interaction:     Reaction field
 245 C VdW interaction:         Not calculated
 246 C water optimization:      No
 247 C Calculate forces:        no
 248 C
 249       subroutine pwr6kernel200nf(
 250      &                          nri,
 251      &                          iinr,
 252      &                          jindex,
 253      &                          jjnr,
 254      &                          shift,
 255      &                          shiftvec,
 256      &                          fshift,
 257      &                          gid,
 258      &                          pos,
 259      &                          faction,
 260      &                          charge,
 261      &                          facel,
 262      &                          krf,
 263      &                          crf,
 264      &                          Vc,
 265      &                          type,
 266      &                          ntype,
 267      &                          vdwparam,
 268      &                          Vvdw,
 269      &                          tabscale,
 270      &                          VFtab,
 271      &                          invsqrta,
 272      &                          dvda,
 273      &                          gbtabscale,
 274      &                          GBtab,
 275      &                          nthreads,
 276      &                          count,
 277      &                          mtx,
 278      &                          outeriter,
 279      &                          inneriter,
 280      &                          work)
 281       implicit      none
 282       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
 283       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
 284       integer*4     gid(*),type(*),ntype
 285       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
 286       gmxreal       Vvdw(*),tabscale,VFtab(*)
 287       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
 288       integer*4     nthreads,count,mtx,outeriter,inneriter
 289       gmxreal       work(*)
 290
 291       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
 292       integer*4     nn0,nn1,nouter,ninner
 293       gmxreal       shX,shY,shZ
 294       gmxreal       iq
 295       gmxreal       qq,vcoul,vctot
 296       gmxreal       krsq
 297       gmxreal       ix1,iy1,iz1
 298       gmxreal       jx1,jy1,jz1
 299       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 300
 301
 302 C    Reset outer and inner iteration counters
 303       nouter           = 0
 304       ninner           = 0
 305
 306 C    Loop over thread workunits
 307    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 308         if(nn1.gt.nri) nn1=nri
 309
 310 C      Start outer loop over neighborlists
 311
 312         do n=nn0+1,nn1
 313
 314 C        Load shift vector for this list
 315           is3              = 3*shift(n)+1
 316           shX              = shiftvec(is3)
 317           shY              = shiftvec(is3+1)
 318           shZ              = shiftvec(is3+2)
 319
 320 C        Load limits for loop over neighbors
 321           nj0              = jindex(n)+1
 322           nj1              = jindex(n+1)
 323
 324 C        Get outer coordinate index
 325           ii               = iinr(n)+1
 326           ii3              = 3*ii-2
 327
 328 C        Load i atom data, add shift vector
 329           ix1              = shX + pos(ii3+0)
 330           iy1              = shY + pos(ii3+1)
 331           iz1              = shZ + pos(ii3+2)
 332
 333 C        Load parameters for i atom
 334           iq               = facel*charge(ii)
 335
 336 C        Zero the potential energy for this list
 337           vctot            = 0
 338
 339 C        Clear i atom forces
 340
 341           do k=nj0,nj1
 342
 343 C          Get j neighbor index, and coordinate index
 344             jnr              = jjnr(k)+1
 345             j3               = 3*jnr-2
 346
 347 C          load j atom coordinates
 348             jx1              = pos(j3+0)
 349             jy1              = pos(j3+1)
 350             jz1              = pos(j3+2)
 351
 352 C          Calculate distance
 353             dx11             = ix1 - jx1
 354             dy11             = iy1 - jy1
 355             dz11             = iz1 - jz1
 356             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 357
 358 C          Calculate 1/r and 1/r2
 359
 360 C          PowerPC intrinsics 1/sqrt lookup table
 361 #ifndef GMX_BLUEGENE
 362             rinv11           = frsqrtes(rsq11)
 363 #else
 364             rinv11           = frsqrte(dble(rsq11))
 365 #endif
 366             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 367      &  *rinv11)))
 368 #ifdef GMX_DOUBLE
 369             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 370      &  *rinv11)))
 371 #endif
 372
 373 C          Load parameters for j atom
 374             qq               = iq*charge(jnr)
 375
 376 C          Coulomb reaction-field interaction
 377             krsq             = krf*rsq11
 378             vcoul            = qq*(rinv11+krsq-crf)
 379             vctot            = vctot+vcoul
 380
 381 C          Inner loop uses 20 flops/iteration
 382           end do
 383
 384
 385 C        Add i forces to mem and shifted force list
 386
 387 C        Add potential energies to the group for this list
 388           ggid             = gid(n)+1
 389           Vc(ggid)         = Vc(ggid) + vctot
 390
 391 C        Increment number of inner iterations
 392           ninner           = ninner + nj1 - nj0
 393
 394 C        Outer loop uses 5 flops/iteration
 395         end do
 396
 397
 398 C      Increment number of outer iterations
 399         nouter           = nouter + nn1 - nn0
 400       if(nn1.lt.nri) goto 10
 401
 402 C    Write outer/inner iteration count to pointers
 403       outeriter        = nouter
 404       inneriter        = ninner
 405       return
 406       end
 407
 408
 409