src/gmxlib/nonbonded/nb_kernel_power6/pwr6kernel100.F

   1 C
   2 C                This source code is part of
   3 C
   4 C                 G   R   O   M   A   C   S
   5 C
   6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7 C Copyright (c) 2001-2009, The GROMACS Development Team
   8 C
   9 C Gromacs is a library for molecular simulation and trajectory analysis,
  10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11 C a full list of developers and information, check out http://www.gromacs.org
  12 C
  13 C This program is free software; you can redistribute it and/or modify it under
  14 C the terms of the GNU Lesser General Public License as published by the Free
  15 C Software Foundation; either version 2 of the License, or (at your option) any
  16 C later version.
  17 C As a special exception, you may use this file as part of a free software
  18 C library without restriction.  Specifically, if other files instantiate
  19 C templates or use macros or inline functions from this file, or you compile
  20 C this file and link it with other files to produce an executable, this
  21 C file does not by itself cause the resulting executable to be covered by
  22 C the GNU Lesser General Public License.
  23 C
  24 C In plain-speak: do not worry about classes/macros/templates either - only
  25 C changes to the library have to be LGPL, not an application linking with it.
  26 C
  27 C To help fund GROMACS development, we humbly ask that you cite
  28 C the papers people have written on it - you can find them on the website!
  29 C
  30
  31 #ifdef HAVE_CONFIG_H
  32 #  include<config.h>
  33 #endif
  34
  35 #ifdef GMX_DOUBLE
  36 #  define gmxreal real*8
  37 #else
  38 #  define gmxreal real*4
  39 #endif
  40
  41
  42
  43 C
  44 C Gromacs nonbonded kernel pwr6kernel100
  45 C Coulomb interaction:     Normal Coulomb
  46 C VdW interaction:         Not calculated
  47 C water optimization:      No
  48 C Calculate forces:        yes
  49 C
  50       subroutine pwr6kernel100(
  51      &                          nri,
  52      &                          iinr,
  53      &                          jindex,
  54      &                          jjnr,
  55      &                          shift,
  56      &                          shiftvec,
  57      &                          fshift,
  58      &                          gid,
  59      &                          pos,
  60      &                          faction,
  61      &                          charge,
  62      &                          facel,
  63      &                          krf,
  64      &                          crf,
  65      &                          Vc,
  66      &                          type,
  67      &                          ntype,
  68      &                          vdwparam,
  69      &                          Vvdw,
  70      &                          tabscale,
  71      &                          VFtab,
  72      &                          invsqrta,
  73      &                          dvda,
  74      &                          gbtabscale,
  75      &                          GBtab,
  76      &                          nthreads,
  77      &                          count,
  78      &                          mtx,
  79      &                          outeriter,
  80      &                          inneriter,
  81      &                          work)
  82       implicit      none
  83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
  84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
  85       integer*4     gid(*),type(*),ntype
  86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
  87       gmxreal       Vvdw(*),tabscale,VFtab(*)
  88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
  89       integer*4     nthreads,count,mtx,outeriter,inneriter
  90       gmxreal       work(*)
  91
  92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
  93       integer*4     nn0,nn1,nouter,ninner
  94       gmxreal       shX,shY,shZ
  95       gmxreal       fscal,tx,ty,tz
  96       gmxreal       rinvsq
  97       gmxreal       iq
  98       gmxreal       qq,vcoul,vctot
  99       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
 100       gmxreal       jx1,jy1,jz1
 101       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 102
 103
 104 C    Reset outer and inner iteration counters
 105       nouter           = 0
 106       ninner           = 0
 107
 108 C    Loop over thread workunits
 109    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 110         if(nn1.gt.nri) nn1=nri
 111
 112 C      Start outer loop over neighborlists
 113
 114         do n=nn0+1,nn1
 115
 116 C        Load shift vector for this list
 117           is3              = 3*shift(n)+1
 118           shX              = shiftvec(is3)
 119           shY              = shiftvec(is3+1)
 120           shZ              = shiftvec(is3+2)
 121
 122 C        Load limits for loop over neighbors
 123           nj0              = jindex(n)+1
 124           nj1              = jindex(n+1)
 125
 126 C        Get outer coordinate index
 127           ii               = iinr(n)+1
 128           ii3              = 3*ii-2
 129
 130 C        Load i atom data, add shift vector
 131           ix1              = shX + pos(ii3+0)
 132           iy1              = shY + pos(ii3+1)
 133           iz1              = shZ + pos(ii3+2)
 134
 135 C        Load parameters for i atom
 136           iq               = facel*charge(ii)
 137
 138 C        Zero the potential energy for this list
 139           vctot            = 0
 140
 141 C        Clear i atom forces
 142           fix1             = 0
 143           fiy1             = 0
 144           fiz1             = 0
 145
 146           do k=nj0,nj1
 147
 148 C          Get j neighbor index, and coordinate index
 149             jnr              = jjnr(k)+1
 150             j3               = 3*jnr-2
 151
 152 C          load j atom coordinates
 153             jx1              = pos(j3+0)
 154             jy1              = pos(j3+1)
 155             jz1              = pos(j3+2)
 156
 157 C          Calculate distance
 158             dx11             = ix1 - jx1
 159             dy11             = iy1 - jy1
 160             dz11             = iz1 - jz1
 161             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 162
 163 C          Calculate 1/r and 1/r2
 164
 165 C          PowerPC intrinsics 1/sqrt lookup table
 166 #ifndef GMX_BLUEGENE
 167             rinv11           = frsqrtes(rsq11)
 168 #else
 169             rinv11           = frsqrte(dble(rsq11))
 170 #endif
 171             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 172      &  *rinv11)))
 173 #ifdef GMX_DOUBLE
 174             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 175      &  *rinv11)))
 176 #endif
 177
 178 C          Load parameters for j atom
 179             qq               = iq*charge(jnr)
 180             rinvsq           = rinv11*rinv11
 181
 182 C          Coulomb interaction
 183             vcoul            = qq*rinv11
 184             vctot            = vctot+vcoul
 185             fscal            = (vcoul)*rinvsq
 186
 187 C          Calculate temporary vectorial force
 188             tx               = fscal*dx11
 189             ty               = fscal*dy11
 190             tz               = fscal*dz11
 191
 192 C          Increment i atom force
 193             fix1             = fix1 + tx
 194             fiy1             = fiy1 + ty
 195             fiz1             = fiz1 + tz
 196
 197 C          Decrement j atom force
 198             faction(j3+0)    = faction(j3+0) - tx
 199             faction(j3+1)    = faction(j3+1) - ty
 200             faction(j3+2)    = faction(j3+2) - tz
 201
 202 C          Inner loop uses 28 flops/iteration
 203           end do
 204
 205
 206 C        Add i forces to mem and shifted force list
 207           faction(ii3+0)   = faction(ii3+0) + fix1
 208           faction(ii3+1)   = faction(ii3+1) + fiy1
 209           faction(ii3+2)   = faction(ii3+2) + fiz1
 210           fshift(is3)      = fshift(is3)+fix1
 211           fshift(is3+1)    = fshift(is3+1)+fiy1
 212           fshift(is3+2)    = fshift(is3+2)+fiz1
 213
 214 C        Add potential energies to the group for this list
 215           ggid             = gid(n)+1
 216           Vc(ggid)         = Vc(ggid) + vctot
 217
 218 C        Increment number of inner iterations
 219           ninner           = ninner + nj1 - nj0
 220
 221 C        Outer loop uses 11 flops/iteration
 222         end do
 223
 224
 225 C      Increment number of outer iterations
 226         nouter           = nouter + nn1 - nn0
 227       if(nn1.lt.nri) goto 10
 228
 229 C    Write outer/inner iteration count to pointers
 230       outeriter        = nouter
 231       inneriter        = ninner
 232       return
 233       end
 234
 235
 236
 237
 238
 239
 240 C
 241 C Gromacs nonbonded kernel pwr6kernel100nf
 242 C Coulomb interaction:     Normal Coulomb
 243 C VdW interaction:         Not calculated
 244 C water optimization:      No
 245 C Calculate forces:        no
 246 C
 247       subroutine pwr6kernel100nf(
 248      &                          nri,
 249      &                          iinr,
 250      &                          jindex,
 251      &                          jjnr,
 252      &                          shift,
 253      &                          shiftvec,
 254      &                          fshift,
 255      &                          gid,
 256      &                          pos,
 257      &                          faction,
 258      &                          charge,
 259      &                          facel,
 260      &                          krf,
 261      &                          crf,
 262      &                          Vc,
 263      &                          type,
 264      &                          ntype,
 265      &                          vdwparam,
 266      &                          Vvdw,
 267      &                          tabscale,
 268      &                          VFtab,
 269      &                          invsqrta,
 270      &                          dvda,
 271      &                          gbtabscale,
 272      &                          GBtab,
 273      &                          nthreads,
 274      &                          count,
 275      &                          mtx,
 276      &                          outeriter,
 277      &                          inneriter,
 278      &                          work)
 279       implicit      none
 280       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
 281       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
 282       integer*4     gid(*),type(*),ntype
 283       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
 284       gmxreal       Vvdw(*),tabscale,VFtab(*)
 285       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
 286       integer*4     nthreads,count,mtx,outeriter,inneriter
 287       gmxreal       work(*)
 288
 289       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
 290       integer*4     nn0,nn1,nouter,ninner
 291       gmxreal       shX,shY,shZ
 292       gmxreal       iq
 293       gmxreal       qq,vcoul,vctot
 294       gmxreal       ix1,iy1,iz1
 295       gmxreal       jx1,jy1,jz1
 296       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 297
 298
 299 C    Reset outer and inner iteration counters
 300       nouter           = 0
 301       ninner           = 0
 302
 303 C    Loop over thread workunits
 304    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 305         if(nn1.gt.nri) nn1=nri
 306
 307 C      Start outer loop over neighborlists
 308
 309         do n=nn0+1,nn1
 310
 311 C        Load shift vector for this list
 312           is3              = 3*shift(n)+1
 313           shX              = shiftvec(is3)
 314           shY              = shiftvec(is3+1)
 315           shZ              = shiftvec(is3+2)
 316
 317 C        Load limits for loop over neighbors
 318           nj0              = jindex(n)+1
 319           nj1              = jindex(n+1)
 320
 321 C        Get outer coordinate index
 322           ii               = iinr(n)+1
 323           ii3              = 3*ii-2
 324
 325 C        Load i atom data, add shift vector
 326           ix1              = shX + pos(ii3+0)
 327           iy1              = shY + pos(ii3+1)
 328           iz1              = shZ + pos(ii3+2)
 329
 330 C        Load parameters for i atom
 331           iq               = facel*charge(ii)
 332
 333 C        Zero the potential energy for this list
 334           vctot            = 0
 335
 336 C        Clear i atom forces
 337
 338           do k=nj0,nj1
 339
 340 C          Get j neighbor index, and coordinate index
 341             jnr              = jjnr(k)+1
 342             j3               = 3*jnr-2
 343
 344 C          load j atom coordinates
 345             jx1              = pos(j3+0)
 346             jy1              = pos(j3+1)
 347             jz1              = pos(j3+2)
 348
 349 C          Calculate distance
 350             dx11             = ix1 - jx1
 351             dy11             = iy1 - jy1
 352             dz11             = iz1 - jz1
 353             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 354
 355 C          Calculate 1/r and 1/r2
 356
 357 C          PowerPC intrinsics 1/sqrt lookup table
 358 #ifndef GMX_BLUEGENE
 359             rinv11           = frsqrtes(rsq11)
 360 #else
 361             rinv11           = frsqrte(dble(rsq11))
 362 #endif
 363             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 364      &  *rinv11)))
 365 #ifdef GMX_DOUBLE
 366             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 367      &  *rinv11)))
 368 #endif
 369
 370 C          Load parameters for j atom
 371             qq               = iq*charge(jnr)
 372
 373 C          Coulomb interaction
 374             vcoul            = qq*rinv11
 375             vctot            = vctot+vcoul
 376
 377 C          Inner loop uses 17 flops/iteration
 378           end do
 379
 380
 381 C        Add i forces to mem and shifted force list
 382
 383 C        Add potential energies to the group for this list
 384           ggid             = gid(n)+1
 385           Vc(ggid)         = Vc(ggid) + vctot
 386
 387 C        Increment number of inner iterations
 388           ninner           = ninner + nj1 - nj0
 389
 390 C        Outer loop uses 5 flops/iteration
 391         end do
 392
 393
 394 C      Increment number of outer iterations
 395         nouter           = nouter + nn1 - nn0
 396       if(nn1.lt.nri) goto 10
 397
 398 C    Write outer/inner iteration count to pointers
 399       outeriter        = nouter
 400       inneriter        = ninner
 401       return
 402       end
 403
 404
 405