src/gmxlib/nonbonded/nb_kernel_power6/pwr6kernel420.F

   1 C
   2 C                This source code is part of
   3 C
   4 C                 G   R   O   M   A   C   S
   5 C
   6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7 C Copyright (c) 2001-2009, The GROMACS Development Team
   8 C
   9 C Gromacs is a library for molecular simulation and trajectory analysis,
  10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11 C a full list of developers and information, check out http://www.gromacs.org
  12 C
  13 C This program is free software; you can redistribute it and/or modify it under
  14 C the terms of the GNU Lesser General Public License as published by the Free
  15 C Software Foundation; either version 2 of the License, or (at your option) any
  16 C later version.
  17 C As a special exception, you may use this file as part of a free software
  18 C library without restriction.  Specifically, if other files instantiate
  19 C templates or use macros or inline functions from this file, or you compile
  20 C this file and link it with other files to produce an executable, this
  21 C file does not by itself cause the resulting executable to be covered by
  22 C the GNU Lesser General Public License.
  23 C
  24 C In plain-speak: do not worry about classes/macros/templates either - only
  25 C changes to the library have to be LGPL, not an application linking with it.
  26 C
  27 C To help fund GROMACS development, we humbly ask that you cite
  28 C the papers people have written on it - you can find them on the website!
  29 C
  30
  31 #ifdef HAVE_CONFIG_H
  32 #  include<config.h>
  33 #endif
  34
  35 #ifdef GMX_DOUBLE
  36 #  define gmxreal real*8
  37 #else
  38 #  define gmxreal real*4
  39 #endif
  40
  41
  42
  43 C
  44 C Gromacs nonbonded kernel pwr6kernel420
  45 C Coulomb interaction:     Generalized-Born
  46 C VdW interaction:         Buckingham
  47 C water optimization:      No
  48 C Calculate forces:        yes
  49 C
  50       subroutine pwr6kernel420(
  51      &                          nri,
  52      &                          iinr,
  53      &                          jindex,
  54      &                          jjnr,
  55      &                          shift,
  56      &                          shiftvec,
  57      &                          fshift,
  58      &                          gid,
  59      &                          pos,
  60      &                          faction,
  61      &                          charge,
  62      &                          facel,
  63      &                          krf,
  64      &                          crf,
  65      &                          Vc,
  66      &                          type,
  67      &                          ntype,
  68      &                          vdwparam,
  69      &                          Vvdw,
  70      &                          tabscale,
  71      &                          VFtab,
  72      &                          invsqrta,
  73      &                          dvda,
  74      &                          gbtabscale,
  75      &                          GBtab,
  76      &                          nthreads,
  77      &                          count,
  78      &                          mtx,
  79      &                          outeriter,
  80      &                          inneriter,
  81      &                          work)
  82       implicit      none
  83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
  84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
  85       integer*4     gid(*),type(*),ntype
  86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
  87       gmxreal       Vvdw(*),tabscale,VFtab(*)
  88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
  89       integer*4     nthreads,count,mtx,outeriter,inneriter
  90       gmxreal       work(*)
  91
  92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
  93       integer*4     nn0,nn1,nouter,ninner
  94       gmxreal       shX,shY,shZ
  95       gmxreal       fscal,tx,ty,tz
  96       gmxreal       rinvsq
  97       gmxreal       iq
  98       gmxreal       qq,vcoul,vctot
  99       integer*4     nti
 100       integer*4     tj
 101       gmxreal       rinvsix
 102       gmxreal       Vvdw6,Vvdwtot
 103       gmxreal       r,rt,eps,eps2
 104       integer*4     n0,nnn
 105       gmxreal       Y,F,Geps,Heps2,Fp,VV
 106       gmxreal       FF
 107       gmxreal       fijC
 108       gmxreal       isai,isaj,isaprod,gbscale,vgb
 109       gmxreal       dvdasum,dvdatmp,dvdaj,fgb
 110       gmxreal       Vvdwexp,br
 111       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
 112       gmxreal       jx1,jy1,jz1
 113       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 114       gmxreal       c6,cexp1,cexp2
 115
 116
 117 C    Reset outer and inner iteration counters
 118       nouter           = 0
 119       ninner           = 0
 120
 121 C    Loop over thread workunits
 122    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 123         if(nn1.gt.nri) nn1=nri
 124
 125 C      Start outer loop over neighborlists
 126
 127         do n=nn0+1,nn1
 128
 129 C        Load shift vector for this list
 130           is3              = 3*shift(n)+1
 131           shX              = shiftvec(is3)
 132           shY              = shiftvec(is3+1)
 133           shZ              = shiftvec(is3+2)
 134
 135 C        Load limits for loop over neighbors
 136           nj0              = jindex(n)+1
 137           nj1              = jindex(n+1)
 138
 139 C        Get outer coordinate index
 140           ii               = iinr(n)+1
 141           ii3              = 3*ii-2
 142
 143 C        Load i atom data, add shift vector
 144           ix1              = shX + pos(ii3+0)
 145           iy1              = shY + pos(ii3+1)
 146           iz1              = shZ + pos(ii3+2)
 147
 148 C        Load parameters for i atom
 149           iq               = facel*charge(ii)
 150           isai             = invsqrta(ii)
 151           nti              = 3*ntype*type(ii)
 152
 153 C        Zero the potential energy for this list
 154           vctot            = 0
 155           Vvdwtot          = 0
 156           dvdasum          = 0
 157
 158 C        Clear i atom forces
 159           fix1             = 0
 160           fiy1             = 0
 161           fiz1             = 0
 162
 163           do k=nj0,nj1
 164
 165 C          Get j neighbor index, and coordinate index
 166             jnr              = jjnr(k)+1
 167             j3               = 3*jnr-2
 168
 169 C          load j atom coordinates
 170             jx1              = pos(j3+0)
 171             jy1              = pos(j3+1)
 172             jz1              = pos(j3+2)
 173
 174 C          Calculate distance
 175             dx11             = ix1 - jx1
 176             dy11             = iy1 - jy1
 177             dz11             = iz1 - jz1
 178             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 179
 180 C          Calculate 1/r and 1/r2
 181
 182 C          PowerPC intrinsics 1/sqrt lookup table
 183 #ifndef GMX_BLUEGENE
 184             rinv11           = frsqrtes(rsq11)
 185 #else
 186             rinv11           = frsqrte(dble(rsq11))
 187 #endif
 188             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 189      &  *rinv11)))
 190 #ifdef GMX_DOUBLE
 191             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 192      &  *rinv11)))
 193 #endif
 194
 195 C          Load parameters for j atom
 196             isaj             = invsqrta(jnr)
 197             isaprod          = isai*isaj
 198             qq               = iq*charge(jnr)
 199             vcoul            = qq*rinv11
 200             fscal            = vcoul*rinv11
 201             qq               = isaprod*(-qq)
 202             gbscale          = isaprod*gbtabscale
 203             tj               = nti+3*type(jnr)+1
 204             c6               = vdwparam(tj)
 205             cexp1            = vdwparam(tj+1)
 206             cexp2            = vdwparam(tj+2)
 207             rinvsq           = rinv11*rinv11
 208
 209 C          Tabulated Generalized-Born interaction
 210             dvdaj            = dvda(jnr)
 211             r                = rsq11*rinv11
 212
 213 C          Calculate table index
 214             rt               = r*gbscale
 215             n0               = rt
 216             eps              = rt-n0
 217             eps2             = eps*eps
 218             nnn              = 4*n0+1
 219             Y                = GBtab(nnn)
 220             F                = GBtab(nnn+1)
 221             Geps             = eps*GBtab(nnn+2)
 222             Heps2            = eps2*GBtab(nnn+3)
 223             Fp               = F+Geps+Heps2
 224             VV               = Y+eps*Fp
 225             FF               = Fp+Geps+2.0*Heps2
 226             vgb              = qq*VV
 227             fijC             = qq*FF*gbscale
 228             dvdatmp          = -0.5*(vgb+fijC*r)
 229             dvdasum          = dvdasum + dvdatmp
 230             dvda(jnr)        = dvdaj+dvdatmp*isaj*isaj
 231             vctot            = vctot + vcoul
 232
 233 C          Buckingham interaction
 234             rinvsix          = rinvsq*rinvsq*rinvsq
 235             Vvdw6            = c6*rinvsix
 236             br               = cexp2*rsq11*rinv11
 237             Vvdwexp          = cexp1*exp(-br)
 238             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
 239             fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq
 240      &  -(fijC-fscal)*rinv11
 241
 242 C          Calculate temporary vectorial force
 243             tx               = fscal*dx11
 244             ty               = fscal*dy11
 245             tz               = fscal*dz11
 246
 247 C          Increment i atom force
 248             fix1             = fix1 + tx
 249             fiy1             = fiy1 + ty
 250             fiz1             = fiz1 + tz
 251
 252 C          Decrement j atom force
 253             faction(j3+0)    = faction(j3+0) - tx
 254             faction(j3+1)    = faction(j3+1) - ty
 255             faction(j3+2)    = faction(j3+2) - tz
 256
 257 C          Inner loop uses 89 flops/iteration
 258           end do
 259
 260
 261 C        Add i forces to mem and shifted force list
 262           faction(ii3+0)   = faction(ii3+0) + fix1
 263           faction(ii3+1)   = faction(ii3+1) + fiy1
 264           faction(ii3+2)   = faction(ii3+2) + fiz1
 265           fshift(is3)      = fshift(is3)+fix1
 266           fshift(is3+1)    = fshift(is3+1)+fiy1
 267           fshift(is3+2)    = fshift(is3+2)+fiz1
 268
 269 C        Add potential energies to the group for this list
 270           ggid             = gid(n)+1
 271           Vc(ggid)         = Vc(ggid) + vctot
 272           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 273           dvda(ii)         = dvda(ii) + dvdasum*isai*isai
 274
 275 C        Increment number of inner iterations
 276           ninner           = ninner + nj1 - nj0
 277
 278 C        Outer loop uses 13 flops/iteration
 279         end do
 280
 281
 282 C      Increment number of outer iterations
 283         nouter           = nouter + nn1 - nn0
 284       if(nn1.lt.nri) goto 10
 285
 286 C    Write outer/inner iteration count to pointers
 287       outeriter        = nouter
 288       inneriter        = ninner
 289       return
 290       end
 291
 292
 293
 294
 295
 296
 297 C
 298 C Gromacs nonbonded kernel pwr6kernel420nf
 299 C Coulomb interaction:     Generalized-Born
 300 C VdW interaction:         Buckingham
 301 C water optimization:      No
 302 C Calculate forces:        no
 303 C
 304       subroutine pwr6kernel420nf(
 305      &                          nri,
 306      &                          iinr,
 307      &                          jindex,
 308      &                          jjnr,
 309      &                          shift,
 310      &                          shiftvec,
 311      &                          fshift,
 312      &                          gid,
 313      &                          pos,
 314      &                          faction,
 315      &                          charge,
 316      &                          facel,
 317      &                          krf,
 318      &                          crf,
 319      &                          Vc,
 320      &                          type,
 321      &                          ntype,
 322      &                          vdwparam,
 323      &                          Vvdw,
 324      &                          tabscale,
 325      &                          VFtab,
 326      &                          invsqrta,
 327      &                          dvda,
 328      &                          gbtabscale,
 329      &                          GBtab,
 330      &                          nthreads,
 331      &                          count,
 332      &                          mtx,
 333      &                          outeriter,
 334      &                          inneriter,
 335      &                          work)
 336       implicit      none
 337       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
 338       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
 339       integer*4     gid(*),type(*),ntype
 340       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
 341       gmxreal       Vvdw(*),tabscale,VFtab(*)
 342       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
 343       integer*4     nthreads,count,mtx,outeriter,inneriter
 344       gmxreal       work(*)
 345
 346       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
 347       integer*4     nn0,nn1,nouter,ninner
 348       gmxreal       shX,shY,shZ
 349       gmxreal       rinvsq
 350       gmxreal       iq
 351       gmxreal       qq,vcoul,vctot
 352       integer*4     nti
 353       integer*4     tj
 354       gmxreal       rinvsix
 355       gmxreal       Vvdw6,Vvdwtot
 356       gmxreal       r,rt,eps,eps2
 357       integer*4     n0,nnn
 358       gmxreal       Y,F,Geps,Heps2,Fp,VV
 359       gmxreal       isai,isaj,isaprod,gbscale,vgb
 360       gmxreal       Vvdwexp,br
 361       gmxreal       ix1,iy1,iz1
 362       gmxreal       jx1,jy1,jz1
 363       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 364       gmxreal       c6,cexp1,cexp2
 365
 366
 367 C    Reset outer and inner iteration counters
 368       nouter           = 0
 369       ninner           = 0
 370
 371 C    Loop over thread workunits
 372    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 373         if(nn1.gt.nri) nn1=nri
 374
 375 C      Start outer loop over neighborlists
 376
 377         do n=nn0+1,nn1
 378
 379 C        Load shift vector for this list
 380           is3              = 3*shift(n)+1
 381           shX              = shiftvec(is3)
 382           shY              = shiftvec(is3+1)
 383           shZ              = shiftvec(is3+2)
 384
 385 C        Load limits for loop over neighbors
 386           nj0              = jindex(n)+1
 387           nj1              = jindex(n+1)
 388
 389 C        Get outer coordinate index
 390           ii               = iinr(n)+1
 391           ii3              = 3*ii-2
 392
 393 C        Load i atom data, add shift vector
 394           ix1              = shX + pos(ii3+0)
 395           iy1              = shY + pos(ii3+1)
 396           iz1              = shZ + pos(ii3+2)
 397
 398 C        Load parameters for i atom
 399           iq               = facel*charge(ii)
 400           isai             = invsqrta(ii)
 401           nti              = 3*ntype*type(ii)
 402
 403 C        Zero the potential energy for this list
 404           vctot            = 0
 405           Vvdwtot          = 0
 406
 407 C        Clear i atom forces
 408
 409           do k=nj0,nj1
 410
 411 C          Get j neighbor index, and coordinate index
 412             jnr              = jjnr(k)+1
 413             j3               = 3*jnr-2
 414
 415 C          load j atom coordinates
 416             jx1              = pos(j3+0)
 417             jy1              = pos(j3+1)
 418             jz1              = pos(j3+2)
 419
 420 C          Calculate distance
 421             dx11             = ix1 - jx1
 422             dy11             = iy1 - jy1
 423             dz11             = iz1 - jz1
 424             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 425
 426 C          Calculate 1/r and 1/r2
 427
 428 C          PowerPC intrinsics 1/sqrt lookup table
 429 #ifndef GMX_BLUEGENE
 430             rinv11           = frsqrtes(rsq11)
 431 #else
 432             rinv11           = frsqrte(dble(rsq11))
 433 #endif
 434             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 435      &  *rinv11)))
 436 #ifdef GMX_DOUBLE
 437             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 438      &  *rinv11)))
 439 #endif
 440
 441 C          Load parameters for j atom
 442             isaj             = invsqrta(jnr)
 443             isaprod          = isai*isaj
 444             qq               = iq*charge(jnr)
 445             vcoul            = qq*rinv11
 446             qq               = isaprod*(-qq)
 447             gbscale          = isaprod*gbtabscale
 448             tj               = nti+3*type(jnr)+1
 449             c6               = vdwparam(tj)
 450             cexp1            = vdwparam(tj+1)
 451             cexp2            = vdwparam(tj+2)
 452             rinvsq           = rinv11*rinv11
 453
 454 C          Tabulated Generalized-Born interaction
 455             r                = rsq11*rinv11
 456
 457 C          Calculate table index
 458             rt               = r*gbscale
 459             n0               = rt
 460             eps              = rt-n0
 461             eps2             = eps*eps
 462             nnn              = 4*n0+1
 463             Y                = GBtab(nnn)
 464             F                = GBtab(nnn+1)
 465             Geps             = eps*GBtab(nnn+2)
 466             Heps2            = eps2*GBtab(nnn+3)
 467             Fp               = F+Geps+Heps2
 468             VV               = Y+eps*Fp
 469             vgb              = qq*VV
 470             vctot            = vctot + vcoul
 471
 472 C          Buckingham interaction
 473             rinvsix          = rinvsq*rinvsq*rinvsq
 474             Vvdw6            = c6*rinvsix
 475             br               = cexp2*rsq11*rinv11
 476             Vvdwexp          = cexp1*exp(-br)
 477             Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6
 478
 479 C          Inner loop uses 65 flops/iteration
 480           end do
 481
 482
 483 C        Add i forces to mem and shifted force list
 484
 485 C        Add potential energies to the group for this list
 486           ggid             = gid(n)+1
 487           Vc(ggid)         = Vc(ggid) + vctot
 488           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 489
 490 C        Increment number of inner iterations
 491           ninner           = ninner + nj1 - nj0
 492
 493 C        Outer loop uses 6 flops/iteration
 494         end do
 495
 496
 497 C      Increment number of outer iterations
 498         nouter           = nouter + nn1 - nn0
 499       if(nn1.lt.nri) goto 10
 500
 501 C    Write outer/inner iteration count to pointers
 502       outeriter        = nouter
 503       inneriter        = ninner
 504       return
 505       end
 506
 507
 508