src/gmxlib/nonbonded/nb_kernel_power6/pwr6kernel410.F

   1 C
   2 C                This source code is part of
   3 C
   4 C                 G   R   O   M   A   C   S
   5 C
   6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7 C Copyright (c) 2001-2009, The GROMACS Development Team
   8 C
   9 C Gromacs is a library for molecular simulation and trajectory analysis,
  10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11 C a full list of developers and information, check out http://www.gromacs.org
  12 C
  13 C This program is free software; you can redistribute it and/or modify it under
  14 C the terms of the GNU Lesser General Public License as published by the Free
  15 C Software Foundation; either version 2 of the License, or (at your option) any
  16 C later version.
  17 C As a special exception, you may use this file as part of a free software
  18 C library without restriction.  Specifically, if other files instantiate
  19 C templates or use macros or inline functions from this file, or you compile
  20 C this file and link it with other files to produce an executable, this
  21 C file does not by itself cause the resulting executable to be covered by
  22 C the GNU Lesser General Public License.
  23 C
  24 C In plain-speak: do not worry about classes/macros/templates either - only
  25 C changes to the library have to be LGPL, not an application linking with it.
  26 C
  27 C To help fund GROMACS development, we humbly ask that you cite
  28 C the papers people have written on it - you can find them on the website!
  29 C
  30
  31 #ifdef HAVE_CONFIG_H
  32 #  include<config.h>
  33 #endif
  34
  35 #ifdef GMX_DOUBLE
  36 #  define gmxreal real*8
  37 #else
  38 #  define gmxreal real*4
  39 #endif
  40
  41
  42
  43 C
  44 C Gromacs nonbonded kernel pwr6kernel410
  45 C Coulomb interaction:     Generalized-Born
  46 C VdW interaction:         Lennard-Jones
  47 C water optimization:      No
  48 C Calculate forces:        yes
  49 C
  50       subroutine pwr6kernel410(
  51      &                          nri,
  52      &                          iinr,
  53      &                          jindex,
  54      &                          jjnr,
  55      &                          shift,
  56      &                          shiftvec,
  57      &                          fshift,
  58      &                          gid,
  59      &                          pos,
  60      &                          faction,
  61      &                          charge,
  62      &                          facel,
  63      &                          krf,
  64      &                          crf,
  65      &                          Vc,
  66      &                          type,
  67      &                          ntype,
  68      &                          vdwparam,
  69      &                          Vvdw,
  70      &                          tabscale,
  71      &                          VFtab,
  72      &                          invsqrta,
  73      &                          dvda,
  74      &                          gbtabscale,
  75      &                          GBtab,
  76      &                          nthreads,
  77      &                          count,
  78      &                          mtx,
  79      &                          outeriter,
  80      &                          inneriter,
  81      &                          work)
  82       implicit      none
  83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
  84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
  85       integer*4     gid(*),type(*),ntype
  86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
  87       gmxreal       Vvdw(*),tabscale,VFtab(*)
  88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
  89       integer*4     nthreads,count,mtx,outeriter,inneriter
  90       gmxreal       work(*)
  91
  92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
  93       integer*4     nn0,nn1,nouter,ninner
  94       gmxreal       shX,shY,shZ
  95       gmxreal       fscal,tx,ty,tz
  96       gmxreal       rinvsq
  97       gmxreal       iq
  98       gmxreal       qq,vcoul,vctot
  99       integer*4     nti
 100       integer*4     tj
 101       gmxreal       rinvsix
 102       gmxreal       Vvdw6,Vvdwtot
 103       gmxreal       Vvdw12
 104       gmxreal       r,rt,eps,eps2
 105       integer*4     n0,nnn
 106       gmxreal       Y,F,Geps,Heps2,Fp,VV
 107       gmxreal       FF
 108       gmxreal       fijC
 109       gmxreal       isai,isaj,isaprod,gbscale,vgb
 110       gmxreal       dvdasum,dvdatmp,dvdaj,fgb
 111       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
 112       gmxreal       jx1,jy1,jz1
 113       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 114       gmxreal       c6,c12
 115
 116
 117 C    Reset outer and inner iteration counters
 118       nouter           = 0
 119       ninner           = 0
 120
 121 C    Loop over thread workunits
 122    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 123         if(nn1.gt.nri) nn1=nri
 124
 125 C      Start outer loop over neighborlists
 126
 127         do n=nn0+1,nn1
 128
 129 C        Load shift vector for this list
 130           is3              = 3*shift(n)+1
 131           shX              = shiftvec(is3)
 132           shY              = shiftvec(is3+1)
 133           shZ              = shiftvec(is3+2)
 134
 135 C        Load limits for loop over neighbors
 136           nj0              = jindex(n)+1
 137           nj1              = jindex(n+1)
 138
 139 C        Get outer coordinate index
 140           ii               = iinr(n)+1
 141           ii3              = 3*ii-2
 142
 143 C        Load i atom data, add shift vector
 144           ix1              = shX + pos(ii3+0)
 145           iy1              = shY + pos(ii3+1)
 146           iz1              = shZ + pos(ii3+2)
 147
 148 C        Load parameters for i atom
 149           iq               = facel*charge(ii)
 150           isai             = invsqrta(ii)
 151           nti              = 2*ntype*type(ii)
 152
 153 C        Zero the potential energy for this list
 154           vctot            = 0
 155           Vvdwtot          = 0
 156           dvdasum          = 0
 157
 158 C        Clear i atom forces
 159           fix1             = 0
 160           fiy1             = 0
 161           fiz1             = 0
 162
 163           do k=nj0,nj1
 164
 165 C          Get j neighbor index, and coordinate index
 166             jnr              = jjnr(k)+1
 167             j3               = 3*jnr-2
 168
 169 C          load j atom coordinates
 170             jx1              = pos(j3+0)
 171             jy1              = pos(j3+1)
 172             jz1              = pos(j3+2)
 173
 174 C          Calculate distance
 175             dx11             = ix1 - jx1
 176             dy11             = iy1 - jy1
 177             dz11             = iz1 - jz1
 178             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 179
 180 C          Calculate 1/r and 1/r2
 181
 182 C          PowerPC intrinsics 1/sqrt lookup table
 183 #ifndef GMX_BLUEGENE
 184             rinv11           = frsqrtes(rsq11)
 185 #else
 186             rinv11           = frsqrte(dble(rsq11))
 187 #endif
 188             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 189      &  *rinv11)))
 190 #ifdef GMX_DOUBLE
 191             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 192      &  *rinv11)))
 193 #endif
 194
 195 C          Load parameters for j atom
 196             isaj             = invsqrta(jnr)
 197             isaprod          = isai*isaj
 198             qq               = iq*charge(jnr)
 199             vcoul            = qq*rinv11
 200             fscal            = vcoul*rinv11
 201             qq               = isaprod*(-qq)
 202             gbscale          = isaprod*gbtabscale
 203             tj               = nti+2*type(jnr)+1
 204             c6               = vdwparam(tj)
 205             c12              = vdwparam(tj+1)
 206             rinvsq           = rinv11*rinv11
 207
 208 C          Tabulated Generalized-Born interaction
 209             dvdaj            = dvda(jnr)
 210             r                = rsq11*rinv11
 211
 212 C          Calculate table index
 213             rt               = r*gbscale
 214             n0               = rt
 215             eps              = rt-n0
 216             eps2             = eps*eps
 217             nnn              = 4*n0+1
 218             Y                = GBtab(nnn)
 219             F                = GBtab(nnn+1)
 220             Geps             = eps*GBtab(nnn+2)
 221             Heps2            = eps2*GBtab(nnn+3)
 222             Fp               = F+Geps+Heps2
 223             VV               = Y+eps*Fp
 224             FF               = Fp+Geps+2.0*Heps2
 225             vgb              = qq*VV
 226             fijC             = qq*FF*gbscale
 227             dvdatmp          = -0.5*(vgb+fijC*r)
 228             dvdasum          = dvdasum + dvdatmp
 229             dvda(jnr)        = dvdaj+dvdatmp*isaj*isaj
 230             vctot            = vctot + vcoul
 231
 232 C          Lennard-Jones interaction
 233             rinvsix          = rinvsq*rinvsq*rinvsq
 234             Vvdw6            = c6*rinvsix
 235             Vvdw12           = c12*rinvsix*rinvsix
 236             Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6
 237             fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq
 238      &  -(fijC-fscal)*rinv11
 239
 240 C          Calculate temporary vectorial force
 241             tx               = fscal*dx11
 242             ty               = fscal*dy11
 243             tz               = fscal*dz11
 244
 245 C          Increment i atom force
 246             fix1             = fix1 + tx
 247             fiy1             = fiy1 + ty
 248             fiz1             = fiz1 + tz
 249
 250 C          Decrement j atom force
 251             faction(j3+0)    = faction(j3+0) - tx
 252             faction(j3+1)    = faction(j3+1) - ty
 253             faction(j3+2)    = faction(j3+2) - tz
 254
 255 C          Inner loop uses 63 flops/iteration
 256           end do
 257
 258
 259 C        Add i forces to mem and shifted force list
 260           faction(ii3+0)   = faction(ii3+0) + fix1
 261           faction(ii3+1)   = faction(ii3+1) + fiy1
 262           faction(ii3+2)   = faction(ii3+2) + fiz1
 263           fshift(is3)      = fshift(is3)+fix1
 264           fshift(is3+1)    = fshift(is3+1)+fiy1
 265           fshift(is3+2)    = fshift(is3+2)+fiz1
 266
 267 C        Add potential energies to the group for this list
 268           ggid             = gid(n)+1
 269           Vc(ggid)         = Vc(ggid) + vctot
 270           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 271           dvda(ii)         = dvda(ii) + dvdasum*isai*isai
 272
 273 C        Increment number of inner iterations
 274           ninner           = ninner + nj1 - nj0
 275
 276 C        Outer loop uses 13 flops/iteration
 277         end do
 278
 279
 280 C      Increment number of outer iterations
 281         nouter           = nouter + nn1 - nn0
 282       if(nn1.lt.nri) goto 10
 283
 284 C    Write outer/inner iteration count to pointers
 285       outeriter        = nouter
 286       inneriter        = ninner
 287       return
 288       end
 289
 290
 291
 292
 293
 294
 295 C
 296 C Gromacs nonbonded kernel pwr6kernel410nf
 297 C Coulomb interaction:     Generalized-Born
 298 C VdW interaction:         Lennard-Jones
 299 C water optimization:      No
 300 C Calculate forces:        no
 301 C
 302       subroutine pwr6kernel410nf(
 303      &                          nri,
 304      &                          iinr,
 305      &                          jindex,
 306      &                          jjnr,
 307      &                          shift,
 308      &                          shiftvec,
 309      &                          fshift,
 310      &                          gid,
 311      &                          pos,
 312      &                          faction,
 313      &                          charge,
 314      &                          facel,
 315      &                          krf,
 316      &                          crf,
 317      &                          Vc,
 318      &                          type,
 319      &                          ntype,
 320      &                          vdwparam,
 321      &                          Vvdw,
 322      &                          tabscale,
 323      &                          VFtab,
 324      &                          invsqrta,
 325      &                          dvda,
 326      &                          gbtabscale,
 327      &                          GBtab,
 328      &                          nthreads,
 329      &                          count,
 330      &                          mtx,
 331      &                          outeriter,
 332      &                          inneriter,
 333      &                          work)
 334       implicit      none
 335       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
 336       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
 337       integer*4     gid(*),type(*),ntype
 338       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
 339       gmxreal       Vvdw(*),tabscale,VFtab(*)
 340       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
 341       integer*4     nthreads,count,mtx,outeriter,inneriter
 342       gmxreal       work(*)
 343
 344       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
 345       integer*4     nn0,nn1,nouter,ninner
 346       gmxreal       shX,shY,shZ
 347       gmxreal       rinvsq
 348       gmxreal       iq
 349       gmxreal       qq,vcoul,vctot
 350       integer*4     nti
 351       integer*4     tj
 352       gmxreal       rinvsix
 353       gmxreal       Vvdw6,Vvdwtot
 354       gmxreal       Vvdw12
 355       gmxreal       r,rt,eps,eps2
 356       integer*4     n0,nnn
 357       gmxreal       Y,F,Geps,Heps2,Fp,VV
 358       gmxreal       isai,isaj,isaprod,gbscale,vgb
 359       gmxreal       ix1,iy1,iz1
 360       gmxreal       jx1,jy1,jz1
 361       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 362       gmxreal       c6,c12
 363
 364
 365 C    Reset outer and inner iteration counters
 366       nouter           = 0
 367       ninner           = 0
 368
 369 C    Loop over thread workunits
 370    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 371         if(nn1.gt.nri) nn1=nri
 372
 373 C      Start outer loop over neighborlists
 374
 375         do n=nn0+1,nn1
 376
 377 C        Load shift vector for this list
 378           is3              = 3*shift(n)+1
 379           shX              = shiftvec(is3)
 380           shY              = shiftvec(is3+1)
 381           shZ              = shiftvec(is3+2)
 382
 383 C        Load limits for loop over neighbors
 384           nj0              = jindex(n)+1
 385           nj1              = jindex(n+1)
 386
 387 C        Get outer coordinate index
 388           ii               = iinr(n)+1
 389           ii3              = 3*ii-2
 390
 391 C        Load i atom data, add shift vector
 392           ix1              = shX + pos(ii3+0)
 393           iy1              = shY + pos(ii3+1)
 394           iz1              = shZ + pos(ii3+2)
 395
 396 C        Load parameters for i atom
 397           iq               = facel*charge(ii)
 398           isai             = invsqrta(ii)
 399           nti              = 2*ntype*type(ii)
 400
 401 C        Zero the potential energy for this list
 402           vctot            = 0
 403           Vvdwtot          = 0
 404
 405 C        Clear i atom forces
 406
 407           do k=nj0,nj1
 408
 409 C          Get j neighbor index, and coordinate index
 410             jnr              = jjnr(k)+1
 411             j3               = 3*jnr-2
 412
 413 C          load j atom coordinates
 414             jx1              = pos(j3+0)
 415             jy1              = pos(j3+1)
 416             jz1              = pos(j3+2)
 417
 418 C          Calculate distance
 419             dx11             = ix1 - jx1
 420             dy11             = iy1 - jy1
 421             dz11             = iz1 - jz1
 422             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 423
 424 C          Calculate 1/r and 1/r2
 425
 426 C          PowerPC intrinsics 1/sqrt lookup table
 427 #ifndef GMX_BLUEGENE
 428             rinv11           = frsqrtes(rsq11)
 429 #else
 430             rinv11           = frsqrte(dble(rsq11))
 431 #endif
 432             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 433      &  *rinv11)))
 434 #ifdef GMX_DOUBLE
 435             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 436      &  *rinv11)))
 437 #endif
 438
 439 C          Load parameters for j atom
 440             isaj             = invsqrta(jnr)
 441             isaprod          = isai*isaj
 442             qq               = iq*charge(jnr)
 443             vcoul            = qq*rinv11
 444             qq               = isaprod*(-qq)
 445             gbscale          = isaprod*gbtabscale
 446             tj               = nti+2*type(jnr)+1
 447             c6               = vdwparam(tj)
 448             c12              = vdwparam(tj+1)
 449             rinvsq           = rinv11*rinv11
 450
 451 C          Tabulated Generalized-Born interaction
 452             r                = rsq11*rinv11
 453
 454 C          Calculate table index
 455             rt               = r*gbscale
 456             n0               = rt
 457             eps              = rt-n0
 458             eps2             = eps*eps
 459             nnn              = 4*n0+1
 460             Y                = GBtab(nnn)
 461             F                = GBtab(nnn+1)
 462             Geps             = eps*GBtab(nnn+2)
 463             Heps2            = eps2*GBtab(nnn+3)
 464             Fp               = F+Geps+Heps2
 465             VV               = Y+eps*Fp
 466             vgb              = qq*VV
 467             vctot            = vctot + vcoul
 468
 469 C          Lennard-Jones interaction
 470             rinvsix          = rinvsq*rinvsq*rinvsq
 471             Vvdw6            = c6*rinvsix
 472             Vvdw12           = c12*rinvsix*rinvsix
 473             Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6
 474
 475 C          Inner loop uses 38 flops/iteration
 476           end do
 477
 478
 479 C        Add i forces to mem and shifted force list
 480
 481 C        Add potential energies to the group for this list
 482           ggid             = gid(n)+1
 483           Vc(ggid)         = Vc(ggid) + vctot
 484           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 485
 486 C        Increment number of inner iterations
 487           ninner           = ninner + nj1 - nj0
 488
 489 C        Outer loop uses 6 flops/iteration
 490         end do
 491
 492
 493 C      Increment number of outer iterations
 494         nouter           = nouter + nn1 - nn0
 495       if(nn1.lt.nri) goto 10
 496
 497 C    Write outer/inner iteration count to pointers
 498       outeriter        = nouter
 499       inneriter        = ninner
 500       return
 501       end
 502
 503
 504