src/gmxlib/nonbonded/nb_kernel_power6/pwr6kernel430.F

   1 C
   2 C                This source code is part of
   3 C
   4 C                 G   R   O   M   A   C   S
   5 C
   6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7 C Copyright (c) 2001-2009, The GROMACS Development Team
   8 C
   9 C Gromacs is a library for molecular simulation and trajectory analysis,
  10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11 C a full list of developers and information, check out http://www.gromacs.org
  12 C
  13 C This program is free software; you can redistribute it and/or modify it under
  14 C the terms of the GNU Lesser General Public License as published by the Free
  15 C Software Foundation; either version 2 of the License, or (at your option) any
  16 C later version.
  17 C As a special exception, you may use this file as part of a free software
  18 C library without restriction.  Specifically, if other files instantiate
  19 C templates or use macros or inline functions from this file, or you compile
  20 C this file and link it with other files to produce an executable, this
  21 C file does not by itself cause the resulting executable to be covered by
  22 C the GNU Lesser General Public License.
  23 C
  24 C In plain-speak: do not worry about classes/macros/templates either - only
  25 C changes to the library have to be LGPL, not an application linking with it.
  26 C
  27 C To help fund GROMACS development, we humbly ask that you cite
  28 C the papers people have written on it - you can find them on the website!
  29 C
  30
  31 #ifdef HAVE_CONFIG_H
  32 #  include<config.h>
  33 #endif
  34
  35 #ifdef GMX_DOUBLE
  36 #  define gmxreal real*8
  37 #else
  38 #  define gmxreal real*4
  39 #endif
  40
  41
  42
  43 C
  44 C Gromacs nonbonded kernel pwr6kernel430
  45 C Coulomb interaction:     Generalized-Born
  46 C VdW interaction:         Tabulated
  47 C water optimization:      No
  48 C Calculate forces:        yes
  49 C
  50       subroutine pwr6kernel430(
  51      &                          nri,
  52      &                          iinr,
  53      &                          jindex,
  54      &                          jjnr,
  55      &                          shift,
  56      &                          shiftvec,
  57      &                          fshift,
  58      &                          gid,
  59      &                          pos,
  60      &                          faction,
  61      &                          charge,
  62      &                          facel,
  63      &                          krf,
  64      &                          crf,
  65      &                          Vc,
  66      &                          type,
  67      &                          ntype,
  68      &                          vdwparam,
  69      &                          Vvdw,
  70      &                          tabscale,
  71      &                          VFtab,
  72      &                          invsqrta,
  73      &                          dvda,
  74      &                          gbtabscale,
  75      &                          GBtab,
  76      &                          nthreads,
  77      &                          count,
  78      &                          mtx,
  79      &                          outeriter,
  80      &                          inneriter,
  81      &                          work)
  82       implicit      none
  83       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
  84       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
  85       integer*4     gid(*),type(*),ntype
  86       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
  87       gmxreal       Vvdw(*),tabscale,VFtab(*)
  88       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
  89       integer*4     nthreads,count,mtx,outeriter,inneriter
  90       gmxreal       work(*)
  91
  92       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
  93       integer*4     nn0,nn1,nouter,ninner
  94       gmxreal       shX,shY,shZ
  95       gmxreal       fscal,tx,ty,tz
  96       gmxreal       iq
  97       gmxreal       qq,vcoul,vctot
  98       integer*4     nti
  99       integer*4     tj
 100       gmxreal       Vvdw6,Vvdwtot
 101       gmxreal       Vvdw12
 102       gmxreal       r,rt,eps,eps2
 103       integer*4     n0,nnn
 104       gmxreal       Y,F,Geps,Heps2,Fp,VV
 105       gmxreal       FF
 106       gmxreal       fijC
 107       gmxreal       fijD,fijR
 108       gmxreal       isai,isaj,isaprod,gbscale,vgb
 109       gmxreal       dvdasum,dvdatmp,dvdaj,fgb
 110       gmxreal       ix1,iy1,iz1,fix1,fiy1,fiz1
 111       gmxreal       jx1,jy1,jz1
 112       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 113       gmxreal       c6,c12
 114
 115
 116 C    Reset outer and inner iteration counters
 117       nouter           = 0
 118       ninner           = 0
 119
 120 C    Loop over thread workunits
 121    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 122         if(nn1.gt.nri) nn1=nri
 123
 124 C      Start outer loop over neighborlists
 125
 126         do n=nn0+1,nn1
 127
 128 C        Load shift vector for this list
 129           is3              = 3*shift(n)+1
 130           shX              = shiftvec(is3)
 131           shY              = shiftvec(is3+1)
 132           shZ              = shiftvec(is3+2)
 133
 134 C        Load limits for loop over neighbors
 135           nj0              = jindex(n)+1
 136           nj1              = jindex(n+1)
 137
 138 C        Get outer coordinate index
 139           ii               = iinr(n)+1
 140           ii3              = 3*ii-2
 141
 142 C        Load i atom data, add shift vector
 143           ix1              = shX + pos(ii3+0)
 144           iy1              = shY + pos(ii3+1)
 145           iz1              = shZ + pos(ii3+2)
 146
 147 C        Load parameters for i atom
 148           iq               = facel*charge(ii)
 149           isai             = invsqrta(ii)
 150           nti              = 2*ntype*type(ii)
 151
 152 C        Zero the potential energy for this list
 153           vctot            = 0
 154           Vvdwtot          = 0
 155           dvdasum          = 0
 156
 157 C        Clear i atom forces
 158           fix1             = 0
 159           fiy1             = 0
 160           fiz1             = 0
 161
 162           do k=nj0,nj1
 163
 164 C          Get j neighbor index, and coordinate index
 165             jnr              = jjnr(k)+1
 166             j3               = 3*jnr-2
 167
 168 C          load j atom coordinates
 169             jx1              = pos(j3+0)
 170             jy1              = pos(j3+1)
 171             jz1              = pos(j3+2)
 172
 173 C          Calculate distance
 174             dx11             = ix1 - jx1
 175             dy11             = iy1 - jy1
 176             dz11             = iz1 - jz1
 177             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 178
 179 C          Calculate 1/r and 1/r2
 180
 181 C          PowerPC intrinsics 1/sqrt lookup table
 182 #ifndef GMX_BLUEGENE
 183             rinv11           = frsqrtes(rsq11)
 184 #else
 185             rinv11           = frsqrte(dble(rsq11))
 186 #endif
 187             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 188      &  *rinv11)))
 189 #ifdef GMX_DOUBLE
 190             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 191      &  *rinv11)))
 192 #endif
 193
 194 C          Load parameters for j atom
 195             isaj             = invsqrta(jnr)
 196             isaprod          = isai*isaj
 197             qq               = iq*charge(jnr)
 198             vcoul            = qq*rinv11
 199             fscal            = vcoul*rinv11
 200             qq               = isaprod*(-qq)
 201             gbscale          = isaprod*gbtabscale
 202             tj               = nti+2*type(jnr)+1
 203             c6               = vdwparam(tj)
 204             c12              = vdwparam(tj+1)
 205
 206 C          Tabulated Generalized-Born interaction
 207             dvdaj            = dvda(jnr)
 208             r                = rsq11*rinv11
 209
 210 C          Calculate table index
 211             rt               = r*gbscale
 212             n0               = rt
 213             eps              = rt-n0
 214             eps2             = eps*eps
 215             nnn              = 4*n0+1
 216             Y                = GBtab(nnn)
 217             F                = GBtab(nnn+1)
 218             Geps             = eps*GBtab(nnn+2)
 219             Heps2            = eps2*GBtab(nnn+3)
 220             Fp               = F+Geps+Heps2
 221             VV               = Y+eps*Fp
 222             FF               = Fp+Geps+2.0*Heps2
 223             vgb              = qq*VV
 224             fijC             = qq*FF*gbscale
 225             dvdatmp          = -0.5*(vgb+fijC*r)
 226             dvdasum          = dvdasum + dvdatmp
 227             dvda(jnr)        = dvdaj+dvdatmp*isaj*isaj
 228             vctot            = vctot + vcoul
 229
 230 C          Calculate table index
 231             r                = rsq11*rinv11
 232
 233 C          Calculate table index
 234             rt               = r*tabscale
 235             n0               = rt
 236             eps              = rt-n0
 237             eps2             = eps*eps
 238             nnn              = 8*n0+1
 239
 240 C          Tabulated VdW interaction - dispersion
 241             Y                = VFtab(nnn)
 242             F                = VFtab(nnn+1)
 243             Geps             = eps*VFtab(nnn+2)
 244             Heps2            = eps2*VFtab(nnn+3)
 245             Fp               = F+Geps+Heps2
 246             VV               = Y+eps*Fp
 247             FF               = Fp+Geps+2.0*Heps2
 248             Vvdw6            = c6*VV
 249             fijD             = c6*FF
 250
 251 C          Tabulated VdW interaction - repulsion
 252             nnn              = nnn+4
 253             Y                = VFtab(nnn)
 254             F                = VFtab(nnn+1)
 255             Geps             = eps*VFtab(nnn+2)
 256             Heps2            = eps2*VFtab(nnn+3)
 257             Fp               = F+Geps+Heps2
 258             VV               = Y+eps*Fp
 259             FF               = Fp+Geps+2.0*Heps2
 260             Vvdw12           = c12*VV
 261             fijR             = c12*FF
 262             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
 263             fscal            = -((fijD+fijR)*tabscale+fijC-fscal)
 264      &  *rinv11
 265
 266 C          Calculate temporary vectorial force
 267             tx               = fscal*dx11
 268             ty               = fscal*dy11
 269             tz               = fscal*dz11
 270
 271 C          Increment i atom force
 272             fix1             = fix1 + tx
 273             fiy1             = fiy1 + ty
 274             fiz1             = fiz1 + tz
 275
 276 C          Decrement j atom force
 277             faction(j3+0)    = faction(j3+0) - tx
 278             faction(j3+1)    = faction(j3+1) - ty
 279             faction(j3+2)    = faction(j3+2) - tz
 280
 281 C          Inner loop uses 81 flops/iteration
 282           end do
 283
 284
 285 C        Add i forces to mem and shifted force list
 286           faction(ii3+0)   = faction(ii3+0) + fix1
 287           faction(ii3+1)   = faction(ii3+1) + fiy1
 288           faction(ii3+2)   = faction(ii3+2) + fiz1
 289           fshift(is3)      = fshift(is3)+fix1
 290           fshift(is3+1)    = fshift(is3+1)+fiy1
 291           fshift(is3+2)    = fshift(is3+2)+fiz1
 292
 293 C        Add potential energies to the group for this list
 294           ggid             = gid(n)+1
 295           Vc(ggid)         = Vc(ggid) + vctot
 296           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 297           dvda(ii)         = dvda(ii) + dvdasum*isai*isai
 298
 299 C        Increment number of inner iterations
 300           ninner           = ninner + nj1 - nj0
 301
 302 C        Outer loop uses 13 flops/iteration
 303         end do
 304
 305
 306 C      Increment number of outer iterations
 307         nouter           = nouter + nn1 - nn0
 308       if(nn1.lt.nri) goto 10
 309
 310 C    Write outer/inner iteration count to pointers
 311       outeriter        = nouter
 312       inneriter        = ninner
 313       return
 314       end
 315
 316
 317
 318
 319
 320
 321 C
 322 C Gromacs nonbonded kernel pwr6kernel430nf
 323 C Coulomb interaction:     Generalized-Born
 324 C VdW interaction:         Tabulated
 325 C water optimization:      No
 326 C Calculate forces:        no
 327 C
 328       subroutine pwr6kernel430nf(
 329      &                          nri,
 330      &                          iinr,
 331      &                          jindex,
 332      &                          jjnr,
 333      &                          shift,
 334      &                          shiftvec,
 335      &                          fshift,
 336      &                          gid,
 337      &                          pos,
 338      &                          faction,
 339      &                          charge,
 340      &                          facel,
 341      &                          krf,
 342      &                          crf,
 343      &                          Vc,
 344      &                          type,
 345      &                          ntype,
 346      &                          vdwparam,
 347      &                          Vvdw,
 348      &                          tabscale,
 349      &                          VFtab,
 350      &                          invsqrta,
 351      &                          dvda,
 352      &                          gbtabscale,
 353      &                          GBtab,
 354      &                          nthreads,
 355      &                          count,
 356      &                          mtx,
 357      &                          outeriter,
 358      &                          inneriter,
 359      &                          work)
 360       implicit      none
 361       integer*4     nri,iinr(*),jindex(*),jjnr(*),shift(*)
 362       gmxreal       shiftvec(*),fshift(*),pos(*),faction(*)
 363       integer*4     gid(*),type(*),ntype
 364       gmxreal       charge(*),facel,krf,crf,Vc(*),vdwparam(*)
 365       gmxreal       Vvdw(*),tabscale,VFtab(*)
 366       gmxreal       invsqrta(*),dvda(*),gbtabscale,GBtab(*)
 367       integer*4     nthreads,count,mtx,outeriter,inneriter
 368       gmxreal       work(*)
 369
 370       integer*4     n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
 371       integer*4     nn0,nn1,nouter,ninner
 372       gmxreal       shX,shY,shZ
 373       gmxreal       iq
 374       gmxreal       qq,vcoul,vctot
 375       integer*4     nti
 376       integer*4     tj
 377       gmxreal       Vvdw6,Vvdwtot
 378       gmxreal       Vvdw12
 379       gmxreal       r,rt,eps,eps2
 380       integer*4     n0,nnn
 381       gmxreal       Y,F,Geps,Heps2,Fp,VV
 382       gmxreal       isai,isaj,isaprod,gbscale,vgb
 383       gmxreal       ix1,iy1,iz1
 384       gmxreal       jx1,jy1,jz1
 385       gmxreal       dx11,dy11,dz11,rsq11,rinv11
 386       gmxreal       c6,c12
 387
 388
 389 C    Reset outer and inner iteration counters
 390       nouter           = 0
 391       ninner           = 0
 392
 393 C    Loop over thread workunits
 394    10 call pwr6kernelsync(mtx,count,nri,nthreads,nn0,nn1)
 395         if(nn1.gt.nri) nn1=nri
 396
 397 C      Start outer loop over neighborlists
 398
 399         do n=nn0+1,nn1
 400
 401 C        Load shift vector for this list
 402           is3              = 3*shift(n)+1
 403           shX              = shiftvec(is3)
 404           shY              = shiftvec(is3+1)
 405           shZ              = shiftvec(is3+2)
 406
 407 C        Load limits for loop over neighbors
 408           nj0              = jindex(n)+1
 409           nj1              = jindex(n+1)
 410
 411 C        Get outer coordinate index
 412           ii               = iinr(n)+1
 413           ii3              = 3*ii-2
 414
 415 C        Load i atom data, add shift vector
 416           ix1              = shX + pos(ii3+0)
 417           iy1              = shY + pos(ii3+1)
 418           iz1              = shZ + pos(ii3+2)
 419
 420 C        Load parameters for i atom
 421           iq               = facel*charge(ii)
 422           isai             = invsqrta(ii)
 423           nti              = 2*ntype*type(ii)
 424
 425 C        Zero the potential energy for this list
 426           vctot            = 0
 427           Vvdwtot          = 0
 428
 429 C        Clear i atom forces
 430
 431           do k=nj0,nj1
 432
 433 C          Get j neighbor index, and coordinate index
 434             jnr              = jjnr(k)+1
 435             j3               = 3*jnr-2
 436
 437 C          load j atom coordinates
 438             jx1              = pos(j3+0)
 439             jy1              = pos(j3+1)
 440             jz1              = pos(j3+2)
 441
 442 C          Calculate distance
 443             dx11             = ix1 - jx1
 444             dy11             = iy1 - jy1
 445             dz11             = iz1 - jz1
 446             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11
 447
 448 C          Calculate 1/r and 1/r2
 449
 450 C          PowerPC intrinsics 1/sqrt lookup table
 451 #ifndef GMX_BLUEGENE
 452             rinv11           = frsqrtes(rsq11)
 453 #else
 454             rinv11           = frsqrte(dble(rsq11))
 455 #endif
 456             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 457      &  *rinv11)))
 458 #ifdef GMX_DOUBLE
 459             rinv11           = (0.5*rinv11*(3.0-((rsq11*rinv11)
 460      &  *rinv11)))
 461 #endif
 462
 463 C          Load parameters for j atom
 464             isaj             = invsqrta(jnr)
 465             isaprod          = isai*isaj
 466             qq               = iq*charge(jnr)
 467             vcoul            = qq*rinv11
 468             qq               = isaprod*(-qq)
 469             gbscale          = isaprod*gbtabscale
 470             tj               = nti+2*type(jnr)+1
 471             c6               = vdwparam(tj)
 472             c12              = vdwparam(tj+1)
 473
 474 C          Tabulated Generalized-Born interaction
 475             r                = rsq11*rinv11
 476
 477 C          Calculate table index
 478             rt               = r*gbscale
 479             n0               = rt
 480             eps              = rt-n0
 481             eps2             = eps*eps
 482             nnn              = 4*n0+1
 483             Y                = GBtab(nnn)
 484             F                = GBtab(nnn+1)
 485             Geps             = eps*GBtab(nnn+2)
 486             Heps2            = eps2*GBtab(nnn+3)
 487             Fp               = F+Geps+Heps2
 488             VV               = Y+eps*Fp
 489             vgb              = qq*VV
 490             vctot            = vctot + vcoul
 491
 492 C          Calculate table index
 493             r                = rsq11*rinv11
 494
 495 C          Calculate table index
 496             rt               = r*tabscale
 497             n0               = rt
 498             eps              = rt-n0
 499             eps2             = eps*eps
 500             nnn              = 8*n0+1
 501
 502 C          Tabulated VdW interaction - dispersion
 503             Y                = VFtab(nnn)
 504             F                = VFtab(nnn+1)
 505             Geps             = eps*VFtab(nnn+2)
 506             Heps2            = eps2*VFtab(nnn+3)
 507             Fp               = F+Geps+Heps2
 508             VV               = Y+eps*Fp
 509             Vvdw6            = c6*VV
 510
 511 C          Tabulated VdW interaction - repulsion
 512             nnn              = nnn+4
 513             Y                = VFtab(nnn)
 514             F                = VFtab(nnn+1)
 515             Geps             = eps*VFtab(nnn+2)
 516             Heps2            = eps2*VFtab(nnn+3)
 517             Fp               = F+Geps+Heps2
 518             VV               = Y+eps*Fp
 519             Vvdw12           = c12*VV
 520             Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12
 521
 522 C          Inner loop uses 50 flops/iteration
 523           end do
 524
 525
 526 C        Add i forces to mem and shifted force list
 527
 528 C        Add potential energies to the group for this list
 529           ggid             = gid(n)+1
 530           Vc(ggid)         = Vc(ggid) + vctot
 531           Vvdw(ggid)       = Vvdw(ggid) + Vvdwtot
 532
 533 C        Increment number of inner iterations
 534           ninner           = ninner + nj1 - nj0
 535
 536 C        Outer loop uses 6 flops/iteration
 537         end do
 538
 539
 540 C      Increment number of outer iterations
 541         nouter           = nouter + nn1 - nn0
 542       if(nn1.lt.nri) goto 10
 543
 544 C    Write outer/inner iteration count to pointers
 545       outeriter        = nouter
 546       inneriter        = ninner
 547       return
 548       end
 549
 550
 551