src/gromacs/mdlib/update.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "update.h"
  40
  41 #include <math.h>
  42 #include <stdio.h>
  43
  44 #include <algorithm>
  45
  46 #include "gromacs/domdec/domdec_struct.h"
  47 #include "gromacs/fileio/confio.h"
  48 #include "gromacs/gmxlib/network.h"
  49 #include "gromacs/gmxlib/nrnb.h"
  50 #include "gromacs/listed-forces/disre.h"
  51 #include "gromacs/listed-forces/orires.h"
  52 #include "gromacs/math/functions.h"
  53 #include "gromacs/math/invertmatrix.h"
  54 #include "gromacs/math/units.h"
  55 #include "gromacs/math/vec.h"
  56 #include "gromacs/math/vecdump.h"
  57 #include "gromacs/mdlib/constr.h"
  58 #include "gromacs/mdlib/force.h"
  59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  60 #include "gromacs/mdlib/mdrun.h"
  61 #include "gromacs/mdlib/sim_util.h"
  62 #include "gromacs/mdlib/tgroup.h"
  63 #include "gromacs/mdtypes/commrec.h"
  64 #include "gromacs/mdtypes/group.h"
  65 #include "gromacs/mdtypes/inputrec.h"
  66 #include "gromacs/mdtypes/md_enums.h"
  67 #include "gromacs/pbcutil/boxutilities.h"
  68 #include "gromacs/pbcutil/mshift.h"
  69 #include "gromacs/pbcutil/pbc.h"
  70 #include "gromacs/pulling/pull.h"
  71 #include "gromacs/random/tabulatednormaldistribution.h"
  72 #include "gromacs/random/threefry.h"
  73 #include "gromacs/timing/wallcycle.h"
  74 #include "gromacs/utility/exceptions.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/futil.h"
  77 #include "gromacs/utility/gmxassert.h"
  78 #include "gromacs/utility/gmxomp.h"
  79 #include "gromacs/utility/smalloc.h"
  80
  81 /*For debugging, start at v(-dt/2) for velolcity verlet -- uncomment next line */
  82 /*#define STARTFROMDT2*/
  83
  84 typedef struct {
  85     double em;
  86 } gmx_sd_const_t;
  87
  88 typedef struct {
  89     real V;
  90 } gmx_sd_sigma_t;
  91
  92 typedef struct {
  93     /* BD stuff */
  94     real           *bd_rf;
  95     /* SD stuff */
  96     gmx_sd_const_t *sdc;
  97     gmx_sd_sigma_t *sdsig;
  98     /* andersen temperature control stuff */
  99     gmx_bool       *randomize_group;
 100     real           *boltzfac;
 101 } gmx_stochd_t;
 102
 103 struct gmx_update_t
 104 {
 105     gmx_stochd_t *sd;
 106     /* xprime for constraint algorithms */
 107     rvec         *xp;
 108     int           xp_nalloc;
 109
 110     /* Variables for the deform algorithm */
 111     gmx_int64_t     deformref_step;
 112     matrix          deformref_box;
 113 };
 114
 115
 116 static void do_update_md(int start, int nrend,
 117                          double dt, int nstpcouple,
 118                          t_grp_tcstat *tcstat,
 119                          double nh_vxi[],
 120                          gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
 121                          ivec nFreeze[],
 122                          real invmass[],
 123                          unsigned short ptype[], unsigned short cFREEZE[],
 124                          unsigned short cACC[], unsigned short cTC[],
 125                          rvec x[], rvec xprime[], rvec v[],
 126                          rvec f[], matrix M,
 127                          gmx_bool bNH, gmx_bool bPR)
 128 {
 129     double imass, w_dt;
 130     int    gf = 0, ga = 0, gt = 0;
 131     rvec   vrel;
 132     real   vn, vv, va, vb, vnrel;
 133     real   lg, vxi = 0, u;
 134     int    n, d;
 135
 136     if (bNH || bPR)
 137     {
 138         /* Update with coupling to extended ensembles, used for
 139          * Nose-Hoover and Parrinello-Rahman coupling
 140          * Nose-Hoover uses the reversible leap-frog integrator from
 141          * Holian et al. Phys Rev E 52(3) : 2338, 1995
 142          */
 143         for (n = start; n < nrend; n++)
 144         {
 145             imass = invmass[n];
 146             if (cFREEZE)
 147             {
 148                 gf   = cFREEZE[n];
 149             }
 150             if (cACC)
 151             {
 152                 ga   = cACC[n];
 153             }
 154             if (cTC)
 155             {
 156                 gt   = cTC[n];
 157             }
 158             lg   = tcstat[gt].lambda;
 159             if (bNH)
 160             {
 161                 vxi   = nh_vxi[gt];
 162             }
 163             rvec_sub(v[n], gstat[ga].u, vrel);
 164
 165             for (d = 0; d < DIM; d++)
 166             {
 167                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 168                 {
 169                     vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 170                                               - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 171                     /* do not scale the mean velocities u */
 172                     vn             = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
 173                     v[n][d]        = vn;
 174                     xprime[n][d]   = x[n][d]+vn*dt;
 175                 }
 176                 else
 177                 {
 178                     v[n][d]        = 0.0;
 179                     xprime[n][d]   = x[n][d];
 180                 }
 181             }
 182         }
 183     }
 184     else if (cFREEZE != NULL ||
 185              nFreeze[0][XX] || nFreeze[0][YY] || nFreeze[0][ZZ] ||
 186              bNEMD)
 187     {
 188         /* Update with Berendsen/v-rescale coupling and freeze or NEMD */
 189         for (n = start; n < nrend; n++)
 190         {
 191             w_dt = invmass[n]*dt;
 192             if (cFREEZE)
 193             {
 194                 gf   = cFREEZE[n];
 195             }
 196             if (cACC)
 197             {
 198                 ga   = cACC[n];
 199             }
 200             if (cTC)
 201             {
 202                 gt   = cTC[n];
 203             }
 204             lg   = tcstat[gt].lambda;
 205
 206             for (d = 0; d < DIM; d++)
 207             {
 208                 vn             = v[n][d];
 209                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 210                 {
 211                     vv             = lg*vn + f[n][d]*w_dt;
 212
 213                     /* do not scale the mean velocities u */
 214                     u              = gstat[ga].u[d];
 215                     va             = vv + accel[ga][d]*dt;
 216                     vb             = va + (1.0-lg)*u;
 217                     v[n][d]        = vb;
 218                     xprime[n][d]   = x[n][d]+vb*dt;
 219                 }
 220                 else
 221                 {
 222                     v[n][d]        = 0.0;
 223                     xprime[n][d]   = x[n][d];
 224                 }
 225             }
 226         }
 227     }
 228     else
 229     {
 230         /* Plain update with Berendsen/v-rescale coupling */
 231         for (n = start; n < nrend; n++)
 232         {
 233             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 234             {
 235                 w_dt = invmass[n]*dt;
 236                 if (cTC)
 237                 {
 238                     gt = cTC[n];
 239                 }
 240                 lg = tcstat[gt].lambda;
 241
 242                 for (d = 0; d < DIM; d++)
 243                 {
 244                     vn           = lg*v[n][d] + f[n][d]*w_dt;
 245                     v[n][d]      = vn;
 246                     xprime[n][d] = x[n][d] + vn*dt;
 247                 }
 248             }
 249             else
 250             {
 251                 for (d = 0; d < DIM; d++)
 252                 {
 253                     v[n][d]        = 0.0;
 254                     xprime[n][d]   = x[n][d];
 255                 }
 256             }
 257         }
 258     }
 259 }
 260
 261 static void do_update_vv_vel(int start, int nrend, double dt,
 262                              rvec accel[], ivec nFreeze[], real invmass[],
 263                              unsigned short ptype[], unsigned short cFREEZE[],
 264                              unsigned short cACC[], rvec v[], rvec f[],
 265                              gmx_bool bExtended, real veta, real alpha)
 266 {
 267     double w_dt;
 268     int    gf = 0, ga = 0;
 269     int    n, d;
 270     double g, mv1, mv2;
 271
 272     if (bExtended)
 273     {
 274         g        = 0.25*dt*veta*alpha;
 275         mv1      = exp(-g);
 276         mv2      = gmx::series_sinhx(g);
 277     }
 278     else
 279     {
 280         mv1      = 1.0;
 281         mv2      = 1.0;
 282     }
 283     for (n = start; n < nrend; n++)
 284     {
 285         w_dt = invmass[n]*dt;
 286         if (cFREEZE)
 287         {
 288             gf   = cFREEZE[n];
 289         }
 290         if (cACC)
 291         {
 292             ga   = cACC[n];
 293         }
 294
 295         for (d = 0; d < DIM; d++)
 296         {
 297             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 298             {
 299                 v[n][d]             = mv1*(mv1*v[n][d] + 0.5*(w_dt*mv2*f[n][d]))+0.5*accel[ga][d]*dt;
 300             }
 301             else
 302             {
 303                 v[n][d]        = 0.0;
 304             }
 305         }
 306     }
 307 } /* do_update_vv_vel */
 308
 309 static void do_update_vv_pos(int start, int nrend, double dt,
 310                              ivec nFreeze[],
 311                              unsigned short ptype[], unsigned short cFREEZE[],
 312                              rvec x[], rvec xprime[], rvec v[],
 313                              gmx_bool bExtended, real veta)
 314 {
 315     int    gf = 0;
 316     int    n, d;
 317     double g, mr1, mr2;
 318
 319     /* Would it make more sense if Parrinello-Rahman was put here? */
 320     if (bExtended)
 321     {
 322         g        = 0.5*dt*veta;
 323         mr1      = exp(g);
 324         mr2      = gmx::series_sinhx(g);
 325     }
 326     else
 327     {
 328         mr1      = 1.0;
 329         mr2      = 1.0;
 330     }
 331
 332     for (n = start; n < nrend; n++)
 333     {
 334
 335         if (cFREEZE)
 336         {
 337             gf   = cFREEZE[n];
 338         }
 339
 340         for (d = 0; d < DIM; d++)
 341         {
 342             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 343             {
 344                 xprime[n][d]   = mr1*(mr1*x[n][d]+mr2*dt*v[n][d]);
 345             }
 346             else
 347             {
 348                 xprime[n][d]   = x[n][d];
 349             }
 350         }
 351     }
 352 } /* do_update_vv_pos */
 353
 354 static void do_update_visc(int start, int nrend,
 355                            double dt, int nstpcouple,
 356                            t_grp_tcstat *tcstat,
 357                            double nh_vxi[],
 358                            real invmass[],
 359                            unsigned short ptype[], unsigned short cTC[],
 360                            rvec x[], rvec xprime[], rvec v[],
 361                            rvec f[], matrix M, matrix box, real
 362                            cos_accel, real vcos,
 363                            gmx_bool bNH, gmx_bool bPR)
 364 {
 365     double imass, w_dt;
 366     int    gt = 0;
 367     real   vn, vc;
 368     real   lg, vxi = 0, vv;
 369     real   fac, cosz;
 370     rvec   vrel;
 371     int    n, d;
 372
 373     fac = 2*M_PI/(box[ZZ][ZZ]);
 374
 375     if (bNH || bPR)
 376     {
 377         /* Update with coupling to extended ensembles, used for
 378          * Nose-Hoover and Parrinello-Rahman coupling
 379          */
 380         for (n = start; n < nrend; n++)
 381         {
 382             imass = invmass[n];
 383             if (cTC)
 384             {
 385                 gt   = cTC[n];
 386             }
 387             lg   = tcstat[gt].lambda;
 388             cosz = cos(fac*x[n][ZZ]);
 389
 390             copy_rvec(v[n], vrel);
 391
 392             vc            = cosz*vcos;
 393             vrel[XX]     -= vc;
 394             if (bNH)
 395             {
 396                 vxi        = nh_vxi[gt];
 397             }
 398             for (d = 0; d < DIM; d++)
 399             {
 400                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 401                 {
 402                     vn  = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
 403                                             - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
 404                     if (d == XX)
 405                     {
 406                         vn += vc + dt*cosz*cos_accel;
 407                     }
 408                     v[n][d]        = vn;
 409                     xprime[n][d]   = x[n][d]+vn*dt;
 410                 }
 411                 else
 412                 {
 413                     xprime[n][d]   = x[n][d];
 414                 }
 415             }
 416         }
 417     }
 418     else
 419     {
 420         /* Classic version of update, used with berendsen coupling */
 421         for (n = start; n < nrend; n++)
 422         {
 423             w_dt = invmass[n]*dt;
 424             if (cTC)
 425             {
 426                 gt   = cTC[n];
 427             }
 428             lg   = tcstat[gt].lambda;
 429             cosz = cos(fac*x[n][ZZ]);
 430
 431             for (d = 0; d < DIM; d++)
 432             {
 433                 vn             = v[n][d];
 434
 435                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
 436                 {
 437                     if (d == XX)
 438                     {
 439                         vc           = cosz*vcos;
 440                         /* Do not scale the cosine velocity profile */
 441                         vv           = vc + lg*(vn - vc + f[n][d]*w_dt);
 442                         /* Add the cosine accelaration profile */
 443                         vv          += dt*cosz*cos_accel;
 444                     }
 445                     else
 446                     {
 447                         vv           = lg*(vn + f[n][d]*w_dt);
 448                     }
 449                     v[n][d]        = vv;
 450                     xprime[n][d]   = x[n][d]+vv*dt;
 451                 }
 452                 else
 453                 {
 454                     v[n][d]        = 0.0;
 455                     xprime[n][d]   = x[n][d];
 456                 }
 457             }
 458         }
 459     }
 460 }
 461
 462 static gmx_stochd_t *init_stochd(const t_inputrec *ir)
 463 {
 464     gmx_stochd_t   *sd;
 465
 466     snew(sd, 1);
 467
 468     const t_grpopts *opts = &ir->opts;
 469     int              ngtc = opts->ngtc;
 470
 471     if (ir->eI == eiBD)
 472     {
 473         snew(sd->bd_rf, ngtc);
 474     }
 475     else if (EI_SD(ir->eI))
 476     {
 477         snew(sd->sdc, ngtc);
 478         snew(sd->sdsig, ngtc);
 479
 480         gmx_sd_const_t *sdc = sd->sdc;
 481
 482         for (int gt = 0; gt < ngtc; gt++)
 483         {
 484             if (opts->tau_t[gt] > 0)
 485             {
 486                 sdc[gt].em  = exp(-ir->delta_t/opts->tau_t[gt]);
 487             }
 488             else
 489             {
 490                 /* No friction and noise on this group */
 491                 sdc[gt].em  = 1;
 492             }
 493         }
 494     }
 495     else if (ETC_ANDERSEN(ir->etc))
 496     {
 497         snew(sd->randomize_group, ngtc);
 498         snew(sd->boltzfac, ngtc);
 499
 500         /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
 501         /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
 502
 503         for (int gt = 0; gt < ngtc; gt++)
 504         {
 505             real reft = std::max<real>(0, opts->ref_t[gt]);
 506             if ((opts->tau_t[gt] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
 507             {
 508                 sd->randomize_group[gt] = TRUE;
 509                 sd->boltzfac[gt]        = BOLTZ*opts->ref_t[gt];
 510             }
 511             else
 512             {
 513                 sd->randomize_group[gt] = FALSE;
 514             }
 515         }
 516     }
 517
 518     return sd;
 519 }
 520
 521 void update_temperature_constants(gmx_update_t *upd, const t_inputrec *ir)
 522 {
 523     if (ir->eI == eiBD)
 524     {
 525         if (ir->bd_fric != 0)
 526         {
 527             for (int gt = 0; gt < ir->opts.ngtc; gt++)
 528             {
 529                 upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]/(ir->bd_fric*ir->delta_t));
 530             }
 531         }
 532         else
 533         {
 534             for (int gt = 0; gt < ir->opts.ngtc; gt++)
 535             {
 536                 upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]);
 537             }
 538         }
 539     }
 540     if (ir->eI == eiSD1)
 541     {
 542         for (int gt = 0; gt < ir->opts.ngtc; gt++)
 543         {
 544             real kT = BOLTZ*ir->opts.ref_t[gt];
 545             /* The mass is accounted for later, since this differs per atom */
 546             upd->sd->sdsig[gt].V  = std::sqrt(kT*(1 - upd->sd->sdc[gt].em*upd->sd->sdc[gt].em));
 547         }
 548     }
 549 }
 550
 551 gmx_update_t *init_update(const t_inputrec *ir)
 552 {
 553     gmx_update_t *upd;
 554
 555     snew(upd, 1);
 556
 557     if (ir->eI == eiBD || EI_SD(ir->eI) || ir->etc == etcVRESCALE || ETC_ANDERSEN(ir->etc))
 558     {
 559         upd->sd    = init_stochd(ir);
 560     }
 561
 562     update_temperature_constants(upd, ir);
 563
 564     upd->xp        = NULL;
 565     upd->xp_nalloc = 0;
 566
 567     return upd;
 568 }
 569
 570 void update_realloc(gmx_update_t *upd, int state_nalloc)
 571 {
 572     GMX_ASSERT(upd, "upd must be allocated before its fields can be reallocated");
 573     if (state_nalloc > upd->xp_nalloc)
 574     {
 575         upd->xp_nalloc = state_nalloc;
 576         /* We need to allocate one element extra, since we might use
 577          * (unaligned) 4-wide SIMD loads to access rvec entries. */
 578         srenew(upd->xp, upd->xp_nalloc + 1);
 579     }
 580 }
 581
 582 static void do_update_sd1(gmx_stochd_t *sd,
 583                           int start, int nrend, double dt,
 584                           rvec accel[], ivec nFreeze[],
 585                           real invmass[], unsigned short ptype[],
 586                           unsigned short cFREEZE[], unsigned short cACC[],
 587                           unsigned short cTC[],
 588                           rvec x[], rvec xprime[], rvec v[], rvec f[],
 589                           gmx_bool bDoConstr,
 590                           gmx_bool bFirstHalfConstr,
 591                           gmx_int64_t step, int seed, int* gatindex)
 592 {
 593     gmx_sd_const_t *sdc;
 594     gmx_sd_sigma_t *sig;
 595     int             gf = 0, ga = 0, gt = 0;
 596     real            ism;
 597     int             n, d;
 598
 599     // Even 0 bits internal counter gives 2x64 ints (more than enough for three table lookups)
 600     gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 601     gmx::TabulatedNormalDistribution<real, 14> dist;
 602
 603     sdc = sd->sdc;
 604     sig = sd->sdsig;
 605
 606     if (!bDoConstr)
 607     {
 608         for (n = start; n < nrend; n++)
 609         {
 610             int  ng = gatindex ? gatindex[n] : n;
 611
 612             rng.restart(step, ng);
 613             dist.reset();
 614
 615             ism = std::sqrt(invmass[n]);
 616
 617             if (cFREEZE)
 618             {
 619                 gf  = cFREEZE[n];
 620             }
 621             if (cACC)
 622             {
 623                 ga  = cACC[n];
 624             }
 625             if (cTC)
 626             {
 627                 gt  = cTC[n];
 628             }
 629
 630             for (d = 0; d < DIM; d++)
 631             {
 632                 if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 633                 {
 634                     real sd_V, vn;
 635
 636                     sd_V         = ism*sig[gt].V*dist(rng);
 637                     vn           = v[n][d] + (invmass[n]*f[n][d] + accel[ga][d])*dt;
 638                     v[n][d]      = vn*sdc[gt].em + sd_V;
 639                     /* Here we include half of the friction+noise
 640                      * update of v into the integration of x.
 641                      */
 642                     xprime[n][d] = x[n][d] + 0.5*(vn + v[n][d])*dt;
 643                 }
 644                 else
 645                 {
 646                     v[n][d]      = 0.0;
 647                     xprime[n][d] = x[n][d];
 648                 }
 649             }
 650         }
 651     }
 652     else
 653     {
 654         /* We do have constraints */
 655         if (bFirstHalfConstr)
 656         {
 657             /* First update without friction and noise */
 658             real im;
 659
 660             for (n = start; n < nrend; n++)
 661             {
 662                 im = invmass[n];
 663
 664                 if (cFREEZE)
 665                 {
 666                     gf  = cFREEZE[n];
 667                 }
 668                 if (cACC)
 669                 {
 670                     ga  = cACC[n];
 671                 }
 672
 673                 for (d = 0; d < DIM; d++)
 674                 {
 675                     if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 676                     {
 677                         v[n][d]      = v[n][d] + (im*f[n][d] + accel[ga][d])*dt;
 678                         xprime[n][d] = x[n][d] +  v[n][d]*dt;
 679                     }
 680                     else
 681                     {
 682                         v[n][d]      = 0.0;
 683                         xprime[n][d] = x[n][d];
 684                     }
 685                 }
 686             }
 687         }
 688         else
 689         {
 690             /* Update friction and noise only */
 691             for (n = start; n < nrend; n++)
 692             {
 693                 int  ng = gatindex ? gatindex[n] : n;
 694
 695                 rng.restart(step, ng);
 696                 dist.reset();
 697
 698                 ism = std::sqrt(invmass[n]);
 699
 700                 if (cFREEZE)
 701                 {
 702                     gf  = cFREEZE[n];
 703                 }
 704                 if (cTC)
 705                 {
 706                     gt  = cTC[n];
 707                 }
 708
 709                 for (d = 0; d < DIM; d++)
 710                 {
 711                     if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 712                     {
 713                         real sd_V, vn;
 714
 715                         sd_V         = ism*sig[gt].V*dist(rng);
 716                         vn           = v[n][d];
 717                         v[n][d]      = vn*sdc[gt].em + sd_V;
 718                         /* Add the friction and noise contribution only */
 719                         xprime[n][d] = xprime[n][d] + 0.5*(v[n][d] - vn)*dt;
 720                     }
 721                 }
 722             }
 723         }
 724     }
 725 }
 726
 727 static void do_update_bd(int start, int nrend, double dt,
 728                          ivec nFreeze[],
 729                          real invmass[], unsigned short ptype[],
 730                          unsigned short cFREEZE[], unsigned short cTC[],
 731                          rvec x[], rvec xprime[], rvec v[],
 732                          rvec f[], real friction_coefficient,
 733                          real *rf, gmx_int64_t step, int seed,
 734                          int* gatindex)
 735 {
 736     /* note -- these appear to be full step velocities . . .  */
 737     int    gf = 0, gt = 0;
 738     real   vn;
 739     real   invfr = 0;
 740     int    n, d;
 741     // Use 1 bit of internal counters to give us 2*2 64-bits values per stream
 742     // Each 64-bit value is enough for 4 normal distribution table numbers.
 743     gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 744     gmx::TabulatedNormalDistribution<real, 14> dist;
 745
 746     if (friction_coefficient != 0)
 747     {
 748         invfr = 1.0/friction_coefficient;
 749     }
 750
 751     for (n = start; (n < nrend); n++)
 752     {
 753         int  ng  = gatindex ? gatindex[n] : n;
 754
 755         rng.restart(step, ng);
 756         dist.reset();
 757
 758         if (cFREEZE)
 759         {
 760             gf = cFREEZE[n];
 761         }
 762         if (cTC)
 763         {
 764             gt = cTC[n];
 765         }
 766         for (d = 0; (d < DIM); d++)
 767         {
 768             if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 769             {
 770                 if (friction_coefficient != 0)
 771                 {
 772                     vn = invfr*f[n][d] + rf[gt]*dist(rng);
 773                 }
 774                 else
 775                 {
 776                     /* NOTE: invmass = 2/(mass*friction_constant*dt) */
 777                     vn = 0.5*invmass[n]*f[n][d]*dt
 778                         + std::sqrt(0.5*invmass[n])*rf[gt]*dist(rng);
 779                 }
 780
 781                 v[n][d]      = vn;
 782                 xprime[n][d] = x[n][d]+vn*dt;
 783             }
 784             else
 785             {
 786                 v[n][d]      = 0.0;
 787                 xprime[n][d] = x[n][d];
 788             }
 789         }
 790     }
 791 }
 792
 793 static void dump_it_all(FILE gmx_unused *fp, const char gmx_unused *title,
 794                         int gmx_unused natoms, rvec gmx_unused x[], rvec gmx_unused xp[],
 795                         rvec gmx_unused v[], rvec gmx_unused f[])
 796 {
 797 #ifdef DEBUG
 798     if (fp)
 799     {
 800         fprintf(fp, "%s\n", title);
 801         pr_rvecs(fp, 0, "x", x, natoms);
 802         pr_rvecs(fp, 0, "xp", xp, natoms);
 803         pr_rvecs(fp, 0, "v", v, natoms);
 804         pr_rvecs(fp, 0, "f", f, natoms);
 805     }
 806 #endif
 807 }
 808
 809 static void calc_ke_part_normal(rvec v[], t_grpopts *opts, t_mdatoms *md,
 810                                 gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel)
 811 {
 812     int           g;
 813     t_grp_tcstat *tcstat  = ekind->tcstat;
 814     t_grp_acc    *grpstat = ekind->grpstat;
 815     int           nthread, thread;
 816
 817     /* three main: VV with AveVel, vv with AveEkin, leap with AveEkin.  Leap with AveVel is also
 818        an option, but not supported now.
 819        bEkinAveVel: If TRUE, we sum into ekin, if FALSE, into ekinh.
 820      */
 821
 822     /* group velocities are calculated in update_ekindata and
 823      * accumulated in acumulate_groups.
 824      * Now the partial global and groups ekin.
 825      */
 826     for (g = 0; (g < opts->ngtc); g++)
 827     {
 828         copy_mat(tcstat[g].ekinh, tcstat[g].ekinh_old);
 829         if (bEkinAveVel)
 830         {
 831             clear_mat(tcstat[g].ekinf);
 832             tcstat[g].ekinscalef_nhc = 1.0;   /* need to clear this -- logic is complicated! */
 833         }
 834         else
 835         {
 836             clear_mat(tcstat[g].ekinh);
 837         }
 838     }
 839     ekind->dekindl_old = ekind->dekindl;
 840     nthread            = gmx_omp_nthreads_get(emntUpdate);
 841
 842 #pragma omp parallel for num_threads(nthread) schedule(static)
 843     for (thread = 0; thread < nthread; thread++)
 844     {
 845         // This OpenMP only loops over arrays and does not call any functions
 846         // or memory allocation. It should not be able to throw, so for now
 847         // we do not need a try/catch wrapper.
 848         int     start_t, end_t, n;
 849         int     ga, gt;
 850         rvec    v_corrt;
 851         real    hm;
 852         int     d, m;
 853         matrix *ekin_sum;
 854         real   *dekindl_sum;
 855
 856         start_t = ((thread+0)*md->homenr)/nthread;
 857         end_t   = ((thread+1)*md->homenr)/nthread;
 858
 859         ekin_sum    = ekind->ekin_work[thread];
 860         dekindl_sum = ekind->dekindl_work[thread];
 861
 862         for (gt = 0; gt < opts->ngtc; gt++)
 863         {
 864             clear_mat(ekin_sum[gt]);
 865         }
 866         *dekindl_sum = 0.0;
 867
 868         ga = 0;
 869         gt = 0;
 870         for (n = start_t; n < end_t; n++)
 871         {
 872             if (md->cACC)
 873             {
 874                 ga = md->cACC[n];
 875             }
 876             if (md->cTC)
 877             {
 878                 gt = md->cTC[n];
 879             }
 880             hm   = 0.5*md->massT[n];
 881
 882             for (d = 0; (d < DIM); d++)
 883             {
 884                 v_corrt[d]  = v[n][d]  - grpstat[ga].u[d];
 885             }
 886             for (d = 0; (d < DIM); d++)
 887             {
 888                 for (m = 0; (m < DIM); m++)
 889                 {
 890                     /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
 891                     ekin_sum[gt][m][d] += hm*v_corrt[m]*v_corrt[d];
 892                 }
 893             }
 894             if (md->nMassPerturbed && md->bPerturbed[n])
 895             {
 896                 *dekindl_sum +=
 897                     0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
 898             }
 899         }
 900     }
 901
 902     ekind->dekindl = 0;
 903     for (thread = 0; thread < nthread; thread++)
 904     {
 905         for (g = 0; g < opts->ngtc; g++)
 906         {
 907             if (bEkinAveVel)
 908             {
 909                 m_add(tcstat[g].ekinf, ekind->ekin_work[thread][g],
 910                       tcstat[g].ekinf);
 911             }
 912             else
 913             {
 914                 m_add(tcstat[g].ekinh, ekind->ekin_work[thread][g],
 915                       tcstat[g].ekinh);
 916             }
 917         }
 918
 919         ekind->dekindl += *ekind->dekindl_work[thread];
 920     }
 921
 922     inc_nrnb(nrnb, eNR_EKIN, md->homenr);
 923 }
 924
 925 static void calc_ke_part_visc(matrix box, rvec x[], rvec v[],
 926                               t_grpopts *opts, t_mdatoms *md,
 927                               gmx_ekindata_t *ekind,
 928                               t_nrnb *nrnb, gmx_bool bEkinAveVel)
 929 {
 930     int           start = 0, homenr = md->homenr;
 931     int           g, d, n, m, gt = 0;
 932     rvec          v_corrt;
 933     real          hm;
 934     t_grp_tcstat *tcstat = ekind->tcstat;
 935     t_cos_acc    *cosacc = &(ekind->cosacc);
 936     real          dekindl;
 937     real          fac, cosz;
 938     double        mvcos;
 939
 940     for (g = 0; g < opts->ngtc; g++)
 941     {
 942         copy_mat(ekind->tcstat[g].ekinh, ekind->tcstat[g].ekinh_old);
 943         clear_mat(ekind->tcstat[g].ekinh);
 944     }
 945     ekind->dekindl_old = ekind->dekindl;
 946
 947     fac     = 2*M_PI/box[ZZ][ZZ];
 948     mvcos   = 0;
 949     dekindl = 0;
 950     for (n = start; n < start+homenr; n++)
 951     {
 952         if (md->cTC)
 953         {
 954             gt = md->cTC[n];
 955         }
 956         hm   = 0.5*md->massT[n];
 957
 958         /* Note that the times of x and v differ by half a step */
 959         /* MRS -- would have to be changed for VV */
 960         cosz         = cos(fac*x[n][ZZ]);
 961         /* Calculate the amplitude of the new velocity profile */
 962         mvcos       += 2*cosz*md->massT[n]*v[n][XX];
 963
 964         copy_rvec(v[n], v_corrt);
 965         /* Subtract the profile for the kinetic energy */
 966         v_corrt[XX] -= cosz*cosacc->vcos;
 967         for (d = 0; (d < DIM); d++)
 968         {
 969             for (m = 0; (m < DIM); m++)
 970             {
 971                 /* if we're computing a full step velocity, v_corrt[d] has v(t).  Otherwise, v(t+dt/2) */
 972                 if (bEkinAveVel)
 973                 {
 974                     tcstat[gt].ekinf[m][d] += hm*v_corrt[m]*v_corrt[d];
 975                 }
 976                 else
 977                 {
 978                     tcstat[gt].ekinh[m][d] += hm*v_corrt[m]*v_corrt[d];
 979                 }
 980             }
 981         }
 982         if (md->nPerturbed && md->bPerturbed[n])
 983         {
 984             /* The minus sign here might be confusing.
 985              * The kinetic contribution from dH/dl doesn't come from
 986              * d m(l)/2 v^2 / dl, but rather from d p^2/2m(l) / dl,
 987              * where p are the momenta. The difference is only a minus sign.
 988              */
 989             dekindl -= 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
 990         }
 991     }
 992     ekind->dekindl = dekindl;
 993     cosacc->mvcos  = mvcos;
 994
 995     inc_nrnb(nrnb, eNR_EKIN, homenr);
 996 }
 997
 998 void calc_ke_part(t_state *state, t_grpopts *opts, t_mdatoms *md,
 999                   gmx_ekindata_t *ekind, t_nrnb *nrnb, gmx_bool bEkinAveVel)
1000 {
1001     if (ekind->cosacc.cos_accel == 0)
1002     {
1003         calc_ke_part_normal(state->v, opts, md, ekind, nrnb, bEkinAveVel);
1004     }
1005     else
1006     {
1007         calc_ke_part_visc(state->box, state->x, state->v, opts, md, ekind, nrnb, bEkinAveVel);
1008     }
1009 }
1010
1011 extern void init_ekinstate(ekinstate_t *ekinstate, const t_inputrec *ir)
1012 {
1013     ekinstate->ekin_n = ir->opts.ngtc;
1014     snew(ekinstate->ekinh, ekinstate->ekin_n);
1015     snew(ekinstate->ekinf, ekinstate->ekin_n);
1016     snew(ekinstate->ekinh_old, ekinstate->ekin_n);
1017     snew(ekinstate->ekinscalef_nhc, ekinstate->ekin_n);
1018     snew(ekinstate->ekinscaleh_nhc, ekinstate->ekin_n);
1019     snew(ekinstate->vscale_nhc, ekinstate->ekin_n);
1020     ekinstate->dekindl = 0;
1021     ekinstate->mvcos   = 0;
1022 }
1023
1024 void update_ekinstate(ekinstate_t *ekinstate, gmx_ekindata_t *ekind)
1025 {
1026     int i;
1027
1028     for (i = 0; i < ekinstate->ekin_n; i++)
1029     {
1030         copy_mat(ekind->tcstat[i].ekinh, ekinstate->ekinh[i]);
1031         copy_mat(ekind->tcstat[i].ekinf, ekinstate->ekinf[i]);
1032         copy_mat(ekind->tcstat[i].ekinh_old, ekinstate->ekinh_old[i]);
1033         ekinstate->ekinscalef_nhc[i] = ekind->tcstat[i].ekinscalef_nhc;
1034         ekinstate->ekinscaleh_nhc[i] = ekind->tcstat[i].ekinscaleh_nhc;
1035         ekinstate->vscale_nhc[i]     = ekind->tcstat[i].vscale_nhc;
1036     }
1037
1038     copy_mat(ekind->ekin, ekinstate->ekin_total);
1039     ekinstate->dekindl = ekind->dekindl;
1040     ekinstate->mvcos   = ekind->cosacc.mvcos;
1041
1042 }
1043
1044 void restore_ekinstate_from_state(t_commrec *cr,
1045                                   gmx_ekindata_t *ekind, const ekinstate_t *ekinstate)
1046 {
1047     int i, n;
1048
1049     if (MASTER(cr))
1050     {
1051         for (i = 0; i < ekinstate->ekin_n; i++)
1052         {
1053             copy_mat(ekinstate->ekinh[i], ekind->tcstat[i].ekinh);
1054             copy_mat(ekinstate->ekinf[i], ekind->tcstat[i].ekinf);
1055             copy_mat(ekinstate->ekinh_old[i], ekind->tcstat[i].ekinh_old);
1056             ekind->tcstat[i].ekinscalef_nhc = ekinstate->ekinscalef_nhc[i];
1057             ekind->tcstat[i].ekinscaleh_nhc = ekinstate->ekinscaleh_nhc[i];
1058             ekind->tcstat[i].vscale_nhc     = ekinstate->vscale_nhc[i];
1059         }
1060
1061         copy_mat(ekinstate->ekin_total, ekind->ekin);
1062
1063         ekind->dekindl      = ekinstate->dekindl;
1064         ekind->cosacc.mvcos = ekinstate->mvcos;
1065         n                   = ekinstate->ekin_n;
1066     }
1067
1068     if (PAR(cr))
1069     {
1070         gmx_bcast(sizeof(n), &n, cr);
1071         for (i = 0; i < n; i++)
1072         {
1073             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh[0][0]),
1074                       ekind->tcstat[i].ekinh[0], cr);
1075             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinf[0][0]),
1076                       ekind->tcstat[i].ekinf[0], cr);
1077             gmx_bcast(DIM*DIM*sizeof(ekind->tcstat[i].ekinh_old[0][0]),
1078                       ekind->tcstat[i].ekinh_old[0], cr);
1079
1080             gmx_bcast(sizeof(ekind->tcstat[i].ekinscalef_nhc),
1081                       &(ekind->tcstat[i].ekinscalef_nhc), cr);
1082             gmx_bcast(sizeof(ekind->tcstat[i].ekinscaleh_nhc),
1083                       &(ekind->tcstat[i].ekinscaleh_nhc), cr);
1084             gmx_bcast(sizeof(ekind->tcstat[i].vscale_nhc),
1085                       &(ekind->tcstat[i].vscale_nhc), cr);
1086         }
1087         gmx_bcast(DIM*DIM*sizeof(ekind->ekin[0][0]),
1088                   ekind->ekin[0], cr);
1089
1090         gmx_bcast(sizeof(ekind->dekindl), &ekind->dekindl, cr);
1091         gmx_bcast(sizeof(ekind->cosacc.mvcos), &ekind->cosacc.mvcos, cr);
1092     }
1093 }
1094
1095 void set_deform_reference_box(gmx_update_t *upd, gmx_int64_t step, matrix box)
1096 {
1097     upd->deformref_step = step;
1098     copy_mat(box, upd->deformref_box);
1099 }
1100
1101 static void deform(gmx_update_t *upd,
1102                    int start, int homenr, rvec x[], matrix box,
1103                    const t_inputrec *ir, gmx_int64_t step)
1104 {
1105     matrix bnew, invbox, mu;
1106     real   elapsed_time;
1107     int    i, j;
1108
1109     elapsed_time = (step + 1 - upd->deformref_step)*ir->delta_t;
1110     copy_mat(box, bnew);
1111     for (i = 0; i < DIM; i++)
1112     {
1113         for (j = 0; j < DIM; j++)
1114         {
1115             if (ir->deform[i][j] != 0)
1116             {
1117                 bnew[i][j] =
1118                     upd->deformref_box[i][j] + elapsed_time*ir->deform[i][j];
1119             }
1120         }
1121     }
1122     /* We correct the off-diagonal elements,
1123      * which can grow indefinitely during shearing,
1124      * so the shifts do not get messed up.
1125      */
1126     for (i = 1; i < DIM; i++)
1127     {
1128         for (j = i-1; j >= 0; j--)
1129         {
1130             while (bnew[i][j] - box[i][j] > 0.5*bnew[j][j])
1131             {
1132                 rvec_dec(bnew[i], bnew[j]);
1133             }
1134             while (bnew[i][j] - box[i][j] < -0.5*bnew[j][j])
1135             {
1136                 rvec_inc(bnew[i], bnew[j]);
1137             }
1138         }
1139     }
1140     gmx::invertBoxMatrix(box, invbox);
1141     copy_mat(bnew, box);
1142     mmul_ur0(box, invbox, mu);
1143
1144     for (i = start; i < start+homenr; i++)
1145     {
1146         x[i][XX] = mu[XX][XX]*x[i][XX]+mu[YY][XX]*x[i][YY]+mu[ZZ][XX]*x[i][ZZ];
1147         x[i][YY] = mu[YY][YY]*x[i][YY]+mu[ZZ][YY]*x[i][ZZ];
1148         x[i][ZZ] = mu[ZZ][ZZ]*x[i][ZZ];
1149     }
1150 }
1151
1152 void update_tcouple(gmx_int64_t       step,
1153                     t_inputrec       *inputrec,
1154                     t_state          *state,
1155                     gmx_ekindata_t   *ekind,
1156                     t_extmass        *MassQ,
1157                     t_mdatoms        *md)
1158
1159 {
1160     gmx_bool   bTCouple = FALSE;
1161     real       dttc;
1162     int        i, offset;
1163
1164     /* if using vv with trotter decomposition methods, we do this elsewhere in the code */
1165     if (inputrec->etc != etcNO &&
1166         !(inputrecNvtTrotter(inputrec) || inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec)))
1167     {
1168         /* We should only couple after a step where energies were determined (for leapfrog versions)
1169            or the step energies are determined, for velocity verlet versions */
1170
1171         if (EI_VV(inputrec->eI))
1172         {
1173             offset = 0;
1174         }
1175         else
1176         {
1177             offset = 1;
1178         }
1179         bTCouple = (inputrec->nsttcouple == 1 ||
1180                     do_per_step(step+inputrec->nsttcouple-offset,
1181                                 inputrec->nsttcouple));
1182     }
1183
1184     if (bTCouple)
1185     {
1186         dttc = inputrec->nsttcouple*inputrec->delta_t;
1187
1188         switch (inputrec->etc)
1189         {
1190             case etcNO:
1191                 break;
1192             case etcBERENDSEN:
1193                 berendsen_tcoupl(inputrec, ekind, dttc);
1194                 break;
1195             case etcNOSEHOOVER:
1196                 nosehoover_tcoupl(&(inputrec->opts), ekind, dttc,
1197                                   state->nosehoover_xi, state->nosehoover_vxi, MassQ);
1198                 break;
1199             case etcVRESCALE:
1200                 vrescale_tcoupl(inputrec, step, ekind, dttc,
1201                                 state->therm_integral);
1202                 break;
1203         }
1204         /* rescale in place here */
1205         if (EI_VV(inputrec->eI))
1206         {
1207             rescale_velocities(ekind, md, 0, md->homenr, state->v);
1208         }
1209     }
1210     else
1211     {
1212         /* Set the T scaling lambda to 1 to have no scaling */
1213         for (i = 0; (i < inputrec->opts.ngtc); i++)
1214         {
1215             ekind->tcstat[i].lambda = 1.0;
1216         }
1217     }
1218 }
1219
1220 void update_pcouple(FILE             *fplog,
1221                     gmx_int64_t       step,
1222                     t_inputrec       *inputrec,
1223                     t_state          *state,
1224                     matrix            pcoupl_mu,
1225                     matrix            M,
1226                     gmx_bool          bInitStep)
1227 {
1228     gmx_bool   bPCouple = FALSE;
1229     real       dtpc     = 0;
1230     int        i;
1231
1232     /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
1233     if (inputrec->epc != epcNO && (!(inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec))))
1234     {
1235         /* We should only couple after a step where energies were determined */
1236         bPCouple = (inputrec->nstpcouple == 1 ||
1237                     do_per_step(step+inputrec->nstpcouple-1,
1238                                 inputrec->nstpcouple));
1239     }
1240
1241     clear_mat(pcoupl_mu);
1242     for (i = 0; i < DIM; i++)
1243     {
1244         pcoupl_mu[i][i] = 1.0;
1245     }
1246
1247     clear_mat(M);
1248
1249     if (bPCouple)
1250     {
1251         dtpc = inputrec->nstpcouple*inputrec->delta_t;
1252
1253         switch (inputrec->epc)
1254         {
1255             /* We can always pcoupl, even if we did not sum the energies
1256              * the previous step, since state->pres_prev is only updated
1257              * when the energies have been summed.
1258              */
1259             case (epcNO):
1260                 break;
1261             case (epcBERENDSEN):
1262                 if (!bInitStep)
1263                 {
1264                     berendsen_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev, state->box,
1265                                      pcoupl_mu);
1266                 }
1267                 break;
1268             case (epcPARRINELLORAHMAN):
1269                 parrinellorahman_pcoupl(fplog, step, inputrec, dtpc, state->pres_prev,
1270                                         state->box, state->box_rel, state->boxv,
1271                                         M, pcoupl_mu, bInitStep);
1272                 break;
1273             default:
1274                 break;
1275         }
1276     }
1277 }
1278
1279 void update_constraints(FILE             *fplog,
1280                         gmx_int64_t       step,
1281                         real             *dvdlambda, /* the contribution to be added to the bonded interactions */
1282                         t_inputrec       *inputrec,  /* input record and box stuff      */
1283                         t_mdatoms        *md,
1284                         t_state          *state,
1285                         gmx_bool          bMolPBC,
1286                         t_graph          *graph,
1287                         rvec              force[],   /* forces on home particles */
1288                         t_idef           *idef,
1289                         tensor            vir_part,
1290                         t_commrec        *cr,
1291                         t_nrnb           *nrnb,
1292                         gmx_wallcycle_t   wcycle,
1293                         gmx_update_t     *upd,
1294                         gmx_constr_t      constr,
1295                         gmx_bool          bFirstHalf,
1296                         gmx_bool          bCalcVir)
1297 {
1298     gmx_bool             bLastStep, bLog = FALSE, bEner = FALSE, bDoConstr = FALSE;
1299     double               dt;
1300     int                  start, homenr, nrend, i;
1301     tensor               vir_con;
1302     int                  nth, th;
1303
1304     if (constr)
1305     {
1306         bDoConstr = TRUE;
1307     }
1308     if (bFirstHalf && !EI_VV(inputrec->eI))
1309     {
1310         bDoConstr = FALSE;
1311     }
1312
1313     /* for now, SD update is here -- though it really seems like it
1314        should be reformulated as a velocity verlet method, since it has two parts */
1315
1316     start  = 0;
1317     homenr = md->homenr;
1318     nrend  = start+homenr;
1319
1320     dt   = inputrec->delta_t;
1321
1322     /*
1323      *  Steps (7C, 8C)
1324      *  APPLY CONSTRAINTS:
1325      *  BLOCK SHAKE
1326
1327      * When doing PR pressure coupling we have to constrain the
1328      * bonds in each iteration. If we are only using Nose-Hoover tcoupling
1329      * it is enough to do this once though, since the relative velocities
1330      * after this will be normal to the bond vector
1331      */
1332
1333     if (bDoConstr)
1334     {
1335         /* clear out constraints before applying */
1336         clear_mat(vir_part);
1337
1338         bLastStep = (step == inputrec->init_step+inputrec->nsteps);
1339         bLog      = (do_per_step(step, inputrec->nstlog) || bLastStep || (step < 0));
1340         bEner     = (do_per_step(step, inputrec->nstenergy) || bLastStep);
1341         /* Constrain the coordinates upd->xp */
1342         wallcycle_start(wcycle, ewcCONSTR);
1343         if (EI_VV(inputrec->eI) && bFirstHalf)
1344         {
1345             constrain(NULL, bLog, bEner, constr, idef,
1346                       inputrec, cr, step, 1, 1.0, md,
1347                       state->x, state->v, state->v,
1348                       bMolPBC, state->box,
1349                       state->lambda[efptBONDED], dvdlambda,
1350                       NULL, bCalcVir ? &vir_con : NULL, nrnb, econqVeloc);
1351         }
1352         else
1353         {
1354             constrain(NULL, bLog, bEner, constr, idef,
1355                       inputrec, cr, step, 1, 1.0, md,
1356                       state->x, upd->xp, NULL,
1357                       bMolPBC, state->box,
1358                       state->lambda[efptBONDED], dvdlambda,
1359                       state->v, bCalcVir ? &vir_con : NULL, nrnb, econqCoord);
1360         }
1361         wallcycle_stop(wcycle, ewcCONSTR);
1362
1363         where();
1364
1365         dump_it_all(fplog, "After Shake",
1366                     state->natoms, state->x, upd->xp, state->v, force);
1367
1368         if (bCalcVir)
1369         {
1370             m_add(vir_part, vir_con, vir_part);
1371             if (debug)
1372             {
1373                 pr_rvecs(debug, 0, "constraint virial", vir_part, DIM);
1374             }
1375         }
1376     }
1377
1378     where();
1379
1380     if (inputrec->eI == eiSD1 && bDoConstr && !bFirstHalf)
1381     {
1382         wallcycle_start(wcycle, ewcUPDATE);
1383
1384         nth = gmx_omp_nthreads_get(emntUpdate);
1385
1386 #pragma omp parallel for num_threads(nth) schedule(static)
1387         for (th = 0; th < nth; th++)
1388         {
1389             try
1390             {
1391                 int start_th, end_th;
1392
1393                 start_th = start + ((nrend-start)* th   )/nth;
1394                 end_th   = start + ((nrend-start)*(th+1))/nth;
1395
1396                 /* The second part of the SD integration */
1397                 do_update_sd1(upd->sd,
1398                               start_th, end_th, dt,
1399                               inputrec->opts.acc, inputrec->opts.nFreeze,
1400                               md->invmass, md->ptype,
1401                               md->cFREEZE, md->cACC, md->cTC,
1402                               state->x, upd->xp, state->v, force,
1403                               bDoConstr, FALSE,
1404                               step, inputrec->ld_seed,
1405                               DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1406             }
1407             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1408         }
1409         inc_nrnb(nrnb, eNR_UPDATE, homenr);
1410         wallcycle_stop(wcycle, ewcUPDATE);
1411
1412         if (bDoConstr)
1413         {
1414             /* Constrain the coordinates upd->xp for half a time step */
1415             wallcycle_start(wcycle, ewcCONSTR);
1416
1417             constrain(NULL, bLog, bEner, constr, idef,
1418                       inputrec, cr, step, 1, 0.5, md,
1419                       state->x, upd->xp, NULL,
1420                       bMolPBC, state->box,
1421                       state->lambda[efptBONDED], dvdlambda,
1422                       state->v, NULL, nrnb, econqCoord);
1423
1424             wallcycle_stop(wcycle, ewcCONSTR);
1425         }
1426     }
1427
1428     /* We must always unshift after updating coordinates; if we did not shake
1429        x was shifted in do_force */
1430
1431     if (!(bFirstHalf)) /* in the first half of vv, no shift. */
1432     {
1433         /* NOTE This part of the update actually does not belong with
1434          * the constraints, since we also call it without constraints.
1435          * But currently we always integrate to a temporary buffer and
1436          * then copy the results back here.
1437          */
1438         wallcycle_start_nocount(wcycle, ewcUPDATE);
1439
1440         if (md->cFREEZE != NULL && constr != NULL)
1441         {
1442             /* If we have atoms that are frozen along some, but not all
1443              * dimensions, the constraints will have moved them also along
1444              * the frozen dimensions. To freeze such degrees of freedom
1445              * we copy them back here to later copy them forward. It would
1446              * be more elegant and slightly more efficient to copies zero
1447              * times instead of twice, but the graph case below prevents this.
1448              */
1449             const ivec *nFreeze                     = inputrec->opts.nFreeze;
1450             bool        partialFreezeAndConstraints = false;
1451             for (int g = 0; g < inputrec->opts.ngfrz; g++)
1452             {
1453                 int numFreezeDim = nFreeze[g][XX] + nFreeze[g][YY] + nFreeze[g][ZZ];
1454                 if (numFreezeDim > 0 && numFreezeDim < 3)
1455                 {
1456                     partialFreezeAndConstraints = true;
1457                 }
1458             }
1459             if (partialFreezeAndConstraints)
1460             {
1461                 for (int i = start; i < nrend; i++)
1462                 {
1463                     int g = md->cFREEZE[i];
1464
1465                     for (int d = 0; d < DIM; d++)
1466                     {
1467                         if (nFreeze[g][d])
1468                         {
1469                             upd->xp[i][d] = state->x[i][d];
1470                         }
1471                     }
1472                 }
1473             }
1474         }
1475
1476         if (graph && (graph->nnodes > 0))
1477         {
1478             unshift_x(graph, state->box, state->x, upd->xp);
1479             if (TRICLINIC(state->box))
1480             {
1481                 inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
1482             }
1483             else
1484             {
1485                 inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
1486             }
1487         }
1488         else
1489         {
1490 #ifndef __clang_analyzer__
1491             // cppcheck-suppress unreadVariable
1492             nth = gmx_omp_nthreads_get(emntUpdate);
1493 #endif
1494 #pragma omp parallel for num_threads(nth) schedule(static)
1495             for (i = start; i < nrend; i++)
1496             {
1497                 // Trivial statement, does not throw
1498                 copy_rvec(upd->xp[i], state->x[i]);
1499             }
1500         }
1501         wallcycle_stop(wcycle, ewcUPDATE);
1502
1503         dump_it_all(fplog, "After unshift",
1504                     state->natoms, state->x, upd->xp, state->v, force);
1505     }
1506 /* ############# END the update of velocities and positions ######### */
1507 }
1508
1509 void update_box(FILE             *fplog,
1510                 gmx_int64_t       step,
1511                 t_inputrec       *inputrec,  /* input record and box stuff      */
1512                 t_mdatoms        *md,
1513                 t_state          *state,
1514                 rvec              force[],   /* forces on home particles */
1515                 matrix            pcoupl_mu,
1516                 t_nrnb           *nrnb,
1517                 gmx_update_t     *upd)
1518 {
1519     double               dt;
1520     int                  start, homenr, i, n, m;
1521
1522     start  = 0;
1523     homenr = md->homenr;
1524
1525     dt = inputrec->delta_t;
1526
1527     where();
1528
1529     /* now update boxes */
1530     switch (inputrec->epc)
1531     {
1532         case (epcNO):
1533             break;
1534         case (epcBERENDSEN):
1535             /* We should only scale after a step where the pressure (kinetic
1536              * energy and virial) was calculated. This happens after the
1537              * coordinate update, whereas the current routine is called before
1538              * that, so we scale when step % nstpcouple = 1 instead of 0.
1539              */
1540             if (inputrec->nstpcouple == 1 || (step % inputrec->nstpcouple == 1))
1541             {
1542                 berendsen_pscale(inputrec, pcoupl_mu, state->box, state->box_rel,
1543                                  start, homenr, state->x, md->cFREEZE, nrnb);
1544             }
1545             break;
1546         case (epcPARRINELLORAHMAN):
1547             /* The box velocities were updated in do_pr_pcoupl in the update
1548              * iteration, but we dont change the box vectors until we get here
1549              * since we need to be able to shift/unshift above.
1550              */
1551             for (i = 0; i < DIM; i++)
1552             {
1553                 for (m = 0; m <= i; m++)
1554                 {
1555                     state->box[i][m] += dt*state->boxv[i][m];
1556                 }
1557             }
1558             preserve_box_shape(inputrec, state->box_rel, state->box);
1559
1560             /* Scale the coordinates */
1561             for (n = start; (n < start+homenr); n++)
1562             {
1563                 tmvmul_ur0(pcoupl_mu, state->x[n], state->x[n]);
1564             }
1565             break;
1566         case (epcMTTK):
1567             switch (inputrec->epct)
1568             {
1569                 case (epctISOTROPIC):
1570                     /* DIM * eta = ln V.  so DIM*eta_new = DIM*eta_old + DIM*dt*veta =>
1571                        ln V_new = ln V_old + 3*dt*veta => V_new = V_old*exp(3*dt*veta) =>
1572                        Side length scales as exp(veta*dt) */
1573
1574                     msmul(state->box, exp(state->veta*dt), state->box);
1575
1576                     /* Relate veta to boxv.  veta = d(eta)/dT = (1/DIM)*1/V dV/dT.
1577                        o               If we assume isotropic scaling, and box length scaling
1578                        factor L, then V = L^DIM (det(M)).  So dV/dt = DIM
1579                        L^(DIM-1) dL/dt det(M), and veta = (1/L) dL/dt.  The
1580                        determinant of B is L^DIM det(M), and the determinant
1581                        of dB/dt is (dL/dT)^DIM det (M).  veta will be
1582                        (det(dB/dT)/det(B))^(1/3).  Then since M =
1583                        B_new*(vol_new)^(1/3), dB/dT_new = (veta_new)*B(new). */
1584
1585                     msmul(state->box, state->veta, state->boxv);
1586                     break;
1587                 default:
1588                     break;
1589             }
1590             break;
1591         default:
1592             break;
1593     }
1594
1595     if (inputrecDeform(inputrec))
1596     {
1597         deform(upd, start, homenr, state->x, state->box, inputrec, step);
1598     }
1599     where();
1600     dump_it_all(fplog, "After update",
1601                 state->natoms, state->x, upd->xp, state->v, force);
1602 }
1603
1604 void update_coords(FILE             *fplog,
1605                    gmx_int64_t       step,
1606                    t_inputrec       *inputrec,  /* input record and box stuff   */
1607                    t_mdatoms        *md,
1608                    t_state          *state,
1609                    rvec             *f,    /* forces on home particles */
1610                    t_fcdata         *fcd,
1611                    gmx_ekindata_t   *ekind,
1612                    matrix            M,
1613                    gmx_update_t     *upd,
1614                    int               UpdatePart,
1615                    t_commrec        *cr, /* these shouldn't be here -- need to think about it */
1616                    gmx_constr_t      constr)
1617 {
1618     gmx_bool          bNH, bPR, bDoConstr = FALSE;
1619     double            dt, alpha;
1620     int               start, homenr, nrend;
1621     int               nth, th;
1622
1623     bDoConstr = (NULL != constr);
1624
1625     /* Running the velocity half does nothing except for velocity verlet */
1626     if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
1627         !EI_VV(inputrec->eI))
1628     {
1629         gmx_incons("update_coords called for velocity without VV integrator");
1630     }
1631
1632     start  = 0;
1633     homenr = md->homenr;
1634     nrend  = start+homenr;
1635
1636     dt   = inputrec->delta_t;
1637
1638     /* We need to update the NMR restraint history when time averaging is used */
1639     if (state->flags & (1<<estDISRE_RM3TAV))
1640     {
1641         update_disres_history(fcd, &state->hist);
1642     }
1643     if (state->flags & (1<<estORIRE_DTAV))
1644     {
1645         update_orires_history(fcd, &state->hist);
1646     }
1647
1648
1649     bNH = inputrec->etc == etcNOSEHOOVER;
1650     bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
1651
1652     /* ############# START The update of velocities and positions ######### */
1653     where();
1654     dump_it_all(fplog, "Before update",
1655                 state->natoms, state->x, upd->xp, state->v, f);
1656
1657     nth = gmx_omp_nthreads_get(emntUpdate);
1658
1659 #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
1660     for (th = 0; th < nth; th++)
1661     {
1662         try
1663         {
1664             int start_th, end_th;
1665
1666             start_th = start + ((nrend-start)* th   )/nth;
1667             end_th   = start + ((nrend-start)*(th+1))/nth;
1668
1669             switch (inputrec->eI)
1670             {
1671                 case (eiMD):
1672                     if (ekind->cosacc.cos_accel == 0)
1673                     {
1674                         do_update_md(start_th, end_th,
1675                                      dt, inputrec->nstpcouple,
1676                                      ekind->tcstat, state->nosehoover_vxi,
1677                                      ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
1678                                      inputrec->opts.nFreeze,
1679                                      md->invmass, md->ptype,
1680                                      md->cFREEZE, md->cACC, md->cTC,
1681                                      state->x, upd->xp, state->v, f, M,
1682                                      bNH, bPR);
1683                     }
1684                     else
1685                     {
1686                         do_update_visc(start_th, end_th,
1687                                        dt, inputrec->nstpcouple,
1688                                        ekind->tcstat, state->nosehoover_vxi,
1689                                        md->invmass, md->ptype,
1690                                        md->cTC, state->x, upd->xp, state->v, f, M,
1691                                        state->box,
1692                                        ekind->cosacc.cos_accel,
1693                                        ekind->cosacc.vcos,
1694                                        bNH, bPR);
1695                     }
1696                     break;
1697                 case (eiSD1):
1698                     /* With constraints, the SD1 update is done in 2 parts */
1699                     do_update_sd1(upd->sd,
1700                                   start_th, end_th, dt,
1701                                   inputrec->opts.acc, inputrec->opts.nFreeze,
1702                                   md->invmass, md->ptype,
1703                                   md->cFREEZE, md->cACC, md->cTC,
1704                                   state->x, upd->xp, state->v, f,
1705                                   bDoConstr, TRUE,
1706                                   step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1707                     break;
1708                 case (eiBD):
1709                     do_update_bd(start_th, end_th, dt,
1710                                  inputrec->opts.nFreeze, md->invmass, md->ptype,
1711                                  md->cFREEZE, md->cTC,
1712                                  state->x, upd->xp, state->v, f,
1713                                  inputrec->bd_fric,
1714                                  upd->sd->bd_rf,
1715                                  step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
1716                     break;
1717                 case (eiVV):
1718                 case (eiVVAK):
1719                     alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
1720                     switch (UpdatePart)
1721                     {
1722                         case etrtVELOCITY1:
1723                         case etrtVELOCITY2:
1724                             do_update_vv_vel(start_th, end_th, dt,
1725                                              inputrec->opts.acc, inputrec->opts.nFreeze,
1726                                              md->invmass, md->ptype,
1727                                              md->cFREEZE, md->cACC,
1728                                              state->v, f,
1729                                              (bNH || bPR), state->veta, alpha);
1730                             break;
1731                         case etrtPOSITION:
1732                             do_update_vv_pos(start_th, end_th, dt,
1733                                              inputrec->opts.nFreeze,
1734                                              md->ptype, md->cFREEZE,
1735                                              state->x, upd->xp, state->v,
1736                                              (bNH || bPR), state->veta);
1737                             break;
1738                     }
1739                     break;
1740                 default:
1741                     gmx_fatal(FARGS, "Don't know how to update coordinates");
1742                     break;
1743             }
1744         }
1745         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1746     }
1747
1748 }
1749
1750
1751 void correct_ekin(FILE *log, int start, int end, rvec v[], rvec vcm, real mass[],
1752                   real tmass, tensor ekin)
1753 {
1754     /*
1755      * This is a debugging routine. It should not be called for production code
1756      *
1757      * The kinetic energy should calculated according to:
1758      *   Ekin = 1/2 m (v-vcm)^2
1759      * However the correction is not always applied, since vcm may not be
1760      * known in time and we compute
1761      *   Ekin' = 1/2 m v^2 instead
1762      * This can be corrected afterwards by computing
1763      *   Ekin = Ekin' + 1/2 m ( -2 v vcm + vcm^2)
1764      * or in hsorthand:
1765      *   Ekin = Ekin' - m v vcm + 1/2 m vcm^2
1766      */
1767     int    i, j, k;
1768     real   m, tm;
1769     rvec   hvcm, mv;
1770     tensor dekin;
1771
1772     /* Local particles */
1773     clear_rvec(mv);
1774
1775     /* Processor dependent part. */
1776     tm = 0;
1777     for (i = start; (i < end); i++)
1778     {
1779         m      = mass[i];
1780         tm    += m;
1781         for (j = 0; (j < DIM); j++)
1782         {
1783             mv[j] += m*v[i][j];
1784         }
1785     }
1786     /* Shortcut */
1787     svmul(1/tmass, vcm, vcm);
1788     svmul(0.5, vcm, hvcm);
1789     clear_mat(dekin);
1790     for (j = 0; (j < DIM); j++)
1791     {
1792         for (k = 0; (k < DIM); k++)
1793         {
1794             dekin[j][k] += vcm[k]*(tm*hvcm[j]-mv[j]);
1795         }
1796     }
1797     pr_rvecs(log, 0, "dekin", dekin, DIM);
1798     pr_rvecs(log, 0, " ekin", ekin, DIM);
1799     fprintf(log, "dekin = %g, ekin = %g  vcm = (%8.4f %8.4f %8.4f)\n",
1800             trace(dekin), trace(ekin), vcm[XX], vcm[YY], vcm[ZZ]);
1801     fprintf(log, "mv = (%8.4f %8.4f %8.4f)\n",
1802             mv[XX], mv[YY], mv[ZZ]);
1803 }
1804
1805 extern gmx_bool update_randomize_velocities(t_inputrec *ir, gmx_int64_t step, const t_commrec *cr,
1806                                             t_mdatoms *md, t_state *state, gmx_update_t *upd, gmx_constr_t constr)
1807 {
1808
1809     real rate = (ir->delta_t)/ir->opts.tau_t[0];
1810
1811     if (ir->etc == etcANDERSEN && constr != NULL)
1812     {
1813         /* Currently, Andersen thermostat does not support constrained
1814            systems. Functionality exists in the andersen_tcoupl
1815            function in GROMACS 4.5.7 to allow this combination. That
1816            code could be ported to the current random-number
1817            generation approach, but has not yet been done because of
1818            lack of time and resources. */
1819         gmx_fatal(FARGS, "Normal Andersen is currently not supported with constraints, use massive Andersen instead");
1820     }
1821
1822     /* proceed with andersen if 1) it's fixed probability per
1823        particle andersen or 2) it's massive andersen and it's tau_t/dt */
1824     if ((ir->etc == etcANDERSEN) || do_per_step(step, (int)(1.0/rate)))
1825     {
1826         andersen_tcoupl(ir, step, cr, md, state, rate,
1827                         upd->sd->randomize_group, upd->sd->boltzfac);
1828         return TRUE;
1829     }
1830     return FALSE;
1831 }