src/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "pbc.h"
  52 #include "force.h"
  53 #include "macros.h"
  54 #include "random.h"
  55 #include "names.h"
  56 #include "gmx_fatal.h"
  57 #include "txtdump.h"
  58 #include "typedefs.h"
  59 #include "update.h"
  60 #include "random.h"
  61 #include "constr.h"
  62 #include "vec.h"
  63 #include "statutil.h"
  64 #include "tgroup.h"
  65 #include "mdebin.h"
  66 #include "vsite.h"
  67 #include "force.h"
  68 #include "mdrun.h"
  69 #include "domdec.h"
  70 #include "partdec.h"
  71 #include "trnio.h"
  72 #include "sparsematrix.h"
  73 #include "mtxio.h"
  74 #include "gmx_random.h"
  75 #include "physics.h"
  76 #include "xvgr.h"
  77 #include "mdatoms.h"
  78 #include "gbutil.h"
  79 #include "ns.h"
  80 #include "gmx_wallcycle.h"
  81 #include "mtop_util.h"
  82 #include "gmxfio.h"
  83 #include "pme.h"
  84 #include "pdbio.h"
  85
  86 typedef struct {
  87   t_state s;
  88   rvec    *f;
  89   real    epot;
  90   real    fnorm;
  91   real    fmax;
  92   int     a_fmax;
  93 } em_state_t;
  94
  95 static em_state_t *init_em_state()
  96 {
  97   em_state_t *ems;
  98
  99   snew(ems,1);
 100
 101   return ems;
 102 }
 103
 104 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 105 {
 106   fprintf(out,"%s:\n",minimizer);
 107   fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 108   fprintf(out,"   Number of steps    = %12d\n",nsteps);
 109 }
 110
 111 static void warn_step(FILE *fp,real ftol,bool bConstrain)
 112 {
 113   fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 114           "Converged to machine precision,\n"
 115           "but not to the requested precision Fmax < %g\n",
 116           ftol);
 117   if (sizeof(real)<sizeof(double))
 118       fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 119
 120   if (bConstrain)
 121     fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 122             "off constraints alltogether (set constraints = none in mdp file)\n");
 123 }
 124
 125
 126
 127 static void print_converged(FILE *fp,const char *alg,real ftol,
 128                             gmx_step_t count,bool bDone,gmx_step_t nsteps,
 129                             real epot,real fmax, int nfmax, real fnorm)
 130 {
 131   char buf[22];
 132
 133   if (bDone)
 134     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 135             alg,ftol,gmx_step_str(count,buf));
 136   else if(count<nsteps)
 137     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 138                "but did not reach the requested Fmax < %g.\n",
 139             alg,gmx_step_str(count,buf),ftol);
 140   else
 141     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 142             alg,ftol,gmx_step_str(count,buf));
 143
 144 #ifdef GMX_DOUBLE
 145   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 146   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 147   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 148 #else
 149   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 150   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 151   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 152 #endif
 153 }
 154
 155 static void get_f_norm_max(t_commrec *cr,
 156                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 157                            real *fnorm,real *fmax,int *a_fmax)
 158 {
 159   double fnorm2,*sum;
 160   real fmax2,fmax2_0,fam;
 161   int  la_max,a_max,start,end,i,m,gf;
 162
 163   /* This routine finds the largest force and returns it.
 164    * On parallel machines the global max is taken.
 165    */
 166   fnorm2 = 0;
 167   fmax2 = 0;
 168   la_max = -1;
 169   gf = 0;
 170   start = mdatoms->start;
 171   end   = mdatoms->homenr + start;
 172   if (mdatoms->cFREEZE) {
 173     for(i=start; i<end; i++) {
 174       gf = mdatoms->cFREEZE[i];
 175       fam = 0;
 176       for(m=0; m<DIM; m++)
 177         if (!opts->nFreeze[gf][m])
 178           fam += sqr(f[i][m]);
 179       fnorm2 += fam;
 180       if (fam > fmax2) {
 181         fmax2  = fam;
 182         la_max = i;
 183       }
 184     }
 185   } else {
 186     for(i=start; i<end; i++) {
 187       fam = norm2(f[i]);
 188       fnorm2 += fam;
 189       if (fam > fmax2) {
 190         fmax2  = fam;
 191         la_max = i;
 192       }
 193     }
 194   }
 195
 196   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 197     a_max = cr->dd->gatindex[la_max];
 198   } else {
 199     a_max = la_max;
 200   }
 201   if (PAR(cr)) {
 202     snew(sum,2*cr->nnodes+1);
 203     sum[2*cr->nodeid]   = fmax2;
 204     sum[2*cr->nodeid+1] = a_max;
 205     sum[2*cr->nnodes]   = fnorm2;
 206     gmx_sumd(2*cr->nnodes+1,sum,cr);
 207     fnorm2 = sum[2*cr->nnodes];
 208     /* Determine the global maximum */
 209     for(i=0; i<cr->nnodes; i++) {
 210       if (sum[2*i] > fmax2) {
 211         fmax2 = sum[2*i];
 212         a_max = (int)(sum[2*i+1] + 0.5);
 213       }
 214     }
 215     sfree(sum);
 216   }
 217
 218   if (fnorm)
 219     *fnorm = sqrt(fnorm2);
 220   if (fmax)
 221     *fmax  = sqrt(fmax2);
 222   if (a_fmax)
 223     *a_fmax = a_max;
 224 }
 225
 226 static void get_state_f_norm_max(t_commrec *cr,
 227                            t_grpopts *opts,t_mdatoms *mdatoms,
 228                            em_state_t *ems)
 229 {
 230   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 231 }
 232
 233 void init_em(FILE *fplog,const char *title,
 234              t_commrec *cr,t_inputrec *ir,
 235              t_state *state_global,gmx_mtop_t *top_global,
 236              em_state_t *ems,gmx_localtop_t **top,
 237              rvec *f,rvec **f_global,
 238              t_nrnb *nrnb,rvec mu_tot,
 239              t_forcerec *fr,gmx_enerdata_t **enerd,
 240              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 241              gmx_vsite_t *vsite,gmx_constr_t constr,
 242              int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene,
 243              t_mdebin **mdebin)
 244 {
 245   int  start,homenr,i;
 246   real dvdlambda;
 247
 248   if (fplog)
 249     fprintf(fplog,"Initiating %s\n",title);
 250
 251   state_global->ngtc = 0;
 252   if (ir->eI == eiCG) {
 253     state_global->flags |= (1<<estCGP);
 254     snew(state_global->cg_p,state_global->nalloc);
 255   }
 256
 257   /* Initiate some variables */
 258   if (ir->efep != efepNO)
 259     state_global->lambda = ir->init_lambda;
 260   else
 261     state_global->lambda = 0.0;
 262
 263   init_nrnb(nrnb);
 264
 265   if (DOMAINDECOMP(cr)) {
 266     *top = dd_init_local_top(top_global);
 267
 268     dd_init_local_state(cr->dd,state_global,&ems->s);
 269
 270     /* Distribute the charge groups over the nodes from the master node */
 271     dd_partition_system(fplog,ir->init_step,cr,TRUE,
 272                         state_global,top_global,ir,
 273                         &ems->s,&ems->f,mdatoms,*top,
 274                         fr,vsite,NULL,constr,
 275                         nrnb,NULL,FALSE);
 276     dd_store_state(cr->dd,&ems->s);
 277
 278     if (ir->nstfout) {
 279       snew(*f_global,top_global->natoms);
 280     } else {
 281       *f_global = NULL;
 282     }
 283     *graph = NULL;
 284   } else {
 285     /* Just copy the state */
 286     ems->s = *state_global;
 287     snew(ems->s.x,ems->s.nalloc);
 288     snew(ems->f,ems->s.nalloc);
 289     for(i=0; i<state_global->natoms; i++)
 290       copy_rvec(state_global->x[i],ems->s.x[i]);
 291     copy_mat(state_global->box,ems->s.box);
 292
 293     if (PAR(cr)) {
 294       /* Initialize the particle decomposition and split the topology */
 295       *top = split_system(fplog,top_global,ir,cr);
 296
 297       pd_cg_range(cr,&fr->cg0,&fr->hcg);
 298     } else {
 299       *top = gmx_mtop_generate_local_top(top_global,ir);
 300     }
 301     *f_global = f;
 302
 303     if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
 304       *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 305     } else {
 306       *graph = NULL;
 307     }
 308   }
 309
 310   clear_rvec(mu_tot);
 311   calc_shifts(ems->s.box,fr->shift_vec);
 312
 313   if (PARTDECOMP(cr)) {
 314     pd_at_range(cr,&start,&homenr);
 315     homenr -= start;
 316   } else {
 317     start  = 0;
 318     homenr = top_global->natoms;
 319   }
 320   atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 321   update_mdatoms(mdatoms,state_global->lambda);
 322
 323   if (vsite && !DOMAINDECOMP(cr)) {
 324     set_vsite_top(vsite,*top,mdatoms,cr);
 325   }
 326
 327   if (constr) {
 328     if (ir->eConstrAlg == econtSHAKE &&
 329         gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {
 330       gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 331                 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 332     }
 333
 334     if (!DOMAINDECOMP(cr))
 335       set_constraints(constr,*top,ir,mdatoms,NULL);
 336
 337     /* Constrain the starting coordinates */
 338     dvdlambda=0;
 339     constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 340               ir,cr,-1,0,mdatoms,
 341               ems->s.x,ems->s.x,NULL,ems->s.box,ems->s.lambda,&dvdlambda,
 342               NULL,NULL,nrnb,econqCoord);
 343   }
 344
 345   if (PAR(cr)) {
 346     *gstat = global_stat_init(ir);
 347   }
 348
 349
 350   if (MASTER(cr)) {
 351     if (fp_trn)
 352       *fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");
 353     if (fp_ene)
 354       *fp_ene = open_enx(ftp2fn(efEDR,nfile,fnm),"w");
 355   } else {
 356     if (fp_trn)
 357       *fp_trn = -1;
 358     if (fp_ene)
 359       *fp_ene = -1;
 360   }
 361
 362   snew(*enerd,1);
 363   init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 364
 365   /* Init bin for energy stuff */
 366   *mdebin = init_mdebin(*fp_ene,top_global,ir);
 367
 368         /* If doing gb, copy make local gb data (for dd, this is done in dd_partition_system) */
 369     if(ir->implicit_solvent && !DOMAINDECOMP(cr))
 370     {
 371         make_local_gb(cr,fr->born,ir->gb_algorithm);
 372     }
 373
 374
 375 }
 376
 377 static void finish_em(FILE *fplog,t_commrec *cr,
 378                       int fp_traj,int fp_ene)
 379 {
 380   if (!(cr->duty & DUTY_PME)) {
 381     /* Tell the PME only node to finish */
 382     gmx_pme_finish(cr);
 383   }
 384
 385   if (MASTER(cr)) {
 386     close_trn(fp_traj);
 387     close_enx(fp_ene);
 388   }
 389 }
 390
 391 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 392 {
 393   em_state_t tmp;
 394
 395   tmp   = *ems1;
 396   *ems1 = *ems2;
 397   *ems2 = tmp;
 398 }
 399
 400 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
 401 {
 402   int i;
 403
 404   for(i=0; (i<state->natoms); i++)
 405     copy_rvec(ems->s.x[i],state->x[i]);
 406   if (f != NULL)
 407     copy_rvec(ems->f[i],f[i]);
 408 }
 409
 410 static void write_em_traj(FILE *fplog,t_commrec *cr,
 411                           int fp_trn,bool bX,bool bF,char *confout,
 412                           gmx_mtop_t *top_global,
 413                           t_inputrec *ir,gmx_step_t step,
 414                           em_state_t *state,
 415                           t_state *state_global,rvec *f_global)
 416 {
 417     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 418     {
 419         f_global = state->f;
 420         copy_em_coords_back(state,state_global,bF ? f_global : NULL);
 421     }
 422
 423     write_traj(fplog,cr,fp_trn,bX,FALSE,bF,0,FALSE,0,NULL,FALSE,
 424                top_global,ir->eI,ir->simulation_part,step,(double)step,
 425                &state->s,state_global,state->f,f_global,NULL,NULL);
 426
 427     if (confout != NULL && MASTER(cr))
 428     {
 429         write_sto_conf_mtop(confout,
 430                             *top_global->name,top_global,
 431                             state_global->x,NULL,ir->ePBC,state_global->box);
 432     }
 433 }
 434
 435 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 436                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 437                        gmx_constr_t constr,gmx_localtop_t *top,
 438                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 439                        gmx_step_t count)
 440
 441 {
 442   t_state *s1,*s2;
 443   int  start,end,gf,i,m;
 444   rvec *x1,*x2;
 445   real dvdlambda;
 446
 447   s1 = &ems1->s;
 448   s2 = &ems2->s;
 449
 450   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 451     gmx_incons("state mismatch in do_em_step");
 452
 453   s2->flags = s1->flags;
 454
 455   if (s2->nalloc != s1->nalloc) {
 456     s2->nalloc = s1->nalloc;
 457     srenew(s2->x,s1->nalloc);
 458     srenew(ems2->f,  s1->nalloc);
 459     if (s2->flags & (1<<estCGP))
 460       srenew(s2->cg_p,  s1->nalloc);
 461   }
 462
 463   s2->natoms = s1->natoms;
 464   s2->lambda = s1->lambda;
 465   copy_mat(s1->box,s2->box);
 466
 467   start = md->start;
 468   end   = md->start + md->homenr;
 469
 470   x1 = s1->x;
 471   x2 = s2->x;
 472   gf = 0;
 473   for(i=start; i<end; i++) {
 474     if (md->cFREEZE)
 475       gf = md->cFREEZE[i];
 476     for(m=0; m<DIM; m++) {
 477       if (ir->opts.nFreeze[gf][m])
 478         x2[i][m] = x1[i][m];
 479       else
 480         x2[i][m] = x1[i][m] + a*f[i][m];
 481     }
 482   }
 483
 484   if (s2->flags & (1<<estCGP)) {
 485     /* Copy the CG p vector */
 486     x1 = s1->cg_p;
 487     x2 = s2->cg_p;
 488     for(i=start; i<end; i++)
 489       copy_rvec(x1[i],x2[i]);
 490   }
 491
 492   if (DOMAINDECOMP(cr)) {
 493     s2->ddp_count = s1->ddp_count;
 494     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 495       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 496       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 497     }
 498     s2->ncg_gl = s1->ncg_gl;
 499     for(i=0; i<s2->ncg_gl; i++)
 500       s2->cg_gl[i] = s1->cg_gl[i];
 501     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 502   }
 503
 504   if (constr) {
 505     wallcycle_start(wcycle,ewcCONSTR);
 506     dvdlambda = 0;
 507     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 508               ir,cr,count,0,md,
 509               s1->x,s2->x,NULL,s2->box,s2->lambda,
 510               &dvdlambda,NULL,NULL,nrnb,econqCoord);
 511     wallcycle_stop(wcycle,ewcCONSTR);
 512   }
 513 }
 514
 515 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
 516
 517 {
 518   int  start,end,i,m;
 519
 520   if (DOMAINDECOMP(cr)) {
 521     start = 0;
 522     end   = cr->dd->nat_home;
 523   } else if (PARTDECOMP(cr)) {
 524     pd_at_range(cr,&start,&end);
 525   } else {
 526     start = 0;
 527     end   = n;
 528   }
 529
 530   for(i=start; i<end; i++) {
 531     for(m=0; m<DIM; m++) {
 532       x2[i][m] = x1[i][m] + a*f[i][m];
 533     }
 534   }
 535 }
 536
 537 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
 538
 539 {
 540   int  start,end,i,m;
 541
 542   if (DOMAINDECOMP(cr)) {
 543     start = 0;
 544     end   = cr->dd->nat_home;
 545   } else if (PARTDECOMP(cr)) {
 546     pd_at_range(cr,&start,&end);
 547   } else {
 548     start = 0;
 549     end   = n;
 550   }
 551
 552   for(i=start; i<end; i++) {
 553     for(m=0; m<DIM; m++) {
 554       f[i][m] = (x1[i][m] - x2[i][m])*a;
 555     }
 556   }
 557 }
 558
 559 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 560                                    gmx_mtop_t *top_global,t_inputrec *ir,
 561                                    em_state_t *ems,gmx_localtop_t *top,
 562                                    t_mdatoms *mdatoms,t_forcerec *fr,
 563                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 564                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 565 {
 566   /* Repartition the domain decomposition */
 567   wallcycle_start(wcycle,ewcDOMDEC);
 568   dd_partition_system(fplog,step,cr,FALSE,
 569                       NULL,top_global,ir,
 570                       &ems->s,&ems->f,
 571                       mdatoms,top,fr,vsite,NULL,constr,
 572                       nrnb,wcycle,FALSE);
 573   dd_store_state(cr->dd,&ems->s);
 574   wallcycle_stop(wcycle,ewcDOMDEC);
 575 }
 576
 577 static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr,
 578                             t_state *state_global,gmx_mtop_t *top_global,
 579                             em_state_t *ems,gmx_localtop_t *top,
 580                             t_inputrec *inputrec,
 581                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 582                             gmx_global_stat_t gstat,
 583                             gmx_vsite_t *vsite,gmx_constr_t constr,
 584                             t_fcdata *fcd,
 585                             t_graph *graph,t_mdatoms *mdatoms,
 586                             t_forcerec *fr,rvec mu_tot,
 587                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 588                             gmx_step_t count,bool bFirst)
 589 {
 590   real t;
 591   bool bNS;
 592   int  nabnsb;
 593   tensor force_vir,shake_vir,ekin;
 594   real dvdl;
 595   real terminate=0;
 596
 597   /* Set the time to the initial time, the time does not change during EM */
 598   t = inputrec->init_t;
 599
 600   if (bFirst ||
 601       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 602     /* This the first state or an old state used before the last ns */
 603     bNS = TRUE;
 604   } else {
 605     bNS = FALSE;
 606     if (inputrec->nstlist > 0) {
 607       bNS = TRUE;
 608     } else if (inputrec->nstlist == -1) {
 609       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 610       if (PAR(cr))
 611         gmx_sumi(1,&nabnsb,cr);
 612       bNS = (nabnsb > 0);
 613     }
 614   }
 615
 616   if (vsite)
 617     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 618                      top->idef.iparams,top->idef.il,
 619                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 620
 621   if (DOMAINDECOMP(cr)) {
 622     if (bNS) {
 623       /* Repartition the domain decomposition */
 624       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 625                              ems,top,mdatoms,fr,vsite,constr,
 626                              nrnb,wcycle);
 627     }
 628   }
 629
 630     /* Calc force & energy on new trial position  */
 631     /* do_force always puts the charge groups in the box and shifts again
 632      * We do not unshift, so molecules are always whole in congrad.c
 633      */
 634     do_force(fplog,cr,inputrec,
 635              count,nrnb,wcycle,top,top_global,&top_global->groups,
 636              ems->s.box,ems->s.x,&ems->s.hist,
 637              ems->f,force_vir,mdatoms,enerd,fcd,
 638              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 639              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 640              (bNS ? GMX_FORCE_NS : 0));
 641
 642   /* Clear the unused shake virial and pressure */
 643   clear_mat(shake_vir);
 644   clear_mat(pres);
 645
 646   /* Calculate long range corrections to pressure and energy */
 647   calc_dispcorr(fplog,inputrec,fr,count,mdatoms->nr,ems->s.box,ems->s.lambda,
 648                 pres,force_vir,enerd);
 649
 650   /* Communicate stuff when parallel */
 651   if (PAR(cr)) {
 652     wallcycle_start(wcycle,ewcMoveE);
 653
 654     global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 655                 inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate,
 656                 top_global,&ems->s);
 657
 658     wallcycle_stop(wcycle,ewcMoveE);
 659   }
 660
 661   ems->epot = enerd->term[F_EPOT];
 662
 663   if (constr) {
 664     /* Project out the constraint components of the force */
 665     wallcycle_start(wcycle,ewcCONSTR);
 666     dvdl = 0;
 667     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 668               inputrec,cr,count,0,mdatoms,
 669               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 670               NULL,&shake_vir,nrnb,econqForceDispl);
 671     if (fr->bSepDVDL && fplog)
 672       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 673     enerd->term[F_DHDL_CON] += dvdl;
 674     m_add(force_vir,shake_vir,vir);
 675     wallcycle_stop(wcycle,ewcCONSTR);
 676   } else {
 677     copy_mat(force_vir,vir);
 678   }
 679
 680   clear_mat(ekin);
 681   enerd->term[F_PRES] =
 682     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 683               (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 684
 685   sum_dhdl(enerd,ems->s.lambda,inputrec);
 686
 687   get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 688 }
 689
 690 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 691                               gmx_mtop_t *mtop,
 692                               em_state_t *s_min,em_state_t *s_b)
 693 {
 694   rvec *fm,*fb,*fmg;
 695   t_block *cgs_gl;
 696   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 697   double partsum;
 698   unsigned char *grpnrFREEZE;
 699
 700   if (debug)
 701     fprintf(debug,"Doing reorder_partsum\n");
 702
 703   fm = s_min->f;
 704   fb = s_b->f;
 705
 706   cgs_gl = dd_charge_groups_global(cr->dd);
 707   index = cgs_gl->index;
 708
 709   /* Collect fm in a global vector fmg.
 710    * This conflics with the spirit of domain decomposition,
 711    * but to fully optimize this a much more complicated algorithm is required.
 712    */
 713   snew(fmg,mtop->natoms);
 714
 715   ncg   = s_min->s.ncg_gl;
 716   cg_gl = s_min->s.cg_gl;
 717   i = 0;
 718   for(c=0; c<ncg; c++) {
 719     cg = cg_gl[c];
 720     a0 = index[cg];
 721     a1 = index[cg+1];
 722     for(a=a0; a<a1; a++) {
 723       copy_rvec(fm[i],fmg[a]);
 724       i++;
 725     }
 726   }
 727   gmx_sum(mtop->natoms*3,fmg[0],cr);
 728
 729   /* Now we will determine the part of the sum for the cgs in state s_b */
 730   ncg   = s_b->s.ncg_gl;
 731   cg_gl = s_b->s.cg_gl;
 732   partsum = 0;
 733   i = 0;
 734   gf = 0;
 735   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 736   for(c=0; c<ncg; c++) {
 737     cg = cg_gl[c];
 738     a0 = index[cg];
 739     a1 = index[cg+1];
 740     for(a=a0; a<a1; a++) {
 741       if (mdatoms->cFREEZE && grpnrFREEZE) {
 742         gf = grpnrFREEZE[i];
 743       }
 744       for(m=0; m<DIM; m++) {
 745         if (!opts->nFreeze[gf][m]) {
 746           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 747         }
 748       }
 749       i++;
 750     }
 751   }
 752
 753   sfree(fmg);
 754
 755   return partsum;
 756 }
 757
 758 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 759                     gmx_mtop_t *mtop,
 760                     em_state_t *s_min,em_state_t *s_b)
 761 {
 762   rvec *fm,*fb;
 763   double sum;
 764   int  gf,i,m;
 765
 766   /* This is just the classical Polak-Ribiere calculation of beta;
 767    * it looks a bit complicated since we take freeze groups into account,
 768    * and might have to sum it in parallel runs.
 769    */
 770
 771   if (!DOMAINDECOMP(cr) ||
 772       (s_min->s.ddp_count == cr->dd->ddp_count &&
 773        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 774     fm = s_min->f;
 775     fb = s_b->f;
 776     sum = 0;
 777     gf = 0;
 778     /* This part of code can be incorrect with DD,
 779      * since the atom ordering in s_b and s_min might differ.
 780      */
 781     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 782       if (mdatoms->cFREEZE)
 783         gf = mdatoms->cFREEZE[i];
 784       for(m=0; m<DIM; m++)
 785         if (!opts->nFreeze[gf][m]) {
 786           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 787         }
 788     }
 789   } else {
 790     /* We need to reorder cgs while summing */
 791     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 792   }
 793   if (PAR(cr))
 794     gmx_sumd(1,&sum,cr);
 795
 796   return sum/sqr(s_min->fnorm);
 797 }
 798
 799 double do_cg(FILE *fplog,t_commrec *cr,
 800              int nfile,t_filenm fnm[],
 801              bool bVerbose,bool bCompact,
 802              gmx_vsite_t *vsite,gmx_constr_t constr,
 803              int stepout,
 804              t_inputrec *inputrec,
 805              gmx_mtop_t *top_global,t_fcdata *fcd,
 806              t_state *state_global,rvec f[],
 807              t_mdatoms *mdatoms,
 808              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 809              gmx_edsam_t ed,
 810              t_forcerec *fr,
 811              int repl_ex_nst,int repl_ex_seed,
 812              real cpt_period,real max_hours,
 813              unsigned long Flags,
 814              gmx_runtime_t *runtime)
 815 {
 816   const char *CG="Polak-Ribiere Conjugate Gradients";
 817
 818   em_state_t *s_min,*s_a,*s_b,*s_c;
 819   gmx_localtop_t *top;
 820   gmx_enerdata_t *enerd;
 821   gmx_global_stat_t gstat;
 822   t_graph    *graph;
 823   rvec   *f_global,*p,*sf,*sfm;
 824   double gpa,gpb,gpc,tmp,sum[2],minstep;
 825   real   fnormn;
 826   real   stepsize;
 827   real   a,b,c,beta=0.0;
 828   real   epot_repl=0;
 829   real   pnorm;
 830   t_mdebin   *mdebin;
 831   bool   converged,foundlower;
 832   rvec   mu_tot;
 833   bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 834   tensor vir,pres;
 835   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 836   int    fp_trn,fp_ene;
 837   int    i,m,gf,step,nminstep;
 838   real   terminate=0;
 839
 840   step=0;
 841
 842   s_min = init_em_state();
 843   s_a   = init_em_state();
 844   s_b   = init_em_state();
 845   s_c   = init_em_state();
 846
 847   /* Init em and store the local state in s_min */
 848   init_em(fplog,CG,cr,inputrec,
 849           state_global,top_global,s_min,&top,f,&f_global,
 850           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 851           nfile,fnm,&fp_trn,&fp_ene,&mdebin);
 852
 853   /* Print to log file */
 854   print_date_and_time(fplog,cr->nodeid,
 855                       "Started Polak-Ribiere Conjugate Gradients",NULL);
 856   wallcycle_start(wcycle,ewcRUN);
 857
 858   /* Max number of steps */
 859   number_steps=inputrec->nsteps;
 860
 861   if (MASTER(cr))
 862     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 863   if (fplog)
 864     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 865
 866   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 867   /* do_force always puts the charge groups in the box and shifts again
 868    * We do not unshift, so molecules are always whole in congrad.c
 869    */
 870   evaluate_energy(fplog,bVerbose,cr,
 871                   state_global,top_global,s_min,top,
 872                   inputrec,nrnb,wcycle,gstat,
 873                   vsite,constr,fcd,graph,mdatoms,fr,
 874                   mu_tot,enerd,vir,pres,-1,TRUE);
 875   where();
 876
 877   if (MASTER(cr)) {
 878     /* Copy stuff to the energy bin for easy printing etc. */
 879     upd_mdebin(mdebin,NULL,TRUE,(double)step,
 880                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 881                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 882
 883     print_ebin_header(fplog,step,step,s_min->s.lambda);
 884     print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 885                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 886   }
 887   where();
 888
 889   /* Estimate/guess the initial stepsize */
 890   stepsize = inputrec->em_stepsize/s_min->fnorm;
 891
 892   if (MASTER(cr)) {
 893     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 894             s_min->fmax,s_min->a_fmax+1);
 895     fprintf(stderr,"   F-Norm            = %12.5e\n",
 896             s_min->fnorm/sqrt(state_global->natoms));
 897     fprintf(stderr,"\n");
 898     /* and copy to the log file too... */
 899     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 900             s_min->fmax,s_min->a_fmax+1);
 901     fprintf(fplog,"   F-Norm            = %12.5e\n",
 902             s_min->fnorm/sqrt(state_global->natoms));
 903     fprintf(fplog,"\n");
 904   }
 905   /* Start the loop over CG steps.
 906    * Each successful step is counted, and we continue until
 907    * we either converge or reach the max number of steps.
 908    */
 909   for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
 910
 911     /* start taking steps in a new direction
 912      * First time we enter the routine, beta=0, and the direction is
 913      * simply the negative gradient.
 914      */
 915
 916     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 917     p  = s_min->s.cg_p;
 918     sf = s_min->f;
 919     gpa = 0;
 920     gf = 0;
 921     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 922       if (mdatoms->cFREEZE)
 923         gf = mdatoms->cFREEZE[i];
 924       for(m=0; m<DIM; m++) {
 925         if (!inputrec->opts.nFreeze[gf][m]) {
 926           p[i][m] = sf[i][m] + beta*p[i][m];
 927           gpa -= p[i][m]*sf[i][m];
 928           /* f is negative gradient, thus the sign */
 929         } else {
 930           p[i][m] = 0;
 931         }
 932       }
 933     }
 934
 935     /* Sum the gradient along the line across CPUs */
 936     if (PAR(cr))
 937       gmx_sumd(1,&gpa,cr);
 938
 939     /* Calculate the norm of the search vector */
 940     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 941
 942     /* Just in case stepsize reaches zero due to numerical precision... */
 943     if(stepsize<=0)
 944       stepsize = inputrec->em_stepsize/pnorm;
 945
 946     /*
 947      * Double check the value of the derivative in the search direction.
 948      * If it is positive it must be due to the old information in the
 949      * CG formula, so just remove that and start over with beta=0.
 950      * This corresponds to a steepest descent step.
 951      */
 952     if(gpa>0) {
 953       beta = 0;
 954       step--; /* Don't count this step since we are restarting */
 955       continue; /* Go back to the beginning of the big for-loop */
 956     }
 957
 958     /* Calculate minimum allowed stepsize, before the average (norm)
 959      * relative change in coordinate is smaller than precision
 960      */
 961     minstep=0;
 962     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 963       for(m=0; m<DIM; m++) {
 964         tmp = fabs(s_min->s.x[i][m]);
 965         if(tmp < 1.0)
 966           tmp = 1.0;
 967         tmp = p[i][m]/tmp;
 968         minstep += tmp*tmp;
 969       }
 970     }
 971     /* Add up from all CPUs */
 972     if(PAR(cr))
 973       gmx_sumd(1,&minstep,cr);
 974
 975     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
 976
 977     if(stepsize<minstep) {
 978       converged=TRUE;
 979       break;
 980     }
 981
 982     /* Write coordinates if necessary */
 983     do_x = do_per_step(step,inputrec->nstxout);
 984     do_f = do_per_step(step,inputrec->nstfout);
 985
 986     write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
 987                   top_global,inputrec,step,
 988                   s_min,state_global,f_global);
 989
 990     /* Take a step downhill.
 991      * In theory, we should minimize the function along this direction.
 992      * That is quite possible, but it turns out to take 5-10 function evaluations
 993      * for each line. However, we dont really need to find the exact minimum -
 994      * it is much better to start a new CG step in a modified direction as soon
 995      * as we are close to it. This will save a lot of energy evaluations.
 996      *
 997      * In practice, we just try to take a single step.
 998      * If it worked (i.e. lowered the energy), we increase the stepsize but
 999      * the continue straight to the next CG step without trying to find any minimum.
1000      * If it didn't work (higher energy), there must be a minimum somewhere between
1001      * the old position and the new one.
1002      *
1003      * Due to the finite numerical accuracy, it turns out that it is a good idea
1004      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1005      * This leads to lower final energies in the tests I've done. / Erik
1006      */
1007     s_a->epot = s_min->epot;
1008     a = 0.0;
1009     c = a + stepsize; /* reference position along line is zero */
1010
1011     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1012       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1013                              s_min,top,mdatoms,fr,vsite,constr,
1014                              nrnb,wcycle);
1015     }
1016
1017     /* Take a trial step (new coords in s_c) */
1018     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1019                constr,top,nrnb,wcycle,-1);
1020
1021     neval++;
1022     /* Calculate energy for the trial step */
1023     evaluate_energy(fplog,bVerbose,cr,
1024                     state_global,top_global,s_c,top,
1025                     inputrec,nrnb,wcycle,gstat,
1026                     vsite,constr,fcd,graph,mdatoms,fr,
1027                     mu_tot,enerd,vir,pres,-1,FALSE);
1028
1029     /* Calc derivative along line */
1030     p  = s_c->s.cg_p;
1031     sf = s_c->f;
1032     gpc=0;
1033     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1034       for(m=0; m<DIM; m++)
1035           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1036     }
1037     /* Sum the gradient along the line across CPUs */
1038     if (PAR(cr))
1039       gmx_sumd(1,&gpc,cr);
1040
1041     /* This is the max amount of increase in energy we tolerate */
1042     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1043
1044     /* Accept the step if the energy is lower, or if it is not significantly higher
1045      * and the line derivative is still negative.
1046      */
1047     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1048       foundlower = TRUE;
1049       /* Great, we found a better energy. Increase step for next iteration
1050        * if we are still going down, decrease it otherwise
1051        */
1052       if(gpc<0)
1053         stepsize *= 1.618034;  /* The golden section */
1054       else
1055         stepsize *= 0.618034;  /* 1/golden section */
1056     } else {
1057       /* New energy is the same or higher. We will have to do some work
1058        * to find a smaller value in the interval. Take smaller step next time!
1059        */
1060       foundlower = FALSE;
1061       stepsize *= 0.618034;
1062     }
1063
1064
1065
1066
1067     /* OK, if we didn't find a lower value we will have to locate one now - there must
1068      * be one in the interval [a=0,c].
1069      * The same thing is valid here, though: Don't spend dozens of iterations to find
1070      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1071      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1072      *
1073      * I also have a safeguard for potentially really patological functions so we never
1074      * take more than 20 steps before we give up ...
1075      *
1076      * If we already found a lower value we just skip this step and continue to the update.
1077      */
1078     if (!foundlower) {
1079       nminstep=0;
1080
1081       do {
1082         /* Select a new trial point.
1083          * If the derivatives at points a & c have different sign we interpolate to zero,
1084          * otherwise just do a bisection.
1085          */
1086         if(gpa<0 && gpc>0)
1087           b = a + gpa*(a-c)/(gpc-gpa);
1088         else
1089           b = 0.5*(a+c);
1090
1091         /* safeguard if interpolation close to machine accuracy causes errors:
1092          * never go outside the interval
1093          */
1094         if(b<=a || b>=c)
1095           b = 0.5*(a+c);
1096
1097         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1098           /* Reload the old state */
1099           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1100                                  s_min,top,mdatoms,fr,vsite,constr,
1101                                  nrnb,wcycle);
1102         }
1103
1104         /* Take a trial step to this new point - new coords in s_b */
1105         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1106                    constr,top,nrnb,wcycle,-1);
1107
1108         neval++;
1109         /* Calculate energy for the trial step */
1110         evaluate_energy(fplog,bVerbose,cr,
1111                         state_global,top_global,s_b,top,
1112                         inputrec,nrnb,wcycle,gstat,
1113                         vsite,constr,fcd,graph,mdatoms,fr,
1114                         mu_tot,enerd,vir,pres,-1,FALSE);
1115
1116         /* p does not change within a step, but since the domain decomposition
1117          * might change, we have to use cg_p of s_b here.
1118          */
1119         p  = s_b->s.cg_p;
1120         sf = s_b->f;
1121         gpb=0;
1122         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1123           for(m=0; m<DIM; m++)
1124               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1125         }
1126         /* Sum the gradient along the line across CPUs */
1127         if (PAR(cr))
1128           gmx_sumd(1,&gpb,cr);
1129
1130         if (debug)
1131           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1132                   s_a->epot,s_b->epot,s_c->epot,gpb);
1133
1134         epot_repl = s_b->epot;
1135
1136         /* Keep one of the intervals based on the value of the derivative at the new point */
1137         if (gpb > 0) {
1138           /* Replace c endpoint with b */
1139           swap_em_state(s_b,s_c);
1140           c = b;
1141           gpc = gpb;
1142         } else {
1143           /* Replace a endpoint with b */
1144           swap_em_state(s_b,s_a);
1145           a = b;
1146           gpa = gpb;
1147         }
1148
1149         /*
1150          * Stop search as soon as we find a value smaller than the endpoints.
1151          * Never run more than 20 steps, no matter what.
1152          */
1153         nminstep++;
1154       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1155                (nminstep < 20));
1156
1157       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1158           nminstep >= 20) {
1159         /* OK. We couldn't find a significantly lower energy.
1160          * If beta==0 this was steepest descent, and then we give up.
1161          * If not, set beta=0 and restart with steepest descent before quitting.
1162          */
1163         if (beta == 0.0) {
1164           /* Converged */
1165           converged = TRUE;
1166           break;
1167         } else {
1168           /* Reset memory before giving up */
1169           beta = 0.0;
1170           continue;
1171         }
1172       }
1173
1174       /* Select min energy state of A & C, put the best in B.
1175        */
1176       if (s_c->epot < s_a->epot) {
1177         if (debug)
1178           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1179                   s_c->epot,s_a->epot);
1180         swap_em_state(s_b,s_c);
1181         gpb = gpc;
1182         b = c;
1183       } else {
1184         if (debug)
1185           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1186                   s_a->epot,s_c->epot);
1187         swap_em_state(s_b,s_a);
1188         gpb = gpa;
1189         b = a;
1190       }
1191
1192     } else {
1193       if (debug)
1194         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1195                 s_c->epot);
1196       swap_em_state(s_b,s_c);
1197       gpb = gpc;
1198       b = c;
1199     }
1200
1201     /* new search direction */
1202     /* beta = 0 means forget all memory and restart with steepest descents. */
1203     if (nstcg && ((step % nstcg)==0))
1204       beta = 0.0;
1205     else {
1206       /* s_min->fnorm cannot be zero, because then we would have converged
1207        * and broken out.
1208        */
1209
1210       /* Polak-Ribiere update.
1211        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1212        */
1213       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1214     }
1215     /* Limit beta to prevent oscillations */
1216     if (fabs(beta) > 5.0)
1217       beta = 0.0;
1218
1219
1220     /* update positions */
1221     swap_em_state(s_min,s_b);
1222     gpa = gpb;
1223
1224     /* Print it if necessary */
1225     if (MASTER(cr)) {
1226       if(bVerbose)
1227         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1228                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1229                 s_min->fmax,s_min->a_fmax+1);
1230       /* Store the new (lower) energies */
1231       upd_mdebin(mdebin,NULL,TRUE,(double)step,
1232                  mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1233                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1234       do_log = do_per_step(step,inputrec->nstlog);
1235       do_ene = do_per_step(step,inputrec->nstenergy);
1236       if(do_log)
1237         print_ebin_header(fplog,step,step,s_min->s.lambda);
1238       print_ebin(fp_ene,do_ene,FALSE,FALSE,
1239                  do_log ? fplog : NULL,step,step,eprNORMAL,
1240                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1241     }
1242
1243     /* Stop when the maximum force lies below tolerance.
1244      * If we have reached machine precision, converged is already set to true.
1245      */
1246     converged = converged || (s_min->fmax < inputrec->em_tol);
1247
1248   } /* End of the loop */
1249
1250   if (converged)
1251     step--; /* we never took that last step in this case */
1252
1253   if (s_min->fmax > inputrec->em_tol) {
1254     if (MASTER(cr)) {
1255       warn_step(stderr,inputrec->em_tol,FALSE);
1256       warn_step(fplog,inputrec->em_tol,FALSE);
1257     }
1258     converged = FALSE;
1259   }
1260
1261   if (MASTER(cr)) {
1262     /* If we printed energy and/or logfile last step (which was the last step)
1263      * we don't have to do it again, but otherwise print the final values.
1264      */
1265     if(!do_log) {
1266       /* Write final value to log since we didn't do anything the last step */
1267       print_ebin_header(fplog,step,step,s_min->s.lambda);
1268     }
1269     if (!do_ene || !do_log) {
1270       /* Write final energy file entries */
1271       print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1272                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1273                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1274     }
1275   }
1276
1277   /* Print some stuff... */
1278   if (MASTER(cr))
1279     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1280
1281   /* IMPORTANT!
1282    * For accurate normal mode calculation it is imperative that we
1283    * store the last conformation into the full precision binary trajectory.
1284    *
1285    * However, we should only do it if we did NOT already write this step
1286    * above (which we did if do_x or do_f was true).
1287    */
1288   do_x = !do_per_step(step,inputrec->nstxout);
1289   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1290
1291   write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1292                 top_global,inputrec,step,
1293                 s_min,state_global,f_global);
1294
1295   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1296
1297   if (MASTER(cr)) {
1298     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1299                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1300     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1301                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1302
1303     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1304   }
1305
1306   finish_em(fplog,cr,fp_trn,fp_ene);
1307
1308   /* To print the actual number of steps we needed somewhere */
1309   runtime->nsteps_done = step;
1310
1311   return 0;
1312 } /* That's all folks */
1313
1314
1315 double do_lbfgs(FILE *fplog,t_commrec *cr,
1316                 int nfile,t_filenm fnm[],
1317                 bool bVerbose,bool bCompact,
1318                 gmx_vsite_t *vsite,gmx_constr_t constr,
1319                 int stepout,
1320                 t_inputrec *inputrec,
1321                 gmx_mtop_t *top_global,t_fcdata *fcd,
1322                 t_state *state,rvec f[],
1323                 t_mdatoms *mdatoms,
1324                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1325                 gmx_edsam_t ed,
1326                 t_forcerec *fr,
1327                 int repl_ex_nst,int repl_ex_seed,
1328                 real cpt_period,real max_hours,
1329                 unsigned long Flags,
1330                 gmx_runtime_t *runtime)
1331 {
1332   static const char *LBFGS="Low-Memory BFGS Minimizer";
1333   em_state_t ems;
1334   gmx_localtop_t *top;
1335   gmx_enerdata_t *enerd;
1336   gmx_global_stat_t gstat;
1337   t_graph    *graph;
1338   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1339   double stepsize,gpa,gpb,gpc,tmp,minstep;
1340   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1341   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1342   real   a,b,c,maxdelta,delta;
1343   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1344   real   dgdx,dgdg,sq,yr,beta;
1345   t_mdebin   *mdebin;
1346   bool   converged,first;
1347   rvec   mu_tot;
1348   real   fnorm,fmax;
1349   bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1350   tensor vir,pres;
1351   int    fp_trn,fp_ene,start,end,number_steps;
1352   int    i,k,m,n,nfmax,gf,step;
1353   /* not used */
1354   real   terminate;
1355
1356   if (PAR(cr))
1357     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1358
1359   n = 3*state->natoms;
1360   nmaxcorr = inputrec->nbfgscorr;
1361
1362   /* Allocate memory */
1363   snew(f,state->natoms);
1364   /* Use pointers to real so we dont have to loop over both atoms and
1365    * dimensions all the time...
1366    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1367    * that point to the same memory.
1368    */
1369   snew(xa,n);
1370   snew(xb,n);
1371   snew(xc,n);
1372   snew(fa,n);
1373   snew(fb,n);
1374   snew(fc,n);
1375   snew(frozen,n);
1376
1377   xx = (real *)state->x;
1378   ff = (real *)f;
1379
1380         printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]);
1381
1382   snew(p,n);
1383   snew(lastx,n);
1384   snew(lastf,n);
1385   snew(rho,nmaxcorr);
1386   snew(alpha,nmaxcorr);
1387
1388   snew(dx,nmaxcorr);
1389   for(i=0;i<nmaxcorr;i++)
1390     snew(dx[i],n);
1391
1392   snew(dg,nmaxcorr);
1393   for(i=0;i<nmaxcorr;i++)
1394     snew(dg[i],n);
1395
1396   step = 0;
1397   neval = 0;
1398
1399   /* Init em */
1400   init_em(fplog,LBFGS,cr,inputrec,
1401           state,top_global,&ems,&top,f,&f,
1402           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1403           nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1404   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1405    * so we free some memory again.
1406    */
1407   sfree(ems.s.x);
1408   sfree(ems.f);
1409
1410   start = mdatoms->start;
1411   end   = mdatoms->homenr + start;
1412
1413   /* Print to log file */
1414   print_date_and_time(fplog,cr->nodeid,
1415                       "Started Low-Memory BFGS Minimization",NULL);
1416   wallcycle_start(wcycle,ewcRUN);
1417
1418   do_log = do_ene = do_x = do_f = TRUE;
1419
1420   /* Max number of steps */
1421   number_steps=inputrec->nsteps;
1422
1423   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1424   gf = 0;
1425   for(i=start; i<end; i++) {
1426     if (mdatoms->cFREEZE)
1427       gf = mdatoms->cFREEZE[i];
1428      for(m=0; m<DIM; m++)
1429        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1430   }
1431   if (MASTER(cr))
1432     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1433   if (fplog)
1434     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1435
1436   if (vsite)
1437     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1438                      top->idef.iparams,top->idef.il,
1439                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1440
1441   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1442   /* do_force always puts the charge groups in the box and shifts again
1443    * We do not unshift, so molecules are always whole
1444    */
1445   neval++;
1446   ems.s.x = state->x;
1447   ems.f = f;
1448   evaluate_energy(fplog,bVerbose,cr,
1449                   state,top_global,&ems,top,
1450                   inputrec,nrnb,wcycle,gstat,
1451                   vsite,constr,fcd,graph,mdatoms,fr,
1452                   mu_tot,enerd,vir,pres,-1,TRUE);
1453   where();
1454
1455   if (MASTER(cr)) {
1456     /* Copy stuff to the energy bin for easy printing etc. */
1457     upd_mdebin(mdebin,NULL,TRUE,(double)step,
1458                mdatoms->tmass,enerd,state,state->box,
1459                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1460
1461     print_ebin_header(fplog,step,step,state->lambda);
1462     print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1463                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1464   }
1465   where();
1466
1467   /* This is the starting energy */
1468   Epot = enerd->term[F_EPOT];
1469
1470   fnorm = ems.fnorm;
1471   fmax  = ems.fmax;
1472   nfmax = ems.a_fmax;
1473
1474   /* Set the initial step.
1475    * since it will be multiplied by the non-normalized search direction
1476    * vector (force vector the first time), we scale it by the
1477    * norm of the force.
1478    */
1479
1480   if (MASTER(cr)) {
1481     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1482     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1483     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1484     fprintf(stderr,"\n");
1485     /* and copy to the log file too... */
1486     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1487     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1488     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1489     fprintf(fplog,"\n");
1490   }
1491
1492   point=0;
1493   for(i=0;i<n;i++)
1494     if(!frozen[i])
1495       dx[point][i] = ff[i];  /* Initial search direction */
1496     else
1497       dx[point][i] = 0;
1498
1499   stepsize = 1.0/fnorm;
1500   converged = FALSE;
1501
1502   /* Start the loop over BFGS steps.
1503    * Each successful step is counted, and we continue until
1504    * we either converge or reach the max number of steps.
1505    */
1506
1507   ncorr=0;
1508
1509   /* Set the gradient from the force */
1510   for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
1511
1512     /* Write coordinates if necessary */
1513     do_x = do_per_step(step,inputrec->nstxout);
1514     do_f = do_per_step(step,inputrec->nstfout);
1515
1516     write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
1517                top_global,inputrec->eI,inputrec->simulation_part,
1518                step,(real)step,state,state,f,f,NULL,NULL);
1519
1520     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1521
1522     /* pointer to current direction - point=0 first time here */
1523     s=dx[point];
1524
1525     /* calculate line gradient */
1526     for(gpa=0,i=0;i<n;i++)
1527         gpa-=s[i]*ff[i];
1528
1529     /* Calculate minimum allowed stepsize, before the average (norm)
1530      * relative change in coordinate is smaller than precision
1531      */
1532     for(minstep=0,i=0;i<n;i++) {
1533       tmp=fabs(xx[i]);
1534       if(tmp<1.0)
1535         tmp=1.0;
1536       tmp = s[i]/tmp;
1537       minstep += tmp*tmp;
1538     }
1539     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1540
1541     if(stepsize<minstep) {
1542       converged=TRUE;
1543       break;
1544     }
1545
1546     /* Store old forces and coordinates */
1547     for(i=0;i<n;i++) {
1548       lastx[i]=xx[i];
1549       lastf[i]=ff[i];
1550     }
1551     Epot0=Epot;
1552
1553     first=TRUE;
1554
1555     for(i=0;i<n;i++)
1556       xa[i]=xx[i];
1557
1558     /* Take a step downhill.
1559      * In theory, we should minimize the function along this direction.
1560      * That is quite possible, but it turns out to take 5-10 function evaluations
1561      * for each line. However, we dont really need to find the exact minimum -
1562      * it is much better to start a new BFGS step in a modified direction as soon
1563      * as we are close to it. This will save a lot of energy evaluations.
1564      *
1565      * In practice, we just try to take a single step.
1566      * If it worked (i.e. lowered the energy), we increase the stepsize but
1567      * the continue straight to the next BFGS step without trying to find any minimum.
1568      * If it didn't work (higher energy), there must be a minimum somewhere between
1569      * the old position and the new one.
1570      *
1571      * Due to the finite numerical accuracy, it turns out that it is a good idea
1572      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1573      * This leads to lower final energies in the tests I've done. / Erik
1574      */
1575     foundlower=FALSE;
1576     EpotA = Epot0;
1577     a = 0.0;
1578     c = a + stepsize; /* reference position along line is zero */
1579
1580     /* Check stepsize first. We do not allow displacements
1581      * larger than emstep.
1582      */
1583     do {
1584       c = a + stepsize;
1585       maxdelta=0;
1586       for(i=0;i<n;i++) {
1587         delta=c*s[i];
1588         if(delta>maxdelta)
1589           maxdelta=delta;
1590       }
1591       if(maxdelta>inputrec->em_stepsize)
1592         stepsize*=0.1;
1593     } while(maxdelta>inputrec->em_stepsize);
1594
1595     /* Take a trial step */
1596     for (i=0; i<n; i++)
1597       xc[i] = lastx[i] + c*s[i];
1598
1599     neval++;
1600     /* Calculate energy for the trial step */
1601     ems.s.x = (rvec *)xc;
1602     ems.f   = (rvec *)fc;
1603     evaluate_energy(fplog,bVerbose,cr,
1604                     state,top_global,&ems,top,
1605                     inputrec,nrnb,wcycle,gstat,
1606                     vsite,constr,fcd,graph,mdatoms,fr,
1607                     mu_tot,enerd,vir,pres,step,FALSE);
1608     EpotC = ems.epot;
1609
1610     /* Calc derivative along line */
1611     for(gpc=0,i=0; i<n; i++) {
1612         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1613     }
1614     /* Sum the gradient along the line across CPUs */
1615     if (PAR(cr))
1616       gmx_sumd(1,&gpc,cr);
1617
1618      /* This is the max amount of increase in energy we tolerate */
1619    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1620
1621     /* Accept the step if the energy is lower, or if it is not significantly higher
1622      * and the line derivative is still negative.
1623      */
1624     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1625       foundlower = TRUE;
1626       /* Great, we found a better energy. Increase step for next iteration
1627        * if we are still going down, decrease it otherwise
1628        */
1629       if(gpc<0)
1630         stepsize *= 1.618034;  /* The golden section */
1631       else
1632         stepsize *= 0.618034;  /* 1/golden section */
1633     } else {
1634       /* New energy is the same or higher. We will have to do some work
1635        * to find a smaller value in the interval. Take smaller step next time!
1636        */
1637       foundlower = FALSE;
1638       stepsize *= 0.618034;
1639     }
1640
1641     /* OK, if we didn't find a lower value we will have to locate one now - there must
1642      * be one in the interval [a=0,c].
1643      * The same thing is valid here, though: Don't spend dozens of iterations to find
1644      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1645      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1646      *
1647      * I also have a safeguard for potentially really patological functions so we never
1648      * take more than 20 steps before we give up ...
1649      *
1650      * If we already found a lower value we just skip this step and continue to the update.
1651      */
1652
1653     if(!foundlower) {
1654
1655       nminstep=0;
1656       do {
1657         /* Select a new trial point.
1658          * If the derivatives at points a & c have different sign we interpolate to zero,
1659          * otherwise just do a bisection.
1660          */
1661
1662         if(gpa<0 && gpc>0)
1663           b = a + gpa*(a-c)/(gpc-gpa);
1664         else
1665           b = 0.5*(a+c);
1666
1667         /* safeguard if interpolation close to machine accuracy causes errors:
1668          * never go outside the interval
1669          */
1670         if(b<=a || b>=c)
1671           b = 0.5*(a+c);
1672
1673         /* Take a trial step */
1674         for (i=0; i<n; i++)
1675           xb[i] = lastx[i] + b*s[i];
1676
1677         neval++;
1678         /* Calculate energy for the trial step */
1679         ems.s.x = (rvec *)xb;
1680         ems.f   = (rvec *)fb;
1681         evaluate_energy(fplog,bVerbose,cr,
1682                         state,top_global,&ems,top,
1683                         inputrec,nrnb,wcycle,gstat,
1684                         vsite,constr,fcd,graph,mdatoms,fr,
1685                         mu_tot,enerd,vir,pres,step,FALSE);
1686         EpotB = ems.epot;
1687
1688         fnorm = ems.fnorm;
1689
1690         for(gpb=0,i=0; i<n; i++)
1691           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1692
1693         /* Sum the gradient along the line across CPUs */
1694         if (PAR(cr))
1695           gmx_sumd(1,&gpb,cr);
1696
1697         /* Keep one of the intervals based on the value of the derivative at the new point */
1698         if(gpb>0) {
1699           /* Replace c endpoint with b */
1700           EpotC = EpotB;
1701           c = b;
1702           gpc = gpb;
1703           /* swap coord pointers b/c */
1704           xtmp = xb;
1705           ftmp = fb;
1706           xb = xc;
1707           fb = fc;
1708           xc = xtmp;
1709           fc = ftmp;
1710         } else {
1711           /* Replace a endpoint with b */
1712           EpotA = EpotB;
1713           a = b;
1714           gpa = gpb;
1715           /* swap coord pointers a/b */
1716           xtmp = xb;
1717           ftmp = fb;
1718           xb = xa;
1719           fb = fa;
1720           xa = xtmp;
1721           fa = ftmp;
1722         }
1723
1724         /*
1725          * Stop search as soon as we find a value smaller than the endpoints,
1726          * or if the tolerance is below machine precision.
1727          * Never run more than 20 steps, no matter what.
1728          */
1729         nminstep++;
1730       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1731
1732       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1733         /* OK. We couldn't find a significantly lower energy.
1734          * If ncorr==0 this was steepest descent, and then we give up.
1735          * If not, reset memory to restart as steepest descent before quitting.
1736          */
1737         if(ncorr==0) {
1738         /* Converged */
1739           converged=TRUE;
1740           break;
1741         } else {
1742           /* Reset memory */
1743           ncorr=0;
1744           /* Search in gradient direction */
1745           for(i=0;i<n;i++)
1746             dx[point][i]=ff[i];
1747           /* Reset stepsize */
1748           stepsize = 1.0/fnorm;
1749           continue;
1750         }
1751       }
1752
1753       /* Select min energy state of A & C, put the best in xx/ff/Epot
1754        */
1755       if(EpotC<EpotA) {
1756         Epot = EpotC;
1757         /* Use state C */
1758         for(i=0;i<n;i++) {
1759           xx[i]=xc[i];
1760           ff[i]=fc[i];
1761         }
1762         stepsize=c;
1763       } else {
1764         Epot = EpotA;
1765         /* Use state A */
1766         for(i=0;i<n;i++) {
1767           xx[i]=xa[i];
1768           ff[i]=fa[i];
1769         }
1770         stepsize=a;
1771       }
1772
1773     } else {
1774       /* found lower */
1775       Epot = EpotC;
1776       /* Use state C */
1777       for(i=0;i<n;i++) {
1778         xx[i]=xc[i];
1779         ff[i]=fc[i];
1780       }
1781       stepsize=c;
1782     }
1783
1784     /* Update the memory information, and calculate a new
1785      * approximation of the inverse hessian
1786      */
1787
1788     /* Have new data in Epot, xx, ff */
1789     if(ncorr<nmaxcorr)
1790       ncorr++;
1791
1792     for(i=0;i<n;i++) {
1793       dg[point][i]=lastf[i]-ff[i];
1794       dx[point][i]*=stepsize;
1795     }
1796
1797     dgdg=0;
1798     dgdx=0;
1799     for(i=0;i<n;i++) {
1800       dgdg+=dg[point][i]*dg[point][i];
1801       dgdx+=dg[point][i]*dx[point][i];
1802     }
1803
1804     diag=dgdx/dgdg;
1805
1806     rho[point]=1.0/dgdx;
1807     point++;
1808
1809     if(point>=nmaxcorr)
1810       point=0;
1811
1812     /* Update */
1813     for(i=0;i<n;i++)
1814       p[i]=ff[i];
1815
1816     cp=point;
1817
1818     /* Recursive update. First go back over the memory points */
1819     for(k=0;k<ncorr;k++) {
1820       cp--;
1821       if(cp<0)
1822         cp=ncorr-1;
1823
1824       sq=0;
1825       for(i=0;i<n;i++)
1826         sq+=dx[cp][i]*p[i];
1827
1828       alpha[cp]=rho[cp]*sq;
1829
1830       for(i=0;i<n;i++)
1831         p[i] -= alpha[cp]*dg[cp][i];
1832     }
1833
1834     for(i=0;i<n;i++)
1835       p[i] *= diag;
1836
1837     /* And then go forward again */
1838     for(k=0;k<ncorr;k++) {
1839       yr = 0;
1840       for(i=0;i<n;i++)
1841         yr += p[i]*dg[cp][i];
1842
1843       beta = rho[cp]*yr;
1844       beta = alpha[cp]-beta;
1845
1846       for(i=0;i<n;i++)
1847         p[i] += beta*dx[cp][i];
1848
1849       cp++;
1850       if(cp>=ncorr)
1851         cp=0;
1852     }
1853
1854     for(i=0;i<n;i++)
1855       if(!frozen[i])
1856         dx[point][i] = p[i];
1857       else
1858         dx[point][i] = 0;
1859
1860     stepsize=1.0;
1861
1862     /* Test whether the convergence criterion is met */
1863     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1864
1865     /* Print it if necessary */
1866     if (MASTER(cr)) {
1867       if(bVerbose)
1868         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1869                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1870       /* Store the new (lower) energies */
1871       upd_mdebin(mdebin,NULL,TRUE,(double)step,
1872                  mdatoms->tmass,enerd,state,state->box,
1873                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1874       do_log = do_per_step(step,inputrec->nstlog);
1875       do_ene = do_per_step(step,inputrec->nstenergy);
1876       if(do_log)
1877         print_ebin_header(fplog,step,step,state->lambda);
1878       print_ebin(fp_ene,do_ene,FALSE,FALSE,
1879                  do_log ? fplog : NULL,step,step,eprNORMAL,
1880                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1881     }
1882
1883     /* Stop when the maximum force lies below tolerance.
1884      * If we have reached machine precision, converged is already set to true.
1885      */
1886
1887     converged = converged || (fmax < inputrec->em_tol);
1888
1889   } /* End of the loop */
1890
1891   if(converged)
1892     step--; /* we never took that last step in this case */
1893
1894   if(fmax>inputrec->em_tol) {
1895     if (MASTER(cr)) {
1896       warn_step(stderr,inputrec->em_tol,FALSE);
1897       warn_step(fplog,inputrec->em_tol,FALSE);
1898     }
1899     converged = FALSE;
1900   }
1901
1902   /* If we printed energy and/or logfile last step (which was the last step)
1903    * we don't have to do it again, but otherwise print the final values.
1904    */
1905   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1906     print_ebin_header(fplog,step,step,state->lambda);
1907   if(!do_ene || !do_log) /* Write final energy file entries */
1908     print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1909                !do_log ? fplog : NULL,step,step,eprNORMAL,
1910                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1911
1912   /* Print some stuff... */
1913   if (MASTER(cr))
1914     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1915
1916   /* IMPORTANT!
1917    * For accurate normal mode calculation it is imperative that we
1918    * store the last conformation into the full precision binary trajectory.
1919    *
1920    * However, we should only do it if we did NOT already write this step
1921    * above (which we did if do_x or do_f was true).
1922    */
1923   do_x = !do_per_step(step,inputrec->nstxout);
1924   do_f = !do_per_step(step,inputrec->nstfout);
1925   write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1926                 top_global,inputrec,step,
1927                 &ems,state,f);
1928
1929   if (MASTER(cr)) {
1930     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1931                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1932     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1933                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1934
1935     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1936   }
1937
1938   finish_em(fplog,cr,fp_trn,fp_ene);
1939
1940   /* To print the actual number of steps we needed somewhere */
1941   runtime->nsteps_done = step;
1942
1943   return 0;
1944 } /* That's all folks */
1945
1946
1947 double do_steep(FILE *fplog,t_commrec *cr,
1948                 int nfile,t_filenm fnm[],
1949                 bool bVerbose,bool bCompact,
1950                 gmx_vsite_t *vsite,gmx_constr_t constr,
1951                 int stepout,
1952                 t_inputrec *inputrec,
1953                 gmx_mtop_t *top_global,t_fcdata *fcd,
1954                 t_state *state_global,rvec f[],
1955                 t_mdatoms *mdatoms,
1956                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1957                 gmx_edsam_t ed,
1958                 t_forcerec *fr,
1959                 int repl_ex_nst,int repl_ex_seed,
1960                 real cpt_period,real max_hours,
1961                 unsigned long Flags,
1962                 gmx_runtime_t *runtime)
1963 {
1964   const char *SD="Steepest Descents";
1965   em_state_t *s_min,*s_try;
1966   rvec       *f_global;
1967   gmx_localtop_t *top;
1968   gmx_enerdata_t *enerd;
1969   gmx_global_stat_t gstat;
1970   t_graph    *graph;
1971   real   stepsize,constepsize;
1972   real   ustep,dvdlambda,fnormn;
1973   int        fp_trn,fp_ene;
1974   t_mdebin   *mdebin;
1975   bool   bDone,bAbort,do_x,do_f;
1976   tensor vir,pres;
1977   rvec   mu_tot;
1978   int    nsteps;
1979   int    count=0;
1980   int    steps_accepted=0;
1981   /* not used */
1982   real   terminate=0;
1983
1984   s_min = init_em_state();
1985   s_try = init_em_state();
1986
1987   /* Init em and store the local state in s_try */
1988   init_em(fplog,SD,cr,inputrec,
1989           state_global,top_global,s_try,&top,f,&f_global,
1990           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1991           nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1992
1993   /* Print to log file  */
1994   print_date_and_time(fplog,cr->nodeid,"Started Steepest Descents",NULL);
1995   wallcycle_start(wcycle,ewcRUN);
1996
1997   /* Set variables for stepsize (in nm). This is the largest
1998    * step that we are going to make in any direction.
1999    */
2000   ustep = inputrec->em_stepsize;
2001   stepsize = 0;
2002
2003   /* Max number of steps  */
2004   nsteps = inputrec->nsteps;
2005
2006   if (MASTER(cr))
2007     /* Print to the screen  */
2008     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2009   if (fplog)
2010     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2011
2012   /**** HERE STARTS THE LOOP ****
2013    * count is the counter for the number of steps
2014    * bDone will be TRUE when the minimization has converged
2015    * bAbort will be TRUE when nsteps steps have been performed or when
2016    * the stepsize becomes smaller than is reasonable for machine precision
2017    */
2018   count  = 0;
2019   bDone  = FALSE;
2020   bAbort = FALSE;
2021   while( !bDone && !bAbort ) {
2022     bAbort = (nsteps > 0) && (count==nsteps);
2023
2024     /* set new coordinates, except for first step */
2025     if (count > 0) {
2026       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2027                  constr,top,nrnb,wcycle,count);
2028     }
2029
2030     evaluate_energy(fplog,bVerbose,cr,
2031                     state_global,top_global,s_try,top,
2032                     inputrec,nrnb,wcycle,gstat,
2033                     vsite,constr,fcd,graph,mdatoms,fr,
2034                     mu_tot,enerd,vir,pres,count,count==0);
2035
2036     if (MASTER(cr))
2037       print_ebin_header(fplog,count,count,s_try->s.lambda);
2038
2039     if (count == 0)
2040       s_min->epot = s_try->epot + 1;
2041
2042     /* Print it if necessary  */
2043     if (MASTER(cr)) {
2044       if (bVerbose) {
2045         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2046                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2047                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2048       }
2049
2050       if (s_try->epot < s_min->epot) {
2051         /* Store the new (lower) energies  */
2052         upd_mdebin(mdebin,NULL,TRUE,(double)count,
2053                    mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2054                    NULL,NULL,vir,pres,NULL,mu_tot,constr);
2055         print_ebin(fp_ene,TRUE,
2056                    do_per_step(steps_accepted,inputrec->nstdisreout),
2057                    do_per_step(steps_accepted,inputrec->nstorireout),
2058                    fplog,count,count,eprNORMAL,TRUE,
2059                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2060         fflush(fplog);
2061       }
2062     }
2063
2064     /* Now if the new energy is smaller than the previous...
2065      * or if this is the first step!
2066      * or if we did random steps!
2067      */
2068
2069     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2070       steps_accepted++;
2071
2072       /* Test whether the convergence criterion is met...  */
2073       bDone = (s_try->fmax < inputrec->em_tol);
2074
2075       /* Copy the arrays for force, positions and energy  */
2076       /* The 'Min' array always holds the coords and forces of the minimal
2077          sampled energy  */
2078       swap_em_state(s_min,s_try);
2079       if (count > 0)
2080         ustep *= 1.2;
2081
2082       /* Write to trn, if necessary */
2083       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2084       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2085       write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
2086                     top_global,inputrec,count,
2087                     s_min,state_global,f_global);
2088     }
2089     else {
2090       /* If energy is not smaller make the step smaller...  */
2091       ustep *= 0.5;
2092
2093       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2094         /* Reload the old state */
2095         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2096                                s_min,top,mdatoms,fr,vsite,constr,
2097                                nrnb,wcycle);
2098       }
2099     }
2100
2101     /* Determine new step  */
2102     stepsize = ustep/s_min->fmax;
2103
2104     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2105 #ifdef GMX_DOUBLE
2106     if (ustep < 1e-12)
2107 #else
2108     if (ustep < 1e-6)
2109 #endif
2110       {
2111         if (MASTER(cr)) {
2112           warn_step(stderr,inputrec->em_tol,constr!=NULL);
2113           warn_step(fplog,inputrec->em_tol,constr!=NULL);
2114         }
2115         bAbort=TRUE;
2116       }
2117
2118     count++;
2119   } /* End of the loop  */
2120
2121     /* Print some shit...  */
2122   if (MASTER(cr))
2123     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2124   write_em_traj(fplog,cr,fp_trn,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2125                 top_global,inputrec,count,
2126                 s_min,state_global,f_global);
2127
2128   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2129
2130   if (MASTER(cr)) {
2131     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2132                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2133     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2134                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2135   }
2136
2137   finish_em(fplog,cr,fp_trn,fp_ene);
2138
2139   /* To print the actual number of steps we needed somewhere */
2140   inputrec->nsteps=count;
2141
2142   runtime->nsteps_done = count;
2143
2144   return 0;
2145 } /* That's all folks */
2146
2147
2148 double do_nm(FILE *fplog,t_commrec *cr,
2149              int nfile,t_filenm fnm[],
2150              bool bVerbose,bool bCompact,
2151              gmx_vsite_t *vsite,gmx_constr_t constr,
2152              int stepout,
2153              t_inputrec *inputrec,
2154              gmx_mtop_t *top_global,t_fcdata *fcd,
2155              t_state *state_global,rvec f[],
2156              t_mdatoms *mdatoms,
2157              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2158              gmx_edsam_t ed,
2159              t_forcerec *fr,
2160              int repl_ex_nst,int repl_ex_seed,
2161              real cpt_period,real max_hours,
2162              unsigned long Flags,
2163              gmx_runtime_t *runtime)
2164 {
2165     t_mdebin   *mdebin;
2166         const char *NM = "Normal Mode Analysis";
2167     int        fp_ene,step,i;
2168     rvec       *f_global;
2169     gmx_localtop_t *top;
2170     gmx_enerdata_t *enerd;
2171     gmx_global_stat_t gstat;
2172     t_graph    *graph;
2173     real       t,lambda,t0,lam0;
2174     bool       bNS;
2175     tensor     vir,pres;
2176     int        nfmax,count;
2177     rvec       mu_tot;
2178     rvec       *fneg,*fpos;
2179     bool       bSparse; /* use sparse matrix storage format */
2180     size_t     sz;
2181     gmx_sparsematrix_t * sparse_matrix = NULL;
2182     real *     full_matrix             = NULL;
2183         em_state_t *   state_work;
2184
2185     /* added with respect to mdrun */
2186     int        idum,jdum,kdum,row,col;
2187     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2188     real       fnorm,fmax;
2189     real       dfdx;
2190
2191     state_work = init_em_state();
2192
2193     /* Init em and store the local state in state_minimum */
2194     init_em(fplog,NM,cr,inputrec,
2195             state_global,top_global,state_work,&top,
2196             f,&f_global,
2197             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2198             nfile,fnm,NULL,&fp_ene,&mdebin);
2199
2200     snew(fneg,top_global->natoms);
2201     snew(fpos,top_global->natoms);
2202
2203 #ifndef GMX_DOUBLE
2204     fprintf(stderr,
2205             "NOTE: This version of Gromacs has been compiled in single precision,\n"
2206             "      which MIGHT not be accurate enough for normal mode analysis.\n"
2207             "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2208             "      are fairly modest even if you recompile in double precision.\n\n");
2209 #endif
2210
2211     /* Check if we can/should use sparse storage format.
2212      *
2213      * Sparse format is only useful when the Hessian itself is sparse, which it
2214       * will be when we use a cutoff.
2215       * For small systems (n<1000) it is easier to always use full matrix format, though.
2216       */
2217     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2218     {
2219         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2220         bSparse = FALSE;
2221     }
2222     else if(top_global->natoms < 1000)
2223     {
2224         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2225         bSparse = FALSE;
2226     }
2227     else
2228     {
2229         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2230         bSparse = TRUE;
2231     }
2232
2233     sz = DIM*top_global->natoms;
2234
2235     fprintf(stderr,"Allocating Hessian memory...\n\n");
2236
2237     if(bSparse)
2238     {
2239         sparse_matrix=gmx_sparsematrix_init(sz);
2240         sparse_matrix->compressed_symmetric = TRUE;
2241     }
2242     else
2243     {
2244         snew(full_matrix,sz*sz);
2245     }
2246
2247     /* Initial values */
2248     t0           = inputrec->init_t;
2249     lam0         = inputrec->init_lambda;
2250     t            = t0;
2251     lambda       = lam0;
2252
2253     init_nrnb(nrnb);
2254
2255     where();
2256
2257     /* Write start time and temperature */
2258     print_date_and_time(fplog,cr->nodeid,"Started nmrun",NULL);
2259     wallcycle_start(wcycle,ewcRUN);
2260     if (MASTER(cr))
2261     {
2262         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global->name),
2263                 top_global->natoms);
2264     }
2265
2266     count = 0;
2267     evaluate_energy(fplog,bVerbose,cr,
2268                     state_global,top_global,state_work,top,
2269                     inputrec,nrnb,wcycle,gstat,
2270                     vsite,constr,fcd,graph,mdatoms,fr,
2271                     mu_tot,enerd,vir,pres,count,count==0);
2272     count++;
2273
2274     /* if forces are not small, warn user */
2275     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2276
2277     fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2278     if (state_work->fmax > 1.0e-3)
2279     {
2280         fprintf(stderr,"Maximum force probably not small enough to");
2281         fprintf(stderr," ensure that you are in an \nenergy well. ");
2282         fprintf(stderr,"Be aware that negative eigenvalues may occur");
2283         fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2284     }
2285
2286     /***********************************************************
2287      *
2288      *      Loop over all pairs in matrix
2289      *
2290      *      do_force called twice. Once with positive and
2291      *      once with negative displacement
2292      *
2293      ************************************************************/
2294
2295     /* fudge nr of steps to nr of atoms */
2296
2297     inputrec->nsteps = top_global->natoms;
2298
2299     for (step=0; (step<inputrec->nsteps); step++)
2300     {
2301
2302         for (idum=0; (idum<DIM); idum++)
2303         {
2304           row = DIM*step+idum;
2305
2306           state_work->s.x[step][idum] -= der_range;
2307
2308           evaluate_energy(fplog,bVerbose,cr,
2309                           state_global,top_global,state_work,top,
2310                           inputrec,nrnb,wcycle,gstat,
2311                           vsite,constr,fcd,graph,mdatoms,fr,
2312                           mu_tot,enerd,vir,pres,count,count==0);
2313           count++;
2314
2315           for ( i=0 ; i < top_global->natoms ; i++ )
2316             {
2317               copy_rvec ( state_work->f[i] , fneg[i] );
2318             }
2319
2320           state_work->s.x[step][idum] += 2*der_range;
2321
2322           evaluate_energy(fplog,bVerbose,cr,
2323                           state_global,top_global,state_work,top,
2324                           inputrec,nrnb,wcycle,gstat,
2325                           vsite,constr,fcd,graph,mdatoms,fr,
2326                           mu_tot,enerd,vir,pres,count,count==0);
2327           count++;
2328
2329           for ( i=0 ; i < top_global->natoms ; i++ )
2330             {
2331               copy_rvec ( state_work->f[i] , fpos[i] );
2332             }
2333
2334           for (jdum=0; (jdum<top_global->natoms); jdum++)
2335             {
2336                 for (kdum=0; (kdum<DIM); kdum++)
2337                 {
2338                     dfdx=-(fpos[jdum][kdum]-fneg[jdum][kdum])/(2*der_range);
2339                     col = DIM*jdum+kdum;
2340
2341                     if(bSparse)
2342                     {
2343                         if(col>=row && dfdx!=0.0)
2344                             gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx);
2345                     }
2346                     else
2347                     {
2348                         full_matrix[row*sz+col] = dfdx;
2349                     }
2350                 }
2351             }
2352
2353             /* x is restored to original */
2354                         state_work->s.x[step][idum] -= der_range;
2355
2356             if (bVerbose && fplog)
2357                 fflush(fplog);
2358         }
2359         /* write progress */
2360         if (MASTER(cr) && bVerbose)
2361         {
2362             fprintf(stderr,"\rFinished step %d out of %d",
2363                     step+1,top_global->natoms);
2364             fflush(stderr);
2365         }
2366     }
2367     t=t0+step*inputrec->delta_t;
2368     lambda=lam0+step*inputrec->delta_lambda;
2369
2370     if (MASTER(cr))
2371     {
2372         print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,t,eprAVER,
2373                    FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2374     }
2375
2376     fprintf(stderr,"\n\nWriting Hessian...\n");
2377     gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2378
2379     runtime->nsteps_done = step;
2380
2381     return 0;
2382 }
2383
2384
2385 static void global_max(t_commrec *cr,int *n)
2386 {
2387   int *sum,i;
2388
2389   snew(sum,cr->nnodes);
2390   sum[cr->nodeid] = *n;
2391   gmx_sumi(cr->nnodes,sum,cr);
2392   for(i=0; i<cr->nnodes; i++)
2393     *n = max(*n,sum[i]);
2394
2395   sfree(sum);
2396 }
2397
2398 static void realloc_bins(double **bin,int *nbin,int nbin_new)
2399 {
2400   int i;
2401
2402   if (nbin_new != *nbin) {
2403     srenew(*bin,nbin_new);
2404     for(i=*nbin; i<nbin_new; i++)
2405       (*bin)[i] = 0;
2406     *nbin = nbin_new;
2407   }
2408 }
2409
2410 double do_tpi(FILE *fplog,t_commrec *cr,
2411               int nfile,t_filenm fnm[],
2412               bool bVerbose,bool bCompact,
2413               gmx_vsite_t *vsite,gmx_constr_t constr,
2414               int stepout,
2415               t_inputrec *inputrec,
2416               gmx_mtop_t *top_global,t_fcdata *fcd,
2417               t_state *state,rvec f[],
2418               t_mdatoms *mdatoms,
2419               t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2420               gmx_edsam_t ed,
2421               t_forcerec *fr,
2422               int repl_ex_nst,int repl_ex_seed,
2423               real cpt_period,real max_hours,
2424               unsigned long Flags,
2425               gmx_runtime_t *runtime)
2426 {
2427   const char *TPI="Test Particle Insertion";
2428   gmx_localtop_t *top;
2429   gmx_groups_t *groups;
2430   gmx_enerdata_t *enerd;
2431   real   lambda,t,temp,beta,drmax,epot;
2432   double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;
2433   int    status;
2434   t_trxframe rerun_fr;
2435   bool   bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;
2436   tensor force_vir,shake_vir,vir,pres;
2437   int    cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;
2438   rvec   *x_mol;
2439   rvec   mu_tot,x_init,dx,x_tp;
2440   int    nnodes,frame,nsteps,step;
2441   int    i,start,end;
2442   gmx_rng_t tpi_rand;
2443   FILE   *fp_tpi=NULL;
2444   char   *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];
2445   double dbl,dump_ener;
2446   bool   bCavity;
2447   int    nat_cavity=0,d;
2448   real   *mass_cavity=NULL,mass_tot;
2449   int    nbin;
2450   double invbinw,*bin,refvolshift,logV,bUlogV;
2451   const char *tpid_leg[2]={"direct","reweighted"};
2452
2453   /* Since numerical problems can lead to extreme negative energies
2454    * when atoms overlap, we need to set a lower limit for beta*U.
2455    */
2456   real bU_neg_limit = -50;
2457
2458   /* Since there is no upper limit to the insertion energies,
2459    * we need to set an upper limit for the distribution output.
2460    */
2461   real bU_bin_limit      = 50;
2462   real bU_logV_bin_limit = bU_bin_limit + 10;
2463
2464   nnodes = cr->nnodes;
2465
2466   top = gmx_mtop_generate_local_top(top_global,inputrec);
2467
2468   groups = &top_global->groups;
2469
2470   bCavity = (inputrec->eI == eiTPIC);
2471   if (bCavity) {
2472     ptr = getenv("GMX_TPIC_MASSES");
2473     if (ptr == NULL) {
2474       nat_cavity = 1;
2475     } else {
2476       /* Read (multiple) masses from env var GMX_TPIC_MASSES,
2477        * The center of mass of the last atoms is then used for TPIC.
2478        */
2479       nat_cavity = 0;
2480       while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) {
2481         srenew(mass_cavity,nat_cavity+1);
2482         mass_cavity[nat_cavity] = dbl;
2483         fprintf(fplog,"mass[%d] = %f\n",
2484                 nat_cavity+1,mass_cavity[nat_cavity]);
2485         nat_cavity++;
2486         ptr += i;
2487       }
2488       if (nat_cavity == 0)
2489         gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);
2490     }
2491   }
2492
2493   /*
2494   init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
2495   state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
2496   /* We never need full pbc for TPI */
2497   fr->ePBC = epbcXYZ;
2498   /* Determine the temperature for the Boltzmann weighting */
2499   temp = inputrec->opts.ref_t[0];
2500   if (fplog) {
2501     for(i=1; (i<inputrec->opts.ngtc); i++) {
2502       if (inputrec->opts.ref_t[i] != temp) {
2503         fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2504         fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2505       }
2506     }
2507     fprintf(fplog,
2508             "\n  The temperature for test particle insertion is %.3f K\n\n",
2509             temp);
2510   }
2511   beta = 1.0/(BOLTZ*temp);
2512
2513   /* Number of insertions per frame */
2514   nsteps = inputrec->nsteps;
2515
2516   /* Use the same neighborlist with more insertions points
2517    * in a sphere of radius drmax around the initial point
2518    */
2519   /* This should be a proper mdp parameter */
2520   drmax = inputrec->rtpi;
2521
2522   /* An environment variable can be set to dump all configurations
2523    * to pdb with an insertion energy <= this value.
2524    */
2525   dump_pdb = getenv("GMX_TPI_DUMP");
2526   dump_ener = 0;
2527   if (dump_pdb)
2528     sscanf(dump_pdb,"%lf",&dump_ener);
2529
2530   atoms2md(top_global,inputrec,0,NULL,0,top_global->natoms,mdatoms);
2531   update_mdatoms(mdatoms,inputrec->init_lambda);
2532
2533   snew(enerd,1);
2534   init_enerdata(groups->grps[egcENER].nr,inputrec->n_flambda,enerd);
2535
2536   /* Print to log file  */
2537   runtime_start(runtime);
2538   print_date_and_time(fplog,cr->nodeid,
2539                       "Started Test Particle Insertion",runtime);
2540   wallcycle_start(wcycle,ewcRUN);
2541
2542   /* The last charge group is the group to be inserted */
2543   cg_tp = top->cgs.nr - 1;
2544   a_tp0 = top->cgs.index[cg_tp];
2545   a_tp1 = top->cgs.index[cg_tp+1];
2546   if (debug)
2547     fprintf(debug,"TPI cg %d, atoms %d-%d\n",cg_tp,a_tp0,a_tp1);
2548   if (a_tp1 - a_tp0 > 1 &&
2549       (inputrec->rlist < inputrec->rcoulomb ||
2550        inputrec->rlist < inputrec->rvdw))
2551     gmx_fatal(FARGS,"Can not do TPI for multi-atom molecule with a twin-range cut-off");
2552   snew(x_mol,a_tp1-a_tp0);
2553
2554   bDispCorr = (inputrec->eDispCorr != edispcNO);
2555   bCharge = FALSE;
2556   for(i=a_tp0; i<a_tp1; i++) {
2557     /* Copy the coordinates of the molecule to be insterted */
2558     copy_rvec(state->x[i],x_mol[i-a_tp0]);
2559     /* Check if we need to print electrostatic energies */
2560     bCharge |= (mdatoms->chargeA[i] != 0 ||
2561                 (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
2562   }
2563   bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype!=eelRF_NEC);
2564
2565   calc_cgcm(fplog,cg_tp,cg_tp+1,&(top->cgs),state->x,fr->cg_cm);
2566   if (bCavity) {
2567     if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) {
2568       fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
2569       fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");
2570     }
2571   } else {
2572     /* Center the molecule to be inserted at zero */
2573      for(i=0; i<a_tp1-a_tp0; i++)
2574       rvec_dec(x_mol[i],fr->cg_cm[cg_tp]);
2575   }
2576
2577   if (fplog) {
2578     fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",
2579             a_tp1-a_tp0,bCharge ? "with" : "without");
2580
2581     fprintf(fplog,"\nWill insert %d times in each frame of %s\n",
2582             nsteps,opt2fn("-rerun",nfile,fnm));
2583   }
2584
2585   if (!bCavity) {
2586     if (inputrec->nstlist > 1) {
2587       if (drmax==0 && a_tp1-a_tp0==1) {
2588         gmx_fatal(FARGS,"Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec->nstlist,drmax);
2589       }
2590       if (fplog)
2591         fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec->nstlist,drmax);
2592     }
2593   } else {
2594     if (fplog)
2595       fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);
2596   }
2597
2598   ngid = groups->grps[egcENER].nr;
2599   gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
2600   nener = 1 + ngid;
2601   if (bDispCorr)
2602     nener += 1;
2603   if (bCharge) {
2604     nener += ngid;
2605     if (bRFExcl)
2606       nener += 1;
2607     if (EEL_FULL(fr->eeltype))
2608       nener += 1;
2609   }
2610   snew(sum_UgembU,nener);
2611
2612   /* Initialize random generator */
2613   tpi_rand = gmx_rng_init(inputrec->ld_seed);
2614
2615   if (MASTER(cr)) {
2616     fp_tpi = xvgropen(opt2fn("-tpi",nfile,fnm),
2617                       "TPI energies","Time (ps)",
2618                       "(kJ mol\\S-1\\N) / (nm\\S3\\N)");
2619     xvgr_subtitle(fp_tpi,"f. are averages over one frame");
2620     snew(leg,4+nener);
2621     e = 0;
2622     sprintf(str,"-kT log(<Ve\\S-\\8b\\4U\\N>/<V>)");
2623     leg[e++] = strdup(str);
2624     sprintf(str,"f. -kT log<e\\S-\\8b\\4U\\N>");
2625     leg[e++] = strdup(str);
2626     sprintf(str,"f. <e\\S-\\8b\\4U\\N>");
2627     leg[e++] = strdup(str);
2628     sprintf(str,"f. V");
2629     leg[e++] = strdup(str);
2630     sprintf(str,"f. <Ue\\S-\\8b\\4U\\N>");
2631     leg[e++] = strdup(str);
2632     for(i=0; i<ngid; i++) {
2633       sprintf(str,"f. <U\\sVdW %s\\Ne\\S-\\8b\\4U\\N>",
2634               *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2635       leg[e++] = strdup(str);
2636     }
2637     if (bDispCorr) {
2638       sprintf(str,"f. <U\\sdisp c\\Ne\\S-\\8b\\4U\\N>");
2639       leg[e++] = strdup(str);
2640     }
2641     if (bCharge) {
2642       for(i=0; i<ngid; i++) {
2643         sprintf(str,"f. <U\\sCoul %s\\Ne\\S-\\8b\\4U\\N>",
2644                 *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2645         leg[e++] = strdup(str);
2646       }
2647       if (bRFExcl) {
2648         sprintf(str,"f. <U\\sRF excl\\Ne\\S-\\8b\\4U\\N>");
2649         leg[e++] = strdup(str);
2650       }
2651       if (EEL_FULL(fr->eeltype)) {
2652         sprintf(str,"f. <U\\sCoul recip\\Ne\\S-\\8b\\4U\\N>");
2653         leg[e++] = strdup(str);
2654       }
2655     }
2656     xvgr_legend(fp_tpi,4+nener,leg);
2657     for(i=0; i<4+nener; i++)
2658       sfree(leg[i]);
2659     sfree(leg);
2660   }
2661   clear_rvec(x_init);
2662   V_all = 0;
2663   VembU_all = 0;
2664
2665   invbinw = 10;
2666   nbin = 10;
2667   snew(bin,nbin);
2668
2669   bNotLastFrame = read_first_frame(&status,opt2fn("-rerun",nfile,fnm),
2670                                    &rerun_fr,TRX_NEED_X);
2671   frame = 0;
2672
2673   if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
2674       mdatoms->nr - (a_tp1 - a_tp0))
2675     gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "
2676               "is not equal the number in the run input file (%d) "
2677               "minus the number of atoms to insert (%d)\n",
2678               rerun_fr.natoms,bCavity ? " minus one" : "",
2679               mdatoms->nr,a_tp1-a_tp0);
2680
2681   refvolshift = log(det(rerun_fr.box));
2682
2683   while (bNotLastFrame) {
2684     lambda = rerun_fr.lambda;
2685     t = rerun_fr.time;
2686
2687     sum_embU = 0;
2688     for(e=0; e<nener; e++)
2689       sum_UgembU[e] = 0;
2690
2691     /* Copy the coordinates from the input trajectory */
2692     for(i=0; i<rerun_fr.natoms; i++)
2693       copy_rvec(rerun_fr.x[i],state->x[i]);
2694
2695     V = det(rerun_fr.box);
2696     logV = log(V);
2697
2698     bStateChanged = TRUE;
2699     bNS = TRUE;
2700     for(step=0; step<nsteps; step++) {
2701       /* In parallel all nodes generate all random configurations.
2702        * In that way the result is identical to a single cpu tpi run.
2703        */
2704       if (!bCavity) {
2705         /* Random insertion in the whole volume */
2706         bNS = (step % inputrec->nstlist == 0);
2707         if (bNS) {
2708           /* Generate a random position in the box */
2709           x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state->box[XX][XX];
2710           x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state->box[YY][YY];
2711           x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state->box[ZZ][ZZ];
2712         }
2713         if (inputrec->nstlist == 1) {
2714           copy_rvec(x_init,x_tp);
2715         } else {
2716           /* Generate coordinates within |dx|=drmax of x_init */
2717           do {
2718             dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2719             dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2720             dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2721           } while (norm2(dx) > drmax*drmax);
2722           rvec_add(x_init,dx,x_tp);
2723         }
2724       } else {
2725         /* Random insertion around a cavity location
2726          * given by the last coordinate of the trajectory.
2727          */
2728         if (step == 0) {
2729           if (nat_cavity == 1) {
2730             /* Copy the location of the cavity */
2731             copy_rvec(rerun_fr.x[rerun_fr.natoms-1],x_init);
2732           } else {
2733             /* Determine the center of mass of the last molecule */
2734             clear_rvec(x_init);
2735             mass_tot = 0;
2736             for(i=0; i<nat_cavity; i++) {
2737               for(d=0; d<DIM; d++)
2738                 x_init[d] +=
2739                   mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
2740               mass_tot += mass_cavity[i];
2741             }
2742             for(d=0; d<DIM; d++)
2743               x_init[d] /= mass_tot;
2744           }
2745         }
2746         /* Generate coordinates within |dx|=drmax of x_init */
2747         do {
2748           dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2749           dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2750           dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2751         } while (norm2(dx) > drmax*drmax);
2752         rvec_add(x_init,dx,x_tp);
2753       }
2754
2755       if (a_tp1 - a_tp0 == 1) {
2756         /* Insert a single atom, just copy the insertion location */
2757         copy_rvec(x_tp,state->x[a_tp0]);
2758       } else {
2759         /* Copy the coordinates from the top file */
2760         for(i=a_tp0; i<a_tp1; i++)
2761           copy_rvec(x_mol[i-a_tp0],state->x[i]);
2762         /* Rotate the molecule randomly */
2763         rotate_conf(a_tp1-a_tp0,state->x+a_tp0,NULL,
2764                     2*M_PI*gmx_rng_uniform_real(tpi_rand),
2765                     2*M_PI*gmx_rng_uniform_real(tpi_rand),
2766                     2*M_PI*gmx_rng_uniform_real(tpi_rand));
2767         /* Shift to the insertion location */
2768         for(i=a_tp0; i<a_tp1; i++)
2769           rvec_inc(state->x[i],x_tp);
2770       }
2771
2772       /* Check if this insertion belongs to this node */
2773       bOurStep = TRUE;
2774       if (PAR(cr)) {
2775         switch (inputrec->eI) {
2776         case eiTPI:
2777           bOurStep = ((step / inputrec->nstlist) % nnodes == cr->nodeid);
2778           break;
2779         case eiTPIC:
2780           bOurStep = (step % nnodes == cr->nodeid);
2781           break;
2782         default:
2783           gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec->eI]);
2784         }
2785       }
2786       if (bOurStep) {
2787         /* Clear some matrix variables  */
2788         clear_mat(force_vir);
2789         clear_mat(shake_vir);
2790         clear_mat(vir);
2791         clear_mat(pres);
2792
2793         /* Set the charge group center of mass of the test particle */
2794         copy_rvec(x_init,fr->cg_cm[top->cgs.nr-1]);
2795
2796         /* Calc energy (no forces) on new positions.
2797          * Since we only need the intermolecular energy
2798          * and the RF exclusion terms of the inserted molecule occur
2799          * within a single charge group we can pass NULL for the graph.
2800          * This also avoids shifts that would move charge groups
2801          * out of the box.
2802          */
2803         /* Make do_force do a single node fore calculation */
2804         cr->nnodes = 1;
2805         do_force(fplog,cr,inputrec,
2806                  step,nrnb,wcycle,top,top_global,&top_global->groups,
2807                  rerun_fr.box,state->x,&state->hist,
2808                  f,force_vir,mdatoms,enerd,fcd,
2809                  lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL,FALSE,
2810                  GMX_FORCE_NONBONDED |
2811                  (bNS ? GMX_FORCE_NS : 0) |
2812                  (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
2813         cr->nnodes = nnodes;
2814         bStateChanged = FALSE;
2815         bNS = FALSE;
2816
2817         /* Calculate long range corrections to pressure and energy */
2818         calc_dispcorr(fplog,inputrec,fr,step,mdatoms->nr,rerun_fr.box,lambda,
2819                       pres,vir,enerd);
2820
2821         /* If the compiler doesn't optimize this check away
2822          * we catch the NAN energies.
2823          * With tables extreme negative energies might occur close to r=0.
2824          */
2825         epot = enerd->term[F_EPOT];
2826         if (epot != epot || epot*beta < bU_neg_limit) {
2827           if (debug)
2828             fprintf(debug,"\n  time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n",t,step,epot);
2829           embU = 0;
2830         } else {
2831           {
2832             real eo;
2833             printf(pdbformat,"ATOM",step,"CA","GLY",' ',step,' ',
2834                    x_tp[XX]*10,x_tp[YY]*10,x_tp[ZZ]*10);
2835             eo = epot*0.05;
2836             printf("%6.2f%6.2f\n",
2837                    1.0,
2838                    eo < -9.99 ? -9.99 : eo > 9.99 ? 9.99 : eo);
2839           }
2840           embU = exp(-beta*epot);
2841           sum_embU += embU;
2842           /* Determine the weighted energy contributions of each energy group */
2843           e = 0;
2844           sum_UgembU[e++] += epot*embU;
2845           if (fr->bBHAM) {
2846             for(i=0; i<ngid; i++)
2847               sum_UgembU[e++] +=
2848                 (enerd->grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] +
2849                  enerd->grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU;
2850           } else {
2851             for(i=0; i<ngid; i++)
2852               sum_UgembU[e++] +=
2853                 (enerd->grpp.ener[egLJSR][GID(i,gid_tp,ngid)] +
2854                  enerd->grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU;
2855           }
2856           if (bDispCorr)
2857             sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
2858           if (bCharge) {
2859             for(i=0; i<ngid; i++)
2860               sum_UgembU[e++] +=
2861                 (enerd->grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] +
2862                  enerd->grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU;
2863             if (bRFExcl)
2864               sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
2865             if (EEL_FULL(fr->eeltype))
2866               sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
2867           }
2868         }
2869
2870         if (embU == 0 || beta*epot > bU_bin_limit) {
2871           bin[0]++;
2872         } else {
2873           i = (int)((bU_logV_bin_limit
2874                      - (beta*epot - logV + refvolshift))*invbinw
2875                     + 0.5);
2876           if (i < 0)
2877             i = 0;
2878           if (i >= nbin)
2879             realloc_bins(&bin,&nbin,i+10);
2880           bin[i]++;
2881         }
2882
2883         if (debug)
2884           fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
2885                   step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]);
2886
2887         if (dump_pdb && epot <= dump_ener) {
2888           sprintf(str,"t%g_step%d.pdb",t,step);
2889           sprintf(str2,"t: %f step %d ener: %f",t,step,epot);
2890           write_sto_conf_mtop(str,str2,top_global,state->x,state->v,
2891                               inputrec->ePBC,state->box);
2892         }
2893       }
2894     }
2895
2896     if (PAR(cr)) {
2897       /* When running in parallel sum the energies over the processes */
2898       gmx_sumd(1,    &sum_embU, cr);
2899       gmx_sumd(nener,sum_UgembU,cr);
2900     }
2901
2902     frame++;
2903     V_all += V;
2904     VembU_all += V*sum_embU/nsteps;
2905
2906     if (fp_tpi) {
2907       if (bVerbose || frame%10==0 || frame<10)
2908         fprintf(stderr,"mu %10.3e <mu> %10.3e\n",
2909                 -log(sum_embU/nsteps)/beta,-log(VembU_all/V_all)/beta);
2910
2911       fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",
2912               t,
2913               VembU_all==0 ? 20/beta : -log(VembU_all/V_all)/beta,
2914               sum_embU==0  ? 20/beta : -log(sum_embU/nsteps)/beta,
2915               sum_embU/nsteps,V);
2916       for(e=0; e<nener; e++)
2917         fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);
2918       fprintf(fp_tpi,"\n");
2919       fflush(fp_tpi);
2920     }
2921
2922     bNotLastFrame = read_next_frame(status,&rerun_fr);
2923   } /* End of the loop  */
2924
2925   runtime_end(runtime);
2926
2927   close_trj(status);
2928
2929   if (fp_tpi)
2930     gmx_fio_fclose(fp_tpi);
2931
2932   if (fplog) {
2933     fprintf(fplog,"\n");
2934     fprintf(fplog,"  <V>  = %12.5e nm^3\n",V_all/frame);
2935     fprintf(fplog,"  <mu> = %12.5e kJ/mol\n",-log(VembU_all/V_all)/beta);
2936   }
2937
2938   /* Write the Boltzmann factor histogram */
2939   if (PAR(cr)) {
2940     /* When running in parallel sum the bins over the processes */
2941     i = nbin;
2942     global_max(cr,&i);
2943     realloc_bins(&bin,&nbin,i);
2944     gmx_sumd(nbin,bin,cr);
2945   }
2946   fp_tpi = xvgropen(opt2fn("-tpid",nfile,fnm),
2947                     "TPI energy distribution",
2948                     "\\8b\\4U - log(V/<V>)","count");
2949   sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]);
2950   xvgr_subtitle(fp_tpi,str);
2951   xvgr_legend(fp_tpi,2,(char **)tpid_leg);
2952   for(i=nbin-1; i>0; i--) {
2953     bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
2954     fprintf(fp_tpi,"%6.2f %10d %12.5e\n",
2955             bUlogV,
2956             (int)(bin[i]+0.5),
2957             bin[i]*exp(-bUlogV)*V_all/VembU_all);
2958   }
2959   gmx_fio_fclose(fp_tpi);
2960   sfree(bin);
2961
2962   sfree(sum_UgembU);
2963
2964   runtime->nsteps_done = frame*inputrec->nsteps;
2965
2966   return 0;
2967 }