src/mdlib/minimize.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <string.h>
  41 #include <time.h>
  42 #include <math.h>
  43 #include "sysstuff.h"
  44 #include "string2.h"
  45 #include "network.h"
  46 #include "confio.h"
  47 #include "copyrite.h"
  48 #include "smalloc.h"
  49 #include "nrnb.h"
  50 #include "main.h"
  51 #include "force.h"
  52 #include "macros.h"
  53 #include "random.h"
  54 #include "names.h"
  55 #include "gmx_fatal.h"
  56 #include "txtdump.h"
  57 #include "typedefs.h"
  58 #include "update.h"
  59 #include "constr.h"
  60 #include "vec.h"
  61 #include "statutil.h"
  62 #include "tgroup.h"
  63 #include "mdebin.h"
  64 #include "vsite.h"
  65 #include "force.h"
  66 #include "mdrun.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "trnio.h"
  70 #include "sparsematrix.h"
  71 #include "mtxio.h"
  72 #include "mdatoms.h"
  73 #include "ns.h"
  74 #include "gmx_wallcycle.h"
  75 #include "mtop_util.h"
  76 #include "gmxfio.h"
  77 #include "pme.h"
  78
  79 typedef struct {
  80   t_state s;
  81   rvec    *f;
  82   real    epot;
  83   real    fnorm;
  84   real    fmax;
  85   int     a_fmax;
  86 } em_state_t;
  87
  88 static em_state_t *init_em_state()
  89 {
  90   em_state_t *ems;
  91
  92   snew(ems,1);
  93
  94   return ems;
  95 }
  96
  97 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
  98                            gmx_wallcycle_t wcycle,
  99                            const char *name)
 100 {
 101     char buf[STRLEN];
 102
 103     runtime_start(runtime);
 104
 105     sprintf(buf,"Started %s",name);
 106     print_date_and_time(fplog,cr->nodeid,buf,NULL);
 107
 108     wallcycle_start(wcycle,ewcRUN);
 109 }
 110 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 111                         gmx_wallcycle_t wcycle)
 112 {
 113     wallcycle_stop(wcycle,ewcRUN);
 114
 115     runtime_end(runtime);
 116 }
 117
 118 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 119 {
 120     fprintf(out,"\n");
 121     fprintf(out,"%s:\n",minimizer);
 122     fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 123     fprintf(out,"   Number of steps    = %12d\n",nsteps);
 124 }
 125
 126 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 127 {
 128     if (bLastStep)
 129     {
 130         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 131     }
 132     else
 133     {
 134         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 135                 "Converged to machine precision,\n"
 136                 "but not to the requested precision Fmax < %g\n",
 137                 ftol);
 138         if (sizeof(real)<sizeof(double))
 139         {
 140             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 141         }
 142         if (bConstrain)
 143         {
 144             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 145                     "off constraints alltogether (set constraints = none in mdp file)\n");
 146         }
 147     }
 148 }
 149
 150
 151
 152 static void print_converged(FILE *fp,const char *alg,real ftol,
 153                             gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 154                             real epot,real fmax, int nfmax, real fnorm)
 155 {
 156   char buf[STEPSTRSIZE];
 157
 158   if (bDone)
 159     fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 160             alg,ftol,gmx_step_str(count,buf));
 161   else if(count<nsteps)
 162     fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 163                "but did not reach the requested Fmax < %g.\n",
 164             alg,gmx_step_str(count,buf),ftol);
 165   else
 166     fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 167             alg,ftol,gmx_step_str(count,buf));
 168
 169 #ifdef GMX_DOUBLE
 170   fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 171   fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 172   fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 173 #else
 174   fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 175   fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 176   fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 177 #endif
 178 }
 179
 180 static void get_f_norm_max(t_commrec *cr,
 181                            t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 182                            real *fnorm,real *fmax,int *a_fmax)
 183 {
 184   double fnorm2,*sum;
 185   real fmax2,fmax2_0,fam;
 186   int  la_max,a_max,start,end,i,m,gf;
 187
 188   /* This routine finds the largest force and returns it.
 189    * On parallel machines the global max is taken.
 190    */
 191   fnorm2 = 0;
 192   fmax2 = 0;
 193   la_max = -1;
 194   gf = 0;
 195   start = mdatoms->start;
 196   end   = mdatoms->homenr + start;
 197   if (mdatoms->cFREEZE) {
 198     for(i=start; i<end; i++) {
 199       gf = mdatoms->cFREEZE[i];
 200       fam = 0;
 201       for(m=0; m<DIM; m++)
 202         if (!opts->nFreeze[gf][m])
 203           fam += sqr(f[i][m]);
 204       fnorm2 += fam;
 205       if (fam > fmax2) {
 206         fmax2  = fam;
 207         la_max = i;
 208       }
 209     }
 210   } else {
 211     for(i=start; i<end; i++) {
 212       fam = norm2(f[i]);
 213       fnorm2 += fam;
 214       if (fam > fmax2) {
 215         fmax2  = fam;
 216         la_max = i;
 217       }
 218     }
 219   }
 220
 221   if (la_max >= 0 && DOMAINDECOMP(cr)) {
 222     a_max = cr->dd->gatindex[la_max];
 223   } else {
 224     a_max = la_max;
 225   }
 226   if (PAR(cr)) {
 227     snew(sum,2*cr->nnodes+1);
 228     sum[2*cr->nodeid]   = fmax2;
 229     sum[2*cr->nodeid+1] = a_max;
 230     sum[2*cr->nnodes]   = fnorm2;
 231     gmx_sumd(2*cr->nnodes+1,sum,cr);
 232     fnorm2 = sum[2*cr->nnodes];
 233     /* Determine the global maximum */
 234     for(i=0; i<cr->nnodes; i++) {
 235       if (sum[2*i] > fmax2) {
 236         fmax2 = sum[2*i];
 237         a_max = (int)(sum[2*i+1] + 0.5);
 238       }
 239     }
 240     sfree(sum);
 241   }
 242
 243   if (fnorm)
 244     *fnorm = sqrt(fnorm2);
 245   if (fmax)
 246     *fmax  = sqrt(fmax2);
 247   if (a_fmax)
 248     *a_fmax = a_max;
 249 }
 250
 251 static void get_state_f_norm_max(t_commrec *cr,
 252                            t_grpopts *opts,t_mdatoms *mdatoms,
 253                            em_state_t *ems)
 254 {
 255   get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 256 }
 257
 258 void init_em(FILE *fplog,const char *title,
 259              t_commrec *cr,t_inputrec *ir,
 260              t_state *state_global,gmx_mtop_t *top_global,
 261              em_state_t *ems,gmx_localtop_t **top,
 262              rvec **f,rvec **f_global,
 263              t_nrnb *nrnb,rvec mu_tot,
 264              t_forcerec *fr,gmx_enerdata_t **enerd,
 265              t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 266              gmx_vsite_t *vsite,gmx_constr_t constr,
 267              int nfile,const t_filenm fnm[],
 268              gmx_mdoutf_t **outf,t_mdebin **mdebin)
 269 {
 270     int  start,homenr,i;
 271     real dvdlambda;
 272
 273     if (fplog)
 274     {
 275         fprintf(fplog,"Initiating %s\n",title);
 276     }
 277
 278     state_global->ngtc = 0;
 279
 280     /* Initiate some variables */
 281     if (ir->efep != efepNO)
 282     {
 283         state_global->lambda = ir->init_lambda;
 284     }
 285     else
 286     {
 287         state_global->lambda = 0.0;
 288     }
 289
 290     init_nrnb(nrnb);
 291
 292     if (DOMAINDECOMP(cr))
 293     {
 294         *top = dd_init_local_top(top_global);
 295
 296         dd_init_local_state(cr->dd,state_global,&ems->s);
 297
 298         *f = NULL;
 299
 300         /* Distribute the charge groups over the nodes from the master node */
 301         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 302                             state_global,top_global,ir,
 303                             &ems->s,&ems->f,mdatoms,*top,
 304                             fr,vsite,NULL,constr,
 305                             nrnb,NULL,FALSE);
 306         dd_store_state(cr->dd,&ems->s);
 307
 308         if (ir->nstfout)
 309         {
 310             snew(*f_global,top_global->natoms);
 311         }
 312         else
 313         {
 314             *f_global = NULL;
 315         }
 316         *graph = NULL;
 317     }
 318     else
 319     {
 320         snew(*f,top_global->natoms);
 321
 322         /* Just copy the state */
 323         ems->s = *state_global;
 324         snew(ems->s.x,ems->s.nalloc);
 325         snew(ems->f,ems->s.nalloc);
 326         for(i=0; i<state_global->natoms; i++)
 327         {
 328             copy_rvec(state_global->x[i],ems->s.x[i]);
 329         }
 330         copy_mat(state_global->box,ems->s.box);
 331
 332         if (PAR(cr) && ir->eI != eiNM)
 333         {
 334             /* Initialize the particle decomposition and split the topology */
 335             *top = split_system(fplog,top_global,ir,cr);
 336
 337             pd_cg_range(cr,&fr->cg0,&fr->hcg);
 338         }
 339         else
 340         {
 341             *top = gmx_mtop_generate_local_top(top_global,ir);
 342         }
 343         *f_global = *f;
 344
 345         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 346         {
 347             *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 348         }
 349         else
 350         {
 351             *graph = NULL;
 352         }
 353
 354         if (PARTDECOMP(cr))
 355         {
 356             pd_at_range(cr,&start,&homenr);
 357             homenr -= start;
 358         }
 359         else
 360         {
 361             start  = 0;
 362             homenr = top_global->natoms;
 363         }
 364         atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 365         update_mdatoms(mdatoms,state_global->lambda);
 366
 367         if (vsite)
 368         {
 369             set_vsite_top(vsite,*top,mdatoms,cr);
 370         }
 371     }
 372
 373     if (constr)
 374     {
 375         if (ir->eConstrAlg == econtSHAKE &&
 376             gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 377         {
 378             gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 379                       econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 380         }
 381
 382         if (!DOMAINDECOMP(cr))
 383         {
 384             set_constraints(constr,*top,ir,mdatoms,cr);
 385         }
 386
 387         if (!ir->bContinuation)
 388         {
 389             /* Constrain the starting coordinates */
 390             dvdlambda=0;
 391             constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 392                       ir,NULL,cr,-1,0,mdatoms,
 393                       ems->s.x,ems->s.x,NULL,ems->s.box,
 394                       ems->s.lambda,&dvdlambda,
 395                       NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 396         }
 397     }
 398
 399     if (PAR(cr))
 400     {
 401         *gstat = global_stat_init(ir);
 402     }
 403
 404     *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 405
 406     snew(*enerd,1);
 407     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 408
 409     if (mdebin != NULL)
 410     {
 411         /* Init bin for energy stuff */
 412         *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 413     }
 414
 415     clear_rvec(mu_tot);
 416     calc_shifts(ems->s.box,fr->shift_vec);
 417 }
 418
 419 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 420                       gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 421 {
 422   if (!(cr->duty & DUTY_PME)) {
 423     /* Tell the PME only node to finish */
 424     gmx_pme_finish(cr);
 425   }
 426
 427   done_mdoutf(outf);
 428
 429   em_time_end(fplog,cr,runtime,wcycle);
 430 }
 431
 432 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 433 {
 434   em_state_t tmp;
 435
 436   tmp   = *ems1;
 437   *ems1 = *ems2;
 438   *ems2 = tmp;
 439 }
 440
 441 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
 442 {
 443   int i;
 444
 445   for(i=0; (i<state->natoms); i++)
 446     copy_rvec(ems->s.x[i],state->x[i]);
 447   if (f != NULL)
 448     copy_rvec(ems->f[i],f[i]);
 449 }
 450
 451 static void write_em_traj(FILE *fplog,t_commrec *cr,
 452                           gmx_mdoutf_t *outf,
 453                           gmx_bool bX,gmx_bool bF,const char *confout,
 454                           gmx_mtop_t *top_global,
 455                           t_inputrec *ir,gmx_large_int_t step,
 456                           em_state_t *state,
 457                           t_state *state_global,rvec *f_global)
 458 {
 459     int mdof_flags;
 460
 461     if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 462     {
 463         f_global = state->f;
 464         copy_em_coords_back(state,state_global,bF ? f_global : NULL);
 465     }
 466
 467     mdof_flags = 0;
 468     if (bX) { mdof_flags |= MDOF_X; }
 469     if (bF) { mdof_flags |= MDOF_F; }
 470     write_traj(fplog,cr,outf,mdof_flags,
 471                top_global,step,(double)step,
 472                &state->s,state_global,state->f,f_global,NULL,NULL);
 473
 474     if (confout != NULL && MASTER(cr))
 475     {
 476         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 477         {
 478             /* Make molecules whole only for confout writing */
 479             do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 480                         state_global->x);
 481         }
 482
 483         write_sto_conf_mtop(confout,
 484                             *top_global->name,top_global,
 485                             state_global->x,NULL,ir->ePBC,state_global->box);
 486     }
 487 }
 488
 489 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 490                        em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 491                        gmx_constr_t constr,gmx_localtop_t *top,
 492                        t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 493                        gmx_large_int_t count)
 494
 495 {
 496   t_state *s1,*s2;
 497   int  start,end,gf,i,m;
 498   rvec *x1,*x2;
 499   real dvdlambda;
 500
 501   s1 = &ems1->s;
 502   s2 = &ems2->s;
 503
 504   if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 505     gmx_incons("state mismatch in do_em_step");
 506
 507   s2->flags = s1->flags;
 508
 509   if (s2->nalloc != s1->nalloc) {
 510     s2->nalloc = s1->nalloc;
 511     srenew(s2->x,s1->nalloc);
 512     srenew(ems2->f,  s1->nalloc);
 513     if (s2->flags & (1<<estCGP))
 514       srenew(s2->cg_p,  s1->nalloc);
 515   }
 516
 517   s2->natoms = s1->natoms;
 518   s2->lambda = s1->lambda;
 519   copy_mat(s1->box,s2->box);
 520
 521   start = md->start;
 522   end   = md->start + md->homenr;
 523
 524   x1 = s1->x;
 525   x2 = s2->x;
 526   gf = 0;
 527   for(i=start; i<end; i++) {
 528     if (md->cFREEZE)
 529       gf = md->cFREEZE[i];
 530     for(m=0; m<DIM; m++) {
 531       if (ir->opts.nFreeze[gf][m])
 532         x2[i][m] = x1[i][m];
 533       else
 534         x2[i][m] = x1[i][m] + a*f[i][m];
 535     }
 536   }
 537
 538   if (s2->flags & (1<<estCGP)) {
 539     /* Copy the CG p vector */
 540     x1 = s1->cg_p;
 541     x2 = s2->cg_p;
 542     for(i=start; i<end; i++)
 543       copy_rvec(x1[i],x2[i]);
 544   }
 545
 546   if (DOMAINDECOMP(cr)) {
 547     s2->ddp_count = s1->ddp_count;
 548     if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 549       s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 550       srenew(s2->cg_gl,s2->cg_gl_nalloc);
 551     }
 552     s2->ncg_gl = s1->ncg_gl;
 553     for(i=0; i<s2->ncg_gl; i++)
 554       s2->cg_gl[i] = s1->cg_gl[i];
 555     s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 556   }
 557
 558   if (constr) {
 559     wallcycle_start(wcycle,ewcCONSTR);
 560     dvdlambda = 0;
 561     constrain(NULL,TRUE,TRUE,constr,&top->idef,
 562               ir,NULL,cr,count,0,md,
 563               s1->x,s2->x,NULL,s2->box,s2->lambda,
 564               &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 565     wallcycle_stop(wcycle,ewcCONSTR);
 566   }
 567 }
 568
 569 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
 570
 571 {
 572   int  start,end,i,m;
 573
 574   if (DOMAINDECOMP(cr)) {
 575     start = 0;
 576     end   = cr->dd->nat_home;
 577   } else if (PARTDECOMP(cr)) {
 578     pd_at_range(cr,&start,&end);
 579   } else {
 580     start = 0;
 581     end   = n;
 582   }
 583
 584   for(i=start; i<end; i++) {
 585     for(m=0; m<DIM; m++) {
 586       x2[i][m] = x1[i][m] + a*f[i][m];
 587     }
 588   }
 589 }
 590
 591 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
 592
 593 {
 594   int  start,end,i,m;
 595
 596   if (DOMAINDECOMP(cr)) {
 597     start = 0;
 598     end   = cr->dd->nat_home;
 599   } else if (PARTDECOMP(cr)) {
 600     pd_at_range(cr,&start,&end);
 601   } else {
 602     start = 0;
 603     end   = n;
 604   }
 605
 606   for(i=start; i<end; i++) {
 607     for(m=0; m<DIM; m++) {
 608       f[i][m] = (x1[i][m] - x2[i][m])*a;
 609     }
 610   }
 611 }
 612
 613 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 614                                    gmx_mtop_t *top_global,t_inputrec *ir,
 615                                    em_state_t *ems,gmx_localtop_t *top,
 616                                    t_mdatoms *mdatoms,t_forcerec *fr,
 617                                    gmx_vsite_t *vsite,gmx_constr_t constr,
 618                                    t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 619 {
 620     /* Repartition the domain decomposition */
 621     wallcycle_start(wcycle,ewcDOMDEC);
 622     dd_partition_system(fplog,step,cr,FALSE,1,
 623                         NULL,top_global,ir,
 624                         &ems->s,&ems->f,
 625                         mdatoms,top,fr,vsite,NULL,constr,
 626                         nrnb,wcycle,FALSE);
 627     dd_store_state(cr->dd,&ems->s);
 628     wallcycle_stop(wcycle,ewcDOMDEC);
 629 }
 630
 631 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 632                             t_state *state_global,gmx_mtop_t *top_global,
 633                             em_state_t *ems,gmx_localtop_t *top,
 634                             t_inputrec *inputrec,
 635                             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 636                             gmx_global_stat_t gstat,
 637                             gmx_vsite_t *vsite,gmx_constr_t constr,
 638                             t_fcdata *fcd,
 639                             t_graph *graph,t_mdatoms *mdatoms,
 640                             t_forcerec *fr,rvec mu_tot,
 641                             gmx_enerdata_t *enerd,tensor vir,tensor pres,
 642                             gmx_large_int_t count,gmx_bool bFirst)
 643 {
 644   real t;
 645   gmx_bool bNS;
 646   int  nabnsb;
 647   tensor force_vir,shake_vir,ekin;
 648   real dvdl,prescorr,enercorr,dvdlcorr;
 649   real terminate=0;
 650
 651   /* Set the time to the initial time, the time does not change during EM */
 652   t = inputrec->init_t;
 653
 654   if (bFirst ||
 655       (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 656     /* This the first state or an old state used before the last ns */
 657     bNS = TRUE;
 658   } else {
 659     bNS = FALSE;
 660     if (inputrec->nstlist > 0) {
 661       bNS = TRUE;
 662     } else if (inputrec->nstlist == -1) {
 663       nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 664       if (PAR(cr))
 665         gmx_sumi(1,&nabnsb,cr);
 666       bNS = (nabnsb > 0);
 667     }
 668   }
 669
 670   if (vsite)
 671     construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 672                      top->idef.iparams,top->idef.il,
 673                      fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 674
 675   if (DOMAINDECOMP(cr)) {
 676     if (bNS) {
 677       /* Repartition the domain decomposition */
 678       em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 679                              ems,top,mdatoms,fr,vsite,constr,
 680                              nrnb,wcycle);
 681     }
 682   }
 683
 684     /* Calc force & energy on new trial position  */
 685     /* do_force always puts the charge groups in the box and shifts again
 686      * We do not unshift, so molecules are always whole in congrad.c
 687      */
 688     do_force(fplog,cr,inputrec,
 689              count,nrnb,wcycle,top,top_global,&top_global->groups,
 690              ems->s.box,ems->s.x,&ems->s.hist,
 691              ems->f,force_vir,mdatoms,enerd,fcd,
 692              ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 693              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 694              (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 695
 696   /* Clear the unused shake virial and pressure */
 697   clear_mat(shake_vir);
 698   clear_mat(pres);
 699
 700   /* Calculate long range corrections to pressure and energy */
 701   calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 702                 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 703   /* don't think these next 4 lines  can be moved in for now, because we
 704      don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
 705   enerd->term[F_DISPCORR] = enercorr;
 706   enerd->term[F_EPOT] += enercorr;
 707   enerd->term[F_PRES] += prescorr;
 708   enerd->term[F_DVDL] += dvdlcorr;
 709
 710     /* Communicate stuff when parallel */
 711     if (PAR(cr) && inputrec->eI != eiNM)
 712     {
 713         wallcycle_start(wcycle,ewcMoveE);
 714
 715         global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 716                     inputrec,NULL,NULL,NULL,1,&terminate,
 717                     top_global,&ems->s,FALSE,
 718                     CGLO_ENERGY |
 719                     CGLO_PRESSURE |
 720                     CGLO_CONSTRAINT |
 721                     CGLO_FIRSTITERATE);
 722
 723         wallcycle_stop(wcycle,ewcMoveE);
 724     }
 725
 726   ems->epot = enerd->term[F_EPOT];
 727
 728   if (constr) {
 729     /* Project out the constraint components of the force */
 730     wallcycle_start(wcycle,ewcCONSTR);
 731     dvdl = 0;
 732     constrain(NULL,FALSE,FALSE,constr,&top->idef,
 733               inputrec,NULL,cr,count,0,mdatoms,
 734               ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 735               NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 736     if (fr->bSepDVDL && fplog)
 737       fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 738     enerd->term[F_DHDL_CON] += dvdl;
 739     m_add(force_vir,shake_vir,vir);
 740     wallcycle_stop(wcycle,ewcCONSTR);
 741   } else {
 742     copy_mat(force_vir,vir);
 743   }
 744
 745   clear_mat(ekin);
 746   enerd->term[F_PRES] =
 747     calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 748               (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 749
 750   sum_dhdl(enerd,ems->s.lambda,inputrec);
 751
 752     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 753     {
 754         get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 755     }
 756 }
 757
 758 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 759                               gmx_mtop_t *mtop,
 760                               em_state_t *s_min,em_state_t *s_b)
 761 {
 762   rvec *fm,*fb,*fmg;
 763   t_block *cgs_gl;
 764   int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 765   double partsum;
 766   unsigned char *grpnrFREEZE;
 767
 768   if (debug)
 769     fprintf(debug,"Doing reorder_partsum\n");
 770
 771   fm = s_min->f;
 772   fb = s_b->f;
 773
 774   cgs_gl = dd_charge_groups_global(cr->dd);
 775   index = cgs_gl->index;
 776
 777   /* Collect fm in a global vector fmg.
 778    * This conflicts with the spirit of domain decomposition,
 779    * but to fully optimize this a much more complicated algorithm is required.
 780    */
 781   snew(fmg,mtop->natoms);
 782
 783   ncg   = s_min->s.ncg_gl;
 784   cg_gl = s_min->s.cg_gl;
 785   i = 0;
 786   for(c=0; c<ncg; c++) {
 787     cg = cg_gl[c];
 788     a0 = index[cg];
 789     a1 = index[cg+1];
 790     for(a=a0; a<a1; a++) {
 791       copy_rvec(fm[i],fmg[a]);
 792       i++;
 793     }
 794   }
 795   gmx_sum(mtop->natoms*3,fmg[0],cr);
 796
 797   /* Now we will determine the part of the sum for the cgs in state s_b */
 798   ncg   = s_b->s.ncg_gl;
 799   cg_gl = s_b->s.cg_gl;
 800   partsum = 0;
 801   i = 0;
 802   gf = 0;
 803   grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 804   for(c=0; c<ncg; c++) {
 805     cg = cg_gl[c];
 806     a0 = index[cg];
 807     a1 = index[cg+1];
 808     for(a=a0; a<a1; a++) {
 809       if (mdatoms->cFREEZE && grpnrFREEZE) {
 810         gf = grpnrFREEZE[i];
 811       }
 812       for(m=0; m<DIM; m++) {
 813         if (!opts->nFreeze[gf][m]) {
 814           partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 815         }
 816       }
 817       i++;
 818     }
 819   }
 820
 821   sfree(fmg);
 822
 823   return partsum;
 824 }
 825
 826 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 827                     gmx_mtop_t *mtop,
 828                     em_state_t *s_min,em_state_t *s_b)
 829 {
 830   rvec *fm,*fb;
 831   double sum;
 832   int  gf,i,m;
 833
 834   /* This is just the classical Polak-Ribiere calculation of beta;
 835    * it looks a bit complicated since we take freeze groups into account,
 836    * and might have to sum it in parallel runs.
 837    */
 838
 839   if (!DOMAINDECOMP(cr) ||
 840       (s_min->s.ddp_count == cr->dd->ddp_count &&
 841        s_b->s.ddp_count   == cr->dd->ddp_count)) {
 842     fm = s_min->f;
 843     fb = s_b->f;
 844     sum = 0;
 845     gf = 0;
 846     /* This part of code can be incorrect with DD,
 847      * since the atom ordering in s_b and s_min might differ.
 848      */
 849     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 850       if (mdatoms->cFREEZE)
 851         gf = mdatoms->cFREEZE[i];
 852       for(m=0; m<DIM; m++)
 853         if (!opts->nFreeze[gf][m]) {
 854           sum += (fb[i][m] - fm[i][m])*fb[i][m];
 855         }
 856     }
 857   } else {
 858     /* We need to reorder cgs while summing */
 859     sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 860   }
 861   if (PAR(cr))
 862     gmx_sumd(1,&sum,cr);
 863
 864   return sum/sqr(s_min->fnorm);
 865 }
 866
 867 double do_cg(FILE *fplog,t_commrec *cr,
 868              int nfile,const t_filenm fnm[],
 869              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 870              int nstglobalcomm,
 871              gmx_vsite_t *vsite,gmx_constr_t constr,
 872              int stepout,
 873              t_inputrec *inputrec,
 874              gmx_mtop_t *top_global,t_fcdata *fcd,
 875              t_state *state_global,
 876              t_mdatoms *mdatoms,
 877              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 878              gmx_edsam_t ed,
 879              t_forcerec *fr,
 880              int repl_ex_nst,int repl_ex_seed,
 881              real cpt_period,real max_hours,
 882              const char *deviceOptions,
 883              unsigned long Flags,
 884              gmx_runtime_t *runtime)
 885 {
 886   const char *CG="Polak-Ribiere Conjugate Gradients";
 887
 888   em_state_t *s_min,*s_a,*s_b,*s_c;
 889   gmx_localtop_t *top;
 890   gmx_enerdata_t *enerd;
 891   rvec   *f;
 892   gmx_global_stat_t gstat;
 893   t_graph    *graph;
 894   rvec   *f_global,*p,*sf,*sfm;
 895   double gpa,gpb,gpc,tmp,sum[2],minstep;
 896   real   fnormn;
 897   real   stepsize;
 898   real   a,b,c,beta=0.0;
 899   real   epot_repl=0;
 900   real   pnorm;
 901   t_mdebin   *mdebin;
 902   gmx_bool   converged,foundlower;
 903   rvec   mu_tot;
 904   gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 905   tensor vir,pres;
 906   int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 907   gmx_mdoutf_t *outf;
 908   int    i,m,gf,step,nminstep;
 909   real   terminate=0;
 910
 911   step=0;
 912
 913   s_min = init_em_state();
 914   s_a   = init_em_state();
 915   s_b   = init_em_state();
 916   s_c   = init_em_state();
 917
 918   /* Init em and store the local state in s_min */
 919   init_em(fplog,CG,cr,inputrec,
 920           state_global,top_global,s_min,&top,&f,&f_global,
 921           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 922           nfile,fnm,&outf,&mdebin);
 923
 924   /* Print to log file */
 925   print_em_start(fplog,cr,runtime,wcycle,CG);
 926
 927   /* Max number of steps */
 928   number_steps=inputrec->nsteps;
 929
 930   if (MASTER(cr))
 931     sp_header(stderr,CG,inputrec->em_tol,number_steps);
 932   if (fplog)
 933     sp_header(fplog,CG,inputrec->em_tol,number_steps);
 934
 935   /* Call the force routine and some auxiliary (neighboursearching etc.) */
 936   /* do_force always puts the charge groups in the box and shifts again
 937    * We do not unshift, so molecules are always whole in congrad.c
 938    */
 939   evaluate_energy(fplog,bVerbose,cr,
 940                   state_global,top_global,s_min,top,
 941                   inputrec,nrnb,wcycle,gstat,
 942                   vsite,constr,fcd,graph,mdatoms,fr,
 943                   mu_tot,enerd,vir,pres,-1,TRUE);
 944   where();
 945
 946   if (MASTER(cr)) {
 947     /* Copy stuff to the energy bin for easy printing etc. */
 948     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 949                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 950                NULL,NULL,vir,pres,NULL,mu_tot,constr);
 951
 952     print_ebin_header(fplog,step,step,s_min->s.lambda);
 953     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 954                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 955   }
 956   where();
 957
 958   /* Estimate/guess the initial stepsize */
 959   stepsize = inputrec->em_stepsize/s_min->fnorm;
 960
 961   if (MASTER(cr)) {
 962     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 963             s_min->fmax,s_min->a_fmax+1);
 964     fprintf(stderr,"   F-Norm            = %12.5e\n",
 965             s_min->fnorm/sqrt(state_global->natoms));
 966     fprintf(stderr,"\n");
 967     /* and copy to the log file too... */
 968     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 969             s_min->fmax,s_min->a_fmax+1);
 970     fprintf(fplog,"   F-Norm            = %12.5e\n",
 971             s_min->fnorm/sqrt(state_global->natoms));
 972     fprintf(fplog,"\n");
 973   }
 974   /* Start the loop over CG steps.
 975    * Each successful step is counted, and we continue until
 976    * we either converge or reach the max number of steps.
 977    */
 978   converged = FALSE;
 979   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 980
 981     /* start taking steps in a new direction
 982      * First time we enter the routine, beta=0, and the direction is
 983      * simply the negative gradient.
 984      */
 985
 986     /* Calculate the new direction in p, and the gradient in this direction, gpa */
 987     p  = s_min->s.cg_p;
 988     sf = s_min->f;
 989     gpa = 0;
 990     gf = 0;
 991     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 992       if (mdatoms->cFREEZE)
 993         gf = mdatoms->cFREEZE[i];
 994       for(m=0; m<DIM; m++) {
 995         if (!inputrec->opts.nFreeze[gf][m]) {
 996           p[i][m] = sf[i][m] + beta*p[i][m];
 997           gpa -= p[i][m]*sf[i][m];
 998           /* f is negative gradient, thus the sign */
 999         } else {
1000           p[i][m] = 0;
1001         }
1002       }
1003     }
1004
1005     /* Sum the gradient along the line across CPUs */
1006     if (PAR(cr))
1007       gmx_sumd(1,&gpa,cr);
1008
1009     /* Calculate the norm of the search vector */
1010     get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1011
1012     /* Just in case stepsize reaches zero due to numerical precision... */
1013     if(stepsize<=0)
1014       stepsize = inputrec->em_stepsize/pnorm;
1015
1016     /*
1017      * Double check the value of the derivative in the search direction.
1018      * If it is positive it must be due to the old information in the
1019      * CG formula, so just remove that and start over with beta=0.
1020      * This corresponds to a steepest descent step.
1021      */
1022     if(gpa>0) {
1023       beta = 0;
1024       step--; /* Don't count this step since we are restarting */
1025       continue; /* Go back to the beginning of the big for-loop */
1026     }
1027
1028     /* Calculate minimum allowed stepsize, before the average (norm)
1029      * relative change in coordinate is smaller than precision
1030      */
1031     minstep=0;
1032     for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1033       for(m=0; m<DIM; m++) {
1034         tmp = fabs(s_min->s.x[i][m]);
1035         if(tmp < 1.0)
1036           tmp = 1.0;
1037         tmp = p[i][m]/tmp;
1038         minstep += tmp*tmp;
1039       }
1040     }
1041     /* Add up from all CPUs */
1042     if(PAR(cr))
1043       gmx_sumd(1,&minstep,cr);
1044
1045     minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1046
1047     if(stepsize<minstep) {
1048       converged=TRUE;
1049       break;
1050     }
1051
1052     /* Write coordinates if necessary */
1053     do_x = do_per_step(step,inputrec->nstxout);
1054     do_f = do_per_step(step,inputrec->nstfout);
1055
1056     write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1057                   top_global,inputrec,step,
1058                   s_min,state_global,f_global);
1059
1060     /* Take a step downhill.
1061      * In theory, we should minimize the function along this direction.
1062      * That is quite possible, but it turns out to take 5-10 function evaluations
1063      * for each line. However, we dont really need to find the exact minimum -
1064      * it is much better to start a new CG step in a modified direction as soon
1065      * as we are close to it. This will save a lot of energy evaluations.
1066      *
1067      * In practice, we just try to take a single step.
1068      * If it worked (i.e. lowered the energy), we increase the stepsize but
1069      * the continue straight to the next CG step without trying to find any minimum.
1070      * If it didn't work (higher energy), there must be a minimum somewhere between
1071      * the old position and the new one.
1072      *
1073      * Due to the finite numerical accuracy, it turns out that it is a good idea
1074      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1075      * This leads to lower final energies in the tests I've done. / Erik
1076      */
1077     s_a->epot = s_min->epot;
1078     a = 0.0;
1079     c = a + stepsize; /* reference position along line is zero */
1080
1081     if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1082       em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1083                              s_min,top,mdatoms,fr,vsite,constr,
1084                              nrnb,wcycle);
1085     }
1086
1087     /* Take a trial step (new coords in s_c) */
1088     do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1089                constr,top,nrnb,wcycle,-1);
1090
1091     neval++;
1092     /* Calculate energy for the trial step */
1093     evaluate_energy(fplog,bVerbose,cr,
1094                     state_global,top_global,s_c,top,
1095                     inputrec,nrnb,wcycle,gstat,
1096                     vsite,constr,fcd,graph,mdatoms,fr,
1097                     mu_tot,enerd,vir,pres,-1,FALSE);
1098
1099     /* Calc derivative along line */
1100     p  = s_c->s.cg_p;
1101     sf = s_c->f;
1102     gpc=0;
1103     for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1104       for(m=0; m<DIM; m++)
1105           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
1106     }
1107     /* Sum the gradient along the line across CPUs */
1108     if (PAR(cr))
1109       gmx_sumd(1,&gpc,cr);
1110
1111     /* This is the max amount of increase in energy we tolerate */
1112     tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1113
1114     /* Accept the step if the energy is lower, or if it is not significantly higher
1115      * and the line derivative is still negative.
1116      */
1117     if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1118       foundlower = TRUE;
1119       /* Great, we found a better energy. Increase step for next iteration
1120        * if we are still going down, decrease it otherwise
1121        */
1122       if(gpc<0)
1123         stepsize *= 1.618034;  /* The golden section */
1124       else
1125         stepsize *= 0.618034;  /* 1/golden section */
1126     } else {
1127       /* New energy is the same or higher. We will have to do some work
1128        * to find a smaller value in the interval. Take smaller step next time!
1129        */
1130       foundlower = FALSE;
1131       stepsize *= 0.618034;
1132     }
1133
1134
1135
1136
1137     /* OK, if we didn't find a lower value we will have to locate one now - there must
1138      * be one in the interval [a=0,c].
1139      * The same thing is valid here, though: Don't spend dozens of iterations to find
1140      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1141      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1142      *
1143      * I also have a safeguard for potentially really patological functions so we never
1144      * take more than 20 steps before we give up ...
1145      *
1146      * If we already found a lower value we just skip this step and continue to the update.
1147      */
1148     if (!foundlower) {
1149       nminstep=0;
1150
1151       do {
1152         /* Select a new trial point.
1153          * If the derivatives at points a & c have different sign we interpolate to zero,
1154          * otherwise just do a bisection.
1155          */
1156         if(gpa<0 && gpc>0)
1157           b = a + gpa*(a-c)/(gpc-gpa);
1158         else
1159           b = 0.5*(a+c);
1160
1161         /* safeguard if interpolation close to machine accuracy causes errors:
1162          * never go outside the interval
1163          */
1164         if(b<=a || b>=c)
1165           b = 0.5*(a+c);
1166
1167         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1168           /* Reload the old state */
1169           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1170                                  s_min,top,mdatoms,fr,vsite,constr,
1171                                  nrnb,wcycle);
1172         }
1173
1174         /* Take a trial step to this new point - new coords in s_b */
1175         do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1176                    constr,top,nrnb,wcycle,-1);
1177
1178         neval++;
1179         /* Calculate energy for the trial step */
1180         evaluate_energy(fplog,bVerbose,cr,
1181                         state_global,top_global,s_b,top,
1182                         inputrec,nrnb,wcycle,gstat,
1183                         vsite,constr,fcd,graph,mdatoms,fr,
1184                         mu_tot,enerd,vir,pres,-1,FALSE);
1185
1186         /* p does not change within a step, but since the domain decomposition
1187          * might change, we have to use cg_p of s_b here.
1188          */
1189         p  = s_b->s.cg_p;
1190         sf = s_b->f;
1191         gpb=0;
1192         for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1193           for(m=0; m<DIM; m++)
1194               gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
1195         }
1196         /* Sum the gradient along the line across CPUs */
1197         if (PAR(cr))
1198           gmx_sumd(1,&gpb,cr);
1199
1200         if (debug)
1201           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1202                   s_a->epot,s_b->epot,s_c->epot,gpb);
1203
1204         epot_repl = s_b->epot;
1205
1206         /* Keep one of the intervals based on the value of the derivative at the new point */
1207         if (gpb > 0) {
1208           /* Replace c endpoint with b */
1209           swap_em_state(s_b,s_c);
1210           c = b;
1211           gpc = gpb;
1212         } else {
1213           /* Replace a endpoint with b */
1214           swap_em_state(s_b,s_a);
1215           a = b;
1216           gpa = gpb;
1217         }
1218
1219         /*
1220          * Stop search as soon as we find a value smaller than the endpoints.
1221          * Never run more than 20 steps, no matter what.
1222          */
1223         nminstep++;
1224       } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1225                (nminstep < 20));
1226
1227       if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1228           nminstep >= 20) {
1229         /* OK. We couldn't find a significantly lower energy.
1230          * If beta==0 this was steepest descent, and then we give up.
1231          * If not, set beta=0 and restart with steepest descent before quitting.
1232          */
1233         if (beta == 0.0) {
1234           /* Converged */
1235           converged = TRUE;
1236           break;
1237         } else {
1238           /* Reset memory before giving up */
1239           beta = 0.0;
1240           continue;
1241         }
1242       }
1243
1244       /* Select min energy state of A & C, put the best in B.
1245        */
1246       if (s_c->epot < s_a->epot) {
1247         if (debug)
1248           fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1249                   s_c->epot,s_a->epot);
1250         swap_em_state(s_b,s_c);
1251         gpb = gpc;
1252         b = c;
1253       } else {
1254         if (debug)
1255           fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1256                   s_a->epot,s_c->epot);
1257         swap_em_state(s_b,s_a);
1258         gpb = gpa;
1259         b = a;
1260       }
1261
1262     } else {
1263       if (debug)
1264         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1265                 s_c->epot);
1266       swap_em_state(s_b,s_c);
1267       gpb = gpc;
1268       b = c;
1269     }
1270
1271     /* new search direction */
1272     /* beta = 0 means forget all memory and restart with steepest descents. */
1273     if (nstcg && ((step % nstcg)==0))
1274       beta = 0.0;
1275     else {
1276       /* s_min->fnorm cannot be zero, because then we would have converged
1277        * and broken out.
1278        */
1279
1280       /* Polak-Ribiere update.
1281        * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1282        */
1283       beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1284     }
1285     /* Limit beta to prevent oscillations */
1286     if (fabs(beta) > 5.0)
1287       beta = 0.0;
1288
1289
1290     /* update positions */
1291     swap_em_state(s_min,s_b);
1292     gpa = gpb;
1293
1294     /* Print it if necessary */
1295     if (MASTER(cr)) {
1296       if(bVerbose)
1297         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1298                 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1299                 s_min->fmax,s_min->a_fmax+1);
1300       /* Store the new (lower) energies */
1301       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1302                  mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1303                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1304       do_log = do_per_step(step,inputrec->nstlog);
1305       do_ene = do_per_step(step,inputrec->nstenergy);
1306       if(do_log)
1307         print_ebin_header(fplog,step,step,s_min->s.lambda);
1308       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1309                  do_log ? fplog : NULL,step,step,eprNORMAL,
1310                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1311     }
1312
1313     /* Stop when the maximum force lies below tolerance.
1314      * If we have reached machine precision, converged is already set to true.
1315      */
1316     converged = converged || (s_min->fmax < inputrec->em_tol);
1317
1318   } /* End of the loop */
1319
1320   if (converged)
1321     step--; /* we never took that last step in this case */
1322
1323     if (s_min->fmax > inputrec->em_tol)
1324     {
1325         if (MASTER(cr))
1326         {
1327             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1328             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1329         }
1330         converged = FALSE;
1331     }
1332
1333   if (MASTER(cr)) {
1334     /* If we printed energy and/or logfile last step (which was the last step)
1335      * we don't have to do it again, but otherwise print the final values.
1336      */
1337     if(!do_log) {
1338       /* Write final value to log since we didn't do anything the last step */
1339       print_ebin_header(fplog,step,step,s_min->s.lambda);
1340     }
1341     if (!do_ene || !do_log) {
1342       /* Write final energy file entries */
1343       print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1344                  !do_log ? fplog : NULL,step,step,eprNORMAL,
1345                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1346     }
1347   }
1348
1349   /* Print some stuff... */
1350   if (MASTER(cr))
1351     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1352
1353   /* IMPORTANT!
1354    * For accurate normal mode calculation it is imperative that we
1355    * store the last conformation into the full precision binary trajectory.
1356    *
1357    * However, we should only do it if we did NOT already write this step
1358    * above (which we did if do_x or do_f was true).
1359    */
1360   do_x = !do_per_step(step,inputrec->nstxout);
1361   do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1362
1363   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1364                 top_global,inputrec,step,
1365                 s_min,state_global,f_global);
1366
1367   fnormn = s_min->fnorm/sqrt(state_global->natoms);
1368
1369   if (MASTER(cr)) {
1370     print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1371                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1372     print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1373                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1374
1375     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1376   }
1377
1378   finish_em(fplog,cr,outf,runtime,wcycle);
1379
1380   /* To print the actual number of steps we needed somewhere */
1381   runtime->nsteps_done = step;
1382
1383   return 0;
1384 } /* That's all folks */
1385
1386
1387 double do_lbfgs(FILE *fplog,t_commrec *cr,
1388                 int nfile,const t_filenm fnm[],
1389                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1390                 int nstglobalcomm,
1391                 gmx_vsite_t *vsite,gmx_constr_t constr,
1392                 int stepout,
1393                 t_inputrec *inputrec,
1394                 gmx_mtop_t *top_global,t_fcdata *fcd,
1395                 t_state *state,
1396                 t_mdatoms *mdatoms,
1397                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1398                 gmx_edsam_t ed,
1399                 t_forcerec *fr,
1400                 int repl_ex_nst,int repl_ex_seed,
1401                 real cpt_period,real max_hours,
1402                 const char *deviceOptions,
1403                 unsigned long Flags,
1404                 gmx_runtime_t *runtime)
1405 {
1406   static const char *LBFGS="Low-Memory BFGS Minimizer";
1407   em_state_t ems;
1408   gmx_localtop_t *top;
1409   gmx_enerdata_t *enerd;
1410   rvec   *f;
1411   gmx_global_stat_t gstat;
1412   t_graph    *graph;
1413   rvec   *f_global;
1414   int    ncorr,nmaxcorr,point,cp,neval,nminstep;
1415   double stepsize,gpa,gpb,gpc,tmp,minstep;
1416   real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1417   real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1418   real   a,b,c,maxdelta,delta;
1419   real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
1420   real   dgdx,dgdg,sq,yr,beta;
1421   t_mdebin   *mdebin;
1422   gmx_bool   converged,first;
1423   rvec   mu_tot;
1424   real   fnorm,fmax;
1425   gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
1426   tensor vir,pres;
1427   int    start,end,number_steps;
1428   gmx_mdoutf_t *outf;
1429   int    i,k,m,n,nfmax,gf,step;
1430   /* not used */
1431   real   terminate;
1432
1433   if (PAR(cr))
1434     gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1435
1436   n = 3*state->natoms;
1437   nmaxcorr = inputrec->nbfgscorr;
1438
1439   /* Allocate memory */
1440   /* Use pointers to real so we dont have to loop over both atoms and
1441    * dimensions all the time...
1442    * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1443    * that point to the same memory.
1444    */
1445   snew(xa,n);
1446   snew(xb,n);
1447   snew(xc,n);
1448   snew(fa,n);
1449   snew(fb,n);
1450   snew(fc,n);
1451   snew(frozen,n);
1452
1453   snew(p,n);
1454   snew(lastx,n);
1455   snew(lastf,n);
1456   snew(rho,nmaxcorr);
1457   snew(alpha,nmaxcorr);
1458
1459   snew(dx,nmaxcorr);
1460   for(i=0;i<nmaxcorr;i++)
1461     snew(dx[i],n);
1462
1463   snew(dg,nmaxcorr);
1464   for(i=0;i<nmaxcorr;i++)
1465     snew(dg[i],n);
1466
1467   step = 0;
1468   neval = 0;
1469
1470   /* Init em */
1471   init_em(fplog,LBFGS,cr,inputrec,
1472           state,top_global,&ems,&top,&f,&f_global,
1473           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1474           nfile,fnm,&outf,&mdebin);
1475   /* Do_lbfgs is not completely updated like do_steep and do_cg,
1476    * so we free some memory again.
1477    */
1478   sfree(ems.s.x);
1479   sfree(ems.f);
1480
1481   xx = (real *)state->x;
1482   ff = (real *)f;
1483
1484   start = mdatoms->start;
1485   end   = mdatoms->homenr + start;
1486
1487   /* Print to log file */
1488   print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1489
1490   do_log = do_ene = do_x = do_f = TRUE;
1491
1492   /* Max number of steps */
1493   number_steps=inputrec->nsteps;
1494
1495   /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1496   gf = 0;
1497   for(i=start; i<end; i++) {
1498     if (mdatoms->cFREEZE)
1499       gf = mdatoms->cFREEZE[i];
1500      for(m=0; m<DIM; m++)
1501        frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1502   }
1503   if (MASTER(cr))
1504     sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1505   if (fplog)
1506     sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1507
1508   if (vsite)
1509     construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1510                      top->idef.iparams,top->idef.il,
1511                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1512
1513   /* Call the force routine and some auxiliary (neighboursearching etc.) */
1514   /* do_force always puts the charge groups in the box and shifts again
1515    * We do not unshift, so molecules are always whole
1516    */
1517   neval++;
1518   ems.s.x = state->x;
1519   ems.f = f;
1520   evaluate_energy(fplog,bVerbose,cr,
1521                   state,top_global,&ems,top,
1522                   inputrec,nrnb,wcycle,gstat,
1523                   vsite,constr,fcd,graph,mdatoms,fr,
1524                   mu_tot,enerd,vir,pres,-1,TRUE);
1525   where();
1526
1527   if (MASTER(cr)) {
1528     /* Copy stuff to the energy bin for easy printing etc. */
1529     upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1530                mdatoms->tmass,enerd,state,state->box,
1531                NULL,NULL,vir,pres,NULL,mu_tot,constr);
1532
1533     print_ebin_header(fplog,step,step,state->lambda);
1534     print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1535                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1536   }
1537   where();
1538
1539   /* This is the starting energy */
1540   Epot = enerd->term[F_EPOT];
1541
1542   fnorm = ems.fnorm;
1543   fmax  = ems.fmax;
1544   nfmax = ems.a_fmax;
1545
1546   /* Set the initial step.
1547    * since it will be multiplied by the non-normalized search direction
1548    * vector (force vector the first time), we scale it by the
1549    * norm of the force.
1550    */
1551
1552   if (MASTER(cr)) {
1553     fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1554     fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1555     fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1556     fprintf(stderr,"\n");
1557     /* and copy to the log file too... */
1558     fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1559     fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
1560     fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
1561     fprintf(fplog,"\n");
1562   }
1563
1564   point=0;
1565   for(i=0;i<n;i++)
1566     if(!frozen[i])
1567       dx[point][i] = ff[i];  /* Initial search direction */
1568     else
1569       dx[point][i] = 0;
1570
1571   stepsize = 1.0/fnorm;
1572   converged = FALSE;
1573
1574   /* Start the loop over BFGS steps.
1575    * Each successful step is counted, and we continue until
1576    * we either converge or reach the max number of steps.
1577    */
1578
1579   ncorr=0;
1580
1581   /* Set the gradient from the force */
1582   converged = FALSE;
1583   for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1584
1585     /* Write coordinates if necessary */
1586     do_x = do_per_step(step,inputrec->nstxout);
1587     do_f = do_per_step(step,inputrec->nstfout);
1588
1589     write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1590                top_global,step,(real)step,state,state,f,f,NULL,NULL);
1591
1592     /* Do the linesearching in the direction dx[point][0..(n-1)] */
1593
1594     /* pointer to current direction - point=0 first time here */
1595     s=dx[point];
1596
1597     /* calculate line gradient */
1598     for(gpa=0,i=0;i<n;i++)
1599         gpa-=s[i]*ff[i];
1600
1601     /* Calculate minimum allowed stepsize, before the average (norm)
1602      * relative change in coordinate is smaller than precision
1603      */
1604     for(minstep=0,i=0;i<n;i++) {
1605       tmp=fabs(xx[i]);
1606       if(tmp<1.0)
1607         tmp=1.0;
1608       tmp = s[i]/tmp;
1609       minstep += tmp*tmp;
1610     }
1611     minstep = GMX_REAL_EPS/sqrt(minstep/n);
1612
1613     if(stepsize<minstep) {
1614       converged=TRUE;
1615       break;
1616     }
1617
1618     /* Store old forces and coordinates */
1619     for(i=0;i<n;i++) {
1620       lastx[i]=xx[i];
1621       lastf[i]=ff[i];
1622     }
1623     Epot0=Epot;
1624
1625     first=TRUE;
1626
1627     for(i=0;i<n;i++)
1628       xa[i]=xx[i];
1629
1630     /* Take a step downhill.
1631      * In theory, we should minimize the function along this direction.
1632      * That is quite possible, but it turns out to take 5-10 function evaluations
1633      * for each line. However, we dont really need to find the exact minimum -
1634      * it is much better to start a new BFGS step in a modified direction as soon
1635      * as we are close to it. This will save a lot of energy evaluations.
1636      *
1637      * In practice, we just try to take a single step.
1638      * If it worked (i.e. lowered the energy), we increase the stepsize but
1639      * the continue straight to the next BFGS step without trying to find any minimum.
1640      * If it didn't work (higher energy), there must be a minimum somewhere between
1641      * the old position and the new one.
1642      *
1643      * Due to the finite numerical accuracy, it turns out that it is a good idea
1644      * to even accept a SMALL increase in energy, if the derivative is still downhill.
1645      * This leads to lower final energies in the tests I've done. / Erik
1646      */
1647     foundlower=FALSE;
1648     EpotA = Epot0;
1649     a = 0.0;
1650     c = a + stepsize; /* reference position along line is zero */
1651
1652     /* Check stepsize first. We do not allow displacements
1653      * larger than emstep.
1654      */
1655     do {
1656       c = a + stepsize;
1657       maxdelta=0;
1658       for(i=0;i<n;i++) {
1659         delta=c*s[i];
1660         if(delta>maxdelta)
1661           maxdelta=delta;
1662       }
1663       if(maxdelta>inputrec->em_stepsize)
1664         stepsize*=0.1;
1665     } while(maxdelta>inputrec->em_stepsize);
1666
1667     /* Take a trial step */
1668     for (i=0; i<n; i++)
1669       xc[i] = lastx[i] + c*s[i];
1670
1671     neval++;
1672     /* Calculate energy for the trial step */
1673     ems.s.x = (rvec *)xc;
1674     ems.f   = (rvec *)fc;
1675     evaluate_energy(fplog,bVerbose,cr,
1676                     state,top_global,&ems,top,
1677                     inputrec,nrnb,wcycle,gstat,
1678                     vsite,constr,fcd,graph,mdatoms,fr,
1679                     mu_tot,enerd,vir,pres,step,FALSE);
1680     EpotC = ems.epot;
1681
1682     /* Calc derivative along line */
1683     for(gpc=0,i=0; i<n; i++) {
1684         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
1685     }
1686     /* Sum the gradient along the line across CPUs */
1687     if (PAR(cr))
1688       gmx_sumd(1,&gpc,cr);
1689
1690      /* This is the max amount of increase in energy we tolerate */
1691    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1692
1693     /* Accept the step if the energy is lower, or if it is not significantly higher
1694      * and the line derivative is still negative.
1695      */
1696     if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1697       foundlower = TRUE;
1698       /* Great, we found a better energy. Increase step for next iteration
1699        * if we are still going down, decrease it otherwise
1700        */
1701       if(gpc<0)
1702         stepsize *= 1.618034;  /* The golden section */
1703       else
1704         stepsize *= 0.618034;  /* 1/golden section */
1705     } else {
1706       /* New energy is the same or higher. We will have to do some work
1707        * to find a smaller value in the interval. Take smaller step next time!
1708        */
1709       foundlower = FALSE;
1710       stepsize *= 0.618034;
1711     }
1712
1713     /* OK, if we didn't find a lower value we will have to locate one now - there must
1714      * be one in the interval [a=0,c].
1715      * The same thing is valid here, though: Don't spend dozens of iterations to find
1716      * the line minimum. We try to interpolate based on the derivative at the endpoints,
1717      * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1718      *
1719      * I also have a safeguard for potentially really patological functions so we never
1720      * take more than 20 steps before we give up ...
1721      *
1722      * If we already found a lower value we just skip this step and continue to the update.
1723      */
1724
1725     if(!foundlower) {
1726
1727       nminstep=0;
1728       do {
1729         /* Select a new trial point.
1730          * If the derivatives at points a & c have different sign we interpolate to zero,
1731          * otherwise just do a bisection.
1732          */
1733
1734         if(gpa<0 && gpc>0)
1735           b = a + gpa*(a-c)/(gpc-gpa);
1736         else
1737           b = 0.5*(a+c);
1738
1739         /* safeguard if interpolation close to machine accuracy causes errors:
1740          * never go outside the interval
1741          */
1742         if(b<=a || b>=c)
1743           b = 0.5*(a+c);
1744
1745         /* Take a trial step */
1746         for (i=0; i<n; i++)
1747           xb[i] = lastx[i] + b*s[i];
1748
1749         neval++;
1750         /* Calculate energy for the trial step */
1751         ems.s.x = (rvec *)xb;
1752         ems.f   = (rvec *)fb;
1753         evaluate_energy(fplog,bVerbose,cr,
1754                         state,top_global,&ems,top,
1755                         inputrec,nrnb,wcycle,gstat,
1756                         vsite,constr,fcd,graph,mdatoms,fr,
1757                         mu_tot,enerd,vir,pres,step,FALSE);
1758         EpotB = ems.epot;
1759
1760         fnorm = ems.fnorm;
1761
1762         for(gpb=0,i=0; i<n; i++)
1763           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
1764
1765         /* Sum the gradient along the line across CPUs */
1766         if (PAR(cr))
1767           gmx_sumd(1,&gpb,cr);
1768
1769         /* Keep one of the intervals based on the value of the derivative at the new point */
1770         if(gpb>0) {
1771           /* Replace c endpoint with b */
1772           EpotC = EpotB;
1773           c = b;
1774           gpc = gpb;
1775           /* swap coord pointers b/c */
1776           xtmp = xb;
1777           ftmp = fb;
1778           xb = xc;
1779           fb = fc;
1780           xc = xtmp;
1781           fc = ftmp;
1782         } else {
1783           /* Replace a endpoint with b */
1784           EpotA = EpotB;
1785           a = b;
1786           gpa = gpb;
1787           /* swap coord pointers a/b */
1788           xtmp = xb;
1789           ftmp = fb;
1790           xb = xa;
1791           fb = fa;
1792           xa = xtmp;
1793           fa = ftmp;
1794         }
1795
1796         /*
1797          * Stop search as soon as we find a value smaller than the endpoints,
1798          * or if the tolerance is below machine precision.
1799          * Never run more than 20 steps, no matter what.
1800          */
1801         nminstep++;
1802       } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1803
1804       if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1805         /* OK. We couldn't find a significantly lower energy.
1806          * If ncorr==0 this was steepest descent, and then we give up.
1807          * If not, reset memory to restart as steepest descent before quitting.
1808          */
1809         if(ncorr==0) {
1810         /* Converged */
1811           converged=TRUE;
1812           break;
1813         } else {
1814           /* Reset memory */
1815           ncorr=0;
1816           /* Search in gradient direction */
1817           for(i=0;i<n;i++)
1818             dx[point][i]=ff[i];
1819           /* Reset stepsize */
1820           stepsize = 1.0/fnorm;
1821           continue;
1822         }
1823       }
1824
1825       /* Select min energy state of A & C, put the best in xx/ff/Epot
1826        */
1827       if(EpotC<EpotA) {
1828         Epot = EpotC;
1829         /* Use state C */
1830         for(i=0;i<n;i++) {
1831           xx[i]=xc[i];
1832           ff[i]=fc[i];
1833         }
1834         stepsize=c;
1835       } else {
1836         Epot = EpotA;
1837         /* Use state A */
1838         for(i=0;i<n;i++) {
1839           xx[i]=xa[i];
1840           ff[i]=fa[i];
1841         }
1842         stepsize=a;
1843       }
1844
1845     } else {
1846       /* found lower */
1847       Epot = EpotC;
1848       /* Use state C */
1849       for(i=0;i<n;i++) {
1850         xx[i]=xc[i];
1851         ff[i]=fc[i];
1852       }
1853       stepsize=c;
1854     }
1855
1856     /* Update the memory information, and calculate a new
1857      * approximation of the inverse hessian
1858      */
1859
1860     /* Have new data in Epot, xx, ff */
1861     if(ncorr<nmaxcorr)
1862       ncorr++;
1863
1864     for(i=0;i<n;i++) {
1865       dg[point][i]=lastf[i]-ff[i];
1866       dx[point][i]*=stepsize;
1867     }
1868
1869     dgdg=0;
1870     dgdx=0;
1871     for(i=0;i<n;i++) {
1872       dgdg+=dg[point][i]*dg[point][i];
1873       dgdx+=dg[point][i]*dx[point][i];
1874     }
1875
1876     diag=dgdx/dgdg;
1877
1878     rho[point]=1.0/dgdx;
1879     point++;
1880
1881     if(point>=nmaxcorr)
1882       point=0;
1883
1884     /* Update */
1885     for(i=0;i<n;i++)
1886       p[i]=ff[i];
1887
1888     cp=point;
1889
1890     /* Recursive update. First go back over the memory points */
1891     for(k=0;k<ncorr;k++) {
1892       cp--;
1893       if(cp<0)
1894         cp=ncorr-1;
1895
1896       sq=0;
1897       for(i=0;i<n;i++)
1898         sq+=dx[cp][i]*p[i];
1899
1900       alpha[cp]=rho[cp]*sq;
1901
1902       for(i=0;i<n;i++)
1903         p[i] -= alpha[cp]*dg[cp][i];
1904     }
1905
1906     for(i=0;i<n;i++)
1907       p[i] *= diag;
1908
1909     /* And then go forward again */
1910     for(k=0;k<ncorr;k++) {
1911       yr = 0;
1912       for(i=0;i<n;i++)
1913         yr += p[i]*dg[cp][i];
1914
1915       beta = rho[cp]*yr;
1916       beta = alpha[cp]-beta;
1917
1918       for(i=0;i<n;i++)
1919         p[i] += beta*dx[cp][i];
1920
1921       cp++;
1922       if(cp>=ncorr)
1923         cp=0;
1924     }
1925
1926     for(i=0;i<n;i++)
1927       if(!frozen[i])
1928         dx[point][i] = p[i];
1929       else
1930         dx[point][i] = 0;
1931
1932     stepsize=1.0;
1933
1934     /* Test whether the convergence criterion is met */
1935     get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1936
1937     /* Print it if necessary */
1938     if (MASTER(cr)) {
1939       if(bVerbose)
1940         fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1941                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1942       /* Store the new (lower) energies */
1943       upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1944                  mdatoms->tmass,enerd,state,state->box,
1945                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
1946       do_log = do_per_step(step,inputrec->nstlog);
1947       do_ene = do_per_step(step,inputrec->nstenergy);
1948       if(do_log)
1949         print_ebin_header(fplog,step,step,state->lambda);
1950       print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1951                  do_log ? fplog : NULL,step,step,eprNORMAL,
1952                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1953     }
1954
1955     /* Stop when the maximum force lies below tolerance.
1956      * If we have reached machine precision, converged is already set to true.
1957      */
1958
1959     converged = converged || (fmax < inputrec->em_tol);
1960
1961   } /* End of the loop */
1962
1963   if(converged)
1964     step--; /* we never took that last step in this case */
1965
1966     if(fmax>inputrec->em_tol)
1967     {
1968         if (MASTER(cr))
1969         {
1970             warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1971             warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1972         }
1973         converged = FALSE;
1974     }
1975
1976   /* If we printed energy and/or logfile last step (which was the last step)
1977    * we don't have to do it again, but otherwise print the final values.
1978    */
1979   if(!do_log) /* Write final value to log since we didn't do anythin last step */
1980     print_ebin_header(fplog,step,step,state->lambda);
1981   if(!do_ene || !do_log) /* Write final energy file entries */
1982     print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1983                !do_log ? fplog : NULL,step,step,eprNORMAL,
1984                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1985
1986   /* Print some stuff... */
1987   if (MASTER(cr))
1988     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1989
1990   /* IMPORTANT!
1991    * For accurate normal mode calculation it is imperative that we
1992    * store the last conformation into the full precision binary trajectory.
1993    *
1994    * However, we should only do it if we did NOT already write this step
1995    * above (which we did if do_x or do_f was true).
1996    */
1997   do_x = !do_per_step(step,inputrec->nstxout);
1998   do_f = !do_per_step(step,inputrec->nstfout);
1999   write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2000                 top_global,inputrec,step,
2001                 &ems,state,f);
2002
2003   if (MASTER(cr)) {
2004     print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2005                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2006     print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2007                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2008
2009     fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2010   }
2011
2012   finish_em(fplog,cr,outf,runtime,wcycle);
2013
2014   /* To print the actual number of steps we needed somewhere */
2015   runtime->nsteps_done = step;
2016
2017   return 0;
2018 } /* That's all folks */
2019
2020
2021 double do_steep(FILE *fplog,t_commrec *cr,
2022                 int nfile, const t_filenm fnm[],
2023                 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2024                 int nstglobalcomm,
2025                 gmx_vsite_t *vsite,gmx_constr_t constr,
2026                 int stepout,
2027                 t_inputrec *inputrec,
2028                 gmx_mtop_t *top_global,t_fcdata *fcd,
2029                 t_state *state_global,
2030                 t_mdatoms *mdatoms,
2031                 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2032                 gmx_edsam_t ed,
2033                 t_forcerec *fr,
2034                 int repl_ex_nst,int repl_ex_seed,
2035                 real cpt_period,real max_hours,
2036                 const char *deviceOptions,
2037                 unsigned long Flags,
2038                 gmx_runtime_t *runtime)
2039 {
2040   const char *SD="Steepest Descents";
2041   em_state_t *s_min,*s_try;
2042   rvec       *f_global;
2043   gmx_localtop_t *top;
2044   gmx_enerdata_t *enerd;
2045   rvec   *f;
2046   gmx_global_stat_t gstat;
2047   t_graph    *graph;
2048   real   stepsize,constepsize;
2049   real   ustep,dvdlambda,fnormn;
2050   gmx_mdoutf_t *outf;
2051   t_mdebin   *mdebin;
2052   gmx_bool   bDone,bAbort,do_x,do_f;
2053   tensor vir,pres;
2054   rvec   mu_tot;
2055   int    nsteps;
2056   int    count=0;
2057   int    steps_accepted=0;
2058   /* not used */
2059   real   terminate=0;
2060
2061   s_min = init_em_state();
2062   s_try = init_em_state();
2063
2064   /* Init em and store the local state in s_try */
2065   init_em(fplog,SD,cr,inputrec,
2066           state_global,top_global,s_try,&top,&f,&f_global,
2067           nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2068           nfile,fnm,&outf,&mdebin);
2069
2070   /* Print to log file  */
2071   print_em_start(fplog,cr,runtime,wcycle,SD);
2072
2073   /* Set variables for stepsize (in nm). This is the largest
2074    * step that we are going to make in any direction.
2075    */
2076   ustep = inputrec->em_stepsize;
2077   stepsize = 0;
2078
2079   /* Max number of steps  */
2080   nsteps = inputrec->nsteps;
2081
2082   if (MASTER(cr))
2083     /* Print to the screen  */
2084     sp_header(stderr,SD,inputrec->em_tol,nsteps);
2085   if (fplog)
2086     sp_header(fplog,SD,inputrec->em_tol,nsteps);
2087
2088   /**** HERE STARTS THE LOOP ****
2089    * count is the counter for the number of steps
2090    * bDone will be TRUE when the minimization has converged
2091    * bAbort will be TRUE when nsteps steps have been performed or when
2092    * the stepsize becomes smaller than is reasonable for machine precision
2093    */
2094   count  = 0;
2095   bDone  = FALSE;
2096   bAbort = FALSE;
2097   while( !bDone && !bAbort ) {
2098     bAbort = (nsteps >= 0) && (count == nsteps);
2099
2100     /* set new coordinates, except for first step */
2101     if (count > 0) {
2102       do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2103                  constr,top,nrnb,wcycle,count);
2104     }
2105
2106     evaluate_energy(fplog,bVerbose,cr,
2107                     state_global,top_global,s_try,top,
2108                     inputrec,nrnb,wcycle,gstat,
2109                     vsite,constr,fcd,graph,mdatoms,fr,
2110                     mu_tot,enerd,vir,pres,count,count==0);
2111
2112     if (MASTER(cr))
2113       print_ebin_header(fplog,count,count,s_try->s.lambda);
2114
2115     if (count == 0)
2116       s_min->epot = s_try->epot + 1;
2117
2118     /* Print it if necessary  */
2119     if (MASTER(cr)) {
2120       if (bVerbose) {
2121         fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2122                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2123                 (s_try->epot < s_min->epot) ? '\n' : '\r');
2124       }
2125
2126       if (s_try->epot < s_min->epot) {
2127         /* Store the new (lower) energies  */
2128         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2129                    mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2130                    NULL,NULL,vir,pres,NULL,mu_tot,constr);
2131         print_ebin(outf->fp_ene,TRUE,
2132                    do_per_step(steps_accepted,inputrec->nstdisreout),
2133                    do_per_step(steps_accepted,inputrec->nstorireout),
2134                    fplog,count,count,eprNORMAL,TRUE,
2135                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2136         fflush(fplog);
2137       }
2138     }
2139
2140     /* Now if the new energy is smaller than the previous...
2141      * or if this is the first step!
2142      * or if we did random steps!
2143      */
2144
2145     if ( (count==0) || (s_try->epot < s_min->epot) ) {
2146       steps_accepted++;
2147
2148       /* Test whether the convergence criterion is met...  */
2149       bDone = (s_try->fmax < inputrec->em_tol);
2150
2151       /* Copy the arrays for force, positions and energy  */
2152       /* The 'Min' array always holds the coords and forces of the minimal
2153          sampled energy  */
2154       swap_em_state(s_min,s_try);
2155       if (count > 0)
2156         ustep *= 1.2;
2157
2158       /* Write to trn, if necessary */
2159       do_x = do_per_step(steps_accepted,inputrec->nstxout);
2160       do_f = do_per_step(steps_accepted,inputrec->nstfout);
2161       write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2162                     top_global,inputrec,count,
2163                     s_min,state_global,f_global);
2164     }
2165     else {
2166       /* If energy is not smaller make the step smaller...  */
2167       ustep *= 0.5;
2168
2169       if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2170         /* Reload the old state */
2171         em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2172                                s_min,top,mdatoms,fr,vsite,constr,
2173                                nrnb,wcycle);
2174       }
2175     }
2176
2177     /* Determine new step  */
2178     stepsize = ustep/s_min->fmax;
2179
2180     /* Check if stepsize is too small, with 1 nm as a characteristic length */
2181 #ifdef GMX_DOUBLE
2182         if (count == nsteps || ustep < 1e-12)
2183 #else
2184         if (count == nsteps || ustep < 1e-6)
2185 #endif
2186         {
2187             if (MASTER(cr))
2188             {
2189                 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2190                 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2191             }
2192             bAbort=TRUE;
2193         }
2194
2195     count++;
2196   } /* End of the loop  */
2197
2198     /* Print some shit...  */
2199   if (MASTER(cr))
2200     fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2201   write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2202                 top_global,inputrec,count,
2203                 s_min,state_global,f_global);
2204
2205   fnormn = s_min->fnorm/sqrt(state_global->natoms);
2206
2207   if (MASTER(cr)) {
2208     print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2209                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2210     print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2211                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2212   }
2213
2214   finish_em(fplog,cr,outf,runtime,wcycle);
2215
2216   /* To print the actual number of steps we needed somewhere */
2217   inputrec->nsteps=count;
2218
2219   runtime->nsteps_done = count;
2220
2221   return 0;
2222 } /* That's all folks */
2223
2224
2225 double do_nm(FILE *fplog,t_commrec *cr,
2226              int nfile,const t_filenm fnm[],
2227              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2228              int nstglobalcomm,
2229              gmx_vsite_t *vsite,gmx_constr_t constr,
2230              int stepout,
2231              t_inputrec *inputrec,
2232              gmx_mtop_t *top_global,t_fcdata *fcd,
2233              t_state *state_global,
2234              t_mdatoms *mdatoms,
2235              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2236              gmx_edsam_t ed,
2237              t_forcerec *fr,
2238              int repl_ex_nst,int repl_ex_seed,
2239              real cpt_period,real max_hours,
2240              const char *deviceOptions,
2241              unsigned long Flags,
2242              gmx_runtime_t *runtime)
2243 {
2244     const char *NM = "Normal Mode Analysis";
2245     gmx_mdoutf_t *outf;
2246     int        natoms,atom,d;
2247     int        nnodes,node;
2248     rvec       *f_global;
2249     gmx_localtop_t *top;
2250     gmx_enerdata_t *enerd;
2251     rvec       *f;
2252     gmx_global_stat_t gstat;
2253     t_graph    *graph;
2254     real       t,lambda;
2255     gmx_bool       bNS;
2256     tensor     vir,pres;
2257     rvec       mu_tot;
2258     rvec       *fneg,*dfdx;
2259     gmx_bool       bSparse; /* use sparse matrix storage format */
2260     size_t     sz;
2261     gmx_sparsematrix_t * sparse_matrix = NULL;
2262     real *     full_matrix             = NULL;
2263     em_state_t *   state_work;
2264
2265     /* added with respect to mdrun */
2266     int        i,j,k,row,col;
2267     real       der_range=10.0*sqrt(GMX_REAL_EPS);
2268     real       x_min;
2269     real       fnorm,fmax;
2270
2271     if (constr != NULL)
2272     {
2273         gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2274     }
2275
2276     state_work = init_em_state();
2277
2278     /* Init em and store the local state in state_minimum */
2279     init_em(fplog,NM,cr,inputrec,
2280             state_global,top_global,state_work,&top,
2281             &f,&f_global,
2282             nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2283             nfile,fnm,&outf,NULL);
2284
2285     natoms = top_global->natoms;
2286     snew(fneg,natoms);
2287     snew(dfdx,natoms);
2288
2289 #ifndef GMX_DOUBLE
2290     if (MASTER(cr))
2291     {
2292         fprintf(stderr,
2293                 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2294                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2295                 "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
2296                 "      are fairly modest even if you recompile in double precision.\n\n");
2297     }
2298 #endif
2299
2300     /* Check if we can/should use sparse storage format.
2301      *
2302      * Sparse format is only useful when the Hessian itself is sparse, which it
2303       * will be when we use a cutoff.
2304       * For small systems (n<1000) it is easier to always use full matrix format, though.
2305       */
2306     if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2307     {
2308         fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2309         bSparse = FALSE;
2310     }
2311     else if(top_global->natoms < 1000)
2312     {
2313         fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2314         bSparse = FALSE;
2315     }
2316     else
2317     {
2318         fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2319         bSparse = TRUE;
2320     }
2321
2322     sz = DIM*top_global->natoms;
2323
2324     fprintf(stderr,"Allocating Hessian memory...\n\n");
2325
2326     if(bSparse)
2327     {
2328         sparse_matrix=gmx_sparsematrix_init(sz);
2329         sparse_matrix->compressed_symmetric = TRUE;
2330     }
2331     else
2332     {
2333         snew(full_matrix,sz*sz);
2334     }
2335
2336     /* Initial values */
2337     t      = inputrec->init_t;
2338     lambda = inputrec->init_lambda;
2339
2340     init_nrnb(nrnb);
2341
2342     where();
2343
2344     /* Write start time and temperature */
2345     print_em_start(fplog,cr,runtime,wcycle,NM);
2346
2347     /* fudge nr of steps to nr of atoms */
2348     inputrec->nsteps = natoms*2;
2349
2350     if (MASTER(cr))
2351     {
2352         fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2353                 *(top_global->name),(int)inputrec->nsteps);
2354     }
2355
2356     nnodes = cr->nnodes;
2357
2358     /* Make evaluate_energy do a single node force calculation */
2359     cr->nnodes = 1;
2360     evaluate_energy(fplog,bVerbose,cr,
2361                     state_global,top_global,state_work,top,
2362                     inputrec,nrnb,wcycle,gstat,
2363                     vsite,constr,fcd,graph,mdatoms,fr,
2364                     mu_tot,enerd,vir,pres,-1,TRUE);
2365     cr->nnodes = nnodes;
2366
2367     /* if forces are not small, warn user */
2368     get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2369
2370     if (MASTER(cr))
2371     {
2372         fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2373         if (state_work->fmax > 1.0e-3)
2374         {
2375             fprintf(stderr,"Maximum force probably not small enough to");
2376             fprintf(stderr," ensure that you are in an \nenergy well. ");
2377             fprintf(stderr,"Be aware that negative eigenvalues may occur");
2378             fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2379         }
2380     }
2381
2382     /***********************************************************
2383      *
2384      *      Loop over all pairs in matrix
2385      *
2386      *      do_force called twice. Once with positive and
2387      *      once with negative displacement
2388      *
2389      ************************************************************/
2390
2391     /* Steps are divided one by one over the nodes */
2392     for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2393     {
2394
2395         for (d=0; d<DIM; d++)
2396         {
2397             x_min = state_work->s.x[atom][d];
2398
2399             state_work->s.x[atom][d] = x_min - der_range;
2400
2401             /* Make evaluate_energy do a single node force calculation */
2402             cr->nnodes = 1;
2403             evaluate_energy(fplog,bVerbose,cr,
2404                             state_global,top_global,state_work,top,
2405                             inputrec,nrnb,wcycle,gstat,
2406                             vsite,constr,fcd,graph,mdatoms,fr,
2407                             mu_tot,enerd,vir,pres,atom*2,FALSE);
2408
2409             for(i=0; i<natoms; i++)
2410             {
2411                 copy_rvec(state_work->f[i], fneg[i]);
2412             }
2413
2414             state_work->s.x[atom][d] = x_min + der_range;
2415
2416             evaluate_energy(fplog,bVerbose,cr,
2417                             state_global,top_global,state_work,top,
2418                             inputrec,nrnb,wcycle,gstat,
2419                             vsite,constr,fcd,graph,mdatoms,fr,
2420                             mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2421             cr->nnodes = nnodes;
2422
2423             /* x is restored to original */
2424             state_work->s.x[atom][d] = x_min;
2425
2426             for(j=0; j<natoms; j++)
2427             {
2428                 for (k=0; (k<DIM); k++)
2429                 {
2430                     dfdx[j][k] =
2431                         -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2432                 }
2433             }
2434
2435             if (!MASTER(cr))
2436             {
2437 #ifdef GMX_MPI
2438 #ifdef GMX_DOUBLE
2439 #define mpi_type MPI_DOUBLE
2440 #else
2441 #define mpi_type MPI_FLOAT
2442 #endif
2443                 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2444                          cr->mpi_comm_mygroup);
2445 #endif
2446             }
2447             else
2448             {
2449                 for(node=0; (node<nnodes && atom+node<natoms); node++)
2450                 {
2451                     if (node > 0)
2452                     {
2453 #ifdef GMX_MPI
2454                         MPI_Status stat;
2455                         MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2456                                  cr->mpi_comm_mygroup,&stat);
2457 #undef mpi_type
2458 #endif
2459                     }
2460
2461                     row = (atom + node)*DIM + d;
2462
2463                     for(j=0; j<natoms; j++)
2464                     {
2465                         for(k=0; k<DIM; k++)
2466                         {
2467                             col = j*DIM + k;
2468
2469                             if (bSparse)
2470                             {
2471                                 if (col >= row && dfdx[j][k] != 0.0)
2472                                 {
2473                                     gmx_sparsematrix_increment_value(sparse_matrix,
2474                                                                      row,col,dfdx[j][k]);
2475                                 }
2476                             }
2477                             else
2478                             {
2479                                 full_matrix[row*sz+col] = dfdx[j][k];
2480                             }
2481                         }
2482                     }
2483                 }
2484             }
2485
2486             if (bVerbose && fplog)
2487             {
2488                 fflush(fplog);
2489             }
2490         }
2491         /* write progress */
2492         if (MASTER(cr) && bVerbose)
2493         {
2494             fprintf(stderr,"\rFinished step %d out of %d",
2495                     min(atom+nnodes,natoms),natoms);
2496             fflush(stderr);
2497         }
2498     }
2499
2500     if (MASTER(cr))
2501     {
2502         fprintf(stderr,"\n\nWriting Hessian...\n");
2503         gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2504     }
2505
2506     finish_em(fplog,cr,outf,runtime,wcycle);
2507
2508     runtime->nsteps_done = natoms*2;
2509
2510     return 0;
2511 }