removed (nan) average printing with normal modes
[gromacs.git] / src / mdlib / minimize.c
blob588544caf74446180f237dbc56dd5150a9cfae10
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROwing Monsters And Cloning Shrimps
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <string.h>
41 #include <time.h>
42 #include <math.h>
43 #include "sysstuff.h"
44 #include "string2.h"
45 #include "network.h"
46 #include "confio.h"
47 #include "copyrite.h"
48 #include "smalloc.h"
49 #include "nrnb.h"
50 #include "main.h"
51 #include "force.h"
52 #include "macros.h"
53 #include "random.h"
54 #include "names.h"
55 #include "gmx_fatal.h"
56 #include "txtdump.h"
57 #include "typedefs.h"
58 #include "update.h"
59 #include "constr.h"
60 #include "vec.h"
61 #include "statutil.h"
62 #include "tgroup.h"
63 #include "mdebin.h"
64 #include "vsite.h"
65 #include "force.h"
66 #include "mdrun.h"
67 #include "domdec.h"
68 #include "partdec.h"
69 #include "trnio.h"
70 #include "sparsematrix.h"
71 #include "mtxio.h"
72 #include "mdatoms.h"
73 #include "ns.h"
74 #include "gmx_wallcycle.h"
75 #include "mtop_util.h"
76 #include "gmxfio.h"
77 #include "pme.h"
79 typedef struct {
80 t_state s;
81 rvec *f;
82 real epot;
83 real fnorm;
84 real fmax;
85 int a_fmax;
86 } em_state_t;
88 static em_state_t *init_em_state()
90 em_state_t *ems;
92 snew(ems,1);
94 return ems;
97 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
98 gmx_wallcycle_t wcycle,
99 const char *name)
101 char buf[STRLEN];
103 runtime_start(runtime);
105 sprintf(buf,"Started %s",name);
106 print_date_and_time(fplog,cr->nodeid,buf,NULL);
108 wallcycle_start(wcycle,ewcRUN);
110 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
111 gmx_wallcycle_t wcycle)
113 wallcycle_stop(wcycle,ewcRUN);
115 runtime_end(runtime);
118 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
120 fprintf(out,"\n");
121 fprintf(out,"%s:\n",minimizer);
122 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
123 fprintf(out," Number of steps = %12d\n",nsteps);
126 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
128 if (bLastStep)
130 fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
132 else
134 fprintf(fp,"\nStepsize too small, or no change in energy.\n"
135 "Converged to machine precision,\n"
136 "but not to the requested precision Fmax < %g\n",
137 ftol);
138 if (sizeof(real)<sizeof(double))
140 fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
142 if (bConstrain)
144 fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
145 "off constraints alltogether (set constraints = none in mdp file)\n");
152 static void print_converged(FILE *fp,const char *alg,real ftol,
153 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
154 real epot,real fmax, int nfmax, real fnorm)
156 char buf[STEPSTRSIZE];
158 if (bDone)
159 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
160 alg,ftol,gmx_step_str(count,buf));
161 else if(count<nsteps)
162 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
163 "but did not reach the requested Fmax < %g.\n",
164 alg,gmx_step_str(count,buf),ftol);
165 else
166 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
167 alg,ftol,gmx_step_str(count,buf));
169 #ifdef GMX_DOUBLE
170 fprintf(fp,"Potential Energy = %21.14e\n",epot);
171 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
172 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
173 #else
174 fprintf(fp,"Potential Energy = %14.7e\n",epot);
175 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
176 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
177 #endif
180 static void get_f_norm_max(t_commrec *cr,
181 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
182 real *fnorm,real *fmax,int *a_fmax)
184 double fnorm2,*sum;
185 real fmax2,fmax2_0,fam;
186 int la_max,a_max,start,end,i,m,gf;
188 /* This routine finds the largest force and returns it.
189 * On parallel machines the global max is taken.
191 fnorm2 = 0;
192 fmax2 = 0;
193 la_max = -1;
194 gf = 0;
195 start = mdatoms->start;
196 end = mdatoms->homenr + start;
197 if (mdatoms->cFREEZE) {
198 for(i=start; i<end; i++) {
199 gf = mdatoms->cFREEZE[i];
200 fam = 0;
201 for(m=0; m<DIM; m++)
202 if (!opts->nFreeze[gf][m])
203 fam += sqr(f[i][m]);
204 fnorm2 += fam;
205 if (fam > fmax2) {
206 fmax2 = fam;
207 la_max = i;
210 } else {
211 for(i=start; i<end; i++) {
212 fam = norm2(f[i]);
213 fnorm2 += fam;
214 if (fam > fmax2) {
215 fmax2 = fam;
216 la_max = i;
221 if (la_max >= 0 && DOMAINDECOMP(cr)) {
222 a_max = cr->dd->gatindex[la_max];
223 } else {
224 a_max = la_max;
226 if (PAR(cr)) {
227 snew(sum,2*cr->nnodes+1);
228 sum[2*cr->nodeid] = fmax2;
229 sum[2*cr->nodeid+1] = a_max;
230 sum[2*cr->nnodes] = fnorm2;
231 gmx_sumd(2*cr->nnodes+1,sum,cr);
232 fnorm2 = sum[2*cr->nnodes];
233 /* Determine the global maximum */
234 for(i=0; i<cr->nnodes; i++) {
235 if (sum[2*i] > fmax2) {
236 fmax2 = sum[2*i];
237 a_max = (int)(sum[2*i+1] + 0.5);
240 sfree(sum);
243 if (fnorm)
244 *fnorm = sqrt(fnorm2);
245 if (fmax)
246 *fmax = sqrt(fmax2);
247 if (a_fmax)
248 *a_fmax = a_max;
251 static void get_state_f_norm_max(t_commrec *cr,
252 t_grpopts *opts,t_mdatoms *mdatoms,
253 em_state_t *ems)
255 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
258 void init_em(FILE *fplog,const char *title,
259 t_commrec *cr,t_inputrec *ir,
260 t_state *state_global,gmx_mtop_t *top_global,
261 em_state_t *ems,gmx_localtop_t **top,
262 rvec **f,rvec **f_global,
263 t_nrnb *nrnb,rvec mu_tot,
264 t_forcerec *fr,gmx_enerdata_t **enerd,
265 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
266 gmx_vsite_t *vsite,gmx_constr_t constr,
267 int nfile,const t_filenm fnm[],
268 gmx_mdoutf_t **outf,t_mdebin **mdebin)
270 int start,homenr,i;
271 real dvdlambda;
273 if (fplog)
275 fprintf(fplog,"Initiating %s\n",title);
278 state_global->ngtc = 0;
280 /* Initiate some variables */
281 if (ir->efep != efepNO)
283 state_global->lambda = ir->init_lambda;
285 else
287 state_global->lambda = 0.0;
290 init_nrnb(nrnb);
292 if (DOMAINDECOMP(cr))
294 *top = dd_init_local_top(top_global);
296 dd_init_local_state(cr->dd,state_global,&ems->s);
298 *f = NULL;
300 /* Distribute the charge groups over the nodes from the master node */
301 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
302 state_global,top_global,ir,
303 &ems->s,&ems->f,mdatoms,*top,
304 fr,vsite,NULL,constr,
305 nrnb,NULL,FALSE);
306 dd_store_state(cr->dd,&ems->s);
308 if (ir->nstfout)
310 snew(*f_global,top_global->natoms);
312 else
314 *f_global = NULL;
316 *graph = NULL;
318 else
320 snew(*f,top_global->natoms);
322 /* Just copy the state */
323 ems->s = *state_global;
324 snew(ems->s.x,ems->s.nalloc);
325 snew(ems->f,ems->s.nalloc);
326 for(i=0; i<state_global->natoms; i++)
328 copy_rvec(state_global->x[i],ems->s.x[i]);
330 copy_mat(state_global->box,ems->s.box);
332 if (PAR(cr) && ir->eI != eiNM)
334 /* Initialize the particle decomposition and split the topology */
335 *top = split_system(fplog,top_global,ir,cr);
337 pd_cg_range(cr,&fr->cg0,&fr->hcg);
339 else
341 *top = gmx_mtop_generate_local_top(top_global,ir);
343 *f_global = *f;
345 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
347 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
349 else
351 *graph = NULL;
354 if (PARTDECOMP(cr))
356 pd_at_range(cr,&start,&homenr);
357 homenr -= start;
359 else
361 start = 0;
362 homenr = top_global->natoms;
364 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
365 update_mdatoms(mdatoms,state_global->lambda);
367 if (vsite)
369 set_vsite_top(vsite,*top,mdatoms,cr);
373 if (constr)
375 if (ir->eConstrAlg == econtSHAKE &&
376 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
378 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
379 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
382 if (!DOMAINDECOMP(cr))
384 set_constraints(constr,*top,ir,mdatoms,cr);
387 if (!ir->bContinuation)
389 /* Constrain the starting coordinates */
390 dvdlambda=0;
391 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
392 ir,NULL,cr,-1,0,mdatoms,
393 ems->s.x,ems->s.x,NULL,ems->s.box,
394 ems->s.lambda,&dvdlambda,
395 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
399 if (PAR(cr))
401 *gstat = global_stat_init(ir);
404 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
406 snew(*enerd,1);
407 init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
409 if (mdebin != NULL)
411 /* Init bin for energy stuff */
412 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
415 clear_rvec(mu_tot);
416 calc_shifts(ems->s.box,fr->shift_vec);
419 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
420 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
422 if (!(cr->duty & DUTY_PME)) {
423 /* Tell the PME only node to finish */
424 gmx_pme_finish(cr);
427 done_mdoutf(outf);
429 em_time_end(fplog,cr,runtime,wcycle);
432 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
434 em_state_t tmp;
436 tmp = *ems1;
437 *ems1 = *ems2;
438 *ems2 = tmp;
441 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
443 int i;
445 for(i=0; (i<state->natoms); i++)
446 copy_rvec(ems->s.x[i],state->x[i]);
447 if (f != NULL)
448 copy_rvec(ems->f[i],f[i]);
451 static void write_em_traj(FILE *fplog,t_commrec *cr,
452 gmx_mdoutf_t *outf,
453 gmx_bool bX,gmx_bool bF,const char *confout,
454 gmx_mtop_t *top_global,
455 t_inputrec *ir,gmx_large_int_t step,
456 em_state_t *state,
457 t_state *state_global,rvec *f_global)
459 int mdof_flags;
461 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
463 f_global = state->f;
464 copy_em_coords_back(state,state_global,bF ? f_global : NULL);
467 mdof_flags = 0;
468 if (bX) { mdof_flags |= MDOF_X; }
469 if (bF) { mdof_flags |= MDOF_F; }
470 write_traj(fplog,cr,outf,mdof_flags,
471 top_global,step,(double)step,
472 &state->s,state_global,state->f,f_global,NULL,NULL);
474 if (confout != NULL && MASTER(cr))
476 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
478 /* Make molecules whole only for confout writing */
479 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
480 state_global->x);
483 write_sto_conf_mtop(confout,
484 *top_global->name,top_global,
485 state_global->x,NULL,ir->ePBC,state_global->box);
489 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
490 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
491 gmx_constr_t constr,gmx_localtop_t *top,
492 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
493 gmx_large_int_t count)
496 t_state *s1,*s2;
497 int start,end,gf,i,m;
498 rvec *x1,*x2;
499 real dvdlambda;
501 s1 = &ems1->s;
502 s2 = &ems2->s;
504 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
505 gmx_incons("state mismatch in do_em_step");
507 s2->flags = s1->flags;
509 if (s2->nalloc != s1->nalloc) {
510 s2->nalloc = s1->nalloc;
511 srenew(s2->x,s1->nalloc);
512 srenew(ems2->f, s1->nalloc);
513 if (s2->flags & (1<<estCGP))
514 srenew(s2->cg_p, s1->nalloc);
517 s2->natoms = s1->natoms;
518 s2->lambda = s1->lambda;
519 copy_mat(s1->box,s2->box);
521 start = md->start;
522 end = md->start + md->homenr;
524 x1 = s1->x;
525 x2 = s2->x;
526 gf = 0;
527 for(i=start; i<end; i++) {
528 if (md->cFREEZE)
529 gf = md->cFREEZE[i];
530 for(m=0; m<DIM; m++) {
531 if (ir->opts.nFreeze[gf][m])
532 x2[i][m] = x1[i][m];
533 else
534 x2[i][m] = x1[i][m] + a*f[i][m];
538 if (s2->flags & (1<<estCGP)) {
539 /* Copy the CG p vector */
540 x1 = s1->cg_p;
541 x2 = s2->cg_p;
542 for(i=start; i<end; i++)
543 copy_rvec(x1[i],x2[i]);
546 if (DOMAINDECOMP(cr)) {
547 s2->ddp_count = s1->ddp_count;
548 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
549 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
550 srenew(s2->cg_gl,s2->cg_gl_nalloc);
552 s2->ncg_gl = s1->ncg_gl;
553 for(i=0; i<s2->ncg_gl; i++)
554 s2->cg_gl[i] = s1->cg_gl[i];
555 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
558 if (constr) {
559 wallcycle_start(wcycle,ewcCONSTR);
560 dvdlambda = 0;
561 constrain(NULL,TRUE,TRUE,constr,&top->idef,
562 ir,NULL,cr,count,0,md,
563 s1->x,s2->x,NULL,s2->box,s2->lambda,
564 &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
565 wallcycle_stop(wcycle,ewcCONSTR);
569 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
572 int start,end,i,m;
574 if (DOMAINDECOMP(cr)) {
575 start = 0;
576 end = cr->dd->nat_home;
577 } else if (PARTDECOMP(cr)) {
578 pd_at_range(cr,&start,&end);
579 } else {
580 start = 0;
581 end = n;
584 for(i=start; i<end; i++) {
585 for(m=0; m<DIM; m++) {
586 x2[i][m] = x1[i][m] + a*f[i][m];
591 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
594 int start,end,i,m;
596 if (DOMAINDECOMP(cr)) {
597 start = 0;
598 end = cr->dd->nat_home;
599 } else if (PARTDECOMP(cr)) {
600 pd_at_range(cr,&start,&end);
601 } else {
602 start = 0;
603 end = n;
606 for(i=start; i<end; i++) {
607 for(m=0; m<DIM; m++) {
608 f[i][m] = (x1[i][m] - x2[i][m])*a;
613 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
614 gmx_mtop_t *top_global,t_inputrec *ir,
615 em_state_t *ems,gmx_localtop_t *top,
616 t_mdatoms *mdatoms,t_forcerec *fr,
617 gmx_vsite_t *vsite,gmx_constr_t constr,
618 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
620 /* Repartition the domain decomposition */
621 wallcycle_start(wcycle,ewcDOMDEC);
622 dd_partition_system(fplog,step,cr,FALSE,1,
623 NULL,top_global,ir,
624 &ems->s,&ems->f,
625 mdatoms,top,fr,vsite,NULL,constr,
626 nrnb,wcycle,FALSE);
627 dd_store_state(cr->dd,&ems->s);
628 wallcycle_stop(wcycle,ewcDOMDEC);
631 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
632 t_state *state_global,gmx_mtop_t *top_global,
633 em_state_t *ems,gmx_localtop_t *top,
634 t_inputrec *inputrec,
635 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
636 gmx_global_stat_t gstat,
637 gmx_vsite_t *vsite,gmx_constr_t constr,
638 t_fcdata *fcd,
639 t_graph *graph,t_mdatoms *mdatoms,
640 t_forcerec *fr,rvec mu_tot,
641 gmx_enerdata_t *enerd,tensor vir,tensor pres,
642 gmx_large_int_t count,gmx_bool bFirst)
644 real t;
645 gmx_bool bNS;
646 int nabnsb;
647 tensor force_vir,shake_vir,ekin;
648 real dvdl,prescorr,enercorr,dvdlcorr;
649 real terminate=0;
651 /* Set the time to the initial time, the time does not change during EM */
652 t = inputrec->init_t;
654 if (bFirst ||
655 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
656 /* This the first state or an old state used before the last ns */
657 bNS = TRUE;
658 } else {
659 bNS = FALSE;
660 if (inputrec->nstlist > 0) {
661 bNS = TRUE;
662 } else if (inputrec->nstlist == -1) {
663 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
664 if (PAR(cr))
665 gmx_sumi(1,&nabnsb,cr);
666 bNS = (nabnsb > 0);
670 if (vsite)
671 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
672 top->idef.iparams,top->idef.il,
673 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
675 if (DOMAINDECOMP(cr)) {
676 if (bNS) {
677 /* Repartition the domain decomposition */
678 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
679 ems,top,mdatoms,fr,vsite,constr,
680 nrnb,wcycle);
684 /* Calc force & energy on new trial position */
685 /* do_force always puts the charge groups in the box and shifts again
686 * We do not unshift, so molecules are always whole in congrad.c
688 do_force(fplog,cr,inputrec,
689 count,nrnb,wcycle,top,top_global,&top_global->groups,
690 ems->s.box,ems->s.x,&ems->s.hist,
691 ems->f,force_vir,mdatoms,enerd,fcd,
692 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
693 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
694 (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
696 /* Clear the unused shake virial and pressure */
697 clear_mat(shake_vir);
698 clear_mat(pres);
700 /* Calculate long range corrections to pressure and energy */
701 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
702 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
703 /* don't think these next 4 lines can be moved in for now, because we
704 don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
705 enerd->term[F_DISPCORR] = enercorr;
706 enerd->term[F_EPOT] += enercorr;
707 enerd->term[F_PRES] += prescorr;
708 enerd->term[F_DVDL] += dvdlcorr;
710 /* Communicate stuff when parallel */
711 if (PAR(cr) && inputrec->eI != eiNM)
713 wallcycle_start(wcycle,ewcMoveE);
715 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
716 inputrec,NULL,NULL,NULL,1,&terminate,
717 top_global,&ems->s,FALSE,
718 CGLO_ENERGY |
719 CGLO_PRESSURE |
720 CGLO_CONSTRAINT |
721 CGLO_FIRSTITERATE);
723 wallcycle_stop(wcycle,ewcMoveE);
726 ems->epot = enerd->term[F_EPOT];
728 if (constr) {
729 /* Project out the constraint components of the force */
730 wallcycle_start(wcycle,ewcCONSTR);
731 dvdl = 0;
732 constrain(NULL,FALSE,FALSE,constr,&top->idef,
733 inputrec,NULL,cr,count,0,mdatoms,
734 ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
735 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
736 if (fr->bSepDVDL && fplog)
737 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
738 enerd->term[F_DHDL_CON] += dvdl;
739 m_add(force_vir,shake_vir,vir);
740 wallcycle_stop(wcycle,ewcCONSTR);
741 } else {
742 copy_mat(force_vir,vir);
745 clear_mat(ekin);
746 enerd->term[F_PRES] =
747 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
748 (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
750 sum_dhdl(enerd,ems->s.lambda,inputrec);
752 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
754 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
758 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
759 gmx_mtop_t *mtop,
760 em_state_t *s_min,em_state_t *s_b)
762 rvec *fm,*fb,*fmg;
763 t_block *cgs_gl;
764 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
765 double partsum;
766 unsigned char *grpnrFREEZE;
768 if (debug)
769 fprintf(debug,"Doing reorder_partsum\n");
771 fm = s_min->f;
772 fb = s_b->f;
774 cgs_gl = dd_charge_groups_global(cr->dd);
775 index = cgs_gl->index;
777 /* Collect fm in a global vector fmg.
778 * This conflicts with the spirit of domain decomposition,
779 * but to fully optimize this a much more complicated algorithm is required.
781 snew(fmg,mtop->natoms);
783 ncg = s_min->s.ncg_gl;
784 cg_gl = s_min->s.cg_gl;
785 i = 0;
786 for(c=0; c<ncg; c++) {
787 cg = cg_gl[c];
788 a0 = index[cg];
789 a1 = index[cg+1];
790 for(a=a0; a<a1; a++) {
791 copy_rvec(fm[i],fmg[a]);
792 i++;
795 gmx_sum(mtop->natoms*3,fmg[0],cr);
797 /* Now we will determine the part of the sum for the cgs in state s_b */
798 ncg = s_b->s.ncg_gl;
799 cg_gl = s_b->s.cg_gl;
800 partsum = 0;
801 i = 0;
802 gf = 0;
803 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
804 for(c=0; c<ncg; c++) {
805 cg = cg_gl[c];
806 a0 = index[cg];
807 a1 = index[cg+1];
808 for(a=a0; a<a1; a++) {
809 if (mdatoms->cFREEZE && grpnrFREEZE) {
810 gf = grpnrFREEZE[i];
812 for(m=0; m<DIM; m++) {
813 if (!opts->nFreeze[gf][m]) {
814 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
817 i++;
821 sfree(fmg);
823 return partsum;
826 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
827 gmx_mtop_t *mtop,
828 em_state_t *s_min,em_state_t *s_b)
830 rvec *fm,*fb;
831 double sum;
832 int gf,i,m;
834 /* This is just the classical Polak-Ribiere calculation of beta;
835 * it looks a bit complicated since we take freeze groups into account,
836 * and might have to sum it in parallel runs.
839 if (!DOMAINDECOMP(cr) ||
840 (s_min->s.ddp_count == cr->dd->ddp_count &&
841 s_b->s.ddp_count == cr->dd->ddp_count)) {
842 fm = s_min->f;
843 fb = s_b->f;
844 sum = 0;
845 gf = 0;
846 /* This part of code can be incorrect with DD,
847 * since the atom ordering in s_b and s_min might differ.
849 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
850 if (mdatoms->cFREEZE)
851 gf = mdatoms->cFREEZE[i];
852 for(m=0; m<DIM; m++)
853 if (!opts->nFreeze[gf][m]) {
854 sum += (fb[i][m] - fm[i][m])*fb[i][m];
857 } else {
858 /* We need to reorder cgs while summing */
859 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
861 if (PAR(cr))
862 gmx_sumd(1,&sum,cr);
864 return sum/sqr(s_min->fnorm);
867 double do_cg(FILE *fplog,t_commrec *cr,
868 int nfile,const t_filenm fnm[],
869 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
870 int nstglobalcomm,
871 gmx_vsite_t *vsite,gmx_constr_t constr,
872 int stepout,
873 t_inputrec *inputrec,
874 gmx_mtop_t *top_global,t_fcdata *fcd,
875 t_state *state_global,
876 t_mdatoms *mdatoms,
877 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
878 gmx_edsam_t ed,
879 t_forcerec *fr,
880 int repl_ex_nst,int repl_ex_seed,
881 real cpt_period,real max_hours,
882 const char *deviceOptions,
883 unsigned long Flags,
884 gmx_runtime_t *runtime)
886 const char *CG="Polak-Ribiere Conjugate Gradients";
888 em_state_t *s_min,*s_a,*s_b,*s_c;
889 gmx_localtop_t *top;
890 gmx_enerdata_t *enerd;
891 rvec *f;
892 gmx_global_stat_t gstat;
893 t_graph *graph;
894 rvec *f_global,*p,*sf,*sfm;
895 double gpa,gpb,gpc,tmp,sum[2],minstep;
896 real fnormn;
897 real stepsize;
898 real a,b,c,beta=0.0;
899 real epot_repl=0;
900 real pnorm;
901 t_mdebin *mdebin;
902 gmx_bool converged,foundlower;
903 rvec mu_tot;
904 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
905 tensor vir,pres;
906 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
907 gmx_mdoutf_t *outf;
908 int i,m,gf,step,nminstep;
909 real terminate=0;
911 step=0;
913 s_min = init_em_state();
914 s_a = init_em_state();
915 s_b = init_em_state();
916 s_c = init_em_state();
918 /* Init em and store the local state in s_min */
919 init_em(fplog,CG,cr,inputrec,
920 state_global,top_global,s_min,&top,&f,&f_global,
921 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
922 nfile,fnm,&outf,&mdebin);
924 /* Print to log file */
925 print_em_start(fplog,cr,runtime,wcycle,CG);
927 /* Max number of steps */
928 number_steps=inputrec->nsteps;
930 if (MASTER(cr))
931 sp_header(stderr,CG,inputrec->em_tol,number_steps);
932 if (fplog)
933 sp_header(fplog,CG,inputrec->em_tol,number_steps);
935 /* Call the force routine and some auxiliary (neighboursearching etc.) */
936 /* do_force always puts the charge groups in the box and shifts again
937 * We do not unshift, so molecules are always whole in congrad.c
939 evaluate_energy(fplog,bVerbose,cr,
940 state_global,top_global,s_min,top,
941 inputrec,nrnb,wcycle,gstat,
942 vsite,constr,fcd,graph,mdatoms,fr,
943 mu_tot,enerd,vir,pres,-1,TRUE);
944 where();
946 if (MASTER(cr)) {
947 /* Copy stuff to the energy bin for easy printing etc. */
948 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
949 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
950 NULL,NULL,vir,pres,NULL,mu_tot,constr);
952 print_ebin_header(fplog,step,step,s_min->s.lambda);
953 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
954 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
956 where();
958 /* Estimate/guess the initial stepsize */
959 stepsize = inputrec->em_stepsize/s_min->fnorm;
961 if (MASTER(cr)) {
962 fprintf(stderr," F-max = %12.5e on atom %d\n",
963 s_min->fmax,s_min->a_fmax+1);
964 fprintf(stderr," F-Norm = %12.5e\n",
965 s_min->fnorm/sqrt(state_global->natoms));
966 fprintf(stderr,"\n");
967 /* and copy to the log file too... */
968 fprintf(fplog," F-max = %12.5e on atom %d\n",
969 s_min->fmax,s_min->a_fmax+1);
970 fprintf(fplog," F-Norm = %12.5e\n",
971 s_min->fnorm/sqrt(state_global->natoms));
972 fprintf(fplog,"\n");
974 /* Start the loop over CG steps.
975 * Each successful step is counted, and we continue until
976 * we either converge or reach the max number of steps.
978 converged = FALSE;
979 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
981 /* start taking steps in a new direction
982 * First time we enter the routine, beta=0, and the direction is
983 * simply the negative gradient.
986 /* Calculate the new direction in p, and the gradient in this direction, gpa */
987 p = s_min->s.cg_p;
988 sf = s_min->f;
989 gpa = 0;
990 gf = 0;
991 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
992 if (mdatoms->cFREEZE)
993 gf = mdatoms->cFREEZE[i];
994 for(m=0; m<DIM; m++) {
995 if (!inputrec->opts.nFreeze[gf][m]) {
996 p[i][m] = sf[i][m] + beta*p[i][m];
997 gpa -= p[i][m]*sf[i][m];
998 /* f is negative gradient, thus the sign */
999 } else {
1000 p[i][m] = 0;
1005 /* Sum the gradient along the line across CPUs */
1006 if (PAR(cr))
1007 gmx_sumd(1,&gpa,cr);
1009 /* Calculate the norm of the search vector */
1010 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1012 /* Just in case stepsize reaches zero due to numerical precision... */
1013 if(stepsize<=0)
1014 stepsize = inputrec->em_stepsize/pnorm;
1017 * Double check the value of the derivative in the search direction.
1018 * If it is positive it must be due to the old information in the
1019 * CG formula, so just remove that and start over with beta=0.
1020 * This corresponds to a steepest descent step.
1022 if(gpa>0) {
1023 beta = 0;
1024 step--; /* Don't count this step since we are restarting */
1025 continue; /* Go back to the beginning of the big for-loop */
1028 /* Calculate minimum allowed stepsize, before the average (norm)
1029 * relative change in coordinate is smaller than precision
1031 minstep=0;
1032 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1033 for(m=0; m<DIM; m++) {
1034 tmp = fabs(s_min->s.x[i][m]);
1035 if(tmp < 1.0)
1036 tmp = 1.0;
1037 tmp = p[i][m]/tmp;
1038 minstep += tmp*tmp;
1041 /* Add up from all CPUs */
1042 if(PAR(cr))
1043 gmx_sumd(1,&minstep,cr);
1045 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1047 if(stepsize<minstep) {
1048 converged=TRUE;
1049 break;
1052 /* Write coordinates if necessary */
1053 do_x = do_per_step(step,inputrec->nstxout);
1054 do_f = do_per_step(step,inputrec->nstfout);
1056 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1057 top_global,inputrec,step,
1058 s_min,state_global,f_global);
1060 /* Take a step downhill.
1061 * In theory, we should minimize the function along this direction.
1062 * That is quite possible, but it turns out to take 5-10 function evaluations
1063 * for each line. However, we dont really need to find the exact minimum -
1064 * it is much better to start a new CG step in a modified direction as soon
1065 * as we are close to it. This will save a lot of energy evaluations.
1067 * In practice, we just try to take a single step.
1068 * If it worked (i.e. lowered the energy), we increase the stepsize but
1069 * the continue straight to the next CG step without trying to find any minimum.
1070 * If it didn't work (higher energy), there must be a minimum somewhere between
1071 * the old position and the new one.
1073 * Due to the finite numerical accuracy, it turns out that it is a good idea
1074 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1075 * This leads to lower final energies in the tests I've done. / Erik
1077 s_a->epot = s_min->epot;
1078 a = 0.0;
1079 c = a + stepsize; /* reference position along line is zero */
1081 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1082 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1083 s_min,top,mdatoms,fr,vsite,constr,
1084 nrnb,wcycle);
1087 /* Take a trial step (new coords in s_c) */
1088 do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1089 constr,top,nrnb,wcycle,-1);
1091 neval++;
1092 /* Calculate energy for the trial step */
1093 evaluate_energy(fplog,bVerbose,cr,
1094 state_global,top_global,s_c,top,
1095 inputrec,nrnb,wcycle,gstat,
1096 vsite,constr,fcd,graph,mdatoms,fr,
1097 mu_tot,enerd,vir,pres,-1,FALSE);
1099 /* Calc derivative along line */
1100 p = s_c->s.cg_p;
1101 sf = s_c->f;
1102 gpc=0;
1103 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1104 for(m=0; m<DIM; m++)
1105 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1107 /* Sum the gradient along the line across CPUs */
1108 if (PAR(cr))
1109 gmx_sumd(1,&gpc,cr);
1111 /* This is the max amount of increase in energy we tolerate */
1112 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1114 /* Accept the step if the energy is lower, or if it is not significantly higher
1115 * and the line derivative is still negative.
1117 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1118 foundlower = TRUE;
1119 /* Great, we found a better energy. Increase step for next iteration
1120 * if we are still going down, decrease it otherwise
1122 if(gpc<0)
1123 stepsize *= 1.618034; /* The golden section */
1124 else
1125 stepsize *= 0.618034; /* 1/golden section */
1126 } else {
1127 /* New energy is the same or higher. We will have to do some work
1128 * to find a smaller value in the interval. Take smaller step next time!
1130 foundlower = FALSE;
1131 stepsize *= 0.618034;
1137 /* OK, if we didn't find a lower value we will have to locate one now - there must
1138 * be one in the interval [a=0,c].
1139 * The same thing is valid here, though: Don't spend dozens of iterations to find
1140 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1141 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1143 * I also have a safeguard for potentially really patological functions so we never
1144 * take more than 20 steps before we give up ...
1146 * If we already found a lower value we just skip this step and continue to the update.
1148 if (!foundlower) {
1149 nminstep=0;
1151 do {
1152 /* Select a new trial point.
1153 * If the derivatives at points a & c have different sign we interpolate to zero,
1154 * otherwise just do a bisection.
1156 if(gpa<0 && gpc>0)
1157 b = a + gpa*(a-c)/(gpc-gpa);
1158 else
1159 b = 0.5*(a+c);
1161 /* safeguard if interpolation close to machine accuracy causes errors:
1162 * never go outside the interval
1164 if(b<=a || b>=c)
1165 b = 0.5*(a+c);
1167 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1168 /* Reload the old state */
1169 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1170 s_min,top,mdatoms,fr,vsite,constr,
1171 nrnb,wcycle);
1174 /* Take a trial step to this new point - new coords in s_b */
1175 do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1176 constr,top,nrnb,wcycle,-1);
1178 neval++;
1179 /* Calculate energy for the trial step */
1180 evaluate_energy(fplog,bVerbose,cr,
1181 state_global,top_global,s_b,top,
1182 inputrec,nrnb,wcycle,gstat,
1183 vsite,constr,fcd,graph,mdatoms,fr,
1184 mu_tot,enerd,vir,pres,-1,FALSE);
1186 /* p does not change within a step, but since the domain decomposition
1187 * might change, we have to use cg_p of s_b here.
1189 p = s_b->s.cg_p;
1190 sf = s_b->f;
1191 gpb=0;
1192 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1193 for(m=0; m<DIM; m++)
1194 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1196 /* Sum the gradient along the line across CPUs */
1197 if (PAR(cr))
1198 gmx_sumd(1,&gpb,cr);
1200 if (debug)
1201 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1202 s_a->epot,s_b->epot,s_c->epot,gpb);
1204 epot_repl = s_b->epot;
1206 /* Keep one of the intervals based on the value of the derivative at the new point */
1207 if (gpb > 0) {
1208 /* Replace c endpoint with b */
1209 swap_em_state(s_b,s_c);
1210 c = b;
1211 gpc = gpb;
1212 } else {
1213 /* Replace a endpoint with b */
1214 swap_em_state(s_b,s_a);
1215 a = b;
1216 gpa = gpb;
1220 * Stop search as soon as we find a value smaller than the endpoints.
1221 * Never run more than 20 steps, no matter what.
1223 nminstep++;
1224 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1225 (nminstep < 20));
1227 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1228 nminstep >= 20) {
1229 /* OK. We couldn't find a significantly lower energy.
1230 * If beta==0 this was steepest descent, and then we give up.
1231 * If not, set beta=0 and restart with steepest descent before quitting.
1233 if (beta == 0.0) {
1234 /* Converged */
1235 converged = TRUE;
1236 break;
1237 } else {
1238 /* Reset memory before giving up */
1239 beta = 0.0;
1240 continue;
1244 /* Select min energy state of A & C, put the best in B.
1246 if (s_c->epot < s_a->epot) {
1247 if (debug)
1248 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1249 s_c->epot,s_a->epot);
1250 swap_em_state(s_b,s_c);
1251 gpb = gpc;
1252 b = c;
1253 } else {
1254 if (debug)
1255 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1256 s_a->epot,s_c->epot);
1257 swap_em_state(s_b,s_a);
1258 gpb = gpa;
1259 b = a;
1262 } else {
1263 if (debug)
1264 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1265 s_c->epot);
1266 swap_em_state(s_b,s_c);
1267 gpb = gpc;
1268 b = c;
1271 /* new search direction */
1272 /* beta = 0 means forget all memory and restart with steepest descents. */
1273 if (nstcg && ((step % nstcg)==0))
1274 beta = 0.0;
1275 else {
1276 /* s_min->fnorm cannot be zero, because then we would have converged
1277 * and broken out.
1280 /* Polak-Ribiere update.
1281 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1283 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1285 /* Limit beta to prevent oscillations */
1286 if (fabs(beta) > 5.0)
1287 beta = 0.0;
1290 /* update positions */
1291 swap_em_state(s_min,s_b);
1292 gpa = gpb;
1294 /* Print it if necessary */
1295 if (MASTER(cr)) {
1296 if(bVerbose)
1297 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1298 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1299 s_min->fmax,s_min->a_fmax+1);
1300 /* Store the new (lower) energies */
1301 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1302 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1303 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1304 do_log = do_per_step(step,inputrec->nstlog);
1305 do_ene = do_per_step(step,inputrec->nstenergy);
1306 if(do_log)
1307 print_ebin_header(fplog,step,step,s_min->s.lambda);
1308 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1309 do_log ? fplog : NULL,step,step,eprNORMAL,
1310 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1313 /* Stop when the maximum force lies below tolerance.
1314 * If we have reached machine precision, converged is already set to true.
1316 converged = converged || (s_min->fmax < inputrec->em_tol);
1318 } /* End of the loop */
1320 if (converged)
1321 step--; /* we never took that last step in this case */
1323 if (s_min->fmax > inputrec->em_tol)
1325 if (MASTER(cr))
1327 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1328 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1330 converged = FALSE;
1333 if (MASTER(cr)) {
1334 /* If we printed energy and/or logfile last step (which was the last step)
1335 * we don't have to do it again, but otherwise print the final values.
1337 if(!do_log) {
1338 /* Write final value to log since we didn't do anything the last step */
1339 print_ebin_header(fplog,step,step,s_min->s.lambda);
1341 if (!do_ene || !do_log) {
1342 /* Write final energy file entries */
1343 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1344 !do_log ? fplog : NULL,step,step,eprNORMAL,
1345 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1349 /* Print some stuff... */
1350 if (MASTER(cr))
1351 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1353 /* IMPORTANT!
1354 * For accurate normal mode calculation it is imperative that we
1355 * store the last conformation into the full precision binary trajectory.
1357 * However, we should only do it if we did NOT already write this step
1358 * above (which we did if do_x or do_f was true).
1360 do_x = !do_per_step(step,inputrec->nstxout);
1361 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1363 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1364 top_global,inputrec,step,
1365 s_min,state_global,f_global);
1367 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1369 if (MASTER(cr)) {
1370 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1371 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1372 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1373 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1375 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1378 finish_em(fplog,cr,outf,runtime,wcycle);
1380 /* To print the actual number of steps we needed somewhere */
1381 runtime->nsteps_done = step;
1383 return 0;
1384 } /* That's all folks */
1387 double do_lbfgs(FILE *fplog,t_commrec *cr,
1388 int nfile,const t_filenm fnm[],
1389 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1390 int nstglobalcomm,
1391 gmx_vsite_t *vsite,gmx_constr_t constr,
1392 int stepout,
1393 t_inputrec *inputrec,
1394 gmx_mtop_t *top_global,t_fcdata *fcd,
1395 t_state *state,
1396 t_mdatoms *mdatoms,
1397 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1398 gmx_edsam_t ed,
1399 t_forcerec *fr,
1400 int repl_ex_nst,int repl_ex_seed,
1401 real cpt_period,real max_hours,
1402 const char *deviceOptions,
1403 unsigned long Flags,
1404 gmx_runtime_t *runtime)
1406 static const char *LBFGS="Low-Memory BFGS Minimizer";
1407 em_state_t ems;
1408 gmx_localtop_t *top;
1409 gmx_enerdata_t *enerd;
1410 rvec *f;
1411 gmx_global_stat_t gstat;
1412 t_graph *graph;
1413 rvec *f_global;
1414 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1415 double stepsize,gpa,gpb,gpc,tmp,minstep;
1416 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1417 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1418 real a,b,c,maxdelta,delta;
1419 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1420 real dgdx,dgdg,sq,yr,beta;
1421 t_mdebin *mdebin;
1422 gmx_bool converged,first;
1423 rvec mu_tot;
1424 real fnorm,fmax;
1425 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1426 tensor vir,pres;
1427 int start,end,number_steps;
1428 gmx_mdoutf_t *outf;
1429 int i,k,m,n,nfmax,gf,step;
1430 /* not used */
1431 real terminate;
1433 if (PAR(cr))
1434 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1436 n = 3*state->natoms;
1437 nmaxcorr = inputrec->nbfgscorr;
1439 /* Allocate memory */
1440 /* Use pointers to real so we dont have to loop over both atoms and
1441 * dimensions all the time...
1442 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1443 * that point to the same memory.
1445 snew(xa,n);
1446 snew(xb,n);
1447 snew(xc,n);
1448 snew(fa,n);
1449 snew(fb,n);
1450 snew(fc,n);
1451 snew(frozen,n);
1453 snew(p,n);
1454 snew(lastx,n);
1455 snew(lastf,n);
1456 snew(rho,nmaxcorr);
1457 snew(alpha,nmaxcorr);
1459 snew(dx,nmaxcorr);
1460 for(i=0;i<nmaxcorr;i++)
1461 snew(dx[i],n);
1463 snew(dg,nmaxcorr);
1464 for(i=0;i<nmaxcorr;i++)
1465 snew(dg[i],n);
1467 step = 0;
1468 neval = 0;
1470 /* Init em */
1471 init_em(fplog,LBFGS,cr,inputrec,
1472 state,top_global,&ems,&top,&f,&f_global,
1473 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1474 nfile,fnm,&outf,&mdebin);
1475 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1476 * so we free some memory again.
1478 sfree(ems.s.x);
1479 sfree(ems.f);
1481 xx = (real *)state->x;
1482 ff = (real *)f;
1484 start = mdatoms->start;
1485 end = mdatoms->homenr + start;
1487 /* Print to log file */
1488 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1490 do_log = do_ene = do_x = do_f = TRUE;
1492 /* Max number of steps */
1493 number_steps=inputrec->nsteps;
1495 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1496 gf = 0;
1497 for(i=start; i<end; i++) {
1498 if (mdatoms->cFREEZE)
1499 gf = mdatoms->cFREEZE[i];
1500 for(m=0; m<DIM; m++)
1501 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1503 if (MASTER(cr))
1504 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1505 if (fplog)
1506 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1508 if (vsite)
1509 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1510 top->idef.iparams,top->idef.il,
1511 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1513 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1514 /* do_force always puts the charge groups in the box and shifts again
1515 * We do not unshift, so molecules are always whole
1517 neval++;
1518 ems.s.x = state->x;
1519 ems.f = f;
1520 evaluate_energy(fplog,bVerbose,cr,
1521 state,top_global,&ems,top,
1522 inputrec,nrnb,wcycle,gstat,
1523 vsite,constr,fcd,graph,mdatoms,fr,
1524 mu_tot,enerd,vir,pres,-1,TRUE);
1525 where();
1527 if (MASTER(cr)) {
1528 /* Copy stuff to the energy bin for easy printing etc. */
1529 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1530 mdatoms->tmass,enerd,state,state->box,
1531 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1533 print_ebin_header(fplog,step,step,state->lambda);
1534 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1535 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1537 where();
1539 /* This is the starting energy */
1540 Epot = enerd->term[F_EPOT];
1542 fnorm = ems.fnorm;
1543 fmax = ems.fmax;
1544 nfmax = ems.a_fmax;
1546 /* Set the initial step.
1547 * since it will be multiplied by the non-normalized search direction
1548 * vector (force vector the first time), we scale it by the
1549 * norm of the force.
1552 if (MASTER(cr)) {
1553 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1554 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1555 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1556 fprintf(stderr,"\n");
1557 /* and copy to the log file too... */
1558 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1559 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1560 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1561 fprintf(fplog,"\n");
1564 point=0;
1565 for(i=0;i<n;i++)
1566 if(!frozen[i])
1567 dx[point][i] = ff[i]; /* Initial search direction */
1568 else
1569 dx[point][i] = 0;
1571 stepsize = 1.0/fnorm;
1572 converged = FALSE;
1574 /* Start the loop over BFGS steps.
1575 * Each successful step is counted, and we continue until
1576 * we either converge or reach the max number of steps.
1579 ncorr=0;
1581 /* Set the gradient from the force */
1582 converged = FALSE;
1583 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1585 /* Write coordinates if necessary */
1586 do_x = do_per_step(step,inputrec->nstxout);
1587 do_f = do_per_step(step,inputrec->nstfout);
1589 write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
1590 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1592 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1594 /* pointer to current direction - point=0 first time here */
1595 s=dx[point];
1597 /* calculate line gradient */
1598 for(gpa=0,i=0;i<n;i++)
1599 gpa-=s[i]*ff[i];
1601 /* Calculate minimum allowed stepsize, before the average (norm)
1602 * relative change in coordinate is smaller than precision
1604 for(minstep=0,i=0;i<n;i++) {
1605 tmp=fabs(xx[i]);
1606 if(tmp<1.0)
1607 tmp=1.0;
1608 tmp = s[i]/tmp;
1609 minstep += tmp*tmp;
1611 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1613 if(stepsize<minstep) {
1614 converged=TRUE;
1615 break;
1618 /* Store old forces and coordinates */
1619 for(i=0;i<n;i++) {
1620 lastx[i]=xx[i];
1621 lastf[i]=ff[i];
1623 Epot0=Epot;
1625 first=TRUE;
1627 for(i=0;i<n;i++)
1628 xa[i]=xx[i];
1630 /* Take a step downhill.
1631 * In theory, we should minimize the function along this direction.
1632 * That is quite possible, but it turns out to take 5-10 function evaluations
1633 * for each line. However, we dont really need to find the exact minimum -
1634 * it is much better to start a new BFGS step in a modified direction as soon
1635 * as we are close to it. This will save a lot of energy evaluations.
1637 * In practice, we just try to take a single step.
1638 * If it worked (i.e. lowered the energy), we increase the stepsize but
1639 * the continue straight to the next BFGS step without trying to find any minimum.
1640 * If it didn't work (higher energy), there must be a minimum somewhere between
1641 * the old position and the new one.
1643 * Due to the finite numerical accuracy, it turns out that it is a good idea
1644 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1645 * This leads to lower final energies in the tests I've done. / Erik
1647 foundlower=FALSE;
1648 EpotA = Epot0;
1649 a = 0.0;
1650 c = a + stepsize; /* reference position along line is zero */
1652 /* Check stepsize first. We do not allow displacements
1653 * larger than emstep.
1655 do {
1656 c = a + stepsize;
1657 maxdelta=0;
1658 for(i=0;i<n;i++) {
1659 delta=c*s[i];
1660 if(delta>maxdelta)
1661 maxdelta=delta;
1663 if(maxdelta>inputrec->em_stepsize)
1664 stepsize*=0.1;
1665 } while(maxdelta>inputrec->em_stepsize);
1667 /* Take a trial step */
1668 for (i=0; i<n; i++)
1669 xc[i] = lastx[i] + c*s[i];
1671 neval++;
1672 /* Calculate energy for the trial step */
1673 ems.s.x = (rvec *)xc;
1674 ems.f = (rvec *)fc;
1675 evaluate_energy(fplog,bVerbose,cr,
1676 state,top_global,&ems,top,
1677 inputrec,nrnb,wcycle,gstat,
1678 vsite,constr,fcd,graph,mdatoms,fr,
1679 mu_tot,enerd,vir,pres,step,FALSE);
1680 EpotC = ems.epot;
1682 /* Calc derivative along line */
1683 for(gpc=0,i=0; i<n; i++) {
1684 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1686 /* Sum the gradient along the line across CPUs */
1687 if (PAR(cr))
1688 gmx_sumd(1,&gpc,cr);
1690 /* This is the max amount of increase in energy we tolerate */
1691 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1693 /* Accept the step if the energy is lower, or if it is not significantly higher
1694 * and the line derivative is still negative.
1696 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1697 foundlower = TRUE;
1698 /* Great, we found a better energy. Increase step for next iteration
1699 * if we are still going down, decrease it otherwise
1701 if(gpc<0)
1702 stepsize *= 1.618034; /* The golden section */
1703 else
1704 stepsize *= 0.618034; /* 1/golden section */
1705 } else {
1706 /* New energy is the same or higher. We will have to do some work
1707 * to find a smaller value in the interval. Take smaller step next time!
1709 foundlower = FALSE;
1710 stepsize *= 0.618034;
1713 /* OK, if we didn't find a lower value we will have to locate one now - there must
1714 * be one in the interval [a=0,c].
1715 * The same thing is valid here, though: Don't spend dozens of iterations to find
1716 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1717 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1719 * I also have a safeguard for potentially really patological functions so we never
1720 * take more than 20 steps before we give up ...
1722 * If we already found a lower value we just skip this step and continue to the update.
1725 if(!foundlower) {
1727 nminstep=0;
1728 do {
1729 /* Select a new trial point.
1730 * If the derivatives at points a & c have different sign we interpolate to zero,
1731 * otherwise just do a bisection.
1734 if(gpa<0 && gpc>0)
1735 b = a + gpa*(a-c)/(gpc-gpa);
1736 else
1737 b = 0.5*(a+c);
1739 /* safeguard if interpolation close to machine accuracy causes errors:
1740 * never go outside the interval
1742 if(b<=a || b>=c)
1743 b = 0.5*(a+c);
1745 /* Take a trial step */
1746 for (i=0; i<n; i++)
1747 xb[i] = lastx[i] + b*s[i];
1749 neval++;
1750 /* Calculate energy for the trial step */
1751 ems.s.x = (rvec *)xb;
1752 ems.f = (rvec *)fb;
1753 evaluate_energy(fplog,bVerbose,cr,
1754 state,top_global,&ems,top,
1755 inputrec,nrnb,wcycle,gstat,
1756 vsite,constr,fcd,graph,mdatoms,fr,
1757 mu_tot,enerd,vir,pres,step,FALSE);
1758 EpotB = ems.epot;
1760 fnorm = ems.fnorm;
1762 for(gpb=0,i=0; i<n; i++)
1763 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1765 /* Sum the gradient along the line across CPUs */
1766 if (PAR(cr))
1767 gmx_sumd(1,&gpb,cr);
1769 /* Keep one of the intervals based on the value of the derivative at the new point */
1770 if(gpb>0) {
1771 /* Replace c endpoint with b */
1772 EpotC = EpotB;
1773 c = b;
1774 gpc = gpb;
1775 /* swap coord pointers b/c */
1776 xtmp = xb;
1777 ftmp = fb;
1778 xb = xc;
1779 fb = fc;
1780 xc = xtmp;
1781 fc = ftmp;
1782 } else {
1783 /* Replace a endpoint with b */
1784 EpotA = EpotB;
1785 a = b;
1786 gpa = gpb;
1787 /* swap coord pointers a/b */
1788 xtmp = xb;
1789 ftmp = fb;
1790 xb = xa;
1791 fb = fa;
1792 xa = xtmp;
1793 fa = ftmp;
1797 * Stop search as soon as we find a value smaller than the endpoints,
1798 * or if the tolerance is below machine precision.
1799 * Never run more than 20 steps, no matter what.
1801 nminstep++;
1802 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1804 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1805 /* OK. We couldn't find a significantly lower energy.
1806 * If ncorr==0 this was steepest descent, and then we give up.
1807 * If not, reset memory to restart as steepest descent before quitting.
1809 if(ncorr==0) {
1810 /* Converged */
1811 converged=TRUE;
1812 break;
1813 } else {
1814 /* Reset memory */
1815 ncorr=0;
1816 /* Search in gradient direction */
1817 for(i=0;i<n;i++)
1818 dx[point][i]=ff[i];
1819 /* Reset stepsize */
1820 stepsize = 1.0/fnorm;
1821 continue;
1825 /* Select min energy state of A & C, put the best in xx/ff/Epot
1827 if(EpotC<EpotA) {
1828 Epot = EpotC;
1829 /* Use state C */
1830 for(i=0;i<n;i++) {
1831 xx[i]=xc[i];
1832 ff[i]=fc[i];
1834 stepsize=c;
1835 } else {
1836 Epot = EpotA;
1837 /* Use state A */
1838 for(i=0;i<n;i++) {
1839 xx[i]=xa[i];
1840 ff[i]=fa[i];
1842 stepsize=a;
1845 } else {
1846 /* found lower */
1847 Epot = EpotC;
1848 /* Use state C */
1849 for(i=0;i<n;i++) {
1850 xx[i]=xc[i];
1851 ff[i]=fc[i];
1853 stepsize=c;
1856 /* Update the memory information, and calculate a new
1857 * approximation of the inverse hessian
1860 /* Have new data in Epot, xx, ff */
1861 if(ncorr<nmaxcorr)
1862 ncorr++;
1864 for(i=0;i<n;i++) {
1865 dg[point][i]=lastf[i]-ff[i];
1866 dx[point][i]*=stepsize;
1869 dgdg=0;
1870 dgdx=0;
1871 for(i=0;i<n;i++) {
1872 dgdg+=dg[point][i]*dg[point][i];
1873 dgdx+=dg[point][i]*dx[point][i];
1876 diag=dgdx/dgdg;
1878 rho[point]=1.0/dgdx;
1879 point++;
1881 if(point>=nmaxcorr)
1882 point=0;
1884 /* Update */
1885 for(i=0;i<n;i++)
1886 p[i]=ff[i];
1888 cp=point;
1890 /* Recursive update. First go back over the memory points */
1891 for(k=0;k<ncorr;k++) {
1892 cp--;
1893 if(cp<0)
1894 cp=ncorr-1;
1896 sq=0;
1897 for(i=0;i<n;i++)
1898 sq+=dx[cp][i]*p[i];
1900 alpha[cp]=rho[cp]*sq;
1902 for(i=0;i<n;i++)
1903 p[i] -= alpha[cp]*dg[cp][i];
1906 for(i=0;i<n;i++)
1907 p[i] *= diag;
1909 /* And then go forward again */
1910 for(k=0;k<ncorr;k++) {
1911 yr = 0;
1912 for(i=0;i<n;i++)
1913 yr += p[i]*dg[cp][i];
1915 beta = rho[cp]*yr;
1916 beta = alpha[cp]-beta;
1918 for(i=0;i<n;i++)
1919 p[i] += beta*dx[cp][i];
1921 cp++;
1922 if(cp>=ncorr)
1923 cp=0;
1926 for(i=0;i<n;i++)
1927 if(!frozen[i])
1928 dx[point][i] = p[i];
1929 else
1930 dx[point][i] = 0;
1932 stepsize=1.0;
1934 /* Test whether the convergence criterion is met */
1935 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1937 /* Print it if necessary */
1938 if (MASTER(cr)) {
1939 if(bVerbose)
1940 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1941 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1942 /* Store the new (lower) energies */
1943 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1944 mdatoms->tmass,enerd,state,state->box,
1945 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1946 do_log = do_per_step(step,inputrec->nstlog);
1947 do_ene = do_per_step(step,inputrec->nstenergy);
1948 if(do_log)
1949 print_ebin_header(fplog,step,step,state->lambda);
1950 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1951 do_log ? fplog : NULL,step,step,eprNORMAL,
1952 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1955 /* Stop when the maximum force lies below tolerance.
1956 * If we have reached machine precision, converged is already set to true.
1959 converged = converged || (fmax < inputrec->em_tol);
1961 } /* End of the loop */
1963 if(converged)
1964 step--; /* we never took that last step in this case */
1966 if(fmax>inputrec->em_tol)
1968 if (MASTER(cr))
1970 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1971 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1973 converged = FALSE;
1976 /* If we printed energy and/or logfile last step (which was the last step)
1977 * we don't have to do it again, but otherwise print the final values.
1979 if(!do_log) /* Write final value to log since we didn't do anythin last step */
1980 print_ebin_header(fplog,step,step,state->lambda);
1981 if(!do_ene || !do_log) /* Write final energy file entries */
1982 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1983 !do_log ? fplog : NULL,step,step,eprNORMAL,
1984 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1986 /* Print some stuff... */
1987 if (MASTER(cr))
1988 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1990 /* IMPORTANT!
1991 * For accurate normal mode calculation it is imperative that we
1992 * store the last conformation into the full precision binary trajectory.
1994 * However, we should only do it if we did NOT already write this step
1995 * above (which we did if do_x or do_f was true).
1997 do_x = !do_per_step(step,inputrec->nstxout);
1998 do_f = !do_per_step(step,inputrec->nstfout);
1999 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2000 top_global,inputrec,step,
2001 &ems,state,f);
2003 if (MASTER(cr)) {
2004 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2005 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2006 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2007 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2009 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2012 finish_em(fplog,cr,outf,runtime,wcycle);
2014 /* To print the actual number of steps we needed somewhere */
2015 runtime->nsteps_done = step;
2017 return 0;
2018 } /* That's all folks */
2021 double do_steep(FILE *fplog,t_commrec *cr,
2022 int nfile, const t_filenm fnm[],
2023 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2024 int nstglobalcomm,
2025 gmx_vsite_t *vsite,gmx_constr_t constr,
2026 int stepout,
2027 t_inputrec *inputrec,
2028 gmx_mtop_t *top_global,t_fcdata *fcd,
2029 t_state *state_global,
2030 t_mdatoms *mdatoms,
2031 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2032 gmx_edsam_t ed,
2033 t_forcerec *fr,
2034 int repl_ex_nst,int repl_ex_seed,
2035 real cpt_period,real max_hours,
2036 const char *deviceOptions,
2037 unsigned long Flags,
2038 gmx_runtime_t *runtime)
2040 const char *SD="Steepest Descents";
2041 em_state_t *s_min,*s_try;
2042 rvec *f_global;
2043 gmx_localtop_t *top;
2044 gmx_enerdata_t *enerd;
2045 rvec *f;
2046 gmx_global_stat_t gstat;
2047 t_graph *graph;
2048 real stepsize,constepsize;
2049 real ustep,dvdlambda,fnormn;
2050 gmx_mdoutf_t *outf;
2051 t_mdebin *mdebin;
2052 gmx_bool bDone,bAbort,do_x,do_f;
2053 tensor vir,pres;
2054 rvec mu_tot;
2055 int nsteps;
2056 int count=0;
2057 int steps_accepted=0;
2058 /* not used */
2059 real terminate=0;
2061 s_min = init_em_state();
2062 s_try = init_em_state();
2064 /* Init em and store the local state in s_try */
2065 init_em(fplog,SD,cr,inputrec,
2066 state_global,top_global,s_try,&top,&f,&f_global,
2067 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2068 nfile,fnm,&outf,&mdebin);
2070 /* Print to log file */
2071 print_em_start(fplog,cr,runtime,wcycle,SD);
2073 /* Set variables for stepsize (in nm). This is the largest
2074 * step that we are going to make in any direction.
2076 ustep = inputrec->em_stepsize;
2077 stepsize = 0;
2079 /* Max number of steps */
2080 nsteps = inputrec->nsteps;
2082 if (MASTER(cr))
2083 /* Print to the screen */
2084 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2085 if (fplog)
2086 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2088 /**** HERE STARTS THE LOOP ****
2089 * count is the counter for the number of steps
2090 * bDone will be TRUE when the minimization has converged
2091 * bAbort will be TRUE when nsteps steps have been performed or when
2092 * the stepsize becomes smaller than is reasonable for machine precision
2094 count = 0;
2095 bDone = FALSE;
2096 bAbort = FALSE;
2097 while( !bDone && !bAbort ) {
2098 bAbort = (nsteps >= 0) && (count == nsteps);
2100 /* set new coordinates, except for first step */
2101 if (count > 0) {
2102 do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2103 constr,top,nrnb,wcycle,count);
2106 evaluate_energy(fplog,bVerbose,cr,
2107 state_global,top_global,s_try,top,
2108 inputrec,nrnb,wcycle,gstat,
2109 vsite,constr,fcd,graph,mdatoms,fr,
2110 mu_tot,enerd,vir,pres,count,count==0);
2112 if (MASTER(cr))
2113 print_ebin_header(fplog,count,count,s_try->s.lambda);
2115 if (count == 0)
2116 s_min->epot = s_try->epot + 1;
2118 /* Print it if necessary */
2119 if (MASTER(cr)) {
2120 if (bVerbose) {
2121 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2122 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2123 (s_try->epot < s_min->epot) ? '\n' : '\r');
2126 if (s_try->epot < s_min->epot) {
2127 /* Store the new (lower) energies */
2128 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2129 mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2130 NULL,NULL,vir,pres,NULL,mu_tot,constr);
2131 print_ebin(outf->fp_ene,TRUE,
2132 do_per_step(steps_accepted,inputrec->nstdisreout),
2133 do_per_step(steps_accepted,inputrec->nstorireout),
2134 fplog,count,count,eprNORMAL,TRUE,
2135 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2136 fflush(fplog);
2140 /* Now if the new energy is smaller than the previous...
2141 * or if this is the first step!
2142 * or if we did random steps!
2145 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2146 steps_accepted++;
2148 /* Test whether the convergence criterion is met... */
2149 bDone = (s_try->fmax < inputrec->em_tol);
2151 /* Copy the arrays for force, positions and energy */
2152 /* The 'Min' array always holds the coords and forces of the minimal
2153 sampled energy */
2154 swap_em_state(s_min,s_try);
2155 if (count > 0)
2156 ustep *= 1.2;
2158 /* Write to trn, if necessary */
2159 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2160 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2161 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2162 top_global,inputrec,count,
2163 s_min,state_global,f_global);
2165 else {
2166 /* If energy is not smaller make the step smaller... */
2167 ustep *= 0.5;
2169 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2170 /* Reload the old state */
2171 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2172 s_min,top,mdatoms,fr,vsite,constr,
2173 nrnb,wcycle);
2177 /* Determine new step */
2178 stepsize = ustep/s_min->fmax;
2180 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2181 #ifdef GMX_DOUBLE
2182 if (count == nsteps || ustep < 1e-12)
2183 #else
2184 if (count == nsteps || ustep < 1e-6)
2185 #endif
2187 if (MASTER(cr))
2189 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2190 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2192 bAbort=TRUE;
2195 count++;
2196 } /* End of the loop */
2198 /* Print some shit... */
2199 if (MASTER(cr))
2200 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2201 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2202 top_global,inputrec,count,
2203 s_min,state_global,f_global);
2205 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2207 if (MASTER(cr)) {
2208 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2209 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2210 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2211 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2214 finish_em(fplog,cr,outf,runtime,wcycle);
2216 /* To print the actual number of steps we needed somewhere */
2217 inputrec->nsteps=count;
2219 runtime->nsteps_done = count;
2221 return 0;
2222 } /* That's all folks */
2225 double do_nm(FILE *fplog,t_commrec *cr,
2226 int nfile,const t_filenm fnm[],
2227 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2228 int nstglobalcomm,
2229 gmx_vsite_t *vsite,gmx_constr_t constr,
2230 int stepout,
2231 t_inputrec *inputrec,
2232 gmx_mtop_t *top_global,t_fcdata *fcd,
2233 t_state *state_global,
2234 t_mdatoms *mdatoms,
2235 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2236 gmx_edsam_t ed,
2237 t_forcerec *fr,
2238 int repl_ex_nst,int repl_ex_seed,
2239 real cpt_period,real max_hours,
2240 const char *deviceOptions,
2241 unsigned long Flags,
2242 gmx_runtime_t *runtime)
2244 const char *NM = "Normal Mode Analysis";
2245 gmx_mdoutf_t *outf;
2246 int natoms,atom,d;
2247 int nnodes,node;
2248 rvec *f_global;
2249 gmx_localtop_t *top;
2250 gmx_enerdata_t *enerd;
2251 rvec *f;
2252 gmx_global_stat_t gstat;
2253 t_graph *graph;
2254 real t,lambda;
2255 gmx_bool bNS;
2256 tensor vir,pres;
2257 rvec mu_tot;
2258 rvec *fneg,*dfdx;
2259 gmx_bool bSparse; /* use sparse matrix storage format */
2260 size_t sz;
2261 gmx_sparsematrix_t * sparse_matrix = NULL;
2262 real * full_matrix = NULL;
2263 em_state_t * state_work;
2265 /* added with respect to mdrun */
2266 int i,j,k,row,col;
2267 real der_range=10.0*sqrt(GMX_REAL_EPS);
2268 real x_min;
2269 real fnorm,fmax;
2271 if (constr != NULL)
2273 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2276 state_work = init_em_state();
2278 /* Init em and store the local state in state_minimum */
2279 init_em(fplog,NM,cr,inputrec,
2280 state_global,top_global,state_work,&top,
2281 &f,&f_global,
2282 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2283 nfile,fnm,&outf,NULL);
2285 natoms = top_global->natoms;
2286 snew(fneg,natoms);
2287 snew(dfdx,natoms);
2289 #ifndef GMX_DOUBLE
2290 if (MASTER(cr))
2292 fprintf(stderr,
2293 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2294 " which MIGHT not be accurate enough for normal mode analysis.\n"
2295 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2296 " are fairly modest even if you recompile in double precision.\n\n");
2298 #endif
2300 /* Check if we can/should use sparse storage format.
2302 * Sparse format is only useful when the Hessian itself is sparse, which it
2303 * will be when we use a cutoff.
2304 * For small systems (n<1000) it is easier to always use full matrix format, though.
2306 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2308 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2309 bSparse = FALSE;
2311 else if(top_global->natoms < 1000)
2313 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2314 bSparse = FALSE;
2316 else
2318 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2319 bSparse = TRUE;
2322 sz = DIM*top_global->natoms;
2324 fprintf(stderr,"Allocating Hessian memory...\n\n");
2326 if(bSparse)
2328 sparse_matrix=gmx_sparsematrix_init(sz);
2329 sparse_matrix->compressed_symmetric = TRUE;
2331 else
2333 snew(full_matrix,sz*sz);
2336 /* Initial values */
2337 t = inputrec->init_t;
2338 lambda = inputrec->init_lambda;
2340 init_nrnb(nrnb);
2342 where();
2344 /* Write start time and temperature */
2345 print_em_start(fplog,cr,runtime,wcycle,NM);
2347 /* fudge nr of steps to nr of atoms */
2348 inputrec->nsteps = natoms*2;
2350 if (MASTER(cr))
2352 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2353 *(top_global->name),(int)inputrec->nsteps);
2356 nnodes = cr->nnodes;
2358 /* Make evaluate_energy do a single node force calculation */
2359 cr->nnodes = 1;
2360 evaluate_energy(fplog,bVerbose,cr,
2361 state_global,top_global,state_work,top,
2362 inputrec,nrnb,wcycle,gstat,
2363 vsite,constr,fcd,graph,mdatoms,fr,
2364 mu_tot,enerd,vir,pres,-1,TRUE);
2365 cr->nnodes = nnodes;
2367 /* if forces are not small, warn user */
2368 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2370 if (MASTER(cr))
2372 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2373 if (state_work->fmax > 1.0e-3)
2375 fprintf(stderr,"Maximum force probably not small enough to");
2376 fprintf(stderr," ensure that you are in an \nenergy well. ");
2377 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2378 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2382 /***********************************************************
2384 * Loop over all pairs in matrix
2386 * do_force called twice. Once with positive and
2387 * once with negative displacement
2389 ************************************************************/
2391 /* Steps are divided one by one over the nodes */
2392 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2395 for (d=0; d<DIM; d++)
2397 x_min = state_work->s.x[atom][d];
2399 state_work->s.x[atom][d] = x_min - der_range;
2401 /* Make evaluate_energy do a single node force calculation */
2402 cr->nnodes = 1;
2403 evaluate_energy(fplog,bVerbose,cr,
2404 state_global,top_global,state_work,top,
2405 inputrec,nrnb,wcycle,gstat,
2406 vsite,constr,fcd,graph,mdatoms,fr,
2407 mu_tot,enerd,vir,pres,atom*2,FALSE);
2409 for(i=0; i<natoms; i++)
2411 copy_rvec(state_work->f[i], fneg[i]);
2414 state_work->s.x[atom][d] = x_min + der_range;
2416 evaluate_energy(fplog,bVerbose,cr,
2417 state_global,top_global,state_work,top,
2418 inputrec,nrnb,wcycle,gstat,
2419 vsite,constr,fcd,graph,mdatoms,fr,
2420 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2421 cr->nnodes = nnodes;
2423 /* x is restored to original */
2424 state_work->s.x[atom][d] = x_min;
2426 for(j=0; j<natoms; j++)
2428 for (k=0; (k<DIM); k++)
2430 dfdx[j][k] =
2431 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2435 if (!MASTER(cr))
2437 #ifdef GMX_MPI
2438 #ifdef GMX_DOUBLE
2439 #define mpi_type MPI_DOUBLE
2440 #else
2441 #define mpi_type MPI_FLOAT
2442 #endif
2443 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2444 cr->mpi_comm_mygroup);
2445 #endif
2447 else
2449 for(node=0; (node<nnodes && atom+node<natoms); node++)
2451 if (node > 0)
2453 #ifdef GMX_MPI
2454 MPI_Status stat;
2455 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2456 cr->mpi_comm_mygroup,&stat);
2457 #undef mpi_type
2458 #endif
2461 row = (atom + node)*DIM + d;
2463 for(j=0; j<natoms; j++)
2465 for(k=0; k<DIM; k++)
2467 col = j*DIM + k;
2469 if (bSparse)
2471 if (col >= row && dfdx[j][k] != 0.0)
2473 gmx_sparsematrix_increment_value(sparse_matrix,
2474 row,col,dfdx[j][k]);
2477 else
2479 full_matrix[row*sz+col] = dfdx[j][k];
2486 if (bVerbose && fplog)
2488 fflush(fplog);
2491 /* write progress */
2492 if (MASTER(cr) && bVerbose)
2494 fprintf(stderr,"\rFinished step %d out of %d",
2495 min(atom+nnodes,natoms),natoms);
2496 fflush(stderr);
2500 if (MASTER(cr))
2502 fprintf(stderr,"\n\nWriting Hessian...\n");
2503 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2506 finish_em(fplog,cr,outf,runtime,wcycle);
2508 runtime->nsteps_done = natoms*2;
2510 return 0;