added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / mdlib / minimize.c
blobd14f434eedeb83f59610d324d04b818a5a71678f
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
6 * G R O M A C S
8 * GROningen MAchine for Chemical Simulations
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROwing Monsters And Cloning Shrimps
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <string.h>
41 #include <time.h>
42 #include <math.h>
43 #include "sysstuff.h"
44 #include "string2.h"
45 #include "network.h"
46 #include "confio.h"
47 #include "copyrite.h"
48 #include "smalloc.h"
49 #include "nrnb.h"
50 #include "main.h"
51 #include "force.h"
52 #include "macros.h"
53 #include "random.h"
54 #include "names.h"
55 #include "gmx_fatal.h"
56 #include "txtdump.h"
57 #include "typedefs.h"
58 #include "update.h"
59 #include "constr.h"
60 #include "vec.h"
61 #include "statutil.h"
62 #include "tgroup.h"
63 #include "mdebin.h"
64 #include "vsite.h"
65 #include "force.h"
66 #include "mdrun.h"
67 #include "md_support.h"
68 #include "domdec.h"
69 #include "partdec.h"
70 #include "trnio.h"
71 #include "sparsematrix.h"
72 #include "mtxio.h"
73 #include "mdatoms.h"
74 #include "ns.h"
75 #include "gmx_wallcycle.h"
76 #include "mtop_util.h"
77 #include "gmxfio.h"
78 #include "pme.h"
79 #include "bondf.h"
80 #include "gmx_omp_nthreads.h"
83 typedef struct {
84 t_state s;
85 rvec *f;
86 real epot;
87 real fnorm;
88 real fmax;
89 int a_fmax;
90 } em_state_t;
92 static em_state_t *init_em_state()
94 em_state_t *ems;
96 snew(ems,1);
98 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
99 snew(ems->s.lambda,efptNR);
101 return ems;
104 static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
105 gmx_wallcycle_t wcycle,
106 const char *name)
108 char buf[STRLEN];
110 runtime_start(runtime);
112 sprintf(buf,"Started %s",name);
113 print_date_and_time(fplog,cr->nodeid,buf,NULL);
115 wallcycle_start(wcycle,ewcRUN);
117 static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
118 gmx_wallcycle_t wcycle)
120 wallcycle_stop(wcycle,ewcRUN);
122 runtime_end(runtime);
125 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
127 fprintf(out,"\n");
128 fprintf(out,"%s:\n",minimizer);
129 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
130 fprintf(out," Number of steps = %12d\n",nsteps);
133 static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
135 char buffer[2048];
136 if (bLastStep)
138 sprintf(buffer,
139 "\nEnergy minimization reached the maximum number"
140 "of steps before the forces reached the requested"
141 "precision Fmax < %g.\n",ftol);
143 else
145 sprintf(buffer,
146 "\nEnergy minimization has stopped, but the forces have"
147 "not converged to the requested precision Fmax < %g (which"
148 "may not be possible for your system). It stopped"
149 "because the algorithm tried to make a new step whose size"
150 "was too small, or there was no change in the energy since"
151 "last step. Either way, we regard the minimization as"
152 "converged to within the available machine precision,"
153 "given your starting configuration and EM parameters.\n%s%s",
154 ftol,
155 sizeof(real)<sizeof(double) ?
156 "\nDouble precision normally gives you higher accuracy, but"
157 "this is often not needed for preparing to run molecular"
158 "dynamics.\n" :
160 bConstrain ?
161 "You might need to increase your constraint accuracy, or turn\n"
162 "off constraints altogether (set constraints = none in mdp file)\n" :
163 "");
165 fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
170 static void print_converged(FILE *fp,const char *alg,real ftol,
171 gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
172 real epot,real fmax, int nfmax, real fnorm)
174 char buf[STEPSTRSIZE];
176 if (bDone)
177 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
178 alg,ftol,gmx_step_str(count,buf));
179 else if(count<nsteps)
180 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
181 "but did not reach the requested Fmax < %g.\n",
182 alg,gmx_step_str(count,buf),ftol);
183 else
184 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
185 alg,ftol,gmx_step_str(count,buf));
187 #ifdef GMX_DOUBLE
188 fprintf(fp,"Potential Energy = %21.14e\n",epot);
189 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
190 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
191 #else
192 fprintf(fp,"Potential Energy = %14.7e\n",epot);
193 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
194 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
195 #endif
198 static void get_f_norm_max(t_commrec *cr,
199 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
200 real *fnorm,real *fmax,int *a_fmax)
202 double fnorm2,*sum;
203 real fmax2,fmax2_0,fam;
204 int la_max,a_max,start,end,i,m,gf;
206 /* This routine finds the largest force and returns it.
207 * On parallel machines the global max is taken.
209 fnorm2 = 0;
210 fmax2 = 0;
211 la_max = -1;
212 gf = 0;
213 start = mdatoms->start;
214 end = mdatoms->homenr + start;
215 if (mdatoms->cFREEZE) {
216 for(i=start; i<end; i++) {
217 gf = mdatoms->cFREEZE[i];
218 fam = 0;
219 for(m=0; m<DIM; m++)
220 if (!opts->nFreeze[gf][m])
221 fam += sqr(f[i][m]);
222 fnorm2 += fam;
223 if (fam > fmax2) {
224 fmax2 = fam;
225 la_max = i;
228 } else {
229 for(i=start; i<end; i++) {
230 fam = norm2(f[i]);
231 fnorm2 += fam;
232 if (fam > fmax2) {
233 fmax2 = fam;
234 la_max = i;
239 if (la_max >= 0 && DOMAINDECOMP(cr)) {
240 a_max = cr->dd->gatindex[la_max];
241 } else {
242 a_max = la_max;
244 if (PAR(cr)) {
245 snew(sum,2*cr->nnodes+1);
246 sum[2*cr->nodeid] = fmax2;
247 sum[2*cr->nodeid+1] = a_max;
248 sum[2*cr->nnodes] = fnorm2;
249 gmx_sumd(2*cr->nnodes+1,sum,cr);
250 fnorm2 = sum[2*cr->nnodes];
251 /* Determine the global maximum */
252 for(i=0; i<cr->nnodes; i++) {
253 if (sum[2*i] > fmax2) {
254 fmax2 = sum[2*i];
255 a_max = (int)(sum[2*i+1] + 0.5);
258 sfree(sum);
261 if (fnorm)
262 *fnorm = sqrt(fnorm2);
263 if (fmax)
264 *fmax = sqrt(fmax2);
265 if (a_fmax)
266 *a_fmax = a_max;
269 static void get_state_f_norm_max(t_commrec *cr,
270 t_grpopts *opts,t_mdatoms *mdatoms,
271 em_state_t *ems)
273 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
276 void init_em(FILE *fplog,const char *title,
277 t_commrec *cr,t_inputrec *ir,
278 t_state *state_global,gmx_mtop_t *top_global,
279 em_state_t *ems,gmx_localtop_t **top,
280 rvec **f,rvec **f_global,
281 t_nrnb *nrnb,rvec mu_tot,
282 t_forcerec *fr,gmx_enerdata_t **enerd,
283 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
284 gmx_vsite_t *vsite,gmx_constr_t constr,
285 int nfile,const t_filenm fnm[],
286 gmx_mdoutf_t **outf,t_mdebin **mdebin)
288 int start,homenr,i;
289 real dvdlambda;
291 if (fplog)
293 fprintf(fplog,"Initiating %s\n",title);
296 state_global->ngtc = 0;
298 /* Initialize lambda variables */
299 initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
301 init_nrnb(nrnb);
303 if (DOMAINDECOMP(cr))
305 *top = dd_init_local_top(top_global);
307 dd_init_local_state(cr->dd,state_global,&ems->s);
309 *f = NULL;
311 /* Distribute the charge groups over the nodes from the master node */
312 dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
313 state_global,top_global,ir,
314 &ems->s,&ems->f,mdatoms,*top,
315 fr,vsite,NULL,constr,
316 nrnb,NULL,FALSE);
317 dd_store_state(cr->dd,&ems->s);
319 if (ir->nstfout)
321 snew(*f_global,top_global->natoms);
323 else
325 *f_global = NULL;
327 *graph = NULL;
329 else
331 snew(*f,top_global->natoms);
333 /* Just copy the state */
334 ems->s = *state_global;
335 snew(ems->s.x,ems->s.nalloc);
336 snew(ems->f,ems->s.nalloc);
337 for(i=0; i<state_global->natoms; i++)
339 copy_rvec(state_global->x[i],ems->s.x[i]);
341 copy_mat(state_global->box,ems->s.box);
343 if (PAR(cr) && ir->eI != eiNM)
345 /* Initialize the particle decomposition and split the topology */
346 *top = split_system(fplog,top_global,ir,cr);
348 pd_cg_range(cr,&fr->cg0,&fr->hcg);
350 else
352 *top = gmx_mtop_generate_local_top(top_global,ir);
354 *f_global = *f;
356 forcerec_set_excl_load(fr,*top,cr);
358 init_bonded_thread_force_reduction(fr,&(*top)->idef);
360 if (ir->ePBC != epbcNONE && !fr->bMolPBC)
362 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
364 else
366 *graph = NULL;
369 if (PARTDECOMP(cr))
371 pd_at_range(cr,&start,&homenr);
372 homenr -= start;
374 else
376 start = 0;
377 homenr = top_global->natoms;
379 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
380 update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
382 if (vsite)
384 set_vsite_top(vsite,*top,mdatoms,cr);
388 if (constr)
390 if (ir->eConstrAlg == econtSHAKE &&
391 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
393 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
394 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
397 if (!DOMAINDECOMP(cr))
399 set_constraints(constr,*top,ir,mdatoms,cr);
402 if (!ir->bContinuation)
404 /* Constrain the starting coordinates */
405 dvdlambda=0;
406 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
407 ir,NULL,cr,-1,0,mdatoms,
408 ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
409 ems->s.lambda[efptFEP],&dvdlambda,
410 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
414 if (PAR(cr))
416 *gstat = global_stat_init(ir);
419 *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
421 snew(*enerd,1);
422 init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
423 *enerd);
425 if (mdebin != NULL)
427 /* Init bin for energy stuff */
428 *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
431 clear_rvec(mu_tot);
432 calc_shifts(ems->s.box,fr->shift_vec);
435 static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
436 gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
438 if (!(cr->duty & DUTY_PME)) {
439 /* Tell the PME only node to finish */
440 gmx_pme_send_finish(cr);
443 done_mdoutf(outf);
445 em_time_end(fplog,cr,runtime,wcycle);
448 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
450 em_state_t tmp;
452 tmp = *ems1;
453 *ems1 = *ems2;
454 *ems2 = tmp;
457 static void copy_em_coords(em_state_t *ems,t_state *state)
459 int i;
461 for(i=0; (i<state->natoms); i++)
463 copy_rvec(ems->s.x[i],state->x[i]);
467 static void write_em_traj(FILE *fplog,t_commrec *cr,
468 gmx_mdoutf_t *outf,
469 gmx_bool bX,gmx_bool bF,const char *confout,
470 gmx_mtop_t *top_global,
471 t_inputrec *ir,gmx_large_int_t step,
472 em_state_t *state,
473 t_state *state_global,rvec *f_global)
475 int mdof_flags;
477 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
479 copy_em_coords(state,state_global);
480 f_global = state->f;
483 mdof_flags = 0;
484 if (bX) { mdof_flags |= MDOF_X; }
485 if (bF) { mdof_flags |= MDOF_F; }
486 write_traj(fplog,cr,outf,mdof_flags,
487 top_global,step,(double)step,
488 &state->s,state_global,state->f,f_global,NULL,NULL);
490 if (confout != NULL && MASTER(cr))
492 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
494 /* Make molecules whole only for confout writing */
495 do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
496 state_global->x);
499 write_sto_conf_mtop(confout,
500 *top_global->name,top_global,
501 state_global->x,NULL,ir->ePBC,state_global->box);
505 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
506 gmx_bool bMolPBC,
507 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
508 gmx_constr_t constr,gmx_localtop_t *top,
509 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
510 gmx_large_int_t count)
513 t_state *s1,*s2;
514 int i;
515 int start,end;
516 rvec *x1,*x2;
517 real dvdlambda;
519 s1 = &ems1->s;
520 s2 = &ems2->s;
522 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
524 gmx_incons("state mismatch in do_em_step");
527 s2->flags = s1->flags;
529 if (s2->nalloc != s1->nalloc)
531 s2->nalloc = s1->nalloc;
532 srenew(s2->x,s1->nalloc);
533 srenew(ems2->f, s1->nalloc);
534 if (s2->flags & (1<<estCGP))
536 srenew(s2->cg_p, s1->nalloc);
540 s2->natoms = s1->natoms;
541 copy_mat(s1->box,s2->box);
542 /* Copy free energy state */
543 for (i=0;i<efptNR;i++)
545 s2->lambda[i] = s1->lambda[i];
547 copy_mat(s1->box,s2->box);
549 start = md->start;
550 end = md->start + md->homenr;
552 x1 = s1->x;
553 x2 = s2->x;
555 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
557 int gf,i,m;
559 gf = 0;
560 #pragma omp for schedule(static) nowait
561 for(i=start; i<end; i++)
563 if (md->cFREEZE)
565 gf = md->cFREEZE[i];
567 for(m=0; m<DIM; m++)
569 if (ir->opts.nFreeze[gf][m])
571 x2[i][m] = x1[i][m];
573 else
575 x2[i][m] = x1[i][m] + a*f[i][m];
580 if (s2->flags & (1<<estCGP))
582 /* Copy the CG p vector */
583 x1 = s1->cg_p;
584 x2 = s2->cg_p;
585 #pragma omp for schedule(static) nowait
586 for(i=start; i<end; i++)
588 copy_rvec(x1[i],x2[i]);
592 if (DOMAINDECOMP(cr))
594 s2->ddp_count = s1->ddp_count;
595 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
597 #pragma omp barrier
598 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
599 srenew(s2->cg_gl,s2->cg_gl_nalloc);
600 #pragma omp barrier
602 s2->ncg_gl = s1->ncg_gl;
603 #pragma omp for schedule(static) nowait
604 for(i=0; i<s2->ncg_gl; i++)
606 s2->cg_gl[i] = s1->cg_gl[i];
608 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
612 if (constr)
614 wallcycle_start(wcycle,ewcCONSTR);
615 dvdlambda = 0;
616 constrain(NULL,TRUE,TRUE,constr,&top->idef,
617 ir,NULL,cr,count,0,md,
618 s1->x,s2->x,NULL,bMolPBC,s2->box,
619 s2->lambda[efptBONDED],&dvdlambda,
620 NULL,NULL,nrnb,econqCoord,FALSE,0,0);
621 wallcycle_stop(wcycle,ewcCONSTR);
625 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
626 gmx_mtop_t *top_global,t_inputrec *ir,
627 em_state_t *ems,gmx_localtop_t *top,
628 t_mdatoms *mdatoms,t_forcerec *fr,
629 gmx_vsite_t *vsite,gmx_constr_t constr,
630 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
632 /* Repartition the domain decomposition */
633 wallcycle_start(wcycle,ewcDOMDEC);
634 dd_partition_system(fplog,step,cr,FALSE,1,
635 NULL,top_global,ir,
636 &ems->s,&ems->f,
637 mdatoms,top,fr,vsite,NULL,constr,
638 nrnb,wcycle,FALSE);
639 dd_store_state(cr->dd,&ems->s);
640 wallcycle_stop(wcycle,ewcDOMDEC);
643 static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
644 t_state *state_global,gmx_mtop_t *top_global,
645 em_state_t *ems,gmx_localtop_t *top,
646 t_inputrec *inputrec,
647 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
648 gmx_global_stat_t gstat,
649 gmx_vsite_t *vsite,gmx_constr_t constr,
650 t_fcdata *fcd,
651 t_graph *graph,t_mdatoms *mdatoms,
652 t_forcerec *fr,rvec mu_tot,
653 gmx_enerdata_t *enerd,tensor vir,tensor pres,
654 gmx_large_int_t count,gmx_bool bFirst)
656 real t;
657 gmx_bool bNS;
658 int nabnsb;
659 tensor force_vir,shake_vir,ekin;
660 real dvdlambda,prescorr,enercorr,dvdlcorr;
661 real terminate=0;
663 /* Set the time to the initial time, the time does not change during EM */
664 t = inputrec->init_t;
666 if (bFirst ||
667 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
668 /* This the first state or an old state used before the last ns */
669 bNS = TRUE;
670 } else {
671 bNS = FALSE;
672 if (inputrec->nstlist > 0) {
673 bNS = TRUE;
674 } else if (inputrec->nstlist == -1) {
675 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
676 if (PAR(cr))
677 gmx_sumi(1,&nabnsb,cr);
678 bNS = (nabnsb > 0);
682 if (vsite)
683 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
684 top->idef.iparams,top->idef.il,
685 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
687 if (DOMAINDECOMP(cr)) {
688 if (bNS) {
689 /* Repartition the domain decomposition */
690 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
691 ems,top,mdatoms,fr,vsite,constr,
692 nrnb,wcycle);
696 /* Calc force & energy on new trial position */
697 /* do_force always puts the charge groups in the box and shifts again
698 * We do not unshift, so molecules are always whole in congrad.c
700 do_force(fplog,cr,inputrec,
701 count,nrnb,wcycle,top,top_global,&top_global->groups,
702 ems->s.box,ems->s.x,&ems->s.hist,
703 ems->f,force_vir,mdatoms,enerd,fcd,
704 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
705 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
706 GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
707 (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
709 /* Clear the unused shake virial and pressure */
710 clear_mat(shake_vir);
711 clear_mat(pres);
713 /* Communicate stuff when parallel */
714 if (PAR(cr) && inputrec->eI != eiNM)
716 wallcycle_start(wcycle,ewcMoveE);
718 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
719 inputrec,NULL,NULL,NULL,1,&terminate,
720 top_global,&ems->s,FALSE,
721 CGLO_ENERGY |
722 CGLO_PRESSURE |
723 CGLO_CONSTRAINT |
724 CGLO_FIRSTITERATE);
726 wallcycle_stop(wcycle,ewcMoveE);
729 /* Calculate long range corrections to pressure and energy */
730 calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
731 pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
732 enerd->term[F_DISPCORR] = enercorr;
733 enerd->term[F_EPOT] += enercorr;
734 enerd->term[F_PRES] += prescorr;
735 enerd->term[F_DVDL] += dvdlcorr;
737 ems->epot = enerd->term[F_EPOT];
739 if (constr) {
740 /* Project out the constraint components of the force */
741 wallcycle_start(wcycle,ewcCONSTR);
742 dvdlambda = 0;
743 constrain(NULL,FALSE,FALSE,constr,&top->idef,
744 inputrec,NULL,cr,count,0,mdatoms,
745 ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
746 ems->s.lambda[efptBONDED],&dvdlambda,
747 NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
748 if (fr->bSepDVDL && fplog)
749 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
750 enerd->term[F_DVDL_BONDED] += dvdlambda;
751 m_add(force_vir,shake_vir,vir);
752 wallcycle_stop(wcycle,ewcCONSTR);
753 } else {
754 copy_mat(force_vir,vir);
757 clear_mat(ekin);
758 enerd->term[F_PRES] =
759 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
761 sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
763 if (EI_ENERGY_MINIMIZATION(inputrec->eI))
765 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
769 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
770 gmx_mtop_t *mtop,
771 em_state_t *s_min,em_state_t *s_b)
773 rvec *fm,*fb,*fmg;
774 t_block *cgs_gl;
775 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
776 double partsum;
777 unsigned char *grpnrFREEZE;
779 if (debug)
780 fprintf(debug,"Doing reorder_partsum\n");
782 fm = s_min->f;
783 fb = s_b->f;
785 cgs_gl = dd_charge_groups_global(cr->dd);
786 index = cgs_gl->index;
788 /* Collect fm in a global vector fmg.
789 * This conflicts with the spirit of domain decomposition,
790 * but to fully optimize this a much more complicated algorithm is required.
792 snew(fmg,mtop->natoms);
794 ncg = s_min->s.ncg_gl;
795 cg_gl = s_min->s.cg_gl;
796 i = 0;
797 for(c=0; c<ncg; c++) {
798 cg = cg_gl[c];
799 a0 = index[cg];
800 a1 = index[cg+1];
801 for(a=a0; a<a1; a++) {
802 copy_rvec(fm[i],fmg[a]);
803 i++;
806 gmx_sum(mtop->natoms*3,fmg[0],cr);
808 /* Now we will determine the part of the sum for the cgs in state s_b */
809 ncg = s_b->s.ncg_gl;
810 cg_gl = s_b->s.cg_gl;
811 partsum = 0;
812 i = 0;
813 gf = 0;
814 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
815 for(c=0; c<ncg; c++) {
816 cg = cg_gl[c];
817 a0 = index[cg];
818 a1 = index[cg+1];
819 for(a=a0; a<a1; a++) {
820 if (mdatoms->cFREEZE && grpnrFREEZE) {
821 gf = grpnrFREEZE[i];
823 for(m=0; m<DIM; m++) {
824 if (!opts->nFreeze[gf][m]) {
825 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
828 i++;
832 sfree(fmg);
834 return partsum;
837 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
838 gmx_mtop_t *mtop,
839 em_state_t *s_min,em_state_t *s_b)
841 rvec *fm,*fb;
842 double sum;
843 int gf,i,m;
845 /* This is just the classical Polak-Ribiere calculation of beta;
846 * it looks a bit complicated since we take freeze groups into account,
847 * and might have to sum it in parallel runs.
850 if (!DOMAINDECOMP(cr) ||
851 (s_min->s.ddp_count == cr->dd->ddp_count &&
852 s_b->s.ddp_count == cr->dd->ddp_count)) {
853 fm = s_min->f;
854 fb = s_b->f;
855 sum = 0;
856 gf = 0;
857 /* This part of code can be incorrect with DD,
858 * since the atom ordering in s_b and s_min might differ.
860 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
861 if (mdatoms->cFREEZE)
862 gf = mdatoms->cFREEZE[i];
863 for(m=0; m<DIM; m++)
864 if (!opts->nFreeze[gf][m]) {
865 sum += (fb[i][m] - fm[i][m])*fb[i][m];
868 } else {
869 /* We need to reorder cgs while summing */
870 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
872 if (PAR(cr))
873 gmx_sumd(1,&sum,cr);
875 return sum/sqr(s_min->fnorm);
878 double do_cg(FILE *fplog,t_commrec *cr,
879 int nfile,const t_filenm fnm[],
880 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
881 int nstglobalcomm,
882 gmx_vsite_t *vsite,gmx_constr_t constr,
883 int stepout,
884 t_inputrec *inputrec,
885 gmx_mtop_t *top_global,t_fcdata *fcd,
886 t_state *state_global,
887 t_mdatoms *mdatoms,
888 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
889 gmx_edsam_t ed,
890 t_forcerec *fr,
891 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
892 gmx_membed_t membed,
893 real cpt_period,real max_hours,
894 const char *deviceOptions,
895 unsigned long Flags,
896 gmx_runtime_t *runtime)
898 const char *CG="Polak-Ribiere Conjugate Gradients";
900 em_state_t *s_min,*s_a,*s_b,*s_c;
901 gmx_localtop_t *top;
902 gmx_enerdata_t *enerd;
903 rvec *f;
904 gmx_global_stat_t gstat;
905 t_graph *graph;
906 rvec *f_global,*p,*sf,*sfm;
907 double gpa,gpb,gpc,tmp,sum[2],minstep;
908 real fnormn;
909 real stepsize;
910 real a,b,c,beta=0.0;
911 real epot_repl=0;
912 real pnorm;
913 t_mdebin *mdebin;
914 gmx_bool converged,foundlower;
915 rvec mu_tot;
916 gmx_bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
917 tensor vir,pres;
918 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
919 gmx_mdoutf_t *outf;
920 int i,m,gf,step,nminstep;
921 real terminate=0;
923 step=0;
925 s_min = init_em_state();
926 s_a = init_em_state();
927 s_b = init_em_state();
928 s_c = init_em_state();
930 /* Init em and store the local state in s_min */
931 init_em(fplog,CG,cr,inputrec,
932 state_global,top_global,s_min,&top,&f,&f_global,
933 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
934 nfile,fnm,&outf,&mdebin);
936 /* Print to log file */
937 print_em_start(fplog,cr,runtime,wcycle,CG);
939 /* Max number of steps */
940 number_steps=inputrec->nsteps;
942 if (MASTER(cr))
943 sp_header(stderr,CG,inputrec->em_tol,number_steps);
944 if (fplog)
945 sp_header(fplog,CG,inputrec->em_tol,number_steps);
947 /* Call the force routine and some auxiliary (neighboursearching etc.) */
948 /* do_force always puts the charge groups in the box and shifts again
949 * We do not unshift, so molecules are always whole in congrad.c
951 evaluate_energy(fplog,bVerbose,cr,
952 state_global,top_global,s_min,top,
953 inputrec,nrnb,wcycle,gstat,
954 vsite,constr,fcd,graph,mdatoms,fr,
955 mu_tot,enerd,vir,pres,-1,TRUE);
956 where();
958 if (MASTER(cr)) {
959 /* Copy stuff to the energy bin for easy printing etc. */
960 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
961 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
962 NULL,NULL,vir,pres,NULL,mu_tot,constr);
964 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
965 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
966 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
968 where();
970 /* Estimate/guess the initial stepsize */
971 stepsize = inputrec->em_stepsize/s_min->fnorm;
973 if (MASTER(cr)) {
974 fprintf(stderr," F-max = %12.5e on atom %d\n",
975 s_min->fmax,s_min->a_fmax+1);
976 fprintf(stderr," F-Norm = %12.5e\n",
977 s_min->fnorm/sqrt(state_global->natoms));
978 fprintf(stderr,"\n");
979 /* and copy to the log file too... */
980 fprintf(fplog," F-max = %12.5e on atom %d\n",
981 s_min->fmax,s_min->a_fmax+1);
982 fprintf(fplog," F-Norm = %12.5e\n",
983 s_min->fnorm/sqrt(state_global->natoms));
984 fprintf(fplog,"\n");
986 /* Start the loop over CG steps.
987 * Each successful step is counted, and we continue until
988 * we either converge or reach the max number of steps.
990 converged = FALSE;
991 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
993 /* start taking steps in a new direction
994 * First time we enter the routine, beta=0, and the direction is
995 * simply the negative gradient.
998 /* Calculate the new direction in p, and the gradient in this direction, gpa */
999 p = s_min->s.cg_p;
1000 sf = s_min->f;
1001 gpa = 0;
1002 gf = 0;
1003 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1004 if (mdatoms->cFREEZE)
1005 gf = mdatoms->cFREEZE[i];
1006 for(m=0; m<DIM; m++) {
1007 if (!inputrec->opts.nFreeze[gf][m]) {
1008 p[i][m] = sf[i][m] + beta*p[i][m];
1009 gpa -= p[i][m]*sf[i][m];
1010 /* f is negative gradient, thus the sign */
1011 } else {
1012 p[i][m] = 0;
1017 /* Sum the gradient along the line across CPUs */
1018 if (PAR(cr))
1019 gmx_sumd(1,&gpa,cr);
1021 /* Calculate the norm of the search vector */
1022 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
1024 /* Just in case stepsize reaches zero due to numerical precision... */
1025 if(stepsize<=0)
1026 stepsize = inputrec->em_stepsize/pnorm;
1029 * Double check the value of the derivative in the search direction.
1030 * If it is positive it must be due to the old information in the
1031 * CG formula, so just remove that and start over with beta=0.
1032 * This corresponds to a steepest descent step.
1034 if(gpa>0) {
1035 beta = 0;
1036 step--; /* Don't count this step since we are restarting */
1037 continue; /* Go back to the beginning of the big for-loop */
1040 /* Calculate minimum allowed stepsize, before the average (norm)
1041 * relative change in coordinate is smaller than precision
1043 minstep=0;
1044 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1045 for(m=0; m<DIM; m++) {
1046 tmp = fabs(s_min->s.x[i][m]);
1047 if(tmp < 1.0)
1048 tmp = 1.0;
1049 tmp = p[i][m]/tmp;
1050 minstep += tmp*tmp;
1053 /* Add up from all CPUs */
1054 if(PAR(cr))
1055 gmx_sumd(1,&minstep,cr);
1057 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1059 if(stepsize<minstep) {
1060 converged=TRUE;
1061 break;
1064 /* Write coordinates if necessary */
1065 do_x = do_per_step(step,inputrec->nstxout);
1066 do_f = do_per_step(step,inputrec->nstfout);
1068 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
1069 top_global,inputrec,step,
1070 s_min,state_global,f_global);
1072 /* Take a step downhill.
1073 * In theory, we should minimize the function along this direction.
1074 * That is quite possible, but it turns out to take 5-10 function evaluations
1075 * for each line. However, we dont really need to find the exact minimum -
1076 * it is much better to start a new CG step in a modified direction as soon
1077 * as we are close to it. This will save a lot of energy evaluations.
1079 * In practice, we just try to take a single step.
1080 * If it worked (i.e. lowered the energy), we increase the stepsize but
1081 * the continue straight to the next CG step without trying to find any minimum.
1082 * If it didn't work (higher energy), there must be a minimum somewhere between
1083 * the old position and the new one.
1085 * Due to the finite numerical accuracy, it turns out that it is a good idea
1086 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1087 * This leads to lower final energies in the tests I've done. / Erik
1089 s_a->epot = s_min->epot;
1090 a = 0.0;
1091 c = a + stepsize; /* reference position along line is zero */
1093 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1094 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1095 s_min,top,mdatoms,fr,vsite,constr,
1096 nrnb,wcycle);
1099 /* Take a trial step (new coords in s_c) */
1100 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
1101 constr,top,nrnb,wcycle,-1);
1103 neval++;
1104 /* Calculate energy for the trial step */
1105 evaluate_energy(fplog,bVerbose,cr,
1106 state_global,top_global,s_c,top,
1107 inputrec,nrnb,wcycle,gstat,
1108 vsite,constr,fcd,graph,mdatoms,fr,
1109 mu_tot,enerd,vir,pres,-1,FALSE);
1111 /* Calc derivative along line */
1112 p = s_c->s.cg_p;
1113 sf = s_c->f;
1114 gpc=0;
1115 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1116 for(m=0; m<DIM; m++)
1117 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1119 /* Sum the gradient along the line across CPUs */
1120 if (PAR(cr))
1121 gmx_sumd(1,&gpc,cr);
1123 /* This is the max amount of increase in energy we tolerate */
1124 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1126 /* Accept the step if the energy is lower, or if it is not significantly higher
1127 * and the line derivative is still negative.
1129 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1130 foundlower = TRUE;
1131 /* Great, we found a better energy. Increase step for next iteration
1132 * if we are still going down, decrease it otherwise
1134 if(gpc<0)
1135 stepsize *= 1.618034; /* The golden section */
1136 else
1137 stepsize *= 0.618034; /* 1/golden section */
1138 } else {
1139 /* New energy is the same or higher. We will have to do some work
1140 * to find a smaller value in the interval. Take smaller step next time!
1142 foundlower = FALSE;
1143 stepsize *= 0.618034;
1149 /* OK, if we didn't find a lower value we will have to locate one now - there must
1150 * be one in the interval [a=0,c].
1151 * The same thing is valid here, though: Don't spend dozens of iterations to find
1152 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1153 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1155 * I also have a safeguard for potentially really patological functions so we never
1156 * take more than 20 steps before we give up ...
1158 * If we already found a lower value we just skip this step and continue to the update.
1160 if (!foundlower) {
1161 nminstep=0;
1163 do {
1164 /* Select a new trial point.
1165 * If the derivatives at points a & c have different sign we interpolate to zero,
1166 * otherwise just do a bisection.
1168 if(gpa<0 && gpc>0)
1169 b = a + gpa*(a-c)/(gpc-gpa);
1170 else
1171 b = 0.5*(a+c);
1173 /* safeguard if interpolation close to machine accuracy causes errors:
1174 * never go outside the interval
1176 if(b<=a || b>=c)
1177 b = 0.5*(a+c);
1179 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1180 /* Reload the old state */
1181 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1182 s_min,top,mdatoms,fr,vsite,constr,
1183 nrnb,wcycle);
1186 /* Take a trial step to this new point - new coords in s_b */
1187 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
1188 constr,top,nrnb,wcycle,-1);
1190 neval++;
1191 /* Calculate energy for the trial step */
1192 evaluate_energy(fplog,bVerbose,cr,
1193 state_global,top_global,s_b,top,
1194 inputrec,nrnb,wcycle,gstat,
1195 vsite,constr,fcd,graph,mdatoms,fr,
1196 mu_tot,enerd,vir,pres,-1,FALSE);
1198 /* p does not change within a step, but since the domain decomposition
1199 * might change, we have to use cg_p of s_b here.
1201 p = s_b->s.cg_p;
1202 sf = s_b->f;
1203 gpb=0;
1204 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1205 for(m=0; m<DIM; m++)
1206 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1208 /* Sum the gradient along the line across CPUs */
1209 if (PAR(cr))
1210 gmx_sumd(1,&gpb,cr);
1212 if (debug)
1213 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1214 s_a->epot,s_b->epot,s_c->epot,gpb);
1216 epot_repl = s_b->epot;
1218 /* Keep one of the intervals based on the value of the derivative at the new point */
1219 if (gpb > 0) {
1220 /* Replace c endpoint with b */
1221 swap_em_state(s_b,s_c);
1222 c = b;
1223 gpc = gpb;
1224 } else {
1225 /* Replace a endpoint with b */
1226 swap_em_state(s_b,s_a);
1227 a = b;
1228 gpa = gpb;
1232 * Stop search as soon as we find a value smaller than the endpoints.
1233 * Never run more than 20 steps, no matter what.
1235 nminstep++;
1236 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1237 (nminstep < 20));
1239 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1240 nminstep >= 20) {
1241 /* OK. We couldn't find a significantly lower energy.
1242 * If beta==0 this was steepest descent, and then we give up.
1243 * If not, set beta=0 and restart with steepest descent before quitting.
1245 if (beta == 0.0) {
1246 /* Converged */
1247 converged = TRUE;
1248 break;
1249 } else {
1250 /* Reset memory before giving up */
1251 beta = 0.0;
1252 continue;
1256 /* Select min energy state of A & C, put the best in B.
1258 if (s_c->epot < s_a->epot) {
1259 if (debug)
1260 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1261 s_c->epot,s_a->epot);
1262 swap_em_state(s_b,s_c);
1263 gpb = gpc;
1264 b = c;
1265 } else {
1266 if (debug)
1267 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1268 s_a->epot,s_c->epot);
1269 swap_em_state(s_b,s_a);
1270 gpb = gpa;
1271 b = a;
1274 } else {
1275 if (debug)
1276 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1277 s_c->epot);
1278 swap_em_state(s_b,s_c);
1279 gpb = gpc;
1280 b = c;
1283 /* new search direction */
1284 /* beta = 0 means forget all memory and restart with steepest descents. */
1285 if (nstcg && ((step % nstcg)==0))
1286 beta = 0.0;
1287 else {
1288 /* s_min->fnorm cannot be zero, because then we would have converged
1289 * and broken out.
1292 /* Polak-Ribiere update.
1293 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1295 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1297 /* Limit beta to prevent oscillations */
1298 if (fabs(beta) > 5.0)
1299 beta = 0.0;
1302 /* update positions */
1303 swap_em_state(s_min,s_b);
1304 gpa = gpb;
1306 /* Print it if necessary */
1307 if (MASTER(cr)) {
1308 if(bVerbose)
1309 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1310 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1311 s_min->fmax,s_min->a_fmax+1);
1312 /* Store the new (lower) energies */
1313 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1314 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
1315 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1317 do_log = do_per_step(step,inputrec->nstlog);
1318 do_ene = do_per_step(step,inputrec->nstenergy);
1319 if(do_log)
1320 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1321 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1322 do_log ? fplog : NULL,step,step,eprNORMAL,
1323 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1326 /* Stop when the maximum force lies below tolerance.
1327 * If we have reached machine precision, converged is already set to true.
1329 converged = converged || (s_min->fmax < inputrec->em_tol);
1331 } /* End of the loop */
1333 if (converged)
1334 step--; /* we never took that last step in this case */
1336 if (s_min->fmax > inputrec->em_tol)
1338 if (MASTER(cr))
1340 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1341 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1343 converged = FALSE;
1346 if (MASTER(cr)) {
1347 /* If we printed energy and/or logfile last step (which was the last step)
1348 * we don't have to do it again, but otherwise print the final values.
1350 if(!do_log) {
1351 /* Write final value to log since we didn't do anything the last step */
1352 print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
1354 if (!do_ene || !do_log) {
1355 /* Write final energy file entries */
1356 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
1357 !do_log ? fplog : NULL,step,step,eprNORMAL,
1358 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1362 /* Print some stuff... */
1363 if (MASTER(cr))
1364 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1366 /* IMPORTANT!
1367 * For accurate normal mode calculation it is imperative that we
1368 * store the last conformation into the full precision binary trajectory.
1370 * However, we should only do it if we did NOT already write this step
1371 * above (which we did if do_x or do_f was true).
1373 do_x = !do_per_step(step,inputrec->nstxout);
1374 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1376 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1377 top_global,inputrec,step,
1378 s_min,state_global,f_global);
1380 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1382 if (MASTER(cr)) {
1383 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1384 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1385 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1386 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1388 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1391 finish_em(fplog,cr,outf,runtime,wcycle);
1393 /* To print the actual number of steps we needed somewhere */
1394 runtime->nsteps_done = step;
1396 return 0;
1397 } /* That's all folks */
1400 double do_lbfgs(FILE *fplog,t_commrec *cr,
1401 int nfile,const t_filenm fnm[],
1402 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
1403 int nstglobalcomm,
1404 gmx_vsite_t *vsite,gmx_constr_t constr,
1405 int stepout,
1406 t_inputrec *inputrec,
1407 gmx_mtop_t *top_global,t_fcdata *fcd,
1408 t_state *state,
1409 t_mdatoms *mdatoms,
1410 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1411 gmx_edsam_t ed,
1412 t_forcerec *fr,
1413 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1414 gmx_membed_t membed,
1415 real cpt_period,real max_hours,
1416 const char *deviceOptions,
1417 unsigned long Flags,
1418 gmx_runtime_t *runtime)
1420 static const char *LBFGS="Low-Memory BFGS Minimizer";
1421 em_state_t ems;
1422 gmx_localtop_t *top;
1423 gmx_enerdata_t *enerd;
1424 rvec *f;
1425 gmx_global_stat_t gstat;
1426 t_graph *graph;
1427 rvec *f_global;
1428 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1429 double stepsize,gpa,gpb,gpc,tmp,minstep;
1430 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1431 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1432 real a,b,c,maxdelta,delta;
1433 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1434 real dgdx,dgdg,sq,yr,beta;
1435 t_mdebin *mdebin;
1436 gmx_bool converged,first;
1437 rvec mu_tot;
1438 real fnorm,fmax;
1439 gmx_bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1440 tensor vir,pres;
1441 int start,end,number_steps;
1442 gmx_mdoutf_t *outf;
1443 int i,k,m,n,nfmax,gf,step;
1444 int mdof_flags;
1445 /* not used */
1446 real terminate;
1448 if (PAR(cr))
1449 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1451 n = 3*state->natoms;
1452 nmaxcorr = inputrec->nbfgscorr;
1454 /* Allocate memory */
1455 /* Use pointers to real so we dont have to loop over both atoms and
1456 * dimensions all the time...
1457 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1458 * that point to the same memory.
1460 snew(xa,n);
1461 snew(xb,n);
1462 snew(xc,n);
1463 snew(fa,n);
1464 snew(fb,n);
1465 snew(fc,n);
1466 snew(frozen,n);
1468 snew(p,n);
1469 snew(lastx,n);
1470 snew(lastf,n);
1471 snew(rho,nmaxcorr);
1472 snew(alpha,nmaxcorr);
1474 snew(dx,nmaxcorr);
1475 for(i=0;i<nmaxcorr;i++)
1476 snew(dx[i],n);
1478 snew(dg,nmaxcorr);
1479 for(i=0;i<nmaxcorr;i++)
1480 snew(dg[i],n);
1482 step = 0;
1483 neval = 0;
1485 /* Init em */
1486 init_em(fplog,LBFGS,cr,inputrec,
1487 state,top_global,&ems,&top,&f,&f_global,
1488 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1489 nfile,fnm,&outf,&mdebin);
1490 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1491 * so we free some memory again.
1493 sfree(ems.s.x);
1494 sfree(ems.f);
1496 xx = (real *)state->x;
1497 ff = (real *)f;
1499 start = mdatoms->start;
1500 end = mdatoms->homenr + start;
1502 /* Print to log file */
1503 print_em_start(fplog,cr,runtime,wcycle,LBFGS);
1505 do_log = do_ene = do_x = do_f = TRUE;
1507 /* Max number of steps */
1508 number_steps=inputrec->nsteps;
1510 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1511 gf = 0;
1512 for(i=start; i<end; i++) {
1513 if (mdatoms->cFREEZE)
1514 gf = mdatoms->cFREEZE[i];
1515 for(m=0; m<DIM; m++)
1516 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1518 if (MASTER(cr))
1519 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1520 if (fplog)
1521 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1523 if (vsite)
1524 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1525 top->idef.iparams,top->idef.il,
1526 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1528 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1529 /* do_force always puts the charge groups in the box and shifts again
1530 * We do not unshift, so molecules are always whole
1532 neval++;
1533 ems.s.x = state->x;
1534 ems.f = f;
1535 evaluate_energy(fplog,bVerbose,cr,
1536 state,top_global,&ems,top,
1537 inputrec,nrnb,wcycle,gstat,
1538 vsite,constr,fcd,graph,mdatoms,fr,
1539 mu_tot,enerd,vir,pres,-1,TRUE);
1540 where();
1542 if (MASTER(cr)) {
1543 /* Copy stuff to the energy bin for easy printing etc. */
1544 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1545 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1546 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1548 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1549 print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1550 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1552 where();
1554 /* This is the starting energy */
1555 Epot = enerd->term[F_EPOT];
1557 fnorm = ems.fnorm;
1558 fmax = ems.fmax;
1559 nfmax = ems.a_fmax;
1561 /* Set the initial step.
1562 * since it will be multiplied by the non-normalized search direction
1563 * vector (force vector the first time), we scale it by the
1564 * norm of the force.
1567 if (MASTER(cr)) {
1568 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1569 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1570 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1571 fprintf(stderr,"\n");
1572 /* and copy to the log file too... */
1573 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1574 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1575 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1576 fprintf(fplog,"\n");
1579 point=0;
1580 for(i=0;i<n;i++)
1581 if(!frozen[i])
1582 dx[point][i] = ff[i]; /* Initial search direction */
1583 else
1584 dx[point][i] = 0;
1586 stepsize = 1.0/fnorm;
1587 converged = FALSE;
1589 /* Start the loop over BFGS steps.
1590 * Each successful step is counted, and we continue until
1591 * we either converge or reach the max number of steps.
1594 ncorr=0;
1596 /* Set the gradient from the force */
1597 converged = FALSE;
1598 for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
1600 /* Write coordinates if necessary */
1601 do_x = do_per_step(step,inputrec->nstxout);
1602 do_f = do_per_step(step,inputrec->nstfout);
1604 mdof_flags = 0;
1605 if (do_x)
1607 mdof_flags |= MDOF_X;
1610 if (do_f)
1612 mdof_flags |= MDOF_F;
1615 write_traj(fplog,cr,outf,mdof_flags,
1616 top_global,step,(real)step,state,state,f,f,NULL,NULL);
1618 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1620 /* pointer to current direction - point=0 first time here */
1621 s=dx[point];
1623 /* calculate line gradient */
1624 for(gpa=0,i=0;i<n;i++)
1625 gpa-=s[i]*ff[i];
1627 /* Calculate minimum allowed stepsize, before the average (norm)
1628 * relative change in coordinate is smaller than precision
1630 for(minstep=0,i=0;i<n;i++) {
1631 tmp=fabs(xx[i]);
1632 if(tmp<1.0)
1633 tmp=1.0;
1634 tmp = s[i]/tmp;
1635 minstep += tmp*tmp;
1637 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1639 if(stepsize<minstep) {
1640 converged=TRUE;
1641 break;
1644 /* Store old forces and coordinates */
1645 for(i=0;i<n;i++) {
1646 lastx[i]=xx[i];
1647 lastf[i]=ff[i];
1649 Epot0=Epot;
1651 first=TRUE;
1653 for(i=0;i<n;i++)
1654 xa[i]=xx[i];
1656 /* Take a step downhill.
1657 * In theory, we should minimize the function along this direction.
1658 * That is quite possible, but it turns out to take 5-10 function evaluations
1659 * for each line. However, we dont really need to find the exact minimum -
1660 * it is much better to start a new BFGS step in a modified direction as soon
1661 * as we are close to it. This will save a lot of energy evaluations.
1663 * In practice, we just try to take a single step.
1664 * If it worked (i.e. lowered the energy), we increase the stepsize but
1665 * the continue straight to the next BFGS step without trying to find any minimum.
1666 * If it didn't work (higher energy), there must be a minimum somewhere between
1667 * the old position and the new one.
1669 * Due to the finite numerical accuracy, it turns out that it is a good idea
1670 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1671 * This leads to lower final energies in the tests I've done. / Erik
1673 foundlower=FALSE;
1674 EpotA = Epot0;
1675 a = 0.0;
1676 c = a + stepsize; /* reference position along line is zero */
1678 /* Check stepsize first. We do not allow displacements
1679 * larger than emstep.
1681 do {
1682 c = a + stepsize;
1683 maxdelta=0;
1684 for(i=0;i<n;i++) {
1685 delta=c*s[i];
1686 if(delta>maxdelta)
1687 maxdelta=delta;
1689 if(maxdelta>inputrec->em_stepsize)
1690 stepsize*=0.1;
1691 } while(maxdelta>inputrec->em_stepsize);
1693 /* Take a trial step */
1694 for (i=0; i<n; i++)
1695 xc[i] = lastx[i] + c*s[i];
1697 neval++;
1698 /* Calculate energy for the trial step */
1699 ems.s.x = (rvec *)xc;
1700 ems.f = (rvec *)fc;
1701 evaluate_energy(fplog,bVerbose,cr,
1702 state,top_global,&ems,top,
1703 inputrec,nrnb,wcycle,gstat,
1704 vsite,constr,fcd,graph,mdatoms,fr,
1705 mu_tot,enerd,vir,pres,step,FALSE);
1706 EpotC = ems.epot;
1708 /* Calc derivative along line */
1709 for(gpc=0,i=0; i<n; i++) {
1710 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1712 /* Sum the gradient along the line across CPUs */
1713 if (PAR(cr))
1714 gmx_sumd(1,&gpc,cr);
1716 /* This is the max amount of increase in energy we tolerate */
1717 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1719 /* Accept the step if the energy is lower, or if it is not significantly higher
1720 * and the line derivative is still negative.
1722 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1723 foundlower = TRUE;
1724 /* Great, we found a better energy. Increase step for next iteration
1725 * if we are still going down, decrease it otherwise
1727 if(gpc<0)
1728 stepsize *= 1.618034; /* The golden section */
1729 else
1730 stepsize *= 0.618034; /* 1/golden section */
1731 } else {
1732 /* New energy is the same or higher. We will have to do some work
1733 * to find a smaller value in the interval. Take smaller step next time!
1735 foundlower = FALSE;
1736 stepsize *= 0.618034;
1739 /* OK, if we didn't find a lower value we will have to locate one now - there must
1740 * be one in the interval [a=0,c].
1741 * The same thing is valid here, though: Don't spend dozens of iterations to find
1742 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1743 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1745 * I also have a safeguard for potentially really patological functions so we never
1746 * take more than 20 steps before we give up ...
1748 * If we already found a lower value we just skip this step and continue to the update.
1751 if(!foundlower) {
1753 nminstep=0;
1754 do {
1755 /* Select a new trial point.
1756 * If the derivatives at points a & c have different sign we interpolate to zero,
1757 * otherwise just do a bisection.
1760 if(gpa<0 && gpc>0)
1761 b = a + gpa*(a-c)/(gpc-gpa);
1762 else
1763 b = 0.5*(a+c);
1765 /* safeguard if interpolation close to machine accuracy causes errors:
1766 * never go outside the interval
1768 if(b<=a || b>=c)
1769 b = 0.5*(a+c);
1771 /* Take a trial step */
1772 for (i=0; i<n; i++)
1773 xb[i] = lastx[i] + b*s[i];
1775 neval++;
1776 /* Calculate energy for the trial step */
1777 ems.s.x = (rvec *)xb;
1778 ems.f = (rvec *)fb;
1779 evaluate_energy(fplog,bVerbose,cr,
1780 state,top_global,&ems,top,
1781 inputrec,nrnb,wcycle,gstat,
1782 vsite,constr,fcd,graph,mdatoms,fr,
1783 mu_tot,enerd,vir,pres,step,FALSE);
1784 EpotB = ems.epot;
1786 fnorm = ems.fnorm;
1788 for(gpb=0,i=0; i<n; i++)
1789 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1791 /* Sum the gradient along the line across CPUs */
1792 if (PAR(cr))
1793 gmx_sumd(1,&gpb,cr);
1795 /* Keep one of the intervals based on the value of the derivative at the new point */
1796 if(gpb>0) {
1797 /* Replace c endpoint with b */
1798 EpotC = EpotB;
1799 c = b;
1800 gpc = gpb;
1801 /* swap coord pointers b/c */
1802 xtmp = xb;
1803 ftmp = fb;
1804 xb = xc;
1805 fb = fc;
1806 xc = xtmp;
1807 fc = ftmp;
1808 } else {
1809 /* Replace a endpoint with b */
1810 EpotA = EpotB;
1811 a = b;
1812 gpa = gpb;
1813 /* swap coord pointers a/b */
1814 xtmp = xb;
1815 ftmp = fb;
1816 xb = xa;
1817 fb = fa;
1818 xa = xtmp;
1819 fa = ftmp;
1823 * Stop search as soon as we find a value smaller than the endpoints,
1824 * or if the tolerance is below machine precision.
1825 * Never run more than 20 steps, no matter what.
1827 nminstep++;
1828 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1830 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1831 /* OK. We couldn't find a significantly lower energy.
1832 * If ncorr==0 this was steepest descent, and then we give up.
1833 * If not, reset memory to restart as steepest descent before quitting.
1835 if(ncorr==0) {
1836 /* Converged */
1837 converged=TRUE;
1838 break;
1839 } else {
1840 /* Reset memory */
1841 ncorr=0;
1842 /* Search in gradient direction */
1843 for(i=0;i<n;i++)
1844 dx[point][i]=ff[i];
1845 /* Reset stepsize */
1846 stepsize = 1.0/fnorm;
1847 continue;
1851 /* Select min energy state of A & C, put the best in xx/ff/Epot
1853 if(EpotC<EpotA) {
1854 Epot = EpotC;
1855 /* Use state C */
1856 for(i=0;i<n;i++) {
1857 xx[i]=xc[i];
1858 ff[i]=fc[i];
1860 stepsize=c;
1861 } else {
1862 Epot = EpotA;
1863 /* Use state A */
1864 for(i=0;i<n;i++) {
1865 xx[i]=xa[i];
1866 ff[i]=fa[i];
1868 stepsize=a;
1871 } else {
1872 /* found lower */
1873 Epot = EpotC;
1874 /* Use state C */
1875 for(i=0;i<n;i++) {
1876 xx[i]=xc[i];
1877 ff[i]=fc[i];
1879 stepsize=c;
1882 /* Update the memory information, and calculate a new
1883 * approximation of the inverse hessian
1886 /* Have new data in Epot, xx, ff */
1887 if(ncorr<nmaxcorr)
1888 ncorr++;
1890 for(i=0;i<n;i++) {
1891 dg[point][i]=lastf[i]-ff[i];
1892 dx[point][i]*=stepsize;
1895 dgdg=0;
1896 dgdx=0;
1897 for(i=0;i<n;i++) {
1898 dgdg+=dg[point][i]*dg[point][i];
1899 dgdx+=dg[point][i]*dx[point][i];
1902 diag=dgdx/dgdg;
1904 rho[point]=1.0/dgdx;
1905 point++;
1907 if(point>=nmaxcorr)
1908 point=0;
1910 /* Update */
1911 for(i=0;i<n;i++)
1912 p[i]=ff[i];
1914 cp=point;
1916 /* Recursive update. First go back over the memory points */
1917 for(k=0;k<ncorr;k++) {
1918 cp--;
1919 if(cp<0)
1920 cp=ncorr-1;
1922 sq=0;
1923 for(i=0;i<n;i++)
1924 sq+=dx[cp][i]*p[i];
1926 alpha[cp]=rho[cp]*sq;
1928 for(i=0;i<n;i++)
1929 p[i] -= alpha[cp]*dg[cp][i];
1932 for(i=0;i<n;i++)
1933 p[i] *= diag;
1935 /* And then go forward again */
1936 for(k=0;k<ncorr;k++) {
1937 yr = 0;
1938 for(i=0;i<n;i++)
1939 yr += p[i]*dg[cp][i];
1941 beta = rho[cp]*yr;
1942 beta = alpha[cp]-beta;
1944 for(i=0;i<n;i++)
1945 p[i] += beta*dx[cp][i];
1947 cp++;
1948 if(cp>=ncorr)
1949 cp=0;
1952 for(i=0;i<n;i++)
1953 if(!frozen[i])
1954 dx[point][i] = p[i];
1955 else
1956 dx[point][i] = 0;
1958 stepsize=1.0;
1960 /* Test whether the convergence criterion is met */
1961 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1963 /* Print it if necessary */
1964 if (MASTER(cr)) {
1965 if(bVerbose)
1966 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1967 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1968 /* Store the new (lower) energies */
1969 upd_mdebin(mdebin,FALSE,FALSE,(double)step,
1970 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
1971 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1972 do_log = do_per_step(step,inputrec->nstlog);
1973 do_ene = do_per_step(step,inputrec->nstenergy);
1974 if(do_log)
1975 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
1976 print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
1977 do_log ? fplog : NULL,step,step,eprNORMAL,
1978 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1981 /* Stop when the maximum force lies below tolerance.
1982 * If we have reached machine precision, converged is already set to true.
1985 converged = converged || (fmax < inputrec->em_tol);
1987 } /* End of the loop */
1989 if(converged)
1990 step--; /* we never took that last step in this case */
1992 if(fmax>inputrec->em_tol)
1994 if (MASTER(cr))
1996 warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
1997 warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
1999 converged = FALSE;
2002 /* If we printed energy and/or logfile last step (which was the last step)
2003 * we don't have to do it again, but otherwise print the final values.
2005 if(!do_log) /* Write final value to log since we didn't do anythin last step */
2006 print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
2007 if(!do_ene || !do_log) /* Write final energy file entries */
2008 print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
2009 !do_log ? fplog : NULL,step,step,eprNORMAL,
2010 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2012 /* Print some stuff... */
2013 if (MASTER(cr))
2014 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2016 /* IMPORTANT!
2017 * For accurate normal mode calculation it is imperative that we
2018 * store the last conformation into the full precision binary trajectory.
2020 * However, we should only do it if we did NOT already write this step
2021 * above (which we did if do_x or do_f was true).
2023 do_x = !do_per_step(step,inputrec->nstxout);
2024 do_f = !do_per_step(step,inputrec->nstfout);
2025 write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
2026 top_global,inputrec,step,
2027 &ems,state,f);
2029 if (MASTER(cr)) {
2030 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
2031 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2032 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
2033 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
2035 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
2038 finish_em(fplog,cr,outf,runtime,wcycle);
2040 /* To print the actual number of steps we needed somewhere */
2041 runtime->nsteps_done = step;
2043 return 0;
2044 } /* That's all folks */
2047 double do_steep(FILE *fplog,t_commrec *cr,
2048 int nfile, const t_filenm fnm[],
2049 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2050 int nstglobalcomm,
2051 gmx_vsite_t *vsite,gmx_constr_t constr,
2052 int stepout,
2053 t_inputrec *inputrec,
2054 gmx_mtop_t *top_global,t_fcdata *fcd,
2055 t_state *state_global,
2056 t_mdatoms *mdatoms,
2057 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2058 gmx_edsam_t ed,
2059 t_forcerec *fr,
2060 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2061 gmx_membed_t membed,
2062 real cpt_period,real max_hours,
2063 const char *deviceOptions,
2064 unsigned long Flags,
2065 gmx_runtime_t *runtime)
2067 const char *SD="Steepest Descents";
2068 em_state_t *s_min,*s_try;
2069 rvec *f_global;
2070 gmx_localtop_t *top;
2071 gmx_enerdata_t *enerd;
2072 rvec *f;
2073 gmx_global_stat_t gstat;
2074 t_graph *graph;
2075 real stepsize,constepsize;
2076 real ustep,dvdlambda,fnormn;
2077 gmx_mdoutf_t *outf;
2078 t_mdebin *mdebin;
2079 gmx_bool bDone,bAbort,do_x,do_f;
2080 tensor vir,pres;
2081 rvec mu_tot;
2082 int nsteps;
2083 int count=0;
2084 int steps_accepted=0;
2085 /* not used */
2086 real terminate=0;
2088 s_min = init_em_state();
2089 s_try = init_em_state();
2091 /* Init em and store the local state in s_try */
2092 init_em(fplog,SD,cr,inputrec,
2093 state_global,top_global,s_try,&top,&f,&f_global,
2094 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2095 nfile,fnm,&outf,&mdebin);
2097 /* Print to log file */
2098 print_em_start(fplog,cr,runtime,wcycle,SD);
2100 /* Set variables for stepsize (in nm). This is the largest
2101 * step that we are going to make in any direction.
2103 ustep = inputrec->em_stepsize;
2104 stepsize = 0;
2106 /* Max number of steps */
2107 nsteps = inputrec->nsteps;
2109 if (MASTER(cr))
2110 /* Print to the screen */
2111 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2112 if (fplog)
2113 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2115 /**** HERE STARTS THE LOOP ****
2116 * count is the counter for the number of steps
2117 * bDone will be TRUE when the minimization has converged
2118 * bAbort will be TRUE when nsteps steps have been performed or when
2119 * the stepsize becomes smaller than is reasonable for machine precision
2121 count = 0;
2122 bDone = FALSE;
2123 bAbort = FALSE;
2124 while( !bDone && !bAbort ) {
2125 bAbort = (nsteps >= 0) && (count == nsteps);
2127 /* set new coordinates, except for first step */
2128 if (count > 0) {
2129 do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
2130 s_min,stepsize,s_min->f,s_try,
2131 constr,top,nrnb,wcycle,count);
2134 evaluate_energy(fplog,bVerbose,cr,
2135 state_global,top_global,s_try,top,
2136 inputrec,nrnb,wcycle,gstat,
2137 vsite,constr,fcd,graph,mdatoms,fr,
2138 mu_tot,enerd,vir,pres,count,count==0);
2140 if (MASTER(cr))
2141 print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
2143 if (count == 0)
2144 s_min->epot = s_try->epot + 1;
2146 /* Print it if necessary */
2147 if (MASTER(cr)) {
2148 if (bVerbose) {
2149 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2150 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2151 (s_try->epot < s_min->epot) ? '\n' : '\r');
2154 if (s_try->epot < s_min->epot) {
2155 /* Store the new (lower) energies */
2156 upd_mdebin(mdebin,FALSE,FALSE,(double)count,
2157 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
2158 s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
2159 print_ebin(outf->fp_ene,TRUE,
2160 do_per_step(steps_accepted,inputrec->nstdisreout),
2161 do_per_step(steps_accepted,inputrec->nstorireout),
2162 fplog,count,count,eprNORMAL,TRUE,
2163 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2164 fflush(fplog);
2168 /* Now if the new energy is smaller than the previous...
2169 * or if this is the first step!
2170 * or if we did random steps!
2173 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2174 steps_accepted++;
2176 /* Test whether the convergence criterion is met... */
2177 bDone = (s_try->fmax < inputrec->em_tol);
2179 /* Copy the arrays for force, positions and energy */
2180 /* The 'Min' array always holds the coords and forces of the minimal
2181 sampled energy */
2182 swap_em_state(s_min,s_try);
2183 if (count > 0)
2184 ustep *= 1.2;
2186 /* Write to trn, if necessary */
2187 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2188 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2189 write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
2190 top_global,inputrec,count,
2191 s_min,state_global,f_global);
2193 else {
2194 /* If energy is not smaller make the step smaller... */
2195 ustep *= 0.5;
2197 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2198 /* Reload the old state */
2199 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2200 s_min,top,mdatoms,fr,vsite,constr,
2201 nrnb,wcycle);
2205 /* Determine new step */
2206 stepsize = ustep/s_min->fmax;
2208 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2209 #ifdef GMX_DOUBLE
2210 if (count == nsteps || ustep < 1e-12)
2211 #else
2212 if (count == nsteps || ustep < 1e-6)
2213 #endif
2215 if (MASTER(cr))
2217 warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
2218 warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
2220 bAbort=TRUE;
2223 count++;
2224 } /* End of the loop */
2226 /* Print some shit... */
2227 if (MASTER(cr))
2228 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2229 write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2230 top_global,inputrec,count,
2231 s_min,state_global,f_global);
2233 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2235 if (MASTER(cr)) {
2236 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2237 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2238 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2239 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2242 finish_em(fplog,cr,outf,runtime,wcycle);
2244 /* To print the actual number of steps we needed somewhere */
2245 inputrec->nsteps=count;
2247 runtime->nsteps_done = count;
2249 return 0;
2250 } /* That's all folks */
2253 double do_nm(FILE *fplog,t_commrec *cr,
2254 int nfile,const t_filenm fnm[],
2255 const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
2256 int nstglobalcomm,
2257 gmx_vsite_t *vsite,gmx_constr_t constr,
2258 int stepout,
2259 t_inputrec *inputrec,
2260 gmx_mtop_t *top_global,t_fcdata *fcd,
2261 t_state *state_global,
2262 t_mdatoms *mdatoms,
2263 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2264 gmx_edsam_t ed,
2265 t_forcerec *fr,
2266 int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2267 gmx_membed_t membed,
2268 real cpt_period,real max_hours,
2269 const char *deviceOptions,
2270 unsigned long Flags,
2271 gmx_runtime_t *runtime)
2273 const char *NM = "Normal Mode Analysis";
2274 gmx_mdoutf_t *outf;
2275 int natoms,atom,d;
2276 int nnodes,node;
2277 rvec *f_global;
2278 gmx_localtop_t *top;
2279 gmx_enerdata_t *enerd;
2280 rvec *f;
2281 gmx_global_stat_t gstat;
2282 t_graph *graph;
2283 real t,t0,lambda,lam0;
2284 gmx_bool bNS;
2285 tensor vir,pres;
2286 rvec mu_tot;
2287 rvec *fneg,*dfdx;
2288 gmx_bool bSparse; /* use sparse matrix storage format */
2289 size_t sz;
2290 gmx_sparsematrix_t * sparse_matrix = NULL;
2291 real * full_matrix = NULL;
2292 em_state_t * state_work;
2294 /* added with respect to mdrun */
2295 int i,j,k,row,col;
2296 real der_range=10.0*sqrt(GMX_REAL_EPS);
2297 real x_min;
2298 real fnorm,fmax;
2300 if (constr != NULL)
2302 gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
2305 state_work = init_em_state();
2307 /* Init em and store the local state in state_minimum */
2308 init_em(fplog,NM,cr,inputrec,
2309 state_global,top_global,state_work,&top,
2310 &f,&f_global,
2311 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2312 nfile,fnm,&outf,NULL);
2314 natoms = top_global->natoms;
2315 snew(fneg,natoms);
2316 snew(dfdx,natoms);
2318 #ifndef GMX_DOUBLE
2319 if (MASTER(cr))
2321 fprintf(stderr,
2322 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2323 " which MIGHT not be accurate enough for normal mode analysis.\n"
2324 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2325 " are fairly modest even if you recompile in double precision.\n\n");
2327 #endif
2329 /* Check if we can/should use sparse storage format.
2331 * Sparse format is only useful when the Hessian itself is sparse, which it
2332 * will be when we use a cutoff.
2333 * For small systems (n<1000) it is easier to always use full matrix format, though.
2335 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2337 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2338 bSparse = FALSE;
2340 else if(top_global->natoms < 1000)
2342 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2343 bSparse = FALSE;
2345 else
2347 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2348 bSparse = TRUE;
2351 sz = DIM*top_global->natoms;
2353 fprintf(stderr,"Allocating Hessian memory...\n\n");
2355 if(bSparse)
2357 sparse_matrix=gmx_sparsematrix_init(sz);
2358 sparse_matrix->compressed_symmetric = TRUE;
2360 else
2362 snew(full_matrix,sz*sz);
2365 /* Initial values */
2366 t0 = inputrec->init_t;
2367 lam0 = inputrec->fepvals->init_lambda;
2368 t = t0;
2369 lambda = lam0;
2371 init_nrnb(nrnb);
2373 where();
2375 /* Write start time and temperature */
2376 print_em_start(fplog,cr,runtime,wcycle,NM);
2378 /* fudge nr of steps to nr of atoms */
2379 inputrec->nsteps = natoms*2;
2381 if (MASTER(cr))
2383 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
2384 *(top_global->name),(int)inputrec->nsteps);
2387 nnodes = cr->nnodes;
2389 /* Make evaluate_energy do a single node force calculation */
2390 cr->nnodes = 1;
2391 evaluate_energy(fplog,bVerbose,cr,
2392 state_global,top_global,state_work,top,
2393 inputrec,nrnb,wcycle,gstat,
2394 vsite,constr,fcd,graph,mdatoms,fr,
2395 mu_tot,enerd,vir,pres,-1,TRUE);
2396 cr->nnodes = nnodes;
2398 /* if forces are not small, warn user */
2399 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2401 if (MASTER(cr))
2403 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2404 if (state_work->fmax > 1.0e-3)
2406 fprintf(stderr,"Maximum force probably not small enough to");
2407 fprintf(stderr," ensure that you are in an \nenergy well. ");
2408 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2409 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2413 /***********************************************************
2415 * Loop over all pairs in matrix
2417 * do_force called twice. Once with positive and
2418 * once with negative displacement
2420 ************************************************************/
2422 /* Steps are divided one by one over the nodes */
2423 for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
2426 for (d=0; d<DIM; d++)
2428 x_min = state_work->s.x[atom][d];
2430 state_work->s.x[atom][d] = x_min - der_range;
2432 /* Make evaluate_energy do a single node force calculation */
2433 cr->nnodes = 1;
2434 evaluate_energy(fplog,bVerbose,cr,
2435 state_global,top_global,state_work,top,
2436 inputrec,nrnb,wcycle,gstat,
2437 vsite,constr,fcd,graph,mdatoms,fr,
2438 mu_tot,enerd,vir,pres,atom*2,FALSE);
2440 for(i=0; i<natoms; i++)
2442 copy_rvec(state_work->f[i], fneg[i]);
2445 state_work->s.x[atom][d] = x_min + der_range;
2447 evaluate_energy(fplog,bVerbose,cr,
2448 state_global,top_global,state_work,top,
2449 inputrec,nrnb,wcycle,gstat,
2450 vsite,constr,fcd,graph,mdatoms,fr,
2451 mu_tot,enerd,vir,pres,atom*2+1,FALSE);
2452 cr->nnodes = nnodes;
2454 /* x is restored to original */
2455 state_work->s.x[atom][d] = x_min;
2457 for(j=0; j<natoms; j++)
2459 for (k=0; (k<DIM); k++)
2461 dfdx[j][k] =
2462 -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
2466 if (!MASTER(cr))
2468 #ifdef GMX_MPI
2469 #ifdef GMX_DOUBLE
2470 #define mpi_type MPI_DOUBLE
2471 #else
2472 #define mpi_type MPI_FLOAT
2473 #endif
2474 MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
2475 cr->mpi_comm_mygroup);
2476 #endif
2478 else
2480 for(node=0; (node<nnodes && atom+node<natoms); node++)
2482 if (node > 0)
2484 #ifdef GMX_MPI
2485 MPI_Status stat;
2486 MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
2487 cr->mpi_comm_mygroup,&stat);
2488 #undef mpi_type
2489 #endif
2492 row = (atom + node)*DIM + d;
2494 for(j=0; j<natoms; j++)
2496 for(k=0; k<DIM; k++)
2498 col = j*DIM + k;
2500 if (bSparse)
2502 if (col >= row && dfdx[j][k] != 0.0)
2504 gmx_sparsematrix_increment_value(sparse_matrix,
2505 row,col,dfdx[j][k]);
2508 else
2510 full_matrix[row*sz+col] = dfdx[j][k];
2517 if (bVerbose && fplog)
2519 fflush(fplog);
2522 /* write progress */
2523 if (MASTER(cr) && bVerbose)
2525 fprintf(stderr,"\rFinished step %d out of %d",
2526 min(atom+nnodes,natoms),natoms);
2527 fflush(stderr);
2531 if (MASTER(cr))
2533 fprintf(stderr,"\n\nWriting Hessian...\n");
2534 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2537 finish_em(fplog,cr,outf,runtime,wcycle);
2539 runtime->nsteps_done = natoms*2;
2541 return 0;