Added support for gettimeofday() when available to get microsecond resolution
[gromacs.git] / src / mdlib / minimize.c
bloba05beb828ae688f46f895d9b53a4255e3d5cbcc9
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROwing Monsters And Cloning Shrimps
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <string.h>
41 #include <time.h>
42 #include <math.h>
43 #include "sysstuff.h"
44 #include "string2.h"
45 #include "network.h"
46 #include "confio.h"
47 #include "copyrite.h"
48 #include "smalloc.h"
49 #include "nrnb.h"
50 #include "main.h"
51 #include "pbc.h"
52 #include "force.h"
53 #include "macros.h"
54 #include "random.h"
55 #include "names.h"
56 #include "gmx_fatal.h"
57 #include "txtdump.h"
58 #include "typedefs.h"
59 #include "update.h"
60 #include "random.h"
61 #include "constr.h"
62 #include "vec.h"
63 #include "statutil.h"
64 #include "tgroup.h"
65 #include "mdebin.h"
66 #include "vsite.h"
67 #include "force.h"
68 #include "mdrun.h"
69 #include "domdec.h"
70 #include "partdec.h"
71 #include "trnio.h"
72 #include "sparsematrix.h"
73 #include "mtxio.h"
74 #include "gmx_random.h"
75 #include "physics.h"
76 #include "xvgr.h"
77 #include "mdatoms.h"
78 #include "gbutil.h"
79 #include "ns.h"
80 #include "gmx_wallcycle.h"
81 #include "mtop_util.h"
82 #include "gmxfio.h"
83 #include "pme.h"
84 #include "pdbio.h"
86 typedef struct {
87 t_state s;
88 rvec *f;
89 real epot;
90 real fnorm;
91 real fmax;
92 int a_fmax;
93 } em_state_t;
95 static em_state_t *init_em_state()
97 em_state_t *ems;
99 snew(ems,1);
101 return ems;
104 static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
106 fprintf(out,"%s:\n",minimizer);
107 fprintf(out," Tolerance (Fmax) = %12.5e\n",ftol);
108 fprintf(out," Number of steps = %12d\n",nsteps);
111 static void warn_step(FILE *fp,real ftol,bool bConstrain)
113 fprintf(fp,"\nStepsize too small, or no change in energy.\n"
114 "Converged to machine precision,\n"
115 "but not to the requested precision Fmax < %g\n",
116 ftol);
117 if (sizeof(real)<sizeof(double))
118 fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
120 if (bConstrain)
121 fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
122 "off constraints alltogether (set constraints = none in mdp file)\n");
127 static void print_converged(FILE *fp,const char *alg,real ftol,
128 gmx_step_t count,bool bDone,gmx_step_t nsteps,
129 real epot,real fmax, int nfmax, real fnorm)
131 char buf[22];
133 if (bDone)
134 fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
135 alg,ftol,gmx_step_str(count,buf));
136 else if(count<nsteps)
137 fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
138 "but did not reach the requested Fmax < %g.\n",
139 alg,gmx_step_str(count,buf),ftol);
140 else
141 fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
142 alg,ftol,gmx_step_str(count,buf));
144 #ifdef GMX_DOUBLE
145 fprintf(fp,"Potential Energy = %21.14e\n",epot);
146 fprintf(fp,"Maximum force = %21.14e on atom %d\n",fmax,nfmax+1);
147 fprintf(fp,"Norm of force = %21.14e\n",fnorm);
148 #else
149 fprintf(fp,"Potential Energy = %14.7e\n",epot);
150 fprintf(fp,"Maximum force = %14.7e on atom %d\n",fmax,nfmax+1);
151 fprintf(fp,"Norm of force = %14.7e\n",fnorm);
152 #endif
155 static void get_f_norm_max(t_commrec *cr,
156 t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
157 real *fnorm,real *fmax,int *a_fmax)
159 double fnorm2,*sum;
160 real fmax2,fmax2_0,fam;
161 int la_max,a_max,start,end,i,m,gf;
163 /* This routine finds the largest force and returns it.
164 * On parallel machines the global max is taken.
166 fnorm2 = 0;
167 fmax2 = 0;
168 la_max = -1;
169 gf = 0;
170 start = mdatoms->start;
171 end = mdatoms->homenr + start;
172 if (mdatoms->cFREEZE) {
173 for(i=start; i<end; i++) {
174 gf = mdatoms->cFREEZE[i];
175 fam = 0;
176 for(m=0; m<DIM; m++)
177 if (!opts->nFreeze[gf][m])
178 fam += sqr(f[i][m]);
179 fnorm2 += fam;
180 if (fam > fmax2) {
181 fmax2 = fam;
182 la_max = i;
185 } else {
186 for(i=start; i<end; i++) {
187 fam = norm2(f[i]);
188 fnorm2 += fam;
189 if (fam > fmax2) {
190 fmax2 = fam;
191 la_max = i;
196 if (la_max >= 0 && DOMAINDECOMP(cr)) {
197 a_max = cr->dd->gatindex[la_max];
198 } else {
199 a_max = la_max;
201 if (PAR(cr)) {
202 snew(sum,2*cr->nnodes+1);
203 sum[2*cr->nodeid] = fmax2;
204 sum[2*cr->nodeid+1] = a_max;
205 sum[2*cr->nnodes] = fnorm2;
206 gmx_sumd(2*cr->nnodes+1,sum,cr);
207 fnorm2 = sum[2*cr->nnodes];
208 /* Determine the global maximum */
209 for(i=0; i<cr->nnodes; i++) {
210 if (sum[2*i] > fmax2) {
211 fmax2 = sum[2*i];
212 a_max = (int)(sum[2*i+1] + 0.5);
215 sfree(sum);
218 if (fnorm)
219 *fnorm = sqrt(fnorm2);
220 if (fmax)
221 *fmax = sqrt(fmax2);
222 if (a_fmax)
223 *a_fmax = a_max;
226 static void get_state_f_norm_max(t_commrec *cr,
227 t_grpopts *opts,t_mdatoms *mdatoms,
228 em_state_t *ems)
230 get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
233 void init_em(FILE *fplog,const char *title,
234 t_commrec *cr,t_inputrec *ir,
235 t_state *state_global,gmx_mtop_t *top_global,
236 em_state_t *ems,gmx_localtop_t **top,
237 rvec *f,rvec **f_global,
238 t_nrnb *nrnb,rvec mu_tot,
239 t_forcerec *fr,gmx_enerdata_t **enerd,
240 t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
241 gmx_vsite_t *vsite,gmx_constr_t constr,
242 int nfile,t_filenm fnm[],int *fp_trn,int *fp_ene,
243 t_mdebin **mdebin)
245 int start,homenr,i;
246 real dvdlambda;
248 if (fplog)
249 fprintf(fplog,"Initiating %s\n",title);
251 state_global->ngtc = 0;
252 if (ir->eI == eiCG) {
253 state_global->flags |= (1<<estCGP);
254 snew(state_global->cg_p,state_global->nalloc);
257 /* Initiate some variables */
258 if (ir->efep != efepNO)
259 state_global->lambda = ir->init_lambda;
260 else
261 state_global->lambda = 0.0;
263 init_nrnb(nrnb);
265 if (DOMAINDECOMP(cr)) {
266 *top = dd_init_local_top(top_global);
268 dd_init_local_state(cr->dd,state_global,&ems->s);
270 /* Distribute the charge groups over the nodes from the master node */
271 dd_partition_system(fplog,ir->init_step,cr,TRUE,
272 state_global,top_global,ir,
273 &ems->s,&ems->f,mdatoms,*top,
274 fr,vsite,NULL,constr,
275 nrnb,NULL,FALSE);
276 dd_store_state(cr->dd,&ems->s);
278 if (ir->nstfout) {
279 snew(*f_global,top_global->natoms);
280 } else {
281 *f_global = NULL;
283 *graph = NULL;
284 } else {
285 /* Just copy the state */
286 ems->s = *state_global;
287 snew(ems->s.x,ems->s.nalloc);
288 snew(ems->f,ems->s.nalloc);
289 for(i=0; i<state_global->natoms; i++)
290 copy_rvec(state_global->x[i],ems->s.x[i]);
291 copy_mat(state_global->box,ems->s.box);
293 if (PAR(cr)) {
294 /* Initialize the particle decomposition and split the topology */
295 *top = split_system(fplog,top_global,ir,cr);
297 pd_cg_range(cr,&fr->cg0,&fr->hcg);
298 } else {
299 *top = gmx_mtop_generate_local_top(top_global,ir);
301 *f_global = f;
303 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
304 *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
305 } else {
306 *graph = NULL;
310 clear_rvec(mu_tot);
311 calc_shifts(ems->s.box,fr->shift_vec);
313 if (PARTDECOMP(cr)) {
314 pd_at_range(cr,&start,&homenr);
315 homenr -= start;
316 } else {
317 start = 0;
318 homenr = top_global->natoms;
320 atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
321 update_mdatoms(mdatoms,state_global->lambda);
323 if (vsite && !DOMAINDECOMP(cr)) {
324 set_vsite_top(vsite,*top,mdatoms,cr);
327 if (constr) {
328 if (ir->eConstrAlg == econtSHAKE &&
329 gmx_mtop_ftype_count(top_global,F_CONSTR) > 0) {
330 gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
331 econstr_names[econtSHAKE],econstr_names[econtLINCS]);
334 if (!DOMAINDECOMP(cr))
335 set_constraints(constr,*top,ir,mdatoms,NULL);
337 /* Constrain the starting coordinates */
338 dvdlambda=0;
339 constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
340 ir,cr,-1,0,mdatoms,
341 ems->s.x,ems->s.x,NULL,ems->s.box,ems->s.lambda,&dvdlambda,
342 NULL,NULL,nrnb,econqCoord);
345 if (PAR(cr)) {
346 *gstat = global_stat_init(ir);
350 if (MASTER(cr)) {
351 if (fp_trn)
352 *fp_trn = open_trn(ftp2fn(efTRN,nfile,fnm),"w");
353 if (fp_ene)
354 *fp_ene = open_enx(ftp2fn(efEDR,nfile,fnm),"w");
355 } else {
356 if (fp_trn)
357 *fp_trn = -1;
358 if (fp_ene)
359 *fp_ene = -1;
362 snew(*enerd,1);
363 init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
365 /* Init bin for energy stuff */
366 *mdebin = init_mdebin(*fp_ene,top_global,ir);
368 /* If doing gb, copy make local gb data (for dd, this is done in dd_partition_system) */
369 if(ir->implicit_solvent && !DOMAINDECOMP(cr))
371 make_local_gb(cr,fr->born,ir->gb_algorithm);
377 static void finish_em(FILE *fplog,t_commrec *cr,
378 int fp_traj,int fp_ene)
380 if (!(cr->duty & DUTY_PME)) {
381 /* Tell the PME only node to finish */
382 gmx_pme_finish(cr);
385 if (MASTER(cr)) {
386 close_trn(fp_traj);
387 close_enx(fp_ene);
391 static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
393 em_state_t tmp;
395 tmp = *ems1;
396 *ems1 = *ems2;
397 *ems2 = tmp;
400 static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
402 int i;
404 for(i=0; (i<state->natoms); i++)
405 copy_rvec(ems->s.x[i],state->x[i]);
406 if (f != NULL)
407 copy_rvec(ems->f[i],f[i]);
410 static void write_em_traj(FILE *fplog,t_commrec *cr,
411 int fp_trn,bool bX,bool bF,char *confout,
412 gmx_mtop_t *top_global,
413 t_inputrec *ir,gmx_step_t step,
414 em_state_t *state,
415 t_state *state_global,rvec *f_global)
417 if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
419 f_global = state->f;
420 copy_em_coords_back(state,state_global,bF ? f_global : NULL);
423 write_traj(fplog,cr,fp_trn,bX,FALSE,bF,0,FALSE,0,NULL,FALSE,
424 top_global,ir->eI,ir->simulation_part,step,(double)step,
425 &state->s,state_global,state->f,f_global,NULL,NULL);
427 if (confout != NULL && MASTER(cr))
429 write_sto_conf_mtop(confout,
430 *top_global->name,top_global,
431 state_global->x,NULL,ir->ePBC,state_global->box);
435 static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
436 em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
437 gmx_constr_t constr,gmx_localtop_t *top,
438 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
439 gmx_step_t count)
442 t_state *s1,*s2;
443 int start,end,gf,i,m;
444 rvec *x1,*x2;
445 real dvdlambda;
447 s1 = &ems1->s;
448 s2 = &ems2->s;
450 if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
451 gmx_incons("state mismatch in do_em_step");
453 s2->flags = s1->flags;
455 if (s2->nalloc != s1->nalloc) {
456 s2->nalloc = s1->nalloc;
457 srenew(s2->x,s1->nalloc);
458 srenew(ems2->f, s1->nalloc);
459 if (s2->flags & (1<<estCGP))
460 srenew(s2->cg_p, s1->nalloc);
463 s2->natoms = s1->natoms;
464 s2->lambda = s1->lambda;
465 copy_mat(s1->box,s2->box);
467 start = md->start;
468 end = md->start + md->homenr;
470 x1 = s1->x;
471 x2 = s2->x;
472 gf = 0;
473 for(i=start; i<end; i++) {
474 if (md->cFREEZE)
475 gf = md->cFREEZE[i];
476 for(m=0; m<DIM; m++) {
477 if (ir->opts.nFreeze[gf][m])
478 x2[i][m] = x1[i][m];
479 else
480 x2[i][m] = x1[i][m] + a*f[i][m];
484 if (s2->flags & (1<<estCGP)) {
485 /* Copy the CG p vector */
486 x1 = s1->cg_p;
487 x2 = s2->cg_p;
488 for(i=start; i<end; i++)
489 copy_rvec(x1[i],x2[i]);
492 if (DOMAINDECOMP(cr)) {
493 s2->ddp_count = s1->ddp_count;
494 if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
495 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
496 srenew(s2->cg_gl,s2->cg_gl_nalloc);
498 s2->ncg_gl = s1->ncg_gl;
499 for(i=0; i<s2->ncg_gl; i++)
500 s2->cg_gl[i] = s1->cg_gl[i];
501 s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
504 if (constr) {
505 wallcycle_start(wcycle,ewcCONSTR);
506 dvdlambda = 0;
507 constrain(NULL,TRUE,TRUE,constr,&top->idef,
508 ir,cr,count,0,md,
509 s1->x,s2->x,NULL,s2->box,s2->lambda,
510 &dvdlambda,NULL,NULL,nrnb,econqCoord);
511 wallcycle_stop(wcycle,ewcCONSTR);
515 static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
518 int start,end,i,m;
520 if (DOMAINDECOMP(cr)) {
521 start = 0;
522 end = cr->dd->nat_home;
523 } else if (PARTDECOMP(cr)) {
524 pd_at_range(cr,&start,&end);
525 } else {
526 start = 0;
527 end = n;
530 for(i=start; i<end; i++) {
531 for(m=0; m<DIM; m++) {
532 x2[i][m] = x1[i][m] + a*f[i][m];
537 static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
540 int start,end,i,m;
542 if (DOMAINDECOMP(cr)) {
543 start = 0;
544 end = cr->dd->nat_home;
545 } else if (PARTDECOMP(cr)) {
546 pd_at_range(cr,&start,&end);
547 } else {
548 start = 0;
549 end = n;
552 for(i=start; i<end; i++) {
553 for(m=0; m<DIM; m++) {
554 f[i][m] = (x1[i][m] - x2[i][m])*a;
559 static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
560 gmx_mtop_t *top_global,t_inputrec *ir,
561 em_state_t *ems,gmx_localtop_t *top,
562 t_mdatoms *mdatoms,t_forcerec *fr,
563 gmx_vsite_t *vsite,gmx_constr_t constr,
564 t_nrnb *nrnb,gmx_wallcycle_t wcycle)
566 /* Repartition the domain decomposition */
567 wallcycle_start(wcycle,ewcDOMDEC);
568 dd_partition_system(fplog,step,cr,FALSE,
569 NULL,top_global,ir,
570 &ems->s,&ems->f,
571 mdatoms,top,fr,vsite,NULL,constr,
572 nrnb,wcycle,FALSE);
573 dd_store_state(cr->dd,&ems->s);
574 wallcycle_stop(wcycle,ewcDOMDEC);
577 static void evaluate_energy(FILE *fplog,bool bVerbose,t_commrec *cr,
578 t_state *state_global,gmx_mtop_t *top_global,
579 em_state_t *ems,gmx_localtop_t *top,
580 t_inputrec *inputrec,
581 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
582 gmx_global_stat_t gstat,
583 gmx_vsite_t *vsite,gmx_constr_t constr,
584 t_fcdata *fcd,
585 t_graph *graph,t_mdatoms *mdatoms,
586 t_forcerec *fr,rvec mu_tot,
587 gmx_enerdata_t *enerd,tensor vir,tensor pres,
588 gmx_step_t count,bool bFirst)
590 real t;
591 bool bNS;
592 int nabnsb;
593 tensor force_vir,shake_vir,ekin;
594 real dvdl;
595 real terminate=0;
597 /* Set the time to the initial time, the time does not change during EM */
598 t = inputrec->init_t;
600 if (bFirst ||
601 (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
602 /* This the first state or an old state used before the last ns */
603 bNS = TRUE;
604 } else {
605 bNS = FALSE;
606 if (inputrec->nstlist > 0) {
607 bNS = TRUE;
608 } else if (inputrec->nstlist == -1) {
609 nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
610 if (PAR(cr))
611 gmx_sumi(1,&nabnsb,cr);
612 bNS = (nabnsb > 0);
616 if (vsite)
617 construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
618 top->idef.iparams,top->idef.il,
619 fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
621 if (DOMAINDECOMP(cr)) {
622 if (bNS) {
623 /* Repartition the domain decomposition */
624 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
625 ems,top,mdatoms,fr,vsite,constr,
626 nrnb,wcycle);
630 /* Calc force & energy on new trial position */
631 /* do_force always puts the charge groups in the box and shifts again
632 * We do not unshift, so molecules are always whole in congrad.c
634 do_force(fplog,cr,inputrec,
635 count,nrnb,wcycle,top,top_global,&top_global->groups,
636 ems->s.box,ems->s.x,&ems->s.hist,
637 ems->f,force_vir,mdatoms,enerd,fcd,
638 ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
639 GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
640 (bNS ? GMX_FORCE_NS : 0));
642 /* Clear the unused shake virial and pressure */
643 clear_mat(shake_vir);
644 clear_mat(pres);
646 /* Calculate long range corrections to pressure and energy */
647 calc_dispcorr(fplog,inputrec,fr,count,mdatoms->nr,ems->s.box,ems->s.lambda,
648 pres,force_vir,enerd);
650 /* Communicate stuff when parallel */
651 if (PAR(cr)) {
652 wallcycle_start(wcycle,ewcMoveE);
654 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
655 inputrec,NULL,FALSE,NULL,NULL,NULL,NULL,&terminate,
656 top_global,&ems->s);
658 wallcycle_stop(wcycle,ewcMoveE);
661 ems->epot = enerd->term[F_EPOT];
663 if (constr) {
664 /* Project out the constraint components of the force */
665 wallcycle_start(wcycle,ewcCONSTR);
666 dvdl = 0;
667 constrain(NULL,FALSE,FALSE,constr,&top->idef,
668 inputrec,cr,count,0,mdatoms,
669 ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
670 NULL,&shake_vir,nrnb,econqForceDispl);
671 if (fr->bSepDVDL && fplog)
672 fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
673 enerd->term[F_DHDL_CON] += dvdl;
674 m_add(force_vir,shake_vir,vir);
675 wallcycle_stop(wcycle,ewcCONSTR);
676 } else {
677 copy_mat(force_vir,vir);
680 clear_mat(ekin);
681 enerd->term[F_PRES] =
682 calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
683 (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
685 sum_dhdl(enerd,ems->s.lambda,inputrec);
687 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
690 static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
691 gmx_mtop_t *mtop,
692 em_state_t *s_min,em_state_t *s_b)
694 rvec *fm,*fb,*fmg;
695 t_block *cgs_gl;
696 int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
697 double partsum;
698 unsigned char *grpnrFREEZE;
700 if (debug)
701 fprintf(debug,"Doing reorder_partsum\n");
703 fm = s_min->f;
704 fb = s_b->f;
706 cgs_gl = dd_charge_groups_global(cr->dd);
707 index = cgs_gl->index;
709 /* Collect fm in a global vector fmg.
710 * This conflics with the spirit of domain decomposition,
711 * but to fully optimize this a much more complicated algorithm is required.
713 snew(fmg,mtop->natoms);
715 ncg = s_min->s.ncg_gl;
716 cg_gl = s_min->s.cg_gl;
717 i = 0;
718 for(c=0; c<ncg; c++) {
719 cg = cg_gl[c];
720 a0 = index[cg];
721 a1 = index[cg+1];
722 for(a=a0; a<a1; a++) {
723 copy_rvec(fm[i],fmg[a]);
724 i++;
727 gmx_sum(mtop->natoms*3,fmg[0],cr);
729 /* Now we will determine the part of the sum for the cgs in state s_b */
730 ncg = s_b->s.ncg_gl;
731 cg_gl = s_b->s.cg_gl;
732 partsum = 0;
733 i = 0;
734 gf = 0;
735 grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
736 for(c=0; c<ncg; c++) {
737 cg = cg_gl[c];
738 a0 = index[cg];
739 a1 = index[cg+1];
740 for(a=a0; a<a1; a++) {
741 if (mdatoms->cFREEZE && grpnrFREEZE) {
742 gf = grpnrFREEZE[i];
744 for(m=0; m<DIM; m++) {
745 if (!opts->nFreeze[gf][m]) {
746 partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
749 i++;
753 sfree(fmg);
755 return partsum;
758 static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
759 gmx_mtop_t *mtop,
760 em_state_t *s_min,em_state_t *s_b)
762 rvec *fm,*fb;
763 double sum;
764 int gf,i,m;
766 /* This is just the classical Polak-Ribiere calculation of beta;
767 * it looks a bit complicated since we take freeze groups into account,
768 * and might have to sum it in parallel runs.
771 if (!DOMAINDECOMP(cr) ||
772 (s_min->s.ddp_count == cr->dd->ddp_count &&
773 s_b->s.ddp_count == cr->dd->ddp_count)) {
774 fm = s_min->f;
775 fb = s_b->f;
776 sum = 0;
777 gf = 0;
778 /* This part of code can be incorrect with DD,
779 * since the atom ordering in s_b and s_min might differ.
781 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
782 if (mdatoms->cFREEZE)
783 gf = mdatoms->cFREEZE[i];
784 for(m=0; m<DIM; m++)
785 if (!opts->nFreeze[gf][m]) {
786 sum += (fb[i][m] - fm[i][m])*fb[i][m];
789 } else {
790 /* We need to reorder cgs while summing */
791 sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
793 if (PAR(cr))
794 gmx_sumd(1,&sum,cr);
796 return sum/sqr(s_min->fnorm);
799 double do_cg(FILE *fplog,t_commrec *cr,
800 int nfile,t_filenm fnm[],
801 bool bVerbose,bool bCompact,
802 gmx_vsite_t *vsite,gmx_constr_t constr,
803 int stepout,
804 t_inputrec *inputrec,
805 gmx_mtop_t *top_global,t_fcdata *fcd,
806 t_state *state_global,rvec f[],
807 t_mdatoms *mdatoms,
808 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
809 gmx_edsam_t ed,
810 t_forcerec *fr,
811 int repl_ex_nst,int repl_ex_seed,
812 real cpt_period,real max_hours,
813 unsigned long Flags,
814 gmx_runtime_t *runtime)
816 const char *CG="Polak-Ribiere Conjugate Gradients";
818 em_state_t *s_min,*s_a,*s_b,*s_c;
819 gmx_localtop_t *top;
820 gmx_enerdata_t *enerd;
821 gmx_global_stat_t gstat;
822 t_graph *graph;
823 rvec *f_global,*p,*sf,*sfm;
824 double gpa,gpb,gpc,tmp,sum[2],minstep;
825 real fnormn;
826 real stepsize;
827 real a,b,c,beta=0.0;
828 real epot_repl=0;
829 real pnorm;
830 t_mdebin *mdebin;
831 bool converged,foundlower;
832 rvec mu_tot;
833 bool do_log=FALSE,do_ene=FALSE,do_x,do_f;
834 tensor vir,pres;
835 int number_steps,neval=0,nstcg=inputrec->nstcgsteep;
836 int fp_trn,fp_ene;
837 int i,m,gf,step,nminstep;
838 real terminate=0;
840 step=0;
842 s_min = init_em_state();
843 s_a = init_em_state();
844 s_b = init_em_state();
845 s_c = init_em_state();
847 /* Init em and store the local state in s_min */
848 init_em(fplog,CG,cr,inputrec,
849 state_global,top_global,s_min,&top,f,&f_global,
850 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
851 nfile,fnm,&fp_trn,&fp_ene,&mdebin);
853 /* Print to log file */
854 print_date_and_time(fplog,cr->nodeid,
855 "Started Polak-Ribiere Conjugate Gradients",NULL);
856 wallcycle_start(wcycle,ewcRUN);
858 /* Max number of steps */
859 number_steps=inputrec->nsteps;
861 if (MASTER(cr))
862 sp_header(stderr,CG,inputrec->em_tol,number_steps);
863 if (fplog)
864 sp_header(fplog,CG,inputrec->em_tol,number_steps);
866 /* Call the force routine and some auxiliary (neighboursearching etc.) */
867 /* do_force always puts the charge groups in the box and shifts again
868 * We do not unshift, so molecules are always whole in congrad.c
870 evaluate_energy(fplog,bVerbose,cr,
871 state_global,top_global,s_min,top,
872 inputrec,nrnb,wcycle,gstat,
873 vsite,constr,fcd,graph,mdatoms,fr,
874 mu_tot,enerd,vir,pres,-1,TRUE);
875 where();
877 if (MASTER(cr)) {
878 /* Copy stuff to the energy bin for easy printing etc. */
879 upd_mdebin(mdebin,NULL,TRUE,(double)step,
880 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
881 NULL,NULL,vir,pres,NULL,mu_tot,constr);
883 print_ebin_header(fplog,step,step,s_min->s.lambda);
884 print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
885 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
887 where();
889 /* Estimate/guess the initial stepsize */
890 stepsize = inputrec->em_stepsize/s_min->fnorm;
892 if (MASTER(cr)) {
893 fprintf(stderr," F-max = %12.5e on atom %d\n",
894 s_min->fmax,s_min->a_fmax+1);
895 fprintf(stderr," F-Norm = %12.5e\n",
896 s_min->fnorm/sqrt(state_global->natoms));
897 fprintf(stderr,"\n");
898 /* and copy to the log file too... */
899 fprintf(fplog," F-max = %12.5e on atom %d\n",
900 s_min->fmax,s_min->a_fmax+1);
901 fprintf(fplog," F-Norm = %12.5e\n",
902 s_min->fnorm/sqrt(state_global->natoms));
903 fprintf(fplog,"\n");
905 /* Start the loop over CG steps.
906 * Each successful step is counted, and we continue until
907 * we either converge or reach the max number of steps.
909 for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
911 /* start taking steps in a new direction
912 * First time we enter the routine, beta=0, and the direction is
913 * simply the negative gradient.
916 /* Calculate the new direction in p, and the gradient in this direction, gpa */
917 p = s_min->s.cg_p;
918 sf = s_min->f;
919 gpa = 0;
920 gf = 0;
921 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
922 if (mdatoms->cFREEZE)
923 gf = mdatoms->cFREEZE[i];
924 for(m=0; m<DIM; m++) {
925 if (!inputrec->opts.nFreeze[gf][m]) {
926 p[i][m] = sf[i][m] + beta*p[i][m];
927 gpa -= p[i][m]*sf[i][m];
928 /* f is negative gradient, thus the sign */
929 } else {
930 p[i][m] = 0;
935 /* Sum the gradient along the line across CPUs */
936 if (PAR(cr))
937 gmx_sumd(1,&gpa,cr);
939 /* Calculate the norm of the search vector */
940 get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
942 /* Just in case stepsize reaches zero due to numerical precision... */
943 if(stepsize<=0)
944 stepsize = inputrec->em_stepsize/pnorm;
947 * Double check the value of the derivative in the search direction.
948 * If it is positive it must be due to the old information in the
949 * CG formula, so just remove that and start over with beta=0.
950 * This corresponds to a steepest descent step.
952 if(gpa>0) {
953 beta = 0;
954 step--; /* Don't count this step since we are restarting */
955 continue; /* Go back to the beginning of the big for-loop */
958 /* Calculate minimum allowed stepsize, before the average (norm)
959 * relative change in coordinate is smaller than precision
961 minstep=0;
962 for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
963 for(m=0; m<DIM; m++) {
964 tmp = fabs(s_min->s.x[i][m]);
965 if(tmp < 1.0)
966 tmp = 1.0;
967 tmp = p[i][m]/tmp;
968 minstep += tmp*tmp;
971 /* Add up from all CPUs */
972 if(PAR(cr))
973 gmx_sumd(1,&minstep,cr);
975 minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
977 if(stepsize<minstep) {
978 converged=TRUE;
979 break;
982 /* Write coordinates if necessary */
983 do_x = do_per_step(step,inputrec->nstxout);
984 do_f = do_per_step(step,inputrec->nstfout);
986 write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
987 top_global,inputrec,step,
988 s_min,state_global,f_global);
990 /* Take a step downhill.
991 * In theory, we should minimize the function along this direction.
992 * That is quite possible, but it turns out to take 5-10 function evaluations
993 * for each line. However, we dont really need to find the exact minimum -
994 * it is much better to start a new CG step in a modified direction as soon
995 * as we are close to it. This will save a lot of energy evaluations.
997 * In practice, we just try to take a single step.
998 * If it worked (i.e. lowered the energy), we increase the stepsize but
999 * the continue straight to the next CG step without trying to find any minimum.
1000 * If it didn't work (higher energy), there must be a minimum somewhere between
1001 * the old position and the new one.
1003 * Due to the finite numerical accuracy, it turns out that it is a good idea
1004 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1005 * This leads to lower final energies in the tests I've done. / Erik
1007 s_a->epot = s_min->epot;
1008 a = 0.0;
1009 c = a + stepsize; /* reference position along line is zero */
1011 if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
1012 em_dd_partition_system(fplog,step,cr,top_global,inputrec,
1013 s_min,top,mdatoms,fr,vsite,constr,
1014 nrnb,wcycle);
1017 /* Take a trial step (new coords in s_c) */
1018 do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
1019 constr,top,nrnb,wcycle,-1);
1021 neval++;
1022 /* Calculate energy for the trial step */
1023 evaluate_energy(fplog,bVerbose,cr,
1024 state_global,top_global,s_c,top,
1025 inputrec,nrnb,wcycle,gstat,
1026 vsite,constr,fcd,graph,mdatoms,fr,
1027 mu_tot,enerd,vir,pres,-1,FALSE);
1029 /* Calc derivative along line */
1030 p = s_c->s.cg_p;
1031 sf = s_c->f;
1032 gpc=0;
1033 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1034 for(m=0; m<DIM; m++)
1035 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1037 /* Sum the gradient along the line across CPUs */
1038 if (PAR(cr))
1039 gmx_sumd(1,&gpc,cr);
1041 /* This is the max amount of increase in energy we tolerate */
1042 tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1044 /* Accept the step if the energy is lower, or if it is not significantly higher
1045 * and the line derivative is still negative.
1047 if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
1048 foundlower = TRUE;
1049 /* Great, we found a better energy. Increase step for next iteration
1050 * if we are still going down, decrease it otherwise
1052 if(gpc<0)
1053 stepsize *= 1.618034; /* The golden section */
1054 else
1055 stepsize *= 0.618034; /* 1/golden section */
1056 } else {
1057 /* New energy is the same or higher. We will have to do some work
1058 * to find a smaller value in the interval. Take smaller step next time!
1060 foundlower = FALSE;
1061 stepsize *= 0.618034;
1067 /* OK, if we didn't find a lower value we will have to locate one now - there must
1068 * be one in the interval [a=0,c].
1069 * The same thing is valid here, though: Don't spend dozens of iterations to find
1070 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1071 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1073 * I also have a safeguard for potentially really patological functions so we never
1074 * take more than 20 steps before we give up ...
1076 * If we already found a lower value we just skip this step and continue to the update.
1078 if (!foundlower) {
1079 nminstep=0;
1081 do {
1082 /* Select a new trial point.
1083 * If the derivatives at points a & c have different sign we interpolate to zero,
1084 * otherwise just do a bisection.
1086 if(gpa<0 && gpc>0)
1087 b = a + gpa*(a-c)/(gpc-gpa);
1088 else
1089 b = 0.5*(a+c);
1091 /* safeguard if interpolation close to machine accuracy causes errors:
1092 * never go outside the interval
1094 if(b<=a || b>=c)
1095 b = 0.5*(a+c);
1097 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
1098 /* Reload the old state */
1099 em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
1100 s_min,top,mdatoms,fr,vsite,constr,
1101 nrnb,wcycle);
1104 /* Take a trial step to this new point - new coords in s_b */
1105 do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
1106 constr,top,nrnb,wcycle,-1);
1108 neval++;
1109 /* Calculate energy for the trial step */
1110 evaluate_energy(fplog,bVerbose,cr,
1111 state_global,top_global,s_b,top,
1112 inputrec,nrnb,wcycle,gstat,
1113 vsite,constr,fcd,graph,mdatoms,fr,
1114 mu_tot,enerd,vir,pres,-1,FALSE);
1116 /* p does not change within a step, but since the domain decomposition
1117 * might change, we have to use cg_p of s_b here.
1119 p = s_b->s.cg_p;
1120 sf = s_b->f;
1121 gpb=0;
1122 for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
1123 for(m=0; m<DIM; m++)
1124 gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1126 /* Sum the gradient along the line across CPUs */
1127 if (PAR(cr))
1128 gmx_sumd(1,&gpb,cr);
1130 if (debug)
1131 fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1132 s_a->epot,s_b->epot,s_c->epot,gpb);
1134 epot_repl = s_b->epot;
1136 /* Keep one of the intervals based on the value of the derivative at the new point */
1137 if (gpb > 0) {
1138 /* Replace c endpoint with b */
1139 swap_em_state(s_b,s_c);
1140 c = b;
1141 gpc = gpb;
1142 } else {
1143 /* Replace a endpoint with b */
1144 swap_em_state(s_b,s_a);
1145 a = b;
1146 gpa = gpb;
1150 * Stop search as soon as we find a value smaller than the endpoints.
1151 * Never run more than 20 steps, no matter what.
1153 nminstep++;
1154 } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1155 (nminstep < 20));
1157 if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1158 nminstep >= 20) {
1159 /* OK. We couldn't find a significantly lower energy.
1160 * If beta==0 this was steepest descent, and then we give up.
1161 * If not, set beta=0 and restart with steepest descent before quitting.
1163 if (beta == 0.0) {
1164 /* Converged */
1165 converged = TRUE;
1166 break;
1167 } else {
1168 /* Reset memory before giving up */
1169 beta = 0.0;
1170 continue;
1174 /* Select min energy state of A & C, put the best in B.
1176 if (s_c->epot < s_a->epot) {
1177 if (debug)
1178 fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1179 s_c->epot,s_a->epot);
1180 swap_em_state(s_b,s_c);
1181 gpb = gpc;
1182 b = c;
1183 } else {
1184 if (debug)
1185 fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1186 s_a->epot,s_c->epot);
1187 swap_em_state(s_b,s_a);
1188 gpb = gpa;
1189 b = a;
1192 } else {
1193 if (debug)
1194 fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
1195 s_c->epot);
1196 swap_em_state(s_b,s_c);
1197 gpb = gpc;
1198 b = c;
1201 /* new search direction */
1202 /* beta = 0 means forget all memory and restart with steepest descents. */
1203 if (nstcg && ((step % nstcg)==0))
1204 beta = 0.0;
1205 else {
1206 /* s_min->fnorm cannot be zero, because then we would have converged
1207 * and broken out.
1210 /* Polak-Ribiere update.
1211 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1213 beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
1215 /* Limit beta to prevent oscillations */
1216 if (fabs(beta) > 5.0)
1217 beta = 0.0;
1220 /* update positions */
1221 swap_em_state(s_min,s_b);
1222 gpa = gpb;
1224 /* Print it if necessary */
1225 if (MASTER(cr)) {
1226 if(bVerbose)
1227 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1228 step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
1229 s_min->fmax,s_min->a_fmax+1);
1230 /* Store the new (lower) energies */
1231 upd_mdebin(mdebin,NULL,TRUE,(double)step,
1232 mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
1233 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1234 do_log = do_per_step(step,inputrec->nstlog);
1235 do_ene = do_per_step(step,inputrec->nstenergy);
1236 if(do_log)
1237 print_ebin_header(fplog,step,step,s_min->s.lambda);
1238 print_ebin(fp_ene,do_ene,FALSE,FALSE,
1239 do_log ? fplog : NULL,step,step,eprNORMAL,
1240 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1243 /* Stop when the maximum force lies below tolerance.
1244 * If we have reached machine precision, converged is already set to true.
1246 converged = converged || (s_min->fmax < inputrec->em_tol);
1248 } /* End of the loop */
1250 if (converged)
1251 step--; /* we never took that last step in this case */
1253 if (s_min->fmax > inputrec->em_tol) {
1254 if (MASTER(cr)) {
1255 warn_step(stderr,inputrec->em_tol,FALSE);
1256 warn_step(fplog,inputrec->em_tol,FALSE);
1258 converged = FALSE;
1261 if (MASTER(cr)) {
1262 /* If we printed energy and/or logfile last step (which was the last step)
1263 * we don't have to do it again, but otherwise print the final values.
1265 if(!do_log) {
1266 /* Write final value to log since we didn't do anything the last step */
1267 print_ebin_header(fplog,step,step,s_min->s.lambda);
1269 if (!do_ene || !do_log) {
1270 /* Write final energy file entries */
1271 print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1272 !do_log ? fplog : NULL,step,step,eprNORMAL,
1273 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1277 /* Print some stuff... */
1278 if (MASTER(cr))
1279 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1281 /* IMPORTANT!
1282 * For accurate normal mode calculation it is imperative that we
1283 * store the last conformation into the full precision binary trajectory.
1285 * However, we should only do it if we did NOT already write this step
1286 * above (which we did if do_x or do_f was true).
1288 do_x = !do_per_step(step,inputrec->nstxout);
1289 do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
1291 write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1292 top_global,inputrec,step,
1293 s_min,state_global,f_global);
1295 fnormn = s_min->fnorm/sqrt(state_global->natoms);
1297 if (MASTER(cr)) {
1298 print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
1299 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1300 print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
1301 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
1303 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1306 finish_em(fplog,cr,fp_trn,fp_ene);
1308 /* To print the actual number of steps we needed somewhere */
1309 runtime->nsteps_done = step;
1311 return 0;
1312 } /* That's all folks */
1315 double do_lbfgs(FILE *fplog,t_commrec *cr,
1316 int nfile,t_filenm fnm[],
1317 bool bVerbose,bool bCompact,
1318 gmx_vsite_t *vsite,gmx_constr_t constr,
1319 int stepout,
1320 t_inputrec *inputrec,
1321 gmx_mtop_t *top_global,t_fcdata *fcd,
1322 t_state *state,rvec f[],
1323 t_mdatoms *mdatoms,
1324 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1325 gmx_edsam_t ed,
1326 t_forcerec *fr,
1327 int repl_ex_nst,int repl_ex_seed,
1328 real cpt_period,real max_hours,
1329 unsigned long Flags,
1330 gmx_runtime_t *runtime)
1332 static const char *LBFGS="Low-Memory BFGS Minimizer";
1333 em_state_t ems;
1334 gmx_localtop_t *top;
1335 gmx_enerdata_t *enerd;
1336 gmx_global_stat_t gstat;
1337 t_graph *graph;
1338 int ncorr,nmaxcorr,point,cp,neval,nminstep;
1339 double stepsize,gpa,gpb,gpc,tmp,minstep;
1340 real *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
1341 real *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
1342 real a,b,c,maxdelta,delta;
1343 real diag,Epot0,Epot,EpotA,EpotB,EpotC;
1344 real dgdx,dgdg,sq,yr,beta;
1345 t_mdebin *mdebin;
1346 bool converged,first;
1347 rvec mu_tot;
1348 real fnorm,fmax;
1349 bool do_log,do_ene,do_x,do_f,foundlower,*frozen;
1350 tensor vir,pres;
1351 int fp_trn,fp_ene,start,end,number_steps;
1352 int i,k,m,n,nfmax,gf,step;
1353 /* not used */
1354 real terminate;
1356 if (PAR(cr))
1357 gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
1359 n = 3*state->natoms;
1360 nmaxcorr = inputrec->nbfgscorr;
1362 /* Allocate memory */
1363 snew(f,state->natoms);
1364 /* Use pointers to real so we dont have to loop over both atoms and
1365 * dimensions all the time...
1366 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1367 * that point to the same memory.
1369 snew(xa,n);
1370 snew(xb,n);
1371 snew(xc,n);
1372 snew(fa,n);
1373 snew(fb,n);
1374 snew(fc,n);
1375 snew(frozen,n);
1377 xx = (real *)state->x;
1378 ff = (real *)f;
1380 printf("x0: %20.12g %20.12g %20.12g\n",xx[0],xx[1],xx[2]);
1382 snew(p,n);
1383 snew(lastx,n);
1384 snew(lastf,n);
1385 snew(rho,nmaxcorr);
1386 snew(alpha,nmaxcorr);
1388 snew(dx,nmaxcorr);
1389 for(i=0;i<nmaxcorr;i++)
1390 snew(dx[i],n);
1392 snew(dg,nmaxcorr);
1393 for(i=0;i<nmaxcorr;i++)
1394 snew(dg[i],n);
1396 step = 0;
1397 neval = 0;
1399 /* Init em */
1400 init_em(fplog,LBFGS,cr,inputrec,
1401 state,top_global,&ems,&top,f,&f,
1402 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1403 nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1404 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1405 * so we free some memory again.
1407 sfree(ems.s.x);
1408 sfree(ems.f);
1410 start = mdatoms->start;
1411 end = mdatoms->homenr + start;
1413 /* Print to log file */
1414 print_date_and_time(fplog,cr->nodeid,
1415 "Started Low-Memory BFGS Minimization",NULL);
1416 wallcycle_start(wcycle,ewcRUN);
1418 do_log = do_ene = do_x = do_f = TRUE;
1420 /* Max number of steps */
1421 number_steps=inputrec->nsteps;
1423 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1424 gf = 0;
1425 for(i=start; i<end; i++) {
1426 if (mdatoms->cFREEZE)
1427 gf = mdatoms->cFREEZE[i];
1428 for(m=0; m<DIM; m++)
1429 frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
1431 if (MASTER(cr))
1432 sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
1433 if (fplog)
1434 sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
1436 if (vsite)
1437 construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
1438 top->idef.iparams,top->idef.il,
1439 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
1441 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1442 /* do_force always puts the charge groups in the box and shifts again
1443 * We do not unshift, so molecules are always whole
1445 neval++;
1446 ems.s.x = state->x;
1447 ems.f = f;
1448 evaluate_energy(fplog,bVerbose,cr,
1449 state,top_global,&ems,top,
1450 inputrec,nrnb,wcycle,gstat,
1451 vsite,constr,fcd,graph,mdatoms,fr,
1452 mu_tot,enerd,vir,pres,-1,TRUE);
1453 where();
1455 if (MASTER(cr)) {
1456 /* Copy stuff to the energy bin for easy printing etc. */
1457 upd_mdebin(mdebin,NULL,TRUE,(double)step,
1458 mdatoms->tmass,enerd,state,state->box,
1459 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1461 print_ebin_header(fplog,step,step,state->lambda);
1462 print_ebin(fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
1463 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1465 where();
1467 /* This is the starting energy */
1468 Epot = enerd->term[F_EPOT];
1470 fnorm = ems.fnorm;
1471 fmax = ems.fmax;
1472 nfmax = ems.a_fmax;
1474 /* Set the initial step.
1475 * since it will be multiplied by the non-normalized search direction
1476 * vector (force vector the first time), we scale it by the
1477 * norm of the force.
1480 if (MASTER(cr)) {
1481 fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1482 fprintf(stderr," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1483 fprintf(stderr," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1484 fprintf(stderr,"\n");
1485 /* and copy to the log file too... */
1486 fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
1487 fprintf(fplog," F-max = %12.5e on atom %d\n",fmax,nfmax+1);
1488 fprintf(fplog," F-Norm = %12.5e\n",fnorm/sqrt(state->natoms));
1489 fprintf(fplog,"\n");
1492 point=0;
1493 for(i=0;i<n;i++)
1494 if(!frozen[i])
1495 dx[point][i] = ff[i]; /* Initial search direction */
1496 else
1497 dx[point][i] = 0;
1499 stepsize = 1.0/fnorm;
1500 converged = FALSE;
1502 /* Start the loop over BFGS steps.
1503 * Each successful step is counted, and we continue until
1504 * we either converge or reach the max number of steps.
1507 ncorr=0;
1509 /* Set the gradient from the force */
1510 for(step=0,converged=FALSE;( step<=number_steps || number_steps==0) && !converged;step++) {
1512 /* Write coordinates if necessary */
1513 do_x = do_per_step(step,inputrec->nstxout);
1514 do_f = do_per_step(step,inputrec->nstfout);
1516 write_traj(fplog,cr,fp_trn,do_x,FALSE,do_f,0,FALSE,0,NULL,FALSE,
1517 top_global,inputrec->eI,inputrec->simulation_part,
1518 step,(real)step,state,state,f,f,NULL,NULL);
1520 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1522 /* pointer to current direction - point=0 first time here */
1523 s=dx[point];
1525 /* calculate line gradient */
1526 for(gpa=0,i=0;i<n;i++)
1527 gpa-=s[i]*ff[i];
1529 /* Calculate minimum allowed stepsize, before the average (norm)
1530 * relative change in coordinate is smaller than precision
1532 for(minstep=0,i=0;i<n;i++) {
1533 tmp=fabs(xx[i]);
1534 if(tmp<1.0)
1535 tmp=1.0;
1536 tmp = s[i]/tmp;
1537 minstep += tmp*tmp;
1539 minstep = GMX_REAL_EPS/sqrt(minstep/n);
1541 if(stepsize<minstep) {
1542 converged=TRUE;
1543 break;
1546 /* Store old forces and coordinates */
1547 for(i=0;i<n;i++) {
1548 lastx[i]=xx[i];
1549 lastf[i]=ff[i];
1551 Epot0=Epot;
1553 first=TRUE;
1555 for(i=0;i<n;i++)
1556 xa[i]=xx[i];
1558 /* Take a step downhill.
1559 * In theory, we should minimize the function along this direction.
1560 * That is quite possible, but it turns out to take 5-10 function evaluations
1561 * for each line. However, we dont really need to find the exact minimum -
1562 * it is much better to start a new BFGS step in a modified direction as soon
1563 * as we are close to it. This will save a lot of energy evaluations.
1565 * In practice, we just try to take a single step.
1566 * If it worked (i.e. lowered the energy), we increase the stepsize but
1567 * the continue straight to the next BFGS step without trying to find any minimum.
1568 * If it didn't work (higher energy), there must be a minimum somewhere between
1569 * the old position and the new one.
1571 * Due to the finite numerical accuracy, it turns out that it is a good idea
1572 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1573 * This leads to lower final energies in the tests I've done. / Erik
1575 foundlower=FALSE;
1576 EpotA = Epot0;
1577 a = 0.0;
1578 c = a + stepsize; /* reference position along line is zero */
1580 /* Check stepsize first. We do not allow displacements
1581 * larger than emstep.
1583 do {
1584 c = a + stepsize;
1585 maxdelta=0;
1586 for(i=0;i<n;i++) {
1587 delta=c*s[i];
1588 if(delta>maxdelta)
1589 maxdelta=delta;
1591 if(maxdelta>inputrec->em_stepsize)
1592 stepsize*=0.1;
1593 } while(maxdelta>inputrec->em_stepsize);
1595 /* Take a trial step */
1596 for (i=0; i<n; i++)
1597 xc[i] = lastx[i] + c*s[i];
1599 neval++;
1600 /* Calculate energy for the trial step */
1601 ems.s.x = (rvec *)xc;
1602 ems.f = (rvec *)fc;
1603 evaluate_energy(fplog,bVerbose,cr,
1604 state,top_global,&ems,top,
1605 inputrec,nrnb,wcycle,gstat,
1606 vsite,constr,fcd,graph,mdatoms,fr,
1607 mu_tot,enerd,vir,pres,step,FALSE);
1608 EpotC = ems.epot;
1610 /* Calc derivative along line */
1611 for(gpc=0,i=0; i<n; i++) {
1612 gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
1614 /* Sum the gradient along the line across CPUs */
1615 if (PAR(cr))
1616 gmx_sumd(1,&gpc,cr);
1618 /* This is the max amount of increase in energy we tolerate */
1619 tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
1621 /* Accept the step if the energy is lower, or if it is not significantly higher
1622 * and the line derivative is still negative.
1624 if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
1625 foundlower = TRUE;
1626 /* Great, we found a better energy. Increase step for next iteration
1627 * if we are still going down, decrease it otherwise
1629 if(gpc<0)
1630 stepsize *= 1.618034; /* The golden section */
1631 else
1632 stepsize *= 0.618034; /* 1/golden section */
1633 } else {
1634 /* New energy is the same or higher. We will have to do some work
1635 * to find a smaller value in the interval. Take smaller step next time!
1637 foundlower = FALSE;
1638 stepsize *= 0.618034;
1641 /* OK, if we didn't find a lower value we will have to locate one now - there must
1642 * be one in the interval [a=0,c].
1643 * The same thing is valid here, though: Don't spend dozens of iterations to find
1644 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1645 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1647 * I also have a safeguard for potentially really patological functions so we never
1648 * take more than 20 steps before we give up ...
1650 * If we already found a lower value we just skip this step and continue to the update.
1653 if(!foundlower) {
1655 nminstep=0;
1656 do {
1657 /* Select a new trial point.
1658 * If the derivatives at points a & c have different sign we interpolate to zero,
1659 * otherwise just do a bisection.
1662 if(gpa<0 && gpc>0)
1663 b = a + gpa*(a-c)/(gpc-gpa);
1664 else
1665 b = 0.5*(a+c);
1667 /* safeguard if interpolation close to machine accuracy causes errors:
1668 * never go outside the interval
1670 if(b<=a || b>=c)
1671 b = 0.5*(a+c);
1673 /* Take a trial step */
1674 for (i=0; i<n; i++)
1675 xb[i] = lastx[i] + b*s[i];
1677 neval++;
1678 /* Calculate energy for the trial step */
1679 ems.s.x = (rvec *)xb;
1680 ems.f = (rvec *)fb;
1681 evaluate_energy(fplog,bVerbose,cr,
1682 state,top_global,&ems,top,
1683 inputrec,nrnb,wcycle,gstat,
1684 vsite,constr,fcd,graph,mdatoms,fr,
1685 mu_tot,enerd,vir,pres,step,FALSE);
1686 EpotB = ems.epot;
1688 fnorm = ems.fnorm;
1690 for(gpb=0,i=0; i<n; i++)
1691 gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
1693 /* Sum the gradient along the line across CPUs */
1694 if (PAR(cr))
1695 gmx_sumd(1,&gpb,cr);
1697 /* Keep one of the intervals based on the value of the derivative at the new point */
1698 if(gpb>0) {
1699 /* Replace c endpoint with b */
1700 EpotC = EpotB;
1701 c = b;
1702 gpc = gpb;
1703 /* swap coord pointers b/c */
1704 xtmp = xb;
1705 ftmp = fb;
1706 xb = xc;
1707 fb = fc;
1708 xc = xtmp;
1709 fc = ftmp;
1710 } else {
1711 /* Replace a endpoint with b */
1712 EpotA = EpotB;
1713 a = b;
1714 gpa = gpb;
1715 /* swap coord pointers a/b */
1716 xtmp = xb;
1717 ftmp = fb;
1718 xb = xa;
1719 fb = fa;
1720 xa = xtmp;
1721 fa = ftmp;
1725 * Stop search as soon as we find a value smaller than the endpoints,
1726 * or if the tolerance is below machine precision.
1727 * Never run more than 20 steps, no matter what.
1729 nminstep++;
1730 } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
1732 if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
1733 /* OK. We couldn't find a significantly lower energy.
1734 * If ncorr==0 this was steepest descent, and then we give up.
1735 * If not, reset memory to restart as steepest descent before quitting.
1737 if(ncorr==0) {
1738 /* Converged */
1739 converged=TRUE;
1740 break;
1741 } else {
1742 /* Reset memory */
1743 ncorr=0;
1744 /* Search in gradient direction */
1745 for(i=0;i<n;i++)
1746 dx[point][i]=ff[i];
1747 /* Reset stepsize */
1748 stepsize = 1.0/fnorm;
1749 continue;
1753 /* Select min energy state of A & C, put the best in xx/ff/Epot
1755 if(EpotC<EpotA) {
1756 Epot = EpotC;
1757 /* Use state C */
1758 for(i=0;i<n;i++) {
1759 xx[i]=xc[i];
1760 ff[i]=fc[i];
1762 stepsize=c;
1763 } else {
1764 Epot = EpotA;
1765 /* Use state A */
1766 for(i=0;i<n;i++) {
1767 xx[i]=xa[i];
1768 ff[i]=fa[i];
1770 stepsize=a;
1773 } else {
1774 /* found lower */
1775 Epot = EpotC;
1776 /* Use state C */
1777 for(i=0;i<n;i++) {
1778 xx[i]=xc[i];
1779 ff[i]=fc[i];
1781 stepsize=c;
1784 /* Update the memory information, and calculate a new
1785 * approximation of the inverse hessian
1788 /* Have new data in Epot, xx, ff */
1789 if(ncorr<nmaxcorr)
1790 ncorr++;
1792 for(i=0;i<n;i++) {
1793 dg[point][i]=lastf[i]-ff[i];
1794 dx[point][i]*=stepsize;
1797 dgdg=0;
1798 dgdx=0;
1799 for(i=0;i<n;i++) {
1800 dgdg+=dg[point][i]*dg[point][i];
1801 dgdx+=dg[point][i]*dx[point][i];
1804 diag=dgdx/dgdg;
1806 rho[point]=1.0/dgdx;
1807 point++;
1809 if(point>=nmaxcorr)
1810 point=0;
1812 /* Update */
1813 for(i=0;i<n;i++)
1814 p[i]=ff[i];
1816 cp=point;
1818 /* Recursive update. First go back over the memory points */
1819 for(k=0;k<ncorr;k++) {
1820 cp--;
1821 if(cp<0)
1822 cp=ncorr-1;
1824 sq=0;
1825 for(i=0;i<n;i++)
1826 sq+=dx[cp][i]*p[i];
1828 alpha[cp]=rho[cp]*sq;
1830 for(i=0;i<n;i++)
1831 p[i] -= alpha[cp]*dg[cp][i];
1834 for(i=0;i<n;i++)
1835 p[i] *= diag;
1837 /* And then go forward again */
1838 for(k=0;k<ncorr;k++) {
1839 yr = 0;
1840 for(i=0;i<n;i++)
1841 yr += p[i]*dg[cp][i];
1843 beta = rho[cp]*yr;
1844 beta = alpha[cp]-beta;
1846 for(i=0;i<n;i++)
1847 p[i] += beta*dx[cp][i];
1849 cp++;
1850 if(cp>=ncorr)
1851 cp=0;
1854 for(i=0;i<n;i++)
1855 if(!frozen[i])
1856 dx[point][i] = p[i];
1857 else
1858 dx[point][i] = 0;
1860 stepsize=1.0;
1862 /* Test whether the convergence criterion is met */
1863 get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
1865 /* Print it if necessary */
1866 if (MASTER(cr)) {
1867 if(bVerbose)
1868 fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1869 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
1870 /* Store the new (lower) energies */
1871 upd_mdebin(mdebin,NULL,TRUE,(double)step,
1872 mdatoms->tmass,enerd,state,state->box,
1873 NULL,NULL,vir,pres,NULL,mu_tot,constr);
1874 do_log = do_per_step(step,inputrec->nstlog);
1875 do_ene = do_per_step(step,inputrec->nstenergy);
1876 if(do_log)
1877 print_ebin_header(fplog,step,step,state->lambda);
1878 print_ebin(fp_ene,do_ene,FALSE,FALSE,
1879 do_log ? fplog : NULL,step,step,eprNORMAL,
1880 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1883 /* Stop when the maximum force lies below tolerance.
1884 * If we have reached machine precision, converged is already set to true.
1887 converged = converged || (fmax < inputrec->em_tol);
1889 } /* End of the loop */
1891 if(converged)
1892 step--; /* we never took that last step in this case */
1894 if(fmax>inputrec->em_tol) {
1895 if (MASTER(cr)) {
1896 warn_step(stderr,inputrec->em_tol,FALSE);
1897 warn_step(fplog,inputrec->em_tol,FALSE);
1899 converged = FALSE;
1902 /* If we printed energy and/or logfile last step (which was the last step)
1903 * we don't have to do it again, but otherwise print the final values.
1905 if(!do_log) /* Write final value to log since we didn't do anythin last step */
1906 print_ebin_header(fplog,step,step,state->lambda);
1907 if(!do_ene || !do_log) /* Write final energy file entries */
1908 print_ebin(fp_ene,!do_ene,FALSE,FALSE,
1909 !do_log ? fplog : NULL,step,step,eprNORMAL,
1910 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
1912 /* Print some stuff... */
1913 if (MASTER(cr))
1914 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
1916 /* IMPORTANT!
1917 * For accurate normal mode calculation it is imperative that we
1918 * store the last conformation into the full precision binary trajectory.
1920 * However, we should only do it if we did NOT already write this step
1921 * above (which we did if do_x or do_f was true).
1923 do_x = !do_per_step(step,inputrec->nstxout);
1924 do_f = !do_per_step(step,inputrec->nstfout);
1925 write_em_traj(fplog,cr,fp_trn,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
1926 top_global,inputrec,step,
1927 &ems,state,f);
1929 if (MASTER(cr)) {
1930 print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
1931 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1932 print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
1933 number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
1935 fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
1938 finish_em(fplog,cr,fp_trn,fp_ene);
1940 /* To print the actual number of steps we needed somewhere */
1941 runtime->nsteps_done = step;
1943 return 0;
1944 } /* That's all folks */
1947 double do_steep(FILE *fplog,t_commrec *cr,
1948 int nfile,t_filenm fnm[],
1949 bool bVerbose,bool bCompact,
1950 gmx_vsite_t *vsite,gmx_constr_t constr,
1951 int stepout,
1952 t_inputrec *inputrec,
1953 gmx_mtop_t *top_global,t_fcdata *fcd,
1954 t_state *state_global,rvec f[],
1955 t_mdatoms *mdatoms,
1956 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
1957 gmx_edsam_t ed,
1958 t_forcerec *fr,
1959 int repl_ex_nst,int repl_ex_seed,
1960 real cpt_period,real max_hours,
1961 unsigned long Flags,
1962 gmx_runtime_t *runtime)
1964 const char *SD="Steepest Descents";
1965 em_state_t *s_min,*s_try;
1966 rvec *f_global;
1967 gmx_localtop_t *top;
1968 gmx_enerdata_t *enerd;
1969 gmx_global_stat_t gstat;
1970 t_graph *graph;
1971 real stepsize,constepsize;
1972 real ustep,dvdlambda,fnormn;
1973 int fp_trn,fp_ene;
1974 t_mdebin *mdebin;
1975 bool bDone,bAbort,do_x,do_f;
1976 tensor vir,pres;
1977 rvec mu_tot;
1978 int nsteps;
1979 int count=0;
1980 int steps_accepted=0;
1981 /* not used */
1982 real terminate=0;
1984 s_min = init_em_state();
1985 s_try = init_em_state();
1987 /* Init em and store the local state in s_try */
1988 init_em(fplog,SD,cr,inputrec,
1989 state_global,top_global,s_try,&top,f,&f_global,
1990 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
1991 nfile,fnm,&fp_trn,&fp_ene,&mdebin);
1993 /* Print to log file */
1994 print_date_and_time(fplog,cr->nodeid,"Started Steepest Descents",NULL);
1995 wallcycle_start(wcycle,ewcRUN);
1997 /* Set variables for stepsize (in nm). This is the largest
1998 * step that we are going to make in any direction.
2000 ustep = inputrec->em_stepsize;
2001 stepsize = 0;
2003 /* Max number of steps */
2004 nsteps = inputrec->nsteps;
2006 if (MASTER(cr))
2007 /* Print to the screen */
2008 sp_header(stderr,SD,inputrec->em_tol,nsteps);
2009 if (fplog)
2010 sp_header(fplog,SD,inputrec->em_tol,nsteps);
2012 /**** HERE STARTS THE LOOP ****
2013 * count is the counter for the number of steps
2014 * bDone will be TRUE when the minimization has converged
2015 * bAbort will be TRUE when nsteps steps have been performed or when
2016 * the stepsize becomes smaller than is reasonable for machine precision
2018 count = 0;
2019 bDone = FALSE;
2020 bAbort = FALSE;
2021 while( !bDone && !bAbort ) {
2022 bAbort = (nsteps > 0) && (count==nsteps);
2024 /* set new coordinates, except for first step */
2025 if (count > 0) {
2026 do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
2027 constr,top,nrnb,wcycle,count);
2030 evaluate_energy(fplog,bVerbose,cr,
2031 state_global,top_global,s_try,top,
2032 inputrec,nrnb,wcycle,gstat,
2033 vsite,constr,fcd,graph,mdatoms,fr,
2034 mu_tot,enerd,vir,pres,count,count==0);
2036 if (MASTER(cr))
2037 print_ebin_header(fplog,count,count,s_try->s.lambda);
2039 if (count == 0)
2040 s_min->epot = s_try->epot + 1;
2042 /* Print it if necessary */
2043 if (MASTER(cr)) {
2044 if (bVerbose) {
2045 fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2046 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
2047 (s_try->epot < s_min->epot) ? '\n' : '\r');
2050 if (s_try->epot < s_min->epot) {
2051 /* Store the new (lower) energies */
2052 upd_mdebin(mdebin,NULL,TRUE,(double)count,
2053 mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
2054 NULL,NULL,vir,pres,NULL,mu_tot,constr);
2055 print_ebin(fp_ene,TRUE,
2056 do_per_step(steps_accepted,inputrec->nstdisreout),
2057 do_per_step(steps_accepted,inputrec->nstorireout),
2058 fplog,count,count,eprNORMAL,TRUE,
2059 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2060 fflush(fplog);
2064 /* Now if the new energy is smaller than the previous...
2065 * or if this is the first step!
2066 * or if we did random steps!
2069 if ( (count==0) || (s_try->epot < s_min->epot) ) {
2070 steps_accepted++;
2072 /* Test whether the convergence criterion is met... */
2073 bDone = (s_try->fmax < inputrec->em_tol);
2075 /* Copy the arrays for force, positions and energy */
2076 /* The 'Min' array always holds the coords and forces of the minimal
2077 sampled energy */
2078 swap_em_state(s_min,s_try);
2079 if (count > 0)
2080 ustep *= 1.2;
2082 /* Write to trn, if necessary */
2083 do_x = do_per_step(steps_accepted,inputrec->nstxout);
2084 do_f = do_per_step(steps_accepted,inputrec->nstfout);
2085 write_em_traj(fplog,cr,fp_trn,do_x,do_f,NULL,
2086 top_global,inputrec,count,
2087 s_min,state_global,f_global);
2089 else {
2090 /* If energy is not smaller make the step smaller... */
2091 ustep *= 0.5;
2093 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
2094 /* Reload the old state */
2095 em_dd_partition_system(fplog,count,cr,top_global,inputrec,
2096 s_min,top,mdatoms,fr,vsite,constr,
2097 nrnb,wcycle);
2101 /* Determine new step */
2102 stepsize = ustep/s_min->fmax;
2104 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2105 #ifdef GMX_DOUBLE
2106 if (ustep < 1e-12)
2107 #else
2108 if (ustep < 1e-6)
2109 #endif
2111 if (MASTER(cr)) {
2112 warn_step(stderr,inputrec->em_tol,constr!=NULL);
2113 warn_step(fplog,inputrec->em_tol,constr!=NULL);
2115 bAbort=TRUE;
2118 count++;
2119 } /* End of the loop */
2121 /* Print some shit... */
2122 if (MASTER(cr))
2123 fprintf(stderr,"\nwriting lowest energy coordinates.\n");
2124 write_em_traj(fplog,cr,fp_trn,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
2125 top_global,inputrec,count,
2126 s_min,state_global,f_global);
2128 fnormn = s_min->fnorm/sqrt(state_global->natoms);
2130 if (MASTER(cr)) {
2131 print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
2132 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2133 print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
2134 s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
2137 finish_em(fplog,cr,fp_trn,fp_ene);
2139 /* To print the actual number of steps we needed somewhere */
2140 inputrec->nsteps=count;
2142 runtime->nsteps_done = count;
2144 return 0;
2145 } /* That's all folks */
2148 double do_nm(FILE *fplog,t_commrec *cr,
2149 int nfile,t_filenm fnm[],
2150 bool bVerbose,bool bCompact,
2151 gmx_vsite_t *vsite,gmx_constr_t constr,
2152 int stepout,
2153 t_inputrec *inputrec,
2154 gmx_mtop_t *top_global,t_fcdata *fcd,
2155 t_state *state_global,rvec f[],
2156 t_mdatoms *mdatoms,
2157 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2158 gmx_edsam_t ed,
2159 t_forcerec *fr,
2160 int repl_ex_nst,int repl_ex_seed,
2161 real cpt_period,real max_hours,
2162 unsigned long Flags,
2163 gmx_runtime_t *runtime)
2165 t_mdebin *mdebin;
2166 const char *NM = "Normal Mode Analysis";
2167 int fp_ene,step,i;
2168 rvec *f_global;
2169 gmx_localtop_t *top;
2170 gmx_enerdata_t *enerd;
2171 gmx_global_stat_t gstat;
2172 t_graph *graph;
2173 real t,lambda,t0,lam0;
2174 bool bNS;
2175 tensor vir,pres;
2176 int nfmax,count;
2177 rvec mu_tot;
2178 rvec *fneg,*fpos;
2179 bool bSparse; /* use sparse matrix storage format */
2180 size_t sz;
2181 gmx_sparsematrix_t * sparse_matrix = NULL;
2182 real * full_matrix = NULL;
2183 em_state_t * state_work;
2185 /* added with respect to mdrun */
2186 int idum,jdum,kdum,row,col;
2187 real der_range=10.0*sqrt(GMX_REAL_EPS);
2188 real fnorm,fmax;
2189 real dfdx;
2191 state_work = init_em_state();
2193 /* Init em and store the local state in state_minimum */
2194 init_em(fplog,NM,cr,inputrec,
2195 state_global,top_global,state_work,&top,
2196 f,&f_global,
2197 nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
2198 nfile,fnm,NULL,&fp_ene,&mdebin);
2200 snew(fneg,top_global->natoms);
2201 snew(fpos,top_global->natoms);
2203 #ifndef GMX_DOUBLE
2204 fprintf(stderr,
2205 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2206 " which MIGHT not be accurate enough for normal mode analysis.\n"
2207 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2208 " are fairly modest even if you recompile in double precision.\n\n");
2209 #endif
2211 /* Check if we can/should use sparse storage format.
2213 * Sparse format is only useful when the Hessian itself is sparse, which it
2214 * will be when we use a cutoff.
2215 * For small systems (n<1000) it is easier to always use full matrix format, though.
2217 if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
2219 fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2220 bSparse = FALSE;
2222 else if(top_global->natoms < 1000)
2224 fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
2225 bSparse = FALSE;
2227 else
2229 fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
2230 bSparse = TRUE;
2233 sz = DIM*top_global->natoms;
2235 fprintf(stderr,"Allocating Hessian memory...\n\n");
2237 if(bSparse)
2239 sparse_matrix=gmx_sparsematrix_init(sz);
2240 sparse_matrix->compressed_symmetric = TRUE;
2242 else
2244 snew(full_matrix,sz*sz);
2247 /* Initial values */
2248 t0 = inputrec->init_t;
2249 lam0 = inputrec->init_lambda;
2250 t = t0;
2251 lambda = lam0;
2253 init_nrnb(nrnb);
2255 where();
2257 /* Write start time and temperature */
2258 print_date_and_time(fplog,cr->nodeid,"Started nmrun",NULL);
2259 wallcycle_start(wcycle,ewcRUN);
2260 if (MASTER(cr))
2262 fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",*(top_global->name),
2263 top_global->natoms);
2266 count = 0;
2267 evaluate_energy(fplog,bVerbose,cr,
2268 state_global,top_global,state_work,top,
2269 inputrec,nrnb,wcycle,gstat,
2270 vsite,constr,fcd,graph,mdatoms,fr,
2271 mu_tot,enerd,vir,pres,count,count==0);
2272 count++;
2274 /* if forces are not small, warn user */
2275 get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
2277 fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
2278 if (state_work->fmax > 1.0e-3)
2280 fprintf(stderr,"Maximum force probably not small enough to");
2281 fprintf(stderr," ensure that you are in an \nenergy well. ");
2282 fprintf(stderr,"Be aware that negative eigenvalues may occur");
2283 fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
2286 /***********************************************************
2288 * Loop over all pairs in matrix
2290 * do_force called twice. Once with positive and
2291 * once with negative displacement
2293 ************************************************************/
2295 /* fudge nr of steps to nr of atoms */
2297 inputrec->nsteps = top_global->natoms;
2299 for (step=0; (step<inputrec->nsteps); step++)
2302 for (idum=0; (idum<DIM); idum++)
2304 row = DIM*step+idum;
2306 state_work->s.x[step][idum] -= der_range;
2308 evaluate_energy(fplog,bVerbose,cr,
2309 state_global,top_global,state_work,top,
2310 inputrec,nrnb,wcycle,gstat,
2311 vsite,constr,fcd,graph,mdatoms,fr,
2312 mu_tot,enerd,vir,pres,count,count==0);
2313 count++;
2315 for ( i=0 ; i < top_global->natoms ; i++ )
2317 copy_rvec ( state_work->f[i] , fneg[i] );
2320 state_work->s.x[step][idum] += 2*der_range;
2322 evaluate_energy(fplog,bVerbose,cr,
2323 state_global,top_global,state_work,top,
2324 inputrec,nrnb,wcycle,gstat,
2325 vsite,constr,fcd,graph,mdatoms,fr,
2326 mu_tot,enerd,vir,pres,count,count==0);
2327 count++;
2329 for ( i=0 ; i < top_global->natoms ; i++ )
2331 copy_rvec ( state_work->f[i] , fpos[i] );
2334 for (jdum=0; (jdum<top_global->natoms); jdum++)
2336 for (kdum=0; (kdum<DIM); kdum++)
2338 dfdx=-(fpos[jdum][kdum]-fneg[jdum][kdum])/(2*der_range);
2339 col = DIM*jdum+kdum;
2341 if(bSparse)
2343 if(col>=row && dfdx!=0.0)
2344 gmx_sparsematrix_increment_value(sparse_matrix,row,col,dfdx);
2346 else
2348 full_matrix[row*sz+col] = dfdx;
2353 /* x is restored to original */
2354 state_work->s.x[step][idum] -= der_range;
2356 if (bVerbose && fplog)
2357 fflush(fplog);
2359 /* write progress */
2360 if (MASTER(cr) && bVerbose)
2362 fprintf(stderr,"\rFinished step %d out of %d",
2363 step+1,top_global->natoms);
2364 fflush(stderr);
2367 t=t0+step*inputrec->delta_t;
2368 lambda=lam0+step*inputrec->delta_lambda;
2370 if (MASTER(cr))
2372 print_ebin(-1,FALSE,FALSE,FALSE,fplog,step,t,eprAVER,
2373 FALSE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
2376 fprintf(stderr,"\n\nWriting Hessian...\n");
2377 gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
2379 runtime->nsteps_done = step;
2381 return 0;
2385 static void global_max(t_commrec *cr,int *n)
2387 int *sum,i;
2389 snew(sum,cr->nnodes);
2390 sum[cr->nodeid] = *n;
2391 gmx_sumi(cr->nnodes,sum,cr);
2392 for(i=0; i<cr->nnodes; i++)
2393 *n = max(*n,sum[i]);
2395 sfree(sum);
2398 static void realloc_bins(double **bin,int *nbin,int nbin_new)
2400 int i;
2402 if (nbin_new != *nbin) {
2403 srenew(*bin,nbin_new);
2404 for(i=*nbin; i<nbin_new; i++)
2405 (*bin)[i] = 0;
2406 *nbin = nbin_new;
2410 double do_tpi(FILE *fplog,t_commrec *cr,
2411 int nfile,t_filenm fnm[],
2412 bool bVerbose,bool bCompact,
2413 gmx_vsite_t *vsite,gmx_constr_t constr,
2414 int stepout,
2415 t_inputrec *inputrec,
2416 gmx_mtop_t *top_global,t_fcdata *fcd,
2417 t_state *state,rvec f[],
2418 t_mdatoms *mdatoms,
2419 t_nrnb *nrnb,gmx_wallcycle_t wcycle,
2420 gmx_edsam_t ed,
2421 t_forcerec *fr,
2422 int repl_ex_nst,int repl_ex_seed,
2423 real cpt_period,real max_hours,
2424 unsigned long Flags,
2425 gmx_runtime_t *runtime)
2427 const char *TPI="Test Particle Insertion";
2428 gmx_localtop_t *top;
2429 gmx_groups_t *groups;
2430 gmx_enerdata_t *enerd;
2431 real lambda,t,temp,beta,drmax,epot;
2432 double embU,sum_embU,*sum_UgembU,V,V_all,VembU_all;
2433 int status;
2434 t_trxframe rerun_fr;
2435 bool bDispCorr,bCharge,bRFExcl,bNotLastFrame,bStateChanged,bNS,bOurStep;
2436 tensor force_vir,shake_vir,vir,pres;
2437 int cg_tp,a_tp0,a_tp1,ngid,gid_tp,nener,e;
2438 rvec *x_mol;
2439 rvec mu_tot,x_init,dx,x_tp;
2440 int nnodes,frame,nsteps,step;
2441 int i,start,end;
2442 gmx_rng_t tpi_rand;
2443 FILE *fp_tpi=NULL;
2444 char *ptr,*dump_pdb,**leg,str[STRLEN],str2[STRLEN];
2445 double dbl,dump_ener;
2446 bool bCavity;
2447 int nat_cavity=0,d;
2448 real *mass_cavity=NULL,mass_tot;
2449 int nbin;
2450 double invbinw,*bin,refvolshift,logV,bUlogV;
2451 const char *tpid_leg[2]={"direct","reweighted"};
2453 /* Since numerical problems can lead to extreme negative energies
2454 * when atoms overlap, we need to set a lower limit for beta*U.
2456 real bU_neg_limit = -50;
2458 /* Since there is no upper limit to the insertion energies,
2459 * we need to set an upper limit for the distribution output.
2461 real bU_bin_limit = 50;
2462 real bU_logV_bin_limit = bU_bin_limit + 10;
2464 nnodes = cr->nnodes;
2466 top = gmx_mtop_generate_local_top(top_global,inputrec);
2468 groups = &top_global->groups;
2470 bCavity = (inputrec->eI == eiTPIC);
2471 if (bCavity) {
2472 ptr = getenv("GMX_TPIC_MASSES");
2473 if (ptr == NULL) {
2474 nat_cavity = 1;
2475 } else {
2476 /* Read (multiple) masses from env var GMX_TPIC_MASSES,
2477 * The center of mass of the last atoms is then used for TPIC.
2479 nat_cavity = 0;
2480 while (sscanf(ptr,"%lf%n",&dbl,&i) > 0) {
2481 srenew(mass_cavity,nat_cavity+1);
2482 mass_cavity[nat_cavity] = dbl;
2483 fprintf(fplog,"mass[%d] = %f\n",
2484 nat_cavity+1,mass_cavity[nat_cavity]);
2485 nat_cavity++;
2486 ptr += i;
2488 if (nat_cavity == 0)
2489 gmx_fatal(FARGS,"Found %d masses in GMX_TPIC_MASSES",nat_cavity);
2494 init_em(fplog,TPI,inputrec,&lambda,nrnb,mu_tot,
2495 state->box,fr,mdatoms,top,cr,nfile,fnm,NULL,NULL);*/
2496 /* We never need full pbc for TPI */
2497 fr->ePBC = epbcXYZ;
2498 /* Determine the temperature for the Boltzmann weighting */
2499 temp = inputrec->opts.ref_t[0];
2500 if (fplog) {
2501 for(i=1; (i<inputrec->opts.ngtc); i++) {
2502 if (inputrec->opts.ref_t[i] != temp) {
2503 fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2504 fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
2507 fprintf(fplog,
2508 "\n The temperature for test particle insertion is %.3f K\n\n",
2509 temp);
2511 beta = 1.0/(BOLTZ*temp);
2513 /* Number of insertions per frame */
2514 nsteps = inputrec->nsteps;
2516 /* Use the same neighborlist with more insertions points
2517 * in a sphere of radius drmax around the initial point
2519 /* This should be a proper mdp parameter */
2520 drmax = inputrec->rtpi;
2522 /* An environment variable can be set to dump all configurations
2523 * to pdb with an insertion energy <= this value.
2525 dump_pdb = getenv("GMX_TPI_DUMP");
2526 dump_ener = 0;
2527 if (dump_pdb)
2528 sscanf(dump_pdb,"%lf",&dump_ener);
2530 atoms2md(top_global,inputrec,0,NULL,0,top_global->natoms,mdatoms);
2531 update_mdatoms(mdatoms,inputrec->init_lambda);
2533 snew(enerd,1);
2534 init_enerdata(groups->grps[egcENER].nr,inputrec->n_flambda,enerd);
2536 /* Print to log file */
2537 runtime_start(runtime);
2538 print_date_and_time(fplog,cr->nodeid,
2539 "Started Test Particle Insertion",runtime);
2540 wallcycle_start(wcycle,ewcRUN);
2542 /* The last charge group is the group to be inserted */
2543 cg_tp = top->cgs.nr - 1;
2544 a_tp0 = top->cgs.index[cg_tp];
2545 a_tp1 = top->cgs.index[cg_tp+1];
2546 if (debug)
2547 fprintf(debug,"TPI cg %d, atoms %d-%d\n",cg_tp,a_tp0,a_tp1);
2548 if (a_tp1 - a_tp0 > 1 &&
2549 (inputrec->rlist < inputrec->rcoulomb ||
2550 inputrec->rlist < inputrec->rvdw))
2551 gmx_fatal(FARGS,"Can not do TPI for multi-atom molecule with a twin-range cut-off");
2552 snew(x_mol,a_tp1-a_tp0);
2554 bDispCorr = (inputrec->eDispCorr != edispcNO);
2555 bCharge = FALSE;
2556 for(i=a_tp0; i<a_tp1; i++) {
2557 /* Copy the coordinates of the molecule to be insterted */
2558 copy_rvec(state->x[i],x_mol[i-a_tp0]);
2559 /* Check if we need to print electrostatic energies */
2560 bCharge |= (mdatoms->chargeA[i] != 0 ||
2561 (mdatoms->chargeB && mdatoms->chargeB[i] != 0));
2563 bRFExcl = (bCharge && EEL_RF(fr->eeltype) && fr->eeltype!=eelRF_NEC);
2565 calc_cgcm(fplog,cg_tp,cg_tp+1,&(top->cgs),state->x,fr->cg_cm);
2566 if (bCavity) {
2567 if (norm(fr->cg_cm[cg_tp]) > 0.5*inputrec->rlist && fplog) {
2568 fprintf(fplog, "WARNING: Your TPI molecule is not centered at 0,0,0\n");
2569 fprintf(stderr,"WARNING: Your TPI molecule is not centered at 0,0,0\n");
2571 } else {
2572 /* Center the molecule to be inserted at zero */
2573 for(i=0; i<a_tp1-a_tp0; i++)
2574 rvec_dec(x_mol[i],fr->cg_cm[cg_tp]);
2577 if (fplog) {
2578 fprintf(fplog,"\nWill insert %d atoms %s partial charges\n",
2579 a_tp1-a_tp0,bCharge ? "with" : "without");
2581 fprintf(fplog,"\nWill insert %d times in each frame of %s\n",
2582 nsteps,opt2fn("-rerun",nfile,fnm));
2585 if (!bCavity) {
2586 if (inputrec->nstlist > 1) {
2587 if (drmax==0 && a_tp1-a_tp0==1) {
2588 gmx_fatal(FARGS,"Re-using the neighborlist %d times for insertions of a single atom in a sphere of radius %f does not make sense",inputrec->nstlist,drmax);
2590 if (fplog)
2591 fprintf(fplog,"Will use the same neighborlist for %d insertions in a sphere of radius %f\n",inputrec->nstlist,drmax);
2593 } else {
2594 if (fplog)
2595 fprintf(fplog,"Will insert randomly in a sphere of radius %f around the center of the cavity\n",drmax);
2598 ngid = groups->grps[egcENER].nr;
2599 gid_tp = GET_CGINFO_GID(fr->cginfo[cg_tp]);
2600 nener = 1 + ngid;
2601 if (bDispCorr)
2602 nener += 1;
2603 if (bCharge) {
2604 nener += ngid;
2605 if (bRFExcl)
2606 nener += 1;
2607 if (EEL_FULL(fr->eeltype))
2608 nener += 1;
2610 snew(sum_UgembU,nener);
2612 /* Initialize random generator */
2613 tpi_rand = gmx_rng_init(inputrec->ld_seed);
2615 if (MASTER(cr)) {
2616 fp_tpi = xvgropen(opt2fn("-tpi",nfile,fnm),
2617 "TPI energies","Time (ps)",
2618 "(kJ mol\\S-1\\N) / (nm\\S3\\N)");
2619 xvgr_subtitle(fp_tpi,"f. are averages over one frame");
2620 snew(leg,4+nener);
2621 e = 0;
2622 sprintf(str,"-kT log(<Ve\\S-\\8b\\4U\\N>/<V>)");
2623 leg[e++] = strdup(str);
2624 sprintf(str,"f. -kT log<e\\S-\\8b\\4U\\N>");
2625 leg[e++] = strdup(str);
2626 sprintf(str,"f. <e\\S-\\8b\\4U\\N>");
2627 leg[e++] = strdup(str);
2628 sprintf(str,"f. V");
2629 leg[e++] = strdup(str);
2630 sprintf(str,"f. <Ue\\S-\\8b\\4U\\N>");
2631 leg[e++] = strdup(str);
2632 for(i=0; i<ngid; i++) {
2633 sprintf(str,"f. <U\\sVdW %s\\Ne\\S-\\8b\\4U\\N>",
2634 *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2635 leg[e++] = strdup(str);
2637 if (bDispCorr) {
2638 sprintf(str,"f. <U\\sdisp c\\Ne\\S-\\8b\\4U\\N>");
2639 leg[e++] = strdup(str);
2641 if (bCharge) {
2642 for(i=0; i<ngid; i++) {
2643 sprintf(str,"f. <U\\sCoul %s\\Ne\\S-\\8b\\4U\\N>",
2644 *(groups->grpname[groups->grps[egcENER].nm_ind[i]]));
2645 leg[e++] = strdup(str);
2647 if (bRFExcl) {
2648 sprintf(str,"f. <U\\sRF excl\\Ne\\S-\\8b\\4U\\N>");
2649 leg[e++] = strdup(str);
2651 if (EEL_FULL(fr->eeltype)) {
2652 sprintf(str,"f. <U\\sCoul recip\\Ne\\S-\\8b\\4U\\N>");
2653 leg[e++] = strdup(str);
2656 xvgr_legend(fp_tpi,4+nener,leg);
2657 for(i=0; i<4+nener; i++)
2658 sfree(leg[i]);
2659 sfree(leg);
2661 clear_rvec(x_init);
2662 V_all = 0;
2663 VembU_all = 0;
2665 invbinw = 10;
2666 nbin = 10;
2667 snew(bin,nbin);
2669 bNotLastFrame = read_first_frame(&status,opt2fn("-rerun",nfile,fnm),
2670 &rerun_fr,TRX_NEED_X);
2671 frame = 0;
2673 if (rerun_fr.natoms - (bCavity ? nat_cavity : 0) !=
2674 mdatoms->nr - (a_tp1 - a_tp0))
2675 gmx_fatal(FARGS,"Number of atoms in trajectory (%d)%s "
2676 "is not equal the number in the run input file (%d) "
2677 "minus the number of atoms to insert (%d)\n",
2678 rerun_fr.natoms,bCavity ? " minus one" : "",
2679 mdatoms->nr,a_tp1-a_tp0);
2681 refvolshift = log(det(rerun_fr.box));
2683 while (bNotLastFrame) {
2684 lambda = rerun_fr.lambda;
2685 t = rerun_fr.time;
2687 sum_embU = 0;
2688 for(e=0; e<nener; e++)
2689 sum_UgembU[e] = 0;
2691 /* Copy the coordinates from the input trajectory */
2692 for(i=0; i<rerun_fr.natoms; i++)
2693 copy_rvec(rerun_fr.x[i],state->x[i]);
2695 V = det(rerun_fr.box);
2696 logV = log(V);
2698 bStateChanged = TRUE;
2699 bNS = TRUE;
2700 for(step=0; step<nsteps; step++) {
2701 /* In parallel all nodes generate all random configurations.
2702 * In that way the result is identical to a single cpu tpi run.
2704 if (!bCavity) {
2705 /* Random insertion in the whole volume */
2706 bNS = (step % inputrec->nstlist == 0);
2707 if (bNS) {
2708 /* Generate a random position in the box */
2709 x_init[XX] = gmx_rng_uniform_real(tpi_rand)*state->box[XX][XX];
2710 x_init[YY] = gmx_rng_uniform_real(tpi_rand)*state->box[YY][YY];
2711 x_init[ZZ] = gmx_rng_uniform_real(tpi_rand)*state->box[ZZ][ZZ];
2713 if (inputrec->nstlist == 1) {
2714 copy_rvec(x_init,x_tp);
2715 } else {
2716 /* Generate coordinates within |dx|=drmax of x_init */
2717 do {
2718 dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2719 dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2720 dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2721 } while (norm2(dx) > drmax*drmax);
2722 rvec_add(x_init,dx,x_tp);
2724 } else {
2725 /* Random insertion around a cavity location
2726 * given by the last coordinate of the trajectory.
2728 if (step == 0) {
2729 if (nat_cavity == 1) {
2730 /* Copy the location of the cavity */
2731 copy_rvec(rerun_fr.x[rerun_fr.natoms-1],x_init);
2732 } else {
2733 /* Determine the center of mass of the last molecule */
2734 clear_rvec(x_init);
2735 mass_tot = 0;
2736 for(i=0; i<nat_cavity; i++) {
2737 for(d=0; d<DIM; d++)
2738 x_init[d] +=
2739 mass_cavity[i]*rerun_fr.x[rerun_fr.natoms-nat_cavity+i][d];
2740 mass_tot += mass_cavity[i];
2742 for(d=0; d<DIM; d++)
2743 x_init[d] /= mass_tot;
2746 /* Generate coordinates within |dx|=drmax of x_init */
2747 do {
2748 dx[XX] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2749 dx[YY] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2750 dx[ZZ] = (2*gmx_rng_uniform_real(tpi_rand) - 1)*drmax;
2751 } while (norm2(dx) > drmax*drmax);
2752 rvec_add(x_init,dx,x_tp);
2755 if (a_tp1 - a_tp0 == 1) {
2756 /* Insert a single atom, just copy the insertion location */
2757 copy_rvec(x_tp,state->x[a_tp0]);
2758 } else {
2759 /* Copy the coordinates from the top file */
2760 for(i=a_tp0; i<a_tp1; i++)
2761 copy_rvec(x_mol[i-a_tp0],state->x[i]);
2762 /* Rotate the molecule randomly */
2763 rotate_conf(a_tp1-a_tp0,state->x+a_tp0,NULL,
2764 2*M_PI*gmx_rng_uniform_real(tpi_rand),
2765 2*M_PI*gmx_rng_uniform_real(tpi_rand),
2766 2*M_PI*gmx_rng_uniform_real(tpi_rand));
2767 /* Shift to the insertion location */
2768 for(i=a_tp0; i<a_tp1; i++)
2769 rvec_inc(state->x[i],x_tp);
2772 /* Check if this insertion belongs to this node */
2773 bOurStep = TRUE;
2774 if (PAR(cr)) {
2775 switch (inputrec->eI) {
2776 case eiTPI:
2777 bOurStep = ((step / inputrec->nstlist) % nnodes == cr->nodeid);
2778 break;
2779 case eiTPIC:
2780 bOurStep = (step % nnodes == cr->nodeid);
2781 break;
2782 default:
2783 gmx_fatal(FARGS,"Unknown integrator %s",ei_names[inputrec->eI]);
2786 if (bOurStep) {
2787 /* Clear some matrix variables */
2788 clear_mat(force_vir);
2789 clear_mat(shake_vir);
2790 clear_mat(vir);
2791 clear_mat(pres);
2793 /* Set the charge group center of mass of the test particle */
2794 copy_rvec(x_init,fr->cg_cm[top->cgs.nr-1]);
2796 /* Calc energy (no forces) on new positions.
2797 * Since we only need the intermolecular energy
2798 * and the RF exclusion terms of the inserted molecule occur
2799 * within a single charge group we can pass NULL for the graph.
2800 * This also avoids shifts that would move charge groups
2801 * out of the box.
2803 /* Make do_force do a single node fore calculation */
2804 cr->nnodes = 1;
2805 do_force(fplog,cr,inputrec,
2806 step,nrnb,wcycle,top,top_global,&top_global->groups,
2807 rerun_fr.box,state->x,&state->hist,
2808 f,force_vir,mdatoms,enerd,fcd,
2809 lambda,NULL,fr,NULL,mu_tot,t,NULL,NULL,FALSE,
2810 GMX_FORCE_NONBONDED |
2811 (bNS ? GMX_FORCE_NS : 0) |
2812 (bStateChanged ? GMX_FORCE_STATECHANGED : 0));
2813 cr->nnodes = nnodes;
2814 bStateChanged = FALSE;
2815 bNS = FALSE;
2817 /* Calculate long range corrections to pressure and energy */
2818 calc_dispcorr(fplog,inputrec,fr,step,mdatoms->nr,rerun_fr.box,lambda,
2819 pres,vir,enerd);
2821 /* If the compiler doesn't optimize this check away
2822 * we catch the NAN energies.
2823 * With tables extreme negative energies might occur close to r=0.
2825 epot = enerd->term[F_EPOT];
2826 if (epot != epot || epot*beta < bU_neg_limit) {
2827 if (debug)
2828 fprintf(debug,"\n time %.3f, step %d: non-finite energy %f, using exp(-bU)=0\n",t,step,epot);
2829 embU = 0;
2830 } else {
2832 real eo;
2833 printf(pdbformat,"ATOM",step,"CA","GLY",' ',step,' ',
2834 x_tp[XX]*10,x_tp[YY]*10,x_tp[ZZ]*10);
2835 eo = epot*0.05;
2836 printf("%6.2f%6.2f\n",
2837 1.0,
2838 eo < -9.99 ? -9.99 : eo > 9.99 ? 9.99 : eo);
2840 embU = exp(-beta*epot);
2841 sum_embU += embU;
2842 /* Determine the weighted energy contributions of each energy group */
2843 e = 0;
2844 sum_UgembU[e++] += epot*embU;
2845 if (fr->bBHAM) {
2846 for(i=0; i<ngid; i++)
2847 sum_UgembU[e++] +=
2848 (enerd->grpp.ener[egBHAMSR][GID(i,gid_tp,ngid)] +
2849 enerd->grpp.ener[egBHAMLR][GID(i,gid_tp,ngid)])*embU;
2850 } else {
2851 for(i=0; i<ngid; i++)
2852 sum_UgembU[e++] +=
2853 (enerd->grpp.ener[egLJSR][GID(i,gid_tp,ngid)] +
2854 enerd->grpp.ener[egLJLR][GID(i,gid_tp,ngid)])*embU;
2856 if (bDispCorr)
2857 sum_UgembU[e++] += enerd->term[F_DISPCORR]*embU;
2858 if (bCharge) {
2859 for(i=0; i<ngid; i++)
2860 sum_UgembU[e++] +=
2861 (enerd->grpp.ener[egCOULSR][GID(i,gid_tp,ngid)] +
2862 enerd->grpp.ener[egCOULLR][GID(i,gid_tp,ngid)])*embU;
2863 if (bRFExcl)
2864 sum_UgembU[e++] += enerd->term[F_RF_EXCL]*embU;
2865 if (EEL_FULL(fr->eeltype))
2866 sum_UgembU[e++] += enerd->term[F_COUL_RECIP]*embU;
2870 if (embU == 0 || beta*epot > bU_bin_limit) {
2871 bin[0]++;
2872 } else {
2873 i = (int)((bU_logV_bin_limit
2874 - (beta*epot - logV + refvolshift))*invbinw
2875 + 0.5);
2876 if (i < 0)
2877 i = 0;
2878 if (i >= nbin)
2879 realloc_bins(&bin,&nbin,i+10);
2880 bin[i]++;
2883 if (debug)
2884 fprintf(debug,"TPI %7d %12.5e %12.5f %12.5f %12.5f\n",
2885 step,epot,x_tp[XX],x_tp[YY],x_tp[ZZ]);
2887 if (dump_pdb && epot <= dump_ener) {
2888 sprintf(str,"t%g_step%d.pdb",t,step);
2889 sprintf(str2,"t: %f step %d ener: %f",t,step,epot);
2890 write_sto_conf_mtop(str,str2,top_global,state->x,state->v,
2891 inputrec->ePBC,state->box);
2896 if (PAR(cr)) {
2897 /* When running in parallel sum the energies over the processes */
2898 gmx_sumd(1, &sum_embU, cr);
2899 gmx_sumd(nener,sum_UgembU,cr);
2902 frame++;
2903 V_all += V;
2904 VembU_all += V*sum_embU/nsteps;
2906 if (fp_tpi) {
2907 if (bVerbose || frame%10==0 || frame<10)
2908 fprintf(stderr,"mu %10.3e <mu> %10.3e\n",
2909 -log(sum_embU/nsteps)/beta,-log(VembU_all/V_all)/beta);
2911 fprintf(fp_tpi,"%10.3f %12.5e %12.5e %12.5e %12.5e",
2913 VembU_all==0 ? 20/beta : -log(VembU_all/V_all)/beta,
2914 sum_embU==0 ? 20/beta : -log(sum_embU/nsteps)/beta,
2915 sum_embU/nsteps,V);
2916 for(e=0; e<nener; e++)
2917 fprintf(fp_tpi," %12.5e",sum_UgembU[e]/nsteps);
2918 fprintf(fp_tpi,"\n");
2919 fflush(fp_tpi);
2922 bNotLastFrame = read_next_frame(status,&rerun_fr);
2923 } /* End of the loop */
2925 runtime_end(runtime);
2927 close_trj(status);
2929 if (fp_tpi)
2930 gmx_fio_fclose(fp_tpi);
2932 if (fplog) {
2933 fprintf(fplog,"\n");
2934 fprintf(fplog," <V> = %12.5e nm^3\n",V_all/frame);
2935 fprintf(fplog," <mu> = %12.5e kJ/mol\n",-log(VembU_all/V_all)/beta);
2938 /* Write the Boltzmann factor histogram */
2939 if (PAR(cr)) {
2940 /* When running in parallel sum the bins over the processes */
2941 i = nbin;
2942 global_max(cr,&i);
2943 realloc_bins(&bin,&nbin,i);
2944 gmx_sumd(nbin,bin,cr);
2946 fp_tpi = xvgropen(opt2fn("-tpid",nfile,fnm),
2947 "TPI energy distribution",
2948 "\\8b\\4U - log(V/<V>)","count");
2949 sprintf(str,"number \\8b\\4U > %g: %9.3e",bU_bin_limit,bin[0]);
2950 xvgr_subtitle(fp_tpi,str);
2951 xvgr_legend(fp_tpi,2,(char **)tpid_leg);
2952 for(i=nbin-1; i>0; i--) {
2953 bUlogV = -i/invbinw + bU_logV_bin_limit - refvolshift + log(V_all/frame);
2954 fprintf(fp_tpi,"%6.2f %10d %12.5e\n",
2955 bUlogV,
2956 (int)(bin[i]+0.5),
2957 bin[i]*exp(-bUlogV)*V_all/VembU_all);
2959 gmx_fio_fclose(fp_tpi);
2960 sfree(bin);
2962 sfree(sum_UgembU);
2964 runtime->nsteps_done = frame*inputrec->nsteps;
2966 return 0;