1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
55 #include "gmx_fatal.h"
67 #include "md_support.h"
71 #include "sparsematrix.h"
75 #include "gmx_wallcycle.h"
76 #include "mtop_util.h"
80 #include "gmx_omp_nthreads.h"
92 static em_state_t
*init_em_state()
98 /* does this need to be here? Should the array be declared differently (staticaly)in the state definition? */
99 snew(ems
->s
.lambda
,efptNR
);
104 static void print_em_start(FILE *fplog
,t_commrec
*cr
,gmx_runtime_t
*runtime
,
105 gmx_wallcycle_t wcycle
,
110 runtime_start(runtime
);
112 sprintf(buf
,"Started %s",name
);
113 print_date_and_time(fplog
,cr
->nodeid
,buf
,NULL
);
115 wallcycle_start(wcycle
,ewcRUN
);
117 static void em_time_end(FILE *fplog
,t_commrec
*cr
,gmx_runtime_t
*runtime
,
118 gmx_wallcycle_t wcycle
)
120 wallcycle_stop(wcycle
,ewcRUN
);
122 runtime_end(runtime
);
125 static void sp_header(FILE *out
,const char *minimizer
,real ftol
,int nsteps
)
128 fprintf(out
,"%s:\n",minimizer
);
129 fprintf(out
," Tolerance (Fmax) = %12.5e\n",ftol
);
130 fprintf(out
," Number of steps = %12d\n",nsteps
);
133 static void warn_step(FILE *fp
,real ftol
,gmx_bool bLastStep
,gmx_bool bConstrain
)
139 "\nEnergy minimization reached the maximum number"
140 "of steps before the forces reached the requested"
141 "precision Fmax < %g.\n",ftol
);
146 "\nEnergy minimization has stopped, but the forces have"
147 "not converged to the requested precision Fmax < %g (which"
148 "may not be possible for your system). It stopped"
149 "because the algorithm tried to make a new step whose size"
150 "was too small, or there was no change in the energy since"
151 "last step. Either way, we regard the minimization as"
152 "converged to within the available machine precision,"
153 "given your starting configuration and EM parameters.\n%s%s",
155 sizeof(real
)<sizeof(double) ?
156 "\nDouble precision normally gives you higher accuracy, but"
157 "this is often not needed for preparing to run molecular"
161 "You might need to increase your constraint accuracy, or turn\n"
162 "off constraints altogether (set constraints = none in mdp file)\n" :
165 fputs(wrap_lines(buffer
, 78, 0, FALSE
), fp
);
170 static void print_converged(FILE *fp
,const char *alg
,real ftol
,
171 gmx_large_int_t count
,gmx_bool bDone
,gmx_large_int_t nsteps
,
172 real epot
,real fmax
, int nfmax
, real fnorm
)
174 char buf
[STEPSTRSIZE
];
177 fprintf(fp
,"\n%s converged to Fmax < %g in %s steps\n",
178 alg
,ftol
,gmx_step_str(count
,buf
));
179 else if(count
<nsteps
)
180 fprintf(fp
,"\n%s converged to machine precision in %s steps,\n"
181 "but did not reach the requested Fmax < %g.\n",
182 alg
,gmx_step_str(count
,buf
),ftol
);
184 fprintf(fp
,"\n%s did not converge to Fmax < %g in %s steps.\n",
185 alg
,ftol
,gmx_step_str(count
,buf
));
188 fprintf(fp
,"Potential Energy = %21.14e\n",epot
);
189 fprintf(fp
,"Maximum force = %21.14e on atom %d\n",fmax
,nfmax
+1);
190 fprintf(fp
,"Norm of force = %21.14e\n",fnorm
);
192 fprintf(fp
,"Potential Energy = %14.7e\n",epot
);
193 fprintf(fp
,"Maximum force = %14.7e on atom %d\n",fmax
,nfmax
+1);
194 fprintf(fp
,"Norm of force = %14.7e\n",fnorm
);
198 static void get_f_norm_max(t_commrec
*cr
,
199 t_grpopts
*opts
,t_mdatoms
*mdatoms
,rvec
*f
,
200 real
*fnorm
,real
*fmax
,int *a_fmax
)
203 real fmax2
,fmax2_0
,fam
;
204 int la_max
,a_max
,start
,end
,i
,m
,gf
;
206 /* This routine finds the largest force and returns it.
207 * On parallel machines the global max is taken.
213 start
= mdatoms
->start
;
214 end
= mdatoms
->homenr
+ start
;
215 if (mdatoms
->cFREEZE
) {
216 for(i
=start
; i
<end
; i
++) {
217 gf
= mdatoms
->cFREEZE
[i
];
220 if (!opts
->nFreeze
[gf
][m
])
229 for(i
=start
; i
<end
; i
++) {
239 if (la_max
>= 0 && DOMAINDECOMP(cr
)) {
240 a_max
= cr
->dd
->gatindex
[la_max
];
245 snew(sum
,2*cr
->nnodes
+1);
246 sum
[2*cr
->nodeid
] = fmax2
;
247 sum
[2*cr
->nodeid
+1] = a_max
;
248 sum
[2*cr
->nnodes
] = fnorm2
;
249 gmx_sumd(2*cr
->nnodes
+1,sum
,cr
);
250 fnorm2
= sum
[2*cr
->nnodes
];
251 /* Determine the global maximum */
252 for(i
=0; i
<cr
->nnodes
; i
++) {
253 if (sum
[2*i
] > fmax2
) {
255 a_max
= (int)(sum
[2*i
+1] + 0.5);
262 *fnorm
= sqrt(fnorm2
);
269 static void get_state_f_norm_max(t_commrec
*cr
,
270 t_grpopts
*opts
,t_mdatoms
*mdatoms
,
273 get_f_norm_max(cr
,opts
,mdatoms
,ems
->f
,&ems
->fnorm
,&ems
->fmax
,&ems
->a_fmax
);
276 void init_em(FILE *fplog
,const char *title
,
277 t_commrec
*cr
,t_inputrec
*ir
,
278 t_state
*state_global
,gmx_mtop_t
*top_global
,
279 em_state_t
*ems
,gmx_localtop_t
**top
,
280 rvec
**f
,rvec
**f_global
,
281 t_nrnb
*nrnb
,rvec mu_tot
,
282 t_forcerec
*fr
,gmx_enerdata_t
**enerd
,
283 t_graph
**graph
,t_mdatoms
*mdatoms
,gmx_global_stat_t
*gstat
,
284 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
285 int nfile
,const t_filenm fnm
[],
286 gmx_mdoutf_t
**outf
,t_mdebin
**mdebin
)
293 fprintf(fplog
,"Initiating %s\n",title
);
296 state_global
->ngtc
= 0;
298 /* Initialize lambda variables */
299 initialize_lambdas(fplog
,ir
,&(state_global
->fep_state
),state_global
->lambda
,NULL
);
303 if (DOMAINDECOMP(cr
))
305 *top
= dd_init_local_top(top_global
);
307 dd_init_local_state(cr
->dd
,state_global
,&ems
->s
);
311 /* Distribute the charge groups over the nodes from the master node */
312 dd_partition_system(fplog
,ir
->init_step
,cr
,TRUE
,1,
313 state_global
,top_global
,ir
,
314 &ems
->s
,&ems
->f
,mdatoms
,*top
,
315 fr
,vsite
,NULL
,constr
,
317 dd_store_state(cr
->dd
,&ems
->s
);
321 snew(*f_global
,top_global
->natoms
);
331 snew(*f
,top_global
->natoms
);
333 /* Just copy the state */
334 ems
->s
= *state_global
;
335 snew(ems
->s
.x
,ems
->s
.nalloc
);
336 snew(ems
->f
,ems
->s
.nalloc
);
337 for(i
=0; i
<state_global
->natoms
; i
++)
339 copy_rvec(state_global
->x
[i
],ems
->s
.x
[i
]);
341 copy_mat(state_global
->box
,ems
->s
.box
);
343 if (PAR(cr
) && ir
->eI
!= eiNM
)
345 /* Initialize the particle decomposition and split the topology */
346 *top
= split_system(fplog
,top_global
,ir
,cr
);
348 pd_cg_range(cr
,&fr
->cg0
,&fr
->hcg
);
352 *top
= gmx_mtop_generate_local_top(top_global
,ir
);
356 forcerec_set_excl_load(fr
,*top
,cr
);
358 init_bonded_thread_force_reduction(fr
,&(*top
)->idef
);
360 if (ir
->ePBC
!= epbcNONE
&& !fr
->bMolPBC
)
362 *graph
= mk_graph(fplog
,&((*top
)->idef
),0,top_global
->natoms
,FALSE
,FALSE
);
371 pd_at_range(cr
,&start
,&homenr
);
377 homenr
= top_global
->natoms
;
379 atoms2md(top_global
,ir
,0,NULL
,start
,homenr
,mdatoms
);
380 update_mdatoms(mdatoms
,state_global
->lambda
[efptFEP
]);
384 set_vsite_top(vsite
,*top
,mdatoms
,cr
);
390 if (ir
->eConstrAlg
== econtSHAKE
&&
391 gmx_mtop_ftype_count(top_global
,F_CONSTR
) > 0)
393 gmx_fatal(FARGS
,"Can not do energy minimization with %s, use %s\n",
394 econstr_names
[econtSHAKE
],econstr_names
[econtLINCS
]);
397 if (!DOMAINDECOMP(cr
))
399 set_constraints(constr
,*top
,ir
,mdatoms
,cr
);
402 if (!ir
->bContinuation
)
404 /* Constrain the starting coordinates */
406 constrain(PAR(cr
) ? NULL
: fplog
,TRUE
,TRUE
,constr
,&(*top
)->idef
,
407 ir
,NULL
,cr
,-1,0,mdatoms
,
408 ems
->s
.x
,ems
->s
.x
,NULL
,fr
->bMolPBC
,ems
->s
.box
,
409 ems
->s
.lambda
[efptFEP
],&dvdlambda
,
410 NULL
,NULL
,nrnb
,econqCoord
,FALSE
,0,0);
416 *gstat
= global_stat_init(ir
);
419 *outf
= init_mdoutf(nfile
,fnm
,0,cr
,ir
,NULL
);
422 init_enerdata(top_global
->groups
.grps
[egcENER
].nr
,ir
->fepvals
->n_lambda
,
427 /* Init bin for energy stuff */
428 *mdebin
= init_mdebin((*outf
)->fp_ene
,top_global
,ir
,NULL
);
432 calc_shifts(ems
->s
.box
,fr
->shift_vec
);
435 static void finish_em(FILE *fplog
,t_commrec
*cr
,gmx_mdoutf_t
*outf
,
436 gmx_runtime_t
*runtime
,gmx_wallcycle_t wcycle
)
438 if (!(cr
->duty
& DUTY_PME
)) {
439 /* Tell the PME only node to finish */
440 gmx_pme_send_finish(cr
);
445 em_time_end(fplog
,cr
,runtime
,wcycle
);
448 static void swap_em_state(em_state_t
*ems1
,em_state_t
*ems2
)
457 static void copy_em_coords(em_state_t
*ems
,t_state
*state
)
461 for(i
=0; (i
<state
->natoms
); i
++)
463 copy_rvec(ems
->s
.x
[i
],state
->x
[i
]);
467 static void write_em_traj(FILE *fplog
,t_commrec
*cr
,
469 gmx_bool bX
,gmx_bool bF
,const char *confout
,
470 gmx_mtop_t
*top_global
,
471 t_inputrec
*ir
,gmx_large_int_t step
,
473 t_state
*state_global
,rvec
*f_global
)
477 if ((bX
|| bF
|| confout
!= NULL
) && !DOMAINDECOMP(cr
))
479 copy_em_coords(state
,state_global
);
484 if (bX
) { mdof_flags
|= MDOF_X
; }
485 if (bF
) { mdof_flags
|= MDOF_F
; }
486 write_traj(fplog
,cr
,outf
,mdof_flags
,
487 top_global
,step
,(double)step
,
488 &state
->s
,state_global
,state
->f
,f_global
,NULL
,NULL
);
490 if (confout
!= NULL
&& MASTER(cr
))
492 if (ir
->ePBC
!= epbcNONE
&& !ir
->bPeriodicMols
&& DOMAINDECOMP(cr
))
494 /* Make molecules whole only for confout writing */
495 do_pbc_mtop(fplog
,ir
->ePBC
,state_global
->box
,top_global
,
499 write_sto_conf_mtop(confout
,
500 *top_global
->name
,top_global
,
501 state_global
->x
,NULL
,ir
->ePBC
,state_global
->box
);
505 static void do_em_step(t_commrec
*cr
,t_inputrec
*ir
,t_mdatoms
*md
,
507 em_state_t
*ems1
,real a
,rvec
*f
,em_state_t
*ems2
,
508 gmx_constr_t constr
,gmx_localtop_t
*top
,
509 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
510 gmx_large_int_t count
)
522 if (DOMAINDECOMP(cr
) && s1
->ddp_count
!= cr
->dd
->ddp_count
)
524 gmx_incons("state mismatch in do_em_step");
527 s2
->flags
= s1
->flags
;
529 if (s2
->nalloc
!= s1
->nalloc
)
531 s2
->nalloc
= s1
->nalloc
;
532 srenew(s2
->x
,s1
->nalloc
);
533 srenew(ems2
->f
, s1
->nalloc
);
534 if (s2
->flags
& (1<<estCGP
))
536 srenew(s2
->cg_p
, s1
->nalloc
);
540 s2
->natoms
= s1
->natoms
;
541 copy_mat(s1
->box
,s2
->box
);
542 /* Copy free energy state */
543 for (i
=0;i
<efptNR
;i
++)
545 s2
->lambda
[i
] = s1
->lambda
[i
];
547 copy_mat(s1
->box
,s2
->box
);
550 end
= md
->start
+ md
->homenr
;
555 #pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
560 #pragma omp for schedule(static) nowait
561 for(i
=start
; i
<end
; i
++)
569 if (ir
->opts
.nFreeze
[gf
][m
])
575 x2
[i
][m
] = x1
[i
][m
] + a
*f
[i
][m
];
580 if (s2
->flags
& (1<<estCGP
))
582 /* Copy the CG p vector */
585 #pragma omp for schedule(static) nowait
586 for(i
=start
; i
<end
; i
++)
588 copy_rvec(x1
[i
],x2
[i
]);
592 if (DOMAINDECOMP(cr
))
594 s2
->ddp_count
= s1
->ddp_count
;
595 if (s2
->cg_gl_nalloc
< s1
->cg_gl_nalloc
)
598 s2
->cg_gl_nalloc
= s1
->cg_gl_nalloc
;
599 srenew(s2
->cg_gl
,s2
->cg_gl_nalloc
);
602 s2
->ncg_gl
= s1
->ncg_gl
;
603 #pragma omp for schedule(static) nowait
604 for(i
=0; i
<s2
->ncg_gl
; i
++)
606 s2
->cg_gl
[i
] = s1
->cg_gl
[i
];
608 s2
->ddp_count_cg_gl
= s1
->ddp_count_cg_gl
;
614 wallcycle_start(wcycle
,ewcCONSTR
);
616 constrain(NULL
,TRUE
,TRUE
,constr
,&top
->idef
,
617 ir
,NULL
,cr
,count
,0,md
,
618 s1
->x
,s2
->x
,NULL
,bMolPBC
,s2
->box
,
619 s2
->lambda
[efptBONDED
],&dvdlambda
,
620 NULL
,NULL
,nrnb
,econqCoord
,FALSE
,0,0);
621 wallcycle_stop(wcycle
,ewcCONSTR
);
625 static void em_dd_partition_system(FILE *fplog
,int step
,t_commrec
*cr
,
626 gmx_mtop_t
*top_global
,t_inputrec
*ir
,
627 em_state_t
*ems
,gmx_localtop_t
*top
,
628 t_mdatoms
*mdatoms
,t_forcerec
*fr
,
629 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
630 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
)
632 /* Repartition the domain decomposition */
633 wallcycle_start(wcycle
,ewcDOMDEC
);
634 dd_partition_system(fplog
,step
,cr
,FALSE
,1,
637 mdatoms
,top
,fr
,vsite
,NULL
,constr
,
639 dd_store_state(cr
->dd
,&ems
->s
);
640 wallcycle_stop(wcycle
,ewcDOMDEC
);
643 static void evaluate_energy(FILE *fplog
,gmx_bool bVerbose
,t_commrec
*cr
,
644 t_state
*state_global
,gmx_mtop_t
*top_global
,
645 em_state_t
*ems
,gmx_localtop_t
*top
,
646 t_inputrec
*inputrec
,
647 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
648 gmx_global_stat_t gstat
,
649 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
651 t_graph
*graph
,t_mdatoms
*mdatoms
,
652 t_forcerec
*fr
,rvec mu_tot
,
653 gmx_enerdata_t
*enerd
,tensor vir
,tensor pres
,
654 gmx_large_int_t count
,gmx_bool bFirst
)
659 tensor force_vir
,shake_vir
,ekin
;
660 real dvdlambda
,prescorr
,enercorr
,dvdlcorr
;
663 /* Set the time to the initial time, the time does not change during EM */
664 t
= inputrec
->init_t
;
667 (DOMAINDECOMP(cr
) && ems
->s
.ddp_count
< cr
->dd
->ddp_count
)) {
668 /* This the first state or an old state used before the last ns */
672 if (inputrec
->nstlist
> 0) {
674 } else if (inputrec
->nstlist
== -1) {
675 nabnsb
= natoms_beyond_ns_buffer(inputrec
,fr
,&top
->cgs
,NULL
,ems
->s
.x
);
677 gmx_sumi(1,&nabnsb
,cr
);
683 construct_vsites(fplog
,vsite
,ems
->s
.x
,nrnb
,1,NULL
,
684 top
->idef
.iparams
,top
->idef
.il
,
685 fr
->ePBC
,fr
->bMolPBC
,graph
,cr
,ems
->s
.box
);
687 if (DOMAINDECOMP(cr
)) {
689 /* Repartition the domain decomposition */
690 em_dd_partition_system(fplog
,count
,cr
,top_global
,inputrec
,
691 ems
,top
,mdatoms
,fr
,vsite
,constr
,
696 /* Calc force & energy on new trial position */
697 /* do_force always puts the charge groups in the box and shifts again
698 * We do not unshift, so molecules are always whole in congrad.c
700 do_force(fplog
,cr
,inputrec
,
701 count
,nrnb
,wcycle
,top
,top_global
,&top_global
->groups
,
702 ems
->s
.box
,ems
->s
.x
,&ems
->s
.hist
,
703 ems
->f
,force_vir
,mdatoms
,enerd
,fcd
,
704 ems
->s
.lambda
,graph
,fr
,vsite
,mu_tot
,t
,NULL
,NULL
,TRUE
,
705 GMX_FORCE_STATECHANGED
| GMX_FORCE_ALLFORCES
|
706 GMX_FORCE_VIRIAL
| GMX_FORCE_ENERGY
|
707 (bNS
? GMX_FORCE_NS
| GMX_FORCE_DOLR
: 0));
709 /* Clear the unused shake virial and pressure */
710 clear_mat(shake_vir
);
713 /* Communicate stuff when parallel */
714 if (PAR(cr
) && inputrec
->eI
!= eiNM
)
716 wallcycle_start(wcycle
,ewcMoveE
);
718 global_stat(fplog
,gstat
,cr
,enerd
,force_vir
,shake_vir
,mu_tot
,
719 inputrec
,NULL
,NULL
,NULL
,1,&terminate
,
720 top_global
,&ems
->s
,FALSE
,
726 wallcycle_stop(wcycle
,ewcMoveE
);
729 /* Calculate long range corrections to pressure and energy */
730 calc_dispcorr(fplog
,inputrec
,fr
,count
,top_global
->natoms
,ems
->s
.box
,ems
->s
.lambda
[efptVDW
],
731 pres
,force_vir
,&prescorr
,&enercorr
,&dvdlcorr
);
732 enerd
->term
[F_DISPCORR
] = enercorr
;
733 enerd
->term
[F_EPOT
] += enercorr
;
734 enerd
->term
[F_PRES
] += prescorr
;
735 enerd
->term
[F_DVDL
] += dvdlcorr
;
737 ems
->epot
= enerd
->term
[F_EPOT
];
740 /* Project out the constraint components of the force */
741 wallcycle_start(wcycle
,ewcCONSTR
);
743 constrain(NULL
,FALSE
,FALSE
,constr
,&top
->idef
,
744 inputrec
,NULL
,cr
,count
,0,mdatoms
,
745 ems
->s
.x
,ems
->f
,ems
->f
,fr
->bMolPBC
,ems
->s
.box
,
746 ems
->s
.lambda
[efptBONDED
],&dvdlambda
,
747 NULL
,&shake_vir
,nrnb
,econqForceDispl
,FALSE
,0,0);
748 if (fr
->bSepDVDL
&& fplog
)
749 fprintf(fplog
,sepdvdlformat
,"Constraints",t
,dvdlambda
);
750 enerd
->term
[F_DVDL_BONDED
] += dvdlambda
;
751 m_add(force_vir
,shake_vir
,vir
);
752 wallcycle_stop(wcycle
,ewcCONSTR
);
754 copy_mat(force_vir
,vir
);
758 enerd
->term
[F_PRES
] =
759 calc_pres(fr
->ePBC
,inputrec
->nwall
,ems
->s
.box
,ekin
,vir
,pres
);
761 sum_dhdl(enerd
,ems
->s
.lambda
,inputrec
->fepvals
);
763 if (EI_ENERGY_MINIMIZATION(inputrec
->eI
))
765 get_state_f_norm_max(cr
,&(inputrec
->opts
),mdatoms
,ems
);
769 static double reorder_partsum(t_commrec
*cr
,t_grpopts
*opts
,t_mdatoms
*mdatoms
,
771 em_state_t
*s_min
,em_state_t
*s_b
)
775 int ncg
,*cg_gl
,*index
,c
,cg
,i
,a0
,a1
,a
,gf
,m
;
777 unsigned char *grpnrFREEZE
;
780 fprintf(debug
,"Doing reorder_partsum\n");
785 cgs_gl
= dd_charge_groups_global(cr
->dd
);
786 index
= cgs_gl
->index
;
788 /* Collect fm in a global vector fmg.
789 * This conflicts with the spirit of domain decomposition,
790 * but to fully optimize this a much more complicated algorithm is required.
792 snew(fmg
,mtop
->natoms
);
794 ncg
= s_min
->s
.ncg_gl
;
795 cg_gl
= s_min
->s
.cg_gl
;
797 for(c
=0; c
<ncg
; c
++) {
801 for(a
=a0
; a
<a1
; a
++) {
802 copy_rvec(fm
[i
],fmg
[a
]);
806 gmx_sum(mtop
->natoms
*3,fmg
[0],cr
);
808 /* Now we will determine the part of the sum for the cgs in state s_b */
810 cg_gl
= s_b
->s
.cg_gl
;
814 grpnrFREEZE
= mtop
->groups
.grpnr
[egcFREEZE
];
815 for(c
=0; c
<ncg
; c
++) {
819 for(a
=a0
; a
<a1
; a
++) {
820 if (mdatoms
->cFREEZE
&& grpnrFREEZE
) {
823 for(m
=0; m
<DIM
; m
++) {
824 if (!opts
->nFreeze
[gf
][m
]) {
825 partsum
+= (fb
[i
][m
] - fmg
[a
][m
])*fb
[i
][m
];
837 static real
pr_beta(t_commrec
*cr
,t_grpopts
*opts
,t_mdatoms
*mdatoms
,
839 em_state_t
*s_min
,em_state_t
*s_b
)
845 /* This is just the classical Polak-Ribiere calculation of beta;
846 * it looks a bit complicated since we take freeze groups into account,
847 * and might have to sum it in parallel runs.
850 if (!DOMAINDECOMP(cr
) ||
851 (s_min
->s
.ddp_count
== cr
->dd
->ddp_count
&&
852 s_b
->s
.ddp_count
== cr
->dd
->ddp_count
)) {
857 /* This part of code can be incorrect with DD,
858 * since the atom ordering in s_b and s_min might differ.
860 for(i
=mdatoms
->start
; i
<mdatoms
->start
+mdatoms
->homenr
; i
++) {
861 if (mdatoms
->cFREEZE
)
862 gf
= mdatoms
->cFREEZE
[i
];
864 if (!opts
->nFreeze
[gf
][m
]) {
865 sum
+= (fb
[i
][m
] - fm
[i
][m
])*fb
[i
][m
];
869 /* We need to reorder cgs while summing */
870 sum
= reorder_partsum(cr
,opts
,mdatoms
,mtop
,s_min
,s_b
);
875 return sum
/sqr(s_min
->fnorm
);
878 double do_cg(FILE *fplog
,t_commrec
*cr
,
879 int nfile
,const t_filenm fnm
[],
880 const output_env_t oenv
, gmx_bool bVerbose
,gmx_bool bCompact
,
882 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
884 t_inputrec
*inputrec
,
885 gmx_mtop_t
*top_global
,t_fcdata
*fcd
,
886 t_state
*state_global
,
888 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
891 int repl_ex_nst
, int repl_ex_nex
, int repl_ex_seed
,
893 real cpt_period
,real max_hours
,
894 const char *deviceOptions
,
896 gmx_runtime_t
*runtime
)
898 const char *CG
="Polak-Ribiere Conjugate Gradients";
900 em_state_t
*s_min
,*s_a
,*s_b
,*s_c
;
902 gmx_enerdata_t
*enerd
;
904 gmx_global_stat_t gstat
;
906 rvec
*f_global
,*p
,*sf
,*sfm
;
907 double gpa
,gpb
,gpc
,tmp
,sum
[2],minstep
;
914 gmx_bool converged
,foundlower
;
916 gmx_bool do_log
=FALSE
,do_ene
=FALSE
,do_x
,do_f
;
918 int number_steps
,neval
=0,nstcg
=inputrec
->nstcgsteep
;
920 int i
,m
,gf
,step
,nminstep
;
925 s_min
= init_em_state();
926 s_a
= init_em_state();
927 s_b
= init_em_state();
928 s_c
= init_em_state();
930 /* Init em and store the local state in s_min */
931 init_em(fplog
,CG
,cr
,inputrec
,
932 state_global
,top_global
,s_min
,&top
,&f
,&f_global
,
933 nrnb
,mu_tot
,fr
,&enerd
,&graph
,mdatoms
,&gstat
,vsite
,constr
,
934 nfile
,fnm
,&outf
,&mdebin
);
936 /* Print to log file */
937 print_em_start(fplog
,cr
,runtime
,wcycle
,CG
);
939 /* Max number of steps */
940 number_steps
=inputrec
->nsteps
;
943 sp_header(stderr
,CG
,inputrec
->em_tol
,number_steps
);
945 sp_header(fplog
,CG
,inputrec
->em_tol
,number_steps
);
947 /* Call the force routine and some auxiliary (neighboursearching etc.) */
948 /* do_force always puts the charge groups in the box and shifts again
949 * We do not unshift, so molecules are always whole in congrad.c
951 evaluate_energy(fplog
,bVerbose
,cr
,
952 state_global
,top_global
,s_min
,top
,
953 inputrec
,nrnb
,wcycle
,gstat
,
954 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
955 mu_tot
,enerd
,vir
,pres
,-1,TRUE
);
959 /* Copy stuff to the energy bin for easy printing etc. */
960 upd_mdebin(mdebin
,FALSE
,FALSE
,(double)step
,
961 mdatoms
->tmass
,enerd
,&s_min
->s
,inputrec
->fepvals
,inputrec
->expandedvals
,s_min
->s
.box
,
962 NULL
,NULL
,vir
,pres
,NULL
,mu_tot
,constr
);
964 print_ebin_header(fplog
,step
,step
,s_min
->s
.lambda
[efptFEP
]);
965 print_ebin(outf
->fp_ene
,TRUE
,FALSE
,FALSE
,fplog
,step
,step
,eprNORMAL
,
966 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
970 /* Estimate/guess the initial stepsize */
971 stepsize
= inputrec
->em_stepsize
/s_min
->fnorm
;
974 fprintf(stderr
," F-max = %12.5e on atom %d\n",
975 s_min
->fmax
,s_min
->a_fmax
+1);
976 fprintf(stderr
," F-Norm = %12.5e\n",
977 s_min
->fnorm
/sqrt(state_global
->natoms
));
978 fprintf(stderr
,"\n");
979 /* and copy to the log file too... */
980 fprintf(fplog
," F-max = %12.5e on atom %d\n",
981 s_min
->fmax
,s_min
->a_fmax
+1);
982 fprintf(fplog
," F-Norm = %12.5e\n",
983 s_min
->fnorm
/sqrt(state_global
->natoms
));
986 /* Start the loop over CG steps.
987 * Each successful step is counted, and we continue until
988 * we either converge or reach the max number of steps.
991 for(step
=0; (number_steps
<0 || (number_steps
>=0 && step
<=number_steps
)) && !converged
;step
++) {
993 /* start taking steps in a new direction
994 * First time we enter the routine, beta=0, and the direction is
995 * simply the negative gradient.
998 /* Calculate the new direction in p, and the gradient in this direction, gpa */
1003 for(i
=mdatoms
->start
; i
<mdatoms
->start
+mdatoms
->homenr
; i
++) {
1004 if (mdatoms
->cFREEZE
)
1005 gf
= mdatoms
->cFREEZE
[i
];
1006 for(m
=0; m
<DIM
; m
++) {
1007 if (!inputrec
->opts
.nFreeze
[gf
][m
]) {
1008 p
[i
][m
] = sf
[i
][m
] + beta
*p
[i
][m
];
1009 gpa
-= p
[i
][m
]*sf
[i
][m
];
1010 /* f is negative gradient, thus the sign */
1017 /* Sum the gradient along the line across CPUs */
1019 gmx_sumd(1,&gpa
,cr
);
1021 /* Calculate the norm of the search vector */
1022 get_f_norm_max(cr
,&(inputrec
->opts
),mdatoms
,p
,&pnorm
,NULL
,NULL
);
1024 /* Just in case stepsize reaches zero due to numerical precision... */
1026 stepsize
= inputrec
->em_stepsize
/pnorm
;
1029 * Double check the value of the derivative in the search direction.
1030 * If it is positive it must be due to the old information in the
1031 * CG formula, so just remove that and start over with beta=0.
1032 * This corresponds to a steepest descent step.
1036 step
--; /* Don't count this step since we are restarting */
1037 continue; /* Go back to the beginning of the big for-loop */
1040 /* Calculate minimum allowed stepsize, before the average (norm)
1041 * relative change in coordinate is smaller than precision
1044 for (i
=mdatoms
->start
; i
<mdatoms
->start
+mdatoms
->homenr
; i
++) {
1045 for(m
=0; m
<DIM
; m
++) {
1046 tmp
= fabs(s_min
->s
.x
[i
][m
]);
1053 /* Add up from all CPUs */
1055 gmx_sumd(1,&minstep
,cr
);
1057 minstep
= GMX_REAL_EPS
/sqrt(minstep
/(3*state_global
->natoms
));
1059 if(stepsize
<minstep
) {
1064 /* Write coordinates if necessary */
1065 do_x
= do_per_step(step
,inputrec
->nstxout
);
1066 do_f
= do_per_step(step
,inputrec
->nstfout
);
1068 write_em_traj(fplog
,cr
,outf
,do_x
,do_f
,NULL
,
1069 top_global
,inputrec
,step
,
1070 s_min
,state_global
,f_global
);
1072 /* Take a step downhill.
1073 * In theory, we should minimize the function along this direction.
1074 * That is quite possible, but it turns out to take 5-10 function evaluations
1075 * for each line. However, we dont really need to find the exact minimum -
1076 * it is much better to start a new CG step in a modified direction as soon
1077 * as we are close to it. This will save a lot of energy evaluations.
1079 * In practice, we just try to take a single step.
1080 * If it worked (i.e. lowered the energy), we increase the stepsize but
1081 * the continue straight to the next CG step without trying to find any minimum.
1082 * If it didn't work (higher energy), there must be a minimum somewhere between
1083 * the old position and the new one.
1085 * Due to the finite numerical accuracy, it turns out that it is a good idea
1086 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1087 * This leads to lower final energies in the tests I've done. / Erik
1089 s_a
->epot
= s_min
->epot
;
1091 c
= a
+ stepsize
; /* reference position along line is zero */
1093 if (DOMAINDECOMP(cr
) && s_min
->s
.ddp_count
< cr
->dd
->ddp_count
) {
1094 em_dd_partition_system(fplog
,step
,cr
,top_global
,inputrec
,
1095 s_min
,top
,mdatoms
,fr
,vsite
,constr
,
1099 /* Take a trial step (new coords in s_c) */
1100 do_em_step(cr
,inputrec
,mdatoms
,fr
->bMolPBC
,s_min
,c
,s_min
->s
.cg_p
,s_c
,
1101 constr
,top
,nrnb
,wcycle
,-1);
1104 /* Calculate energy for the trial step */
1105 evaluate_energy(fplog
,bVerbose
,cr
,
1106 state_global
,top_global
,s_c
,top
,
1107 inputrec
,nrnb
,wcycle
,gstat
,
1108 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
1109 mu_tot
,enerd
,vir
,pres
,-1,FALSE
);
1111 /* Calc derivative along line */
1115 for(i
=mdatoms
->start
; i
<mdatoms
->start
+mdatoms
->homenr
; i
++) {
1116 for(m
=0; m
<DIM
; m
++)
1117 gpc
-= p
[i
][m
]*sf
[i
][m
]; /* f is negative gradient, thus the sign */
1119 /* Sum the gradient along the line across CPUs */
1121 gmx_sumd(1,&gpc
,cr
);
1123 /* This is the max amount of increase in energy we tolerate */
1124 tmp
=sqrt(GMX_REAL_EPS
)*fabs(s_a
->epot
);
1126 /* Accept the step if the energy is lower, or if it is not significantly higher
1127 * and the line derivative is still negative.
1129 if (s_c
->epot
< s_a
->epot
|| (gpc
< 0 && s_c
->epot
< (s_a
->epot
+ tmp
))) {
1131 /* Great, we found a better energy. Increase step for next iteration
1132 * if we are still going down, decrease it otherwise
1135 stepsize
*= 1.618034; /* The golden section */
1137 stepsize
*= 0.618034; /* 1/golden section */
1139 /* New energy is the same or higher. We will have to do some work
1140 * to find a smaller value in the interval. Take smaller step next time!
1143 stepsize
*= 0.618034;
1149 /* OK, if we didn't find a lower value we will have to locate one now - there must
1150 * be one in the interval [a=0,c].
1151 * The same thing is valid here, though: Don't spend dozens of iterations to find
1152 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1153 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1155 * I also have a safeguard for potentially really patological functions so we never
1156 * take more than 20 steps before we give up ...
1158 * If we already found a lower value we just skip this step and continue to the update.
1164 /* Select a new trial point.
1165 * If the derivatives at points a & c have different sign we interpolate to zero,
1166 * otherwise just do a bisection.
1169 b
= a
+ gpa
*(a
-c
)/(gpc
-gpa
);
1173 /* safeguard if interpolation close to machine accuracy causes errors:
1174 * never go outside the interval
1179 if (DOMAINDECOMP(cr
) && s_min
->s
.ddp_count
!= cr
->dd
->ddp_count
) {
1180 /* Reload the old state */
1181 em_dd_partition_system(fplog
,-1,cr
,top_global
,inputrec
,
1182 s_min
,top
,mdatoms
,fr
,vsite
,constr
,
1186 /* Take a trial step to this new point - new coords in s_b */
1187 do_em_step(cr
,inputrec
,mdatoms
,fr
->bMolPBC
,s_min
,b
,s_min
->s
.cg_p
,s_b
,
1188 constr
,top
,nrnb
,wcycle
,-1);
1191 /* Calculate energy for the trial step */
1192 evaluate_energy(fplog
,bVerbose
,cr
,
1193 state_global
,top_global
,s_b
,top
,
1194 inputrec
,nrnb
,wcycle
,gstat
,
1195 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
1196 mu_tot
,enerd
,vir
,pres
,-1,FALSE
);
1198 /* p does not change within a step, but since the domain decomposition
1199 * might change, we have to use cg_p of s_b here.
1204 for(i
=mdatoms
->start
; i
<mdatoms
->start
+mdatoms
->homenr
; i
++) {
1205 for(m
=0; m
<DIM
; m
++)
1206 gpb
-= p
[i
][m
]*sf
[i
][m
]; /* f is negative gradient, thus the sign */
1208 /* Sum the gradient along the line across CPUs */
1210 gmx_sumd(1,&gpb
,cr
);
1213 fprintf(debug
,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1214 s_a
->epot
,s_b
->epot
,s_c
->epot
,gpb
);
1216 epot_repl
= s_b
->epot
;
1218 /* Keep one of the intervals based on the value of the derivative at the new point */
1220 /* Replace c endpoint with b */
1221 swap_em_state(s_b
,s_c
);
1225 /* Replace a endpoint with b */
1226 swap_em_state(s_b
,s_a
);
1232 * Stop search as soon as we find a value smaller than the endpoints.
1233 * Never run more than 20 steps, no matter what.
1236 } while ((epot_repl
> s_a
->epot
|| epot_repl
> s_c
->epot
) &&
1239 if (fabs(epot_repl
- s_min
->epot
) < fabs(s_min
->epot
)*GMX_REAL_EPS
||
1241 /* OK. We couldn't find a significantly lower energy.
1242 * If beta==0 this was steepest descent, and then we give up.
1243 * If not, set beta=0 and restart with steepest descent before quitting.
1250 /* Reset memory before giving up */
1256 /* Select min energy state of A & C, put the best in B.
1258 if (s_c
->epot
< s_a
->epot
) {
1260 fprintf(debug
,"CGE: C (%f) is lower than A (%f), moving C to B\n",
1261 s_c
->epot
,s_a
->epot
);
1262 swap_em_state(s_b
,s_c
);
1267 fprintf(debug
,"CGE: A (%f) is lower than C (%f), moving A to B\n",
1268 s_a
->epot
,s_c
->epot
);
1269 swap_em_state(s_b
,s_a
);
1276 fprintf(debug
,"CGE: Found a lower energy %f, moving C to B\n",
1278 swap_em_state(s_b
,s_c
);
1283 /* new search direction */
1284 /* beta = 0 means forget all memory and restart with steepest descents. */
1285 if (nstcg
&& ((step
% nstcg
)==0))
1288 /* s_min->fnorm cannot be zero, because then we would have converged
1292 /* Polak-Ribiere update.
1293 * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1295 beta
= pr_beta(cr
,&inputrec
->opts
,mdatoms
,top_global
,s_min
,s_b
);
1297 /* Limit beta to prevent oscillations */
1298 if (fabs(beta
) > 5.0)
1302 /* update positions */
1303 swap_em_state(s_min
,s_b
);
1306 /* Print it if necessary */
1309 fprintf(stderr
,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1310 step
,s_min
->epot
,s_min
->fnorm
/sqrt(state_global
->natoms
),
1311 s_min
->fmax
,s_min
->a_fmax
+1);
1312 /* Store the new (lower) energies */
1313 upd_mdebin(mdebin
,FALSE
,FALSE
,(double)step
,
1314 mdatoms
->tmass
,enerd
,&s_min
->s
,inputrec
->fepvals
,inputrec
->expandedvals
,s_min
->s
.box
,
1315 NULL
,NULL
,vir
,pres
,NULL
,mu_tot
,constr
);
1317 do_log
= do_per_step(step
,inputrec
->nstlog
);
1318 do_ene
= do_per_step(step
,inputrec
->nstenergy
);
1320 print_ebin_header(fplog
,step
,step
,s_min
->s
.lambda
[efptFEP
]);
1321 print_ebin(outf
->fp_ene
,do_ene
,FALSE
,FALSE
,
1322 do_log
? fplog
: NULL
,step
,step
,eprNORMAL
,
1323 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
1326 /* Stop when the maximum force lies below tolerance.
1327 * If we have reached machine precision, converged is already set to true.
1329 converged
= converged
|| (s_min
->fmax
< inputrec
->em_tol
);
1331 } /* End of the loop */
1334 step
--; /* we never took that last step in this case */
1336 if (s_min
->fmax
> inputrec
->em_tol
)
1340 warn_step(stderr
,inputrec
->em_tol
,step
-1==number_steps
,FALSE
);
1341 warn_step(fplog
,inputrec
->em_tol
,step
-1==number_steps
,FALSE
);
1347 /* If we printed energy and/or logfile last step (which was the last step)
1348 * we don't have to do it again, but otherwise print the final values.
1351 /* Write final value to log since we didn't do anything the last step */
1352 print_ebin_header(fplog
,step
,step
,s_min
->s
.lambda
[efptFEP
]);
1354 if (!do_ene
|| !do_log
) {
1355 /* Write final energy file entries */
1356 print_ebin(outf
->fp_ene
,!do_ene
,FALSE
,FALSE
,
1357 !do_log
? fplog
: NULL
,step
,step
,eprNORMAL
,
1358 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
1362 /* Print some stuff... */
1364 fprintf(stderr
,"\nwriting lowest energy coordinates.\n");
1367 * For accurate normal mode calculation it is imperative that we
1368 * store the last conformation into the full precision binary trajectory.
1370 * However, we should only do it if we did NOT already write this step
1371 * above (which we did if do_x or do_f was true).
1373 do_x
= !do_per_step(step
,inputrec
->nstxout
);
1374 do_f
= (inputrec
->nstfout
> 0 && !do_per_step(step
,inputrec
->nstfout
));
1376 write_em_traj(fplog
,cr
,outf
,do_x
,do_f
,ftp2fn(efSTO
,nfile
,fnm
),
1377 top_global
,inputrec
,step
,
1378 s_min
,state_global
,f_global
);
1380 fnormn
= s_min
->fnorm
/sqrt(state_global
->natoms
);
1383 print_converged(stderr
,CG
,inputrec
->em_tol
,step
,converged
,number_steps
,
1384 s_min
->epot
,s_min
->fmax
,s_min
->a_fmax
,fnormn
);
1385 print_converged(fplog
,CG
,inputrec
->em_tol
,step
,converged
,number_steps
,
1386 s_min
->epot
,s_min
->fmax
,s_min
->a_fmax
,fnormn
);
1388 fprintf(fplog
,"\nPerformed %d energy evaluations in total.\n",neval
);
1391 finish_em(fplog
,cr
,outf
,runtime
,wcycle
);
1393 /* To print the actual number of steps we needed somewhere */
1394 runtime
->nsteps_done
= step
;
1397 } /* That's all folks */
1400 double do_lbfgs(FILE *fplog
,t_commrec
*cr
,
1401 int nfile
,const t_filenm fnm
[],
1402 const output_env_t oenv
, gmx_bool bVerbose
,gmx_bool bCompact
,
1404 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
1406 t_inputrec
*inputrec
,
1407 gmx_mtop_t
*top_global
,t_fcdata
*fcd
,
1410 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
1413 int repl_ex_nst
, int repl_ex_nex
, int repl_ex_seed
,
1414 gmx_membed_t membed
,
1415 real cpt_period
,real max_hours
,
1416 const char *deviceOptions
,
1417 unsigned long Flags
,
1418 gmx_runtime_t
*runtime
)
1420 static const char *LBFGS
="Low-Memory BFGS Minimizer";
1422 gmx_localtop_t
*top
;
1423 gmx_enerdata_t
*enerd
;
1425 gmx_global_stat_t gstat
;
1428 int ncorr
,nmaxcorr
,point
,cp
,neval
,nminstep
;
1429 double stepsize
,gpa
,gpb
,gpc
,tmp
,minstep
;
1430 real
*rho
,*alpha
,*ff
,*xx
,*p
,*s
,*lastx
,*lastf
,**dx
,**dg
;
1431 real
*xa
,*xb
,*xc
,*fa
,*fb
,*fc
,*xtmp
,*ftmp
;
1432 real a
,b
,c
,maxdelta
,delta
;
1433 real diag
,Epot0
,Epot
,EpotA
,EpotB
,EpotC
;
1434 real dgdx
,dgdg
,sq
,yr
,beta
;
1436 gmx_bool converged
,first
;
1439 gmx_bool do_log
,do_ene
,do_x
,do_f
,foundlower
,*frozen
;
1441 int start
,end
,number_steps
;
1443 int i
,k
,m
,n
,nfmax
,gf
,step
;
1449 gmx_fatal(FARGS
,"Cannot do parallel L-BFGS Minimization - yet.\n");
1451 n
= 3*state
->natoms
;
1452 nmaxcorr
= inputrec
->nbfgscorr
;
1454 /* Allocate memory */
1455 /* Use pointers to real so we dont have to loop over both atoms and
1456 * dimensions all the time...
1457 * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1458 * that point to the same memory.
1472 snew(alpha
,nmaxcorr
);
1475 for(i
=0;i
<nmaxcorr
;i
++)
1479 for(i
=0;i
<nmaxcorr
;i
++)
1486 init_em(fplog
,LBFGS
,cr
,inputrec
,
1487 state
,top_global
,&ems
,&top
,&f
,&f_global
,
1488 nrnb
,mu_tot
,fr
,&enerd
,&graph
,mdatoms
,&gstat
,vsite
,constr
,
1489 nfile
,fnm
,&outf
,&mdebin
);
1490 /* Do_lbfgs is not completely updated like do_steep and do_cg,
1491 * so we free some memory again.
1496 xx
= (real
*)state
->x
;
1499 start
= mdatoms
->start
;
1500 end
= mdatoms
->homenr
+ start
;
1502 /* Print to log file */
1503 print_em_start(fplog
,cr
,runtime
,wcycle
,LBFGS
);
1505 do_log
= do_ene
= do_x
= do_f
= TRUE
;
1507 /* Max number of steps */
1508 number_steps
=inputrec
->nsteps
;
1510 /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1512 for(i
=start
; i
<end
; i
++) {
1513 if (mdatoms
->cFREEZE
)
1514 gf
= mdatoms
->cFREEZE
[i
];
1515 for(m
=0; m
<DIM
; m
++)
1516 frozen
[3*i
+m
]=inputrec
->opts
.nFreeze
[gf
][m
];
1519 sp_header(stderr
,LBFGS
,inputrec
->em_tol
,number_steps
);
1521 sp_header(fplog
,LBFGS
,inputrec
->em_tol
,number_steps
);
1524 construct_vsites(fplog
,vsite
,state
->x
,nrnb
,1,NULL
,
1525 top
->idef
.iparams
,top
->idef
.il
,
1526 fr
->ePBC
,fr
->bMolPBC
,graph
,cr
,state
->box
);
1528 /* Call the force routine and some auxiliary (neighboursearching etc.) */
1529 /* do_force always puts the charge groups in the box and shifts again
1530 * We do not unshift, so molecules are always whole
1535 evaluate_energy(fplog
,bVerbose
,cr
,
1536 state
,top_global
,&ems
,top
,
1537 inputrec
,nrnb
,wcycle
,gstat
,
1538 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
1539 mu_tot
,enerd
,vir
,pres
,-1,TRUE
);
1543 /* Copy stuff to the energy bin for easy printing etc. */
1544 upd_mdebin(mdebin
,FALSE
,FALSE
,(double)step
,
1545 mdatoms
->tmass
,enerd
,state
,inputrec
->fepvals
,inputrec
->expandedvals
,state
->box
,
1546 NULL
,NULL
,vir
,pres
,NULL
,mu_tot
,constr
);
1548 print_ebin_header(fplog
,step
,step
,state
->lambda
[efptFEP
]);
1549 print_ebin(outf
->fp_ene
,TRUE
,FALSE
,FALSE
,fplog
,step
,step
,eprNORMAL
,
1550 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
1554 /* This is the starting energy */
1555 Epot
= enerd
->term
[F_EPOT
];
1561 /* Set the initial step.
1562 * since it will be multiplied by the non-normalized search direction
1563 * vector (force vector the first time), we scale it by the
1564 * norm of the force.
1568 fprintf(stderr
,"Using %d BFGS correction steps.\n\n",nmaxcorr
);
1569 fprintf(stderr
," F-max = %12.5e on atom %d\n",fmax
,nfmax
+1);
1570 fprintf(stderr
," F-Norm = %12.5e\n",fnorm
/sqrt(state
->natoms
));
1571 fprintf(stderr
,"\n");
1572 /* and copy to the log file too... */
1573 fprintf(fplog
,"Using %d BFGS correction steps.\n\n",nmaxcorr
);
1574 fprintf(fplog
," F-max = %12.5e on atom %d\n",fmax
,nfmax
+1);
1575 fprintf(fplog
," F-Norm = %12.5e\n",fnorm
/sqrt(state
->natoms
));
1576 fprintf(fplog
,"\n");
1582 dx
[point
][i
] = ff
[i
]; /* Initial search direction */
1586 stepsize
= 1.0/fnorm
;
1589 /* Start the loop over BFGS steps.
1590 * Each successful step is counted, and we continue until
1591 * we either converge or reach the max number of steps.
1596 /* Set the gradient from the force */
1598 for(step
=0; (number_steps
<0 || (number_steps
>=0 && step
<=number_steps
)) && !converged
; step
++) {
1600 /* Write coordinates if necessary */
1601 do_x
= do_per_step(step
,inputrec
->nstxout
);
1602 do_f
= do_per_step(step
,inputrec
->nstfout
);
1607 mdof_flags
|= MDOF_X
;
1612 mdof_flags
|= MDOF_F
;
1615 write_traj(fplog
,cr
,outf
,mdof_flags
,
1616 top_global
,step
,(real
)step
,state
,state
,f
,f
,NULL
,NULL
);
1618 /* Do the linesearching in the direction dx[point][0..(n-1)] */
1620 /* pointer to current direction - point=0 first time here */
1623 /* calculate line gradient */
1624 for(gpa
=0,i
=0;i
<n
;i
++)
1627 /* Calculate minimum allowed stepsize, before the average (norm)
1628 * relative change in coordinate is smaller than precision
1630 for(minstep
=0,i
=0;i
<n
;i
++) {
1637 minstep
= GMX_REAL_EPS
/sqrt(minstep
/n
);
1639 if(stepsize
<minstep
) {
1644 /* Store old forces and coordinates */
1656 /* Take a step downhill.
1657 * In theory, we should minimize the function along this direction.
1658 * That is quite possible, but it turns out to take 5-10 function evaluations
1659 * for each line. However, we dont really need to find the exact minimum -
1660 * it is much better to start a new BFGS step in a modified direction as soon
1661 * as we are close to it. This will save a lot of energy evaluations.
1663 * In practice, we just try to take a single step.
1664 * If it worked (i.e. lowered the energy), we increase the stepsize but
1665 * the continue straight to the next BFGS step without trying to find any minimum.
1666 * If it didn't work (higher energy), there must be a minimum somewhere between
1667 * the old position and the new one.
1669 * Due to the finite numerical accuracy, it turns out that it is a good idea
1670 * to even accept a SMALL increase in energy, if the derivative is still downhill.
1671 * This leads to lower final energies in the tests I've done. / Erik
1676 c
= a
+ stepsize
; /* reference position along line is zero */
1678 /* Check stepsize first. We do not allow displacements
1679 * larger than emstep.
1689 if(maxdelta
>inputrec
->em_stepsize
)
1691 } while(maxdelta
>inputrec
->em_stepsize
);
1693 /* Take a trial step */
1695 xc
[i
] = lastx
[i
] + c
*s
[i
];
1698 /* Calculate energy for the trial step */
1699 ems
.s
.x
= (rvec
*)xc
;
1701 evaluate_energy(fplog
,bVerbose
,cr
,
1702 state
,top_global
,&ems
,top
,
1703 inputrec
,nrnb
,wcycle
,gstat
,
1704 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
1705 mu_tot
,enerd
,vir
,pres
,step
,FALSE
);
1708 /* Calc derivative along line */
1709 for(gpc
=0,i
=0; i
<n
; i
++) {
1710 gpc
-= s
[i
]*fc
[i
]; /* f is negative gradient, thus the sign */
1712 /* Sum the gradient along the line across CPUs */
1714 gmx_sumd(1,&gpc
,cr
);
1716 /* This is the max amount of increase in energy we tolerate */
1717 tmp
=sqrt(GMX_REAL_EPS
)*fabs(EpotA
);
1719 /* Accept the step if the energy is lower, or if it is not significantly higher
1720 * and the line derivative is still negative.
1722 if(EpotC
<EpotA
|| (gpc
<0 && EpotC
<(EpotA
+tmp
))) {
1724 /* Great, we found a better energy. Increase step for next iteration
1725 * if we are still going down, decrease it otherwise
1728 stepsize
*= 1.618034; /* The golden section */
1730 stepsize
*= 0.618034; /* 1/golden section */
1732 /* New energy is the same or higher. We will have to do some work
1733 * to find a smaller value in the interval. Take smaller step next time!
1736 stepsize
*= 0.618034;
1739 /* OK, if we didn't find a lower value we will have to locate one now - there must
1740 * be one in the interval [a=0,c].
1741 * The same thing is valid here, though: Don't spend dozens of iterations to find
1742 * the line minimum. We try to interpolate based on the derivative at the endpoints,
1743 * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1745 * I also have a safeguard for potentially really patological functions so we never
1746 * take more than 20 steps before we give up ...
1748 * If we already found a lower value we just skip this step and continue to the update.
1755 /* Select a new trial point.
1756 * If the derivatives at points a & c have different sign we interpolate to zero,
1757 * otherwise just do a bisection.
1761 b
= a
+ gpa
*(a
-c
)/(gpc
-gpa
);
1765 /* safeguard if interpolation close to machine accuracy causes errors:
1766 * never go outside the interval
1771 /* Take a trial step */
1773 xb
[i
] = lastx
[i
] + b
*s
[i
];
1776 /* Calculate energy for the trial step */
1777 ems
.s
.x
= (rvec
*)xb
;
1779 evaluate_energy(fplog
,bVerbose
,cr
,
1780 state
,top_global
,&ems
,top
,
1781 inputrec
,nrnb
,wcycle
,gstat
,
1782 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
1783 mu_tot
,enerd
,vir
,pres
,step
,FALSE
);
1788 for(gpb
=0,i
=0; i
<n
; i
++)
1789 gpb
-= s
[i
]*fb
[i
]; /* f is negative gradient, thus the sign */
1791 /* Sum the gradient along the line across CPUs */
1793 gmx_sumd(1,&gpb
,cr
);
1795 /* Keep one of the intervals based on the value of the derivative at the new point */
1797 /* Replace c endpoint with b */
1801 /* swap coord pointers b/c */
1809 /* Replace a endpoint with b */
1813 /* swap coord pointers a/b */
1823 * Stop search as soon as we find a value smaller than the endpoints,
1824 * or if the tolerance is below machine precision.
1825 * Never run more than 20 steps, no matter what.
1828 } while((EpotB
>EpotA
|| EpotB
>EpotC
) && (nminstep
<20));
1830 if(fabs(EpotB
-Epot0
)<GMX_REAL_EPS
|| nminstep
>=20) {
1831 /* OK. We couldn't find a significantly lower energy.
1832 * If ncorr==0 this was steepest descent, and then we give up.
1833 * If not, reset memory to restart as steepest descent before quitting.
1842 /* Search in gradient direction */
1845 /* Reset stepsize */
1846 stepsize
= 1.0/fnorm
;
1851 /* Select min energy state of A & C, put the best in xx/ff/Epot
1882 /* Update the memory information, and calculate a new
1883 * approximation of the inverse hessian
1886 /* Have new data in Epot, xx, ff */
1891 dg
[point
][i
]=lastf
[i
]-ff
[i
];
1892 dx
[point
][i
]*=stepsize
;
1898 dgdg
+=dg
[point
][i
]*dg
[point
][i
];
1899 dgdx
+=dg
[point
][i
]*dx
[point
][i
];
1904 rho
[point
]=1.0/dgdx
;
1916 /* Recursive update. First go back over the memory points */
1917 for(k
=0;k
<ncorr
;k
++) {
1926 alpha
[cp
]=rho
[cp
]*sq
;
1929 p
[i
] -= alpha
[cp
]*dg
[cp
][i
];
1935 /* And then go forward again */
1936 for(k
=0;k
<ncorr
;k
++) {
1939 yr
+= p
[i
]*dg
[cp
][i
];
1942 beta
= alpha
[cp
]-beta
;
1945 p
[i
] += beta
*dx
[cp
][i
];
1954 dx
[point
][i
] = p
[i
];
1960 /* Test whether the convergence criterion is met */
1961 get_f_norm_max(cr
,&(inputrec
->opts
),mdatoms
,f
,&fnorm
,&fmax
,&nfmax
);
1963 /* Print it if necessary */
1966 fprintf(stderr
,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1967 step
,Epot
,fnorm
/sqrt(state
->natoms
),fmax
,nfmax
+1);
1968 /* Store the new (lower) energies */
1969 upd_mdebin(mdebin
,FALSE
,FALSE
,(double)step
,
1970 mdatoms
->tmass
,enerd
,state
,inputrec
->fepvals
,inputrec
->expandedvals
,state
->box
,
1971 NULL
,NULL
,vir
,pres
,NULL
,mu_tot
,constr
);
1972 do_log
= do_per_step(step
,inputrec
->nstlog
);
1973 do_ene
= do_per_step(step
,inputrec
->nstenergy
);
1975 print_ebin_header(fplog
,step
,step
,state
->lambda
[efptFEP
]);
1976 print_ebin(outf
->fp_ene
,do_ene
,FALSE
,FALSE
,
1977 do_log
? fplog
: NULL
,step
,step
,eprNORMAL
,
1978 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
1981 /* Stop when the maximum force lies below tolerance.
1982 * If we have reached machine precision, converged is already set to true.
1985 converged
= converged
|| (fmax
< inputrec
->em_tol
);
1987 } /* End of the loop */
1990 step
--; /* we never took that last step in this case */
1992 if(fmax
>inputrec
->em_tol
)
1996 warn_step(stderr
,inputrec
->em_tol
,step
-1==number_steps
,FALSE
);
1997 warn_step(fplog
,inputrec
->em_tol
,step
-1==number_steps
,FALSE
);
2002 /* If we printed energy and/or logfile last step (which was the last step)
2003 * we don't have to do it again, but otherwise print the final values.
2005 if(!do_log
) /* Write final value to log since we didn't do anythin last step */
2006 print_ebin_header(fplog
,step
,step
,state
->lambda
[efptFEP
]);
2007 if(!do_ene
|| !do_log
) /* Write final energy file entries */
2008 print_ebin(outf
->fp_ene
,!do_ene
,FALSE
,FALSE
,
2009 !do_log
? fplog
: NULL
,step
,step
,eprNORMAL
,
2010 TRUE
,mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
2012 /* Print some stuff... */
2014 fprintf(stderr
,"\nwriting lowest energy coordinates.\n");
2017 * For accurate normal mode calculation it is imperative that we
2018 * store the last conformation into the full precision binary trajectory.
2020 * However, we should only do it if we did NOT already write this step
2021 * above (which we did if do_x or do_f was true).
2023 do_x
= !do_per_step(step
,inputrec
->nstxout
);
2024 do_f
= !do_per_step(step
,inputrec
->nstfout
);
2025 write_em_traj(fplog
,cr
,outf
,do_x
,do_f
,ftp2fn(efSTO
,nfile
,fnm
),
2026 top_global
,inputrec
,step
,
2030 print_converged(stderr
,LBFGS
,inputrec
->em_tol
,step
,converged
,
2031 number_steps
,Epot
,fmax
,nfmax
,fnorm
/sqrt(state
->natoms
));
2032 print_converged(fplog
,LBFGS
,inputrec
->em_tol
,step
,converged
,
2033 number_steps
,Epot
,fmax
,nfmax
,fnorm
/sqrt(state
->natoms
));
2035 fprintf(fplog
,"\nPerformed %d energy evaluations in total.\n",neval
);
2038 finish_em(fplog
,cr
,outf
,runtime
,wcycle
);
2040 /* To print the actual number of steps we needed somewhere */
2041 runtime
->nsteps_done
= step
;
2044 } /* That's all folks */
2047 double do_steep(FILE *fplog
,t_commrec
*cr
,
2048 int nfile
, const t_filenm fnm
[],
2049 const output_env_t oenv
, gmx_bool bVerbose
,gmx_bool bCompact
,
2051 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
2053 t_inputrec
*inputrec
,
2054 gmx_mtop_t
*top_global
,t_fcdata
*fcd
,
2055 t_state
*state_global
,
2057 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
2060 int repl_ex_nst
, int repl_ex_nex
, int repl_ex_seed
,
2061 gmx_membed_t membed
,
2062 real cpt_period
,real max_hours
,
2063 const char *deviceOptions
,
2064 unsigned long Flags
,
2065 gmx_runtime_t
*runtime
)
2067 const char *SD
="Steepest Descents";
2068 em_state_t
*s_min
,*s_try
;
2070 gmx_localtop_t
*top
;
2071 gmx_enerdata_t
*enerd
;
2073 gmx_global_stat_t gstat
;
2075 real stepsize
,constepsize
;
2076 real ustep
,dvdlambda
,fnormn
;
2079 gmx_bool bDone
,bAbort
,do_x
,do_f
;
2084 int steps_accepted
=0;
2088 s_min
= init_em_state();
2089 s_try
= init_em_state();
2091 /* Init em and store the local state in s_try */
2092 init_em(fplog
,SD
,cr
,inputrec
,
2093 state_global
,top_global
,s_try
,&top
,&f
,&f_global
,
2094 nrnb
,mu_tot
,fr
,&enerd
,&graph
,mdatoms
,&gstat
,vsite
,constr
,
2095 nfile
,fnm
,&outf
,&mdebin
);
2097 /* Print to log file */
2098 print_em_start(fplog
,cr
,runtime
,wcycle
,SD
);
2100 /* Set variables for stepsize (in nm). This is the largest
2101 * step that we are going to make in any direction.
2103 ustep
= inputrec
->em_stepsize
;
2106 /* Max number of steps */
2107 nsteps
= inputrec
->nsteps
;
2110 /* Print to the screen */
2111 sp_header(stderr
,SD
,inputrec
->em_tol
,nsteps
);
2113 sp_header(fplog
,SD
,inputrec
->em_tol
,nsteps
);
2115 /**** HERE STARTS THE LOOP ****
2116 * count is the counter for the number of steps
2117 * bDone will be TRUE when the minimization has converged
2118 * bAbort will be TRUE when nsteps steps have been performed or when
2119 * the stepsize becomes smaller than is reasonable for machine precision
2124 while( !bDone
&& !bAbort
) {
2125 bAbort
= (nsteps
>= 0) && (count
== nsteps
);
2127 /* set new coordinates, except for first step */
2129 do_em_step(cr
,inputrec
,mdatoms
,fr
->bMolPBC
,
2130 s_min
,stepsize
,s_min
->f
,s_try
,
2131 constr
,top
,nrnb
,wcycle
,count
);
2134 evaluate_energy(fplog
,bVerbose
,cr
,
2135 state_global
,top_global
,s_try
,top
,
2136 inputrec
,nrnb
,wcycle
,gstat
,
2137 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
2138 mu_tot
,enerd
,vir
,pres
,count
,count
==0);
2141 print_ebin_header(fplog
,count
,count
,s_try
->s
.lambda
[efptFEP
]);
2144 s_min
->epot
= s_try
->epot
+ 1;
2146 /* Print it if necessary */
2149 fprintf(stderr
,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2150 count
,ustep
,s_try
->epot
,s_try
->fmax
,s_try
->a_fmax
+1,
2151 (s_try
->epot
< s_min
->epot
) ? '\n' : '\r');
2154 if (s_try
->epot
< s_min
->epot
) {
2155 /* Store the new (lower) energies */
2156 upd_mdebin(mdebin
,FALSE
,FALSE
,(double)count
,
2157 mdatoms
->tmass
,enerd
,&s_try
->s
,inputrec
->fepvals
,inputrec
->expandedvals
,
2158 s_try
->s
.box
, NULL
,NULL
,vir
,pres
,NULL
,mu_tot
,constr
);
2159 print_ebin(outf
->fp_ene
,TRUE
,
2160 do_per_step(steps_accepted
,inputrec
->nstdisreout
),
2161 do_per_step(steps_accepted
,inputrec
->nstorireout
),
2162 fplog
,count
,count
,eprNORMAL
,TRUE
,
2163 mdebin
,fcd
,&(top_global
->groups
),&(inputrec
->opts
));
2168 /* Now if the new energy is smaller than the previous...
2169 * or if this is the first step!
2170 * or if we did random steps!
2173 if ( (count
==0) || (s_try
->epot
< s_min
->epot
) ) {
2176 /* Test whether the convergence criterion is met... */
2177 bDone
= (s_try
->fmax
< inputrec
->em_tol
);
2179 /* Copy the arrays for force, positions and energy */
2180 /* The 'Min' array always holds the coords and forces of the minimal
2182 swap_em_state(s_min
,s_try
);
2186 /* Write to trn, if necessary */
2187 do_x
= do_per_step(steps_accepted
,inputrec
->nstxout
);
2188 do_f
= do_per_step(steps_accepted
,inputrec
->nstfout
);
2189 write_em_traj(fplog
,cr
,outf
,do_x
,do_f
,NULL
,
2190 top_global
,inputrec
,count
,
2191 s_min
,state_global
,f_global
);
2194 /* If energy is not smaller make the step smaller... */
2197 if (DOMAINDECOMP(cr
) && s_min
->s
.ddp_count
!= cr
->dd
->ddp_count
) {
2198 /* Reload the old state */
2199 em_dd_partition_system(fplog
,count
,cr
,top_global
,inputrec
,
2200 s_min
,top
,mdatoms
,fr
,vsite
,constr
,
2205 /* Determine new step */
2206 stepsize
= ustep
/s_min
->fmax
;
2208 /* Check if stepsize is too small, with 1 nm as a characteristic length */
2210 if (count
== nsteps
|| ustep
< 1e-12)
2212 if (count
== nsteps
|| ustep
< 1e-6)
2217 warn_step(stderr
,inputrec
->em_tol
,count
==nsteps
,constr
!=NULL
);
2218 warn_step(fplog
,inputrec
->em_tol
,count
==nsteps
,constr
!=NULL
);
2224 } /* End of the loop */
2226 /* Print some shit... */
2228 fprintf(stderr
,"\nwriting lowest energy coordinates.\n");
2229 write_em_traj(fplog
,cr
,outf
,TRUE
,inputrec
->nstfout
,ftp2fn(efSTO
,nfile
,fnm
),
2230 top_global
,inputrec
,count
,
2231 s_min
,state_global
,f_global
);
2233 fnormn
= s_min
->fnorm
/sqrt(state_global
->natoms
);
2236 print_converged(stderr
,SD
,inputrec
->em_tol
,count
,bDone
,nsteps
,
2237 s_min
->epot
,s_min
->fmax
,s_min
->a_fmax
,fnormn
);
2238 print_converged(fplog
,SD
,inputrec
->em_tol
,count
,bDone
,nsteps
,
2239 s_min
->epot
,s_min
->fmax
,s_min
->a_fmax
,fnormn
);
2242 finish_em(fplog
,cr
,outf
,runtime
,wcycle
);
2244 /* To print the actual number of steps we needed somewhere */
2245 inputrec
->nsteps
=count
;
2247 runtime
->nsteps_done
= count
;
2250 } /* That's all folks */
2253 double do_nm(FILE *fplog
,t_commrec
*cr
,
2254 int nfile
,const t_filenm fnm
[],
2255 const output_env_t oenv
, gmx_bool bVerbose
,gmx_bool bCompact
,
2257 gmx_vsite_t
*vsite
,gmx_constr_t constr
,
2259 t_inputrec
*inputrec
,
2260 gmx_mtop_t
*top_global
,t_fcdata
*fcd
,
2261 t_state
*state_global
,
2263 t_nrnb
*nrnb
,gmx_wallcycle_t wcycle
,
2266 int repl_ex_nst
, int repl_ex_nex
, int repl_ex_seed
,
2267 gmx_membed_t membed
,
2268 real cpt_period
,real max_hours
,
2269 const char *deviceOptions
,
2270 unsigned long Flags
,
2271 gmx_runtime_t
*runtime
)
2273 const char *NM
= "Normal Mode Analysis";
2278 gmx_localtop_t
*top
;
2279 gmx_enerdata_t
*enerd
;
2281 gmx_global_stat_t gstat
;
2283 real t
,t0
,lambda
,lam0
;
2288 gmx_bool bSparse
; /* use sparse matrix storage format */
2290 gmx_sparsematrix_t
* sparse_matrix
= NULL
;
2291 real
* full_matrix
= NULL
;
2292 em_state_t
* state_work
;
2294 /* added with respect to mdrun */
2296 real der_range
=10.0*sqrt(GMX_REAL_EPS
);
2302 gmx_fatal(FARGS
,"Constraints present with Normal Mode Analysis, this combination is not supported");
2305 state_work
= init_em_state();
2307 /* Init em and store the local state in state_minimum */
2308 init_em(fplog
,NM
,cr
,inputrec
,
2309 state_global
,top_global
,state_work
,&top
,
2311 nrnb
,mu_tot
,fr
,&enerd
,&graph
,mdatoms
,&gstat
,vsite
,constr
,
2312 nfile
,fnm
,&outf
,NULL
);
2314 natoms
= top_global
->natoms
;
2322 "NOTE: This version of Gromacs has been compiled in single precision,\n"
2323 " which MIGHT not be accurate enough for normal mode analysis.\n"
2324 " Gromacs now uses sparse matrix storage, so the memory requirements\n"
2325 " are fairly modest even if you recompile in double precision.\n\n");
2329 /* Check if we can/should use sparse storage format.
2331 * Sparse format is only useful when the Hessian itself is sparse, which it
2332 * will be when we use a cutoff.
2333 * For small systems (n<1000) it is easier to always use full matrix format, though.
2335 if(EEL_FULL(fr
->eeltype
) || fr
->rlist
==0.0)
2337 fprintf(stderr
,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
2340 else if(top_global
->natoms
< 1000)
2342 fprintf(stderr
,"Small system size (N=%d), using full Hessian format.\n",top_global
->natoms
);
2347 fprintf(stderr
,"Using compressed symmetric sparse Hessian format.\n");
2351 sz
= DIM
*top_global
->natoms
;
2353 fprintf(stderr
,"Allocating Hessian memory...\n\n");
2357 sparse_matrix
=gmx_sparsematrix_init(sz
);
2358 sparse_matrix
->compressed_symmetric
= TRUE
;
2362 snew(full_matrix
,sz
*sz
);
2365 /* Initial values */
2366 t0
= inputrec
->init_t
;
2367 lam0
= inputrec
->fepvals
->init_lambda
;
2375 /* Write start time and temperature */
2376 print_em_start(fplog
,cr
,runtime
,wcycle
,NM
);
2378 /* fudge nr of steps to nr of atoms */
2379 inputrec
->nsteps
= natoms
*2;
2383 fprintf(stderr
,"starting normal mode calculation '%s'\n%d steps.\n\n",
2384 *(top_global
->name
),(int)inputrec
->nsteps
);
2387 nnodes
= cr
->nnodes
;
2389 /* Make evaluate_energy do a single node force calculation */
2391 evaluate_energy(fplog
,bVerbose
,cr
,
2392 state_global
,top_global
,state_work
,top
,
2393 inputrec
,nrnb
,wcycle
,gstat
,
2394 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
2395 mu_tot
,enerd
,vir
,pres
,-1,TRUE
);
2396 cr
->nnodes
= nnodes
;
2398 /* if forces are not small, warn user */
2399 get_state_f_norm_max(cr
,&(inputrec
->opts
),mdatoms
,state_work
);
2403 fprintf(stderr
,"Maximum force:%12.5e\n",state_work
->fmax
);
2404 if (state_work
->fmax
> 1.0e-3)
2406 fprintf(stderr
,"Maximum force probably not small enough to");
2407 fprintf(stderr
," ensure that you are in an \nenergy well. ");
2408 fprintf(stderr
,"Be aware that negative eigenvalues may occur");
2409 fprintf(stderr
," when the\nresulting matrix is diagonalized.\n");
2413 /***********************************************************
2415 * Loop over all pairs in matrix
2417 * do_force called twice. Once with positive and
2418 * once with negative displacement
2420 ************************************************************/
2422 /* Steps are divided one by one over the nodes */
2423 for(atom
=cr
->nodeid
; atom
<natoms
; atom
+=nnodes
)
2426 for (d
=0; d
<DIM
; d
++)
2428 x_min
= state_work
->s
.x
[atom
][d
];
2430 state_work
->s
.x
[atom
][d
] = x_min
- der_range
;
2432 /* Make evaluate_energy do a single node force calculation */
2434 evaluate_energy(fplog
,bVerbose
,cr
,
2435 state_global
,top_global
,state_work
,top
,
2436 inputrec
,nrnb
,wcycle
,gstat
,
2437 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
2438 mu_tot
,enerd
,vir
,pres
,atom
*2,FALSE
);
2440 for(i
=0; i
<natoms
; i
++)
2442 copy_rvec(state_work
->f
[i
], fneg
[i
]);
2445 state_work
->s
.x
[atom
][d
] = x_min
+ der_range
;
2447 evaluate_energy(fplog
,bVerbose
,cr
,
2448 state_global
,top_global
,state_work
,top
,
2449 inputrec
,nrnb
,wcycle
,gstat
,
2450 vsite
,constr
,fcd
,graph
,mdatoms
,fr
,
2451 mu_tot
,enerd
,vir
,pres
,atom
*2+1,FALSE
);
2452 cr
->nnodes
= nnodes
;
2454 /* x is restored to original */
2455 state_work
->s
.x
[atom
][d
] = x_min
;
2457 for(j
=0; j
<natoms
; j
++)
2459 for (k
=0; (k
<DIM
); k
++)
2462 -(state_work
->f
[j
][k
] - fneg
[j
][k
])/(2*der_range
);
2470 #define mpi_type MPI_DOUBLE
2472 #define mpi_type MPI_FLOAT
2474 MPI_Send(dfdx
[0],natoms
*DIM
,mpi_type
,MASTERNODE(cr
),cr
->nodeid
,
2475 cr
->mpi_comm_mygroup
);
2480 for(node
=0; (node
<nnodes
&& atom
+node
<natoms
); node
++)
2486 MPI_Recv(dfdx
[0],natoms
*DIM
,mpi_type
,node
,node
,
2487 cr
->mpi_comm_mygroup
,&stat
);
2492 row
= (atom
+ node
)*DIM
+ d
;
2494 for(j
=0; j
<natoms
; j
++)
2496 for(k
=0; k
<DIM
; k
++)
2502 if (col
>= row
&& dfdx
[j
][k
] != 0.0)
2504 gmx_sparsematrix_increment_value(sparse_matrix
,
2505 row
,col
,dfdx
[j
][k
]);
2510 full_matrix
[row
*sz
+col
] = dfdx
[j
][k
];
2517 if (bVerbose
&& fplog
)
2522 /* write progress */
2523 if (MASTER(cr
) && bVerbose
)
2525 fprintf(stderr
,"\rFinished step %d out of %d",
2526 min(atom
+nnodes
,natoms
),natoms
);
2533 fprintf(stderr
,"\n\nWriting Hessian...\n");
2534 gmx_mtxio_write(ftp2fn(efMTX
,nfile
,fnm
),sz
,sz
,full_matrix
,sparse_matrix
);
2537 finish_em(fplog
,cr
,outf
,runtime
,wcycle
);
2539 runtime
->nsteps_done
= natoms
*2;