1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
52 #include "gmx_fatal.h"
69 /* calculated values */
71 barsim_t
*a
, *b
; /* the simulation data */
73 double lambda_a
, lambda_b
; /* the lambda values at a and b */
75 double dg
; /* the free energy difference */
76 double dg_err
; /* the free energy difference */
78 double sa
; /* relative entropy of b in state a */
79 double sa_err
; /* error in sa */
80 double sb
; /* relative entropy of a in state b */
81 double sb_err
; /* error in sb */
83 double dg_stddev
; /* expected dg stddev per sample */
84 double dg_stddev_err
; /* error in dg_stddev */
88 static double calc_bar_sum(int n
,double *W
,double Wfac
,double sbMmDG
)
97 sum
+= 1./(1. + exp(Wfac
*W
[i
] + sbMmDG
));
103 static double calc_bar_lowlevel(int n1
,double *W1
,int n2
,double *W2
,
104 double delta_lambda
,double temp
,double tol
)
106 double kT
,beta
,beta_dl
,M
;
109 double Wfac1
,Wfac2
,Wmin
,Wmax
;
110 double DG0
,DG1
,DG2
,dDG1
;
116 M
= log((double)n1
/(double)n2
);
118 if (delta_lambda
== 0)
125 Wfac1
= beta
*delta_lambda
;
126 Wfac2
= -beta
*delta_lambda
;
131 /* We print the output both in kT and kJ/mol.
132 * Here we determine DG in kT, so when beta < 1
133 * the precision has to be increased.
138 /* Calculate minimum and maximum work to give an initial estimate of
139 * delta G as their average.
145 Wmin
= min(Wmin
,W1
[i
]*Wfac1
);
146 Wmax
= max(Wmax
,W1
[i
]*Wfac1
);
150 Wmin
= min(Wmin
,-W2
[i
]*Wfac2
);
151 Wmax
= max(Wmax
,-W2
[i
]*Wfac2
);
156 /* For the comparison we can use twice the tolerance. */
159 fprintf(debug
,"DG %9.5f %9.5f\n",DG0
,DG2
);
161 while (DG2
- DG0
> 2*tol
)
163 DG1
= 0.5*(DG0
+ DG2
);
165 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
168 calc_bar_sum(n1
,W1
,Wfac1
, (M
-DG1
)) -
169 calc_bar_sum(n2
,W2
,Wfac2
,-(M
-DG1
));
181 fprintf(debug
,"DG %9.5f %9.5f\n",DG0
,DG2
);
185 return 0.5*(DG0
+ DG2
);
188 static void calc_rel_entropy(int n1
,double *W1
,int n2
,double *W2
,
189 double delta_lambda
, double temp
,
190 double dg
, double *sa
, double *sb
)
201 /* to ensure the work values are the same as during the delta_G */
202 if (delta_lambda
== 0)
209 Wfac1
= beta
*delta_lambda
;
210 Wfac2
= -beta
*delta_lambda
;
213 /* first calculate the average work in both directions */
225 /* then calculate the relative entropies */
230 static void calc_dg_stddev(int n1
, double *W1
, int n2
, double *W2
,
231 double delta_lambda
, double temp
,
232 double dg
, double *stddev
)
240 double nn1
=n1
; /* this makes the fraction in the *stddev eq below nicer */
246 /* to ensure the work values are the same as during the delta_G */
247 if (delta_lambda
== 0)
254 Wfac1
= beta
*delta_lambda
;
255 Wfac2
= -beta
*delta_lambda
;
260 /* calculate average in both directions */
263 sigmafact
+= 1./(2. + 2.*cosh((M
+ Wfac1
*W1
[i
] - dg
)));
267 sigmafact
+= 1./(2. + 2.*cosh((M
- Wfac2
*W2
[i
] - dg
)));
269 sigmafact
/= (n1
+ n2
);
272 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
273 *stddev
= sqrt(((1./sigmafact
) - ( (nn1
+nn2
)/nn1
+ (nn1
+nn2
)/nn2
)));
280 static void get_begin_end(barsim_t
*ba
,real begin
,real end
,int *b
,int *e
)
285 while (i
+ 1 < ba
->np
&& ba
->t
[i
] < begin
)
291 gmx_fatal(FARGS
,"Some data end before the start time %g",begin
);
298 while (i
> *b
&& ba
->t
[i
-1] > end
)
306 static int get_lam_set(barsim_t
*ba
,double lambda
)
311 while (i
< ba
->nset
&&
312 !gmx_within_tol(ba
->lambda
[i
],lambda
,10*GMX_REAL_EPS
))
318 gmx_fatal(FARGS
,"Could not find a set for lambda = %g in the file '%s' of lambda = %g",lambda
,ba
->filename
,ba
->lambda
[0]);
324 static void calc_bar(barsim_t
*ba1
,barsim_t
*ba2
,bool bUsedhdl
,
325 double tol
, int npee_min
,int npee_max
,
326 barres_t
*br
, bool *bEE
, double *partsum
)
328 int np1
,np2
,s1
,s2
,npee
,p
;
330 double dg_sig2
,sa_sig2
,sb_sig2
,stddev_sig2
; /* intermediate variance values
331 for calculated quantities */
334 br
->lambda_a
= ba1
->lambda
[0];
335 br
->lambda_b
= ba2
->lambda
[0];
342 delta_lambda
= ba2
->lambda
[0] - ba1
->lambda
[0];
346 s1
= get_lam_set(ba1
,ba2
->lambda
[0]);
347 s2
= get_lam_set(ba2
,ba1
->lambda
[0]);
352 np1
= ba1
->end
- ba1
->begin
;
353 np2
= ba2
->end
- ba2
->begin
;
355 br
->dg
= calc_bar_lowlevel(np1
,ba1
->y
[s1
]+ba1
->begin
,
356 np2
,ba2
->y
[s2
]+ba2
->begin
,
357 delta_lambda
,ba1
->temp
,tol
);
360 calc_rel_entropy(np1
, ba1
->y
[s1
]+ba1
->begin
,
361 np2
, ba2
->y
[s2
]+ba2
->begin
,
362 delta_lambda
, ba1
->temp
, br
->dg
, &(br
->sa
), &(br
->sb
));
363 calc_dg_stddev(np1
, ba1
->y
[s1
]+ba1
->begin
,
364 np2
, ba2
->y
[s2
]+ba2
->begin
,
365 delta_lambda
, ba1
->temp
, br
->dg
, &(br
->dg_stddev
) );
372 if (np1
>= npee_max
&& np2
>= npee_max
)
374 for(npee
=npee_min
; npee
<=npee_max
; npee
++)
386 for(p
=0; p
<npee
; p
++)
391 dgp
= calc_bar_lowlevel(np1
/npee
,
392 ba1
->y
[s1
]+ba1
->begin
+p
*(np1
/npee
),
394 ba2
->y
[s2
]+ba2
->begin
+p
*(np2
/npee
),
395 delta_lambda
,ba1
->temp
,tol
);
399 partsum
[npee
*(npee_max
+1)+p
] += dgp
;
401 calc_rel_entropy(np1
/npee
,
402 ba1
->y
[s1
]+ba1
->begin
+p
*(np1
/npee
),
404 ba2
->y
[s2
]+ba2
->begin
+p
*(np2
/npee
),
405 delta_lambda
, ba1
->temp
, dgp
, &sac
, &sbc
);
410 calc_dg_stddev(np1
/npee
,
411 ba1
->y
[s1
]+ba1
->begin
+p
*(np1
/npee
),
413 ba2
->y
[s2
]+ba2
->begin
+p
*(np2
/npee
),
414 delta_lambda
, ba1
->temp
, dgp
, &stddevc
);
417 dstddev2
+= stddevc
*stddevc
;
421 dg_sig2
+= (dgs2
-dgs
*dgs
)/(npee
-1);
427 sa_sig2
+= (dsa2
-dsa
*dsa
)/(npee
-1);
428 sb_sig2
+= (dsb2
-dsb
*dsb
)/(npee
-1);
432 stddev_sig2
+= (dstddev2
-dstddev
*dstddev
)/(npee
-1);
434 br
->dg_err
= sqrt(dg_sig2
/(npee_max
- npee_min
+ 1));
435 br
->sa_err
= sqrt(sa_sig2
/(npee_max
- npee_min
+ 1));
436 br
->sb_err
= sqrt(sb_sig2
/(npee_max
- npee_min
+ 1));
437 br
->dg_stddev_err
= sqrt(stddev_sig2
/(npee_max
- npee_min
+ 1));
446 static double bar_err(int nbmin
, int nbmax
, const double *partsum
)
452 for(nb
=nbmin
; nb
<=nbmax
; nb
++)
458 dg
= partsum
[nb
*(nbmax
+1)+b
];
464 svar
+= (s2
- s
*s
)/(nb
- 1);
467 return sqrt(svar
/(nbmax
+ 1 - nbmin
));
471 static double legend2lambda(char *fn
,const char *legend
,bool bdhdl
)
478 gmx_fatal(FARGS
,"There is no legend in file '%s', can not deduce lambda",fn
);
480 ptr
= strrchr(legend
,' ');
481 if (( bdhdl
&& strstr(legend
,"dH") == NULL
) ||
482 (!bdhdl
&& (strchr(legend
,'D') == NULL
||
483 strchr(legend
,'H') == NULL
)) ||
486 gmx_fatal(FARGS
,"There is no proper lambda legend in file '%s', can not deduce lambda",fn
);
488 if (sscanf(ptr
,"%lf",&lambda
) != 1)
490 gmx_fatal(FARGS
,"There is no proper lambda legend in file '%s', can not deduce lambda",fn
);
496 static double filename2lambda(char *fn
)
502 /* go to the end of the path string and search backward
503 because there might be numbers in the directory names
504 before the directory in which the lambda value is
506 while (ptr
[1] != '\0')
510 while (!isdigit(*ptr
) && ptr
> fn
)
514 if (!isdigit(ptr
[0]))
516 gmx_fatal(FARGS
,"While trying to read the lambda value from the filename: filename '%s' does not contain a number",fn
);
518 /* now that we have the last digit of the number we are looking for
519 let's find the beginning of the number and the sign if it has one
521 while ((isdigit(*ptr
) || ptr
[0] == '.') && ptr
> fn
)
530 lambda
= strtod(ptr
,&endptr
);
533 gmx_fatal(FARGS
,"Malformed number in filename '%s'",fn
);
539 static void read_barsim(char *fn
,double begin
,double end
,real temp
,
543 char *subtitle
,**legend
,*ptr
;
547 printf("'%s' ",ba
->filename
);
549 ba
->np
= read_xvg_legend(fn
,&ba
->y
,&ba
->nset
,&subtitle
,&legend
);
552 gmx_fatal(FARGS
,"File %s contains no usable data.",fn
);
556 get_begin_end(ba
,begin
,end
,&ba
->begin
,&ba
->end
);
557 printf("%.1f - %.1f, %6d points, lam:",
558 ba
->t
[ba
->begin
],ba
->t
[ba
->end
-1],ba
->end
-ba
->begin
);
561 if (subtitle
!= NULL
)
563 ptr
= strstr(subtitle
,"T =");
567 if (sscanf(ptr
,"%lf",&ba
->temp
) == 1)
571 gmx_fatal(FARGS
,"Found temperature of %g in file '%s'",
581 gmx_fatal(FARGS
,"Did not find a temperature in the subtitle in file '%s', use the -temp option of g_bar",fn
);
586 snew(ba
->lambda
,ba
->nset
-1);
589 /* Check if we have a single set, nset=2 means t and dH/dl */
592 /* Deduce lambda from the file name */
593 ba
->lambda
[0] = filename2lambda(fn
);
594 printf(" %g",ba
->lambda
[0]);
598 gmx_fatal(FARGS
,"File %s contains multiple sets but no legends, can not determine the lambda values",fn
);
603 for(i
=0; i
<ba
->nset
-1; i
++)
605 /* Read lambda from the legend */
606 ba
->lambda
[i
] = legend2lambda(fn
,legend
[i
],i
==0);
607 printf(" %g",ba
->lambda
[i
]);
612 /* Reorder the data */
613 for(i
=1; i
<ba
->nset
; i
++)
615 ba
->y
[i
-1] = ba
->y
[i
];
619 for(i
=0; i
<ba
->nset
-1; i
++)
628 int gmx_bar(int argc
,char *argv
[])
630 static const char *desc
[] = {
631 "g_bar calculates free energy difference estimates through ",
632 "Bennett's acceptance ratio method. ",
633 "Input option [TT]-f[tt] expects multiple dhdl files. ",
634 "Two types of input files are supported:[BR]",
635 "* Files with only one y-value, for such files it is assumed ",
636 "that the y-value is dH/dlambda and that the Hamiltonian depends ",
637 "linearly on lambda. The lambda value of the simulation is inferred ",
638 "from the legend if present, otherwise from a number in the file ",
641 "* Files with more than one y-value. The files should have columns ",
642 "with dH/dlambda and Delta lambda. The lambda values are inferred ",
643 "from the legends: ",
644 "lambda of the simulation from the legend of dH/dlambda ",
645 "and the foreign lambda's from the legends of Delta H.[PAR]",
647 "The lambda of the simulation is parsed from dhdl.xvg file's legend ",
648 "containing the string 'dH', the foreign lambda's from the legend ",
649 "containing the capitalized letters 'D' and 'H'. The temperature ",
650 "is parsed from the legend line containing 'T ='.[PAR]",
652 "The free energy estimates are determined using BAR with bisection, ",
653 "the precision of the output is set with [TT]-prec[tt]. ",
654 "An error estimate taking into account time correlations ",
655 "is made by splitting the data into blocks and determining ",
656 "the free energy differences over those blocks and assuming ",
657 "the blocks are independent. ",
658 "The final error estimate is determined from the average variance ",
659 "over 5 blocks. A range of blocks numbers for error estimation can ",
660 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
662 "The results are split in two parts: the last part contains the final ",
663 "results in kJ/mol, together with the error estimate for each part ",
664 "and the total. The first part contains detailed free energy ",
665 "difference estimates and phase space overlap measures in units of ",
666 "kT (together with their computed error estimate). The printed ",
668 "* lam_A: the lambda values for point A.[BR]",
669 "* lam_B: the lambda values for point B.[BR]",
670 "* DG: the free energy estimate.[BR]",
671 "* s_A: an estimate of the relative entropy of B in A.[BR]",
672 "* s_A: an estimate of the relative entropy of A in B.[BR]",
673 "* stdev: an estimate expected per-sample standard deviation.[PAR]",
675 "The relative entropy of both states in each other's ensemble can be ",
676 "interpreted as a measure of phase space overlap: ",
677 "the relative entropy s_A of the work samples of lambda_B in the ",
678 "ensemble of lambda_A (and vice versa for s_B), is a ",
679 "measure of the 'distance' between Boltzmann distributions of ",
680 "the two states, that goes to zero for identical distributions. See ",
681 "Wu & Kofke, J. Chem. Phys. 123 084109 (2009) for more information.",
683 "The estimate of the expected per-sample standard deviation, as given ",
684 "in Bennett's original BAR paper: ",
685 "Bennett, J. Comp. Phys. 22, p 245 (1976), Eq. 10 gives an estimate ",
686 "of the quality of sampling (not directly of the actual statistical ",
687 "error, because it assumes independent samples).[PAR]",
690 static real begin
=0,end
=-1,temp
=-1;
691 static int nd
=2,nbmin
=5,nbmax
=5;
694 { "-b", FALSE
, etREAL
, {&begin
}, "Begin time for BAR" },
695 { "-e", FALSE
, etREAL
, {&end
}, "End time for BAR" },
696 { "-temp", FALSE
, etREAL
, {&temp
}, "Temperature (K)" },
697 { "-prec", FALSE
, etINT
, {&nd
}, "The number of digits after the decimal point" },
698 { "-nbmin", FALSE
, etINT
, {&nbmin
}, "Minimum number of blocks for error estimation" },
699 { "-nbmax", FALSE
, etINT
, {&nbmax
}, "Maximum number of blocks for error estimation" }
703 { efXVG
, "-f", "dhdl", ffRDMULT
},
704 { efXVG
, "-o", "bar", ffOPTWR
},
705 { efXVG
, "-oi", "barint", ffOPTWR
}
707 #define NFILE asize(fnm)
709 int nfile
,f
,f2
,fm
,n1
,nm
;
714 double prec
,dg_tot
,dg
,sig
;
716 char dgformat
[20],xvg2format
[STRLEN
],xvg3format
[STRLEN
],buf
[STRLEN
];
717 char ktformat
[STRLEN
], sktformat
[STRLEN
];
718 char kteformat
[STRLEN
], skteformat
[STRLEN
];
721 bool result_OK
=TRUE
,bEE
=TRUE
;
723 CopyRight(stderr
,argv
[0]);
724 parse_common_args(&argc
,argv
,
726 NFILE
,fnm
,asize(pa
),pa
,asize(desc
),desc
,0,NULL
,&oenv
);
728 nfile
= opt2fns(&fnms
,"-f",NFILE
,fnm
);
731 gmx_fatal(FARGS
,"No input files!");
736 gmx_fatal(FARGS
,"Can not have negative number of digits");
739 sprintf( dgformat
,"%%%d.%df",3+nd
,nd
);
740 /* the format strings of the results in kT */
741 sprintf( ktformat
,"%%%d.%df",5+nd
,nd
);
742 sprintf( sktformat
,"%%%ds",6+nd
);
743 /* the format strings of the errors in kT */
744 sprintf( kteformat
,"%%%d.%df",3+nd
,nd
);
745 sprintf( skteformat
,"%%%ds",4+nd
);
746 sprintf(xvg2format
,"%s %s\n","%g",dgformat
);
747 sprintf(xvg3format
,"%s %s %s\n","%g",dgformat
,dgformat
);
751 snew(results
,nfile
-1);
752 snew(partsum
,(nbmax
+1)*(nbmax
+1));
755 for(f
=0; f
<nfile
; f
++)
757 read_barsim(fnms
[f
],begin
,end
,temp
,&ba
[f
]);
758 if (f
> 0 && ba
[f
].temp
!= ba
[0].temp
)
760 printf("\nWARNING: temperature for file '%s' (%g) is not equal to that of file '%s' (%g)\n\n",fnms
[f
],ba
[f
].temp
,fnms
[0],ba
[0].temp
);
765 gmx_fatal(FARGS
,"File '%s' contains less than two columns",fnms
[f
]);
767 else if (ba
[f
].nset
== 1)
778 if (n1
> 0 && nm
> 0)
780 gmx_fatal(FARGS
,"Some dhdl files contain only one value (assuming dH/dl), while others contain multiple values (assuming dH/dl and Delta H), will not proceed because of possible inconsistencies");
783 /* Sort the data sets on lambda */
784 for(f
=0; f
<nfile
-1; f
++)
787 for(f2
=f
+1; f2
<nfile
; f2
++)
789 if (ba
[f2
].lambda
[0] == ba
[fm
].lambda
[0])
791 gmx_fatal(FARGS
,"There are multiple files with lambda = %g",
794 else if (ba
[f2
].lambda
[0] < ba
[fm
].lambda
[0])
806 printf("Only one y value in all files,\n"
807 "assuming the Hamiltonian depends linearly on lambda\n\n");
811 if (opt2bSet("-o",NFILE
,fnm
))
813 sprintf(buf
,"%s (%s)","\\DeltaG",unit_energy
);
814 fpb
= xvgropen_type(opt2fn("-o",NFILE
,fnm
),"Free energy differences",
815 "\\lambda",buf
,exvggtXYDY
,oenv
);
819 if (opt2bSet("-oi",NFILE
,fnm
))
821 sprintf(buf
,"%s (%s)","\\DeltaG",unit_energy
);
822 fpi
= xvgropen(opt2fn("-oi",NFILE
,fnm
),"Free energy integral",
823 "\\lambda",buf
,oenv
);
826 /* first calculate results */
828 for(f
=0; f
<nfile
-1; f
++)
830 /* Determine the free energy difference with a factor of 10
831 * more accuracy than requested for printing.
833 calc_bar(&ba
[f
], &ba
[f
+1], n1
>0, 0.1*prec
, nbmin
, nbmax
,
834 &(results
[f
]), &bEE
, partsum
);
837 /* print results in kT */
838 kT
= BOLTZ
*ba
[0].temp
;
841 printf("\nTemperature: %g K\n", ba
[0].temp
);
843 printf("\nDetailed results in kT (see help for explanation):\n\n");
844 printf(skteformat
, "lam_A ");
845 printf(skteformat
, "lam_B ");
846 printf(sktformat
, "DG ");
847 printf(skteformat
, "+/- ");
848 printf(sktformat
, "s_A ");
849 printf(skteformat
, "+/- " );
850 printf(sktformat
, "s_B ");
851 printf(skteformat
, "+/- " );
852 printf(sktformat
, "stdev ");
853 printf(skteformat
, "+/- ");
855 for(f
=0; f
<nfile
-1; f
++)
857 printf(kteformat
, results
[f
].lambda_a
);
859 printf(kteformat
, results
[f
].lambda_b
);
861 printf(ktformat
, results
[f
].dg
);
863 printf(kteformat
, results
[f
].dg_err
);
865 printf(ktformat
, results
[f
].sa
);
867 printf(kteformat
, results
[f
].sa_err
);
869 printf(ktformat
, results
[f
].sb
);
871 printf(kteformat
, results
[f
].sb_err
);
873 printf(ktformat
, results
[f
].dg_stddev
);
875 printf(kteformat
, results
[f
].dg_stddev_err
);
878 /* Check for negative relative entropy with a 95% certainty. */
879 if (results
[f
].sa
< -2*results
[f
].sa_err
||
880 results
[f
].sb
< -2*results
[f
].sb_err
)
888 printf("\nWARNING: Some of these results violate the Second Law of "
890 " This is can be the result of severe undersampling, or "
892 " there is something wrong with the simulations.\n");
896 /* final results in kJ/mol */
897 printf("\n\nFinal results in kJ/mol:\n\n");
899 for(f
=0; f
<nfile
-1; f
++)
904 fprintf(fpi
, xvg2format
, ba
[f
].lambda
[0], dg_tot
);
910 fprintf(fpb
, xvg3format
,
911 0.5*(ba
[f
].lambda
[0] + ba
[f
+1].lambda
[0]),
912 results
[f
].dg
,results
[f
].dg_err
);
915 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
916 results[f].lambda_b);*/
918 printf(dgformat
, results
[f
].lambda_a
);
920 printf(dgformat
, results
[f
].lambda_b
);
923 printf(dgformat
,results
[f
].dg
*kT
);
925 printf(dgformat
,results
[f
].dg_err
*kT
);
928 dg_tot
+= results
[f
].dg
;
932 printf(dgformat
, ba
[0].lambda
[0]);
934 printf(dgformat
, ba
[nfile
-1].lambda
[0]);
937 printf(dgformat
,dg_tot
*kT
);
941 printf(dgformat
,bar_err(nbmin
,nbmax
,partsum
)*kT
);
947 fprintf(fpi
, xvg2format
,
948 ba
[nfile
-1].lambda
[0], dg_tot
);
952 do_view(oenv
,opt2fn_null("-o",NFILE
,fnm
),"-xydy");
953 do_view(oenv
,opt2fn_null("-oi",NFILE
,fnm
),"-xydy");