Redefine the default boolean type to gmx_bool.
[gromacs.git] / src / tools / gmx_bar.c
blob9bab6916cdc8a6943c4c19242f8c72c0f468ee62
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <math.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <math.h>
43 #include <float.h>
45 #include "sysstuff.h"
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "futil.h"
49 #include "statutil.h"
50 #include "copyrite.h"
51 #include "macros.h"
52 #include "enxio.h"
53 #include "physics.h"
54 #include "gmx_fatal.h"
55 #include "xvgr.h"
56 #include "gmx_ana.h"
57 #include "maths.h"
60 typedef struct
62 unsigned int *bin; /* the histogram values */
63 double dx; /* the histogram spacing */
64 gmx_large_int_t x0; /* the histogram start point as int */
66 int nbin; /* the number of bins */
67 gmx_large_int_t sum; /* the total number of counts */
69 double starttime, endtime; /* start time, end time of histogram */
70 } barhist_t;
73 /* the raw data from a simulation */
74 typedef struct {
75 char *filename;
76 int ftp; /* file type */
77 int nset; /* number of lambdas, including dhdl */
78 int *np; /* number of data points (du or hists) per lambda */
79 int np_alloc; /* number of points (du or hists) allocated */
80 double temp; /* temperature */
81 double *lambda; /* the lambdas (of first index for y). The first one
82 is just the 'native' lambda */
83 double *t; /* the times (of second index for y) */
84 double **y; /* the dU values. y[0] holds the derivative, while
85 further ones contain the energy differences between
86 the native lambda and the 'foreign' lambdas. */
87 barhist_t **hists; /* histograms. */
88 } barsim_t;
90 /* an aggregate of samples for partial free energy calculation */
91 typedef struct barsamples_t
93 double native_lambda;
94 double foreign_lambda;
95 double temp;
97 int nndu; /* number of delta u sample collections */
98 int *ndu; /* the number of delta U samples */
99 double **du; /* the delta u's */
100 double **t; /* the times associated with those samples */
102 int nhist; /* the number of histograms */
103 barhist_t *hists; /* the histograms */
105 gmx_large_int_t ntot; /* total number of samples */
107 struct barsamples_t *next, *prev; /* the next and prev in the list */
108 } barsamples_t;
110 /* all the samples associated with a lambda point */
111 typedef struct barlambda_t
113 double lambda; /* the native lambda */
114 double temp; /* temperature */
116 int nbs; /* number of barsamples_ts with their own lambda */
117 barsamples_t *bs; /* the samples */
119 barsamples_t bs_head; /* the pre-allocated list head for the linked list.
120 Also used to contain the dhdl data. */
122 struct barlambda_t *next, *prev; /* the next and prev in the list */
123 } barlambda_t;
126 /* calculated values. */
127 typedef struct {
128 /*barsim_t *a, *b; *//* the simulation data */
129 barsamples_t *a, *b;
131 /*double lambda_a, lambda_b; *//* the lambda values at a and b */
133 double dg; /* the free energy difference */
134 double dg_err; /* the free energy difference */
136 double dg_disc_err; /* discretization error */
137 double dg_histrange_err; /* histogram range error */
139 double sa; /* relative entropy of b in state a */
140 double sa_err; /* error in sa */
141 double sb; /* relative entropy of a in state b */
142 double sb_err; /* error in sb */
144 double dg_stddev; /* expected dg stddev per sample */
145 double dg_stddev_err; /* error in dg_stddev */
146 } barres_t;
150 static void barsim_init(barsim_t *ba)
152 ba->filename=NULL;
153 ba->nset=0;
154 ba->np_alloc=0;
155 ba->np=NULL;
156 ba->y=NULL;
157 ba->hists=NULL;
160 static void barsamples_init(barsamples_t *bs, double native_lambda,
161 double foreign_lambda, double temp)
163 bs->native_lambda=native_lambda;
164 bs->foreign_lambda=foreign_lambda;
165 bs->temp=temp;
166 bs->nndu=0;
167 bs->nhist=0;
168 bs->ntot=0;
169 bs->ndu=NULL;
170 bs->du=NULL;
171 bs->t=NULL;
172 bs->hists=NULL;
174 bs->next=NULL;
175 bs->prev=NULL;
178 /* destroy the data structures directly associated with the structure, not
179 the data it points to */
180 static void barsamples_destroy(barsamples_t *bs)
182 sfree(bs->ndu);
183 sfree(bs->du);
184 sfree(bs->t);
185 sfree(bs->hists);
188 static void barlambda_init(barlambda_t *bl, double native_lambda, double temp)
190 bl->lambda=native_lambda;
191 bl->temp=temp;
192 bl->nbs=0;
193 bl->next=NULL;
194 bl->prev=NULL;
195 bl->bs=&(bl->bs_head);
196 barsamples_init(bl->bs, native_lambda, 0., 0.);
197 bl->bs->next=bl->bs;
198 bl->bs->prev=bl->bs;
201 static void barres_init(barres_t *br)
203 br->dg=0;
204 br->dg_err=0;
205 br->sa=0;
206 br->sa_err=0;
207 br->sb=0;
208 br->sb_err=0;
209 br->dg_stddev=0;
210 br->dg_stddev_err=0;
212 br->a=NULL;
213 br->b=NULL;
217 static gmx_bool lambda_same(double lambda1, double lambda2)
219 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
222 /* find the barsamples_t associated with a barlambda that corresponds to
223 a specific foreign lambda */
224 barsamples_t *barlambda_find_barsample(barlambda_t *bl, double foreign_lambda)
226 barsamples_t *bs=bl->bs->next;
228 while(bs != bl->bs)
230 if (lambda_same(bs->foreign_lambda,foreign_lambda))
232 return bs;
234 bs=bs->next;
237 return NULL;
240 /* create subsample i out of ni from an existing barsample_t */
241 void barsamples_create_subsample(barsamples_t *bs, barsamples_t *bs_orig,
242 int i, int ni)
244 int j;
245 int hist_start, hist_end;
247 *bs = *bs_orig; /* just copy all fields */
249 /* allocate proprietary memory */
250 snew(bs->ndu, bs_orig->nndu);
251 snew(bs->du, bs_orig->nndu);
252 snew(bs->t, bs_orig->nndu);
253 snew(bs->hists, bs_orig->nhist);
255 /* fix all the du fields */
256 for(j=0;j<bs_orig->nndu;j++)
258 /* these ugly casts avoid a nasty overflow if ndu is very big. */
259 int start=(int)(bs_orig->ndu[j]*((double)(i)/ni));
260 int end=(int)(bs_orig->ndu[j]*((double)(i+1.)/ni))-1;
262 bs->ndu[j]=end-start+1;
263 bs->du[j]=&(bs_orig->du[j][start]);
264 bs->t[j]=&(bs_orig->t[j][start]);
266 /* and all histograms */
267 hist_start = (int)(bs_orig->nhist*((double)(i)/ni));
268 hist_end = (int)(bs_orig->nhist*((double)(i+1)/ni))-1;
269 bs->nhist=(hist_end-hist_start)+1;
270 for(j=0;j<bs->nhist;j++)
271 bs->hists[j] = bs_orig->hists[j+hist_start];
273 /* and count ntot */
274 bs->ntot = 0;
275 for(i=0;i<bs->nndu;i++)
276 bs->ntot += bs->ndu[i];
277 for(i=0;i<bs->nhist;i++)
278 bs->ntot += bs->hists[i].sum;
282 /* add simulation data to a barlambda structure */
283 void barlambda_add_sim(barlambda_t *bl, barsim_t *ba, double begin, double end)
285 int i,j;
287 if (!lambda_same(bl->lambda, ba->lambda[0]))
288 gmx_fatal(FARGS, "barlambda_add_sim lambdas inconsistent!");
290 for(i=0;i<ba->nset;i++)
292 if (ba->np[i] > 0)
294 barsamples_t *bs=NULL;
296 if (i>0)
298 bs=barlambda_find_barsample(bl, ba->lambda[i]);
301 if (!bs)
303 /* we make a new barsamples_t */
304 snew(bs, 1);
305 barsamples_init(bs, bl->lambda, ba->lambda[i], bl->temp);
306 /* insert it */
307 bs->next=bl->bs;
308 bs->prev=bl->bs->prev;
309 bs->next->prev=bs;
310 bs->prev->next=bs;
312 /* else
313 there already exists a barsamples_t with this foreign lambda
314 and we don't need to do anything */
316 /* and add our samples */
317 if (ba->y && ba->y[i])
319 int ndu=ba->np[i];
320 double *y=ba->y[i];
321 double *t=ba->t;
323 /* get the right time */
324 while( ndu>0 && *t < begin )
326 ndu--;
327 y++;
328 t++;
330 if (end > begin)
332 while(t[ndu-1] > end)
334 ndu--;
338 bs->nndu++;
339 srenew(bs->ndu, bs->nndu);
340 srenew(bs->du, bs->nndu);
341 srenew(bs->t, bs->nndu);
342 bs->ndu[bs->nndu-1]=ndu;
343 bs->du[bs->nndu-1]=y;
344 bs->t[bs->nndu-1]=t;
345 bs->ntot += ndu;
347 if (ba->hists && ba->hists[i])
349 int nhist_prev = bs->nhist;
350 int starti=0;
351 int endi=ba->np[i];
353 while( starti<endi && ba->hists[i][starti].endtime<begin)
355 starti++;
357 if (end > begin)
359 while((endi>starti) && (ba->hists[i][endi].starttime>end))
361 endi--;
364 bs->nhist += endi-starti;
365 srenew(bs->hists, bs->nhist);
366 for(j=starti;j<endi;j++)
368 int hi=nhist_prev+(j-starti);
369 bs->hists[hi] = ba->hists[i][j];
370 bs->ntot += bs->hists[hi].sum;
378 /* assemble an ordered list of barlambda_ts */
379 barlambda_t *barlambdas_list_create(barsim_t *ba, int nfile,
380 int *nbl, double begin, double end)
382 barsim_t ba_tmp;
383 barlambda_t *bl_head; /* the head of the list */
384 int i;
386 snew(bl_head, 1); /* the first element is a dummy element */
387 barlambda_init(bl_head, 0, 0);
388 bl_head->next=bl_head;
389 bl_head->prev=bl_head;
390 *nbl=0;
392 for(i=0; i<nfile; i++)
394 barlambda_t *bl=bl_head->next;
395 gmx_bool inserted=FALSE;
397 while(bl != bl_head)
399 if (lambda_same(ba[i].lambda[0], bl->lambda))
401 /* this lambda is the same as a previous lambda; add
402 our samples */
403 barlambda_add_sim(bl, &(ba[i]), begin, end);
404 inserted=TRUE;
405 break;
407 if ( bl->lambda > ba[i].lambda[0] )
409 break;
411 bl=bl->next;
414 if (!inserted)
416 barlambda_t *bl_new;
418 snew(bl_new, 1);
419 (*nbl)++;
420 barlambda_init(bl_new,ba[i].lambda[0], ba[i].temp);
421 /* update linked list */
422 bl_new->next=bl;
423 bl_new->prev=bl->prev;
424 bl_new->prev->next=bl_new;
425 bl_new->next->prev=bl_new;
426 barlambda_add_sim(bl_new, &(ba[i]), begin, end);
429 return bl_head;
432 /* make a histogram out of a sample collection */
433 void barsamples_make_hist(barsamples_t *bs, int **bin,
434 int *nbin_alloc, int *nbin,
435 double *dx, double *xmin, int nbin_default)
437 int i,j;
438 gmx_bool dx_set=FALSE;
439 gmx_bool xmin_set=FALSE;
441 gmx_bool xmax_set=FALSE;
442 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the limits of
443 a histogram */
444 double xmax=-1;
446 /* first determine dx and xmin; try the histograms */
447 for(i=0;i<bs->nhist;i++)
449 double xmax_now=(bs->hists[i].x0+bs->hists[i].nbin)*bs->hists[i].dx;
451 /* we use the biggest dx*/
452 if ( (!dx_set) || bs->hists[i].dx > *dx)
454 dx_set=TRUE;
455 *dx = bs->hists[i].dx;
457 if ( (!xmin_set) || (bs->hists[i].x0*bs->hists[i].dx) < *xmin)
459 xmin_set=TRUE;
460 *xmin = (bs->hists[i].x0*bs->hists[i].dx);
463 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
465 xmax_set=TRUE;
466 xmax = xmax_now;
467 if (bs->hists[i].bin[bs->hists[i].nbin-1] != 0)
468 xmax_set_hard=TRUE;
470 if ( bs->hists[i].bin[bs->hists[i].nbin-1]!=0 && (xmax_now < xmax) )
472 xmax_set_hard=TRUE;
473 xmax = xmax_now;
476 /* and the delta us */
477 for(i=0;i<bs->nndu;i++)
479 if (bs->ndu[i]>0)
481 /* determine min and max */
482 double du_xmin=bs->du[i][0];
483 double du_xmax=bs->du[i][0];
484 for(j=1;j<bs->ndu[i];j++)
486 if (bs->du[i][j] < du_xmin)
487 du_xmin = bs->du[i][j];
488 if (bs->du[i][j] > du_xmax)
489 du_xmax = bs->du[i][j];
492 /* and now change the limits */
493 if ( (!xmin_set) || (du_xmin < *xmin) )
495 xmin_set=TRUE;
496 *xmin=du_xmin;
498 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
500 xmax_set=TRUE;
501 xmax=du_xmax;
506 if (!xmax_set || !xmin_set)
508 *nbin=0;
509 return;
513 if (!dx_set)
515 *nbin=nbin_default;
516 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
517 be 0, and we count from 0 */
519 else
521 *nbin=(xmax-(*xmin))/(*dx);
524 if (*nbin > *nbin_alloc)
526 *nbin_alloc=*nbin;
527 srenew(*bin, *nbin_alloc);
530 /* reset the histogram */
531 for(i=0;i<(*nbin);i++)
533 (*bin)[i] = 0;
536 /* now add the acutal data */
537 for(i=0;i<bs->nhist;i++)
539 double xmin_hist=bs->hists[i].x0*bs->hists[i].dx;
540 for(j=0;j<bs->hists[i].nbin;j++)
542 /* calculate the bin corresponding to the middle of the original
543 bin */
544 double x=bs->hists[i].dx*(j+0.5) + xmin_hist;
545 int binnr=(int)((x-(*xmin))/(*dx));
547 if (binnr >= *nbin || binnr<0)
548 binnr = (*nbin)-1;
550 (*bin)[binnr] += bs->hists[i].bin[j];
553 for(i=0;i<bs->nndu;i++)
555 for(j=0;j<bs->ndu[i];j++)
557 int binnr=(int)((bs->du[i][j]-(*xmin))/(*dx));
558 if (binnr >= *nbin || binnr<0)
559 binnr = (*nbin)-1;
561 (*bin)[binnr] ++;
566 /* write a collection of histograms to a file */
567 void barlambdas_histogram(barlambda_t *bl_head, const char *filename,
568 int nbin_default, const output_env_t oenv)
570 char label_x[STRLEN];
571 const char *title="N(\\Delta H)";
572 const char *label_y="Samples";
573 FILE *fp;
574 barlambda_t *bl;
575 int nsets=0;
576 char **setnames=NULL;
577 gmx_bool first_set=FALSE;
578 /* histogram data: */
579 int *hist=NULL;
580 int nbin=0;
581 int nbin_alloc=0;
582 double dx=0;
583 double min;
584 int i;
586 sprintf(label_x, "\\Delta H (%s)", unit_energy);
588 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
590 /* first get all the set names */
591 bl=bl_head->next;
592 /* iterate over all lambdas */
593 while(bl!=bl_head)
595 barsamples_t *bs=bl->bs->next;
597 /* iterate over all samples */
598 while(bs!=bl->bs)
600 nsets++;
601 srenew(setnames, nsets);
602 snew(setnames[nsets-1], STRLEN);
603 if (!lambda_same(bs->foreign_lambda, bs->native_lambda))
605 sprintf(setnames[nsets-1],
606 "N(H(\\lambda=%g) - H(\\lambda=%g) | \\lambda=%g)",
607 bs->foreign_lambda, bs->native_lambda,
608 bs->native_lambda);
610 else
612 sprintf(setnames[nsets-1],
613 "N(dH/d\\lambda | \\lambda=%g)",
614 bs->native_lambda);
616 bs=bs->next;
619 bl=bl->next;
621 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
624 /* now make the histograms */
625 bl=bl_head->next;
626 /* iterate over all lambdas */
627 while(bl!=bl_head)
629 barsamples_t *bs=bl->bs->next;
631 /* iterate over all samples */
632 while(bs!=bl->bs)
634 if (!first_set)
635 xvgr_new_dataset(fp, oenv);
637 barsamples_make_hist(bs, &hist, &nbin_alloc, &nbin, &dx, &min,
638 nbin_default);
640 for(i=0;i<nbin;i++)
642 double xmin=i*dx;
643 double xmax=(i+1)*dx;
645 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
648 first_set=FALSE;
649 bs=bs->next;
652 bl=bl->next;
655 if(hist)
656 sfree(hist);
658 xvgrclose(fp);
661 /* create a collection (array) of barres_t object given a ordered linked list
662 of barlamda_t sample collections */
663 static barres_t *barres_list_create(barlambda_t *bl_head, int *nres)
665 barlambda_t *bl;
666 int nlambda=0;
667 barres_t *res;
668 int i;
669 gmx_bool dhdl=FALSE;
670 gmx_bool first=TRUE;
672 /* first count the barlambdas */
673 bl=bl_head->next;
674 while(bl!=bl_head)
676 nlambda++;
677 bl=bl->next;
679 snew(res, nlambda-1);
681 /* next put the right samples in the res */
682 *nres=0;
683 bl=bl_head->next->next; /* we start with the second one. */
684 while(bl!=bl_head)
686 barsamples_t *bs,*bsprev;
687 barres_t *br=&(res[*nres]);
688 /* there is always a previous one. we search for that as a foreign
689 lambda: */
690 bsprev=barlambda_find_barsample(bl->prev, bl->lambda);
691 bs=barlambda_find_barsample(bl, bl->prev->lambda);
693 barres_init(br);
695 if (!bsprev && !bs)
697 /* we use dhdl */
699 bsprev=barlambda_find_barsample(bl->prev, bl->prev->lambda);
700 bs=barlambda_find_barsample(bl, bl->lambda);
702 if (first)
704 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
705 dhdl=TRUE;
707 if (!dhdl)
709 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
713 /* normal delta H */
714 if (!bsprev)
716 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->lambda,bl->prev->lambda);
718 if (!bs)
720 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->prev->lambda,bl->lambda);
722 br->a = bsprev;
723 br->b = bs;
725 first=FALSE;
726 (*nres)++;
727 bl=bl->next;
729 return res;
732 /* estimate the maximum discretization error */
733 static double barres_list_max_disc_err(barres_t *res, int nres)
735 int i,j;
736 double disc_err=0.;
738 for(i=0;i<nres;i++)
740 barres_t *br=&(res[i]);
742 for(j=0;j<br->a->nhist;j++)
744 disc_err=max(disc_err, br->a->hists[j].dx);
746 for(j=0;j<br->b->nhist;j++)
748 disc_err=max(disc_err, br->b->hists[j].dx);
751 return disc_err;
754 static double calc_bar_sum(int n,double *W,double Wfac,double sbMmDG)
756 int i;
757 double sum;
759 sum = 0;
761 for(i=0; i<n; i++)
763 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
766 return sum;
769 /* calculate the BAR average given a histogram
771 if type== 0, calculate the best estimate for the average,
772 if type==-1, calculate the minimum possible value given the histogram
773 if type== 1, calculate the maximum possible value given the histogram */
774 static double calc_bar_sum_hist(barhist_t *hist, double Wfac, double sbMmDG,
775 int type)
777 double sum=0.;
778 int i;
779 int max=hist->nbin-1;
780 /* normalization factor multiplied with bin width and
781 number of samples (we normalize through M): */
782 double normdx = 1.;
784 if (type==1)
786 max=hist->nbin; /* we also add whatever was out of range */
789 for(i=0;i<max;i++)
791 double x=Wfac*((i+hist->x0)+0.5)*hist->dx; /* bin middle */
792 double pxdx=hist->bin[i]*normdx; /* p(x)dx */
794 sum += pxdx/(1. + exp(x + sbMmDG));
797 return sum;
800 static double calc_bar_lowlevel(barsamples_t *ba, barsamples_t *bb,
801 double temp, double tol, int type)
803 double kT,beta,M;
804 double DG;
805 int i,j;
806 double Wfac1,Wfac2,Wmin,Wmax;
807 double DG0,DG1,DG2,dDG1;
808 double sum1,sum2;
809 double n1, n2; /* numbers of samples as doubles */
811 kT = BOLTZ*temp;
812 beta = 1/kT;
814 /* count the numbers of samples */
815 n1 = ba->ntot;
816 n2 = bb->ntot;
818 M = log(n1/n2);
820 if (!lambda_same(ba->native_lambda, ba->foreign_lambda))
822 /* this is the case when the delta U were calculated directly
823 (i.e. we're not scaling dhdl) */
824 Wfac1 = beta;
825 Wfac2 = beta;
827 else
829 /* we're using dhdl, so delta_lambda needs to be a
830 multiplication factor. */
831 double delta_lambda=bb->native_lambda-ba->native_lambda;
832 Wfac1 = beta*delta_lambda;
833 Wfac2 = -beta*delta_lambda;
836 if (beta < 1)
838 /* We print the output both in kT and kJ/mol.
839 * Here we determine DG in kT, so when beta < 1
840 * the precision has to be increased.
842 tol *= beta;
845 /* Calculate minimum and maximum work to give an initial estimate of
846 * delta G as their average.
848 Wmin=FLT_MAX;
849 Wmax=-FLT_MAX;
850 for(i=0;i<ba->nndu;i++)
852 for(j=1; j<ba->ndu[i]; j++)
854 Wmin = min(Wmin,ba->du[i][j]*Wfac1);
855 Wmax = max(Wmax,ba->du[i][j]*Wfac1);
858 for(i=0;i<ba->nhist;i++)
860 Wmin = min(Wmin,ba->hists[i].x0*ba->hists[i].dx);
861 for(j=ba->hists[i].nbin-1;j>=0;j--)
863 /* look for the highest value bin with values */
864 if (ba->hists[i].bin[j]>0)
866 Wmax=max(Wmax,(j+ba->hists[i].x0+1)*ba->hists[i].dx);
867 break;
871 for(i=0;i<bb->nndu;i++)
873 for(j=1; j<bb->ndu[i]; j++)
875 Wmin = min(Wmin,bb->du[i][j]*Wfac1);
876 Wmax = max(Wmax,bb->du[i][j]*Wfac1);
879 for(i=0;i<bb->nhist;i++)
881 Wmin = min(Wmin,bb->hists[i].x0*bb->hists[i].dx);
882 for(j=bb->hists[i].nbin-1;j>=0;j--)
884 /* look for the highest value bin with values */
885 if (bb->hists[i].bin[j]>0)
887 Wmax=max(Wmax,(j+bb->hists[i].x0+1)*bb->hists[i].dx);
888 break;
893 DG0 = Wmin;
894 DG2 = Wmax;
896 if (debug)
898 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
900 /* We approximate by bisection: given our initial estimates
901 we keep checking whether the halfway point is greater or
902 smaller than what we get out of the BAR averages.
904 For the comparison we can use twice the tolerance. */
905 while (DG2 - DG0 > 2*tol)
907 DG1 = 0.5*(DG0 + DG2);
909 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
910 DG1);*/
912 /* calculate the BAR averages */
913 dDG1=0.;
914 for(i=0;i<ba->nndu;i++)
916 /* first the du lists */
917 dDG1 += calc_bar_sum(ba->ndu[i], ba->du[i], Wfac1, (M-DG1));
919 for(i=0;i<ba->nhist;i++)
921 /* then the histograms */
922 dDG1 += calc_bar_sum_hist(&(ba->hists[i]), Wfac1, (M-DG1), type);
925 for(i=0;i<bb->nndu;i++)
927 dDG1 -= calc_bar_sum(bb->ndu[i], bb->du[i], Wfac2, -(M-DG1));
929 for(i=0;i<bb->nhist;i++)
931 dDG1 -= calc_bar_sum_hist(&(bb->hists[i]), Wfac2, -(M-DG1), type);
934 if (dDG1 < 0)
936 DG0 = DG1;
938 else
940 DG2 = DG1;
942 if (debug)
944 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
948 return 0.5*(DG0 + DG2);
951 static void calc_rel_entropy(barsamples_t *ba, barsamples_t *bb,
952 double temp, double dg, double *sa, double *sb)
954 int i,j;
955 double W_ab=0.;
956 double W_ba=0.;
957 double kT, beta;
958 double Wfac1, Wfac2;
959 double n1,n2;
961 kT = BOLTZ*temp;
962 beta = 1/kT;
965 /* count the numbers of samples */
966 n1 = ba->ntot;
967 n2 = bb->ntot;
969 /* to ensure the work values are the same as during the delta_G */
970 if (!lambda_same(ba->native_lambda, ba->foreign_lambda))
972 /* this is the case when the delta U were calculated directly
973 (i.e. we're not scaling dhdl) */
974 Wfac1 = beta;
975 Wfac2 = beta;
977 else
979 /* we're using dhdl, so delta_lambda needs to be a
980 multiplication factor. */
981 double delta_lambda=bb->native_lambda-ba->native_lambda;
982 Wfac1 = beta*delta_lambda;
983 Wfac2 = -beta*delta_lambda;
986 /* first calculate the average work in both directions */
987 for(i=0;i<ba->nndu;i++)
989 for(j=0;j<ba->ndu[i];j++)
991 W_ab += Wfac1*ba->du[i][j];
994 for(i=0;i<ba->nhist;i++)
996 /* then the histograms */
997 /* normalization factor multiplied with bin width and
998 number of samples (we normalize through M): */
999 double normdx = 1.;
1000 barhist_t *hist=&(ba->hists[i]);
1002 for(j=0;j<hist->nbin;j++)
1004 double x=Wfac1*((j+hist->x0)+0.5)*hist->dx; /* bin middle */
1005 double pxdx=hist->bin[j]*normdx; /* p(x)dx */
1007 W_ab += pxdx*x;
1010 W_ab/=n1;
1011 for(i=0;i<bb->nndu;i++)
1013 for(j=0;j<bb->ndu[i];j++)
1015 W_ba += Wfac2*bb->du[i][j];
1018 for(i=0;i<bb->nhist;i++)
1020 /* then the histograms */
1021 /* normalization factor multiplied with bin width and
1022 number of samples (we normalize through M): */
1023 double normdx = 1.;
1024 barhist_t *hist=&(bb->hists[i]);
1026 for(j=0;j<hist->nbin;j++)
1028 double x=Wfac2*((j+hist->x0)+0.5)*hist->dx; /* bin middle */
1029 double pxdx=hist->bin[j]*normdx; /* p(x)dx */
1031 W_ba += pxdx*x;
1034 W_ba/=n2;
1036 /* then calculate the relative entropies */
1037 *sa = (W_ab - dg);
1038 *sb = (W_ba + dg);
1041 static void calc_dg_stddev(barsamples_t *ba, barsamples_t *bb,
1042 double temp, double dg, double *stddev)
1044 int i,j;
1045 double M;
1046 double sigmafact=0.;
1047 double kT, beta;
1048 double Wfac1, Wfac2;
1049 double n1, n2;
1051 kT = BOLTZ*temp;
1052 beta = 1/kT;
1054 /* count the numbers of samples */
1055 n1 = ba->ntot;
1056 n2 = bb->ntot;
1058 /* to ensure the work values are the same as during the delta_G */
1059 if (!lambda_same(ba->native_lambda, ba->foreign_lambda))
1061 /* this is the case when the delta U were calculated directly
1062 (i.e. we're not scaling dhdl) */
1063 Wfac1 = beta;
1064 Wfac2 = beta;
1066 else
1068 /* we're using dhdl, so delta_lambda needs to be a
1069 multiplication factor. */
1070 double delta_lambda=bb->native_lambda-ba->native_lambda;
1071 Wfac1 = beta*delta_lambda;
1072 Wfac2 = -beta*delta_lambda;
1075 M = log(n1/n2);
1077 /* calculate average in both directions */
1078 for(i=0;i<ba->nndu;i++)
1080 for(j=0;j<ba->ndu[i];j++)
1082 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*ba->du[i][j] - dg)));
1085 for(i=0;i<ba->nhist;i++)
1087 /* then the histograms */
1088 /* normalization factor multiplied with bin width and
1089 number of samples (we normalize through M): */
1090 double normdx = 1.;
1091 barhist_t *hist=&(ba->hists[i]);
1093 for(j=0;j<hist->nbin;j++)
1095 double x=Wfac1*((j+hist->x0)+0.5)*hist->dx; /* bin middle */
1096 double pxdx=hist->bin[j]*normdx; /* p(x)dx */
1098 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1101 for(i=0;i<bb->nndu;i++)
1103 for(j=0;j<bb->ndu[i];j++)
1105 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*bb->du[i][j] - dg)));
1108 for(i=0;i<bb->nhist;i++)
1110 /* then the histograms */
1111 /* normalization factor multiplied with bin width and
1112 number of samples (we normalize through M): */
1113 double normdx = 1.;
1114 barhist_t *hist=&(bb->hists[i]);
1116 for(j=0;j<hist->nbin;j++)
1118 double x=Wfac2*((j+hist->x0)+0.5)*hist->dx; /* bin middle */
1119 double pxdx=hist->bin[j]*normdx; /* p(x)dx */
1121 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1124 sigmafact /= (n1 + n2);
1127 /* Eq. 10 from
1128 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1129 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1134 static void calc_bar(barres_t *br, double tol,
1135 int npee_min, int npee_max, gmx_bool *bEE,
1136 double *partsum)
1138 int npee,p;
1139 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1140 for calculated quantities */
1141 int nsample1, nsample2;
1142 double temp=br->a->temp;
1143 int i,j;
1144 double dg_min, dg_max;
1146 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1148 br->dg_disc_err = 0.;
1149 br->dg_histrange_err = 0.;
1150 if ((br->a->nhist > 0) || (br->b->nhist > 0) )
1152 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1153 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1155 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1157 /* the histogram range error is the biggest of the differences
1158 between the best estimate and the extremes */
1159 br->dg_histrange_err = fabs(dg_max - dg_min);
1161 br->dg_disc_err = 0.;
1162 for(i=0;i<br->a->nhist;i++)
1164 br->dg_disc_err=max(br->dg_disc_err, br->a->hists[i].dx);
1166 for(i=0;i<br->b->nhist;i++)
1168 br->dg_disc_err=max(br->dg_disc_err, br->b->hists[i].dx);
1172 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1174 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1176 dg_sig2 = 0;
1177 sa_sig2 = 0;
1178 sb_sig2 = 0;
1179 stddev_sig2 = 0;
1181 /* we look for the smallest sample size */
1182 nsample1=INT_MAX;
1183 for(i=0;i<br->a->nndu;i++)
1184 nsample1 = min( nsample1, br->a->ndu[i] );
1185 if (br->a->nhist > 0)
1186 nsample1 = min( nsample1, br->a->nhist );
1188 nsample2=INT_MAX;
1189 for(i=0;i<br->b->nndu;i++)
1190 nsample2 = min( nsample2, br->b->ndu[i] );
1191 if (br->b->nhist > 0)
1192 nsample2 = min( nsample2, br->b->nhist );
1195 printf("nsample1=%d, nsample2=%d\n", nsample1, nsample2);
1196 if (nsample1 >= npee_max && nsample2 >= npee_max)
1198 barsamples_t ba, bb;
1199 if ( (2*npee_max > nsample1) || (2*npee_max > nsample2) )
1201 if (npee_min != min(nsample1, nsample2) &&
1202 npee_max != min(nsample1, nsample2) )
1205 npee_min = npee_max = min(nsample1, nsample2);
1206 printf("NOTE: redefining nbin and nbmax to %d at lambda=%g-%g\n because of the small number of blocks available\n",
1207 npee_min, br->a->native_lambda, br->b->native_lambda);
1211 barsamples_init(&ba, br->a->native_lambda, br->a->foreign_lambda,
1212 br->a->temp);
1213 barsamples_init(&bb, br->b->native_lambda, br->b->foreign_lambda,
1214 br->b->temp);
1216 for(npee=npee_min; npee<=npee_max; npee++)
1218 double dgs = 0;
1219 double dgs2 = 0;
1220 double dsa = 0;
1221 double dsb = 0;
1222 double dsa2 = 0;
1223 double dsb2 = 0;
1224 double dstddev = 0;
1225 double dstddev2 = 0;
1228 for(p=0; p<npee; p++)
1230 double dgp;
1231 double stddevc;
1232 double sac, sbc;
1234 barsamples_create_subsample(&ba, br->a, p, npee);
1235 barsamples_create_subsample(&bb, br->b, p, npee);
1237 dgp=calc_bar_lowlevel(&ba, &bb, temp, tol, 0);
1238 dgs += dgp;
1239 dgs2 += dgp*dgp;
1241 partsum[npee*(npee_max+1)+p] += dgp;
1243 calc_rel_entropy(&ba, &bb, temp, dgp, &sac, &sbc);
1244 dsa += sac;
1245 dsa2 += sac*sac;
1246 dsb += sbc;
1247 dsb2 += sbc*sbc;
1248 calc_dg_stddev(&ba, &bb, temp, dgp, &stddevc );
1250 dstddev += stddevc;
1251 dstddev2 += stddevc*stddevc;
1253 barsamples_destroy(&ba);
1254 barsamples_destroy(&bb);
1256 dgs /= npee;
1257 dgs2 /= npee;
1258 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1260 dsa /= npee;
1261 dsa2 /= npee;
1262 dsb /= npee;
1263 dsb2 /= npee;
1264 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1265 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1267 dstddev /= npee;
1268 dstddev2 /= npee;
1269 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1271 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1272 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1273 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1274 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1276 else
1278 *bEE = FALSE;
1283 static double bar_err(int nbmin, int nbmax, const double *partsum)
1285 int nb,b;
1286 double svar,s,s2,dg;
1288 svar = 0;
1289 for(nb=nbmin; nb<=nbmax; nb++)
1291 s = 0;
1292 s2 = 0;
1293 for(b=0; b<nb; b++)
1295 dg = partsum[nb*(nbmax+1)+b];
1296 s += dg;
1297 s2 += dg*dg;
1299 s /= nb;
1300 s2 /= nb;
1301 svar += (s2 - s*s)/(nb - 1);
1304 return sqrt(svar/(nbmax + 1 - nbmin));
1308 static double legend2lambda(char *fn,const char *legend,gmx_bool bdhdl)
1310 double lambda=0;
1311 const char *ptr;
1313 if (legend == NULL)
1315 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1317 ptr = strrchr(legend,' ');
1318 if (( bdhdl && strstr(legend,"dH") == NULL) ||
1319 (!bdhdl && (strchr(legend,'D') == NULL ||
1320 strchr(legend,'H') == NULL)) ||
1321 ptr == NULL)
1323 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1325 if (sscanf(ptr,"%lf",&lambda) != 1)
1327 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1330 return lambda;
1333 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1335 gmx_bool bFound;
1336 char *ptr;
1338 bFound = FALSE;
1340 /* plain text lambda string */
1341 ptr = strstr(subtitle,"lambda");
1342 if (ptr == NULL)
1344 /* xmgrace formatted lambda string */
1345 ptr = strstr(subtitle,"\\xl\\f{}");
1347 if (ptr == NULL)
1349 /* xmgr formatted lambda string */
1350 ptr = strstr(subtitle,"\\8l\\4");
1352 if (ptr != NULL)
1354 ptr = strstr(ptr,"=");
1356 if (ptr != NULL)
1358 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1361 return bFound;
1364 static double filename2lambda(char *fn)
1366 double lambda;
1367 char *ptr,*endptr,*digitptr;
1368 int dirsep;
1369 ptr = fn;
1370 /* go to the end of the path string and search backward to find the last
1371 directory in the path which has to contain the value of lambda
1373 while (ptr[1] != '\0')
1375 ptr++;
1377 /* searching backward to find the second directory separator */
1378 dirsep = 0;
1379 digitptr = NULL;
1380 while (ptr >= fn)
1382 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1384 if (dirsep == 1) break;
1385 dirsep++;
1387 /* save the last position of a digit between the last two
1388 separators = in the last dirname */
1389 if (dirsep > 0 && isdigit(*ptr))
1391 digitptr = ptr;
1393 ptr--;
1395 if (!digitptr)
1397 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1398 " last directory in the path '%s' does not contain a number",fn);
1400 if (digitptr[-1] == '-')
1402 digitptr--;
1404 lambda = strtod(digitptr,&endptr);
1405 if (endptr == digitptr)
1407 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1410 return lambda;
1413 static void read_barsim_xvg(char *fn,double begin,double end,real temp,
1414 barsim_t *ba)
1416 int i;
1417 char *subtitle,**legend,*ptr;
1418 int np;
1420 barsim_init(ba);
1422 ba->filename = fn;
1424 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1425 if (!ba->y)
1427 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1429 ba->t = ba->y[0];
1431 snew(ba->np,ba->nset);
1432 for(i=0;i<ba->nset;i++)
1433 ba->np[i]=np;
1436 ba->temp = -1;
1437 if (subtitle != NULL)
1439 ptr = strstr(subtitle,"T =");
1440 if (ptr != NULL)
1442 ptr += 3;
1443 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1445 if (ba->temp <= 0)
1447 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1448 ba->temp,fn);
1453 if (ba->temp < 0)
1455 if (temp <= 0)
1457 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of g_bar",fn);
1459 ba->temp = temp;
1462 snew(ba->lambda,ba->nset-1);
1463 if (legend == NULL)
1465 /* Check if we have a single set, nset=2 means t and dH/dl */
1466 if (ba->nset == 2)
1468 /* Try to deduce lambda from the subtitle */
1469 if (subtitle != NULL &&
1470 !subtitle2lambda(subtitle,&ba->lambda[0]))
1472 /* Deduce lambda from the file name */
1473 ba->lambda[0] = filename2lambda(fn);
1476 else
1478 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1481 else
1483 for(i=0; i<ba->nset-1; i++)
1485 /* Read lambda from the legend */
1486 ba->lambda[i] = legend2lambda(fn,legend[i],i==0);
1490 /* Reorder the data */
1491 for(i=1; i<ba->nset; i++)
1493 ba->y[i-1] = ba->y[i];
1495 if (legend != NULL)
1497 for(i=0; i<ba->nset-1; i++)
1499 sfree(legend[i]);
1501 sfree(legend);
1503 ba->nset--;
1506 static void read_edr_rawdh(barsim_t *ba, t_enxblock *blk, int id,
1507 gmx_bool lambda_set, double starttime,
1508 double endtime)
1510 int j;
1511 gmx_bool allocated;
1513 if (starttime < 0 || endtime < 0)
1515 gmx_file("start time or end time <0. Probably due to wrong block order\n");
1519 /* check the block types etc. */
1520 if ( (blk->nsub < 2) ||
1521 (blk->sub[0].type != xdr_datatype_double) ||
1523 (blk->sub[1].type != xdr_datatype_float) &&
1524 (blk->sub[1].type != xdr_datatype_double)
1525 ) ||
1526 (blk->sub[0].nr < 1) )
1528 gmx_fatal(FARGS, "Unexpected block data in file %s", ba->filename);
1531 /* we assume the blocks come in the same order */
1532 if (!lambda_set)
1534 ba->lambda[id] = blk->sub[0].dval[0];
1536 else
1538 if (fabs(ba->lambda[id] - blk->sub[0].dval[0])>1e-12)
1540 gmx_fatal(FARGS, "lambdas change in %s", ba->filename);
1544 /* now make room for the data */
1545 if (ba->np[id] + blk->sub[1].nr > ba->np_alloc )
1547 ba->np_alloc = ba->np[id] + blk->sub[1].nr + 2;
1548 srenew(ba->t, ba->np_alloc);
1549 for(j=0;j<ba->nset;j++)
1551 srenew(ba->y[j], ba->np_alloc);
1553 allocated=TRUE;
1555 /* and copy the data */
1556 for(j=0;j<blk->sub[1].nr;j++)
1558 unsigned int index=ba->np[id];
1559 ba->np[id]++;
1560 if (allocated)
1561 ba->t[index] = ((endtime-starttime)*j)/blk->sub[1].nr;
1562 if (blk->sub[1].type == xdr_datatype_float)
1564 ba->y[id][index] = blk->sub[1].fval[j];
1566 else
1568 ba->y[id][index] = blk->sub[1].dval[j];
1573 static void read_edr_hist(barsim_t *ba, t_enxblock *blk, int id,
1574 gmx_bool lambda_set,
1575 double starttime, double endtime)
1577 int j;
1578 barhist_t *bh;
1580 if (starttime < 0 || endtime < 0)
1582 gmx_file("start time or end time <0. Probably due to wrong block order\n");
1585 /* check the block types etc. */
1586 if ( (blk->nsub < 3) ||
1587 (blk->sub[0].type != xdr_datatype_double) ||
1588 (blk->sub[1].type != xdr_datatype_large_int) ||
1589 (blk->sub[2].type != xdr_datatype_int) ||
1590 (blk->sub[0].nr < 2) ||
1591 (blk->sub[1].nr < 1) )
1593 gmx_fatal(FARGS, "Unexpected block data in file %s", ba->filename);
1596 if (ba->y != NULL)
1598 gmx_fatal(FARGS, "Can't have both histograms and raw delta U data in one file %s", ba->filename);
1601 if (!lambda_set)
1603 ba->lambda[id] = blk->sub[0].dval[0];
1605 else
1607 if (fabs(ba->lambda[id] - blk->sub[0].dval[0])>1e-12)
1609 gmx_fatal(FARGS, "lambdas change in %s: %g, %g", ba->filename,
1610 ba->lambda[id], blk->sub[0].dval[0]);
1613 if (blk->sub[2].nr > 0)
1615 /* make room for the data */
1616 if (ba->np[id] + 1 > ba->np_alloc)
1618 ba->np_alloc = ba->np[id] + 2;
1619 /*srenew(ba->t, ba->np_alloc);*/
1620 for(j=0;j<ba->nset;j++)
1622 srenew(ba->hists[j], ba->np_alloc);
1626 bh=&(ba->hists[id][ba->np[id]]);
1628 bh->dx = blk->sub[0].dval[1];
1629 bh->starttime = starttime;
1630 bh->endtime = endtime;
1631 bh->x0 = blk->sub[1].lval[0];
1633 bh->nbin = blk->sub[2].nr;
1634 bh->sum = 0;
1635 snew(bh->bin, bh->nbin);
1637 for(j=0;j<bh->nbin;j++)
1639 bh->bin[j] = (int)blk->sub[2].ival[j];
1640 bh->sum += bh->bin[j];
1643 ba->np[id]++;
1649 static void read_barsim_edr(char *fn,double begin,double end,real temp,
1650 barsim_t *ba)
1652 int i;
1653 ener_file_t fp;
1654 t_enxframe *fr;
1655 int nblocks=0;
1656 gmx_bool lambda_set=FALSE;
1657 int nre;
1658 gmx_enxnm_t *enm=NULL;
1661 barsim_init(ba);
1663 fp = open_enx(fn,"r");
1664 do_enxnms(fp,&nre,&enm);
1665 snew(fr, 1);
1667 ba->filename = fn;
1669 while(do_enx(fp, fr))
1671 /* count the data blocks */
1672 int nblocks_raw=0;
1673 int nblocks_hist=0;
1674 int nlam=0;
1675 int nb=0;
1676 double starttime=-1;
1677 double endtime=-1;
1679 for(i=0;i<fr->nblock;i++)
1681 if (fr->block[i].id == enxDHHIST )
1682 nblocks_hist++;
1683 if ( fr->block[i].id == enxDH )
1684 nblocks_raw++;
1685 if (fr->block[i].id == enxDHCOLL )
1686 nlam++;
1689 if (nblocks_raw > 0 && nblocks_hist > 0 )
1691 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
1693 if ((nblocks > 0 && (nblocks_raw+nblocks_hist)!=nblocks) || (nlam!=1 ))
1695 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
1696 fn, nblocks, nblocks_raw+nblocks_hist);
1699 nblocks=nblocks_raw + nblocks_hist;
1700 ba->nset=nblocks+1;
1702 if (!ba->lambda)
1703 snew(ba->lambda, ba->nset);
1704 if (!ba->np)
1706 snew(ba->np, ba->nset);
1707 for(i=0;i<ba->nset;i++)
1708 ba->np[i]=0.;
1710 if (!ba->y && nblocks_raw>0)
1712 snew(ba->y, ba->nset);
1713 for(i=0;i<ba->nset;i++)
1714 ba->y[i]=NULL;
1716 if (!ba->hists && nblocks_hist>0)
1718 snew(ba->hists, ba->nset);
1719 for(i=0;i<ba->nset;i++)
1720 ba->hists[i]=NULL;
1724 for(i=0;i<fr->nblock;i++)
1726 /* try to find the enxDHCOLL block */
1727 if (fr->block[i].id == enxDHCOLL)
1729 if ( (fr->block[i].nsub < 1) ||
1730 (fr->block[i].sub[0].type != xdr_datatype_double) ||
1731 (fr->block[i].sub[0].nr < 4))
1733 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
1736 ba->temp = fr->block[i].sub[0].dval[0];
1737 ba->lambda[0] = fr->block[i].sub[0].dval[1];
1738 starttime = fr->block[i].sub[0].dval[2];
1739 endtime = fr->block[i].sub[0].dval[3];
1742 if (fr->block[i].id == enxDH)
1745 read_edr_rawdh(ba, &(fr->block[i]), nb+1, lambda_set,
1746 starttime, endtime);
1747 nb++;
1750 if (fr->block[i].id == enxDHHIST)
1752 read_edr_hist(ba, &(fr->block[i]), nb+1, lambda_set,
1753 starttime, endtime);
1754 nb++;
1758 lambda_set=TRUE;
1760 fprintf(stderr, "\n");
1764 int gmx_bar(int argc,char *argv[])
1766 static const char *desc[] = {
1767 "g_bar calculates free energy difference estimates through ",
1768 "Bennett's acceptance ratio method. ",
1769 "Input option [TT]-f[tt] expects multiple dhdl files. ",
1770 "Two types of input files are supported:[BR]",
1771 "* Files with only one y-value, for such files it is assumed ",
1772 "that the y-value is dH/dlambda and that the Hamiltonian depends ",
1773 "linearly on lambda. The lambda value of the simulation is inferred ",
1774 "from the subtitle if present, otherwise from a number in the",
1775 "subdirectory in the file name.",
1776 "[BR]",
1777 "* Files with more than one y-value. The files should have columns ",
1778 "with dH/dlambda and Delta lambda. The lambda values are inferred ",
1779 "from the legends: ",
1780 "lambda of the simulation from the legend of dH/dlambda ",
1781 "and the foreign lambda's from the legends of Delta H.[PAR]",
1783 "The lambda of the simulation is parsed from dhdl.xvg file's legend ",
1784 "containing the string 'dH', the foreign lambda's from the legend ",
1785 "containing the capitalized letters 'D' and 'H'. The temperature ",
1786 "is parsed from the legend line containing 'T ='.[PAR]",
1788 "The free energy estimates are determined using BAR with bisection, ",
1789 "the precision of the output is set with [TT]-prec[tt]. ",
1790 "An error estimate taking into account time correlations ",
1791 "is made by splitting the data into blocks and determining ",
1792 "the free energy differences over those blocks and assuming ",
1793 "the blocks are independent. ",
1794 "The final error estimate is determined from the average variance ",
1795 "over 5 blocks. A range of blocks numbers for error estimation can ",
1796 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
1798 "The results are split in two parts: the last part contains the final ",
1799 "results in kJ/mol, together with the error estimate for each part ",
1800 "and the total. The first part contains detailed free energy ",
1801 "difference estimates and phase space overlap measures in units of ",
1802 "kT (together with their computed error estimate). The printed ",
1803 "values are:[BR]",
1804 "* lam_A: the lambda values for point A.[BR]",
1805 "* lam_B: the lambda values for point B.[BR]",
1806 "* DG: the free energy estimate.[BR]",
1807 "* s_A: an estimate of the relative entropy of B in A.[BR]",
1808 "* s_A: an estimate of the relative entropy of A in B.[BR]",
1809 "* stdev: an estimate expected per-sample standard deviation.[PAR]",
1811 "The relative entropy of both states in each other's ensemble can be ",
1812 "interpreted as a measure of phase space overlap: ",
1813 "the relative entropy s_A of the work samples of lambda_B in the ",
1814 "ensemble of lambda_A (and vice versa for s_B), is a ",
1815 "measure of the 'distance' between Boltzmann distributions of ",
1816 "the two states, that goes to zero for identical distributions. See ",
1817 "Wu & Kofke, J. Chem. Phys. 123 084109 (2009) for more information.",
1818 "[PAR]",
1819 "The estimate of the expected per-sample standard deviation, as given ",
1820 "in Bennett's original BAR paper: ",
1821 "Bennett, J. Comp. Phys. 22, p 245 (1976), Eq. 10 gives an estimate ",
1822 "of the quality of sampling (not directly of the actual statistical ",
1823 "error, because it assumes independent samples).[PAR]",
1826 static real begin=0,end=-1,temp=-1;
1827 int nd=2,nbmin=5,nbmax=5;
1828 int nbin=100;
1829 gmx_bool calc_s,calc_v;
1830 t_pargs pa[] = {
1831 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
1832 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
1833 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
1834 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
1835 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
1836 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
1837 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"}
1840 t_filenm fnm[] = {
1841 { efXVG, "-f", "dhdl", ffOPTRDMULT },
1842 { efXVG, "-o", "bar", ffOPTWR },
1843 { efXVG, "-oi", "barint", ffOPTWR },
1844 { efXVG, "-oh", "histogram", ffOPTWR },
1845 { efEDR, "-g", "energy", ffOPTRDMULT }
1847 #define NFILE asize(fnm)
1849 int f,i,j;
1850 int nf=0; /* file counter */
1851 int nbs;
1852 int nfile_tot; /* total number of input files */
1853 int nxvgfile=0;
1854 int nedrfile=0;
1855 char **fxvgnms;
1856 char **fedrnms;
1857 barsim_t *ba; /* the raw input data */
1858 barlambda_t *bl; /* the pre-processed lambda data (linked list head) */
1859 barres_t *results; /* the results */
1860 int nresults; /* number of results in results array */
1862 double *partsum;
1863 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
1864 FILE *fpb,*fpi;
1865 char lamformat[20];
1866 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
1867 char ktformat[STRLEN], sktformat[STRLEN];
1868 char kteformat[STRLEN], skteformat[STRLEN];
1869 output_env_t oenv;
1870 double kT, beta;
1871 gmx_bool result_OK=TRUE,bEE=TRUE;
1873 gmx_bool disc_err=FALSE;
1874 double sum_disc_err=0.; /* discretization error */
1875 gmx_bool histrange_err=FALSE;
1876 double sum_histrange_err=0.; /* histogram range error */
1877 double stat_err=0.; /* statistical error */
1879 CopyRight(stderr,argv[0]);
1880 parse_common_args(&argc,argv,
1881 PCA_CAN_VIEW,
1882 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
1884 if (opt2bSet("-f", NFILE, fnm))
1886 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
1888 if (opt2bSet("-g", NFILE, fnm))
1890 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
1893 nfile_tot = nxvgfile + nedrfile;
1895 if (nfile_tot == 0)
1897 gmx_fatal(FARGS,"No input files!");
1900 if (nd < 0)
1902 gmx_fatal(FARGS,"Can not have negative number of digits");
1904 prec = pow(10,-nd);
1906 snew(ba,nfile_tot);
1907 snew(results,nfile_tot-1);
1908 snew(partsum,(nbmax+1)*(nbmax+1));
1909 nf = 0;
1911 /* read in all files. First xvg files */
1912 for(f=0; f<nxvgfile; f++)
1914 read_barsim_xvg(fxvgnms[f],begin,end,temp,&ba[nf]);
1915 if (f > 0 && ba[nf].temp != ba[0].temp)
1917 printf("\nWARNING: temperature for file '%s' (%g) is not equal to that of file '%s' (%g)\n\n",fxvgnms[nf],ba[nf].temp,fxvgnms[0],ba[0].temp);
1920 if (ba[nf].nset == 0)
1922 gmx_fatal(FARGS,"File '%s' contains fewer than two columns",
1923 fxvgnms[nf]);
1925 nf++;
1927 /* then .edr files */
1928 for(f=0; f<nedrfile; f++)
1930 read_barsim_edr(fedrnms[f],begin,end,temp,&ba[nf]);
1931 if (nf > 0 && ba[nf].temp != ba[0].temp)
1933 printf("\nWARNING: temperature for file '%s' (%g) is not equal to that of file '%s' (%g)\n\n",fedrnms[nf],ba[nf].temp,fedrnms[0],ba[0].temp);
1936 if (ba[nf].nset == 0)
1938 gmx_fatal(FARGS,"File '%s' contains fewer than two columns",
1939 fedrnms[nf]);
1941 nf++;
1944 /* print input file data summary */
1945 for(i=0;i<nf;i++)
1947 int np;
1948 double begint, endt;
1949 if (ba[i].y)
1951 if (ba[i].nset>0)
1952 np=ba[i].np[1];
1953 else
1954 np=ba[i].np[0];
1956 begint=ba[i].t[0];
1957 endt=ba[i].t[np-1];
1959 else
1961 np=ba[i].np[1];
1962 begint=ba[i].hists[1][0].starttime;
1963 endt=ba[i].hists[1][np-1].endtime;
1965 printf("\n%s: %.1f - %.1f; lambdas:",ba[i].filename, begint, endt);
1967 for(j=0;j<ba[i].nset;j++)
1969 printf(" %.3f", ba[i].lambda[j]);
1970 if (ba[i].np[j]>0)
1972 if (ba[i].y)
1973 printf(" (%d pts)", ba[i].np[j]);
1974 if (ba[i].hists)
1975 printf(" (%d hists)", ba[i].np[j]);
1979 printf("\n");
1981 /* Sort the data sets on lambda */
1982 bl=barlambdas_list_create(ba, nfile_tot, &nbs, begin, end);
1984 if (opt2bSet("-oh",NFILE,fnm))
1986 barlambdas_histogram(bl, opt2fn("-oh",NFILE,fnm), nbin, oenv);
1989 /* assemble the output structures from the barlambdas */
1990 results=barres_list_create(bl, &nresults);
1992 sum_disc_err=barres_list_max_disc_err(results, nresults);
1993 if (sum_disc_err > prec)
1995 prec=sum_disc_err;
1996 nd = ceil(-log10(prec));
1997 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
1999 sprintf(lamformat,"%%6.3f");
2000 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2001 /* the format strings of the results in kT */
2002 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2003 sprintf( sktformat,"%%%ds",6+nd);
2004 /* the format strings of the errors in kT */
2005 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2006 sprintf( skteformat,"%%%ds",4+nd);
2007 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2008 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2012 fpb = NULL;
2013 if (opt2bSet("-o",NFILE,fnm))
2015 sprintf(buf,"%s (%s)","\\DeltaG",unit_energy);
2016 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2017 "\\lambda",buf,exvggtXYDY,oenv);
2020 fpi = NULL;
2021 if (opt2bSet("-oi",NFILE,fnm))
2023 sprintf(buf,"%s (%s)","\\DeltaG",unit_energy);
2024 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2025 "\\lambda",buf,oenv);
2030 if (nbmin > nbmax)
2031 nbmin=nbmax;
2033 /* first calculate results */
2034 bEE = TRUE;
2035 disc_err = FALSE;
2036 for(f=0; f<nresults; f++)
2038 /* Determine the free energy difference with a factor of 10
2039 * more accuracy than requested for printing.
2041 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2042 &bEE, partsum);
2044 if (results[f].dg_disc_err > prec/10.)
2045 disc_err=TRUE;
2046 if (results[f].dg_histrange_err > prec/10.)
2047 histrange_err=TRUE;
2050 /* print results in kT */
2051 kT = BOLTZ*ba[0].temp;
2052 beta = 1/kT;
2054 printf("\nTemperature: %g K\n", ba[0].temp);
2056 printf("\nDetailed results in kT (see help for explanation):\n\n");
2057 printf("%6s ", " lam_A");
2058 printf("%6s ", " lam_B");
2059 printf(sktformat, "DG ");
2060 if (bEE)
2061 printf(skteformat, "+/- ");
2062 if (disc_err)
2063 printf(skteformat, "disc ");
2064 if (histrange_err)
2065 printf(skteformat, "range ");
2066 printf(sktformat, "s_A ");
2067 if (bEE)
2068 printf(skteformat, "+/- " );
2069 printf(sktformat, "s_B ");
2070 if (bEE)
2071 printf(skteformat, "+/- " );
2072 printf(sktformat, "stdev ");
2073 if (bEE)
2074 printf(skteformat, "+/- ");
2075 printf("\n");
2076 for(f=0; f<nresults; f++)
2078 printf(lamformat, results[f].a->native_lambda);
2079 printf(" ");
2080 printf(lamformat, results[f].b->native_lambda);
2081 printf(" ");
2082 printf(ktformat, results[f].dg);
2083 printf(" ");
2084 if (bEE)
2086 printf(kteformat, results[f].dg_err);
2087 printf(" ");
2089 if (disc_err)
2091 printf(kteformat, results[f].dg_disc_err);
2092 printf(" ");
2094 if (histrange_err)
2096 printf(kteformat, results[f].dg_histrange_err);
2097 printf(" ");
2099 printf(ktformat, results[f].sa);
2100 printf(" ");
2101 if (bEE)
2103 printf(kteformat, results[f].sa_err);
2104 printf(" ");
2106 printf(ktformat, results[f].sb);
2107 printf(" ");
2108 if (bEE)
2110 printf(kteformat, results[f].sb_err);
2111 printf(" ");
2113 printf(ktformat, results[f].dg_stddev);
2114 printf(" ");
2115 if (bEE)
2117 printf(kteformat, results[f].dg_stddev_err);
2119 printf("\n");
2121 /* Check for negative relative entropy with a 95% certainty. */
2122 if (results[f].sa < -2*results[f].sa_err ||
2123 results[f].sb < -2*results[f].sb_err)
2125 result_OK=FALSE;
2129 if (!result_OK)
2131 printf("\nWARNING: Some of these results violate the Second Law of "
2132 "Thermodynamics: \n"
2133 " This is can be the result of severe undersampling, or "
2134 "(more likely)\n"
2135 " there is something wrong with the simulations.\n");
2139 /* final results in kJ/mol */
2140 printf("\n\nFinal results in kJ/mol:\n\n");
2141 dg_tot = 0;
2142 for(f=0; f<nresults; f++)
2145 if (fpi != NULL)
2147 fprintf(fpi, xvg2format, ba[f].lambda[0], dg_tot);
2151 if (fpb != NULL)
2153 fprintf(fpb, xvg3format,
2154 0.5*(ba[f].lambda[0] + ba[f+1].lambda[0]),
2155 results[f].dg,results[f].dg_err);
2158 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2159 results[f].lambda_b);*/
2160 printf("lambda ");
2161 printf(lamformat, results[f].a->native_lambda);
2162 printf(" - ");
2163 printf(lamformat, results[f].b->native_lambda);
2164 printf(", DG ");
2166 printf(dgformat,results[f].dg*kT);
2167 if (bEE)
2169 printf(" +/- ");
2170 printf(dgformat,results[f].dg_err*kT);
2172 if (histrange_err)
2174 printf(" (max. range err. = ");
2175 printf(dgformat,results[f].dg_histrange_err*kT);
2176 printf(")");
2177 sum_histrange_err += results[f].dg_histrange_err*kT;
2180 printf("\n");
2181 dg_tot += results[f].dg;
2183 printf("\n");
2184 printf("total ");
2185 printf(lamformat, ba[0].lambda[0]);
2186 printf(" - ");
2187 printf(lamformat, ba[nfile_tot-1].lambda[0]);
2188 printf(", DG ");
2190 printf(dgformat,dg_tot*kT);
2191 if (bEE)
2193 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2194 printf(" +/- ");
2195 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2197 printf("\n");
2198 if (disc_err)
2200 printf("\nmaximum discretization error = ");
2201 printf(dgformat,sum_disc_err);
2202 if (bEE && stat_err < sum_disc_err)
2204 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2207 if (histrange_err)
2209 printf("\nmaximum histogram range error = ");
2210 printf(dgformat,sum_histrange_err);
2211 if (bEE && stat_err < sum_histrange_err)
2213 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2217 printf("\n");
2220 if (fpi != NULL)
2222 fprintf(fpi, xvg2format,
2223 ba[nfile_tot-1].lambda[0], dg_tot);
2224 ffclose(fpi);
2227 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2228 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");
2230 thanx(stderr);
2232 return 0;