Fixes #882 - looping bug in trxio.c
[gromacs.git] / src / tools / gmx_bar.c
bloba77d0cf2dec9d3788fdde7375c1bb31315e2629e
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <math.h>
40 #include <string.h>
41 #include <ctype.h>
42 #include <math.h>
43 #include <float.h>
45 #include "sysstuff.h"
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "futil.h"
49 #include "statutil.h"
50 #include "copyrite.h"
51 #include "macros.h"
52 #include "enxio.h"
53 #include "physics.h"
54 #include "gmx_fatal.h"
55 #include "xvgr.h"
56 #include "gmx_ana.h"
57 #include "maths.h"
59 /* Suppress Cygwin compiler warnings from using newlib version of
60 * ctype.h */
61 #ifdef GMX_CYGWIN
62 #undef isdigit
63 #endif
65 /* the dhdl.xvg data from a simulation (actually obsolete, but still
66 here for reading the dhdl.xvg file*/
67 typedef struct xvg_t
69 char *filename;
70 int ftp; /* file type */
71 int nset; /* number of lambdas, including dhdl */
72 int *np; /* number of data points (du or hists) per lambda */
73 int np_alloc; /* number of points (du or hists) allocated */
74 double temp; /* temperature */
75 double *lambda; /* the lambdas (of first index for y). */
76 double *t; /* the times (of second index for y) */
77 double **y; /* the dU values. y[0] holds the derivative, while
78 further ones contain the energy differences between
79 the native lambda and the 'foreign' lambdas. */
81 double native_lambda; /* the native lambda */
83 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
84 } xvg_t;
88 typedef struct hist_t
90 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
91 double dx[2]; /* the histogram spacing. The reverse
92 dx is the negative of the forward dx.*/
93 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
94 point(s) as int */
96 int nbin[2]; /* the (forward+reverse) number of bins */
97 gmx_large_int_t sum; /* the total number of counts. Must be
98 the same for forward + reverse. */
99 int nhist; /* number of hist datas (forward or reverse) */
101 double start_time, delta_time; /* start time, end time of histogram */
102 } hist_t;
105 /* an aggregate of samples for partial free energy calculation */
106 typedef struct samples_t
108 double native_lambda;
109 double foreign_lambda;
110 double temp; /* the temperature */
111 gmx_bool derivative; /* whether this sample is a derivative */
113 /* The samples come either as either delta U lists: */
114 int ndu; /* the number of delta U samples */
115 double *du; /* the delta u's */
116 double *t; /* the times associated with those samples, or: */
117 double start_time,delta_time;/*start time and delta time for linear time*/
119 /* or as histograms: */
120 hist_t *hist; /* a histogram */
122 /* allocation data: (not NULL for data 'owned' by this struct) */
123 double *du_alloc, *t_alloc; /* allocated delta u arrays */
124 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
125 hist_t *hist_alloc; /* allocated hist */
127 gmx_large_int_t ntot; /* total number of samples */
128 const char *filename; /* the file name this sample comes from */
129 } samples_t;
131 /* a sample range (start to end for du-style data, or boolean
132 for both du-style data and histograms */
133 typedef struct sample_range_t
135 int start, end; /* start and end index for du style data */
136 gmx_bool use; /* whether to use this sample */
138 samples_t *s; /* the samples this range belongs to */
139 } sample_range_t;
142 /* a collection of samples for a partial free energy calculation
143 (i.e. the collection of samples from one native lambda to one
144 foreign lambda) */
145 typedef struct sample_coll_t
147 double native_lambda; /* these should be the same for all samples in the */
148 double foreign_lambda; /* collection */
149 double temp; /* the temperature */
151 int nsamples; /* the number of samples */
152 samples_t **s; /* the samples themselves */
153 sample_range_t *r; /* the sample ranges */
154 int nsamples_alloc; /* number of allocated samples */
156 gmx_large_int_t ntot; /* total number of samples in the ranges of
157 this collection */
159 struct sample_coll_t *next, *prev; /* next and previous in the list */
160 } sample_coll_t;
162 /* all the samples associated with a lambda point */
163 typedef struct lambda_t
165 double lambda; /* the native lambda (at start time if dynamic) */
166 double temp; /* temperature */
168 sample_coll_t *sc; /* the samples */
170 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
172 struct lambda_t *next, *prev; /* the next and prev in the list */
173 } lambda_t;
176 /* calculated values. */
177 typedef struct {
178 sample_coll_t *a, *b; /* the simulation data */
180 double dg; /* the free energy difference */
181 double dg_err; /* the free energy difference */
183 double dg_disc_err; /* discretization error */
184 double dg_histrange_err; /* histogram range error */
186 double sa; /* relative entropy of b in state a */
187 double sa_err; /* error in sa */
188 double sb; /* relative entropy of a in state b */
189 double sb_err; /* error in sb */
191 double dg_stddev; /* expected dg stddev per sample */
192 double dg_stddev_err; /* error in dg_stddev */
193 } barres_t;
198 static void hist_init(hist_t *h, int nhist, int *nbin)
200 int i;
201 if (nhist>2)
203 gmx_fatal(FARGS, "histogram with more than two sets of data!");
205 for(i=0;i<nhist;i++)
207 snew(h->bin[i], nbin[i]);
208 h->x0[i]=0;
209 h->nbin[i]=nbin[i];
210 h->start_time=h->delta_time=0;
211 h->dx[i]=0;
213 h->sum=0;
214 h->nhist=nhist;
217 static void hist_destroy(hist_t *h)
219 sfree(h->bin);
223 static void xvg_init(xvg_t *ba)
225 ba->filename=NULL;
226 ba->nset=0;
227 ba->np_alloc=0;
228 ba->np=NULL;
229 ba->y=NULL;
232 static void samples_init(samples_t *s, double native_lambda,
233 double foreign_lambda, double temp,
234 gmx_bool derivative, const char *filename)
236 s->native_lambda=native_lambda;
237 s->foreign_lambda=foreign_lambda;
238 s->temp=temp;
239 s->derivative=derivative;
241 s->ndu=0;
242 s->du=NULL;
243 s->t=NULL;
244 s->start_time = s->delta_time = 0;
245 s->hist=NULL;
246 s->du_alloc=NULL;
247 s->t_alloc=NULL;
248 s->hist_alloc=NULL;
249 s->ndu_alloc=0;
250 s->nt_alloc=0;
252 s->ntot=0;
253 s->filename=filename;
256 /* destroy the data structures directly associated with the structure, not
257 the data it points to */
258 static void samples_destroy(samples_t *s)
260 if (s->du_alloc)
262 sfree(s->du_alloc);
264 if (s->t_alloc)
266 sfree(s->t_alloc);
268 if (s->hist_alloc)
270 hist_destroy(s->hist_alloc);
271 sfree(s->hist_alloc);
275 static void sample_range_init(sample_range_t *r, samples_t *s)
277 r->start=0;
278 r->end=s->ndu;
279 r->use=TRUE;
280 r->s=NULL;
283 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
284 double foreign_lambda, double temp)
286 sc->native_lambda = native_lambda;
287 sc->foreign_lambda = foreign_lambda;
288 sc->temp = temp;
290 sc->nsamples=0;
291 sc->s=NULL;
292 sc->r=NULL;
293 sc->nsamples_alloc=0;
295 sc->ntot=0;
296 sc->next=sc->prev=NULL;
299 static void sample_coll_destroy(sample_coll_t *sc)
301 /* don't free the samples themselves */
302 sfree(sc->r);
303 sfree(sc->s);
307 static void lambda_init(lambda_t *l, double native_lambda, double temp)
309 l->lambda=native_lambda;
310 l->temp=temp;
312 l->next=NULL;
313 l->prev=NULL;
315 l->sc=&(l->sc_head);
317 sample_coll_init(l->sc, native_lambda, 0., 0.);
318 l->sc->next=l->sc;
319 l->sc->prev=l->sc;
322 static void barres_init(barres_t *br)
324 br->dg=0;
325 br->dg_err=0;
326 br->sa=0;
327 br->sa_err=0;
328 br->sb=0;
329 br->sb_err=0;
330 br->dg_stddev=0;
331 br->dg_stddev_err=0;
333 br->a=NULL;
334 br->b=NULL;
340 static gmx_bool lambda_same(double lambda1, double lambda2)
342 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
345 /* calculate the total number of samples in a sample collection */
346 static void sample_coll_calc_ntot(sample_coll_t *sc)
348 int i;
350 sc->ntot=0;
351 for(i=0;i<sc->nsamples;i++)
353 if (sc->r[i].use)
355 if (sc->s[i]->hist)
357 sc->ntot += sc->s[i]->ntot;
359 else
361 sc->ntot += sc->r[i].end - sc->r[i].start;
368 /* find the barsamples_t associated with a lambda that corresponds to
369 a specific foreign lambda */
370 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
371 double foreign_lambda)
373 sample_coll_t *sc=l->sc->next;
375 while(sc != l->sc)
377 if (lambda_same(sc->foreign_lambda,foreign_lambda))
379 return sc;
381 sc=sc->next;
384 return NULL;
387 /* insert li into an ordered list of lambda_colls */
388 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
390 sample_coll_t *scn=l->sc->next;
391 while ( (scn!=l->sc) )
393 if (scn->foreign_lambda > sc->foreign_lambda)
394 break;
395 scn=scn->next;
397 /* now insert it before the found scn */
398 sc->next=scn;
399 sc->prev=scn->prev;
400 scn->prev->next=sc;
401 scn->prev=sc;
404 /* insert li into an ordered list of lambdas */
405 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
407 lambda_t *lc=head->next;
408 while (lc!=head)
410 if (lc->lambda > li->lambda)
411 break;
412 lc=lc->next;
414 /* now insert ourselves before the found lc */
415 li->next=lc;
416 li->prev=lc->prev;
417 lc->prev->next=li;
418 lc->prev=li;
421 /* insert a sample and a sample_range into a sample_coll. The
422 samples are stored as a pointer, the range is copied. */
423 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
424 sample_range_t *r)
426 /* first check if it belongs here */
427 if (sc->temp != s->temp)
429 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
430 s->filename, sc->next->s[0]->filename);
432 if (sc->native_lambda != s->native_lambda)
434 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
435 s->filename, sc->next->s[0]->filename);
437 if (sc->foreign_lambda != s->foreign_lambda)
439 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
440 s->filename, sc->next->s[0]->filename);
443 /* check if there's room */
444 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
446 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
447 srenew(sc->s, sc->nsamples_alloc);
448 srenew(sc->r, sc->nsamples_alloc);
450 sc->s[sc->nsamples]=s;
451 sc->r[sc->nsamples]=*r;
452 sc->nsamples++;
454 sample_coll_calc_ntot(sc);
457 /* insert a sample into a lambda_list, creating the right sample_coll if
458 neccesary */
459 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
461 gmx_bool found=FALSE;
462 sample_coll_t *sc;
463 sample_range_t r;
465 lambda_t *l=head->next;
467 /* first search for the right lambda_t */
468 while(l != head)
470 if (lambda_same(l->lambda, s->native_lambda) )
472 found=TRUE;
473 break;
475 l=l->next;
478 if (!found)
480 snew(l, 1); /* allocate a new one */
481 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
482 lambda_insert_lambda(head, l); /* add it to the list */
485 /* now look for a sample collection */
486 sc=lambda_find_sample_coll(l, s->foreign_lambda);
487 if (!sc)
489 snew(sc, 1); /* allocate a new one */
490 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
491 lambda_insert_sample_coll(l, sc);
494 /* now insert the samples into the sample coll */
495 sample_range_init(&r, s);
496 sample_coll_insert_sample(sc, s, &r);
500 /* make a histogram out of a sample collection */
501 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
502 int *nbin_alloc, int *nbin,
503 double *dx, double *xmin, int nbin_default)
505 int i,j,k;
506 gmx_bool dx_set=FALSE;
507 gmx_bool xmin_set=FALSE;
509 gmx_bool xmax_set=FALSE;
510 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
511 limits of a histogram */
512 double xmax=-1;
514 /* first determine dx and xmin; try the histograms */
515 for(i=0;i<sc->nsamples;i++)
517 if (sc->s[i]->hist)
519 hist_t *hist=sc->s[i]->hist;
520 for(k=0;k<hist->nhist;k++)
522 double hdx=hist->dx[k];
523 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
525 /* we use the biggest dx*/
526 if ( (!dx_set) || hist->dx[0] > *dx)
528 dx_set=TRUE;
529 *dx = hist->dx[0];
531 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
533 xmin_set=TRUE;
534 *xmin = (hist->x0[k]*hdx);
537 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
539 xmax_set=TRUE;
540 xmax = xmax_now;
541 if (hist->bin[k][hist->nbin[k]-1] != 0)
542 xmax_set_hard=TRUE;
544 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
546 xmax_set_hard=TRUE;
547 xmax = xmax_now;
552 /* and the delta us */
553 for(i=0;i<sc->nsamples;i++)
555 if (sc->s[i]->ndu > 0)
557 /* determine min and max */
558 int starti=sc->r[i].start;
559 int endi=sc->r[i].end;
560 double du_xmin=sc->s[i]->du[starti];
561 double du_xmax=sc->s[i]->du[starti];
562 for(j=starti+1;j<endi;j++)
564 if (sc->s[i]->du[j] < du_xmin)
565 du_xmin = sc->s[i]->du[j];
566 if (sc->s[i]->du[j] > du_xmax)
567 du_xmax = sc->s[i]->du[j];
570 /* and now change the limits */
571 if ( (!xmin_set) || (du_xmin < *xmin) )
573 xmin_set=TRUE;
574 *xmin=du_xmin;
576 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
578 xmax_set=TRUE;
579 xmax=du_xmax;
584 if (!xmax_set || !xmin_set)
586 *nbin=0;
587 return;
591 if (!dx_set)
593 *nbin=nbin_default;
594 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
595 be 0, and we count from 0 */
597 else
599 *nbin=(xmax-(*xmin))/(*dx);
602 if (*nbin > *nbin_alloc)
604 *nbin_alloc=*nbin;
605 srenew(*bin, *nbin_alloc);
608 /* reset the histogram */
609 for(i=0;i<(*nbin);i++)
611 (*bin)[i] = 0;
614 /* now add the actual data */
615 for(i=0;i<sc->nsamples;i++)
617 if (sc->s[i]->hist)
619 hist_t *hist=sc->s[i]->hist;
620 for(k=0;k<hist->nhist;k++)
622 double hdx = hist->dx[k];
623 double xmin_hist=hist->x0[k]*hdx;
624 for(j=0;j<hist->nbin[k];j++)
626 /* calculate the bin corresponding to the middle of the
627 original bin */
628 double x=hdx*(j+0.5) + xmin_hist;
629 int binnr=(int)((x-(*xmin))/(*dx));
631 if (binnr >= *nbin || binnr<0)
632 binnr = (*nbin)-1;
634 (*bin)[binnr] += hist->bin[k][j];
638 else
640 int starti=sc->r[i].start;
641 int endi=sc->r[i].end;
642 for(j=starti;j<endi;j++)
644 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
645 if (binnr >= *nbin || binnr<0)
646 binnr = (*nbin)-1;
648 (*bin)[binnr] ++;
654 /* write a collection of histograms to a file */
655 void lambdas_histogram(lambda_t *bl_head, const char *filename,
656 int nbin_default, const output_env_t oenv)
658 char label_x[STRLEN];
659 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
660 const char *title="N(\\DeltaH)";
661 const char *label_y="Samples";
662 FILE *fp;
663 lambda_t *bl;
664 int nsets=0;
665 char **setnames=NULL;
666 gmx_bool first_set=FALSE;
667 /* histogram data: */
668 int *hist=NULL;
669 int nbin=0;
670 int nbin_alloc=0;
671 double dx=0;
672 double min=0;
673 int i;
675 printf("\nWriting histogram to %s\n", filename);
676 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
678 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
680 /* first get all the set names */
681 bl=bl_head->next;
682 /* iterate over all lambdas */
683 while(bl!=bl_head)
685 sample_coll_t *sc=bl->sc->next;
687 /* iterate over all samples */
688 while(sc!=bl->sc)
690 nsets++;
691 srenew(setnames, nsets);
692 snew(setnames[nsets-1], STRLEN);
693 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
695 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
696 deltag, lambda, sc->foreign_lambda, lambda,
697 sc->native_lambda);
699 else
701 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
702 dhdl, lambda, sc->native_lambda);
704 sc=sc->next;
707 bl=bl->next;
709 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
712 /* now make the histograms */
713 bl=bl_head->next;
714 /* iterate over all lambdas */
715 while(bl!=bl_head)
717 sample_coll_t *sc=bl->sc->next;
719 /* iterate over all samples */
720 while(sc!=bl->sc)
722 if (!first_set)
724 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
727 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
728 nbin_default);
730 for(i=0;i<nbin;i++)
732 double xmin=i*dx + min;
733 double xmax=(i+1)*dx + min;
735 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
738 first_set=FALSE;
739 sc=sc->next;
742 bl=bl->next;
745 if(hist)
746 sfree(hist);
748 xvgrclose(fp);
751 /* create a collection (array) of barres_t object given a ordered linked list
752 of barlamda_t sample collections */
753 static barres_t *barres_list_create(lambda_t *bl_head, int *nres)
755 lambda_t *bl;
756 int nlambda=0;
757 barres_t *res;
758 int i;
759 gmx_bool dhdl=FALSE;
760 gmx_bool first=TRUE;
762 /* first count the lambdas */
763 bl=bl_head->next;
764 while(bl!=bl_head)
766 nlambda++;
767 bl=bl->next;
769 snew(res, nlambda-1);
771 /* next put the right samples in the res */
772 *nres=0;
773 bl=bl_head->next->next; /* we start with the second one. */
774 while(bl!=bl_head)
776 sample_coll_t *sc, *scprev;
777 barres_t *br=&(res[*nres]);
778 /* there is always a previous one. we search for that as a foreign
779 lambda: */
780 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
781 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
783 barres_init(br);
785 if (!scprev && !sc)
787 /* we use dhdl */
789 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
790 sc=lambda_find_sample_coll(bl, bl->lambda);
792 if (first)
794 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
795 dhdl=TRUE;
797 if (!dhdl)
799 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
803 /* normal delta H */
804 if (!scprev)
806 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->lambda,bl->prev->lambda);
808 if (!sc)
810 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->prev->lambda,bl->lambda);
812 br->a = scprev;
813 br->b = sc;
815 first=FALSE;
816 (*nres)++;
817 bl=bl->next;
819 return res;
822 /* estimate the maximum discretization error */
823 static double barres_list_max_disc_err(barres_t *res, int nres)
825 int i,j;
826 double disc_err=0.;
827 double delta_lambda;
829 for(i=0;i<nres;i++)
831 barres_t *br=&(res[i]);
833 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
835 for(j=0;j<br->a->nsamples;j++)
837 if (br->a->s[j]->hist)
839 double Wfac=1.;
840 if (br->a->s[j]->derivative)
841 Wfac = delta_lambda;
843 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
846 for(j=0;j<br->b->nsamples;j++)
848 if (br->b->s[j]->hist)
850 double Wfac=1.;
851 if (br->b->s[j]->derivative)
852 Wfac = delta_lambda;
853 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
857 return disc_err;
861 /* impose start and end times on a sample collection, updating sample_ranges */
862 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
863 double end_t)
865 int i;
866 for(i=0;i<sc->nsamples;i++)
868 samples_t *s=sc->s[i];
869 sample_range_t *r=&(sc->r[i]);
870 if (s->hist)
872 double end_time=s->hist->delta_time*s->hist->sum +
873 s->hist->start_time;
874 if (s->hist->start_time < begin_t || end_time > end_t)
876 r->use=FALSE;
879 else
881 if (!s->t)
883 double end_time;
884 if (s->start_time < begin_t)
886 r->start=(int)((begin_t - s->start_time)/s->delta_time);
888 end_time=s->delta_time*s->ndu + s->start_time;
889 if (end_time > end_t)
891 r->end=(int)((end_t - s->start_time)/s->delta_time);
894 else
896 int j;
897 for(j=0;j<s->ndu;j++)
899 if (s->t[j] <begin_t)
901 r->start = j;
904 if (s->t[j] >= end_t)
906 r->end = j;
907 break;
911 if (r->start > r->end)
913 r->use=FALSE;
917 sample_coll_calc_ntot(sc);
920 static void lambdas_impose_times(lambda_t *head, double begin, double end)
922 double first_t, last_t;
923 double begin_t, end_t;
924 lambda_t *lc;
925 int j;
927 if (begin<=0 && end<0)
929 return;
932 /* first determine the global start and end times */
933 first_t = -1;
934 last_t = -1;
935 lc=head->next;
936 while(lc!=head)
938 sample_coll_t *sc=lc->sc->next;
939 while(sc != lc->sc)
941 for(j=0;j<sc->nsamples;j++)
943 double start_t,end_t;
945 start_t = sc->s[j]->start_time;
946 end_t = sc->s[j]->start_time;
947 if (sc->s[j]->hist)
949 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
951 else
953 if (sc->s[j]->t)
955 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
957 else
959 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
963 if (start_t < first_t || first_t<0)
965 first_t=start_t;
967 if (end_t > last_t)
969 last_t=end_t;
972 sc=sc->next;
974 lc=lc->next;
977 /* calculate the actual times */
978 if (begin > 0 )
980 begin_t = begin;
982 else
984 begin_t = first_t;
987 if (end >0 )
989 end_t = end;
991 else
993 end_t = last_t;
995 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
997 if (begin_t > end_t)
999 return;
1001 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1003 /* then impose them */
1004 lc=head->next;
1005 while(lc!=head)
1007 sample_coll_t *sc=lc->sc->next;
1008 while(sc != lc->sc)
1010 sample_coll_impose_times(sc, begin_t, end_t);
1011 sc=sc->next;
1013 lc=lc->next;
1018 /* create subsample i out of ni from an existing sample_coll */
1019 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1020 sample_coll_t *sc_orig,
1021 int i, int ni)
1023 int j;
1024 int hist_start, hist_end;
1026 gmx_large_int_t ntot_start;
1027 gmx_large_int_t ntot_end;
1028 gmx_large_int_t ntot_so_far;
1030 *sc = *sc_orig; /* just copy all fields */
1032 /* allocate proprietary memory */
1033 snew(sc->s, sc_orig->nsamples);
1034 snew(sc->r, sc_orig->nsamples);
1036 /* copy the samples */
1037 for(j=0;j<sc_orig->nsamples;j++)
1039 sc->s[j] = sc_orig->s[j];
1040 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1043 /* now fix start and end fields */
1044 /* the casts avoid possible overflows */
1045 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1046 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1047 ntot_so_far = 0;
1048 for(j=0;j<sc->nsamples;j++)
1050 gmx_large_int_t ntot_add;
1051 gmx_large_int_t new_start, new_end;
1053 if (sc->r[j].use)
1055 if (sc->s[j]->hist)
1057 ntot_add = sc->s[j]->hist->sum;
1059 else
1061 ntot_add = sc->r[j].end - sc->r[j].start;
1064 else
1066 ntot_add = 0;
1069 if (!sc->s[j]->hist)
1071 if (ntot_so_far < ntot_start)
1073 /* adjust starting point */
1074 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1076 else
1078 new_start = sc->r[j].start;
1080 /* adjust end point */
1081 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1082 if (new_end > sc->r[j].end)
1084 new_end=sc->r[j].end;
1087 /* check if we're in range at all */
1088 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1090 new_start=0;
1091 new_end=0;
1093 /* and write the new range */
1094 sc->r[j].start=(int)new_start;
1095 sc->r[j].end=(int)new_end;
1097 else
1099 if (sc->r[j].use)
1101 double overlap;
1102 double ntot_start_norm, ntot_end_norm;
1103 /* calculate the amount of overlap of the
1104 desired range (ntot_start -- ntot_end) onto
1105 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1107 /* first calculate normalized bounds
1108 (where 0 is the start of the hist range, and 1 the end) */
1109 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1110 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1112 /* now fix the boundaries */
1113 ntot_start_norm = min(1, max(0., ntot_start_norm));
1114 ntot_end_norm = max(0, min(1., ntot_end_norm));
1116 /* and calculate the overlap */
1117 overlap = ntot_end_norm - ntot_start_norm;
1119 if (overlap > 0.95) /* we allow for 5% slack */
1121 sc->r[j].use = TRUE;
1123 else if (overlap < 0.05)
1125 sc->r[j].use = FALSE;
1127 else
1129 return FALSE;
1133 ntot_so_far += ntot_add;
1135 sample_coll_calc_ntot(sc);
1137 return TRUE;
1140 /* calculate minimum and maximum work values in sample collection */
1141 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1142 double *Wmin, double *Wmax)
1144 int i,j;
1146 *Wmin=FLT_MAX;
1147 *Wmax=-FLT_MAX;
1149 for(i=0;i<sc->nsamples;i++)
1151 samples_t *s=sc->s[i];
1152 sample_range_t *r=&(sc->r[i]);
1153 if (r->use)
1155 if (!s->hist)
1157 for(j=r->start; j<r->end; j++)
1159 *Wmin = min(*Wmin,s->du[j]*Wfac);
1160 *Wmax = max(*Wmax,s->du[j]*Wfac);
1163 else
1165 int hd=0; /* determine the histogram direction: */
1166 double dx;
1167 if ( (s->hist->nhist>1) && (Wfac<0) )
1169 hd=1;
1171 dx=s->hist->dx[hd];
1173 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1175 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1176 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1177 /* look for the highest value bin with values */
1178 if (s->hist->bin[hd][j]>0)
1180 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1181 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1182 break;
1191 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1193 int i;
1194 double sum;
1196 sum = 0;
1198 for(i=0; i<n; i++)
1200 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1203 return sum;
1206 /* calculate the BAR average given a histogram
1208 if type== 0, calculate the best estimate for the average,
1209 if type==-1, calculate the minimum possible value given the histogram
1210 if type== 1, calculate the maximum possible value given the histogram */
1211 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1212 int type)
1214 double sum=0.;
1215 int i;
1216 int max;
1217 /* normalization factor multiplied with bin width and
1218 number of samples (we normalize through M): */
1219 double normdx = 1.;
1220 int hd=0; /* determine the histogram direction: */
1221 double dx;
1223 if ( (hist->nhist>1) && (Wfac<0) )
1225 hd=1;
1227 dx=hist->dx[hd];
1228 max=hist->nbin[hd]-1;
1229 if (type==1)
1231 max=hist->nbin[hd]; /* we also add whatever was out of range */
1234 for(i=0;i<max;i++)
1236 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1237 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1239 sum += pxdx/(1. + exp(x + sbMmDG));
1242 return sum;
1245 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1246 double temp, double tol, int type)
1248 double kT,beta,M;
1249 double DG;
1250 int i,j;
1251 double Wfac1,Wfac2,Wmin,Wmax;
1252 double DG0,DG1,DG2,dDG1;
1253 double sum1,sum2;
1254 double n1, n2; /* numbers of samples as doubles */
1256 kT = BOLTZ*temp;
1257 beta = 1/kT;
1259 /* count the numbers of samples */
1260 n1 = ca->ntot;
1261 n2 = cb->ntot;
1263 M = log(n1/n2);
1265 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1267 /* this is the case when the delta U were calculated directly
1268 (i.e. we're not scaling dhdl) */
1269 Wfac1 = beta;
1270 Wfac2 = beta;
1272 else
1274 /* we're using dhdl, so delta_lambda needs to be a
1275 multiplication factor. */
1276 double delta_lambda=cb->native_lambda-ca->native_lambda;
1277 Wfac1 = beta*delta_lambda;
1278 Wfac2 = -beta*delta_lambda;
1281 if (beta < 1)
1283 /* We print the output both in kT and kJ/mol.
1284 * Here we determine DG in kT, so when beta < 1
1285 * the precision has to be increased.
1287 tol *= beta;
1290 /* Calculate minimum and maximum work to give an initial estimate of
1291 * delta G as their average.
1294 double Wmin1, Wmin2, Wmax1, Wmax2;
1295 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1296 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1298 Wmin=min(Wmin1, Wmin2);
1299 Wmax=max(Wmax1, Wmax2);
1302 DG0 = Wmin;
1303 DG2 = Wmax;
1305 if (debug)
1307 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1309 /* We approximate by bisection: given our initial estimates
1310 we keep checking whether the halfway point is greater or
1311 smaller than what we get out of the BAR averages.
1313 For the comparison we can use twice the tolerance. */
1314 while (DG2 - DG0 > 2*tol)
1316 DG1 = 0.5*(DG0 + DG2);
1318 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1319 DG1);*/
1321 /* calculate the BAR averages */
1322 dDG1=0.;
1324 for(i=0;i<ca->nsamples;i++)
1326 samples_t *s=ca->s[i];
1327 sample_range_t *r=&(ca->r[i]);
1328 if (r->use)
1330 if (s->hist)
1332 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1334 else
1336 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1337 Wfac1, (M-DG1));
1341 for(i=0;i<cb->nsamples;i++)
1343 samples_t *s=cb->s[i];
1344 sample_range_t *r=&(cb->r[i]);
1345 if (r->use)
1347 if (s->hist)
1349 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1351 else
1353 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1354 Wfac2, -(M-DG1));
1359 if (dDG1 < 0)
1361 DG0 = DG1;
1363 else
1365 DG2 = DG1;
1367 if (debug)
1369 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1373 return 0.5*(DG0 + DG2);
1376 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1377 double temp, double dg, double *sa, double *sb)
1379 int i,j;
1380 double W_ab=0.;
1381 double W_ba=0.;
1382 double kT, beta;
1383 double Wfac1, Wfac2;
1384 double n1,n2;
1386 kT = BOLTZ*temp;
1387 beta = 1/kT;
1389 /* count the numbers of samples */
1390 n1 = ca->ntot;
1391 n2 = cb->ntot;
1393 /* to ensure the work values are the same as during the delta_G */
1394 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1396 /* this is the case when the delta U were calculated directly
1397 (i.e. we're not scaling dhdl) */
1398 Wfac1 = beta;
1399 Wfac2 = beta;
1401 else
1403 /* we're using dhdl, so delta_lambda needs to be a
1404 multiplication factor. */
1405 double delta_lambda=cb->native_lambda-ca->native_lambda;
1406 Wfac1 = beta*delta_lambda;
1407 Wfac2 = -beta*delta_lambda;
1410 /* first calculate the average work in both directions */
1411 for(i=0;i<ca->nsamples;i++)
1413 samples_t *s=ca->s[i];
1414 sample_range_t *r=&(ca->r[i]);
1415 if (r->use)
1417 if (!s->hist)
1419 for(j=r->start;j<r->end;j++)
1420 W_ab += Wfac1*s->du[j];
1422 else
1424 /* normalization factor multiplied with bin width and
1425 number of samples (we normalize through M): */
1426 double normdx = 1.;
1427 double dx;
1428 int hd=0; /* histogram direction */
1429 if ( (s->hist->nhist>1) && (Wfac1<0) )
1431 hd=1;
1433 dx=s->hist->dx[hd];
1435 for(j=0;j<s->hist->nbin[0];j++)
1437 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1438 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1439 W_ab += pxdx*x;
1444 W_ab/=n1;
1446 for(i=0;i<cb->nsamples;i++)
1448 samples_t *s=cb->s[i];
1449 sample_range_t *r=&(cb->r[i]);
1450 if (r->use)
1452 if (!s->hist)
1454 for(j=r->start;j<r->end;j++)
1455 W_ba += Wfac1*s->du[j];
1457 else
1459 /* normalization factor multiplied with bin width and
1460 number of samples (we normalize through M): */
1461 double normdx = 1.;
1462 double dx;
1463 int hd=0; /* histogram direction */
1464 if ( (s->hist->nhist>1) && (Wfac2<0) )
1466 hd=1;
1468 dx=s->hist->dx[hd];
1470 for(j=0;j<s->hist->nbin[0];j++)
1472 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1473 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1474 W_ba += pxdx*x;
1479 W_ba/=n2;
1481 /* then calculate the relative entropies */
1482 *sa = (W_ab - dg);
1483 *sb = (W_ba + dg);
1486 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1487 double temp, double dg, double *stddev)
1489 int i,j;
1490 double M;
1491 double sigmafact=0.;
1492 double kT, beta;
1493 double Wfac1, Wfac2;
1494 double n1, n2;
1496 kT = BOLTZ*temp;
1497 beta = 1/kT;
1499 /* count the numbers of samples */
1500 n1 = ca->ntot;
1501 n2 = cb->ntot;
1503 /* to ensure the work values are the same as during the delta_G */
1504 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1506 /* this is the case when the delta U were calculated directly
1507 (i.e. we're not scaling dhdl) */
1508 Wfac1 = beta;
1509 Wfac2 = beta;
1511 else
1513 /* we're using dhdl, so delta_lambda needs to be a
1514 multiplication factor. */
1515 double delta_lambda=cb->native_lambda-ca->native_lambda;
1516 Wfac1 = beta*delta_lambda;
1517 Wfac2 = -beta*delta_lambda;
1520 M = log(n1/n2);
1523 /* calculate average in both directions */
1524 for(i=0;i<ca->nsamples;i++)
1526 samples_t *s=ca->s[i];
1527 sample_range_t *r=&(ca->r[i]);
1528 if (r->use)
1530 if (!s->hist)
1532 for(j=r->start;j<r->end;j++)
1534 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1537 else
1539 /* normalization factor multiplied with bin width and
1540 number of samples (we normalize through M): */
1541 double normdx = 1.;
1542 double dx;
1543 int hd=0; /* histogram direction */
1544 if ( (s->hist->nhist>1) && (Wfac1<0) )
1546 hd=1;
1548 dx=s->hist->dx[hd];
1550 for(j=0;j<s->hist->nbin[0];j++)
1552 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1553 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1555 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1560 for(i=0;i<cb->nsamples;i++)
1562 samples_t *s=cb->s[i];
1563 sample_range_t *r=&(cb->r[i]);
1564 if (r->use)
1566 if (!s->hist)
1568 for(j=r->start;j<r->end;j++)
1570 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1573 else
1575 /* normalization factor multiplied with bin width and
1576 number of samples (we normalize through M): */
1577 double normdx = 1.;
1578 double dx;
1579 int hd=0; /* histogram direction */
1580 if ( (s->hist->nhist>1) && (Wfac2<0) )
1582 hd=1;
1584 dx=s->hist->dx[hd];
1586 for(j=0;j<s->hist->nbin[0];j++)
1588 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1589 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1591 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1597 sigmafact /= (n1 + n2);
1600 /* Eq. 10 from
1601 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1602 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1607 static void calc_bar(barres_t *br, double tol,
1608 int npee_min, int npee_max, gmx_bool *bEE,
1609 double *partsum)
1611 int npee,p;
1612 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1613 for calculated quantities */
1614 int nsample1, nsample2;
1615 double temp=br->a->temp;
1616 int i,j;
1617 double dg_min, dg_max;
1618 gmx_bool have_hist=FALSE;
1620 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1622 br->dg_disc_err = 0.;
1623 br->dg_histrange_err = 0.;
1625 /* check if there are histograms */
1626 for(i=0;i<br->a->nsamples;i++)
1628 if (br->a->r[i].use && br->a->s[i]->hist)
1630 have_hist=TRUE;
1631 break;
1634 if (!have_hist)
1636 for(i=0;i<br->b->nsamples;i++)
1638 if (br->b->r[i].use && br->b->s[i]->hist)
1640 have_hist=TRUE;
1641 break;
1646 /* calculate histogram-specific errors */
1647 if (have_hist)
1649 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1650 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1652 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1654 /* the histogram range error is the biggest of the differences
1655 between the best estimate and the extremes */
1656 br->dg_histrange_err = fabs(dg_max - dg_min);
1658 br->dg_disc_err = 0.;
1659 for(i=0;i<br->a->nsamples;i++)
1661 if (br->a->s[i]->hist)
1662 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1664 for(i=0;i<br->b->nsamples;i++)
1666 if (br->b->s[i]->hist)
1667 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1670 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1672 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1674 dg_sig2 = 0;
1675 sa_sig2 = 0;
1676 sb_sig2 = 0;
1677 stddev_sig2 = 0;
1679 *bEE=TRUE;
1681 sample_coll_t ca, cb;
1683 /* initialize the samples */
1684 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1685 br->a->temp);
1686 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1687 br->b->temp);
1689 for(npee=npee_min; npee<=npee_max; npee++)
1691 double dgs = 0;
1692 double dgs2 = 0;
1693 double dsa = 0;
1694 double dsb = 0;
1695 double dsa2 = 0;
1696 double dsb2 = 0;
1697 double dstddev = 0;
1698 double dstddev2 = 0;
1701 for(p=0; p<npee; p++)
1703 double dgp;
1704 double stddevc;
1705 double sac, sbc;
1706 gmx_bool cac, cbc;
1708 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1709 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1711 if (!cac || !cbc)
1713 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1714 *bEE=FALSE;
1715 if (cac)
1716 sample_coll_destroy(&ca);
1717 if (cbc)
1718 sample_coll_destroy(&cb);
1719 return;
1722 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1723 dgs += dgp;
1724 dgs2 += dgp*dgp;
1726 partsum[npee*(npee_max+1)+p] += dgp;
1728 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1729 dsa += sac;
1730 dsa2 += sac*sac;
1731 dsb += sbc;
1732 dsb2 += sbc*sbc;
1733 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1735 dstddev += stddevc;
1736 dstddev2 += stddevc*stddevc;
1738 sample_coll_destroy(&ca);
1739 sample_coll_destroy(&cb);
1741 dgs /= npee;
1742 dgs2 /= npee;
1743 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1745 dsa /= npee;
1746 dsa2 /= npee;
1747 dsb /= npee;
1748 dsb2 /= npee;
1749 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1750 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1752 dstddev /= npee;
1753 dstddev2 /= npee;
1754 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1756 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1757 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1758 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1759 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1764 static double bar_err(int nbmin, int nbmax, const double *partsum)
1766 int nb,b;
1767 double svar,s,s2,dg;
1769 svar = 0;
1770 for(nb=nbmin; nb<=nbmax; nb++)
1772 s = 0;
1773 s2 = 0;
1774 for(b=0; b<nb; b++)
1776 dg = partsum[nb*(nbmax+1)+b];
1777 s += dg;
1778 s2 += dg*dg;
1780 s /= nb;
1781 s2 /= nb;
1782 svar += (s2 - s*s)/(nb - 1);
1785 return sqrt(svar/(nbmax + 1 - nbmin));
1788 /* deduce lambda value from legend.
1789 input:
1790 bdhdl = if true, value may be a derivative.
1791 output:
1792 bdhdl = whether the legend was for a derivative.
1794 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1796 double lambda=0;
1797 const char *ptr;
1798 gmx_bool ok=FALSE;
1800 if (legend == NULL)
1802 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1804 ptr = strrchr(legend,' ');
1806 if (strstr(legend,"dH"))
1808 if (! (*bdhdl))
1810 ok=FALSE;
1812 else
1814 ok=TRUE;
1817 else
1819 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1821 ok=TRUE;
1822 *bdhdl=FALSE;
1825 if (!ptr)
1827 ok=FALSE;
1830 if (!ok)
1832 printf("%s\n", legend);
1833 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1835 if (sscanf(ptr,"%lf",&lambda) != 1)
1837 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1840 return lambda;
1843 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1845 gmx_bool bFound;
1846 char *ptr;
1848 bFound = FALSE;
1850 /* plain text lambda string */
1851 ptr = strstr(subtitle,"lambda");
1852 if (ptr == NULL)
1854 /* xmgrace formatted lambda string */
1855 ptr = strstr(subtitle,"\\xl\\f{}");
1857 if (ptr == NULL)
1859 /* xmgr formatted lambda string */
1860 ptr = strstr(subtitle,"\\8l\\4");
1862 if (ptr != NULL)
1864 ptr = strstr(ptr,"=");
1866 if (ptr != NULL)
1868 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1871 return bFound;
1874 static double filename2lambda(char *fn)
1876 double lambda;
1877 char *ptr,*endptr,*digitptr;
1878 int dirsep;
1879 ptr = fn;
1880 /* go to the end of the path string and search backward to find the last
1881 directory in the path which has to contain the value of lambda
1883 while (ptr[1] != '\0')
1885 ptr++;
1887 /* searching backward to find the second directory separator */
1888 dirsep = 0;
1889 digitptr = NULL;
1890 while (ptr >= fn)
1892 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1894 if (dirsep == 1) break;
1895 dirsep++;
1897 /* save the last position of a digit between the last two
1898 separators = in the last dirname */
1899 if (dirsep > 0 && isdigit(*ptr))
1901 digitptr = ptr;
1903 ptr--;
1905 if (!digitptr)
1907 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1908 " last directory in the path '%s' does not contain a number",fn);
1910 if (digitptr[-1] == '-')
1912 digitptr--;
1914 lambda = strtod(digitptr,&endptr);
1915 if (endptr == digitptr)
1917 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1920 return lambda;
1923 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1925 int i;
1926 char *subtitle,**legend,*ptr;
1927 int np;
1928 gmx_bool native_lambda_read=FALSE;
1930 xvg_init(ba);
1932 ba->filename = fn;
1934 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1935 if (!ba->y)
1937 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1939 ba->t = ba->y[0];
1941 snew(ba->np,ba->nset);
1942 for(i=0;i<ba->nset;i++)
1943 ba->np[i]=np;
1946 ba->temp = -1;
1947 if (subtitle != NULL)
1949 ptr = strstr(subtitle,"T =");
1950 if (ptr != NULL)
1952 ptr += 3;
1953 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1955 if (ba->temp <= 0)
1957 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1958 ba->temp,fn);
1963 if (ba->temp < 0)
1965 if (*temp <= 0)
1967 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1969 ba->temp = *temp;
1972 /* Try to deduce lambda from the subtitle */
1973 if (subtitle)
1975 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1977 native_lambda_read=TRUE;
1980 snew(ba->lambda,ba->nset-1);
1981 if (legend == NULL)
1983 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1984 if (ba->nset == 2)
1986 if (!native_lambda_read)
1988 /* Deduce lambda from the file name */
1989 ba->native_lambda = filename2lambda(fn);
1990 native_lambda_read=TRUE;
1992 ba->lambda[0] = ba->native_lambda;
1994 else
1996 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1999 else
2001 for(i=0; i<ba->nset-1; i++)
2003 gmx_bool is_dhdl=(i==0);
2004 /* Read lambda from the legend */
2005 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
2007 if (is_dhdl && !native_lambda_read)
2009 ba->native_lambda = ba->lambda[i];
2010 native_lambda_read=TRUE;
2015 if (!native_lambda_read)
2017 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2020 /* Reorder the data */
2021 for(i=1; i<ba->nset; i++)
2023 ba->y[i-1] = ba->y[i];
2025 if (legend != NULL)
2027 for(i=0; i<ba->nset-1; i++)
2029 sfree(legend[i]);
2031 sfree(legend);
2033 ba->nset--;
2036 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2038 xvg_t *barsim;
2039 samples_t *s;
2040 int i;
2041 double *lambda;
2043 snew(barsim,1);
2045 read_bar_xvg_lowlevel(fn, temp, barsim);
2047 if (barsim->nset <1 )
2049 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2052 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2054 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2056 *temp=barsim->temp;
2058 /* now create a series of samples_t */
2059 snew(s, barsim->nset);
2060 for(i=0;i<barsim->nset;i++)
2062 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2063 barsim->temp, lambda_same(barsim->native_lambda,
2064 barsim->lambda[i]),
2065 fn);
2066 s[i].du=barsim->y[i];
2067 s[i].ndu=barsim->np[i];
2068 s[i].t=barsim->t;
2070 lambda_list_insert_sample(lambda_head, s+i);
2072 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2073 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2074 for(i=0;i<barsim->nset;i++)
2076 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2078 printf("\n\n");
2081 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2082 double start_time, double delta_time,
2083 double native_lambda, double temp,
2084 double *last_t, const char *filename)
2086 int j;
2087 gmx_bool allocated;
2088 double foreign_lambda;
2089 int derivative;
2090 samples_t *s; /* convenience pointer */
2091 int startj;
2093 /* check the block types etc. */
2094 if ( (blk->nsub < 3) ||
2095 (blk->sub[0].type != xdr_datatype_int) ||
2096 (blk->sub[1].type != xdr_datatype_double) ||
2098 (blk->sub[2].type != xdr_datatype_float) &&
2099 (blk->sub[2].type != xdr_datatype_double)
2100 ) ||
2101 (blk->sub[0].nr < 1) ||
2102 (blk->sub[1].nr < 1) )
2104 gmx_fatal(FARGS,
2105 "Unexpected/corrupted block data in file %s around time %g.",
2106 filename, start_time);
2109 derivative = blk->sub[0].ival[0];
2110 foreign_lambda = blk->sub[1].dval[0];
2112 if (! *smp)
2114 /* initialize the samples structure if it's empty. */
2115 snew(*smp, 1);
2116 samples_init(*smp, native_lambda, foreign_lambda, temp,
2117 derivative!=0, filename);
2118 (*smp)->start_time=start_time;
2119 (*smp)->delta_time=delta_time;
2122 /* set convenience pointer */
2123 s=*smp;
2125 /* now double check */
2126 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2127 ( (derivative!=0) != (s->derivative!=0) ) )
2129 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2130 foreign_lambda, s->foreign_lambda);
2131 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2132 derivative, s->derivative);
2133 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2134 filename, start_time);
2137 /* make room for the data */
2138 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2140 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2141 blk->sub[2].nr*2 : s->ndu_alloc;
2142 srenew(s->du_alloc, s->ndu_alloc);
2143 s->du=s->du_alloc;
2145 startj = s->ndu;
2146 s->ndu += blk->sub[2].nr;
2147 s->ntot += blk->sub[2].nr;
2148 *ndu = blk->sub[2].nr;
2150 /* and copy the data*/
2151 for(j=0;j<blk->sub[2].nr;j++)
2153 if (blk->sub[2].type == xdr_datatype_float)
2155 s->du[startj+j] = blk->sub[2].fval[j];
2157 else
2159 s->du[startj+j] = blk->sub[2].dval[j];
2162 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2164 *last_t = start_time + blk->sub[2].nr*delta_time;
2168 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2169 double start_time, double delta_time,
2170 double native_lambda, double temp,
2171 double *last_t, const char *filename)
2173 int i,j;
2174 samples_t *s;
2175 int nhist;
2176 double foreign_lambda;
2177 int derivative;
2178 int nbins[2];
2180 /* check the block types etc. */
2181 if ( (blk->nsub < 2) ||
2182 (blk->sub[0].type != xdr_datatype_double) ||
2183 (blk->sub[1].type != xdr_datatype_large_int) ||
2184 (blk->sub[0].nr < 2) ||
2185 (blk->sub[1].nr < 2) )
2187 gmx_fatal(FARGS,
2188 "Unexpected/corrupted block data in file %s around time %g",
2189 filename, start_time);
2192 nhist=blk->nsub-2;
2193 if (nhist == 0)
2195 return NULL;
2197 if (nhist > 2)
2199 gmx_fatal(FARGS,
2200 "Unexpected/corrupted block data in file %s around time %g",
2201 filename, start_time);
2204 snew(s, 1);
2205 *nsamples=1;
2207 foreign_lambda=blk->sub[0].dval[0];
2208 derivative=(int)(blk->sub[1].lval[1]);
2209 if (derivative)
2210 foreign_lambda=native_lambda;
2212 samples_init(s, native_lambda, foreign_lambda, temp,
2213 derivative!=0, filename);
2214 snew(s->hist, 1);
2216 for(i=0;i<nhist;i++)
2218 nbins[i] = blk->sub[i+2].nr;
2221 hist_init(s->hist, nhist, nbins);
2223 for(i=0;i<nhist;i++)
2225 s->hist->x0[i]=blk->sub[1].lval[2+i];
2226 s->hist->dx[i] = blk->sub[0].dval[1];
2227 if (i==1)
2228 s->hist->dx[i] = - s->hist->dx[i];
2231 s->hist->start_time = start_time;
2232 s->hist->delta_time = delta_time;
2233 s->start_time = start_time;
2234 s->delta_time = delta_time;
2236 for(i=0;i<nhist;i++)
2238 int nbin;
2239 gmx_large_int_t sum=0;
2241 for(j=0;j<s->hist->nbin[i];j++)
2243 int binv=(int)(blk->sub[i+2].ival[j]);
2245 s->hist->bin[i][j] = binv;
2246 sum += binv;
2249 if (i==0)
2251 s->ntot = sum;
2252 s->hist->sum = sum;
2254 else
2256 if (s->ntot != sum)
2258 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2259 filename);
2264 if (start_time + s->hist->sum*delta_time > *last_t)
2266 *last_t = start_time + s->hist->sum*delta_time;
2268 return s;
2272 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2274 int i;
2275 ener_file_t fp;
2276 t_enxframe *fr;
2277 int nre;
2278 gmx_enxnm_t *enm=NULL;
2279 double first_t=-1;
2280 double last_t=-1;
2281 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2282 int *nhists=NULL; /* array to keep count & print at end */
2283 int *npts=NULL; /* array to keep count & print at end */
2284 double *lambdas=NULL; /* array to keep count & print at end */
2285 double native_lambda=-1;
2286 double end_time; /* the end time of the last batch of samples */
2287 int nsamples=0;
2289 fp = open_enx(fn,"r");
2290 do_enxnms(fp,&nre,&enm);
2291 snew(fr, 1);
2293 while(do_enx(fp, fr))
2295 /* count the data blocks */
2296 int nblocks_raw=0;
2297 int nblocks_hist=0;
2298 int nlam=0;
2299 int k;
2300 /* DHCOLL block information: */
2301 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2302 double rtemp=0;
2304 /* count the blocks and handle collection information: */
2305 for(i=0;i<fr->nblock;i++)
2307 if (fr->block[i].id == enxDHHIST )
2308 nblocks_hist++;
2309 if ( fr->block[i].id == enxDH )
2310 nblocks_raw++;
2311 if (fr->block[i].id == enxDHCOLL )
2313 nlam++;
2314 if ( (fr->block[i].nsub < 1) ||
2315 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2316 (fr->block[i].sub[0].nr < 5))
2318 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2321 /* read the data from the DHCOLL block */
2322 rtemp = fr->block[i].sub[0].dval[0];
2323 start_time = fr->block[i].sub[0].dval[1];
2324 delta_time = fr->block[i].sub[0].dval[2];
2325 start_lambda = fr->block[i].sub[0].dval[3];
2326 delta_lambda = fr->block[i].sub[0].dval[4];
2328 if (delta_lambda>0)
2330 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2332 if ( ( *temp != rtemp) && (*temp > 0) )
2334 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2336 *temp=rtemp;
2338 if (first_t < 0)
2339 first_t=start_time;
2343 if (nlam != 1)
2345 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2347 if (nblocks_raw > 0 && nblocks_hist > 0 )
2349 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2352 if (nsamples > 0)
2354 /* check the native lambda */
2355 if (!lambda_same(start_lambda, native_lambda) )
2357 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2358 fn, native_lambda, start_lambda, start_time);
2360 /* check the number of samples against the previous number */
2361 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2363 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2364 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2366 /* check whether last iterations's end time matches with
2367 the currrent start time */
2368 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2370 /* it didn't. We need to store our samples and reallocate */
2371 for(i=0;i<nsamples;i++)
2373 if (samples_rawdh[i])
2375 /* insert it into the existing list */
2376 lambda_list_insert_sample(lambda_head,
2377 samples_rawdh[i]);
2378 /* and make sure we'll allocate a new one this time
2379 around */
2380 samples_rawdh[i]=NULL;
2385 else
2387 /* this is the first round; allocate the associated data
2388 structures */
2389 native_lambda=start_lambda;
2390 nsamples=nblocks_raw+nblocks_hist;
2391 snew(nhists, nsamples);
2392 snew(npts, nsamples);
2393 snew(lambdas, nsamples);
2394 snew(samples_rawdh, nsamples);
2395 for(i=0;i<nsamples;i++)
2397 nhists[i]=0;
2398 npts[i]=0;
2399 lambdas[i]=-1;
2400 samples_rawdh[i]=NULL; /* init to NULL so we know which
2401 ones contain values */
2405 /* and read them */
2406 k=0; /* counter for the lambdas, etc. arrays */
2407 for(i=0;i<fr->nblock;i++)
2409 if (fr->block[i].id == enxDH)
2411 int ndu;
2412 read_edr_rawdh_block(&(samples_rawdh[k]),
2413 &ndu,
2414 &(fr->block[i]),
2415 start_time, delta_time,
2416 start_lambda, rtemp,
2417 &last_t, fn);
2418 npts[k] += ndu;
2419 if (samples_rawdh[k])
2421 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2423 k++;
2425 else if (fr->block[i].id == enxDHHIST)
2427 int j;
2428 int nb=0;
2429 samples_t *s; /* this is where the data will go */
2430 s=read_edr_hist_block(&nb, &(fr->block[i]),
2431 start_time, delta_time,
2432 start_lambda, rtemp,
2433 &last_t, fn);
2434 nhists[k] += nb;
2435 if (nb>0)
2437 lambdas[k]= s->foreign_lambda;
2439 k++;
2440 /* and insert the new sample immediately */
2441 for(j=0;j<nb;j++)
2443 lambda_list_insert_sample(lambda_head, s+j);
2448 /* Now store all our extant sample collections */
2449 for(i=0;i<nsamples;i++)
2451 if (samples_rawdh[i])
2453 /* insert it into the existing list */
2454 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2459 fprintf(stderr, "\n");
2460 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2461 fn, first_t, last_t, native_lambda);
2462 for(i=0;i<nsamples;i++)
2464 if (nhists[i] > 0)
2466 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2468 else
2470 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2473 printf("\n\n");
2474 sfree(npts);
2475 sfree(nhists);
2476 sfree(lambdas);
2480 int gmx_bar(int argc,char *argv[])
2482 static const char *desc[] = {
2483 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2484 "Bennett's acceptance ratio method (BAR). It also automatically",
2485 "adds series of individual free energies obtained with BAR into",
2486 "a combined free energy estimate.[PAR]",
2488 "Every individual BAR free energy difference relies on two ",
2489 "simulations at different states: say state A and state B, as",
2490 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2491 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2492 "average of the Hamiltonian difference of state B given state A and",
2493 "vice versa. If the Hamiltonian does not depend linearly on [GRK]lambda[grk]",
2494 "(in which case we can extrapolate the derivative of the Hamiltonian",
2495 "with respect to [GRK]lambda[grk], as is the default when [TT]free_energy[tt] is on),",
2496 "the energy differences to the other state need to be calculated",
2497 "explicitly during the simulation. This can be controlled with",
2498 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2500 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2501 "Two types of input files are supported:[BR]",
2502 "[TT]*[tt] Files with only one [IT]y[it]-value, for such files it is assumed ",
2503 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the Hamiltonian depends ",
2504 " linearly on [GRK]lambda[grk]. The [GRK]lambda[grk] value of the simulation is inferred ",
2505 " from the subtitle (if present), otherwise from a number in the",
2506 " subdirectory in the file name.",
2507 "[BR]",
2508 "[TT]*[tt] Files with more than one [IT]y[it]-value. The files should have columns ",
2509 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. The [GRK]lambda[grk] values are inferred ",
2510 " from the legends: [GRK]lambda[grk] of the simulation from the legend of dH/d[GRK]lambda[grk] ",
2511 " and the foreign [GRK]lambda[grk] values from the legends of Delta H.[PAR]",
2512 "The [GRK]lambda[grk] of the simulation is parsed from [TT]dhdl.xvg[tt] file's legend ",
2513 "containing the string 'dH', the foreign [GRK]lambda[grk] values from the legend ",
2514 "containing the capitalized letters 'D' and 'H'. The temperature ",
2515 "is parsed from the legend line containing 'T ='.[PAR]",
2517 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2518 "These can contain either lists of energy differences (see the",
2519 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of histograms",
2520 "(see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and [TT]dh_hist_spacing[tt]).",
2521 "The temperature and [GRK]lambda[grk] values are automatically deduced from",
2522 "the [TT]ener.edr[tt] file.[PAR]"
2524 "The free energy estimates are determined using BAR with bisection, ",
2525 "with the precision of the output set with [TT]-prec[tt]. ",
2526 "An error estimate taking into account time correlations ",
2527 "is made by splitting the data into blocks and determining ",
2528 "the free energy differences over those blocks and assuming ",
2529 "the blocks are independent. ",
2530 "The final error estimate is determined from the average variance ",
2531 "over 5 blocks. A range of block numbers for error estimation can ",
2532 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2534 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and 'foreign'",
2535 "[GRK]lambda[grk] values, but always assumes independent samples. [BB]Note[bb] that",
2536 "when aggregating energy differences/derivatives with different",
2537 "sampling intervals, this is almost certainly not correct. Usually",
2538 "subsequent energies are correlated and different time intervals mean",
2539 "different degrees of correlation between samples.[PAR]",
2541 "The results are split in two parts: the last part contains the final ",
2542 "results in kJ/mol, together with the error estimate for each part ",
2543 "and the total. The first part contains detailed free energy ",
2544 "difference estimates and phase space overlap measures in units of ",
2545 "kT (together with their computed error estimate). The printed ",
2546 "values are:[BR]",
2547 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2548 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2549 "[TT]*[tt] DG: the free energy estimate.[BR]",
2550 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2551 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2552 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2554 "The relative entropy of both states in each other's ensemble can be ",
2555 "interpreted as a measure of phase space overlap: ",
2556 "the relative entropy s_A of the work samples of lambda_B in the ",
2557 "ensemble of lambda_A (and vice versa for s_B), is a ",
2558 "measure of the 'distance' between Boltzmann distributions of ",
2559 "the two states, that goes to zero for identical distributions. See ",
2560 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2561 "[PAR]",
2562 "The estimate of the expected per-sample standard deviation, as given ",
2563 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2564 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2565 "of the actual statistical error, because it assumes independent samples).[PAR]",
2567 "To get a visual estimate of the phase space overlap, use the ",
2568 "[TT]-oh[tt] option to write series of histograms, together with the ",
2569 "[TT]-nbin[tt] option.[PAR]"
2571 static real begin=0,end=-1,temp=-1;
2572 int nd=2,nbmin=5,nbmax=5;
2573 int nbin=100;
2574 gmx_bool calc_s,calc_v;
2575 t_pargs pa[] = {
2576 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2577 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2578 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2579 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2580 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2581 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2582 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"}
2585 t_filenm fnm[] = {
2586 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2587 { efEDR, "-g", "ener", ffOPTRDMULT },
2588 { efXVG, "-o", "bar", ffOPTWR },
2589 { efXVG, "-oi", "barint", ffOPTWR },
2590 { efXVG, "-oh", "histogram", ffOPTWR }
2592 #define NFILE asize(fnm)
2594 int f,i,j;
2595 int nf=0; /* file counter */
2596 int nbs;
2597 int nfile_tot; /* total number of input files */
2598 int nxvgfile=0;
2599 int nedrfile=0;
2600 char **fxvgnms;
2601 char **fedrnms;
2602 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2603 lambda_t lambda_head; /* the head element */
2604 barres_t *results; /* the results */
2605 int nresults; /* number of results in results array */
2607 double *partsum;
2608 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2609 FILE *fpb,*fpi;
2610 char lamformat[20];
2611 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2612 char ktformat[STRLEN], sktformat[STRLEN];
2613 char kteformat[STRLEN], skteformat[STRLEN];
2614 output_env_t oenv;
2615 double kT, beta;
2616 gmx_bool result_OK=TRUE,bEE=TRUE;
2618 gmx_bool disc_err=FALSE;
2619 double sum_disc_err=0.; /* discretization error */
2620 gmx_bool histrange_err=FALSE;
2621 double sum_histrange_err=0.; /* histogram range error */
2622 double stat_err=0.; /* statistical error */
2624 CopyRight(stderr,argv[0]);
2625 parse_common_args(&argc,argv,
2626 PCA_CAN_VIEW,
2627 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2629 if (opt2bSet("-f", NFILE, fnm))
2631 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2633 if (opt2bSet("-g", NFILE, fnm))
2635 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2638 /* make linked list */
2639 lb=&lambda_head;
2640 lambda_init(lb, 0, 0);
2641 lb->next=lb;
2642 lb->prev=lb;
2645 nfile_tot = nxvgfile + nedrfile;
2647 if (nfile_tot == 0)
2649 gmx_fatal(FARGS,"No input files!");
2652 if (nd < 0)
2654 gmx_fatal(FARGS,"Can not have negative number of digits");
2656 prec = pow(10,-nd);
2658 snew(partsum,(nbmax+1)*(nbmax+1));
2659 nf = 0;
2661 /* read in all files. First xvg files */
2662 for(f=0; f<nxvgfile; f++)
2664 read_bar_xvg(fxvgnms[f],&temp,lb);
2665 nf++;
2667 /* then .edr files */
2668 for(f=0; f<nedrfile; f++)
2670 read_barsim_edr(fedrnms[f],&temp,lb);;
2671 nf++;
2674 /* fix the times to allow for equilibration */
2675 lambdas_impose_times(lb, begin, end);
2677 if (opt2bSet("-oh",NFILE,fnm))
2679 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2682 /* assemble the output structures from the lambdas */
2683 results=barres_list_create(lb, &nresults);
2685 sum_disc_err=barres_list_max_disc_err(results, nresults);
2687 if (nresults == 0)
2689 printf("\nNo results to calculate.\n");
2690 return 0;
2693 #if 1
2694 if (sum_disc_err > prec)
2696 prec=sum_disc_err;
2697 nd = ceil(-log10(prec));
2698 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2700 #endif
2702 sprintf(lamformat,"%%6.3f");
2703 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2704 /* the format strings of the results in kT */
2705 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2706 sprintf( sktformat,"%%%ds",6+nd);
2707 /* the format strings of the errors in kT */
2708 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2709 sprintf( skteformat,"%%%ds",4+nd);
2710 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2711 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2715 fpb = NULL;
2716 if (opt2bSet("-o",NFILE,fnm))
2718 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2719 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2720 "\\lambda",buf,exvggtXYDY,oenv);
2723 fpi = NULL;
2724 if (opt2bSet("-oi",NFILE,fnm))
2726 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2727 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2728 "\\lambda",buf,oenv);
2733 if (nbmin > nbmax)
2734 nbmin=nbmax;
2736 /* first calculate results */
2737 bEE = TRUE;
2738 disc_err = FALSE;
2739 for(f=0; f<nresults; f++)
2741 /* Determine the free energy difference with a factor of 10
2742 * more accuracy than requested for printing.
2744 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2745 &bEE, partsum);
2747 if (results[f].dg_disc_err > prec/10.)
2748 disc_err=TRUE;
2749 if (results[f].dg_histrange_err > prec/10.)
2750 histrange_err=TRUE;
2753 /* print results in kT */
2754 kT = BOLTZ*temp;
2755 beta = 1/kT;
2757 printf("\nTemperature: %g K\n", temp);
2759 printf("\nDetailed results in kT (see help for explanation):\n\n");
2760 printf("%6s ", " lam_A");
2761 printf("%6s ", " lam_B");
2762 printf(sktformat, "DG ");
2763 if (bEE)
2764 printf(skteformat, "+/- ");
2765 if (disc_err)
2766 printf(skteformat, "disc ");
2767 if (histrange_err)
2768 printf(skteformat, "range ");
2769 printf(sktformat, "s_A ");
2770 if (bEE)
2771 printf(skteformat, "+/- " );
2772 printf(sktformat, "s_B ");
2773 if (bEE)
2774 printf(skteformat, "+/- " );
2775 printf(sktformat, "stdev ");
2776 if (bEE)
2777 printf(skteformat, "+/- ");
2778 printf("\n");
2779 for(f=0; f<nresults; f++)
2781 printf(lamformat, results[f].a->native_lambda);
2782 printf(" ");
2783 printf(lamformat, results[f].b->native_lambda);
2784 printf(" ");
2785 printf(ktformat, results[f].dg);
2786 printf(" ");
2787 if (bEE)
2789 printf(kteformat, results[f].dg_err);
2790 printf(" ");
2792 if (disc_err)
2794 printf(kteformat, results[f].dg_disc_err);
2795 printf(" ");
2797 if (histrange_err)
2799 printf(kteformat, results[f].dg_histrange_err);
2800 printf(" ");
2802 printf(ktformat, results[f].sa);
2803 printf(" ");
2804 if (bEE)
2806 printf(kteformat, results[f].sa_err);
2807 printf(" ");
2809 printf(ktformat, results[f].sb);
2810 printf(" ");
2811 if (bEE)
2813 printf(kteformat, results[f].sb_err);
2814 printf(" ");
2816 printf(ktformat, results[f].dg_stddev);
2817 printf(" ");
2818 if (bEE)
2820 printf(kteformat, results[f].dg_stddev_err);
2822 printf("\n");
2824 /* Check for negative relative entropy with a 95% certainty. */
2825 if (results[f].sa < -2*results[f].sa_err ||
2826 results[f].sb < -2*results[f].sb_err)
2828 result_OK=FALSE;
2832 if (!result_OK)
2834 printf("\nWARNING: Some of these results violate the Second Law of "
2835 "Thermodynamics: \n"
2836 " This is can be the result of severe undersampling, or "
2837 "(more likely)\n"
2838 " there is something wrong with the simulations.\n");
2842 /* final results in kJ/mol */
2843 printf("\n\nFinal results in kJ/mol:\n\n");
2844 dg_tot = 0;
2845 for(f=0; f<nresults; f++)
2848 if (fpi != NULL)
2850 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2854 if (fpb != NULL)
2856 fprintf(fpb, xvg3format,
2857 0.5*(results[f].a->native_lambda +
2858 results[f].b->native_lambda),
2859 results[f].dg,results[f].dg_err);
2862 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2863 results[f].lambda_b);*/
2864 printf("lambda ");
2865 printf(lamformat, results[f].a->native_lambda);
2866 printf(" - ");
2867 printf(lamformat, results[f].b->native_lambda);
2868 printf(", DG ");
2870 printf(dgformat,results[f].dg*kT);
2871 if (bEE)
2873 printf(" +/- ");
2874 printf(dgformat,results[f].dg_err*kT);
2876 if (histrange_err)
2878 printf(" (max. range err. = ");
2879 printf(dgformat,results[f].dg_histrange_err*kT);
2880 printf(")");
2881 sum_histrange_err += results[f].dg_histrange_err*kT;
2884 printf("\n");
2885 dg_tot += results[f].dg;
2887 printf("\n");
2888 printf("total ");
2889 printf(lamformat, results[0].a->native_lambda);
2890 printf(" - ");
2891 printf(lamformat, results[nresults-1].b->native_lambda);
2892 printf(", DG ");
2894 printf(dgformat,dg_tot*kT);
2895 if (bEE)
2897 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2898 printf(" +/- ");
2899 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2901 printf("\n");
2902 if (disc_err)
2904 printf("\nmaximum discretization error = ");
2905 printf(dgformat,sum_disc_err);
2906 if (bEE && stat_err < sum_disc_err)
2908 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2911 if (histrange_err)
2913 printf("\nmaximum histogram range error = ");
2914 printf(dgformat,sum_histrange_err);
2915 if (bEE && stat_err < sum_histrange_err)
2917 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2921 printf("\n");
2924 if (fpi != NULL)
2926 fprintf(fpi, xvg2format,
2927 results[nresults-1].b->native_lambda, dg_tot);
2928 ffclose(fpi);
2931 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2932 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");
2934 thanx(stderr);
2936 return 0;