Re-organize BlueGene toolchain files
[gromacs.git] / src / tools / gmx_bar.c
blob76446cdc27fdf7e413f1a4900383ac98c4061c1f
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 #include "gmx_header_config.h"
42 #include <math.h>
43 #include <string.h>
44 #include <ctype.h>
45 #include <math.h>
46 #include <float.h>
48 #include "sysstuff.h"
49 #include "typedefs.h"
50 #include "smalloc.h"
51 #include "futil.h"
52 #include "statutil.h"
53 #include "copyrite.h"
54 #include "macros.h"
55 #include "enxio.h"
56 #include "physics.h"
57 #include "gmx_fatal.h"
58 #include "xvgr.h"
59 #include "gmx_ana.h"
60 #include "maths.h"
62 /* Suppress Cygwin compiler warnings from using newlib version of
63 * ctype.h */
64 #ifdef GMX_CYGWIN
65 #undef isdigit
66 #endif
68 /* the dhdl.xvg data from a simulation (actually obsolete, but still
69 here for reading the dhdl.xvg file*/
70 typedef struct xvg_t
72 char *filename;
73 int ftp; /* file type */
74 int nset; /* number of lambdas, including dhdl */
75 int *np; /* number of data points (du or hists) per lambda */
76 int np_alloc; /* number of points (du or hists) allocated */
77 double temp; /* temperature */
78 double *lambda; /* the lambdas (of first index for y). */
79 double *t; /* the times (of second index for y) */
80 double **y; /* the dU values. y[0] holds the derivative, while
81 further ones contain the energy differences between
82 the native lambda and the 'foreign' lambdas. */
84 double native_lambda; /* the native lambda */
86 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
87 } xvg_t;
91 typedef struct hist_t
93 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
94 double dx[2]; /* the histogram spacing. The reverse
95 dx is the negative of the forward dx.*/
96 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
97 point(s) as int */
99 int nbin[2]; /* the (forward+reverse) number of bins */
100 gmx_large_int_t sum; /* the total number of counts. Must be
101 the same for forward + reverse. */
102 int nhist; /* number of hist datas (forward or reverse) */
104 double start_time, delta_time; /* start time, end time of histogram */
105 } hist_t;
108 /* an aggregate of samples for partial free energy calculation */
109 typedef struct samples_t
111 double native_lambda;
112 double foreign_lambda;
113 double temp; /* the temperature */
114 gmx_bool derivative; /* whether this sample is a derivative */
116 /* The samples come either as either delta U lists: */
117 int ndu; /* the number of delta U samples */
118 double *du; /* the delta u's */
119 double *t; /* the times associated with those samples, or: */
120 double start_time,delta_time;/*start time and delta time for linear time*/
122 /* or as histograms: */
123 hist_t *hist; /* a histogram */
125 /* allocation data: (not NULL for data 'owned' by this struct) */
126 double *du_alloc, *t_alloc; /* allocated delta u arrays */
127 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
128 hist_t *hist_alloc; /* allocated hist */
130 gmx_large_int_t ntot; /* total number of samples */
131 const char *filename; /* the file name this sample comes from */
132 } samples_t;
134 /* a sample range (start to end for du-style data, or boolean
135 for both du-style data and histograms */
136 typedef struct sample_range_t
138 int start, end; /* start and end index for du style data */
139 gmx_bool use; /* whether to use this sample */
141 samples_t *s; /* the samples this range belongs to */
142 } sample_range_t;
145 /* a collection of samples for a partial free energy calculation
146 (i.e. the collection of samples from one native lambda to one
147 foreign lambda) */
148 typedef struct sample_coll_t
150 double native_lambda; /* these should be the same for all samples in the histogram?*/
151 double foreign_lambda; /* collection */
152 double temp; /* the temperature */
154 int nsamples; /* the number of samples */
155 samples_t **s; /* the samples themselves */
156 sample_range_t *r; /* the sample ranges */
157 int nsamples_alloc; /* number of allocated samples */
159 gmx_large_int_t ntot; /* total number of samples in the ranges of
160 this collection */
162 struct sample_coll_t *next, *prev; /* next and previous in the list */
163 } sample_coll_t;
165 /* all the samples associated with a lambda point */
166 typedef struct lambda_t
168 double lambda; /* the native lambda (at start time if dynamic) */
169 double temp; /* temperature */
171 sample_coll_t *sc; /* the samples */
173 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
175 struct lambda_t *next, *prev; /* the next and prev in the list */
176 } lambda_t;
179 /* calculated values. */
180 typedef struct {
181 sample_coll_t *a, *b; /* the simulation data */
183 double dg; /* the free energy difference */
184 double dg_err; /* the free energy difference */
186 double dg_disc_err; /* discretization error */
187 double dg_histrange_err; /* histogram range error */
189 double sa; /* relative entropy of b in state a */
190 double sa_err; /* error in sa */
191 double sb; /* relative entropy of a in state b */
192 double sb_err; /* error in sb */
194 double dg_stddev; /* expected dg stddev per sample */
195 double dg_stddev_err; /* error in dg_stddev */
196 } barres_t;
201 static void hist_init(hist_t *h, int nhist, int *nbin)
203 int i;
204 if (nhist>2)
206 gmx_fatal(FARGS, "histogram with more than two sets of data!");
208 for(i=0;i<nhist;i++)
210 snew(h->bin[i], nbin[i]);
211 h->x0[i]=0;
212 h->nbin[i]=nbin[i];
213 h->start_time=h->delta_time=0;
214 h->dx[i]=0;
216 h->sum=0;
217 h->nhist=nhist;
220 static void hist_destroy(hist_t *h)
222 sfree(h->bin);
226 static void xvg_init(xvg_t *ba)
228 ba->filename=NULL;
229 ba->nset=0;
230 ba->np_alloc=0;
231 ba->np=NULL;
232 ba->y=NULL;
235 static void samples_init(samples_t *s, double native_lambda,
236 double foreign_lambda, double temp,
237 gmx_bool derivative, const char *filename)
239 s->native_lambda=native_lambda;
240 s->foreign_lambda=foreign_lambda;
241 s->temp=temp;
242 s->derivative=derivative;
244 s->ndu=0;
245 s->du=NULL;
246 s->t=NULL;
247 s->start_time = s->delta_time = 0;
248 s->hist=NULL;
249 s->du_alloc=NULL;
250 s->t_alloc=NULL;
251 s->hist_alloc=NULL;
252 s->ndu_alloc=0;
253 s->nt_alloc=0;
255 s->ntot=0;
256 s->filename=filename;
259 static void sample_range_init(sample_range_t *r, samples_t *s)
261 r->start=0;
262 r->end=s->ndu;
263 r->use=TRUE;
264 r->s=NULL;
267 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
268 double foreign_lambda, double temp)
270 sc->native_lambda = native_lambda;
271 sc->foreign_lambda = foreign_lambda;
272 sc->temp = temp;
274 sc->nsamples=0;
275 sc->s=NULL;
276 sc->r=NULL;
277 sc->nsamples_alloc=0;
279 sc->ntot=0;
280 sc->next=sc->prev=NULL;
283 static void sample_coll_destroy(sample_coll_t *sc)
285 /* don't free the samples themselves */
286 sfree(sc->r);
287 sfree(sc->s);
291 static void lambda_init(lambda_t *l, double native_lambda, double temp)
293 l->lambda=native_lambda;
294 l->temp=temp;
296 l->next=NULL;
297 l->prev=NULL;
299 l->sc=&(l->sc_head);
301 sample_coll_init(l->sc, native_lambda, 0., 0.);
302 l->sc->next=l->sc;
303 l->sc->prev=l->sc;
306 static void barres_init(barres_t *br)
308 br->dg=0;
309 br->dg_err=0;
310 br->sa=0;
311 br->sa_err=0;
312 br->sb=0;
313 br->sb_err=0;
314 br->dg_stddev=0;
315 br->dg_stddev_err=0;
317 br->a=NULL;
318 br->b=NULL;
324 static gmx_bool lambda_same(double lambda1, double lambda2)
326 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
329 /* calculate the total number of samples in a sample collection */
330 static void sample_coll_calc_ntot(sample_coll_t *sc)
332 int i;
334 sc->ntot=0;
335 for(i=0;i<sc->nsamples;i++)
337 if (sc->r[i].use)
339 if (sc->s[i]->hist)
341 sc->ntot += sc->s[i]->ntot;
343 else
345 sc->ntot += sc->r[i].end - sc->r[i].start;
352 /* find the barsamples_t associated with a lambda that corresponds to
353 a specific foreign lambda */
354 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
355 double foreign_lambda)
357 sample_coll_t *sc=l->sc->next;
359 while(sc != l->sc)
361 if (lambda_same(sc->foreign_lambda,foreign_lambda))
363 return sc;
365 sc=sc->next;
368 return NULL;
371 /* insert li into an ordered list of lambda_colls */
372 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
374 sample_coll_t *scn=l->sc->next;
375 while ( (scn!=l->sc) )
377 if (scn->foreign_lambda > sc->foreign_lambda)
378 break;
379 scn=scn->next;
381 /* now insert it before the found scn */
382 sc->next=scn;
383 sc->prev=scn->prev;
384 scn->prev->next=sc;
385 scn->prev=sc;
388 /* insert li into an ordered list of lambdas */
389 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
391 lambda_t *lc=head->next;
392 while (lc!=head)
394 if (lc->lambda > li->lambda)
395 break;
396 lc=lc->next;
398 /* now insert ourselves before the found lc */
399 li->next=lc;
400 li->prev=lc->prev;
401 lc->prev->next=li;
402 lc->prev=li;
405 /* insert a sample and a sample_range into a sample_coll. The
406 samples are stored as a pointer, the range is copied. */
407 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
408 sample_range_t *r)
410 /* first check if it belongs here */
411 if (sc->temp != s->temp)
413 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
414 s->filename, sc->next->s[0]->filename);
416 if (sc->native_lambda != s->native_lambda)
418 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
419 s->filename, sc->next->s[0]->filename);
421 if (sc->foreign_lambda != s->foreign_lambda)
423 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
424 s->filename, sc->next->s[0]->filename);
427 /* check if there's room */
428 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
430 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
431 srenew(sc->s, sc->nsamples_alloc);
432 srenew(sc->r, sc->nsamples_alloc);
434 sc->s[sc->nsamples]=s;
435 sc->r[sc->nsamples]=*r;
436 sc->nsamples++;
438 sample_coll_calc_ntot(sc);
441 /* insert a sample into a lambda_list, creating the right sample_coll if
442 neccesary */
443 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
445 gmx_bool found=FALSE;
446 sample_coll_t *sc;
447 sample_range_t r;
449 lambda_t *l=head->next;
451 /* first search for the right lambda_t */
452 while(l != head)
454 if (lambda_same(l->lambda, s->native_lambda) )
456 found=TRUE;
457 break;
459 l=l->next;
462 if (!found)
464 snew(l, 1); /* allocate a new one */
465 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
466 lambda_insert_lambda(head, l); /* add it to the list */
469 /* now look for a sample collection */
470 sc=lambda_find_sample_coll(l, s->foreign_lambda);
471 if (!sc)
473 snew(sc, 1); /* allocate a new one */
474 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
475 lambda_insert_sample_coll(l, sc);
478 /* now insert the samples into the sample coll */
479 sample_range_init(&r, s);
480 sample_coll_insert_sample(sc, s, &r);
484 /* make a histogram out of a sample collection */
485 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
486 int *nbin_alloc, int *nbin,
487 double *dx, double *xmin, int nbin_default)
489 int i,j,k;
490 gmx_bool dx_set=FALSE;
491 gmx_bool xmin_set=FALSE;
493 gmx_bool xmax_set=FALSE;
494 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
495 limits of a histogram */
496 double xmax=-1;
498 /* first determine dx and xmin; try the histograms */
499 for(i=0;i<sc->nsamples;i++)
501 if (sc->s[i]->hist)
503 hist_t *hist=sc->s[i]->hist;
504 for(k=0;k<hist->nhist;k++)
506 double hdx=hist->dx[k];
507 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
509 /* we use the biggest dx*/
510 if ( (!dx_set) || hist->dx[0] > *dx)
512 dx_set=TRUE;
513 *dx = hist->dx[0];
515 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
517 xmin_set=TRUE;
518 *xmin = (hist->x0[k]*hdx);
521 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
523 xmax_set=TRUE;
524 xmax = xmax_now;
525 if (hist->bin[k][hist->nbin[k]-1] != 0)
526 xmax_set_hard=TRUE;
528 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
530 xmax_set_hard=TRUE;
531 xmax = xmax_now;
536 /* and the delta us */
537 for(i=0;i<sc->nsamples;i++)
539 if (sc->s[i]->ndu > 0)
541 /* determine min and max */
542 int starti=sc->r[i].start;
543 int endi=sc->r[i].end;
544 double du_xmin=sc->s[i]->du[starti];
545 double du_xmax=sc->s[i]->du[starti];
546 for(j=starti+1;j<endi;j++)
548 if (sc->s[i]->du[j] < du_xmin)
549 du_xmin = sc->s[i]->du[j];
550 if (sc->s[i]->du[j] > du_xmax)
551 du_xmax = sc->s[i]->du[j];
554 /* and now change the limits */
555 if ( (!xmin_set) || (du_xmin < *xmin) )
557 xmin_set=TRUE;
558 *xmin=du_xmin;
560 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
562 xmax_set=TRUE;
563 xmax=du_xmax;
568 if (!xmax_set || !xmin_set)
570 *nbin=0;
571 return;
575 if (!dx_set)
577 *nbin=nbin_default;
578 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
579 be 0, and we count from 0 */
581 else
583 *nbin=(xmax-(*xmin))/(*dx);
586 if (*nbin > *nbin_alloc)
588 *nbin_alloc=*nbin;
589 srenew(*bin, *nbin_alloc);
592 /* reset the histogram */
593 for(i=0;i<(*nbin);i++)
595 (*bin)[i] = 0;
598 /* now add the actual data */
599 for(i=0;i<sc->nsamples;i++)
601 if (sc->s[i]->hist)
603 hist_t *hist=sc->s[i]->hist;
604 for(k=0;k<hist->nhist;k++)
606 double hdx = hist->dx[k];
607 double xmin_hist=hist->x0[k]*hdx;
608 for(j=0;j<hist->nbin[k];j++)
610 /* calculate the bin corresponding to the middle of the
611 original bin */
612 double x=hdx*(j+0.5) + xmin_hist;
613 int binnr=(int)((x-(*xmin))/(*dx));
615 if (binnr >= *nbin || binnr<0)
616 binnr = (*nbin)-1;
618 (*bin)[binnr] += hist->bin[k][j];
622 else
624 int starti=sc->r[i].start;
625 int endi=sc->r[i].end;
626 for(j=starti;j<endi;j++)
628 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
629 if (binnr >= *nbin || binnr<0)
630 binnr = (*nbin)-1;
632 (*bin)[binnr] ++;
638 /* write a collection of histograms to a file */
639 void lambdas_histogram(lambda_t *bl_head, const char *filename,
640 int nbin_default, const output_env_t oenv)
642 char label_x[STRLEN];
643 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
644 const char *title="N(\\DeltaH)";
645 const char *label_y="Samples";
646 FILE *fp;
647 lambda_t *bl;
648 int nsets=0;
649 char **setnames=NULL;
650 gmx_bool first_set=FALSE;
651 /* histogram data: */
652 int *hist=NULL;
653 int nbin=0;
654 int nbin_alloc=0;
655 double dx=0;
656 double min=0;
657 int i;
659 printf("\nWriting histogram to %s\n", filename);
660 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
662 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
664 /* first get all the set names */
665 bl=bl_head->next;
666 /* iterate over all lambdas */
667 while(bl!=bl_head)
669 sample_coll_t *sc=bl->sc->next;
671 /* iterate over all samples */
672 while(sc!=bl->sc)
674 nsets++;
675 srenew(setnames, nsets);
676 snew(setnames[nsets-1], STRLEN);
677 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
679 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
680 deltag, lambda, sc->foreign_lambda, lambda,
681 sc->native_lambda);
683 else
685 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
686 dhdl, lambda, sc->native_lambda);
688 sc=sc->next;
691 bl=bl->next;
693 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
696 /* now make the histograms */
697 bl=bl_head->next;
698 /* iterate over all lambdas */
699 while(bl!=bl_head)
701 sample_coll_t *sc=bl->sc->next;
703 /* iterate over all samples */
704 while(sc!=bl->sc)
706 if (!first_set)
708 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
711 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
712 nbin_default);
714 for(i=0;i<nbin;i++)
716 double xmin=i*dx + min;
717 double xmax=(i+1)*dx + min;
719 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
722 first_set=FALSE;
723 sc=sc->next;
726 bl=bl->next;
729 if(hist)
730 sfree(hist);
732 xvgrclose(fp);
735 /* create a collection (array) of barres_t object given a ordered linked list
736 of barlamda_t sample collections */
737 static barres_t *barres_list_create(lambda_t *bl_head, int *nres,
738 gmx_bool use_dhdl)
740 lambda_t *bl;
741 int nlambda=0;
742 barres_t *res;
743 int i;
744 gmx_bool dhdl=FALSE;
745 gmx_bool first=TRUE;
747 /* first count the lambdas */
748 bl=bl_head->next;
749 while(bl!=bl_head)
751 nlambda++;
752 bl=bl->next;
754 snew(res, nlambda-1);
756 /* next put the right samples in the res */
757 *nres=0;
758 bl=bl_head->next->next; /* we start with the second one. */
759 while(bl!=bl_head)
761 sample_coll_t *sc, *scprev;
762 barres_t *br=&(res[*nres]);
763 /* there is always a previous one. we search for that as a foreign
764 lambda: */
765 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
766 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
768 barres_init(br);
770 if (use_dhdl)
772 /* we use dhdl */
774 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
775 sc=lambda_find_sample_coll(bl, bl->lambda);
777 if (first)
779 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
780 dhdl=TRUE;
782 if (!dhdl)
784 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
787 else if (!scprev && !sc)
789 gmx_fatal(FARGS,"There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
792 /* normal delta H */
793 if (!scprev)
795 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->lambda,bl->prev->lambda);
797 if (!sc)
799 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->prev->lambda,bl->lambda);
801 br->a = scprev;
802 br->b = sc;
804 first=FALSE;
805 (*nres)++;
806 bl=bl->next;
808 return res;
811 /* estimate the maximum discretization error */
812 static double barres_list_max_disc_err(barres_t *res, int nres)
814 int i,j;
815 double disc_err=0.;
816 double delta_lambda;
818 for(i=0;i<nres;i++)
820 barres_t *br=&(res[i]);
822 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
824 for(j=0;j<br->a->nsamples;j++)
826 if (br->a->s[j]->hist)
828 double Wfac=1.;
829 if (br->a->s[j]->derivative)
830 Wfac = delta_lambda;
832 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
835 for(j=0;j<br->b->nsamples;j++)
837 if (br->b->s[j]->hist)
839 double Wfac=1.;
840 if (br->b->s[j]->derivative)
841 Wfac = delta_lambda;
842 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
846 return disc_err;
850 /* impose start and end times on a sample collection, updating sample_ranges */
851 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
852 double end_t)
854 int i;
855 for(i=0;i<sc->nsamples;i++)
857 samples_t *s=sc->s[i];
858 sample_range_t *r=&(sc->r[i]);
859 if (s->hist)
861 double end_time=s->hist->delta_time*s->hist->sum +
862 s->hist->start_time;
863 if (s->hist->start_time < begin_t || end_time > end_t)
865 r->use=FALSE;
868 else
870 if (!s->t)
872 double end_time;
873 if (s->start_time < begin_t)
875 r->start=(int)((begin_t - s->start_time)/s->delta_time);
877 end_time=s->delta_time*s->ndu + s->start_time;
878 if (end_time > end_t)
880 r->end=(int)((end_t - s->start_time)/s->delta_time);
883 else
885 int j;
886 for(j=0;j<s->ndu;j++)
888 if (s->t[j] <begin_t)
890 r->start = j;
893 if (s->t[j] >= end_t)
895 r->end = j;
896 break;
900 if (r->start > r->end)
902 r->use=FALSE;
906 sample_coll_calc_ntot(sc);
909 static void lambdas_impose_times(lambda_t *head, double begin, double end)
911 double first_t, last_t;
912 double begin_t, end_t;
913 lambda_t *lc;
914 int j;
916 if (begin<=0 && end<0)
918 return;
921 /* first determine the global start and end times */
922 first_t = -1;
923 last_t = -1;
924 lc=head->next;
925 while(lc!=head)
927 sample_coll_t *sc=lc->sc->next;
928 while(sc != lc->sc)
930 for(j=0;j<sc->nsamples;j++)
932 double start_t,end_t;
934 start_t = sc->s[j]->start_time;
935 end_t = sc->s[j]->start_time;
936 if (sc->s[j]->hist)
938 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
940 else
942 if (sc->s[j]->t)
944 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
946 else
948 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
952 if (start_t < first_t || first_t<0)
954 first_t=start_t;
956 if (end_t > last_t)
958 last_t=end_t;
961 sc=sc->next;
963 lc=lc->next;
966 /* calculate the actual times */
967 if (begin > 0 )
969 begin_t = begin;
971 else
973 begin_t = first_t;
976 if (end >0 )
978 end_t = end;
980 else
982 end_t = last_t;
984 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
986 if (begin_t > end_t)
988 return;
990 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
992 /* then impose them */
993 lc=head->next;
994 while(lc!=head)
996 sample_coll_t *sc=lc->sc->next;
997 while(sc != lc->sc)
999 sample_coll_impose_times(sc, begin_t, end_t);
1000 sc=sc->next;
1002 lc=lc->next;
1007 /* create subsample i out of ni from an existing sample_coll */
1008 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1009 sample_coll_t *sc_orig,
1010 int i, int ni)
1012 int j;
1013 int hist_start, hist_end;
1015 gmx_large_int_t ntot_start;
1016 gmx_large_int_t ntot_end;
1017 gmx_large_int_t ntot_so_far;
1019 *sc = *sc_orig; /* just copy all fields */
1021 /* allocate proprietary memory */
1022 snew(sc->s, sc_orig->nsamples);
1023 snew(sc->r, sc_orig->nsamples);
1025 /* copy the samples */
1026 for(j=0;j<sc_orig->nsamples;j++)
1028 sc->s[j] = sc_orig->s[j];
1029 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1032 /* now fix start and end fields */
1033 /* the casts avoid possible overflows */
1034 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1035 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1036 ntot_so_far = 0;
1037 for(j=0;j<sc->nsamples;j++)
1039 gmx_large_int_t ntot_add;
1040 gmx_large_int_t new_start, new_end;
1042 if (sc->r[j].use)
1044 if (sc->s[j]->hist)
1046 ntot_add = sc->s[j]->hist->sum;
1048 else
1050 ntot_add = sc->r[j].end - sc->r[j].start;
1053 else
1055 ntot_add = 0;
1058 if (!sc->s[j]->hist)
1060 if (ntot_so_far < ntot_start)
1062 /* adjust starting point */
1063 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1065 else
1067 new_start = sc->r[j].start;
1069 /* adjust end point */
1070 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1071 if (new_end > sc->r[j].end)
1073 new_end=sc->r[j].end;
1076 /* check if we're in range at all */
1077 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1079 new_start=0;
1080 new_end=0;
1082 /* and write the new range */
1083 sc->r[j].start=(int)new_start;
1084 sc->r[j].end=(int)new_end;
1086 else
1088 if (sc->r[j].use)
1090 double overlap;
1091 double ntot_start_norm, ntot_end_norm;
1092 /* calculate the amount of overlap of the
1093 desired range (ntot_start -- ntot_end) onto
1094 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1096 /* first calculate normalized bounds
1097 (where 0 is the start of the hist range, and 1 the end) */
1098 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1099 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1101 /* now fix the boundaries */
1102 ntot_start_norm = min(1, max(0., ntot_start_norm));
1103 ntot_end_norm = max(0, min(1., ntot_end_norm));
1105 /* and calculate the overlap */
1106 overlap = ntot_end_norm - ntot_start_norm;
1108 if (overlap > 0.95) /* we allow for 5% slack */
1110 sc->r[j].use = TRUE;
1112 else if (overlap < 0.05)
1114 sc->r[j].use = FALSE;
1116 else
1118 return FALSE;
1122 ntot_so_far += ntot_add;
1124 sample_coll_calc_ntot(sc);
1126 return TRUE;
1129 /* calculate minimum and maximum work values in sample collection */
1130 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1131 double *Wmin, double *Wmax)
1133 int i,j;
1135 *Wmin=FLT_MAX;
1136 *Wmax=-FLT_MAX;
1138 for(i=0;i<sc->nsamples;i++)
1140 samples_t *s=sc->s[i];
1141 sample_range_t *r=&(sc->r[i]);
1142 if (r->use)
1144 if (!s->hist)
1146 for(j=r->start; j<r->end; j++)
1148 *Wmin = min(*Wmin,s->du[j]*Wfac);
1149 *Wmax = max(*Wmax,s->du[j]*Wfac);
1152 else
1154 int hd=0; /* determine the histogram direction: */
1155 double dx;
1156 if ( (s->hist->nhist>1) && (Wfac<0) )
1158 hd=1;
1160 dx=s->hist->dx[hd];
1162 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1164 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1165 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1166 /* look for the highest value bin with values */
1167 if (s->hist->bin[hd][j]>0)
1169 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1170 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1171 break;
1180 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1182 int i;
1183 double sum;
1185 sum = 0;
1187 for(i=0; i<n; i++)
1189 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1192 return sum;
1195 /* calculate the BAR average given a histogram
1197 if type== 0, calculate the best estimate for the average,
1198 if type==-1, calculate the minimum possible value given the histogram
1199 if type== 1, calculate the maximum possible value given the histogram */
1200 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1201 int type)
1203 double sum=0.;
1204 int i;
1205 int max;
1206 /* normalization factor multiplied with bin width and
1207 number of samples (we normalize through M): */
1208 double normdx = 1.;
1209 int hd=0; /* determine the histogram direction: */
1210 double dx;
1212 if ( (hist->nhist>1) && (Wfac<0) )
1214 hd=1;
1216 dx=hist->dx[hd];
1217 max=hist->nbin[hd]-1;
1218 if (type==1)
1220 max=hist->nbin[hd]; /* we also add whatever was out of range */
1223 for(i=0;i<max;i++)
1225 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1226 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1228 sum += pxdx/(1. + exp(x + sbMmDG));
1231 return sum;
1234 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1235 double temp, double tol, int type)
1237 double kT,beta,M;
1238 double DG;
1239 int i,j;
1240 double Wfac1,Wfac2,Wmin,Wmax;
1241 double DG0,DG1,DG2,dDG1;
1242 double sum1,sum2;
1243 double n1, n2; /* numbers of samples as doubles */
1245 kT = BOLTZ*temp;
1246 beta = 1/kT;
1248 /* count the numbers of samples */
1249 n1 = ca->ntot;
1250 n2 = cb->ntot;
1252 M = log(n1/n2);
1254 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1256 /* this is the case when the delta U were calculated directly
1257 (i.e. we're not scaling dhdl) */
1258 Wfac1 = beta;
1259 Wfac2 = beta;
1261 else
1263 /* we're using dhdl, so delta_lambda needs to be a
1264 multiplication factor. */
1265 double delta_lambda=cb->native_lambda-ca->native_lambda;
1266 Wfac1 = beta*delta_lambda;
1267 Wfac2 = -beta*delta_lambda;
1270 if (beta < 1)
1272 /* We print the output both in kT and kJ/mol.
1273 * Here we determine DG in kT, so when beta < 1
1274 * the precision has to be increased.
1276 tol *= beta;
1279 /* Calculate minimum and maximum work to give an initial estimate of
1280 * delta G as their average.
1283 double Wmin1, Wmin2, Wmax1, Wmax2;
1284 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1285 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1287 Wmin=min(Wmin1, Wmin2);
1288 Wmax=max(Wmax1, Wmax2);
1291 DG0 = Wmin;
1292 DG2 = Wmax;
1294 if (debug)
1296 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1298 /* We approximate by bisection: given our initial estimates
1299 we keep checking whether the halfway point is greater or
1300 smaller than what we get out of the BAR averages.
1302 For the comparison we can use twice the tolerance. */
1303 while (DG2 - DG0 > 2*tol)
1305 DG1 = 0.5*(DG0 + DG2);
1307 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1308 DG1);*/
1310 /* calculate the BAR averages */
1311 dDG1=0.;
1313 for(i=0;i<ca->nsamples;i++)
1315 samples_t *s=ca->s[i];
1316 sample_range_t *r=&(ca->r[i]);
1317 if (r->use)
1319 if (s->hist)
1321 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1323 else
1325 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1326 Wfac1, (M-DG1));
1330 for(i=0;i<cb->nsamples;i++)
1332 samples_t *s=cb->s[i];
1333 sample_range_t *r=&(cb->r[i]);
1334 if (r->use)
1336 if (s->hist)
1338 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1340 else
1342 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1343 Wfac2, -(M-DG1));
1348 if (dDG1 < 0)
1350 DG0 = DG1;
1352 else
1354 DG2 = DG1;
1356 if (debug)
1358 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1362 return 0.5*(DG0 + DG2);
1365 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1366 double temp, double dg, double *sa, double *sb)
1368 int i,j;
1369 double W_ab=0.;
1370 double W_ba=0.;
1371 double kT, beta;
1372 double Wfac1, Wfac2;
1373 double n1,n2;
1375 kT = BOLTZ*temp;
1376 beta = 1/kT;
1378 /* count the numbers of samples */
1379 n1 = ca->ntot;
1380 n2 = cb->ntot;
1382 /* to ensure the work values are the same as during the delta_G */
1383 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1385 /* this is the case when the delta U were calculated directly
1386 (i.e. we're not scaling dhdl) */
1387 Wfac1 = beta;
1388 Wfac2 = beta;
1390 else
1392 /* we're using dhdl, so delta_lambda needs to be a
1393 multiplication factor. */
1394 double delta_lambda=cb->native_lambda-ca->native_lambda;
1395 Wfac1 = beta*delta_lambda;
1396 Wfac2 = -beta*delta_lambda;
1399 /* first calculate the average work in both directions */
1400 for(i=0;i<ca->nsamples;i++)
1402 samples_t *s=ca->s[i];
1403 sample_range_t *r=&(ca->r[i]);
1404 if (r->use)
1406 if (!s->hist)
1408 for(j=r->start;j<r->end;j++)
1409 W_ab += Wfac1*s->du[j];
1411 else
1413 /* normalization factor multiplied with bin width and
1414 number of samples (we normalize through M): */
1415 double normdx = 1.;
1416 double dx;
1417 int hd=0; /* histogram direction */
1418 if ( (s->hist->nhist>1) && (Wfac1<0) )
1420 hd=1;
1422 dx=s->hist->dx[hd];
1424 for(j=0;j<s->hist->nbin[0];j++)
1426 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1427 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1428 W_ab += pxdx*x;
1433 W_ab/=n1;
1435 for(i=0;i<cb->nsamples;i++)
1437 samples_t *s=cb->s[i];
1438 sample_range_t *r=&(cb->r[i]);
1439 if (r->use)
1441 if (!s->hist)
1443 for(j=r->start;j<r->end;j++)
1444 W_ba += Wfac1*s->du[j];
1446 else
1448 /* normalization factor multiplied with bin width and
1449 number of samples (we normalize through M): */
1450 double normdx = 1.;
1451 double dx;
1452 int hd=0; /* histogram direction */
1453 if ( (s->hist->nhist>1) && (Wfac2<0) )
1455 hd=1;
1457 dx=s->hist->dx[hd];
1459 for(j=0;j<s->hist->nbin[0];j++)
1461 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1462 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1463 W_ba += pxdx*x;
1468 W_ba/=n2;
1470 /* then calculate the relative entropies */
1471 *sa = (W_ab - dg);
1472 *sb = (W_ba + dg);
1475 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1476 double temp, double dg, double *stddev)
1478 int i,j;
1479 double M;
1480 double sigmafact=0.;
1481 double kT, beta;
1482 double Wfac1, Wfac2;
1483 double n1, n2;
1485 kT = BOLTZ*temp;
1486 beta = 1/kT;
1488 /* count the numbers of samples */
1489 n1 = ca->ntot;
1490 n2 = cb->ntot;
1492 /* to ensure the work values are the same as during the delta_G */
1493 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1495 /* this is the case when the delta U were calculated directly
1496 (i.e. we're not scaling dhdl) */
1497 Wfac1 = beta;
1498 Wfac2 = beta;
1500 else
1502 /* we're using dhdl, so delta_lambda needs to be a
1503 multiplication factor. */
1504 double delta_lambda=cb->native_lambda-ca->native_lambda;
1505 Wfac1 = beta*delta_lambda;
1506 Wfac2 = -beta*delta_lambda;
1509 M = log(n1/n2);
1512 /* calculate average in both directions */
1513 for(i=0;i<ca->nsamples;i++)
1515 samples_t *s=ca->s[i];
1516 sample_range_t *r=&(ca->r[i]);
1517 if (r->use)
1519 if (!s->hist)
1521 for(j=r->start;j<r->end;j++)
1523 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1526 else
1528 /* normalization factor multiplied with bin width and
1529 number of samples (we normalize through M): */
1530 double normdx = 1.;
1531 double dx;
1532 int hd=0; /* histogram direction */
1533 if ( (s->hist->nhist>1) && (Wfac1<0) )
1535 hd=1;
1537 dx=s->hist->dx[hd];
1539 for(j=0;j<s->hist->nbin[0];j++)
1541 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1542 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1544 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1549 for(i=0;i<cb->nsamples;i++)
1551 samples_t *s=cb->s[i];
1552 sample_range_t *r=&(cb->r[i]);
1553 if (r->use)
1555 if (!s->hist)
1557 for(j=r->start;j<r->end;j++)
1559 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1562 else
1564 /* normalization factor multiplied with bin width and
1565 number of samples (we normalize through M): */
1566 double normdx = 1.;
1567 double dx;
1568 int hd=0; /* histogram direction */
1569 if ( (s->hist->nhist>1) && (Wfac2<0) )
1571 hd=1;
1573 dx=s->hist->dx[hd];
1575 for(j=0;j<s->hist->nbin[0];j++)
1577 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1578 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1580 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1586 sigmafact /= (n1 + n2);
1589 /* Eq. 10 from
1590 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1591 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1596 static void calc_bar(barres_t *br, double tol,
1597 int npee_min, int npee_max, gmx_bool *bEE,
1598 double *partsum)
1600 int npee,p;
1601 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1602 for calculated quantities */
1603 int nsample1, nsample2;
1604 double temp=br->a->temp;
1605 int i,j;
1606 double dg_min, dg_max;
1607 gmx_bool have_hist=FALSE;
1609 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1611 br->dg_disc_err = 0.;
1612 br->dg_histrange_err = 0.;
1614 /* check if there are histograms */
1615 for(i=0;i<br->a->nsamples;i++)
1617 if (br->a->r[i].use && br->a->s[i]->hist)
1619 have_hist=TRUE;
1620 break;
1623 if (!have_hist)
1625 for(i=0;i<br->b->nsamples;i++)
1627 if (br->b->r[i].use && br->b->s[i]->hist)
1629 have_hist=TRUE;
1630 break;
1635 /* calculate histogram-specific errors */
1636 if (have_hist)
1638 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1639 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1641 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1643 /* the histogram range error is the biggest of the differences
1644 between the best estimate and the extremes */
1645 br->dg_histrange_err = fabs(dg_max - dg_min);
1647 br->dg_disc_err = 0.;
1648 for(i=0;i<br->a->nsamples;i++)
1650 if (br->a->s[i]->hist)
1651 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1653 for(i=0;i<br->b->nsamples;i++)
1655 if (br->b->s[i]->hist)
1656 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1659 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1661 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1663 dg_sig2 = 0;
1664 sa_sig2 = 0;
1665 sb_sig2 = 0;
1666 stddev_sig2 = 0;
1668 *bEE=TRUE;
1670 sample_coll_t ca, cb;
1672 /* initialize the samples */
1673 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1674 br->a->temp);
1675 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1676 br->b->temp);
1678 for(npee=npee_min; npee<=npee_max; npee++)
1680 double dgs = 0;
1681 double dgs2 = 0;
1682 double dsa = 0;
1683 double dsb = 0;
1684 double dsa2 = 0;
1685 double dsb2 = 0;
1686 double dstddev = 0;
1687 double dstddev2 = 0;
1690 for(p=0; p<npee; p++)
1692 double dgp;
1693 double stddevc;
1694 double sac, sbc;
1695 gmx_bool cac, cbc;
1697 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1698 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1700 if (!cac || !cbc)
1702 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1703 *bEE=FALSE;
1704 if (cac)
1705 sample_coll_destroy(&ca);
1706 if (cbc)
1707 sample_coll_destroy(&cb);
1708 return;
1711 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1712 dgs += dgp;
1713 dgs2 += dgp*dgp;
1715 partsum[npee*(npee_max+1)+p] += dgp;
1717 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1718 dsa += sac;
1719 dsa2 += sac*sac;
1720 dsb += sbc;
1721 dsb2 += sbc*sbc;
1722 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1724 dstddev += stddevc;
1725 dstddev2 += stddevc*stddevc;
1727 sample_coll_destroy(&ca);
1728 sample_coll_destroy(&cb);
1730 dgs /= npee;
1731 dgs2 /= npee;
1732 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1734 dsa /= npee;
1735 dsa2 /= npee;
1736 dsb /= npee;
1737 dsb2 /= npee;
1738 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1739 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1741 dstddev /= npee;
1742 dstddev2 /= npee;
1743 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1745 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1746 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1747 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1748 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1753 static double bar_err(int nbmin, int nbmax, const double *partsum)
1755 int nb,b;
1756 double svar,s,s2,dg;
1758 svar = 0;
1759 for(nb=nbmin; nb<=nbmax; nb++)
1761 s = 0;
1762 s2 = 0;
1763 for(b=0; b<nb; b++)
1765 dg = partsum[nb*(nbmax+1)+b];
1766 s += dg;
1767 s2 += dg*dg;
1769 s /= nb;
1770 s2 /= nb;
1771 svar += (s2 - s*s)/(nb - 1);
1774 return sqrt(svar/(nbmax + 1 - nbmin));
1777 /* deduce lambda value from legend.
1778 input:
1779 bdhdl = if true, value may be a derivative.
1780 output:
1781 bdhdl = whether the legend was for a derivative.
1783 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1785 double lambda=0;
1786 const char *ptr;
1787 gmx_bool ok=FALSE;
1789 if (legend == NULL)
1791 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1793 ptr = strrchr(legend,' ');
1795 if (strstr(legend,"dH"))
1797 if (! (*bdhdl))
1799 ok=FALSE;
1801 else
1803 ok=TRUE;
1806 else
1808 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1810 ok=TRUE;
1811 *bdhdl=FALSE;
1814 if (!ptr)
1816 ok=FALSE;
1819 if (!ok)
1821 printf("%s\n", legend);
1822 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1824 if (sscanf(ptr,"%lf",&lambda) != 1)
1826 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1829 return lambda;
1832 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1834 gmx_bool bFound;
1835 char *ptr;
1837 bFound = FALSE;
1839 /* plain text lambda string */
1840 ptr = strstr(subtitle,"lambda");
1841 if (ptr == NULL)
1843 /* xmgrace formatted lambda string */
1844 ptr = strstr(subtitle,"\\xl\\f{}");
1846 if (ptr == NULL)
1848 /* xmgr formatted lambda string */
1849 ptr = strstr(subtitle,"\\8l\\4");
1851 if (ptr != NULL)
1853 ptr = strstr(ptr,"=");
1855 if (ptr != NULL)
1857 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1860 return bFound;
1863 static double filename2lambda(char *fn)
1865 double lambda;
1866 char *ptr,*endptr,*digitptr;
1867 int dirsep;
1868 ptr = fn;
1869 /* go to the end of the path string and search backward to find the last
1870 directory in the path which has to contain the value of lambda
1872 while (ptr[1] != '\0')
1874 ptr++;
1876 /* searching backward to find the second directory separator */
1877 dirsep = 0;
1878 digitptr = NULL;
1879 while (ptr >= fn)
1881 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1883 if (dirsep == 1) break;
1884 dirsep++;
1886 /* save the last position of a digit between the last two
1887 separators = in the last dirname */
1888 if (dirsep > 0 && isdigit(*ptr))
1890 digitptr = ptr;
1892 ptr--;
1894 if (!digitptr)
1896 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1897 " last directory in the path '%s' does not contain a number",fn);
1899 if (digitptr[-1] == '-')
1901 digitptr--;
1903 lambda = strtod(digitptr,&endptr);
1904 if (endptr == digitptr)
1906 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1909 return lambda;
1912 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1914 int i;
1915 char *subtitle,**legend,*ptr;
1916 int np;
1917 gmx_bool native_lambda_read=FALSE;
1919 xvg_init(ba);
1921 ba->filename = fn;
1923 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1924 if (!ba->y)
1926 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1928 ba->t = ba->y[0];
1930 snew(ba->np,ba->nset);
1931 for(i=0;i<ba->nset;i++)
1932 ba->np[i]=np;
1935 ba->temp = -1;
1936 if (subtitle != NULL)
1938 ptr = strstr(subtitle,"T =");
1939 if (ptr != NULL)
1941 ptr += 3;
1942 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1944 if (ba->temp <= 0)
1946 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1947 ba->temp,fn);
1952 if (ba->temp < 0)
1954 if (*temp <= 0)
1956 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1958 ba->temp = *temp;
1961 /* Try to deduce lambda from the subtitle */
1962 if (subtitle)
1964 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1966 native_lambda_read=TRUE;
1969 snew(ba->lambda,ba->nset-1);
1970 if (legend == NULL)
1972 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1973 if (ba->nset == 2)
1975 if (!native_lambda_read)
1977 /* Deduce lambda from the file name */
1978 ba->native_lambda = filename2lambda(fn);
1979 native_lambda_read=TRUE;
1981 ba->lambda[0] = ba->native_lambda;
1983 else
1985 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1988 else
1990 for(i=0; i<ba->nset-1; i++)
1992 gmx_bool is_dhdl=(i==0);
1993 /* Read lambda from the legend */
1994 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
1996 if (is_dhdl && !native_lambda_read)
1998 ba->native_lambda = ba->lambda[i];
1999 native_lambda_read=TRUE;
2004 if (!native_lambda_read)
2006 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2009 /* Reorder the data */
2010 for(i=1; i<ba->nset; i++)
2012 ba->y[i-1] = ba->y[i];
2014 if (legend != NULL)
2016 for(i=0; i<ba->nset-1; i++)
2018 sfree(legend[i]);
2020 sfree(legend);
2022 ba->nset--;
2025 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2027 xvg_t *barsim;
2028 samples_t *s;
2029 int i;
2030 double *lambda;
2032 snew(barsim,1);
2034 read_bar_xvg_lowlevel(fn, temp, barsim);
2036 if (barsim->nset <1 )
2038 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2041 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2043 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2045 *temp=barsim->temp;
2047 /* now create a series of samples_t */
2048 snew(s, barsim->nset);
2049 for(i=0;i<barsim->nset;i++)
2051 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2052 barsim->temp, lambda_same(barsim->native_lambda,
2053 barsim->lambda[i]),
2054 fn);
2055 s[i].du=barsim->y[i];
2056 s[i].ndu=barsim->np[i];
2057 s[i].t=barsim->t;
2059 lambda_list_insert_sample(lambda_head, s+i);
2061 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2062 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2063 for(i=0;i<barsim->nset;i++)
2065 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2067 printf("\n\n");
2070 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2071 double start_time, double delta_time,
2072 double native_lambda, double temp,
2073 double *last_t, const char *filename)
2075 int j;
2076 gmx_bool allocated;
2077 double foreign_lambda;
2078 int derivative;
2079 samples_t *s; /* convenience pointer */
2080 int startj;
2082 /* check the block types etc. */
2083 if ( (blk->nsub < 3) ||
2084 (blk->sub[0].type != xdr_datatype_int) ||
2085 (blk->sub[1].type != xdr_datatype_double) ||
2087 (blk->sub[2].type != xdr_datatype_float) &&
2088 (blk->sub[2].type != xdr_datatype_double)
2089 ) ||
2090 (blk->sub[0].nr < 1) ||
2091 (blk->sub[1].nr < 1) )
2093 gmx_fatal(FARGS,
2094 "Unexpected/corrupted block data in file %s around time %g.",
2095 filename, start_time);
2098 derivative = blk->sub[0].ival[0];
2099 foreign_lambda = blk->sub[1].dval[0];
2101 if (! *smp)
2103 /* initialize the samples structure if it's empty. */
2104 snew(*smp, 1);
2105 samples_init(*smp, native_lambda, foreign_lambda, temp,
2106 derivative!=0, filename);
2107 (*smp)->start_time=start_time;
2108 (*smp)->delta_time=delta_time;
2111 /* set convenience pointer */
2112 s=*smp;
2114 /* now double check */
2115 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2116 ( (derivative!=0) != (s->derivative!=0) ) )
2118 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2119 foreign_lambda, s->foreign_lambda);
2120 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2121 derivative, s->derivative);
2122 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2123 filename, start_time);
2126 /* make room for the data */
2127 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2129 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2130 blk->sub[2].nr*2 : s->ndu_alloc;
2131 srenew(s->du_alloc, s->ndu_alloc);
2132 s->du=s->du_alloc;
2134 startj = s->ndu;
2135 s->ndu += blk->sub[2].nr;
2136 s->ntot += blk->sub[2].nr;
2137 *ndu = blk->sub[2].nr;
2139 /* and copy the data*/
2140 for(j=0;j<blk->sub[2].nr;j++)
2142 if (blk->sub[2].type == xdr_datatype_float)
2144 s->du[startj+j] = blk->sub[2].fval[j];
2146 else
2148 s->du[startj+j] = blk->sub[2].dval[j];
2151 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2153 *last_t = start_time + blk->sub[2].nr*delta_time;
2157 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2158 double start_time, double delta_time,
2159 double native_lambda, double temp,
2160 double *last_t, const char *filename)
2162 int i,j;
2163 samples_t *s;
2164 int nhist;
2165 double foreign_lambda;
2166 int derivative;
2167 int nbins[2];
2169 /* check the block types etc. */
2170 if ( (blk->nsub < 2) ||
2171 (blk->sub[0].type != xdr_datatype_double) ||
2172 (blk->sub[1].type != xdr_datatype_large_int) ||
2173 (blk->sub[0].nr < 2) ||
2174 (blk->sub[1].nr < 2) )
2176 gmx_fatal(FARGS,
2177 "Unexpected/corrupted block data in file %s around time %g",
2178 filename, start_time);
2181 nhist=blk->nsub-2;
2182 if (nhist == 0)
2184 return NULL;
2186 if (nhist > 2)
2188 gmx_fatal(FARGS,
2189 "Unexpected/corrupted block data in file %s around time %g",
2190 filename, start_time);
2193 snew(s, 1);
2194 *nsamples=1;
2196 foreign_lambda=blk->sub[0].dval[0];
2197 derivative=(int)(blk->sub[1].lval[1]);
2198 if (derivative)
2199 foreign_lambda=native_lambda;
2201 samples_init(s, native_lambda, foreign_lambda, temp,
2202 derivative!=0, filename);
2203 snew(s->hist, 1);
2205 for(i=0;i<nhist;i++)
2207 nbins[i] = blk->sub[i+2].nr;
2210 hist_init(s->hist, nhist, nbins);
2212 for(i=0;i<nhist;i++)
2214 s->hist->x0[i]=blk->sub[1].lval[2+i];
2215 s->hist->dx[i] = blk->sub[0].dval[1];
2216 if (i==1)
2217 s->hist->dx[i] = - s->hist->dx[i];
2220 s->hist->start_time = start_time;
2221 s->hist->delta_time = delta_time;
2222 s->start_time = start_time;
2223 s->delta_time = delta_time;
2225 for(i=0;i<nhist;i++)
2227 int nbin;
2228 gmx_large_int_t sum=0;
2230 for(j=0;j<s->hist->nbin[i];j++)
2232 int binv=(int)(blk->sub[i+2].ival[j]);
2234 s->hist->bin[i][j] = binv;
2235 sum += binv;
2238 if (i==0)
2240 s->ntot = sum;
2241 s->hist->sum = sum;
2243 else
2245 if (s->ntot != sum)
2247 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2248 filename);
2253 if (start_time + s->hist->sum*delta_time > *last_t)
2255 *last_t = start_time + s->hist->sum*delta_time;
2257 return s;
2261 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2263 int i;
2264 ener_file_t fp;
2265 t_enxframe *fr;
2266 int nre;
2267 gmx_enxnm_t *enm=NULL;
2268 double first_t=-1;
2269 double last_t=-1;
2270 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2271 int *nhists=NULL; /* array to keep count & print at end */
2272 int *npts=NULL; /* array to keep count & print at end */
2273 double *lambdas=NULL; /* array to keep count & print at end */
2274 double native_lambda=-1;
2275 double end_time; /* the end time of the last batch of samples */
2276 int nsamples=0;
2278 fp = open_enx(fn,"r");
2279 do_enxnms(fp,&nre,&enm);
2280 snew(fr, 1);
2282 while(do_enx(fp, fr))
2284 /* count the data blocks */
2285 int nblocks_raw=0;
2286 int nblocks_hist=0;
2287 int nlam=0;
2288 int k;
2289 /* DHCOLL block information: */
2290 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2291 double rtemp=0;
2293 /* count the blocks and handle collection information: */
2294 for(i=0;i<fr->nblock;i++)
2296 if (fr->block[i].id == enxDHHIST )
2297 nblocks_hist++;
2298 if ( fr->block[i].id == enxDH )
2299 nblocks_raw++;
2300 if (fr->block[i].id == enxDHCOLL )
2302 nlam++;
2303 if ( (fr->block[i].nsub < 1) ||
2304 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2305 (fr->block[i].sub[0].nr < 5))
2307 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2310 /* read the data from the DHCOLL block */
2311 rtemp = fr->block[i].sub[0].dval[0];
2312 start_time = fr->block[i].sub[0].dval[1];
2313 delta_time = fr->block[i].sub[0].dval[2];
2314 start_lambda = fr->block[i].sub[0].dval[3];
2315 delta_lambda = fr->block[i].sub[0].dval[4];
2317 if (delta_lambda>0)
2319 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2321 if ( ( *temp != rtemp) && (*temp > 0) )
2323 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2325 *temp=rtemp;
2327 if (first_t < 0)
2328 first_t=start_time;
2332 if (nlam != 1)
2334 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2336 if (nblocks_raw > 0 && nblocks_hist > 0 )
2338 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2341 if (nsamples > 0)
2343 /* check the native lambda */
2344 if (!lambda_same(start_lambda, native_lambda) )
2346 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2347 fn, native_lambda, start_lambda, start_time);
2349 /* check the number of samples against the previous number */
2350 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2352 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2353 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2355 /* check whether last iterations's end time matches with
2356 the currrent start time */
2357 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2359 /* it didn't. We need to store our samples and reallocate */
2360 for(i=0;i<nsamples;i++)
2362 if (samples_rawdh[i])
2364 /* insert it into the existing list */
2365 lambda_list_insert_sample(lambda_head,
2366 samples_rawdh[i]);
2367 /* and make sure we'll allocate a new one this time
2368 around */
2369 samples_rawdh[i]=NULL;
2374 else
2376 /* this is the first round; allocate the associated data
2377 structures */
2378 native_lambda=start_lambda;
2379 nsamples=nblocks_raw+nblocks_hist;
2380 snew(nhists, nsamples);
2381 snew(npts, nsamples);
2382 snew(lambdas, nsamples);
2383 snew(samples_rawdh, nsamples);
2384 for(i=0;i<nsamples;i++)
2386 nhists[i]=0;
2387 npts[i]=0;
2388 lambdas[i]=-1;
2389 samples_rawdh[i]=NULL; /* init to NULL so we know which
2390 ones contain values */
2394 /* and read them */
2395 k=0; /* counter for the lambdas, etc. arrays */
2396 for(i=0;i<fr->nblock;i++)
2398 if (fr->block[i].id == enxDH)
2400 int ndu;
2401 read_edr_rawdh_block(&(samples_rawdh[k]),
2402 &ndu,
2403 &(fr->block[i]),
2404 start_time, delta_time,
2405 start_lambda, rtemp,
2406 &last_t, fn);
2407 npts[k] += ndu;
2408 if (samples_rawdh[k])
2410 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2412 k++;
2414 else if (fr->block[i].id == enxDHHIST)
2416 int j;
2417 int nb=0;
2418 samples_t *s; /* this is where the data will go */
2419 s=read_edr_hist_block(&nb, &(fr->block[i]),
2420 start_time, delta_time,
2421 start_lambda, rtemp,
2422 &last_t, fn);
2423 nhists[k] += nb;
2424 if (nb>0)
2426 lambdas[k]= s->foreign_lambda;
2428 k++;
2429 /* and insert the new sample immediately */
2430 for(j=0;j<nb;j++)
2432 lambda_list_insert_sample(lambda_head, s+j);
2437 /* Now store all our extant sample collections */
2438 for(i=0;i<nsamples;i++)
2440 if (samples_rawdh[i])
2442 /* insert it into the existing list */
2443 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2448 fprintf(stderr, "\n");
2449 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2450 fn, first_t, last_t, native_lambda);
2451 for(i=0;i<nsamples;i++)
2453 if (nhists[i] > 0)
2455 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2457 else
2459 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2462 printf("\n\n");
2463 sfree(npts);
2464 sfree(nhists);
2465 sfree(lambdas);
2469 int gmx_bar(int argc,char *argv[])
2471 static const char *desc[] = {
2472 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2473 "Bennett's acceptance ratio method (BAR). It also automatically",
2474 "adds series of individual free energies obtained with BAR into",
2475 "a combined free energy estimate.[PAR]",
2477 "Every individual BAR free energy difference relies on two ",
2478 "simulations at different states: say state A and state B, as",
2479 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2480 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2481 "average of the Hamiltonian difference of state B given state A and",
2482 "vice versa.",
2483 "The energy differences to the other state must be calculated",
2484 "explicitly during the simulation. This can be done with",
2485 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2487 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2488 "Two types of input files are supported:[BR]",
2489 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
2490 "The files should have columns ",
2491 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
2492 "The [GRK]lambda[grk] values are inferred ",
2493 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
2494 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
2495 "legends of Delta H",
2496 "[BR]",
2497 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
2498 "[TT]-extp[tt] option for these files, it is assumed",
2499 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
2500 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
2501 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
2502 "subtitle (if present), otherwise from a number in the subdirectory ",
2503 "in the file name.[PAR]",
2505 "The [GRK]lambda[grk] of the simulation is parsed from ",
2506 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
2507 "foreign [GRK]lambda[grk] values from the legend containing the ",
2508 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
2509 "the legend line containing 'T ='.[PAR]",
2511 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2512 "These can contain either lists of energy differences (see the ",
2513 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
2514 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
2515 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
2516 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
2518 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
2519 "the energy difference can also be extrapolated from the ",
2520 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
2521 "option, which assumes that the system's Hamiltonian depends linearly",
2522 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
2524 "The free energy estimates are determined using BAR with bisection, ",
2525 "with the precision of the output set with [TT]-prec[tt]. ",
2526 "An error estimate taking into account time correlations ",
2527 "is made by splitting the data into blocks and determining ",
2528 "the free energy differences over those blocks and assuming ",
2529 "the blocks are independent. ",
2530 "The final error estimate is determined from the average variance ",
2531 "over 5 blocks. A range of block numbers for error estimation can ",
2532 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2534 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
2535 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
2536 "samples. [BB]Note[bb] that when aggregating energy ",
2537 "differences/derivatives with different sampling intervals, this is ",
2538 "almost certainly not correct. Usually subsequent energies are ",
2539 "correlated and different time intervals mean different degrees ",
2540 "of correlation between samples.[PAR]",
2542 "The results are split in two parts: the last part contains the final ",
2543 "results in kJ/mol, together with the error estimate for each part ",
2544 "and the total. The first part contains detailed free energy ",
2545 "difference estimates and phase space overlap measures in units of ",
2546 "kT (together with their computed error estimate). The printed ",
2547 "values are:[BR]",
2548 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2549 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2550 "[TT]*[tt] DG: the free energy estimate.[BR]",
2551 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2552 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2553 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2555 "The relative entropy of both states in each other's ensemble can be ",
2556 "interpreted as a measure of phase space overlap: ",
2557 "the relative entropy s_A of the work samples of lambda_B in the ",
2558 "ensemble of lambda_A (and vice versa for s_B), is a ",
2559 "measure of the 'distance' between Boltzmann distributions of ",
2560 "the two states, that goes to zero for identical distributions. See ",
2561 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2562 "[PAR]",
2563 "The estimate of the expected per-sample standard deviation, as given ",
2564 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2565 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2566 "of the actual statistical error, because it assumes independent samples).[PAR]",
2568 "To get a visual estimate of the phase space overlap, use the ",
2569 "[TT]-oh[tt] option to write series of histograms, together with the ",
2570 "[TT]-nbin[tt] option.[PAR]"
2572 static real begin=0,end=-1,temp=-1;
2573 int nd=2,nbmin=5,nbmax=5;
2574 int nbin=100;
2575 gmx_bool use_dhdl=FALSE;
2576 gmx_bool calc_s,calc_v;
2577 t_pargs pa[] = {
2578 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2579 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2580 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2581 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2582 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2583 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2584 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
2585 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
2588 t_filenm fnm[] = {
2589 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2590 { efEDR, "-g", "ener", ffOPTRDMULT },
2591 { efXVG, "-o", "bar", ffOPTWR },
2592 { efXVG, "-oi", "barint", ffOPTWR },
2593 { efXVG, "-oh", "histogram", ffOPTWR }
2595 #define NFILE asize(fnm)
2597 int f,i,j;
2598 int nf=0; /* file counter */
2599 int nbs;
2600 int nfile_tot; /* total number of input files */
2601 int nxvgfile=0;
2602 int nedrfile=0;
2603 char **fxvgnms;
2604 char **fedrnms;
2605 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2606 lambda_t lambda_head; /* the head element */
2607 barres_t *results; /* the results */
2608 int nresults; /* number of results in results array */
2610 double *partsum;
2611 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2612 FILE *fpb,*fpi;
2613 char lamformat[20];
2614 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2615 char ktformat[STRLEN], sktformat[STRLEN];
2616 char kteformat[STRLEN], skteformat[STRLEN];
2617 output_env_t oenv;
2618 double kT, beta;
2619 gmx_bool result_OK=TRUE,bEE=TRUE;
2621 gmx_bool disc_err=FALSE;
2622 double sum_disc_err=0.; /* discretization error */
2623 gmx_bool histrange_err=FALSE;
2624 double sum_histrange_err=0.; /* histogram range error */
2625 double stat_err=0.; /* statistical error */
2627 CopyRight(stderr,argv[0]);
2628 parse_common_args(&argc,argv,
2629 PCA_CAN_VIEW,
2630 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2632 if (opt2bSet("-f", NFILE, fnm))
2634 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2636 if (opt2bSet("-g", NFILE, fnm))
2638 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2641 /* make linked list */
2642 lb=&lambda_head;
2643 lambda_init(lb, 0, 0);
2644 lb->next=lb;
2645 lb->prev=lb;
2648 nfile_tot = nxvgfile + nedrfile;
2650 if (nfile_tot == 0)
2652 gmx_fatal(FARGS,"No input files!");
2655 if (nd < 0)
2657 gmx_fatal(FARGS,"Can not have negative number of digits");
2659 prec = pow(10,-nd);
2661 snew(partsum,(nbmax+1)*(nbmax+1));
2662 nf = 0;
2664 /* read in all files. First xvg files */
2665 for(f=0; f<nxvgfile; f++)
2667 read_bar_xvg(fxvgnms[f],&temp,lb);
2668 nf++;
2670 /* then .edr files */
2671 for(f=0; f<nedrfile; f++)
2673 read_barsim_edr(fedrnms[f],&temp,lb);;
2674 nf++;
2677 /* fix the times to allow for equilibration */
2678 lambdas_impose_times(lb, begin, end);
2680 if (opt2bSet("-oh",NFILE,fnm))
2682 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2685 /* assemble the output structures from the lambdas */
2686 results=barres_list_create(lb, &nresults, use_dhdl);
2688 sum_disc_err=barres_list_max_disc_err(results, nresults);
2690 if (nresults == 0)
2692 printf("\nNo results to calculate.\n");
2693 return 0;
2696 if (sum_disc_err > prec)
2698 prec=sum_disc_err;
2699 nd = ceil(-log10(prec));
2700 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2704 sprintf(lamformat,"%%6.3f");
2705 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2706 /* the format strings of the results in kT */
2707 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2708 sprintf( sktformat,"%%%ds",6+nd);
2709 /* the format strings of the errors in kT */
2710 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2711 sprintf( skteformat,"%%%ds",4+nd);
2712 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2713 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2717 fpb = NULL;
2718 if (opt2bSet("-o",NFILE,fnm))
2720 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2721 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2722 "\\lambda",buf,exvggtXYDY,oenv);
2725 fpi = NULL;
2726 if (opt2bSet("-oi",NFILE,fnm))
2728 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2729 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2730 "\\lambda",buf,oenv);
2735 if (nbmin > nbmax)
2736 nbmin=nbmax;
2738 /* first calculate results */
2739 bEE = TRUE;
2740 disc_err = FALSE;
2741 for(f=0; f<nresults; f++)
2743 /* Determine the free energy difference with a factor of 10
2744 * more accuracy than requested for printing.
2746 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2747 &bEE, partsum);
2749 if (results[f].dg_disc_err > prec/10.)
2750 disc_err=TRUE;
2751 if (results[f].dg_histrange_err > prec/10.)
2752 histrange_err=TRUE;
2755 /* print results in kT */
2756 kT = BOLTZ*temp;
2757 beta = 1/kT;
2759 printf("\nTemperature: %g K\n", temp);
2761 printf("\nDetailed results in kT (see help for explanation):\n\n");
2762 printf("%6s ", " lam_A");
2763 printf("%6s ", " lam_B");
2764 printf(sktformat, "DG ");
2765 if (bEE)
2766 printf(skteformat, "+/- ");
2767 if (disc_err)
2768 printf(skteformat, "disc ");
2769 if (histrange_err)
2770 printf(skteformat, "range ");
2771 printf(sktformat, "s_A ");
2772 if (bEE)
2773 printf(skteformat, "+/- " );
2774 printf(sktformat, "s_B ");
2775 if (bEE)
2776 printf(skteformat, "+/- " );
2777 printf(sktformat, "stdev ");
2778 if (bEE)
2779 printf(skteformat, "+/- ");
2780 printf("\n");
2781 for(f=0; f<nresults; f++)
2783 printf(lamformat, results[f].a->native_lambda);
2784 printf(" ");
2785 printf(lamformat, results[f].b->native_lambda);
2786 printf(" ");
2787 printf(ktformat, results[f].dg);
2788 printf(" ");
2789 if (bEE)
2791 printf(kteformat, results[f].dg_err);
2792 printf(" ");
2794 if (disc_err)
2796 printf(kteformat, results[f].dg_disc_err);
2797 printf(" ");
2799 if (histrange_err)
2801 printf(kteformat, results[f].dg_histrange_err);
2802 printf(" ");
2804 printf(ktformat, results[f].sa);
2805 printf(" ");
2806 if (bEE)
2808 printf(kteformat, results[f].sa_err);
2809 printf(" ");
2811 printf(ktformat, results[f].sb);
2812 printf(" ");
2813 if (bEE)
2815 printf(kteformat, results[f].sb_err);
2816 printf(" ");
2818 printf(ktformat, results[f].dg_stddev);
2819 printf(" ");
2820 if (bEE)
2822 printf(kteformat, results[f].dg_stddev_err);
2824 printf("\n");
2826 /* Check for negative relative entropy with a 95% certainty. */
2827 if (results[f].sa < -2*results[f].sa_err ||
2828 results[f].sb < -2*results[f].sb_err)
2830 result_OK=FALSE;
2834 if (!result_OK)
2836 printf("\nWARNING: Some of these results violate the Second Law of "
2837 "Thermodynamics: \n"
2838 " This is can be the result of severe undersampling, or "
2839 "(more likely)\n"
2840 " there is something wrong with the simulations.\n");
2844 /* final results in kJ/mol */
2845 printf("\n\nFinal results in kJ/mol:\n\n");
2846 dg_tot = 0;
2847 for(f=0; f<nresults; f++)
2850 if (fpi != NULL)
2852 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2856 if (fpb != NULL)
2858 fprintf(fpb, xvg3format,
2859 0.5*(results[f].a->native_lambda +
2860 results[f].b->native_lambda),
2861 results[f].dg,results[f].dg_err);
2864 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2865 results[f].lambda_b);*/
2866 printf("lambda ");
2867 printf(lamformat, results[f].a->native_lambda);
2868 printf(" - ");
2869 printf(lamformat, results[f].b->native_lambda);
2870 printf(", DG ");
2872 printf(dgformat,results[f].dg*kT);
2873 if (bEE)
2875 printf(" +/- ");
2876 printf(dgformat,results[f].dg_err*kT);
2878 if (histrange_err)
2880 printf(" (max. range err. = ");
2881 printf(dgformat,results[f].dg_histrange_err*kT);
2882 printf(")");
2883 sum_histrange_err += results[f].dg_histrange_err*kT;
2886 printf("\n");
2887 dg_tot += results[f].dg;
2889 printf("\n");
2890 printf("total ");
2891 printf(lamformat, results[0].a->native_lambda);
2892 printf(" - ");
2893 printf(lamformat, results[nresults-1].b->native_lambda);
2894 printf(", DG ");
2896 printf(dgformat,dg_tot*kT);
2897 if (bEE)
2899 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2900 printf(" +/- ");
2901 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2903 printf("\n");
2904 if (disc_err)
2906 printf("\nmaximum discretization error = ");
2907 printf(dgformat,sum_disc_err);
2908 if (bEE && stat_err < sum_disc_err)
2910 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2913 if (histrange_err)
2915 printf("\nmaximum histogram range error = ");
2916 printf(dgformat,sum_histrange_err);
2917 if (bEE && stat_err < sum_histrange_err)
2919 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2923 printf("\n");
2926 if (fpi != NULL)
2928 fprintf(fpi, xvg2format,
2929 results[nresults-1].b->native_lambda, dg_tot);
2930 ffclose(fpi);
2933 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2934 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");
2936 thanx(stderr);
2938 return 0;