Renamed entropy.* to thermochemistry.*
[gromacs.git] / src / gromacs / gmxana / gmx_bar.cpp
blob86f17f7c0199b071e880b1798dadcdebba4b44ab
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include <cctype>
38 #include <cmath>
39 #include <cstdlib>
40 #include <cstring>
42 #include <algorithm>
43 #include <limits>
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/enxio.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/math/units.h"
51 #include "gromacs/math/utilities.h"
52 #include "gromacs/mdlib/mdebin.h"
53 #include "gromacs/mdtypes/md_enums.h"
54 #include "gromacs/utility/arraysize.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/dir_separator.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/smalloc.h"
60 #include "gromacs/utility/snprintf.h"
63 /* Structure for the names of lambda vector components */
64 typedef struct lambda_components_t
66 char **names; /* Array of strings with names for the lambda vector
67 components */
68 int N; /* The number of components */
69 int Nalloc; /* The number of allocated components */
70 } lambda_components_t;
72 /* Structure for a lambda vector or a dhdl derivative direction */
73 typedef struct lambda_vec_t
75 double *val; /* The lambda vector component values. Only valid if
76 dhdl == -1 */
77 int dhdl; /* The coordinate index for the derivative described by this
78 structure, or -1 */
79 const lambda_components_t *lc; /* the associated lambda_components
80 structure */
81 int index; /* The state number (init-lambda-state) of this lambda
82 vector, if known. If not, it is set to -1 */
83 } lambda_vec_t;
85 /* the dhdl.xvg data from a simulation */
86 typedef struct xvg_t
88 const char *filename;
89 int ftp; /* file type */
90 int nset; /* number of lambdas, including dhdl */
91 int *np; /* number of data points (du or hists) per lambda */
92 int np_alloc; /* number of points (du or hists) allocated */
93 double temp; /* temperature */
94 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
95 double *t; /* the times (of second index for y) */
96 double **y; /* the dU values. y[0] holds the derivative, while
97 further ones contain the energy differences between
98 the native lambda and the 'foreign' lambdas. */
99 lambda_vec_t native_lambda; /* the native lambda */
101 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
102 } xvg_t;
105 typedef struct hist_t
107 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
108 double dx[2]; /* the histogram spacing. The reverse
109 dx is the negative of the forward dx.*/
110 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
111 point(s) as int */
113 int nbin[2]; /* the (forward+reverse) number of bins */
114 gmx_int64_t sum; /* the total number of counts. Must be
115 the same for forward + reverse. */
116 int nhist; /* number of hist datas (forward or reverse) */
118 double start_time, delta_time; /* start time, end time of histogram */
119 } hist_t;
122 /* an aggregate of samples for partial free energy calculation */
123 typedef struct samples_t
125 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
126 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
127 double temp; /* the temperature */
128 gmx_bool derivative; /* whether this sample is a derivative */
130 /* The samples come either as either delta U lists: */
131 int ndu; /* the number of delta U samples */
132 double *du; /* the delta u's */
133 double *t; /* the times associated with those samples, or: */
134 double start_time, delta_time; /*start time and delta time for linear time*/
136 /* or as histograms: */
137 hist_t *hist; /* a histogram */
139 /* allocation data: (not NULL for data 'owned' by this struct) */
140 double *du_alloc, *t_alloc; /* allocated delta u arrays */
141 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
142 hist_t *hist_alloc; /* allocated hist */
144 gmx_int64_t ntot; /* total number of samples */
145 const char *filename; /* the file name this sample comes from */
146 } samples_t;
148 /* a sample range (start to end for du-style data, or boolean
149 for both du-style data and histograms */
150 typedef struct sample_range_t
152 int start, end; /* start and end index for du style data */
153 gmx_bool use; /* whether to use this sample */
155 samples_t *s; /* the samples this range belongs to */
156 } sample_range_t;
159 /* a collection of samples for a partial free energy calculation
160 (i.e. the collection of samples from one native lambda to one
161 foreign lambda) */
162 typedef struct sample_coll_t
164 lambda_vec_t *native_lambda; /* these should be the same for all samples
165 in the histogram */
166 lambda_vec_t *foreign_lambda; /* collection */
167 double temp; /* the temperature */
169 int nsamples; /* the number of samples */
170 samples_t **s; /* the samples themselves */
171 sample_range_t *r; /* the sample ranges */
172 int nsamples_alloc; /* number of allocated samples */
174 gmx_int64_t ntot; /* total number of samples in the ranges of
175 this collection */
177 struct sample_coll_t *next, *prev; /* next and previous in the list */
178 } sample_coll_t;
180 /* all the samples associated with a lambda point */
181 typedef struct lambda_data_t
183 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
184 double temp; /* temperature */
186 sample_coll_t *sc; /* the samples */
188 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
190 struct lambda_data_t *next, *prev; /* the next and prev in the list */
191 } lambda_data_t;
193 /* Top-level data structure of simulation data */
194 typedef struct sim_data_t
196 lambda_data_t *lb; /* a lambda data linked list */
197 lambda_data_t lb_head; /* The head element of the linked list */
199 lambda_components_t lc; /* the allowed components of the lambda
200 vectors */
201 } sim_data_t;
203 /* Top-level data structure with calculated values. */
204 typedef struct {
205 sample_coll_t *a, *b; /* the simulation data */
207 double dg; /* the free energy difference */
208 double dg_err; /* the free energy difference */
210 double dg_disc_err; /* discretization error */
211 double dg_histrange_err; /* histogram range error */
213 double sa; /* relative entropy of b in state a */
214 double sa_err; /* error in sa */
215 double sb; /* relative entropy of a in state b */
216 double sb_err; /* error in sb */
218 double dg_stddev; /* expected dg stddev per sample */
219 double dg_stddev_err; /* error in dg_stddev */
220 } barres_t;
223 /* Initialize a lambda_components structure */
224 static void lambda_components_init(lambda_components_t *lc)
226 lc->N = 0;
227 lc->Nalloc = 2;
228 snew(lc->names, lc->Nalloc);
231 /* Add a component to a lambda_components structure */
232 static void lambda_components_add(lambda_components_t *lc,
233 const char *name, size_t name_length)
235 while (lc->N + 1 > lc->Nalloc)
237 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
238 srenew(lc->names, lc->Nalloc);
240 snew(lc->names[lc->N], name_length+1);
241 std::strncpy(lc->names[lc->N], name, name_length);
242 lc->N++;
245 /* check whether a component with index 'index' matches the given name, or
246 is also NULL. Returns TRUE if this is the case.
247 the string name does not need to end */
248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
249 int index,
250 const char *name,
251 size_t name_length)
253 size_t len;
254 if (index >= lc->N)
256 return FALSE;
258 if (name == nullptr && lc->names[index] == nullptr)
260 return TRUE;
262 if ((name == nullptr) != (lc->names[index] == nullptr))
264 return FALSE;
266 len = std::strlen(lc->names[index]);
267 if (len != name_length)
269 return FALSE;
271 if (std::strncmp(lc->names[index], name, name_length) == 0)
273 return TRUE;
275 return FALSE;
278 /* Find the index of a given lambda component name, or -1 if not found */
279 static int lambda_components_find(const lambda_components_t *lc,
280 const char *name,
281 size_t name_length)
283 int i;
285 for (i = 0; i < lc->N; i++)
287 if (std::strncmp(lc->names[i], name, name_length) == 0)
289 return i;
292 return -1;
297 /* initialize a lambda vector */
298 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
300 snew(lv->val, lc->N);
301 lv->index = -1;
302 lv->dhdl = -1;
303 lv->lc = lc;
306 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
308 int i;
310 lambda_vec_init(lv, orig->lc);
311 lv->dhdl = orig->dhdl;
312 lv->index = orig->index;
313 for (i = 0; i < lv->lc->N; i++)
315 lv->val[i] = orig->val[i];
319 /* write a lambda vec to a preallocated string */
320 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
322 int i;
324 str[0] = 0; /* reset the string */
325 if (lv->dhdl < 0)
327 if (named)
329 str += sprintf(str, "delta H to ");
331 if (lv->lc->N > 1)
333 str += sprintf(str, "(");
335 for (i = 0; i < lv->lc->N; i++)
337 str += sprintf(str, "%g", lv->val[i]);
338 if (i < lv->lc->N-1)
340 str += sprintf(str, ", ");
343 if (lv->lc->N > 1)
345 sprintf(str, ")");
348 else
350 /* this lambda vector describes a derivative */
351 str += sprintf(str, "dH/dl");
352 if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
354 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
359 /* write a shortened version of the lambda vec to a preallocated string */
360 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
362 if (lv->index >= 0)
364 sprintf(str, "%6d", lv->index);
366 else
368 if (lv->dhdl < 0)
370 sprintf(str, "%6.3f", lv->val[0]);
372 else
374 sprintf(str, "dH/dl[%d]", lv->dhdl);
379 /* write an intermediate version of two lambda vecs to a preallocated string */
380 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
381 const lambda_vec_t *b, char *str)
383 str[0] = 0;
384 if ( (a->index >= 0) && (b->index >= 0) )
386 sprintf(str, "%6.3f", (a->index+b->index)/2.0);
388 else
390 if ( (a->dhdl < 0) && (b->dhdl < 0) )
392 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
398 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
399 a and b must describe non-derivative lambda points */
400 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
402 int i;
403 double ret = 0.;
405 if ( (a->dhdl > 0) || (b->dhdl > 0) )
407 gmx_fatal(FARGS,
408 "Trying to calculate the difference between derivatives instead of lambda points");
410 if (a->lc != b->lc)
412 gmx_fatal(FARGS,
413 "Trying to calculate the difference lambdas with differing basis set");
415 for (i = 0; i < a->lc->N; i++)
417 double df = a->val[i] - b->val[i];
418 ret += df*df;
420 return std::sqrt(ret);
424 /* check whether two lambda vectors are the same */
425 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
427 int i;
429 if (a->lc != b->lc)
431 return FALSE;
433 if (a->dhdl < 0)
435 for (i = 0; i < a->lc->N; i++)
437 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
439 return FALSE;
442 return TRUE;
444 else
446 /* they're derivatives, so we check whether the indices match */
447 return (a->dhdl == b->dhdl);
451 /* Compare the sort order of two foreign lambda vectors
453 returns 1 if a is 'bigger' than b,
454 returns 0 if they're the same,
455 returns -1 if a is 'smaller' than b.*/
456 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
457 const lambda_vec_t *b)
459 int i;
460 double norm_a = 0, norm_b = 0;
461 gmx_bool different = FALSE;
463 if (a->lc != b->lc)
465 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
467 /* if either one has an index we sort based on that */
468 if ((a->index >= 0) || (b->index >= 0))
470 if (a->index == b->index)
472 return 0;
474 return (a->index > b->index) ? 1 : -1;
476 if (a->dhdl >= 0 || b->dhdl >= 0)
478 /* lambda vectors that are derivatives always sort higher than those
479 without derivatives */
480 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
482 return (a->dhdl >= 0) ? 1 : -1;
484 return a->dhdl > b->dhdl;
487 /* neither has an index, so we can only sort on the lambda components,
488 which is only valid if there is one component */
489 for (i = 0; i < a->lc->N; i++)
491 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
493 different = TRUE;
495 norm_a += a->val[i]*a->val[i];
496 norm_b += b->val[i]*b->val[i];
498 if (!different)
500 return 0;
502 return norm_a > norm_b;
505 /* Compare the sort order of two native lambda vectors
507 returns 1 if a is 'bigger' than b,
508 returns 0 if they're the same,
509 returns -1 if a is 'smaller' than b.*/
510 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
511 const lambda_vec_t *b)
513 if (a->lc != b->lc)
515 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
517 /* if either one has an index we sort based on that */
518 if ((a->index >= 0) || (b->index >= 0))
520 if (a->index == b->index)
522 return 0;
524 return (a->index > b->index) ? 1 : -1;
526 /* neither has an index, so we can only sort on the lambda components,
527 which is only valid if there is one component */
528 if (a->lc->N > 1)
530 gmx_fatal(FARGS,
531 "Can't compare lambdas with no index and > 1 component");
533 if (a->dhdl >= 0 || b->dhdl >= 0)
535 gmx_fatal(FARGS,
536 "Can't compare native lambdas that are derivatives");
538 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
540 return 0;
542 return a->val[0] > b->val[0] ? 1 : -1;
548 static void hist_init(hist_t *h, int nhist, int *nbin)
550 int i;
551 if (nhist > 2)
553 gmx_fatal(FARGS, "histogram with more than two sets of data!");
555 for (i = 0; i < nhist; i++)
557 snew(h->bin[i], nbin[i]);
558 h->x0[i] = 0;
559 h->nbin[i] = nbin[i];
560 h->start_time = h->delta_time = 0;
561 h->dx[i] = 0;
563 h->sum = 0;
564 h->nhist = nhist;
567 static void xvg_init(xvg_t *ba)
569 ba->filename = nullptr;
570 ba->nset = 0;
571 ba->np_alloc = 0;
572 ba->np = nullptr;
573 ba->y = nullptr;
576 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
577 lambda_vec_t *foreign_lambda, double temp,
578 gmx_bool derivative, const char *filename)
580 s->native_lambda = native_lambda;
581 s->foreign_lambda = foreign_lambda;
582 s->temp = temp;
583 s->derivative = derivative;
585 s->ndu = 0;
586 s->du = nullptr;
587 s->t = nullptr;
588 s->start_time = s->delta_time = 0;
589 s->hist = nullptr;
590 s->du_alloc = nullptr;
591 s->t_alloc = nullptr;
592 s->hist_alloc = nullptr;
593 s->ndu_alloc = 0;
594 s->nt_alloc = 0;
596 s->ntot = 0;
597 s->filename = filename;
600 static void sample_range_init(sample_range_t *r, samples_t *s)
602 r->start = 0;
603 r->end = s->ndu;
604 r->use = TRUE;
605 r->s = nullptr;
608 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
609 lambda_vec_t *foreign_lambda, double temp)
611 sc->native_lambda = native_lambda;
612 sc->foreign_lambda = foreign_lambda;
613 sc->temp = temp;
615 sc->nsamples = 0;
616 sc->s = nullptr;
617 sc->r = nullptr;
618 sc->nsamples_alloc = 0;
620 sc->ntot = 0;
621 sc->next = sc->prev = nullptr;
624 static void sample_coll_destroy(sample_coll_t *sc)
626 /* don't free the samples themselves */
627 sfree(sc->r);
628 sfree(sc->s);
632 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
633 double temp)
635 l->lambda = native_lambda;
636 l->temp = temp;
638 l->next = nullptr;
639 l->prev = nullptr;
641 l->sc = &(l->sc_head);
643 sample_coll_init(l->sc, native_lambda, nullptr, 0.);
644 l->sc->next = l->sc;
645 l->sc->prev = l->sc;
648 static void barres_init(barres_t *br)
650 br->dg = 0;
651 br->dg_err = 0;
652 br->sa = 0;
653 br->sa_err = 0;
654 br->sb = 0;
655 br->sb_err = 0;
656 br->dg_stddev = 0;
657 br->dg_stddev_err = 0;
659 br->a = nullptr;
660 br->b = nullptr;
664 /* calculate the total number of samples in a sample collection */
665 static void sample_coll_calc_ntot(sample_coll_t *sc)
667 int i;
669 sc->ntot = 0;
670 for (i = 0; i < sc->nsamples; i++)
672 if (sc->r[i].use)
674 if (sc->s[i]->hist)
676 sc->ntot += sc->s[i]->ntot;
678 else
680 sc->ntot += sc->r[i].end - sc->r[i].start;
687 /* find the barsamples_t associated with a lambda that corresponds to
688 a specific foreign lambda */
689 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
690 lambda_vec_t *foreign_lambda)
692 sample_coll_t *sc = l->sc->next;
694 while (sc != l->sc)
696 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
698 return sc;
700 sc = sc->next;
703 return nullptr;
706 /* insert li into an ordered list of lambda_colls */
707 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
709 sample_coll_t *scn = l->sc->next;
710 while ( (scn != l->sc) )
712 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
714 break;
716 scn = scn->next;
718 /* now insert it before the found scn */
719 sc->next = scn;
720 sc->prev = scn->prev;
721 scn->prev->next = sc;
722 scn->prev = sc;
725 /* insert li into an ordered list of lambdas */
726 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
728 lambda_data_t *lc = head->next;
729 while (lc != head)
731 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
733 break;
735 lc = lc->next;
737 /* now insert ourselves before the found lc */
738 li->next = lc;
739 li->prev = lc->prev;
740 lc->prev->next = li;
741 lc->prev = li;
744 /* insert a sample and a sample_range into a sample_coll. The
745 samples are stored as a pointer, the range is copied. */
746 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
747 sample_range_t *r)
749 /* first check if it belongs here */
750 if (sc->temp != s->temp)
752 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
753 s->filename, sc->next->s[0]->filename);
755 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
757 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
758 s->filename, sc->next->s[0]->filename);
760 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
762 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
763 s->filename, sc->next->s[0]->filename);
766 /* check if there's room */
767 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
769 sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
770 srenew(sc->s, sc->nsamples_alloc);
771 srenew(sc->r, sc->nsamples_alloc);
773 sc->s[sc->nsamples] = s;
774 sc->r[sc->nsamples] = *r;
775 sc->nsamples++;
777 sample_coll_calc_ntot(sc);
780 /* insert a sample into a lambda_list, creating the right sample_coll if
781 neccesary */
782 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
784 gmx_bool found = FALSE;
785 sample_coll_t *sc;
786 sample_range_t r;
788 lambda_data_t *l = head->next;
790 /* first search for the right lambda_data_t */
791 while (l != head)
793 if (lambda_vec_same(l->lambda, s->native_lambda) )
795 found = TRUE;
796 break;
798 l = l->next;
801 if (!found)
803 snew(l, 1); /* allocate a new one */
804 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
805 lambda_data_insert_lambda(head, l); /* add it to the list */
808 /* now look for a sample collection */
809 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
810 if (!sc)
812 snew(sc, 1); /* allocate a new one */
813 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
814 lambda_data_insert_sample_coll(l, sc);
817 /* now insert the samples into the sample coll */
818 sample_range_init(&r, s);
819 sample_coll_insert_sample(sc, s, &r);
823 /* make a histogram out of a sample collection */
824 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
825 int *nbin_alloc, int *nbin,
826 double *dx, double *xmin, int nbin_default)
828 int i, j, k;
829 gmx_bool dx_set = FALSE;
830 gmx_bool xmin_set = FALSE;
832 gmx_bool xmax_set = FALSE;
833 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
834 limits of a histogram */
835 double xmax = -1;
837 /* first determine dx and xmin; try the histograms */
838 for (i = 0; i < sc->nsamples; i++)
840 if (sc->s[i]->hist)
842 hist_t *hist = sc->s[i]->hist;
843 for (k = 0; k < hist->nhist; k++)
845 double hdx = hist->dx[k];
846 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
848 /* we use the biggest dx*/
849 if ( (!dx_set) || hist->dx[0] > *dx)
851 dx_set = TRUE;
852 *dx = hist->dx[0];
854 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
856 xmin_set = TRUE;
857 *xmin = (hist->x0[k]*hdx);
860 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
862 xmax_set = TRUE;
863 xmax = xmax_now;
864 if (hist->bin[k][hist->nbin[k]-1] != 0)
866 xmax_set_hard = TRUE;
869 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
871 xmax_set_hard = TRUE;
872 xmax = xmax_now;
877 /* and the delta us */
878 for (i = 0; i < sc->nsamples; i++)
880 if (sc->s[i]->ndu > 0)
882 /* determine min and max */
883 int starti = sc->r[i].start;
884 int endi = sc->r[i].end;
885 double du_xmin = sc->s[i]->du[starti];
886 double du_xmax = sc->s[i]->du[starti];
887 for (j = starti+1; j < endi; j++)
889 if (sc->s[i]->du[j] < du_xmin)
891 du_xmin = sc->s[i]->du[j];
893 if (sc->s[i]->du[j] > du_xmax)
895 du_xmax = sc->s[i]->du[j];
899 /* and now change the limits */
900 if ( (!xmin_set) || (du_xmin < *xmin) )
902 xmin_set = TRUE;
903 *xmin = du_xmin;
905 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
907 xmax_set = TRUE;
908 xmax = du_xmax;
913 if (!xmax_set || !xmin_set)
915 *nbin = 0;
916 return;
920 if (!dx_set)
922 *nbin = nbin_default;
923 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
924 be 0, and we count from 0 */
926 else
928 *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
931 if (*nbin > *nbin_alloc)
933 *nbin_alloc = *nbin;
934 srenew(*bin, *nbin_alloc);
937 /* reset the histogram */
938 for (i = 0; i < (*nbin); i++)
940 (*bin)[i] = 0;
943 /* now add the actual data */
944 for (i = 0; i < sc->nsamples; i++)
946 if (sc->s[i]->hist)
948 hist_t *hist = sc->s[i]->hist;
949 for (k = 0; k < hist->nhist; k++)
951 double hdx = hist->dx[k];
952 double xmin_hist = hist->x0[k]*hdx;
953 for (j = 0; j < hist->nbin[k]; j++)
955 /* calculate the bin corresponding to the middle of the
956 original bin */
957 double x = hdx*(j+0.5) + xmin_hist;
958 int binnr = static_cast<int>((x-(*xmin))/(*dx));
960 if (binnr >= *nbin || binnr < 0)
962 binnr = (*nbin)-1;
965 (*bin)[binnr] += hist->bin[k][j];
969 else
971 int starti = sc->r[i].start;
972 int endi = sc->r[i].end;
973 for (j = starti; j < endi; j++)
975 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
976 if (binnr >= *nbin || binnr < 0)
978 binnr = (*nbin)-1;
981 (*bin)[binnr]++;
987 /* write a collection of histograms to a file */
988 static void sim_data_histogram(sim_data_t *sd, const char *filename,
989 int nbin_default, const gmx_output_env_t *oenv)
991 char label_x[STRLEN];
992 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
993 const char *title = "N(\\DeltaH)";
994 const char *label_y = "Samples";
995 FILE *fp;
996 lambda_data_t *bl;
997 int nsets = 0;
998 char **setnames = nullptr;
999 gmx_bool first_set = FALSE;
1000 /* histogram data: */
1001 int *hist = nullptr;
1002 int nbin = 0;
1003 int nbin_alloc = 0;
1004 double dx = 0;
1005 double minval = 0;
1006 int i;
1007 lambda_data_t *bl_head = sd->lb;
1009 printf("\nWriting histogram to %s\n", filename);
1010 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1012 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1014 /* first get all the set names */
1015 bl = bl_head->next;
1016 /* iterate over all lambdas */
1017 while (bl != bl_head)
1019 sample_coll_t *sc = bl->sc->next;
1021 /* iterate over all samples */
1022 while (sc != bl->sc)
1024 char buf[STRLEN], buf2[STRLEN];
1026 nsets++;
1027 srenew(setnames, nsets);
1028 snew(setnames[nsets-1], STRLEN);
1029 if (sc->foreign_lambda->dhdl < 0)
1031 lambda_vec_print(sc->native_lambda, buf, FALSE);
1032 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1033 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1034 deltag, lambda, buf2, lambda, buf);
1036 else
1038 lambda_vec_print(sc->native_lambda, buf, FALSE);
1039 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1040 dhdl, lambda, buf);
1042 sc = sc->next;
1045 bl = bl->next;
1047 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1050 /* now make the histograms */
1051 bl = bl_head->next;
1052 /* iterate over all lambdas */
1053 while (bl != bl_head)
1055 sample_coll_t *sc = bl->sc->next;
1057 /* iterate over all samples */
1058 while (sc != bl->sc)
1060 if (!first_set)
1062 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1065 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1066 nbin_default);
1068 for (i = 0; i < nbin; i++)
1070 double xmin = i*dx + minval;
1071 double xmax = (i+1)*dx + minval;
1073 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1076 first_set = FALSE;
1077 sc = sc->next;
1080 bl = bl->next;
1083 if (hist)
1085 sfree(hist);
1088 xvgrclose(fp);
1091 static int
1092 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1094 int n = 0;
1096 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1097 if (lambda->index >= 0)
1099 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1101 if (lambda->dhdl >= 0)
1103 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1105 else
1107 int i;
1108 for (i = 0; i < lambda->lc->N; i++)
1110 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1113 return n;
1116 /* create a collection (array) of barres_t object given a ordered linked list
1117 of barlamda_t sample collections */
1118 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1119 gmx_bool use_dhdl)
1121 lambda_data_t *bl;
1122 int nlambda = 0;
1123 barres_t *res;
1124 gmx_bool dhdl = FALSE;
1125 gmx_bool first = TRUE;
1126 lambda_data_t *bl_head = sd->lb;
1128 /* first count the lambdas */
1129 bl = bl_head->next;
1130 while (bl != bl_head)
1132 nlambda++;
1133 bl = bl->next;
1135 snew(res, nlambda-1);
1137 /* next put the right samples in the res */
1138 *nres = 0;
1139 bl = bl_head->next->next; /* we start with the second one. */
1140 while (bl != bl_head)
1142 sample_coll_t *sc, *scprev;
1143 barres_t *br = &(res[*nres]);
1144 /* there is always a previous one. we search for that as a foreign
1145 lambda: */
1146 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1147 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1149 barres_init(br);
1151 if (use_dhdl)
1153 /* we use dhdl */
1155 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1156 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1158 if (first)
1160 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1161 dhdl = TRUE;
1163 if (!dhdl)
1165 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1168 else if (!scprev && !sc)
1170 char descX[STRLEN], descY[STRLEN];
1171 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1172 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1174 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1177 /* normal delta H */
1178 if (!scprev)
1180 char descX[STRLEN], descY[STRLEN];
1181 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1182 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1183 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1185 if (!sc)
1187 char descX[STRLEN], descY[STRLEN];
1188 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1189 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1190 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1192 br->a = scprev;
1193 br->b = sc;
1195 first = FALSE;
1196 (*nres)++;
1197 bl = bl->next;
1199 return res;
1202 /* estimate the maximum discretization error */
1203 static double barres_list_max_disc_err(barres_t *res, int nres)
1205 int i, j;
1206 double disc_err = 0.;
1207 double delta_lambda;
1209 for (i = 0; i < nres; i++)
1211 barres_t *br = &(res[i]);
1213 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1214 br->a->native_lambda);
1216 for (j = 0; j < br->a->nsamples; j++)
1218 if (br->a->s[j]->hist)
1220 double Wfac = 1.;
1221 if (br->a->s[j]->derivative)
1223 Wfac = delta_lambda;
1226 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1229 for (j = 0; j < br->b->nsamples; j++)
1231 if (br->b->s[j]->hist)
1233 double Wfac = 1.;
1234 if (br->b->s[j]->derivative)
1236 Wfac = delta_lambda;
1238 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1242 return disc_err;
1246 /* impose start and end times on a sample collection, updating sample_ranges */
1247 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1248 double end_t)
1250 int i;
1251 for (i = 0; i < sc->nsamples; i++)
1253 samples_t *s = sc->s[i];
1254 sample_range_t *r = &(sc->r[i]);
1255 if (s->hist)
1257 double end_time = s->hist->delta_time*s->hist->sum +
1258 s->hist->start_time;
1259 if (s->hist->start_time < begin_t || end_time > end_t)
1261 r->use = FALSE;
1264 else
1266 if (!s->t)
1268 double end_time;
1269 if (s->start_time < begin_t)
1271 r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1273 end_time = s->delta_time*s->ndu + s->start_time;
1274 if (end_time > end_t)
1276 r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1279 else
1281 int j;
1282 for (j = 0; j < s->ndu; j++)
1284 if (s->t[j] < begin_t)
1286 r->start = j;
1289 if (s->t[j] >= end_t)
1291 r->end = j;
1292 break;
1296 if (r->start > r->end)
1298 r->use = FALSE;
1302 sample_coll_calc_ntot(sc);
1305 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1307 double first_t, last_t;
1308 double begin_t, end_t;
1309 lambda_data_t *lc;
1310 lambda_data_t *head = sd->lb;
1311 int j;
1313 if (begin <= 0 && end < 0)
1315 return;
1318 /* first determine the global start and end times */
1319 first_t = -1;
1320 last_t = -1;
1321 lc = head->next;
1322 while (lc != head)
1324 sample_coll_t *sc = lc->sc->next;
1325 while (sc != lc->sc)
1327 for (j = 0; j < sc->nsamples; j++)
1329 double start_t, end_t;
1331 start_t = sc->s[j]->start_time;
1332 end_t = sc->s[j]->start_time;
1333 if (sc->s[j]->hist)
1335 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1337 else
1339 if (sc->s[j]->t)
1341 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1343 else
1345 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1349 if (start_t < first_t || first_t < 0)
1351 first_t = start_t;
1353 if (end_t > last_t)
1355 last_t = end_t;
1358 sc = sc->next;
1360 lc = lc->next;
1363 /* calculate the actual times */
1364 if (begin > 0)
1366 begin_t = begin;
1368 else
1370 begin_t = first_t;
1373 if (end > 0)
1375 end_t = end;
1377 else
1379 end_t = last_t;
1381 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1383 if (begin_t > end_t)
1385 return;
1387 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1389 /* then impose them */
1390 lc = head->next;
1391 while (lc != head)
1393 sample_coll_t *sc = lc->sc->next;
1394 while (sc != lc->sc)
1396 sample_coll_impose_times(sc, begin_t, end_t);
1397 sc = sc->next;
1399 lc = lc->next;
1404 /* create subsample i out of ni from an existing sample_coll */
1405 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1406 sample_coll_t *sc_orig,
1407 int i, int ni)
1409 int j;
1411 gmx_int64_t ntot_start;
1412 gmx_int64_t ntot_end;
1413 gmx_int64_t ntot_so_far;
1415 *sc = *sc_orig; /* just copy all fields */
1417 /* allocate proprietary memory */
1418 snew(sc->s, sc_orig->nsamples);
1419 snew(sc->r, sc_orig->nsamples);
1421 /* copy the samples */
1422 for (j = 0; j < sc_orig->nsamples; j++)
1424 sc->s[j] = sc_orig->s[j];
1425 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1428 /* now fix start and end fields */
1429 /* the casts avoid possible overflows */
1430 ntot_start = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1431 ntot_end = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1432 ntot_so_far = 0;
1433 for (j = 0; j < sc->nsamples; j++)
1435 gmx_int64_t ntot_add;
1436 gmx_int64_t new_start, new_end;
1438 if (sc->r[j].use)
1440 if (sc->s[j]->hist)
1442 ntot_add = sc->s[j]->hist->sum;
1444 else
1446 ntot_add = sc->r[j].end - sc->r[j].start;
1449 else
1451 ntot_add = 0;
1454 if (!sc->s[j]->hist)
1456 if (ntot_so_far < ntot_start)
1458 /* adjust starting point */
1459 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1461 else
1463 new_start = sc->r[j].start;
1465 /* adjust end point */
1466 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1467 if (new_end > sc->r[j].end)
1469 new_end = sc->r[j].end;
1472 /* check if we're in range at all */
1473 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1475 new_start = 0;
1476 new_end = 0;
1478 /* and write the new range */
1479 GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1480 GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1481 sc->r[j].start = static_cast<int>(new_start);
1482 sc->r[j].end = static_cast<int>(new_end);
1484 else
1486 if (sc->r[j].use)
1488 double overlap;
1489 double ntot_start_norm, ntot_end_norm;
1490 /* calculate the amount of overlap of the
1491 desired range (ntot_start -- ntot_end) onto
1492 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1494 /* first calculate normalized bounds
1495 (where 0 is the start of the hist range, and 1 the end) */
1496 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1497 ntot_end_norm = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1499 /* now fix the boundaries */
1500 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1501 ntot_end_norm = std::max(0.0, std::min(1.0, ntot_end_norm));
1503 /* and calculate the overlap */
1504 overlap = ntot_end_norm - ntot_start_norm;
1506 if (overlap > 0.95) /* we allow for 5% slack */
1508 sc->r[j].use = TRUE;
1510 else if (overlap < 0.05)
1512 sc->r[j].use = FALSE;
1514 else
1516 return FALSE;
1520 ntot_so_far += ntot_add;
1522 sample_coll_calc_ntot(sc);
1524 return TRUE;
1527 /* calculate minimum and maximum work values in sample collection */
1528 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1529 double *Wmin, double *Wmax)
1531 int i, j;
1533 *Wmin = std::numeric_limits<float>::max();
1534 *Wmax = -std::numeric_limits<float>::max();
1536 for (i = 0; i < sc->nsamples; i++)
1538 samples_t *s = sc->s[i];
1539 sample_range_t *r = &(sc->r[i]);
1540 if (r->use)
1542 if (!s->hist)
1544 for (j = r->start; j < r->end; j++)
1546 *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1547 *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1550 else
1552 int hd = 0; /* determine the histogram direction: */
1553 double dx;
1554 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1556 hd = 1;
1558 dx = s->hist->dx[hd];
1560 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1562 *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1563 *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1564 /* look for the highest value bin with values */
1565 if (s->hist->bin[hd][j] > 0)
1567 *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1568 *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1569 break;
1577 /* Initialize a sim_data structure */
1578 static void sim_data_init(sim_data_t *sd)
1580 /* make linked list */
1581 sd->lb = &(sd->lb_head);
1582 sd->lb->next = sd->lb;
1583 sd->lb->prev = sd->lb;
1585 lambda_components_init(&(sd->lc));
1589 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1591 int i;
1592 double sum;
1594 sum = 0;
1596 for (i = 0; i < n; i++)
1598 sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1601 return sum;
1604 /* calculate the BAR average given a histogram
1606 if type== 0, calculate the best estimate for the average,
1607 if type==-1, calculate the minimum possible value given the histogram
1608 if type== 1, calculate the maximum possible value given the histogram */
1609 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1610 int type)
1612 double sum = 0.;
1613 int i;
1614 int maxbin;
1615 /* normalization factor multiplied with bin width and
1616 number of samples (we normalize through M): */
1617 double normdx = 1.;
1618 int hd = 0; /* determine the histogram direction: */
1619 double dx;
1621 if ( (hist->nhist > 1) && (Wfac < 0) )
1623 hd = 1;
1625 dx = hist->dx[hd];
1626 maxbin = hist->nbin[hd]-1;
1627 if (type == 1)
1629 maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1632 for (i = 0; i < maxbin; i++)
1634 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1635 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1637 sum += pxdx/(1. + std::exp(x + sbMmDG));
1640 return sum;
1643 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1644 double temp, double tol, int type)
1646 double kT, beta, M;
1647 int i;
1648 double Wfac1, Wfac2, Wmin, Wmax;
1649 double DG0, DG1, DG2, dDG1;
1650 double n1, n2; /* numbers of samples as doubles */
1652 kT = BOLTZ*temp;
1653 beta = 1/kT;
1655 /* count the numbers of samples */
1656 n1 = ca->ntot;
1657 n2 = cb->ntot;
1659 M = std::log(n1/n2);
1661 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1662 if (ca->foreign_lambda->dhdl < 0)
1664 /* this is the case when the delta U were calculated directly
1665 (i.e. we're not scaling dhdl) */
1666 Wfac1 = beta;
1667 Wfac2 = beta;
1669 else
1671 /* we're using dhdl, so delta_lambda needs to be a
1672 multiplication factor. */
1673 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1674 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1675 ca->native_lambda);
1676 if (cb->native_lambda->lc->N > 1)
1678 gmx_fatal(FARGS,
1679 "Can't (yet) do multi-component dhdl interpolation");
1682 Wfac1 = beta*delta_lambda;
1683 Wfac2 = -beta*delta_lambda;
1686 if (beta < 1)
1688 /* We print the output both in kT and kJ/mol.
1689 * Here we determine DG in kT, so when beta < 1
1690 * the precision has to be increased.
1692 tol *= beta;
1695 /* Calculate minimum and maximum work to give an initial estimate of
1696 * delta G as their average.
1699 double Wmin1, Wmin2, Wmax1, Wmax2;
1700 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1701 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1703 Wmin = std::min(Wmin1, Wmin2);
1704 Wmax = std::max(Wmax1, Wmax2);
1707 DG0 = Wmin;
1708 DG2 = Wmax;
1710 if (debug)
1712 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1714 /* We approximate by bisection: given our initial estimates
1715 we keep checking whether the halfway point is greater or
1716 smaller than what we get out of the BAR averages.
1718 For the comparison we can use twice the tolerance. */
1719 while (DG2 - DG0 > 2*tol)
1721 DG1 = 0.5*(DG0 + DG2);
1723 /* calculate the BAR averages */
1724 dDG1 = 0.;
1726 for (i = 0; i < ca->nsamples; i++)
1728 samples_t *s = ca->s[i];
1729 sample_range_t *r = &(ca->r[i]);
1730 if (r->use)
1732 if (s->hist)
1734 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1736 else
1738 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1739 Wfac1, (M-DG1));
1743 for (i = 0; i < cb->nsamples; i++)
1745 samples_t *s = cb->s[i];
1746 sample_range_t *r = &(cb->r[i]);
1747 if (r->use)
1749 if (s->hist)
1751 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1753 else
1755 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1756 Wfac2, -(M-DG1));
1761 if (dDG1 < 0)
1763 DG0 = DG1;
1765 else
1767 DG2 = DG1;
1769 if (debug)
1771 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1775 return 0.5*(DG0 + DG2);
1778 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1779 double temp, double dg, double *sa, double *sb)
1781 int i, j;
1782 double W_ab = 0.;
1783 double W_ba = 0.;
1784 double kT, beta;
1785 double Wfac1, Wfac2;
1786 double n1, n2;
1788 kT = BOLTZ*temp;
1789 beta = 1/kT;
1791 /* count the numbers of samples */
1792 n1 = ca->ntot;
1793 n2 = cb->ntot;
1795 /* to ensure the work values are the same as during the delta_G */
1796 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1797 if (ca->foreign_lambda->dhdl < 0)
1799 /* this is the case when the delta U were calculated directly
1800 (i.e. we're not scaling dhdl) */
1801 Wfac1 = beta;
1802 Wfac2 = beta;
1804 else
1806 /* we're using dhdl, so delta_lambda needs to be a
1807 multiplication factor. */
1808 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1809 ca->native_lambda);
1810 Wfac1 = beta*delta_lambda;
1811 Wfac2 = -beta*delta_lambda;
1814 /* first calculate the average work in both directions */
1815 for (i = 0; i < ca->nsamples; i++)
1817 samples_t *s = ca->s[i];
1818 sample_range_t *r = &(ca->r[i]);
1819 if (r->use)
1821 if (!s->hist)
1823 for (j = r->start; j < r->end; j++)
1825 W_ab += Wfac1*s->du[j];
1828 else
1830 /* normalization factor multiplied with bin width and
1831 number of samples (we normalize through M): */
1832 double normdx = 1.;
1833 double dx;
1834 int hd = 0; /* histogram direction */
1835 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1837 hd = 1;
1839 dx = s->hist->dx[hd];
1841 for (j = 0; j < s->hist->nbin[0]; j++)
1843 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1844 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1845 W_ab += pxdx*x;
1850 W_ab /= n1;
1852 for (i = 0; i < cb->nsamples; i++)
1854 samples_t *s = cb->s[i];
1855 sample_range_t *r = &(cb->r[i]);
1856 if (r->use)
1858 if (!s->hist)
1860 for (j = r->start; j < r->end; j++)
1862 W_ba += Wfac1*s->du[j];
1865 else
1867 /* normalization factor multiplied with bin width and
1868 number of samples (we normalize through M): */
1869 double normdx = 1.;
1870 double dx;
1871 int hd = 0; /* histogram direction */
1872 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1874 hd = 1;
1876 dx = s->hist->dx[hd];
1878 for (j = 0; j < s->hist->nbin[0]; j++)
1880 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1881 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1882 W_ba += pxdx*x;
1887 W_ba /= n2;
1889 /* then calculate the relative entropies */
1890 *sa = (W_ab - dg);
1891 *sb = (W_ba + dg);
1894 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1895 double temp, double dg, double *stddev)
1897 int i, j;
1898 double M;
1899 double sigmafact = 0.;
1900 double kT, beta;
1901 double Wfac1, Wfac2;
1902 double n1, n2;
1904 kT = BOLTZ*temp;
1905 beta = 1/kT;
1907 /* count the numbers of samples */
1908 n1 = ca->ntot;
1909 n2 = cb->ntot;
1911 /* to ensure the work values are the same as during the delta_G */
1912 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1913 if (ca->foreign_lambda->dhdl < 0)
1915 /* this is the case when the delta U were calculated directly
1916 (i.e. we're not scaling dhdl) */
1917 Wfac1 = beta;
1918 Wfac2 = beta;
1920 else
1922 /* we're using dhdl, so delta_lambda needs to be a
1923 multiplication factor. */
1924 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1925 ca->native_lambda);
1926 Wfac1 = beta*delta_lambda;
1927 Wfac2 = -beta*delta_lambda;
1930 M = std::log(n1/n2);
1933 /* calculate average in both directions */
1934 for (i = 0; i < ca->nsamples; i++)
1936 samples_t *s = ca->s[i];
1937 sample_range_t *r = &(ca->r[i]);
1938 if (r->use)
1940 if (!s->hist)
1942 for (j = r->start; j < r->end; j++)
1944 sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1947 else
1949 /* normalization factor multiplied with bin width and
1950 number of samples (we normalize through M): */
1951 double normdx = 1.;
1952 double dx;
1953 int hd = 0; /* histogram direction */
1954 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1956 hd = 1;
1958 dx = s->hist->dx[hd];
1960 for (j = 0; j < s->hist->nbin[0]; j++)
1962 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1963 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1965 sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1970 for (i = 0; i < cb->nsamples; i++)
1972 samples_t *s = cb->s[i];
1973 sample_range_t *r = &(cb->r[i]);
1974 if (r->use)
1976 if (!s->hist)
1978 for (j = r->start; j < r->end; j++)
1980 sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1983 else
1985 /* normalization factor multiplied with bin width and
1986 number of samples (we normalize through M): */
1987 double normdx = 1.;
1988 double dx;
1989 int hd = 0; /* histogram direction */
1990 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1992 hd = 1;
1994 dx = s->hist->dx[hd];
1996 for (j = 0; j < s->hist->nbin[0]; j++)
1998 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1999 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2001 sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2007 sigmafact /= (n1 + n2);
2010 /* Eq. 10 from
2011 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2012 *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2017 static void calc_bar(barres_t *br, double tol,
2018 int npee_min, int npee_max, gmx_bool *bEE,
2019 double *partsum)
2021 int npee, p;
2022 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2023 for calculated quantities */
2024 double temp = br->a->temp;
2025 int i;
2026 double dg_min, dg_max;
2027 gmx_bool have_hist = FALSE;
2029 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2031 br->dg_disc_err = 0.;
2032 br->dg_histrange_err = 0.;
2034 /* check if there are histograms */
2035 for (i = 0; i < br->a->nsamples; i++)
2037 if (br->a->r[i].use && br->a->s[i]->hist)
2039 have_hist = TRUE;
2040 break;
2043 if (!have_hist)
2045 for (i = 0; i < br->b->nsamples; i++)
2047 if (br->b->r[i].use && br->b->s[i]->hist)
2049 have_hist = TRUE;
2050 break;
2055 /* calculate histogram-specific errors */
2056 if (have_hist)
2058 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2059 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2061 if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2063 /* the histogram range error is the biggest of the differences
2064 between the best estimate and the extremes */
2065 br->dg_histrange_err = std::abs(dg_max - dg_min);
2067 br->dg_disc_err = 0.;
2068 for (i = 0; i < br->a->nsamples; i++)
2070 if (br->a->s[i]->hist)
2072 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2075 for (i = 0; i < br->b->nsamples; i++)
2077 if (br->b->s[i]->hist)
2079 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2083 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2085 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2087 dg_sig2 = 0;
2088 sa_sig2 = 0;
2089 sb_sig2 = 0;
2090 stddev_sig2 = 0;
2092 *bEE = TRUE;
2094 sample_coll_t ca, cb;
2096 /* initialize the samples */
2097 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2098 br->a->temp);
2099 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2100 br->b->temp);
2102 for (npee = npee_min; npee <= npee_max; npee++)
2104 double dgs = 0;
2105 double dgs2 = 0;
2106 double dsa = 0;
2107 double dsb = 0;
2108 double dsa2 = 0;
2109 double dsb2 = 0;
2110 double dstddev = 0;
2111 double dstddev2 = 0;
2114 for (p = 0; p < npee; p++)
2116 double dgp;
2117 double stddevc;
2118 double sac, sbc;
2119 gmx_bool cac, cbc;
2121 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2122 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2124 if (!cac || !cbc)
2126 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2127 *bEE = FALSE;
2128 if (cac)
2130 sample_coll_destroy(&ca);
2132 if (cbc)
2134 sample_coll_destroy(&cb);
2136 return;
2139 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2140 dgs += dgp;
2141 dgs2 += dgp*dgp;
2143 partsum[npee*(npee_max+1)+p] += dgp;
2145 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2146 dsa += sac;
2147 dsa2 += sac*sac;
2148 dsb += sbc;
2149 dsb2 += sbc*sbc;
2150 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2152 dstddev += stddevc;
2153 dstddev2 += stddevc*stddevc;
2155 sample_coll_destroy(&ca);
2156 sample_coll_destroy(&cb);
2158 dgs /= npee;
2159 dgs2 /= npee;
2160 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2162 dsa /= npee;
2163 dsa2 /= npee;
2164 dsb /= npee;
2165 dsb2 /= npee;
2166 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2167 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2169 dstddev /= npee;
2170 dstddev2 /= npee;
2171 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2173 br->dg_err = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2174 br->sa_err = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2175 br->sb_err = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2176 br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2181 static double bar_err(int nbmin, int nbmax, const double *partsum)
2183 int nb, b;
2184 double svar, s, s2, dg;
2186 svar = 0;
2187 for (nb = nbmin; nb <= nbmax; nb++)
2189 s = 0;
2190 s2 = 0;
2191 for (b = 0; b < nb; b++)
2193 dg = partsum[nb*(nbmax+1)+b];
2194 s += dg;
2195 s2 += dg*dg;
2197 s /= nb;
2198 s2 /= nb;
2199 svar += (s2 - s*s)/(nb - 1);
2202 return std::sqrt(svar/(nbmax + 1 - nbmin));
2206 /* Seek the end of an identifier (consecutive non-spaces), followed by
2207 an optional number of spaces or '='-signs. Returns a pointer to the
2208 first non-space value found after that. Returns NULL if the string
2209 ends before that.
2211 static const char *find_value(const char *str)
2213 gmx_bool name_end_found = FALSE;
2215 /* if the string is a NULL pointer, return a NULL pointer. */
2216 if (str == nullptr)
2218 return nullptr;
2220 while (*str != '\0')
2222 /* first find the end of the name */
2223 if (!name_end_found)
2225 if (std::isspace(*str) || (*str == '=') )
2227 name_end_found = TRUE;
2230 else
2232 if (!( std::isspace(*str) || (*str == '=') ))
2234 return str;
2237 str++;
2239 return nullptr;
2243 /* read a vector-notation description of a lambda vector */
2244 static gmx_bool read_lambda_compvec(const char *str,
2245 lambda_vec_t *lv,
2246 const lambda_components_t *lc_in,
2247 lambda_components_t *lc_out,
2248 const char **end,
2249 const char *fn)
2251 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2252 components, or to check them */
2253 gmx_bool start_reached = FALSE; /* whether the start of component names
2254 has been reached */
2255 gmx_bool vector = FALSE; /* whether there are multiple components */
2256 int n = 0; /* current component number */
2257 const char *val_start = nullptr; /* start of the component name, or NULL
2258 if not in a value */
2259 char *strtod_end;
2260 gmx_bool OK = TRUE;
2262 if (end)
2264 *end = str;
2268 if (lc_out && lc_out->N == 0)
2270 initialize_lc = TRUE;
2273 if (lc_in == nullptr)
2275 lc_in = lc_out;
2278 while (1)
2280 if (!start_reached)
2282 if (std::isalnum(*str))
2284 vector = FALSE;
2285 start_reached = TRUE;
2286 val_start = str;
2288 else if (*str == '(')
2290 vector = TRUE;
2291 start_reached = TRUE;
2293 else if (!std::isspace(*str))
2295 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2298 else
2300 if (val_start)
2302 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2304 /* end of value */
2305 if (lv == nullptr)
2307 if (initialize_lc)
2309 lambda_components_add(lc_out, val_start,
2310 (str-val_start));
2312 else
2314 if (!lambda_components_check(lc_out, n, val_start,
2315 (str-val_start)))
2317 return FALSE;
2321 else
2323 /* add a vector component to lv */
2324 lv->val[n] = strtod(val_start, &strtod_end);
2325 if (val_start == strtod_end)
2327 gmx_fatal(FARGS,
2328 "Error reading lambda vector in %s", fn);
2331 /* reset for the next identifier */
2332 val_start = nullptr;
2333 n++;
2334 if (!vector)
2336 return OK;
2340 else if (std::isalnum(*str))
2342 val_start = str;
2344 if (*str == ')')
2346 str++;
2347 if (end)
2349 *end = str;
2351 if (!vector)
2353 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2355 else
2357 GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2358 if (n == lc_in->N)
2360 return OK;
2362 else if (lv == nullptr)
2364 return FALSE;
2366 else
2368 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2369 fn);
2370 return FALSE;
2376 if (*str == '\0')
2378 break;
2380 str++;
2381 if (end)
2383 *end = str;
2386 if (vector)
2388 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2389 return FALSE;
2391 return OK;
2394 /* read and check the component names from a string */
2395 static gmx_bool read_lambda_components(const char *str,
2396 lambda_components_t *lc,
2397 const char **end,
2398 const char *fn)
2400 return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2403 /* read an initialized lambda vector from a string */
2404 static gmx_bool read_lambda_vector(const char *str,
2405 lambda_vec_t *lv,
2406 const char **end,
2407 const char *fn)
2409 return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2414 /* deduce lambda value from legend.
2415 fn = the file name
2416 legend = the legend string
2417 ba = the xvg data
2418 lam = the initialized lambda vector
2419 returns whether to use the data in this set.
2421 static gmx_bool legend2lambda(const char *fn,
2422 const char *legend,
2423 lambda_vec_t *lam)
2425 const char *ptr = nullptr, *ptr2 = nullptr;
2426 gmx_bool ok = FALSE;
2427 gmx_bool bdhdl = FALSE;
2428 const char *tostr = " to ";
2430 if (legend == nullptr)
2432 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2435 /* look for the last 'to': */
2436 ptr2 = legend;
2439 ptr2 = std::strstr(ptr2, tostr);
2440 if (ptr2 != nullptr)
2442 ptr = ptr2;
2443 ptr2++;
2446 while (ptr2 != nullptr && *ptr2 != '\0');
2448 if (ptr)
2450 ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2452 else
2454 /* look for the = sign */
2455 ptr = std::strrchr(legend, '=');
2456 if (!ptr)
2458 /* otherwise look for the last space */
2459 ptr = std::strrchr(legend, ' ');
2463 if (std::strstr(legend, "dH"))
2465 ok = TRUE;
2466 bdhdl = TRUE;
2468 else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2470 ok = TRUE;
2471 bdhdl = FALSE;
2473 else /*if (std::strstr(legend, "pV"))*/
2475 return FALSE;
2477 if (!ptr)
2479 ok = FALSE;
2482 if (!ok)
2484 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2486 if (!bdhdl)
2488 ptr = find_value(ptr);
2489 if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2491 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2494 else
2496 int dhdl_index;
2497 const char *end;
2499 ptr = std::strrchr(legend, '=');
2500 end = ptr;
2501 if (ptr)
2503 /* there must be a component name */
2504 ptr--;
2505 if (ptr < legend)
2507 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2509 /* now backtrack to the start of the identifier */
2510 while (isspace(*ptr))
2512 end = ptr;
2513 ptr--;
2514 if (ptr < legend)
2516 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2519 while (!std::isspace(*ptr))
2521 ptr--;
2522 if (ptr < legend)
2524 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2527 ptr++;
2528 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2529 if (dhdl_index < 0)
2531 char buf[STRLEN];
2532 std::strncpy(buf, ptr, (end-ptr));
2533 buf[(end-ptr)] = '\0';
2534 gmx_fatal(FARGS,
2535 "Did not find lambda component for '%s' in %s",
2536 buf, fn);
2539 else
2541 if (lam->lc->N > 1)
2543 gmx_fatal(FARGS,
2544 "dhdl without component name with >1 lambda component in %s",
2545 fn);
2547 dhdl_index = 0;
2549 lam->dhdl = dhdl_index;
2551 return TRUE;
2554 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2555 lambda_components_t *lc)
2557 gmx_bool bFound;
2558 const char *ptr;
2559 char *end;
2560 double native_lambda;
2562 bFound = FALSE;
2564 /* first check for a state string */
2565 ptr = std::strstr(subtitle, "state");
2566 if (ptr)
2568 int index = -1;
2569 const char *val_end;
2571 /* the new 4.6 style lambda vectors */
2572 ptr = find_value(ptr);
2573 if (ptr)
2575 index = std::strtol(ptr, &end, 10);
2576 if (ptr == end)
2578 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2579 return FALSE;
2581 ptr = end;
2583 else
2585 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2586 return FALSE;
2588 /* now find the lambda vector component names */
2589 while (*ptr != '(' && !std::isalnum(*ptr))
2591 ptr++;
2592 if (*ptr == '\0')
2594 gmx_fatal(FARGS,
2595 "Incomplete lambda vector component data in %s", fn);
2596 return FALSE;
2599 val_end = ptr;
2600 if (!read_lambda_components(ptr, lc, &val_end, fn))
2602 gmx_fatal(FARGS,
2603 "lambda vector components in %s don't match those previously read",
2604 fn);
2606 ptr = find_value(val_end);
2607 if (!ptr)
2609 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2610 return FALSE;
2612 lambda_vec_init(&(ba->native_lambda), lc);
2613 if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2615 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2617 ba->native_lambda.index = index;
2618 bFound = TRUE;
2620 else
2622 /* compatibility mode: check for lambda in other ways. */
2623 /* plain text lambda string */
2624 ptr = std::strstr(subtitle, "lambda");
2625 if (ptr == nullptr)
2627 /* xmgrace formatted lambda string */
2628 ptr = std::strstr(subtitle, "\\xl\\f{}");
2630 if (ptr == nullptr)
2632 /* xmgr formatted lambda string */
2633 ptr = std::strstr(subtitle, "\\8l\\4");
2635 if (ptr != nullptr)
2637 ptr = std::strstr(ptr, "=");
2639 if (ptr != nullptr)
2641 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2642 /* add the lambda component name as an empty string */
2643 if (lc->N > 0)
2645 if (!lambda_components_check(lc, 0, "", 0))
2647 gmx_fatal(FARGS,
2648 "lambda vector components in %s don't match those previously read",
2649 fn);
2652 else
2654 lambda_components_add(lc, "", 0);
2656 lambda_vec_init(&(ba->native_lambda), lc);
2657 ba->native_lambda.val[0] = native_lambda;
2661 return bFound;
2664 static double filename2lambda(const char *fn)
2666 double lambda;
2667 const char *ptr, *digitptr;
2668 char *endptr;
2669 int dirsep;
2670 ptr = fn;
2671 /* go to the end of the path string and search backward to find the last
2672 directory in the path which has to contain the value of lambda
2674 while (ptr[1] != '\0')
2676 ptr++;
2678 /* searching backward to find the second directory separator */
2679 dirsep = 0;
2680 digitptr = nullptr;
2681 while (ptr >= fn)
2683 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2685 if (dirsep == 1)
2687 break;
2689 dirsep++;
2691 /* save the last position of a digit between the last two
2692 separators = in the last dirname */
2693 if (dirsep > 0 && std::isdigit(*ptr))
2695 digitptr = ptr;
2697 ptr--;
2699 if (!digitptr)
2701 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2702 " last directory in the path '%s' does not contain a number", fn);
2704 if (digitptr[-1] == '-')
2706 digitptr--;
2708 lambda = strtod(digitptr, &endptr);
2709 if (endptr == digitptr)
2711 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2713 return lambda;
2716 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2717 lambda_components_t *lc)
2719 int i;
2720 char *subtitle, **legend, *ptr;
2721 int np;
2722 gmx_bool native_lambda_read = FALSE;
2723 char buf[STRLEN];
2725 xvg_init(ba);
2727 ba->filename = fn;
2729 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2730 if (!ba->y)
2732 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2734 /* Reorder the data */
2735 ba->t = ba->y[0];
2736 for (i = 1; i < ba->nset; i++)
2738 ba->y[i-1] = ba->y[i];
2740 ba->nset--;
2742 snew(ba->np, ba->nset);
2743 for (i = 0; i < ba->nset; i++)
2745 ba->np[i] = np;
2748 ba->temp = -1;
2749 if (subtitle != nullptr)
2751 /* try to extract temperature */
2752 ptr = std::strstr(subtitle, "T =");
2753 if (ptr != nullptr)
2755 ptr += 3;
2756 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2758 if (ba->temp <= 0)
2760 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2761 ba->temp, fn);
2766 if (ba->temp < 0)
2768 if (*temp <= 0)
2770 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2772 ba->temp = *temp;
2775 /* Try to deduce lambda from the subtitle */
2776 if (subtitle)
2778 if (subtitle2lambda(subtitle, ba, fn, lc))
2780 native_lambda_read = TRUE;
2783 snew(ba->lambda, ba->nset);
2784 if (legend == nullptr)
2786 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2787 if (ba->nset == 1)
2789 if (!native_lambda_read)
2791 // Deduce lambda from the file name.
2792 // Updated the routine filename2lambda to actually return lambda
2793 // to avoid C++ warnings, but this usage does not seem to need it?
2794 // EL 2015-07-10
2795 filename2lambda(fn);
2796 native_lambda_read = TRUE;
2798 ba->lambda[0] = ba->native_lambda;
2800 else
2802 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2805 else
2807 for (i = 0; i < ba->nset; )
2809 gmx_bool use = FALSE;
2810 /* Read lambda from the legend */
2811 lambda_vec_init( &(ba->lambda[i]), lc );
2812 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2813 use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2814 if (use)
2816 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2817 i++;
2819 else
2821 int j;
2822 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2823 for (j = i+1; j < ba->nset; j++)
2825 ba->y[j-1] = ba->y[j];
2826 legend[j-1] = legend[j];
2828 ba->nset--;
2833 if (!native_lambda_read)
2835 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2838 if (legend != nullptr)
2840 for (i = 0; i < ba->nset-1; i++)
2842 sfree(legend[i]);
2844 sfree(legend);
2848 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2850 xvg_t *barsim;
2851 samples_t *s;
2852 int i;
2854 snew(barsim, 1);
2856 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2858 if (barsim->nset < 1)
2860 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2863 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2865 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2867 *temp = barsim->temp;
2869 /* now create a series of samples_t */
2870 snew(s, barsim->nset);
2871 for (i = 0; i < barsim->nset; i++)
2873 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2874 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2875 &(barsim->lambda[i])),
2876 fn);
2877 s[i].du = barsim->y[i];
2878 s[i].ndu = barsim->np[i];
2879 s[i].t = barsim->t;
2881 lambda_data_list_insert_sample(sd->lb, s+i);
2884 char buf[STRLEN];
2886 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2887 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2888 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2889 for (i = 0; i < barsim->nset; i++)
2891 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2892 printf(" %s (%d pts)\n", buf, s[i].ndu);
2895 printf("\n\n");
2898 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2899 double start_time, double delta_time,
2900 lambda_vec_t *native_lambda, double temp,
2901 double *last_t, const char *filename)
2903 int i, j;
2904 lambda_vec_t *foreign_lambda;
2905 int type;
2906 samples_t *s; /* convenience pointer */
2907 int startj;
2909 /* check the block types etc. */
2910 if ( (blk->nsub < 3) ||
2911 (blk->sub[0].type != xdr_datatype_int) ||
2912 (blk->sub[1].type != xdr_datatype_double) ||
2914 (blk->sub[2].type != xdr_datatype_float) &&
2915 (blk->sub[2].type != xdr_datatype_double)
2916 ) ||
2917 (blk->sub[0].nr < 1) ||
2918 (blk->sub[1].nr < 1) )
2920 gmx_fatal(FARGS,
2921 "Unexpected/corrupted block data in file %s around time %f.",
2922 filename, start_time);
2925 snew(foreign_lambda, 1);
2926 lambda_vec_init(foreign_lambda, native_lambda->lc);
2927 lambda_vec_copy(foreign_lambda, native_lambda);
2928 type = blk->sub[0].ival[0];
2929 if (type == dhbtDH)
2931 for (i = 0; i < native_lambda->lc->N; i++)
2933 foreign_lambda->val[i] = blk->sub[1].dval[i];
2936 else
2938 if (blk->sub[0].nr > 1)
2940 foreign_lambda->dhdl = blk->sub[0].ival[1];
2942 else
2944 foreign_lambda->dhdl = 0;
2948 if (!*smp)
2950 /* initialize the samples structure if it's empty. */
2951 snew(*smp, 1);
2952 samples_init(*smp, native_lambda, foreign_lambda, temp,
2953 type == dhbtDHDL, filename);
2954 (*smp)->start_time = start_time;
2955 (*smp)->delta_time = delta_time;
2958 /* set convenience pointer */
2959 s = *smp;
2961 /* now double check */
2962 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2964 char buf[STRLEN], buf2[STRLEN];
2965 lambda_vec_print(foreign_lambda, buf, FALSE);
2966 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2967 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2968 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2969 filename, start_time);
2972 /* make room for the data */
2973 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2975 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2976 blk->sub[2].nr*2 : s->ndu_alloc;
2977 srenew(s->du_alloc, s->ndu_alloc);
2978 s->du = s->du_alloc;
2980 startj = s->ndu;
2981 s->ndu += blk->sub[2].nr;
2982 s->ntot += blk->sub[2].nr;
2983 *ndu = blk->sub[2].nr;
2985 /* and copy the data*/
2986 for (j = 0; j < blk->sub[2].nr; j++)
2988 if (blk->sub[2].type == xdr_datatype_float)
2990 s->du[startj+j] = blk->sub[2].fval[j];
2992 else
2994 s->du[startj+j] = blk->sub[2].dval[j];
2997 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2999 *last_t = start_time + blk->sub[2].nr*delta_time;
3003 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3004 double start_time, double delta_time,
3005 lambda_vec_t *native_lambda, double temp,
3006 double *last_t, const char *filename)
3008 int i, j;
3009 samples_t *s;
3010 int nhist;
3011 lambda_vec_t *foreign_lambda;
3012 int type;
3013 int nbins[2];
3015 /* check the block types etc. */
3016 if ( (blk->nsub < 2) ||
3017 (blk->sub[0].type != xdr_datatype_double) ||
3018 (blk->sub[1].type != xdr_datatype_int64) ||
3019 (blk->sub[0].nr < 2) ||
3020 (blk->sub[1].nr < 2) )
3022 gmx_fatal(FARGS,
3023 "Unexpected/corrupted block data in file %s around time %f",
3024 filename, start_time);
3027 nhist = blk->nsub-2;
3028 if (nhist == 0)
3030 return nullptr;
3032 if (nhist > 2)
3034 gmx_fatal(FARGS,
3035 "Unexpected/corrupted block data in file %s around time %f",
3036 filename, start_time);
3039 snew(s, 1);
3040 *nsamples = 1;
3042 snew(foreign_lambda, 1);
3043 lambda_vec_init(foreign_lambda, native_lambda->lc);
3044 lambda_vec_copy(foreign_lambda, native_lambda);
3045 type = static_cast<int>(blk->sub[1].lval[1]);
3046 if (type == dhbtDH)
3048 double old_foreign_lambda;
3050 old_foreign_lambda = blk->sub[0].dval[0];
3051 if (old_foreign_lambda >= 0)
3053 foreign_lambda->val[0] = old_foreign_lambda;
3054 if (foreign_lambda->lc->N > 1)
3056 gmx_fatal(FARGS,
3057 "Single-component lambda in multi-component file %s",
3058 filename);
3061 else
3063 for (i = 0; i < native_lambda->lc->N; i++)
3065 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3069 else
3071 if (foreign_lambda->lc->N > 1)
3073 if (blk->sub[1].nr < 3 + nhist)
3075 gmx_fatal(FARGS,
3076 "Missing derivative coord in multi-component file %s",
3077 filename);
3079 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3081 else
3083 foreign_lambda->dhdl = 0;
3087 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3088 filename);
3089 snew(s->hist, 1);
3091 for (i = 0; i < nhist; i++)
3093 nbins[i] = blk->sub[i+2].nr;
3096 hist_init(s->hist, nhist, nbins);
3098 for (i = 0; i < nhist; i++)
3100 s->hist->x0[i] = blk->sub[1].lval[2+i];
3101 s->hist->dx[i] = blk->sub[0].dval[1];
3102 if (i == 1)
3104 s->hist->dx[i] = -s->hist->dx[i];
3108 s->hist->start_time = start_time;
3109 s->hist->delta_time = delta_time;
3110 s->start_time = start_time;
3111 s->delta_time = delta_time;
3113 for (i = 0; i < nhist; i++)
3115 gmx_int64_t sum = 0;
3117 for (j = 0; j < s->hist->nbin[i]; j++)
3119 int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3121 s->hist->bin[i][j] = binv;
3122 sum += binv;
3125 if (i == 0)
3127 s->ntot = sum;
3128 s->hist->sum = sum;
3130 else
3132 if (s->ntot != sum)
3134 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3135 filename);
3140 if (start_time + s->hist->sum*delta_time > *last_t)
3142 *last_t = start_time + s->hist->sum*delta_time;
3144 return s;
3148 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3150 int i, j;
3151 ener_file_t fp;
3152 t_enxframe *fr;
3153 int nre;
3154 gmx_enxnm_t *enm = nullptr;
3155 double first_t = -1;
3156 double last_t = -1;
3157 samples_t **samples_rawdh = nullptr; /* contains samples for raw delta_h */
3158 int *nhists = nullptr; /* array to keep count & print at end */
3159 int *npts = nullptr; /* array to keep count & print at end */
3160 lambda_vec_t **lambdas = nullptr; /* array to keep count & print at end */
3161 lambda_vec_t *native_lambda;
3162 int nsamples = 0;
3163 lambda_vec_t start_lambda;
3165 fp = open_enx(fn, "r");
3166 do_enxnms(fp, &nre, &enm);
3167 snew(fr, 1);
3169 snew(native_lambda, 1);
3170 start_lambda.lc = nullptr;
3172 while (do_enx(fp, fr))
3174 /* count the data blocks */
3175 int nblocks_raw = 0;
3176 int nblocks_hist = 0;
3177 int nlam = 0;
3178 int k;
3179 /* DHCOLL block information: */
3180 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3181 double rtemp = 0;
3183 /* count the blocks and handle collection information: */
3184 for (i = 0; i < fr->nblock; i++)
3186 if (fr->block[i].id == enxDHHIST)
3188 nblocks_hist++;
3190 if (fr->block[i].id == enxDH)
3192 nblocks_raw++;
3194 if (fr->block[i].id == enxDHCOLL)
3196 nlam++;
3197 if ( (fr->block[i].nsub < 1) ||
3198 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3199 (fr->block[i].sub[0].nr < 5))
3201 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3204 /* read the data from the DHCOLL block */
3205 rtemp = fr->block[i].sub[0].dval[0];
3206 start_time = fr->block[i].sub[0].dval[1];
3207 delta_time = fr->block[i].sub[0].dval[2];
3208 old_start_lambda = fr->block[i].sub[0].dval[3];
3209 delta_lambda = fr->block[i].sub[0].dval[4];
3211 if (delta_lambda > 0)
3213 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3215 if ( ( *temp != rtemp) && (*temp > 0) )
3217 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3219 *temp = rtemp;
3221 if (old_start_lambda >= 0)
3223 if (sd->lc.N > 0)
3225 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3227 gmx_fatal(FARGS,
3228 "lambda vector components in %s don't match those previously read",
3229 fn);
3232 else
3234 lambda_components_add(&(sd->lc), "", 0);
3236 if (!start_lambda.lc)
3238 lambda_vec_init(&start_lambda, &(sd->lc));
3240 start_lambda.val[0] = old_start_lambda;
3242 else
3244 /* read lambda vector */
3245 int n_lambda_vec;
3246 gmx_bool check = (sd->lc.N > 0);
3247 if (fr->block[i].nsub < 2)
3249 gmx_fatal(FARGS,
3250 "No lambda vector, but start_lambda=%f\n",
3251 old_start_lambda);
3253 n_lambda_vec = fr->block[i].sub[1].ival[1];
3254 for (j = 0; j < n_lambda_vec; j++)
3256 const char *name =
3257 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3258 if (check)
3260 /* check the components */
3261 lambda_components_check(&(sd->lc), j, name,
3262 std::strlen(name));
3264 else
3266 lambda_components_add(&(sd->lc), name,
3267 std::strlen(name));
3270 lambda_vec_init(&start_lambda, &(sd->lc));
3271 start_lambda.index = fr->block[i].sub[1].ival[0];
3272 for (j = 0; j < n_lambda_vec; j++)
3274 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3277 if (first_t < 0)
3279 first_t = start_time;
3284 if (nlam != 1)
3286 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3288 if (nblocks_raw > 0 && nblocks_hist > 0)
3290 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3293 if (nsamples == 0)
3295 /* this is the first round; allocate the associated data
3296 structures */
3297 /*native_lambda=start_lambda;*/
3298 lambda_vec_init(native_lambda, &(sd->lc));
3299 lambda_vec_copy(native_lambda, &start_lambda);
3300 nsamples = nblocks_raw+nblocks_hist;
3301 snew(nhists, nsamples);
3302 snew(npts, nsamples);
3303 snew(lambdas, nsamples);
3304 snew(samples_rawdh, nsamples);
3305 for (i = 0; i < nsamples; i++)
3307 nhists[i] = 0;
3308 npts[i] = 0;
3309 lambdas[i] = nullptr;
3310 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3311 ones contain values */
3314 else
3316 // nsamples > 0 means this is NOT the first iteration
3318 /* check the native lambda */
3319 if (!lambda_vec_same(&start_lambda, native_lambda) )
3321 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3322 fn, native_lambda, start_lambda, start_time);
3324 /* check the number of samples against the previous number */
3325 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3327 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3328 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3330 /* check whether last iterations's end time matches with
3331 the currrent start time */
3332 if ( (std::abs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3334 /* it didn't. We need to store our samples and reallocate */
3336 for (i = 0; i < nsamples; i++)
3338 // nsamples is always >0 here, so samples_rawdh must be a valid pointer. Unfortunately
3339 // cppcheck does not understand the logic unless the assert is inside the loop, but
3340 // this is not performance-sensitive code.
3341 GMX_RELEASE_ASSERT(samples_rawdh != nullptr, "samples_rawdh==NULL with nsamples>0");
3342 if (samples_rawdh[i])
3344 /* insert it into the existing list */
3345 lambda_data_list_insert_sample(sd->lb,
3346 samples_rawdh[i]);
3347 /* and make sure we'll allocate a new one this time
3348 around */
3349 samples_rawdh[i] = nullptr;
3355 /* and read them */
3356 k = 0; /* counter for the lambdas, etc. arrays */
3357 for (i = 0; i < fr->nblock; i++)
3359 if (fr->block[i].id == enxDH)
3361 int type = (fr->block[i].sub[0].ival[0]);
3362 if (type == dhbtDH || type == dhbtDHDL)
3364 int ndu;
3365 read_edr_rawdh_block(&(samples_rawdh[k]),
3366 &ndu,
3367 &(fr->block[i]),
3368 start_time, delta_time,
3369 native_lambda, rtemp,
3370 &last_t, fn);
3371 npts[k] += ndu;
3372 if (samples_rawdh[k])
3374 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3376 k++;
3379 else if (fr->block[i].id == enxDHHIST)
3381 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3382 if (type == dhbtDH || type == dhbtDHDL)
3384 int j;
3385 int nb = 0;
3386 samples_t *s; /* this is where the data will go */
3387 s = read_edr_hist_block(&nb, &(fr->block[i]),
3388 start_time, delta_time,
3389 native_lambda, rtemp,
3390 &last_t, fn);
3391 nhists[k] += nb;
3392 if (nb > 0)
3394 lambdas[k] = s->foreign_lambda;
3396 k++;
3397 /* and insert the new sample immediately */
3398 for (j = 0; j < nb; j++)
3400 lambda_data_list_insert_sample(sd->lb, s+j);
3406 /* Now store all our extant sample collections */
3407 for (i = 0; i < nsamples; i++)
3409 if (samples_rawdh[i])
3411 /* insert it into the existing list */
3412 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3418 char buf[STRLEN];
3419 printf("\n");
3420 lambda_vec_print(native_lambda, buf, FALSE);
3421 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3422 fn, first_t, last_t, buf);
3423 for (i = 0; i < nsamples; i++)
3425 if (lambdas[i])
3427 lambda_vec_print(lambdas[i], buf, TRUE);
3428 if (nhists[i] > 0)
3430 printf(" %s (%d hists)\n", buf, nhists[i]);
3432 else
3434 printf(" %s (%d pts)\n", buf, npts[i]);
3439 printf("\n\n");
3440 sfree(npts);
3441 sfree(nhists);
3442 sfree(lambdas);
3446 int gmx_bar(int argc, char *argv[])
3448 static const char *desc[] = {
3449 "[THISMODULE] calculates free energy difference estimates through ",
3450 "Bennett's acceptance ratio method (BAR). It also automatically",
3451 "adds series of individual free energies obtained with BAR into",
3452 "a combined free energy estimate.[PAR]",
3454 "Every individual BAR free energy difference relies on two ",
3455 "simulations at different states: say state A and state B, as",
3456 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3457 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3458 "average of the Hamiltonian difference of state B given state A and",
3459 "vice versa.",
3460 "The energy differences to the other state must be calculated",
3461 "explicitly during the simulation. This can be done with",
3462 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3464 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3465 "Two types of input files are supported:",
3467 " * Files with more than one [IT]y[it]-value. ",
3468 " The files should have columns ",
3469 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3470 " The [GRK]lambda[grk] values are inferred ",
3471 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3472 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3473 " legends of Delta H",
3474 " * Files with only one [IT]y[it]-value. Using the",
3475 " [TT]-extp[tt] option for these files, it is assumed",
3476 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3477 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3478 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3479 " subtitle (if present), otherwise from a number in the subdirectory ",
3480 " in the file name.",
3483 "The [GRK]lambda[grk] of the simulation is parsed from ",
3484 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3485 "foreign [GRK]lambda[grk] values from the legend containing the ",
3486 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3487 "the legend line containing 'T ='.[PAR]",
3489 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3490 "These can contain either lists of energy differences (see the ",
3491 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3492 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3493 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3494 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3496 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3497 "the energy difference can also be extrapolated from the ",
3498 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3499 "option, which assumes that the system's Hamiltonian depends linearly",
3500 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3502 "The free energy estimates are determined using BAR with bisection, ",
3503 "with the precision of the output set with [TT]-prec[tt]. ",
3504 "An error estimate taking into account time correlations ",
3505 "is made by splitting the data into blocks and determining ",
3506 "the free energy differences over those blocks and assuming ",
3507 "the blocks are independent. ",
3508 "The final error estimate is determined from the average variance ",
3509 "over 5 blocks. A range of block numbers for error estimation can ",
3510 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3512 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3513 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3514 "samples. [BB]Note[bb] that when aggregating energy ",
3515 "differences/derivatives with different sampling intervals, this is ",
3516 "almost certainly not correct. Usually subsequent energies are ",
3517 "correlated and different time intervals mean different degrees ",
3518 "of correlation between samples.[PAR]",
3520 "The results are split in two parts: the last part contains the final ",
3521 "results in kJ/mol, together with the error estimate for each part ",
3522 "and the total. The first part contains detailed free energy ",
3523 "difference estimates and phase space overlap measures in units of ",
3524 "kT (together with their computed error estimate). The printed ",
3525 "values are:",
3527 " * lam_A: the [GRK]lambda[grk] values for point A.",
3528 " * lam_B: the [GRK]lambda[grk] values for point B.",
3529 " * DG: the free energy estimate.",
3530 " * s_A: an estimate of the relative entropy of B in A.",
3531 " * s_B: an estimate of the relative entropy of A in B.",
3532 " * stdev: an estimate expected per-sample standard deviation.",
3535 "The relative entropy of both states in each other's ensemble can be ",
3536 "interpreted as a measure of phase space overlap: ",
3537 "the relative entropy s_A of the work samples of lambda_B in the ",
3538 "ensemble of lambda_A (and vice versa for s_B), is a ",
3539 "measure of the 'distance' between Boltzmann distributions of ",
3540 "the two states, that goes to zero for identical distributions. See ",
3541 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3542 "[PAR]",
3543 "The estimate of the expected per-sample standard deviation, as given ",
3544 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3545 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3546 "of the actual statistical error, because it assumes independent samples).[PAR]",
3548 "To get a visual estimate of the phase space overlap, use the ",
3549 "[TT]-oh[tt] option to write series of histograms, together with the ",
3550 "[TT]-nbin[tt] option.[PAR]"
3552 static real begin = 0, end = -1, temp = -1;
3553 int nd = 2, nbmin = 5, nbmax = 5;
3554 int nbin = 100;
3555 gmx_bool use_dhdl = FALSE;
3556 t_pargs pa[] = {
3557 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3558 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3559 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3560 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3561 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3562 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3563 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3564 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3567 t_filenm fnm[] = {
3568 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3569 { efEDR, "-g", "ener", ffOPTRDMULT },
3570 { efXVG, "-o", "bar", ffOPTWR },
3571 { efXVG, "-oi", "barint", ffOPTWR },
3572 { efXVG, "-oh", "histogram", ffOPTWR }
3574 #define NFILE asize(fnm)
3576 int f;
3577 int nf = 0; /* file counter */
3578 int nfile_tot; /* total number of input files */
3579 sim_data_t sim_data; /* the simulation data */
3580 barres_t *results; /* the results */
3581 int nresults; /* number of results in results array */
3583 double *partsum;
3584 double prec, dg_tot;
3585 FILE *fpb, *fpi;
3586 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3587 char buf[STRLEN], buf2[STRLEN];
3588 char ktformat[STRLEN], sktformat[STRLEN];
3589 char kteformat[STRLEN], skteformat[STRLEN];
3590 gmx_output_env_t *oenv;
3591 double kT;
3592 gmx_bool result_OK = TRUE, bEE = TRUE;
3594 gmx_bool disc_err = FALSE;
3595 double sum_disc_err = 0.; /* discretization error */
3596 gmx_bool histrange_err = FALSE;
3597 double sum_histrange_err = 0.; /* histogram range error */
3598 double stat_err = 0.; /* statistical error */
3600 if (!parse_common_args(&argc, argv,
3601 PCA_CAN_VIEW,
3602 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3604 return 0;
3607 gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3608 gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3610 sim_data_init(&sim_data);
3611 #if 0
3612 /* make linked list */
3613 lb = &lambda_head;
3614 lambda_data_init(lb, 0, 0);
3615 lb->next = lb;
3616 lb->prev = lb;
3617 #endif
3620 nfile_tot = xvgFiles.size() + edrFiles.size();
3622 if (nfile_tot == 0)
3624 gmx_fatal(FARGS, "No input files!");
3627 if (nd < 0)
3629 gmx_fatal(FARGS, "Can not have negative number of digits");
3631 prec = std::pow(10.0, static_cast<double>(-nd));
3633 snew(partsum, (nbmax+1)*(nbmax+1));
3634 nf = 0;
3636 /* read in all files. First xvg files */
3637 for (const std::string &filenm : xvgFiles)
3639 read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3640 nf++;
3642 /* then .edr files */
3643 for (const std::string &filenm : edrFiles)
3645 read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3646 nf++;
3649 /* fix the times to allow for equilibration */
3650 sim_data_impose_times(&sim_data, begin, end);
3652 if (opt2bSet("-oh", NFILE, fnm))
3654 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3657 /* assemble the output structures from the lambdas */
3658 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3660 sum_disc_err = barres_list_max_disc_err(results, nresults);
3662 if (nresults == 0)
3664 printf("\nNo results to calculate.\n");
3665 return 0;
3668 if (sum_disc_err > prec)
3670 prec = sum_disc_err;
3671 nd = static_cast<int>(std::ceil(-std::log10(prec)));
3672 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3676 /*sprintf(lamformat,"%%6.3f");*/
3677 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3678 /* the format strings of the results in kT */
3679 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3680 sprintf( sktformat, "%%%ds", 6+nd);
3681 /* the format strings of the errors in kT */
3682 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3683 sprintf( skteformat, "%%%ds", 4+nd);
3684 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3685 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3689 fpb = nullptr;
3690 if (opt2bSet("-o", NFILE, fnm))
3692 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3693 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3694 "\\lambda", buf, exvggtXYDY, oenv);
3697 fpi = nullptr;
3698 if (opt2bSet("-oi", NFILE, fnm))
3700 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3701 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3702 "\\lambda", buf, oenv);
3707 if (nbmin > nbmax)
3709 nbmin = nbmax;
3712 /* first calculate results */
3713 bEE = TRUE;
3714 disc_err = FALSE;
3715 for (f = 0; f < nresults; f++)
3717 /* Determine the free energy difference with a factor of 10
3718 * more accuracy than requested for printing.
3720 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3721 &bEE, partsum);
3723 if (results[f].dg_disc_err > prec/10.)
3725 disc_err = TRUE;
3727 if (results[f].dg_histrange_err > prec/10.)
3729 histrange_err = TRUE;
3733 /* print results in kT */
3734 kT = BOLTZ*temp;
3736 printf("\nTemperature: %g K\n", temp);
3738 printf("\nDetailed results in kT (see help for explanation):\n\n");
3739 printf("%6s ", " lam_A");
3740 printf("%6s ", " lam_B");
3741 printf(sktformat, "DG ");
3742 if (bEE)
3744 printf(skteformat, "+/- ");
3746 if (disc_err)
3748 printf(skteformat, "disc ");
3750 if (histrange_err)
3752 printf(skteformat, "range ");
3754 printf(sktformat, "s_A ");
3755 if (bEE)
3757 printf(skteformat, "+/- " );
3759 printf(sktformat, "s_B ");
3760 if (bEE)
3762 printf(skteformat, "+/- " );
3764 printf(sktformat, "stdev ");
3765 if (bEE)
3767 printf(skteformat, "+/- ");
3769 printf("\n");
3770 for (f = 0; f < nresults; f++)
3772 lambda_vec_print_short(results[f].a->native_lambda, buf);
3773 printf("%s ", buf);
3774 lambda_vec_print_short(results[f].b->native_lambda, buf);
3775 printf("%s ", buf);
3776 printf(ktformat, results[f].dg);
3777 printf(" ");
3778 if (bEE)
3780 printf(kteformat, results[f].dg_err);
3781 printf(" ");
3783 if (disc_err)
3785 printf(kteformat, results[f].dg_disc_err);
3786 printf(" ");
3788 if (histrange_err)
3790 printf(kteformat, results[f].dg_histrange_err);
3791 printf(" ");
3793 printf(ktformat, results[f].sa);
3794 printf(" ");
3795 if (bEE)
3797 printf(kteformat, results[f].sa_err);
3798 printf(" ");
3800 printf(ktformat, results[f].sb);
3801 printf(" ");
3802 if (bEE)
3804 printf(kteformat, results[f].sb_err);
3805 printf(" ");
3807 printf(ktformat, results[f].dg_stddev);
3808 printf(" ");
3809 if (bEE)
3811 printf(kteformat, results[f].dg_stddev_err);
3813 printf("\n");
3815 /* Check for negative relative entropy with a 95% certainty. */
3816 if (results[f].sa < -2*results[f].sa_err ||
3817 results[f].sb < -2*results[f].sb_err)
3819 result_OK = FALSE;
3823 if (!result_OK)
3825 printf("\nWARNING: Some of these results violate the Second Law of "
3826 "Thermodynamics: \n"
3827 " This is can be the result of severe undersampling, or "
3828 "(more likely)\n"
3829 " there is something wrong with the simulations.\n");
3833 /* final results in kJ/mol */
3834 printf("\n\nFinal results in kJ/mol:\n\n");
3835 dg_tot = 0;
3836 for (f = 0; f < nresults; f++)
3839 if (fpi != nullptr)
3841 lambda_vec_print_short(results[f].a->native_lambda, buf);
3842 fprintf(fpi, xvg2format, buf, dg_tot);
3846 if (fpb != nullptr)
3848 lambda_vec_print_intermediate(results[f].a->native_lambda,
3849 results[f].b->native_lambda,
3850 buf);
3852 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3855 printf("point ");
3856 lambda_vec_print_short(results[f].a->native_lambda, buf);
3857 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3858 printf("%s - %s", buf, buf2);
3859 printf(", DG ");
3861 printf(dgformat, results[f].dg*kT);
3862 if (bEE)
3864 printf(" +/- ");
3865 printf(dgformat, results[f].dg_err*kT);
3867 if (histrange_err)
3869 printf(" (max. range err. = ");
3870 printf(dgformat, results[f].dg_histrange_err*kT);
3871 printf(")");
3872 sum_histrange_err += results[f].dg_histrange_err*kT;
3875 printf("\n");
3876 dg_tot += results[f].dg;
3878 printf("\n");
3879 printf("total ");
3880 lambda_vec_print_short(results[0].a->native_lambda, buf);
3881 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3882 printf("%s - %s", buf, buf2);
3883 printf(", DG ");
3885 printf(dgformat, dg_tot*kT);
3886 if (bEE)
3888 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3889 printf(" +/- ");
3890 printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3892 printf("\n");
3893 if (disc_err)
3895 printf("\nmaximum discretization error = ");
3896 printf(dgformat, sum_disc_err);
3897 if (bEE && stat_err < sum_disc_err)
3899 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3902 if (histrange_err)
3904 printf("\nmaximum histogram range error = ");
3905 printf(dgformat, sum_histrange_err);
3906 if (bEE && stat_err < sum_histrange_err)
3908 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3912 printf("\n");
3915 if (fpi != nullptr)
3917 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3918 fprintf(fpi, xvg2format, buf, dg_tot);
3919 xvgrclose(fpi);
3921 if (fpb != nullptr)
3923 xvgrclose(fpb);
3926 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3927 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3929 return 0;