src/gromacs/gmxana/gmx_bar.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <cctype>
  38 #include <cmath>
  39 #include <cstdlib>
  40 #include <cstring>
  41
  42 #include <algorithm>
  43 #include <limits>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/commandline/viewit.h"
  47 #include "gromacs/fileio/enxio.h"
  48 #include "gromacs/fileio/xvgr.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/math/units.h"
  51 #include "gromacs/math/utilities.h"
  52 #include "gromacs/mdlib/mdebin.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/utility/arraysize.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "gromacs/utility/dir_separator.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/smalloc.h"
  60 #include "gromacs/utility/snprintf.h"
  61
  62
  63 /* Structure for the names of lambda vector components */
  64 typedef struct lambda_components_t
  65 {
  66     char **names;  /* Array of strings with names for the lambda vector
  67                       components */
  68     int    N;      /* The number of components */
  69     int    Nalloc; /* The number of allocated components */
  70 } lambda_components_t;
  71
  72 /* Structure for a lambda vector or a dhdl derivative direction */
  73 typedef struct lambda_vec_t
  74 {
  75     double                    *val;   /* The lambda vector component values. Only valid if
  76                                          dhdl == -1 */
  77     int                        dhdl;  /* The coordinate index for the derivative described by this
  78                                          structure, or -1 */
  79     const lambda_components_t *lc;    /* the associated lambda_components
  80                                          structure */
  81     int                        index; /* The state number (init-lambda-state) of this lambda
  82                                          vector, if known. If not, it is set to -1 */
  83 } lambda_vec_t;
  84
  85 /* the dhdl.xvg data from a simulation */
  86 typedef struct xvg_t
  87 {
  88     const char   *filename;
  89     int           ftp;           /* file type */
  90     int           nset;          /* number of lambdas, including dhdl */
  91     int          *np;            /* number of data points (du or hists) per lambda */
  92     int           np_alloc;      /* number of points (du or hists) allocated */
  93     double        temp;          /* temperature */
  94     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  95     double       *t;             /* the times (of second index for y) */
  96     double      **y;             /* the dU values. y[0] holds the derivative, while
  97                                     further ones contain the energy differences between
  98                                     the native lambda and the 'foreign' lambdas. */
  99     lambda_vec_t  native_lambda; /* the native lambda */
 100
 101     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 102 } xvg_t;
 103
 104
 105 typedef struct hist_t
 106 {
 107     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 108     double          dx[2];                  /* the histogram spacing. The reverse
 109                                                dx is the negative of the forward dx.*/
 110     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 111                                                    point(s) as int */
 112
 113     int             nbin[2];                /* the (forward+reverse) number of bins */
 114     gmx_int64_t     sum;                    /* the total number of counts. Must be
 115                                                    the same for forward + reverse.  */
 116     int             nhist;                  /* number of hist datas (forward or reverse) */
 117
 118     double          start_time, delta_time; /* start time, end time of histogram */
 119 } hist_t;
 120
 121
 122 /* an aggregate of samples for partial free energy calculation */
 123 typedef struct samples_t
 124 {
 125     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 126     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 127     double        temp;           /* the temperature */
 128     gmx_bool      derivative;     /* whether this sample is a derivative */
 129
 130     /* The samples come either as either delta U lists: */
 131     int     ndu;                    /* the number of delta U samples */
 132     double *du;                     /* the delta u's */
 133     double *t;                      /* the times associated with those samples, or: */
 134     double  start_time, delta_time; /*start time and delta time for linear time*/
 135
 136     /* or as histograms: */
 137     hist_t *hist; /* a histogram */
 138
 139     /* allocation data: (not NULL for data 'owned' by this struct) */
 140     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 141     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 142     hist_t         *hist_alloc;          /* allocated hist */
 143
 144     gmx_int64_t     ntot;                /* total number of samples */
 145     const char     *filename;            /* the file name this sample comes from */
 146 } samples_t;
 147
 148 /* a sample range (start to end for du-style data, or boolean
 149     for both du-style data and histograms */
 150 typedef struct sample_range_t
 151 {
 152     int        start, end; /* start and end index for du style data */
 153     gmx_bool   use;        /* whether to use this sample */
 154
 155     samples_t *s;          /* the samples this range belongs to */
 156 } sample_range_t;
 157
 158
 159 /* a collection of samples for a partial free energy calculation
 160     (i.e. the collection of samples from one native lambda to one
 161     foreign lambda) */
 162 typedef struct sample_coll_t
 163 {
 164     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 165                                           in the histogram */
 166     lambda_vec_t   *foreign_lambda;    /* collection */
 167     double          temp;              /* the temperature */
 168
 169     int             nsamples;          /* the number of samples */
 170     samples_t     **s;                 /* the samples themselves */
 171     sample_range_t *r;                 /* the sample ranges */
 172     int             nsamples_alloc;    /* number of allocated samples */
 173
 174     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 175                                               this collection */
 176
 177     struct sample_coll_t *next, *prev; /* next and previous in the list */
 178 } sample_coll_t;
 179
 180 /* all the samples associated with a lambda point */
 181 typedef struct lambda_data_t
 182 {
 183     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 184     double                temp;        /* temperature */
 185
 186     sample_coll_t        *sc;          /* the samples */
 187
 188     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 189
 190     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 191 } lambda_data_t;
 192
 193 /* Top-level data structure of simulation data */
 194 typedef struct sim_data_t
 195 {
 196     lambda_data_t      *lb;      /* a lambda data linked list */
 197     lambda_data_t       lb_head; /* The head element of the linked list */
 198
 199     lambda_components_t lc;      /* the allowed components of the lambda
 200                                     vectors */
 201 } sim_data_t;
 202
 203 /* Top-level data structure with calculated values. */
 204 typedef struct {
 205     sample_coll_t *a, *b;            /* the simulation data */
 206
 207     double         dg;               /* the free energy difference */
 208     double         dg_err;           /* the free energy difference */
 209
 210     double         dg_disc_err;      /* discretization error */
 211     double         dg_histrange_err; /* histogram range error */
 212
 213     double         sa;               /* relative entropy of b in state a */
 214     double         sa_err;           /* error in sa */
 215     double         sb;               /* relative entropy of a in state b */
 216     double         sb_err;           /* error in sb */
 217
 218     double         dg_stddev;        /* expected dg stddev per sample */
 219     double         dg_stddev_err;    /* error in dg_stddev */
 220 } barres_t;
 221
 222
 223 /* Initialize a lambda_components structure */
 224 static void lambda_components_init(lambda_components_t *lc)
 225 {
 226     lc->N      = 0;
 227     lc->Nalloc = 2;
 228     snew(lc->names, lc->Nalloc);
 229 }
 230
 231 /* Add a component to a lambda_components structure */
 232 static void lambda_components_add(lambda_components_t *lc,
 233                                   const char *name, size_t name_length)
 234 {
 235     while (lc->N + 1 > lc->Nalloc)
 236     {
 237         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 238         srenew(lc->names, lc->Nalloc);
 239     }
 240     snew(lc->names[lc->N], name_length+1);
 241     std::strncpy(lc->names[lc->N], name, name_length);
 242     lc->N++;
 243 }
 244
 245 /* check whether a component with index 'index' matches the given name, or
 246    is also NULL. Returns TRUE if this is the case.
 247    the string name does not need to end */
 248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 249                                         int                        index,
 250                                         const char                *name,
 251                                         size_t                     name_length)
 252 {
 253     size_t len;
 254     if (index >= lc->N)
 255     {
 256         return FALSE;
 257     }
 258     if (name == nullptr && lc->names[index] == nullptr)
 259     {
 260         return TRUE;
 261     }
 262     if ((name == nullptr) != (lc->names[index] == nullptr))
 263     {
 264         return FALSE;
 265     }
 266     len = std::strlen(lc->names[index]);
 267     if (len != name_length)
 268     {
 269         return FALSE;
 270     }
 271     if (std::strncmp(lc->names[index], name, name_length) == 0)
 272     {
 273         return TRUE;
 274     }
 275     return FALSE;
 276 }
 277
 278 /* Find the index of a given lambda component name, or -1 if not found */
 279 static int lambda_components_find(const lambda_components_t *lc,
 280                                   const char                *name,
 281                                   size_t                     name_length)
 282 {
 283     int i;
 284
 285     for (i = 0; i < lc->N; i++)
 286     {
 287         if (std::strncmp(lc->names[i], name, name_length) == 0)
 288         {
 289             return i;
 290         }
 291     }
 292     return -1;
 293 }
 294
 295
 296
 297 /* initialize a lambda vector */
 298 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 299 {
 300     snew(lv->val, lc->N);
 301     lv->index = -1;
 302     lv->dhdl  = -1;
 303     lv->lc    = lc;
 304 }
 305
 306 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 307 {
 308     int i;
 309
 310     lambda_vec_init(lv, orig->lc);
 311     lv->dhdl  = orig->dhdl;
 312     lv->index = orig->index;
 313     for (i = 0; i < lv->lc->N; i++)
 314     {
 315         lv->val[i] = orig->val[i];
 316     }
 317 }
 318
 319 /* write a lambda vec to a preallocated string */
 320 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 321 {
 322     int    i;
 323
 324     str[0] = 0; /* reset the string */
 325     if (lv->dhdl < 0)
 326     {
 327         if (named)
 328         {
 329             str += sprintf(str, "delta H to ");
 330         }
 331         if (lv->lc->N > 1)
 332         {
 333             str += sprintf(str, "(");
 334         }
 335         for (i = 0; i < lv->lc->N; i++)
 336         {
 337             str += sprintf(str, "%g", lv->val[i]);
 338             if (i < lv->lc->N-1)
 339             {
 340                 str += sprintf(str, ", ");
 341             }
 342         }
 343         if (lv->lc->N > 1)
 344         {
 345             sprintf(str, ")");
 346         }
 347     }
 348     else
 349     {
 350         /* this lambda vector describes a derivative */
 351         str += sprintf(str, "dH/dl");
 352         if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
 353         {
 354             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 355         }
 356     }
 357 }
 358
 359 /* write a shortened version of the lambda vec to a preallocated string */
 360 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 361 {
 362     if (lv->index >= 0)
 363     {
 364         sprintf(str, "%6d", lv->index);
 365     }
 366     else
 367     {
 368         if (lv->dhdl < 0)
 369         {
 370             sprintf(str, "%6.3f", lv->val[0]);
 371         }
 372         else
 373         {
 374             sprintf(str, "dH/dl[%d]", lv->dhdl);
 375         }
 376     }
 377 }
 378
 379 /* write an intermediate version of two lambda vecs to a preallocated string */
 380 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 381                                           const lambda_vec_t *b, char *str)
 382 {
 383     str[0] = 0;
 384     if ( (a->index >= 0) && (b->index >= 0) )
 385     {
 386         sprintf(str, "%6.3f", (a->index+b->index)/2.0);
 387     }
 388     else
 389     {
 390         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 391         {
 392             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
 393         }
 394     }
 395 }
 396
 397
 398 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 399    a and b must describe non-derivative lambda points */
 400 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 401 {
 402     int    i;
 403     double ret = 0.;
 404
 405     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 406     {
 407         gmx_fatal(FARGS,
 408                   "Trying to calculate the difference between derivatives instead of lambda points");
 409     }
 410     if (a->lc != b->lc)
 411     {
 412         gmx_fatal(FARGS,
 413                   "Trying to calculate the difference lambdas with differing basis set");
 414     }
 415     for (i = 0; i < a->lc->N; i++)
 416     {
 417         double df = a->val[i] - b->val[i];
 418         ret += df*df;
 419     }
 420     return std::sqrt(ret);
 421 }
 422
 423
 424 /* check whether two lambda vectors are the same */
 425 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 426 {
 427     int i;
 428
 429     if (a->lc != b->lc)
 430     {
 431         return FALSE;
 432     }
 433     if (a->dhdl < 0)
 434     {
 435         for (i = 0; i < a->lc->N; i++)
 436         {
 437             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 438             {
 439                 return FALSE;
 440             }
 441         }
 442         return TRUE;
 443     }
 444     else
 445     {
 446         /* they're derivatives, so we check whether the indices match */
 447         return (a->dhdl == b->dhdl);
 448     }
 449 }
 450
 451 /* Compare the sort order of two foreign lambda vectors
 452
 453     returns 1 if a is 'bigger' than b,
 454     returns 0 if they're the same,
 455     returns -1 if a is 'smaller' than b.*/
 456 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 457                                        const lambda_vec_t *b)
 458 {
 459     int      i;
 460     double   norm_a    = 0, norm_b = 0;
 461     gmx_bool different = FALSE;
 462
 463     if (a->lc != b->lc)
 464     {
 465         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 466     }
 467     /* if either one has an index we sort based on that */
 468     if ((a->index >= 0) || (b->index >= 0))
 469     {
 470         if (a->index == b->index)
 471         {
 472             return 0;
 473         }
 474         return (a->index > b->index) ? 1 : -1;
 475     }
 476     if (a->dhdl >= 0 || b->dhdl >= 0)
 477     {
 478         /* lambda vectors that are derivatives always sort higher than those
 479            without derivatives */
 480         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 481         {
 482             return (a->dhdl >= 0) ? 1 : -1;
 483         }
 484         return a->dhdl > b->dhdl;
 485     }
 486
 487     /* neither has an index, so we can only sort on the lambda components,
 488        which is only valid if there is one component */
 489     for (i = 0; i < a->lc->N; i++)
 490     {
 491         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 492         {
 493             different = TRUE;
 494         }
 495         norm_a += a->val[i]*a->val[i];
 496         norm_b += b->val[i]*b->val[i];
 497     }
 498     if (!different)
 499     {
 500         return 0;
 501     }
 502     return norm_a > norm_b;
 503 }
 504
 505 /* Compare the sort order of two native lambda vectors
 506
 507     returns 1 if a is 'bigger' than b,
 508     returns 0 if they're the same,
 509     returns -1 if a is 'smaller' than b.*/
 510 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 511                                       const lambda_vec_t *b)
 512 {
 513     if (a->lc != b->lc)
 514     {
 515         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 516     }
 517     /* if either one has an index we sort based on that */
 518     if ((a->index >= 0) || (b->index >= 0))
 519     {
 520         if (a->index == b->index)
 521         {
 522             return 0;
 523         }
 524         return (a->index > b->index) ? 1 : -1;
 525     }
 526     /* neither has an index, so we can only sort on the lambda components,
 527        which is only valid if there is one component */
 528     if (a->lc->N > 1)
 529     {
 530         gmx_fatal(FARGS,
 531                   "Can't compare lambdas with no index and > 1 component");
 532     }
 533     if (a->dhdl >= 0 || b->dhdl >= 0)
 534     {
 535         gmx_fatal(FARGS,
 536                   "Can't compare native lambdas that are derivatives");
 537     }
 538     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 539     {
 540         return 0;
 541     }
 542     return a->val[0] > b->val[0] ? 1 : -1;
 543 }
 544
 545
 546
 547
 548 static void hist_init(hist_t *h, int nhist, int *nbin)
 549 {
 550     int i;
 551     if (nhist > 2)
 552     {
 553         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 554     }
 555     for (i = 0; i < nhist; i++)
 556     {
 557         snew(h->bin[i], nbin[i]);
 558         h->x0[i]      = 0;
 559         h->nbin[i]    = nbin[i];
 560         h->start_time = h->delta_time = 0;
 561         h->dx[i]      = 0;
 562     }
 563     h->sum   = 0;
 564     h->nhist = nhist;
 565 }
 566
 567 static void xvg_init(xvg_t *ba)
 568 {
 569     ba->filename = nullptr;
 570     ba->nset     = 0;
 571     ba->np_alloc = 0;
 572     ba->np       = nullptr;
 573     ba->y        = nullptr;
 574 }
 575
 576 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 577                          lambda_vec_t *foreign_lambda, double temp,
 578                          gmx_bool derivative, const char *filename)
 579 {
 580     s->native_lambda  = native_lambda;
 581     s->foreign_lambda = foreign_lambda;
 582     s->temp           = temp;
 583     s->derivative     = derivative;
 584
 585     s->ndu        = 0;
 586     s->du         = nullptr;
 587     s->t          = nullptr;
 588     s->start_time = s->delta_time = 0;
 589     s->hist       = nullptr;
 590     s->du_alloc   = nullptr;
 591     s->t_alloc    = nullptr;
 592     s->hist_alloc = nullptr;
 593     s->ndu_alloc  = 0;
 594     s->nt_alloc   = 0;
 595
 596     s->ntot     = 0;
 597     s->filename = filename;
 598 }
 599
 600 static void sample_range_init(sample_range_t *r, samples_t *s)
 601 {
 602     r->start = 0;
 603     r->end   = s->ndu;
 604     r->use   = TRUE;
 605     r->s     = nullptr;
 606 }
 607
 608 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 609                              lambda_vec_t *foreign_lambda, double temp)
 610 {
 611     sc->native_lambda  = native_lambda;
 612     sc->foreign_lambda = foreign_lambda;
 613     sc->temp           = temp;
 614
 615     sc->nsamples       = 0;
 616     sc->s              = nullptr;
 617     sc->r              = nullptr;
 618     sc->nsamples_alloc = 0;
 619
 620     sc->ntot = 0;
 621     sc->next = sc->prev = nullptr;
 622 }
 623
 624 static void sample_coll_destroy(sample_coll_t *sc)
 625 {
 626     /* don't free the samples themselves */
 627     sfree(sc->r);
 628     sfree(sc->s);
 629 }
 630
 631
 632 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 633                              double temp)
 634 {
 635     l->lambda = native_lambda;
 636     l->temp   = temp;
 637
 638     l->next = nullptr;
 639     l->prev = nullptr;
 640
 641     l->sc = &(l->sc_head);
 642
 643     sample_coll_init(l->sc, native_lambda, nullptr, 0.);
 644     l->sc->next = l->sc;
 645     l->sc->prev = l->sc;
 646 }
 647
 648 static void barres_init(barres_t *br)
 649 {
 650     br->dg            = 0;
 651     br->dg_err        = 0;
 652     br->sa            = 0;
 653     br->sa_err        = 0;
 654     br->sb            = 0;
 655     br->sb_err        = 0;
 656     br->dg_stddev     = 0;
 657     br->dg_stddev_err = 0;
 658
 659     br->a = nullptr;
 660     br->b = nullptr;
 661 }
 662
 663
 664 /* calculate the total number of samples in a sample collection */
 665 static void sample_coll_calc_ntot(sample_coll_t *sc)
 666 {
 667     int i;
 668
 669     sc->ntot = 0;
 670     for (i = 0; i < sc->nsamples; i++)
 671     {
 672         if (sc->r[i].use)
 673         {
 674             if (sc->s[i]->hist)
 675             {
 676                 sc->ntot += sc->s[i]->ntot;
 677             }
 678             else
 679             {
 680                 sc->ntot += sc->r[i].end - sc->r[i].start;
 681             }
 682         }
 683     }
 684 }
 685
 686
 687 /* find the barsamples_t associated with a lambda that corresponds to
 688    a specific foreign lambda */
 689 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 690                                                    lambda_vec_t  *foreign_lambda)
 691 {
 692     sample_coll_t *sc = l->sc->next;
 693
 694     while (sc != l->sc)
 695     {
 696         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 697         {
 698             return sc;
 699         }
 700         sc = sc->next;
 701     }
 702
 703     return nullptr;
 704 }
 705
 706 /* insert li into an ordered list of lambda_colls */
 707 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 708 {
 709     sample_coll_t *scn = l->sc->next;
 710     while ( (scn != l->sc) )
 711     {
 712         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 713         {
 714             break;
 715         }
 716         scn = scn->next;
 717     }
 718     /* now insert it before the found scn */
 719     sc->next        = scn;
 720     sc->prev        = scn->prev;
 721     scn->prev->next = sc;
 722     scn->prev       = sc;
 723 }
 724
 725 /* insert li into an ordered list of lambdas */
 726 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 727 {
 728     lambda_data_t *lc = head->next;
 729     while (lc != head)
 730     {
 731         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 732         {
 733             break;
 734         }
 735         lc = lc->next;
 736     }
 737     /* now insert ourselves before the found lc */
 738     li->next       = lc;
 739     li->prev       = lc->prev;
 740     lc->prev->next = li;
 741     lc->prev       = li;
 742 }
 743
 744 /* insert a sample and a sample_range into a sample_coll. The
 745     samples are stored as a pointer, the range is copied. */
 746 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 747                                       sample_range_t *r)
 748 {
 749     /* first check if it belongs here */
 750     if (sc->temp != s->temp)
 751     {
 752         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 753                   s->filename, sc->next->s[0]->filename);
 754     }
 755     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 756     {
 757         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 758                   s->filename, sc->next->s[0]->filename);
 759     }
 760     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 761     {
 762         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 763                   s->filename, sc->next->s[0]->filename);
 764     }
 765
 766     /* check if there's room */
 767     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 768     {
 769         sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
 770         srenew(sc->s, sc->nsamples_alloc);
 771         srenew(sc->r, sc->nsamples_alloc);
 772     }
 773     sc->s[sc->nsamples] = s;
 774     sc->r[sc->nsamples] = *r;
 775     sc->nsamples++;
 776
 777     sample_coll_calc_ntot(sc);
 778 }
 779
 780 /* insert a sample into a lambda_list, creating the right sample_coll if
 781    neccesary */
 782 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 783 {
 784     gmx_bool       found = FALSE;
 785     sample_coll_t *sc;
 786     sample_range_t r;
 787
 788     lambda_data_t *l = head->next;
 789
 790     /* first search for the right lambda_data_t */
 791     while (l != head)
 792     {
 793         if (lambda_vec_same(l->lambda, s->native_lambda) )
 794         {
 795             found = TRUE;
 796             break;
 797         }
 798         l = l->next;
 799     }
 800
 801     if (!found)
 802     {
 803         snew(l, 1);                                     /* allocate a new one */
 804         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 805         lambda_data_insert_lambda(head, l);             /* add it to the list */
 806     }
 807
 808     /* now look for a sample collection */
 809     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 810     if (!sc)
 811     {
 812         snew(sc, 1); /* allocate a new one */
 813         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 814         lambda_data_insert_sample_coll(l, sc);
 815     }
 816
 817     /* now insert the samples into the sample coll */
 818     sample_range_init(&r, s);
 819     sample_coll_insert_sample(sc, s, &r);
 820 }
 821
 822
 823 /* make a histogram out of a sample collection */
 824 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 825                                   int *nbin_alloc, int *nbin,
 826                                   double *dx, double *xmin, int nbin_default)
 827 {
 828     int      i, j, k;
 829     gmx_bool dx_set   = FALSE;
 830     gmx_bool xmin_set = FALSE;
 831
 832     gmx_bool xmax_set      = FALSE;
 833     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 834                                        limits of a histogram */
 835     double   xmax          = -1;
 836
 837     /* first determine dx and xmin; try the histograms */
 838     for (i = 0; i < sc->nsamples; i++)
 839     {
 840         if (sc->s[i]->hist)
 841         {
 842             hist_t *hist = sc->s[i]->hist;
 843             for (k = 0; k < hist->nhist; k++)
 844             {
 845                 double hdx      = hist->dx[k];
 846                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 847
 848                 /* we use the biggest dx*/
 849                 if ( (!dx_set) || hist->dx[0] > *dx)
 850                 {
 851                     dx_set = TRUE;
 852                     *dx    = hist->dx[0];
 853                 }
 854                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 855                 {
 856                     xmin_set = TRUE;
 857                     *xmin    = (hist->x0[k]*hdx);
 858                 }
 859
 860                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 861                 {
 862                     xmax_set = TRUE;
 863                     xmax     = xmax_now;
 864                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 865                     {
 866                         xmax_set_hard = TRUE;
 867                     }
 868                 }
 869                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 870                 {
 871                     xmax_set_hard = TRUE;
 872                     xmax          = xmax_now;
 873                 }
 874             }
 875         }
 876     }
 877     /* and the delta us */
 878     for (i = 0; i < sc->nsamples; i++)
 879     {
 880         if (sc->s[i]->ndu > 0)
 881         {
 882             /* determine min and max */
 883             int    starti  = sc->r[i].start;
 884             int    endi    = sc->r[i].end;
 885             double du_xmin = sc->s[i]->du[starti];
 886             double du_xmax = sc->s[i]->du[starti];
 887             for (j = starti+1; j < endi; j++)
 888             {
 889                 if (sc->s[i]->du[j] < du_xmin)
 890                 {
 891                     du_xmin = sc->s[i]->du[j];
 892                 }
 893                 if (sc->s[i]->du[j] > du_xmax)
 894                 {
 895                     du_xmax = sc->s[i]->du[j];
 896                 }
 897             }
 898
 899             /* and now change the limits */
 900             if ( (!xmin_set) || (du_xmin < *xmin) )
 901             {
 902                 xmin_set = TRUE;
 903                 *xmin    = du_xmin;
 904             }
 905             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 906             {
 907                 xmax_set = TRUE;
 908                 xmax     = du_xmax;
 909             }
 910         }
 911     }
 912
 913     if (!xmax_set || !xmin_set)
 914     {
 915         *nbin = 0;
 916         return;
 917     }
 918
 919
 920     if (!dx_set)
 921     {
 922         *nbin = nbin_default;
 923         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 924                                                be 0, and we count from 0 */
 925     }
 926     else
 927     {
 928         *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
 929     }
 930
 931     if (*nbin > *nbin_alloc)
 932     {
 933         *nbin_alloc = *nbin;
 934         srenew(*bin, *nbin_alloc);
 935     }
 936
 937     /* reset the histogram */
 938     for (i = 0; i < (*nbin); i++)
 939     {
 940         (*bin)[i] = 0;
 941     }
 942
 943     /* now add the actual data */
 944     for (i = 0; i < sc->nsamples; i++)
 945     {
 946         if (sc->s[i]->hist)
 947         {
 948             hist_t *hist = sc->s[i]->hist;
 949             for (k = 0; k < hist->nhist; k++)
 950             {
 951                 double hdx       = hist->dx[k];
 952                 double xmin_hist = hist->x0[k]*hdx;
 953                 for (j = 0; j < hist->nbin[k]; j++)
 954                 {
 955                     /* calculate the bin corresponding to the middle of the
 956                        original bin */
 957                     double x     = hdx*(j+0.5) + xmin_hist;
 958                     int    binnr = static_cast<int>((x-(*xmin))/(*dx));
 959
 960                     if (binnr >= *nbin || binnr < 0)
 961                     {
 962                         binnr = (*nbin)-1;
 963                     }
 964
 965                     (*bin)[binnr] += hist->bin[k][j];
 966                 }
 967             }
 968         }
 969         else
 970         {
 971             int starti = sc->r[i].start;
 972             int endi   = sc->r[i].end;
 973             for (j = starti; j < endi; j++)
 974             {
 975                 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
 976                 if (binnr >= *nbin || binnr < 0)
 977                 {
 978                     binnr = (*nbin)-1;
 979                 }
 980
 981                 (*bin)[binnr]++;
 982             }
 983         }
 984     }
 985 }
 986
 987 /* write a collection of histograms to a file */
 988 static void sim_data_histogram(sim_data_t *sd, const char *filename,
 989                                int nbin_default, const gmx_output_env_t *oenv)
 990 {
 991     char           label_x[STRLEN];
 992     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
 993     const char    *title   = "N(\\DeltaH)";
 994     const char    *label_y = "Samples";
 995     FILE          *fp;
 996     lambda_data_t *bl;
 997     int            nsets     = 0;
 998     char         **setnames  = nullptr;
 999     gmx_bool       first_set = FALSE;
1000     /* histogram data: */
1001     int           *hist       = nullptr;
1002     int            nbin       = 0;
1003     int            nbin_alloc = 0;
1004     double         dx         = 0;
1005     double         minval     = 0;
1006     int            i;
1007     lambda_data_t *bl_head = sd->lb;
1008
1009     printf("\nWriting histogram to %s\n", filename);
1010     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1011
1012     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1013
1014     /* first get all the set names */
1015     bl = bl_head->next;
1016     /* iterate over all lambdas */
1017     while (bl != bl_head)
1018     {
1019         sample_coll_t *sc = bl->sc->next;
1020
1021         /* iterate over all samples */
1022         while (sc != bl->sc)
1023         {
1024             char buf[STRLEN], buf2[STRLEN];
1025
1026             nsets++;
1027             srenew(setnames, nsets);
1028             snew(setnames[nsets-1], STRLEN);
1029             if (sc->foreign_lambda->dhdl < 0)
1030             {
1031                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1032                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1033                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1034                         deltag, lambda, buf2, lambda, buf);
1035             }
1036             else
1037             {
1038                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1039                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1040                         dhdl, lambda, buf);
1041             }
1042             sc = sc->next;
1043         }
1044
1045         bl = bl->next;
1046     }
1047     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1048
1049
1050     /* now make the histograms */
1051     bl = bl_head->next;
1052     /* iterate over all lambdas */
1053     while (bl != bl_head)
1054     {
1055         sample_coll_t *sc = bl->sc->next;
1056
1057         /* iterate over all samples */
1058         while (sc != bl->sc)
1059         {
1060             if (!first_set)
1061             {
1062                 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1063             }
1064
1065             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1066                                   nbin_default);
1067
1068             for (i = 0; i < nbin; i++)
1069             {
1070                 double xmin = i*dx + minval;
1071                 double xmax = (i+1)*dx + minval;
1072
1073                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1074             }
1075
1076             first_set = FALSE;
1077             sc        = sc->next;
1078         }
1079
1080         bl = bl->next;
1081     }
1082
1083     if (hist)
1084     {
1085         sfree(hist);
1086     }
1087
1088     xvgrclose(fp);
1089 }
1090
1091 static int
1092 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1093 {
1094     int n = 0;
1095
1096     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1097     if (lambda->index >= 0)
1098     {
1099         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1100     }
1101     if (lambda->dhdl >= 0)
1102     {
1103         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1104     }
1105     else
1106     {
1107         int i;
1108         for (i = 0; i < lambda->lc->N; i++)
1109         {
1110             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1111         }
1112     }
1113     return n;
1114 }
1115
1116 /* create a collection (array) of barres_t object given a ordered linked list
1117    of barlamda_t sample collections */
1118 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1119                                     gmx_bool use_dhdl)
1120 {
1121     lambda_data_t *bl;
1122     int            nlambda = 0;
1123     barres_t      *res;
1124     gmx_bool       dhdl    = FALSE;
1125     gmx_bool       first   = TRUE;
1126     lambda_data_t *bl_head = sd->lb;
1127
1128     /* first count the lambdas */
1129     bl = bl_head->next;
1130     while (bl != bl_head)
1131     {
1132         nlambda++;
1133         bl = bl->next;
1134     }
1135     snew(res, nlambda-1);
1136
1137     /* next put the right samples in the res */
1138     *nres = 0;
1139     bl    = bl_head->next->next; /* we start with the second one. */
1140     while (bl != bl_head)
1141     {
1142         sample_coll_t *sc, *scprev;
1143         barres_t      *br = &(res[*nres]);
1144         /* there is always a previous one. we search for that as a foreign
1145            lambda: */
1146         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1147         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1148
1149         barres_init(br);
1150
1151         if (use_dhdl)
1152         {
1153             /* we use dhdl */
1154
1155             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1156             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1157
1158             if (first)
1159             {
1160                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1161                 dhdl = TRUE;
1162             }
1163             if (!dhdl)
1164             {
1165                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1166             }
1167         }
1168         else if (!scprev && !sc)
1169         {
1170             char descX[STRLEN], descY[STRLEN];
1171             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1172             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1173
1174             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1175         }
1176
1177         /* normal delta H */
1178         if (!scprev)
1179         {
1180             char descX[STRLEN], descY[STRLEN];
1181             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1182             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1183             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1184         }
1185         if (!sc)
1186         {
1187             char descX[STRLEN], descY[STRLEN];
1188             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1189             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1190             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1191         }
1192         br->a = scprev;
1193         br->b = sc;
1194
1195         first = FALSE;
1196         (*nres)++;
1197         bl = bl->next;
1198     }
1199     return res;
1200 }
1201
1202 /* estimate the maximum discretization error */
1203 static double barres_list_max_disc_err(barres_t *res, int nres)
1204 {
1205     int    i, j;
1206     double disc_err = 0.;
1207     double delta_lambda;
1208
1209     for (i = 0; i < nres; i++)
1210     {
1211         barres_t *br = &(res[i]);
1212
1213         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1214                                            br->a->native_lambda);
1215
1216         for (j = 0; j < br->a->nsamples; j++)
1217         {
1218             if (br->a->s[j]->hist)
1219             {
1220                 double Wfac = 1.;
1221                 if (br->a->s[j]->derivative)
1222                 {
1223                     Wfac =  delta_lambda;
1224                 }
1225
1226                 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1227             }
1228         }
1229         for (j = 0; j < br->b->nsamples; j++)
1230         {
1231             if (br->b->s[j]->hist)
1232             {
1233                 double Wfac = 1.;
1234                 if (br->b->s[j]->derivative)
1235                 {
1236                     Wfac =  delta_lambda;
1237                 }
1238                 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1239             }
1240         }
1241     }
1242     return disc_err;
1243 }
1244
1245
1246 /* impose start and end times on a sample collection, updating sample_ranges */
1247 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1248                                      double end_t)
1249 {
1250     int i;
1251     for (i = 0; i < sc->nsamples; i++)
1252     {
1253         samples_t      *s = sc->s[i];
1254         sample_range_t *r = &(sc->r[i]);
1255         if (s->hist)
1256         {
1257             double end_time = s->hist->delta_time*s->hist->sum +
1258                 s->hist->start_time;
1259             if (s->hist->start_time < begin_t || end_time > end_t)
1260             {
1261                 r->use = FALSE;
1262             }
1263         }
1264         else
1265         {
1266             if (!s->t)
1267             {
1268                 double end_time;
1269                 if (s->start_time < begin_t)
1270                 {
1271                     r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1272                 }
1273                 end_time = s->delta_time*s->ndu + s->start_time;
1274                 if (end_time > end_t)
1275                 {
1276                     r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1277                 }
1278             }
1279             else
1280             {
1281                 int j;
1282                 for (j = 0; j < s->ndu; j++)
1283                 {
1284                     if (s->t[j] < begin_t)
1285                     {
1286                         r->start = j;
1287                     }
1288
1289                     if (s->t[j] >= end_t)
1290                     {
1291                         r->end = j;
1292                         break;
1293                     }
1294                 }
1295             }
1296             if (r->start > r->end)
1297             {
1298                 r->use = FALSE;
1299             }
1300         }
1301     }
1302     sample_coll_calc_ntot(sc);
1303 }
1304
1305 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1306 {
1307     double         first_t, last_t;
1308     double         begin_t, end_t;
1309     lambda_data_t *lc;
1310     lambda_data_t *head = sd->lb;
1311     int            j;
1312
1313     if (begin <= 0 && end < 0)
1314     {
1315         return;
1316     }
1317
1318     /* first determine the global start and end times */
1319     first_t = -1;
1320     last_t  = -1;
1321     lc      = head->next;
1322     while (lc != head)
1323     {
1324         sample_coll_t *sc = lc->sc->next;
1325         while (sc != lc->sc)
1326         {
1327             for (j = 0; j < sc->nsamples; j++)
1328             {
1329                 double start_t, end_t;
1330
1331                 start_t = sc->s[j]->start_time;
1332                 end_t   =   sc->s[j]->start_time;
1333                 if (sc->s[j]->hist)
1334                 {
1335                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1336                 }
1337                 else
1338                 {
1339                     if (sc->s[j]->t)
1340                     {
1341                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1342                     }
1343                     else
1344                     {
1345                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1346                     }
1347                 }
1348
1349                 if (start_t < first_t || first_t < 0)
1350                 {
1351                     first_t = start_t;
1352                 }
1353                 if (end_t > last_t)
1354                 {
1355                     last_t = end_t;
1356                 }
1357             }
1358             sc = sc->next;
1359         }
1360         lc = lc->next;
1361     }
1362
1363     /* calculate the actual times */
1364     if (begin > 0)
1365     {
1366         begin_t = begin;
1367     }
1368     else
1369     {
1370         begin_t = first_t;
1371     }
1372
1373     if (end > 0)
1374     {
1375         end_t = end;
1376     }
1377     else
1378     {
1379         end_t = last_t;
1380     }
1381     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1382
1383     if (begin_t > end_t)
1384     {
1385         return;
1386     }
1387     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1388
1389     /* then impose them */
1390     lc = head->next;
1391     while (lc != head)
1392     {
1393         sample_coll_t *sc = lc->sc->next;
1394         while (sc != lc->sc)
1395         {
1396             sample_coll_impose_times(sc, begin_t, end_t);
1397             sc = sc->next;
1398         }
1399         lc = lc->next;
1400     }
1401 }
1402
1403
1404 /* create subsample i out of ni from an existing sample_coll */
1405 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1406                                              sample_coll_t *sc_orig,
1407                                              int i, int ni)
1408 {
1409     int             j;
1410
1411     gmx_int64_t     ntot_start;
1412     gmx_int64_t     ntot_end;
1413     gmx_int64_t     ntot_so_far;
1414
1415     *sc = *sc_orig; /* just copy all fields */
1416
1417     /* allocate proprietary memory */
1418     snew(sc->s, sc_orig->nsamples);
1419     snew(sc->r, sc_orig->nsamples);
1420
1421     /* copy the samples */
1422     for (j = 0; j < sc_orig->nsamples; j++)
1423     {
1424         sc->s[j] = sc_orig->s[j];
1425         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1426     }
1427
1428     /* now fix start and end fields */
1429     /* the casts avoid possible overflows */
1430     ntot_start  = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1431     ntot_end    = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1432     ntot_so_far = 0;
1433     for (j = 0; j < sc->nsamples; j++)
1434     {
1435         gmx_int64_t ntot_add;
1436         gmx_int64_t new_start, new_end;
1437
1438         if (sc->r[j].use)
1439         {
1440             if (sc->s[j]->hist)
1441             {
1442                 ntot_add = sc->s[j]->hist->sum;
1443             }
1444             else
1445             {
1446                 ntot_add = sc->r[j].end - sc->r[j].start;
1447             }
1448         }
1449         else
1450         {
1451             ntot_add = 0;
1452         }
1453
1454         if (!sc->s[j]->hist)
1455         {
1456             if (ntot_so_far < ntot_start)
1457             {
1458                 /* adjust starting point */
1459                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1460             }
1461             else
1462             {
1463                 new_start = sc->r[j].start;
1464             }
1465             /* adjust end point */
1466             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1467             if (new_end > sc->r[j].end)
1468             {
1469                 new_end = sc->r[j].end;
1470             }
1471
1472             /* check if we're in range at all */
1473             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1474             {
1475                 new_start = 0;
1476                 new_end   = 0;
1477             }
1478             /* and write the new range */
1479             GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1480             GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1481             sc->r[j].start = static_cast<int>(new_start);
1482             sc->r[j].end   = static_cast<int>(new_end);
1483         }
1484         else
1485         {
1486             if (sc->r[j].use)
1487             {
1488                 double overlap;
1489                 double ntot_start_norm, ntot_end_norm;
1490                 /* calculate the amount of overlap of the
1491                    desired range (ntot_start -- ntot_end) onto
1492                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1493
1494                 /* first calculate normalized bounds
1495                    (where 0 is the start of the hist range, and 1 the end) */
1496                 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1497                 ntot_end_norm   = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1498
1499                 /* now fix the boundaries */
1500                 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1501                 ntot_end_norm   = std::max(0.0, std::min(1.0, ntot_end_norm));
1502
1503                 /* and calculate the overlap */
1504                 overlap = ntot_end_norm - ntot_start_norm;
1505
1506                 if (overlap > 0.95) /* we allow for 5% slack */
1507                 {
1508                     sc->r[j].use = TRUE;
1509                 }
1510                 else if (overlap < 0.05)
1511                 {
1512                     sc->r[j].use = FALSE;
1513                 }
1514                 else
1515                 {
1516                     return FALSE;
1517                 }
1518             }
1519         }
1520         ntot_so_far += ntot_add;
1521     }
1522     sample_coll_calc_ntot(sc);
1523
1524     return TRUE;
1525 }
1526
1527 /* calculate minimum and maximum work values in sample collection */
1528 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1529                                 double *Wmin, double *Wmax)
1530 {
1531     int i, j;
1532
1533     *Wmin =  std::numeric_limits<float>::max();
1534     *Wmax = -std::numeric_limits<float>::max();
1535
1536     for (i = 0; i < sc->nsamples; i++)
1537     {
1538         samples_t      *s = sc->s[i];
1539         sample_range_t *r = &(sc->r[i]);
1540         if (r->use)
1541         {
1542             if (!s->hist)
1543             {
1544                 for (j = r->start; j < r->end; j++)
1545                 {
1546                     *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1547                     *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1548                 }
1549             }
1550             else
1551             {
1552                 int    hd = 0; /* determine the histogram direction: */
1553                 double dx;
1554                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1555                 {
1556                     hd = 1;
1557                 }
1558                 dx = s->hist->dx[hd];
1559
1560                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1561                 {
1562                     *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1563                     *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1564                     /* look for the highest value bin with values */
1565                     if (s->hist->bin[hd][j] > 0)
1566                     {
1567                         *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1568                         *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1569                         break;
1570                     }
1571                 }
1572             }
1573         }
1574     }
1575 }
1576
1577 /* Initialize a sim_data structure */
1578 static void sim_data_init(sim_data_t *sd)
1579 {
1580     /* make linked list */
1581     sd->lb       = &(sd->lb_head);
1582     sd->lb->next = sd->lb;
1583     sd->lb->prev = sd->lb;
1584
1585     lambda_components_init(&(sd->lc));
1586 }
1587
1588
1589 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1590 {
1591     int    i;
1592     double sum;
1593
1594     sum = 0;
1595
1596     for (i = 0; i < n; i++)
1597     {
1598         sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1599     }
1600
1601     return sum;
1602 }
1603
1604 /* calculate the BAR average given a histogram
1605
1606     if type== 0, calculate the best estimate for the average,
1607     if type==-1, calculate the minimum possible value given the histogram
1608     if type== 1, calculate the maximum possible value given the histogram */
1609 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1610                                 int type)
1611 {
1612     double sum = 0.;
1613     int    i;
1614     int    maxbin;
1615     /* normalization factor multiplied with bin width and
1616        number of samples (we normalize through M): */
1617     double normdx = 1.;
1618     int    hd     = 0; /* determine the histogram direction: */
1619     double dx;
1620
1621     if ( (hist->nhist > 1) && (Wfac < 0) )
1622     {
1623         hd = 1;
1624     }
1625     dx     = hist->dx[hd];
1626     maxbin = hist->nbin[hd]-1;
1627     if (type == 1)
1628     {
1629         maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1630     }
1631
1632     for (i = 0; i < maxbin; i++)
1633     {
1634         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1635         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1636
1637         sum += pxdx/(1. + std::exp(x + sbMmDG));
1638     }
1639
1640     return sum;
1641 }
1642
1643 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1644                                 double temp, double tol, int type)
1645 {
1646     double kT, beta, M;
1647     int    i;
1648     double Wfac1, Wfac2, Wmin, Wmax;
1649     double DG0, DG1, DG2, dDG1;
1650     double n1, n2; /* numbers of samples as doubles */
1651
1652     kT   = BOLTZ*temp;
1653     beta = 1/kT;
1654
1655     /* count the numbers of samples */
1656     n1 = ca->ntot;
1657     n2 = cb->ntot;
1658
1659     M = std::log(n1/n2);
1660
1661     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1662     if (ca->foreign_lambda->dhdl < 0)
1663     {
1664         /* this is the case when the delta U were calculated directly
1665            (i.e. we're not scaling dhdl) */
1666         Wfac1 = beta;
1667         Wfac2 = beta;
1668     }
1669     else
1670     {
1671         /* we're using dhdl, so delta_lambda needs to be a
1672            multiplication factor.  */
1673         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1674         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1675                                                   ca->native_lambda);
1676         if (cb->native_lambda->lc->N > 1)
1677         {
1678             gmx_fatal(FARGS,
1679                       "Can't (yet) do multi-component dhdl interpolation");
1680         }
1681
1682         Wfac1 =  beta*delta_lambda;
1683         Wfac2 = -beta*delta_lambda;
1684     }
1685
1686     if (beta < 1)
1687     {
1688         /* We print the output both in kT and kJ/mol.
1689          * Here we determine DG in kT, so when beta < 1
1690          * the precision has to be increased.
1691          */
1692         tol *= beta;
1693     }
1694
1695     /* Calculate minimum and maximum work to give an initial estimate of
1696      * delta G  as their average.
1697      */
1698     {
1699         double Wmin1, Wmin2, Wmax1, Wmax2;
1700         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1701         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1702
1703         Wmin = std::min(Wmin1, Wmin2);
1704         Wmax = std::max(Wmax1, Wmax2);
1705     }
1706
1707     DG0 = Wmin;
1708     DG2 = Wmax;
1709
1710     if (debug)
1711     {
1712         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1713     }
1714     /* We approximate by bisection: given our initial estimates
1715        we keep checking whether the halfway point is greater or
1716        smaller than what we get out of the BAR averages.
1717
1718        For the comparison we can use twice the tolerance. */
1719     while (DG2 - DG0 > 2*tol)
1720     {
1721         DG1 = 0.5*(DG0 + DG2);
1722
1723         /* calculate the BAR averages */
1724         dDG1 = 0.;
1725
1726         for (i = 0; i < ca->nsamples; i++)
1727         {
1728             samples_t      *s = ca->s[i];
1729             sample_range_t *r = &(ca->r[i]);
1730             if (r->use)
1731             {
1732                 if (s->hist)
1733                 {
1734                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1735                 }
1736                 else
1737                 {
1738                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1739                                          Wfac1, (M-DG1));
1740                 }
1741             }
1742         }
1743         for (i = 0; i < cb->nsamples; i++)
1744         {
1745             samples_t      *s = cb->s[i];
1746             sample_range_t *r = &(cb->r[i]);
1747             if (r->use)
1748             {
1749                 if (s->hist)
1750                 {
1751                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1752                 }
1753                 else
1754                 {
1755                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1756                                          Wfac2, -(M-DG1));
1757                 }
1758             }
1759         }
1760
1761         if (dDG1 < 0)
1762         {
1763             DG0 = DG1;
1764         }
1765         else
1766         {
1767             DG2 = DG1;
1768         }
1769         if (debug)
1770         {
1771             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1772         }
1773     }
1774
1775     return 0.5*(DG0 + DG2);
1776 }
1777
1778 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1779                              double temp, double dg, double *sa, double *sb)
1780 {
1781     int    i, j;
1782     double W_ab = 0.;
1783     double W_ba = 0.;
1784     double kT, beta;
1785     double Wfac1, Wfac2;
1786     double n1, n2;
1787
1788     kT   = BOLTZ*temp;
1789     beta = 1/kT;
1790
1791     /* count the numbers of samples */
1792     n1 = ca->ntot;
1793     n2 = cb->ntot;
1794
1795     /* to ensure the work values are the same as during the delta_G */
1796     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1797     if (ca->foreign_lambda->dhdl < 0)
1798     {
1799         /* this is the case when the delta U were calculated directly
1800            (i.e. we're not scaling dhdl) */
1801         Wfac1 = beta;
1802         Wfac2 = beta;
1803     }
1804     else
1805     {
1806         /* we're using dhdl, so delta_lambda needs to be a
1807            multiplication factor.  */
1808         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1809                                                   ca->native_lambda);
1810         Wfac1 =  beta*delta_lambda;
1811         Wfac2 = -beta*delta_lambda;
1812     }
1813
1814     /* first calculate the average work in both directions */
1815     for (i = 0; i < ca->nsamples; i++)
1816     {
1817         samples_t      *s = ca->s[i];
1818         sample_range_t *r = &(ca->r[i]);
1819         if (r->use)
1820         {
1821             if (!s->hist)
1822             {
1823                 for (j = r->start; j < r->end; j++)
1824                 {
1825                     W_ab += Wfac1*s->du[j];
1826                 }
1827             }
1828             else
1829             {
1830                 /* normalization factor multiplied with bin width and
1831                    number of samples (we normalize through M): */
1832                 double normdx = 1.;
1833                 double dx;
1834                 int    hd = 0; /* histogram direction */
1835                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1836                 {
1837                     hd = 1;
1838                 }
1839                 dx = s->hist->dx[hd];
1840
1841                 for (j = 0; j < s->hist->nbin[0]; j++)
1842                 {
1843                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1844                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1845                     W_ab += pxdx*x;
1846                 }
1847             }
1848         }
1849     }
1850     W_ab /= n1;
1851
1852     for (i = 0; i < cb->nsamples; i++)
1853     {
1854         samples_t      *s = cb->s[i];
1855         sample_range_t *r = &(cb->r[i]);
1856         if (r->use)
1857         {
1858             if (!s->hist)
1859             {
1860                 for (j = r->start; j < r->end; j++)
1861                 {
1862                     W_ba += Wfac1*s->du[j];
1863                 }
1864             }
1865             else
1866             {
1867                 /* normalization factor multiplied with bin width and
1868                    number of samples (we normalize through M): */
1869                 double normdx = 1.;
1870                 double dx;
1871                 int    hd = 0; /* histogram direction */
1872                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1873                 {
1874                     hd = 1;
1875                 }
1876                 dx = s->hist->dx[hd];
1877
1878                 for (j = 0; j < s->hist->nbin[0]; j++)
1879                 {
1880                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1881                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1882                     W_ba += pxdx*x;
1883                 }
1884             }
1885         }
1886     }
1887     W_ba /= n2;
1888
1889     /* then calculate the relative entropies */
1890     *sa = (W_ab - dg);
1891     *sb = (W_ba + dg);
1892 }
1893
1894 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1895                            double temp, double dg, double *stddev)
1896 {
1897     int    i, j;
1898     double M;
1899     double sigmafact = 0.;
1900     double kT, beta;
1901     double Wfac1, Wfac2;
1902     double n1, n2;
1903
1904     kT   = BOLTZ*temp;
1905     beta = 1/kT;
1906
1907     /* count the numbers of samples */
1908     n1 = ca->ntot;
1909     n2 = cb->ntot;
1910
1911     /* to ensure the work values are the same as during the delta_G */
1912     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1913     if (ca->foreign_lambda->dhdl < 0)
1914     {
1915         /* this is the case when the delta U were calculated directly
1916            (i.e. we're not scaling dhdl) */
1917         Wfac1 = beta;
1918         Wfac2 = beta;
1919     }
1920     else
1921     {
1922         /* we're using dhdl, so delta_lambda needs to be a
1923            multiplication factor.  */
1924         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1925                                                   ca->native_lambda);
1926         Wfac1 =  beta*delta_lambda;
1927         Wfac2 = -beta*delta_lambda;
1928     }
1929
1930     M = std::log(n1/n2);
1931
1932
1933     /* calculate average in both directions */
1934     for (i = 0; i < ca->nsamples; i++)
1935     {
1936         samples_t      *s = ca->s[i];
1937         sample_range_t *r = &(ca->r[i]);
1938         if (r->use)
1939         {
1940             if (!s->hist)
1941             {
1942                 for (j = r->start; j < r->end; j++)
1943                 {
1944                     sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1945                 }
1946             }
1947             else
1948             {
1949                 /* normalization factor multiplied with bin width and
1950                    number of samples (we normalize through M): */
1951                 double normdx = 1.;
1952                 double dx;
1953                 int    hd = 0; /* histogram direction */
1954                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1955                 {
1956                     hd = 1;
1957                 }
1958                 dx = s->hist->dx[hd];
1959
1960                 for (j = 0; j < s->hist->nbin[0]; j++)
1961                 {
1962                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1963                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1964
1965                     sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1966                 }
1967             }
1968         }
1969     }
1970     for (i = 0; i < cb->nsamples; i++)
1971     {
1972         samples_t      *s = cb->s[i];
1973         sample_range_t *r = &(cb->r[i]);
1974         if (r->use)
1975         {
1976             if (!s->hist)
1977             {
1978                 for (j = r->start; j < r->end; j++)
1979                 {
1980                     sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1981                 }
1982             }
1983             else
1984             {
1985                 /* normalization factor multiplied with bin width and
1986                    number of samples (we normalize through M): */
1987                 double normdx = 1.;
1988                 double dx;
1989                 int    hd = 0; /* histogram direction */
1990                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1991                 {
1992                     hd = 1;
1993                 }
1994                 dx = s->hist->dx[hd];
1995
1996                 for (j = 0; j < s->hist->nbin[0]; j++)
1997                 {
1998                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1999                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2000
2001                     sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2002                 }
2003             }
2004         }
2005     }
2006
2007     sigmafact /= (n1 + n2);
2008
2009
2010     /* Eq. 10 from
2011        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2012     *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2013 }
2014
2015
2016
2017 static void calc_bar(barres_t *br, double tol,
2018                      int npee_min, int npee_max, gmx_bool *bEE,
2019                      double *partsum)
2020 {
2021     int      npee, p;
2022     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2023                                                         for calculated quantities */
2024     double   temp = br->a->temp;
2025     int      i;
2026     double   dg_min, dg_max;
2027     gmx_bool have_hist = FALSE;
2028
2029     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2030
2031     br->dg_disc_err      = 0.;
2032     br->dg_histrange_err = 0.;
2033
2034     /* check if there are histograms */
2035     for (i = 0; i < br->a->nsamples; i++)
2036     {
2037         if (br->a->r[i].use && br->a->s[i]->hist)
2038         {
2039             have_hist = TRUE;
2040             break;
2041         }
2042     }
2043     if (!have_hist)
2044     {
2045         for (i = 0; i < br->b->nsamples; i++)
2046         {
2047             if (br->b->r[i].use && br->b->s[i]->hist)
2048             {
2049                 have_hist = TRUE;
2050                 break;
2051             }
2052         }
2053     }
2054
2055     /* calculate histogram-specific errors */
2056     if (have_hist)
2057     {
2058         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2059         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2060
2061         if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2062         {
2063             /* the histogram range  error is the biggest of the differences
2064                between the best estimate and the extremes */
2065             br->dg_histrange_err = std::abs(dg_max - dg_min);
2066         }
2067         br->dg_disc_err = 0.;
2068         for (i = 0; i < br->a->nsamples; i++)
2069         {
2070             if (br->a->s[i]->hist)
2071             {
2072                 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2073             }
2074         }
2075         for (i = 0; i < br->b->nsamples; i++)
2076         {
2077             if (br->b->s[i]->hist)
2078             {
2079                 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2080             }
2081         }
2082     }
2083     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2084
2085     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2086
2087     dg_sig2     = 0;
2088     sa_sig2     = 0;
2089     sb_sig2     = 0;
2090     stddev_sig2 = 0;
2091
2092     *bEE = TRUE;
2093     {
2094         sample_coll_t ca, cb;
2095
2096         /* initialize the samples */
2097         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2098                          br->a->temp);
2099         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2100                          br->b->temp);
2101
2102         for (npee = npee_min; npee <= npee_max; npee++)
2103         {
2104             double dgs      = 0;
2105             double dgs2     = 0;
2106             double dsa      = 0;
2107             double dsb      = 0;
2108             double dsa2     = 0;
2109             double dsb2     = 0;
2110             double dstddev  = 0;
2111             double dstddev2 = 0;
2112
2113
2114             for (p = 0; p < npee; p++)
2115             {
2116                 double   dgp;
2117                 double   stddevc;
2118                 double   sac, sbc;
2119                 gmx_bool cac, cbc;
2120
2121                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2122                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2123
2124                 if (!cac || !cbc)
2125                 {
2126                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2127                     *bEE = FALSE;
2128                     if (cac)
2129                     {
2130                         sample_coll_destroy(&ca);
2131                     }
2132                     if (cbc)
2133                     {
2134                         sample_coll_destroy(&cb);
2135                     }
2136                     return;
2137                 }
2138
2139                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2140                 dgs  += dgp;
2141                 dgs2 += dgp*dgp;
2142
2143                 partsum[npee*(npee_max+1)+p] += dgp;
2144
2145                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2146                 dsa  += sac;
2147                 dsa2 += sac*sac;
2148                 dsb  += sbc;
2149                 dsb2 += sbc*sbc;
2150                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2151
2152                 dstddev  += stddevc;
2153                 dstddev2 += stddevc*stddevc;
2154
2155                 sample_coll_destroy(&ca);
2156                 sample_coll_destroy(&cb);
2157             }
2158             dgs     /= npee;
2159             dgs2    /= npee;
2160             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2161
2162             dsa     /= npee;
2163             dsa2    /= npee;
2164             dsb     /= npee;
2165             dsb2    /= npee;
2166             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2167             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2168
2169             dstddev     /= npee;
2170             dstddev2    /= npee;
2171             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2172         }
2173         br->dg_err        = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2174         br->sa_err        = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2175         br->sb_err        = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2176         br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2177     }
2178 }
2179
2180
2181 static double bar_err(int nbmin, int nbmax, const double *partsum)
2182 {
2183     int    nb, b;
2184     double svar, s, s2, dg;
2185
2186     svar = 0;
2187     for (nb = nbmin; nb <= nbmax; nb++)
2188     {
2189         s  = 0;
2190         s2 = 0;
2191         for (b = 0; b < nb; b++)
2192         {
2193             dg  = partsum[nb*(nbmax+1)+b];
2194             s  += dg;
2195             s2 += dg*dg;
2196         }
2197         s    /= nb;
2198         s2   /= nb;
2199         svar += (s2 - s*s)/(nb - 1);
2200     }
2201
2202     return std::sqrt(svar/(nbmax + 1 - nbmin));
2203 }
2204
2205
2206 /* Seek the end of an identifier (consecutive non-spaces), followed by
2207    an optional number of spaces or '='-signs. Returns a pointer to the
2208    first non-space value found after that. Returns NULL if the string
2209    ends before that.
2210  */
2211 static const char *find_value(const char *str)
2212 {
2213     gmx_bool name_end_found = FALSE;
2214
2215     /* if the string is a NULL pointer, return a NULL pointer. */
2216     if (str == nullptr)
2217     {
2218         return nullptr;
2219     }
2220     while (*str != '\0')
2221     {
2222         /* first find the end of the name */
2223         if (!name_end_found)
2224         {
2225             if (std::isspace(*str) || (*str == '=') )
2226             {
2227                 name_end_found = TRUE;
2228             }
2229         }
2230         else
2231         {
2232             if (!( std::isspace(*str) || (*str == '=') ))
2233             {
2234                 return str;
2235             }
2236         }
2237         str++;
2238     }
2239     return nullptr;
2240 }
2241
2242
2243 /* read a vector-notation description of a lambda vector */
2244 static gmx_bool read_lambda_compvec(const char                *str,
2245                                     lambda_vec_t              *lv,
2246                                     const lambda_components_t *lc_in,
2247                                     lambda_components_t       *lc_out,
2248                                     const char               **end,
2249                                     const char                *fn)
2250 {
2251     gmx_bool    initialize_lc = FALSE;   /* whether to initialize the lambda
2252                                             components, or to check them */
2253     gmx_bool    start_reached = FALSE;   /* whether the start of component names
2254                                             has been reached */
2255     gmx_bool    vector        = FALSE;   /* whether there are multiple components */
2256     int         n             = 0;       /* current component number */
2257     const char *val_start     = nullptr; /* start of the component name, or NULL
2258                                             if not in a value */
2259     char       *strtod_end;
2260     gmx_bool    OK = TRUE;
2261
2262     if (end)
2263     {
2264         *end = str;
2265     }
2266
2267
2268     if (lc_out && lc_out->N == 0)
2269     {
2270         initialize_lc = TRUE;
2271     }
2272
2273     if (lc_in == nullptr)
2274     {
2275         lc_in = lc_out;
2276     }
2277
2278     while (1)
2279     {
2280         if (!start_reached)
2281         {
2282             if (std::isalnum(*str))
2283             {
2284                 vector        = FALSE;
2285                 start_reached = TRUE;
2286                 val_start     = str;
2287             }
2288             else if (*str == '(')
2289             {
2290                 vector        = TRUE;
2291                 start_reached = TRUE;
2292             }
2293             else if (!std::isspace(*str))
2294             {
2295                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2296             }
2297         }
2298         else
2299         {
2300             if (val_start)
2301             {
2302                 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2303                 {
2304                     /* end of value */
2305                     if (lv == nullptr)
2306                     {
2307                         if (initialize_lc)
2308                         {
2309                             lambda_components_add(lc_out, val_start,
2310                                                   (str-val_start));
2311                         }
2312                         else
2313                         {
2314                             if (!lambda_components_check(lc_out, n, val_start,
2315                                                          (str-val_start)))
2316                             {
2317                                 return FALSE;
2318                             }
2319                         }
2320                     }
2321                     else
2322                     {
2323                         /* add a vector component to lv */
2324                         lv->val[n] = strtod(val_start, &strtod_end);
2325                         if (val_start == strtod_end)
2326                         {
2327                             gmx_fatal(FARGS,
2328                                       "Error reading lambda vector in %s", fn);
2329                         }
2330                     }
2331                     /* reset for the next identifier */
2332                     val_start = nullptr;
2333                     n++;
2334                     if (!vector)
2335                     {
2336                         return OK;
2337                     }
2338                 }
2339             }
2340             else if (std::isalnum(*str))
2341             {
2342                 val_start = str;
2343             }
2344             if (*str == ')')
2345             {
2346                 str++;
2347                 if (end)
2348                 {
2349                     *end = str;
2350                 }
2351                 if (!vector)
2352                 {
2353                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2354                 }
2355                 else
2356                 {
2357                     GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2358                     if (n == lc_in->N)
2359                     {
2360                         return OK;
2361                     }
2362                     else if (lv == nullptr)
2363                     {
2364                         return FALSE;
2365                     }
2366                     else
2367                     {
2368                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2369                                   fn);
2370                         return FALSE;
2371                     }
2372
2373                 }
2374             }
2375         }
2376         if (*str == '\0')
2377         {
2378             break;
2379         }
2380         str++;
2381         if (end)
2382         {
2383             *end = str;
2384         }
2385     }
2386     if (vector)
2387     {
2388         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2389         return FALSE;
2390     }
2391     return OK;
2392 }
2393
2394 /* read and check the component names from a string */
2395 static gmx_bool read_lambda_components(const char          *str,
2396                                        lambda_components_t *lc,
2397                                        const char         **end,
2398                                        const char          *fn)
2399 {
2400     return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2401 }
2402
2403 /* read an initialized lambda vector from a string */
2404 static gmx_bool read_lambda_vector(const char   *str,
2405                                    lambda_vec_t *lv,
2406                                    const char  **end,
2407                                    const char   *fn)
2408 {
2409     return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2410 }
2411
2412
2413
2414 /* deduce lambda value from legend.
2415     fn = the file name
2416     legend = the legend string
2417     ba = the xvg data
2418     lam = the initialized lambda vector
2419    returns whether to use the data in this set.
2420  */
2421 static gmx_bool legend2lambda(const char   *fn,
2422                               const char   *legend,
2423                               lambda_vec_t *lam)
2424 {
2425     const char   *ptr    = nullptr, *ptr2 = nullptr;
2426     gmx_bool      ok     = FALSE;
2427     gmx_bool      bdhdl  = FALSE;
2428     const char   *tostr  = " to ";
2429
2430     if (legend == nullptr)
2431     {
2432         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2433     }
2434
2435     /* look for the last 'to': */
2436     ptr2 = legend;
2437     do
2438     {
2439         ptr2 = std::strstr(ptr2, tostr);
2440         if (ptr2 != nullptr)
2441         {
2442             ptr = ptr2;
2443             ptr2++;
2444         }
2445     }
2446     while (ptr2 != nullptr && *ptr2 != '\0');
2447
2448     if (ptr)
2449     {
2450         ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2451     }
2452     else
2453     {
2454         /* look for the = sign */
2455         ptr = std::strrchr(legend, '=');
2456         if (!ptr)
2457         {
2458             /* otherwise look for the last space */
2459             ptr = std::strrchr(legend, ' ');
2460         }
2461     }
2462
2463     if (std::strstr(legend, "dH"))
2464     {
2465         ok    = TRUE;
2466         bdhdl = TRUE;
2467     }
2468     else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2469     {
2470         ok    = TRUE;
2471         bdhdl = FALSE;
2472     }
2473     else /*if (std::strstr(legend, "pV"))*/
2474     {
2475         return FALSE;
2476     }
2477     if (!ptr)
2478     {
2479         ok = FALSE;
2480     }
2481
2482     if (!ok)
2483     {
2484         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2485     }
2486     if (!bdhdl)
2487     {
2488         ptr = find_value(ptr);
2489         if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2490         {
2491             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2492         }
2493     }
2494     else
2495     {
2496         int         dhdl_index;
2497         const char *end;
2498
2499         ptr = std::strrchr(legend, '=');
2500         end = ptr;
2501         if (ptr)
2502         {
2503             /* there must be a component name */
2504             ptr--;
2505             if (ptr < legend)
2506             {
2507                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2508             }
2509             /* now backtrack to the start of the identifier */
2510             while (isspace(*ptr))
2511             {
2512                 end = ptr;
2513                 ptr--;
2514                 if (ptr < legend)
2515                 {
2516                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2517                 }
2518             }
2519             while (!std::isspace(*ptr))
2520             {
2521                 ptr--;
2522                 if (ptr < legend)
2523                 {
2524                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2525                 }
2526             }
2527             ptr++;
2528             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2529             if (dhdl_index < 0)
2530             {
2531                 char buf[STRLEN];
2532                 std::strncpy(buf, ptr, (end-ptr));
2533                 buf[(end-ptr)] = '\0';
2534                 gmx_fatal(FARGS,
2535                           "Did not find lambda component for '%s' in %s",
2536                           buf, fn);
2537             }
2538         }
2539         else
2540         {
2541             if (lam->lc->N > 1)
2542             {
2543                 gmx_fatal(FARGS,
2544                           "dhdl without component name with >1 lambda component in %s",
2545                           fn);
2546             }
2547             dhdl_index = 0;
2548         }
2549         lam->dhdl = dhdl_index;
2550     }
2551     return TRUE;
2552 }
2553
2554 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2555                                 lambda_components_t *lc)
2556 {
2557     gmx_bool    bFound;
2558     const char *ptr;
2559     char       *end;
2560     double      native_lambda;
2561
2562     bFound = FALSE;
2563
2564     /* first check for a state string */
2565     ptr = std::strstr(subtitle, "state");
2566     if (ptr)
2567     {
2568         int         index = -1;
2569         const char *val_end;
2570
2571         /* the new 4.6 style lambda vectors */
2572         ptr = find_value(ptr);
2573         if (ptr)
2574         {
2575             index = std::strtol(ptr, &end, 10);
2576             if (ptr == end)
2577             {
2578                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2579                 return FALSE;
2580             }
2581             ptr = end;
2582         }
2583         else
2584         {
2585             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2586             return FALSE;
2587         }
2588         /* now find the lambda vector component names */
2589         while (*ptr != '(' && !std::isalnum(*ptr))
2590         {
2591             ptr++;
2592             if (*ptr == '\0')
2593             {
2594                 gmx_fatal(FARGS,
2595                           "Incomplete lambda vector component data in %s", fn);
2596                 return FALSE;
2597             }
2598         }
2599         val_end = ptr;
2600         if (!read_lambda_components(ptr, lc, &val_end, fn))
2601         {
2602             gmx_fatal(FARGS,
2603                       "lambda vector components in %s don't match those previously read",
2604                       fn);
2605         }
2606         ptr = find_value(val_end);
2607         if (!ptr)
2608         {
2609             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2610             return FALSE;
2611         }
2612         lambda_vec_init(&(ba->native_lambda), lc);
2613         if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2614         {
2615             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2616         }
2617         ba->native_lambda.index = index;
2618         bFound                  = TRUE;
2619     }
2620     else
2621     {
2622         /* compatibility mode: check for lambda in other ways. */
2623         /* plain text lambda string */
2624         ptr = std::strstr(subtitle, "lambda");
2625         if (ptr == nullptr)
2626         {
2627             /* xmgrace formatted lambda string */
2628             ptr = std::strstr(subtitle, "\\xl\\f{}");
2629         }
2630         if (ptr == nullptr)
2631         {
2632             /* xmgr formatted lambda string */
2633             ptr = std::strstr(subtitle, "\\8l\\4");
2634         }
2635         if (ptr != nullptr)
2636         {
2637             ptr = std::strstr(ptr, "=");
2638         }
2639         if (ptr != nullptr)
2640         {
2641             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2642             /* add the lambda component name as an empty string */
2643             if (lc->N > 0)
2644             {
2645                 if (!lambda_components_check(lc, 0, "", 0))
2646                 {
2647                     gmx_fatal(FARGS,
2648                               "lambda vector components in %s don't match those previously read",
2649                               fn);
2650                 }
2651             }
2652             else
2653             {
2654                 lambda_components_add(lc, "", 0);
2655             }
2656             lambda_vec_init(&(ba->native_lambda), lc);
2657             ba->native_lambda.val[0] = native_lambda;
2658         }
2659     }
2660
2661     return bFound;
2662 }
2663
2664 static double filename2lambda(const char *fn)
2665 {
2666     double        lambda;
2667     const char   *ptr, *digitptr;
2668     char         *endptr;
2669     int           dirsep;
2670     ptr = fn;
2671     /* go to the end of the path string and search backward to find the last
2672        directory in the path which has to contain the value of lambda
2673      */
2674     while (ptr[1] != '\0')
2675     {
2676         ptr++;
2677     }
2678     /* searching backward to find the second directory separator */
2679     dirsep   = 0;
2680     digitptr = nullptr;
2681     while (ptr >= fn)
2682     {
2683         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2684         {
2685             if (dirsep == 1)
2686             {
2687                 break;
2688             }
2689             dirsep++;
2690         }
2691         /* save the last position of a digit between the last two
2692            separators = in the last dirname */
2693         if (dirsep > 0 && std::isdigit(*ptr))
2694         {
2695             digitptr = ptr;
2696         }
2697         ptr--;
2698     }
2699     if (!digitptr)
2700     {
2701         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2702                   " last directory in the path '%s' does not contain a number", fn);
2703     }
2704     if (digitptr[-1] == '-')
2705     {
2706         digitptr--;
2707     }
2708     lambda = strtod(digitptr, &endptr);
2709     if (endptr == digitptr)
2710     {
2711         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2712     }
2713     return lambda;
2714 }
2715
2716 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2717                                   lambda_components_t *lc)
2718 {
2719     int          i;
2720     char        *subtitle, **legend, *ptr;
2721     int          np;
2722     gmx_bool     native_lambda_read = FALSE;
2723     char         buf[STRLEN];
2724
2725     xvg_init(ba);
2726
2727     ba->filename = fn;
2728
2729     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2730     if (!ba->y)
2731     {
2732         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2733     }
2734     /* Reorder the data */
2735     ba->t  = ba->y[0];
2736     for (i = 1; i < ba->nset; i++)
2737     {
2738         ba->y[i-1] = ba->y[i];
2739     }
2740     ba->nset--;
2741
2742     snew(ba->np, ba->nset);
2743     for (i = 0; i < ba->nset; i++)
2744     {
2745         ba->np[i] = np;
2746     }
2747
2748     ba->temp = -1;
2749     if (subtitle != nullptr)
2750     {
2751         /* try to extract temperature */
2752         ptr = std::strstr(subtitle, "T =");
2753         if (ptr != nullptr)
2754         {
2755             ptr += 3;
2756             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2757             {
2758                 if (ba->temp <= 0)
2759                 {
2760                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2761                               ba->temp, fn);
2762                 }
2763             }
2764         }
2765     }
2766     if (ba->temp < 0)
2767     {
2768         if (*temp <= 0)
2769         {
2770             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2771         }
2772         ba->temp = *temp;
2773     }
2774
2775     /* Try to deduce lambda from the subtitle */
2776     if (subtitle)
2777     {
2778         if (subtitle2lambda(subtitle, ba, fn, lc))
2779         {
2780             native_lambda_read = TRUE;
2781         }
2782     }
2783     snew(ba->lambda, ba->nset);
2784     if (legend == nullptr)
2785     {
2786         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2787         if (ba->nset == 1)
2788         {
2789             if (!native_lambda_read)
2790             {
2791                 // Deduce lambda from the file name.
2792                 // Updated the routine filename2lambda to actually return lambda
2793                 // to avoid C++ warnings, but this usage does not seem to need it?
2794                 // EL 2015-07-10
2795                 filename2lambda(fn);
2796                 native_lambda_read = TRUE;
2797             }
2798             ba->lambda[0] = ba->native_lambda;
2799         }
2800         else
2801         {
2802             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2803         }
2804     }
2805     else
2806     {
2807         for (i = 0; i < ba->nset; )
2808         {
2809             gmx_bool use = FALSE;
2810             /* Read lambda from the legend */
2811             lambda_vec_init( &(ba->lambda[i]), lc );
2812             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2813             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2814             if (use)
2815             {
2816                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2817                 i++;
2818             }
2819             else
2820             {
2821                 int j;
2822                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2823                 for (j = i+1; j < ba->nset; j++)
2824                 {
2825                     ba->y[j-1]  = ba->y[j];
2826                     legend[j-1] = legend[j];
2827                 }
2828                 ba->nset--;
2829             }
2830         }
2831     }
2832
2833     if (!native_lambda_read)
2834     {
2835         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2836     }
2837
2838     if (legend != nullptr)
2839     {
2840         for (i = 0; i < ba->nset-1; i++)
2841         {
2842             sfree(legend[i]);
2843         }
2844         sfree(legend);
2845     }
2846 }
2847
2848 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2849 {
2850     xvg_t     *barsim;
2851     samples_t *s;
2852     int        i;
2853
2854     snew(barsim, 1);
2855
2856     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2857
2858     if (barsim->nset < 1)
2859     {
2860         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2861     }
2862
2863     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2864     {
2865         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2866     }
2867     *temp = barsim->temp;
2868
2869     /* now create a series of samples_t */
2870     snew(s, barsim->nset);
2871     for (i = 0; i < barsim->nset; i++)
2872     {
2873         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2874                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2875                                                    &(barsim->lambda[i])),
2876                      fn);
2877         s[i].du  = barsim->y[i];
2878         s[i].ndu = barsim->np[i];
2879         s[i].t   = barsim->t;
2880
2881         lambda_data_list_insert_sample(sd->lb, s+i);
2882     }
2883     {
2884         char buf[STRLEN];
2885
2886         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2887         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2888                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2889         for (i = 0; i < barsim->nset; i++)
2890         {
2891             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2892             printf("        %s (%d pts)\n", buf, s[i].ndu);
2893         }
2894     }
2895     printf("\n\n");
2896 }
2897
2898 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2899                                  double start_time, double delta_time,
2900                                  lambda_vec_t *native_lambda, double temp,
2901                                  double *last_t, const char *filename)
2902 {
2903     int           i, j;
2904     lambda_vec_t *foreign_lambda;
2905     int           type;
2906     samples_t    *s; /* convenience pointer */
2907     int           startj;
2908
2909     /* check the block types etc. */
2910     if ( (blk->nsub < 3) ||
2911          (blk->sub[0].type != xdr_datatype_int) ||
2912          (blk->sub[1].type != xdr_datatype_double) ||
2913          (
2914              (blk->sub[2].type != xdr_datatype_float) &&
2915              (blk->sub[2].type != xdr_datatype_double)
2916          ) ||
2917          (blk->sub[0].nr < 1) ||
2918          (blk->sub[1].nr < 1) )
2919     {
2920         gmx_fatal(FARGS,
2921                   "Unexpected/corrupted block data in file %s around time %f.",
2922                   filename, start_time);
2923     }
2924
2925     snew(foreign_lambda, 1);
2926     lambda_vec_init(foreign_lambda, native_lambda->lc);
2927     lambda_vec_copy(foreign_lambda, native_lambda);
2928     type = blk->sub[0].ival[0];
2929     if (type == dhbtDH)
2930     {
2931         for (i = 0; i < native_lambda->lc->N; i++)
2932         {
2933             foreign_lambda->val[i] = blk->sub[1].dval[i];
2934         }
2935     }
2936     else
2937     {
2938         if (blk->sub[0].nr > 1)
2939         {
2940             foreign_lambda->dhdl = blk->sub[0].ival[1];
2941         }
2942         else
2943         {
2944             foreign_lambda->dhdl = 0;
2945         }
2946     }
2947
2948     if (!*smp)
2949     {
2950         /* initialize the samples structure if it's empty. */
2951         snew(*smp, 1);
2952         samples_init(*smp, native_lambda, foreign_lambda, temp,
2953                      type == dhbtDHDL, filename);
2954         (*smp)->start_time = start_time;
2955         (*smp)->delta_time = delta_time;
2956     }
2957
2958     /* set convenience pointer */
2959     s = *smp;
2960
2961     /* now double check */
2962     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2963     {
2964         char buf[STRLEN], buf2[STRLEN];
2965         lambda_vec_print(foreign_lambda, buf, FALSE);
2966         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2967         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2968         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2969                   filename, start_time);
2970     }
2971
2972     /* make room for the data */
2973     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2974     {
2975         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2976             blk->sub[2].nr*2 : s->ndu_alloc;
2977         srenew(s->du_alloc, s->ndu_alloc);
2978         s->du = s->du_alloc;
2979     }
2980     startj   = s->ndu;
2981     s->ndu  += blk->sub[2].nr;
2982     s->ntot += blk->sub[2].nr;
2983     *ndu     = blk->sub[2].nr;
2984
2985     /* and copy the data*/
2986     for (j = 0; j < blk->sub[2].nr; j++)
2987     {
2988         if (blk->sub[2].type == xdr_datatype_float)
2989         {
2990             s->du[startj+j] = blk->sub[2].fval[j];
2991         }
2992         else
2993         {
2994             s->du[startj+j] = blk->sub[2].dval[j];
2995         }
2996     }
2997     if (start_time + blk->sub[2].nr*delta_time > *last_t)
2998     {
2999         *last_t = start_time + blk->sub[2].nr*delta_time;
3000     }
3001 }
3002
3003 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3004                                       double start_time, double delta_time,
3005                                       lambda_vec_t *native_lambda, double temp,
3006                                       double *last_t, const char *filename)
3007 {
3008     int           i, j;
3009     samples_t    *s;
3010     int           nhist;
3011     lambda_vec_t *foreign_lambda;
3012     int           type;
3013     int           nbins[2];
3014
3015     /* check the block types etc. */
3016     if ( (blk->nsub < 2) ||
3017          (blk->sub[0].type != xdr_datatype_double) ||
3018          (blk->sub[1].type != xdr_datatype_int64) ||
3019          (blk->sub[0].nr < 2)  ||
3020          (blk->sub[1].nr < 2) )
3021     {
3022         gmx_fatal(FARGS,
3023                   "Unexpected/corrupted block data in file %s around time %f",
3024                   filename, start_time);
3025     }
3026
3027     nhist = blk->nsub-2;
3028     if (nhist == 0)
3029     {
3030         return nullptr;
3031     }
3032     if (nhist > 2)
3033     {
3034         gmx_fatal(FARGS,
3035                   "Unexpected/corrupted block data in file %s around time %f",
3036                   filename, start_time);
3037     }
3038
3039     snew(s, 1);
3040     *nsamples = 1;
3041
3042     snew(foreign_lambda, 1);
3043     lambda_vec_init(foreign_lambda, native_lambda->lc);
3044     lambda_vec_copy(foreign_lambda, native_lambda);
3045     type = static_cast<int>(blk->sub[1].lval[1]);
3046     if (type == dhbtDH)
3047     {
3048         double old_foreign_lambda;
3049
3050         old_foreign_lambda = blk->sub[0].dval[0];
3051         if (old_foreign_lambda >= 0)
3052         {
3053             foreign_lambda->val[0] = old_foreign_lambda;
3054             if (foreign_lambda->lc->N > 1)
3055             {
3056                 gmx_fatal(FARGS,
3057                           "Single-component lambda in multi-component file %s",
3058                           filename);
3059             }
3060         }
3061         else
3062         {
3063             for (i = 0; i < native_lambda->lc->N; i++)
3064             {
3065                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3066             }
3067         }
3068     }
3069     else
3070     {
3071         if (foreign_lambda->lc->N > 1)
3072         {
3073             if (blk->sub[1].nr < 3 + nhist)
3074             {
3075                 gmx_fatal(FARGS,
3076                           "Missing derivative coord in multi-component file %s",
3077                           filename);
3078             }
3079             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3080         }
3081         else
3082         {
3083             foreign_lambda->dhdl = 0;
3084         }
3085     }
3086
3087     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3088                  filename);
3089     snew(s->hist, 1);
3090
3091     for (i = 0; i < nhist; i++)
3092     {
3093         nbins[i] = blk->sub[i+2].nr;
3094     }
3095
3096     hist_init(s->hist, nhist, nbins);
3097
3098     for (i = 0; i < nhist; i++)
3099     {
3100         s->hist->x0[i] = blk->sub[1].lval[2+i];
3101         s->hist->dx[i] = blk->sub[0].dval[1];
3102         if (i == 1)
3103         {
3104             s->hist->dx[i] = -s->hist->dx[i];
3105         }
3106     }
3107
3108     s->hist->start_time = start_time;
3109     s->hist->delta_time = delta_time;
3110     s->start_time       = start_time;
3111     s->delta_time       = delta_time;
3112
3113     for (i = 0; i < nhist; i++)
3114     {
3115         gmx_int64_t     sum = 0;
3116
3117         for (j = 0; j < s->hist->nbin[i]; j++)
3118         {
3119             int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3120
3121             s->hist->bin[i][j] = binv;
3122             sum               += binv;
3123
3124         }
3125         if (i == 0)
3126         {
3127             s->ntot      = sum;
3128             s->hist->sum = sum;
3129         }
3130         else
3131         {
3132             if (s->ntot != sum)
3133             {
3134                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3135                           filename);
3136             }
3137         }
3138     }
3139
3140     if (start_time + s->hist->sum*delta_time > *last_t)
3141     {
3142         *last_t = start_time + s->hist->sum*delta_time;
3143     }
3144     return s;
3145 }
3146
3147
3148 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3149 {
3150     int            i, j;
3151     ener_file_t    fp;
3152     t_enxframe    *fr;
3153     int            nre;
3154     gmx_enxnm_t   *enm           = nullptr;
3155     double         first_t       = -1;
3156     double         last_t        = -1;
3157     samples_t    **samples_rawdh = nullptr; /* contains samples for raw delta_h  */
3158     int           *nhists        = nullptr; /* array to keep count & print at end */
3159     int           *npts          = nullptr; /* array to keep count & print at end */
3160     lambda_vec_t **lambdas       = nullptr; /* array to keep count & print at end */
3161     lambda_vec_t  *native_lambda;
3162     int            nsamples = 0;
3163     lambda_vec_t   start_lambda;
3164
3165     fp = open_enx(fn, "r");
3166     do_enxnms(fp, &nre, &enm);
3167     snew(fr, 1);
3168
3169     snew(native_lambda, 1);
3170     start_lambda.lc = nullptr;
3171
3172     while (do_enx(fp, fr))
3173     {
3174         /* count the data blocks */
3175         int    nblocks_raw  = 0;
3176         int    nblocks_hist = 0;
3177         int    nlam         = 0;
3178         int    k;
3179         /* DHCOLL block information: */
3180         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3181         double rtemp      = 0;
3182
3183         /* count the blocks and handle collection information: */
3184         for (i = 0; i < fr->nblock; i++)
3185         {
3186             if (fr->block[i].id == enxDHHIST)
3187             {
3188                 nblocks_hist++;
3189             }
3190             if (fr->block[i].id == enxDH)
3191             {
3192                 nblocks_raw++;
3193             }
3194             if (fr->block[i].id == enxDHCOLL)
3195             {
3196                 nlam++;
3197                 if ( (fr->block[i].nsub < 1) ||
3198                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3199                      (fr->block[i].sub[0].nr < 5))
3200                 {
3201                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3202                 }
3203
3204                 /* read the data from the DHCOLL block */
3205                 rtemp            = fr->block[i].sub[0].dval[0];
3206                 start_time       = fr->block[i].sub[0].dval[1];
3207                 delta_time       = fr->block[i].sub[0].dval[2];
3208                 old_start_lambda = fr->block[i].sub[0].dval[3];
3209                 delta_lambda     = fr->block[i].sub[0].dval[4];
3210
3211                 if (delta_lambda > 0)
3212                 {
3213                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3214                 }
3215                 if ( ( *temp != rtemp) && (*temp > 0) )
3216                 {
3217                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3218                 }
3219                 *temp = rtemp;
3220
3221                 if (old_start_lambda >= 0)
3222                 {
3223                     if (sd->lc.N > 0)
3224                     {
3225                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3226                         {
3227                             gmx_fatal(FARGS,
3228                                       "lambda vector components in %s don't match those previously read",
3229                                       fn);
3230                         }
3231                     }
3232                     else
3233                     {
3234                         lambda_components_add(&(sd->lc), "", 0);
3235                     }
3236                     if (!start_lambda.lc)
3237                     {
3238                         lambda_vec_init(&start_lambda, &(sd->lc));
3239                     }
3240                     start_lambda.val[0] = old_start_lambda;
3241                 }
3242                 else
3243                 {
3244                     /* read lambda vector */
3245                     int      n_lambda_vec;
3246                     gmx_bool check = (sd->lc.N > 0);
3247                     if (fr->block[i].nsub < 2)
3248                     {
3249                         gmx_fatal(FARGS,
3250                                   "No lambda vector, but start_lambda=%f\n",
3251                                   old_start_lambda);
3252                     }
3253                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3254                     for (j = 0; j < n_lambda_vec; j++)
3255                     {
3256                         const char *name =
3257                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3258                         if (check)
3259                         {
3260                             /* check the components */
3261                             lambda_components_check(&(sd->lc), j, name,
3262                                                     std::strlen(name));
3263                         }
3264                         else
3265                         {
3266                             lambda_components_add(&(sd->lc), name,
3267                                                   std::strlen(name));
3268                         }
3269                     }
3270                     lambda_vec_init(&start_lambda, &(sd->lc));
3271                     start_lambda.index = fr->block[i].sub[1].ival[0];
3272                     for (j = 0; j < n_lambda_vec; j++)
3273                     {
3274                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3275                     }
3276                 }
3277                 if (first_t < 0)
3278                 {
3279                     first_t = start_time;
3280                 }
3281             }
3282         }
3283
3284         if (nlam != 1)
3285         {
3286             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3287         }
3288         if (nblocks_raw > 0 && nblocks_hist > 0)
3289         {
3290             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3291         }
3292
3293         if (nsamples == 0)
3294         {
3295             /* this is the first round; allocate the associated data
3296                structures */
3297             /*native_lambda=start_lambda;*/
3298             lambda_vec_init(native_lambda, &(sd->lc));
3299             lambda_vec_copy(native_lambda, &start_lambda);
3300             nsamples = nblocks_raw+nblocks_hist;
3301             snew(nhists, nsamples);
3302             snew(npts, nsamples);
3303             snew(lambdas, nsamples);
3304             snew(samples_rawdh, nsamples);
3305             for (i = 0; i < nsamples; i++)
3306             {
3307                 nhists[i]        = 0;
3308                 npts[i]          = 0;
3309                 lambdas[i]       = nullptr;
3310                 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3311                                                ones contain values */
3312             }
3313         }
3314         else
3315         {
3316             // nsamples > 0 means this is NOT the first iteration
3317
3318             /* check the native lambda */
3319             if (!lambda_vec_same(&start_lambda, native_lambda) )
3320             {
3321                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3322                           fn, native_lambda, start_lambda, start_time);
3323             }
3324             /* check the number of samples against the previous number */
3325             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3326             {
3327                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3328                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3329             }
3330             /* check whether last iterations's end time matches with
3331                the currrent start time */
3332             if ( (std::abs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3333             {
3334                 /* it didn't. We need to store our samples and reallocate */
3335
3336                 for (i = 0; i < nsamples; i++)
3337                 {
3338                     // nsamples is always >0 here, so samples_rawdh must be a valid pointer. Unfortunately
3339                     // cppcheck does not understand the logic unless the assert is inside the loop, but
3340                     // this is not performance-sensitive code.
3341                     GMX_RELEASE_ASSERT(samples_rawdh != nullptr, "samples_rawdh==NULL with nsamples>0");
3342                     if (samples_rawdh[i])
3343                     {
3344                         /* insert it into the existing list */
3345                         lambda_data_list_insert_sample(sd->lb,
3346                                                        samples_rawdh[i]);
3347                         /* and make sure we'll allocate a new one this time
3348                            around */
3349                         samples_rawdh[i] = nullptr;
3350                     }
3351                 }
3352             }
3353         }
3354
3355         /* and read them */
3356         k = 0; /* counter for the lambdas, etc. arrays */
3357         for (i = 0; i < fr->nblock; i++)
3358         {
3359             if (fr->block[i].id == enxDH)
3360             {
3361                 int type = (fr->block[i].sub[0].ival[0]);
3362                 if (type == dhbtDH || type == dhbtDHDL)
3363                 {
3364                     int ndu;
3365                     read_edr_rawdh_block(&(samples_rawdh[k]),
3366                                          &ndu,
3367                                          &(fr->block[i]),
3368                                          start_time, delta_time,
3369                                          native_lambda, rtemp,
3370                                          &last_t, fn);
3371                     npts[k] += ndu;
3372                     if (samples_rawdh[k])
3373                     {
3374                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3375                     }
3376                     k++;
3377                 }
3378             }
3379             else if (fr->block[i].id == enxDHHIST)
3380             {
3381                 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3382                 if (type == dhbtDH || type == dhbtDHDL)
3383                 {
3384                     int        j;
3385                     int        nb = 0;
3386                     samples_t *s; /* this is where the data will go */
3387                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3388                                             start_time, delta_time,
3389                                             native_lambda, rtemp,
3390                                             &last_t, fn);
3391                     nhists[k] += nb;
3392                     if (nb > 0)
3393                     {
3394                         lambdas[k] = s->foreign_lambda;
3395                     }
3396                     k++;
3397                     /* and insert the new sample immediately */
3398                     for (j = 0; j < nb; j++)
3399                     {
3400                         lambda_data_list_insert_sample(sd->lb, s+j);
3401                     }
3402                 }
3403             }
3404         }
3405     }
3406     /* Now store all our extant sample collections */
3407     for (i = 0; i < nsamples; i++)
3408     {
3409         if (samples_rawdh[i])
3410         {
3411             /* insert it into the existing list */
3412             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3413         }
3414     }
3415
3416
3417     {
3418         char buf[STRLEN];
3419         printf("\n");
3420         lambda_vec_print(native_lambda, buf, FALSE);
3421         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3422                fn, first_t, last_t, buf);
3423         for (i = 0; i < nsamples; i++)
3424         {
3425             if (lambdas[i])
3426             {
3427                 lambda_vec_print(lambdas[i], buf, TRUE);
3428                 if (nhists[i] > 0)
3429                 {
3430                     printf("        %s (%d hists)\n", buf, nhists[i]);
3431                 }
3432                 else
3433                 {
3434                     printf("        %s (%d pts)\n", buf, npts[i]);
3435                 }
3436             }
3437         }
3438     }
3439     printf("\n\n");
3440     sfree(npts);
3441     sfree(nhists);
3442     sfree(lambdas);
3443 }
3444
3445
3446 int gmx_bar(int argc, char *argv[])
3447 {
3448     static const char *desc[] = {
3449         "[THISMODULE] calculates free energy difference estimates through ",
3450         "Bennett's acceptance ratio method (BAR). It also automatically",
3451         "adds series of individual free energies obtained with BAR into",
3452         "a combined free energy estimate.[PAR]",
3453
3454         "Every individual BAR free energy difference relies on two ",
3455         "simulations at different states: say state A and state B, as",
3456         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3457         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3458         "average of the Hamiltonian difference of state B given state A and",
3459         "vice versa.",
3460         "The energy differences to the other state must be calculated",
3461         "explicitly during the simulation. This can be done with",
3462         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3463
3464         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3465         "Two types of input files are supported:",
3466         "",
3467         " * Files with more than one [IT]y[it]-value. ",
3468         "   The files should have columns ",
3469         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3470         "   The [GRK]lambda[grk] values are inferred ",
3471         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3472         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3473         "   legends of Delta H",
3474         " * Files with only one [IT]y[it]-value. Using the",
3475         "   [TT]-extp[tt] option for these files, it is assumed",
3476         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3477         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3478         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3479         "   subtitle (if present), otherwise from a number in the subdirectory ",
3480         "   in the file name.",
3481         "",
3482
3483         "The [GRK]lambda[grk] of the simulation is parsed from ",
3484         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3485         "foreign [GRK]lambda[grk] values from the legend containing the ",
3486         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3487         "the legend line containing 'T ='.[PAR]",
3488
3489         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3490         "These can contain either lists of energy differences (see the ",
3491         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3492         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3493         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3494         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3495
3496         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3497         "the energy difference can also be extrapolated from the ",
3498         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3499         "option, which assumes that the system's Hamiltonian depends linearly",
3500         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3501
3502         "The free energy estimates are determined using BAR with bisection, ",
3503         "with the precision of the output set with [TT]-prec[tt]. ",
3504         "An error estimate taking into account time correlations ",
3505         "is made by splitting the data into blocks and determining ",
3506         "the free energy differences over those blocks and assuming ",
3507         "the blocks are independent. ",
3508         "The final error estimate is determined from the average variance ",
3509         "over 5 blocks. A range of block numbers for error estimation can ",
3510         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3511
3512         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3513         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3514         "samples. [BB]Note[bb] that when aggregating energy ",
3515         "differences/derivatives with different sampling intervals, this is ",
3516         "almost certainly not correct. Usually subsequent energies are ",
3517         "correlated and different time intervals mean different degrees ",
3518         "of correlation between samples.[PAR]",
3519
3520         "The results are split in two parts: the last part contains the final ",
3521         "results in kJ/mol, together with the error estimate for each part ",
3522         "and the total. The first part contains detailed free energy ",
3523         "difference estimates and phase space overlap measures in units of ",
3524         "kT (together with their computed error estimate). The printed ",
3525         "values are:",
3526         "",
3527         " * lam_A: the [GRK]lambda[grk] values for point A.",
3528         " * lam_B: the [GRK]lambda[grk] values for point B.",
3529         " *    DG: the free energy estimate.",
3530         " *   s_A: an estimate of the relative entropy of B in A.",
3531         " *   s_B: an estimate of the relative entropy of A in B.",
3532         " * stdev: an estimate expected per-sample standard deviation.",
3533         "",
3534
3535         "The relative entropy of both states in each other's ensemble can be ",
3536         "interpreted as a measure of phase space overlap: ",
3537         "the relative entropy s_A of the work samples of lambda_B in the ",
3538         "ensemble of lambda_A (and vice versa for s_B), is a ",
3539         "measure of the 'distance' between Boltzmann distributions of ",
3540         "the two states, that goes to zero for identical distributions. See ",
3541         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3542         "[PAR]",
3543         "The estimate of the expected per-sample standard deviation, as given ",
3544         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3545         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3546         "of the actual statistical error, because it assumes independent samples).[PAR]",
3547
3548         "To get a visual estimate of the phase space overlap, use the ",
3549         "[TT]-oh[tt] option to write series of histograms, together with the ",
3550         "[TT]-nbin[tt] option.[PAR]"
3551     };
3552     static real        begin    = 0, end = -1, temp = -1;
3553     int                nd       = 2, nbmin = 5, nbmax = 5;
3554     int                nbin     = 100;
3555     gmx_bool           use_dhdl = FALSE;
3556     t_pargs            pa[]     = {
3557         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3558         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3559         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3560         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3561         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3562         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3563         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3564         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3565     };
3566
3567     t_filenm           fnm[] = {
3568         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3569         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3570         { efXVG, "-o",  "bar",    ffOPTWR },
3571         { efXVG, "-oi", "barint", ffOPTWR },
3572         { efXVG, "-oh", "histogram", ffOPTWR }
3573     };
3574 #define NFILE asize(fnm)
3575
3576     int               f;
3577     int               nf = 0;    /* file counter */
3578     int               nfile_tot; /* total number of input files */
3579     sim_data_t        sim_data;  /* the simulation data */
3580     barres_t         *results;   /* the results */
3581     int               nresults;  /* number of results in results array */
3582
3583     double           *partsum;
3584     double            prec, dg_tot;
3585     FILE             *fpb, *fpi;
3586     char              dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3587     char              buf[STRLEN], buf2[STRLEN];
3588     char              ktformat[STRLEN], sktformat[STRLEN];
3589     char              kteformat[STRLEN], skteformat[STRLEN];
3590     gmx_output_env_t *oenv;
3591     double            kT;
3592     gmx_bool          result_OK = TRUE, bEE = TRUE;
3593
3594     gmx_bool          disc_err          = FALSE;
3595     double            sum_disc_err      = 0.; /* discretization error */
3596     gmx_bool          histrange_err     = FALSE;
3597     double            sum_histrange_err = 0.; /* histogram range error */
3598     double            stat_err          = 0.; /* statistical error */
3599
3600     if (!parse_common_args(&argc, argv,
3601                            PCA_CAN_VIEW,
3602                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3603     {
3604         return 0;
3605     }
3606
3607     gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3608     gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3609
3610     sim_data_init(&sim_data);
3611 #if 0
3612     /* make linked list */
3613     lb = &lambda_head;
3614     lambda_data_init(lb, 0, 0);
3615     lb->next = lb;
3616     lb->prev = lb;
3617 #endif
3618
3619
3620     nfile_tot = xvgFiles.size() + edrFiles.size();
3621
3622     if (nfile_tot == 0)
3623     {
3624         gmx_fatal(FARGS, "No input files!");
3625     }
3626
3627     if (nd < 0)
3628     {
3629         gmx_fatal(FARGS, "Can not have negative number of digits");
3630     }
3631     prec = std::pow(10.0, static_cast<double>(-nd));
3632
3633     snew(partsum, (nbmax+1)*(nbmax+1));
3634     nf = 0;
3635
3636     /* read in all files. First xvg files */
3637     for (const std::string &filenm : xvgFiles)
3638     {
3639         read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3640         nf++;
3641     }
3642     /* then .edr files */
3643     for (const std::string &filenm : edrFiles)
3644     {
3645         read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3646         nf++;
3647     }
3648
3649     /* fix the times to allow for equilibration */
3650     sim_data_impose_times(&sim_data, begin, end);
3651
3652     if (opt2bSet("-oh", NFILE, fnm))
3653     {
3654         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3655     }
3656
3657     /* assemble the output structures from the lambdas */
3658     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3659
3660     sum_disc_err = barres_list_max_disc_err(results, nresults);
3661
3662     if (nresults == 0)
3663     {
3664         printf("\nNo results to calculate.\n");
3665         return 0;
3666     }
3667
3668     if (sum_disc_err > prec)
3669     {
3670         prec = sum_disc_err;
3671         nd   = static_cast<int>(std::ceil(-std::log10(prec)));
3672         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3673     }
3674
3675
3676     /*sprintf(lamformat,"%%6.3f");*/
3677     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3678     /* the format strings of the results in kT */
3679     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3680     sprintf( sktformat, "%%%ds", 6+nd);
3681     /* the format strings of the errors in kT */
3682     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3683     sprintf( skteformat, "%%%ds", 4+nd);
3684     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3685     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3686
3687
3688
3689     fpb = nullptr;
3690     if (opt2bSet("-o", NFILE, fnm))
3691     {
3692         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3693         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3694                             "\\lambda", buf, exvggtXYDY, oenv);
3695     }
3696
3697     fpi = nullptr;
3698     if (opt2bSet("-oi", NFILE, fnm))
3699     {
3700         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3701         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3702                        "\\lambda", buf, oenv);
3703     }
3704
3705
3706
3707     if (nbmin > nbmax)
3708     {
3709         nbmin = nbmax;
3710     }
3711
3712     /* first calculate results */
3713     bEE      = TRUE;
3714     disc_err = FALSE;
3715     for (f = 0; f < nresults; f++)
3716     {
3717         /* Determine the free energy difference with a factor of 10
3718          * more accuracy than requested for printing.
3719          */
3720         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3721                  &bEE, partsum);
3722
3723         if (results[f].dg_disc_err > prec/10.)
3724         {
3725             disc_err = TRUE;
3726         }
3727         if (results[f].dg_histrange_err > prec/10.)
3728         {
3729             histrange_err = TRUE;
3730         }
3731     }
3732
3733     /* print results in kT */
3734     kT   = BOLTZ*temp;
3735
3736     printf("\nTemperature: %g K\n", temp);
3737
3738     printf("\nDetailed results in kT (see help for explanation):\n\n");
3739     printf("%6s ", " lam_A");
3740     printf("%6s ", " lam_B");
3741     printf(sktformat,  "DG ");
3742     if (bEE)
3743     {
3744         printf(skteformat, "+/- ");
3745     }
3746     if (disc_err)
3747     {
3748         printf(skteformat, "disc ");
3749     }
3750     if (histrange_err)
3751     {
3752         printf(skteformat, "range ");
3753     }
3754     printf(sktformat,  "s_A ");
3755     if (bEE)
3756     {
3757         printf(skteformat, "+/- " );
3758     }
3759     printf(sktformat,  "s_B ");
3760     if (bEE)
3761     {
3762         printf(skteformat, "+/- " );
3763     }
3764     printf(sktformat,  "stdev ");
3765     if (bEE)
3766     {
3767         printf(skteformat, "+/- ");
3768     }
3769     printf("\n");
3770     for (f = 0; f < nresults; f++)
3771     {
3772         lambda_vec_print_short(results[f].a->native_lambda, buf);
3773         printf("%s ", buf);
3774         lambda_vec_print_short(results[f].b->native_lambda, buf);
3775         printf("%s ", buf);
3776         printf(ktformat,  results[f].dg);
3777         printf(" ");
3778         if (bEE)
3779         {
3780             printf(kteformat, results[f].dg_err);
3781             printf(" ");
3782         }
3783         if (disc_err)
3784         {
3785             printf(kteformat, results[f].dg_disc_err);
3786             printf(" ");
3787         }
3788         if (histrange_err)
3789         {
3790             printf(kteformat, results[f].dg_histrange_err);
3791             printf(" ");
3792         }
3793         printf(ktformat,  results[f].sa);
3794         printf(" ");
3795         if (bEE)
3796         {
3797             printf(kteformat, results[f].sa_err);
3798             printf(" ");
3799         }
3800         printf(ktformat,  results[f].sb);
3801         printf(" ");
3802         if (bEE)
3803         {
3804             printf(kteformat, results[f].sb_err);
3805             printf(" ");
3806         }
3807         printf(ktformat,  results[f].dg_stddev);
3808         printf(" ");
3809         if (bEE)
3810         {
3811             printf(kteformat, results[f].dg_stddev_err);
3812         }
3813         printf("\n");
3814
3815         /* Check for negative relative entropy with a 95% certainty. */
3816         if (results[f].sa < -2*results[f].sa_err ||
3817             results[f].sb < -2*results[f].sb_err)
3818         {
3819             result_OK = FALSE;
3820         }
3821     }
3822
3823     if (!result_OK)
3824     {
3825         printf("\nWARNING: Some of these results violate the Second Law of "
3826                "Thermodynamics: \n"
3827                "         This is can be the result of severe undersampling, or "
3828                "(more likely)\n"
3829                "         there is something wrong with the simulations.\n");
3830     }
3831
3832
3833     /* final results in kJ/mol */
3834     printf("\n\nFinal results in kJ/mol:\n\n");
3835     dg_tot  = 0;
3836     for (f = 0; f < nresults; f++)
3837     {
3838
3839         if (fpi != nullptr)
3840         {
3841             lambda_vec_print_short(results[f].a->native_lambda, buf);
3842             fprintf(fpi, xvg2format, buf, dg_tot);
3843         }
3844
3845
3846         if (fpb != nullptr)
3847         {
3848             lambda_vec_print_intermediate(results[f].a->native_lambda,
3849                                           results[f].b->native_lambda,
3850                                           buf);
3851
3852             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3853         }
3854
3855         printf("point ");
3856         lambda_vec_print_short(results[f].a->native_lambda, buf);
3857         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3858         printf("%s - %s", buf, buf2);
3859         printf(",   DG ");
3860
3861         printf(dgformat, results[f].dg*kT);
3862         if (bEE)
3863         {
3864             printf(" +/- ");
3865             printf(dgformat, results[f].dg_err*kT);
3866         }
3867         if (histrange_err)
3868         {
3869             printf(" (max. range err. = ");
3870             printf(dgformat, results[f].dg_histrange_err*kT);
3871             printf(")");
3872             sum_histrange_err += results[f].dg_histrange_err*kT;
3873         }
3874
3875         printf("\n");
3876         dg_tot += results[f].dg;
3877     }
3878     printf("\n");
3879     printf("total ");
3880     lambda_vec_print_short(results[0].a->native_lambda, buf);
3881     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3882     printf("%s - %s", buf, buf2);
3883     printf(",   DG ");
3884
3885     printf(dgformat, dg_tot*kT);
3886     if (bEE)
3887     {
3888         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3889         printf(" +/- ");
3890         printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3891     }
3892     printf("\n");
3893     if (disc_err)
3894     {
3895         printf("\nmaximum discretization error = ");
3896         printf(dgformat, sum_disc_err);
3897         if (bEE && stat_err < sum_disc_err)
3898         {
3899             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3900         }
3901     }
3902     if (histrange_err)
3903     {
3904         printf("\nmaximum histogram range error = ");
3905         printf(dgformat, sum_histrange_err);
3906         if (bEE && stat_err < sum_histrange_err)
3907         {
3908             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3909         }
3910
3911     }
3912     printf("\n");
3913
3914
3915     if (fpi != nullptr)
3916     {
3917         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3918         fprintf(fpi, xvg2format, buf, dg_tot);
3919         xvgrclose(fpi);
3920     }
3921     if (fpb != nullptr)
3922     {
3923         xvgrclose(fpb);
3924     }
3925
3926     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3927     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3928
3929     return 0;
3930 }