Fixed typos in output.
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blob217457640e6d8616bd02640611574c38eee14a54
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \internal \file
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
43 #include "gmxpre.h"
45 #include "config.h"
47 #include <cctype>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
53 #include <algorithm>
54 #include <sstream>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/copyrite.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/random/random.h"
65 #include "gromacs/utility/arraysize.h"
66 #include "gromacs/utility/cstringutil.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/fatalerror.h"
69 #include "gromacs/utility/futil.h"
70 #include "gromacs/utility/gmxomp.h"
71 #include "gromacs/utility/smalloc.h"
73 //! longest file names allowed in input files
74 #define WHAM_MAXFILELEN 2048
76 /*! \brief
77 * x-axis legend for output files
79 static const char *xlabel = "\\xx\\f{} (nm)";
81 /*! \brief
82 * enum for energy units
84 enum {
85 enSel, en_kJ, en_kCal, en_kT, enNr
87 /*! \brief
88 * enum for type of input files (pdos, tpr, or pullf)
90 enum {
91 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
94 /*! \brief enum for bootstrapping method
96 * These bootstrap methods are supported:
97 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
98 * (bsMethod_BayesianHist)
99 * - bootstrap complete histograms (bsMethod_hist)
100 * - bootstrap trajectories from given umbrella histograms. This generates new
101 * "synthetic" histograms (bsMethod_traj)
102 * - bootstrap trajectories from Gaussian with mu/sigma computed from
103 * the respective histogram (bsMethod_trajGauss). This gives very similar
104 * results compared to bsMethod_traj.
106 * ********************************************************************
107 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
108 * JS Hub, BL de Groot, D van der Spoel
109 * g_wham - A free weighted histogram analysis implementation including
110 * robust error and autocorrelation estimates,
111 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
112 * ********************************************************************
114 enum {
115 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
116 bsMethod_traj, bsMethod_trajGauss
120 //! Parameters of the umbrella potentials
121 typedef struct
124 * \name Using umbrella pull code since gromacs 4.x
126 /*!\{*/
127 int npullcrds_tot; //!< nr of pull coordinates in tpr file
128 int npullcrds; //!< nr of umbrella pull coordinates for reading
129 int pull_geometry; //!< such as distance, direction
130 ivec pull_dim; //!< pull dimension with geometry distance
131 int pull_ndim; //!< nr of pull_dim != 0
132 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
133 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
134 real *k; //!< force constants in tpr file
135 real *init_dist; //!< reference displacements
136 real *umbInitDist; //!< reference displacement in umbrella direction
137 /*!\}*/
139 * \name Using PDO files common until gromacs 3.x
141 /*!\{*/
142 int nSkip;
143 char Reference[256];
144 int nPull;
145 int nDim;
146 ivec Dims;
147 char PullName[4][256];
148 double UmbPos[4][3];
149 double UmbCons[4][3];
150 /*!\}*/
151 } t_UmbrellaHeader;
153 //! Data in the umbrella histograms
154 typedef struct
156 int nPull; //!< nr of pull groups in this pdo or pullf/x file
157 double **Histo; //!< nPull histograms
158 double **cum; //!< nPull cumulative distribution functions
159 int nBin; //!< nr of bins. identical to opt->bins
160 double *k; //!< force constants for the nPull groups
161 double *pos; //!< umbrella positions for the nPull groups
162 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
163 int *N; //!< nr of data points in nPull histograms.
164 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
166 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
168 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
169 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
171 double *g;
172 double *tau; //!< intetrated autocorrelation time (IACT)
173 double *tausmooth; //!< smoothed IACT
175 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
177 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
178 gmx_bool **bContrib;
179 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
181 /*! \brief average force estimated from average displacement, fAv=dzAv*k
183 * Used for integration to guess the potential.
185 real *forceAv;
186 real *aver; //!< average of histograms
187 real *sigma; //!< stddev of histograms
188 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
189 } t_UmbrellaWindow;
191 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
192 typedef struct
194 int n; //!< total nr of pull groups in this tpr file
195 int nUse; //!< nr of pull groups used
196 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
197 } t_groupselection;
199 //! Parameters of WHAM
200 typedef struct
203 * \name Input stuff
205 /*!\{*/
206 const char *fnTpr, *fnPullf, *fnGroupsel;
207 const char *fnPdo, *fnPullx; //!< file names of input
208 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
209 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
211 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
212 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
213 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
214 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
215 /*!\}*/
217 * \name Basic WHAM options
219 /*!\{*/
220 int bins; //!< nr of bins, min, max, and dz of profile
221 real min, max, dz;
222 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
223 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
224 /*!\}*/
226 * \name Output control
228 /*!\{*/
229 gmx_bool bLog; //!< energy output (instead of probability) for profile
230 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
231 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
232 /*! \brief after wham, set prof to zero at this z-position.
233 * When bootstrapping, set zProf0 to a "stable" reference position.
235 real zProf0;
236 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
238 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
239 gmx_bool bAuto; //!< determine min and max automatically but do not exit
241 gmx_bool verbose; //!< more noisy wham mode
242 int stepchange; //!< print maximum change in prof after how many interations
243 gmx_output_env_t *oenv; //!< xvgr options
244 /*!\}*/
246 * \name Autocorrelation stuff
248 /*!\{*/
249 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
250 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
251 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
252 real acTrestart; //!< when computing ACT, time between restarting points
254 /* \brief Enforce the same weight for each umbella window, that is
255 * calculate with the same number of data points for
256 * each window. That can be reasonable, if the histograms
257 * have different length, but due to autocorrelation,
258 * a longer simulation should not have larger weightin wham.
260 gmx_bool bHistEq;
261 /*!\}*/
264 * \name Bootstrapping stuff
266 /*!\{*/
267 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
269 /* \brief bootstrap method
271 * if == bsMethod_hist, consider complete histograms as independent
272 * data points and, hence, only mix complete histograms.
273 * if == bsMethod_BayesianHist, consider complete histograms
274 * as independent data points, but assign random weights
275 * to the histograms during the bootstrapping ("Bayesian bootstrap")
276 * In case of long correlations (e.g., inside a channel), these
277 * will yield a more realistic error.
278 * if == bsMethod_traj(Gauss), generate synthetic histograms
279 * for each given
280 * histogram by generating an autocorrelated random sequence
281 * that is distributed according to the respective given
282 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
283 * (instead of from the umbrella histogram) to generate a new
284 * histogram.
286 int bsMethod;
288 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
289 real tauBootStrap;
291 /* \brief when mixing histograms, mix only histograms withing blocks
292 long the reaction coordinate xi. Avoids gaps along xi. */
293 int histBootStrapBlockLength;
295 int bsSeed; //!< random seed for bootstrapping
297 /* \brief Write cumulative distribution functions (CDFs) of histograms
298 and write the generated histograms for each bootstrap */
299 gmx_bool bs_verbose;
300 /*!\}*/
302 * \name tabulated umbrella potential stuff
304 /*!\{*/
305 gmx_bool bTab;
306 double *tabX, *tabY, tabMin, tabMax, tabDz;
307 int tabNbins;
308 /*!\}*/
309 gmx_rng_t rng; //!< gromacs random number generator
310 } t_UmbrellaOptions;
312 //! Make an umbrella window (may contain several histograms)
313 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
315 t_UmbrellaWindow *win;
316 int i;
317 snew(win, nwin);
318 for (i = 0; i < nwin; i++)
320 win[i].Histo = win[i].cum = 0;
321 win[i].k = win[i].pos = win[i].z = 0;
322 win[i].N = win[i].Ntot = 0;
323 win[i].g = win[i].tau = win[i].tausmooth = 0;
324 win[i].bContrib = 0;
325 win[i].ztime = 0;
326 win[i].forceAv = 0;
327 win[i].aver = win[i].sigma = 0;
328 win[i].bsWeight = 0;
330 return win;
333 //! Delete an umbrella window (may contain several histograms)
334 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
336 int i, j;
337 for (i = 0; i < nwin; i++)
339 if (win[i].Histo)
341 for (j = 0; j < win[i].nPull; j++)
343 sfree(win[i].Histo[j]);
346 if (win[i].cum)
348 for (j = 0; j < win[i].nPull; j++)
350 sfree(win[i].cum[j]);
353 if (win[i].bContrib)
355 for (j = 0; j < win[i].nPull; j++)
357 sfree(win[i].bContrib[j]);
360 sfree(win[i].Histo);
361 sfree(win[i].cum);
362 sfree(win[i].k);
363 sfree(win[i].pos);
364 sfree(win[i].z);
365 sfree(win[i].N);
366 sfree(win[i].Ntot);
367 sfree(win[i].g);
368 sfree(win[i].tau);
369 sfree(win[i].tausmooth);
370 sfree(win[i].bContrib);
371 sfree(win[i].ztime);
372 sfree(win[i].forceAv);
373 sfree(win[i].aver);
374 sfree(win[i].sigma);
375 sfree(win[i].bsWeight);
377 sfree(win);
380 /*! \brief
381 * Read and setup tabulated umbrella potential
383 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
385 int i, ny, nl;
386 double **y;
388 printf("Setting up tabulated potential from file %s\n", fn);
389 nl = read_xvg(fn, &y, &ny);
390 opt->tabNbins = nl;
391 if (ny != 2)
393 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
395 opt->tabMin = y[0][0];
396 opt->tabMax = y[0][nl-1];
397 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
398 if (opt->tabDz <= 0)
400 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
401 "ascending z-direction", fn);
403 for (i = 0; i < nl-1; i++)
405 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
407 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
410 snew(opt->tabY, nl);
411 snew(opt->tabX, nl);
412 for (i = 0; i < nl; i++)
414 opt->tabX[i] = y[0][i];
415 opt->tabY[i] = y[1][i];
417 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
418 opt->tabMin, opt->tabMax, opt->tabDz);
421 //! Read the header of an PDO file (position, force const, nr of groups)
422 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
424 char line[2048];
425 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
426 int i;
427 std::istringstream ist;
429 /* line 1 */
430 if (fgets(line, 2048, file) == NULL)
432 gmx_fatal(FARGS, "Error reading header from pdo file\n");
434 ist.str(line);
435 ist >> Buffer0 >> Buffer1 >> Buffer2;
436 if (std::strcmp(Buffer1, "UMBRELLA"))
438 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
439 "(Found in first line: `%s')\n",
440 Buffer1, "UMBRELLA", line);
442 if (std::strcmp(Buffer2, "3.0"))
444 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
447 /* line 2 */
448 if (fgets(line, 2048, file) == NULL)
450 gmx_fatal(FARGS, "Error reading header from pdo file\n");
452 ist.str(line);
453 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
454 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
456 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
457 if (header->nDim != 1)
459 gmx_fatal(FARGS, "Currently only supports one dimension");
462 /* line3 */
463 if (fgets(line, 2048, file) == NULL)
465 gmx_fatal(FARGS, "Error reading header from pdo file\n");
467 ist.str(line);
468 ist >> Buffer0 >> Buffer1 >> header->nSkip;
470 /* line 4 */
471 if (fgets(line, 2048, file) == NULL)
473 gmx_fatal(FARGS, "Error reading header from pdo file\n");
475 ist.str(line);
476 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
478 /* line 5 */
479 if (fgets(line, 2048, file) == NULL)
481 gmx_fatal(FARGS, "Error reading header from pdo file\n");
483 ist.str(line);
484 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
486 if (opt->verbose)
488 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
489 header->Reference);
492 for (i = 0; i < header->nPull; ++i)
494 if (fgets(line, 2048, file) == NULL)
496 gmx_fatal(FARGS, "Error reading header from pdo file\n");
498 ist.str(line);
499 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
500 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
501 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
503 if (opt->verbose)
505 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
506 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
510 if (fgets(line, 2048, file) == NULL)
512 gmx_fatal(FARGS, "Cannot read from file\n");
514 ist.str(line);
515 ist >> Buffer3;
516 if (std::strcmp(Buffer3, "#####") != 0)
518 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
522 //! smarter fgets
523 static char *fgets3(FILE *fp, char ptr[], int *len)
525 char *p;
526 int slen;
528 if (fgets(ptr, *len-1, fp) == NULL)
530 return NULL;
532 p = ptr;
533 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
535 /* This line is longer than len characters, let's increase len! */
536 *len += STRLEN;
537 p += STRLEN;
538 srenew(ptr, *len);
539 if (fgets(p-1, STRLEN, fp) == NULL)
541 break;
544 slen = std::strlen(ptr);
545 if (ptr[slen-1] == '\n')
547 ptr[slen-1] = '\0';
550 return ptr;
553 /*! \brief Read the data columns of and PDO file.
555 * TO DO: Get rid of the scanf function to avoid the clang warning.
556 * At the moment, this warning is avoided by hiding the format string
557 * the variable fmtlf.
559 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
560 int fileno, t_UmbrellaWindow * win,
561 t_UmbrellaOptions *opt,
562 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
564 int i, inttemp, bins, count, ntot;
565 real minval, maxval, minfound = 1e20, maxfound = -1e20;
566 double temp, time, time0 = 0, dt;
567 char *ptr = 0;
568 t_UmbrellaWindow * window = 0;
569 gmx_bool timeok, dt_ok = 1;
570 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
571 int len = STRLEN, dstep = 1;
572 const int blocklen = 4096;
573 int *lennow = 0;
575 if (!bGetMinMax)
577 bins = opt->bins;
578 minval = opt->min;
579 maxval = opt->max;
581 window = win+fileno;
582 /* Need to alocate memory and set up structure */
583 window->nPull = header->nPull;
584 window->nBin = bins;
586 snew(window->Histo, window->nPull);
587 snew(window->z, window->nPull);
588 snew(window->k, window->nPull);
589 snew(window->pos, window->nPull);
590 snew(window->N, window->nPull);
591 snew(window->Ntot, window->nPull);
592 snew(window->g, window->nPull);
593 snew(window->bsWeight, window->nPull);
595 window->bContrib = 0;
597 if (opt->bCalcTauInt)
599 snew(window->ztime, window->nPull);
601 else
603 window->ztime = 0;
605 snew(lennow, window->nPull);
607 for (i = 0; i < window->nPull; ++i)
609 window->z[i] = 1;
610 window->bsWeight[i] = 1.;
611 snew(window->Histo[i], bins);
612 window->k[i] = header->UmbCons[i][0];
613 window->pos[i] = header->UmbPos[i][0];
614 window->N[i] = 0;
615 window->Ntot[i] = 0;
616 window->g[i] = 1.;
617 if (opt->bCalcTauInt)
619 window->ztime[i] = 0;
623 /* Done with setup */
625 else
627 minfound = 1e20;
628 maxfound = -1e20;
629 minval = maxval = bins = 0; /* Get rid of warnings */
632 count = 0;
633 snew(tmpbuf, len);
634 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
636 trim(ptr);
638 if (ptr[0] == '#' || std::strlen(ptr) < 2)
640 continue;
643 /* Initiate format string */
644 fmtign[0] = '\0';
645 std::strcat(fmtign, "%*s");
647 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
648 /* Round time to fs */
649 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
651 /* get time step of pdo file */
652 if (count == 0)
654 time0 = time;
656 else if (count == 1)
658 dt = time-time0;
659 if (opt->dt > 0.0)
661 dstep = static_cast<int>(opt->dt/dt+0.5);
662 if (dstep == 0)
664 dstep = 1;
667 if (!bGetMinMax)
669 window->dt = dt*dstep;
672 count++;
674 dt_ok = ((count-1)%dstep == 0);
675 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
676 /* if (opt->verbose)
677 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
678 time,opt->tmin, opt->tmax, dt_ok,timeok); */
680 if (timeok)
682 for (i = 0; i < header->nPull; ++i)
684 std::strcpy(fmt, fmtign);
685 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
686 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
687 if (sscanf(ptr, fmt, &temp))
689 temp += header->UmbPos[i][0];
690 if (bGetMinMax)
692 if (temp < minfound)
694 minfound = temp;
696 if (temp > maxfound)
698 maxfound = temp;
701 else
703 if (opt->bCalcTauInt)
705 /* save time series for autocorrelation analysis */
706 ntot = window->Ntot[i];
707 if (ntot >= lennow[i])
709 lennow[i] += blocklen;
710 srenew(window->ztime[i], lennow[i]);
712 window->ztime[i][ntot] = temp;
715 temp -= minval;
716 temp /= (maxval-minval);
717 temp *= bins;
718 temp = std::floor(temp);
720 inttemp = static_cast<int> (temp);
721 if (opt->bCycl)
723 if (inttemp < 0)
725 inttemp += bins;
727 else if (inttemp >= bins)
729 inttemp -= bins;
733 if (inttemp >= 0 && inttemp < bins)
735 window->Histo[i][inttemp] += 1.;
736 window->N[i]++;
738 window->Ntot[i]++;
743 if (time > opt->tmax)
745 if (opt->verbose)
747 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
749 break;
753 if (bGetMinMax)
755 *mintmp = minfound;
756 *maxtmp = maxfound;
759 sfree(lennow);
760 sfree(tmpbuf);
763 /*! \brief Set identical weights for all histograms
765 * Normally, the weight is given by the number data points in each
766 * histogram, together with the autocorrelation time. This can be overwritten
767 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
768 * an appropriate autocorrelation times instead of using this function.
770 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
772 int i, k, j, NEnforced;
773 double ratio;
775 NEnforced = window[0].Ntot[0];
776 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
777 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
778 /* enforce all histograms to have the same weight as the very first histogram */
780 for (j = 0; j < nWindows; ++j)
782 for (k = 0; k < window[j].nPull; ++k)
784 ratio = 1.0*NEnforced/window[j].Ntot[k];
785 for (i = 0; i < window[0].nBin; ++i)
787 window[j].Histo[k][i] *= ratio;
789 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
794 /*! \brief Simple linear interpolation between two given tabulated points
796 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
798 int jl, ju;
799 double pl, pu, dz, dp;
801 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
802 ju = jl+1;
803 if (jl < 0 || ju >= opt->tabNbins)
805 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
806 "Provide an extended table.", dist, jl, ju);
808 pl = opt->tabY[jl];
809 pu = opt->tabY[ju];
810 dz = dist-opt->tabX[jl];
811 dp = (pu-pl)*dz/opt->tabDz;
812 return pl+dp;
816 /*! \brief
817 * Check which bins substiantially contribute (accelerates WHAM)
819 * Don't worry, that routine does not mean we compute the PMF in limited precision.
820 * After rapid convergence (using only substiantal contributions), we always switch to
821 * full precision.
823 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
824 t_UmbrellaOptions *opt)
826 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
827 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
828 gmx_bool bAnyContrib;
829 static int bFirst = 1;
830 static double wham_contrib_lim;
832 if (bFirst)
834 for (i = 0; i < nWindows; ++i)
836 nGrptot += window[i].nPull;
838 wham_contrib_lim = opt->Tolerance/nGrptot;
841 ztot = opt->max-opt->min;
842 ztot_half = ztot/2;
844 for (i = 0; i < nWindows; ++i)
846 if (!window[i].bContrib)
848 snew(window[i].bContrib, window[i].nPull);
850 for (j = 0; j < window[i].nPull; ++j)
852 if (!window[i].bContrib[j])
854 snew(window[i].bContrib[j], opt->bins);
856 bAnyContrib = FALSE;
857 for (k = 0; k < opt->bins; ++k)
859 temp = (1.0*k+0.5)*dz+min;
860 distance = temp - window[i].pos[j]; /* distance to umbrella center */
861 if (opt->bCycl)
862 { /* in cyclic wham: */
863 if (distance > ztot_half) /* |distance| < ztot_half */
865 distance -= ztot;
867 else if (distance < -ztot_half)
869 distance += ztot;
872 /* Note: there are two contributions to bin k in the wham equations:
873 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
874 ii) exp(- U/(BOLTZ*opt->Temperature))
875 where U is the umbrella potential
876 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
879 if (!opt->bTab)
881 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
883 else
885 U = tabulated_pot(distance, opt); /* Use tabulated potential */
887 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
888 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
889 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
890 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
891 if (window[i].bContrib[j][k])
893 nContrib++;
895 nTot++;
897 /* If this histo is far outside min and max all bContrib may be FALSE,
898 causing a floating point exception later on. To avoid that, switch
899 them all to true.*/
900 if (!bAnyContrib)
902 for (k = 0; k < opt->bins; ++k)
904 window[i].bContrib[j][k] = TRUE;
909 if (bFirst)
911 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
912 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
915 if (opt->verbose)
917 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
918 nContrib, nTot);
920 bFirst = 0;
923 //! Compute the PMF (one of the two main WHAM routines)
924 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
925 t_UmbrellaOptions *opt, gmx_bool bExact)
927 double ztot_half, ztot, min = opt->min, dz = opt->dz;
929 ztot = opt->max-opt->min;
930 ztot_half = ztot/2;
932 #pragma omp parallel
936 int nthreads = gmx_omp_get_max_threads();
937 int thread_id = gmx_omp_get_thread_num();
938 int i;
939 int i0 = thread_id*opt->bins/nthreads;
940 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
942 for (i = i0; i < i1; ++i)
944 int j, k;
945 double num, denom, invg, temp = 0, distance, U = 0;
946 num = denom = 0.;
947 for (j = 0; j < nWindows; ++j)
949 for (k = 0; k < window[j].nPull; ++k)
951 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
952 temp = (1.0*i+0.5)*dz+min;
953 num += invg*window[j].Histo[k][i];
955 if (!(bExact || window[j].bContrib[k][i]))
957 continue;
959 distance = temp - window[j].pos[k]; /* distance to umbrella center */
960 if (opt->bCycl)
961 { /* in cyclic wham: */
962 if (distance > ztot_half) /* |distance| < ztot_half */
964 distance -= ztot;
966 else if (distance < -ztot_half)
968 distance += ztot;
972 if (!opt->bTab)
974 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
976 else
978 U = tabulated_pot(distance, opt); /* Use tabulated potential */
980 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
983 profile[i] = num/denom;
986 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
990 //! Compute the free energy offsets z (one of the two main WHAM routines)
991 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
992 t_UmbrellaOptions *opt, gmx_bool bExact)
994 double min = opt->min, dz = opt->dz, ztot_half, ztot;
995 double maxglob = -1e20;
997 ztot = opt->max-opt->min;
998 ztot_half = ztot/2;
1000 #pragma omp parallel
1004 int nthreads = gmx_omp_get_max_threads();
1005 int thread_id = gmx_omp_get_thread_num();
1006 int i;
1007 int i0 = thread_id*nWindows/nthreads;
1008 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1009 double maxloc = -1e20;
1011 for (i = i0; i < i1; ++i)
1013 double total = 0, temp, distance, U = 0;
1014 int j, k;
1016 for (j = 0; j < window[i].nPull; ++j)
1018 total = 0;
1019 for (k = 0; k < window[i].nBin; ++k)
1021 if (!(bExact || window[i].bContrib[j][k]))
1023 continue;
1025 temp = (1.0*k+0.5)*dz+min;
1026 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1027 if (opt->bCycl)
1028 { /* in cyclic wham: */
1029 if (distance > ztot_half) /* |distance| < ztot_half */
1031 distance -= ztot;
1033 else if (distance < -ztot_half)
1035 distance += ztot;
1039 if (!opt->bTab)
1041 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1043 else
1045 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1047 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1049 /* Avoid floating point exception if window is far outside min and max */
1050 if (total != 0.0)
1052 total = -std::log(total);
1054 else
1056 total = 1000.0;
1058 temp = std::abs(total - window[i].z[j]);
1059 if (temp > maxloc)
1061 maxloc = temp;
1063 window[i].z[j] = total;
1066 /* Now get maximum maxloc from the threads and put in maxglob */
1067 if (maxloc > maxglob)
1069 #pragma omp critical
1071 if (maxloc > maxglob)
1073 maxglob = maxloc;
1078 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1081 return maxglob;
1084 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1085 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1087 int i, j, bins = opt->bins;
1088 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1089 double z, z1;
1091 if (min > 0. || max < 0.)
1093 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1094 opt->min, opt->max);
1097 snew(prof2, bins);
1099 for (i = 0; i < bins; i++)
1101 z = min+(i+0.5)*dz;
1102 zsym = -z;
1103 /* bin left of zsym */
1104 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1105 if (j >= 0 && (j+1) < bins)
1107 /* interpolate profile linearly between bins j and j+1 */
1108 z1 = min+(j+0.5)*dz;
1109 deltaz = zsym-z1;
1110 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1111 /* average between left and right */
1112 prof2[i] = 0.5*(profsym+profile[i]);
1114 else
1116 prof2[i] = profile[i];
1120 std::memcpy(profile, prof2, bins*sizeof(double));
1121 sfree(prof2);
1124 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1125 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1127 int i, bins, imin;
1128 double unit_factor = 1., diff;
1130 bins = opt->bins;
1132 /* Not log? Nothing to do! */
1133 if (!opt->bLog)
1135 return;
1138 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1139 if (opt->unit == en_kT)
1141 unit_factor = 1.0;
1143 else if (opt->unit == en_kJ)
1145 unit_factor = BOLTZ*opt->Temperature;
1147 else if (opt->unit == en_kCal)
1149 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1151 else
1153 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1156 for (i = 0; i < bins; i++)
1158 if (profile[i] > 0.0)
1160 profile[i] = -std::log(profile[i])*unit_factor;
1164 /* shift to zero at z=opt->zProf0 */
1165 if (!opt->bProf0Set)
1167 diff = profile[0];
1169 else
1171 /* Get bin with shortest distance to opt->zProf0
1172 (-0.5 from bin position and +0.5 from rounding cancel) */
1173 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1174 if (imin < 0)
1176 imin = 0;
1178 else if (imin >= bins)
1180 imin = bins-1;
1182 diff = profile[imin];
1185 /* Shift to zero */
1186 for (i = 0; i < bins; i++)
1188 profile[i] -= diff;
1192 //! Make an array of random integers (used for bootstrapping)
1193 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1195 int ipull, blockBase, nr, ipullRandom;
1197 if (blockLength == 0)
1199 blockLength = nPull;
1202 for (ipull = 0; ipull < nPull; ipull++)
1204 blockBase = (ipull/blockLength)*blockLength;
1206 { /* make sure nothing bad happens in the last block */
1207 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1208 ipullRandom = blockBase + nr;
1210 while (ipullRandom >= nPull);
1211 if (ipullRandom < 0 || ipullRandom >= nPull)
1213 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1214 "blockLength = %d, blockBase = %d\n",
1215 ipullRandom, nPull, nr, blockLength, blockBase);
1217 randomArray[ipull] = ipullRandom;
1219 /*for (ipull=0; ipull<nPull; ipull++)
1220 printf("%d ",randomArray[ipull]); printf("\n"); */
1223 /*! \brief Set pull group information of a synthetic histogram
1225 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1226 * but it is not required if we bootstrap complete histograms.
1228 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1229 t_UmbrellaWindow *thisWindow, int pullid)
1231 synthWindow->N [0] = thisWindow->N [pullid];
1232 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1233 synthWindow->pos [0] = thisWindow->pos [pullid];
1234 synthWindow->z [0] = thisWindow->z [pullid];
1235 synthWindow->k [0] = thisWindow->k [pullid];
1236 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1237 synthWindow->g [0] = thisWindow->g [pullid];
1238 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1241 /*! \brief Calculate cumulative distribution function of of all histograms.
1243 * This allow to create random number sequences
1244 * which are distributed according to the histograms. Required to generate
1245 * the "synthetic" histograms for the Bootstrap method
1247 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1248 t_UmbrellaOptions *opt, const char *fnhist)
1250 int i, j, k, nbin;
1251 double last;
1252 char *fn = 0, *buf = 0;
1253 FILE *fp = 0;
1255 if (opt->bs_verbose)
1257 snew(fn, std::strlen(fnhist)+10);
1258 snew(buf, std::strlen(fnhist)+10);
1259 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1260 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1263 nbin = opt->bins;
1264 for (i = 0; i < nWindows; i++)
1266 snew(window[i].cum, window[i].nPull);
1267 for (j = 0; j < window[i].nPull; j++)
1269 snew(window[i].cum[j], nbin+1);
1270 window[i].cum[j][0] = 0.;
1271 for (k = 1; k <= nbin; k++)
1273 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1276 /* normalize CDFs. Ensure cum[nbin]==1 */
1277 last = window[i].cum[j][nbin];
1278 for (k = 0; k <= nbin; k++)
1280 window[i].cum[j][k] /= last;
1285 printf("Cumulative distribution functions of all histograms created.\n");
1286 if (opt->bs_verbose)
1288 for (k = 0; k <= nbin; k++)
1290 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1291 for (i = 0; i < nWindows; i++)
1293 for (j = 0; j < window[i].nPull; j++)
1295 fprintf(fp, "%g\t", window[i].cum[j][k]);
1298 fprintf(fp, "\n");
1300 printf("Wrote cumulative distribution functions to %s\n", fn);
1301 xvgrclose(fp);
1302 sfree(fn);
1303 sfree(buf);
1308 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1310 * This is used to generate a random sequence distributed according to a histogram
1312 void searchCumulative(double xx[], int n, double x, int *j)
1314 int ju, jm, jl;
1316 jl = -1;
1317 ju = n;
1318 while (ju-jl > 1)
1320 jm = (ju+jl) >> 1;
1321 if (x >= xx[jm])
1323 jl = jm;
1325 else
1327 ju = jm;
1330 if (x == xx[0])
1332 *j = 0;
1334 else if (x == xx[n-1])
1336 *j = n-2;
1338 else
1340 *j = jl;
1344 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1345 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1346 int pullid, t_UmbrellaOptions *opt)
1348 int N, i, nbins, r_index, ibin;
1349 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1350 char errstr[1024];
1352 N = thisWindow->N[pullid];
1353 dt = thisWindow->dt;
1354 nbins = thisWindow->nBin;
1356 /* tau = autocorrelation time */
1357 if (opt->tauBootStrap > 0.0)
1359 tausteps = opt->tauBootStrap/dt;
1361 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1363 /* calc tausteps from g=1+2tausteps */
1364 g = thisWindow->g[pullid];
1365 tausteps = (g-1)/2;
1367 else
1369 sprintf(errstr,
1370 "When generating hypothetical trajectories from given umbrella histograms,\n"
1371 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1372 "cannot be predicted. You have 3 options:\n"
1373 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1374 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1375 std::strcat(errstr,
1376 " with option -iiact for all umbrella windows.\n"
1377 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1378 " Use option (3) only if you are sure what you're doing, you may severely\n"
1379 " underestimate the error if a too small ACT is given.\n");
1380 gmx_fatal(FARGS, errstr);
1383 synthWindow->N [0] = N;
1384 synthWindow->pos [0] = thisWindow->pos[pullid];
1385 synthWindow->z [0] = thisWindow->z[pullid];
1386 synthWindow->k [0] = thisWindow->k[pullid];
1387 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1388 synthWindow->g [0] = thisWindow->g [pullid];
1389 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1391 for (i = 0; i < nbins; i++)
1393 synthWindow->Histo[0][i] = 0.;
1396 if (opt->bsMethod == bsMethod_trajGauss)
1398 sig = thisWindow->sigma [pullid];
1399 mu = thisWindow->aver [pullid];
1402 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1403 Use the following:
1404 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1405 then
1406 z = a*x + sqrt(1-a^2)*y
1407 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1408 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1409 function
1410 C(t) = exp(-t/tau) with tau=-1/ln(a)
1412 Then, use error function to turn the Gaussian random variable into a uniformly
1413 distributed one in [0,1]. Eventually, use cumulative distribution function of
1414 histogram to get random variables distributed according to histogram.
1415 Note: The ACT of the flat distribution and of the generated histogram is not
1416 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1418 a = std::exp(-1.0/tausteps);
1419 ap = std::sqrt(1.0-a*a);
1420 invsqrt2 = 1.0/std::sqrt(2.0);
1422 /* init random sequence */
1423 x = gmx_rng_gaussian_table(opt->rng);
1425 if (opt->bsMethod == bsMethod_traj)
1427 /* bootstrap points from the umbrella histograms */
1428 for (i = 0; i < N; i++)
1430 y = gmx_rng_gaussian_table(opt->rng);
1431 x = a*x+ap*y;
1432 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1433 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1435 r = 0.5*(1+std::erf(x*invsqrt2));
1436 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1437 synthWindow->Histo[0][r_index] += 1.;
1440 else if (opt->bsMethod == bsMethod_trajGauss)
1442 /* bootstrap points from a Gaussian with the same average and sigma
1443 as the respective umbrella histogram. The idea was, that -given
1444 limited sampling- the bootstrapped histograms are otherwise biased
1445 from the limited sampling of the US histos. However, bootstrapping from
1446 the Gaussian seems to yield a similar estimate. */
1447 i = 0;
1448 while (i < N)
1450 y = gmx_rng_gaussian_table(opt->rng);
1451 x = a*x+ap*y;
1452 z = x*sig+mu;
1453 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1454 if (opt->bCycl)
1456 if (ibin < 0)
1458 while ( (ibin += nbins) < 0)
1463 else if (ibin >= nbins)
1465 while ( (ibin -= nbins) >= nbins)
1472 if (ibin >= 0 && ibin < nbins)
1474 synthWindow->Histo[0][ibin] += 1.;
1475 i++;
1479 else
1481 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1485 /*! \brief Write all histograms to a file
1487 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1488 * sets of bootstrapped histograms.
1490 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1491 int bs_index, t_UmbrellaOptions *opt)
1493 char *fn = 0, *buf = 0, title[256];
1494 FILE *fp;
1495 int bins, l, i, j;
1497 if (bs_index >= 0)
1499 snew(fn, std::strlen(fnhist)+10);
1500 snew(buf, std::strlen(fnhist)+1);
1501 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1502 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1504 else
1506 fn = gmx_strdup(fnhist);
1507 std::strcpy(title, "Umbrella histograms");
1510 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1511 bins = opt->bins;
1513 /* Write histograms */
1514 for (l = 0; l < bins; ++l)
1516 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1517 for (i = 0; i < nWindows; ++i)
1519 for (j = 0; j < window[i].nPull; ++j)
1521 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1524 fprintf(fp, "\n");
1527 xvgrclose(fp);
1528 printf("Wrote %s\n", fn);
1529 if (bs_index >= 0)
1531 sfree(buf);
1533 sfree(fn);
1536 //! Used for qsort to sort random numbers
1537 int func_wham_is_larger(const void *a, const void *b)
1539 double *aa, *bb;
1540 aa = (double*)a;
1541 bb = (double*)b;
1542 if (*aa < *bb)
1544 return -1;
1546 else if (*aa > *bb)
1548 return 1;
1550 else
1552 return 0;
1556 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1557 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1559 int i;
1560 double *r;
1562 snew(r, nAllPull);
1564 /* generate ordered random numbers between 0 and nAllPull */
1565 for (i = 0; i < nAllPull-1; i++)
1567 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1569 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1570 r[nAllPull-1] = 1.0*nAllPull;
1572 synthwin[0].bsWeight[0] = r[0];
1573 for (i = 1; i < nAllPull; i++)
1575 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1578 /* avoid to have zero weight by adding a tiny value */
1579 for (i = 0; i < nAllPull; i++)
1581 if (synthwin[i].bsWeight[0] < 1e-5)
1583 synthwin[i].bsWeight[0] = 1e-5;
1587 sfree(r);
1590 //! The main bootstrapping routine
1591 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1592 char* ylabel, double *profile,
1593 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1595 t_UmbrellaWindow * synthWindow;
1596 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1597 int i, j, *randomArray = 0, winid, pullid, ib;
1598 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1599 FILE *fp;
1600 gmx_bool bExact = FALSE;
1602 /* init random generator */
1603 if (opt->bsSeed == -1)
1605 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1607 else
1609 opt->rng = gmx_rng_init(opt->bsSeed);
1612 snew(bsProfile, opt->bins);
1613 snew(bsProfiles_av, opt->bins);
1614 snew(bsProfiles_av2, opt->bins);
1616 /* Create array of all pull groups. Note that different windows
1617 may have different nr of pull groups
1618 First: Get total nr of pull groups */
1619 nAllPull = 0;
1620 for (i = 0; i < nWindows; i++)
1622 nAllPull += window[i].nPull;
1624 snew(allPull_winId, nAllPull);
1625 snew(allPull_pullId, nAllPull);
1626 iAllPull = 0;
1627 /* Setup one array of all pull groups */
1628 for (i = 0; i < nWindows; i++)
1630 for (j = 0; j < window[i].nPull; j++)
1632 allPull_winId[iAllPull] = i;
1633 allPull_pullId[iAllPull] = j;
1634 iAllPull++;
1638 /* setup stuff for synthetic windows */
1639 snew(synthWindow, nAllPull);
1640 for (i = 0; i < nAllPull; i++)
1642 synthWindow[i].nPull = 1;
1643 synthWindow[i].nBin = opt->bins;
1644 snew(synthWindow[i].Histo, 1);
1645 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1647 snew(synthWindow[i].Histo[0], opt->bins);
1649 snew(synthWindow[i].N, 1);
1650 snew(synthWindow[i].pos, 1);
1651 snew(synthWindow[i].z, 1);
1652 snew(synthWindow[i].k, 1);
1653 snew(synthWindow[i].bContrib, 1);
1654 snew(synthWindow[i].g, 1);
1655 snew(synthWindow[i].bsWeight, 1);
1658 switch (opt->bsMethod)
1660 case bsMethod_hist:
1661 snew(randomArray, nAllPull);
1662 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1663 please_cite(stdout, "Hub2006");
1664 break;
1665 case bsMethod_BayesianHist:
1666 /* just copy all histogams into synthWindow array */
1667 for (i = 0; i < nAllPull; i++)
1669 winid = allPull_winId [i];
1670 pullid = allPull_pullId[i];
1671 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1673 break;
1674 case bsMethod_traj:
1675 case bsMethod_trajGauss:
1676 calc_cumulatives(window, nWindows, opt, fnhist);
1677 break;
1678 default:
1679 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1682 /* do bootstrapping */
1683 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1684 for (ib = 0; ib < opt->nBootStrap; ib++)
1686 printf(" *******************************************\n"
1687 " ******** Start bootstrap nr %d ************\n"
1688 " *******************************************\n", ib+1);
1690 switch (opt->bsMethod)
1692 case bsMethod_hist:
1693 /* bootstrap complete histograms from given histograms */
1694 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1695 for (i = 0; i < nAllPull; i++)
1697 winid = allPull_winId [randomArray[i]];
1698 pullid = allPull_pullId[randomArray[i]];
1699 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1701 break;
1702 case bsMethod_BayesianHist:
1703 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1704 setRandomBsWeights(synthWindow, nAllPull, opt);
1705 break;
1706 case bsMethod_traj:
1707 case bsMethod_trajGauss:
1708 /* create new histos from given histos, that is generate new hypothetical
1709 trajectories */
1710 for (i = 0; i < nAllPull; i++)
1712 winid = allPull_winId[i];
1713 pullid = allPull_pullId[i];
1714 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1716 break;
1719 /* write histos in case of verbose output */
1720 if (opt->bs_verbose)
1722 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1725 /* do wham */
1726 i = 0;
1727 bExact = FALSE;
1728 maxchange = 1e20;
1729 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1732 if ( (i%opt->stepUpdateContrib) == 0)
1734 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1736 if (maxchange < opt->Tolerance)
1738 bExact = TRUE;
1740 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1742 printf("\t%4d) Maximum change %e\n", i, maxchange);
1744 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1745 i++;
1747 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1748 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1750 if (opt->bLog)
1752 prof_normalization_and_unit(bsProfile, opt);
1755 /* symmetrize profile around z=0 */
1756 if (opt->bSym)
1758 symmetrizeProfile(bsProfile, opt);
1761 /* save stuff to get average and stddev */
1762 for (i = 0; i < opt->bins; i++)
1764 tmp = bsProfile[i];
1765 bsProfiles_av[i] += tmp;
1766 bsProfiles_av2[i] += tmp*tmp;
1767 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1769 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1771 xvgrclose(fp);
1773 /* write average and stddev */
1774 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1775 if (output_env_get_print_xvgr_codes(opt->oenv))
1777 fprintf(fp, "@TYPE xydy\n");
1779 for (i = 0; i < opt->bins; i++)
1781 bsProfiles_av [i] /= opt->nBootStrap;
1782 bsProfiles_av2[i] /= opt->nBootStrap;
1783 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1784 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1785 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1787 xvgrclose(fp);
1788 printf("Wrote boot strap result to %s\n", fnres);
1791 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1792 int whaminFileType(char *fn)
1794 int len;
1795 len = std::strlen(fn);
1796 if (std::strcmp(fn+len-3, "tpr") == 0)
1798 return whamin_tpr;
1800 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1802 return whamin_pullxf;
1804 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1806 return whamin_pdo;
1808 else
1810 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1812 return whamin_unknown;
1815 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1816 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1817 t_UmbrellaOptions *opt)
1819 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1820 int nread, sizenow, i, block = 1;
1821 FILE *fp;
1823 fp = gmx_ffopen(fn, "r");
1824 nread = 0;
1825 sizenow = 0;
1826 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1828 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1830 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1832 if (nread >= sizenow)
1834 sizenow += block;
1835 srenew(filename, sizenow);
1836 for (i = sizenow-block; i < sizenow; i++)
1838 snew(filename[i], WHAM_MAXFILELEN);
1841 /* remove newline if there is one */
1842 if (tmp[std::strlen(tmp)-1] == '\n')
1844 tmp[std::strlen(tmp)-1] = '\0';
1846 std::strcpy(filename[nread], tmp);
1847 if (opt->verbose)
1849 printf("Found file %s in %s\n", filename[nread], fn);
1851 nread++;
1853 *filenamesRet = filename;
1854 *nfilesRet = nread;
1857 //! Open a file or a pipe to a gzipped file
1858 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1860 char Buffer[1024], gunzip[1024], *Path = 0;
1861 FILE *pipe = 0;
1862 static gmx_bool bFirst = 1;
1864 /* gzipped pdo file? */
1865 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1867 /* search gunzip executable */
1868 if (!(Path = getenv("GMX_PATH_GZIP")))
1870 if (gmx_fexist("/bin/gunzip"))
1872 sprintf(gunzip, "%s", "/bin/gunzip");
1874 else if (gmx_fexist("/usr/bin/gunzip"))
1876 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1878 else
1880 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1881 "You may want to define the path to gunzip "
1882 "with the environment variable GMX_PATH_GZIP.", gunzip);
1885 else
1887 sprintf(gunzip, "%s/gunzip", Path);
1888 if (!gmx_fexist(gunzip))
1890 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1891 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1894 if (bFirst)
1896 printf("Using gunzip executable %s\n", gunzip);
1897 bFirst = 0;
1899 if (!gmx_fexist(fn))
1901 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1903 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1904 if (opt->verbose)
1906 printf("Executing command '%s'\n", Buffer);
1908 #if HAVE_PIPES
1909 if ((pipe = popen(Buffer, "r")) == NULL)
1911 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1913 #else
1914 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1915 #endif
1916 *bPipeOpen = TRUE;
1918 else
1920 pipe = gmx_ffopen(fn, "r");
1921 *bPipeOpen = FALSE;
1924 return pipe;
1927 //! Close file or pipe
1928 void pdo_close_file(FILE *fp)
1930 #if HAVE_PIPES
1931 pclose(fp);
1932 #else
1933 gmx_ffclose(fp);
1934 #endif
1937 //! Reading all pdo files
1938 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1939 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1941 FILE *file;
1942 real mintmp, maxtmp, done = 0.;
1943 int i;
1944 gmx_bool bPipeOpen;
1945 /* char Buffer0[1000]; */
1947 if (nfiles < 1)
1949 gmx_fatal(FARGS, "No files found. Hick.");
1952 /* if min and max are not given, get min and max from the input files */
1953 if (opt->bAuto)
1955 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1956 opt->min = 1e20;
1957 opt->max = -1e20;
1958 for (i = 0; i < nfiles; ++i)
1960 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1961 /*fgets(Buffer0,999,file);
1962 fprintf(stderr,"First line '%s'\n",Buffer0); */
1963 done = 100.0*(i+1)/nfiles;
1964 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1965 if (opt->verbose)
1967 printf("\n");
1969 read_pdo_header(file, header, opt);
1970 /* here only determine min and max of this window */
1971 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1972 if (maxtmp > opt->max)
1974 opt->max = maxtmp;
1976 if (mintmp < opt->min)
1978 opt->min = mintmp;
1980 if (bPipeOpen)
1982 pdo_close_file(file);
1984 else
1986 gmx_ffclose(file);
1989 printf("\n");
1990 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1991 if (opt->bBoundsOnly)
1993 printf("Found option -boundsonly, now exiting.\n");
1994 exit (0);
1997 /* store stepsize in profile */
1998 opt->dz = (opt->max-opt->min)/opt->bins;
2000 /* Having min and max, we read in all files */
2001 /* Loop over all files */
2002 for (i = 0; i < nfiles; ++i)
2004 done = 100.0*(i+1)/nfiles;
2005 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2006 if (opt->verbose)
2008 printf("\n");
2010 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2011 read_pdo_header(file, header, opt);
2012 /* load data into window */
2013 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2014 if ((window+i)->Ntot[0] == 0)
2016 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2018 if (bPipeOpen)
2020 pdo_close_file(file);
2022 else
2024 gmx_ffclose(file);
2027 printf("\n");
2028 for (i = 0; i < nfiles; ++i)
2030 sfree(fn[i]);
2032 sfree(fn);
2035 //! translate 0/1 to N/Y to write pull dimensions
2036 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2038 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2039 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2041 t_inputrec ir;
2042 int i, ncrd;
2043 t_state state;
2044 static int first = 1;
2046 /* printf("Reading %s \n",fn); */
2047 read_tpx_state(fn, &ir, &state, NULL);
2049 if (!ir.bPull)
2051 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2054 /* nr of pull groups */
2055 ncrd = 0;
2056 for (i = 0; i < ir.pull->ncoord; i++)
2058 if (ir.pull->coord[i].eType == epullUMBRELLA)
2060 int m;
2062 if (ncrd == 0)
2064 header->pull_geometry = ir.pull->coord[i].eGeom;
2065 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2068 if (ncrd != i)
2070 /* TODO: remove this restriction */
2071 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2074 for (m = 0; m < DIM; m++)
2076 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2078 /* TODO: remove the restriction */
2079 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2082 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2084 /* TODO: remove the restriction */
2085 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2089 ncrd++;
2093 if (ncrd < 1)
2095 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2098 header->npullcrds_tot = ir.pull->ncoord;
2099 header->npullcrds = ncrd;
2100 header->bPrintRef = ir.pull->bPrintCOM1;
2101 if (ir.pull->bPrintCOM2)
2103 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2105 if (ir.pull->bPrintRefValue)
2107 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2109 header->bPrintComp = ir.pull->bPrintComp;
2110 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2111 /* We should add a struct for each pull coord with all pull coord data
2112 instead of allocating many arrays for each property */
2113 snew(header->k, ncrd);
2114 snew(header->init_dist, ncrd);
2115 snew(header->umbInitDist, ncrd);
2117 /* only z-direction with epullgCYL? */
2118 if (header->pull_geometry == epullgCYL)
2120 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2122 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2123 "However, found dimensions [%s %s %s]\n",
2124 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2125 int2YN(header->pull_dim[ZZ]));
2129 for (i = 0; i < ncrd; i++)
2131 header->k[i] = ir.pull->coord[i].k;
2132 if (header->k[i] == 0.0)
2134 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2135 "That doesn't seem to be an Umbrella tpr.\n",
2136 i, fn);
2138 header->init_dist[i] = ir.pull->coord[i].init;
2140 /* initial distance to reference */
2141 switch (header->pull_geometry)
2143 case epullgCYL:
2144 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2145 case epullgDIST:
2146 case epullgDIR:
2147 case epullgDIRPBC:
2148 header->umbInitDist[i] = header->init_dist[i];
2149 break;
2150 default:
2151 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2155 if (opt->verbose || first)
2157 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2158 "\tPull group coordinates%s expected in pullx files.\n",
2159 fn, header->npullcrds, epullg_names[header->pull_geometry],
2160 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2161 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2162 for (i = 0; i < ncrd; i++)
2164 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2167 if (!opt->verbose && first)
2169 printf("\tUse option -v to see this output for all input tpr files\n\n");
2172 first = 0;
2175 //! 2-norm in a ndim-dimensional space
2176 double dist_ndim(double **dx, int ndim, int line)
2178 int i;
2179 double r2 = 0.;
2180 for (i = 0; i < ndim; i++)
2182 r2 += sqr(dx[i][line]);
2184 return std::sqrt(r2);
2187 //! Read pullx.xvg or pullf.xvg
2188 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2189 t_UmbrellaWindow * window,
2190 t_UmbrellaOptions *opt,
2191 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2192 t_groupselection *groupsel)
2194 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2195 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2196 real min, max, minfound = 1e20, maxfound = -1e20;
2197 gmx_bool dt_ok, timeok, bHaveForce;
2198 const char *quantity;
2199 const int blocklen = 4096;
2200 int *lennow = 0;
2201 static gmx_bool bFirst = TRUE;
2204 * Data columns in pull output:
2205 * - in force output pullf.xvg:
2206 * No reference columns, one column per pull coordinate
2208 * - in position output pullx.xvg
2209 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2210 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2213 nColPerCrd = 1;
2214 if (opt->bPullx && header->bPrintComp)
2216 nColPerCrd += header->pull_ndim;
2218 quantity = opt->bPullx ? "position" : "force";
2220 if (opt->bPullx && header->bPrintRef)
2222 nColRefEachCrd = header->pull_ndim;
2224 else
2226 nColRefEachCrd = 0;
2229 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2230 bHaveForce = opt->bPullf;
2232 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2233 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2234 Sorry for the laziness, this is a To-do. */
2235 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2236 && opt->bPullx)
2238 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2239 "reading \n(option -if) is supported at present, "
2240 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2241 "forces (pullf.xvg files)\nand provide them to gmx wham with option -if.",
2242 epullg_names[header->pull_geometry]);
2245 nt = read_xvg(fn, &y, &ny);
2247 /* Check consistency */
2248 if (nt < 1)
2250 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2252 if (bFirst)
2254 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2255 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2256 header->pull_ndim);
2257 /* Since this tool code has not updated yet to allow difference pull
2258 * dimensions per pull coordinate, we can't easily check the exact
2259 * number of expected columns, so we only check what we expect for
2260 * the pull coordinates selected for the WHAM analysis.
2262 printf("Expecting these columns in pull file:\n"
2263 "\t%d reference columns for each individual pull coordinate\n"
2264 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2265 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2266 header->npullcrds,
2267 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2268 nColExpect);
2269 bFirst = FALSE;
2271 if (ny < nColExpect ||
2272 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2274 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2275 "\nMaybe you confused options -ix and -if ?\n",
2276 header->npullcrds, fntpr, ny-1, fn,
2277 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2278 nColExpect-1);
2281 if (opt->verbose)
2283 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2286 if (!bGetMinMax)
2288 bins = opt->bins;
2289 min = opt->min;
2290 max = opt->max;
2291 if (nt > 1)
2293 window->dt = y[0][1]-y[0][0];
2295 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2297 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2300 /* Need to alocate memory and set up structure */
2302 if (groupsel)
2304 /* Use only groups selected with option -is file */
2305 if (header->npullcrds != groupsel->n)
2307 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2308 header->npullcrds, groupsel->n);
2310 window->nPull = groupsel->nUse;
2312 else
2314 window->nPull = header->npullcrds;
2317 window->nBin = bins;
2318 snew(window->Histo, window->nPull);
2319 snew(window->z, window->nPull);
2320 snew(window->k, window->nPull);
2321 snew(window->pos, window->nPull);
2322 snew(window->N, window->nPull);
2323 snew(window->Ntot, window->nPull);
2324 snew(window->g, window->nPull);
2325 snew(window->bsWeight, window->nPull);
2326 window->bContrib = 0;
2328 if (opt->bCalcTauInt)
2330 snew(window->ztime, window->nPull);
2332 else
2334 window->ztime = NULL;
2336 snew(lennow, window->nPull);
2338 for (g = 0; g < window->nPull; ++g)
2340 window->z[g] = 1;
2341 window->bsWeight[g] = 1.;
2342 snew(window->Histo[g], bins);
2343 window->N[g] = 0;
2344 window->Ntot[g] = 0;
2345 window->g[g] = 1.;
2346 if (opt->bCalcTauInt)
2348 window->ztime[g] = NULL;
2352 /* Copying umbrella center and force const is more involved since not
2353 all pull groups from header (tpr file) may be used in window variable */
2354 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2356 if (groupsel && (groupsel->bUse[g] == FALSE))
2358 continue;
2360 window->k[gUsed] = header->k[g];
2361 window->pos[gUsed] = header->umbInitDist[g];
2362 gUsed++;
2365 else
2366 { /* only determine min and max */
2367 minfound = 1e20;
2368 maxfound = -1e20;
2369 min = max = bins = 0; /* Get rid of warnings */
2373 for (i = 0; i < nt; i++)
2375 /* Do you want that time frame? */
2376 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2378 /* get time step of pdo file and get dstep from opt->dt */
2379 if (i == 0)
2381 time0 = t;
2383 else if (i == 1)
2385 dt = t-time0;
2386 if (opt->dt > 0.0)
2388 dstep = static_cast<int>(opt->dt/dt+0.5);
2389 if (dstep == 0)
2391 dstep = 1;
2394 if (!bGetMinMax)
2396 window->dt = dt*dstep;
2400 dt_ok = (i%dstep == 0);
2401 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2402 /*if (opt->verbose)
2403 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2404 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2405 if (timeok)
2407 /* Note: if groupsel == NULL:
2408 * all groups in pullf/x file are stored in this window, and gUsed == g
2409 * if groupsel != NULL:
2410 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2412 gUsed = -1;
2413 for (g = 0; g < header->npullcrds; ++g)
2415 /* was this group selected for application in WHAM? */
2416 if (groupsel && (groupsel->bUse[g] == FALSE))
2418 continue;
2421 gUsed++;
2423 if (bHaveForce)
2425 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2426 force = y[g+1][i];
2427 pos = -force/header->k[g] + header->umbInitDist[g];
2429 else
2431 /* pick the right column from:
2432 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2434 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2435 pos = y[column][i];
2438 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2439 if (bGetMinMax)
2441 if (pos < minfound)
2443 minfound = pos;
2445 if (pos > maxfound)
2447 maxfound = pos;
2450 else
2452 if (gUsed >= window->nPull)
2454 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2455 gUsed, window->nPull);
2458 if (opt->bCalcTauInt && !bGetMinMax)
2460 /* save time series for autocorrelation analysis */
2461 ntot = window->Ntot[gUsed];
2462 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2463 if (ntot >= lennow[gUsed])
2465 lennow[gUsed] += blocklen;
2466 srenew(window->ztime[gUsed], lennow[gUsed]);
2468 window->ztime[gUsed][ntot] = pos;
2471 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2472 if (opt->bCycl)
2474 if (ibin < 0)
2476 while ( (ibin += bins) < 0)
2481 else if (ibin >= bins)
2483 while ( (ibin -= bins) >= bins)
2489 if (ibin >= 0 && ibin < bins)
2491 window->Histo[gUsed][ibin] += 1.;
2492 window->N[gUsed]++;
2494 window->Ntot[gUsed]++;
2498 else if (t > opt->tmax)
2500 if (opt->verbose)
2502 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2504 break;
2508 if (bGetMinMax)
2510 *mintmp = minfound;
2511 *maxtmp = maxfound;
2513 sfree(lennow);
2514 for (i = 0; i < ny; i++)
2516 sfree(y[i]);
2520 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2521 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2522 t_UmbrellaHeader* header,
2523 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2525 int i;
2526 real mintmp, maxtmp;
2528 printf("Reading %d tpr and pullf files\n", nfiles/2);
2530 /* min and max not given? */
2531 if (opt->bAuto)
2533 printf("Automatic determination of boundaries...\n");
2534 opt->min = 1e20;
2535 opt->max = -1e20;
2536 for (i = 0; i < nfiles; i++)
2538 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2540 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2542 read_tpr_header(fnTprs[i], header, opt);
2543 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2545 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2547 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2548 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2549 if (maxtmp > opt->max)
2551 opt->max = maxtmp;
2553 if (mintmp < opt->min)
2555 opt->min = mintmp;
2558 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2559 if (opt->bBoundsOnly)
2561 printf("Found option -boundsonly, now exiting.\n");
2562 exit (0);
2565 /* store stepsize in profile */
2566 opt->dz = (opt->max-opt->min)/opt->bins;
2568 for (i = 0; i < nfiles; i++)
2570 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2572 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2574 read_tpr_header(fnTprs[i], header, opt);
2575 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2577 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2579 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2580 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2581 if (window[i].Ntot[0] == 0)
2583 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2587 for (i = 0; i < nfiles; i++)
2589 sfree(fnTprs[i]);
2590 sfree(fnPull[i]);
2592 sfree(fnTprs);
2593 sfree(fnPull);
2596 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2598 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2599 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2601 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2603 int nlines, ny, i, ig;
2604 double **iact;
2606 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2607 nlines = read_xvg(fn, &iact, &ny);
2608 if (nlines != nwins)
2610 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2611 nlines, fn, nwins);
2613 for (i = 0; i < nlines; i++)
2615 if (window[i].nPull != ny)
2617 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2618 "number of pull groups is different in different simulations. That is not\n"
2619 "supported yet. Sorry.\n");
2621 for (ig = 0; ig < window[i].nPull; ig++)
2623 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2624 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2626 if (iact[ig][i] <= 0.0)
2628 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2635 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2637 * This is useful
2638 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2639 * that -in case of limited sampling- the ACT may be severely underestimated.
2640 * Note: the g=1+2tau are overwritten.
2641 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2642 * by the smoothing
2644 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2646 int i, ig, j, jg;
2647 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2649 /* only evaluate within +- 3sigma of the Gausian */
2650 siglim = 3.0*opt->sigSmoothIact;
2651 siglim2 = dsqr(siglim);
2652 /* pre-factor of Gaussian */
2653 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2654 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2656 for (i = 0; i < nwins; i++)
2658 snew(window[i].tausmooth, window[i].nPull);
2659 for (ig = 0; ig < window[i].nPull; ig++)
2661 tausm = 0.;
2662 weight = 0;
2663 pos = window[i].pos[ig];
2664 for (j = 0; j < nwins; j++)
2666 for (jg = 0; jg < window[j].nPull; jg++)
2668 dpos2 = dsqr(window[j].pos[jg]-pos);
2669 if (dpos2 < siglim2)
2671 w = gaufact*std::exp(-dpos2*invtwosig2);
2672 weight += w;
2673 tausm += w*window[j].tau[jg];
2674 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2675 w,dpos2,pos,gaufact,invtwosig2); */
2679 tausm /= weight;
2680 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2682 window[i].tausmooth[ig] = tausm;
2684 else
2686 window[i].tausmooth[ig] = window[i].tau[ig];
2688 window[i].g[ig] = 1+2*tausm/window[i].dt;
2693 //! Stop integrating autoccorelation function when ACF drops under this value
2694 #define WHAM_AC_ZERO_LIMIT 0.05
2696 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2698 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2699 t_UmbrellaOptions *opt, const char *fn)
2701 int i, ig, ncorr, ntot, j, k, *count, restart;
2702 real *corr, c0, dt, tmp;
2703 real *ztime, av, tausteps;
2704 FILE *fp, *fpcorr = 0;
2706 if (opt->verbose)
2708 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2709 "time [ps]", "autocorrelation function", opt->oenv);
2712 printf("\n");
2713 for (i = 0; i < nwins; i++)
2715 printf("\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2716 fflush(stdout);
2717 ntot = window[i].Ntot[0];
2719 /* using half the maximum time as length of autocorrelation function */
2720 ncorr = ntot/2;
2721 if (ntot < 10)
2723 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2724 " points. Provide more pull data!", ntot);
2726 snew(corr, ncorr);
2727 /* snew(corrSq,ncorr); */
2728 snew(count, ncorr);
2729 dt = window[i].dt;
2730 snew(window[i].tau, window[i].nPull);
2731 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2732 if (restart == 0)
2734 restart = 1;
2737 for (ig = 0; ig < window[i].nPull; ig++)
2739 if (ntot != window[i].Ntot[ig])
2741 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2742 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2744 ztime = window[i].ztime[ig];
2746 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2747 for (j = 0, av = 0; (j < ntot); j++)
2749 av += ztime[j];
2751 av /= ntot;
2752 for (k = 0; (k < ncorr); k++)
2754 corr[k] = 0.;
2755 count[k] = 0;
2757 for (j = 0; (j < ntot); j += restart)
2759 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2761 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2762 corr [k] += tmp;
2763 /* corrSq[k] += tmp*tmp; */
2764 count[k]++;
2767 /* divide by nr of frames for each time displacement */
2768 for (k = 0; (k < ncorr); k++)
2770 /* count probably = (ncorr-k+(restart-1))/restart; */
2771 corr[k] = corr[k]/count[k];
2772 /* variance of autocorrelation function */
2773 /* corrSq[k]=corrSq[k]/count[k]; */
2775 /* normalize such that corr[0] == 0 */
2776 c0 = 1./corr[0];
2777 for (k = 0; (k < ncorr); k++)
2779 corr[k] *= c0;
2780 /* corrSq[k]*=c0*c0; */
2783 /* write ACFs in verbose mode */
2784 if (fpcorr)
2786 for (k = 0; (k < ncorr); k++)
2788 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2790 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2793 /* esimate integrated correlation time, fitting is too unstable */
2794 tausteps = 0.5*corr[0];
2795 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2796 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2798 tausteps += corr[j];
2801 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2802 Kumar et al, eq. 28 ff. */
2803 window[i].tau[ig] = tausteps*dt;
2804 window[i].g[ig] = 1+2*tausteps;
2805 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2806 } /* ig loop */
2807 sfree(corr);
2808 sfree(count);
2810 printf(" done\n");
2811 if (fpcorr)
2813 xvgrclose(fpcorr);
2816 /* plot IACT along reaction coordinate */
2817 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2818 if (output_env_get_print_xvgr_codes(opt->oenv))
2820 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2821 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2822 for (i = 0; i < nwins; i++)
2824 fprintf(fp, "# %3d ", i);
2825 for (ig = 0; ig < window[i].nPull; ig++)
2827 fprintf(fp, " %11g", window[i].tau[ig]);
2829 fprintf(fp, "\n");
2832 for (i = 0; i < nwins; i++)
2834 for (ig = 0; ig < window[i].nPull; ig++)
2836 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2839 if (opt->sigSmoothIact > 0.0)
2841 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2842 opt->sigSmoothIact);
2843 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2844 smoothIact(window, nwins, opt);
2845 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2846 if (output_env_get_print_xvgr_codes(opt->oenv))
2848 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2849 fprintf(fp, "@ s1 symbol color 2\n");
2851 for (i = 0; i < nwins; i++)
2853 for (ig = 0; ig < window[i].nPull; ig++)
2855 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2859 xvgrclose(fp);
2860 printf("Wrote %s\n", fn);
2863 /*! \brief
2864 * compute average and sigma of each umbrella histogram
2866 void averageSigma(t_UmbrellaWindow *window, int nwins)
2868 int i, ig, ntot, k;
2869 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2871 for (i = 0; i < nwins; i++)
2873 snew(window[i].aver, window[i].nPull);
2874 snew(window[i].sigma, window[i].nPull);
2876 ntot = window[i].Ntot[0];
2877 for (ig = 0; ig < window[i].nPull; ig++)
2879 ztime = window[i].ztime[ig];
2880 for (k = 0, av = 0.; k < ntot; k++)
2882 av += ztime[k];
2884 av /= ntot;
2885 for (k = 0, sum2 = 0.; k < ntot; k++)
2887 diff = ztime[k]-av;
2888 sum2 += diff*diff;
2890 sig = std::sqrt(sum2/ntot);
2891 window[i].aver[ig] = av;
2893 /* Note: This estimate for sigma is biased from the limited sampling.
2894 Correct sigma by n/(n-1) where n = number of independent
2895 samples. Only possible if IACT is known.
2897 if (window[i].tau)
2899 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2900 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2902 else
2904 window[i].sigma[ig] = sig;
2906 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2912 /*! \brief
2913 * Use histograms to compute average force on pull group.
2915 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2917 int i, j, bins = opt->bins, k;
2918 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2919 double posmirrored;
2921 dz = (max-min)/bins;
2922 ztot = opt->max-min;
2923 ztot_half = ztot/2;
2925 /* Compute average displacement from histograms */
2926 for (j = 0; j < nWindows; ++j)
2928 snew(window[j].forceAv, window[j].nPull);
2929 for (k = 0; k < window[j].nPull; ++k)
2931 displAv = 0.0;
2932 weight = 0.0;
2933 for (i = 0; i < opt->bins; ++i)
2935 temp = (1.0*i+0.5)*dz+min;
2936 distance = temp - window[j].pos[k];
2937 if (opt->bCycl)
2938 { /* in cyclic wham: */
2939 if (distance > ztot_half) /* |distance| < ztot_half */
2941 distance -= ztot;
2943 else if (distance < -ztot_half)
2945 distance += ztot;
2948 w = window[j].Histo[k][i]/window[j].g[k];
2949 displAv += w*distance;
2950 weight += w;
2951 /* Are we near min or max? We are getting wrong forces from the histgrams since
2952 the histograms are zero outside [min,max). Therefore, assume that the position
2953 on the other side of the histomgram center is equally likely. */
2954 if (!opt->bCycl)
2956 posmirrored = window[j].pos[k]-distance;
2957 if (posmirrored >= max || posmirrored < min)
2959 displAv += -w*distance;
2960 weight += w;
2964 displAv /= weight;
2966 /* average force from average displacement */
2967 window[j].forceAv[k] = displAv*window[j].k[k];
2968 /* sigma from average square displacement */
2969 /* window[j].sigma [k] = sqrt(displAv2); */
2970 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2975 /*! \brief
2976 * Check if the complete reaction coordinate is covered by the histograms
2978 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2979 t_UmbrellaOptions *opt)
2981 int i, ig, j, bins = opt->bins, bBoundary;
2982 real avcount = 0, z, relcount, *count;
2983 snew(count, opt->bins);
2985 for (j = 0; j < opt->bins; ++j)
2987 for (i = 0; i < nwins; i++)
2989 for (ig = 0; ig < window[i].nPull; ig++)
2991 count[j] += window[i].Histo[ig][j];
2994 avcount += 1.0*count[j];
2996 avcount /= bins;
2997 for (j = 0; j < bins; ++j)
2999 relcount = count[j]/avcount;
3000 z = (j+0.5)*opt->dz+opt->min;
3001 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3002 /* check for bins with no data */
3003 if (count[j] == 0)
3005 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3006 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3008 /* and check for poor sampling */
3009 else if (relcount < 0.005 && !bBoundary)
3011 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3014 sfree(count);
3017 /*! \brief Compute initial potential by integrating the average force
3019 * This speeds up the convergence by roughly a factor of 2
3021 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3023 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3024 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3025 FILE *fp;
3027 dz = (opt->max-min)/bins;
3029 printf("Getting initial potential by integration.\n");
3031 /* Compute average displacement from histograms */
3032 computeAverageForce(window, nWindows, opt);
3034 /* Get force for each bin from all histograms in this bin, or, alternatively,
3035 if no histograms are inside this bin, from the closest histogram */
3036 snew(pot, bins);
3037 snew(f, bins);
3038 for (j = 0; j < opt->bins; ++j)
3040 pos = (1.0*j+0.5)*dz+min;
3041 nHist = 0;
3042 fAv = 0.;
3043 distmin = 1e20;
3044 groupmin = winmin = 0;
3045 for (i = 0; i < nWindows; i++)
3047 for (ig = 0; ig < window[i].nPull; ig++)
3049 hispos = window[i].pos[ig];
3050 dist = std::abs(hispos-pos);
3051 /* average force within bin */
3052 if (dist < dz/2)
3054 nHist++;
3055 fAv += window[i].forceAv[ig];
3057 /* at the same time, remember closest histogram */
3058 if (dist < distmin)
3060 winmin = i;
3061 groupmin = ig;
3062 distmin = dist;
3066 /* if no histogram found in this bin, use closest histogram */
3067 if (nHist > 0)
3069 fAv = fAv/nHist;
3071 else
3073 fAv = window[winmin].forceAv[groupmin];
3075 f[j] = fAv;
3077 for (j = 1; j < opt->bins; ++j)
3079 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3082 /* cyclic wham: linearly correct possible offset */
3083 if (opt->bCycl)
3085 diff = (pot[bins-1]-pot[0])/(bins-1);
3086 for (j = 1; j < opt->bins; ++j)
3088 pot[j] -= j*diff;
3091 if (opt->verbose)
3093 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3094 for (j = 0; j < opt->bins; ++j)
3096 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3098 xvgrclose(fp);
3099 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3102 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3103 energy offsets which are usually determined by wham
3104 First: turn pot into probabilities:
3106 for (j = 0; j < opt->bins; ++j)
3108 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3110 calc_z(pot, window, nWindows, opt, TRUE);
3112 sfree(pot);
3113 sfree(f);
3116 //! Count number of words in an line
3117 static int wordcount(char *ptr)
3119 int i, n, is[2];
3120 int cur = 0;
3122 if (std::strlen(ptr) == 0)
3124 return 0;
3126 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3127 n = 1;
3128 for (i = 0; (ptr[i] != '\0'); i++)
3130 is[cur] = isspace(ptr[i]);
3131 if ((i > 0) && (is[cur] && !is[1-cur]))
3133 n++;
3135 cur = 1-cur;
3137 return n;
3140 /*! \brief Read input file for pull group selection (option -is)
3142 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3144 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3146 FILE *fp;
3147 int i, iline, n, len = STRLEN, temp;
3148 char *ptr = 0, *tmpbuf = 0;
3149 char fmt[1024], fmtign[1024];
3150 int block = 1, sizenow;
3152 fp = gmx_ffopen(opt->fnGroupsel, "r");
3153 opt->groupsel = NULL;
3155 snew(tmpbuf, len);
3156 sizenow = 0;
3157 iline = 0;
3158 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3160 trim(ptr);
3161 n = wordcount(ptr);
3163 if (iline >= sizenow)
3165 sizenow += block;
3166 srenew(opt->groupsel, sizenow);
3168 opt->groupsel[iline].n = n;
3169 opt->groupsel[iline].nUse = 0;
3170 snew(opt->groupsel[iline].bUse, n);
3172 fmtign[0] = '\0';
3173 for (i = 0; i < n; i++)
3175 std::strcpy(fmt, fmtign);
3176 std::strcat(fmt, "%d");
3177 if (sscanf(ptr, fmt, &temp))
3179 opt->groupsel[iline].bUse[i] = (temp > 0);
3180 if (opt->groupsel[iline].bUse[i])
3182 opt->groupsel[iline].nUse++;
3185 std::strcat(fmtign, "%*s");
3187 iline++;
3189 opt->nGroupsel = iline;
3190 if (nTpr != opt->nGroupsel)
3192 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3193 opt->fnGroupsel);
3196 printf("\nUse only these pull groups:\n");
3197 for (iline = 0; iline < nTpr; iline++)
3199 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3200 for (i = 0; i < opt->groupsel[iline].n; i++)
3202 if (opt->groupsel[iline].bUse[i])
3204 printf(" %d", i+1);
3207 printf("\n");
3209 printf("\n");
3211 sfree(tmpbuf);
3214 //! Boolean XOR
3215 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3217 //! Number of elements in fnm (used for command line parsing)
3218 #define NFILE asize(fnm)
3220 //! The main gmx wham routine
3221 int gmx_wham(int argc, char *argv[])
3223 const char *desc[] = {
3224 "[THISMODULE] is an analysis program that implements the Weighted",
3225 "Histogram Analysis Method (WHAM). It is intended to analyze",
3226 "output files generated by umbrella sampling simulations to ",
3227 "compute a potential of mean force (PMF).[PAR]",
3229 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3230 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3231 "and, if multiple coordinates need to be analyzed, all used the same",
3232 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3234 "At present, three input modes are supported.",
3236 "* With option [TT]-it[tt], the user provides a file which contains the",
3237 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3238 " AND, with option [TT]-ix[tt], a file which contains file names of",
3239 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3240 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3241 " first pullx, etc.",
3242 "* Same as the previous input mode, except that the the user",
3243 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3244 " From the pull force the position in the umbrella potential is",
3245 " computed. This does not work with tabulated umbrella potentials.",
3246 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3247 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3248 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3249 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3250 " must be similar to the following::",
3252 " # UMBRELLA 3.0",
3253 " # Component selection: 0 0 1",
3254 " # nSkip 1",
3255 " # Ref. Group 'TestAtom'",
3256 " # Nr. of pull groups 2",
3257 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3258 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3259 " #####",
3261 " The number of pull groups, umbrella positions, force constants, and names ",
3262 " may (of course) differ. Following the header, a time column and ",
3263 " a data column for each pull group follows (i.e. the displacement",
3264 " with respect to the umbrella center). Up to four pull groups are possible ",
3265 " per [REF].pdo[ref] file at present.",
3267 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3268 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3269 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3270 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3271 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3272 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3273 "used, groupsel.dat looks like this::",
3275 " 1 1 0 0",
3276 " 1 1 0 0",
3277 " 1 1 0 0",
3279 "By default, the output files are",
3281 "* [TT]-o[tt] PMF output file",
3282 "* [TT]-hist[tt] Histograms output file",
3284 "Always check whether the histograms sufficiently overlap.[PAR]",
3285 "The umbrella potential is assumed to be harmonic and the force constants are ",
3286 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3287 "a tabulated potential can be provided with [TT]-tab[tt].",
3289 "WHAM options",
3290 "^^^^^^^^^^^^",
3292 "* [TT]-bins[tt] Number of bins used in analysis",
3293 "* [TT]-temp[tt] Temperature in the simulations",
3294 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3295 "* [TT]-auto[tt] Automatic determination of boundaries",
3296 "* [TT]-min,-max[tt] Boundaries of the profile",
3298 "The data points that are used to compute the profile",
3299 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3300 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3301 "umbrella window.[PAR]",
3302 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3303 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3304 "With energy output, the energy in the first bin is defined to be zero. ",
3305 "If you want the free energy at a different ",
3306 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3307 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3308 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3309 "[THISMODULE] will make use of the",
3310 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3311 "reaction coordinate will assumed be be neighbors.[PAR]",
3312 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3313 "which may be useful for, e.g. membranes.",
3315 "Parallelization",
3316 "^^^^^^^^^^^^^^^",
3318 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3320 "Autocorrelations",
3321 "^^^^^^^^^^^^^^^^",
3323 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3324 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3325 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3326 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3327 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3328 "Because the IACTs can be severely underestimated in case of limited ",
3329 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3330 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3331 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3332 "integration of the ACFs while the ACFs are larger 0.05.",
3333 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3334 "less robust) method such as fitting to a double exponential, you can ",
3335 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3336 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3337 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3339 "Error analysis",
3340 "^^^^^^^^^^^^^^",
3342 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3343 "otherwise the statistical error may be substantially underestimated. ",
3344 "More background and examples for the bootstrap technique can be found in ",
3345 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3346 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3347 "Four bootstrapping methods are supported and ",
3348 "selected with [TT]-bs-method[tt].",
3350 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3351 " data points, and the bootstrap is carried out by assigning random weights to the ",
3352 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3353 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3354 " statistical error is underestimated.",
3355 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3356 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3357 " (allowing duplication, i.e. sampling with replacement).",
3358 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3359 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3360 " divided into blocks and only histograms within each block are mixed. Note that ",
3361 " the histograms within each block must be representative for all possible histograms, ",
3362 " otherwise the statistical error is underestimated.",
3363 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3364 " such that the generated data points are distributed according the given histograms ",
3365 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3366 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3367 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3368 " Note that this method may severely underestimate the error in case of limited sampling, ",
3369 " that is if individual histograms do not represent the complete phase space at ",
3370 " the respective positions.",
3371 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3372 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3373 " and width of the umbrella histograms. That method yields similar error estimates ",
3374 " like method [TT]traj[tt].",
3376 "Bootstrapping output:",
3378 "* [TT]-bsres[tt] Average profile and standard deviations",
3379 "* [TT]-bsprof[tt] All bootstrapping profiles",
3381 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3382 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3383 "the histograms."
3386 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3387 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3388 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3389 static t_UmbrellaOptions opt;
3390 static int nthreads = -1;
3392 t_pargs pa[] = {
3393 #ifdef GMX_OPENMP
3394 { "-nt", FALSE, etINT, {&nthreads},
3395 "Number of threads used by gmx wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3396 #endif
3397 { "-min", FALSE, etREAL, {&opt.min},
3398 "Minimum coordinate in profile"},
3399 { "-max", FALSE, etREAL, {&opt.max},
3400 "Maximum coordinate in profile"},
3401 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3402 "Determine min and max automatically"},
3403 { "-bins", FALSE, etINT, {&opt.bins},
3404 "Number of bins in profile"},
3405 { "-temp", FALSE, etREAL, {&opt.Temperature},
3406 "Temperature"},
3407 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3408 "Tolerance"},
3409 { "-v", FALSE, etBOOL, {&opt.verbose},
3410 "Verbose mode"},
3411 { "-b", FALSE, etREAL, {&opt.tmin},
3412 "First time to analyse (ps)"},
3413 { "-e", FALSE, etREAL, {&opt.tmax},
3414 "Last time to analyse (ps)"},
3415 { "-dt", FALSE, etREAL, {&opt.dt},
3416 "Analyse only every dt ps"},
3417 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3418 "Write histograms and exit"},
3419 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3420 "Determine min and max and exit (with [TT]-auto[tt])"},
3421 { "-log", FALSE, etBOOL, {&opt.bLog},
3422 "Calculate the log of the profile before printing"},
3423 { "-unit", FALSE, etENUM, {en_unit},
3424 "Energy unit in case of log output" },
3425 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3426 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3427 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3428 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3429 { "-sym", FALSE, etBOOL, {&opt.bSym},
3430 "Symmetrize profile around z=0"},
3431 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3432 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3433 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3434 "Calculate integrated autocorrelation times and use in wham"},
3435 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3436 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3437 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3438 "When computing autocorrelation functions, restart computing every .. (ps)"},
3439 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3440 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3441 "during smoothing"},
3442 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3443 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3444 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3445 "Bootstrap method" },
3446 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3447 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3448 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3449 "Seed for bootstrapping. (-1 = use time)"},
3450 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3451 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3452 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3453 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3454 { "-stepout", FALSE, etINT, {&opt.stepchange},
3455 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3456 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3457 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3460 t_filenm fnm[] = {
3461 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3462 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3463 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3464 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3465 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3466 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3467 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3468 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3469 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3470 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3471 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3472 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3475 int i, j, l, nfiles, nwins, nfiles2;
3476 t_UmbrellaHeader header;
3477 t_UmbrellaWindow * window = NULL;
3478 double *profile, maxchange = 1e20;
3479 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3480 char **fninTpr, **fninPull, **fninPdo;
3481 const char *fnPull;
3482 FILE *histout, *profout;
3483 char ylabel[256], title[256];
3485 opt.bins = 200;
3486 opt.verbose = FALSE;
3487 opt.bHistOnly = FALSE;
3488 opt.bCycl = FALSE;
3489 opt.tmin = 50;
3490 opt.tmax = 1e20;
3491 opt.dt = 0.0;
3492 opt.min = 0;
3493 opt.max = 0;
3494 opt.bAuto = TRUE;
3495 opt.nGroupsel = 0;
3497 /* bootstrapping stuff */
3498 opt.nBootStrap = 0;
3499 opt.bsMethod = bsMethod_hist;
3500 opt.tauBootStrap = 0.0;
3501 opt.bsSeed = -1;
3502 opt.histBootStrapBlockLength = 8;
3503 opt.bs_verbose = FALSE;
3505 opt.bLog = TRUE;
3506 opt.unit = en_kJ;
3507 opt.zProf0 = 0.;
3508 opt.Temperature = 298;
3509 opt.Tolerance = 1e-6;
3510 opt.bBoundsOnly = FALSE;
3511 opt.bSym = FALSE;
3512 opt.bCalcTauInt = FALSE;
3513 opt.sigSmoothIact = 0.0;
3514 opt.bAllowReduceIact = TRUE;
3515 opt.bInitPotByIntegration = TRUE;
3516 opt.acTrestart = 1.0;
3517 opt.stepchange = 100;
3518 opt.stepUpdateContrib = 100;
3520 if (!parse_common_args(&argc, argv, 0,
3521 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3523 return 0;
3526 opt.unit = nenum(en_unit);
3527 opt.bsMethod = nenum(en_bsMethod);
3529 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3531 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3532 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3533 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3534 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3535 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3536 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3537 if (opt.bTab && opt.bPullf)
3539 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3540 "Provide pullx.xvg or pdo files!");
3543 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3545 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3547 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3549 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3550 "\n\n Check gmx wham -h !");
3553 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3554 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3555 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3556 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3557 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3559 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3560 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3561 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3562 if ( (bMinSet || bMaxSet) && bAutoSet)
3564 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3567 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3569 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3572 if (bMinSet && opt.bAuto)
3574 printf("Note: min and max given, switching off -auto.\n");
3575 opt.bAuto = FALSE;
3578 if (opt.bTauIntGiven && opt.bCalcTauInt)
3580 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3581 "the autocorrelation times. Not both.");
3584 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3586 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3587 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3589 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3591 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3592 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3595 /* Set # of OpenMP threads */
3596 if (nthreads > 0)
3598 gmx_omp_set_num_threads(nthreads);
3600 else
3602 nthreads = gmx_omp_get_max_threads();
3604 if (nthreads > 1)
3606 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3609 /* Reading gmx4 pull output and tpr files */
3610 if (opt.bTpr || opt.bPullf || opt.bPullx)
3612 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3614 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3615 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3616 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3617 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3618 if (nfiles != nfiles2)
3620 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3621 opt.fnTpr, nfiles2, fnPull);
3624 /* Read file that selects the pull group to be used */
3625 if (opt.fnGroupsel != NULL)
3627 readPullGroupSelection(&opt, fninTpr, nfiles);
3630 window = initUmbrellaWindows(nfiles);
3631 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3633 else
3634 { /* reading pdo files */
3635 if (opt.fnGroupsel != NULL)
3637 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3638 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3640 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3641 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3642 window = initUmbrellaWindows(nfiles);
3643 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3645 nwins = nfiles;
3647 /* enforce equal weight for all histograms? */
3648 if (opt.bHistEq)
3650 enforceEqualWeights(window, nwins);
3653 /* write histograms */
3654 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3655 xlabel, "count", opt.oenv);
3656 for (l = 0; l < opt.bins; ++l)
3658 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3659 for (i = 0; i < nwins; ++i)
3661 for (j = 0; j < window[i].nPull; ++j)
3663 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3666 fprintf(histout, "\n");
3668 xvgrclose(histout);
3669 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3670 if (opt.bHistOnly)
3672 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3673 return 0;
3676 /* Using tabulated umbrella potential */
3677 if (opt.bTab)
3679 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3682 /* Integrated autocorrelation times provided ? */
3683 if (opt.bTauIntGiven)
3685 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3688 /* Compute integrated autocorrelation times */
3689 if (opt.bCalcTauInt)
3691 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3694 /* calc average and sigma for each histogram
3695 (maybe required for bootstrapping. If not, this is fast anyhow) */
3696 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3698 averageSigma(window, nwins);
3701 /* Get initial potential by simple integration */
3702 if (opt.bInitPotByIntegration)
3704 guessPotByIntegration(window, nwins, &opt);
3707 /* Check if complete reaction coordinate is covered */
3708 checkReactionCoordinateCovered(window, nwins, &opt);
3710 /* Calculate profile */
3711 snew(profile, opt.bins);
3712 if (opt.verbose)
3714 opt.stepchange = 1;
3716 i = 0;
3719 if ( (i%opt.stepUpdateContrib) == 0)
3721 setup_acc_wham(profile, window, nwins, &opt);
3723 if (maxchange < opt.Tolerance)
3725 bExact = TRUE;
3726 /* if (opt.verbose) */
3727 printf("Switched to exact iteration in iteration %d\n", i);
3729 calc_profile(profile, window, nwins, &opt, bExact);
3730 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3732 printf("\t%4d) Maximum change %e\n", i, maxchange);
3734 i++;
3736 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3737 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3739 /* calc error from Kumar's formula */
3740 /* Unclear how the error propagates along reaction coordinate, therefore
3741 commented out */
3742 /* calc_error_kumar(profile,window, nwins,&opt); */
3744 /* Write profile in energy units? */
3745 if (opt.bLog)
3747 prof_normalization_and_unit(profile, &opt);
3748 std::strcpy(ylabel, en_unit_label[opt.unit]);
3749 std::strcpy(title, "Umbrella potential");
3751 else
3753 std::strcpy(ylabel, "Density of states");
3754 std::strcpy(title, "Density of states");
3757 /* symmetrize profile around z=0? */
3758 if (opt.bSym)
3760 symmetrizeProfile(profile, &opt);
3763 /* write profile or density of states */
3764 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3765 for (i = 0; i < opt.bins; ++i)
3767 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3769 xvgrclose(profout);
3770 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3772 /* Bootstrap Method */
3773 if (opt.nBootStrap)
3775 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3776 opt2fn("-hist", NFILE, fnm),
3777 ylabel, profile, window, nwins, &opt);
3780 sfree(profile);
3781 freeUmbrellaWindows(window, nfiles);
3783 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3784 please_cite(stdout, "Hub2010");
3786 return 0;