Added pull coordinate geometry angle-axis
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blob15a8f4c034bdb535b93a0b4f57f75b45174b2d08
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \internal \file
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
43 #include "gmxpre.h"
45 #include "config.h"
47 #include <cctype>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
53 #include <algorithm>
54 #include <sstream>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/tpxio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/gmxana/gmx_ana.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdtypes/inputrec.h"
64 #include "gromacs/mdtypes/md_enums.h"
65 #include "gromacs/mdtypes/pull-params.h"
66 #include "gromacs/pulling/pull.h"
67 #include "gromacs/random/tabulatednormaldistribution.h"
68 #include "gromacs/random/threefry.h"
69 #include "gromacs/random/uniformintdistribution.h"
70 #include "gromacs/random/uniformrealdistribution.h"
71 #include "gromacs/utility/arraysize.h"
72 #include "gromacs/utility/cstringutil.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/futil.h"
76 #include "gromacs/utility/gmxomp.h"
77 #include "gromacs/utility/pleasecite.h"
78 #include "gromacs/utility/smalloc.h"
80 //! longest file names allowed in input files
81 #define WHAM_MAXFILELEN 2048
83 /*! \brief
84 * enum for energy units
86 enum {
87 enSel, en_kJ, en_kCal, en_kT, enNr
89 /*! \brief
90 * enum for type of input files (pdos, tpr, or pullf)
92 enum {
93 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
96 /*! \brief enum for bootstrapping method
98 * These bootstrap methods are supported:
99 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
100 * (bsMethod_BayesianHist)
101 * - bootstrap complete histograms (bsMethod_hist)
102 * - bootstrap trajectories from given umbrella histograms. This generates new
103 * "synthetic" histograms (bsMethod_traj)
104 * - bootstrap trajectories from Gaussian with mu/sigma computed from
105 * the respective histogram (bsMethod_trajGauss). This gives very similar
106 * results compared to bsMethod_traj.
108 * ********************************************************************
109 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
110 * JS Hub, BL de Groot, D van der Spoel
111 * g_wham - A free weighted histogram analysis implementation including
112 * robust error and autocorrelation estimates,
113 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
114 * ********************************************************************
116 enum {
117 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
118 bsMethod_traj, bsMethod_trajGauss
122 //! Parameters of the umbrella potentials
123 typedef struct
126 * \name Using umbrella pull code since gromacs 4.x
128 /*!\{*/
129 int npullcrds_tot; //!< nr of pull coordinates in tpr file
130 int npullcrds; //!< nr of umbrella pull coordinates for reading
131 int pull_geometry; //!< such as distance, direction
132 ivec pull_dim; //!< pull dimension with geometry distance
133 int pull_ndim; //!< nr of pull_dim != 0
134 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
135 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
136 char coord_units[256]; //!< Units common for all coordinates
137 real *k; //!< force constants in tpr file
138 real *init_dist; //!< reference displacements
139 real *umbInitDist; //!< reference displacement in umbrella direction
140 /*!\}*/
142 * \name Using PDO files common until gromacs 3.x
144 /*!\{*/
145 int nSkip;
146 char Reference[256];
147 int nPull;
148 int nDim;
149 ivec Dims;
150 char PullName[4][256];
151 double UmbPos[4][3];
152 double UmbCons[4][3];
153 /*!\}*/
154 } t_UmbrellaHeader;
156 //! Data in the umbrella histograms
157 typedef struct
159 int nPull; //!< nr of pull groups in this pdo or pullf/x file
160 double **Histo; //!< nPull histograms
161 double **cum; //!< nPull cumulative distribution functions
162 int nBin; //!< nr of bins. identical to opt->bins
163 double *k; //!< force constants for the nPull groups
164 double *pos; //!< umbrella positions for the nPull groups
165 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
166 int *N; //!< nr of data points in nPull histograms.
167 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
169 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
171 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
172 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
174 double *g;
175 double *tau; //!< intetrated autocorrelation time (IACT)
176 double *tausmooth; //!< smoothed IACT
178 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
180 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
181 gmx_bool **bContrib;
182 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
184 /*! \brief average force estimated from average displacement, fAv=dzAv*k
186 * Used for integration to guess the potential.
188 real *forceAv;
189 real *aver; //!< average of histograms
190 real *sigma; //!< stddev of histograms
191 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
192 } t_UmbrellaWindow;
194 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
195 typedef struct
197 int n; //!< total nr of pull groups in this tpr file
198 int nUse; //!< nr of pull groups used
199 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
200 } t_groupselection;
202 //! Parameters of WHAM
203 typedef struct
206 * \name Input stuff
208 /*!\{*/
209 const char *fnTpr, *fnPullf, *fnGroupsel;
210 const char *fnPdo, *fnPullx; //!< file names of input
211 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
212 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
214 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
215 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
216 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
217 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
218 /*!\}*/
220 * \name Basic WHAM options
222 /*!\{*/
223 int bins; //!< nr of bins, min, max, and dz of profile
224 real min, max, dz;
225 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
226 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
227 /*!\}*/
229 * \name Output control
231 /*!\{*/
232 gmx_bool bLog; //!< energy output (instead of probability) for profile
233 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
234 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
235 /*! \brief after wham, set prof to zero at this z-position.
236 * When bootstrapping, set zProf0 to a "stable" reference position.
238 real zProf0;
239 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
241 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
242 gmx_bool bAuto; //!< determine min and max automatically but do not exit
244 gmx_bool verbose; //!< more noisy wham mode
245 int stepchange; //!< print maximum change in prof after how many interations
246 gmx_output_env_t *oenv; //!< xvgr options
247 /*!\}*/
249 * \name Autocorrelation stuff
251 /*!\{*/
252 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
253 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
254 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
255 real acTrestart; //!< when computing ACT, time between restarting points
257 /* \brief Enforce the same weight for each umbella window, that is
258 * calculate with the same number of data points for
259 * each window. That can be reasonable, if the histograms
260 * have different length, but due to autocorrelation,
261 * a longer simulation should not have larger weightin wham.
263 gmx_bool bHistEq;
264 /*!\}*/
267 * \name Bootstrapping stuff
269 /*!\{*/
270 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
272 /* \brief bootstrap method
274 * if == bsMethod_hist, consider complete histograms as independent
275 * data points and, hence, only mix complete histograms.
276 * if == bsMethod_BayesianHist, consider complete histograms
277 * as independent data points, but assign random weights
278 * to the histograms during the bootstrapping ("Bayesian bootstrap")
279 * In case of long correlations (e.g., inside a channel), these
280 * will yield a more realistic error.
281 * if == bsMethod_traj(Gauss), generate synthetic histograms
282 * for each given
283 * histogram by generating an autocorrelated random sequence
284 * that is distributed according to the respective given
285 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
286 * (instead of from the umbrella histogram) to generate a new
287 * histogram.
289 int bsMethod;
291 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
292 real tauBootStrap;
294 /* \brief when mixing histograms, mix only histograms withing blocks
295 long the reaction coordinate xi. Avoids gaps along xi. */
296 int histBootStrapBlockLength;
298 int bsSeed; //!< random seed for bootstrapping
300 /* \brief Write cumulative distribution functions (CDFs) of histograms
301 and write the generated histograms for each bootstrap */
302 gmx_bool bs_verbose;
303 /*!\}*/
305 * \name tabulated umbrella potential stuff
307 /*!\{*/
308 gmx_bool bTab;
309 double *tabX, *tabY, tabMin, tabMax, tabDz;
310 int tabNbins;
311 /*!\}*/
312 gmx::DefaultRandomEngine rng; //!< gromacs random number generator
313 gmx::TabulatedNormalDistribution<> normalDistribution; //!< Uses default: real output, 14-bit table
314 } t_UmbrellaOptions;
316 //! Make an umbrella window (may contain several histograms)
317 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
319 t_UmbrellaWindow *win;
320 int i;
321 snew(win, nwin);
322 for (i = 0; i < nwin; i++)
324 win[i].Histo = win[i].cum = 0;
325 win[i].k = win[i].pos = win[i].z = 0;
326 win[i].N = win[i].Ntot = 0;
327 win[i].g = win[i].tau = win[i].tausmooth = 0;
328 win[i].bContrib = 0;
329 win[i].ztime = 0;
330 win[i].forceAv = 0;
331 win[i].aver = win[i].sigma = 0;
332 win[i].bsWeight = 0;
334 return win;
337 //! Delete an umbrella window (may contain several histograms)
338 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
340 int i, j;
341 for (i = 0; i < nwin; i++)
343 if (win[i].Histo)
345 for (j = 0; j < win[i].nPull; j++)
347 sfree(win[i].Histo[j]);
350 if (win[i].cum)
352 for (j = 0; j < win[i].nPull; j++)
354 sfree(win[i].cum[j]);
357 if (win[i].bContrib)
359 for (j = 0; j < win[i].nPull; j++)
361 sfree(win[i].bContrib[j]);
364 sfree(win[i].Histo);
365 sfree(win[i].cum);
366 sfree(win[i].k);
367 sfree(win[i].pos);
368 sfree(win[i].z);
369 sfree(win[i].N);
370 sfree(win[i].Ntot);
371 sfree(win[i].g);
372 sfree(win[i].tau);
373 sfree(win[i].tausmooth);
374 sfree(win[i].bContrib);
375 sfree(win[i].ztime);
376 sfree(win[i].forceAv);
377 sfree(win[i].aver);
378 sfree(win[i].sigma);
379 sfree(win[i].bsWeight);
381 sfree(win);
384 /*! \brief
385 * Read and setup tabulated umbrella potential
387 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
389 int i, ny, nl;
390 double **y;
392 printf("Setting up tabulated potential from file %s\n", fn);
393 nl = read_xvg(fn, &y, &ny);
394 opt->tabNbins = nl;
395 if (ny != 2)
397 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
399 opt->tabMin = y[0][0];
400 opt->tabMax = y[0][nl-1];
401 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
402 if (opt->tabDz <= 0)
404 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
405 "ascending z-direction", fn);
407 for (i = 0; i < nl-1; i++)
409 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
411 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
414 snew(opt->tabY, nl);
415 snew(opt->tabX, nl);
416 for (i = 0; i < nl; i++)
418 opt->tabX[i] = y[0][i];
419 opt->tabY[i] = y[1][i];
421 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
422 opt->tabMin, opt->tabMax, opt->tabDz);
425 //! Read the header of an PDO file (position, force const, nr of groups)
426 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
428 char line[2048];
429 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
430 int i;
431 std::istringstream ist;
433 /* line 1 */
434 if (fgets(line, 2048, file) == NULL)
436 gmx_fatal(FARGS, "Error reading header from pdo file\n");
438 ist.str(line);
439 ist >> Buffer0 >> Buffer1 >> Buffer2;
440 if (std::strcmp(Buffer1, "UMBRELLA"))
442 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
443 "(Found in first line: `%s')\n",
444 Buffer1, "UMBRELLA", line);
446 if (std::strcmp(Buffer2, "3.0"))
448 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
451 /* line 2 */
452 if (fgets(line, 2048, file) == NULL)
454 gmx_fatal(FARGS, "Error reading header from pdo file\n");
456 ist.str(line);
457 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
458 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
460 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
461 if (header->nDim != 1)
463 gmx_fatal(FARGS, "Currently only supports one dimension");
466 /* line3 */
467 if (fgets(line, 2048, file) == NULL)
469 gmx_fatal(FARGS, "Error reading header from pdo file\n");
471 ist.str(line);
472 ist >> Buffer0 >> Buffer1 >> header->nSkip;
474 /* line 4 */
475 if (fgets(line, 2048, file) == NULL)
477 gmx_fatal(FARGS, "Error reading header from pdo file\n");
479 ist.str(line);
480 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
482 /* line 5 */
483 if (fgets(line, 2048, file) == NULL)
485 gmx_fatal(FARGS, "Error reading header from pdo file\n");
487 ist.str(line);
488 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
490 if (opt->verbose)
492 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
493 header->Reference);
496 for (i = 0; i < header->nPull; ++i)
498 if (fgets(line, 2048, file) == NULL)
500 gmx_fatal(FARGS, "Error reading header from pdo file\n");
502 ist.str(line);
503 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
504 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
505 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
507 if (opt->verbose)
509 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
510 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
514 if (fgets(line, 2048, file) == NULL)
516 gmx_fatal(FARGS, "Cannot read from file\n");
518 ist.str(line);
519 ist >> Buffer3;
520 if (std::strcmp(Buffer3, "#####") != 0)
522 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
526 //! smarter fgets
527 static char *fgets3(FILE *fp, char ptr[], int *len)
529 char *p;
530 int slen;
532 if (fgets(ptr, *len-1, fp) == NULL)
534 return NULL;
536 p = ptr;
537 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
539 /* This line is longer than len characters, let's increase len! */
540 *len += STRLEN;
541 p += STRLEN;
542 srenew(ptr, *len);
543 if (fgets(p-1, STRLEN, fp) == NULL)
545 break;
548 slen = std::strlen(ptr);
549 if (ptr[slen-1] == '\n')
551 ptr[slen-1] = '\0';
554 return ptr;
557 /*! \brief Read the data columns of and PDO file.
559 * TO DO: Get rid of the scanf function to avoid the clang warning.
560 * At the moment, this warning is avoided by hiding the format string
561 * the variable fmtlf.
563 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
564 int fileno, t_UmbrellaWindow * win,
565 t_UmbrellaOptions *opt,
566 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
568 int i, inttemp, bins, count, ntot;
569 real minval, maxval, minfound = 1e20, maxfound = -1e20;
570 double temp, time, time0 = 0, dt;
571 char *ptr = 0;
572 t_UmbrellaWindow * window = 0;
573 gmx_bool timeok, dt_ok = 1;
574 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
575 int len = STRLEN, dstep = 1;
576 const int blocklen = 4096;
577 int *lennow = 0;
579 if (!bGetMinMax)
581 bins = opt->bins;
582 minval = opt->min;
583 maxval = opt->max;
585 window = win+fileno;
586 /* Need to alocate memory and set up structure */
587 window->nPull = header->nPull;
588 window->nBin = bins;
590 snew(window->Histo, window->nPull);
591 snew(window->z, window->nPull);
592 snew(window->k, window->nPull);
593 snew(window->pos, window->nPull);
594 snew(window->N, window->nPull);
595 snew(window->Ntot, window->nPull);
596 snew(window->g, window->nPull);
597 snew(window->bsWeight, window->nPull);
599 window->bContrib = 0;
601 if (opt->bCalcTauInt)
603 snew(window->ztime, window->nPull);
605 else
607 window->ztime = 0;
609 snew(lennow, window->nPull);
611 for (i = 0; i < window->nPull; ++i)
613 window->z[i] = 1;
614 window->bsWeight[i] = 1.;
615 snew(window->Histo[i], bins);
616 window->k[i] = header->UmbCons[i][0];
617 window->pos[i] = header->UmbPos[i][0];
618 window->N[i] = 0;
619 window->Ntot[i] = 0;
620 window->g[i] = 1.;
621 if (opt->bCalcTauInt)
623 window->ztime[i] = 0;
627 /* Done with setup */
629 else
631 minfound = 1e20;
632 maxfound = -1e20;
633 minval = maxval = bins = 0; /* Get rid of warnings */
636 count = 0;
637 snew(tmpbuf, len);
638 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
640 trim(ptr);
642 if (ptr[0] == '#' || std::strlen(ptr) < 2)
644 continue;
647 /* Initiate format string */
648 fmtign[0] = '\0';
649 std::strcat(fmtign, "%*s");
651 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
652 /* Round time to fs */
653 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
655 /* get time step of pdo file */
656 if (count == 0)
658 time0 = time;
660 else if (count == 1)
662 dt = time-time0;
663 if (opt->dt > 0.0)
665 dstep = static_cast<int>(opt->dt/dt+0.5);
666 if (dstep == 0)
668 dstep = 1;
671 if (!bGetMinMax)
673 window->dt = dt*dstep;
676 count++;
678 dt_ok = ((count-1)%dstep == 0);
679 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
680 /* if (opt->verbose)
681 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
682 time,opt->tmin, opt->tmax, dt_ok,timeok); */
684 if (timeok)
686 for (i = 0; i < header->nPull; ++i)
688 std::strcpy(fmt, fmtign);
689 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
690 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
691 if (sscanf(ptr, fmt, &temp))
693 temp += header->UmbPos[i][0];
694 if (bGetMinMax)
696 if (temp < minfound)
698 minfound = temp;
700 if (temp > maxfound)
702 maxfound = temp;
705 else
707 if (opt->bCalcTauInt)
709 /* save time series for autocorrelation analysis */
710 ntot = window->Ntot[i];
711 if (ntot >= lennow[i])
713 lennow[i] += blocklen;
714 srenew(window->ztime[i], lennow[i]);
716 window->ztime[i][ntot] = temp;
719 temp -= minval;
720 temp /= (maxval-minval);
721 temp *= bins;
722 temp = std::floor(temp);
724 inttemp = static_cast<int> (temp);
725 if (opt->bCycl)
727 if (inttemp < 0)
729 inttemp += bins;
731 else if (inttemp >= bins)
733 inttemp -= bins;
737 if (inttemp >= 0 && inttemp < bins)
739 window->Histo[i][inttemp] += 1.;
740 window->N[i]++;
742 window->Ntot[i]++;
747 if (time > opt->tmax)
749 if (opt->verbose)
751 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
753 break;
757 if (bGetMinMax)
759 *mintmp = minfound;
760 *maxtmp = maxfound;
763 sfree(lennow);
764 sfree(tmpbuf);
767 /*! \brief Set identical weights for all histograms
769 * Normally, the weight is given by the number data points in each
770 * histogram, together with the autocorrelation time. This can be overwritten
771 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
772 * an appropriate autocorrelation times instead of using this function.
774 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
776 int i, k, j, NEnforced;
777 double ratio;
779 NEnforced = window[0].Ntot[0];
780 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
781 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
782 /* enforce all histograms to have the same weight as the very first histogram */
784 for (j = 0; j < nWindows; ++j)
786 for (k = 0; k < window[j].nPull; ++k)
788 ratio = 1.0*NEnforced/window[j].Ntot[k];
789 for (i = 0; i < window[0].nBin; ++i)
791 window[j].Histo[k][i] *= ratio;
793 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
798 /*! \brief Simple linear interpolation between two given tabulated points
800 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
802 int jl, ju;
803 double pl, pu, dz, dp;
805 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
806 ju = jl+1;
807 if (jl < 0 || ju >= opt->tabNbins)
809 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
810 "Provide an extended table.", dist, jl, ju);
812 pl = opt->tabY[jl];
813 pu = opt->tabY[ju];
814 dz = dist-opt->tabX[jl];
815 dp = (pu-pl)*dz/opt->tabDz;
816 return pl+dp;
820 /*! \brief
821 * Check which bins substiantially contribute (accelerates WHAM)
823 * Don't worry, that routine does not mean we compute the PMF in limited precision.
824 * After rapid convergence (using only substiantal contributions), we always switch to
825 * full precision.
827 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
828 t_UmbrellaOptions *opt)
830 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
831 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
832 gmx_bool bAnyContrib;
833 static int bFirst = 1;
834 static double wham_contrib_lim;
836 if (bFirst)
838 for (i = 0; i < nWindows; ++i)
840 nGrptot += window[i].nPull;
842 wham_contrib_lim = opt->Tolerance/nGrptot;
845 ztot = opt->max-opt->min;
846 ztot_half = ztot/2;
848 for (i = 0; i < nWindows; ++i)
850 if (!window[i].bContrib)
852 snew(window[i].bContrib, window[i].nPull);
854 for (j = 0; j < window[i].nPull; ++j)
856 if (!window[i].bContrib[j])
858 snew(window[i].bContrib[j], opt->bins);
860 bAnyContrib = FALSE;
861 for (k = 0; k < opt->bins; ++k)
863 temp = (1.0*k+0.5)*dz+min;
864 distance = temp - window[i].pos[j]; /* distance to umbrella center */
865 if (opt->bCycl)
866 { /* in cyclic wham: */
867 if (distance > ztot_half) /* |distance| < ztot_half */
869 distance -= ztot;
871 else if (distance < -ztot_half)
873 distance += ztot;
876 /* Note: there are two contributions to bin k in the wham equations:
877 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
878 ii) exp(- U/(BOLTZ*opt->Temperature))
879 where U is the umbrella potential
880 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
883 if (!opt->bTab)
885 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
887 else
889 U = tabulated_pot(distance, opt); /* Use tabulated potential */
891 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
892 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
893 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
894 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
895 if (window[i].bContrib[j][k])
897 nContrib++;
899 nTot++;
901 /* If this histo is far outside min and max all bContrib may be FALSE,
902 causing a floating point exception later on. To avoid that, switch
903 them all to true.*/
904 if (!bAnyContrib)
906 for (k = 0; k < opt->bins; ++k)
908 window[i].bContrib[j][k] = TRUE;
913 if (bFirst)
915 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
916 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
919 if (opt->verbose)
921 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
922 nContrib, nTot);
924 bFirst = 0;
927 //! Compute the PMF (one of the two main WHAM routines)
928 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
929 t_UmbrellaOptions *opt, gmx_bool bExact)
931 double ztot_half, ztot, min = opt->min, dz = opt->dz;
933 ztot = opt->max-opt->min;
934 ztot_half = ztot/2;
936 #pragma omp parallel
940 int nthreads = gmx_omp_get_max_threads();
941 int thread_id = gmx_omp_get_thread_num();
942 int i;
943 int i0 = thread_id*opt->bins/nthreads;
944 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
946 for (i = i0; i < i1; ++i)
948 int j, k;
949 double num, denom, invg, temp = 0, distance, U = 0;
950 num = denom = 0.;
951 for (j = 0; j < nWindows; ++j)
953 for (k = 0; k < window[j].nPull; ++k)
955 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
956 temp = (1.0*i+0.5)*dz+min;
957 num += invg*window[j].Histo[k][i];
959 if (!(bExact || window[j].bContrib[k][i]))
961 continue;
963 distance = temp - window[j].pos[k]; /* distance to umbrella center */
964 if (opt->bCycl)
965 { /* in cyclic wham: */
966 if (distance > ztot_half) /* |distance| < ztot_half */
968 distance -= ztot;
970 else if (distance < -ztot_half)
972 distance += ztot;
976 if (!opt->bTab)
978 U = 0.5*window[j].k[k]*gmx::square(distance); /* harmonic potential assumed. */
980 else
982 U = tabulated_pot(distance, opt); /* Use tabulated potential */
984 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
987 profile[i] = num/denom;
990 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
994 //! Compute the free energy offsets z (one of the two main WHAM routines)
995 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
996 t_UmbrellaOptions *opt, gmx_bool bExact)
998 double min = opt->min, dz = opt->dz, ztot_half, ztot;
999 double maxglob = -1e20;
1001 ztot = opt->max-opt->min;
1002 ztot_half = ztot/2;
1004 #pragma omp parallel
1008 int nthreads = gmx_omp_get_max_threads();
1009 int thread_id = gmx_omp_get_thread_num();
1010 int i;
1011 int i0 = thread_id*nWindows/nthreads;
1012 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1013 double maxloc = -1e20;
1015 for (i = i0; i < i1; ++i)
1017 double total = 0, temp, distance, U = 0;
1018 int j, k;
1020 for (j = 0; j < window[i].nPull; ++j)
1022 total = 0;
1023 for (k = 0; k < window[i].nBin; ++k)
1025 if (!(bExact || window[i].bContrib[j][k]))
1027 continue;
1029 temp = (1.0*k+0.5)*dz+min;
1030 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1031 if (opt->bCycl)
1032 { /* in cyclic wham: */
1033 if (distance > ztot_half) /* |distance| < ztot_half */
1035 distance -= ztot;
1037 else if (distance < -ztot_half)
1039 distance += ztot;
1043 if (!opt->bTab)
1045 U = 0.5*window[i].k[j]*gmx::square(distance); /* harmonic potential assumed. */
1047 else
1049 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1051 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1053 /* Avoid floating point exception if window is far outside min and max */
1054 if (total != 0.0)
1056 total = -std::log(total);
1058 else
1060 total = 1000.0;
1062 temp = std::abs(total - window[i].z[j]);
1063 if (temp > maxloc)
1065 maxloc = temp;
1067 window[i].z[j] = total;
1070 /* Now get maximum maxloc from the threads and put in maxglob */
1071 if (maxloc > maxglob)
1073 #pragma omp critical
1075 if (maxloc > maxglob)
1077 maxglob = maxloc;
1082 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1085 return maxglob;
1088 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1089 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1091 int i, j, bins = opt->bins;
1092 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1093 double z, z1;
1095 if (min > 0. || max < 0.)
1097 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1098 opt->min, opt->max);
1101 snew(prof2, bins);
1103 for (i = 0; i < bins; i++)
1105 z = min+(i+0.5)*dz;
1106 zsym = -z;
1107 /* bin left of zsym */
1108 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1109 if (j >= 0 && (j+1) < bins)
1111 /* interpolate profile linearly between bins j and j+1 */
1112 z1 = min+(j+0.5)*dz;
1113 deltaz = zsym-z1;
1114 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1115 /* average between left and right */
1116 prof2[i] = 0.5*(profsym+profile[i]);
1118 else
1120 prof2[i] = profile[i];
1124 std::memcpy(profile, prof2, bins*sizeof(double));
1125 sfree(prof2);
1128 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1129 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1131 int i, bins, imin;
1132 double unit_factor = 1., diff;
1134 bins = opt->bins;
1136 /* Not log? Nothing to do! */
1137 if (!opt->bLog)
1139 return;
1142 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1143 if (opt->unit == en_kT)
1145 unit_factor = 1.0;
1147 else if (opt->unit == en_kJ)
1149 unit_factor = BOLTZ*opt->Temperature;
1151 else if (opt->unit == en_kCal)
1153 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1155 else
1157 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1160 for (i = 0; i < bins; i++)
1162 if (profile[i] > 0.0)
1164 profile[i] = -std::log(profile[i])*unit_factor;
1168 /* shift to zero at z=opt->zProf0 */
1169 if (!opt->bProf0Set)
1171 diff = profile[0];
1173 else
1175 /* Get bin with shortest distance to opt->zProf0
1176 (-0.5 from bin position and +0.5 from rounding cancel) */
1177 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1178 if (imin < 0)
1180 imin = 0;
1182 else if (imin >= bins)
1184 imin = bins-1;
1186 diff = profile[imin];
1189 /* Shift to zero */
1190 for (i = 0; i < bins; i++)
1192 profile[i] -= diff;
1196 //! Make an array of random integers (used for bootstrapping)
1197 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx::DefaultRandomEngine * rng)
1199 gmx::UniformIntDistribution<int> dist(0, blockLength-1);
1201 int ipull, blockBase, nr, ipullRandom;
1203 if (blockLength == 0)
1205 blockLength = nPull;
1208 for (ipull = 0; ipull < nPull; ipull++)
1210 blockBase = (ipull/blockLength)*blockLength;
1212 { /* make sure nothing bad happens in the last block */
1213 nr = dist(*rng); // [0,blockLength-1]
1214 ipullRandom = blockBase + nr;
1216 while (ipullRandom >= nPull);
1217 if (ipullRandom < 0 || ipullRandom >= nPull)
1219 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1220 "blockLength = %d, blockBase = %d\n",
1221 ipullRandom, nPull, nr, blockLength, blockBase);
1223 randomArray[ipull] = ipullRandom;
1225 /*for (ipull=0; ipull<nPull; ipull++)
1226 printf("%d ",randomArray[ipull]); printf("\n"); */
1229 /*! \brief Set pull group information of a synthetic histogram
1231 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1232 * but it is not required if we bootstrap complete histograms.
1234 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1235 t_UmbrellaWindow *thisWindow, int pullid)
1237 synthWindow->N [0] = thisWindow->N [pullid];
1238 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1239 synthWindow->pos [0] = thisWindow->pos [pullid];
1240 synthWindow->z [0] = thisWindow->z [pullid];
1241 synthWindow->k [0] = thisWindow->k [pullid];
1242 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1243 synthWindow->g [0] = thisWindow->g [pullid];
1244 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1247 /*! \brief Calculate cumulative distribution function of of all histograms.
1249 * This allow to create random number sequences
1250 * which are distributed according to the histograms. Required to generate
1251 * the "synthetic" histograms for the Bootstrap method
1253 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1254 t_UmbrellaOptions *opt, const char *fnhist, const char *xlabel)
1256 int i, j, k, nbin;
1257 double last;
1258 char *fn = 0, *buf = 0;
1259 FILE *fp = 0;
1261 if (opt->bs_verbose)
1263 snew(fn, std::strlen(fnhist)+10);
1264 snew(buf, std::strlen(fnhist)+10);
1265 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1266 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1269 nbin = opt->bins;
1270 for (i = 0; i < nWindows; i++)
1272 snew(window[i].cum, window[i].nPull);
1273 for (j = 0; j < window[i].nPull; j++)
1275 snew(window[i].cum[j], nbin+1);
1276 window[i].cum[j][0] = 0.;
1277 for (k = 1; k <= nbin; k++)
1279 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1282 /* normalize CDFs. Ensure cum[nbin]==1 */
1283 last = window[i].cum[j][nbin];
1284 for (k = 0; k <= nbin; k++)
1286 window[i].cum[j][k] /= last;
1291 printf("Cumulative distribution functions of all histograms created.\n");
1292 if (opt->bs_verbose)
1294 for (k = 0; k <= nbin; k++)
1296 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1297 for (i = 0; i < nWindows; i++)
1299 for (j = 0; j < window[i].nPull; j++)
1301 fprintf(fp, "%g\t", window[i].cum[j][k]);
1304 fprintf(fp, "\n");
1306 printf("Wrote cumulative distribution functions to %s\n", fn);
1307 xvgrclose(fp);
1308 sfree(fn);
1309 sfree(buf);
1314 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1316 * This is used to generate a random sequence distributed according to a histogram
1318 void searchCumulative(double xx[], int n, double x, int *j)
1320 int ju, jm, jl;
1322 jl = -1;
1323 ju = n;
1324 while (ju-jl > 1)
1326 jm = (ju+jl) >> 1;
1327 if (x >= xx[jm])
1329 jl = jm;
1331 else
1333 ju = jm;
1336 if (x == xx[0])
1338 *j = 0;
1340 else if (x == xx[n-1])
1342 *j = n-2;
1344 else
1346 *j = jl;
1350 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1351 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1352 int pullid, t_UmbrellaOptions *opt)
1354 int N, i, nbins, r_index, ibin;
1355 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1356 char errstr[1024];
1358 N = thisWindow->N[pullid];
1359 dt = thisWindow->dt;
1360 nbins = thisWindow->nBin;
1362 /* tau = autocorrelation time */
1363 if (opt->tauBootStrap > 0.0)
1365 tausteps = opt->tauBootStrap/dt;
1367 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1369 /* calc tausteps from g=1+2tausteps */
1370 g = thisWindow->g[pullid];
1371 tausteps = (g-1)/2;
1373 else
1375 sprintf(errstr,
1376 "When generating hypothetical trajectories from given umbrella histograms,\n"
1377 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1378 "cannot be predicted. You have 3 options:\n"
1379 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1380 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1381 std::strcat(errstr,
1382 " with option -iiact for all umbrella windows.\n"
1383 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1384 " Use option (3) only if you are sure what you're doing, you may severely\n"
1385 " underestimate the error if a too small ACT is given.\n");
1386 gmx_fatal(FARGS, errstr);
1389 synthWindow->N [0] = N;
1390 synthWindow->pos [0] = thisWindow->pos[pullid];
1391 synthWindow->z [0] = thisWindow->z[pullid];
1392 synthWindow->k [0] = thisWindow->k[pullid];
1393 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1394 synthWindow->g [0] = thisWindow->g [pullid];
1395 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1397 for (i = 0; i < nbins; i++)
1399 synthWindow->Histo[0][i] = 0.;
1402 if (opt->bsMethod == bsMethod_trajGauss)
1404 sig = thisWindow->sigma [pullid];
1405 mu = thisWindow->aver [pullid];
1408 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1409 Use the following:
1410 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1411 then
1412 z = a*x + sqrt(1-a^2)*y
1413 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1414 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1415 function
1416 C(t) = exp(-t/tau) with tau=-1/ln(a)
1418 Then, use error function to turn the Gaussian random variable into a uniformly
1419 distributed one in [0,1]. Eventually, use cumulative distribution function of
1420 histogram to get random variables distributed according to histogram.
1421 Note: The ACT of the flat distribution and of the generated histogram is not
1422 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1424 a = std::exp(-1.0/tausteps);
1425 ap = std::sqrt(1.0-a*a);
1426 invsqrt2 = 1.0/std::sqrt(2.0);
1428 /* init random sequence */
1429 x = opt->normalDistribution(opt->rng);
1431 if (opt->bsMethod == bsMethod_traj)
1433 /* bootstrap points from the umbrella histograms */
1434 for (i = 0; i < N; i++)
1436 y = opt->normalDistribution(opt->rng);
1437 x = a*x+ap*y;
1438 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1439 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1441 r = 0.5*(1+std::erf(x*invsqrt2));
1442 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1443 synthWindow->Histo[0][r_index] += 1.;
1446 else if (opt->bsMethod == bsMethod_trajGauss)
1448 /* bootstrap points from a Gaussian with the same average and sigma
1449 as the respective umbrella histogram. The idea was, that -given
1450 limited sampling- the bootstrapped histograms are otherwise biased
1451 from the limited sampling of the US histos. However, bootstrapping from
1452 the Gaussian seems to yield a similar estimate. */
1453 i = 0;
1454 while (i < N)
1456 y = opt->normalDistribution(opt->rng);
1457 x = a*x+ap*y;
1458 z = x*sig+mu;
1459 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1460 if (opt->bCycl)
1462 if (ibin < 0)
1464 while ( (ibin += nbins) < 0)
1469 else if (ibin >= nbins)
1471 while ( (ibin -= nbins) >= nbins)
1478 if (ibin >= 0 && ibin < nbins)
1480 synthWindow->Histo[0][ibin] += 1.;
1481 i++;
1485 else
1487 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1491 /*! \brief Write all histograms to a file
1493 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1494 * sets of bootstrapped histograms.
1496 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1497 int bs_index, t_UmbrellaOptions *opt, const char *xlabel)
1499 char *fn = 0, *buf = 0, title[256];
1500 FILE *fp;
1501 int bins, l, i, j;
1503 if (bs_index >= 0)
1505 snew(fn, std::strlen(fnhist)+10);
1506 snew(buf, std::strlen(fnhist)+1);
1507 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1508 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1510 else
1512 fn = gmx_strdup(fnhist);
1513 std::strcpy(title, "Umbrella histograms");
1516 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1517 bins = opt->bins;
1519 /* Write histograms */
1520 for (l = 0; l < bins; ++l)
1522 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1523 for (i = 0; i < nWindows; ++i)
1525 for (j = 0; j < window[i].nPull; ++j)
1527 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1530 fprintf(fp, "\n");
1533 xvgrclose(fp);
1534 printf("Wrote %s\n", fn);
1535 if (bs_index >= 0)
1537 sfree(buf);
1539 sfree(fn);
1542 //! Used for qsort to sort random numbers
1543 int func_wham_is_larger(const void *a, const void *b)
1545 double *aa, *bb;
1546 aa = (double*)a;
1547 bb = (double*)b;
1548 if (*aa < *bb)
1550 return -1;
1552 else if (*aa > *bb)
1554 return 1;
1556 else
1558 return 0;
1562 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1563 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1565 int i;
1566 double *r;
1567 gmx::UniformRealDistribution<real> dist(0, nAllPull);
1569 snew(r, nAllPull);
1571 /* generate ordered random numbers between 0 and nAllPull */
1572 for (i = 0; i < nAllPull-1; i++)
1574 r[i] = dist(opt->rng);
1576 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1577 r[nAllPull-1] = 1.0*nAllPull;
1579 synthwin[0].bsWeight[0] = r[0];
1580 for (i = 1; i < nAllPull; i++)
1582 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1585 /* avoid to have zero weight by adding a tiny value */
1586 for (i = 0; i < nAllPull; i++)
1588 if (synthwin[i].bsWeight[0] < 1e-5)
1590 synthwin[i].bsWeight[0] = 1e-5;
1594 sfree(r);
1597 //! The main bootstrapping routine
1598 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1599 const char *xlabel, char* ylabel, double *profile,
1600 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1602 t_UmbrellaWindow * synthWindow;
1603 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1604 int i, j, *randomArray = 0, winid, pullid, ib;
1605 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1606 FILE *fp;
1607 gmx_bool bExact = FALSE;
1609 /* init random generator */
1610 if (opt->bsSeed == 0)
1612 opt->bsSeed = static_cast<int>(gmx::makeRandomSeed());
1614 opt->rng.seed(opt->bsSeed);
1616 snew(bsProfile, opt->bins);
1617 snew(bsProfiles_av, opt->bins);
1618 snew(bsProfiles_av2, opt->bins);
1620 /* Create array of all pull groups. Note that different windows
1621 may have different nr of pull groups
1622 First: Get total nr of pull groups */
1623 nAllPull = 0;
1624 for (i = 0; i < nWindows; i++)
1626 nAllPull += window[i].nPull;
1628 snew(allPull_winId, nAllPull);
1629 snew(allPull_pullId, nAllPull);
1630 iAllPull = 0;
1631 /* Setup one array of all pull groups */
1632 for (i = 0; i < nWindows; i++)
1634 for (j = 0; j < window[i].nPull; j++)
1636 allPull_winId[iAllPull] = i;
1637 allPull_pullId[iAllPull] = j;
1638 iAllPull++;
1642 /* setup stuff for synthetic windows */
1643 snew(synthWindow, nAllPull);
1644 for (i = 0; i < nAllPull; i++)
1646 synthWindow[i].nPull = 1;
1647 synthWindow[i].nBin = opt->bins;
1648 snew(synthWindow[i].Histo, 1);
1649 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1651 snew(synthWindow[i].Histo[0], opt->bins);
1653 snew(synthWindow[i].N, 1);
1654 snew(synthWindow[i].pos, 1);
1655 snew(synthWindow[i].z, 1);
1656 snew(synthWindow[i].k, 1);
1657 snew(synthWindow[i].bContrib, 1);
1658 snew(synthWindow[i].g, 1);
1659 snew(synthWindow[i].bsWeight, 1);
1662 switch (opt->bsMethod)
1664 case bsMethod_hist:
1665 snew(randomArray, nAllPull);
1666 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1667 please_cite(stdout, "Hub2006");
1668 break;
1669 case bsMethod_BayesianHist:
1670 /* just copy all histogams into synthWindow array */
1671 for (i = 0; i < nAllPull; i++)
1673 winid = allPull_winId [i];
1674 pullid = allPull_pullId[i];
1675 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1677 break;
1678 case bsMethod_traj:
1679 case bsMethod_trajGauss:
1680 calc_cumulatives(window, nWindows, opt, fnhist, xlabel);
1681 break;
1682 default:
1683 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1686 /* do bootstrapping */
1687 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1688 for (ib = 0; ib < opt->nBootStrap; ib++)
1690 printf(" *******************************************\n"
1691 " ******** Start bootstrap nr %d ************\n"
1692 " *******************************************\n", ib+1);
1694 switch (opt->bsMethod)
1696 case bsMethod_hist:
1697 /* bootstrap complete histograms from given histograms */
1698 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, &opt->rng);
1699 for (i = 0; i < nAllPull; i++)
1701 winid = allPull_winId [randomArray[i]];
1702 pullid = allPull_pullId[randomArray[i]];
1703 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1705 break;
1706 case bsMethod_BayesianHist:
1707 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1708 setRandomBsWeights(synthWindow, nAllPull, opt);
1709 break;
1710 case bsMethod_traj:
1711 case bsMethod_trajGauss:
1712 /* create new histos from given histos, that is generate new hypothetical
1713 trajectories */
1714 for (i = 0; i < nAllPull; i++)
1716 winid = allPull_winId[i];
1717 pullid = allPull_pullId[i];
1718 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1720 break;
1723 /* write histos in case of verbose output */
1724 if (opt->bs_verbose)
1726 print_histograms(fnhist, synthWindow, nAllPull, ib, opt, xlabel);
1729 /* do wham */
1730 i = 0;
1731 bExact = FALSE;
1732 maxchange = 1e20;
1733 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1736 if ( (i%opt->stepUpdateContrib) == 0)
1738 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1740 if (maxchange < opt->Tolerance)
1742 bExact = TRUE;
1744 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1746 printf("\t%4d) Maximum change %e\n", i, maxchange);
1748 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1749 i++;
1751 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1752 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1754 if (opt->bLog)
1756 prof_normalization_and_unit(bsProfile, opt);
1759 /* symmetrize profile around z=0 */
1760 if (opt->bSym)
1762 symmetrizeProfile(bsProfile, opt);
1765 /* save stuff to get average and stddev */
1766 for (i = 0; i < opt->bins; i++)
1768 tmp = bsProfile[i];
1769 bsProfiles_av[i] += tmp;
1770 bsProfiles_av2[i] += tmp*tmp;
1771 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1773 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1775 xvgrclose(fp);
1777 /* write average and stddev */
1778 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1779 if (output_env_get_print_xvgr_codes(opt->oenv))
1781 fprintf(fp, "@TYPE xydy\n");
1783 for (i = 0; i < opt->bins; i++)
1785 bsProfiles_av [i] /= opt->nBootStrap;
1786 bsProfiles_av2[i] /= opt->nBootStrap;
1787 tmp = bsProfiles_av2[i]-gmx::square(bsProfiles_av[i]);
1788 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1789 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1791 xvgrclose(fp);
1792 printf("Wrote boot strap result to %s\n", fnres);
1795 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1796 int whaminFileType(char *fn)
1798 int len;
1799 len = std::strlen(fn);
1800 if (std::strcmp(fn+len-3, "tpr") == 0)
1802 return whamin_tpr;
1804 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1806 return whamin_pullxf;
1808 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1810 return whamin_pdo;
1812 else
1814 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1816 return whamin_unknown;
1819 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1820 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1821 t_UmbrellaOptions *opt)
1823 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1824 int nread, sizenow, i, block = 1;
1825 FILE *fp;
1827 fp = gmx_ffopen(fn, "r");
1828 nread = 0;
1829 sizenow = 0;
1830 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1832 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1834 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1836 if (nread >= sizenow)
1838 sizenow += block;
1839 srenew(filename, sizenow);
1840 for (i = sizenow-block; i < sizenow; i++)
1842 snew(filename[i], WHAM_MAXFILELEN);
1845 /* remove newline if there is one */
1846 if (tmp[std::strlen(tmp)-1] == '\n')
1848 tmp[std::strlen(tmp)-1] = '\0';
1850 std::strcpy(filename[nread], tmp);
1851 if (opt->verbose)
1853 printf("Found file %s in %s\n", filename[nread], fn);
1855 nread++;
1857 *filenamesRet = filename;
1858 *nfilesRet = nread;
1861 //! Open a file or a pipe to a gzipped file
1862 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1864 char Buffer[1024], gunzip[1024], *Path = 0;
1865 FILE *pipe = 0;
1866 static gmx_bool bFirst = 1;
1868 /* gzipped pdo file? */
1869 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1871 /* search gunzip executable */
1872 if (!(Path = getenv("GMX_PATH_GZIP")))
1874 if (gmx_fexist("/bin/gunzip"))
1876 sprintf(gunzip, "%s", "/bin/gunzip");
1878 else if (gmx_fexist("/usr/bin/gunzip"))
1880 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1882 else
1884 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1885 "You may want to define the path to gunzip "
1886 "with the environment variable GMX_PATH_GZIP.", gunzip);
1889 else
1891 sprintf(gunzip, "%s/gunzip", Path);
1892 if (!gmx_fexist(gunzip))
1894 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1895 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1898 if (bFirst)
1900 printf("Using gunzip executable %s\n", gunzip);
1901 bFirst = 0;
1903 if (!gmx_fexist(fn))
1905 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1907 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1908 if (opt->verbose)
1910 printf("Executing command '%s'\n", Buffer);
1912 #if HAVE_PIPES
1913 if ((pipe = popen(Buffer, "r")) == NULL)
1915 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1917 #else
1918 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1919 #endif
1920 *bPipeOpen = TRUE;
1922 else
1924 pipe = gmx_ffopen(fn, "r");
1925 *bPipeOpen = FALSE;
1928 return pipe;
1931 //! Close file or pipe
1932 void pdo_close_file(FILE *fp)
1934 #if HAVE_PIPES
1935 pclose(fp);
1936 #else
1937 gmx_ffclose(fp);
1938 #endif
1941 //! Reading all pdo files
1942 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1943 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1945 FILE *file;
1946 real mintmp, maxtmp, done = 0.;
1947 int i;
1948 gmx_bool bPipeOpen;
1949 /* char Buffer0[1000]; */
1951 if (nfiles < 1)
1953 gmx_fatal(FARGS, "No files found. Hick.");
1956 /* if min and max are not given, get min and max from the input files */
1957 if (opt->bAuto)
1959 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1960 opt->min = 1e20;
1961 opt->max = -1e20;
1962 for (i = 0; i < nfiles; ++i)
1964 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1965 /*fgets(Buffer0,999,file);
1966 fprintf(stderr,"First line '%s'\n",Buffer0); */
1967 done = 100.0*(i+1)/nfiles;
1968 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1969 if (opt->verbose)
1971 printf("\n");
1973 read_pdo_header(file, header, opt);
1974 /* here only determine min and max of this window */
1975 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1976 if (maxtmp > opt->max)
1978 opt->max = maxtmp;
1980 if (mintmp < opt->min)
1982 opt->min = mintmp;
1984 if (bPipeOpen)
1986 pdo_close_file(file);
1988 else
1990 gmx_ffclose(file);
1993 printf("\n");
1994 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1995 if (opt->bBoundsOnly)
1997 printf("Found option -boundsonly, now exiting.\n");
1998 exit (0);
2001 /* store stepsize in profile */
2002 opt->dz = (opt->max-opt->min)/opt->bins;
2004 /* Having min and max, we read in all files */
2005 /* Loop over all files */
2006 for (i = 0; i < nfiles; ++i)
2008 done = 100.0*(i+1)/nfiles;
2009 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2010 if (opt->verbose)
2012 printf("\n");
2014 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2015 read_pdo_header(file, header, opt);
2016 /* load data into window */
2017 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2018 if ((window+i)->Ntot[0] == 0)
2020 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2022 if (bPipeOpen)
2024 pdo_close_file(file);
2026 else
2028 gmx_ffclose(file);
2031 printf("\n");
2032 for (i = 0; i < nfiles; ++i)
2034 sfree(fn[i]);
2036 sfree(fn);
2039 //! translate 0/1 to N/Y to write pull dimensions
2040 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2042 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2043 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2045 t_inputrec ir;
2046 int i, ncrd;
2047 t_state state;
2048 static int first = 1;
2050 /* printf("Reading %s \n",fn); */
2051 read_tpx_state(fn, &ir, &state, NULL);
2053 if (!ir.bPull)
2055 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2058 /* nr of pull groups */
2059 ncrd = 0;
2060 for (i = 0; i < ir.pull->ncoord; i++)
2062 if (ir.pull->coord[i].eType == epullUMBRELLA)
2064 int m;
2066 if (ncrd == 0)
2068 header->pull_geometry = ir.pull->coord[i].eGeom;
2069 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2072 if (ncrd != i)
2074 /* TODO: remove this restriction */
2075 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2078 for (m = 0; m < DIM; m++)
2080 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2082 /* TODO: remove the restriction */
2083 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2086 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2088 /* TODO: remove the restriction */
2089 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2093 ncrd++;
2097 if (ncrd < 1)
2099 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2102 header->npullcrds_tot = ir.pull->ncoord;
2103 header->npullcrds = ncrd;
2104 header->bPrintRef = ir.pull->bPrintCOM;
2105 if (ir.pull->bPrintRefValue)
2107 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2109 header->bPrintComp = ir.pull->bPrintComp;
2110 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2112 /* It is currently assumed that all pull coordinates have the same geometry, so they also have the same coordinate units.
2113 We can therefore get the units for the xlabel from the first coordinate. */
2114 std::strcpy(header->coord_units, pull_coordinate_units(&ir.pull->coord[0]));
2116 /* We should add a struct for each pull coord with all pull coord data
2117 instead of allocating many arrays for each property */
2118 snew(header->k, ncrd);
2119 snew(header->init_dist, ncrd);
2120 snew(header->umbInitDist, ncrd);
2122 /* only z-direction with epullgCYL? */
2123 if (header->pull_geometry == epullgCYL)
2125 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2127 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2128 "However, found dimensions [%s %s %s]\n",
2129 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2130 int2YN(header->pull_dim[ZZ]));
2134 for (i = 0; i < ncrd; i++)
2136 /* For angle pull geometries, the pull code uses coordinate units of degrees externally and radians internally.
2137 The force constant has units kJ/mol/rad^2 both externally and internally. Here, we convert the force constants to
2138 units of degrees (kJ/mol/deg^2) for internal use. This requires less code modification than converting the coordinates */
2139 double k_pull_external_to_wham_internal = 1./(gmx::square(pull_conversion_factor_internal2userinput(&ir.pull->coord[i])));
2141 header->k[i] = ir.pull->coord[i].k*k_pull_external_to_wham_internal;
2142 if (header->k[i] == 0.0)
2144 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2145 "That doesn't seem to be an Umbrella tpr.\n",
2146 i, fn);
2148 header->init_dist[i] = ir.pull->coord[i].init;
2150 /* initial distance to reference */
2151 switch (header->pull_geometry)
2153 case epullgCYL:
2154 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2155 case epullgDIST:
2156 case epullgDIR:
2157 case epullgDIRPBC:
2158 case epullgANGLE:
2159 case epullgDIHEDRAL:
2160 case epullgANGLEAXIS:
2161 header->umbInitDist[i] = header->init_dist[i];
2162 break;
2163 default:
2164 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2168 if (opt->verbose || first)
2170 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2171 "\tPull group coordinates%s expected in pullx files.\n",
2172 fn, header->npullcrds, epullg_names[header->pull_geometry],
2173 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2174 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2175 for (i = 0; i < ncrd; i++)
2177 double k_wham_internal_to_pull_external = gmx::square(pull_conversion_factor_internal2userinput(&ir.pull->coord[i]));
2178 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i]*k_wham_internal_to_pull_external, header->umbInitDist[i]);
2181 if (!opt->verbose && first)
2183 printf("\tUse option -v to see this output for all input tpr files\n\n");
2186 first = 0;
2189 //! 2-norm in a ndim-dimensional space
2190 double dist_ndim(double **dx, int ndim, int line)
2192 int i;
2193 double r2 = 0.;
2194 for (i = 0; i < ndim; i++)
2196 r2 += gmx::square(dx[i][line]);
2198 return std::sqrt(r2);
2201 //! Read pullx.xvg or pullf.xvg
2202 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2203 t_UmbrellaWindow * window,
2204 t_UmbrellaOptions *opt,
2205 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2206 t_groupselection *groupsel)
2208 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2209 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2210 real min, max, minfound = 1e20, maxfound = -1e20;
2211 gmx_bool dt_ok, timeok, bHaveForce;
2212 const char *quantity;
2213 const int blocklen = 4096;
2214 int *lennow = 0;
2215 static gmx_bool bFirst = TRUE;
2218 * Data columns in pull output:
2219 * - in force output pullf.xvg:
2220 * No reference columns, one column per pull coordinate
2222 * - in position output pullx.xvg
2223 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2224 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2227 nColPerCrd = 1;
2228 if (opt->bPullx && header->bPrintComp)
2230 nColPerCrd += header->pull_ndim;
2232 quantity = opt->bPullx ? "position" : "force";
2234 if (opt->bPullx && header->bPrintRef)
2236 nColRefEachCrd = header->pull_ndim;
2238 else
2240 nColRefEachCrd = 0;
2243 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2244 bHaveForce = opt->bPullf;
2246 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2247 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2248 Sorry for the laziness, this is a To-do. */
2249 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2250 && opt->bPullx)
2252 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2253 "reading \n(option -if) is supported at present, "
2254 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2255 "forces (pullf.xvg files)\nand provide them to gmx wham with option -if.",
2256 epullg_names[header->pull_geometry]);
2259 nt = read_xvg(fn, &y, &ny);
2261 /* Check consistency */
2262 if (nt < 1)
2264 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2266 if (bFirst)
2268 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2269 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2270 header->pull_ndim);
2271 /* Since this tool code has not updated yet to allow difference pull
2272 * dimensions per pull coordinate, we can't easily check the exact
2273 * number of expected columns, so we only check what we expect for
2274 * the pull coordinates selected for the WHAM analysis.
2276 printf("Expecting these columns in pull file:\n"
2277 "\t%d reference columns for each individual pull coordinate\n"
2278 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2279 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2280 header->npullcrds,
2281 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2282 nColExpect);
2283 bFirst = FALSE;
2285 if (ny < nColExpect ||
2286 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2288 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2289 "\nMaybe you confused options -ix and -if ?\n",
2290 header->npullcrds, fntpr, ny-1, fn,
2291 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2292 nColExpect-1);
2295 if (opt->verbose)
2297 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2300 if (!bGetMinMax)
2302 bins = opt->bins;
2303 min = opt->min;
2304 max = opt->max;
2305 if (nt > 1)
2307 window->dt = y[0][1]-y[0][0];
2309 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2311 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2314 /* Need to alocate memory and set up structure */
2316 if (groupsel)
2318 /* Use only groups selected with option -is file */
2319 if (header->npullcrds != groupsel->n)
2321 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2322 header->npullcrds, groupsel->n);
2324 window->nPull = groupsel->nUse;
2326 else
2328 window->nPull = header->npullcrds;
2331 window->nBin = bins;
2332 snew(window->Histo, window->nPull);
2333 snew(window->z, window->nPull);
2334 snew(window->k, window->nPull);
2335 snew(window->pos, window->nPull);
2336 snew(window->N, window->nPull);
2337 snew(window->Ntot, window->nPull);
2338 snew(window->g, window->nPull);
2339 snew(window->bsWeight, window->nPull);
2340 window->bContrib = 0;
2342 if (opt->bCalcTauInt)
2344 snew(window->ztime, window->nPull);
2346 else
2348 window->ztime = NULL;
2350 snew(lennow, window->nPull);
2352 for (g = 0; g < window->nPull; ++g)
2354 window->z[g] = 1;
2355 window->bsWeight[g] = 1.;
2356 snew(window->Histo[g], bins);
2357 window->N[g] = 0;
2358 window->Ntot[g] = 0;
2359 window->g[g] = 1.;
2360 if (opt->bCalcTauInt)
2362 window->ztime[g] = NULL;
2366 /* Copying umbrella center and force const is more involved since not
2367 all pull groups from header (tpr file) may be used in window variable */
2368 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2370 if (groupsel && (groupsel->bUse[g] == FALSE))
2372 continue;
2374 window->k[gUsed] = header->k[g];
2375 window->pos[gUsed] = header->umbInitDist[g];
2376 gUsed++;
2379 else
2380 { /* only determine min and max */
2381 minfound = 1e20;
2382 maxfound = -1e20;
2383 min = max = bins = 0; /* Get rid of warnings */
2387 for (i = 0; i < nt; i++)
2389 /* Do you want that time frame? */
2390 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2392 /* get time step of pdo file and get dstep from opt->dt */
2393 if (i == 0)
2395 time0 = t;
2397 else if (i == 1)
2399 dt = t-time0;
2400 if (opt->dt > 0.0)
2402 dstep = static_cast<int>(opt->dt/dt+0.5);
2403 if (dstep == 0)
2405 dstep = 1;
2408 if (!bGetMinMax)
2410 window->dt = dt*dstep;
2414 dt_ok = (i%dstep == 0);
2415 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2416 /*if (opt->verbose)
2417 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2418 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2419 if (timeok)
2421 /* Note: if groupsel == NULL:
2422 * all groups in pullf/x file are stored in this window, and gUsed == g
2423 * if groupsel != NULL:
2424 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2426 gUsed = -1;
2427 for (g = 0; g < header->npullcrds; ++g)
2429 /* was this group selected for application in WHAM? */
2430 if (groupsel && (groupsel->bUse[g] == FALSE))
2432 continue;
2435 gUsed++;
2437 if (bHaveForce)
2439 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2440 force = y[g+1][i];
2441 pos = -force/header->k[g] + header->umbInitDist[g];
2443 else
2445 /* pick the right column from:
2446 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2448 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2449 pos = y[column][i];
2452 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2453 if (bGetMinMax)
2455 if (pos < minfound)
2457 minfound = pos;
2459 if (pos > maxfound)
2461 maxfound = pos;
2464 else
2466 if (gUsed >= window->nPull)
2468 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been caught before.\n",
2469 gUsed, window->nPull);
2472 if (opt->bCalcTauInt && !bGetMinMax)
2474 /* save time series for autocorrelation analysis */
2475 ntot = window->Ntot[gUsed];
2476 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2477 if (ntot >= lennow[gUsed])
2479 lennow[gUsed] += blocklen;
2480 srenew(window->ztime[gUsed], lennow[gUsed]);
2482 window->ztime[gUsed][ntot] = pos;
2485 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2486 if (opt->bCycl)
2488 if (ibin < 0)
2490 while ( (ibin += bins) < 0)
2495 else if (ibin >= bins)
2497 while ( (ibin -= bins) >= bins)
2503 if (ibin >= 0 && ibin < bins)
2505 window->Histo[gUsed][ibin] += 1.;
2506 window->N[gUsed]++;
2508 window->Ntot[gUsed]++;
2512 else if (t > opt->tmax)
2514 if (opt->verbose)
2516 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2518 break;
2522 if (bGetMinMax)
2524 *mintmp = minfound;
2525 *maxtmp = maxfound;
2527 sfree(lennow);
2528 for (i = 0; i < ny; i++)
2530 sfree(y[i]);
2534 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2535 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2536 t_UmbrellaHeader* header,
2537 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2539 int i;
2540 real mintmp, maxtmp;
2542 printf("Reading %d tpr and pullf files\n", nfiles/2);
2544 /* min and max not given? */
2545 if (opt->bAuto)
2547 printf("Automatic determination of boundaries...\n");
2548 opt->min = 1e20;
2549 opt->max = -1e20;
2550 for (i = 0; i < nfiles; i++)
2552 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2554 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2556 read_tpr_header(fnTprs[i], header, opt);
2557 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2559 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2561 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2562 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2563 if (maxtmp > opt->max)
2565 opt->max = maxtmp;
2567 if (mintmp < opt->min)
2569 opt->min = mintmp;
2572 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2573 if (opt->bBoundsOnly)
2575 printf("Found option -boundsonly, now exiting.\n");
2576 exit (0);
2579 /* store stepsize in profile */
2580 opt->dz = (opt->max-opt->min)/opt->bins;
2582 for (i = 0; i < nfiles; i++)
2584 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2586 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2588 read_tpr_header(fnTprs[i], header, opt);
2589 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2591 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2593 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2594 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2595 if (window[i].Ntot[0] == 0)
2597 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2601 for (i = 0; i < nfiles; i++)
2603 sfree(fnTprs[i]);
2604 sfree(fnPull[i]);
2606 sfree(fnTprs);
2607 sfree(fnPull);
2610 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2612 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2613 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2615 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2617 int nlines, ny, i, ig;
2618 double **iact;
2620 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2621 nlines = read_xvg(fn, &iact, &ny);
2622 if (nlines != nwins)
2624 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2625 nlines, fn, nwins);
2627 for (i = 0; i < nlines; i++)
2629 if (window[i].nPull != ny)
2631 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2632 "number of pull groups is different in different simulations. That is not\n"
2633 "supported yet. Sorry.\n");
2635 for (ig = 0; ig < window[i].nPull; ig++)
2637 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2638 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2640 if (iact[ig][i] <= 0.0)
2642 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2649 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2651 * This is useful
2652 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2653 * that -in case of limited sampling- the ACT may be severely underestimated.
2654 * Note: the g=1+2tau are overwritten.
2655 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2656 * by the smoothing
2658 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2660 int i, ig, j, jg;
2661 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2663 /* only evaluate within +- 3sigma of the Gausian */
2664 siglim = 3.0*opt->sigSmoothIact;
2665 siglim2 = gmx::square(siglim);
2666 /* pre-factor of Gaussian */
2667 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2668 invtwosig2 = 0.5/gmx::square(opt->sigSmoothIact);
2670 for (i = 0; i < nwins; i++)
2672 snew(window[i].tausmooth, window[i].nPull);
2673 for (ig = 0; ig < window[i].nPull; ig++)
2675 tausm = 0.;
2676 weight = 0;
2677 pos = window[i].pos[ig];
2678 for (j = 0; j < nwins; j++)
2680 for (jg = 0; jg < window[j].nPull; jg++)
2682 dpos2 = gmx::square(window[j].pos[jg]-pos);
2683 if (dpos2 < siglim2)
2685 w = gaufact*std::exp(-dpos2*invtwosig2);
2686 weight += w;
2687 tausm += w*window[j].tau[jg];
2688 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2689 w,dpos2,pos,gaufact,invtwosig2); */
2693 tausm /= weight;
2694 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2696 window[i].tausmooth[ig] = tausm;
2698 else
2700 window[i].tausmooth[ig] = window[i].tau[ig];
2702 window[i].g[ig] = 1+2*tausm/window[i].dt;
2707 //! Stop integrating autoccorelation function when ACF drops under this value
2708 #define WHAM_AC_ZERO_LIMIT 0.05
2710 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2712 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2713 t_UmbrellaOptions *opt, const char *fn, const char *xlabel)
2715 int i, ig, ncorr, ntot, j, k, *count, restart;
2716 real *corr, c0, dt, tmp;
2717 real *ztime, av, tausteps;
2718 FILE *fp, *fpcorr = 0;
2720 if (opt->verbose)
2722 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2723 "time [ps]", "autocorrelation function", opt->oenv);
2726 printf("\n");
2727 for (i = 0; i < nwins; i++)
2729 printf("\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2730 fflush(stdout);
2731 ntot = window[i].Ntot[0];
2733 /* using half the maximum time as length of autocorrelation function */
2734 ncorr = ntot/2;
2735 if (ntot < 10)
2737 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2738 " points. Provide more pull data!", ntot);
2740 snew(corr, ncorr);
2741 /* snew(corrSq,ncorr); */
2742 snew(count, ncorr);
2743 dt = window[i].dt;
2744 snew(window[i].tau, window[i].nPull);
2745 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2746 if (restart == 0)
2748 restart = 1;
2751 for (ig = 0; ig < window[i].nPull; ig++)
2753 if (ntot != window[i].Ntot[ig])
2755 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2756 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2758 ztime = window[i].ztime[ig];
2760 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2761 for (j = 0, av = 0; (j < ntot); j++)
2763 av += ztime[j];
2765 av /= ntot;
2766 for (k = 0; (k < ncorr); k++)
2768 corr[k] = 0.;
2769 count[k] = 0;
2771 for (j = 0; (j < ntot); j += restart)
2773 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2775 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2776 corr [k] += tmp;
2777 /* corrSq[k] += tmp*tmp; */
2778 count[k]++;
2781 /* divide by nr of frames for each time displacement */
2782 for (k = 0; (k < ncorr); k++)
2784 /* count probably = (ncorr-k+(restart-1))/restart; */
2785 corr[k] = corr[k]/count[k];
2786 /* variance of autocorrelation function */
2787 /* corrSq[k]=corrSq[k]/count[k]; */
2789 /* normalize such that corr[0] == 0 */
2790 c0 = 1./corr[0];
2791 for (k = 0; (k < ncorr); k++)
2793 corr[k] *= c0;
2794 /* corrSq[k]*=c0*c0; */
2797 /* write ACFs in verbose mode */
2798 if (fpcorr)
2800 for (k = 0; (k < ncorr); k++)
2802 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2804 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2807 /* esimate integrated correlation time, fitting is too unstable */
2808 tausteps = 0.5*corr[0];
2809 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2810 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2812 tausteps += corr[j];
2815 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2816 Kumar et al, eq. 28 ff. */
2817 window[i].tau[ig] = tausteps*dt;
2818 window[i].g[ig] = 1+2*tausteps;
2819 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2820 } /* ig loop */
2821 sfree(corr);
2822 sfree(count);
2824 printf(" done\n");
2825 if (fpcorr)
2827 xvgrclose(fpcorr);
2830 /* plot IACT along reaction coordinate */
2831 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2832 if (output_env_get_print_xvgr_codes(opt->oenv))
2834 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2835 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2836 for (i = 0; i < nwins; i++)
2838 fprintf(fp, "# %3d ", i);
2839 for (ig = 0; ig < window[i].nPull; ig++)
2841 fprintf(fp, " %11g", window[i].tau[ig]);
2843 fprintf(fp, "\n");
2846 for (i = 0; i < nwins; i++)
2848 for (ig = 0; ig < window[i].nPull; ig++)
2850 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2853 if (opt->sigSmoothIact > 0.0)
2855 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2856 opt->sigSmoothIact);
2857 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2858 smoothIact(window, nwins, opt);
2859 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2860 if (output_env_get_print_xvgr_codes(opt->oenv))
2862 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2863 fprintf(fp, "@ s1 symbol color 2\n");
2865 for (i = 0; i < nwins; i++)
2867 for (ig = 0; ig < window[i].nPull; ig++)
2869 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2873 xvgrclose(fp);
2874 printf("Wrote %s\n", fn);
2877 /*! \brief
2878 * compute average and sigma of each umbrella histogram
2880 void averageSigma(t_UmbrellaWindow *window, int nwins)
2882 int i, ig, ntot, k;
2883 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2885 for (i = 0; i < nwins; i++)
2887 snew(window[i].aver, window[i].nPull);
2888 snew(window[i].sigma, window[i].nPull);
2890 ntot = window[i].Ntot[0];
2891 for (ig = 0; ig < window[i].nPull; ig++)
2893 ztime = window[i].ztime[ig];
2894 for (k = 0, av = 0.; k < ntot; k++)
2896 av += ztime[k];
2898 av /= ntot;
2899 for (k = 0, sum2 = 0.; k < ntot; k++)
2901 diff = ztime[k]-av;
2902 sum2 += diff*diff;
2904 sig = std::sqrt(sum2/ntot);
2905 window[i].aver[ig] = av;
2907 /* Note: This estimate for sigma is biased from the limited sampling.
2908 Correct sigma by n/(n-1) where n = number of independent
2909 samples. Only possible if IACT is known.
2911 if (window[i].tau)
2913 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2914 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2916 else
2918 window[i].sigma[ig] = sig;
2920 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2926 /*! \brief
2927 * Use histograms to compute average force on pull group.
2929 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2931 int i, j, bins = opt->bins, k;
2932 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2933 double posmirrored;
2935 dz = (max-min)/bins;
2936 ztot = opt->max-min;
2937 ztot_half = ztot/2;
2939 /* Compute average displacement from histograms */
2940 for (j = 0; j < nWindows; ++j)
2942 snew(window[j].forceAv, window[j].nPull);
2943 for (k = 0; k < window[j].nPull; ++k)
2945 displAv = 0.0;
2946 weight = 0.0;
2947 for (i = 0; i < opt->bins; ++i)
2949 temp = (1.0*i+0.5)*dz+min;
2950 distance = temp - window[j].pos[k];
2951 if (opt->bCycl)
2952 { /* in cyclic wham: */
2953 if (distance > ztot_half) /* |distance| < ztot_half */
2955 distance -= ztot;
2957 else if (distance < -ztot_half)
2959 distance += ztot;
2962 w = window[j].Histo[k][i]/window[j].g[k];
2963 displAv += w*distance;
2964 weight += w;
2965 /* Are we near min or max? We are getting wrong forces from the histgrams since
2966 the histograms are zero outside [min,max). Therefore, assume that the position
2967 on the other side of the histomgram center is equally likely. */
2968 if (!opt->bCycl)
2970 posmirrored = window[j].pos[k]-distance;
2971 if (posmirrored >= max || posmirrored < min)
2973 displAv += -w*distance;
2974 weight += w;
2978 displAv /= weight;
2980 /* average force from average displacement */
2981 window[j].forceAv[k] = displAv*window[j].k[k];
2982 /* sigma from average square displacement */
2983 /* window[j].sigma [k] = sqrt(displAv2); */
2984 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2989 /*! \brief
2990 * Check if the complete reaction coordinate is covered by the histograms
2992 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2993 t_UmbrellaOptions *opt)
2995 int i, ig, j, bins = opt->bins, bBoundary;
2996 real avcount = 0, z, relcount, *count;
2997 snew(count, opt->bins);
2999 for (j = 0; j < opt->bins; ++j)
3001 for (i = 0; i < nwins; i++)
3003 for (ig = 0; ig < window[i].nPull; ig++)
3005 count[j] += window[i].Histo[ig][j];
3008 avcount += 1.0*count[j];
3010 avcount /= bins;
3011 for (j = 0; j < bins; ++j)
3013 relcount = count[j]/avcount;
3014 z = (j+0.5)*opt->dz+opt->min;
3015 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3016 /* check for bins with no data */
3017 if (count[j] == 0)
3019 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3020 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3022 /* and check for poor sampling */
3023 else if (relcount < 0.005 && !bBoundary)
3025 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3028 sfree(count);
3031 /*! \brief Compute initial potential by integrating the average force
3033 * This speeds up the convergence by roughly a factor of 2
3035 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt, const char *xlabel)
3037 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3038 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3039 FILE *fp;
3041 dz = (opt->max-min)/bins;
3043 printf("Getting initial potential by integration.\n");
3045 /* Compute average displacement from histograms */
3046 computeAverageForce(window, nWindows, opt);
3048 /* Get force for each bin from all histograms in this bin, or, alternatively,
3049 if no histograms are inside this bin, from the closest histogram */
3050 snew(pot, bins);
3051 snew(f, bins);
3052 for (j = 0; j < opt->bins; ++j)
3054 pos = (1.0*j+0.5)*dz+min;
3055 nHist = 0;
3056 fAv = 0.;
3057 distmin = 1e20;
3058 groupmin = winmin = 0;
3059 for (i = 0; i < nWindows; i++)
3061 for (ig = 0; ig < window[i].nPull; ig++)
3063 hispos = window[i].pos[ig];
3064 dist = std::abs(hispos-pos);
3065 /* average force within bin */
3066 if (dist < dz/2)
3068 nHist++;
3069 fAv += window[i].forceAv[ig];
3071 /* at the same time, remember closest histogram */
3072 if (dist < distmin)
3074 winmin = i;
3075 groupmin = ig;
3076 distmin = dist;
3080 /* if no histogram found in this bin, use closest histogram */
3081 if (nHist > 0)
3083 fAv = fAv/nHist;
3085 else
3087 fAv = window[winmin].forceAv[groupmin];
3089 f[j] = fAv;
3091 for (j = 1; j < opt->bins; ++j)
3093 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3096 /* cyclic wham: linearly correct possible offset */
3097 if (opt->bCycl)
3099 diff = (pot[bins-1]-pot[0])/(bins-1);
3100 for (j = 1; j < opt->bins; ++j)
3102 pot[j] -= j*diff;
3105 if (opt->verbose)
3107 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3108 for (j = 0; j < opt->bins; ++j)
3110 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3112 xvgrclose(fp);
3113 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3116 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3117 energy offsets which are usually determined by wham
3118 First: turn pot into probabilities:
3120 for (j = 0; j < opt->bins; ++j)
3122 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3124 calc_z(pot, window, nWindows, opt, TRUE);
3126 sfree(pot);
3127 sfree(f);
3130 //! Count number of words in an line
3131 static int wordcount(char *ptr)
3133 int i, n, is[2];
3134 int cur = 0;
3136 if (std::strlen(ptr) == 0)
3138 return 0;
3140 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3141 n = 1;
3142 for (i = 0; (ptr[i] != '\0'); i++)
3144 is[cur] = isspace(ptr[i]);
3145 if ((i > 0) && (is[cur] && !is[1-cur]))
3147 n++;
3149 cur = 1-cur;
3151 return n;
3154 /*! \brief Read input file for pull group selection (option -is)
3156 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3158 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3160 FILE *fp;
3161 int i, iline, n, len = STRLEN, temp;
3162 char *ptr = 0, *tmpbuf = 0;
3163 char fmt[1024], fmtign[1024];
3164 int block = 1, sizenow;
3166 fp = gmx_ffopen(opt->fnGroupsel, "r");
3167 opt->groupsel = NULL;
3169 snew(tmpbuf, len);
3170 sizenow = 0;
3171 iline = 0;
3172 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3174 trim(ptr);
3175 n = wordcount(ptr);
3177 if (iline >= sizenow)
3179 sizenow += block;
3180 srenew(opt->groupsel, sizenow);
3182 opt->groupsel[iline].n = n;
3183 opt->groupsel[iline].nUse = 0;
3184 snew(opt->groupsel[iline].bUse, n);
3186 fmtign[0] = '\0';
3187 for (i = 0; i < n; i++)
3189 std::strcpy(fmt, fmtign);
3190 std::strcat(fmt, "%d");
3191 if (sscanf(ptr, fmt, &temp))
3193 opt->groupsel[iline].bUse[i] = (temp > 0);
3194 if (opt->groupsel[iline].bUse[i])
3196 opt->groupsel[iline].nUse++;
3199 std::strcat(fmtign, "%*s");
3201 iline++;
3203 opt->nGroupsel = iline;
3204 if (nTpr != opt->nGroupsel)
3206 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3207 opt->fnGroupsel);
3210 printf("\nUse only these pull groups:\n");
3211 for (iline = 0; iline < nTpr; iline++)
3213 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3214 for (i = 0; i < opt->groupsel[iline].n; i++)
3216 if (opt->groupsel[iline].bUse[i])
3218 printf(" %d", i+1);
3221 printf("\n");
3223 printf("\n");
3225 sfree(tmpbuf);
3228 //! Boolean XOR
3229 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3231 //! Number of elements in fnm (used for command line parsing)
3232 #define NFILE asize(fnm)
3234 //! The main gmx wham routine
3235 int gmx_wham(int argc, char *argv[])
3237 const char *desc[] = {
3238 "[THISMODULE] is an analysis program that implements the Weighted",
3239 "Histogram Analysis Method (WHAM). It is intended to analyze",
3240 "output files generated by umbrella sampling simulations to ",
3241 "compute a potential of mean force (PMF).[PAR]",
3243 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3244 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3245 "and, if multiple coordinates need to be analyzed, all used the same",
3246 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3248 "At present, three input modes are supported.",
3250 "* With option [TT]-it[tt], the user provides a file which contains the",
3251 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3252 " AND, with option [TT]-ix[tt], a file which contains file names of",
3253 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3254 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3255 " first pullx, etc.",
3256 "* Same as the previous input mode, except that the the user",
3257 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3258 " From the pull force the position in the umbrella potential is",
3259 " computed. This does not work with tabulated umbrella potentials.",
3260 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3261 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3262 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3263 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3264 " must be similar to the following::",
3266 " # UMBRELLA 3.0",
3267 " # Component selection: 0 0 1",
3268 " # nSkip 1",
3269 " # Ref. Group 'TestAtom'",
3270 " # Nr. of pull groups 2",
3271 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3272 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3273 " #####",
3275 " The number of pull groups, umbrella positions, force constants, and names ",
3276 " may (of course) differ. Following the header, a time column and ",
3277 " a data column for each pull group follows (i.e. the displacement",
3278 " with respect to the umbrella center). Up to four pull groups are possible ",
3279 " per [REF].pdo[ref] file at present.",
3281 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3282 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3283 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3284 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3285 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3286 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3287 "used, groupsel.dat looks like this::",
3289 " 1 1 0 0",
3290 " 1 1 0 0",
3291 " 1 1 0 0",
3293 "By default, the output files are",
3295 "* [TT]-o[tt] PMF output file",
3296 "* [TT]-hist[tt] Histograms output file",
3298 "Always check whether the histograms sufficiently overlap.[PAR]",
3299 "The umbrella potential is assumed to be harmonic and the force constants are ",
3300 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3301 "a tabulated potential can be provided with [TT]-tab[tt].",
3303 "WHAM options",
3304 "^^^^^^^^^^^^",
3306 "* [TT]-bins[tt] Number of bins used in analysis",
3307 "* [TT]-temp[tt] Temperature in the simulations",
3308 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3309 "* [TT]-auto[tt] Automatic determination of boundaries",
3310 "* [TT]-min,-max[tt] Boundaries of the profile",
3312 "The data points that are used to compute the profile",
3313 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3314 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3315 "umbrella window.[PAR]",
3316 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3317 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3318 "With energy output, the energy in the first bin is defined to be zero. ",
3319 "If you want the free energy at a different ",
3320 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3321 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3322 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3323 "[THISMODULE] will make use of the",
3324 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3325 "reaction coordinate will assumed be be neighbors.[PAR]",
3326 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3327 "which may be useful for, e.g. membranes.",
3329 "Parallelization",
3330 "^^^^^^^^^^^^^^^",
3332 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3334 "Autocorrelations",
3335 "^^^^^^^^^^^^^^^^",
3337 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3338 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3339 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3340 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3341 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3342 "Because the IACTs can be severely underestimated in case of limited ",
3343 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3344 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3345 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3346 "integration of the ACFs while the ACFs are larger 0.05.",
3347 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3348 "less robust) method such as fitting to a double exponential, you can ",
3349 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3350 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3351 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3353 "Error analysis",
3354 "^^^^^^^^^^^^^^",
3356 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3357 "otherwise the statistical error may be substantially underestimated. ",
3358 "More background and examples for the bootstrap technique can be found in ",
3359 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3360 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3361 "Four bootstrapping methods are supported and ",
3362 "selected with [TT]-bs-method[tt].",
3364 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3365 " data points, and the bootstrap is carried out by assigning random weights to the ",
3366 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3367 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3368 " statistical error is underestimated.",
3369 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3370 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3371 " (allowing duplication, i.e. sampling with replacement).",
3372 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3373 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3374 " divided into blocks and only histograms within each block are mixed. Note that ",
3375 " the histograms within each block must be representative for all possible histograms, ",
3376 " otherwise the statistical error is underestimated.",
3377 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3378 " such that the generated data points are distributed according the given histograms ",
3379 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3380 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3381 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3382 " Note that this method may severely underestimate the error in case of limited sampling, ",
3383 " that is if individual histograms do not represent the complete phase space at ",
3384 " the respective positions.",
3385 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3386 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3387 " and width of the umbrella histograms. That method yields similar error estimates ",
3388 " like method [TT]traj[tt].",
3390 "Bootstrapping output:",
3392 "* [TT]-bsres[tt] Average profile and standard deviations",
3393 "* [TT]-bsprof[tt] All bootstrapping profiles",
3395 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3396 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3397 "the histograms."
3400 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3401 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3402 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3403 static t_UmbrellaOptions opt;
3404 static int nthreads = -1;
3406 t_pargs pa[] = {
3407 #if GMX_OPENMP
3408 { "-nt", FALSE, etINT, {&nthreads},
3409 "Number of threads used by gmx wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3410 #endif
3411 { "-min", FALSE, etREAL, {&opt.min},
3412 "Minimum coordinate in profile"},
3413 { "-max", FALSE, etREAL, {&opt.max},
3414 "Maximum coordinate in profile"},
3415 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3416 "Determine min and max automatically"},
3417 { "-bins", FALSE, etINT, {&opt.bins},
3418 "Number of bins in profile"},
3419 { "-temp", FALSE, etREAL, {&opt.Temperature},
3420 "Temperature"},
3421 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3422 "Tolerance"},
3423 { "-v", FALSE, etBOOL, {&opt.verbose},
3424 "Verbose mode"},
3425 { "-b", FALSE, etREAL, {&opt.tmin},
3426 "First time to analyse (ps)"},
3427 { "-e", FALSE, etREAL, {&opt.tmax},
3428 "Last time to analyse (ps)"},
3429 { "-dt", FALSE, etREAL, {&opt.dt},
3430 "Analyse only every dt ps"},
3431 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3432 "Write histograms and exit"},
3433 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3434 "Determine min and max and exit (with [TT]-auto[tt])"},
3435 { "-log", FALSE, etBOOL, {&opt.bLog},
3436 "Calculate the log of the profile before printing"},
3437 { "-unit", FALSE, etENUM, {en_unit},
3438 "Energy unit in case of log output" },
3439 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3440 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3441 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3442 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3443 { "-sym", FALSE, etBOOL, {&opt.bSym},
3444 "Symmetrize profile around z=0"},
3445 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3446 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3447 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3448 "Calculate integrated autocorrelation times and use in wham"},
3449 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3450 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3451 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3452 "When computing autocorrelation functions, restart computing every .. (ps)"},
3453 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3454 "HIDDENWhen smoothing the ACTs, allows one to reduce ACTs. Otherwise, only increase ACTs "
3455 "during smoothing"},
3456 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3457 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3458 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3459 "Bootstrap method" },
3460 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3461 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3462 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3463 "Seed for bootstrapping. (-1 = use time)"},
3464 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3465 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3466 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3467 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3468 { "-stepout", FALSE, etINT, {&opt.stepchange},
3469 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3470 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3471 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3474 t_filenm fnm[] = {
3475 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3476 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3477 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3478 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3479 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3480 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3481 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3482 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3483 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3484 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3485 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3486 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3489 int i, j, l, nfiles, nwins, nfiles2;
3490 t_UmbrellaHeader header;
3491 t_UmbrellaWindow * window = NULL;
3492 double *profile, maxchange = 1e20;
3493 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3494 char **fninTpr, **fninPull, **fninPdo;
3495 const char *fnPull;
3496 FILE *histout, *profout;
3497 char xlabel[STRLEN], ylabel[256], title[256];
3499 opt.bins = 200;
3500 opt.verbose = FALSE;
3501 opt.bHistOnly = FALSE;
3502 opt.bCycl = FALSE;
3503 opt.tmin = 50;
3504 opt.tmax = 1e20;
3505 opt.dt = 0.0;
3506 opt.min = 0;
3507 opt.max = 0;
3508 opt.bAuto = TRUE;
3509 opt.nGroupsel = 0;
3511 /* bootstrapping stuff */
3512 opt.nBootStrap = 0;
3513 opt.bsMethod = bsMethod_hist;
3514 opt.tauBootStrap = 0.0;
3515 opt.bsSeed = -1;
3516 opt.histBootStrapBlockLength = 8;
3517 opt.bs_verbose = FALSE;
3519 opt.bLog = TRUE;
3520 opt.unit = en_kJ;
3521 opt.zProf0 = 0.;
3522 opt.Temperature = 298;
3523 opt.Tolerance = 1e-6;
3524 opt.bBoundsOnly = FALSE;
3525 opt.bSym = FALSE;
3526 opt.bCalcTauInt = FALSE;
3527 opt.sigSmoothIact = 0.0;
3528 opt.bAllowReduceIact = TRUE;
3529 opt.bInitPotByIntegration = TRUE;
3530 opt.acTrestart = 1.0;
3531 opt.stepchange = 100;
3532 opt.stepUpdateContrib = 100;
3534 if (!parse_common_args(&argc, argv, 0,
3535 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3537 return 0;
3540 opt.unit = nenum(en_unit);
3541 opt.bsMethod = nenum(en_bsMethod);
3543 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3545 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3546 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3547 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3548 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3549 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3550 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3551 if (opt.bTab && opt.bPullf)
3553 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3554 "Provide pullx.xvg or pdo files!");
3557 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3559 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3561 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3563 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3564 "\n\n Check gmx wham -h !");
3567 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3568 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3569 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3570 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3571 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3573 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3574 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3575 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3576 if ( (bMinSet || bMaxSet) && bAutoSet)
3578 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3581 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3583 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3586 if (bMinSet && opt.bAuto)
3588 printf("Note: min and max given, switching off -auto.\n");
3589 opt.bAuto = FALSE;
3592 if (opt.bTauIntGiven && opt.bCalcTauInt)
3594 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3595 "the autocorrelation times. Not both.");
3598 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3600 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3601 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3603 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3605 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3606 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3609 /* Set # of OpenMP threads */
3610 if (nthreads > 0)
3612 gmx_omp_set_num_threads(nthreads);
3614 else
3616 nthreads = gmx_omp_get_max_threads();
3618 if (nthreads > 1)
3620 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3623 /* Reading gmx4 pull output and tpr files */
3624 if (opt.bTpr || opt.bPullf || opt.bPullx)
3626 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3628 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3629 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3630 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3631 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3632 if (nfiles != nfiles2)
3634 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3635 opt.fnTpr, nfiles2, fnPull);
3638 /* Read file that selects the pull group to be used */
3639 if (opt.fnGroupsel != NULL)
3641 readPullGroupSelection(&opt, fninTpr, nfiles);
3644 window = initUmbrellaWindows(nfiles);
3645 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3647 else
3648 { /* reading pdo files */
3649 if (opt.fnGroupsel != NULL)
3651 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3652 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3654 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3655 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3656 window = initUmbrellaWindows(nfiles);
3657 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3659 /* Assume the default coordinate units */
3660 std::strcpy(header.coord_units, "nm");
3663 sprintf(xlabel, "\\xx\\f{} (%s)", header.coord_units);
3665 nwins = nfiles;
3667 /* enforce equal weight for all histograms? */
3668 if (opt.bHistEq)
3670 enforceEqualWeights(window, nwins);
3673 /* write histograms */
3674 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3675 xlabel, "count", opt.oenv);
3676 for (l = 0; l < opt.bins; ++l)
3678 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3679 for (i = 0; i < nwins; ++i)
3681 for (j = 0; j < window[i].nPull; ++j)
3683 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3686 fprintf(histout, "\n");
3688 xvgrclose(histout);
3689 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3690 if (opt.bHistOnly)
3692 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3693 return 0;
3696 /* Using tabulated umbrella potential */
3697 if (opt.bTab)
3699 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3702 /* Integrated autocorrelation times provided ? */
3703 if (opt.bTauIntGiven)
3705 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3708 /* Compute integrated autocorrelation times */
3709 if (opt.bCalcTauInt)
3711 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm), xlabel);
3714 /* calc average and sigma for each histogram
3715 (maybe required for bootstrapping. If not, this is fast anyhow) */
3716 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3718 averageSigma(window, nwins);
3721 /* Get initial potential by simple integration */
3722 if (opt.bInitPotByIntegration)
3724 guessPotByIntegration(window, nwins, &opt, xlabel);
3727 /* Check if complete reaction coordinate is covered */
3728 checkReactionCoordinateCovered(window, nwins, &opt);
3730 /* Calculate profile */
3731 snew(profile, opt.bins);
3732 if (opt.verbose)
3734 opt.stepchange = 1;
3736 i = 0;
3739 if ( (i%opt.stepUpdateContrib) == 0)
3741 setup_acc_wham(profile, window, nwins, &opt);
3743 if (maxchange < opt.Tolerance)
3745 bExact = TRUE;
3746 /* if (opt.verbose) */
3747 printf("Switched to exact iteration in iteration %d\n", i);
3749 calc_profile(profile, window, nwins, &opt, bExact);
3750 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3752 printf("\t%4d) Maximum change %e\n", i, maxchange);
3754 i++;
3756 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3757 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3759 /* calc error from Kumar's formula */
3760 /* Unclear how the error propagates along reaction coordinate, therefore
3761 commented out */
3762 /* calc_error_kumar(profile,window, nwins,&opt); */
3764 /* Write profile in energy units? */
3765 if (opt.bLog)
3767 prof_normalization_and_unit(profile, &opt);
3768 std::strcpy(ylabel, en_unit_label[opt.unit]);
3769 std::strcpy(title, "Umbrella potential");
3771 else
3773 std::strcpy(ylabel, "Density of states");
3774 std::strcpy(title, "Density of states");
3777 /* symmetrize profile around z=0? */
3778 if (opt.bSym)
3780 symmetrizeProfile(profile, &opt);
3783 /* write profile or density of states */
3784 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3785 for (i = 0; i < opt.bins; ++i)
3787 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3789 xvgrclose(profout);
3790 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3792 /* Bootstrap Method */
3793 if (opt.nBootStrap)
3795 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3796 opt2fn("-hist", NFILE, fnm),
3797 xlabel, ylabel, profile, window, nwins, &opt);
3800 sfree(profile);
3801 freeUmbrellaWindows(window, nfiles);
3803 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3804 please_cite(stdout, "Hub2010");
3806 return 0;