Moved pull params to mdtypes/pull-params.h
[gromacs.git] / src / gromacs / gmxana / gmx_wham.cpp
blob25856fad41466fe5a2ec1c128fc0e11eb79288fc
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 /*! \internal \file
39 * \brief Implementation of the Weighted Histogram Analysis Method (WHAM)
41 * \author Jochen Hub <jhub@gwdg.de>
43 #include "gmxpre.h"
45 #include "config.h"
47 #include <cctype>
48 #include <cmath>
49 #include <cstdio>
50 #include <cstdlib>
51 #include <cstring>
53 #include <algorithm>
54 #include <sstream>
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/copyrite.h"
58 #include "gromacs/fileio/tpxio.h"
59 #include "gromacs/fileio/xvgr.h"
60 #include "gromacs/gmxana/gmx_ana.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdtypes/md_enums.h"
64 #include "gromacs/mdtypes/pull-params.h"
65 #include "gromacs/random/random.h"
66 #include "gromacs/utility/arraysize.h"
67 #include "gromacs/utility/cstringutil.h"
68 #include "gromacs/utility/exceptions.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/futil.h"
71 #include "gromacs/utility/gmxomp.h"
72 #include "gromacs/utility/smalloc.h"
74 //! longest file names allowed in input files
75 #define WHAM_MAXFILELEN 2048
77 /*! \brief
78 * x-axis legend for output files
80 static const char *xlabel = "\\xx\\f{} (nm)";
82 /*! \brief
83 * enum for energy units
85 enum {
86 enSel, en_kJ, en_kCal, en_kT, enNr
88 /*! \brief
89 * enum for type of input files (pdos, tpr, or pullf)
91 enum {
92 whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo
95 /*! \brief enum for bootstrapping method
97 * These bootstrap methods are supported:
98 * - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
99 * (bsMethod_BayesianHist)
100 * - bootstrap complete histograms (bsMethod_hist)
101 * - bootstrap trajectories from given umbrella histograms. This generates new
102 * "synthetic" histograms (bsMethod_traj)
103 * - bootstrap trajectories from Gaussian with mu/sigma computed from
104 * the respective histogram (bsMethod_trajGauss). This gives very similar
105 * results compared to bsMethod_traj.
107 * ********************************************************************
108 * FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
109 * JS Hub, BL de Groot, D van der Spoel
110 * g_wham - A free weighted histogram analysis implementation including
111 * robust error and autocorrelation estimates,
112 * J Chem Theory Comput, 6(12), 3713-3720 (2010)
113 * ********************************************************************
115 enum {
116 bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
117 bsMethod_traj, bsMethod_trajGauss
121 //! Parameters of the umbrella potentials
122 typedef struct
125 * \name Using umbrella pull code since gromacs 4.x
127 /*!\{*/
128 int npullcrds_tot; //!< nr of pull coordinates in tpr file
129 int npullcrds; //!< nr of umbrella pull coordinates for reading
130 int pull_geometry; //!< such as distance, direction
131 ivec pull_dim; //!< pull dimension with geometry distance
132 int pull_ndim; //!< nr of pull_dim != 0
133 gmx_bool bPrintRef; //!< Coordinates of reference groups written to pullx.xvg ?
134 gmx_bool bPrintComp; //!< Components of pull distance written to pullx.xvg ?
135 real *k; //!< force constants in tpr file
136 real *init_dist; //!< reference displacements
137 real *umbInitDist; //!< reference displacement in umbrella direction
138 /*!\}*/
140 * \name Using PDO files common until gromacs 3.x
142 /*!\{*/
143 int nSkip;
144 char Reference[256];
145 int nPull;
146 int nDim;
147 ivec Dims;
148 char PullName[4][256];
149 double UmbPos[4][3];
150 double UmbCons[4][3];
151 /*!\}*/
152 } t_UmbrellaHeader;
154 //! Data in the umbrella histograms
155 typedef struct
157 int nPull; //!< nr of pull groups in this pdo or pullf/x file
158 double **Histo; //!< nPull histograms
159 double **cum; //!< nPull cumulative distribution functions
160 int nBin; //!< nr of bins. identical to opt->bins
161 double *k; //!< force constants for the nPull groups
162 double *pos; //!< umbrella positions for the nPull groups
163 double *z; //!< z=(-Fi/kT) for the nPull groups. These values are iteratively computed during wham
164 int *N; //!< nr of data points in nPull histograms.
165 int *Ntot; //!< also nr of data points. N and Ntot only differ if bHistEq==TRUE
167 /*! \brief g = 1 + 2*tau[int]/dt where tau is the integrated autocorrelation time.
169 * Compare, e.g. Ferrenberg/Swendsen, PRL 63:1195 (1989),
170 * Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28
172 double *g;
173 double *tau; //!< intetrated autocorrelation time (IACT)
174 double *tausmooth; //!< smoothed IACT
176 double dt; //!< timestep in the input data. Can be adapted with gmx wham option -dt
178 /*! \brief TRUE, if any data point of the histogram is within min and max, otherwise FALSE */
179 gmx_bool **bContrib;
180 real **ztime; //!< input data z(t) as a function of time. Required to compute ACTs
182 /*! \brief average force estimated from average displacement, fAv=dzAv*k
184 * Used for integration to guess the potential.
186 real *forceAv;
187 real *aver; //!< average of histograms
188 real *sigma; //!< stddev of histograms
189 double *bsWeight; //!< for bootstrapping complete histograms with continuous weights
190 } t_UmbrellaWindow;
192 //! Selection of pull groups to be used in WHAM (one structure for each tpr file)
193 typedef struct
195 int n; //!< total nr of pull groups in this tpr file
196 int nUse; //!< nr of pull groups used
197 gmx_bool *bUse; //!< boolean array of size n. =1 if used, =0 if not
198 } t_groupselection;
200 //! Parameters of WHAM
201 typedef struct
204 * \name Input stuff
206 /*!\{*/
207 const char *fnTpr, *fnPullf, *fnGroupsel;
208 const char *fnPdo, *fnPullx; //!< file names of input
209 gmx_bool bTpr, bPullf, bPdo, bPullx; //!< input file types given?
210 real tmin, tmax, dt; //!< only read input within tmin and tmax with dt
212 gmx_bool bInitPotByIntegration; //!< before WHAM, guess potential by force integration. Yields 1.5 to 2 times faster convergence
213 int stepUpdateContrib; //!< update contribution table every ... iterations. Accelerates WHAM.
214 int nGroupsel; //!< if >0: use only certain group in WHAM, if ==0: use all groups
215 t_groupselection *groupsel; //!< for each tpr file: which pull groups to use in WHAM?
216 /*!\}*/
218 * \name Basic WHAM options
220 /*!\{*/
221 int bins; //!< nr of bins, min, max, and dz of profile
222 real min, max, dz;
223 real Temperature, Tolerance; //!< temperature, converged when probability changes less than Tolerance
224 gmx_bool bCycl; //!< generate cyclic (periodic) PMF
225 /*!\}*/
227 * \name Output control
229 /*!\{*/
230 gmx_bool bLog; //!< energy output (instead of probability) for profile
231 int unit; //!< unit for PMF output kJ/mol or kT or kCal/mol
232 gmx_bool bSym; //!< symmetrize PMF around z=0 after WHAM, useful for membranes etc.
233 /*! \brief after wham, set prof to zero at this z-position.
234 * When bootstrapping, set zProf0 to a "stable" reference position.
236 real zProf0;
237 gmx_bool bProf0Set; //!< setting profile to 0 at zProf0?
239 gmx_bool bBoundsOnly, bHistOnly; //!< determine min and max, or write histograms and exit
240 gmx_bool bAuto; //!< determine min and max automatically but do not exit
242 gmx_bool verbose; //!< more noisy wham mode
243 int stepchange; //!< print maximum change in prof after how many interations
244 gmx_output_env_t *oenv; //!< xvgr options
245 /*!\}*/
247 * \name Autocorrelation stuff
249 /*!\{*/
250 gmx_bool bTauIntGiven, bCalcTauInt; //!< IACT given or should be calculated?
251 real sigSmoothIact; //!< sigma of Gaussian to smooth ACTs
252 gmx_bool bAllowReduceIact; //!< Allow to reduce ACTs during smoothing. Otherwise ACT are only increased during smoothing
253 real acTrestart; //!< when computing ACT, time between restarting points
255 /* \brief Enforce the same weight for each umbella window, that is
256 * calculate with the same number of data points for
257 * each window. That can be reasonable, if the histograms
258 * have different length, but due to autocorrelation,
259 * a longer simulation should not have larger weightin wham.
261 gmx_bool bHistEq;
262 /*!\}*/
265 * \name Bootstrapping stuff
267 /*!\{*/
268 int nBootStrap; //!< nr of bootstraps (50 is usually enough)
270 /* \brief bootstrap method
272 * if == bsMethod_hist, consider complete histograms as independent
273 * data points and, hence, only mix complete histograms.
274 * if == bsMethod_BayesianHist, consider complete histograms
275 * as independent data points, but assign random weights
276 * to the histograms during the bootstrapping ("Bayesian bootstrap")
277 * In case of long correlations (e.g., inside a channel), these
278 * will yield a more realistic error.
279 * if == bsMethod_traj(Gauss), generate synthetic histograms
280 * for each given
281 * histogram by generating an autocorrelated random sequence
282 * that is distributed according to the respective given
283 * histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
284 * (instead of from the umbrella histogram) to generate a new
285 * histogram.
287 int bsMethod;
289 /* \brief autocorrelation time (ACT) used to generate synthetic histograms. If ==0, use calculated ACF */
290 real tauBootStrap;
292 /* \brief when mixing histograms, mix only histograms withing blocks
293 long the reaction coordinate xi. Avoids gaps along xi. */
294 int histBootStrapBlockLength;
296 int bsSeed; //!< random seed for bootstrapping
298 /* \brief Write cumulative distribution functions (CDFs) of histograms
299 and write the generated histograms for each bootstrap */
300 gmx_bool bs_verbose;
301 /*!\}*/
303 * \name tabulated umbrella potential stuff
305 /*!\{*/
306 gmx_bool bTab;
307 double *tabX, *tabY, tabMin, tabMax, tabDz;
308 int tabNbins;
309 /*!\}*/
310 gmx_rng_t rng; //!< gromacs random number generator
311 } t_UmbrellaOptions;
313 //! Make an umbrella window (may contain several histograms)
314 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
316 t_UmbrellaWindow *win;
317 int i;
318 snew(win, nwin);
319 for (i = 0; i < nwin; i++)
321 win[i].Histo = win[i].cum = 0;
322 win[i].k = win[i].pos = win[i].z = 0;
323 win[i].N = win[i].Ntot = 0;
324 win[i].g = win[i].tau = win[i].tausmooth = 0;
325 win[i].bContrib = 0;
326 win[i].ztime = 0;
327 win[i].forceAv = 0;
328 win[i].aver = win[i].sigma = 0;
329 win[i].bsWeight = 0;
331 return win;
334 //! Delete an umbrella window (may contain several histograms)
335 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
337 int i, j;
338 for (i = 0; i < nwin; i++)
340 if (win[i].Histo)
342 for (j = 0; j < win[i].nPull; j++)
344 sfree(win[i].Histo[j]);
347 if (win[i].cum)
349 for (j = 0; j < win[i].nPull; j++)
351 sfree(win[i].cum[j]);
354 if (win[i].bContrib)
356 for (j = 0; j < win[i].nPull; j++)
358 sfree(win[i].bContrib[j]);
361 sfree(win[i].Histo);
362 sfree(win[i].cum);
363 sfree(win[i].k);
364 sfree(win[i].pos);
365 sfree(win[i].z);
366 sfree(win[i].N);
367 sfree(win[i].Ntot);
368 sfree(win[i].g);
369 sfree(win[i].tau);
370 sfree(win[i].tausmooth);
371 sfree(win[i].bContrib);
372 sfree(win[i].ztime);
373 sfree(win[i].forceAv);
374 sfree(win[i].aver);
375 sfree(win[i].sigma);
376 sfree(win[i].bsWeight);
378 sfree(win);
381 /*! \brief
382 * Read and setup tabulated umbrella potential
384 void setup_tab(const char *fn, t_UmbrellaOptions *opt)
386 int i, ny, nl;
387 double **y;
389 printf("Setting up tabulated potential from file %s\n", fn);
390 nl = read_xvg(fn, &y, &ny);
391 opt->tabNbins = nl;
392 if (ny != 2)
394 gmx_fatal(FARGS, "Found %d columns in %s. Expected 2.\n", ny, fn);
396 opt->tabMin = y[0][0];
397 opt->tabMax = y[0][nl-1];
398 opt->tabDz = (opt->tabMax-opt->tabMin)/(nl-1);
399 if (opt->tabDz <= 0)
401 gmx_fatal(FARGS, "The tabulated potential in %s must be provided in \n"
402 "ascending z-direction", fn);
404 for (i = 0; i < nl-1; i++)
406 if (std::abs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
408 gmx_fatal(FARGS, "z-values in %s are not equally spaced.\n", ny, fn);
411 snew(opt->tabY, nl);
412 snew(opt->tabX, nl);
413 for (i = 0; i < nl; i++)
415 opt->tabX[i] = y[0][i];
416 opt->tabY[i] = y[1][i];
418 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
419 opt->tabMin, opt->tabMax, opt->tabDz);
422 //! Read the header of an PDO file (position, force const, nr of groups)
423 void read_pdo_header(FILE * file, t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
425 char line[2048];
426 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
427 int i;
428 std::istringstream ist;
430 /* line 1 */
431 if (fgets(line, 2048, file) == NULL)
433 gmx_fatal(FARGS, "Error reading header from pdo file\n");
435 ist.str(line);
436 ist >> Buffer0 >> Buffer1 >> Buffer2;
437 if (std::strcmp(Buffer1, "UMBRELLA"))
439 gmx_fatal(FARGS, "This does not appear to be a valid pdo file. Found %s, expected %s\n"
440 "(Found in first line: `%s')\n",
441 Buffer1, "UMBRELLA", line);
443 if (std::strcmp(Buffer2, "3.0"))
445 gmx_fatal(FARGS, "This does not appear to be a version 3.0 pdo file");
448 /* line 2 */
449 if (fgets(line, 2048, file) == NULL)
451 gmx_fatal(FARGS, "Error reading header from pdo file\n");
453 ist.str(line);
454 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Dims[0] >> header->Dims[1] >> header->Dims[2];
455 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
457 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
458 if (header->nDim != 1)
460 gmx_fatal(FARGS, "Currently only supports one dimension");
463 /* line3 */
464 if (fgets(line, 2048, file) == NULL)
466 gmx_fatal(FARGS, "Error reading header from pdo file\n");
468 ist.str(line);
469 ist >> Buffer0 >> Buffer1 >> header->nSkip;
471 /* line 4 */
472 if (fgets(line, 2048, file) == NULL)
474 gmx_fatal(FARGS, "Error reading header from pdo file\n");
476 ist.str(line);
477 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->Reference;
479 /* line 5 */
480 if (fgets(line, 2048, file) == NULL)
482 gmx_fatal(FARGS, "Error reading header from pdo file\n");
484 ist.str(line);
485 ist >> Buffer0 >> Buffer1 >> Buffer2 >> Buffer3 >> Buffer4 >> header->nPull;
487 if (opt->verbose)
489 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n", header->nPull, header->nSkip,
490 header->Reference);
493 for (i = 0; i < header->nPull; ++i)
495 if (fgets(line, 2048, file) == NULL)
497 gmx_fatal(FARGS, "Error reading header from pdo file\n");
499 ist.str(line);
500 ist >> Buffer0 >> Buffer1 >> Buffer2 >> header->PullName[i];
501 ist >> Buffer0 >> Buffer1 >> header->UmbPos[i][0];
502 ist >> Buffer0 >> Buffer1 >> header->UmbCons[i][0];
504 if (opt->verbose)
506 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
507 i, header->PullName[i], header->UmbPos[i][0], header->UmbCons[i][0]);
511 if (fgets(line, 2048, file) == NULL)
513 gmx_fatal(FARGS, "Cannot read from file\n");
515 ist.str(line);
516 ist >> Buffer3;
517 if (std::strcmp(Buffer3, "#####") != 0)
519 gmx_fatal(FARGS, "Expected '#####', found %s. Hick.\n", Buffer3);
523 //! smarter fgets
524 static char *fgets3(FILE *fp, char ptr[], int *len)
526 char *p;
527 int slen;
529 if (fgets(ptr, *len-1, fp) == NULL)
531 return NULL;
533 p = ptr;
534 while ((std::strchr(ptr, '\n') == NULL) && (!feof(fp)))
536 /* This line is longer than len characters, let's increase len! */
537 *len += STRLEN;
538 p += STRLEN;
539 srenew(ptr, *len);
540 if (fgets(p-1, STRLEN, fp) == NULL)
542 break;
545 slen = std::strlen(ptr);
546 if (ptr[slen-1] == '\n')
548 ptr[slen-1] = '\0';
551 return ptr;
554 /*! \brief Read the data columns of and PDO file.
556 * TO DO: Get rid of the scanf function to avoid the clang warning.
557 * At the moment, this warning is avoided by hiding the format string
558 * the variable fmtlf.
560 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
561 int fileno, t_UmbrellaWindow * win,
562 t_UmbrellaOptions *opt,
563 gmx_bool bGetMinMax, real *mintmp, real *maxtmp)
565 int i, inttemp, bins, count, ntot;
566 real minval, maxval, minfound = 1e20, maxfound = -1e20;
567 double temp, time, time0 = 0, dt;
568 char *ptr = 0;
569 t_UmbrellaWindow * window = 0;
570 gmx_bool timeok, dt_ok = 1;
571 char *tmpbuf = 0, fmt[256], fmtign[256], fmtlf[5] = "%lf";
572 int len = STRLEN, dstep = 1;
573 const int blocklen = 4096;
574 int *lennow = 0;
576 if (!bGetMinMax)
578 bins = opt->bins;
579 minval = opt->min;
580 maxval = opt->max;
582 window = win+fileno;
583 /* Need to alocate memory and set up structure */
584 window->nPull = header->nPull;
585 window->nBin = bins;
587 snew(window->Histo, window->nPull);
588 snew(window->z, window->nPull);
589 snew(window->k, window->nPull);
590 snew(window->pos, window->nPull);
591 snew(window->N, window->nPull);
592 snew(window->Ntot, window->nPull);
593 snew(window->g, window->nPull);
594 snew(window->bsWeight, window->nPull);
596 window->bContrib = 0;
598 if (opt->bCalcTauInt)
600 snew(window->ztime, window->nPull);
602 else
604 window->ztime = 0;
606 snew(lennow, window->nPull);
608 for (i = 0; i < window->nPull; ++i)
610 window->z[i] = 1;
611 window->bsWeight[i] = 1.;
612 snew(window->Histo[i], bins);
613 window->k[i] = header->UmbCons[i][0];
614 window->pos[i] = header->UmbPos[i][0];
615 window->N[i] = 0;
616 window->Ntot[i] = 0;
617 window->g[i] = 1.;
618 if (opt->bCalcTauInt)
620 window->ztime[i] = 0;
624 /* Done with setup */
626 else
628 minfound = 1e20;
629 maxfound = -1e20;
630 minval = maxval = bins = 0; /* Get rid of warnings */
633 count = 0;
634 snew(tmpbuf, len);
635 while ( (ptr = fgets3(file, tmpbuf, &len)) != NULL)
637 trim(ptr);
639 if (ptr[0] == '#' || std::strlen(ptr) < 2)
641 continue;
644 /* Initiate format string */
645 fmtign[0] = '\0';
646 std::strcat(fmtign, "%*s");
648 sscanf(ptr, fmtlf, &time); /* printf("Time %f\n",time); */
649 /* Round time to fs */
650 time = 1.0/1000*( static_cast<gmx_int64_t> (time*1000+0.5) );
652 /* get time step of pdo file */
653 if (count == 0)
655 time0 = time;
657 else if (count == 1)
659 dt = time-time0;
660 if (opt->dt > 0.0)
662 dstep = static_cast<int>(opt->dt/dt+0.5);
663 if (dstep == 0)
665 dstep = 1;
668 if (!bGetMinMax)
670 window->dt = dt*dstep;
673 count++;
675 dt_ok = ((count-1)%dstep == 0);
676 timeok = (dt_ok && time >= opt->tmin && time <= opt->tmax);
677 /* if (opt->verbose)
678 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
679 time,opt->tmin, opt->tmax, dt_ok,timeok); */
681 if (timeok)
683 for (i = 0; i < header->nPull; ++i)
685 std::strcpy(fmt, fmtign);
686 std::strcat(fmt, "%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
687 std::strcat(fmtign, "%*s"); /* ignoring one more entry in the next loop */
688 if (sscanf(ptr, fmt, &temp))
690 temp += header->UmbPos[i][0];
691 if (bGetMinMax)
693 if (temp < minfound)
695 minfound = temp;
697 if (temp > maxfound)
699 maxfound = temp;
702 else
704 if (opt->bCalcTauInt)
706 /* save time series for autocorrelation analysis */
707 ntot = window->Ntot[i];
708 if (ntot >= lennow[i])
710 lennow[i] += blocklen;
711 srenew(window->ztime[i], lennow[i]);
713 window->ztime[i][ntot] = temp;
716 temp -= minval;
717 temp /= (maxval-minval);
718 temp *= bins;
719 temp = std::floor(temp);
721 inttemp = static_cast<int> (temp);
722 if (opt->bCycl)
724 if (inttemp < 0)
726 inttemp += bins;
728 else if (inttemp >= bins)
730 inttemp -= bins;
734 if (inttemp >= 0 && inttemp < bins)
736 window->Histo[i][inttemp] += 1.;
737 window->N[i]++;
739 window->Ntot[i]++;
744 if (time > opt->tmax)
746 if (opt->verbose)
748 printf("time %f larger than tmax %f, stop reading pdo file\n", time, opt->tmax);
750 break;
754 if (bGetMinMax)
756 *mintmp = minfound;
757 *maxtmp = maxfound;
760 sfree(lennow);
761 sfree(tmpbuf);
764 /*! \brief Set identical weights for all histograms
766 * Normally, the weight is given by the number data points in each
767 * histogram, together with the autocorrelation time. This can be overwritten
768 * by this routine (not recommended). Since we now support autocorrelations, it is better to set
769 * an appropriate autocorrelation times instead of using this function.
771 void enforceEqualWeights(t_UmbrellaWindow * window, int nWindows)
773 int i, k, j, NEnforced;
774 double ratio;
776 NEnforced = window[0].Ntot[0];
777 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
778 "non-weighted histogram analysis method. Ndata = %d\n", NEnforced);
779 /* enforce all histograms to have the same weight as the very first histogram */
781 for (j = 0; j < nWindows; ++j)
783 for (k = 0; k < window[j].nPull; ++k)
785 ratio = 1.0*NEnforced/window[j].Ntot[k];
786 for (i = 0; i < window[0].nBin; ++i)
788 window[j].Histo[k][i] *= ratio;
790 window[j].N[k] = static_cast<int>(ratio*window[j].N[k] + 0.5);
795 /*! \brief Simple linear interpolation between two given tabulated points
797 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
799 int jl, ju;
800 double pl, pu, dz, dp;
802 jl = static_cast<int> (std::floor((dist-opt->tabMin)/opt->tabDz));
803 ju = jl+1;
804 if (jl < 0 || ju >= opt->tabNbins)
806 gmx_fatal(FARGS, "Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
807 "Provide an extended table.", dist, jl, ju);
809 pl = opt->tabY[jl];
810 pu = opt->tabY[ju];
811 dz = dist-opt->tabX[jl];
812 dp = (pu-pl)*dz/opt->tabDz;
813 return pl+dp;
817 /*! \brief
818 * Check which bins substiantially contribute (accelerates WHAM)
820 * Don't worry, that routine does not mean we compute the PMF in limited precision.
821 * After rapid convergence (using only substiantal contributions), we always switch to
822 * full precision.
824 void setup_acc_wham(double *profile, t_UmbrellaWindow * window, int nWindows,
825 t_UmbrellaOptions *opt)
827 int i, j, k, nGrptot = 0, nContrib = 0, nTot = 0;
828 double U, min = opt->min, dz = opt->dz, temp, ztot_half, distance, ztot, contrib1, contrib2;
829 gmx_bool bAnyContrib;
830 static int bFirst = 1;
831 static double wham_contrib_lim;
833 if (bFirst)
835 for (i = 0; i < nWindows; ++i)
837 nGrptot += window[i].nPull;
839 wham_contrib_lim = opt->Tolerance/nGrptot;
842 ztot = opt->max-opt->min;
843 ztot_half = ztot/2;
845 for (i = 0; i < nWindows; ++i)
847 if (!window[i].bContrib)
849 snew(window[i].bContrib, window[i].nPull);
851 for (j = 0; j < window[i].nPull; ++j)
853 if (!window[i].bContrib[j])
855 snew(window[i].bContrib[j], opt->bins);
857 bAnyContrib = FALSE;
858 for (k = 0; k < opt->bins; ++k)
860 temp = (1.0*k+0.5)*dz+min;
861 distance = temp - window[i].pos[j]; /* distance to umbrella center */
862 if (opt->bCycl)
863 { /* in cyclic wham: */
864 if (distance > ztot_half) /* |distance| < ztot_half */
866 distance -= ztot;
868 else if (distance < -ztot_half)
870 distance += ztot;
873 /* Note: there are two contributions to bin k in the wham equations:
874 i) N[j]*exp(- U/(BOLTZ*opt->Temperature) + window[i].z[j])
875 ii) exp(- U/(BOLTZ*opt->Temperature))
876 where U is the umbrella potential
877 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
880 if (!opt->bTab)
882 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
884 else
886 U = tabulated_pot(distance, opt); /* Use tabulated potential */
888 contrib1 = profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
889 contrib2 = window[i].N[j]*std::exp(-U/(BOLTZ*opt->Temperature) + window[i].z[j]);
890 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
891 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
892 if (window[i].bContrib[j][k])
894 nContrib++;
896 nTot++;
898 /* If this histo is far outside min and max all bContrib may be FALSE,
899 causing a floating point exception later on. To avoid that, switch
900 them all to true.*/
901 if (!bAnyContrib)
903 for (k = 0; k < opt->bins; ++k)
905 window[i].bContrib[j][k] = TRUE;
910 if (bFirst)
912 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
913 "Evaluating only %d of %d expressions.\n\n", wham_contrib_lim, nContrib, nTot);
916 if (opt->verbose)
918 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
919 nContrib, nTot);
921 bFirst = 0;
924 //! Compute the PMF (one of the two main WHAM routines)
925 void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows,
926 t_UmbrellaOptions *opt, gmx_bool bExact)
928 double ztot_half, ztot, min = opt->min, dz = opt->dz;
930 ztot = opt->max-opt->min;
931 ztot_half = ztot/2;
933 #pragma omp parallel
937 int nthreads = gmx_omp_get_max_threads();
938 int thread_id = gmx_omp_get_thread_num();
939 int i;
940 int i0 = thread_id*opt->bins/nthreads;
941 int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads);
943 for (i = i0; i < i1; ++i)
945 int j, k;
946 double num, denom, invg, temp = 0, distance, U = 0;
947 num = denom = 0.;
948 for (j = 0; j < nWindows; ++j)
950 for (k = 0; k < window[j].nPull; ++k)
952 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
953 temp = (1.0*i+0.5)*dz+min;
954 num += invg*window[j].Histo[k][i];
956 if (!(bExact || window[j].bContrib[k][i]))
958 continue;
960 distance = temp - window[j].pos[k]; /* distance to umbrella center */
961 if (opt->bCycl)
962 { /* in cyclic wham: */
963 if (distance > ztot_half) /* |distance| < ztot_half */
965 distance -= ztot;
967 else if (distance < -ztot_half)
969 distance += ztot;
973 if (!opt->bTab)
975 U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
977 else
979 U = tabulated_pot(distance, opt); /* Use tabulated potential */
981 denom += invg*window[j].N[k]*std::exp(-U/(BOLTZ*opt->Temperature) + window[j].z[k]);
984 profile[i] = num/denom;
987 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
991 //! Compute the free energy offsets z (one of the two main WHAM routines)
992 double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows,
993 t_UmbrellaOptions *opt, gmx_bool bExact)
995 double min = opt->min, dz = opt->dz, ztot_half, ztot;
996 double maxglob = -1e20;
998 ztot = opt->max-opt->min;
999 ztot_half = ztot/2;
1001 #pragma omp parallel
1005 int nthreads = gmx_omp_get_max_threads();
1006 int thread_id = gmx_omp_get_thread_num();
1007 int i;
1008 int i0 = thread_id*nWindows/nthreads;
1009 int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads);
1010 double maxloc = -1e20;
1012 for (i = i0; i < i1; ++i)
1014 double total = 0, temp, distance, U = 0;
1015 int j, k;
1017 for (j = 0; j < window[i].nPull; ++j)
1019 total = 0;
1020 for (k = 0; k < window[i].nBin; ++k)
1022 if (!(bExact || window[i].bContrib[j][k]))
1024 continue;
1026 temp = (1.0*k+0.5)*dz+min;
1027 distance = temp - window[i].pos[j]; /* distance to umbrella center */
1028 if (opt->bCycl)
1029 { /* in cyclic wham: */
1030 if (distance > ztot_half) /* |distance| < ztot_half */
1032 distance -= ztot;
1034 else if (distance < -ztot_half)
1036 distance += ztot;
1040 if (!opt->bTab)
1042 U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
1044 else
1046 U = tabulated_pot(distance, opt); /* Use tabulated potential */
1048 total += profile[k]*std::exp(-U/(BOLTZ*opt->Temperature));
1050 /* Avoid floating point exception if window is far outside min and max */
1051 if (total != 0.0)
1053 total = -std::log(total);
1055 else
1057 total = 1000.0;
1059 temp = std::abs(total - window[i].z[j]);
1060 if (temp > maxloc)
1062 maxloc = temp;
1064 window[i].z[j] = total;
1067 /* Now get maximum maxloc from the threads and put in maxglob */
1068 if (maxloc > maxglob)
1070 #pragma omp critical
1072 if (maxloc > maxglob)
1074 maxglob = maxloc;
1079 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
1082 return maxglob;
1085 //! Make PMF symmetric around 0 (useful e.g. for membranes)
1086 void symmetrizeProfile(double* profile, t_UmbrellaOptions *opt)
1088 int i, j, bins = opt->bins;
1089 double *prof2, min = opt->min, max = opt->max, dz = opt->dz, zsym, deltaz, profsym;
1090 double z, z1;
1092 if (min > 0. || max < 0.)
1094 gmx_fatal(FARGS, "Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
1095 opt->min, opt->max);
1098 snew(prof2, bins);
1100 for (i = 0; i < bins; i++)
1102 z = min+(i+0.5)*dz;
1103 zsym = -z;
1104 /* bin left of zsym */
1105 j = static_cast<int> (std::floor((zsym-min)/dz-0.5));
1106 if (j >= 0 && (j+1) < bins)
1108 /* interpolate profile linearly between bins j and j+1 */
1109 z1 = min+(j+0.5)*dz;
1110 deltaz = zsym-z1;
1111 profsym = profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
1112 /* average between left and right */
1113 prof2[i] = 0.5*(profsym+profile[i]);
1115 else
1117 prof2[i] = profile[i];
1121 std::memcpy(profile, prof2, bins*sizeof(double));
1122 sfree(prof2);
1125 //! Set energy unit (kJ/mol,kT,kCal/mol) and set it to zero at opt->zProf0
1126 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
1128 int i, bins, imin;
1129 double unit_factor = 1., diff;
1131 bins = opt->bins;
1133 /* Not log? Nothing to do! */
1134 if (!opt->bLog)
1136 return;
1139 /* Get profile in units of kT, kJ/mol, or kCal/mol */
1140 if (opt->unit == en_kT)
1142 unit_factor = 1.0;
1144 else if (opt->unit == en_kJ)
1146 unit_factor = BOLTZ*opt->Temperature;
1148 else if (opt->unit == en_kCal)
1150 unit_factor = (BOLTZ/CAL2JOULE)*opt->Temperature;
1152 else
1154 gmx_fatal(FARGS, "Sorry, I don't know this energy unit.");
1157 for (i = 0; i < bins; i++)
1159 if (profile[i] > 0.0)
1161 profile[i] = -std::log(profile[i])*unit_factor;
1165 /* shift to zero at z=opt->zProf0 */
1166 if (!opt->bProf0Set)
1168 diff = profile[0];
1170 else
1172 /* Get bin with shortest distance to opt->zProf0
1173 (-0.5 from bin position and +0.5 from rounding cancel) */
1174 imin = static_cast<int>((opt->zProf0-opt->min)/opt->dz);
1175 if (imin < 0)
1177 imin = 0;
1179 else if (imin >= bins)
1181 imin = bins-1;
1183 diff = profile[imin];
1186 /* Shift to zero */
1187 for (i = 0; i < bins; i++)
1189 profile[i] -= diff;
1193 //! Make an array of random integers (used for bootstrapping)
1194 void getRandomIntArray(int nPull, int blockLength, int* randomArray, gmx_rng_t rng)
1196 int ipull, blockBase, nr, ipullRandom;
1198 if (blockLength == 0)
1200 blockLength = nPull;
1203 for (ipull = 0; ipull < nPull; ipull++)
1205 blockBase = (ipull/blockLength)*blockLength;
1207 { /* make sure nothing bad happens in the last block */
1208 nr = static_cast<int>(gmx_rng_uniform_real(rng)*blockLength);
1209 ipullRandom = blockBase + nr;
1211 while (ipullRandom >= nPull);
1212 if (ipullRandom < 0 || ipullRandom >= nPull)
1214 gmx_fatal(FARGS, "Ups, random iWin = %d, nPull = %d, nr = %d, "
1215 "blockLength = %d, blockBase = %d\n",
1216 ipullRandom, nPull, nr, blockLength, blockBase);
1218 randomArray[ipull] = ipullRandom;
1220 /*for (ipull=0; ipull<nPull; ipull++)
1221 printf("%d ",randomArray[ipull]); printf("\n"); */
1224 /*! \brief Set pull group information of a synthetic histogram
1226 * This is used when bootstapping new trajectories and thereby create new histogtrams,
1227 * but it is not required if we bootstrap complete histograms.
1229 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
1230 t_UmbrellaWindow *thisWindow, int pullid)
1232 synthWindow->N [0] = thisWindow->N [pullid];
1233 synthWindow->Histo [0] = thisWindow->Histo [pullid];
1234 synthWindow->pos [0] = thisWindow->pos [pullid];
1235 synthWindow->z [0] = thisWindow->z [pullid];
1236 synthWindow->k [0] = thisWindow->k [pullid];
1237 synthWindow->bContrib[0] = thisWindow->bContrib [pullid];
1238 synthWindow->g [0] = thisWindow->g [pullid];
1239 synthWindow->bsWeight[0] = thisWindow->bsWeight [pullid];
1242 /*! \brief Calculate cumulative distribution function of of all histograms.
1244 * This allow to create random number sequences
1245 * which are distributed according to the histograms. Required to generate
1246 * the "synthetic" histograms for the Bootstrap method
1248 void calc_cumulatives(t_UmbrellaWindow *window, int nWindows,
1249 t_UmbrellaOptions *opt, const char *fnhist)
1251 int i, j, k, nbin;
1252 double last;
1253 char *fn = 0, *buf = 0;
1254 FILE *fp = 0;
1256 if (opt->bs_verbose)
1258 snew(fn, std::strlen(fnhist)+10);
1259 snew(buf, std::strlen(fnhist)+10);
1260 sprintf(fn, "%s_cumul.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4));
1261 fp = xvgropen(fn, "CDFs of umbrella windows", xlabel, "CDF", opt->oenv);
1264 nbin = opt->bins;
1265 for (i = 0; i < nWindows; i++)
1267 snew(window[i].cum, window[i].nPull);
1268 for (j = 0; j < window[i].nPull; j++)
1270 snew(window[i].cum[j], nbin+1);
1271 window[i].cum[j][0] = 0.;
1272 for (k = 1; k <= nbin; k++)
1274 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
1277 /* normalize CDFs. Ensure cum[nbin]==1 */
1278 last = window[i].cum[j][nbin];
1279 for (k = 0; k <= nbin; k++)
1281 window[i].cum[j][k] /= last;
1286 printf("Cumulative distribution functions of all histograms created.\n");
1287 if (opt->bs_verbose)
1289 for (k = 0; k <= nbin; k++)
1291 fprintf(fp, "%g\t", opt->min+k*opt->dz);
1292 for (i = 0; i < nWindows; i++)
1294 for (j = 0; j < window[i].nPull; j++)
1296 fprintf(fp, "%g\t", window[i].cum[j][k]);
1299 fprintf(fp, "\n");
1301 printf("Wrote cumulative distribution functions to %s\n", fn);
1302 xvgrclose(fp);
1303 sfree(fn);
1304 sfree(buf);
1309 /*! \brief Return j such that xx[j] <= x < xx[j+1]
1311 * This is used to generate a random sequence distributed according to a histogram
1313 void searchCumulative(double xx[], int n, double x, int *j)
1315 int ju, jm, jl;
1317 jl = -1;
1318 ju = n;
1319 while (ju-jl > 1)
1321 jm = (ju+jl) >> 1;
1322 if (x >= xx[jm])
1324 jl = jm;
1326 else
1328 ju = jm;
1331 if (x == xx[0])
1333 *j = 0;
1335 else if (x == xx[n-1])
1337 *j = n-2;
1339 else
1341 *j = jl;
1345 //! Bootstrap new trajectories and thereby generate new (bootstrapped) histograms
1346 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1347 int pullid, t_UmbrellaOptions *opt)
1349 int N, i, nbins, r_index, ibin;
1350 double r, tausteps = 0.0, a, ap, dt, x, invsqrt2, g, y, sig = 0., z, mu = 0.;
1351 char errstr[1024];
1353 N = thisWindow->N[pullid];
1354 dt = thisWindow->dt;
1355 nbins = thisWindow->nBin;
1357 /* tau = autocorrelation time */
1358 if (opt->tauBootStrap > 0.0)
1360 tausteps = opt->tauBootStrap/dt;
1362 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1364 /* calc tausteps from g=1+2tausteps */
1365 g = thisWindow->g[pullid];
1366 tausteps = (g-1)/2;
1368 else
1370 sprintf(errstr,
1371 "When generating hypothetical trajectories from given umbrella histograms,\n"
1372 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1373 "cannot be predicted. You have 3 options:\n"
1374 "1) Make gmx wham estimate the ACTs (options -ac and -acsig).\n"
1375 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1376 std::strcat(errstr,
1377 " with option -iiact for all umbrella windows.\n"
1378 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1379 " Use option (3) only if you are sure what you're doing, you may severely\n"
1380 " underestimate the error if a too small ACT is given.\n");
1381 gmx_fatal(FARGS, errstr);
1384 synthWindow->N [0] = N;
1385 synthWindow->pos [0] = thisWindow->pos[pullid];
1386 synthWindow->z [0] = thisWindow->z[pullid];
1387 synthWindow->k [0] = thisWindow->k[pullid];
1388 synthWindow->bContrib[0] = thisWindow->bContrib[pullid];
1389 synthWindow->g [0] = thisWindow->g [pullid];
1390 synthWindow->bsWeight[0] = thisWindow->bsWeight[pullid];
1392 for (i = 0; i < nbins; i++)
1394 synthWindow->Histo[0][i] = 0.;
1397 if (opt->bsMethod == bsMethod_trajGauss)
1399 sig = thisWindow->sigma [pullid];
1400 mu = thisWindow->aver [pullid];
1403 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1404 Use the following:
1405 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1406 then
1407 z = a*x + sqrt(1-a^2)*y
1408 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1409 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1410 function
1411 C(t) = exp(-t/tau) with tau=-1/ln(a)
1413 Then, use error function to turn the Gaussian random variable into a uniformly
1414 distributed one in [0,1]. Eventually, use cumulative distribution function of
1415 histogram to get random variables distributed according to histogram.
1416 Note: The ACT of the flat distribution and of the generated histogram is not
1417 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1419 a = std::exp(-1.0/tausteps);
1420 ap = std::sqrt(1.0-a*a);
1421 invsqrt2 = 1.0/std::sqrt(2.0);
1423 /* init random sequence */
1424 x = gmx_rng_gaussian_table(opt->rng);
1426 if (opt->bsMethod == bsMethod_traj)
1428 /* bootstrap points from the umbrella histograms */
1429 for (i = 0; i < N; i++)
1431 y = gmx_rng_gaussian_table(opt->rng);
1432 x = a*x+ap*y;
1433 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1434 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1436 r = 0.5*(1+std::erf(x*invsqrt2));
1437 searchCumulative(thisWindow->cum[pullid], nbins+1, r, &r_index);
1438 synthWindow->Histo[0][r_index] += 1.;
1441 else if (opt->bsMethod == bsMethod_trajGauss)
1443 /* bootstrap points from a Gaussian with the same average and sigma
1444 as the respective umbrella histogram. The idea was, that -given
1445 limited sampling- the bootstrapped histograms are otherwise biased
1446 from the limited sampling of the US histos. However, bootstrapping from
1447 the Gaussian seems to yield a similar estimate. */
1448 i = 0;
1449 while (i < N)
1451 y = gmx_rng_gaussian_table(opt->rng);
1452 x = a*x+ap*y;
1453 z = x*sig+mu;
1454 ibin = static_cast<int> (std::floor((z-opt->min)/opt->dz));
1455 if (opt->bCycl)
1457 if (ibin < 0)
1459 while ( (ibin += nbins) < 0)
1464 else if (ibin >= nbins)
1466 while ( (ibin -= nbins) >= nbins)
1473 if (ibin >= 0 && ibin < nbins)
1475 synthWindow->Histo[0][ibin] += 1.;
1476 i++;
1480 else
1482 gmx_fatal(FARGS, "Unknown bsMethod (id %d). That should not happen.\n", opt->bsMethod);
1486 /*! \brief Write all histograms to a file
1488 * If bs_index>=0, a number is added to the output file name to allow the ouput of all
1489 * sets of bootstrapped histograms.
1491 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1492 int bs_index, t_UmbrellaOptions *opt)
1494 char *fn = 0, *buf = 0, title[256];
1495 FILE *fp;
1496 int bins, l, i, j;
1498 if (bs_index >= 0)
1500 snew(fn, std::strlen(fnhist)+10);
1501 snew(buf, std::strlen(fnhist)+1);
1502 sprintf(fn, "%s_bs%d.xvg", std::strncpy(buf, fnhist, std::strlen(fnhist)-4), bs_index);
1503 sprintf(title, "Umbrella histograms. Bootstrap #%d", bs_index);
1505 else
1507 fn = gmx_strdup(fnhist);
1508 std::strcpy(title, "Umbrella histograms");
1511 fp = xvgropen(fn, title, xlabel, "count", opt->oenv);
1512 bins = opt->bins;
1514 /* Write histograms */
1515 for (l = 0; l < bins; ++l)
1517 fprintf(fp, "%e\t", (l+0.5)*opt->dz+opt->min);
1518 for (i = 0; i < nWindows; ++i)
1520 for (j = 0; j < window[i].nPull; ++j)
1522 fprintf(fp, "%e\t", window[i].Histo[j][l]);
1525 fprintf(fp, "\n");
1528 xvgrclose(fp);
1529 printf("Wrote %s\n", fn);
1530 if (bs_index >= 0)
1532 sfree(buf);
1534 sfree(fn);
1537 //! Used for qsort to sort random numbers
1538 int func_wham_is_larger(const void *a, const void *b)
1540 double *aa, *bb;
1541 aa = (double*)a;
1542 bb = (double*)b;
1543 if (*aa < *bb)
1545 return -1;
1547 else if (*aa > *bb)
1549 return 1;
1551 else
1553 return 0;
1557 //! Make random weights for histograms for the Bayesian bootstrap of complete histograms)
1558 void setRandomBsWeights(t_UmbrellaWindow *synthwin, int nAllPull, t_UmbrellaOptions *opt)
1560 int i;
1561 double *r;
1563 snew(r, nAllPull);
1565 /* generate ordered random numbers between 0 and nAllPull */
1566 for (i = 0; i < nAllPull-1; i++)
1568 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1570 qsort((void *)r, nAllPull-1, sizeof(double), &func_wham_is_larger);
1571 r[nAllPull-1] = 1.0*nAllPull;
1573 synthwin[0].bsWeight[0] = r[0];
1574 for (i = 1; i < nAllPull; i++)
1576 synthwin[i].bsWeight[0] = r[i]-r[i-1];
1579 /* avoid to have zero weight by adding a tiny value */
1580 for (i = 0; i < nAllPull; i++)
1582 if (synthwin[i].bsWeight[0] < 1e-5)
1584 synthwin[i].bsWeight[0] = 1e-5;
1588 sfree(r);
1591 //! The main bootstrapping routine
1592 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1593 char* ylabel, double *profile,
1594 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1596 t_UmbrellaWindow * synthWindow;
1597 double *bsProfile, *bsProfiles_av, *bsProfiles_av2, maxchange = 1e20, tmp, stddev;
1598 int i, j, *randomArray = 0, winid, pullid, ib;
1599 int iAllPull, nAllPull, *allPull_winId, *allPull_pullId;
1600 FILE *fp;
1601 gmx_bool bExact = FALSE;
1603 /* init random generator */
1604 if (opt->bsSeed == -1)
1606 opt->rng = gmx_rng_init(gmx_rng_make_seed());
1608 else
1610 opt->rng = gmx_rng_init(opt->bsSeed);
1613 snew(bsProfile, opt->bins);
1614 snew(bsProfiles_av, opt->bins);
1615 snew(bsProfiles_av2, opt->bins);
1617 /* Create array of all pull groups. Note that different windows
1618 may have different nr of pull groups
1619 First: Get total nr of pull groups */
1620 nAllPull = 0;
1621 for (i = 0; i < nWindows; i++)
1623 nAllPull += window[i].nPull;
1625 snew(allPull_winId, nAllPull);
1626 snew(allPull_pullId, nAllPull);
1627 iAllPull = 0;
1628 /* Setup one array of all pull groups */
1629 for (i = 0; i < nWindows; i++)
1631 for (j = 0; j < window[i].nPull; j++)
1633 allPull_winId[iAllPull] = i;
1634 allPull_pullId[iAllPull] = j;
1635 iAllPull++;
1639 /* setup stuff for synthetic windows */
1640 snew(synthWindow, nAllPull);
1641 for (i = 0; i < nAllPull; i++)
1643 synthWindow[i].nPull = 1;
1644 synthWindow[i].nBin = opt->bins;
1645 snew(synthWindow[i].Histo, 1);
1646 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1648 snew(synthWindow[i].Histo[0], opt->bins);
1650 snew(synthWindow[i].N, 1);
1651 snew(synthWindow[i].pos, 1);
1652 snew(synthWindow[i].z, 1);
1653 snew(synthWindow[i].k, 1);
1654 snew(synthWindow[i].bContrib, 1);
1655 snew(synthWindow[i].g, 1);
1656 snew(synthWindow[i].bsWeight, 1);
1659 switch (opt->bsMethod)
1661 case bsMethod_hist:
1662 snew(randomArray, nAllPull);
1663 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1664 please_cite(stdout, "Hub2006");
1665 break;
1666 case bsMethod_BayesianHist:
1667 /* just copy all histogams into synthWindow array */
1668 for (i = 0; i < nAllPull; i++)
1670 winid = allPull_winId [i];
1671 pullid = allPull_pullId[i];
1672 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1674 break;
1675 case bsMethod_traj:
1676 case bsMethod_trajGauss:
1677 calc_cumulatives(window, nWindows, opt, fnhist);
1678 break;
1679 default:
1680 gmx_fatal(FARGS, "Unknown bootstrap method. That should not have happened.\n");
1683 /* do bootstrapping */
1684 fp = xvgropen(fnprof, "Bootstrap profiles", xlabel, ylabel, opt->oenv);
1685 for (ib = 0; ib < opt->nBootStrap; ib++)
1687 printf(" *******************************************\n"
1688 " ******** Start bootstrap nr %d ************\n"
1689 " *******************************************\n", ib+1);
1691 switch (opt->bsMethod)
1693 case bsMethod_hist:
1694 /* bootstrap complete histograms from given histograms */
1695 getRandomIntArray(nAllPull, opt->histBootStrapBlockLength, randomArray, opt->rng);
1696 for (i = 0; i < nAllPull; i++)
1698 winid = allPull_winId [randomArray[i]];
1699 pullid = allPull_pullId[randomArray[i]];
1700 copy_pullgrp_to_synthwindow(synthWindow+i, window+winid, pullid);
1702 break;
1703 case bsMethod_BayesianHist:
1704 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1705 setRandomBsWeights(synthWindow, nAllPull, opt);
1706 break;
1707 case bsMethod_traj:
1708 case bsMethod_trajGauss:
1709 /* create new histos from given histos, that is generate new hypothetical
1710 trajectories */
1711 for (i = 0; i < nAllPull; i++)
1713 winid = allPull_winId[i];
1714 pullid = allPull_pullId[i];
1715 create_synthetic_histo(synthWindow+i, window+winid, pullid, opt);
1717 break;
1720 /* write histos in case of verbose output */
1721 if (opt->bs_verbose)
1723 print_histograms(fnhist, synthWindow, nAllPull, ib, opt);
1726 /* do wham */
1727 i = 0;
1728 bExact = FALSE;
1729 maxchange = 1e20;
1730 std::memcpy(bsProfile, profile, opt->bins*sizeof(double)); /* use profile as guess */
1733 if ( (i%opt->stepUpdateContrib) == 0)
1735 setup_acc_wham(bsProfile, synthWindow, nAllPull, opt);
1737 if (maxchange < opt->Tolerance)
1739 bExact = TRUE;
1741 if (((i%opt->stepchange) == 0 || i == 1) && i != 0)
1743 printf("\t%4d) Maximum change %e\n", i, maxchange);
1745 calc_profile(bsProfile, synthWindow, nAllPull, opt, bExact);
1746 i++;
1748 while ( (maxchange = calc_z(bsProfile, synthWindow, nAllPull, opt, bExact)) > opt->Tolerance || !bExact);
1749 printf("\tConverged in %d iterations. Final maximum change %g\n", i, maxchange);
1751 if (opt->bLog)
1753 prof_normalization_and_unit(bsProfile, opt);
1756 /* symmetrize profile around z=0 */
1757 if (opt->bSym)
1759 symmetrizeProfile(bsProfile, opt);
1762 /* save stuff to get average and stddev */
1763 for (i = 0; i < opt->bins; i++)
1765 tmp = bsProfile[i];
1766 bsProfiles_av[i] += tmp;
1767 bsProfiles_av2[i] += tmp*tmp;
1768 fprintf(fp, "%e\t%e\n", (i+0.5)*opt->dz+opt->min, tmp);
1770 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
1772 xvgrclose(fp);
1774 /* write average and stddev */
1775 fp = xvgropen(fnres, "Average and stddev from bootstrapping", xlabel, ylabel, opt->oenv);
1776 if (output_env_get_print_xvgr_codes(opt->oenv))
1778 fprintf(fp, "@TYPE xydy\n");
1780 for (i = 0; i < opt->bins; i++)
1782 bsProfiles_av [i] /= opt->nBootStrap;
1783 bsProfiles_av2[i] /= opt->nBootStrap;
1784 tmp = bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1785 stddev = (tmp >= 0.) ? std::sqrt(tmp) : 0.; /* Catch rouding errors */
1786 fprintf(fp, "%e\t%e\t%e\n", (i+0.5)*opt->dz+opt->min, bsProfiles_av [i], stddev);
1788 xvgrclose(fp);
1789 printf("Wrote boot strap result to %s\n", fnres);
1792 //! Return type of input file based on file extension (xvg, pdo, or tpr)
1793 int whaminFileType(char *fn)
1795 int len;
1796 len = std::strlen(fn);
1797 if (std::strcmp(fn+len-3, "tpr") == 0)
1799 return whamin_tpr;
1801 else if (std::strcmp(fn+len-3, "xvg") == 0 || std::strcmp(fn+len-6, "xvg.gz") == 0)
1803 return whamin_pullxf;
1805 else if (std::strcmp(fn+len-3, "pdo") == 0 || std::strcmp(fn+len-6, "pdo.gz") == 0)
1807 return whamin_pdo;
1809 else
1811 gmx_fatal(FARGS, "Unknown file type of %s. Should be tpr, xvg, or pdo.\n", fn);
1813 return whamin_unknown;
1816 //! Read the files names in pdo-files.dat, pullf/x-files.dat, tpr-files.dat
1817 void read_wham_in(const char *fn, char ***filenamesRet, int *nfilesRet,
1818 t_UmbrellaOptions *opt)
1820 char **filename = 0, tmp[WHAM_MAXFILELEN+2];
1821 int nread, sizenow, i, block = 1;
1822 FILE *fp;
1824 fp = gmx_ffopen(fn, "r");
1825 nread = 0;
1826 sizenow = 0;
1827 while (fgets(tmp, sizeof(tmp), fp) != NULL)
1829 if (std::strlen(tmp) >= WHAM_MAXFILELEN)
1831 gmx_fatal(FARGS, "Filename too long in %s. Only %d characters allowed.\n", fn, WHAM_MAXFILELEN);
1833 if (nread >= sizenow)
1835 sizenow += block;
1836 srenew(filename, sizenow);
1837 for (i = sizenow-block; i < sizenow; i++)
1839 snew(filename[i], WHAM_MAXFILELEN);
1842 /* remove newline if there is one */
1843 if (tmp[std::strlen(tmp)-1] == '\n')
1845 tmp[std::strlen(tmp)-1] = '\0';
1847 std::strcpy(filename[nread], tmp);
1848 if (opt->verbose)
1850 printf("Found file %s in %s\n", filename[nread], fn);
1852 nread++;
1854 *filenamesRet = filename;
1855 *nfilesRet = nread;
1858 //! Open a file or a pipe to a gzipped file
1859 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt, gmx_bool *bPipeOpen)
1861 char Buffer[1024], gunzip[1024], *Path = 0;
1862 FILE *pipe = 0;
1863 static gmx_bool bFirst = 1;
1865 /* gzipped pdo file? */
1866 if ((std::strcmp(fn+std::strlen(fn)-3, ".gz") == 0))
1868 /* search gunzip executable */
1869 if (!(Path = getenv("GMX_PATH_GZIP")))
1871 if (gmx_fexist("/bin/gunzip"))
1873 sprintf(gunzip, "%s", "/bin/gunzip");
1875 else if (gmx_fexist("/usr/bin/gunzip"))
1877 sprintf(gunzip, "%s", "/usr/bin/gunzip");
1879 else
1881 gmx_fatal(FARGS, "Cannot find executable gunzip in /bin or /usr/bin.\n"
1882 "You may want to define the path to gunzip "
1883 "with the environment variable GMX_PATH_GZIP.", gunzip);
1886 else
1888 sprintf(gunzip, "%s/gunzip", Path);
1889 if (!gmx_fexist(gunzip))
1891 gmx_fatal(FARGS, "Cannot find executable %s. Please define the path to gunzip"
1892 " in the environmental varialbe GMX_PATH_GZIP.", gunzip);
1895 if (bFirst)
1897 printf("Using gunzip executable %s\n", gunzip);
1898 bFirst = 0;
1900 if (!gmx_fexist(fn))
1902 gmx_fatal(FARGS, "File %s does not exist.\n", fn);
1904 sprintf(Buffer, "%s -c < %s", gunzip, fn);
1905 if (opt->verbose)
1907 printf("Executing command '%s'\n", Buffer);
1909 #if HAVE_PIPES
1910 if ((pipe = popen(Buffer, "r")) == NULL)
1912 gmx_fatal(FARGS, "Unable to open pipe to `%s'\n", Buffer);
1914 #else
1915 gmx_fatal(FARGS, "Cannot open a compressed file on platform without pipe support");
1916 #endif
1917 *bPipeOpen = TRUE;
1919 else
1921 pipe = gmx_ffopen(fn, "r");
1922 *bPipeOpen = FALSE;
1925 return pipe;
1928 //! Close file or pipe
1929 void pdo_close_file(FILE *fp)
1931 #if HAVE_PIPES
1932 pclose(fp);
1933 #else
1934 gmx_ffclose(fp);
1935 #endif
1938 //! Reading all pdo files
1939 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1940 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1942 FILE *file;
1943 real mintmp, maxtmp, done = 0.;
1944 int i;
1945 gmx_bool bPipeOpen;
1946 /* char Buffer0[1000]; */
1948 if (nfiles < 1)
1950 gmx_fatal(FARGS, "No files found. Hick.");
1953 /* if min and max are not given, get min and max from the input files */
1954 if (opt->bAuto)
1956 printf("Automatic determination of boundaries from %d pdo files...\n", nfiles);
1957 opt->min = 1e20;
1958 opt->max = -1e20;
1959 for (i = 0; i < nfiles; ++i)
1961 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
1962 /*fgets(Buffer0,999,file);
1963 fprintf(stderr,"First line '%s'\n",Buffer0); */
1964 done = 100.0*(i+1)/nfiles;
1965 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
1966 if (opt->verbose)
1968 printf("\n");
1970 read_pdo_header(file, header, opt);
1971 /* here only determine min and max of this window */
1972 read_pdo_data(file, header, i, NULL, opt, TRUE, &mintmp, &maxtmp);
1973 if (maxtmp > opt->max)
1975 opt->max = maxtmp;
1977 if (mintmp < opt->min)
1979 opt->min = mintmp;
1981 if (bPipeOpen)
1983 pdo_close_file(file);
1985 else
1987 gmx_ffclose(file);
1990 printf("\n");
1991 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
1992 if (opt->bBoundsOnly)
1994 printf("Found option -boundsonly, now exiting.\n");
1995 exit (0);
1998 /* store stepsize in profile */
1999 opt->dz = (opt->max-opt->min)/opt->bins;
2001 /* Having min and max, we read in all files */
2002 /* Loop over all files */
2003 for (i = 0; i < nfiles; ++i)
2005 done = 100.0*(i+1)/nfiles;
2006 printf("\rOpening %s ... [%2.0f%%]", fn[i], done); fflush(stdout);
2007 if (opt->verbose)
2009 printf("\n");
2011 file = open_pdo_pipe(fn[i], opt, &bPipeOpen);
2012 read_pdo_header(file, header, opt);
2013 /* load data into window */
2014 read_pdo_data(file, header, i, window, opt, FALSE, NULL, NULL);
2015 if ((window+i)->Ntot[0] == 0)
2017 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
2019 if (bPipeOpen)
2021 pdo_close_file(file);
2023 else
2025 gmx_ffclose(file);
2028 printf("\n");
2029 for (i = 0; i < nfiles; ++i)
2031 sfree(fn[i]);
2033 sfree(fn);
2036 //! translate 0/1 to N/Y to write pull dimensions
2037 #define int2YN(a) (((a) == 0) ? ("N") : ("Y"))
2039 //! Read pull groups from a tpr file (including position, force const, geometry, number of groups)
2040 void read_tpr_header(const char *fn, t_UmbrellaHeader* header, t_UmbrellaOptions *opt)
2042 t_inputrec ir;
2043 int i, ncrd;
2044 t_state state;
2045 static int first = 1;
2047 /* printf("Reading %s \n",fn); */
2048 read_tpx_state(fn, &ir, &state, NULL);
2050 if (!ir.bPull)
2052 gmx_fatal(FARGS, "This is not a tpr with COM pulling");
2055 /* nr of pull groups */
2056 ncrd = 0;
2057 for (i = 0; i < ir.pull->ncoord; i++)
2059 if (ir.pull->coord[i].eType == epullUMBRELLA)
2061 int m;
2063 if (ncrd == 0)
2065 header->pull_geometry = ir.pull->coord[i].eGeom;
2066 copy_ivec(ir.pull->coord[i].dim, header->pull_dim);
2069 if (ncrd != i)
2071 /* TODO: remove this restriction */
2072 gmx_fatal(FARGS, "Currently tpr files where a non-umbrella pull coordinate is followed by an umbrella pull coordinate are not supported");
2075 for (m = 0; m < DIM; m++)
2077 if (ir.pull->coord[i].eGeom != header->pull_geometry)
2079 /* TODO: remove the restriction */
2080 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should use the same geometry");
2083 if (ir.pull->coord[i].dim[m] != header->pull_dim[m])
2085 /* TODO: remove the restriction */
2086 gmx_fatal(FARGS, "Currently all umbrella pull coordinates should operate on the same dimensions");
2090 ncrd++;
2094 if (ncrd < 1)
2096 gmx_fatal(FARGS, "This is not a tpr of umbrella simulation. Found only %d umbrella pull coordinates\n", ncrd);
2099 header->npullcrds_tot = ir.pull->ncoord;
2100 header->npullcrds = ncrd;
2101 header->bPrintRef = ir.pull->bPrintCOM1;
2102 if (ir.pull->bPrintCOM2)
2104 gmx_fatal(FARGS, "Reading pull output with printing of the COM of group 2 is currently not supported");
2106 if (ir.pull->bPrintRefValue)
2108 gmx_fatal(FARGS, "Reading pull output with printing of the reference value of the coordinates is currently not supported");
2110 header->bPrintComp = ir.pull->bPrintComp;
2111 header->pull_ndim = header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
2112 /* We should add a struct for each pull coord with all pull coord data
2113 instead of allocating many arrays for each property */
2114 snew(header->k, ncrd);
2115 snew(header->init_dist, ncrd);
2116 snew(header->umbInitDist, ncrd);
2118 /* only z-direction with epullgCYL? */
2119 if (header->pull_geometry == epullgCYL)
2121 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
2123 gmx_fatal(FARGS, "With pull geometry 'cylinder', expected pulling in Z direction only.\n"
2124 "However, found dimensions [%s %s %s]\n",
2125 int2YN(header->pull_dim[XX]), int2YN(header->pull_dim[YY]),
2126 int2YN(header->pull_dim[ZZ]));
2130 for (i = 0; i < ncrd; i++)
2132 header->k[i] = ir.pull->coord[i].k;
2133 if (header->k[i] == 0.0)
2135 gmx_fatal(FARGS, "Pull coordinate %d has force constant of of 0.0 in %s.\n"
2136 "That doesn't seem to be an Umbrella tpr.\n",
2137 i, fn);
2139 header->init_dist[i] = ir.pull->coord[i].init;
2141 /* initial distance to reference */
2142 switch (header->pull_geometry)
2144 case epullgCYL:
2145 /* umbrella distance stored in init_dist[i] for geometry cylinder (not in ...[i][ZZ]) */
2146 case epullgDIST:
2147 case epullgDIR:
2148 case epullgDIRPBC:
2149 header->umbInitDist[i] = header->init_dist[i];
2150 break;
2151 default:
2152 gmx_fatal(FARGS, "Pull geometry %s not supported\n", epullg_names[header->pull_geometry]);
2156 if (opt->verbose || first)
2158 printf("File %s, %d coordinates, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n"
2159 "\tPull group coordinates%s expected in pullx files.\n",
2160 fn, header->npullcrds, epullg_names[header->pull_geometry],
2161 int2YN(header->pull_dim[0]), int2YN(header->pull_dim[1]), int2YN(header->pull_dim[2]),
2162 header->pull_ndim, (header->bPrintRef ? "" : " not"));
2163 for (i = 0; i < ncrd; i++)
2165 printf("\tcrd %d) k = %-5g position = %g\n", i, header->k[i], header->umbInitDist[i]);
2168 if (!opt->verbose && first)
2170 printf("\tUse option -v to see this output for all input tpr files\n\n");
2173 first = 0;
2176 //! 2-norm in a ndim-dimensional space
2177 double dist_ndim(double **dx, int ndim, int line)
2179 int i;
2180 double r2 = 0.;
2181 for (i = 0; i < ndim; i++)
2183 r2 += sqr(dx[i][line]);
2185 return std::sqrt(r2);
2188 //! Read pullx.xvg or pullf.xvg
2189 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
2190 t_UmbrellaWindow * window,
2191 t_UmbrellaOptions *opt,
2192 gmx_bool bGetMinMax, real *mintmp, real *maxtmp,
2193 t_groupselection *groupsel)
2195 double **y = 0, pos = 0., t, force, time0 = 0., dt;
2196 int ny, nt, bins, ibin, i, g, gUsed, dstep = 1, nColPerCrd, nColRefEachCrd, nColExpect, ntot, column;
2197 real min, max, minfound = 1e20, maxfound = -1e20;
2198 gmx_bool dt_ok, timeok, bHaveForce;
2199 const char *quantity;
2200 const int blocklen = 4096;
2201 int *lennow = 0;
2202 static gmx_bool bFirst = TRUE;
2205 * Data columns in pull output:
2206 * - in force output pullf.xvg:
2207 * No reference columns, one column per pull coordinate
2209 * - in position output pullx.xvg
2210 * bPrintRef == TRUE: for each pull coordinate: ndim reference columns, and ndim dx columns
2211 * bPrintRef == FALSE: for each pull coordinate: no reference columns, but ndim dx columns
2214 nColPerCrd = 1;
2215 if (opt->bPullx && header->bPrintComp)
2217 nColPerCrd += header->pull_ndim;
2219 quantity = opt->bPullx ? "position" : "force";
2221 if (opt->bPullx && header->bPrintRef)
2223 nColRefEachCrd = header->pull_ndim;
2225 else
2227 nColRefEachCrd = 0;
2230 nColExpect = 1 + header->npullcrds*(nColRefEachCrd+nColPerCrd);
2231 bHaveForce = opt->bPullf;
2233 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
2234 That avoids the somewhat tedious extraction of the reaction coordinate from the pullx files.
2235 Sorry for the laziness, this is a To-do. */
2236 if ( (header->pull_geometry == epullgDIR || header->pull_geometry == epullgDIRPBC)
2237 && opt->bPullx)
2239 gmx_fatal(FARGS, "With pull geometries \"direction\" and \"direction_periodic\", only pull force "
2240 "reading \n(option -if) is supported at present, "
2241 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
2242 "forces (pullf.xvg files)\nand provide them to gmx wham with option -if.",
2243 epullg_names[header->pull_geometry]);
2246 nt = read_xvg(fn, &y, &ny);
2248 /* Check consistency */
2249 if (nt < 1)
2251 gmx_fatal(FARGS, "Empty pull %s file %s\n", quantity, fn);
2253 if (bFirst)
2255 printf("Reading pull %s file with pull geometry %s and %d pull dimensions\n",
2256 bHaveForce ? "force" : "position", epullg_names[header->pull_geometry],
2257 header->pull_ndim);
2258 /* Since this tool code has not updated yet to allow difference pull
2259 * dimensions per pull coordinate, we can't easily check the exact
2260 * number of expected columns, so we only check what we expect for
2261 * the pull coordinates selected for the WHAM analysis.
2263 printf("Expecting these columns in pull file:\n"
2264 "\t%d reference columns for each individual pull coordinate\n"
2265 "\t%d data columns for each pull coordinate\n", nColRefEachCrd, nColPerCrd);
2266 printf("With %d pull groups, expect %s%d columns (including the time column)\n",
2267 header->npullcrds,
2268 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2269 nColExpect);
2270 bFirst = FALSE;
2272 if (ny < nColExpect ||
2273 (header->npullcrds == header->npullcrds_tot && ny > nColExpect))
2275 gmx_fatal(FARGS, "Using %d pull coodinates from %s,\n but found %d data columns in %s (expected %s%d)\n"
2276 "\nMaybe you confused options -ix and -if ?\n",
2277 header->npullcrds, fntpr, ny-1, fn,
2278 header->npullcrds < header->npullcrds_tot ? "at least " : "",
2279 nColExpect-1);
2282 if (opt->verbose)
2284 printf("Found %d times and %d %s sets %s\n", nt, (ny-1)/nColPerCrd, quantity, fn);
2287 if (!bGetMinMax)
2289 bins = opt->bins;
2290 min = opt->min;
2291 max = opt->max;
2292 if (nt > 1)
2294 window->dt = y[0][1]-y[0][0];
2296 else if (opt->nBootStrap && opt->tauBootStrap != 0.0)
2298 fprintf(stderr, "\n *** WARNING, Could not determine time step in %s\n", fn);
2301 /* Need to alocate memory and set up structure */
2303 if (groupsel)
2305 /* Use only groups selected with option -is file */
2306 if (header->npullcrds != groupsel->n)
2308 gmx_fatal(FARGS, "tpr file contains %d pull groups, but expected %d from group selection file\n",
2309 header->npullcrds, groupsel->n);
2311 window->nPull = groupsel->nUse;
2313 else
2315 window->nPull = header->npullcrds;
2318 window->nBin = bins;
2319 snew(window->Histo, window->nPull);
2320 snew(window->z, window->nPull);
2321 snew(window->k, window->nPull);
2322 snew(window->pos, window->nPull);
2323 snew(window->N, window->nPull);
2324 snew(window->Ntot, window->nPull);
2325 snew(window->g, window->nPull);
2326 snew(window->bsWeight, window->nPull);
2327 window->bContrib = 0;
2329 if (opt->bCalcTauInt)
2331 snew(window->ztime, window->nPull);
2333 else
2335 window->ztime = NULL;
2337 snew(lennow, window->nPull);
2339 for (g = 0; g < window->nPull; ++g)
2341 window->z[g] = 1;
2342 window->bsWeight[g] = 1.;
2343 snew(window->Histo[g], bins);
2344 window->N[g] = 0;
2345 window->Ntot[g] = 0;
2346 window->g[g] = 1.;
2347 if (opt->bCalcTauInt)
2349 window->ztime[g] = NULL;
2353 /* Copying umbrella center and force const is more involved since not
2354 all pull groups from header (tpr file) may be used in window variable */
2355 for (g = 0, gUsed = 0; g < header->npullcrds; ++g)
2357 if (groupsel && (groupsel->bUse[g] == FALSE))
2359 continue;
2361 window->k[gUsed] = header->k[g];
2362 window->pos[gUsed] = header->umbInitDist[g];
2363 gUsed++;
2366 else
2367 { /* only determine min and max */
2368 minfound = 1e20;
2369 maxfound = -1e20;
2370 min = max = bins = 0; /* Get rid of warnings */
2374 for (i = 0; i < nt; i++)
2376 /* Do you want that time frame? */
2377 t = 1.0/1000*( static_cast<gmx_int64_t> ((y[0][i]*1000) + 0.5)); /* round time to fs */
2379 /* get time step of pdo file and get dstep from opt->dt */
2380 if (i == 0)
2382 time0 = t;
2384 else if (i == 1)
2386 dt = t-time0;
2387 if (opt->dt > 0.0)
2389 dstep = static_cast<int>(opt->dt/dt+0.5);
2390 if (dstep == 0)
2392 dstep = 1;
2395 if (!bGetMinMax)
2397 window->dt = dt*dstep;
2401 dt_ok = (i%dstep == 0);
2402 timeok = (dt_ok && t >= opt->tmin && t <= opt->tmax);
2403 /*if (opt->verbose)
2404 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
2405 t,opt->tmin, opt->tmax, dt_ok,timeok); */
2406 if (timeok)
2408 /* Note: if groupsel == NULL:
2409 * all groups in pullf/x file are stored in this window, and gUsed == g
2410 * if groupsel != NULL:
2411 * only groups with groupsel.bUse[g]==TRUE are stored. gUsed is not always equal g
2413 gUsed = -1;
2414 for (g = 0; g < header->npullcrds; ++g)
2416 /* was this group selected for application in WHAM? */
2417 if (groupsel && (groupsel->bUse[g] == FALSE))
2419 continue;
2422 gUsed++;
2424 if (bHaveForce)
2426 /* y has 1 time column y[0] and one column per force y[1],...,y[nCrds] */
2427 force = y[g+1][i];
2428 pos = -force/header->k[g] + header->umbInitDist[g];
2430 else
2432 /* pick the right column from:
2433 * time ref1[ndim] coord1[1(ndim)] ref2[ndim] coord2[1(ndim)] ...
2435 column = 1 + nColRefEachCrd + g*(nColRefEachCrd+nColPerCrd);
2436 pos = y[column][i];
2439 /* printf("crd %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
2440 if (bGetMinMax)
2442 if (pos < minfound)
2444 minfound = pos;
2446 if (pos > maxfound)
2448 maxfound = pos;
2451 else
2453 if (gUsed >= window->nPull)
2455 gmx_fatal(FARGS, "gUsed too large (%d, nPull=%d). This error should have been catched before.\n",
2456 gUsed, window->nPull);
2459 if (opt->bCalcTauInt && !bGetMinMax)
2461 /* save time series for autocorrelation analysis */
2462 ntot = window->Ntot[gUsed];
2463 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
2464 if (ntot >= lennow[gUsed])
2466 lennow[gUsed] += blocklen;
2467 srenew(window->ztime[gUsed], lennow[gUsed]);
2469 window->ztime[gUsed][ntot] = pos;
2472 ibin = static_cast<int> (std::floor((pos-min)/(max-min)*bins));
2473 if (opt->bCycl)
2475 if (ibin < 0)
2477 while ( (ibin += bins) < 0)
2482 else if (ibin >= bins)
2484 while ( (ibin -= bins) >= bins)
2490 if (ibin >= 0 && ibin < bins)
2492 window->Histo[gUsed][ibin] += 1.;
2493 window->N[gUsed]++;
2495 window->Ntot[gUsed]++;
2499 else if (t > opt->tmax)
2501 if (opt->verbose)
2503 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n", t, opt->tmax);
2505 break;
2509 if (bGetMinMax)
2511 *mintmp = minfound;
2512 *maxtmp = maxfound;
2514 sfree(lennow);
2515 for (i = 0; i < ny; i++)
2517 sfree(y[i]);
2521 //! read pullf-files.dat or pullx-files.dat and tpr-files.dat
2522 void read_tpr_pullxf_files(char **fnTprs, char **fnPull, int nfiles,
2523 t_UmbrellaHeader* header,
2524 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2526 int i;
2527 real mintmp, maxtmp;
2529 printf("Reading %d tpr and pullf files\n", nfiles/2);
2531 /* min and max not given? */
2532 if (opt->bAuto)
2534 printf("Automatic determination of boundaries...\n");
2535 opt->min = 1e20;
2536 opt->max = -1e20;
2537 for (i = 0; i < nfiles; i++)
2539 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2541 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2543 read_tpr_header(fnTprs[i], header, opt);
2544 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2546 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2548 read_pull_xf(fnPull[i], fnTprs[i], header, NULL, opt, TRUE, &mintmp, &maxtmp,
2549 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2550 if (maxtmp > opt->max)
2552 opt->max = maxtmp;
2554 if (mintmp < opt->min)
2556 opt->min = mintmp;
2559 printf("\nDetermined boundaries to %f and %f\n\n", opt->min, opt->max);
2560 if (opt->bBoundsOnly)
2562 printf("Found option -boundsonly, now exiting.\n");
2563 exit (0);
2566 /* store stepsize in profile */
2567 opt->dz = (opt->max-opt->min)/opt->bins;
2569 for (i = 0; i < nfiles; i++)
2571 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2573 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a tpr file\n", i);
2575 read_tpr_header(fnTprs[i], header, opt);
2576 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2578 gmx_fatal(FARGS, "Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n", i);
2580 read_pull_xf(fnPull[i], fnTprs[i], header, window+i, opt, FALSE, NULL, NULL,
2581 (opt->nGroupsel > 0) ? &opt->groupsel[i] : NULL);
2582 if (window[i].Ntot[0] == 0)
2584 fprintf(stderr, "\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2588 for (i = 0; i < nfiles; i++)
2590 sfree(fnTprs[i]);
2591 sfree(fnPull[i]);
2593 sfree(fnTprs);
2594 sfree(fnPull);
2597 /*! \brief Read integrated autocorrelation times from input file (option -iiact)
2599 * Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation times.
2600 * The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2602 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins, const char* fn)
2604 int nlines, ny, i, ig;
2605 double **iact;
2607 printf("Readging Integrated autocorrelation times from %s ...\n", fn);
2608 nlines = read_xvg(fn, &iact, &ny);
2609 if (nlines != nwins)
2611 gmx_fatal(FARGS, "Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2612 nlines, fn, nwins);
2614 for (i = 0; i < nlines; i++)
2616 if (window[i].nPull != ny)
2618 gmx_fatal(FARGS, "You are providing autocorrelation times with option -iiact and the\n"
2619 "number of pull groups is different in different simulations. That is not\n"
2620 "supported yet. Sorry.\n");
2622 for (ig = 0; ig < window[i].nPull; ig++)
2624 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2625 window[i].g[ig] = 1+2*iact[ig][i]/window[i].dt;
2627 if (iact[ig][i] <= 0.0)
2629 fprintf(stderr, "\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i], i, ig);
2636 /*! \brief Smooth autocorreltion times along the reaction coordinate.
2638 * This is useful
2639 * if the ACT is subject to high uncertainty in case if limited sampling. Note
2640 * that -in case of limited sampling- the ACT may be severely underestimated.
2641 * Note: the g=1+2tau are overwritten.
2642 * If opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2643 * by the smoothing
2645 void smoothIact(t_UmbrellaWindow *window, int nwins, t_UmbrellaOptions *opt)
2647 int i, ig, j, jg;
2648 double pos, dpos2, siglim, siglim2, gaufact, invtwosig2, w, weight, tausm;
2650 /* only evaluate within +- 3sigma of the Gausian */
2651 siglim = 3.0*opt->sigSmoothIact;
2652 siglim2 = dsqr(siglim);
2653 /* pre-factor of Gaussian */
2654 gaufact = 1.0/(std::sqrt(2*M_PI)*opt->sigSmoothIact);
2655 invtwosig2 = 0.5/dsqr(opt->sigSmoothIact);
2657 for (i = 0; i < nwins; i++)
2659 snew(window[i].tausmooth, window[i].nPull);
2660 for (ig = 0; ig < window[i].nPull; ig++)
2662 tausm = 0.;
2663 weight = 0;
2664 pos = window[i].pos[ig];
2665 for (j = 0; j < nwins; j++)
2667 for (jg = 0; jg < window[j].nPull; jg++)
2669 dpos2 = dsqr(window[j].pos[jg]-pos);
2670 if (dpos2 < siglim2)
2672 w = gaufact*std::exp(-dpos2*invtwosig2);
2673 weight += w;
2674 tausm += w*window[j].tau[jg];
2675 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2676 w,dpos2,pos,gaufact,invtwosig2); */
2680 tausm /= weight;
2681 if (opt->bAllowReduceIact || tausm > window[i].tau[ig])
2683 window[i].tausmooth[ig] = tausm;
2685 else
2687 window[i].tausmooth[ig] = window[i].tau[ig];
2689 window[i].g[ig] = 1+2*tausm/window[i].dt;
2694 //! Stop integrating autoccorelation function when ACF drops under this value
2695 #define WHAM_AC_ZERO_LIMIT 0.05
2697 /*! \brief Try to compute the autocorrelation time for each umbrealla window
2699 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window, int nwins,
2700 t_UmbrellaOptions *opt, const char *fn)
2702 int i, ig, ncorr, ntot, j, k, *count, restart;
2703 real *corr, c0, dt, tmp;
2704 real *ztime, av, tausteps;
2705 FILE *fp, *fpcorr = 0;
2707 if (opt->verbose)
2709 fpcorr = xvgropen("hist_autocorr.xvg", "Autocorrelation functions of umbrella windows",
2710 "time [ps]", "autocorrelation function", opt->oenv);
2713 printf("\n");
2714 for (i = 0; i < nwins; i++)
2716 printf("\rEstimating integrated autocorrelation times ... [%2.0f%%] ...", 100.*(i+1)/nwins);
2717 fflush(stdout);
2718 ntot = window[i].Ntot[0];
2720 /* using half the maximum time as length of autocorrelation function */
2721 ncorr = ntot/2;
2722 if (ntot < 10)
2724 gmx_fatal(FARGS, "Tryig to estimtate autocorrelation time from only %d"
2725 " points. Provide more pull data!", ntot);
2727 snew(corr, ncorr);
2728 /* snew(corrSq,ncorr); */
2729 snew(count, ncorr);
2730 dt = window[i].dt;
2731 snew(window[i].tau, window[i].nPull);
2732 restart = static_cast<int>(opt->acTrestart/dt+0.5);
2733 if (restart == 0)
2735 restart = 1;
2738 for (ig = 0; ig < window[i].nPull; ig++)
2740 if (ntot != window[i].Ntot[ig])
2742 gmx_fatal(FARGS, "Encountered different nr of frames in different pull groups.\n"
2743 "That should not happen. (%d and %d)\n", ntot, window[i].Ntot[ig]);
2745 ztime = window[i].ztime[ig];
2747 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2748 for (j = 0, av = 0; (j < ntot); j++)
2750 av += ztime[j];
2752 av /= ntot;
2753 for (k = 0; (k < ncorr); k++)
2755 corr[k] = 0.;
2756 count[k] = 0;
2758 for (j = 0; (j < ntot); j += restart)
2760 for (k = 0; (k < ncorr) && (j+k < ntot); k++)
2762 tmp = (ztime[j]-av)*(ztime[j+k]-av);
2763 corr [k] += tmp;
2764 /* corrSq[k] += tmp*tmp; */
2765 count[k]++;
2768 /* divide by nr of frames for each time displacement */
2769 for (k = 0; (k < ncorr); k++)
2771 /* count probably = (ncorr-k+(restart-1))/restart; */
2772 corr[k] = corr[k]/count[k];
2773 /* variance of autocorrelation function */
2774 /* corrSq[k]=corrSq[k]/count[k]; */
2776 /* normalize such that corr[0] == 0 */
2777 c0 = 1./corr[0];
2778 for (k = 0; (k < ncorr); k++)
2780 corr[k] *= c0;
2781 /* corrSq[k]*=c0*c0; */
2784 /* write ACFs in verbose mode */
2785 if (fpcorr)
2787 for (k = 0; (k < ncorr); k++)
2789 fprintf(fpcorr, "%g %g\n", k*dt, corr[k]);
2791 fprintf(fpcorr, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2794 /* esimate integrated correlation time, fitting is too unstable */
2795 tausteps = 0.5*corr[0];
2796 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2797 for (j = 1; (j < ncorr) && (corr[j] > WHAM_AC_ZERO_LIMIT); j++)
2799 tausteps += corr[j];
2802 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2803 Kumar et al, eq. 28 ff. */
2804 window[i].tau[ig] = tausteps*dt;
2805 window[i].g[ig] = 1+2*tausteps;
2806 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2807 } /* ig loop */
2808 sfree(corr);
2809 sfree(count);
2811 printf(" done\n");
2812 if (fpcorr)
2814 xvgrclose(fpcorr);
2817 /* plot IACT along reaction coordinate */
2818 fp = xvgropen(fn, "Integrated autocorrelation times", xlabel, "IACT [ps]", opt->oenv);
2819 if (output_env_get_print_xvgr_codes(opt->oenv))
2821 fprintf(fp, "@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2822 fprintf(fp, "# WIN tau(gr1) tau(gr2) ...\n");
2823 for (i = 0; i < nwins; i++)
2825 fprintf(fp, "# %3d ", i);
2826 for (ig = 0; ig < window[i].nPull; ig++)
2828 fprintf(fp, " %11g", window[i].tau[ig]);
2830 fprintf(fp, "\n");
2833 for (i = 0; i < nwins; i++)
2835 for (ig = 0; ig < window[i].nPull; ig++)
2837 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tau[ig]);
2840 if (opt->sigSmoothIact > 0.0)
2842 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2843 opt->sigSmoothIact);
2844 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2845 smoothIact(window, nwins, opt);
2846 fprintf(fp, "%s\n", output_env_get_print_xvgr_codes(opt->oenv) ? "&" : "");
2847 if (output_env_get_print_xvgr_codes(opt->oenv))
2849 fprintf(fp, "@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2850 fprintf(fp, "@ s1 symbol color 2\n");
2852 for (i = 0; i < nwins; i++)
2854 for (ig = 0; ig < window[i].nPull; ig++)
2856 fprintf(fp, "%8g %8g\n", window[i].pos[ig], window[i].tausmooth[ig]);
2860 xvgrclose(fp);
2861 printf("Wrote %s\n", fn);
2864 /*! \brief
2865 * compute average and sigma of each umbrella histogram
2867 void averageSigma(t_UmbrellaWindow *window, int nwins)
2869 int i, ig, ntot, k;
2870 real av, sum2, sig, diff, *ztime, nSamplesIndep;
2872 for (i = 0; i < nwins; i++)
2874 snew(window[i].aver, window[i].nPull);
2875 snew(window[i].sigma, window[i].nPull);
2877 ntot = window[i].Ntot[0];
2878 for (ig = 0; ig < window[i].nPull; ig++)
2880 ztime = window[i].ztime[ig];
2881 for (k = 0, av = 0.; k < ntot; k++)
2883 av += ztime[k];
2885 av /= ntot;
2886 for (k = 0, sum2 = 0.; k < ntot; k++)
2888 diff = ztime[k]-av;
2889 sum2 += diff*diff;
2891 sig = std::sqrt(sum2/ntot);
2892 window[i].aver[ig] = av;
2894 /* Note: This estimate for sigma is biased from the limited sampling.
2895 Correct sigma by n/(n-1) where n = number of independent
2896 samples. Only possible if IACT is known.
2898 if (window[i].tau)
2900 nSamplesIndep = window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2901 window[i].sigma[ig] = sig * nSamplesIndep/(nSamplesIndep-1);
2903 else
2905 window[i].sigma[ig] = sig;
2907 printf("win %d, aver = %f sig = %f\n", i, av, window[i].sigma[ig]);
2913 /*! \brief
2914 * Use histograms to compute average force on pull group.
2916 void computeAverageForce(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
2918 int i, j, bins = opt->bins, k;
2919 double dz, min = opt->min, max = opt->max, displAv, temp, distance, ztot, ztot_half, w, weight;
2920 double posmirrored;
2922 dz = (max-min)/bins;
2923 ztot = opt->max-min;
2924 ztot_half = ztot/2;
2926 /* Compute average displacement from histograms */
2927 for (j = 0; j < nWindows; ++j)
2929 snew(window[j].forceAv, window[j].nPull);
2930 for (k = 0; k < window[j].nPull; ++k)
2932 displAv = 0.0;
2933 weight = 0.0;
2934 for (i = 0; i < opt->bins; ++i)
2936 temp = (1.0*i+0.5)*dz+min;
2937 distance = temp - window[j].pos[k];
2938 if (opt->bCycl)
2939 { /* in cyclic wham: */
2940 if (distance > ztot_half) /* |distance| < ztot_half */
2942 distance -= ztot;
2944 else if (distance < -ztot_half)
2946 distance += ztot;
2949 w = window[j].Histo[k][i]/window[j].g[k];
2950 displAv += w*distance;
2951 weight += w;
2952 /* Are we near min or max? We are getting wrong forces from the histgrams since
2953 the histograms are zero outside [min,max). Therefore, assume that the position
2954 on the other side of the histomgram center is equally likely. */
2955 if (!opt->bCycl)
2957 posmirrored = window[j].pos[k]-distance;
2958 if (posmirrored >= max || posmirrored < min)
2960 displAv += -w*distance;
2961 weight += w;
2965 displAv /= weight;
2967 /* average force from average displacement */
2968 window[j].forceAv[k] = displAv*window[j].k[k];
2969 /* sigma from average square displacement */
2970 /* window[j].sigma [k] = sqrt(displAv2); */
2971 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2976 /*! \brief
2977 * Check if the complete reaction coordinate is covered by the histograms
2979 void checkReactionCoordinateCovered(t_UmbrellaWindow *window, int nwins,
2980 t_UmbrellaOptions *opt)
2982 int i, ig, j, bins = opt->bins, bBoundary;
2983 real avcount = 0, z, relcount, *count;
2984 snew(count, opt->bins);
2986 for (j = 0; j < opt->bins; ++j)
2988 for (i = 0; i < nwins; i++)
2990 for (ig = 0; ig < window[i].nPull; ig++)
2992 count[j] += window[i].Histo[ig][j];
2995 avcount += 1.0*count[j];
2997 avcount /= bins;
2998 for (j = 0; j < bins; ++j)
3000 relcount = count[j]/avcount;
3001 z = (j+0.5)*opt->dz+opt->min;
3002 bBoundary = ( j<bins/20 || (bins-j)>bins/20 );
3003 /* check for bins with no data */
3004 if (count[j] == 0)
3006 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
3007 "You may not get a reasonable profile. Check your histograms!\n", j, z);
3009 /* and check for poor sampling */
3010 else if (relcount < 0.005 && !bBoundary)
3012 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n", j, z);
3015 sfree(count);
3018 /*! \brief Compute initial potential by integrating the average force
3020 * This speeds up the convergence by roughly a factor of 2
3022 void guessPotByIntegration(t_UmbrellaWindow *window, int nWindows, t_UmbrellaOptions *opt)
3024 int i, j, ig, bins = opt->bins, nHist, winmin, groupmin;
3025 double dz, min = opt->min, *pot, pos, hispos, dist, diff, fAv, distmin, *f;
3026 FILE *fp;
3028 dz = (opt->max-min)/bins;
3030 printf("Getting initial potential by integration.\n");
3032 /* Compute average displacement from histograms */
3033 computeAverageForce(window, nWindows, opt);
3035 /* Get force for each bin from all histograms in this bin, or, alternatively,
3036 if no histograms are inside this bin, from the closest histogram */
3037 snew(pot, bins);
3038 snew(f, bins);
3039 for (j = 0; j < opt->bins; ++j)
3041 pos = (1.0*j+0.5)*dz+min;
3042 nHist = 0;
3043 fAv = 0.;
3044 distmin = 1e20;
3045 groupmin = winmin = 0;
3046 for (i = 0; i < nWindows; i++)
3048 for (ig = 0; ig < window[i].nPull; ig++)
3050 hispos = window[i].pos[ig];
3051 dist = std::abs(hispos-pos);
3052 /* average force within bin */
3053 if (dist < dz/2)
3055 nHist++;
3056 fAv += window[i].forceAv[ig];
3058 /* at the same time, remember closest histogram */
3059 if (dist < distmin)
3061 winmin = i;
3062 groupmin = ig;
3063 distmin = dist;
3067 /* if no histogram found in this bin, use closest histogram */
3068 if (nHist > 0)
3070 fAv = fAv/nHist;
3072 else
3074 fAv = window[winmin].forceAv[groupmin];
3076 f[j] = fAv;
3078 for (j = 1; j < opt->bins; ++j)
3080 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
3083 /* cyclic wham: linearly correct possible offset */
3084 if (opt->bCycl)
3086 diff = (pot[bins-1]-pot[0])/(bins-1);
3087 for (j = 1; j < opt->bins; ++j)
3089 pot[j] -= j*diff;
3092 if (opt->verbose)
3094 fp = xvgropen("pmfintegrated.xvg", "PMF from force integration", xlabel, "PMF (kJ/mol)", opt->oenv);
3095 for (j = 0; j < opt->bins; ++j)
3097 fprintf(fp, "%g %g\n", (j+0.5)*dz+opt->min, pot[j]);
3099 xvgrclose(fp);
3100 printf("verbose mode: wrote %s with PMF from interated forces\n", "pmfintegrated.xvg");
3103 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
3104 energy offsets which are usually determined by wham
3105 First: turn pot into probabilities:
3107 for (j = 0; j < opt->bins; ++j)
3109 pot[j] = std::exp(-pot[j]/(BOLTZ*opt->Temperature));
3111 calc_z(pot, window, nWindows, opt, TRUE);
3113 sfree(pot);
3114 sfree(f);
3117 //! Count number of words in an line
3118 static int wordcount(char *ptr)
3120 int i, n, is[2];
3121 int cur = 0;
3123 if (std::strlen(ptr) == 0)
3125 return 0;
3127 /* fprintf(stderr,"ptr='%s'\n",ptr); */
3128 n = 1;
3129 for (i = 0; (ptr[i] != '\0'); i++)
3131 is[cur] = isspace(ptr[i]);
3132 if ((i > 0) && (is[cur] && !is[1-cur]))
3134 n++;
3136 cur = 1-cur;
3138 return n;
3141 /*! \brief Read input file for pull group selection (option -is)
3143 * TO DO: ptr=fgets(...) is never freed (small memory leak)
3145 void readPullGroupSelection(t_UmbrellaOptions *opt, char **fnTpr, int nTpr)
3147 FILE *fp;
3148 int i, iline, n, len = STRLEN, temp;
3149 char *ptr = 0, *tmpbuf = 0;
3150 char fmt[1024], fmtign[1024];
3151 int block = 1, sizenow;
3153 fp = gmx_ffopen(opt->fnGroupsel, "r");
3154 opt->groupsel = NULL;
3156 snew(tmpbuf, len);
3157 sizenow = 0;
3158 iline = 0;
3159 while ( (ptr = fgets3(fp, tmpbuf, &len)) != NULL)
3161 trim(ptr);
3162 n = wordcount(ptr);
3164 if (iline >= sizenow)
3166 sizenow += block;
3167 srenew(opt->groupsel, sizenow);
3169 opt->groupsel[iline].n = n;
3170 opt->groupsel[iline].nUse = 0;
3171 snew(opt->groupsel[iline].bUse, n);
3173 fmtign[0] = '\0';
3174 for (i = 0; i < n; i++)
3176 std::strcpy(fmt, fmtign);
3177 std::strcat(fmt, "%d");
3178 if (sscanf(ptr, fmt, &temp))
3180 opt->groupsel[iline].bUse[i] = (temp > 0);
3181 if (opt->groupsel[iline].bUse[i])
3183 opt->groupsel[iline].nUse++;
3186 std::strcat(fmtign, "%*s");
3188 iline++;
3190 opt->nGroupsel = iline;
3191 if (nTpr != opt->nGroupsel)
3193 gmx_fatal(FARGS, "Found %d tpr files but %d lines in %s\n", nTpr, opt->nGroupsel,
3194 opt->fnGroupsel);
3197 printf("\nUse only these pull groups:\n");
3198 for (iline = 0; iline < nTpr; iline++)
3200 printf("%s (%d of %d groups):", fnTpr[iline], opt->groupsel[iline].nUse, opt->groupsel[iline].n);
3201 for (i = 0; i < opt->groupsel[iline].n; i++)
3203 if (opt->groupsel[iline].bUse[i])
3205 printf(" %d", i+1);
3208 printf("\n");
3210 printf("\n");
3212 sfree(tmpbuf);
3215 //! Boolean XOR
3216 #define WHAMBOOLXOR(a, b) ( ((!(a)) && (b)) || ((a) && (!(b))))
3218 //! Number of elements in fnm (used for command line parsing)
3219 #define NFILE asize(fnm)
3221 //! The main gmx wham routine
3222 int gmx_wham(int argc, char *argv[])
3224 const char *desc[] = {
3225 "[THISMODULE] is an analysis program that implements the Weighted",
3226 "Histogram Analysis Method (WHAM). It is intended to analyze",
3227 "output files generated by umbrella sampling simulations to ",
3228 "compute a potential of mean force (PMF).[PAR]",
3230 "[THISMODULE] is currently not fully up to date. It only supports pull setups",
3231 "where the first pull coordinate(s) is/are umbrella pull coordinates",
3232 "and, if multiple coordinates need to be analyzed, all used the same",
3233 "geometry and dimensions. In most cases this is not an issue.[PAR]",
3235 "At present, three input modes are supported.",
3237 "* With option [TT]-it[tt], the user provides a file which contains the",
3238 " file names of the umbrella simulation run-input files ([REF].tpr[ref] files),",
3239 " AND, with option [TT]-ix[tt], a file which contains file names of",
3240 " the pullx [TT]mdrun[tt] output files. The [REF].tpr[ref] and pullx files must",
3241 " be in corresponding order, i.e. the first [REF].tpr[ref] created the",
3242 " first pullx, etc.",
3243 "* Same as the previous input mode, except that the the user",
3244 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
3245 " From the pull force the position in the umbrella potential is",
3246 " computed. This does not work with tabulated umbrella potentials.",
3247 "* With option [TT]-ip[tt], the user provides file names of (gzipped) [REF].pdo[ref] files, i.e.",
3248 " the GROMACS 3.3 umbrella output files. If you have some unusual"
3249 " reaction coordinate you may also generate your own [REF].pdo[ref] files and",
3250 " feed them with the [TT]-ip[tt] option into to [THISMODULE]. The [REF].pdo[ref] file header",
3251 " must be similar to the following::",
3253 " # UMBRELLA 3.0",
3254 " # Component selection: 0 0 1",
3255 " # nSkip 1",
3256 " # Ref. Group 'TestAtom'",
3257 " # Nr. of pull groups 2",
3258 " # Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0",
3259 " # Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0",
3260 " #####",
3262 " The number of pull groups, umbrella positions, force constants, and names ",
3263 " may (of course) differ. Following the header, a time column and ",
3264 " a data column for each pull group follows (i.e. the displacement",
3265 " with respect to the umbrella center). Up to four pull groups are possible ",
3266 " per [REF].pdo[ref] file at present.",
3268 "By default, all pull groups found in all pullx/pullf files are used in WHAM. If only ",
3269 "some of the pull groups should be used, a pull group selection file (option [TT]-is[tt]) can ",
3270 "be provided. The selection file must contain one line for each tpr file in tpr-files.dat.",
3271 "Each of these lines must contain one digit (0 or 1) for each pull group in the tpr file. ",
3272 "Here, 1 indicates that the pull group is used in WHAM, and 0 means it is omitted. Example:",
3273 "If you have three tpr files, each containing 4 pull groups, but only pull group 1 and 2 should be ",
3274 "used, groupsel.dat looks like this::",
3276 " 1 1 0 0",
3277 " 1 1 0 0",
3278 " 1 1 0 0",
3280 "By default, the output files are",
3282 "* [TT]-o[tt] PMF output file",
3283 "* [TT]-hist[tt] Histograms output file",
3285 "Always check whether the histograms sufficiently overlap.[PAR]",
3286 "The umbrella potential is assumed to be harmonic and the force constants are ",
3287 "read from the [REF].tpr[ref] or [REF].pdo[ref] files. If a non-harmonic umbrella force was applied ",
3288 "a tabulated potential can be provided with [TT]-tab[tt].",
3290 "WHAM options",
3291 "^^^^^^^^^^^^",
3293 "* [TT]-bins[tt] Number of bins used in analysis",
3294 "* [TT]-temp[tt] Temperature in the simulations",
3295 "* [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance",
3296 "* [TT]-auto[tt] Automatic determination of boundaries",
3297 "* [TT]-min,-max[tt] Boundaries of the profile",
3299 "The data points that are used to compute the profile",
3300 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
3301 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
3302 "umbrella window.[PAR]",
3303 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
3304 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
3305 "With energy output, the energy in the first bin is defined to be zero. ",
3306 "If you want the free energy at a different ",
3307 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
3308 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
3309 "without osmotic gradient), the option [TT]-cycl[tt] is useful.",
3310 "[THISMODULE] will make use of the",
3311 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
3312 "reaction coordinate will assumed be be neighbors.[PAR]",
3313 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
3314 "which may be useful for, e.g. membranes.",
3316 "Parallelization",
3317 "^^^^^^^^^^^^^^^",
3319 "If available, the number of OpenMP threads used by gmx wham is controlled with [TT]-nt[tt].",
3321 "Autocorrelations",
3322 "^^^^^^^^^^^^^^^^",
3324 "With [TT]-ac[tt], [THISMODULE] estimates the integrated autocorrelation ",
3325 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
3326 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
3327 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
3328 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
3329 "Because the IACTs can be severely underestimated in case of limited ",
3330 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
3331 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
3332 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
3333 "integration of the ACFs while the ACFs are larger 0.05.",
3334 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
3335 "less robust) method such as fitting to a double exponential, you can ",
3336 "compute the IACTs with [gmx-analyze] and provide them to [THISMODULE] with the file ",
3337 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
3338 "input file ([REF].pdo[ref] or pullx/f file) and one column per pull group in the respective file.",
3340 "Error analysis",
3341 "^^^^^^^^^^^^^^",
3343 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
3344 "otherwise the statistical error may be substantially underestimated. ",
3345 "More background and examples for the bootstrap technique can be found in ",
3346 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.",
3347 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
3348 "Four bootstrapping methods are supported and ",
3349 "selected with [TT]-bs-method[tt].",
3351 "* [TT]b-hist[tt] Default: complete histograms are considered as independent ",
3352 " data points, and the bootstrap is carried out by assigning random weights to the ",
3353 " histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
3354 " must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
3355 " statistical error is underestimated.",
3356 "* [TT]hist[tt] Complete histograms are considered as independent data points. ",
3357 " For each bootstrap, N histograms are randomly chosen from the N given histograms ",
3358 " (allowing duplication, i.e. sampling with replacement).",
3359 " To avoid gaps without data along the reaction coordinate blocks of histograms ",
3360 " ([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
3361 " divided into blocks and only histograms within each block are mixed. Note that ",
3362 " the histograms within each block must be representative for all possible histograms, ",
3363 " otherwise the statistical error is underestimated.",
3364 "* [TT]traj[tt] The given histograms are used to generate new random trajectories,",
3365 " such that the generated data points are distributed according the given histograms ",
3366 " and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
3367 " known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
3368 " windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
3369 " Note that this method may severely underestimate the error in case of limited sampling, ",
3370 " that is if individual histograms do not represent the complete phase space at ",
3371 " the respective positions.",
3372 "* [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
3373 " not bootstrapped from the umbrella histograms but from Gaussians with the average ",
3374 " and width of the umbrella histograms. That method yields similar error estimates ",
3375 " like method [TT]traj[tt].",
3377 "Bootstrapping output:",
3379 "* [TT]-bsres[tt] Average profile and standard deviations",
3380 "* [TT]-bsprof[tt] All bootstrapping profiles",
3382 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
3383 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
3384 "the histograms."
3387 const char *en_unit[] = {NULL, "kJ", "kCal", "kT", NULL};
3388 const char *en_unit_label[] = {"", "E (kJ mol\\S-1\\N)", "E (kcal mol\\S-1\\N)", "E (kT)", NULL};
3389 const char *en_bsMethod[] = { NULL, "b-hist", "hist", "traj", "traj-gauss", NULL };
3390 static t_UmbrellaOptions opt;
3391 static int nthreads = -1;
3393 t_pargs pa[] = {
3394 #ifdef GMX_OPENMP
3395 { "-nt", FALSE, etINT, {&nthreads},
3396 "Number of threads used by gmx wham (if -1, all threads will be used or what is specified by the environment variable OMP_NUM_THREADS)"},
3397 #endif
3398 { "-min", FALSE, etREAL, {&opt.min},
3399 "Minimum coordinate in profile"},
3400 { "-max", FALSE, etREAL, {&opt.max},
3401 "Maximum coordinate in profile"},
3402 { "-auto", FALSE, etBOOL, {&opt.bAuto},
3403 "Determine min and max automatically"},
3404 { "-bins", FALSE, etINT, {&opt.bins},
3405 "Number of bins in profile"},
3406 { "-temp", FALSE, etREAL, {&opt.Temperature},
3407 "Temperature"},
3408 { "-tol", FALSE, etREAL, {&opt.Tolerance},
3409 "Tolerance"},
3410 { "-v", FALSE, etBOOL, {&opt.verbose},
3411 "Verbose mode"},
3412 { "-b", FALSE, etREAL, {&opt.tmin},
3413 "First time to analyse (ps)"},
3414 { "-e", FALSE, etREAL, {&opt.tmax},
3415 "Last time to analyse (ps)"},
3416 { "-dt", FALSE, etREAL, {&opt.dt},
3417 "Analyse only every dt ps"},
3418 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
3419 "Write histograms and exit"},
3420 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
3421 "Determine min and max and exit (with [TT]-auto[tt])"},
3422 { "-log", FALSE, etBOOL, {&opt.bLog},
3423 "Calculate the log of the profile before printing"},
3424 { "-unit", FALSE, etENUM, {en_unit},
3425 "Energy unit in case of log output" },
3426 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
3427 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
3428 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
3429 "Create cyclic/periodic profile. Assumes min and max are the same point."},
3430 { "-sym", FALSE, etBOOL, {&opt.bSym},
3431 "Symmetrize profile around z=0"},
3432 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
3433 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
3434 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
3435 "Calculate integrated autocorrelation times and use in wham"},
3436 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
3437 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
3438 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
3439 "When computing autocorrelation functions, restart computing every .. (ps)"},
3440 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
3441 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
3442 "during smoothing"},
3443 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
3444 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
3445 { "-bs-method", FALSE, etENUM, {en_bsMethod},
3446 "Bootstrap method" },
3447 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
3448 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
3449 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
3450 "Seed for bootstrapping. (-1 = use time)"},
3451 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
3452 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
3453 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
3454 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
3455 { "-stepout", FALSE, etINT, {&opt.stepchange},
3456 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
3457 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
3458 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
3461 t_filenm fnm[] = {
3462 { efDAT, "-ix", "pullx-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3463 { efDAT, "-if", "pullf-files", ffOPTRD}, /* wham input: pullf.xvg's and tprs */
3464 { efDAT, "-it", "tpr-files", ffOPTRD}, /* wham input: tprs */
3465 { efDAT, "-ip", "pdo-files", ffOPTRD}, /* wham input: pdo files (gmx3 style) */
3466 { efDAT, "-is", "groupsel", ffOPTRD}, /* input: select pull groups to use */
3467 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
3468 { efXVG, "-hist", "histo", ffWRITE}, /* output file for histograms */
3469 { efXVG, "-oiact", "iact", ffOPTWR}, /* writing integrated autocorrelation times */
3470 { efDAT, "-iiact", "iact-in", ffOPTRD}, /* reading integrated autocorrelation times */
3471 { efXVG, "-bsres", "bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
3472 { efXVG, "-bsprof", "bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
3473 { efDAT, "-tab", "umb-pot", ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
3476 int i, j, l, nfiles, nwins, nfiles2;
3477 t_UmbrellaHeader header;
3478 t_UmbrellaWindow * window = NULL;
3479 double *profile, maxchange = 1e20;
3480 gmx_bool bMinSet, bMaxSet, bAutoSet, bExact = FALSE;
3481 char **fninTpr, **fninPull, **fninPdo;
3482 const char *fnPull;
3483 FILE *histout, *profout;
3484 char ylabel[256], title[256];
3486 opt.bins = 200;
3487 opt.verbose = FALSE;
3488 opt.bHistOnly = FALSE;
3489 opt.bCycl = FALSE;
3490 opt.tmin = 50;
3491 opt.tmax = 1e20;
3492 opt.dt = 0.0;
3493 opt.min = 0;
3494 opt.max = 0;
3495 opt.bAuto = TRUE;
3496 opt.nGroupsel = 0;
3498 /* bootstrapping stuff */
3499 opt.nBootStrap = 0;
3500 opt.bsMethod = bsMethod_hist;
3501 opt.tauBootStrap = 0.0;
3502 opt.bsSeed = -1;
3503 opt.histBootStrapBlockLength = 8;
3504 opt.bs_verbose = FALSE;
3506 opt.bLog = TRUE;
3507 opt.unit = en_kJ;
3508 opt.zProf0 = 0.;
3509 opt.Temperature = 298;
3510 opt.Tolerance = 1e-6;
3511 opt.bBoundsOnly = FALSE;
3512 opt.bSym = FALSE;
3513 opt.bCalcTauInt = FALSE;
3514 opt.sigSmoothIact = 0.0;
3515 opt.bAllowReduceIact = TRUE;
3516 opt.bInitPotByIntegration = TRUE;
3517 opt.acTrestart = 1.0;
3518 opt.stepchange = 100;
3519 opt.stepUpdateContrib = 100;
3521 if (!parse_common_args(&argc, argv, 0,
3522 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &opt.oenv))
3524 return 0;
3527 opt.unit = nenum(en_unit);
3528 opt.bsMethod = nenum(en_bsMethod);
3530 opt.bProf0Set = opt2parg_bSet("-zprof0", asize(pa), pa);
3532 opt.bTab = opt2bSet("-tab", NFILE, fnm);
3533 opt.bPdo = opt2bSet("-ip", NFILE, fnm);
3534 opt.bTpr = opt2bSet("-it", NFILE, fnm);
3535 opt.bPullx = opt2bSet("-ix", NFILE, fnm);
3536 opt.bPullf = opt2bSet("-if", NFILE, fnm);
3537 opt.bTauIntGiven = opt2bSet("-iiact", NFILE, fnm);
3538 if (opt.bTab && opt.bPullf)
3540 gmx_fatal(FARGS, "Force input does not work with tabulated potentials. "
3541 "Provide pullx.xvg or pdo files!");
3544 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx, opt.bPullf))
3546 gmx_fatal(FARGS, "Give either pullx (-ix) OR pullf (-if) data. Not both.");
3548 if (!opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
3550 gmx_fatal(FARGS, "gmx wham supports three input modes, pullx, pullf, or pdo file input."
3551 "\n\n Check gmx wham -h !");
3554 opt.fnPdo = opt2fn("-ip", NFILE, fnm);
3555 opt.fnTpr = opt2fn("-it", NFILE, fnm);
3556 opt.fnPullf = opt2fn("-if", NFILE, fnm);
3557 opt.fnPullx = opt2fn("-ix", NFILE, fnm);
3558 opt.fnGroupsel = opt2fn_null("-is", NFILE, fnm);
3560 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
3561 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
3562 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
3563 if ( (bMinSet || bMaxSet) && bAutoSet)
3565 gmx_fatal(FARGS, "With -auto, do not give -min or -max\n");
3568 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
3570 gmx_fatal(FARGS, "When giving -min, you must give -max (and vice versa), too\n");
3573 if (bMinSet && opt.bAuto)
3575 printf("Note: min and max given, switching off -auto.\n");
3576 opt.bAuto = FALSE;
3579 if (opt.bTauIntGiven && opt.bCalcTauInt)
3581 gmx_fatal(FARGS, "Either read (option -iiact) or calculate (option -ac) the\n"
3582 "the autocorrelation times. Not both.");
3585 if (opt.tauBootStrap > 0.0 && opt2parg_bSet("-ac", asize(pa), pa))
3587 gmx_fatal(FARGS, "Either compute autocorrelation times (ACTs) (option -ac) or "
3588 "provide it with -bs-tau for bootstrapping. Not Both.\n");
3590 if (opt.tauBootStrap > 0.0 && opt2bSet("-iiact", NFILE, fnm))
3592 gmx_fatal(FARGS, "Either provide autocorrelation times (ACTs) with file iact-in.dat "
3593 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
3596 /* Set # of OpenMP threads */
3597 if (nthreads > 0)
3599 gmx_omp_set_num_threads(nthreads);
3601 else
3603 nthreads = gmx_omp_get_max_threads();
3605 if (nthreads > 1)
3607 printf("\nNote: Will use %d OpenMP threads.\n\n", nthreads);
3610 /* Reading gmx4 pull output and tpr files */
3611 if (opt.bTpr || opt.bPullf || opt.bPullx)
3613 read_wham_in(opt.fnTpr, &fninTpr, &nfiles, &opt);
3615 fnPull = opt.bPullf ? opt.fnPullf : opt.fnPullx;
3616 read_wham_in(fnPull, &fninPull, &nfiles2, &opt);
3617 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
3618 nfiles, nfiles2, opt.bPullf ? "force" : "position", opt.fnTpr, fnPull);
3619 if (nfiles != nfiles2)
3621 gmx_fatal(FARGS, "Found %d file names in %s, but %d in %s\n", nfiles,
3622 opt.fnTpr, nfiles2, fnPull);
3625 /* Read file that selects the pull group to be used */
3626 if (opt.fnGroupsel != NULL)
3628 readPullGroupSelection(&opt, fninTpr, nfiles);
3631 window = initUmbrellaWindows(nfiles);
3632 read_tpr_pullxf_files(fninTpr, fninPull, nfiles, &header, window, &opt);
3634 else
3635 { /* reading pdo files */
3636 if (opt.fnGroupsel != NULL)
3638 gmx_fatal(FARGS, "Reading a -is file is not supported with PDO input files.\n"
3639 "Use awk or a similar tool to pick the required pull groups from your PDO files\n");
3641 read_wham_in(opt.fnPdo, &fninPdo, &nfiles, &opt);
3642 printf("Found %d pdo files in %s\n", nfiles, opt.fnPdo);
3643 window = initUmbrellaWindows(nfiles);
3644 read_pdo_files(fninPdo, nfiles, &header, window, &opt);
3646 nwins = nfiles;
3648 /* enforce equal weight for all histograms? */
3649 if (opt.bHistEq)
3651 enforceEqualWeights(window, nwins);
3654 /* write histograms */
3655 histout = xvgropen(opt2fn("-hist", NFILE, fnm), "Umbrella histograms",
3656 xlabel, "count", opt.oenv);
3657 for (l = 0; l < opt.bins; ++l)
3659 fprintf(histout, "%e\t", (l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
3660 for (i = 0; i < nwins; ++i)
3662 for (j = 0; j < window[i].nPull; ++j)
3664 fprintf(histout, "%e\t", window[i].Histo[j][l]);
3667 fprintf(histout, "\n");
3669 xvgrclose(histout);
3670 printf("Wrote %s\n", opt2fn("-hist", NFILE, fnm));
3671 if (opt.bHistOnly)
3673 printf("Wrote histograms to %s, now exiting.\n", opt2fn("-hist", NFILE, fnm));
3674 return 0;
3677 /* Using tabulated umbrella potential */
3678 if (opt.bTab)
3680 setup_tab(opt2fn("-tab", NFILE, fnm), &opt);
3683 /* Integrated autocorrelation times provided ? */
3684 if (opt.bTauIntGiven)
3686 readIntegratedAutocorrelationTimes(window, nwins, opt2fn("-iiact", NFILE, fnm));
3689 /* Compute integrated autocorrelation times */
3690 if (opt.bCalcTauInt)
3692 calcIntegratedAutocorrelationTimes(window, nwins, &opt, opt2fn("-oiact", NFILE, fnm));
3695 /* calc average and sigma for each histogram
3696 (maybe required for bootstrapping. If not, this is fast anyhow) */
3697 if (opt.nBootStrap && opt.bsMethod == bsMethod_trajGauss)
3699 averageSigma(window, nwins);
3702 /* Get initial potential by simple integration */
3703 if (opt.bInitPotByIntegration)
3705 guessPotByIntegration(window, nwins, &opt);
3708 /* Check if complete reaction coordinate is covered */
3709 checkReactionCoordinateCovered(window, nwins, &opt);
3711 /* Calculate profile */
3712 snew(profile, opt.bins);
3713 if (opt.verbose)
3715 opt.stepchange = 1;
3717 i = 0;
3720 if ( (i%opt.stepUpdateContrib) == 0)
3722 setup_acc_wham(profile, window, nwins, &opt);
3724 if (maxchange < opt.Tolerance)
3726 bExact = TRUE;
3727 /* if (opt.verbose) */
3728 printf("Switched to exact iteration in iteration %d\n", i);
3730 calc_profile(profile, window, nwins, &opt, bExact);
3731 if (((i%opt.stepchange) == 0 || i == 1) && i != 0)
3733 printf("\t%4d) Maximum change %e\n", i, maxchange);
3735 i++;
3737 while ( (maxchange = calc_z(profile, window, nwins, &opt, bExact)) > opt.Tolerance || !bExact);
3738 printf("Converged in %d iterations. Final maximum change %g\n", i, maxchange);
3740 /* calc error from Kumar's formula */
3741 /* Unclear how the error propagates along reaction coordinate, therefore
3742 commented out */
3743 /* calc_error_kumar(profile,window, nwins,&opt); */
3745 /* Write profile in energy units? */
3746 if (opt.bLog)
3748 prof_normalization_and_unit(profile, &opt);
3749 std::strcpy(ylabel, en_unit_label[opt.unit]);
3750 std::strcpy(title, "Umbrella potential");
3752 else
3754 std::strcpy(ylabel, "Density of states");
3755 std::strcpy(title, "Density of states");
3758 /* symmetrize profile around z=0? */
3759 if (opt.bSym)
3761 symmetrizeProfile(profile, &opt);
3764 /* write profile or density of states */
3765 profout = xvgropen(opt2fn("-o", NFILE, fnm), title, xlabel, ylabel, opt.oenv);
3766 for (i = 0; i < opt.bins; ++i)
3768 fprintf(profout, "%e\t%e\n", (i+0.5)/opt.bins*(opt.max-opt.min)+opt.min, profile[i]);
3770 xvgrclose(profout);
3771 printf("Wrote %s\n", opt2fn("-o", NFILE, fnm));
3773 /* Bootstrap Method */
3774 if (opt.nBootStrap)
3776 do_bootstrapping(opt2fn("-bsres", NFILE, fnm), opt2fn("-bsprof", NFILE, fnm),
3777 opt2fn("-hist", NFILE, fnm),
3778 ylabel, profile, window, nwins, &opt);
3781 sfree(profile);
3782 freeUmbrellaWindows(window, nfiles);
3784 printf("\nIn case you use results from gmx wham for a publication, please cite:\n");
3785 please_cite(stdout, "Hub2010");
3787 return 0;