Fixes #882 - looping bug in trxio.c
[gromacs.git] / src / tools / gmx_wham.c
blob68b4b7ef3e08b5d267e64e9ce3c98801bd05b9e2
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
2 /*
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40 #include <stdio.h>
42 #include "statutil.h"
43 #include "typedefs.h"
44 #include "smalloc.h"
45 #include "vec.h"
46 #include "copyrite.h"
47 #include "statutil.h"
48 #include "tpxio.h"
49 #include "names.h"
50 #include "gmx_random.h"
51 #include "gmx_ana.h"
53 #ifndef HAVE_STRDUP
54 #define HAVE_STRDUP
55 #endif
56 #include "string2.h"
57 #include "xvgr.h"
60 #define WHAM_MAXFILELEN 2048
62 /* enum for energy units */
63 enum { enSel, en_kJ, en_kCal, en_kT, enNr };
64 /* enum for type of input files (pdos, tpr, or pullf) */
65 enum { whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo };
66 /* enum for bootstrapping method (
67 - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
68 - bootstrap complete histograms
69 - bootstrap trajectories from given umbrella histograms
70 - bootstrap trajectories from Gaussian with mu/sigam computed from
71 the respective histogram
73 ********************************************************************
74 FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
75 JS Hub, BL de Groot, D van der Spoel
76 [TT]g_wham[tt] - A free weighted histogram analysis implementation including
77 robust error and autocorrelation estimates,
78 J Chem Theory Comput, accepted (2010)
79 ********************************************************************
81 enum { bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
82 bsMethod_traj, bsMethod_trajGauss };
85 typedef struct
87 /* umbrella with pull code of gromacs 4.x */
88 int npullgrps; /* nr of pull groups in tpr file */
89 int pull_geometry; /* such as distance, position */
90 ivec pull_dim; /* pull dimension with geometry distance */
91 int pull_ndim; /* nr of pull_dim != 0 */
92 real *k; /* force constants in tpr file */
93 rvec *init_dist; /* reference displacements */
94 real *umbInitDist; /* reference displacement in umbrella direction */
96 /* From here, old pdo stuff */
97 int nSkip;
98 char Reference[256];
99 int nPull;
100 int nDim;
101 ivec Dims;
102 char PullName[4][256];
103 double UmbPos[4][3];
104 double UmbCons[4][3];
105 } t_UmbrellaHeader;
107 typedef struct
109 int nPull; /* nr of pull groups in this pdo or pullf/x file */
110 double **Histo,**cum; /* nPull histograms and nPull cumulative distr. funct */
111 int nBin; /* nr of bins. identical to opt->bins */
112 double *k; /* force constants for the nPull groups */
113 double *pos; /* umbrella positions for the nPull groups */
114 double *z; /* z=(-Fi/kT) for the nPull groups. These values are
115 iteratively computed during wham */
116 double *N, *Ntot; /* nr of data points in nPull histograms. N and Ntot
117 only differ if bHistEq==TRUE */
119 double *g,*tau,*tausmooth; /* g = 1 + 2*tau[int]/dt where tau is the integrated
120 autocorrelation time. Compare, e.g.
121 Ferrenberg/Swendsen, PRL 63:1195 (1989)
122 Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28 */
124 double dt; /* timestep in the input data. Can be adapted with
125 g_wham option -dt */
126 gmx_bool **bContrib; /* TRUE, if any data point of the histogram is within min
127 and max, otherwise FALSE. */
128 real **ztime; /* input data z(t) as a function of time. Required to
129 compute ACTs */
130 real *forceAv; /* average force estimated from average displacement, fAv=dzAv*k
131 Used for integration to guess the potential. */
132 real *aver,*sigma; /* average and stddev of histograms */
133 double *bsWeight; /* for bootstrapping complete histograms with continuous weights */
134 } t_UmbrellaWindow;
137 typedef struct
139 /* INPUT STUFF */
140 const char *fnTpr,*fnPullf;
141 const char *fnPdo,*fnPullx; /* file names of input */
142 gmx_bool bTpr,bPullf,bPdo,bPullx;/* input file types given? */
143 real tmin, tmax, dt; /* only read input within tmin and tmax with dt */
145 gmx_bool bInitPotByIntegration; /* before WHAM, guess potential by force integration. Yields
146 1.5 to 2 times faster convergence */
147 int stepUpdateContrib; /* update contribution table every ... iterations. Accelerates
148 WHAM. */
150 /* BASIC WHAM OPTIONS */
151 int bins; /* nr of bins, min, max, and dz of profile */
152 real min,max,dz;
153 real Temperature,Tolerance; /* temperature, converged when probability changes less
154 than Tolerance */
155 gmx_bool bCycl; /* generate cyclic (periodic) PMF */
157 /* OUTPUT CONTROL */
158 gmx_bool bLog; /* energy output (instead of probability) for profile */
159 int unit; /* unit for PMF output kJ/mol or kT or kCal/mol */
160 gmx_bool bSym; /* symmetrize PMF around z=0 after WHAM, useful for
161 membranes etc. */
162 real zProf0; /* after wham, set prof to zero at this z-position
163 When bootstrapping, set zProf0 to a "stable" reference
164 position. */
165 gmx_bool bProf0Set; /* setting profile to 0 at zProf0? */
167 gmx_bool bBoundsOnly,bHistOnly; /* determine min and max, or write histograms and exit */
168 gmx_bool bAuto; /* determine min and max automatically but do not exit */
170 gmx_bool verbose; /* more noisy wham mode */
171 int stepchange; /* print maximum change in prof after how many interations */
172 output_env_t oenv; /* xvgr options */
174 /* AUTOCORRELATION STUFF */
175 gmx_bool bTauIntGiven,bCalcTauInt;/* IACT given or should be calculated? */
176 real sigSmoothIact; /* sigma of Gaussian to smooth ACTs */
177 gmx_bool bAllowReduceIact; /* Allow to reduce ACTs during smoothing. Otherwise
178 ACT are only increased during smoothing */
179 real acTrestart; /* when computing ACT, time between restarting points */
180 gmx_bool bHistEq; /* Enforce the same weight for each umbella window, that is
181 calculate with the same number of data points for
182 each window. That can be reasonable, if the histograms
183 have different length, but due to autocorrelation,
184 a longer simulation should not have larger weightin wham. */
186 /* BOOTSTRAPPING STUFF */
187 int nBootStrap; /* nr of bootstraps (50 is usually enough) */
188 int bsMethod; /* if == bsMethod_hist, consider complete histograms as independent
189 data points and, hence, only mix complete histograms.
190 if == bsMethod_BayesianHist, consider complete histograms
191 as independent data points, but assign random weights
192 to the histograms during the bootstrapping ("Bayesian bootstrap")
193 In case of long correlations (e.g., inside a channel), these
194 will yield a more realistic error.
195 if == bsMethod_traj(Gauss), generate synthetic histograms
196 for each given
197 histogram by generating an autocorrelated random sequence
198 that is distributed according to the respective given
199 histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
200 (instead of from the umbrella histogram) to generate a new
201 histogram
203 real tauBootStrap; /* autocorrelation time (ACT) used to generate synthetic
204 histograms. If ==0, use calculated ACF */
205 int histBootStrapBlockLength; /* when mixing histograms, mix only histograms withing blocks
206 long the reaction coordinate xi. Avoids gaps along xi. */
207 int bsSeed; /* random seed for bootstrapping */
208 gmx_bool bs_verbose; /* Write cumulative distribution functions (CDFs) of histograms
209 and write the generated histograms for each bootstrap */
211 /* tabulated umbrella potential stuff */
212 gmx_bool bTab;
213 double *tabX,*tabY,tabMin,tabMax,tabDz;
214 int tabNbins;
216 gmx_rng_t rng; /* gromacs random number generator */
217 } t_UmbrellaOptions;
220 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
222 t_UmbrellaWindow *win;
223 int i;
224 snew(win,nwin);
225 for (i=0; i<nwin; i++)
227 win[i].Histo = win[i].cum = 0;
228 win[i].k = win[i].pos = win[i].z =0;
229 win[i].N = win[i].Ntot = 0;
230 win[i].g = win[i].tau = win[i].tausmooth = 0;
231 win[i].bContrib=0;
232 win[i].ztime=0;
233 win[i].forceAv=0;
234 win[i].aver = win[i].sigma = 0;
235 win[i].bsWeight = 0;
237 return win;
240 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
242 int i,j;
243 for (i=0; i<nwin; i++)
245 if (win[i].Histo)
246 for (j=0;j<win[i].nPull;j++)
247 sfree(win[i].Histo[j]);
248 if (win[i].cum)
249 for (j=0;j<win[i].nPull;j++)
250 sfree(win[i].cum[j]);
251 if (win[i].bContrib)
252 for (j=0;j<win[i].nPull;j++)
253 sfree(win[i].bContrib[j]);
254 sfree(win[i].Histo);
255 sfree(win[i].cum);
256 sfree(win[i].k);
257 sfree(win[i].pos);
258 sfree(win[i].z);
259 sfree(win[i].N);
260 sfree(win[i].Ntot);
261 sfree(win[i].g);
262 sfree(win[i].tau);
263 sfree(win[i].tausmooth);
264 sfree(win[i].bContrib);
265 sfree(win[i].ztime);
266 sfree(win[i].forceAv);
267 sfree(win[i].aver);
268 sfree(win[i].sigma);
269 sfree(win[i].bsWeight);
271 sfree(win);
274 /* Return j such that xx[j] <= x < xx[j+1] */
275 void searchOrderedTable(double xx[], int n, double x, int *j)
277 int ju,jm,jl;
278 int ascending;
280 jl=-1;
281 ju=n;
282 ascending=(xx[n-1] > xx[0]);
283 while (ju-jl > 1)
285 jm=(ju+jl) >> 1;
286 if ((x >= xx[jm]) == ascending)
287 jl=jm;
288 else
289 ju=jm;
291 if (x==xx[0]) *j=0;
292 else if (x==xx[n-1]) *j=n-2;
293 else *j=jl;
296 /* Read and setup tabulated umbrella potential */
297 void setup_tab(const char *fn,t_UmbrellaOptions *opt)
299 int i,ny,nl;
300 double **y;
302 printf("Setting up tabulated potential from file %s\n",fn);
303 nl=read_xvg(fn,&y,&ny);
304 opt->tabNbins=nl;
305 if (ny!=2)
306 gmx_fatal(FARGS,"Found %d columns in %s. Expected 2.\n",ny,fn);
307 opt->tabMin=y[0][0];
308 opt->tabMax=y[0][nl-1];
309 opt->tabDz=(opt->tabMax-opt->tabMin)/(nl-1);
310 if (opt->tabDz<=0)
311 gmx_fatal(FARGS,"The tabulated potential in %s must be provided in \n"
312 "ascending z-direction",fn);
313 for (i=0;i<nl-1;i++)
314 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
315 gmx_fatal(FARGS,"z-values in %s are not equally spaced.\n",ny,fn);
316 snew(opt->tabY,nl);
317 snew(opt->tabX,nl);
318 for (i=0;i<nl;i++)
320 opt->tabX[i]=y[0][i];
321 opt->tabY[i]=y[1][i];
323 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
324 opt->tabMin,opt->tabMax,opt->tabDz);
327 void read_pdo_header(FILE * file,t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
329 char line[2048];
330 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
331 int i;
333 /* line 1 */
334 if (fgets(line,2048,file) == NULL)
335 gmx_fatal(FARGS,"Error reading header from pdo file\n");
336 sscanf(line,"%s%s%s",Buffer0,Buffer1,Buffer2);
337 if(strcmp(Buffer1,"UMBRELLA"))
338 gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s\n"
339 "(Found in first line: `%s')\n",
340 Buffer1, "UMBRELLA",line);
341 if(strcmp(Buffer2,"3.0"))
342 gmx_fatal(FARGS,"This does not appear to be a version 3.0 pdo file");
344 /* line 2 */
345 if (fgets(line,2048,file) == NULL)
346 gmx_fatal(FARGS,"Error reading header from pdo file\n");
347 sscanf(line,"%s%s%s%d%d%d",Buffer0,Buffer1,Buffer2,
348 &(header->Dims[0]),&(header->Dims[1]),&(header->Dims[2]));
350 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
352 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
353 if(header->nDim!=1)
354 gmx_fatal(FARGS,"Currently only supports one dimension");
356 /* line3 */
357 if (fgets(line,2048,file) == NULL)
358 gmx_fatal(FARGS,"Error reading header from pdo file\n");
359 sscanf(line,"%s%s%d",Buffer0,Buffer1,&(header->nSkip));
361 /* line 4 */
362 if (fgets(line,2048,file) == NULL)
363 gmx_fatal(FARGS,"Error reading header from pdo file\n");
364 sscanf(line,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->Reference);
366 /* line 5 */
367 if (fgets(line,2048,file) == NULL)
368 gmx_fatal(FARGS,"Error reading header from pdo file\n");
369 sscanf(line,"%s%s%s%s%s%d",Buffer0,Buffer1,Buffer2,Buffer3,Buffer4,&(header->nPull));
371 if (opt->verbose)
372 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
373 header->Reference);
375 for(i=0;i<header->nPull;++i)
377 if (fgets(line,2048,file) == NULL)
378 gmx_fatal(FARGS,"Error reading header from pdo file\n");
379 sscanf(line,"%*s%*s%*s%s%*s%*s%lf%*s%*s%lf",header->PullName[i]
380 ,&(header->UmbPos[i][0]),&(header->UmbCons[i][0]));
381 if (opt->verbose)
382 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
383 i,header->PullName[i],header->UmbPos[i][0],header->UmbCons[i][0]);
386 if (fgets(line,2048,file) == NULL)
387 gmx_fatal(FARGS,"Cannot read from file\n");
388 sscanf(line,"%s",Buffer3);
389 if (strcmp(Buffer3,"#####") != 0)
390 gmx_fatal(FARGS,"Expected '#####', found %s. Hick.\n",Buffer3);
394 static char *fgets3(FILE *fp,char ptr[],int *len)
396 char *p;
397 int slen;
399 if (fgets(ptr,*len-1,fp) == NULL)
400 return NULL;
401 p = ptr;
402 while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
404 /* This line is longer than len characters, let's increase len! */
405 *len += STRLEN;
406 p += STRLEN;
407 srenew(ptr,*len);
408 if (fgets(p-1,STRLEN,fp) == NULL)
409 break;
411 slen = strlen(ptr);
412 if (ptr[slen-1] == '\n')
413 ptr[slen-1] = '\0';
415 return ptr;
418 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
419 int fileno, t_UmbrellaWindow * win,
420 t_UmbrellaOptions *opt,
421 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
423 int i,inttemp,bins,count,ntot;
424 real min,max,minfound=1e20,maxfound=-1e20;
425 double temp,time,time0=0,dt;
426 char *ptr=0;
427 t_UmbrellaWindow * window=0;
428 gmx_bool timeok,dt_ok=1;
429 char *tmpbuf=0,fmt[256],fmtign[256];
430 int len=STRLEN,dstep=1;
431 const int blocklen=4096;
432 int *lennow=0;
434 if (!bGetMinMax)
436 bins=opt->bins;
437 min=opt->min;
438 max=opt->max;
440 window=win+fileno;
441 /* Need to alocate memory and set up structure */
442 window->nPull=header->nPull;
443 window->nBin=bins;
445 snew(window->Histo,window->nPull);
446 snew(window->z,window->nPull);
447 snew(window->k,window->nPull);
448 snew(window->pos,window->nPull);
449 snew(window->N, window->nPull);
450 snew(window->Ntot, window->nPull);
451 snew(window->g, window->nPull);
452 snew(window->bsWeight, window->nPull);
454 window->bContrib=0;
456 if (opt->bCalcTauInt)
457 snew(window->ztime, window->nPull);
458 else
459 window->ztime=0;
460 snew(lennow,window->nPull);
462 for(i=0;i<window->nPull;++i)
464 window->z[i]=1;
465 window->bsWeight[i]=1.;
466 snew(window->Histo[i],bins);
467 window->k[i]=header->UmbCons[i][0];
468 window->pos[i]=header->UmbPos[i][0];
469 window->N[i]=0;
470 window->Ntot[i]=0;
471 window->g[i]=1.;
472 if (opt->bCalcTauInt)
473 window->ztime[i]=0;
476 /* Done with setup */
478 else
480 minfound=1e20;
481 maxfound=-1e20;
482 min=max=bins=0; /* Get rid of warnings */
485 count=0;
486 snew(tmpbuf,len);
487 while ( (ptr=fgets3(file,tmpbuf,&len)) != NULL)
489 trim(ptr);
491 if (ptr[0] == '#' || strlen(ptr)<2)
492 continue;
494 /* Initiate format string */
495 fmtign[0] = '\0';
496 strcat(fmtign,"%*s");
498 sscanf(ptr,"%lf",&time); /* printf("Time %f\n",time); */
499 /* Round time to fs */
500 time=1.0/1000*( (int) (time*1000+0.5) );
502 /* get time step of pdo file */
503 if (count==0)
504 time0=time;
505 else if (count==1)
507 dt=time-time0;
508 if (opt->dt>0.0)
510 dstep=(int)(opt->dt/dt+0.5);
511 if (dstep==0)
512 dstep=1;
514 if (!bGetMinMax)
515 window->dt=dt*dstep;
517 count++;
519 dt_ok=((count-1)%dstep == 0);
520 timeok=(dt_ok && time >= opt->tmin && time <= opt->tmax);
521 /* if (opt->verbose)
522 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
523 time,opt->tmin, opt->tmax, dt_ok,timeok); */
525 if (timeok)
527 for(i=0;i<header->nPull;++i)
529 strcpy(fmt,fmtign);
530 strcat(fmt,"%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
531 strcat(fmtign,"%*s"); /* ignoring one more entry in the next loop */
532 if(sscanf(ptr,fmt,&temp))
534 temp+=header->UmbPos[i][0];
535 if (bGetMinMax)
537 if (temp<minfound)
538 minfound=temp;
539 if (temp>maxfound)
540 maxfound=temp;
542 else{
543 if (opt->bCalcTauInt)
545 /* save time series for autocorrelation analysis */
546 ntot=window->Ntot[i];
547 if (ntot>=lennow[i])
549 lennow[i]+=blocklen;
550 srenew(window->ztime[i],lennow[i]);
552 window->ztime[i][ntot]=temp;
555 temp-=min;
556 temp/=(max-min);
557 temp*=bins;
558 temp=floor(temp);
560 inttemp = (int)temp;
561 if (opt->bCycl)
563 if (inttemp < 0)
564 inttemp+=bins;
565 else if (inttemp >= bins)
566 inttemp-=bins;
569 if(inttemp >= 0 && inttemp < bins)
571 window->Histo[i][inttemp]+=1.;
572 window->N[i]++;
574 window->Ntot[i]++;
579 if (time>opt->tmax)
581 if (opt->verbose)
582 printf("time %f larger than tmax %f, stop reading pdo file\n",time,opt->tmax);
583 break;
587 if (bGetMinMax)
589 *mintmp=minfound;
590 *maxtmp=maxfound;
593 sfree(lennow);
594 sfree(tmpbuf);
597 void enforceEqualWeights(t_UmbrellaWindow * window,int nWindows)
599 int i,k,j,NEnforced;
600 double ratio;
602 NEnforced=window[0].Ntot[0];
603 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
604 "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
605 /* enforce all histograms to have the same weight as the very first histogram */
607 for(j=0;j<nWindows;++j)
609 for(k=0;k<window[j].nPull;++k)
611 ratio=1.0*NEnforced/window[j].Ntot[k];
612 for(i=0;i<window[0].nBin;++i)
614 window[j].Histo[k][i]*=ratio;
616 window[j].N[k]=(int)(ratio*window[j].N[k] + 0.5);
621 /* Simple linear interpolation between two given tabulated points */
622 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
624 int jl,ju;
625 double pl,pu,dz,dp;
627 jl=floor((dist-opt->tabMin)/opt->tabDz);
628 ju=jl+1;
629 if (jl<0 || ju>=opt->tabNbins)
631 gmx_fatal(FARGS,"Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
632 "Provide an extended table.",dist,jl,ju);
634 pl=opt->tabY[jl];
635 pu=opt->tabY[ju];
636 dz=dist-opt->tabX[jl];
637 dp=(pu-pl)*dz/opt->tabDz;
638 return pl+dp;
642 /* Don't worry, that routine does not mean we compute the PMF in limited precision.
643 After rapid convergence (using only substiantal contributions), we always switch to
644 full precision. */
645 void setup_acc_wham(double *profile,t_UmbrellaWindow * window,int nWindows,
646 t_UmbrellaOptions *opt)
648 int i,j,k,nGrptot=0,nContrib=0,nTot=0;
649 double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib1,contrib2;
650 gmx_bool bAnyContrib;
651 static int bFirst=1;
652 static double wham_contrib_lim;
654 if (bFirst)
656 for(i=0;i<nWindows;++i)
658 nGrptot+=window[i].nPull;
660 wham_contrib_lim=opt->Tolerance/nGrptot;
663 ztot=opt->max-opt->min;
664 ztot_half=ztot/2;
666 for(i=0;i<nWindows;++i)
668 if ( ! window[i].bContrib)
670 snew(window[i].bContrib,window[i].nPull);
672 for(j=0;j<window[i].nPull;++j)
674 if ( ! window[i].bContrib[j])
675 snew(window[i].bContrib[j],opt->bins);
676 bAnyContrib=FALSE;
677 for(k=0;k<opt->bins;++k)
679 temp=(1.0*k+0.5)*dz+min;
680 distance = temp - window[i].pos[j]; /* distance to umbrella center */
681 if (opt->bCycl)
682 { /* in cyclic wham: */
683 if (distance > ztot_half) /* |distance| < ztot_half */
684 distance-=ztot;
685 else if (distance < -ztot_half)
686 distance+=ztot;
688 /* Note: there are two contributions to bin k in the wham equations:
689 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
690 ii) exp(- U/(8.314e-3*opt->Temperature))
691 where U is the umbrella potential
692 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
695 if (!opt->bTab)
696 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
697 else
698 U=tabulated_pot(distance,opt); /* Use tabulated potential */
700 contrib1=profile[k]*exp(- U/(8.314e-3*opt->Temperature));
701 contrib2=window[i].N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j]);
702 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
703 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
704 if (window[i].bContrib[j][k])
705 nContrib++;
706 nTot++;
708 /* If this histo is far outside min and max all bContrib may be FALSE,
709 causing a floating point exception later on. To avoid that, switch
710 them all to true.*/
711 if (!bAnyContrib)
712 for(k=0;k<opt->bins;++k)
713 window[i].bContrib[j][k]=TRUE;
716 if (bFirst)
717 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
718 "Evaluating only %d of %d expressions.\n\n",wham_contrib_lim,nContrib, nTot);
720 if (opt->verbose)
721 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
722 nContrib,nTot);
723 bFirst=0;
727 void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows,
728 t_UmbrellaOptions *opt, gmx_bool bExact)
730 int i,k,j;
731 double num,ztot_half,ztot,distance,min=opt->min,dz=opt->dz;
732 double denom,U=0,temp=0,invg;
734 ztot=opt->max-opt->min;
735 ztot_half=ztot/2;
737 for(i=0;i<opt->bins;++i)
739 num=denom=0.;
740 for(j=0;j<nWindows;++j)
742 for(k=0;k<window[j].nPull;++k)
744 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
745 temp = (1.0*i+0.5)*dz+min;
746 num += invg*window[j].Histo[k][i];
748 if (! (bExact || window[j].bContrib[k][i]))
749 continue;
750 distance = temp - window[j].pos[k]; /* distance to umbrella center */
751 if (opt->bCycl)
752 { /* in cyclic wham: */
753 if (distance > ztot_half) /* |distance| < ztot_half */
754 distance-=ztot;
755 else if (distance < -ztot_half)
756 distance+=ztot;
759 if (!opt->bTab)
760 U=0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
761 else
762 U=tabulated_pot(distance,opt); /* Use tabulated potential */
763 denom+=invg*window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
766 profile[i]=num/denom;
771 double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows,
772 t_UmbrellaOptions *opt, gmx_bool bExact)
774 int i,j,k,binMax=-1;
775 double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,totalMax;
776 double MAX=-1e20, total=0;
778 ztot=opt->max-opt->min;
779 ztot_half=ztot/2;
781 for(i=0;i<nWindows;++i)
783 for(j=0;j<window[i].nPull;++j)
785 total=0;
786 for(k=0;k<window[i].nBin;++k)
788 if (! (bExact || window[i].bContrib[j][k]))
789 continue;
790 temp=(1.0*k+0.5)*dz+min;
791 distance = temp - window[i].pos[j]; /* distance to umbrella center */
792 if (opt->bCycl)
793 { /* in cyclic wham: */
794 if (distance > ztot_half) /* |distance| < ztot_half */
795 distance-=ztot;
796 else if (distance < -ztot_half)
797 distance+=ztot;
800 if (!opt->bTab)
801 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
802 else
803 U=tabulated_pot(distance,opt); /* Use tabulated potential */
805 total+=profile[k]*exp(-U/(8.314e-3*opt->Temperature));
807 /* Avoid floating point exception if window is far outside min and max */
808 if (total != 0.0)
809 total = -log(total);
810 else
811 total = 1000.0;
812 temp = fabs(total - window[i].z[j]);
813 if(temp > MAX){
814 MAX=temp;
815 binMax=k;
816 totalMax=total;
818 window[i].z[j] = total;
821 return MAX;
824 void symmetrizeProfile(double* profile,t_UmbrellaOptions *opt)
826 int i,j;
827 double *prof2,bins=opt->bins,min=opt->min,max=opt->max,dz=opt->dz,zsym,deltaz,profsym;
828 double z,z1;
830 if (min>0. || max<0.)
831 gmx_fatal(FARGS,"Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
832 opt->min,opt->max);
834 snew(prof2,bins);
836 for (i=0;i<bins;i++)
838 z=min+(i+0.5)*dz;
839 zsym=-z;
840 /* bin left of zsym */
841 j=floor((zsym-min)/dz-0.5);
842 if (j>=0 && (j+1)<bins)
844 /* interpolate profile linearly between bins j and j+1 */
845 z1=min+(j+0.5)*dz;
846 deltaz=zsym-z1;
847 profsym=profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
848 /* average between left and right */
849 prof2[i]=0.5*(profsym+profile[i]);
851 else
853 prof2[i]=profile[i];
857 memcpy(profile,prof2,bins*sizeof(double));
858 sfree(prof2);
861 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
863 int i,bins,imin;
864 double unit_factor=1., R_MolarGasConst, diff;
866 R_MolarGasConst=8.314472e-3; /* in kJ/(mol*K) */
867 bins=opt->bins;
869 /* Not log? Nothing to do! */
870 if (!opt->bLog)
871 return;
873 /* Get profile in units of kT, kJ/mol, or kCal/mol */
874 if (opt->unit == en_kT)
875 unit_factor=1.0;
876 else if (opt->unit == en_kJ)
877 unit_factor=R_MolarGasConst*opt->Temperature;
878 else if (opt->unit == en_kCal)
879 unit_factor=R_MolarGasConst*opt->Temperature/4.1868;
880 else
881 gmx_fatal(FARGS,"Sorry, I don't know this energy unit.");
883 for (i=0;i<bins;i++)
884 if (profile[i]>0.0)
885 profile[i]=-log(profile[i])*unit_factor;
887 /* shift to zero at z=opt->zProf0 */
888 if (!opt->bProf0Set)
890 diff=profile[0];
892 else
894 /* Get bin with shortest distance to opt->zProf0
895 (-0.5 from bin position and +0.5 from rounding cancel) */
896 imin=(int)((opt->zProf0-opt->min)/opt->dz);
897 if (imin<0)
898 imin=0;
899 else if (imin>=bins)
900 imin=bins-1;
901 diff=profile[imin];
904 /* Shift to zero */
905 for (i=0;i<bins;i++)
906 profile[i]-=diff;
909 void getRandomIntArray(int nPull,int blockLength,int* randomArray,gmx_rng_t rng)
911 int ipull,blockBase,nr,ipullRandom;
913 if (blockLength==0)
914 blockLength=nPull;
916 for (ipull=0; ipull<nPull; ipull++)
918 blockBase=(ipull/blockLength)*blockLength;
920 { /* make sure nothing bad happens in the last block */
921 nr=(int)(gmx_rng_uniform_real(rng)*blockLength);
922 ipullRandom = blockBase + nr;
923 } while (ipullRandom >= nPull);
924 if (ipullRandom<0 || ipullRandom>=nPull)
925 gmx_fatal(FARGS,"Ups, random iWin = %d, nPull = %d, nr = %d, "
926 "blockLength = %d, blockBase = %d\n",
927 ipullRandom,nPull,nr,blockLength,blockBase);
928 randomArray[ipull]=ipullRandom;
930 /*for (ipull=0; ipull<nPull; ipull++)
931 printf("%d ",randomArray[ipull]); printf("\n"); */
934 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
935 t_UmbrellaWindow *thisWindow,int pullid)
937 synthWindow->N [0]=thisWindow->N [pullid];
938 synthWindow->Histo [0]=thisWindow->Histo [pullid];
939 synthWindow->pos [0]=thisWindow->pos [pullid];
940 synthWindow->z [0]=thisWindow->z [pullid];
941 synthWindow->k [0]=thisWindow->k [pullid];
942 synthWindow->bContrib[0]=thisWindow->bContrib [pullid];
943 synthWindow->g [0]=thisWindow->g [pullid];
944 synthWindow->bsWeight[0]=thisWindow->bsWeight [pullid];
947 /* Calculate cumulative distribution function of of all histograms. They
948 allow to create random number sequences
949 which are distributed according to the histograms. Required to generate
950 the "synthetic" histograms for the Bootstrap method */
951 void calc_cumulatives(t_UmbrellaWindow *window,int nWindows,
952 t_UmbrellaOptions *opt,const char *fnhist)
954 int i,j,k,nbin;
955 double last;
956 char *fn=0,*buf=0;
957 FILE *fp=0;
959 if (opt->bs_verbose)
961 snew(fn,strlen(fnhist)+10);
962 snew(buf,strlen(fnhist)+10);
963 sprintf(fn,"%s_cumul.xvg",strncpy(buf,fnhist,strlen(fnhist)-4));
964 fp=xvgropen(fn,"CDFs of umbrella windows","z","CDF",opt->oenv);
967 nbin=opt->bins;
968 for (i=0; i<nWindows; i++)
970 snew(window[i].cum,window[i].nPull);
971 for (j=0; j<window[i].nPull; j++)
973 snew(window[i].cum[j],nbin+1);
974 window[i].cum[j][0]=0.;
975 for (k=1; k<=nbin; k++)
976 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
978 /* normalize CDFs. Ensure cum[nbin]==1 */
979 last = window[i].cum[j][nbin];
980 for (k=0; k<=nbin; k++)
981 window[i].cum[j][k] /= last;
985 printf("Cumulative distriubtion functions of all histograms created.\n");
986 if (opt->bs_verbose)
988 for (k=0; k<=nbin; k++)
990 fprintf(fp,"%g\t",opt->min+k*opt->dz);
991 for (i=0; i<nWindows; i++)
992 for (j=0; j<window[i].nPull; j++)
993 fprintf(fp,"%g\t",window[i].cum[j][k]);
994 fprintf(fp,"\n");
996 printf("Wrote cumulative distribution functions to %s\n",fn);
997 ffclose(fp);
998 sfree(fn);
999 sfree(buf);
1004 /* Return j such that xx[j] <= x < xx[j+1] */
1005 void searchCumulative(double xx[], int n, double x, int *j)
1007 int ju,jm,jl;
1009 jl=-1;
1010 ju=n;
1011 while (ju-jl > 1)
1013 jm=(ju+jl) >> 1;
1014 if (x >= xx[jm])
1015 jl=jm;
1016 else
1017 ju=jm;
1019 if (x==xx[0])
1020 *j=0;
1021 else if (x==xx[n-1])
1022 *j=n-2;
1023 else
1024 *j=jl;
1027 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1028 int pullid,t_UmbrellaOptions *opt)
1030 int N,i,nbins,r_index,ibin;
1031 double r,tausteps=0.0,a,ap,dt,x,invsqrt2,g,y,sig=0.,z,mu=0.;
1032 char errstr[1024];
1034 N=thisWindow->N[pullid];
1035 dt=thisWindow->dt;
1036 nbins=thisWindow->nBin;
1038 /* tau = autocorrelation time */
1039 if (opt->tauBootStrap>0.0)
1040 tausteps=opt->tauBootStrap/dt;
1041 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1043 /* calc tausteps from g=1+2tausteps */
1044 g=thisWindow->g[pullid];
1045 tausteps=(g-1)/2;
1047 else
1049 sprintf(errstr,
1050 "When generating hypothetical trajctories from given umbrella histograms,\n"
1051 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1052 "cannot be predicted. You have 3 options:\n"
1053 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1054 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1055 strcat(errstr,
1056 " with option -iiact for all umbrella windows.\n"
1057 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1058 " Use option (3) only if you are sure what you're doing, you may severely\n"
1059 " underestimate the error if a too small ACT is given.\n");
1060 gmx_fatal(FARGS,errstr);
1063 synthWindow->N [0]=N;
1064 synthWindow->pos [0]=thisWindow->pos[pullid];
1065 synthWindow->z [0]=thisWindow->z[pullid];
1066 synthWindow->k [0]=thisWindow->k[pullid];
1067 synthWindow->bContrib[0]=thisWindow->bContrib[pullid];
1068 synthWindow->g [0]=thisWindow->g [pullid];
1069 synthWindow->bsWeight[0]=thisWindow->bsWeight[pullid];
1071 for (i=0;i<nbins;i++)
1072 synthWindow->Histo[0][i]=0.;
1074 if (opt->bsMethod==bsMethod_trajGauss)
1076 sig = thisWindow->sigma [pullid];
1077 mu = thisWindow->aver [pullid];
1080 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1081 Use the following:
1082 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1083 then
1084 z = a*x + sqrt(1-a^2)*y
1085 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1086 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1087 function
1088 C(t) = exp(-t/tau) with tau=-1/ln(a)
1090 Then, use error function to turn the Gaussian random variable into a uniformly
1091 distributed one in [0,1]. Eventually, use cumulative distribution function of
1092 histogram to get random variables distributed according to histogram.
1093 Note: The ACT of the flat distribution and of the generated histogram is not
1094 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1096 a=exp(-1.0/tausteps);
1097 ap=sqrt(1-a*a);
1098 invsqrt2=1./sqrt(2.0);
1100 /* init random sequence */
1101 x=gmx_rng_gaussian_table(opt->rng);
1103 if (opt->bsMethod==bsMethod_traj)
1105 /* bootstrap points from the umbrella histograms */
1106 for (i=0;i<N;i++)
1108 y=gmx_rng_gaussian_table(opt->rng);
1109 x=a*x+ap*y;
1110 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1111 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1113 r=0.5*(1+gmx_erf(x*invsqrt2));
1114 searchCumulative(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
1115 synthWindow->Histo[0][r_index]+=1.;
1118 else if (opt->bsMethod==bsMethod_trajGauss)
1120 /* bootstrap points from a Gaussian with the same average and sigma
1121 as the respective umbrella histogram. The idea was, that -given
1122 limited sampling- the bootstrapped histograms are otherwise biased
1123 from the limited sampling of the US histos. However, bootstrapping from
1124 the Gaussian seems to yield a similar estimate. */
1125 i=0;
1126 while (i<N)
1128 y=gmx_rng_gaussian_table(opt->rng);
1129 x=a*x+ap*y;
1130 z = x*sig+mu;
1131 ibin=floor((z-opt->min)/opt->dz);
1132 if (opt->bCycl)
1134 if (ibin<0)
1135 while ( (ibin+=nbins) < 0);
1136 else if (ibin>=nbins)
1137 while ( (ibin-=nbins) >= nbins);
1140 if (ibin>=0 && ibin<nbins)
1142 synthWindow->Histo[0][ibin]+=1.;
1143 i++;
1147 else
1149 gmx_fatal(FARGS,"Unknown bsMethod (id %d). That should not happen.\n",opt->bsMethod);
1154 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1155 int bs_index,t_UmbrellaOptions *opt)
1157 char *fn=0,*buf=0,title[256];
1158 FILE *fp;
1159 int bins,l,i,j;
1161 if (bs_index<0)
1163 fn=strdup(fnhist);
1164 strcpy(title,"Umbrella histograms");
1166 else
1168 snew(fn,strlen(fnhist)+10);
1169 snew(buf,strlen(fnhist)+1);
1170 sprintf(fn,"%s_bs%d.xvg",strncpy(buf,fnhist,strlen(fnhist)-4),bs_index);
1171 sprintf(title,"Umbrella histograms. Bootstrap #%d",bs_index);
1174 fp=xvgropen(fn,title,"z","count",opt->oenv);
1175 bins=opt->bins;
1177 /* Write histograms */
1178 for(l=0;l<bins;++l)
1180 fprintf(fp,"%e\t",(double)(l+0.5)*opt->dz+opt->min);
1181 for(i=0;i<nWindows;++i)
1183 for(j=0;j<window[i].nPull;++j)
1185 fprintf(fp,"%e\t",window[i].Histo[j][l]);
1188 fprintf(fp,"\n");
1191 ffclose(fp);
1192 printf("Wrote %s\n",fn);
1193 if (buf)
1195 sfree(buf);
1196 sfree(fn);
1200 int func_wham_is_larger(const void *a, const void *b)
1202 double *aa,*bb;
1203 aa=(double*)a;
1204 bb=(double*)b;
1205 if (*aa < *bb)
1206 return -1;
1207 else if (*aa > *bb)
1208 return 1;
1209 else
1210 return 0;
1214 void setRandomBsWeights(t_UmbrellaWindow *synthwin,int nAllPull, t_UmbrellaOptions *opt)
1216 int i;
1217 double *r;
1219 snew(r,nAllPull);
1221 /* generate ordered random numbers between 0 and nAllPull */
1222 for (i=0; i<nAllPull-1; i++)
1224 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1226 qsort((void *)r,nAllPull-1, sizeof(double), &func_wham_is_larger);
1227 r[nAllPull-1]=1.0*nAllPull;
1229 synthwin[0].bsWeight[0]=r[0];
1230 for (i=1; i<nAllPull; i++)
1232 synthwin[i].bsWeight[0]=r[i]-r[i-1];
1235 /* avoid to have zero weight by adding a tiny value */
1236 for (i=0; i<nAllPull; i++)
1237 if (synthwin[i].bsWeight[0] < 1e-5)
1238 synthwin[i].bsWeight[0] = 1e-5;
1240 sfree(r);
1243 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1244 char* ylabel, double *profile,
1245 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1247 t_UmbrellaWindow * synthWindow;
1248 double *bsProfile,*bsProfiles_av, *bsProfiles_av2,maxchange=1e20,tmp,stddev;
1249 int i,j,*randomArray=0,winid,pullid,ib;
1250 int iAllPull,nAllPull,*allPull_winId,*allPull_pullId;
1251 FILE *fp;
1252 gmx_bool bExact=FALSE;
1254 /* init random generator */
1255 if (opt->bsSeed==-1)
1256 opt->rng=gmx_rng_init(gmx_rng_make_seed());
1257 else
1258 opt->rng=gmx_rng_init(opt->bsSeed);
1260 snew(bsProfile, opt->bins);
1261 snew(bsProfiles_av, opt->bins);
1262 snew(bsProfiles_av2,opt->bins);
1264 /* Create array of all pull groups. Note that different windows
1265 may have different nr of pull groups
1266 First: Get total nr of pull groups */
1267 nAllPull=0;
1268 for (i=0;i<nWindows;i++)
1269 nAllPull+=window[i].nPull;
1270 snew(allPull_winId,nAllPull);
1271 snew(allPull_pullId,nAllPull);
1272 iAllPull=0;
1273 /* Setup one array of all pull groups */
1274 for (i=0;i<nWindows;i++)
1276 for (j=0;j<window[i].nPull;j++)
1278 allPull_winId[iAllPull]=i;
1279 allPull_pullId[iAllPull]=j;
1280 iAllPull++;
1284 /* setup stuff for synthetic windows */
1285 snew(synthWindow,nAllPull);
1286 for (i=0;i<nAllPull;i++)
1288 synthWindow[i].nPull=1;
1289 synthWindow[i].nBin=opt->bins;
1290 snew(synthWindow[i].Histo,1);
1291 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1292 snew(synthWindow[i].Histo[0],opt->bins);
1293 snew(synthWindow[i].N,1);
1294 snew(synthWindow[i].pos,1);
1295 snew(synthWindow[i].z,1);
1296 snew(synthWindow[i].k,1);
1297 snew(synthWindow[i].bContrib,1);
1298 snew(synthWindow[i].g,1);
1299 snew(synthWindow[i].bsWeight,1);
1302 switch(opt->bsMethod)
1304 case bsMethod_hist:
1305 snew(randomArray,nAllPull);
1306 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1307 please_cite(stdout,"Hub2006");
1308 break;
1309 case bsMethod_BayesianHist :
1310 /* just copy all histogams into synthWindow array */
1311 for (i=0;i<nAllPull;i++)
1313 winid =allPull_winId [i];
1314 pullid=allPull_pullId[i];
1315 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1317 break;
1318 case bsMethod_traj:
1319 case bsMethod_trajGauss:
1320 calc_cumulatives(window,nWindows,opt,fnhist);
1321 break;
1322 default:
1323 gmx_fatal(FARGS,"Unknown bootstrap method. That should not have happened.\n");
1326 /* do bootstrapping */
1327 fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,opt->oenv);
1328 for (ib=0;ib<opt->nBootStrap;ib++)
1330 printf(" *******************************************\n"
1331 " ******** Start bootstrap nr %d ************\n"
1332 " *******************************************\n",ib+1);
1334 switch(opt->bsMethod)
1336 case bsMethod_hist:
1337 /* bootstrap complete histograms from given histograms */
1338 getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray,opt->rng);
1339 for (i=0;i<nAllPull;i++){
1340 winid =allPull_winId [randomArray[i]];
1341 pullid=allPull_pullId[randomArray[i]];
1342 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1344 break;
1345 case bsMethod_BayesianHist:
1346 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1347 setRandomBsWeights(synthWindow,nAllPull,opt);
1348 break;
1349 case bsMethod_traj:
1350 case bsMethod_trajGauss:
1351 /* create new histos from given histos, that is generate new hypothetical
1352 trajectories */
1353 for (i=0;i<nAllPull;i++)
1355 winid=allPull_winId[i];
1356 pullid=allPull_pullId[i];
1357 create_synthetic_histo(synthWindow+i,window+winid,pullid,opt);
1359 break;
1362 /* write histos in case of verbose output */
1363 if (opt->bs_verbose)
1364 print_histograms(fnhist,synthWindow,nAllPull,ib,opt);
1366 /* do wham */
1367 i=0;
1368 bExact=FALSE;
1369 maxchange=1e20;
1370 memcpy(bsProfile,profile,opt->bins*sizeof(double)); /* use profile as guess */
1373 if ( (i%opt->stepUpdateContrib) == 0)
1374 setup_acc_wham(bsProfile,synthWindow,nAllPull,opt);
1375 if (maxchange<opt->Tolerance)
1376 bExact=TRUE;
1377 if (((i%opt->stepchange) == 0 || i==1) && !i==0)
1378 printf("\t%4d) Maximum change %e\n",i,maxchange);
1379 calc_profile(bsProfile,synthWindow,nAllPull,opt,bExact);
1380 i++;
1381 } while( (maxchange=calc_z(bsProfile, synthWindow, nAllPull, opt,bExact)) > opt->Tolerance || !bExact);
1382 printf("\tConverged in %d iterations. Final maximum change %g\n",i,maxchange);
1384 if (opt->bLog)
1385 prof_normalization_and_unit(bsProfile,opt);
1387 /* symmetrize profile around z=0 */
1388 if (opt->bSym)
1389 symmetrizeProfile(bsProfile,opt);
1391 /* save stuff to get average and stddev */
1392 for (i=0;i<opt->bins;i++)
1394 tmp=bsProfile[i];
1395 bsProfiles_av[i]+=tmp;
1396 bsProfiles_av2[i]+=tmp*tmp;
1397 fprintf(fp,"%e\t%e\n",(i+0.5)*opt->dz+opt->min,tmp);
1399 fprintf(fp,"&\n");
1401 ffclose(fp);
1403 /* write average and stddev */
1404 fp=xvgropen(fnres,"Average and stddev from bootstrapping","z",ylabel,opt->oenv);
1405 fprintf(fp,"@TYPE xydy\n");
1406 for (i=0;i<opt->bins;i++)
1408 bsProfiles_av [i]/=opt->nBootStrap;
1409 bsProfiles_av2[i]/=opt->nBootStrap;
1410 tmp=bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1411 stddev=(tmp>=0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1412 fprintf(fp,"%e\t%e\t%e\n",(i+0.5)*opt->dz+opt->min,bsProfiles_av [i],stddev);
1414 ffclose(fp);
1415 printf("Wrote boot strap result to %s\n",fnres);
1418 int whaminFileType(char *fn)
1420 int len;
1421 len=strlen(fn);
1422 if (strcmp(fn+len-3,"tpr")==0)
1423 return whamin_tpr;
1424 else if (strcmp(fn+len-3,"xvg")==0 || strcmp(fn+len-6,"xvg.gz")==0)
1425 return whamin_pullxf;
1426 else if (strcmp(fn+len-3,"pdo")==0 || strcmp(fn+len-6,"pdo.gz")==0)
1427 return whamin_pdo;
1428 else
1429 gmx_fatal(FARGS,"Unknown file type of %s. Should be tpr, xvg, or pdo.\n",fn);
1430 return whamin_unknown;
1433 void read_wham_in(const char *fn,char ***filenamesRet, int *nfilesRet,
1434 t_UmbrellaOptions *opt)
1436 char **filename=0,tmp[STRLEN];
1437 int nread,sizenow,i,block=1;
1438 FILE *fp;
1440 fp=ffopen(fn,"r");
1441 nread=0;
1442 sizenow=0;
1443 while (fscanf(fp,"%s",tmp) != EOF)
1445 if (strlen(tmp)>=WHAM_MAXFILELEN)
1446 gmx_fatal(FARGS,"Filename too long. Only %d characters allowed\n",WHAM_MAXFILELEN);
1447 if (nread>=sizenow)
1449 sizenow+=block;
1450 srenew(filename,sizenow);
1451 for (i=sizenow-block;i<sizenow;i++)
1452 snew(filename[i],WHAM_MAXFILELEN);
1454 strcpy(filename[nread],tmp);
1455 if (opt->verbose)
1456 printf("Found file %s in %s\n",filename[nread],fn);
1457 nread++;
1459 *filenamesRet=filename;
1460 *nfilesRet=nread;
1464 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt,gmx_bool *bPipeOpen)
1466 char Buffer[1024],gunzip[1024],*Path=0;
1467 FILE *pipe=0;
1468 static gmx_bool bFirst=1;
1470 /* gzipped pdo file? */
1471 if ((strcmp(fn+strlen(fn)-3,".gz")==0))
1473 /* search gunzip executable */
1474 if(!(Path=getenv("GMX_PATH_GZIP")))
1476 if (gmx_fexist("/bin/gunzip"))
1477 sprintf(gunzip,"%s","/bin/gunzip");
1478 else if (gmx_fexist("/usr/bin/gunzip"))
1479 sprintf(gunzip,"%s","/usr/bin/gunzip");
1480 else
1481 gmx_fatal(FARGS,"Cannot find executable gunzip in /bin or /usr/bin.\n"
1482 "You may want to define the path to gunzip "
1483 "with the environment variable GMX_PATH_GZIP.",gunzip);
1485 else
1487 sprintf(gunzip,"%s/gunzip",Path);
1488 if (!gmx_fexist(gunzip))
1489 gmx_fatal(FARGS,"Cannot find executable %s. Please define the path to gunzip"
1490 " in the environmental varialbe GMX_PATH_GZIP.",gunzip);
1492 if (bFirst)
1494 printf("Using gunzig executable %s\n",gunzip);
1495 bFirst=0;
1497 if (!gmx_fexist(fn))
1499 gmx_fatal(FARGS,"File %s does not exist.\n",fn);
1501 sprintf(Buffer,"%s -c < %s",gunzip,fn);
1502 if (opt->verbose)
1503 printf("Executing command '%s'\n",Buffer);
1504 #ifdef HAVE_PIPES
1505 if((pipe=popen(Buffer,"r"))==NULL)
1507 gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
1509 #else
1510 gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
1511 #endif
1512 *bPipeOpen=TRUE;
1514 else{
1515 pipe=ffopen(fn,"r");
1516 *bPipeOpen=FALSE;
1519 return pipe;
1523 FILE *open_pdo_pipe_gmx(const char *fn)
1525 char *fnNoGz=0;
1526 FILE *pipe;
1528 /* gzipped pdo file? */
1529 if (strcmp(fn+strlen(fn)-3,".gz")==0)
1531 snew(fnNoGz,strlen(fn));
1532 strncpy(fnNoGz,fn,strlen(fn)-3);
1533 fnNoGz[strlen(fn)-3]='\0';
1534 if (gmx_fexist(fnNoGz) && gmx_fexist(fn))
1535 gmx_fatal(FARGS,"Found file %s and %s. That confuses me. Please remove one of them\n",
1536 fnNoGz,fn);
1537 pipe=ffopen(fnNoGz,"r");
1538 sfree(fnNoGz);
1540 else
1542 pipe=ffopen(fn,"r");
1545 return pipe;
1548 void pdo_close_file(FILE *fp)
1550 #ifdef HAVE_PIPES
1551 pclose(fp);
1552 #else
1553 ffclose(fp);
1554 #endif
1558 /* Reading pdo files */
1559 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1560 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1562 FILE *file;
1563 real mintmp,maxtmp,done=0.;
1564 int i;
1565 gmx_bool bPipeOpen;
1566 /* char Buffer0[1000]; */
1568 if(nfiles<1)
1569 gmx_fatal(FARGS,"No files found. Hick.");
1571 /* if min and max are not given, get min and max from the input files */
1572 if (opt->bAuto)
1574 printf("Automatic determination of boundaries from %d pdo files...\n",nfiles);
1575 opt->min=1e20;
1576 opt->max=-1e20;
1577 for(i=0;i<nfiles;++i)
1579 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1580 /*fgets(Buffer0,999,file);
1581 fprintf(stderr,"First line '%s'\n",Buffer0); */
1582 done=100.0*(i+1)/nfiles;
1583 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1584 if (opt->verbose)
1585 printf("\n");
1586 read_pdo_header(file,header,opt);
1587 /* here only determine min and max of this window */
1588 read_pdo_data(file,header,i,NULL,opt,TRUE,&mintmp,&maxtmp);
1589 if (maxtmp>opt->max)
1590 opt->max=maxtmp;
1591 if (mintmp<opt->min)
1592 opt->min=mintmp;
1593 if (bPipeOpen)
1594 pdo_close_file(file);
1595 else
1596 ffclose(file);
1598 printf("\n");
1599 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
1600 if (opt->bBoundsOnly)
1602 printf("Found option -boundsonly, now exiting.\n");
1603 exit (0);
1606 /* store stepsize in profile */
1607 opt->dz=(opt->max-opt->min)/opt->bins;
1609 /* Having min and max, we read in all files */
1610 /* Loop over all files */
1611 for(i=0;i<nfiles;++i)
1613 done=100.0*(i+1)/nfiles;
1614 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1615 if (opt->verbose)
1616 printf("\n");
1617 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1618 read_pdo_header(file,header,opt);
1619 /* load data into window */
1620 read_pdo_data(file,header,i,window,opt,FALSE,NULL,NULL);
1621 if ((window+i)->Ntot[0] == 0.0)
1622 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1623 if (bPipeOpen)
1624 pdo_close_file(file);
1625 else
1626 ffclose(file);
1628 printf("\n");
1629 for(i=0;i<nfiles;++i)
1630 sfree(fn[i]);
1631 sfree(fn);
1634 #define int2YN(a) (((a)==0)?("N"):("Y"))
1636 void read_tpr_header(const char *fn,t_UmbrellaHeader* header,t_UmbrellaOptions *opt)
1638 t_inputrec ir;
1639 int i,ngrp,d;
1640 t_state state;
1641 static int first=1;
1643 /* printf("Reading %s \n",fn); */
1644 read_tpx_state(fn,&ir,&state,NULL,NULL);
1646 if (ir.ePull != epullUMBRELLA)
1647 gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1648 " (ir.ePull = %d)\n", epull_names[ir.ePull],ir.ePull);
1650 /* nr of pull groups */
1651 ngrp=ir.pull->ngrp;
1652 if (ngrp < 1)
1653 gmx_fatal(FARGS,"This is not a tpr of umbrella simulation. Found only %d pull groups\n",ngrp);
1655 header->npullgrps=ir.pull->ngrp;
1656 header->pull_geometry=ir.pull->eGeom;
1657 copy_ivec(ir.pull->dim,header->pull_dim);
1658 header->pull_ndim=header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
1659 if (header->pull_geometry==epullgPOS && header->pull_ndim>1)
1661 gmx_fatal(FARGS,"Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
1662 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
1663 "If you have some special umbrella setup you may want to write your own pdo files\n"
1664 "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
1666 snew(header->k,ngrp);
1667 snew(header->init_dist,ngrp);
1668 snew(header->umbInitDist,ngrp);
1670 /* only z-direction with epullgCYL? */
1671 if (header->pull_geometry == epullgCYL)
1673 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
1674 gmx_fatal(FARGS,"With pull geometry 'cylinder', expected pulling in Z direction only.\n"
1675 "However, found dimensions [%s %s %s]\n",
1676 int2YN(header->pull_dim[XX]),int2YN(header->pull_dim[YY]),
1677 int2YN(header->pull_dim[ZZ]));
1680 for (i=0;i<ngrp;i++)
1682 header->k[i]=ir.pull->grp[i+1].k;
1683 if (header->k[i]==0.0)
1684 gmx_fatal(FARGS,"Pull group %d has force constant of of 0.0 in %s.\n"
1685 "That doesn't seem to be an Umbrella tpr.\n",
1686 i,fn);
1687 copy_rvec(ir.pull->grp[i+1].init,header->init_dist[i]);
1689 /* initial distance to reference */
1690 switch(header->pull_geometry)
1692 case epullgPOS:
1693 for (d=0;d<DIM;d++)
1694 if (header->pull_dim[d])
1695 header->umbInitDist[i]=header->init_dist[i][d];
1696 break;
1697 case epullgCYL:
1698 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
1699 case epullgDIST:
1700 case epullgDIR:
1701 case epullgDIRPBC:
1702 header->umbInitDist[i]=header->init_dist[i][0];
1703 break;
1704 default:
1705 gmx_fatal(FARGS,"Pull geometry %s not supported\n",epullg_names[header->pull_geometry]);
1709 if (opt->verbose || first)
1711 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
1712 fn,header->npullgrps,epullg_names[header->pull_geometry],
1713 int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
1714 header->pull_ndim);
1715 for (i=0;i<ngrp;i++)
1716 printf("\tgrp %d) k = %-5g position = %g\n",i,header->k[i],header->umbInitDist[i]);
1718 if (!opt->verbose && first)
1719 printf("\tUse option -v to see this output for all input tpr files\n");
1721 first=0;
1725 double dist_ndim(double **dx,int ndim,int line)
1727 int i;
1728 double r2=0.;
1729 for (i=0;i<ndim;i++)
1730 r2+=sqr(dx[i][line]);
1731 return sqrt(r2);
1734 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
1735 t_UmbrellaWindow * window,
1736 t_UmbrellaOptions *opt,
1737 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
1739 double **y=0,pos=0.,t,force,time0=0.,dt;
1740 int ny,nt,bins,ibin,i,g,dstep=1,nColPerGrp,nColRefOnce,nColRefEachGrp,nColExpect,ntot;
1741 real min,max,minfound=1e20,maxfound=-1e20;
1742 gmx_bool dt_ok,timeok,bHaveForce;
1743 const char *quantity;
1744 const int blocklen=4096;
1745 int *lennow=0;
1748 in force output pullf.xvg:
1749 No reference, one column per pull group
1750 in position output pullx.xvg (not cylinder)
1751 ndim reference, ndim columns per pull group
1752 in position output pullx.xvg (in geometry cylinder):
1753 ndim*2 columns per pull group (ndim for ref, ndim for group)
1756 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
1757 quantity = opt->bPullx ? "position" : "force";
1759 if (opt->bPullx)
1761 if (header->pull_geometry == epullgCYL)
1763 /* Geometry cylinder -> reference group before each pull group */
1764 nColRefEachGrp=header->pull_ndim;
1765 nColRefOnce=0;
1767 else
1769 /* Geometry NOT cylinder -> reference group only once after time column */
1770 nColRefEachGrp=0;
1771 nColRefOnce=header->pull_ndim;
1774 else /* read forces, no reference groups */
1776 nColRefEachGrp=0;
1777 nColRefOnce=0;
1780 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
1781 bHaveForce = opt->bPullf;
1783 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
1784 That avoids the somewhat tedious extraction of the right columns from the pullx files
1785 to compute the distances projection on the vector. Sorry for the laziness. */
1786 if ( (header->pull_geometry==epullgDIR || header->pull_geometry==epullgDIRPBC)
1787 && opt->bPullx)
1789 gmx_fatal(FARGS,"With pull geometries \"direction\" and \"direction_periodic\", only pull force "
1790 "reading \n(option -if) is supported at present, "
1791 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
1792 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
1793 epullg_names[header->pull_geometry]);
1796 nt=read_xvg(fn,&y,&ny);
1798 /* Check consistency */
1799 if (nt<1)
1801 gmx_fatal(FARGS,"Empty pull %s file %s\n",quantity,fn);
1803 if (ny != nColExpect)
1805 gmx_fatal(FARGS,"Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n"
1806 "\nMaybe you confused options -ix and -if ?\n",
1807 header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
1810 if (opt->verbose)
1811 printf("Found %d times and %d %s sets %s\n",nt,(ny-1)/nColPerGrp,quantity,fn);
1813 if (!bGetMinMax)
1815 bins=opt->bins;
1816 min=opt->min;
1817 max=opt->max;
1818 if (nt>1)
1820 window->dt=y[0][1]-y[0][0];
1822 else if (opt->nBootStrap && opt->tauBootStrap!=0.0)
1824 fprintf(stderr,"\n *** WARNING, Could not determine time step in %s\n",fn);
1827 /* Need to alocate memory and set up structure */
1828 window->nPull=header->npullgrps;
1829 window->nBin=bins;
1831 snew(window->Histo,window->nPull);
1832 snew(window->z,window->nPull);
1833 snew(window->k,window->nPull);
1834 snew(window->pos,window->nPull);
1835 snew(window->N, window->nPull);
1836 snew(window->Ntot, window->nPull);
1837 snew(window->g, window->nPull);
1838 snew(window->bsWeight, window->nPull);
1839 window->bContrib=0;
1841 if (opt->bCalcTauInt)
1842 snew(window->ztime,window->nPull);
1843 else
1844 window->ztime=NULL;
1845 snew(lennow,window->nPull);
1847 for(g=0;g<window->nPull;++g)
1849 window->z[g]=1;
1850 window->bsWeight[g]=1.;
1851 snew(window->Histo[g],bins);
1852 window->k[g]=header->k[g];
1853 window->N[g]=0;
1854 window->Ntot[g]=0;
1855 window->g[g]=1.;
1856 window->pos[g]=header->umbInitDist[g];
1857 if (opt->bCalcTauInt)
1858 window->ztime[g]=NULL;
1862 else
1863 { /* only determine min and max */
1864 minfound=1e20;
1865 maxfound=-1e20;
1866 min=max=bins=0; /* Get rid of warnings */
1869 for (i=0;i<nt;i++)
1871 /* Do you want that time frame? */
1872 t=1.0/1000*( (int) ((y[0][i]*1000) + 0.5)); /* round time to fs */
1874 /* get time step of pdo file and get dstep from opt->dt */
1875 if (i==0)
1877 time0=t;
1879 else if (i==1)
1881 dt=t-time0;
1882 if (opt->dt>0.0)
1884 dstep=(int)(opt->dt/dt+0.5);
1885 if (dstep==0)
1886 dstep=1;
1888 if (!bGetMinMax)
1889 window->dt=dt*dstep;
1892 dt_ok=(i%dstep == 0);
1893 timeok=(dt_ok && t >= opt->tmin && t <= opt->tmax);
1894 /*if (opt->verbose)
1895 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
1896 t,opt->tmin, opt->tmax, dt_ok,timeok); */
1898 if (timeok)
1900 for(g=0;g<header->npullgrps;++g)
1902 if (bHaveForce)
1904 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
1905 force=y[g+1][i];
1906 pos= -force/header->k[g] + header->umbInitDist[g];
1908 else
1910 switch (header->pull_geometry)
1912 case epullgDIST:
1913 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
1914 Distance to reference: */
1915 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
1916 pos=dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp,header->pull_ndim,i);
1917 break;
1918 case epullgPOS:
1919 /* Columns
1920 Time ref[ndim] group1[ndim] group2[ndim] ... */
1921 case epullgCYL:
1922 /* Columns
1923 Time ref1[ndim] group1[ndim] ref2[ndim] group2[ndim] ... */
1925 /* * with geometry==position, we have the reference once (nColRefOnce==ndim), but
1926 no extra reference group columns before each group (nColRefEachGrp==0)
1928 * with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
1929 but ndim ref group colums before every group (nColRefEachGrp==ndim)
1930 Distance to reference: */
1931 pos=y[1 + nColRefOnce + nColRefEachGrp + g*(nColRefEachGrp+nColPerGrp)][i];
1932 break;
1933 default:
1934 gmx_fatal(FARGS,"Bad error, this error should have been catched before. Ups.\n");
1938 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
1939 if (bGetMinMax)
1941 if (pos<minfound)
1942 minfound=pos;
1943 if (pos>maxfound)
1944 maxfound=pos;
1946 else
1948 if (opt->bCalcTauInt && !bGetMinMax)
1950 /* save time series for autocorrelation analysis */
1951 ntot=window->Ntot[g];
1952 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
1953 if (ntot>=lennow[g])
1955 lennow[g]+=blocklen;
1956 srenew(window->ztime[g],lennow[g]);
1958 window->ztime[g][ntot]=pos;
1961 ibin=(int) floor((pos-min)/(max-min)*bins);
1962 if (opt->bCycl)
1964 if (ibin<0)
1965 while ( (ibin+=bins) < 0);
1966 else if (ibin>=bins)
1967 while ( (ibin-=bins) >= bins);
1969 if(ibin >= 0 && ibin < bins)
1971 window->Histo[g][ibin]+=1.;
1972 window->N[g]++;
1974 window->Ntot[g]++;
1978 else if (t>opt->tmax)
1980 if (opt->verbose)
1981 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n",t,opt->tmax);
1982 break;
1986 if (bGetMinMax)
1988 *mintmp=minfound;
1989 *maxtmp=maxfound;
1991 sfree(lennow);
1992 for (i=0;i<ny;i++)
1993 sfree(y[i]);
1996 void read_tpr_pullxf_files(char **fnTprs,char **fnPull,int nfiles,
1997 t_UmbrellaHeader* header,
1998 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
2000 int i;
2001 real mintmp,maxtmp;
2003 printf("Reading %d tpr and pullf files\n",nfiles/2);
2005 /* min and max not given? */
2006 if (opt->bAuto)
2008 printf("Automatic determination of boundaries...\n");
2009 opt->min=1e20;
2010 opt->max=-1e20;
2011 for (i=0;i<nfiles; i++)
2013 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2014 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2015 read_tpr_header(fnTprs[i],header,opt);
2016 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2017 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2018 read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp);
2019 if (maxtmp>opt->max)
2020 opt->max=maxtmp;
2021 if (mintmp<opt->min)
2022 opt->min=mintmp;
2024 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
2025 if (opt->bBoundsOnly)
2027 printf("Found option -boundsonly, now exiting.\n");
2028 exit (0);
2031 /* store stepsize in profile */
2032 opt->dz=(opt->max-opt->min)/opt->bins;
2034 for (i=0;i<nfiles; i++)
2036 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2037 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2038 read_tpr_header(fnTprs[i],header,opt);
2039 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2040 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2041 read_pull_xf(fnPull[i],fnTprs[i],header,window+i,opt,FALSE,NULL,NULL);
2042 if (window[i].Ntot[0] == 0.0)
2043 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2046 for (i=0;i<nfiles; i++)
2048 sfree(fnTprs[i]);
2049 sfree(fnPull[i]);
2051 sfree(fnTprs);
2052 sfree(fnPull);
2055 /* Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation time.
2056 The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2058 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt,
2059 const char* fn)
2061 int nlines,ny,i,ig;
2062 double **iact;
2064 printf("Readging Integrated autocorrelation times from %s ...\n",fn);
2065 nlines=read_xvg(fn,&iact,&ny);
2066 if (nlines!=nwins)
2067 gmx_fatal(FARGS,"Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2068 nlines,fn,nwins);
2069 for (i=0;i<nlines;i++){
2070 if (window[i].nPull != ny)
2071 gmx_fatal(FARGS,"You are providing autocorrelation times with option -iiact and the\n"
2072 "number of pull groups is different in different simulations. That is not\n"
2073 "supported yet. Sorry.\n");
2074 for (ig=0;ig<window[i].nPull;ig++){
2075 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2076 window[i].g[ig]=1+2*iact[ig][i]/window[i].dt;
2078 if (iact[ig][i] <= 0.0)
2079 fprintf(stderr,"\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i],i,ig);
2085 /* Smooth autocorreltion times along the reaction coordinate. This is useful
2086 if the ACT is subject to high uncertainty in case if limited sampling. Note
2087 that -in case of limited sampling- the ACT may be severely underestimated.
2088 Note: the g=1+2tau are overwritten.
2089 if opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2090 by the smoothing
2092 void smoothIact(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2094 int i,ig,j,jg;
2095 double pos,dpos2,siglim,siglim2,gaufact,invtwosig2,w,weight,tausm;
2097 /* only evaluate within +- 3sigma of the Gausian */
2098 siglim=3.0*opt->sigSmoothIact;
2099 siglim2=dsqr(siglim);
2100 /* pre-factor of Gaussian */
2101 gaufact=1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2102 invtwosig2=0.5/dsqr(opt->sigSmoothIact);
2104 for (i=0;i<nwins;i++)
2106 snew(window[i].tausmooth,window[i].nPull);
2107 for (ig=0;ig<window[i].nPull;ig++)
2109 tausm=0.;
2110 weight=0;
2111 pos=window[i].pos[ig];
2112 for (j=0;j<nwins;j++)
2114 for (jg=0;jg<window[j].nPull;jg++)
2116 dpos2=dsqr(window[j].pos[jg]-pos);
2117 if (dpos2<siglim2){
2118 w=gaufact*exp(-dpos2*invtwosig2);
2119 weight+=w;
2120 tausm+=w*window[j].tau[jg];
2121 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2122 w,dpos2,pos,gaufact,invtwosig2); */
2126 tausm/=weight;
2127 if (opt->bAllowReduceIact || tausm>window[i].tau[ig])
2128 window[i].tausmooth[ig]=tausm;
2129 else
2130 window[i].tausmooth[ig]=window[i].tau[ig];
2131 window[i].g[ig] = 1+2*tausm/window[i].dt;
2136 /* try to compute the autocorrelation time for each umbrealla window */
2137 #define WHAM_AC_ZERO_LIMIT 0.05
2138 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,
2139 t_UmbrellaOptions *opt, const char *fn)
2141 int i,ig,ncorr,ntot,j,k,*count,restart;
2142 real *corr,c0,dt,timemax,tmp;
2143 real *ztime,av,tausteps;
2144 FILE *fp,*fpcorr=0;
2146 if (opt->verbose)
2147 fpcorr=xvgropen("hist_autocorr.xvg","Autocorrelation functions of umbrella windows",
2148 "time [ps]","autocorrelation function",opt->oenv);
2150 printf("\n");
2151 for (i=0;i<nwins;i++)
2153 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...",100.*(i+1)/nwins);
2154 fflush(stdout);
2155 ntot=window[i].Ntot[0];
2157 /* using half the maximum time as length of autocorrelation function */
2158 ncorr=ntot/2;
2159 if (ntot<10)
2160 gmx_fatal(FARGS,"Tryig to estimtate autocorrelation time from only %d"
2161 " points. Provide more pull data!",ntot);
2162 snew(corr,ncorr);
2163 /* snew(corrSq,ncorr); */
2164 snew(count,ncorr);
2165 dt=window[i].dt;
2166 timemax=dt*ncorr;
2167 snew(window[i].tau,window[i].nPull);
2168 restart=(int)(opt->acTrestart/dt+0.5);
2169 if (restart==0)
2170 restart=1;
2172 for (ig=0;ig<window[i].nPull;ig++)
2174 if (ntot != window[i].Ntot[ig])
2175 gmx_fatal(FARGS,"Encountered different nr of frames in different pull groups.\n"
2176 "That should not happen. (%d and %d)\n", ntot,window[i].Ntot[ig]);
2177 ztime=window[i].ztime[ig];
2179 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2180 for(j=0, av=0; (j<ntot); j++)
2181 av+=ztime[j];
2182 av/=ntot;
2183 for(k=0; (k<ncorr); k++)
2185 corr[k]=0.;
2186 count[k]=0;
2188 for(j=0; (j<ntot); j+=restart)
2189 for(k=0; (k<ncorr) && (j+k < ntot); k++)
2191 tmp=(ztime[j]-av)*(ztime[j+k]-av);
2192 corr [k] += tmp;
2193 /* corrSq[k] += tmp*tmp; */
2194 count[k]++;
2196 /* divide by nr of frames for each time displacement */
2197 for(k=0; (k<ncorr); k++)
2199 /* count probably = (ncorr-k+(restart-1))/restart; */
2200 corr[k] = corr[k]/count[k];
2201 /* variance of autocorrelation function */
2202 /* corrSq[k]=corrSq[k]/count[k]; */
2204 /* normalize such that corr[0] == 0 */
2205 c0=1./corr[0];
2206 for(k=0; (k<ncorr); k++)
2208 corr[k]*=c0;
2209 /* corrSq[k]*=c0*c0; */
2212 /* write ACFs in verbose mode */
2213 if (fpcorr)
2215 for(k=0; (k<ncorr); k++)
2216 fprintf(fpcorr,"%g %g\n",k*dt,corr[k]);
2217 fprintf(fpcorr,"&\n");
2220 /* esimate integrated correlation time, fitting is too unstable */
2221 tausteps = 0.5*corr[0];
2222 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2223 for(j=1; (j<ncorr) && (corr[j]>WHAM_AC_ZERO_LIMIT); j++)
2224 tausteps += corr[j];
2226 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2227 Kumar et al, eq. 28 ff. */
2228 window[i].tau[ig] = tausteps*dt;
2229 window[i].g[ig] = 1+2*tausteps;
2230 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2231 } /* ig loop */
2232 sfree(corr);
2233 sfree(count);
2235 printf(" done\n");
2236 if (fpcorr)
2237 ffclose(fpcorr);
2239 /* plot IACT along reaction coordinate */
2240 fp=xvgropen(fn,"Integrated autocorrelation times","z","IACT [ps]",opt->oenv);
2241 fprintf(fp,"@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2242 fprintf(fp,"# WIN tau(gr1) tau(gr2) ...\n");
2243 for (i=0;i<nwins;i++)
2245 fprintf(fp,"# %3d ",i);
2246 for (ig=0;ig<window[i].nPull;ig++)
2247 fprintf(fp," %11g",window[i].tau[ig]);
2248 fprintf(fp,"\n");
2250 for (i=0;i<nwins;i++)
2251 for (ig=0;ig<window[i].nPull;ig++)
2252 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tau[ig]);
2253 if (opt->sigSmoothIact > 0.0)
2255 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2256 opt->sigSmoothIact);
2257 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2258 smoothIact(window,nwins,opt);
2259 fprintf(fp,"&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2260 fprintf(fp,"@ s1 symbol color 2\n");
2261 for (i=0;i<nwins;i++)
2262 for (ig=0;ig<window[i].nPull;ig++)
2263 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tausmooth[ig]);
2265 ffclose(fp);
2266 printf("Wrote %s\n",fn);
2269 /* compute average and sigma of each umbrella window */
2270 void averageSigma(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2272 int i,ig,ntot,k;
2273 real av,sum2,sig,diff,*ztime,nSamplesIndep;
2275 for (i=0;i<nwins;i++)
2277 snew(window[i].aver, window[i].nPull);
2278 snew(window[i].sigma,window[i].nPull);
2280 ntot=window[i].Ntot[0];
2281 for (ig=0;ig<window[i].nPull;ig++)
2283 ztime=window[i].ztime[ig];
2284 for (k=0, av=0.; k<ntot; k++)
2285 av+=ztime[k];
2286 av/=ntot;
2287 for (k=0, sum2=0.; k<ntot; k++)
2289 diff=ztime[k]-av;
2290 sum2+=diff*diff;
2292 sig=sqrt(sum2/ntot);
2293 window[i].aver[ig]=av;
2295 /* Note: This estimate for sigma is biased from the limited sampling.
2296 Correct sigma by n/(n-1) where n = number of independent
2297 samples. Only possible if IACT is known.
2299 if (window[i].tau)
2301 nSamplesIndep=window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2302 window[i].sigma[ig]=sig * nSamplesIndep/(nSamplesIndep-1);
2304 else
2305 window[i].sigma[ig]=sig;
2306 printf("win %d, aver = %f sig = %f\n",i,av,window[i].sigma[ig]);
2312 /* Use histograms to compute average force on pull group.
2313 In addition, compute the sigma of the histogram.
2315 void computeAverageForce(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt)
2317 int i,j,bins=opt->bins,k;
2318 double dz,min=opt->min,max=opt->max,displAv,displAv2,temp,distance,ztot,ztot_half,w,weight;
2319 double posmirrored;
2321 dz=(max-min)/bins;
2322 ztot=opt->max-min;
2323 ztot_half=ztot/2;
2325 /* Compute average displacement from histograms */
2326 for(j=0;j<nWindows;++j)
2328 snew(window[j].forceAv,window[j].nPull);
2329 for(k=0;k<window[j].nPull;++k)
2331 displAv = 0.0;
2332 displAv2 = 0.0;
2333 weight = 0.0;
2334 for(i=0;i<opt->bins;++i)
2336 temp=(1.0*i+0.5)*dz+min;
2337 distance = temp - window[j].pos[k];
2338 if (opt->bCycl)
2339 { /* in cyclic wham: */
2340 if (distance > ztot_half) /* |distance| < ztot_half */
2341 distance-=ztot;
2342 else if (distance < -ztot_half)
2343 distance+=ztot;
2345 w=window[j].Histo[k][i]/window[j].g[k];
2346 displAv += w*distance;
2347 displAv2 += w*sqr(distance);
2348 weight+=w;
2349 /* Are we near min or max? We are getting wron forces from the histgrams since
2350 the histigrams are zero outside [min,max). Therefore, assume that the position
2351 on the other side of the histomgram center is equally likely. */
2352 if (!opt->bCycl)
2354 posmirrored=window[j].pos[k]-distance;
2355 if (posmirrored>=max || posmirrored<min)
2357 displAv += -w*distance;
2358 displAv2 += w*sqr(-distance);
2359 weight+=w;
2363 displAv /= weight;
2364 displAv2 /= weight;
2366 /* average force from average displacement */
2367 window[j].forceAv[k] = displAv*window[j].k[k];
2368 /* sigma from average square displacement */
2369 /* window[j].sigma [k] = sqrt(displAv2); */
2370 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2375 /* Check if the complete reaction coordinate is covered by the histograms */
2376 void checkReactionCoordinateCovered(t_UmbrellaWindow *window,int nwins,
2377 t_UmbrellaOptions *opt)
2379 int i,ig,j,bins=opt->bins,bBoundary;
2380 real avcount=0,z,relcount,*count;
2381 snew(count,opt->bins);
2383 for(j=0;j<opt->bins;++j)
2385 for (i=0;i<nwins;i++){
2386 for (ig=0;ig<window[i].nPull;ig++)
2387 count[j]+=window[i].Histo[ig][j];
2389 avcount+=1.0*count[j];
2391 avcount/=bins;
2392 for(j=0;j<bins;++j)
2394 relcount=count[j]/avcount;
2395 z=(j+0.5)*opt->dz+opt->min;
2396 bBoundary=( j<bins/20 || (bins-j)>bins/20 );
2397 /* check for bins with no data */
2398 if (count[j] == 0)
2399 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2400 "You may not get a reasonable profile. Check your histograms!\n",j,z);
2401 /* and check for poor sampling */
2402 else if (relcount<0.005 && !bBoundary)
2403 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n",j,z);
2405 sfree(count);
2409 void guessPotByIntegration(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt,
2410 char *fn)
2412 int i,j,ig,bins=opt->bins,nHist,winmin,groupmin;
2413 double dz,min=opt->min,*pot,pos,hispos,dist,diff,fAv,distmin,*f;
2414 FILE *fp;
2416 dz=(opt->max-min)/bins;
2418 printf("Getting initial potential by integration.\n");
2420 /* Compute average displacement from histograms */
2421 computeAverageForce(window,nWindows,opt);
2423 /* Get force for each bin from all histograms in this bin, or, alternatively,
2424 if no histograms are inside this bin, from the closest histogram */
2425 snew(pot,bins);
2426 snew(f,bins);
2427 for(j=0;j<opt->bins;++j)
2429 pos=(1.0*j+0.5)*dz+min;
2430 nHist=0;
2431 fAv=0.;
2432 distmin=1e20;
2433 groupmin=winmin=0;
2434 for (i=0;i<nWindows;i++)
2436 for (ig=0;ig<window[i].nPull;ig++)
2438 hispos=window[i].pos[ig];
2439 dist=fabs(hispos-pos);
2440 /* average force within bin */
2441 if (dist < dz/2)
2443 nHist++;
2444 fAv+=window[i].forceAv[ig];
2446 /* at the same time, rememer closest histogram */
2447 if (dist<distmin){
2448 winmin=i;
2449 groupmin=ig;
2450 distmin=dist;
2454 /* if no histogram found in this bin, use closest histogram */
2455 if (nHist>0)
2456 fAv=fAv/nHist;
2457 else{
2458 fAv=window[winmin].forceAv[groupmin];
2460 f[j]=fAv;
2462 for(j=1;j<opt->bins;++j)
2463 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2465 /* cyclic wham: linearly correct possible offset */
2466 if (opt->bCycl)
2468 diff=(pot[bins-1]-pot[0])/(bins-1);
2469 for(j=1;j<opt->bins;++j)
2470 pot[j]-=j*diff;
2472 if (opt->verbose)
2474 fp=xvgropen("pmfintegrated.xvg","PMF from force integration","z","PMF [kJ/mol]",opt->oenv);
2475 for(j=0;j<opt->bins;++j)
2476 fprintf(fp,"%g %g\n",(j+0.5)*dz+opt->min,pot[j]);
2477 ffclose(fp);
2478 printf("verbose mode: wrote %s with PMF from interated forces\n","pmfintegrated.xvg");
2481 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2482 energy offsets which are usually determined by wham
2483 First: turn pot into probabilities:
2485 for(j=0;j<opt->bins;++j)
2486 pot[j]=exp(-pot[j]/(8.314e-3*opt->Temperature));
2487 calc_z(pot,window,nWindows,opt,TRUE);
2489 sfree(pot);
2490 sfree(f);
2494 int gmx_wham(int argc,char *argv[])
2496 const char *desc[] = {
2497 "This is an analysis program that implements the Weighted",
2498 "Histogram Analysis Method (WHAM). It is intended to analyze",
2499 "output files generated by umbrella sampling simulations to ",
2500 "compute a potential of mean force (PMF). [PAR] ",
2501 "At present, three input modes are supported.[BR]",
2502 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
2503 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
2504 " AND, with option [TT]-ix[tt], a file which contains file names of",
2505 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
2506 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
2507 " first pullx, etc.[BR]",
2508 "[TT]*[tt] Same as the previous input mode, except that the the user",
2509 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
2510 " From the pull force the position in the umbrella potential is",
2511 " computed. This does not work with tabulated umbrella potentials.[BR]"
2512 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
2513 " the GROMACS 3.3 umbrella output files. If you have some unusual"
2514 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
2515 " feed them with the [TT]-ip[tt] option into to [TT]g_wham[tt]. The [TT].pdo[tt] file header",
2516 " must be similar to the following:[PAR]",
2517 "[TT]# UMBRELLA 3.0[BR]",
2518 "# Component selection: 0 0 1[BR]",
2519 "# nSkip 1[BR]",
2520 "# Ref. Group 'TestAtom'[BR]",
2521 "# Nr. of pull groups 2[BR]",
2522 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
2523 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
2524 "#####[tt][PAR]",
2525 "The number of pull groups, umbrella positions, force constants, and names ",
2526 "may (of course) differ. Following the header, a time column and ",
2527 "a data column for each pull group follows (i.e. the displacement",
2528 "with respect to the umbrella center). Up to four pull groups are possible ",
2529 "per [TT].pdo[tt] file at present.[PAR]",
2530 "By default, the output files are[BR]",
2531 " [TT]-o[tt] PMF output file[BR]",
2532 " [TT]-hist[tt] Histograms output file[BR]",
2533 "Always check whether the histograms sufficiently overlap.[PAR]",
2534 "The umbrella potential is assumed to be harmonic and the force constants are ",
2535 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
2536 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
2537 "WHAM OPTIONS[BR]------------[BR]",
2538 " [TT]-bins[tt] Number of bins used in analysis[BR]",
2539 " [TT]-temp[tt] Temperature in the simulations[BR]",
2540 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
2541 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
2542 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
2543 "The data points that are used to compute the profile",
2544 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
2545 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
2546 "umbrella window.[PAR]",
2547 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
2548 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
2549 "With energy output, the energy in the first bin is defined to be zero. ",
2550 "If you want the free energy at a different ",
2551 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
2552 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
2553 "without osmotic gradient), the option [TT]-cycl[tt] is useful. [TT]g_wham[tt] will make use of the ",
2554 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
2555 "reaction coordinate will assumed be be neighbors.[PAR]",
2556 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
2557 "which may be useful for, e.g. membranes.[PAR]",
2558 "AUTOCORRELATIONS[BR]----------------[BR]",
2559 "With [TT]-ac[tt], [TT]g_wham[tt] estimates the integrated autocorrelation ",
2560 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
2561 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
2562 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
2563 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
2564 "Because the IACTs can be severely underestimated in case of limited ",
2565 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
2566 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
2567 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
2568 "integration of the ACFs while the ACFs are larger 0.05.",
2569 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
2570 "less robust) method such as fitting to a double exponential, you can ",
2571 "compute the IACTs with [TT]g_analyze[tt] and provide them to [TT]g_wham[tt] with the file ",
2572 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
2573 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
2574 "ERROR ANALYSIS[BR]--------------[BR]",
2575 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
2576 "otherwise the statistical error may be substantially underestimated. ",
2577 "More background and examples for the bootstrap technique can be found in ",
2578 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
2579 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
2580 "Four bootstrapping methods are supported and ",
2581 "selected with [TT]-bs-method[tt].[BR]",
2582 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
2583 "data points, and the bootstrap is carried out by assigning random weights to the ",
2584 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
2585 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
2586 "statistical error is underestimated.[BR]",
2587 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
2588 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
2589 "(allowing duplication, i.e. sampling with replacement).",
2590 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
2591 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
2592 "divided into blocks and only histograms within each block are mixed. Note that ",
2593 "the histograms within each block must be representative for all possible histograms, ",
2594 "otherwise the statistical error is underestimated.[BR]",
2595 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
2596 "such that the generated data points are distributed according the given histograms ",
2597 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
2598 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
2599 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
2600 "Note that this method may severely underestimate the error in case of limited sampling, ",
2601 "that is if individual histograms do not represent the complete phase space at ",
2602 "the respective positions.[BR]",
2603 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
2604 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
2605 "and width of the umbrella histograms. That method yields similar error estimates ",
2606 "like method [TT]traj[tt].[PAR]"
2607 "Bootstrapping output:[BR]",
2608 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
2609 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
2610 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
2611 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
2612 "the histograms."
2615 const char *en_unit[]={NULL,"kJ","kCal","kT",NULL};
2616 const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",NULL};
2617 const char *en_bsMethod[]={ NULL,"b-hist", "hist", "traj", "traj-gauss", NULL };
2619 static t_UmbrellaOptions opt;
2621 t_pargs pa[] = {
2622 { "-min", FALSE, etREAL, {&opt.min},
2623 "Minimum coordinate in profile"},
2624 { "-max", FALSE, etREAL, {&opt.max},
2625 "Maximum coordinate in profile"},
2626 { "-auto", FALSE, etBOOL, {&opt.bAuto},
2627 "Determine min and max automatically"},
2628 { "-bins",FALSE, etINT, {&opt.bins},
2629 "Number of bins in profile"},
2630 { "-temp", FALSE, etREAL, {&opt.Temperature},
2631 "Temperature"},
2632 { "-tol", FALSE, etREAL, {&opt.Tolerance},
2633 "Tolerance"},
2634 { "-v", FALSE, etBOOL, {&opt.verbose},
2635 "Verbose mode"},
2636 { "-b", FALSE, etREAL, {&opt.tmin},
2637 "First time to analyse (ps)"},
2638 { "-e", FALSE, etREAL, {&opt.tmax},
2639 "Last time to analyse (ps)"},
2640 { "-dt", FALSE, etREAL, {&opt.dt},
2641 "Analyse only every dt ps"},
2642 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
2643 "Write histograms and exit"},
2644 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
2645 "Determine min and max and exit (with [TT]-auto[tt])"},
2646 { "-log", FALSE, etBOOL, {&opt.bLog},
2647 "Calculate the log of the profile before printing"},
2648 { "-unit", FALSE, etENUM, {en_unit},
2649 "Energy unit in case of log output" },
2650 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
2651 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
2652 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
2653 "Create cyclic/periodic profile. Assumes min and max are the same point."},
2654 { "-sym", FALSE, etBOOL, {&opt.bSym},
2655 "Symmetrize profile around z=0"},
2656 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
2657 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
2658 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
2659 "Calculate integrated autocorrelation times and use in wham"},
2660 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
2661 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
2662 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
2663 "When computing autocorrelation functions, restart computing every .. (ps)"},
2664 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
2665 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
2666 "during smoothing"},
2667 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
2668 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
2669 { "-bs-method", FALSE, etENUM, {en_bsMethod},
2670 "Bootstrap method" },
2671 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
2672 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
2673 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
2674 "Seed for bootstrapping. (-1 = use time)"},
2675 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
2676 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
2677 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
2678 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
2679 { "-stepout", FALSE, etINT, {&opt.stepchange},
2680 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
2681 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
2682 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
2685 t_filenm fnm[] = {
2686 { efDAT, "-ix","pullx-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2687 { efDAT, "-if","pullf-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2688 { efDAT, "-it","tpr-files",ffOPTRD}, /* wham input: tprs */
2689 { efDAT, "-ip","pdo-files",ffOPTRD}, /* wham input: pdo files (gmx3 style) */
2690 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
2691 { efXVG, "-hist","histo", ffWRITE}, /* output file for histograms */
2692 { efXVG, "-oiact","iact",ffOPTWR}, /* writing integrated autocorrelation times */
2693 { efDAT, "-iiact","iact-in",ffOPTRD}, /* reading integrated autocorrelation times */
2694 { efXVG, "-bsres","bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
2695 { efXVG, "-bsprof","bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
2696 { efDAT, "-tab","umb-pot",ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
2699 int i,j,l,nfiles,nwins,nfiles2;
2700 t_UmbrellaHeader header;
2701 t_UmbrellaWindow * window=NULL;
2702 double *profile,maxchange=1e20;
2703 gmx_bool bMinSet,bMaxSet,bAutoSet,bExact=FALSE;
2704 char **fninTpr,**fninPull,**fninPdo;
2705 const char *fnPull;
2706 FILE *histout,*profout;
2707 char ylabel[256],title[256];
2709 #define NFILE asize(fnm)
2711 CopyRight(stderr,argv[0]);
2713 opt.bins=200;
2714 opt.verbose=FALSE;
2715 opt.bHistOnly=FALSE;
2716 opt.bCycl=FALSE;
2717 opt.tmin=50;
2718 opt.tmax=1e20;
2719 opt.dt=0.0;
2720 opt.min=0;
2721 opt.max=0;
2722 opt.bAuto=TRUE;
2724 /* bootstrapping stuff */
2725 opt.nBootStrap=0;
2726 opt.bsMethod=bsMethod_hist;
2727 opt.tauBootStrap=0.0;
2728 opt.bsSeed=-1;
2729 opt.histBootStrapBlockLength=8;
2730 opt.bs_verbose=FALSE;
2732 opt.bLog=TRUE;
2733 opt.unit=en_kJ;
2734 opt.zProf0=0.;
2735 opt.Temperature=298;
2736 opt.Tolerance=1e-6;
2737 opt.bBoundsOnly=FALSE;
2738 opt.bSym=FALSE;
2739 opt.bCalcTauInt=FALSE;
2740 opt.sigSmoothIact=0.0;
2741 opt.bAllowReduceIact=TRUE;
2742 opt.bInitPotByIntegration=TRUE;
2743 opt.acTrestart=1.0;
2744 opt.stepchange=100;
2745 opt.stepUpdateContrib=100;
2747 parse_common_args(&argc,argv,PCA_BE_NICE,
2748 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&opt.oenv);
2750 opt.unit=nenum(en_unit);
2751 opt.bsMethod=nenum(en_bsMethod);
2753 opt.bProf0Set=opt2parg_bSet("-zprof0", asize(pa), pa);
2755 opt.bTab=opt2bSet("-tab",NFILE,fnm);
2756 opt.bPdo=opt2bSet("-ip",NFILE,fnm);
2757 opt.bTpr=opt2bSet("-it",NFILE,fnm);
2758 opt.bPullx=opt2bSet("-ix",NFILE,fnm);
2759 opt.bPullf=opt2bSet("-if",NFILE,fnm);
2760 opt.bTauIntGiven=opt2bSet("-iiact",NFILE,fnm);
2761 if (opt.bTab && opt.bPullf)
2762 gmx_fatal(FARGS,"Force input does not work with tabulated potentials. "
2763 "Provide pullx.xvg or pdo files!");
2765 #define WHAMBOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
2766 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx,opt.bPullf))
2767 gmx_fatal(FARGS,"Give either pullx (-ix) OR pullf (-if) data. Not both.");
2768 if ( !opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
2769 gmx_fatal(FARGS,"g_wham supports three input modes, pullx, pullf, or pdo file input."
2770 "\n\n Check g_wham -h !");
2772 opt.fnPdo=opt2fn("-ip",NFILE,fnm);
2773 opt.fnTpr=opt2fn("-it",NFILE,fnm);
2774 opt.fnPullf=opt2fn("-if",NFILE,fnm);
2775 opt.fnPullx=opt2fn("-ix",NFILE,fnm);
2777 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
2778 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
2779 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
2780 if ( (bMinSet || bMaxSet) && bAutoSet)
2781 gmx_fatal(FARGS,"With -auto, do not give -min or -max\n");
2783 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
2784 gmx_fatal(FARGS,"When giving -min, you must give -max (and vice versa), too\n");
2786 if (bMinSet && opt.bAuto)
2788 printf("Note: min and max given, switching off -auto.\n");
2789 opt.bAuto=FALSE;
2792 if (opt.bTauIntGiven && opt.bCalcTauInt)
2793 gmx_fatal(FARGS,"Either read (option -iiact) or calculate (option -ac) the\n"
2794 "the autocorrelation times. Not both.");
2796 if (opt.tauBootStrap>0.0 && opt2parg_bSet("-ac",asize(pa), pa))
2797 gmx_fatal(FARGS,"Either compute autocorrelation times (ACTs) (option -ac) or "
2798 "provide it with -bs-tau for bootstrapping. Not Both.\n");
2799 if (opt.tauBootStrap>0.0 && opt2bSet("-iiact",NFILE,fnm))
2800 gmx_fatal(FARGS,"Either provide autocorrelation times (ACTs) with file iact-in.dat "
2801 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
2804 /* Reading gmx4 pull output and tpr files */
2805 if (opt.bTpr || opt.bPullf || opt.bPullx)
2807 read_wham_in(opt.fnTpr,&fninTpr,&nfiles,&opt);
2809 fnPull=opt.bPullf ? opt.fnPullf : opt.fnPullx;
2810 read_wham_in(fnPull,&fninPull,&nfiles2,&opt);
2811 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
2812 nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
2813 if (nfiles!=nfiles2)
2814 gmx_fatal(FARGS,"Found %d file names in %s, but %d in %s\n",nfiles,
2815 opt.fnTpr,nfiles2,fnPull);
2816 window=initUmbrellaWindows(nfiles);
2817 read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, window, &opt);
2819 else
2820 { /* reading pdo files */
2821 read_wham_in(opt.fnPdo,&fninPdo,&nfiles,&opt);
2822 printf("Found %d pdo files in %s\n",nfiles,opt.fnPdo);
2823 window=initUmbrellaWindows(nfiles);
2824 read_pdo_files(fninPdo,nfiles, &header, window, &opt);
2826 nwins=nfiles;
2828 /* enforce equal weight for all histograms? */
2829 if (opt.bHistEq)
2830 enforceEqualWeights(window,nwins);
2832 /* write histograms */
2833 histout=xvgropen(opt2fn("-hist",NFILE,fnm),"Umbrella histograms",
2834 "z","count",opt.oenv);
2835 for(l=0;l<opt.bins;++l)
2837 fprintf(histout,"%e\t",(double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
2838 for(i=0;i<nwins;++i)
2840 for(j=0;j<window[i].nPull;++j)
2842 fprintf(histout,"%e\t",window[i].Histo[j][l]);
2845 fprintf(histout,"\n");
2847 ffclose(histout);
2848 printf("Wrote %s\n",opt2fn("-hist",NFILE,fnm));
2849 if (opt.bHistOnly)
2851 printf("Wrote histograms to %s, now exiting.\n",opt2fn("-hist",NFILE,fnm));
2852 return 0;
2855 /* Using tabulated umbrella potential */
2856 if (opt.bTab)
2857 setup_tab(opt2fn("-tab",NFILE,fnm),&opt);
2859 /* Integrated autocorrelation times provided ? */
2860 if (opt.bTauIntGiven)
2861 readIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-iiact",NFILE,fnm));
2863 /* Compute integrated autocorrelation times */
2864 if (opt.bCalcTauInt)
2865 calcIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-oiact",NFILE,fnm));
2867 /* calc average and sigma for each histogram
2868 (maybe required for bootstrapping. If not, this is fast anyhow) */
2869 if (opt.nBootStrap && opt.bsMethod==bsMethod_trajGauss)
2870 averageSigma(window,nwins,&opt);
2872 /* Get initial potential by simple integration */
2873 if (opt.bInitPotByIntegration)
2874 guessPotByIntegration(window,nwins,&opt,0);
2876 /* Check if complete reaction coordinate is covered */
2877 checkReactionCoordinateCovered(window,nwins,&opt);
2879 /* Calculate profile */
2880 snew(profile,opt.bins);
2881 if (opt.verbose)
2882 opt.stepchange=1;
2883 i=0;
2886 if ( (i%opt.stepUpdateContrib) == 0)
2887 setup_acc_wham(profile,window,nwins,&opt);
2888 if (maxchange<opt.Tolerance)
2890 bExact=TRUE;
2891 /* if (opt.verbose) */
2892 printf("Switched to exact iteration in iteration %d\n",i);
2894 calc_profile(profile,window,nwins,&opt,bExact);
2895 if (((i%opt.stepchange) == 0 || i==1) && !i==0)
2896 printf("\t%4d) Maximum change %e\n",i,maxchange);
2897 i++;
2898 } while ( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
2899 printf("Converged in %d iterations. Final maximum change %g\n",i,maxchange);
2901 /* calc error from Kumar's formula */
2902 /* Unclear how the error propagates along reaction coordinate, therefore
2903 commented out */
2904 /* calc_error_kumar(profile,window, nwins,&opt); */
2906 /* Write profile in energy units? */
2907 if (opt.bLog)
2909 prof_normalization_and_unit(profile,&opt);
2910 strcpy(ylabel,en_unit_label[opt.unit]);
2911 strcpy(title,"Umbrella potential");
2913 else
2915 strcpy(ylabel,"Density of states");
2916 strcpy(title,"Density of states");
2919 /* symmetrize profile around z=0? */
2920 if (opt.bSym)
2921 symmetrizeProfile(profile,&opt);
2923 /* write profile or density of states */
2924 profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,opt.oenv);
2925 for(i=0;i<opt.bins;++i)
2926 fprintf(profout,"%e\t%e\n",(double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
2927 ffclose(profout);
2928 printf("Wrote %s\n",opt2fn("-o",NFILE,fnm));
2930 /* Bootstrap Method */
2931 if (opt.nBootStrap)
2932 do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm),
2933 opt2fn("-hist",NFILE,fnm),
2934 ylabel, profile, window, nwins, &opt);
2936 sfree(profile);
2937 freeUmbrellaWindows(window,nfiles);
2939 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
2940 please_cite(stdout,"Hub2010");
2942 thanx(stderr);
2943 return 0;