Fixed bugzilla 616 about formatting in the help text.
[gromacs/rigid-bodies.git] / src / tools / gmx_wham.c
blob2c909747b9d5e26528144064574c003c9f40b51d
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
2 /*
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
40 #include <stdio.h>
42 #include "statutil.h"
43 #include "typedefs.h"
44 #include "smalloc.h"
45 #include "vec.h"
46 #include "copyrite.h"
47 #include "statutil.h"
48 #include "tpxio.h"
49 #include "names.h"
50 #include "gmx_random.h"
51 #include "gmx_ana.h"
53 #ifndef HAVE_STRDUP
54 #define HAVE_STRDUP
55 #endif
56 #include "string2.h"
57 #include "xvgr.h"
60 #define WHAM_MAXFILELEN 2048
62 /* enum for energy units */
63 enum { enSel, en_kJ, en_kCal, en_kT, enNr };
64 /* enum for type of input files (pdos, tpr, or pullf) */
65 enum { whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo };
66 /* enum for bootstrapping method (
67 - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
68 - bootstrap complete histograms
69 - bootstrap trajectories from given umbrella histograms
70 - bootstrap trajectories from Gaussian with mu/sigam computed from
71 the respective histogram
73 ********************************************************************
74 FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
75 JS Hub, BL de Groot, D van der Spoel
76 g_wham - A free weighted histogram analysis implementation including
77 robust error and autocorrelation estimates,
78 J Chem Theory Comput, accepted (2010)
79 ********************************************************************
81 enum { bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
82 bsMethod_traj, bsMethod_trajGauss };
85 typedef struct
87 /* umbrella with pull code of gromacs 4.x */
88 int npullgrps; /* nr of pull groups in tpr file */
89 int pull_geometry; /* such as distance, position */
90 ivec pull_dim; /* pull dimension with geometry distance */
91 int pull_ndim; /* nr of pull_dim != 0 */
92 real *k; /* force constants in tpr file */
93 rvec *init_dist; /* reference displacements */
94 real *umbInitDist; /* reference displacement in umbrella direction */
96 /* From here, old pdo stuff */
97 int nSkip;
98 char Reference[256];
99 int nPull;
100 int nDim;
101 ivec Dims;
102 char PullName[4][256];
103 double UmbPos[4][3];
104 double UmbCons[4][3];
105 } t_UmbrellaHeader;
107 typedef struct
109 int nPull; /* nr of pull groups in this pdo or pullf/x file */
110 double **Histo,**cum; /* nPull histograms and nPull cumulative distr. funct */
111 int nBin; /* nr of bins. identical to opt->bins */
112 double *k; /* force constants for the nPull groups */
113 double *pos; /* umbrella positions for the nPull groups */
114 double *z; /* z=(-Fi/kT) for the nPull groups. These values are
115 iteratively computed during wham */
116 double *N, *Ntot; /* nr of data points in nPull histograms. N and Ntot
117 only differ if bHistEq==TRUE */
119 double *g,*tau,*tausmooth; /* g = 1 + 2*tau[int]/dt where tau is the integrated
120 autocorrelation time. Compare, e.g.
121 Ferrenberg/Swendsen, PRL 63:1195 (1989)
122 Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28 */
124 double dt; /* timestep in the input data. Can be adapted with
125 g_wham option -dt */
126 gmx_bool **bContrib; /* TRUE, if any data point of the histogram is within min
127 and max, otherwise FALSE. */
128 real **ztime; /* input data z(t) as a function of time. Required to
129 compute ACTs */
130 real *forceAv; /* average force estimated from average displacement, fAv=dzAv*k
131 Used for integration to guess the potential. */
132 real *aver,*sigma; /* average and stddev of histograms */
133 double *bsWeight; /* for bootstrapping complete histograms with continuous weights */
134 } t_UmbrellaWindow;
137 typedef struct
139 /* INPUT STUFF */
140 const char *fnTpr,*fnPullf;
141 const char *fnPdo,*fnPullx; /* file names of input */
142 gmx_bool bTpr,bPullf,bPdo,bPullx;/* input file types given? */
143 real tmin, tmax, dt; /* only read input within tmin and tmax with dt */
145 gmx_bool bInitPotByIntegration; /* before WHAM, guess potential by force integration. Yields
146 1.5 to 2 times faster convergence */
147 int stepUpdateContrib; /* update contribution table every ... iterations. Accelerates
148 WHAM. */
150 /* BASIC WHAM OPTIONS */
151 int bins; /* nr of bins, min, max, and dz of profile */
152 real min,max,dz;
153 real Temperature,Tolerance; /* temperature, converged when probability changes less
154 than Tolerance */
155 gmx_bool bCycl; /* generate cyclic (periodic) PMF */
157 /* OUTPUT CONTROL */
158 gmx_bool bLog; /* energy output (instead of probability) for profile */
159 int unit; /* unit for PMF output kJ/mol or kT or kCal/mol */
160 gmx_bool bSym; /* symmetrize PMF around z=0 after WHAM, useful for
161 membranes etc. */
162 real zProf0; /* after wham, set prof to zero at this z-position
163 When bootstrapping, set zProf0 to a "stable" reference
164 position. */
165 gmx_bool bProf0Set; /* setting profile to 0 at zProf0? */
167 gmx_bool bBoundsOnly,bHistOnly; /* determine min and max, or write histograms and exit */
168 gmx_bool bAuto; /* determine min and max automatically but do not exit */
170 gmx_bool verbose; /* more noisy wham mode */
171 int stepchange; /* print maximum change in prof after how many interations */
172 output_env_t oenv; /* xvgr options */
174 /* AUTOCORRELATION STUFF */
175 gmx_bool bTauIntGiven,bCalcTauInt;/* IACT given or should be calculated? */
176 real sigSmoothIact; /* sigma of Gaussian to smooth ACTs */
177 gmx_bool bAllowReduceIact; /* Allow to reduce ACTs during smoothing. Otherwise
178 ACT are only increased during smoothing */
179 real acTrestart; /* when computing ACT, time between restarting points */
180 gmx_bool bHistEq; /* Enforce the same weight for each umbella window, that is
181 calculate with the same number of data points for
182 each window. That can be reasonable, if the histograms
183 have different length, but due to autocorrelation,
184 a longer simulation should not have larger weightin wham. */
186 /* BOOTSTRAPPING STUFF */
187 int nBootStrap; /* nr of bootstraps (50 is usually enough) */
188 int bsMethod; /* if == bsMethod_hist, consider complete histograms as independent
189 data points and, hence, only mix complete histograms.
190 if == bsMethod_BayesianHist, consider complete histograms
191 as independent data points, but assign random weights
192 to the histograms during the bootstrapping ("Bayesian bootstrap")
193 In case of long correlations (e.g., inside a channel), these
194 will yield a more realistic error.
195 if == bsMethod_traj(Gauss), generate synthetic histograms
196 for each given
197 histogram by generating an autocorrelated random sequence
198 that is distributed according to the respective given
199 histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
200 (instead of from the umbrella histogram) to generate a new
201 histogram
203 real tauBootStrap; /* autocorrelation time (ACT) used to generate synthetic
204 histograms. If ==0, use calculated ACF */
205 int histBootStrapBlockLength; /* when mixing histograms, mix only histograms withing blocks
206 long the reaction coordinate xi. Avoids gaps along xi. */
207 int bsSeed; /* random seed for bootstrapping */
208 gmx_bool bs_verbose; /* Write cumulative distribution functions (CDFs) of histograms
209 and write the generated histograms for each bootstrap */
211 /* tabulated umbrella potential stuff */
212 gmx_bool bTab;
213 double *tabX,*tabY,tabMin,tabMax,tabDz;
214 int tabNbins;
216 gmx_rng_t rng; /* gromacs random number generator */
217 } t_UmbrellaOptions;
220 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
222 t_UmbrellaWindow *win;
223 int i;
224 snew(win,nwin);
225 for (i=0; i<nwin; i++)
227 win[i].Histo = win[i].cum = 0;
228 win[i].k = win[i].pos = win[i].z =0;
229 win[i].N = win[i].Ntot = 0;
230 win[i].g = win[i].tau = win[i].tausmooth = 0;
231 win[i].bContrib=0;
232 win[i].ztime=0;
233 win[i].forceAv=0;
234 win[i].aver = win[i].sigma = 0;
235 win[i].bsWeight = 0;
237 return win;
240 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
242 int i,j;
243 for (i=0; i<nwin; i++)
245 if (win[i].Histo)
246 for (j=0;j<win[i].nPull;j++)
247 sfree(win[i].Histo[j]);
248 if (win[i].cum)
249 for (j=0;j<win[i].nPull;j++)
250 sfree(win[i].cum[j]);
251 if (win[i].bContrib)
252 for (j=0;j<win[i].nPull;j++)
253 sfree(win[i].bContrib[j]);
254 sfree(win[i].Histo);
255 sfree(win[i].cum);
256 sfree(win[i].k);
257 sfree(win[i].pos);
258 sfree(win[i].z);
259 sfree(win[i].N);
260 sfree(win[i].Ntot);
261 sfree(win[i].g);
262 sfree(win[i].tau);
263 sfree(win[i].tausmooth);
264 sfree(win[i].bContrib);
265 sfree(win[i].ztime);
266 sfree(win[i].forceAv);
267 sfree(win[i].aver);
268 sfree(win[i].sigma);
269 sfree(win[i].bsWeight);
271 sfree(win);
274 /* Return j such that xx[j] <= x < xx[j+1] */
275 void searchOrderedTable(double xx[], int n, double x, int *j)
277 int ju,jm,jl;
278 int ascending;
280 jl=-1;
281 ju=n;
282 ascending=(xx[n-1] > xx[0]);
283 while (ju-jl > 1)
285 jm=(ju+jl) >> 1;
286 if ((x >= xx[jm]) == ascending)
287 jl=jm;
288 else
289 ju=jm;
291 if (x==xx[0]) *j=0;
292 else if (x==xx[n-1]) *j=n-2;
293 else *j=jl;
296 /* Read and setup tabulated umbrella potential */
297 void setup_tab(const char *fn,t_UmbrellaOptions *opt)
299 int i,ny,nl;
300 double **y;
302 printf("Setting up tabulated potential from file %s\n",fn);
303 nl=read_xvg(fn,&y,&ny);
304 opt->tabNbins=nl;
305 if (ny!=2)
306 gmx_fatal(FARGS,"Found %d columns in %s. Expected 2.\n",ny,fn);
307 opt->tabMin=y[0][0];
308 opt->tabMax=y[0][nl-1];
309 opt->tabDz=(opt->tabMax-opt->tabMin)/(nl-1);
310 if (opt->tabDz<=0)
311 gmx_fatal(FARGS,"The tabulated potential in %s must be provided in \n"
312 "ascending z-direction",fn);
313 for (i=0;i<nl-1;i++)
314 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
315 gmx_fatal(FARGS,"z-values in %s are not equally spaced.\n",ny,fn);
316 snew(opt->tabY,nl);
317 snew(opt->tabX,nl);
318 for (i=0;i<nl;i++)
320 opt->tabX[i]=y[0][i];
321 opt->tabY[i]=y[1][i];
323 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
324 opt->tabMin,opt->tabMax,opt->tabDz);
327 void read_pdo_header(FILE * file,t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
329 char line[2048];
330 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
331 int i;
333 /* line 1 */
334 if (fgets(line,2048,file) == NULL)
335 gmx_fatal(FARGS,"Error reading header from pdo file\n");
336 sscanf(line,"%s%s%s",Buffer0,Buffer1,Buffer2);
337 if(strcmp(Buffer1,"UMBRELLA"))
338 gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s\n"
339 "(Found in first line: `%s')\n",
340 Buffer1, "UMBRELLA",line);
341 if(strcmp(Buffer2,"3.0"))
342 gmx_fatal(FARGS,"This does not appear to be a version 3.0 pdo file");
344 /* line 2 */
345 if (fgets(line,2048,file) == NULL)
346 gmx_fatal(FARGS,"Error reading header from pdo file\n");
347 sscanf(line,"%s%s%s%d%d%d",Buffer0,Buffer1,Buffer2,
348 &(header->Dims[0]),&(header->Dims[1]),&(header->Dims[2]));
350 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
352 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
353 if(header->nDim!=1)
354 gmx_fatal(FARGS,"Currently only supports one dimension");
356 /* line3 */
357 if (fgets(line,2048,file) == NULL)
358 gmx_fatal(FARGS,"Error reading header from pdo file\n");
359 sscanf(line,"%s%s%d",Buffer0,Buffer1,&(header->nSkip));
361 /* line 4 */
362 if (fgets(line,2048,file) == NULL)
363 gmx_fatal(FARGS,"Error reading header from pdo file\n");
364 sscanf(line,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->Reference);
366 /* line 5 */
367 if (fgets(line,2048,file) == NULL)
368 gmx_fatal(FARGS,"Error reading header from pdo file\n");
369 sscanf(line,"%s%s%s%s%s%d",Buffer0,Buffer1,Buffer2,Buffer3,Buffer4,&(header->nPull));
371 if (opt->verbose)
372 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
373 header->Reference);
375 for(i=0;i<header->nPull;++i)
377 if (fgets(line,2048,file) == NULL)
378 gmx_fatal(FARGS,"Error reading header from pdo file\n");
379 sscanf(line,"%*s%*s%*s%s%*s%*s%lf%*s%*s%lf",header->PullName[i]
380 ,&(header->UmbPos[i][0]),&(header->UmbCons[i][0]));
381 if (opt->verbose)
382 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
383 i,header->PullName[i],header->UmbPos[i][0],header->UmbCons[i][0]);
386 if (fgets(line,2048,file) == NULL)
387 gmx_fatal(FARGS,"Cannot read from file\n");
388 sscanf(line,"%s",Buffer3);
389 if (strcmp(Buffer3,"#####") != 0)
390 gmx_fatal(FARGS,"Expected '#####', found %s. Hick.\n",Buffer3);
394 static char *fgets3(FILE *fp,char ptr[],int *len)
396 char *p;
397 int slen;
399 if (fgets(ptr,*len-1,fp) == NULL)
400 return NULL;
401 p = ptr;
402 while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
404 /* This line is longer than len characters, let's increase len! */
405 *len += STRLEN;
406 p += STRLEN;
407 srenew(ptr,*len);
408 if (fgets(p-1,STRLEN,fp) == NULL)
409 break;
411 slen = strlen(ptr);
412 if (ptr[slen-1] == '\n')
413 ptr[slen-1] = '\0';
415 return ptr;
418 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
419 int fileno, t_UmbrellaWindow * win,
420 t_UmbrellaOptions *opt,
421 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
423 int i,inttemp,bins,count,ntot;
424 real min,max,minfound=1e20,maxfound=-1e20;
425 double temp,time,time0=0,dt;
426 char *ptr=0;
427 t_UmbrellaWindow * window=0;
428 gmx_bool timeok,dt_ok=1;
429 char *tmpbuf=0,fmt[256],fmtign[256];
430 int len=STRLEN,dstep=1;
431 const int blocklen=4096;
432 int *lennow=0;
434 if (!bGetMinMax)
436 bins=opt->bins;
437 min=opt->min;
438 max=opt->max;
440 window=win+fileno;
441 /* Need to alocate memory and set up structure */
442 window->nPull=header->nPull;
443 window->nBin=bins;
445 snew(window->Histo,window->nPull);
446 snew(window->z,window->nPull);
447 snew(window->k,window->nPull);
448 snew(window->pos,window->nPull);
449 snew(window->N, window->nPull);
450 snew(window->Ntot, window->nPull);
451 snew(window->g, window->nPull);
452 snew(window->bsWeight, window->nPull);
454 window->bContrib=0;
456 if (opt->bCalcTauInt)
457 snew(window->ztime, window->nPull);
458 else
459 window->ztime=0;
460 snew(lennow,window->nPull);
462 for(i=0;i<window->nPull;++i)
464 window->z[i]=1;
465 window->bsWeight[i]=1.;
466 snew(window->Histo[i],bins);
467 window->k[i]=header->UmbCons[i][0];
468 window->pos[i]=header->UmbPos[i][0];
469 window->N[i]=0;
470 window->Ntot[i]=0;
471 window->g[i]=1.;
472 if (opt->bCalcTauInt)
473 window->ztime[i]=0;
476 /* Done with setup */
478 else
480 minfound=1e20;
481 maxfound=-1e20;
482 min=max=bins=0; /* Get rid of warnings */
485 count=0;
486 snew(tmpbuf,len);
487 while ( (ptr=fgets3(file,tmpbuf,&len)) != NULL)
489 trim(ptr);
491 if (ptr[0] == '#' || strlen(ptr)<2)
492 continue;
494 /* Initiate format string */
495 fmtign[0] = '\0';
496 strcat(fmtign,"%*s");
498 sscanf(ptr,"%lf",&time); /* printf("Time %f\n",time); */
499 /* Round time to fs */
500 time=1.0/1000*( (int) (time*1000+0.5) );
502 /* get time step of pdo file */
503 if (count==0)
504 time0=time;
505 else if (count==1)
507 dt=time-time0;
508 if (opt->dt>0.0)
510 dstep=(int)(opt->dt/dt+0.5);
511 if (dstep==0)
512 dstep=1;
514 if (!bGetMinMax)
515 window->dt=dt*dstep;
517 count++;
519 dt_ok=((count-1)%dstep == 0);
520 timeok=(dt_ok && time >= opt->tmin && time <= opt->tmax);
521 /* if (opt->verbose)
522 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
523 time,opt->tmin, opt->tmax, dt_ok,timeok); */
525 if (timeok)
527 for(i=0;i<header->nPull;++i)
529 strcpy(fmt,fmtign);
530 strcat(fmt,"%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
531 strcat(fmtign,"%*s"); /* ignoring one more entry in the next loop */
532 if(sscanf(ptr,fmt,&temp))
534 temp+=header->UmbPos[i][0];
535 if (bGetMinMax)
537 if (temp<minfound)
538 minfound=temp;
539 if (temp>maxfound)
540 maxfound=temp;
542 else{
543 if (opt->bCalcTauInt)
545 /* save time series for autocorrelation analysis */
546 ntot=window->Ntot[i];
547 if (ntot>=lennow[i])
549 lennow[i]+=blocklen;
550 srenew(window->ztime[i],lennow[i]);
552 window->ztime[i][ntot]=temp;
555 temp-=min;
556 temp/=(max-min);
557 temp*=bins;
558 temp=floor(temp);
560 inttemp = (int)temp;
561 if (opt->bCycl)
563 if (inttemp < 0)
564 inttemp+=bins;
565 else if (inttemp >= bins)
566 inttemp-=bins;
569 if(inttemp >= 0 && inttemp < bins)
571 window->Histo[i][inttemp]+=1.;
572 window->N[i]++;
574 window->Ntot[i]++;
579 if (time>opt->tmax)
581 if (opt->verbose)
582 printf("time %f larger than tmax %f, stop reading pdo file\n",time,opt->tmax);
583 break;
587 if (bGetMinMax)
589 *mintmp=minfound;
590 *maxtmp=maxfound;
593 sfree(lennow);
594 sfree(tmpbuf);
597 void enforceEqualWeights(t_UmbrellaWindow * window,int nWindows)
599 int i,k,j,NEnforced;
600 double ratio;
602 NEnforced=window[0].Ntot[0];
603 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
604 "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
605 /* enforce all histograms to have the same weight as the very first histogram */
607 for(j=0;j<nWindows;++j)
609 for(k=0;k<window[j].nPull;++k)
611 ratio=1.0*NEnforced/window[j].Ntot[k];
612 for(i=0;i<window[0].nBin;++i)
614 window[j].Histo[k][i]*=ratio;
616 window[j].N[k]=(int)(ratio*window[j].N[k] + 0.5);
621 /* Simple linear interpolation between two given tabulated points */
622 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
624 int jl,ju;
625 double pl,pu,dz,dp;
627 jl=floor((dist-opt->tabMin)/opt->tabDz);
628 ju=jl+1;
629 if (jl<0 || ju>=opt->tabNbins)
631 gmx_fatal(FARGS,"Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
632 "Provide an extended table.",dist,jl,ju);
634 pl=opt->tabY[jl];
635 pu=opt->tabY[ju];
636 dz=dist-opt->tabX[jl];
637 dp=(pu-pl)*dz/opt->tabDz;
638 return pl+dp;
642 /* Don't worry, that routine does not mean we compute the PMF in limited precision.
643 After rapid convergence (using only substiantal contributions), we always switch to
644 full precision. */
645 void setup_acc_wham(double *profile,t_UmbrellaWindow * window,int nWindows,
646 t_UmbrellaOptions *opt)
648 int i,j,k,nGrptot=0,nContrib=0,nTot=0;
649 double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib1,contrib2;
650 gmx_bool bAnyContrib;
651 static int bFirst=1;
652 static double wham_contrib_lim;
654 if (bFirst)
656 for(i=0;i<nWindows;++i)
658 nGrptot+=window[i].nPull;
660 wham_contrib_lim=opt->Tolerance/nGrptot;
663 ztot=opt->max-opt->min;
664 ztot_half=ztot/2;
666 for(i=0;i<nWindows;++i)
668 if ( ! window[i].bContrib)
670 snew(window[i].bContrib,window[i].nPull);
672 for(j=0;j<window[i].nPull;++j)
674 if ( ! window[i].bContrib[j])
675 snew(window[i].bContrib[j],opt->bins);
676 bAnyContrib=FALSE;
677 for(k=0;k<opt->bins;++k)
679 temp=(1.0*k+0.5)*dz+min;
680 distance = temp - window[i].pos[j]; /* distance to umbrella center */
681 if (opt->bCycl)
682 { /* in cyclic wham: */
683 if (distance > ztot_half) /* |distance| < ztot_half */
684 distance-=ztot;
685 else if (distance < -ztot_half)
686 distance+=ztot;
688 /* Note: there are two contributions to bin k in the wham equations:
689 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
690 ii) exp(- U/(8.314e-3*opt->Temperature))
691 where U is the umbrella potential
692 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
695 if (!opt->bTab)
696 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
697 else
698 U=tabulated_pot(distance,opt); /* Use tabulated potential */
700 contrib1=profile[k]*exp(- U/(8.314e-3*opt->Temperature));
701 contrib2=window[i].N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j]);
702 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
703 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
704 if (window[i].bContrib[j][k])
705 nContrib++;
706 nTot++;
708 /* If this histo is far outside min and max all bContrib may be FALSE,
709 causing a floating point exception later on. To avoid that, switch
710 them all to true.*/
711 if (!bAnyContrib)
712 for(k=0;k<opt->bins;++k)
713 window[i].bContrib[j][k]=TRUE;
716 if (bFirst)
717 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
718 "Evaluating only %d of %d expressions.\n\n",wham_contrib_lim,nContrib, nTot);
720 if (opt->verbose)
721 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
722 nContrib,nTot);
723 bFirst=0;
727 void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows,
728 t_UmbrellaOptions *opt, gmx_bool bExact)
730 int i,k,j;
731 double num,ztot_half,ztot,distance,min=opt->min,dz=opt->dz;
732 double denom,U=0,temp=0,invg;
734 ztot=opt->max-opt->min;
735 ztot_half=ztot/2;
737 for(i=0;i<opt->bins;++i)
739 num=denom=0.;
740 for(j=0;j<nWindows;++j)
742 for(k=0;k<window[j].nPull;++k)
744 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
745 temp = (1.0*i+0.5)*dz+min;
746 num += invg*window[j].Histo[k][i];
748 if (! (bExact || window[j].bContrib[k][i]))
749 continue;
750 distance = temp - window[j].pos[k]; /* distance to umbrella center */
751 if (opt->bCycl)
752 { /* in cyclic wham: */
753 if (distance > ztot_half) /* |distance| < ztot_half */
754 distance-=ztot;
755 else if (distance < -ztot_half)
756 distance+=ztot;
759 if (!opt->bTab)
760 U=0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
761 else
762 U=tabulated_pot(distance,opt); /* Use tabulated potential */
763 denom+=invg*window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
766 profile[i]=num/denom;
771 double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows,
772 t_UmbrellaOptions *opt, gmx_bool bExact)
774 int i,j,k,binMax=-1;
775 double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,totalMax;
776 double MAX=-1e20, total=0;
778 ztot=opt->max-opt->min;
779 ztot_half=ztot/2;
781 for(i=0;i<nWindows;++i)
783 for(j=0;j<window[i].nPull;++j)
785 total=0;
786 for(k=0;k<window[i].nBin;++k)
788 if (! (bExact || window[i].bContrib[j][k]))
789 continue;
790 temp=(1.0*k+0.5)*dz+min;
791 distance = temp - window[i].pos[j]; /* distance to umbrella center */
792 if (opt->bCycl)
793 { /* in cyclic wham: */
794 if (distance > ztot_half) /* |distance| < ztot_half */
795 distance-=ztot;
796 else if (distance < -ztot_half)
797 distance+=ztot;
800 if (!opt->bTab)
801 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
802 else
803 U=tabulated_pot(distance,opt); /* Use tabulated potential */
805 total+=profile[k]*exp(-U/(8.314e-3*opt->Temperature));
807 /* Avoid floating point exception if window is far outside min and max */
808 if (total != 0.0)
809 total = -log(total);
810 else
811 total = 1000.0;
812 temp = fabs(total - window[i].z[j]);
813 if(temp > MAX){
814 MAX=temp;
815 binMax=k;
816 totalMax=total;
818 window[i].z[j] = total;
821 return MAX;
824 void symmetrizeProfile(double* profile,t_UmbrellaOptions *opt)
826 int i,j;
827 double *prof2,bins=opt->bins,min=opt->min,max=opt->max,dz=opt->dz,zsym,deltaz,profsym;
828 double z,z1;
830 if (min>0. || max<0.)
831 gmx_fatal(FARGS,"Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
832 opt->min,opt->max);
834 snew(prof2,bins);
836 for (i=0;i<bins;i++)
838 z=min+(i+0.5)*dz;
839 zsym=-z;
840 /* bin left of zsym */
841 j=floor((zsym-min)/dz-0.5);
842 if (j>=0 && (j+1)<bins)
844 /* interpolate profile linearly between bins j and j+1 */
845 z1=min+(j+0.5)*dz;
846 deltaz=zsym-z1;
847 profsym=profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
848 /* average between left and right */
849 prof2[i]=0.5*(profsym+profile[i]);
851 else
853 prof2[i]=profile[i];
857 memcpy(profile,prof2,bins*sizeof(double));
858 sfree(prof2);
861 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
863 int i,bins,imin;
864 double unit_factor=1., R_MolarGasConst, diff;
866 R_MolarGasConst=8.314472e-3; /* in kJ/(mol*K) */
867 bins=opt->bins;
869 /* Not log? Nothing to do! */
870 if (!opt->bLog)
871 return;
873 /* Get profile in units of kT, kJ/mol, or kCal/mol */
874 if (opt->unit == en_kT)
875 unit_factor=1.0;
876 else if (opt->unit == en_kJ)
877 unit_factor=R_MolarGasConst*opt->Temperature;
878 else if (opt->unit == en_kCal)
879 unit_factor=R_MolarGasConst*opt->Temperature/4.1868;
880 else
881 gmx_fatal(FARGS,"Sorry, I don't know this energy unit.");
883 for (i=0;i<bins;i++)
884 if (profile[i]>0.0)
885 profile[i]=-log(profile[i])*unit_factor;
887 /* shift to zero at z=opt->zProf0 */
888 if (!opt->bProf0Set)
890 diff=profile[0];
892 else
894 /* Get bin with shortest distance to opt->zProf0
895 (-0.5 from bin position and +0.5 from rounding cancel) */
896 imin=(int)((opt->zProf0-opt->min)/opt->dz);
897 if (imin<0)
898 imin=0;
899 else if (imin>=bins)
900 imin=bins-1;
901 diff=profile[imin];
904 /* Shift to zero */
905 for (i=0;i<bins;i++)
906 profile[i]-=diff;
909 void getRandomIntArray(int nPull,int blockLength,int* randomArray,gmx_rng_t rng)
911 int ipull,blockBase,nr,ipullRandom;
913 if (blockLength==0)
914 blockLength=nPull;
916 for (ipull=0; ipull<nPull; ipull++)
918 blockBase=(ipull/blockLength)*blockLength;
920 { /* make sure nothing bad happens in the last block */
921 nr=(int)(gmx_rng_uniform_real(rng)*blockLength);
922 ipullRandom = blockBase + nr;
923 } while (ipullRandom >= nPull);
924 if (ipullRandom<0 || ipullRandom>=nPull)
925 gmx_fatal(FARGS,"Ups, random iWin = %d, nPull = %d, nr = %d, "
926 "blockLength = %d, blockBase = %d\n",
927 ipullRandom,nPull,nr,blockLength,blockBase);
928 randomArray[ipull]=ipullRandom;
930 /*for (ipull=0; ipull<nPull; ipull++)
931 printf("%d ",randomArray[ipull]); printf("\n"); */
934 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
935 t_UmbrellaWindow *thisWindow,int pullid)
937 synthWindow->N [0]=thisWindow->N [pullid];
938 synthWindow->Histo [0]=thisWindow->Histo [pullid];
939 synthWindow->pos [0]=thisWindow->pos [pullid];
940 synthWindow->z [0]=thisWindow->z [pullid];
941 synthWindow->k [0]=thisWindow->k [pullid];
942 synthWindow->bContrib[0]=thisWindow->bContrib [pullid];
943 synthWindow->g [0]=thisWindow->g [pullid];
944 synthWindow->bsWeight[0]=thisWindow->bsWeight [pullid];
947 /* Calculate cumulative distribution function of of all histograms. They
948 allow to create random number sequences
949 which are distributed according to the histograms. Required to generate
950 the "synthetic" histograms for the Bootstrap method */
951 void calc_cumulatives(t_UmbrellaWindow *window,int nWindows,
952 t_UmbrellaOptions *opt,const char *fnhist)
954 int i,j,k,nbin;
955 double last;
956 char *fn=0,*buf=0;
957 FILE *fp=0;
959 if (opt->bs_verbose)
961 snew(fn,strlen(fnhist)+10);
962 snew(buf,strlen(fnhist)+10);
963 sprintf(fn,"%s_cumul.xvg",strncpy(buf,fnhist,strlen(fnhist)-4));
964 fp=xvgropen(fn,"CDFs of umbrella windows","z","CDF",opt->oenv);
967 nbin=opt->bins;
968 for (i=0; i<nWindows; i++)
970 snew(window[i].cum,window[i].nPull);
971 for (j=0; j<window[i].nPull; j++)
973 snew(window[i].cum[j],nbin+1);
974 window[i].cum[j][0]=0.;
975 for (k=1; k<=nbin; k++)
976 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
978 /* normalize CDFs. Ensure cum[nbin]==1 */
979 last = window[i].cum[j][nbin];
980 for (k=0; k<=nbin; k++)
981 window[i].cum[j][k] /= last;
985 printf("Cumulative distriubtion functions of all histograms created.\n");
986 if (opt->bs_verbose)
988 for (k=0; k<=nbin; k++)
990 fprintf(fp,"%g\t",opt->min+k*opt->dz);
991 for (i=0; i<nWindows; i++)
992 for (j=0; j<window[i].nPull; j++)
993 fprintf(fp,"%g\t",window[i].cum[j][k]);
994 fprintf(fp,"\n");
996 printf("Wrote cumulative distribution functions to %s\n",fn);
997 ffclose(fp);
998 sfree(fn);
999 sfree(buf);
1004 /* Return j such that xx[j] <= x < xx[j+1] */
1005 void searchCumulative(double xx[], int n, double x, int *j)
1007 int ju,jm,jl;
1009 jl=-1;
1010 ju=n;
1011 while (ju-jl > 1)
1013 jm=(ju+jl) >> 1;
1014 if (x >= xx[jm])
1015 jl=jm;
1016 else
1017 ju=jm;
1019 if (x==xx[0])
1020 *j=0;
1021 else if (x==xx[n-1])
1022 *j=n-2;
1023 else
1024 *j=jl;
1027 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1028 int pullid,t_UmbrellaOptions *opt)
1030 int N,i,nbins,r_index,ibin;
1031 double r,tausteps=0.0,a,ap,dt,x,invsqrt2,g,y,sig=0.,z,mu=0.;
1032 char errstr[1024];
1034 N=thisWindow->N[pullid];
1035 dt=thisWindow->dt;
1036 nbins=thisWindow->nBin;
1038 /* tau = autocorrelation time */
1039 if (opt->tauBootStrap>0.0)
1040 tausteps=opt->tauBootStrap/dt;
1041 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1043 /* calc tausteps from g=1+2tausteps */
1044 g=thisWindow->g[pullid];
1045 tausteps=(g-1)/2;
1047 else
1049 sprintf(errstr,
1050 "When generating hypothetical trajctories from given umbrella histograms,\n"
1051 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1052 "cannot be predicted. You have 3 options:\n"
1053 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1054 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1055 strcat(errstr,
1056 " with option -iiact for all umbrella windows.\n"
1057 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1058 " Use option (3) only if you are sure what you're doing, you may severely\n"
1059 " underestimate the error if a too small ACT is given.\n");
1060 gmx_fatal(FARGS,errstr);
1063 synthWindow->N [0]=N;
1064 synthWindow->pos [0]=thisWindow->pos[pullid];
1065 synthWindow->z [0]=thisWindow->z[pullid];
1066 synthWindow->k [0]=thisWindow->k[pullid];
1067 synthWindow->bContrib[0]=thisWindow->bContrib[pullid];
1068 synthWindow->g [0]=thisWindow->g [pullid];
1069 synthWindow->bsWeight[0]=thisWindow->bsWeight[pullid];
1071 for (i=0;i<nbins;i++)
1072 synthWindow->Histo[0][i]=0.;
1074 if (opt->bsMethod==bsMethod_trajGauss)
1076 sig = thisWindow->sigma [pullid];
1077 mu = thisWindow->aver [pullid];
1080 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1081 Use the following:
1082 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1083 then
1084 z = a*x + sqrt(1-a^2)*y
1085 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1086 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1087 function
1088 C(t) = exp(-t/tau) with tau=-1/ln(a)
1090 Then, use error function to turn the Gaussian random variable into a uniformly
1091 distributed one in [0,1]. Eventually, use cumulative distribution function of
1092 histogram to get random variables distributed according to histogram.
1093 Note: The ACT of the flat distribution and of the generated histogram is not
1094 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1096 a=exp(-1.0/tausteps);
1097 ap=sqrt(1-a*a);
1098 invsqrt2=1./sqrt(2.0);
1100 /* init random sequence */
1101 x=gmx_rng_gaussian_table(opt->rng);
1103 if (opt->bsMethod==bsMethod_traj)
1105 /* bootstrap points from the umbrella histograms */
1106 for (i=0;i<N;i++)
1108 y=gmx_rng_gaussian_table(opt->rng);
1109 x=a*x+ap*y;
1110 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1111 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1113 r=0.5*(1+gmx_erf(x*invsqrt2));
1114 searchCumulative(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
1115 synthWindow->Histo[0][r_index]+=1.;
1118 else if (opt->bsMethod==bsMethod_trajGauss)
1120 /* bootstrap points from a Gaussian with the same average and sigma
1121 as the respective umbrella histogram. The idea was, that -given
1122 limited sampling- the bootstrapped histograms are otherwise biased
1123 from the limited sampling of the US histos. However, bootstrapping from
1124 the Gaussian seems to yield a similar estimate. */
1125 i=0;
1126 while (i<N)
1128 y=gmx_rng_gaussian_table(opt->rng);
1129 x=a*x+ap*y;
1130 z = x*sig+mu;
1131 ibin=floor((z-opt->min)/opt->dz);
1132 if (opt->bCycl)
1134 if (ibin<0)
1135 while ( (ibin+=nbins) < 0);
1136 else if (ibin>=nbins)
1137 while ( (ibin-=nbins) >= nbins);
1140 if (ibin>=0 && ibin<nbins)
1142 synthWindow->Histo[0][ibin]+=1.;
1143 i++;
1147 else
1149 gmx_fatal(FARGS,"Unknown bsMethod (id %d). That should not happen.\n",opt->bsMethod);
1154 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1155 int bs_index,t_UmbrellaOptions *opt)
1157 char *fn=0,*buf=0,title[256];
1158 FILE *fp;
1159 int bins,l,i,j;
1161 if (bs_index<0)
1163 fn=strdup(fnhist);
1164 strcpy(title,"Umbrella histograms");
1166 else
1168 snew(fn,strlen(fnhist)+10);
1169 snew(buf,strlen(fnhist)+1);
1170 sprintf(fn,"%s_bs%d.xvg",strncpy(buf,fnhist,strlen(fnhist)-4),bs_index);
1171 sprintf(title,"Umbrella histograms. Bootstrap #%d",bs_index);
1174 fp=xvgropen(fn,title,"z","count",opt->oenv);
1175 bins=opt->bins;
1177 /* Write histograms */
1178 for(l=0;l<bins;++l)
1180 fprintf(fp,"%e\t",(double)(l+0.5)*opt->dz+opt->min);
1181 for(i=0;i<nWindows;++i)
1183 for(j=0;j<window[i].nPull;++j)
1185 fprintf(fp,"%e\t",window[i].Histo[j][l]);
1188 fprintf(fp,"\n");
1191 ffclose(fp);
1192 printf("Wrote %s\n",fn);
1193 if (buf)
1195 sfree(buf);
1196 sfree(fn);
1200 int func_wham_is_larger(const void *a, const void *b)
1202 double *aa,*bb;
1203 aa=(double*)a;
1204 bb=(double*)b;
1205 if (*aa < *bb)
1206 return -1;
1207 else if (*aa > *bb)
1208 return 1;
1209 else
1210 return 0;
1214 void setRandomBsWeights(t_UmbrellaWindow *synthwin,int nAllPull, t_UmbrellaOptions *opt)
1216 int i;
1217 double *r;
1219 snew(r,nAllPull);
1221 /* generate ordered random numbers between 0 and nAllPull */
1222 for (i=0; i<nAllPull-1; i++)
1224 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1226 qsort((void *)r,nAllPull-1, sizeof(double), &func_wham_is_larger);
1227 r[nAllPull-1]=1.0*nAllPull;
1229 synthwin[0].bsWeight[0]=r[0];
1230 for (i=1; i<nAllPull; i++)
1232 synthwin[i].bsWeight[0]=r[i]-r[i-1];
1235 /* avoid to have zero weight by adding a tiny value */
1236 for (i=0; i<nAllPull; i++)
1237 if (synthwin[i].bsWeight[0] < 1e-5)
1238 synthwin[i].bsWeight[0] = 1e-5;
1240 sfree(r);
1243 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1244 char* ylabel, double *profile,
1245 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1247 t_UmbrellaWindow * synthWindow;
1248 double *bsProfile,*bsProfiles_av, *bsProfiles_av2,maxchange=1e20,tmp,stddev;
1249 int i,j,*randomArray=0,winid,pullid,ib;
1250 int iAllPull,nAllPull,*allPull_winId,*allPull_pullId;
1251 FILE *fp;
1252 gmx_bool bExact=FALSE;
1254 /* init random generator */
1255 if (opt->bsSeed==-1)
1256 opt->rng=gmx_rng_init(gmx_rng_make_seed());
1257 else
1258 opt->rng=gmx_rng_init(opt->bsSeed);
1260 snew(bsProfile, opt->bins);
1261 snew(bsProfiles_av, opt->bins);
1262 snew(bsProfiles_av2,opt->bins);
1264 /* Create array of all pull groups. Note that different windows
1265 may have different nr of pull groups
1266 First: Get total nr of pull groups */
1267 nAllPull=0;
1268 for (i=0;i<nWindows;i++)
1269 nAllPull+=window[i].nPull;
1270 snew(allPull_winId,nAllPull);
1271 snew(allPull_pullId,nAllPull);
1272 iAllPull=0;
1273 /* Setup one array of all pull groups */
1274 for (i=0;i<nWindows;i++)
1276 for (j=0;j<window[i].nPull;j++)
1278 allPull_winId[iAllPull]=i;
1279 allPull_pullId[iAllPull]=j;
1280 iAllPull++;
1284 /* setup stuff for synthetic windows */
1285 snew(synthWindow,nAllPull);
1286 for (i=0;i<nAllPull;i++)
1288 synthWindow[i].nPull=1;
1289 synthWindow[i].nBin=opt->bins;
1290 snew(synthWindow[i].Histo,1);
1291 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1292 snew(synthWindow[i].Histo[0],opt->bins);
1293 snew(synthWindow[i].N,1);
1294 snew(synthWindow[i].pos,1);
1295 snew(synthWindow[i].z,1);
1296 snew(synthWindow[i].k,1);
1297 snew(synthWindow[i].bContrib,1);
1298 snew(synthWindow[i].g,1);
1299 snew(synthWindow[i].bsWeight,1);
1302 switch(opt->bsMethod)
1304 case bsMethod_hist:
1305 snew(randomArray,nAllPull);
1306 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1307 please_cite(stdout,"Hub2006");
1308 break;
1309 case bsMethod_BayesianHist :
1310 /* just copy all histogams into synthWindow array */
1311 for (i=0;i<nAllPull;i++)
1313 winid =allPull_winId [i];
1314 pullid=allPull_pullId[i];
1315 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1317 break;
1318 case bsMethod_traj:
1319 case bsMethod_trajGauss:
1320 calc_cumulatives(window,nWindows,opt,fnhist);
1321 break;
1322 default:
1323 gmx_fatal(FARGS,"Unknown bootstrap method. That should not have happened.\n");
1326 /* do bootstrapping */
1327 fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,opt->oenv);
1328 for (ib=0;ib<opt->nBootStrap;ib++)
1330 printf(" *******************************************\n"
1331 " ******** Start bootstrap nr %d ************\n"
1332 " *******************************************\n",ib+1);
1334 switch(opt->bsMethod)
1336 case bsMethod_hist:
1337 /* bootstrap complete histograms from given histograms */
1338 getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray,opt->rng);
1339 for (i=0;i<nAllPull;i++){
1340 winid =allPull_winId [randomArray[i]];
1341 pullid=allPull_pullId[randomArray[i]];
1342 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1344 break;
1345 case bsMethod_BayesianHist:
1346 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1347 setRandomBsWeights(synthWindow,nAllPull,opt);
1348 break;
1349 case bsMethod_traj:
1350 case bsMethod_trajGauss:
1351 /* create new histos from given histos, that is generate new hypothetical
1352 trajectories */
1353 for (i=0;i<nAllPull;i++)
1355 winid=allPull_winId[i];
1356 pullid=allPull_pullId[i];
1357 create_synthetic_histo(synthWindow+i,window+winid,pullid,opt);
1359 break;
1362 /* write histos in case of verbose output */
1363 if (opt->bs_verbose)
1364 print_histograms(fnhist,synthWindow,nAllPull,ib,opt);
1366 /* do wham */
1367 i=0;
1368 bExact=FALSE;
1369 maxchange=1e20;
1370 memcpy(bsProfile,profile,opt->bins*sizeof(double)); /* use profile as guess */
1373 if ( (i%opt->stepUpdateContrib) == 0)
1374 setup_acc_wham(bsProfile,synthWindow,nAllPull,opt);
1375 if (maxchange<opt->Tolerance)
1376 bExact=TRUE;
1377 if (((i%opt->stepchange) == 0 || i==1) && !i==0)
1378 printf("\t%4d) Maximum change %e\n",i,maxchange);
1379 calc_profile(bsProfile,synthWindow,nAllPull,opt,bExact);
1380 i++;
1381 } while( (maxchange=calc_z(bsProfile, synthWindow, nAllPull, opt,bExact)) > opt->Tolerance || !bExact);
1382 printf("\tConverged in %d iterations. Final maximum change %g\n",i,maxchange);
1384 if (opt->bLog)
1385 prof_normalization_and_unit(bsProfile,opt);
1387 /* symmetrize profile around z=0 */
1388 if (opt->bSym)
1389 symmetrizeProfile(bsProfile,opt);
1391 /* save stuff to get average and stddev */
1392 for (i=0;i<opt->bins;i++)
1394 tmp=bsProfile[i];
1395 bsProfiles_av[i]+=tmp;
1396 bsProfiles_av2[i]+=tmp*tmp;
1397 fprintf(fp,"%e\t%e\n",(i+0.5)*opt->dz+opt->min,tmp);
1399 fprintf(fp,"&\n");
1401 ffclose(fp);
1403 /* write average and stddev */
1404 fp=xvgropen(fnres,"Average and stddev from bootstrapping","z",ylabel,opt->oenv);
1405 fprintf(fp,"@TYPE xydy\n");
1406 for (i=0;i<opt->bins;i++)
1408 bsProfiles_av [i]/=opt->nBootStrap;
1409 bsProfiles_av2[i]/=opt->nBootStrap;
1410 tmp=bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1411 stddev=(tmp>=0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1412 fprintf(fp,"%e\t%e\t%e\n",(i+0.5)*opt->dz+opt->min,bsProfiles_av [i],stddev);
1414 ffclose(fp);
1415 printf("Wrote boot strap result to %s\n",fnres);
1418 int whaminFileType(char *fn)
1420 int len;
1421 len=strlen(fn);
1422 if (strcmp(fn+len-3,"tpr")==0)
1423 return whamin_tpr;
1424 else if (strcmp(fn+len-3,"xvg")==0 || strcmp(fn+len-6,"xvg.gz")==0)
1425 return whamin_pullxf;
1426 else if (strcmp(fn+len-3,"pdo")==0 || strcmp(fn+len-6,"pdo.gz")==0)
1427 return whamin_pdo;
1428 else
1429 gmx_fatal(FARGS,"Unknown file type of %s. Should be tpr, xvg, or pdo.\n",fn);
1430 return whamin_unknown;
1433 void read_wham_in(const char *fn,char ***filenamesRet, int *nfilesRet,
1434 t_UmbrellaOptions *opt)
1436 char **filename=0,tmp[STRLEN];
1437 int nread,sizenow,i,block=1;
1438 FILE *fp;
1440 fp=ffopen(fn,"r");
1441 nread=0;
1442 sizenow=0;
1443 while (fscanf(fp,"%s",tmp) != EOF)
1445 if (strlen(tmp)>=WHAM_MAXFILELEN)
1446 gmx_fatal(FARGS,"Filename too long. Only %d characters allowed\n",WHAM_MAXFILELEN);
1447 if (nread>=sizenow)
1449 sizenow+=block;
1450 srenew(filename,sizenow);
1451 for (i=sizenow-block;i<sizenow;i++)
1452 snew(filename[i],WHAM_MAXFILELEN);
1454 strcpy(filename[nread],tmp);
1455 if (opt->verbose)
1456 printf("Found file %s in %s\n",filename[nread],fn);
1457 nread++;
1459 *filenamesRet=filename;
1460 *nfilesRet=nread;
1464 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt,gmx_bool *bPipeOpen)
1466 char Buffer[1024],gunzip[1024],*Path=0;
1467 FILE *pipe=0;
1468 static gmx_bool bFirst=1;
1470 /* gzipped pdo file? */
1471 if ((strcmp(fn+strlen(fn)-3,".gz")==0))
1473 /* search gunzip executable */
1474 if(!(Path=getenv("GMX_PATH_GZIP")))
1476 if (gmx_fexist("/bin/gunzip"))
1477 sprintf(gunzip,"%s","/bin/gunzip");
1478 else if (gmx_fexist("/usr/bin/gunzip"))
1479 sprintf(gunzip,"%s","/usr/bin/gunzip");
1480 else
1481 gmx_fatal(FARGS,"Cannot find executable gunzip in /bin or /usr/bin.\n"
1482 "You may want to define the path to gunzip "
1483 "with the environment variable GMX_PATH_GZIP.",gunzip);
1485 else
1487 sprintf(gunzip,"%s/gunzip",Path);
1488 if (!gmx_fexist(gunzip))
1489 gmx_fatal(FARGS,"Cannot find executable %s. Please define the path to gunzip"
1490 " in the environmental varialbe GMX_PATH_GZIP.",gunzip);
1492 if (bFirst)
1494 printf("Using gunzig executable %s\n",gunzip);
1495 bFirst=0;
1497 if (!gmx_fexist(fn))
1499 gmx_fatal(FARGS,"File %s does not exist.\n",fn);
1501 sprintf(Buffer,"%s -c < %s",gunzip,fn);
1502 if (opt->verbose)
1503 printf("Executing command '%s'\n",Buffer);
1504 #ifdef HAVE_PIPES
1505 if((pipe=popen(Buffer,"r"))==NULL)
1507 gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
1509 #else
1510 gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
1511 #endif
1512 *bPipeOpen=TRUE;
1514 else{
1515 pipe=ffopen(fn,"r");
1516 *bPipeOpen=FALSE;
1519 return pipe;
1523 FILE *open_pdo_pipe_gmx(const char *fn)
1525 char *fnNoGz=0;
1526 FILE *pipe;
1528 /* gzipped pdo file? */
1529 if (strcmp(fn+strlen(fn)-3,".gz")==0)
1531 snew(fnNoGz,strlen(fn));
1532 strncpy(fnNoGz,fn,strlen(fn)-3);
1533 fnNoGz[strlen(fn)-3]='\0';
1534 if (gmx_fexist(fnNoGz) && gmx_fexist(fn))
1535 gmx_fatal(FARGS,"Found file %s and %s. That confuses me. Please remove one of them\n",
1536 fnNoGz,fn);
1537 pipe=ffopen(fnNoGz,"r");
1538 sfree(fnNoGz);
1540 else
1542 pipe=ffopen(fn,"r");
1545 return pipe;
1548 void pdo_close_file(FILE *fp)
1550 #ifdef HAVE_PIPES
1551 pclose(fp);
1552 #else
1553 ffclose(fp);
1554 #endif
1558 /* Reading pdo files */
1559 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1560 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1562 FILE *file;
1563 real mintmp,maxtmp,done=0.;
1564 int i;
1565 gmx_bool bPipeOpen;
1566 /* char Buffer0[1000]; */
1568 if(nfiles<1)
1569 gmx_fatal(FARGS,"No files found. Hick.");
1571 /* if min and max are not given, get min and max from the input files */
1572 if (opt->bAuto)
1574 printf("Automatic determination of boundaries from %d pdo files...\n",nfiles);
1575 opt->min=1e20;
1576 opt->max=-1e20;
1577 for(i=0;i<nfiles;++i)
1579 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1580 /*fgets(Buffer0,999,file);
1581 fprintf(stderr,"First line '%s'\n",Buffer0); */
1582 done=100.0*(i+1)/nfiles;
1583 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1584 if (opt->verbose)
1585 printf("\n");
1586 read_pdo_header(file,header,opt);
1587 /* here only determine min and max of this window */
1588 read_pdo_data(file,header,i,NULL,opt,TRUE,&mintmp,&maxtmp);
1589 if (maxtmp>opt->max)
1590 opt->max=maxtmp;
1591 if (mintmp<opt->min)
1592 opt->min=mintmp;
1593 if (bPipeOpen)
1594 pdo_close_file(file);
1595 else
1596 ffclose(file);
1598 printf("\n");
1599 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
1600 if (opt->bBoundsOnly)
1602 printf("Found option -boundsonly, now exiting.\n");
1603 exit (0);
1606 /* store stepsize in profile */
1607 opt->dz=(opt->max-opt->min)/opt->bins;
1609 /* Having min and max, we read in all files */
1610 /* Loop over all files */
1611 for(i=0;i<nfiles;++i)
1613 done=100.0*(i+1)/nfiles;
1614 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1615 if (opt->verbose)
1616 printf("\n");
1617 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1618 read_pdo_header(file,header,opt);
1619 /* load data into window */
1620 read_pdo_data(file,header,i,window,opt,FALSE,NULL,NULL);
1621 if ((window+i)->Ntot[0] == 0.0)
1622 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1623 if (bPipeOpen)
1624 pdo_close_file(file);
1625 else
1626 ffclose(file);
1628 printf("\n");
1629 for(i=0;i<nfiles;++i)
1630 sfree(fn[i]);
1631 sfree(fn);
1634 #define int2YN(a) (((a)==0)?("N"):("Y"))
1636 void read_tpr_header(const char *fn,t_UmbrellaHeader* header,t_UmbrellaOptions *opt)
1638 t_inputrec ir;
1639 int i,ngrp,d;
1640 t_state state;
1641 static int first=1;
1643 /* printf("Reading %s \n",fn); */
1644 read_tpx_state(fn,&ir,&state,NULL,NULL);
1646 if (ir.ePull != epullUMBRELLA)
1647 gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1648 " (ir.ePull = %d)\n", epull_names[ir.ePull],ir.ePull);
1650 /* nr of pull groups */
1651 ngrp=ir.pull->ngrp;
1652 if (ngrp < 1)
1653 gmx_fatal(FARGS,"This is not a tpr of umbrella simulation. Found only %d pull groups\n",ngrp);
1655 header->npullgrps=ir.pull->ngrp;
1656 header->pull_geometry=ir.pull->eGeom;
1657 copy_ivec(ir.pull->dim,header->pull_dim);
1658 header->pull_ndim=header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
1659 if (header->pull_geometry==epullgPOS && header->pull_ndim>1)
1661 gmx_fatal(FARGS,"Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
1662 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
1663 "If you have some special umbrella setup you may want to write your own pdo files\n"
1664 "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
1666 snew(header->k,ngrp);
1667 snew(header->init_dist,ngrp);
1668 snew(header->umbInitDist,ngrp);
1670 /* only z-direction with epullgCYL? */
1671 if (header->pull_geometry == epullgCYL)
1673 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
1674 gmx_fatal(FARGS,"With pull geometry 'cylinder', expected pulling in Z direction only.\n"
1675 "However, found dimensions [%s %s %s]\n",
1676 int2YN(header->pull_dim[XX]),int2YN(header->pull_dim[YY]),
1677 int2YN(header->pull_dim[ZZ]));
1680 for (i=0;i<ngrp;i++)
1682 header->k[i]=ir.pull->grp[i+1].k;
1683 if (header->k[i]==0.0)
1684 gmx_fatal(FARGS,"Pull group %d has force constant of of 0.0 in %s.\n"
1685 "That doesn't seem to be an Umbrella tpr.\n",
1686 i,fn);
1687 copy_rvec(ir.pull->grp[i+1].init,header->init_dist[i]);
1689 /* initial distance to reference */
1690 switch(header->pull_geometry)
1692 case epullgPOS:
1693 for (d=0;d<DIM;d++)
1694 if (header->pull_dim[d])
1695 header->umbInitDist[i]=header->init_dist[i][d];
1696 break;
1697 case epullgCYL:
1698 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
1699 case epullgDIST:
1700 case epullgDIR:
1701 case epullgDIRPBC:
1702 header->umbInitDist[i]=header->init_dist[i][0];
1703 break;
1704 default:
1705 gmx_fatal(FARGS,"Pull geometry %s not supported\n",epullg_names[header->pull_geometry]);
1709 if (opt->verbose || first)
1711 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
1712 fn,header->npullgrps,epullg_names[header->pull_geometry],
1713 int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
1714 header->pull_ndim);
1715 for (i=0;i<ngrp;i++)
1716 printf("\tgrp %d) k = %-5g position = %g\n",i,header->k[i],header->umbInitDist[i]);
1718 if (!opt->verbose && first)
1719 printf("\tUse option -v to see this output for all input tpr files\n");
1721 first=0;
1725 double dist_ndim(double **dx,int ndim,int line)
1727 int i;
1728 double r2=0.;
1729 for (i=0;i<ndim;i++)
1730 r2+=sqr(dx[i][line]);
1731 return sqrt(r2);
1734 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
1735 t_UmbrellaWindow * window,
1736 t_UmbrellaOptions *opt,
1737 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
1739 double **y=0,pos=0.,t,force,time0=0.,dt;
1740 int ny,nt,bins,ibin,i,g,dstep=1,nColPerGrp,nColRefOnce,nColRefEachGrp,nColExpect,ntot;
1741 real min,max,minfound=1e20,maxfound=-1e20;
1742 gmx_bool dt_ok,timeok,bHaveForce;
1743 const char *quantity;
1744 const int blocklen=4096;
1745 int *lennow=0;
1748 in force output pullf.xvg:
1749 No reference, one column per pull group
1750 in position output pullx.xvg (not cylinder)
1751 ndim reference, ndim columns per pull group
1752 in position output pullx.xvg (in geometry cylinder):
1753 ndim*2 columns per pull group (ndim for ref, ndim for group)
1756 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
1757 quantity = opt->bPullx ? "position" : "force";
1759 if (opt->bPullx)
1761 if (header->pull_geometry == epullgCYL)
1763 /* Geometry cylinder -> reference group before each pull group */
1764 nColRefEachGrp=header->pull_ndim;
1765 nColRefOnce=0;
1767 else
1769 /* Geometry NOT cylinder -> reference group only once after time column */
1770 nColRefEachGrp=0;
1771 nColRefOnce=header->pull_ndim;
1774 else /* read forces, no reference groups */
1776 nColRefEachGrp=0;
1777 nColRefOnce=0;
1779 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
1780 bHaveForce = opt->bPullf;
1782 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
1783 That avoids the somewhat tedious extraction of the right columns from the pullx files
1784 to compute the distances projection on the vector. Sorry for the laziness. */
1785 if ( (header->pull_geometry==epullgDIR || header->pull_geometry==epullgDIRPBC)
1786 && opt->bPullx)
1788 gmx_fatal(FARGS,"With pull geometries \"direction\" and \"direction_periodic\", only pull force "
1789 "reading \n(option -if) is supported at present, "
1790 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
1791 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
1792 epullg_names[header->pull_geometry]);
1795 nt=read_xvg(fn,&y,&ny);
1797 /* Check consistency */
1798 if (nt<1)
1800 gmx_fatal(FARGS,"Empty pull %s file %s\n",quantity,fn);
1802 if (ny != nColExpect)
1804 gmx_fatal(FARGS,"Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n",
1805 header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
1808 if (opt->verbose)
1809 printf("Found %d times and %d %s sets %s\n",nt,(ny-1)/nColPerGrp,quantity,fn);
1811 if (!bGetMinMax)
1813 bins=opt->bins;
1814 min=opt->min;
1815 max=opt->max;
1816 if (nt>1)
1818 window->dt=y[0][1]-y[0][0];
1820 else if (opt->nBootStrap && opt->tauBootStrap!=0.0)
1822 fprintf(stderr,"\n *** WARNING, Could not determine time step in %s\n",fn);
1825 /* Need to alocate memory and set up structure */
1826 window->nPull=header->npullgrps;
1827 window->nBin=bins;
1829 snew(window->Histo,window->nPull);
1830 snew(window->z,window->nPull);
1831 snew(window->k,window->nPull);
1832 snew(window->pos,window->nPull);
1833 snew(window->N, window->nPull);
1834 snew(window->Ntot, window->nPull);
1835 snew(window->g, window->nPull);
1836 snew(window->bsWeight, window->nPull);
1837 window->bContrib=0;
1839 if (opt->bCalcTauInt)
1840 snew(window->ztime,window->nPull);
1841 else
1842 window->ztime=NULL;
1843 snew(lennow,window->nPull);
1845 for(g=0;g<window->nPull;++g)
1847 window->z[g]=1;
1848 window->bsWeight[g]=1.;
1849 snew(window->Histo[g],bins);
1850 window->k[g]=header->k[g];
1851 window->N[g]=0;
1852 window->Ntot[g]=0;
1853 window->g[g]=1.;
1854 window->pos[g]=header->umbInitDist[g];
1855 if (opt->bCalcTauInt)
1856 window->ztime[g]=NULL;
1860 else
1861 { /* only determine min and max */
1862 minfound=1e20;
1863 maxfound=-1e20;
1864 min=max=bins=0; /* Get rid of warnings */
1867 for (i=0;i<nt;i++)
1869 /* Do you want that time frame? */
1870 t=1.0/1000*( (int) ((y[0][i]*1000) + 0.5)); /* round time to fs */
1872 /* get time step of pdo file and get dstep from opt->dt */
1873 if (i==0)
1875 time0=t;
1877 else if (i==1)
1879 dt=t-time0;
1880 if (opt->dt>0.0)
1882 dstep=(int)(opt->dt/dt+0.5);
1883 if (dstep==0)
1884 dstep=1;
1886 if (!bGetMinMax)
1887 window->dt=dt*dstep;
1890 dt_ok=(i%dstep == 0);
1891 timeok=(dt_ok && t >= opt->tmin && t <= opt->tmax);
1892 /*if (opt->verbose)
1893 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
1894 t,opt->tmin, opt->tmax, dt_ok,timeok); */
1896 if (timeok)
1898 for(g=0;g<header->npullgrps;++g)
1900 if (bHaveForce)
1902 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
1903 force=y[g+1][i];
1904 pos= -force/header->k[g] + header->umbInitDist[g];
1906 else
1908 switch (header->pull_geometry)
1910 case epullgDIST:
1911 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
1912 Distance to reference: */
1913 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
1914 pos=dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp,header->pull_ndim,i);
1915 break;
1916 case epullgPOS:
1917 case epullgCYL:
1918 /* with geometry==position, we have the reference once (nColRefOnce==ndim), but
1919 no extra reference group columns before each group (nColRefEachGrp==0)
1920 with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
1921 but ndim ref group colums before every group (nColRefEachGrp==ndim)
1922 Distance to reference: */
1923 pos=y[1 + nColRefOnce + g*(nColRefEachGrp+nColPerGrp)][i];
1924 break;
1925 default:
1926 gmx_fatal(FARGS,"Bad error, this error should have been catched before. Ups.\n");
1930 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
1931 if (bGetMinMax)
1933 if (pos<minfound)
1934 minfound=pos;
1935 if (pos>maxfound)
1936 maxfound=pos;
1938 else
1940 if (opt->bCalcTauInt && !bGetMinMax)
1942 /* save time series for autocorrelation analysis */
1943 ntot=window->Ntot[g];
1944 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
1945 if (ntot>=lennow[g])
1947 lennow[g]+=blocklen;
1948 srenew(window->ztime[g],lennow[g]);
1950 window->ztime[g][ntot]=pos;
1953 ibin=(int) floor((pos-min)/(max-min)*bins);
1954 if (opt->bCycl)
1956 if (ibin<0)
1957 while ( (ibin+=bins) < 0);
1958 else if (ibin>=bins)
1959 while ( (ibin-=bins) >= bins);
1961 if(ibin >= 0 && ibin < bins)
1963 window->Histo[g][ibin]+=1.;
1964 window->N[g]++;
1966 window->Ntot[g]++;
1970 else if (t>opt->tmax)
1972 if (opt->verbose)
1973 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n",t,opt->tmax);
1974 break;
1978 if (bGetMinMax)
1980 *mintmp=minfound;
1981 *maxtmp=maxfound;
1983 sfree(lennow);
1984 for (i=0;i<ny;i++)
1985 sfree(y[i]);
1988 void read_tpr_pullxf_files(char **fnTprs,char **fnPull,int nfiles,
1989 t_UmbrellaHeader* header,
1990 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1992 int i;
1993 real mintmp,maxtmp;
1995 printf("Reading %d tpr and pullf files\n",nfiles/2);
1997 /* min and max not given? */
1998 if (opt->bAuto)
2000 printf("Automatic determination of boundaries...\n");
2001 opt->min=1e20;
2002 opt->max=-1e20;
2003 for (i=0;i<nfiles; i++)
2005 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2006 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2007 read_tpr_header(fnTprs[i],header,opt);
2008 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2009 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2010 read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp);
2011 if (maxtmp>opt->max)
2012 opt->max=maxtmp;
2013 if (mintmp<opt->min)
2014 opt->min=mintmp;
2016 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
2017 if (opt->bBoundsOnly)
2019 printf("Found option -boundsonly, now exiting.\n");
2020 exit (0);
2023 /* store stepsize in profile */
2024 opt->dz=(opt->max-opt->min)/opt->bins;
2026 for (i=0;i<nfiles; i++)
2028 if (whaminFileType(fnTprs[i]) != whamin_tpr)
2029 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
2030 read_tpr_header(fnTprs[i],header,opt);
2031 if (whaminFileType(fnPull[i]) != whamin_pullxf)
2032 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
2033 read_pull_xf(fnPull[i],fnTprs[i],header,window+i,opt,FALSE,NULL,NULL);
2034 if (window[i].Ntot[0] == 0.0)
2035 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2038 for (i=0;i<nfiles; i++)
2040 sfree(fnTprs[i]);
2041 sfree(fnPull[i]);
2043 sfree(fnTprs);
2044 sfree(fnPull);
2047 /* Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation time.
2048 The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2050 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt,
2051 const char* fn)
2053 int nlines,ny,i,ig;
2054 double **iact;
2056 printf("Readging Integrated autocorrelation times from %s ...\n",fn);
2057 nlines=read_xvg(fn,&iact,&ny);
2058 if (nlines!=nwins)
2059 gmx_fatal(FARGS,"Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2060 nlines,fn,nwins);
2061 for (i=0;i<nlines;i++){
2062 if (window[i].nPull != ny)
2063 gmx_fatal(FARGS,"You are providing autocorrelation times with option -iiact and the\n"
2064 "number of pull groups is different in different simulations. That is not\n"
2065 "supported yet. Sorry.\n");
2066 for (ig=0;ig<window[i].nPull;ig++){
2067 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2068 window[i].g[ig]=1+2*iact[ig][i]/window[i].dt;
2070 if (iact[ig][i] <= 0.0)
2071 fprintf(stderr,"\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i],i,ig);
2077 /* Smooth autocorreltion times along the reaction coordinate. This is useful
2078 if the ACT is subject to high uncertainty in case if limited sampling. Note
2079 that -in case of limited sampling- the ACT may be severely underestimated.
2080 Note: the g=1+2tau are overwritten.
2081 if opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2082 by the smoothing
2084 void smoothIact(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2086 int i,ig,j,jg;
2087 double pos,dpos2,siglim,siglim2,gaufact,invtwosig2,w,weight,tausm;
2089 /* only evaluate within +- 3sigma of the Gausian */
2090 siglim=3.0*opt->sigSmoothIact;
2091 siglim2=dsqr(siglim);
2092 /* pre-factor of Gaussian */
2093 gaufact=1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2094 invtwosig2=0.5/dsqr(opt->sigSmoothIact);
2096 for (i=0;i<nwins;i++)
2098 snew(window[i].tausmooth,window[i].nPull);
2099 for (ig=0;ig<window[i].nPull;ig++)
2101 tausm=0.;
2102 weight=0;
2103 pos=window[i].pos[ig];
2104 for (j=0;j<nwins;j++)
2106 for (jg=0;jg<window[j].nPull;jg++)
2108 dpos2=dsqr(window[j].pos[jg]-pos);
2109 if (dpos2<siglim2){
2110 w=gaufact*exp(-dpos2*invtwosig2);
2111 weight+=w;
2112 tausm+=w*window[j].tau[jg];
2113 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2114 w,dpos2,pos,gaufact,invtwosig2); */
2118 tausm/=weight;
2119 if (opt->bAllowReduceIact || tausm>window[i].tau[ig])
2120 window[i].tausmooth[ig]=tausm;
2121 else
2122 window[i].tausmooth[ig]=window[i].tau[ig];
2123 window[i].g[ig] = 1+2*tausm/window[i].dt;
2128 /* try to compute the autocorrelation time for each umbrealla window */
2129 #define WHAM_AC_ZERO_LIMIT 0.05
2130 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,
2131 t_UmbrellaOptions *opt, const char *fn)
2133 int i,ig,ncorr,ntot,j,k,*count,restart;
2134 real *corr,c0,dt,timemax,tmp;
2135 real *ztime,av,tausteps;
2136 FILE *fp,*fpcorr=0;
2138 if (opt->verbose)
2139 fpcorr=xvgropen("hist_autocorr.xvg","Autocorrelation functions of umbrella windows",
2140 "time [ps]","autocorrelation function",opt->oenv);
2142 printf("\n");
2143 for (i=0;i<nwins;i++)
2145 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...",100.*(i+1)/nwins);
2146 fflush(stdout);
2147 ntot=window[i].Ntot[0];
2149 /* using half the maximum time as length of autocorrelation function */
2150 ncorr=ntot/2;
2151 if (ntot<10)
2152 gmx_fatal(FARGS,"Tryig to estimtate autocorrelation time from only %d"
2153 " points. Provide more pull data!",ntot);
2154 snew(corr,ncorr);
2155 /* snew(corrSq,ncorr); */
2156 snew(count,ncorr);
2157 dt=window[i].dt;
2158 timemax=dt*ncorr;
2159 snew(window[i].tau,window[i].nPull);
2160 restart=(int)(opt->acTrestart/dt+0.5);
2161 if (restart==0)
2162 restart=1;
2164 for (ig=0;ig<window[i].nPull;ig++)
2166 if (ntot != window[i].Ntot[ig])
2167 gmx_fatal(FARGS,"Encountered different nr of frames in different pull groups.\n"
2168 "That should not happen. (%d and %d)\n", ntot,window[i].Ntot[ig]);
2169 ztime=window[i].ztime[ig];
2171 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2172 for(j=0, av=0; (j<ntot); j++)
2173 av+=ztime[j];
2174 av/=ntot;
2175 for(k=0; (k<ncorr); k++)
2177 corr[k]=0.;
2178 count[k]=0;
2180 for(j=0; (j<ntot); j+=restart)
2181 for(k=0; (k<ncorr) && (j+k < ntot); k++)
2183 tmp=(ztime[j]-av)*(ztime[j+k]-av);
2184 corr [k] += tmp;
2185 /* corrSq[k] += tmp*tmp; */
2186 count[k]++;
2188 /* divide by nr of frames for each time displacement */
2189 for(k=0; (k<ncorr); k++)
2191 /* count probably = (ncorr-k+(restart-1))/restart; */
2192 corr[k] = corr[k]/count[k];
2193 /* variance of autocorrelation function */
2194 /* corrSq[k]=corrSq[k]/count[k]; */
2196 /* normalize such that corr[0] == 0 */
2197 c0=1./corr[0];
2198 for(k=0; (k<ncorr); k++)
2200 corr[k]*=c0;
2201 /* corrSq[k]*=c0*c0; */
2204 /* write ACFs in verbose mode */
2205 if (fpcorr)
2207 for(k=0; (k<ncorr); k++)
2208 fprintf(fpcorr,"%g %g\n",k*dt,corr[k]);
2209 fprintf(fpcorr,"&\n");
2212 /* esimate integrated correlation time, fitting is too unstable */
2213 tausteps = 0.5*corr[0];
2214 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2215 for(j=1; (j<ncorr) && (corr[j]>WHAM_AC_ZERO_LIMIT); j++)
2216 tausteps += corr[j];
2218 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2219 Kumar et al, eq. 28 ff. */
2220 window[i].tau[ig] = tausteps*dt;
2221 window[i].g[ig] = 1+2*tausteps;
2222 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2223 } /* ig loop */
2224 sfree(corr);
2225 sfree(count);
2227 printf(" done\n");
2228 if (fpcorr)
2229 ffclose(fpcorr);
2231 /* plot IACT along reaction coordinate */
2232 fp=xvgropen(fn,"Integrated autocorrelation times","z","IACT [ps]",opt->oenv);
2233 fprintf(fp,"@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2234 fprintf(fp,"# WIN tau(gr1) tau(gr2) ...\n");
2235 for (i=0;i<nwins;i++)
2237 fprintf(fp,"# %3d ",i);
2238 for (ig=0;ig<window[i].nPull;ig++)
2239 fprintf(fp," %11g",window[i].tau[ig]);
2240 fprintf(fp,"\n");
2242 for (i=0;i<nwins;i++)
2243 for (ig=0;ig<window[i].nPull;ig++)
2244 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tau[ig]);
2245 if (opt->sigSmoothIact > 0.0)
2247 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2248 opt->sigSmoothIact);
2249 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2250 smoothIact(window,nwins,opt);
2251 fprintf(fp,"&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2252 fprintf(fp,"@ s1 symbol color 2\n");
2253 for (i=0;i<nwins;i++)
2254 for (ig=0;ig<window[i].nPull;ig++)
2255 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tausmooth[ig]);
2257 ffclose(fp);
2258 printf("Wrote %s\n",fn);
2261 /* compute average and sigma of each umbrella window */
2262 void averageSigma(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2264 int i,ig,ntot,k;
2265 real av,sum2,sig,diff,*ztime,nSamplesIndep;
2267 for (i=0;i<nwins;i++)
2269 snew(window[i].aver, window[i].nPull);
2270 snew(window[i].sigma,window[i].nPull);
2272 ntot=window[i].Ntot[0];
2273 for (ig=0;ig<window[i].nPull;ig++)
2275 ztime=window[i].ztime[ig];
2276 for (k=0, av=0.; k<ntot; k++)
2277 av+=ztime[k];
2278 av/=ntot;
2279 for (k=0, sum2=0.; k<ntot; k++)
2281 diff=ztime[k]-av;
2282 sum2+=diff*diff;
2284 sig=sqrt(sum2/ntot);
2285 window[i].aver[ig]=av;
2287 /* Note: This estimate for sigma is biased from the limited sampling.
2288 Correct sigma by n/(n-1) where n = number of independent
2289 samples. Only possible if IACT is known.
2291 if (window[i].tau)
2293 nSamplesIndep=window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2294 window[i].sigma[ig]=sig * nSamplesIndep/(nSamplesIndep-1);
2296 else
2297 window[i].sigma[ig]=sig;
2298 printf("win %d, aver = %f sig = %f\n",i,av,window[i].sigma[ig]);
2304 /* Use histograms to compute average force on pull group.
2305 In addition, compute the sigma of the histogram.
2307 void computeAverageForce(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt)
2309 int i,j,bins=opt->bins,k;
2310 double dz,min=opt->min,max=opt->max,displAv,displAv2,temp,distance,ztot,ztot_half,w,weight;
2311 double posmirrored;
2313 dz=(max-min)/bins;
2314 ztot=opt->max-min;
2315 ztot_half=ztot/2;
2317 /* Compute average displacement from histograms */
2318 for(j=0;j<nWindows;++j)
2320 snew(window[j].forceAv,window[j].nPull);
2321 for(k=0;k<window[j].nPull;++k)
2323 displAv = 0.0;
2324 displAv2 = 0.0;
2325 weight = 0.0;
2326 for(i=0;i<opt->bins;++i)
2328 temp=(1.0*i+0.5)*dz+min;
2329 distance = temp - window[j].pos[k];
2330 if (opt->bCycl)
2331 { /* in cyclic wham: */
2332 if (distance > ztot_half) /* |distance| < ztot_half */
2333 distance-=ztot;
2334 else if (distance < -ztot_half)
2335 distance+=ztot;
2337 w=window[j].Histo[k][i]/window[j].g[k];
2338 displAv += w*distance;
2339 displAv2 += w*sqr(distance);
2340 weight+=w;
2341 /* Are we near min or max? We are getting wron forces from the histgrams since
2342 the histigrams are zero outside [min,max). Therefore, assume that the position
2343 on the other side of the histomgram center is equally likely. */
2344 if (!opt->bCycl)
2346 posmirrored=window[j].pos[k]-distance;
2347 if (posmirrored>=max || posmirrored<min)
2349 displAv += -w*distance;
2350 displAv2 += w*sqr(-distance);
2351 weight+=w;
2355 displAv /= weight;
2356 displAv2 /= weight;
2358 /* average force from average displacement */
2359 window[j].forceAv[k] = displAv*window[j].k[k];
2360 /* sigma from average square displacement */
2361 /* window[j].sigma [k] = sqrt(displAv2); */
2362 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2367 /* Check if the complete reaction coordinate is covered by the histograms */
2368 void checkReactionCoordinateCovered(t_UmbrellaWindow *window,int nwins,
2369 t_UmbrellaOptions *opt)
2371 int i,ig,j,bins=opt->bins,bBoundary;
2372 real avcount=0,z,relcount,*count;
2373 snew(count,opt->bins);
2375 for(j=0;j<opt->bins;++j)
2377 for (i=0;i<nwins;i++){
2378 for (ig=0;ig<window[i].nPull;ig++)
2379 count[j]+=window[i].Histo[ig][j];
2381 avcount+=1.0*count[j];
2383 avcount/=bins;
2384 for(j=0;j<bins;++j)
2386 relcount=count[j]/avcount;
2387 z=(j+0.5)*opt->dz+opt->min;
2388 bBoundary=( j<bins/20 || (bins-j)>bins/20 );
2389 /* check for bins with no data */
2390 if (count[j] == 0)
2391 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2392 "You may not get a reasonable profile. Check your histograms!\n",j,z);
2393 /* and check for poor sampling */
2394 else if (relcount<0.005 && !bBoundary)
2395 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n",j,z);
2397 sfree(count);
2401 void guessPotByIntegration(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt,
2402 char *fn)
2404 int i,j,ig,bins=opt->bins,nHist,winmin,groupmin;
2405 double dz,min=opt->min,*pot,pos,hispos,dist,diff,fAv,distmin,*f;
2406 FILE *fp;
2408 dz=(opt->max-min)/bins;
2410 printf("Getting initial potential by integration.\n");
2412 /* Compute average displacement from histograms */
2413 computeAverageForce(window,nWindows,opt);
2415 /* Get force for each bin from all histograms in this bin, or, alternatively,
2416 if no histograms are inside this bin, from the closest histogram */
2417 snew(pot,bins);
2418 snew(f,bins);
2419 for(j=0;j<opt->bins;++j)
2421 pos=(1.0*j+0.5)*dz+min;
2422 nHist=0;
2423 fAv=0.;
2424 distmin=1e20;
2425 groupmin=winmin=0;
2426 for (i=0;i<nWindows;i++)
2428 for (ig=0;ig<window[i].nPull;ig++)
2430 hispos=window[i].pos[ig];
2431 dist=fabs(hispos-pos);
2432 /* average force within bin */
2433 if (dist < dz/2)
2435 nHist++;
2436 fAv+=window[i].forceAv[ig];
2438 /* at the same time, rememer closest histogram */
2439 if (dist<distmin){
2440 winmin=i;
2441 groupmin=ig;
2442 distmin=dist;
2446 /* if no histogram found in this bin, use closest histogram */
2447 if (nHist>0)
2448 fAv=fAv/nHist;
2449 else{
2450 fAv=window[winmin].forceAv[groupmin];
2452 f[j]=fAv;
2454 for(j=1;j<opt->bins;++j)
2455 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2457 /* cyclic wham: linearly correct possible offset */
2458 if (opt->bCycl)
2460 diff=(pot[bins-1]-pot[0])/(bins-1);
2461 for(j=1;j<opt->bins;++j)
2462 pot[j]-=j*diff;
2464 if (opt->verbose)
2466 fp=xvgropen("pmfintegrated.xvg","PMF from force integration","z","PMF [kJ/mol]",opt->oenv);
2467 for(j=0;j<opt->bins;++j)
2468 fprintf(fp,"%g %g\n",(j+0.5)*dz+opt->min,pot[j]);
2469 ffclose(fp);
2470 printf("verbose mode: wrote %s with PMF from interated forces\n","pmfintegrated.xvg");
2473 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2474 energy offsets which are usually determined by wham
2475 First: turn pot into probabilities:
2477 for(j=0;j<opt->bins;++j)
2478 pot[j]=exp(-pot[j]/(8.314e-3*opt->Temperature));
2479 calc_z(pot,window,nWindows,opt,TRUE);
2481 sfree(pot);
2482 sfree(f);
2486 int gmx_wham(int argc,char *argv[])
2488 const char *desc[] = {
2489 "This is an analysis program that implements the Weighted",
2490 "Histogram Analysis Method (WHAM). It is intended to analyze",
2491 "output files generated by umbrella sampling simulations to ",
2492 "compute a potential of mean force (PMF). [PAR] ",
2493 "At present, three input modes are supported.[BR]",
2494 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the[BR]",
2495 " file names of the umbrella simulation run-input files (tpr files),[BR]",
2496 " AND, with option [TT]-ix[tt], a file which contains file names of [BR]",
2497 " the pullx mdrun output files. The tpr and pullx files must [BR]",
2498 " be in corresponding order, i.e. the first tpr created the [BR]",
2499 " first pullx, etc.[BR]",
2500 "[TT]*[tt] Same as the previous input mode, except that the the user [BR]",
2501 " provides the pull force output file names (pullf.xvg) with option [TT]-if[tt].[BR]",
2502 " From the pull force the position in the umbrella potential is [BR]",
2503 " computed. This does not work with tabulated umbrella potentials.[BR]"
2504 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) pdo files, i.e.[BR]",
2505 " the gromacs 3.3 umbrella output files. If you have some unusual[BR]"
2506 " reaction coordinate you may also generate your own pdo files and [BR]",
2507 " feed them with the [TT]-ip[tt] option into to g_wham. The pdo file header [BR]",
2508 " must be similar to the following:[BR]",
2509 "[TT]# UMBRELLA 3.0[BR]",
2510 "# Component selection: 0 0 1[BR]",
2511 "# nSkip 1[BR]",
2512 "# Ref. Group 'TestAtom'[BR]",
2513 "# Nr. of pull groups 2[BR]",
2514 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
2515 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
2516 "#####[tt][BR]",
2517 "Nr of pull groups, umbrella positions, force constants, and names ",
2518 "may (of course) differ. Following the header, a time column and ",
2519 "a data columns for each pull group follows (i.e. the displacement",
2520 "with respect to the umbrella center). Up to four pull groups are possible ",
2521 "per pdo file at present.[PAR]",
2522 "By default, the output files are[BR]",
2523 " [TT]-o[tt] PMF output file[BR]",
2524 " [TT]-hist[tt] histograms output file[BR]",
2525 "Always check whether the histograms sufficiently overlap![PAR]",
2526 "The umbrella potential is assumed to be harmonic and the force constants are ",
2527 "read from the tpr or pdo files. If a non-harmonic umbrella force was applied ",
2528 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
2529 "WHAM OPTIONS[BR]------------[BR]",
2530 " [TT]-bins[tt] Nr of bins used in analysis[BR]",
2531 " [TT]-temp[tt] Temperature in the simulations[BR]",
2532 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
2533 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
2534 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
2535 "The data points which are used ",
2536 "to compute the profile can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
2537 "Play particularly with [TT]-b[tt] to ensure sufficient equilibration in each ",
2538 "umbrella window![PAR]",
2539 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ([TT]-nolog[tt]) as ",
2540 "probability. The unit can be specified with [TT]-unit[tt]. With energy output, ",
2541 "the energy in the first bin is defined to be zero. If you want the free energy at a different ",
2542 "position to be zero, choose with [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
2543 "For cyclic (or periodic) reaction coordinates (dihedral angle, channel PMF",
2544 "without osmotic gradient), the option [TT]-cycl[tt] is useful. g_wham will make use of the ",
2545 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
2546 "reaction coordinate will assumed be be neighbors[PAR]",
2547 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output (useful for membrane etc.)[PAR]",
2548 "AUTOCORRELATIONS[BR]----------------[BR]",
2549 "With [TT]-ac[tt], g_wham estimates the integrated autocorrelation time (IACT) tau for each ",
2550 "umbrella window and weights the respective window with 1/[1+2*tau/dt]. The IACTs are written ",
2551 "to the file defined with [TT]-oiact[tt]. In verbose mode, all autocorrelation functions (ACFs) are",
2552 "written to hist_autocorr.xvg. Because the IACTs can be severely underestimated in case of ",
2553 "limited sampling, option [TT]-acsig[tt] allows to smooth the IACTs along the reaction coordinate ",
2554 "with a Gaussian (sigma provided with [TT]-acsig[tt], see output in iact.xvg). Note that the ",
2555 "IACTs are estimated by simple integration of the ACFs while the ACFs are larger 0.05.",
2556 "If you prefer to compute the IACTs by a more sophisticated (but possibly less robust) method ",
2557 "such as fitting to a double exponential, you can compute the IACTs with g_analyze and provide",
2558 "them to g_wham with the file iact-in.dat (option [TT]-iiact[tt]). iact-in.dat should contain ",
2559 "one line per input file (pdo or pullx/f file) and one column per pull group in the respective file.[PAR]"
2560 "ERROR ANALYSIS[BR]--------------[BR]",
2561 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
2562 "otherwise the statistical error may be substantially underestimated !![BR]",
2563 "More background and examples for the bootstrap technique can be found in ",
2564 "Hub, de Groot and Van der Spoel, JCTC (2010)[BR]",
2565 "-nBootstrap defines the nr of bootstraps (use, e.g., 100). Four bootstrapping methods are supported and ",
2566 "selected with [TT]-bs-method[tt].[BR]",
2567 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent data points, and ",
2568 " the bootstrap is carried out by assigning random weights to the histograms (\"Bayesian bootstrap\").",
2569 " Note that each point along the reaction coordinate",
2570 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
2571 "statistical error is underestimated![BR]",
2572 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. For each",
2573 "bootstrap, N histograms are randomly chosen from the N given histograms (allowing duplication, i.e. ",
2574 "sampling with replacement).",
2575 "To avoid gaps without data along the reaction coordinate blocks of histograms ([TT]-histbs-block[tt])",
2576 "may be defined. In that case, the given histograms are divided into blocks and ",
2577 "only histograms within each block are mixed. Note that the histograms",
2578 "within each block must be representative for all possible histograms, otherwise the",
2579 "statistical error is underestimated![BR]",
2580 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
2581 "such that the generated data points are distributed according the given histograms ",
2582 "and properly autocorrelated. The ",
2583 "autocorrelation time (ACT) for each window must be known, so use [TT]-ac[tt] or provide the ACT",
2584 "with [TT]-iiact[tt]. If the ACT of all windows are identical (and known), you can also ",
2585 "provide them with [TT]-bs-tau[tt]. Note that this method may severely underestimate the error ",
2586 "in case of limited sampling, that is if individual histograms do not represent the complete",
2587 "phase space at the respective positions.[BR]",
2588 " (4) [TT]traj-gauss[tt] The same as Method [TT]traj[tt], but the trajectories are not bootstrapped ",
2589 "from the umbrella histograms but from Gaussians with the average and width of the umbrella ",
2590 "histograms. That method yields similar error estimates like method [TT]traj[tt].[BR]"
2591 "Bootstrapping output:[BR]",
2592 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
2593 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
2594 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, and, ",
2595 "with bootstrap method [TT]traj[tt], the cumulative distribution functions of the histograms.",
2598 const char *en_unit[]={NULL,"kJ","kCal","kT",NULL};
2599 const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",NULL};
2600 const char *en_bsMethod[]={ NULL,"b-hist", "hist", "traj", "traj-gauss", NULL };
2602 static t_UmbrellaOptions opt;
2604 t_pargs pa[] = {
2605 { "-min", FALSE, etREAL, {&opt.min},
2606 "Minimum coordinate in profile"},
2607 { "-max", FALSE, etREAL, {&opt.max},
2608 "Maximum coordinate in profile"},
2609 { "-auto", FALSE, etBOOL, {&opt.bAuto},
2610 "determine min and max automatically"},
2611 { "-bins",FALSE, etINT, {&opt.bins},
2612 "Number of bins in profile"},
2613 { "-temp", FALSE, etREAL, {&opt.Temperature},
2614 "Temperature"},
2615 { "-tol", FALSE, etREAL, {&opt.Tolerance},
2616 "Tolerance"},
2617 { "-v", FALSE, etBOOL, {&opt.verbose},
2618 "verbose mode"},
2619 { "-b", FALSE, etREAL, {&opt.tmin},
2620 "first time to analyse (ps)"},
2621 { "-e", FALSE, etREAL, {&opt.tmax},
2622 "last time to analyse (ps)"},
2623 { "-dt", FALSE, etREAL, {&opt.dt},
2624 "Analyse only every dt ps"},
2625 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
2626 "Write histograms and exit"},
2627 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
2628 "Determine min and max and exit (with -auto)"},
2629 { "-log", FALSE, etBOOL, {&opt.bLog},
2630 "Calculate the log of the profile before printing"},
2631 { "-unit", FALSE, etENUM, {en_unit},
2632 "energy unit in case of log output" },
2633 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
2634 "Define profile to 0.0 at this position (with -log)"},
2635 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
2636 "Create cyclic/periodic profile. Assumes min and max are the same point."},
2637 { "-sym", FALSE, etBOOL, {&opt.bSym},
2638 "symmetrize profile around z=0"},
2639 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
2640 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
2641 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
2642 "calculate integrated autocorrelation times and use in wham"},
2643 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
2644 "Smooth autocorrelation times along reaction coordinate with Gaussian of this sigma"},
2645 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
2646 "When computing autocorrelation functions, restart computing every .. (ps)"},
2647 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
2648 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
2649 "during smoothing"},
2650 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
2651 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
2652 { "-bs-method", FALSE, etENUM, {en_bsMethod},
2653 "bootstrap method" },
2654 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
2655 "Autocorrelation time (ACT) assumed for all histograms. Use option -ac if ACT is unknown."},
2656 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
2657 "seed for bootstrapping. (-1 = use time)"},
2658 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
2659 "when mixing histograms only mix within blocks of -histbs-block."},
2660 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
2661 "verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
2662 { "-stepout", FALSE, etINT, {&opt.stepchange},
2663 "HIDDENWrite maximum change every ... (set to 1 with -v)"},
2664 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
2665 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
2668 t_filenm fnm[] = {
2669 { efDAT, "-ix","pullx-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2670 { efDAT, "-if","pullf-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2671 { efDAT, "-it","tpr-files",ffOPTRD}, /* wham input: tprs */
2672 { efDAT, "-ip","pdo-files",ffOPTRD}, /* wham input: pdo files (gmx3 style) */
2673 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
2674 { efXVG, "-hist","histo", ffWRITE}, /* output file for histograms */
2675 { efXVG, "-oiact","iact",ffOPTWR}, /* writing integrated autocorrelation times */
2676 { efDAT, "-iiact","iact-in",ffOPTRD}, /* reading integrated autocorrelation times */
2677 { efXVG, "-bsres","bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
2678 { efXVG, "-bsprof","bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
2679 { efDAT, "-tab","umb-pot",ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
2682 int i,j,l,nfiles,nwins,nfiles2;
2683 t_UmbrellaHeader header;
2684 t_UmbrellaWindow * window=NULL;
2685 double *profile,maxchange=1e20;
2686 gmx_bool bMinSet,bMaxSet,bAutoSet,bExact=FALSE;
2687 char **fninTpr,**fninPull,**fninPdo;
2688 const char *fnPull;
2689 FILE *histout,*profout;
2690 char ylabel[256],title[256];
2692 #define NFILE asize(fnm)
2694 CopyRight(stderr,argv[0]);
2696 opt.bins=200;
2697 opt.verbose=FALSE;
2698 opt.bHistOnly=FALSE;
2699 opt.bCycl=FALSE;
2700 opt.tmin=50;
2701 opt.tmax=1e20;
2702 opt.dt=0.0;
2703 opt.min=0;
2704 opt.max=0;
2705 opt.bAuto=TRUE;
2707 /* bootstrapping stuff */
2708 opt.nBootStrap=0;
2709 opt.bsMethod=bsMethod_hist;
2710 opt.tauBootStrap=0.0;
2711 opt.bsSeed=-1;
2712 opt.histBootStrapBlockLength=8;
2713 opt.bs_verbose=FALSE;
2715 opt.bLog=TRUE;
2716 opt.unit=en_kJ;
2717 opt.zProf0=0.;
2718 opt.Temperature=298;
2719 opt.Tolerance=1e-6;
2720 opt.bBoundsOnly=FALSE;
2721 opt.bSym=FALSE;
2722 opt.bCalcTauInt=FALSE;
2723 opt.sigSmoothIact=0.0;
2724 opt.bAllowReduceIact=TRUE;
2725 opt.bInitPotByIntegration=TRUE;
2726 opt.acTrestart=1.0;
2727 opt.stepchange=100;
2728 opt.stepUpdateContrib=100;
2730 parse_common_args(&argc,argv,PCA_BE_NICE,
2731 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&opt.oenv);
2733 opt.unit=nenum(en_unit);
2734 opt.bsMethod=nenum(en_bsMethod);
2736 opt.bProf0Set=opt2parg_bSet("-zprof0", asize(pa), pa);
2738 opt.bTab=opt2bSet("-tab",NFILE,fnm);
2739 opt.bPdo=opt2bSet("-ip",NFILE,fnm);
2740 opt.bTpr=opt2bSet("-it",NFILE,fnm);
2741 opt.bPullx=opt2bSet("-ix",NFILE,fnm);
2742 opt.bPullf=opt2bSet("-if",NFILE,fnm);
2743 opt.bTauIntGiven=opt2bSet("-iiact",NFILE,fnm);
2744 if (opt.bTab && opt.bPullf)
2745 gmx_fatal(FARGS,"Force input does not work with tabulated potentials. "
2746 "Provide pullx.xvg or pdo files!");
2748 #define WHAMBOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
2749 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx,opt.bPullf))
2750 gmx_fatal(FARGS,"Give either pullx (-ix) OR pullf (-if) data. Not both.");
2751 if ( !opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
2752 gmx_fatal(FARGS,"g_wham supports three input modes, pullx, pullf, or pdo file input."
2753 "\n\n Check g_wham -h !");
2755 opt.fnPdo=opt2fn("-ip",NFILE,fnm);
2756 opt.fnTpr=opt2fn("-it",NFILE,fnm);
2757 opt.fnPullf=opt2fn("-if",NFILE,fnm);
2758 opt.fnPullx=opt2fn("-ix",NFILE,fnm);
2760 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
2761 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
2762 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
2763 if ( (bMinSet || bMaxSet) && bAutoSet)
2764 gmx_fatal(FARGS,"With -auto, do not give -min or -max\n");
2766 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
2767 gmx_fatal(FARGS,"When giving -min, you must give -max (and vice versa), too\n");
2769 if (bMinSet && opt.bAuto)
2771 printf("Note: min and max given, switching off -auto.\n");
2772 opt.bAuto=FALSE;
2775 if (opt.bTauIntGiven && opt.bCalcTauInt)
2776 gmx_fatal(FARGS,"Either read (option -iiact) or calculate (option -ac) the\n"
2777 "the autocorrelation times. Not both.");
2779 if (opt.tauBootStrap>0.0 && opt2parg_bSet("-ac",asize(pa), pa))
2780 gmx_fatal(FARGS,"Either compute autocorrelation times (ACTs) (option -ac) or "
2781 "provide it with -bs-tau for bootstrapping. Not Both.\n");
2782 if (opt.tauBootStrap>0.0 && opt2bSet("-iiact",NFILE,fnm))
2783 gmx_fatal(FARGS,"Either provide autocorrelation times (ACTs) with file iact-in.dat "
2784 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
2787 /* Reading gmx4 pull output and tpr files */
2788 if (opt.bTpr || opt.bPullf || opt.bPullx)
2790 read_wham_in(opt.fnTpr,&fninTpr,&nfiles,&opt);
2792 fnPull=opt.bPullf ? opt.fnPullf : opt.fnPullx;
2793 read_wham_in(fnPull,&fninPull,&nfiles2,&opt);
2794 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
2795 nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
2796 if (nfiles!=nfiles2)
2797 gmx_fatal(FARGS,"Found %d file names in %s, but %d in %s\n",nfiles,
2798 opt.fnTpr,nfiles2,fnPull);
2799 window=initUmbrellaWindows(nfiles);
2800 read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, window, &opt);
2802 else
2803 { /* reading pdo files */
2804 read_wham_in(opt.fnPdo,&fninPdo,&nfiles,&opt);
2805 printf("Found %d pdo files in %s\n",nfiles,opt.fnPdo);
2806 window=initUmbrellaWindows(nfiles);
2807 read_pdo_files(fninPdo,nfiles, &header, window, &opt);
2809 nwins=nfiles;
2811 /* enforce equal weight for all histograms? */
2812 if (opt.bHistEq)
2813 enforceEqualWeights(window,nwins);
2815 /* write histograms */
2816 histout=xvgropen(opt2fn("-hist",NFILE,fnm),"Umbrella histograms",
2817 "z","count",opt.oenv);
2818 for(l=0;l<opt.bins;++l)
2820 fprintf(histout,"%e\t",(double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
2821 for(i=0;i<nwins;++i)
2823 for(j=0;j<window[i].nPull;++j)
2825 fprintf(histout,"%e\t",window[i].Histo[j][l]);
2828 fprintf(histout,"\n");
2830 ffclose(histout);
2831 printf("Wrote %s\n",opt2fn("-hist",NFILE,fnm));
2832 if (opt.bHistOnly)
2834 printf("Wrote histograms to %s, now exiting.\n",opt2fn("-hist",NFILE,fnm));
2835 return 0;
2838 /* Using tabulated umbrella potential */
2839 if (opt.bTab)
2840 setup_tab(opt2fn("-tab",NFILE,fnm),&opt);
2842 /* Integrated autocorrelation times provided ? */
2843 if (opt.bTauIntGiven)
2844 readIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-iiact",NFILE,fnm));
2846 /* Compute integrated autocorrelation times */
2847 if (opt.bCalcTauInt)
2848 calcIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-oiact",NFILE,fnm));
2850 /* calc average and sigma for each histogram
2851 (maybe required for bootstrapping. If not, this is fast anyhow) */
2852 if (opt.nBootStrap && opt.bsMethod==bsMethod_trajGauss)
2853 averageSigma(window,nwins,&opt);
2855 /* Get initial potential by simple integration */
2856 if (opt.bInitPotByIntegration)
2857 guessPotByIntegration(window,nwins,&opt,0);
2859 /* Check if complete reaction coordinate is covered */
2860 checkReactionCoordinateCovered(window,nwins,&opt);
2862 /* Calculate profile */
2863 snew(profile,opt.bins);
2864 if (opt.verbose)
2865 opt.stepchange=1;
2866 i=0;
2869 if ( (i%opt.stepUpdateContrib) == 0)
2870 setup_acc_wham(profile,window,nwins,&opt);
2871 if (maxchange<opt.Tolerance)
2873 bExact=TRUE;
2874 /* if (opt.verbose) */
2875 printf("Switched to exact iteration in iteration %d\n",i);
2877 calc_profile(profile,window,nwins,&opt,bExact);
2878 if (((i%opt.stepchange) == 0 || i==1) && !i==0)
2879 printf("\t%4d) Maximum change %e\n",i,maxchange);
2880 i++;
2881 } while ( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
2882 printf("Converged in %d iterations. Final maximum change %g\n",i,maxchange);
2884 /* calc error from Kumar's formula */
2885 /* Unclear how the error propagates along reaction coordinate, therefore
2886 commented out */
2887 /* calc_error_kumar(profile,window, nwins,&opt); */
2889 /* Write profile in energy units? */
2890 if (opt.bLog)
2892 prof_normalization_and_unit(profile,&opt);
2893 strcpy(ylabel,en_unit_label[opt.unit]);
2894 strcpy(title,"Umbrella potential");
2896 else
2898 strcpy(ylabel,"Density of states");
2899 strcpy(title,"Density of states");
2902 /* symmetrize profile around z=0? */
2903 if (opt.bSym)
2904 symmetrizeProfile(profile,&opt);
2906 /* write profile or density of states */
2907 profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,opt.oenv);
2908 for(i=0;i<opt.bins;++i)
2909 fprintf(profout,"%e\t%e\n",(double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
2910 ffclose(profout);
2911 printf("Wrote %s\n",opt2fn("-o",NFILE,fnm));
2913 /* Bootstrap Method */
2914 if (opt.nBootStrap)
2915 do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm),
2916 opt2fn("-hist",NFILE,fnm),
2917 ylabel, profile, window, nwins, &opt);
2919 sfree(profile);
2920 freeUmbrellaWindows(window,nfiles);
2922 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
2923 please_cite(stdout,"Hub2010");
2925 thanx(stderr);
2926 return 0;