Removed reference to nonexistent file in g_helix.
[gromacs.git] / src / tools / gmx_wham.c
blob3af6407104469b1080c6f413578c9a1ec80ab979
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- */
2 /*
3 * This file is part of the GROMACS molecular simulation package.
5 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
6 * Copyright (c) 2001-2004, The GROMACS development team,
7 * check out http://www.gromacs.org for more information.
8 * Copyright (c) 2012, by the GROMACS development team, led by
9 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
10 * others, as listed in the AUTHORS file in the top-level source
11 * directory and at http://www.gromacs.org.
13 * GROMACS is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public License
15 * as published by the Free Software Foundation; either version 2.1
16 * of the License, or (at your option) any later version.
18 * GROMACS is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with GROMACS; if not, see
25 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
26 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28 * If you want to redistribute modifications to GROMACS, please
29 * consider that scientific software is very special. Version
30 * control is crucial - bugs must be traceable. We will be happy to
31 * consider code for inclusion in the official distribution, but
32 * derived work must not be called official GROMACS. Details are found
33 * in the README & COPYING files - if they are missing, get the
34 * official version at http://www.gromacs.org.
36 * To help us fund GROMACS development, we humbly ask that you cite
37 * the research papers on the package. Check out http://www.gromacs.org.
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43 #include <stdio.h>
45 #include "statutil.h"
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "vec.h"
49 #include "copyrite.h"
50 #include "statutil.h"
51 #include "tpxio.h"
52 #include "names.h"
53 #include "gmx_random.h"
54 #include "gmx_ana.h"
56 #ifndef HAVE_STRDUP
57 #define HAVE_STRDUP
58 #endif
59 #include "string2.h"
60 #include "xvgr.h"
63 #define WHAM_MAXFILELEN 2048
65 /* enum for energy units */
66 enum { enSel, en_kJ, en_kCal, en_kT, enNr };
67 /* enum for type of input files (pdos, tpr, or pullf) */
68 enum { whamin_unknown, whamin_tpr, whamin_pullxf, whamin_pdo };
69 /* enum for bootstrapping method (
70 - bootstrap complete histograms with continuous weights (Bayesian bootstrap)
71 - bootstrap complete histograms
72 - bootstrap trajectories from given umbrella histograms
73 - bootstrap trajectories from Gaussian with mu/sigam computed from
74 the respective histogram
76 ********************************************************************
77 FOR MORE DETAILS ON THE BOOTSTRAP METHODS (INCLUDING EXAMPLES), SEE
78 JS Hub, BL de Groot, D van der Spoel
79 [TT]g_wham[tt] - A free weighted histogram analysis implementation including
80 robust error and autocorrelation estimates,
81 J Chem Theory Comput, accepted (2010)
82 ********************************************************************
84 enum { bsMethod_unknown, bsMethod_BayesianHist, bsMethod_hist,
85 bsMethod_traj, bsMethod_trajGauss };
88 typedef struct
90 /* umbrella with pull code of gromacs 4.x */
91 int npullgrps; /* nr of pull groups in tpr file */
92 int pull_geometry; /* such as distance, position */
93 ivec pull_dim; /* pull dimension with geometry distance */
94 int pull_ndim; /* nr of pull_dim != 0 */
95 real *k; /* force constants in tpr file */
96 rvec *init_dist; /* reference displacements */
97 real *umbInitDist; /* reference displacement in umbrella direction */
99 /* From here, old pdo stuff */
100 int nSkip;
101 char Reference[256];
102 int nPull;
103 int nDim;
104 ivec Dims;
105 char PullName[4][256];
106 double UmbPos[4][3];
107 double UmbCons[4][3];
108 } t_UmbrellaHeader;
110 typedef struct
112 int nPull; /* nr of pull groups in this pdo or pullf/x file */
113 double **Histo,**cum; /* nPull histograms and nPull cumulative distr. funct */
114 int nBin; /* nr of bins. identical to opt->bins */
115 double *k; /* force constants for the nPull groups */
116 double *pos; /* umbrella positions for the nPull groups */
117 double *z; /* z=(-Fi/kT) for the nPull groups. These values are
118 iteratively computed during wham */
119 double *N, *Ntot; /* nr of data points in nPull histograms. N and Ntot
120 only differ if bHistEq==TRUE */
122 double *g,*tau,*tausmooth; /* g = 1 + 2*tau[int]/dt where tau is the integrated
123 autocorrelation time. Compare, e.g.
124 Ferrenberg/Swendsen, PRL 63:1195 (1989)
125 Kumar et al, J Comp Chem 13, 1011-1021 (1992), eq. 28 */
127 double dt; /* timestep in the input data. Can be adapted with
128 g_wham option -dt */
129 gmx_bool **bContrib; /* TRUE, if any data point of the histogram is within min
130 and max, otherwise FALSE. */
131 real **ztime; /* input data z(t) as a function of time. Required to
132 compute ACTs */
133 real *forceAv; /* average force estimated from average displacement, fAv=dzAv*k
134 Used for integration to guess the potential. */
135 real *aver,*sigma; /* average and stddev of histograms */
136 double *bsWeight; /* for bootstrapping complete histograms with continuous weights */
137 } t_UmbrellaWindow;
140 typedef struct
142 /* INPUT STUFF */
143 const char *fnTpr,*fnPullf;
144 const char *fnPdo,*fnPullx; /* file names of input */
145 gmx_bool bTpr,bPullf,bPdo,bPullx;/* input file types given? */
146 real tmin, tmax, dt; /* only read input within tmin and tmax with dt */
148 gmx_bool bInitPotByIntegration; /* before WHAM, guess potential by force integration. Yields
149 1.5 to 2 times faster convergence */
150 int stepUpdateContrib; /* update contribution table every ... iterations. Accelerates
151 WHAM. */
153 /* BASIC WHAM OPTIONS */
154 int bins; /* nr of bins, min, max, and dz of profile */
155 real min,max,dz;
156 real Temperature,Tolerance; /* temperature, converged when probability changes less
157 than Tolerance */
158 gmx_bool bCycl; /* generate cyclic (periodic) PMF */
160 /* OUTPUT CONTROL */
161 gmx_bool bLog; /* energy output (instead of probability) for profile */
162 int unit; /* unit for PMF output kJ/mol or kT or kCal/mol */
163 gmx_bool bSym; /* symmetrize PMF around z=0 after WHAM, useful for
164 membranes etc. */
165 real zProf0; /* after wham, set prof to zero at this z-position
166 When bootstrapping, set zProf0 to a "stable" reference
167 position. */
168 gmx_bool bProf0Set; /* setting profile to 0 at zProf0? */
170 gmx_bool bBoundsOnly,bHistOnly; /* determine min and max, or write histograms and exit */
171 gmx_bool bAuto; /* determine min and max automatically but do not exit */
173 gmx_bool verbose; /* more noisy wham mode */
174 int stepchange; /* print maximum change in prof after how many interations */
175 output_env_t oenv; /* xvgr options */
177 /* AUTOCORRELATION STUFF */
178 gmx_bool bTauIntGiven,bCalcTauInt;/* IACT given or should be calculated? */
179 real sigSmoothIact; /* sigma of Gaussian to smooth ACTs */
180 gmx_bool bAllowReduceIact; /* Allow to reduce ACTs during smoothing. Otherwise
181 ACT are only increased during smoothing */
182 real acTrestart; /* when computing ACT, time between restarting points */
183 gmx_bool bHistEq; /* Enforce the same weight for each umbella window, that is
184 calculate with the same number of data points for
185 each window. That can be reasonable, if the histograms
186 have different length, but due to autocorrelation,
187 a longer simulation should not have larger weightin wham. */
189 /* BOOTSTRAPPING STUFF */
190 int nBootStrap; /* nr of bootstraps (50 is usually enough) */
191 int bsMethod; /* if == bsMethod_hist, consider complete histograms as independent
192 data points and, hence, only mix complete histograms.
193 if == bsMethod_BayesianHist, consider complete histograms
194 as independent data points, but assign random weights
195 to the histograms during the bootstrapping ("Bayesian bootstrap")
196 In case of long correlations (e.g., inside a channel), these
197 will yield a more realistic error.
198 if == bsMethod_traj(Gauss), generate synthetic histograms
199 for each given
200 histogram by generating an autocorrelated random sequence
201 that is distributed according to the respective given
202 histogram. With bsMethod_trajGauss, bootstrap from a Gaussian
203 (instead of from the umbrella histogram) to generate a new
204 histogram
206 real tauBootStrap; /* autocorrelation time (ACT) used to generate synthetic
207 histograms. If ==0, use calculated ACF */
208 int histBootStrapBlockLength; /* when mixing histograms, mix only histograms withing blocks
209 long the reaction coordinate xi. Avoids gaps along xi. */
210 int bsSeed; /* random seed for bootstrapping */
211 gmx_bool bs_verbose; /* Write cumulative distribution functions (CDFs) of histograms
212 and write the generated histograms for each bootstrap */
214 /* tabulated umbrella potential stuff */
215 gmx_bool bTab;
216 double *tabX,*tabY,tabMin,tabMax,tabDz;
217 int tabNbins;
219 gmx_rng_t rng; /* gromacs random number generator */
220 } t_UmbrellaOptions;
223 t_UmbrellaWindow * initUmbrellaWindows(int nwin)
225 t_UmbrellaWindow *win;
226 int i;
227 snew(win,nwin);
228 for (i=0; i<nwin; i++)
230 win[i].Histo = win[i].cum = 0;
231 win[i].k = win[i].pos = win[i].z =0;
232 win[i].N = win[i].Ntot = 0;
233 win[i].g = win[i].tau = win[i].tausmooth = 0;
234 win[i].bContrib=0;
235 win[i].ztime=0;
236 win[i].forceAv=0;
237 win[i].aver = win[i].sigma = 0;
238 win[i].bsWeight = 0;
240 return win;
243 void freeUmbrellaWindows(t_UmbrellaWindow *win, int nwin)
245 int i,j;
246 for (i=0; i<nwin; i++)
248 if (win[i].Histo)
249 for (j=0;j<win[i].nPull;j++)
250 sfree(win[i].Histo[j]);
251 if (win[i].cum)
252 for (j=0;j<win[i].nPull;j++)
253 sfree(win[i].cum[j]);
254 if (win[i].bContrib)
255 for (j=0;j<win[i].nPull;j++)
256 sfree(win[i].bContrib[j]);
257 sfree(win[i].Histo);
258 sfree(win[i].cum);
259 sfree(win[i].k);
260 sfree(win[i].pos);
261 sfree(win[i].z);
262 sfree(win[i].N);
263 sfree(win[i].Ntot);
264 sfree(win[i].g);
265 sfree(win[i].tau);
266 sfree(win[i].tausmooth);
267 sfree(win[i].bContrib);
268 sfree(win[i].ztime);
269 sfree(win[i].forceAv);
270 sfree(win[i].aver);
271 sfree(win[i].sigma);
272 sfree(win[i].bsWeight);
274 sfree(win);
277 /* Read and setup tabulated umbrella potential */
278 void setup_tab(const char *fn,t_UmbrellaOptions *opt)
280 int i,ny,nl;
281 double **y;
283 printf("Setting up tabulated potential from file %s\n",fn);
284 nl=read_xvg(fn,&y,&ny);
285 opt->tabNbins=nl;
286 if (ny!=2)
287 gmx_fatal(FARGS,"Found %d columns in %s. Expected 2.\n",ny,fn);
288 opt->tabMin=y[0][0];
289 opt->tabMax=y[0][nl-1];
290 opt->tabDz=(opt->tabMax-opt->tabMin)/(nl-1);
291 if (opt->tabDz<=0)
292 gmx_fatal(FARGS,"The tabulated potential in %s must be provided in \n"
293 "ascending z-direction",fn);
294 for (i=0;i<nl-1;i++)
295 if (fabs(y[0][i+1]-y[0][i]-opt->tabDz) > opt->tabDz/1e6)
296 gmx_fatal(FARGS,"z-values in %s are not equally spaced.\n",ny,fn);
297 snew(opt->tabY,nl);
298 snew(opt->tabX,nl);
299 for (i=0;i<nl;i++)
301 opt->tabX[i]=y[0][i];
302 opt->tabY[i]=y[1][i];
304 printf("Found equally spaced tabulated potential from %g to %g, spacing %g\n",
305 opt->tabMin,opt->tabMax,opt->tabDz);
308 void read_pdo_header(FILE * file,t_UmbrellaHeader * header, t_UmbrellaOptions *opt)
310 char line[2048];
311 char Buffer0[256], Buffer1[256], Buffer2[256], Buffer3[256], Buffer4[256];
312 int i;
314 /* line 1 */
315 if (fgets(line,2048,file) == NULL)
316 gmx_fatal(FARGS,"Error reading header from pdo file\n");
317 sscanf(line,"%s%s%s",Buffer0,Buffer1,Buffer2);
318 if(strcmp(Buffer1,"UMBRELLA"))
319 gmx_fatal(FARGS,"This does not appear to be a valid pdo file. Found %s, expected %s\n"
320 "(Found in first line: `%s')\n",
321 Buffer1, "UMBRELLA",line);
322 if(strcmp(Buffer2,"3.0"))
323 gmx_fatal(FARGS,"This does not appear to be a version 3.0 pdo file");
325 /* line 2 */
326 if (fgets(line,2048,file) == NULL)
327 gmx_fatal(FARGS,"Error reading header from pdo file\n");
328 sscanf(line,"%s%s%s%d%d%d",Buffer0,Buffer1,Buffer2,
329 &(header->Dims[0]),&(header->Dims[1]),&(header->Dims[2]));
331 /* printf("%d %d %d\n", header->Dims[0],header->Dims[1],header->Dims[2]); */
333 header->nDim = header->Dims[0] + header->Dims[1] + header->Dims[2];
334 if(header->nDim!=1)
335 gmx_fatal(FARGS,"Currently only supports one dimension");
337 /* line3 */
338 if (fgets(line,2048,file) == NULL)
339 gmx_fatal(FARGS,"Error reading header from pdo file\n");
340 sscanf(line,"%s%s%d",Buffer0,Buffer1,&(header->nSkip));
342 /* line 4 */
343 if (fgets(line,2048,file) == NULL)
344 gmx_fatal(FARGS,"Error reading header from pdo file\n");
345 sscanf(line,"%s%s%s%s",Buffer0,Buffer1,Buffer2,header->Reference);
347 /* line 5 */
348 if (fgets(line,2048,file) == NULL)
349 gmx_fatal(FARGS,"Error reading header from pdo file\n");
350 sscanf(line,"%s%s%s%s%s%d",Buffer0,Buffer1,Buffer2,Buffer3,Buffer4,&(header->nPull));
352 if (opt->verbose)
353 printf("\tFound nPull=%d , nSkip=%d, ref=%s\n",header->nPull,header->nSkip,
354 header->Reference);
356 for(i=0;i<header->nPull;++i)
358 if (fgets(line,2048,file) == NULL)
359 gmx_fatal(FARGS,"Error reading header from pdo file\n");
360 sscanf(line,"%*s%*s%*s%s%*s%*s%lf%*s%*s%lf",header->PullName[i]
361 ,&(header->UmbPos[i][0]),&(header->UmbCons[i][0]));
362 if (opt->verbose)
363 printf("\tpullgroup %d, pullname = %s, UmbPos = %g, UmbConst = %g\n",
364 i,header->PullName[i],header->UmbPos[i][0],header->UmbCons[i][0]);
367 if (fgets(line,2048,file) == NULL)
368 gmx_fatal(FARGS,"Cannot read from file\n");
369 sscanf(line,"%s",Buffer3);
370 if (strcmp(Buffer3,"#####") != 0)
371 gmx_fatal(FARGS,"Expected '#####', found %s. Hick.\n",Buffer3);
375 static char *fgets3(FILE *fp,char ptr[],int *len)
377 char *p;
378 int slen;
380 if (fgets(ptr,*len-1,fp) == NULL)
381 return NULL;
382 p = ptr;
383 while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
385 /* This line is longer than len characters, let's increase len! */
386 *len += STRLEN;
387 p += STRLEN;
388 srenew(ptr,*len);
389 if (fgets(p-1,STRLEN,fp) == NULL)
390 break;
392 slen = strlen(ptr);
393 if (ptr[slen-1] == '\n')
394 ptr[slen-1] = '\0';
396 return ptr;
399 void read_pdo_data(FILE * file, t_UmbrellaHeader * header,
400 int fileno, t_UmbrellaWindow * win,
401 t_UmbrellaOptions *opt,
402 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
404 int i,inttemp,bins,count,ntot;
405 real min,max,minfound=1e20,maxfound=-1e20;
406 double temp,time,time0=0,dt;
407 char *ptr=0;
408 t_UmbrellaWindow * window=0;
409 gmx_bool timeok,dt_ok=1;
410 char *tmpbuf=0,fmt[256],fmtign[256];
411 int len=STRLEN,dstep=1;
412 const int blocklen=4096;
413 int *lennow=0;
415 if (!bGetMinMax)
417 bins=opt->bins;
418 min=opt->min;
419 max=opt->max;
421 window=win+fileno;
422 /* Need to alocate memory and set up structure */
423 window->nPull=header->nPull;
424 window->nBin=bins;
426 snew(window->Histo,window->nPull);
427 snew(window->z,window->nPull);
428 snew(window->k,window->nPull);
429 snew(window->pos,window->nPull);
430 snew(window->N, window->nPull);
431 snew(window->Ntot, window->nPull);
432 snew(window->g, window->nPull);
433 snew(window->bsWeight, window->nPull);
435 window->bContrib=0;
437 if (opt->bCalcTauInt)
438 snew(window->ztime, window->nPull);
439 else
440 window->ztime=0;
441 snew(lennow,window->nPull);
443 for(i=0;i<window->nPull;++i)
445 window->z[i]=1;
446 window->bsWeight[i]=1.;
447 snew(window->Histo[i],bins);
448 window->k[i]=header->UmbCons[i][0];
449 window->pos[i]=header->UmbPos[i][0];
450 window->N[i]=0;
451 window->Ntot[i]=0;
452 window->g[i]=1.;
453 if (opt->bCalcTauInt)
454 window->ztime[i]=0;
457 /* Done with setup */
459 else
461 minfound=1e20;
462 maxfound=-1e20;
463 min=max=bins=0; /* Get rid of warnings */
466 count=0;
467 snew(tmpbuf,len);
468 while ( (ptr=fgets3(file,tmpbuf,&len)) != NULL)
470 trim(ptr);
472 if (ptr[0] == '#' || strlen(ptr)<2)
473 continue;
475 /* Initiate format string */
476 fmtign[0] = '\0';
477 strcat(fmtign,"%*s");
479 sscanf(ptr,"%lf",&time); /* printf("Time %f\n",time); */
480 /* Round time to fs */
481 time=1.0/1000*( (int) (time*1000+0.5) );
483 /* get time step of pdo file */
484 if (count==0)
485 time0=time;
486 else if (count==1)
488 dt=time-time0;
489 if (opt->dt>0.0)
491 dstep=(int)(opt->dt/dt+0.5);
492 if (dstep==0)
493 dstep=1;
495 if (!bGetMinMax)
496 window->dt=dt*dstep;
498 count++;
500 dt_ok=((count-1)%dstep == 0);
501 timeok=(dt_ok && time >= opt->tmin && time <= opt->tmax);
502 /* if (opt->verbose)
503 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
504 time,opt->tmin, opt->tmax, dt_ok,timeok); */
506 if (timeok)
508 for(i=0;i<header->nPull;++i)
510 strcpy(fmt,fmtign);
511 strcat(fmt,"%lf"); /* Creating a format stings such as "%*s...%*s%lf" */
512 strcat(fmtign,"%*s"); /* ignoring one more entry in the next loop */
513 if(sscanf(ptr,fmt,&temp))
515 temp+=header->UmbPos[i][0];
516 if (bGetMinMax)
518 if (temp<minfound)
519 minfound=temp;
520 if (temp>maxfound)
521 maxfound=temp;
523 else{
524 if (opt->bCalcTauInt)
526 /* save time series for autocorrelation analysis */
527 ntot=window->Ntot[i];
528 if (ntot>=lennow[i])
530 lennow[i]+=blocklen;
531 srenew(window->ztime[i],lennow[i]);
533 window->ztime[i][ntot]=temp;
536 temp-=min;
537 temp/=(max-min);
538 temp*=bins;
539 temp=floor(temp);
541 inttemp = (int)temp;
542 if (opt->bCycl)
544 if (inttemp < 0)
545 inttemp+=bins;
546 else if (inttemp >= bins)
547 inttemp-=bins;
550 if(inttemp >= 0 && inttemp < bins)
552 window->Histo[i][inttemp]+=1.;
553 window->N[i]++;
555 window->Ntot[i]++;
560 if (time>opt->tmax)
562 if (opt->verbose)
563 printf("time %f larger than tmax %f, stop reading pdo file\n",time,opt->tmax);
564 break;
568 if (bGetMinMax)
570 *mintmp=minfound;
571 *maxtmp=maxfound;
574 sfree(lennow);
575 sfree(tmpbuf);
578 void enforceEqualWeights(t_UmbrellaWindow * window,int nWindows)
580 int i,k,j,NEnforced;
581 double ratio;
583 NEnforced=window[0].Ntot[0];
584 printf("\nFound -hist-eq. Enforcing equal weights for all histograms, \ni.e. doing a "
585 "non-weighted histogram analysis method. Ndata = %d\n",NEnforced);
586 /* enforce all histograms to have the same weight as the very first histogram */
588 for(j=0;j<nWindows;++j)
590 for(k=0;k<window[j].nPull;++k)
592 ratio=1.0*NEnforced/window[j].Ntot[k];
593 for(i=0;i<window[0].nBin;++i)
595 window[j].Histo[k][i]*=ratio;
597 window[j].N[k]=(int)(ratio*window[j].N[k] + 0.5);
602 /* Simple linear interpolation between two given tabulated points */
603 double tabulated_pot(double dist, t_UmbrellaOptions *opt)
605 int jl,ju;
606 double pl,pu,dz,dp;
608 jl=floor((dist-opt->tabMin)/opt->tabDz);
609 ju=jl+1;
610 if (jl<0 || ju>=opt->tabNbins)
612 gmx_fatal(FARGS,"Distance %f out of bounds of tabulated potential (jl=%d, ju=%d).\n"
613 "Provide an extended table.",dist,jl,ju);
615 pl=opt->tabY[jl];
616 pu=opt->tabY[ju];
617 dz=dist-opt->tabX[jl];
618 dp=(pu-pl)*dz/opt->tabDz;
619 return pl+dp;
623 /* Don't worry, that routine does not mean we compute the PMF in limited precision.
624 After rapid convergence (using only substiantal contributions), we always switch to
625 full precision. */
626 void setup_acc_wham(double *profile,t_UmbrellaWindow * window,int nWindows,
627 t_UmbrellaOptions *opt)
629 int i,j,k,nGrptot=0,nContrib=0,nTot=0;
630 double U,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,contrib1,contrib2;
631 gmx_bool bAnyContrib;
632 static int bFirst=1;
633 static double wham_contrib_lim;
635 if (bFirst)
637 for(i=0;i<nWindows;++i)
639 nGrptot+=window[i].nPull;
641 wham_contrib_lim=opt->Tolerance/nGrptot;
644 ztot=opt->max-opt->min;
645 ztot_half=ztot/2;
647 for(i=0;i<nWindows;++i)
649 if ( ! window[i].bContrib)
651 snew(window[i].bContrib,window[i].nPull);
653 for(j=0;j<window[i].nPull;++j)
655 if ( ! window[i].bContrib[j])
656 snew(window[i].bContrib[j],opt->bins);
657 bAnyContrib=FALSE;
658 for(k=0;k<opt->bins;++k)
660 temp=(1.0*k+0.5)*dz+min;
661 distance = temp - window[i].pos[j]; /* distance to umbrella center */
662 if (opt->bCycl)
663 { /* in cyclic wham: */
664 if (distance > ztot_half) /* |distance| < ztot_half */
665 distance-=ztot;
666 else if (distance < -ztot_half)
667 distance+=ztot;
669 /* Note: there are two contributions to bin k in the wham equations:
670 i) N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j])
671 ii) exp(- U/(8.314e-3*opt->Temperature))
672 where U is the umbrella potential
673 If any of these number is larger wham_contrib_lim, I set contrib=TRUE
676 if (!opt->bTab)
677 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
678 else
679 U=tabulated_pot(distance,opt); /* Use tabulated potential */
681 contrib1=profile[k]*exp(- U/(8.314e-3*opt->Temperature));
682 contrib2=window[i].N[j]*exp(- U/(8.314e-3*opt->Temperature) + window[i].z[j]);
683 window[i].bContrib[j][k] = (contrib1 > wham_contrib_lim || contrib2 > wham_contrib_lim);
684 bAnyContrib = (bAnyContrib | window[i].bContrib[j][k]);
685 if (window[i].bContrib[j][k])
686 nContrib++;
687 nTot++;
689 /* If this histo is far outside min and max all bContrib may be FALSE,
690 causing a floating point exception later on. To avoid that, switch
691 them all to true.*/
692 if (!bAnyContrib)
693 for(k=0;k<opt->bins;++k)
694 window[i].bContrib[j][k]=TRUE;
697 if (bFirst)
698 printf("Initialized rapid wham stuff (contrib tolerance %g)\n"
699 "Evaluating only %d of %d expressions.\n\n",wham_contrib_lim,nContrib, nTot);
701 if (opt->verbose)
702 printf("Updated rapid wham stuff. (evaluating only %d of %d contributions)\n",
703 nContrib,nTot);
704 bFirst=0;
708 void calc_profile(double *profile,t_UmbrellaWindow * window, int nWindows,
709 t_UmbrellaOptions *opt, gmx_bool bExact)
711 int i,k,j;
712 double num,ztot_half,ztot,distance,min=opt->min,dz=opt->dz;
713 double denom,U=0,temp=0,invg;
715 ztot=opt->max-opt->min;
716 ztot_half=ztot/2;
718 for(i=0;i<opt->bins;++i)
720 num=denom=0.;
721 for(j=0;j<nWindows;++j)
723 for(k=0;k<window[j].nPull;++k)
725 invg = 1.0/window[j].g[k] * window[j].bsWeight[k];
726 temp = (1.0*i+0.5)*dz+min;
727 num += invg*window[j].Histo[k][i];
729 if (! (bExact || window[j].bContrib[k][i]))
730 continue;
731 distance = temp - window[j].pos[k]; /* distance to umbrella center */
732 if (opt->bCycl)
733 { /* in cyclic wham: */
734 if (distance > ztot_half) /* |distance| < ztot_half */
735 distance-=ztot;
736 else if (distance < -ztot_half)
737 distance+=ztot;
740 if (!opt->bTab)
741 U=0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */
742 else
743 U=tabulated_pot(distance,opt); /* Use tabulated potential */
744 denom+=invg*window[j].N[k]*exp(- U/(8.314e-3*opt->Temperature) + window[j].z[k]);
747 profile[i]=num/denom;
752 double calc_z(double * profile,t_UmbrellaWindow * window, int nWindows,
753 t_UmbrellaOptions *opt, gmx_bool bExact)
755 int i,j,k,binMax=-1;
756 double U=0,min=opt->min,dz=opt->dz,temp,ztot_half,distance,ztot,totalMax;
757 double MAX=-1e20, total=0;
759 ztot=opt->max-opt->min;
760 ztot_half=ztot/2;
762 for(i=0;i<nWindows;++i)
764 for(j=0;j<window[i].nPull;++j)
766 total=0;
767 for(k=0;k<window[i].nBin;++k)
769 if (! (bExact || window[i].bContrib[j][k]))
770 continue;
771 temp=(1.0*k+0.5)*dz+min;
772 distance = temp - window[i].pos[j]; /* distance to umbrella center */
773 if (opt->bCycl)
774 { /* in cyclic wham: */
775 if (distance > ztot_half) /* |distance| < ztot_half */
776 distance-=ztot;
777 else if (distance < -ztot_half)
778 distance+=ztot;
781 if (!opt->bTab)
782 U=0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */
783 else
784 U=tabulated_pot(distance,opt); /* Use tabulated potential */
786 total+=profile[k]*exp(-U/(8.314e-3*opt->Temperature));
788 /* Avoid floating point exception if window is far outside min and max */
789 if (total != 0.0)
790 total = -log(total);
791 else
792 total = 1000.0;
793 temp = fabs(total - window[i].z[j]);
794 if(temp > MAX){
795 MAX=temp;
796 binMax=k;
797 totalMax=total;
799 window[i].z[j] = total;
802 return MAX;
805 void symmetrizeProfile(double* profile,t_UmbrellaOptions *opt)
807 int i,j;
808 double *prof2,bins=opt->bins,min=opt->min,max=opt->max,dz=opt->dz,zsym,deltaz,profsym;
809 double z,z1;
811 if (min>0. || max<0.)
812 gmx_fatal(FARGS,"Cannot symmetrize profile around z=0 with min=%f and max=%f\n",
813 opt->min,opt->max);
815 snew(prof2,bins);
817 for (i=0;i<bins;i++)
819 z=min+(i+0.5)*dz;
820 zsym=-z;
821 /* bin left of zsym */
822 j=floor((zsym-min)/dz-0.5);
823 if (j>=0 && (j+1)<bins)
825 /* interpolate profile linearly between bins j and j+1 */
826 z1=min+(j+0.5)*dz;
827 deltaz=zsym-z1;
828 profsym=profile[j] + (profile[j+1]-profile[j])/dz*deltaz;
829 /* average between left and right */
830 prof2[i]=0.5*(profsym+profile[i]);
832 else
834 prof2[i]=profile[i];
838 memcpy(profile,prof2,bins*sizeof(double));
839 sfree(prof2);
842 void prof_normalization_and_unit(double * profile, t_UmbrellaOptions *opt)
844 int i,bins,imin;
845 double unit_factor=1., R_MolarGasConst, diff;
847 R_MolarGasConst=8.314472e-3; /* in kJ/(mol*K) */
848 bins=opt->bins;
850 /* Not log? Nothing to do! */
851 if (!opt->bLog)
852 return;
854 /* Get profile in units of kT, kJ/mol, or kCal/mol */
855 if (opt->unit == en_kT)
856 unit_factor=1.0;
857 else if (opt->unit == en_kJ)
858 unit_factor=R_MolarGasConst*opt->Temperature;
859 else if (opt->unit == en_kCal)
860 unit_factor=R_MolarGasConst*opt->Temperature/4.1868;
861 else
862 gmx_fatal(FARGS,"Sorry, I don't know this energy unit.");
864 for (i=0;i<bins;i++)
865 if (profile[i]>0.0)
866 profile[i]=-log(profile[i])*unit_factor;
868 /* shift to zero at z=opt->zProf0 */
869 if (!opt->bProf0Set)
871 diff=profile[0];
873 else
875 /* Get bin with shortest distance to opt->zProf0
876 (-0.5 from bin position and +0.5 from rounding cancel) */
877 imin=(int)((opt->zProf0-opt->min)/opt->dz);
878 if (imin<0)
879 imin=0;
880 else if (imin>=bins)
881 imin=bins-1;
882 diff=profile[imin];
885 /* Shift to zero */
886 for (i=0;i<bins;i++)
887 profile[i]-=diff;
890 void getRandomIntArray(int nPull,int blockLength,int* randomArray,gmx_rng_t rng)
892 int ipull,blockBase,nr,ipullRandom;
894 if (blockLength==0)
895 blockLength=nPull;
897 for (ipull=0; ipull<nPull; ipull++)
899 blockBase=(ipull/blockLength)*blockLength;
901 { /* make sure nothing bad happens in the last block */
902 nr=(int)(gmx_rng_uniform_real(rng)*blockLength);
903 ipullRandom = blockBase + nr;
904 } while (ipullRandom >= nPull);
905 if (ipullRandom<0 || ipullRandom>=nPull)
906 gmx_fatal(FARGS,"Ups, random iWin = %d, nPull = %d, nr = %d, "
907 "blockLength = %d, blockBase = %d\n",
908 ipullRandom,nPull,nr,blockLength,blockBase);
909 randomArray[ipull]=ipullRandom;
911 /*for (ipull=0; ipull<nPull; ipull++)
912 printf("%d ",randomArray[ipull]); printf("\n"); */
915 void copy_pullgrp_to_synthwindow(t_UmbrellaWindow *synthWindow,
916 t_UmbrellaWindow *thisWindow,int pullid)
918 synthWindow->N [0]=thisWindow->N [pullid];
919 synthWindow->Histo [0]=thisWindow->Histo [pullid];
920 synthWindow->pos [0]=thisWindow->pos [pullid];
921 synthWindow->z [0]=thisWindow->z [pullid];
922 synthWindow->k [0]=thisWindow->k [pullid];
923 synthWindow->bContrib[0]=thisWindow->bContrib [pullid];
924 synthWindow->g [0]=thisWindow->g [pullid];
925 synthWindow->bsWeight[0]=thisWindow->bsWeight [pullid];
928 /* Calculate cumulative distribution function of of all histograms. They
929 allow to create random number sequences
930 which are distributed according to the histograms. Required to generate
931 the "synthetic" histograms for the Bootstrap method */
932 void calc_cumulatives(t_UmbrellaWindow *window,int nWindows,
933 t_UmbrellaOptions *opt,const char *fnhist)
935 int i,j,k,nbin;
936 double last;
937 char *fn=0,*buf=0;
938 FILE *fp=0;
940 if (opt->bs_verbose)
942 snew(fn,strlen(fnhist)+10);
943 snew(buf,strlen(fnhist)+10);
944 sprintf(fn,"%s_cumul.xvg",strncpy(buf,fnhist,strlen(fnhist)-4));
945 fp=xvgropen(fn,"CDFs of umbrella windows","z","CDF",opt->oenv);
948 nbin=opt->bins;
949 for (i=0; i<nWindows; i++)
951 snew(window[i].cum,window[i].nPull);
952 for (j=0; j<window[i].nPull; j++)
954 snew(window[i].cum[j],nbin+1);
955 window[i].cum[j][0]=0.;
956 for (k=1; k<=nbin; k++)
957 window[i].cum[j][k] = window[i].cum[j][k-1]+window[i].Histo[j][k-1];
959 /* normalize CDFs. Ensure cum[nbin]==1 */
960 last = window[i].cum[j][nbin];
961 for (k=0; k<=nbin; k++)
962 window[i].cum[j][k] /= last;
966 printf("Cumulative distriubtion functions of all histograms created.\n");
967 if (opt->bs_verbose)
969 for (k=0; k<=nbin; k++)
971 fprintf(fp,"%g\t",opt->min+k*opt->dz);
972 for (i=0; i<nWindows; i++)
973 for (j=0; j<window[i].nPull; j++)
974 fprintf(fp,"%g\t",window[i].cum[j][k]);
975 fprintf(fp,"\n");
977 printf("Wrote cumulative distribution functions to %s\n",fn);
978 ffclose(fp);
979 sfree(fn);
980 sfree(buf);
985 /* Return j such that xx[j] <= x < xx[j+1] */
986 void searchCumulative(double xx[], int n, double x, int *j)
988 int ju,jm,jl;
990 jl=-1;
991 ju=n;
992 while (ju-jl > 1)
994 jm=(ju+jl) >> 1;
995 if (x >= xx[jm])
996 jl=jm;
997 else
998 ju=jm;
1000 if (x==xx[0])
1001 *j=0;
1002 else if (x==xx[n-1])
1003 *j=n-2;
1004 else
1005 *j=jl;
1008 void create_synthetic_histo(t_UmbrellaWindow *synthWindow, t_UmbrellaWindow *thisWindow,
1009 int pullid,t_UmbrellaOptions *opt)
1011 int N,i,nbins,r_index,ibin;
1012 double r,tausteps=0.0,a,ap,dt,x,invsqrt2,g,y,sig=0.,z,mu=0.;
1013 char errstr[1024];
1015 N=thisWindow->N[pullid];
1016 dt=thisWindow->dt;
1017 nbins=thisWindow->nBin;
1019 /* tau = autocorrelation time */
1020 if (opt->tauBootStrap>0.0)
1021 tausteps=opt->tauBootStrap/dt;
1022 else if (opt->bTauIntGiven || opt->bCalcTauInt)
1024 /* calc tausteps from g=1+2tausteps */
1025 g=thisWindow->g[pullid];
1026 tausteps=(g-1)/2;
1028 else
1030 sprintf(errstr,
1031 "When generating hypothetical trajctories from given umbrella histograms,\n"
1032 "autocorrelation times (ACTs) are required. Otherwise the statistical error\n"
1033 "cannot be predicted. You have 3 options:\n"
1034 "1) Make g_wham estimate the ACTs (options -ac and -acsig).\n"
1035 "2) Calculate the ACTs by yourself (e.g. with g_analyze) and provide them\n");
1036 strcat(errstr,
1037 " with option -iiact for all umbrella windows.\n"
1038 "3) If all ACTs are identical and know, you can define them with -bs-tau.\n"
1039 " Use option (3) only if you are sure what you're doing, you may severely\n"
1040 " underestimate the error if a too small ACT is given.\n");
1041 gmx_fatal(FARGS,errstr);
1044 synthWindow->N [0]=N;
1045 synthWindow->pos [0]=thisWindow->pos[pullid];
1046 synthWindow->z [0]=thisWindow->z[pullid];
1047 synthWindow->k [0]=thisWindow->k[pullid];
1048 synthWindow->bContrib[0]=thisWindow->bContrib[pullid];
1049 synthWindow->g [0]=thisWindow->g [pullid];
1050 synthWindow->bsWeight[0]=thisWindow->bsWeight[pullid];
1052 for (i=0;i<nbins;i++)
1053 synthWindow->Histo[0][i]=0.;
1055 if (opt->bsMethod==bsMethod_trajGauss)
1057 sig = thisWindow->sigma [pullid];
1058 mu = thisWindow->aver [pullid];
1061 /* Genrate autocorrelated Gaussian random variable with autocorrelation time tau
1062 Use the following:
1063 If x and y are random numbers from N(0,1) (Gaussian with average 0 and sigma=1),
1064 then
1065 z = a*x + sqrt(1-a^2)*y
1066 is also from N(0,1), and cov(z,x) = a. Thus, by gerenating a sequence
1067 x' = a*x + sqrt(1-a^2)*y, the sequnce x(t) is from N(0,1) and has an autocorrelation
1068 function
1069 C(t) = exp(-t/tau) with tau=-1/ln(a)
1071 Then, use error function to turn the Gaussian random variable into a uniformly
1072 distributed one in [0,1]. Eventually, use cumulative distribution function of
1073 histogram to get random variables distributed according to histogram.
1074 Note: The ACT of the flat distribution and of the generated histogram is not
1075 100% exactly tau, but near tau (my test was 3.8 instead of 4).
1077 a=exp(-1.0/tausteps);
1078 ap=sqrt(1-a*a);
1079 invsqrt2=1./sqrt(2.0);
1081 /* init random sequence */
1082 x=gmx_rng_gaussian_table(opt->rng);
1084 if (opt->bsMethod==bsMethod_traj)
1086 /* bootstrap points from the umbrella histograms */
1087 for (i=0;i<N;i++)
1089 y=gmx_rng_gaussian_table(opt->rng);
1090 x=a*x+ap*y;
1091 /* get flat distribution in [0,1] using cumulative distribution function of Gauusian
1092 Note: CDF(Gaussian) = 0.5*{1+erf[x/sqrt(2)]}
1094 r=0.5*(1+gmx_erf(x*invsqrt2));
1095 searchCumulative(thisWindow->cum[pullid],nbins+1 ,r,&r_index);
1096 synthWindow->Histo[0][r_index]+=1.;
1099 else if (opt->bsMethod==bsMethod_trajGauss)
1101 /* bootstrap points from a Gaussian with the same average and sigma
1102 as the respective umbrella histogram. The idea was, that -given
1103 limited sampling- the bootstrapped histograms are otherwise biased
1104 from the limited sampling of the US histos. However, bootstrapping from
1105 the Gaussian seems to yield a similar estimate. */
1106 i=0;
1107 while (i<N)
1109 y=gmx_rng_gaussian_table(opt->rng);
1110 x=a*x+ap*y;
1111 z = x*sig+mu;
1112 ibin=floor((z-opt->min)/opt->dz);
1113 if (opt->bCycl)
1115 if (ibin<0)
1116 while ( (ibin+=nbins) < 0);
1117 else if (ibin>=nbins)
1118 while ( (ibin-=nbins) >= nbins);
1121 if (ibin>=0 && ibin<nbins)
1123 synthWindow->Histo[0][ibin]+=1.;
1124 i++;
1128 else
1130 gmx_fatal(FARGS,"Unknown bsMethod (id %d). That should not happen.\n",opt->bsMethod);
1135 void print_histograms(const char *fnhist, t_UmbrellaWindow * window, int nWindows,
1136 int bs_index,t_UmbrellaOptions *opt)
1138 char *fn=0,*buf=0,title[256];
1139 FILE *fp;
1140 int bins,l,i,j;
1142 if (bs_index<0)
1144 fn=strdup(fnhist);
1145 strcpy(title,"Umbrella histograms");
1147 else
1149 snew(fn,strlen(fnhist)+10);
1150 snew(buf,strlen(fnhist)+1);
1151 sprintf(fn,"%s_bs%d.xvg",strncpy(buf,fnhist,strlen(fnhist)-4),bs_index);
1152 sprintf(title,"Umbrella histograms. Bootstrap #%d",bs_index);
1155 fp=xvgropen(fn,title,"z","count",opt->oenv);
1156 bins=opt->bins;
1158 /* Write histograms */
1159 for(l=0;l<bins;++l)
1161 fprintf(fp,"%e\t",(double)(l+0.5)*opt->dz+opt->min);
1162 for(i=0;i<nWindows;++i)
1164 for(j=0;j<window[i].nPull;++j)
1166 fprintf(fp,"%e\t",window[i].Histo[j][l]);
1169 fprintf(fp,"\n");
1172 ffclose(fp);
1173 printf("Wrote %s\n",fn);
1174 if (buf)
1176 sfree(buf);
1177 sfree(fn);
1181 int func_wham_is_larger(const void *a, const void *b)
1183 double *aa,*bb;
1184 aa=(double*)a;
1185 bb=(double*)b;
1186 if (*aa < *bb)
1187 return -1;
1188 else if (*aa > *bb)
1189 return 1;
1190 else
1191 return 0;
1195 void setRandomBsWeights(t_UmbrellaWindow *synthwin,int nAllPull, t_UmbrellaOptions *opt)
1197 int i;
1198 double *r;
1200 snew(r,nAllPull);
1202 /* generate ordered random numbers between 0 and nAllPull */
1203 for (i=0; i<nAllPull-1; i++)
1205 r[i] = gmx_rng_uniform_real(opt->rng) * nAllPull;
1207 qsort((void *)r,nAllPull-1, sizeof(double), &func_wham_is_larger);
1208 r[nAllPull-1]=1.0*nAllPull;
1210 synthwin[0].bsWeight[0]=r[0];
1211 for (i=1; i<nAllPull; i++)
1213 synthwin[i].bsWeight[0]=r[i]-r[i-1];
1216 /* avoid to have zero weight by adding a tiny value */
1217 for (i=0; i<nAllPull; i++)
1218 if (synthwin[i].bsWeight[0] < 1e-5)
1219 synthwin[i].bsWeight[0] = 1e-5;
1221 sfree(r);
1224 void do_bootstrapping(const char *fnres, const char* fnprof, const char *fnhist,
1225 char* ylabel, double *profile,
1226 t_UmbrellaWindow * window, int nWindows, t_UmbrellaOptions *opt)
1228 t_UmbrellaWindow * synthWindow;
1229 double *bsProfile,*bsProfiles_av, *bsProfiles_av2,maxchange=1e20,tmp,stddev;
1230 int i,j,*randomArray=0,winid,pullid,ib;
1231 int iAllPull,nAllPull,*allPull_winId,*allPull_pullId;
1232 FILE *fp;
1233 gmx_bool bExact=FALSE;
1235 /* init random generator */
1236 if (opt->bsSeed==-1)
1237 opt->rng=gmx_rng_init(gmx_rng_make_seed());
1238 else
1239 opt->rng=gmx_rng_init(opt->bsSeed);
1241 snew(bsProfile, opt->bins);
1242 snew(bsProfiles_av, opt->bins);
1243 snew(bsProfiles_av2,opt->bins);
1245 /* Create array of all pull groups. Note that different windows
1246 may have different nr of pull groups
1247 First: Get total nr of pull groups */
1248 nAllPull=0;
1249 for (i=0;i<nWindows;i++)
1250 nAllPull+=window[i].nPull;
1251 snew(allPull_winId,nAllPull);
1252 snew(allPull_pullId,nAllPull);
1253 iAllPull=0;
1254 /* Setup one array of all pull groups */
1255 for (i=0;i<nWindows;i++)
1257 for (j=0;j<window[i].nPull;j++)
1259 allPull_winId[iAllPull]=i;
1260 allPull_pullId[iAllPull]=j;
1261 iAllPull++;
1265 /* setup stuff for synthetic windows */
1266 snew(synthWindow,nAllPull);
1267 for (i=0;i<nAllPull;i++)
1269 synthWindow[i].nPull=1;
1270 synthWindow[i].nBin=opt->bins;
1271 snew(synthWindow[i].Histo,1);
1272 if (opt->bsMethod == bsMethod_traj || opt->bsMethod == bsMethod_trajGauss)
1273 snew(synthWindow[i].Histo[0],opt->bins);
1274 snew(synthWindow[i].N,1);
1275 snew(synthWindow[i].pos,1);
1276 snew(synthWindow[i].z,1);
1277 snew(synthWindow[i].k,1);
1278 snew(synthWindow[i].bContrib,1);
1279 snew(synthWindow[i].g,1);
1280 snew(synthWindow[i].bsWeight,1);
1283 switch(opt->bsMethod)
1285 case bsMethod_hist:
1286 snew(randomArray,nAllPull);
1287 printf("\n\nWhen computing statistical errors by bootstrapping entire histograms:\n");
1288 please_cite(stdout,"Hub2006");
1289 break;
1290 case bsMethod_BayesianHist :
1291 /* just copy all histogams into synthWindow array */
1292 for (i=0;i<nAllPull;i++)
1294 winid =allPull_winId [i];
1295 pullid=allPull_pullId[i];
1296 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1298 break;
1299 case bsMethod_traj:
1300 case bsMethod_trajGauss:
1301 calc_cumulatives(window,nWindows,opt,fnhist);
1302 break;
1303 default:
1304 gmx_fatal(FARGS,"Unknown bootstrap method. That should not have happened.\n");
1307 /* do bootstrapping */
1308 fp=xvgropen(fnprof,"Boot strap profiles","z",ylabel,opt->oenv);
1309 for (ib=0;ib<opt->nBootStrap;ib++)
1311 printf(" *******************************************\n"
1312 " ******** Start bootstrap nr %d ************\n"
1313 " *******************************************\n",ib+1);
1315 switch(opt->bsMethod)
1317 case bsMethod_hist:
1318 /* bootstrap complete histograms from given histograms */
1319 getRandomIntArray(nAllPull,opt->histBootStrapBlockLength,randomArray,opt->rng);
1320 for (i=0;i<nAllPull;i++){
1321 winid =allPull_winId [randomArray[i]];
1322 pullid=allPull_pullId[randomArray[i]];
1323 copy_pullgrp_to_synthwindow(synthWindow+i,window+winid,pullid);
1325 break;
1326 case bsMethod_BayesianHist:
1327 /* keep histos, but assign random weights ("Bayesian bootstrap") */
1328 setRandomBsWeights(synthWindow,nAllPull,opt);
1329 break;
1330 case bsMethod_traj:
1331 case bsMethod_trajGauss:
1332 /* create new histos from given histos, that is generate new hypothetical
1333 trajectories */
1334 for (i=0;i<nAllPull;i++)
1336 winid=allPull_winId[i];
1337 pullid=allPull_pullId[i];
1338 create_synthetic_histo(synthWindow+i,window+winid,pullid,opt);
1340 break;
1343 /* write histos in case of verbose output */
1344 if (opt->bs_verbose)
1345 print_histograms(fnhist,synthWindow,nAllPull,ib,opt);
1347 /* do wham */
1348 i=0;
1349 bExact=FALSE;
1350 maxchange=1e20;
1351 memcpy(bsProfile,profile,opt->bins*sizeof(double)); /* use profile as guess */
1354 if ( (i%opt->stepUpdateContrib) == 0)
1355 setup_acc_wham(bsProfile,synthWindow,nAllPull,opt);
1356 if (maxchange<opt->Tolerance)
1357 bExact=TRUE;
1358 if (((i%opt->stepchange) == 0 || i==1) && !i==0)
1359 printf("\t%4d) Maximum change %e\n",i,maxchange);
1360 calc_profile(bsProfile,synthWindow,nAllPull,opt,bExact);
1361 i++;
1362 } while( (maxchange=calc_z(bsProfile, synthWindow, nAllPull, opt,bExact)) > opt->Tolerance || !bExact);
1363 printf("\tConverged in %d iterations. Final maximum change %g\n",i,maxchange);
1365 if (opt->bLog)
1366 prof_normalization_and_unit(bsProfile,opt);
1368 /* symmetrize profile around z=0 */
1369 if (opt->bSym)
1370 symmetrizeProfile(bsProfile,opt);
1372 /* save stuff to get average and stddev */
1373 for (i=0;i<opt->bins;i++)
1375 tmp=bsProfile[i];
1376 bsProfiles_av[i]+=tmp;
1377 bsProfiles_av2[i]+=tmp*tmp;
1378 fprintf(fp,"%e\t%e\n",(i+0.5)*opt->dz+opt->min,tmp);
1380 fprintf(fp,"&\n");
1382 ffclose(fp);
1384 /* write average and stddev */
1385 fp=xvgropen(fnres,"Average and stddev from bootstrapping","z",ylabel,opt->oenv);
1386 fprintf(fp,"@TYPE xydy\n");
1387 for (i=0;i<opt->bins;i++)
1389 bsProfiles_av [i]/=opt->nBootStrap;
1390 bsProfiles_av2[i]/=opt->nBootStrap;
1391 tmp=bsProfiles_av2[i]-sqr(bsProfiles_av[i]);
1392 stddev=(tmp>=0.) ? sqrt(tmp) : 0.; /* Catch rouding errors */
1393 fprintf(fp,"%e\t%e\t%e\n",(i+0.5)*opt->dz+opt->min,bsProfiles_av [i],stddev);
1395 ffclose(fp);
1396 printf("Wrote boot strap result to %s\n",fnres);
1399 int whaminFileType(char *fn)
1401 int len;
1402 len=strlen(fn);
1403 if (strcmp(fn+len-3,"tpr")==0)
1404 return whamin_tpr;
1405 else if (strcmp(fn+len-3,"xvg")==0 || strcmp(fn+len-6,"xvg.gz")==0)
1406 return whamin_pullxf;
1407 else if (strcmp(fn+len-3,"pdo")==0 || strcmp(fn+len-6,"pdo.gz")==0)
1408 return whamin_pdo;
1409 else
1410 gmx_fatal(FARGS,"Unknown file type of %s. Should be tpr, xvg, or pdo.\n",fn);
1411 return whamin_unknown;
1414 void read_wham_in(const char *fn,char ***filenamesRet, int *nfilesRet,
1415 t_UmbrellaOptions *opt)
1417 char **filename=0,tmp[STRLEN];
1418 int nread,sizenow,i,block=1;
1419 FILE *fp;
1421 fp=ffopen(fn,"r");
1422 nread=0;
1423 sizenow=0;
1424 while (fscanf(fp,"%s",tmp) != EOF)
1426 if (strlen(tmp)>=WHAM_MAXFILELEN)
1427 gmx_fatal(FARGS,"Filename too long. Only %d characters allowed\n",WHAM_MAXFILELEN);
1428 if (nread>=sizenow)
1430 sizenow+=block;
1431 srenew(filename,sizenow);
1432 for (i=sizenow-block;i<sizenow;i++)
1433 snew(filename[i],WHAM_MAXFILELEN);
1435 strcpy(filename[nread],tmp);
1436 if (opt->verbose)
1437 printf("Found file %s in %s\n",filename[nread],fn);
1438 nread++;
1440 *filenamesRet=filename;
1441 *nfilesRet=nread;
1445 FILE *open_pdo_pipe(const char *fn, t_UmbrellaOptions *opt,gmx_bool *bPipeOpen)
1447 char Buffer[1024],gunzip[1024],*Path=0;
1448 FILE *pipe=0;
1449 static gmx_bool bFirst=1;
1451 /* gzipped pdo file? */
1452 if ((strcmp(fn+strlen(fn)-3,".gz")==0))
1454 /* search gunzip executable */
1455 if(!(Path=getenv("GMX_PATH_GZIP")))
1457 if (gmx_fexist("/bin/gunzip"))
1458 sprintf(gunzip,"%s","/bin/gunzip");
1459 else if (gmx_fexist("/usr/bin/gunzip"))
1460 sprintf(gunzip,"%s","/usr/bin/gunzip");
1461 else
1462 gmx_fatal(FARGS,"Cannot find executable gunzip in /bin or /usr/bin.\n"
1463 "You may want to define the path to gunzip "
1464 "with the environment variable GMX_PATH_GZIP.",gunzip);
1466 else
1468 sprintf(gunzip,"%s/gunzip",Path);
1469 if (!gmx_fexist(gunzip))
1470 gmx_fatal(FARGS,"Cannot find executable %s. Please define the path to gunzip"
1471 " in the environmental varialbe GMX_PATH_GZIP.",gunzip);
1473 if (bFirst)
1475 printf("Using gunzig executable %s\n",gunzip);
1476 bFirst=0;
1478 if (!gmx_fexist(fn))
1480 gmx_fatal(FARGS,"File %s does not exist.\n",fn);
1482 sprintf(Buffer,"%s -c < %s",gunzip,fn);
1483 if (opt->verbose)
1484 printf("Executing command '%s'\n",Buffer);
1485 #ifdef HAVE_PIPES
1486 if((pipe=popen(Buffer,"r"))==NULL)
1488 gmx_fatal(FARGS,"Unable to open pipe to `%s'\n",Buffer);
1490 #else
1491 gmx_fatal(FARGS,"Cannot open a compressed file on platform without pipe support");
1492 #endif
1493 *bPipeOpen=TRUE;
1495 else{
1496 pipe=ffopen(fn,"r");
1497 *bPipeOpen=FALSE;
1500 return pipe;
1503 void pdo_close_file(FILE *fp)
1505 #ifdef HAVE_PIPES
1506 pclose(fp);
1507 #else
1508 ffclose(fp);
1509 #endif
1512 /* Reading pdo files */
1513 void read_pdo_files(char **fn, int nfiles, t_UmbrellaHeader* header,
1514 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1516 FILE *file;
1517 real mintmp,maxtmp,done=0.;
1518 int i;
1519 gmx_bool bPipeOpen;
1520 /* char Buffer0[1000]; */
1522 if(nfiles<1)
1523 gmx_fatal(FARGS,"No files found. Hick.");
1525 /* if min and max are not given, get min and max from the input files */
1526 if (opt->bAuto)
1528 printf("Automatic determination of boundaries from %d pdo files...\n",nfiles);
1529 opt->min=1e20;
1530 opt->max=-1e20;
1531 for(i=0;i<nfiles;++i)
1533 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1534 /*fgets(Buffer0,999,file);
1535 fprintf(stderr,"First line '%s'\n",Buffer0); */
1536 done=100.0*(i+1)/nfiles;
1537 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1538 if (opt->verbose)
1539 printf("\n");
1540 read_pdo_header(file,header,opt);
1541 /* here only determine min and max of this window */
1542 read_pdo_data(file,header,i,NULL,opt,TRUE,&mintmp,&maxtmp);
1543 if (maxtmp>opt->max)
1544 opt->max=maxtmp;
1545 if (mintmp<opt->min)
1546 opt->min=mintmp;
1547 if (bPipeOpen)
1548 pdo_close_file(file);
1549 else
1550 ffclose(file);
1552 printf("\n");
1553 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
1554 if (opt->bBoundsOnly)
1556 printf("Found option -boundsonly, now exiting.\n");
1557 exit (0);
1560 /* store stepsize in profile */
1561 opt->dz=(opt->max-opt->min)/opt->bins;
1563 /* Having min and max, we read in all files */
1564 /* Loop over all files */
1565 for(i=0;i<nfiles;++i)
1567 done=100.0*(i+1)/nfiles;
1568 printf("\rOpening %s ... [%2.0f%%]",fn[i],done); fflush(stdout);
1569 if (opt->verbose)
1570 printf("\n");
1571 file=open_pdo_pipe(fn[i],opt,&bPipeOpen);
1572 read_pdo_header(file,header,opt);
1573 /* load data into window */
1574 read_pdo_data(file,header,i,window,opt,FALSE,NULL,NULL);
1575 if ((window+i)->Ntot[0] == 0.0)
1576 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fn[i]);
1577 if (bPipeOpen)
1578 pdo_close_file(file);
1579 else
1580 ffclose(file);
1582 printf("\n");
1583 for(i=0;i<nfiles;++i)
1584 sfree(fn[i]);
1585 sfree(fn);
1588 #define int2YN(a) (((a)==0)?("N"):("Y"))
1590 void read_tpr_header(const char *fn,t_UmbrellaHeader* header,t_UmbrellaOptions *opt)
1592 t_inputrec ir;
1593 int i,ngrp,d;
1594 t_state state;
1595 static int first=1;
1597 /* printf("Reading %s \n",fn); */
1598 read_tpx_state(fn,&ir,&state,NULL,NULL);
1600 if (ir.ePull != epullUMBRELLA)
1601 gmx_fatal(FARGS,"This is not a tpr of an umbrella simulation. Found pull type \"%s\" "
1602 " (ir.ePull = %d)\n", epull_names[ir.ePull],ir.ePull);
1604 /* nr of pull groups */
1605 ngrp=ir.pull->ngrp;
1606 if (ngrp < 1)
1607 gmx_fatal(FARGS,"This is not a tpr of umbrella simulation. Found only %d pull groups\n",ngrp);
1609 header->npullgrps=ir.pull->ngrp;
1610 header->pull_geometry=ir.pull->eGeom;
1611 copy_ivec(ir.pull->dim,header->pull_dim);
1612 header->pull_ndim=header->pull_dim[0]+header->pull_dim[1]+header->pull_dim[2];
1613 if (header->pull_geometry==epullgPOS && header->pull_ndim>1)
1615 gmx_fatal(FARGS,"Found pull geometry 'position' and more than 1 pull dimension (%d).\n"
1616 "Hence, the pull potential does not correspond to a one-dimensional umbrella potential.\n"
1617 "If you have some special umbrella setup you may want to write your own pdo files\n"
1618 "and feed them into g_wham. Check g_wham -h !\n",header->pull_ndim);
1620 snew(header->k,ngrp);
1621 snew(header->init_dist,ngrp);
1622 snew(header->umbInitDist,ngrp);
1624 /* only z-direction with epullgCYL? */
1625 if (header->pull_geometry == epullgCYL)
1627 if (header->pull_dim[XX] || header->pull_dim[YY] || (!header->pull_dim[ZZ]))
1628 gmx_fatal(FARGS,"With pull geometry 'cylinder', expected pulling in Z direction only.\n"
1629 "However, found dimensions [%s %s %s]\n",
1630 int2YN(header->pull_dim[XX]),int2YN(header->pull_dim[YY]),
1631 int2YN(header->pull_dim[ZZ]));
1634 for (i=0;i<ngrp;i++)
1636 header->k[i]=ir.pull->grp[i+1].k;
1637 if (header->k[i]==0.0)
1638 gmx_fatal(FARGS,"Pull group %d has force constant of of 0.0 in %s.\n"
1639 "That doesn't seem to be an Umbrella tpr.\n",
1640 i,fn);
1641 copy_rvec(ir.pull->grp[i+1].init,header->init_dist[i]);
1643 /* initial distance to reference */
1644 switch(header->pull_geometry)
1646 case epullgPOS:
1647 for (d=0;d<DIM;d++)
1648 if (header->pull_dim[d])
1649 header->umbInitDist[i]=header->init_dist[i][d];
1650 break;
1651 case epullgCYL:
1652 /* umbrella distance stored in init_dist[i][0] for geometry cylinder (not in ...[i][ZZ]) */
1653 case epullgDIST:
1654 case epullgDIR:
1655 case epullgDIRPBC:
1656 header->umbInitDist[i]=header->init_dist[i][0];
1657 break;
1658 default:
1659 gmx_fatal(FARGS,"Pull geometry %s not supported\n",epullg_names[header->pull_geometry]);
1663 if (opt->verbose || first)
1665 printf("File %s, %d groups, geometry \"%s\", dimensions [%s %s %s], (%d dimensions)\n",
1666 fn,header->npullgrps,epullg_names[header->pull_geometry],
1667 int2YN(header->pull_dim[0]),int2YN(header->pull_dim[1]),int2YN(header->pull_dim[2]),
1668 header->pull_ndim);
1669 for (i=0;i<ngrp;i++)
1670 printf("\tgrp %d) k = %-5g position = %g\n",i,header->k[i],header->umbInitDist[i]);
1672 if (!opt->verbose && first)
1673 printf("\tUse option -v to see this output for all input tpr files\n");
1675 first=0;
1679 double dist_ndim(double **dx,int ndim,int line)
1681 int i;
1682 double r2=0.;
1683 for (i=0;i<ndim;i++)
1684 r2+=sqr(dx[i][line]);
1685 return sqrt(r2);
1688 void read_pull_xf(const char *fn, const char *fntpr, t_UmbrellaHeader * header,
1689 t_UmbrellaWindow * window,
1690 t_UmbrellaOptions *opt,
1691 gmx_bool bGetMinMax,real *mintmp,real *maxtmp)
1693 double **y=0,pos=0.,t,force,time0=0.,dt;
1694 int ny,nt,bins,ibin,i,g,dstep=1,nColPerGrp,nColRefOnce,nColRefEachGrp,nColExpect,ntot;
1695 real min,max,minfound=1e20,maxfound=-1e20;
1696 gmx_bool dt_ok,timeok,bHaveForce;
1697 const char *quantity;
1698 const int blocklen=4096;
1699 int *lennow=0;
1702 in force output pullf.xvg:
1703 No reference, one column per pull group
1704 in position output pullx.xvg (not cylinder)
1705 ndim reference, ndim columns per pull group
1706 in position output pullx.xvg (in geometry cylinder):
1707 ndim*2 columns per pull group (ndim for ref, ndim for group)
1710 nColPerGrp = opt->bPullx ? header->pull_ndim : 1;
1711 quantity = opt->bPullx ? "position" : "force";
1713 if (opt->bPullx)
1715 if (header->pull_geometry == epullgCYL)
1717 /* Geometry cylinder -> reference group before each pull group */
1718 nColRefEachGrp=header->pull_ndim;
1719 nColRefOnce=0;
1721 else
1723 /* Geometry NOT cylinder -> reference group only once after time column */
1724 nColRefEachGrp=0;
1725 nColRefOnce=header->pull_ndim;
1728 else /* read forces, no reference groups */
1730 nColRefEachGrp=0;
1731 nColRefOnce=0;
1734 nColExpect = 1 + nColRefOnce + header->npullgrps*(nColRefEachGrp+nColPerGrp);
1735 bHaveForce = opt->bPullf;
1737 /* With geometry "distance" or "distance_periodic", only force reading is supported so far.
1738 That avoids the somewhat tedious extraction of the right columns from the pullx files
1739 to compute the distances projection on the vector. Sorry for the laziness. */
1740 if ( (header->pull_geometry==epullgDIR || header->pull_geometry==epullgDIRPBC)
1741 && opt->bPullx)
1743 gmx_fatal(FARGS,"With pull geometries \"direction\" and \"direction_periodic\", only pull force "
1744 "reading \n(option -if) is supported at present, "
1745 "not pull position reading (options -ix).\nMake sure mdrun writes the pull "
1746 "forces (pullf.xvg files)\nand provide them to g_wham with option -if.",
1747 epullg_names[header->pull_geometry]);
1750 nt=read_xvg(fn,&y,&ny);
1752 /* Check consistency */
1753 if (nt<1)
1755 gmx_fatal(FARGS,"Empty pull %s file %s\n",quantity,fn);
1757 if (ny != nColExpect)
1759 gmx_fatal(FARGS,"Found %d pull groups in %s,\n but %d data columns in %s (expected %d)\n"
1760 "\nMaybe you confused options -ix and -if ?\n",
1761 header->npullgrps,fntpr,ny-1,fn,nColExpect-1);
1764 if (opt->verbose)
1765 printf("Found %d times and %d %s sets %s\n",nt,(ny-1)/nColPerGrp,quantity,fn);
1767 if (!bGetMinMax)
1769 bins=opt->bins;
1770 min=opt->min;
1771 max=opt->max;
1772 if (nt>1)
1774 window->dt=y[0][1]-y[0][0];
1776 else if (opt->nBootStrap && opt->tauBootStrap!=0.0)
1778 fprintf(stderr,"\n *** WARNING, Could not determine time step in %s\n",fn);
1781 /* Need to alocate memory and set up structure */
1782 window->nPull=header->npullgrps;
1783 window->nBin=bins;
1785 snew(window->Histo,window->nPull);
1786 snew(window->z,window->nPull);
1787 snew(window->k,window->nPull);
1788 snew(window->pos,window->nPull);
1789 snew(window->N, window->nPull);
1790 snew(window->Ntot, window->nPull);
1791 snew(window->g, window->nPull);
1792 snew(window->bsWeight, window->nPull);
1793 window->bContrib=0;
1795 if (opt->bCalcTauInt)
1796 snew(window->ztime,window->nPull);
1797 else
1798 window->ztime=NULL;
1799 snew(lennow,window->nPull);
1801 for(g=0;g<window->nPull;++g)
1803 window->z[g]=1;
1804 window->bsWeight[g]=1.;
1805 snew(window->Histo[g],bins);
1806 window->k[g]=header->k[g];
1807 window->N[g]=0;
1808 window->Ntot[g]=0;
1809 window->g[g]=1.;
1810 window->pos[g]=header->umbInitDist[g];
1811 if (opt->bCalcTauInt)
1812 window->ztime[g]=NULL;
1816 else
1817 { /* only determine min and max */
1818 minfound=1e20;
1819 maxfound=-1e20;
1820 min=max=bins=0; /* Get rid of warnings */
1823 for (i=0;i<nt;i++)
1825 /* Do you want that time frame? */
1826 t=1.0/1000*( (int) ((y[0][i]*1000) + 0.5)); /* round time to fs */
1828 /* get time step of pdo file and get dstep from opt->dt */
1829 if (i==0)
1831 time0=t;
1833 else if (i==1)
1835 dt=t-time0;
1836 if (opt->dt>0.0)
1838 dstep=(int)(opt->dt/dt+0.5);
1839 if (dstep==0)
1840 dstep=1;
1842 if (!bGetMinMax)
1843 window->dt=dt*dstep;
1846 dt_ok=(i%dstep == 0);
1847 timeok=(dt_ok && t >= opt->tmin && t <= opt->tmax);
1848 /*if (opt->verbose)
1849 printf(" time = %f, (tmin,tmax)=(%e,%e), dt_ok=%d timeok=%d\n",
1850 t,opt->tmin, opt->tmax, dt_ok,timeok); */
1852 if (timeok)
1854 for(g=0;g<header->npullgrps;++g)
1856 if (bHaveForce)
1858 /* y has 1 time column y[0] and one column per force y[1],...,y[nGrps] */
1859 force=y[g+1][i];
1860 pos= -force/header->k[g] + header->umbInitDist[g];
1862 else
1864 switch (header->pull_geometry)
1866 case epullgDIST:
1867 /* y has 1 time column y[0] and nColPerGrps columns per pull group;
1868 Distance to reference: */
1869 /* pos=dist_ndim(y+1+nColRef+g*nColPerGrp,header->pull_ndim,i); gmx 4.0 */
1870 pos=dist_ndim(y + 1 + nColRefOnce + g*nColPerGrp,header->pull_ndim,i);
1871 break;
1872 case epullgPOS:
1873 /* Columns
1874 Time ref[ndim] group1[ndim] group2[ndim] ... */
1875 case epullgCYL:
1876 /* Columns
1877 Time ref1[ndim] group1[ndim] ref2[ndim] group2[ndim] ... */
1879 /* * with geometry==position, we have the reference once (nColRefOnce==ndim), but
1880 no extra reference group columns before each group (nColRefEachGrp==0)
1882 * with geometry==cylinder, we have no initial ref group column (nColRefOnce==0),
1883 but ndim ref group colums before every group (nColRefEachGrp==ndim)
1884 Distance to reference: */
1885 pos=y[1 + nColRefOnce + nColRefEachGrp + g*(nColRefEachGrp+nColPerGrp)][i];
1886 break;
1887 default:
1888 gmx_fatal(FARGS,"Bad error, this error should have been catched before. Ups.\n");
1892 /* printf("grp %d dpos %f poseq %f pos %f \n",g,dpos,poseq,pos); */
1893 if (bGetMinMax)
1895 if (pos<minfound)
1896 minfound=pos;
1897 if (pos>maxfound)
1898 maxfound=pos;
1900 else
1902 if (opt->bCalcTauInt && !bGetMinMax)
1904 /* save time series for autocorrelation analysis */
1905 ntot=window->Ntot[g];
1906 /* printf("i %d, ntot %d, lennow[g] = %d\n",i,ntot,lennow[g]); */
1907 if (ntot>=lennow[g])
1909 lennow[g]+=blocklen;
1910 srenew(window->ztime[g],lennow[g]);
1912 window->ztime[g][ntot]=pos;
1915 ibin=(int) floor((pos-min)/(max-min)*bins);
1916 if (opt->bCycl)
1918 if (ibin<0)
1919 while ( (ibin+=bins) < 0);
1920 else if (ibin>=bins)
1921 while ( (ibin-=bins) >= bins);
1923 if(ibin >= 0 && ibin < bins)
1925 window->Histo[g][ibin]+=1.;
1926 window->N[g]++;
1928 window->Ntot[g]++;
1932 else if (t>opt->tmax)
1934 if (opt->verbose)
1935 printf("time %f larger than tmax %f, stop reading this pullx/pullf file\n",t,opt->tmax);
1936 break;
1940 if (bGetMinMax)
1942 *mintmp=minfound;
1943 *maxtmp=maxfound;
1945 sfree(lennow);
1946 for (i=0;i<ny;i++)
1947 sfree(y[i]);
1950 void read_tpr_pullxf_files(char **fnTprs,char **fnPull,int nfiles,
1951 t_UmbrellaHeader* header,
1952 t_UmbrellaWindow *window, t_UmbrellaOptions *opt)
1954 int i;
1955 real mintmp,maxtmp;
1957 printf("Reading %d tpr and pullf files\n",nfiles/2);
1959 /* min and max not given? */
1960 if (opt->bAuto)
1962 printf("Automatic determination of boundaries...\n");
1963 opt->min=1e20;
1964 opt->max=-1e20;
1965 for (i=0;i<nfiles; i++)
1967 if (whaminFileType(fnTprs[i]) != whamin_tpr)
1968 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
1969 read_tpr_header(fnTprs[i],header,opt);
1970 if (whaminFileType(fnPull[i]) != whamin_pullxf)
1971 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
1972 read_pull_xf(fnPull[i],fnTprs[i],header,NULL,opt,TRUE,&mintmp,&maxtmp);
1973 if (maxtmp>opt->max)
1974 opt->max=maxtmp;
1975 if (mintmp<opt->min)
1976 opt->min=mintmp;
1978 printf("\nDetermined boundaries to %f and %f\n\n",opt->min,opt->max);
1979 if (opt->bBoundsOnly)
1981 printf("Found option -boundsonly, now exiting.\n");
1982 exit (0);
1985 /* store stepsize in profile */
1986 opt->dz=(opt->max-opt->min)/opt->bins;
1988 for (i=0;i<nfiles; i++)
1990 if (whaminFileType(fnTprs[i]) != whamin_tpr)
1991 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a tpr file\n",i);
1992 read_tpr_header(fnTprs[i],header,opt);
1993 if (whaminFileType(fnPull[i]) != whamin_pullxf)
1994 gmx_fatal(FARGS,"Expected the %d'th file in input file to be a xvg (pullx/pullf) file\n",i);
1995 read_pull_xf(fnPull[i],fnTprs[i],header,window+i,opt,FALSE,NULL,NULL);
1996 if (window[i].Ntot[0] == 0.0)
1997 fprintf(stderr,"\nWARNING, no data points read from file %s (check -b option)\n", fnPull[i]);
2000 for (i=0;i<nfiles; i++)
2002 sfree(fnTprs[i]);
2003 sfree(fnPull[i]);
2005 sfree(fnTprs);
2006 sfree(fnPull);
2009 /* Note: Here we consider tau[int] := int_0^inf ACF(t) as the integrated autocorrelation time.
2010 The factor `g := 1 + 2*tau[int]` subsequently enters the uncertainty.
2012 void readIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt,
2013 const char* fn)
2015 int nlines,ny,i,ig;
2016 double **iact;
2018 printf("Readging Integrated autocorrelation times from %s ...\n",fn);
2019 nlines=read_xvg(fn,&iact,&ny);
2020 if (nlines!=nwins)
2021 gmx_fatal(FARGS,"Found %d lines with integrated autocorrelation times in %s.\nExpected %d",
2022 nlines,fn,nwins);
2023 for (i=0;i<nlines;i++){
2024 if (window[i].nPull != ny)
2025 gmx_fatal(FARGS,"You are providing autocorrelation times with option -iiact and the\n"
2026 "number of pull groups is different in different simulations. That is not\n"
2027 "supported yet. Sorry.\n");
2028 for (ig=0;ig<window[i].nPull;ig++){
2029 /* compare Kumar et al, J Comp Chem 13, 1011-1021 (1992) */
2030 window[i].g[ig]=1+2*iact[ig][i]/window[i].dt;
2032 if (iact[ig][i] <= 0.0)
2033 fprintf(stderr,"\nWARNING, IACT = %f (window %d, group %d)\n", iact[ig][i],i,ig);
2039 /* Smooth autocorreltion times along the reaction coordinate. This is useful
2040 if the ACT is subject to high uncertainty in case if limited sampling. Note
2041 that -in case of limited sampling- the ACT may be severely underestimated.
2042 Note: the g=1+2tau are overwritten.
2043 if opt->bAllowReduceIact==FALSE, the ACTs are never reduced, only increased
2044 by the smoothing
2046 void smoothIact(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2048 int i,ig,j,jg;
2049 double pos,dpos2,siglim,siglim2,gaufact,invtwosig2,w,weight,tausm;
2051 /* only evaluate within +- 3sigma of the Gausian */
2052 siglim=3.0*opt->sigSmoothIact;
2053 siglim2=dsqr(siglim);
2054 /* pre-factor of Gaussian */
2055 gaufact=1.0/(sqrt(2*M_PI)*opt->sigSmoothIact);
2056 invtwosig2=0.5/dsqr(opt->sigSmoothIact);
2058 for (i=0;i<nwins;i++)
2060 snew(window[i].tausmooth,window[i].nPull);
2061 for (ig=0;ig<window[i].nPull;ig++)
2063 tausm=0.;
2064 weight=0;
2065 pos=window[i].pos[ig];
2066 for (j=0;j<nwins;j++)
2068 for (jg=0;jg<window[j].nPull;jg++)
2070 dpos2=dsqr(window[j].pos[jg]-pos);
2071 if (dpos2<siglim2){
2072 w=gaufact*exp(-dpos2*invtwosig2);
2073 weight+=w;
2074 tausm+=w*window[j].tau[jg];
2075 /*printf("Weight %g dpos2=%g pos=%g gaufact=%g invtwosig2=%g\n",
2076 w,dpos2,pos,gaufact,invtwosig2); */
2080 tausm/=weight;
2081 if (opt->bAllowReduceIact || tausm>window[i].tau[ig])
2082 window[i].tausmooth[ig]=tausm;
2083 else
2084 window[i].tausmooth[ig]=window[i].tau[ig];
2085 window[i].g[ig] = 1+2*tausm/window[i].dt;
2090 /* try to compute the autocorrelation time for each umbrealla window */
2091 #define WHAM_AC_ZERO_LIMIT 0.05
2092 void calcIntegratedAutocorrelationTimes(t_UmbrellaWindow *window,int nwins,
2093 t_UmbrellaOptions *opt, const char *fn)
2095 int i,ig,ncorr,ntot,j,k,*count,restart;
2096 real *corr,c0,dt,timemax,tmp;
2097 real *ztime,av,tausteps;
2098 FILE *fp,*fpcorr=0;
2100 if (opt->verbose)
2101 fpcorr=xvgropen("hist_autocorr.xvg","Autocorrelation functions of umbrella windows",
2102 "time [ps]","autocorrelation function",opt->oenv);
2104 printf("\n");
2105 for (i=0;i<nwins;i++)
2107 printf("\rEstimating integrated autocorreltion times ... [%2.0f%%] ...",100.*(i+1)/nwins);
2108 fflush(stdout);
2109 ntot=window[i].Ntot[0];
2111 /* using half the maximum time as length of autocorrelation function */
2112 ncorr=ntot/2;
2113 if (ntot<10)
2114 gmx_fatal(FARGS,"Tryig to estimtate autocorrelation time from only %d"
2115 " points. Provide more pull data!",ntot);
2116 snew(corr,ncorr);
2117 /* snew(corrSq,ncorr); */
2118 snew(count,ncorr);
2119 dt=window[i].dt;
2120 timemax=dt*ncorr;
2121 snew(window[i].tau,window[i].nPull);
2122 restart=(int)(opt->acTrestart/dt+0.5);
2123 if (restart==0)
2124 restart=1;
2126 for (ig=0;ig<window[i].nPull;ig++)
2128 if (ntot != window[i].Ntot[ig])
2129 gmx_fatal(FARGS,"Encountered different nr of frames in different pull groups.\n"
2130 "That should not happen. (%d and %d)\n", ntot,window[i].Ntot[ig]);
2131 ztime=window[i].ztime[ig];
2133 /* calc autocorrelation function C(t) = < [z(tau)-<z>]*[z(tau+t)-<z>]> */
2134 for(j=0, av=0; (j<ntot); j++)
2135 av+=ztime[j];
2136 av/=ntot;
2137 for(k=0; (k<ncorr); k++)
2139 corr[k]=0.;
2140 count[k]=0;
2142 for(j=0; (j<ntot); j+=restart)
2143 for(k=0; (k<ncorr) && (j+k < ntot); k++)
2145 tmp=(ztime[j]-av)*(ztime[j+k]-av);
2146 corr [k] += tmp;
2147 /* corrSq[k] += tmp*tmp; */
2148 count[k]++;
2150 /* divide by nr of frames for each time displacement */
2151 for(k=0; (k<ncorr); k++)
2153 /* count probably = (ncorr-k+(restart-1))/restart; */
2154 corr[k] = corr[k]/count[k];
2155 /* variance of autocorrelation function */
2156 /* corrSq[k]=corrSq[k]/count[k]; */
2158 /* normalize such that corr[0] == 0 */
2159 c0=1./corr[0];
2160 for(k=0; (k<ncorr); k++)
2162 corr[k]*=c0;
2163 /* corrSq[k]*=c0*c0; */
2166 /* write ACFs in verbose mode */
2167 if (fpcorr)
2169 for(k=0; (k<ncorr); k++)
2170 fprintf(fpcorr,"%g %g\n",k*dt,corr[k]);
2171 fprintf(fpcorr,"&\n");
2174 /* esimate integrated correlation time, fitting is too unstable */
2175 tausteps = 0.5*corr[0];
2176 /* consider corr below WHAM_AC_ZERO_LIMIT as noise */
2177 for(j=1; (j<ncorr) && (corr[j]>WHAM_AC_ZERO_LIMIT); j++)
2178 tausteps += corr[j];
2180 /* g = 1+2*tau, see. Ferrenberg/Swendsen, PRL 63:1195 (1989) or
2181 Kumar et al, eq. 28 ff. */
2182 window[i].tau[ig] = tausteps*dt;
2183 window[i].g[ig] = 1+2*tausteps;
2184 /* printf("win %d, group %d, estimated correlation time = %g ps\n",i,ig,window[i].tau[ig]); */
2185 } /* ig loop */
2186 sfree(corr);
2187 sfree(count);
2189 printf(" done\n");
2190 if (fpcorr)
2191 ffclose(fpcorr);
2193 /* plot IACT along reaction coordinate */
2194 fp=xvgropen(fn,"Integrated autocorrelation times","z","IACT [ps]",opt->oenv);
2195 fprintf(fp,"@ s0 symbol 1\n@ s0 symbol size 0.5\n@ s0 line linestyle 0\n");
2196 fprintf(fp,"# WIN tau(gr1) tau(gr2) ...\n");
2197 for (i=0;i<nwins;i++)
2199 fprintf(fp,"# %3d ",i);
2200 for (ig=0;ig<window[i].nPull;ig++)
2201 fprintf(fp," %11g",window[i].tau[ig]);
2202 fprintf(fp,"\n");
2204 for (i=0;i<nwins;i++)
2205 for (ig=0;ig<window[i].nPull;ig++)
2206 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tau[ig]);
2207 if (opt->sigSmoothIact > 0.0)
2209 printf("Smoothing autocorrelation times along reaction coordinate with Gaussian of sig = %g\n",
2210 opt->sigSmoothIact);
2211 /* smooth IACT along reaction coordinate and overwrite g=1+2tau */
2212 smoothIact(window,nwins,opt);
2213 fprintf(fp,"&\n@ s1 symbol 1\n@ s1 symbol size 0.5\n@ s1 line linestyle 0\n");
2214 fprintf(fp,"@ s1 symbol color 2\n");
2215 for (i=0;i<nwins;i++)
2216 for (ig=0;ig<window[i].nPull;ig++)
2217 fprintf(fp,"%8g %8g\n",window[i].pos[ig],window[i].tausmooth[ig]);
2219 ffclose(fp);
2220 printf("Wrote %s\n",fn);
2223 /* compute average and sigma of each umbrella window */
2224 void averageSigma(t_UmbrellaWindow *window,int nwins,t_UmbrellaOptions *opt)
2226 int i,ig,ntot,k;
2227 real av,sum2,sig,diff,*ztime,nSamplesIndep;
2229 for (i=0;i<nwins;i++)
2231 snew(window[i].aver, window[i].nPull);
2232 snew(window[i].sigma,window[i].nPull);
2234 ntot=window[i].Ntot[0];
2235 for (ig=0;ig<window[i].nPull;ig++)
2237 ztime=window[i].ztime[ig];
2238 for (k=0, av=0.; k<ntot; k++)
2239 av+=ztime[k];
2240 av/=ntot;
2241 for (k=0, sum2=0.; k<ntot; k++)
2243 diff=ztime[k]-av;
2244 sum2+=diff*diff;
2246 sig=sqrt(sum2/ntot);
2247 window[i].aver[ig]=av;
2249 /* Note: This estimate for sigma is biased from the limited sampling.
2250 Correct sigma by n/(n-1) where n = number of independent
2251 samples. Only possible if IACT is known.
2253 if (window[i].tau)
2255 nSamplesIndep=window[i].N[ig]/(window[i].tau[ig]/window[i].dt);
2256 window[i].sigma[ig]=sig * nSamplesIndep/(nSamplesIndep-1);
2258 else
2259 window[i].sigma[ig]=sig;
2260 printf("win %d, aver = %f sig = %f\n",i,av,window[i].sigma[ig]);
2266 /* Use histograms to compute average force on pull group.
2267 In addition, compute the sigma of the histogram.
2269 void computeAverageForce(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt)
2271 int i,j,bins=opt->bins,k;
2272 double dz,min=opt->min,max=opt->max,displAv,displAv2,temp,distance,ztot,ztot_half,w,weight;
2273 double posmirrored;
2275 dz=(max-min)/bins;
2276 ztot=opt->max-min;
2277 ztot_half=ztot/2;
2279 /* Compute average displacement from histograms */
2280 for(j=0;j<nWindows;++j)
2282 snew(window[j].forceAv,window[j].nPull);
2283 for(k=0;k<window[j].nPull;++k)
2285 displAv = 0.0;
2286 displAv2 = 0.0;
2287 weight = 0.0;
2288 for(i=0;i<opt->bins;++i)
2290 temp=(1.0*i+0.5)*dz+min;
2291 distance = temp - window[j].pos[k];
2292 if (opt->bCycl)
2293 { /* in cyclic wham: */
2294 if (distance > ztot_half) /* |distance| < ztot_half */
2295 distance-=ztot;
2296 else if (distance < -ztot_half)
2297 distance+=ztot;
2299 w=window[j].Histo[k][i]/window[j].g[k];
2300 displAv += w*distance;
2301 displAv2 += w*sqr(distance);
2302 weight+=w;
2303 /* Are we near min or max? We are getting wron forces from the histgrams since
2304 the histigrams are zero outside [min,max). Therefore, assume that the position
2305 on the other side of the histomgram center is equally likely. */
2306 if (!opt->bCycl)
2308 posmirrored=window[j].pos[k]-distance;
2309 if (posmirrored>=max || posmirrored<min)
2311 displAv += -w*distance;
2312 displAv2 += w*sqr(-distance);
2313 weight+=w;
2317 displAv /= weight;
2318 displAv2 /= weight;
2320 /* average force from average displacement */
2321 window[j].forceAv[k] = displAv*window[j].k[k];
2322 /* sigma from average square displacement */
2323 /* window[j].sigma [k] = sqrt(displAv2); */
2324 /* printf("Win %d, sigma = %f\n",j,sqrt(displAv2)); */
2329 /* Check if the complete reaction coordinate is covered by the histograms */
2330 void checkReactionCoordinateCovered(t_UmbrellaWindow *window,int nwins,
2331 t_UmbrellaOptions *opt)
2333 int i,ig,j,bins=opt->bins,bBoundary;
2334 real avcount=0,z,relcount,*count;
2335 snew(count,opt->bins);
2337 for(j=0;j<opt->bins;++j)
2339 for (i=0;i<nwins;i++){
2340 for (ig=0;ig<window[i].nPull;ig++)
2341 count[j]+=window[i].Histo[ig][j];
2343 avcount+=1.0*count[j];
2345 avcount/=bins;
2346 for(j=0;j<bins;++j)
2348 relcount=count[j]/avcount;
2349 z=(j+0.5)*opt->dz+opt->min;
2350 bBoundary=( j<bins/20 || (bins-j)>bins/20 );
2351 /* check for bins with no data */
2352 if (count[j] == 0)
2353 fprintf(stderr, "\nWARNING, no data point in bin %d (z=%g) !\n"
2354 "You may not get a reasonable profile. Check your histograms!\n",j,z);
2355 /* and check for poor sampling */
2356 else if (relcount<0.005 && !bBoundary)
2357 fprintf(stderr, "Warning, poor sampling bin %d (z=%g). Check your histograms!\n",j,z);
2359 sfree(count);
2363 void guessPotByIntegration(t_UmbrellaWindow *window,int nWindows,t_UmbrellaOptions *opt,
2364 char *fn)
2366 int i,j,ig,bins=opt->bins,nHist,winmin,groupmin;
2367 double dz,min=opt->min,*pot,pos,hispos,dist,diff,fAv,distmin,*f;
2368 FILE *fp;
2370 dz=(opt->max-min)/bins;
2372 printf("Getting initial potential by integration.\n");
2374 /* Compute average displacement from histograms */
2375 computeAverageForce(window,nWindows,opt);
2377 /* Get force for each bin from all histograms in this bin, or, alternatively,
2378 if no histograms are inside this bin, from the closest histogram */
2379 snew(pot,bins);
2380 snew(f,bins);
2381 for(j=0;j<opt->bins;++j)
2383 pos=(1.0*j+0.5)*dz+min;
2384 nHist=0;
2385 fAv=0.;
2386 distmin=1e20;
2387 groupmin=winmin=0;
2388 for (i=0;i<nWindows;i++)
2390 for (ig=0;ig<window[i].nPull;ig++)
2392 hispos=window[i].pos[ig];
2393 dist=fabs(hispos-pos);
2394 /* average force within bin */
2395 if (dist < dz/2)
2397 nHist++;
2398 fAv+=window[i].forceAv[ig];
2400 /* at the same time, rememer closest histogram */
2401 if (dist<distmin){
2402 winmin=i;
2403 groupmin=ig;
2404 distmin=dist;
2408 /* if no histogram found in this bin, use closest histogram */
2409 if (nHist>0)
2410 fAv=fAv/nHist;
2411 else{
2412 fAv=window[winmin].forceAv[groupmin];
2414 f[j]=fAv;
2416 for(j=1;j<opt->bins;++j)
2417 pot[j] = pot[j-1] - 0.5*dz*(f[j-1]+f[j]);
2419 /* cyclic wham: linearly correct possible offset */
2420 if (opt->bCycl)
2422 diff=(pot[bins-1]-pot[0])/(bins-1);
2423 for(j=1;j<opt->bins;++j)
2424 pot[j]-=j*diff;
2426 if (opt->verbose)
2428 fp=xvgropen("pmfintegrated.xvg","PMF from force integration","z","PMF [kJ/mol]",opt->oenv);
2429 for(j=0;j<opt->bins;++j)
2430 fprintf(fp,"%g %g\n",(j+0.5)*dz+opt->min,pot[j]);
2431 ffclose(fp);
2432 printf("verbose mode: wrote %s with PMF from interated forces\n","pmfintegrated.xvg");
2435 /* get initial z=exp(-F[i]/kT) from integrated potential, where F[i] denote the free
2436 energy offsets which are usually determined by wham
2437 First: turn pot into probabilities:
2439 for(j=0;j<opt->bins;++j)
2440 pot[j]=exp(-pot[j]/(8.314e-3*opt->Temperature));
2441 calc_z(pot,window,nWindows,opt,TRUE);
2443 sfree(pot);
2444 sfree(f);
2448 int gmx_wham(int argc,char *argv[])
2450 const char *desc[] = {
2451 "This is an analysis program that implements the Weighted",
2452 "Histogram Analysis Method (WHAM). It is intended to analyze",
2453 "output files generated by umbrella sampling simulations to ",
2454 "compute a potential of mean force (PMF). [PAR] ",
2455 "At present, three input modes are supported.[BR]",
2456 "[TT]*[tt] With option [TT]-it[tt], the user provides a file which contains the",
2457 " file names of the umbrella simulation run-input files ([TT].tpr[tt] files),",
2458 " AND, with option [TT]-ix[tt], a file which contains file names of",
2459 " the pullx [TT]mdrun[tt] output files. The [TT].tpr[tt] and pullx files must",
2460 " be in corresponding order, i.e. the first [TT].tpr[tt] created the",
2461 " first pullx, etc.[BR]",
2462 "[TT]*[tt] Same as the previous input mode, except that the the user",
2463 " provides the pull force output file names ([TT]pullf.xvg[tt]) with option [TT]-if[tt].",
2464 " From the pull force the position in the umbrella potential is",
2465 " computed. This does not work with tabulated umbrella potentials.[BR]"
2466 "[TT]*[tt] With option [TT]-ip[tt], the user provides file names of (gzipped) [TT].pdo[tt] files, i.e.",
2467 " the GROMACS 3.3 umbrella output files. If you have some unusual"
2468 " reaction coordinate you may also generate your own [TT].pdo[tt] files and",
2469 " feed them with the [TT]-ip[tt] option into to [TT]g_wham[tt]. The [TT].pdo[tt] file header",
2470 " must be similar to the following:[PAR]",
2471 "[TT]# UMBRELLA 3.0[BR]",
2472 "# Component selection: 0 0 1[BR]",
2473 "# nSkip 1[BR]",
2474 "# Ref. Group 'TestAtom'[BR]",
2475 "# Nr. of pull groups 2[BR]",
2476 "# Group 1 'GR1' Umb. Pos. 5.0 Umb. Cons. 1000.0[BR]",
2477 "# Group 2 'GR2' Umb. Pos. 2.0 Umb. Cons. 500.0[BR]",
2478 "#####[tt][PAR]",
2479 "The number of pull groups, umbrella positions, force constants, and names ",
2480 "may (of course) differ. Following the header, a time column and ",
2481 "a data column for each pull group follows (i.e. the displacement",
2482 "with respect to the umbrella center). Up to four pull groups are possible ",
2483 "per [TT].pdo[tt] file at present.[PAR]",
2484 "By default, the output files are[BR]",
2485 " [TT]-o[tt] PMF output file[BR]",
2486 " [TT]-hist[tt] Histograms output file[BR]",
2487 "Always check whether the histograms sufficiently overlap.[PAR]",
2488 "The umbrella potential is assumed to be harmonic and the force constants are ",
2489 "read from the [TT].tpr[tt] or [TT].pdo[tt] files. If a non-harmonic umbrella force was applied ",
2490 "a tabulated potential can be provided with [TT]-tab[tt].[PAR]",
2491 "WHAM OPTIONS[BR]------------[BR]",
2492 " [TT]-bins[tt] Number of bins used in analysis[BR]",
2493 " [TT]-temp[tt] Temperature in the simulations[BR]",
2494 " [TT]-tol[tt] Stop iteration if profile (probability) changed less than tolerance[BR]",
2495 " [TT]-auto[tt] Automatic determination of boundaries[BR]",
2496 " [TT]-min,-max[tt] Boundaries of the profile [BR]",
2497 "The data points that are used to compute the profile",
2498 "can be restricted with options [TT]-b[tt], [TT]-e[tt], and [TT]-dt[tt]. ",
2499 "Adjust [TT]-b[tt] to ensure sufficient equilibration in each ",
2500 "umbrella window.[PAR]",
2501 "With [TT]-log[tt] (default) the profile is written in energy units, otherwise ",
2502 "(with [TT]-nolog[tt]) as probability. The unit can be specified with [TT]-unit[tt]. ",
2503 "With energy output, the energy in the first bin is defined to be zero. ",
2504 "If you want the free energy at a different ",
2505 "position to be zero, set [TT]-zprof0[tt] (useful with bootstrapping, see below).[PAR]",
2506 "For cyclic or periodic reaction coordinates (dihedral angle, channel PMF",
2507 "without osmotic gradient), the option [TT]-cycl[tt] is useful. [TT]g_wham[tt] will make use of the ",
2508 "periodicity of the system and generate a periodic PMF. The first and the last bin of the",
2509 "reaction coordinate will assumed be be neighbors.[PAR]",
2510 "Option [TT]-sym[tt] symmetrizes the profile around z=0 before output, ",
2511 "which may be useful for, e.g. membranes.[PAR]",
2512 "AUTOCORRELATIONS[BR]----------------[BR]",
2513 "With [TT]-ac[tt], [TT]g_wham[tt] estimates the integrated autocorrelation ",
2514 "time (IACT) [GRK]tau[grk] for each umbrella window and weights the respective ",
2515 "window with 1/[1+2*[GRK]tau[grk]/dt]. The IACTs are written ",
2516 "to the file defined with [TT]-oiact[tt]. In verbose mode, all ",
2517 "autocorrelation functions (ACFs) are written to [TT]hist_autocorr.xvg[tt]. ",
2518 "Because the IACTs can be severely underestimated in case of limited ",
2519 "sampling, option [TT]-acsig[tt] allows one to smooth the IACTs along the ",
2520 "reaction coordinate with a Gaussian ([GRK]sigma[grk] provided with [TT]-acsig[tt], ",
2521 "see output in [TT]iact.xvg[tt]). Note that the IACTs are estimated by simple ",
2522 "integration of the ACFs while the ACFs are larger 0.05.",
2523 "If you prefer to compute the IACTs by a more sophisticated (but possibly ",
2524 "less robust) method such as fitting to a double exponential, you can ",
2525 "compute the IACTs with [TT]g_analyze[tt] and provide them to [TT]g_wham[tt] with the file ",
2526 "[TT]iact-in.dat[tt] (option [TT]-iiact[tt]), which should contain one line per ",
2527 "input file ([TT].pdo[tt] or pullx/f file) and one column per pull group in the respective file.[PAR]",
2528 "ERROR ANALYSIS[BR]--------------[BR]",
2529 "Statistical errors may be estimated with bootstrap analysis. Use it with care, ",
2530 "otherwise the statistical error may be substantially underestimated. ",
2531 "More background and examples for the bootstrap technique can be found in ",
2532 "Hub, de Groot and Van der Spoel, JCTC (2010) 6: 3713-3720.[BR]",
2533 "[TT]-nBootstrap[tt] defines the number of bootstraps (use, e.g., 100). ",
2534 "Four bootstrapping methods are supported and ",
2535 "selected with [TT]-bs-method[tt].[BR]",
2536 " (1) [TT]b-hist[tt] Default: complete histograms are considered as independent ",
2537 "data points, and the bootstrap is carried out by assigning random weights to the ",
2538 "histograms (\"Bayesian bootstrap\"). Note that each point along the reaction coordinate",
2539 "must be covered by multiple independent histograms (e.g. 10 histograms), otherwise the ",
2540 "statistical error is underestimated.[BR]",
2541 " (2) [TT]hist[tt] Complete histograms are considered as independent data points. ",
2542 "For each bootstrap, N histograms are randomly chosen from the N given histograms ",
2543 "(allowing duplication, i.e. sampling with replacement).",
2544 "To avoid gaps without data along the reaction coordinate blocks of histograms ",
2545 "([TT]-histbs-block[tt]) may be defined. In that case, the given histograms are ",
2546 "divided into blocks and only histograms within each block are mixed. Note that ",
2547 "the histograms within each block must be representative for all possible histograms, ",
2548 "otherwise the statistical error is underestimated.[BR]",
2549 " (3) [TT]traj[tt] The given histograms are used to generate new random trajectories,",
2550 "such that the generated data points are distributed according the given histograms ",
2551 "and properly autocorrelated. The autocorrelation time (ACT) for each window must be ",
2552 "known, so use [TT]-ac[tt] or provide the ACT with [TT]-iiact[tt]. If the ACT of all ",
2553 "windows are identical (and known), you can also provide them with [TT]-bs-tau[tt]. ",
2554 "Note that this method may severely underestimate the error in case of limited sampling, ",
2555 "that is if individual histograms do not represent the complete phase space at ",
2556 "the respective positions.[BR]",
2557 " (4) [TT]traj-gauss[tt] The same as method [TT]traj[tt], but the trajectories are ",
2558 "not bootstrapped from the umbrella histograms but from Gaussians with the average ",
2559 "and width of the umbrella histograms. That method yields similar error estimates ",
2560 "like method [TT]traj[tt].[PAR]"
2561 "Bootstrapping output:[BR]",
2562 " [TT]-bsres[tt] Average profile and standard deviations[BR]",
2563 " [TT]-bsprof[tt] All bootstrapping profiles[BR]",
2564 "With [TT]-vbs[tt] (verbose bootstrapping), the histograms of each bootstrap are written, ",
2565 "and, with bootstrap method [TT]traj[tt], the cumulative distribution functions of ",
2566 "the histograms."
2569 const char *en_unit[]={NULL,"kJ","kCal","kT",NULL};
2570 const char *en_unit_label[]={"","E (kJ mol\\S-1\\N)","E (kcal mol\\S-1\\N)","E (kT)",NULL};
2571 const char *en_bsMethod[]={ NULL,"b-hist", "hist", "traj", "traj-gauss", NULL };
2573 static t_UmbrellaOptions opt;
2575 t_pargs pa[] = {
2576 { "-min", FALSE, etREAL, {&opt.min},
2577 "Minimum coordinate in profile"},
2578 { "-max", FALSE, etREAL, {&opt.max},
2579 "Maximum coordinate in profile"},
2580 { "-auto", FALSE, etBOOL, {&opt.bAuto},
2581 "Determine min and max automatically"},
2582 { "-bins",FALSE, etINT, {&opt.bins},
2583 "Number of bins in profile"},
2584 { "-temp", FALSE, etREAL, {&opt.Temperature},
2585 "Temperature"},
2586 { "-tol", FALSE, etREAL, {&opt.Tolerance},
2587 "Tolerance"},
2588 { "-v", FALSE, etBOOL, {&opt.verbose},
2589 "Verbose mode"},
2590 { "-b", FALSE, etREAL, {&opt.tmin},
2591 "First time to analyse (ps)"},
2592 { "-e", FALSE, etREAL, {&opt.tmax},
2593 "Last time to analyse (ps)"},
2594 { "-dt", FALSE, etREAL, {&opt.dt},
2595 "Analyse only every dt ps"},
2596 { "-histonly", FALSE, etBOOL, {&opt.bHistOnly},
2597 "Write histograms and exit"},
2598 { "-boundsonly", FALSE, etBOOL, {&opt.bBoundsOnly},
2599 "Determine min and max and exit (with [TT]-auto[tt])"},
2600 { "-log", FALSE, etBOOL, {&opt.bLog},
2601 "Calculate the log of the profile before printing"},
2602 { "-unit", FALSE, etENUM, {en_unit},
2603 "Energy unit in case of log output" },
2604 { "-zprof0", FALSE, etREAL, {&opt.zProf0},
2605 "Define profile to 0.0 at this position (with [TT]-log[tt])"},
2606 { "-cycl", FALSE, etBOOL, {&opt.bCycl},
2607 "Create cyclic/periodic profile. Assumes min and max are the same point."},
2608 { "-sym", FALSE, etBOOL, {&opt.bSym},
2609 "Symmetrize profile around z=0"},
2610 { "-hist-eq", FALSE, etBOOL, {&opt.bHistEq},
2611 "HIDDENEnforce equal weight for all histograms. (Non-Weighed-HAM)"},
2612 { "-ac", FALSE, etBOOL, {&opt.bCalcTauInt},
2613 "Calculate integrated autocorrelation times and use in wham"},
2614 { "-acsig", FALSE, etREAL, {&opt.sigSmoothIact},
2615 "Smooth autocorrelation times along reaction coordinate with Gaussian of this [GRK]sigma[grk]"},
2616 { "-ac-trestart", FALSE, etREAL, {&opt.acTrestart},
2617 "When computing autocorrelation functions, restart computing every .. (ps)"},
2618 { "-acred", FALSE, etBOOL, {&opt.bAllowReduceIact},
2619 "HIDDENWhen smoothing the ACTs, allow to reduce ACTs. Otherwise, only increase ACTs "
2620 "during smoothing"},
2621 { "-nBootstrap", FALSE, etINT, {&opt.nBootStrap},
2622 "nr of bootstraps to estimate statistical uncertainty (e.g., 200)" },
2623 { "-bs-method", FALSE, etENUM, {en_bsMethod},
2624 "Bootstrap method" },
2625 { "-bs-tau", FALSE, etREAL, {&opt.tauBootStrap},
2626 "Autocorrelation time (ACT) assumed for all histograms. Use option [TT]-ac[tt] if ACT is unknown."},
2627 { "-bs-seed", FALSE, etINT, {&opt.bsSeed},
2628 "Seed for bootstrapping. (-1 = use time)"},
2629 { "-histbs-block", FALSE, etINT, {&opt.histBootStrapBlockLength},
2630 "When mixing histograms only mix within blocks of [TT]-histbs-block[tt]."},
2631 { "-vbs", FALSE, etBOOL, {&opt.bs_verbose},
2632 "Verbose bootstrapping. Print the CDFs and a histogram file for each bootstrap."},
2633 { "-stepout", FALSE, etINT, {&opt.stepchange},
2634 "HIDDENWrite maximum change every ... (set to 1 with [TT]-v[tt])"},
2635 { "-updateContr", FALSE, etINT, {&opt.stepUpdateContrib},
2636 "HIDDENUpdate table with significan contributions to WHAM every ... iterations"},
2639 t_filenm fnm[] = {
2640 { efDAT, "-ix","pullx-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2641 { efDAT, "-if","pullf-files",ffOPTRD}, /* wham input: pullf.xvg's and tprs */
2642 { efDAT, "-it","tpr-files",ffOPTRD}, /* wham input: tprs */
2643 { efDAT, "-ip","pdo-files",ffOPTRD}, /* wham input: pdo files (gmx3 style) */
2644 { efXVG, "-o", "profile", ffWRITE }, /* output file for profile */
2645 { efXVG, "-hist","histo", ffWRITE}, /* output file for histograms */
2646 { efXVG, "-oiact","iact",ffOPTWR}, /* writing integrated autocorrelation times */
2647 { efDAT, "-iiact","iact-in",ffOPTRD}, /* reading integrated autocorrelation times */
2648 { efXVG, "-bsres","bsResult", ffOPTWR}, /* average and errors of bootstrap analysis */
2649 { efXVG, "-bsprof","bsProfs", ffOPTWR}, /* output file for bootstrap profiles */
2650 { efDAT, "-tab","umb-pot",ffOPTRD}, /* Tabulated umbrella potential (if not harmonic) */
2653 int i,j,l,nfiles,nwins,nfiles2;
2654 t_UmbrellaHeader header;
2655 t_UmbrellaWindow * window=NULL;
2656 double *profile,maxchange=1e20;
2657 gmx_bool bMinSet,bMaxSet,bAutoSet,bExact=FALSE;
2658 char **fninTpr,**fninPull,**fninPdo;
2659 const char *fnPull;
2660 FILE *histout,*profout;
2661 char ylabel[256],title[256];
2663 #define NFILE asize(fnm)
2665 CopyRight(stderr,argv[0]);
2667 opt.bins=200;
2668 opt.verbose=FALSE;
2669 opt.bHistOnly=FALSE;
2670 opt.bCycl=FALSE;
2671 opt.tmin=50;
2672 opt.tmax=1e20;
2673 opt.dt=0.0;
2674 opt.min=0;
2675 opt.max=0;
2676 opt.bAuto=TRUE;
2678 /* bootstrapping stuff */
2679 opt.nBootStrap=0;
2680 opt.bsMethod=bsMethod_hist;
2681 opt.tauBootStrap=0.0;
2682 opt.bsSeed=-1;
2683 opt.histBootStrapBlockLength=8;
2684 opt.bs_verbose=FALSE;
2686 opt.bLog=TRUE;
2687 opt.unit=en_kJ;
2688 opt.zProf0=0.;
2689 opt.Temperature=298;
2690 opt.Tolerance=1e-6;
2691 opt.bBoundsOnly=FALSE;
2692 opt.bSym=FALSE;
2693 opt.bCalcTauInt=FALSE;
2694 opt.sigSmoothIact=0.0;
2695 opt.bAllowReduceIact=TRUE;
2696 opt.bInitPotByIntegration=TRUE;
2697 opt.acTrestart=1.0;
2698 opt.stepchange=100;
2699 opt.stepUpdateContrib=100;
2701 parse_common_args(&argc,argv,PCA_BE_NICE,
2702 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&opt.oenv);
2704 opt.unit=nenum(en_unit);
2705 opt.bsMethod=nenum(en_bsMethod);
2707 opt.bProf0Set=opt2parg_bSet("-zprof0", asize(pa), pa);
2709 opt.bTab=opt2bSet("-tab",NFILE,fnm);
2710 opt.bPdo=opt2bSet("-ip",NFILE,fnm);
2711 opt.bTpr=opt2bSet("-it",NFILE,fnm);
2712 opt.bPullx=opt2bSet("-ix",NFILE,fnm);
2713 opt.bPullf=opt2bSet("-if",NFILE,fnm);
2714 opt.bTauIntGiven=opt2bSet("-iiact",NFILE,fnm);
2715 if (opt.bTab && opt.bPullf)
2716 gmx_fatal(FARGS,"Force input does not work with tabulated potentials. "
2717 "Provide pullx.xvg or pdo files!");
2719 #define WHAMBOOLXOR(a,b) ( ((!(a))&&(b)) || ((a)&&(!(b))))
2720 if (!opt.bPdo && !WHAMBOOLXOR(opt.bPullx,opt.bPullf))
2721 gmx_fatal(FARGS,"Give either pullx (-ix) OR pullf (-if) data. Not both.");
2722 if ( !opt.bPdo && !(opt.bTpr || opt.bPullf || opt.bPullx))
2723 gmx_fatal(FARGS,"g_wham supports three input modes, pullx, pullf, or pdo file input."
2724 "\n\n Check g_wham -h !");
2726 opt.fnPdo=opt2fn("-ip",NFILE,fnm);
2727 opt.fnTpr=opt2fn("-it",NFILE,fnm);
2728 opt.fnPullf=opt2fn("-if",NFILE,fnm);
2729 opt.fnPullx=opt2fn("-ix",NFILE,fnm);
2731 bMinSet = opt2parg_bSet("-min", asize(pa), pa);
2732 bMaxSet = opt2parg_bSet("-max", asize(pa), pa);
2733 bAutoSet = opt2parg_bSet("-auto", asize(pa), pa);
2734 if ( (bMinSet || bMaxSet) && bAutoSet)
2735 gmx_fatal(FARGS,"With -auto, do not give -min or -max\n");
2737 if ( (bMinSet && !bMaxSet) || (!bMinSet && bMaxSet))
2738 gmx_fatal(FARGS,"When giving -min, you must give -max (and vice versa), too\n");
2740 if (bMinSet && opt.bAuto)
2742 printf("Note: min and max given, switching off -auto.\n");
2743 opt.bAuto=FALSE;
2746 if (opt.bTauIntGiven && opt.bCalcTauInt)
2747 gmx_fatal(FARGS,"Either read (option -iiact) or calculate (option -ac) the\n"
2748 "the autocorrelation times. Not both.");
2750 if (opt.tauBootStrap>0.0 && opt2parg_bSet("-ac",asize(pa), pa))
2751 gmx_fatal(FARGS,"Either compute autocorrelation times (ACTs) (option -ac) or "
2752 "provide it with -bs-tau for bootstrapping. Not Both.\n");
2753 if (opt.tauBootStrap>0.0 && opt2bSet("-iiact",NFILE,fnm))
2754 gmx_fatal(FARGS,"Either provide autocorrelation times (ACTs) with file iact-in.dat "
2755 "(option -iiact) or define all ACTs with -bs-tau for bootstrapping\n. Not Both.");
2758 /* Reading gmx4 pull output and tpr files */
2759 if (opt.bTpr || opt.bPullf || opt.bPullx)
2761 read_wham_in(opt.fnTpr,&fninTpr,&nfiles,&opt);
2763 fnPull=opt.bPullf ? opt.fnPullf : opt.fnPullx;
2764 read_wham_in(fnPull,&fninPull,&nfiles2,&opt);
2765 printf("Found %d tpr and %d pull %s files in %s and %s, respectively\n",
2766 nfiles,nfiles2,opt.bPullf ? "force" : "position",opt.fnTpr,fnPull);
2767 if (nfiles!=nfiles2)
2768 gmx_fatal(FARGS,"Found %d file names in %s, but %d in %s\n",nfiles,
2769 opt.fnTpr,nfiles2,fnPull);
2770 window=initUmbrellaWindows(nfiles);
2771 read_tpr_pullxf_files(fninTpr,fninPull,nfiles, &header, window, &opt);
2773 else
2774 { /* reading pdo files */
2775 read_wham_in(opt.fnPdo,&fninPdo,&nfiles,&opt);
2776 printf("Found %d pdo files in %s\n",nfiles,opt.fnPdo);
2777 window=initUmbrellaWindows(nfiles);
2778 read_pdo_files(fninPdo,nfiles, &header, window, &opt);
2780 nwins=nfiles;
2782 /* enforce equal weight for all histograms? */
2783 if (opt.bHistEq)
2784 enforceEqualWeights(window,nwins);
2786 /* write histograms */
2787 histout=xvgropen(opt2fn("-hist",NFILE,fnm),"Umbrella histograms",
2788 "z","count",opt.oenv);
2789 for(l=0;l<opt.bins;++l)
2791 fprintf(histout,"%e\t",(double)(l+0.5)/opt.bins*(opt.max-opt.min)+opt.min);
2792 for(i=0;i<nwins;++i)
2794 for(j=0;j<window[i].nPull;++j)
2796 fprintf(histout,"%e\t",window[i].Histo[j][l]);
2799 fprintf(histout,"\n");
2801 ffclose(histout);
2802 printf("Wrote %s\n",opt2fn("-hist",NFILE,fnm));
2803 if (opt.bHistOnly)
2805 printf("Wrote histograms to %s, now exiting.\n",opt2fn("-hist",NFILE,fnm));
2806 return 0;
2809 /* Using tabulated umbrella potential */
2810 if (opt.bTab)
2811 setup_tab(opt2fn("-tab",NFILE,fnm),&opt);
2813 /* Integrated autocorrelation times provided ? */
2814 if (opt.bTauIntGiven)
2815 readIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-iiact",NFILE,fnm));
2817 /* Compute integrated autocorrelation times */
2818 if (opt.bCalcTauInt)
2819 calcIntegratedAutocorrelationTimes(window,nwins,&opt,opt2fn("-oiact",NFILE,fnm));
2821 /* calc average and sigma for each histogram
2822 (maybe required for bootstrapping. If not, this is fast anyhow) */
2823 if (opt.nBootStrap && opt.bsMethod==bsMethod_trajGauss)
2824 averageSigma(window,nwins,&opt);
2826 /* Get initial potential by simple integration */
2827 if (opt.bInitPotByIntegration)
2828 guessPotByIntegration(window,nwins,&opt,0);
2830 /* Check if complete reaction coordinate is covered */
2831 checkReactionCoordinateCovered(window,nwins,&opt);
2833 /* Calculate profile */
2834 snew(profile,opt.bins);
2835 if (opt.verbose)
2836 opt.stepchange=1;
2837 i=0;
2840 if ( (i%opt.stepUpdateContrib) == 0)
2841 setup_acc_wham(profile,window,nwins,&opt);
2842 if (maxchange<opt.Tolerance)
2844 bExact=TRUE;
2845 /* if (opt.verbose) */
2846 printf("Switched to exact iteration in iteration %d\n",i);
2848 calc_profile(profile,window,nwins,&opt,bExact);
2849 if (((i%opt.stepchange) == 0 || i==1) && !i==0)
2850 printf("\t%4d) Maximum change %e\n",i,maxchange);
2851 i++;
2852 } while ( (maxchange=calc_z(profile, window, nwins, &opt,bExact)) > opt.Tolerance || !bExact);
2853 printf("Converged in %d iterations. Final maximum change %g\n",i,maxchange);
2855 /* calc error from Kumar's formula */
2856 /* Unclear how the error propagates along reaction coordinate, therefore
2857 commented out */
2858 /* calc_error_kumar(profile,window, nwins,&opt); */
2860 /* Write profile in energy units? */
2861 if (opt.bLog)
2863 prof_normalization_and_unit(profile,&opt);
2864 strcpy(ylabel,en_unit_label[opt.unit]);
2865 strcpy(title,"Umbrella potential");
2867 else
2869 strcpy(ylabel,"Density of states");
2870 strcpy(title,"Density of states");
2873 /* symmetrize profile around z=0? */
2874 if (opt.bSym)
2875 symmetrizeProfile(profile,&opt);
2877 /* write profile or density of states */
2878 profout=xvgropen(opt2fn("-o",NFILE,fnm),title,"z",ylabel,opt.oenv);
2879 for(i=0;i<opt.bins;++i)
2880 fprintf(profout,"%e\t%e\n",(double)(i+0.5)/opt.bins*(opt.max-opt.min)+opt.min,profile[i]);
2881 ffclose(profout);
2882 printf("Wrote %s\n",opt2fn("-o",NFILE,fnm));
2884 /* Bootstrap Method */
2885 if (opt.nBootStrap)
2886 do_bootstrapping(opt2fn("-bsres",NFILE,fnm),opt2fn("-bsprof",NFILE,fnm),
2887 opt2fn("-hist",NFILE,fnm),
2888 ylabel, profile, window, nwins, &opt);
2890 sfree(profile);
2891 freeUmbrellaWindows(window,nfiles);
2893 printf("\nIn case you use results from g_wham for a publication, please cite:\n");
2894 please_cite(stdout,"Hub2010");
2896 thanx(stderr);
2897 return 0;