added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / tools / gmx_analyze.c
blob97774b2d788dd9bfe0244ba0f322b6bb065f8372
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <math.h>
40 #include <string.h>
41 #include "statutil.h"
42 #include "sysstuff.h"
43 #include "typedefs.h"
44 #include "smalloc.h"
45 #include "macros.h"
46 #include "gmx_fatal.h"
47 #include "vec.h"
48 #include "copyrite.h"
49 #include "futil.h"
50 #include "readinp.h"
51 #include "statutil.h"
52 #include "txtdump.h"
53 #include "gstat.h"
54 #include "gmx_matrix.h"
55 #include "gmx_statistics.h"
56 #include "xvgr.h"
57 #include "gmx_ana.h"
58 #include "geminate.h"
60 /* must correspond to char *avbar_opt[] declared in main() */
61 enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
63 static void power_fit(int n,int nset,real **val,real *t)
65 real *x,*y,quality,a,b,r;
66 int s,i;
68 snew(x,n);
69 snew(y,n);
71 if (t[0]>0) {
72 for(i=0; i<n; i++)
73 if (t[0]>0)
74 x[i] = log(t[i]);
75 } else {
76 fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
77 for(i=0; i<n; i++)
78 x[i] = log(i+1);
81 for(s=0; s<nset; s++) {
82 i=0;
83 for(i=0; i<n && val[s][i]>=0; i++)
84 y[i] = log(val[s][i]);
85 if (i < n)
86 fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
87 lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
88 fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
89 s+1,quality,a,exp(b));
92 sfree(y);
93 sfree(x);
96 static real cosine_content(int nhp,int n,real *y)
97 /* Assumes n equidistant points */
99 double fac,cosyint,yyint;
100 int i;
102 if (n < 2)
103 return 0;
105 fac = M_PI*nhp/(n-1);
107 cosyint = 0;
108 yyint = 0;
109 for(i=0; i<n; i++) {
110 cosyint += cos(fac*i)*y[i];
111 yyint += y[i]*y[i];
114 return 2*cosyint*cosyint/(n*yyint);
117 static void plot_coscont(const char *ccfile,int n,int nset,real **val,
118 const output_env_t oenv)
120 FILE *fp;
121 int s;
122 real cc;
124 fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
125 oenv);
127 for(s=0; s<nset; s++) {
128 cc = cosine_content(s+1,n,val[s]);
129 fprintf(fp," %d %g\n",s+1,cc);
130 fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
131 s+1,0.5*(s+1),cc);
133 fprintf(stdout,"\n");
135 ffclose(fp);
138 static void regression_analysis(int n,gmx_bool bXYdy,
139 real *x,int nset,real **val)
141 real S,chi2,a,b,da,db,r=0;
142 int ok;
144 if (bXYdy || (nset == 1))
146 printf("Fitting data to a function f(x) = ax + b\n");
147 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
148 printf("Error estimates will be given if w_i (sigma) values are given\n");
149 printf("(use option -xydy).\n\n");
150 if (bXYdy)
152 if ((ok = lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S)) != estatsOK)
153 gmx_fatal(FARGS,"Error fitting the data: %s",
154 gmx_stats_message(ok));
156 else
158 if ((ok = lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S)) != estatsOK)
159 gmx_fatal(FARGS,"Error fitting the data: %s",
160 gmx_stats_message(ok));
162 chi2 = sqr((n-2)*S);
163 printf("Chi2 = %g\n",chi2);
164 printf("S (Sqrt(Chi2/(n-2)) = %g\n",S);
165 printf("Correlation coefficient = %.1f%%\n",100*r);
166 printf("\n");
167 if (bXYdy) {
168 printf("a = %g +/- %g\n",a,da);
169 printf("b = %g +/- %g\n",b,db);
171 else {
172 printf("a = %g\n",a);
173 printf("b = %g\n",b);
176 else
178 double chi2,*a,**xx,*y;
179 int i,j;
181 snew(y,n);
182 snew(xx,nset-1);
183 for(j=0; (j<nset-1); j++)
184 snew(xx[j],n);
185 for(i=0; (i<n); i++)
187 y[i] = val[0][i];
188 for(j=1; (j<nset); j++)
189 xx[j-1][i] = val[j][i];
191 snew(a,nset-1);
192 chi2 = multi_regression(NULL,n,y,nset-1,xx,a);
193 printf("Fitting %d data points in %d sets\n",n,nset-1);
194 printf("chi2 = %g\n",chi2);
195 printf("A =");
196 for(i=0; (i<nset-1); i++)
198 printf(" %g",a[i]);
199 sfree(xx[i]);
201 printf("\n");
202 sfree(xx);
203 sfree(y);
204 sfree(a);
208 void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
209 const output_env_t oenv)
211 FILE *fp;
212 int i,s;
213 double min,max;
214 int nbin;
215 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
216 long long int *histo;
217 #else
218 double *histo;
219 #endif
221 min = val[0][0];
222 max = val[0][0];
223 for(s=0; s<nset; s++)
224 for(i=0; i<n; i++)
225 if (val[s][i] < min)
226 min = val[s][i];
227 else if (val[s][i] > max)
228 max = val[s][i];
230 min = binwidth*floor(min/binwidth);
231 max = binwidth*ceil(max/binwidth);
232 if (min != 0)
233 min -= binwidth;
234 max += binwidth;
236 nbin = (int)((max - min)/binwidth + 0.5) + 1;
237 fprintf(stderr,"Making distributions with %d bins\n",nbin);
238 snew(histo,nbin);
239 fp = xvgropen(distfile,"Distribution","","",oenv);
240 for(s=0; s<nset; s++) {
241 for(i=0; i<nbin; i++)
242 histo[i] = 0;
243 for(i=0; i<n; i++)
244 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
245 for(i=0; i<nbin; i++)
246 fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
247 if (s < nset-1)
248 fprintf(fp,"&\n");
250 ffclose(fp);
253 static int real_comp(const void *a,const void *b)
255 real dif = *(real *)a - *(real *)b;
257 if (dif < 0)
258 return -1;
259 else if (dif > 0)
260 return 1;
261 else
262 return 0;
265 static void average(const char *avfile,int avbar_opt,
266 int n, int nset,real **val,real *t)
268 FILE *fp;
269 int i,s,edge=0;
270 double av,var,err;
271 real *tmp=NULL;
273 fp = ffopen(avfile,"w");
274 if ((avbar_opt == avbarERROR) && (nset == 1))
275 avbar_opt = avbarNONE;
276 if (avbar_opt != avbarNONE) {
277 if (avbar_opt == avbar90) {
278 snew(tmp,nset);
279 fprintf(fp,"@TYPE xydydy\n");
280 edge = (int)(nset*0.05+0.5);
281 fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
282 " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
283 } else
284 fprintf(fp,"@TYPE xydy\n");
287 for(i=0; i<n; i++) {
288 av = 0;
289 for(s=0; s<nset; s++)
290 av += val[s][i];
291 av /= nset;
292 fprintf(fp," %g %g",t[i],av);
293 var = 0;
294 if (avbar_opt != avbarNONE) {
295 if (avbar_opt == avbar90) {
296 for(s=0; s<nset; s++)
297 tmp[s] = val[s][i];
298 qsort(tmp,nset,sizeof(tmp[0]),real_comp);
299 fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
300 } else {
301 for(s=0; s<nset; s++)
302 var += sqr(val[s][i]-av);
303 if (avbar_opt == avbarSTDDEV)
304 err = sqrt(var/nset);
305 else
306 err = sqrt(var/(nset*(nset-1)));
307 fprintf(fp," %g",err);
310 fprintf(fp,"\n");
312 ffclose(fp);
314 if (avbar_opt == avbar90)
315 sfree(tmp);
318 static real anal_ee_inf(real *parm,real T)
320 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
323 static void estimate_error(const char *eefile,int nb_min,int resol,int n,
324 int nset, double *av,double *sig,real **val,real dt,
325 gmx_bool bFitAc,gmx_bool bSingleExpFit,gmx_bool bAllowNegLTCorr,
326 const output_env_t oenv)
328 FILE *fp;
329 int bs,prev_bs,nbs,nb;
330 real spacing,nbr;
331 int s,i,j;
332 double blav,var;
333 char **leg;
334 real *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
335 real fitparm[4];
336 real ee,a,tau1,tau2;
338 if (n < 4)
340 fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");
342 return;
345 fp = xvgropen(eefile,"Error estimates",
346 "Block size (time)","Error estimate", oenv);
347 if (output_env_get_print_xvgr_codes(oenv))
349 fprintf(fp,
350 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
351 (n-1)*dt,n);
353 snew(leg,2*nset);
354 xvgr_legend(fp,2*nset,(const char**)leg,oenv);
355 sfree(leg);
357 spacing = pow(2,1.0/resol);
358 snew(tbs,n);
359 snew(ybs,n);
360 snew(fitsig,n);
361 for(s=0; s<nset; s++)
363 nbs = 0;
364 prev_bs = 0;
365 nbr = nb_min;
366 while (nbr <= n)
368 bs = n/(int)nbr;
369 if (bs != prev_bs)
371 nb = n/bs;
372 var = 0;
373 for(i=0; i<nb; i++)
375 blav=0;
376 for (j=0; j<bs; j++)
378 blav += val[s][bs*i+j];
380 var += sqr(av[s] - blav/bs);
382 tbs[nbs] = bs*dt;
383 if (sig[s] == 0)
385 ybs[nbs] = 0;
387 else
389 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
391 nbs++;
393 nbr *= spacing;
394 nb = (int)(nbr+0.5);
395 prev_bs = bs;
397 if (sig[s] == 0)
399 ee = 0;
400 a = 1;
401 tau1 = 0;
402 tau2 = 0;
404 else
406 for(i=0; i<nbs/2; i++)
408 rtmp = tbs[i];
409 tbs[i] = tbs[nbs-1-i];
410 tbs[nbs-1-i] = rtmp;
411 rtmp = ybs[i];
412 ybs[i] = ybs[nbs-1-i];
413 ybs[nbs-1-i] = rtmp;
415 /* The initial slope of the normalized ybs^2 is 1.
416 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
417 * From this we take our initial guess for tau1.
419 twooe = 2/exp(1);
420 i = -1;
423 i++;
424 tau1_est = tbs[i];
425 } while (i < nbs - 1 &&
426 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
428 if (ybs[0] > ybs[1])
430 fprintf(stdout,"Data set %d has strange time correlations:\n"
431 "the std. error using single points is larger than that of blocks of 2 points\n"
432 "The error estimate might be inaccurate, check the fit\n",
433 s+1);
434 /* Use the total time as tau for the fitting weights */
435 tau_sig = (n - 1)*dt;
437 else
439 tau_sig = tau1_est;
442 if (debug)
444 fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
447 /* Generate more or less appropriate sigma's,
448 * also taking the density of points into account.
450 for(i=0; i<nbs; i++)
452 if (i == 0)
454 dens = tbs[1]/tbs[0] - 1;
456 else if (i == nbs-1)
458 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
460 else
462 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
464 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
467 if (!bSingleExpFit)
469 fitparm[0] = tau1_est;
470 fitparm[1] = 0.95;
471 /* We set the initial guess for tau2
472 * to halfway between tau1_est and the total time (on log scale).
474 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
475 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
476 bDebugMode(),effnERREST,fitparm,0);
477 fitparm[3] = 1-fitparm[1];
479 if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
480 || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
482 if (!bSingleExpFit)
484 if (fitparm[2] > (n-1)*dt)
486 fprintf(stdout,
487 "Warning: tau2 is longer than the length of the data (%g)\n"
488 " the statistics might be bad\n",
489 (n-1)*dt);
491 else
493 fprintf(stdout,"a fitted parameter is negative\n");
495 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
496 sig[s]*anal_ee_inf(fitparm,n*dt),
497 fitparm[1],fitparm[0],fitparm[2]);
498 /* Do a fit with tau2 fixed at the total time.
499 * One could also choose any other large value for tau2.
501 fitparm[0] = tau1_est;
502 fitparm[1] = 0.95;
503 fitparm[2] = (n-1)*dt;
504 fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
505 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
506 effnERREST,fitparm,4);
507 fitparm[3] = 1-fitparm[1];
509 if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
510 || (fitparm[1]>1 && !bAllowNegLTCorr))
512 if (!bSingleExpFit) {
513 fprintf(stdout,"a fitted parameter is negative\n");
514 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
515 sig[s]*anal_ee_inf(fitparm,n*dt),
516 fitparm[1],fitparm[0],fitparm[2]);
518 /* Do a single exponential fit */
519 fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
520 fitparm[0] = tau1_est;
521 fitparm[1] = 1.0;
522 fitparm[2] = 0.0;
523 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
524 effnERREST,fitparm,6);
525 fitparm[3] = 1-fitparm[1];
528 ee = sig[s]*anal_ee_inf(fitparm,n*dt);
529 a = fitparm[1];
530 tau1 = fitparm[0];
531 tau2 = fitparm[2];
533 fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
534 s+1,ee,a,tau1,tau2);
535 fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
536 fprintf(fp,"@ legend string %d \"ee %6g\"\n",
537 2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
538 for(i=0; i<nbs; i++)
540 fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
541 sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
544 if (bFitAc)
546 int fitlen;
547 real *ac,acint,ac_fit[4];
549 snew(ac,n);
550 for(i=0; i<n; i++) {
551 ac[i] = val[s][i] - av[s];
552 if (i > 0)
553 fitsig[i] = sqrt(i);
554 else
555 fitsig[i] = 1;
557 low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
558 dt,eacNormal,1,FALSE,TRUE,
559 FALSE,0,0,effnNONE,0);
561 fitlen = n/nb_min;
563 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
564 acint = 0.5*ac[0];
565 for(i=1; i<=fitlen/2; i++)
567 acint += ac[i];
569 acint *= dt;
571 /* Generate more or less appropriate sigma's */
572 for(i=0; i<=fitlen; i++)
574 fitsig[i] = sqrt(acint + dt*i);
577 ac_fit[0] = 0.5*acint;
578 ac_fit[1] = 0.95;
579 ac_fit[2] = 10*acint;
580 do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
581 bDebugMode(),effnEXP3,ac_fit,0);
582 ac_fit[3] = 1 - ac_fit[1];
584 fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
585 s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
586 ac_fit[1],ac_fit[0],ac_fit[2]);
588 fprintf(fp,"&\n");
589 for(i=0; i<nbs; i++)
591 fprintf(fp,"%g %g\n",tbs[i],
592 sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
595 sfree(ac);
597 if (s < nset-1)
599 fprintf(fp,"&\n");
602 sfree(fitsig);
603 sfree(ybs);
604 sfree(tbs);
605 ffclose(fp);
608 static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
609 gmx_bool bError,real fit_start,real smooth_tail_start,
610 const output_env_t oenv)
612 const real tol = 1e-8;
613 real *kt;
614 real weight,d2;
615 int j;
617 please_cite(stdout,"Spoel2006b");
619 /* Compute negative derivative k(t) = -dc(t)/dt */
620 if (!bError) {
621 snew(kt,nn);
622 compute_derivative(nn,time,val[0],kt);
623 for(j=0; (j<nn); j++)
624 kt[j] = -kt[j];
625 if (debug) {
626 d2 = 0;
627 for(j=0; (j<nn); j++)
628 d2 += sqr(kt[j] - val[3][j]);
629 fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
631 analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
632 temp,smooth_tail_start,oenv);
633 sfree(kt);
635 else if (nset == 6) {
636 analyse_corr(nn,time,val[0],val[2],val[4],
637 val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
639 else {
640 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
641 printf("Not doing anything. Sorry.\n");
645 static void filter(real flen,int n,int nset,real **val,real dt,
646 const output_env_t oenv)
648 int f,s,i,j;
649 double *filt,sum,vf,fluc,fluctot;
651 f = (int)(flen/(2*dt));
652 snew(filt,f+1);
653 filt[0] = 1;
654 sum = 1;
655 for(i=1; i<=f; i++) {
656 filt[i] = cos(M_PI*dt*i/flen);
657 sum += 2*filt[i];
659 for(i=0; i<=f; i++)
660 filt[i] /= sum;
661 fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
662 fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
663 fluctot = 0;
664 for(s=0; s<nset; s++) {
665 fluc = 0;
666 for(i=f; i<n-f; i++) {
667 vf = filt[0]*val[s][i];
668 for(j=1; j<=f; j++)
669 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
670 fluc += sqr(val[s][i] - vf);
672 fluc /= n - 2*f;
673 fluctot += fluc;
674 fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
676 fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
677 fprintf(stdout,"\n");
679 sfree(filt);
682 static void do_fit(FILE *out,int n,gmx_bool bYdy,
683 int ny,real *x0,real **val,
684 int npargs,t_pargs *ppa,const output_env_t oenv)
686 real *c1=NULL,*sig=NULL,*fitparm;
687 real tendfit,tbeginfit;
688 int i,efitfn,nparm;
690 efitfn = get_acffitfn();
691 nparm = nfp_ffn[efitfn];
692 fprintf(out,"Will fit to the following function:\n");
693 fprintf(out,"%s\n",longs_ffn[efitfn]);
694 c1 = val[n];
695 if (bYdy) {
696 c1 = val[n];
697 sig = val[n+1];
698 fprintf(out,"Using two columns as y and sigma values\n");
699 } else {
700 snew(sig,ny);
702 if (opt2parg_bSet("-beginfit",npargs,ppa)) {
703 tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
704 } else {
705 tbeginfit = x0[0];
707 if (opt2parg_bSet("-endfit",npargs,ppa)) {
708 tendfit = opt2parg_real("-endfit",npargs,ppa);
709 } else {
710 tendfit = x0[ny-1];
713 snew(fitparm,nparm);
714 switch(efitfn) {
715 case effnEXP1:
716 fitparm[0] = 0.5;
717 break;
718 case effnEXP2:
719 fitparm[0] = 0.5;
720 fitparm[1] = c1[0];
721 break;
722 case effnEXP3:
723 fitparm[0] = 1.0;
724 fitparm[1] = 0.5*c1[0];
725 fitparm[2] = 10.0;
726 break;
727 case effnEXP5:
728 fitparm[0] = fitparm[2] = 0.5*c1[0];
729 fitparm[1] = 10;
730 fitparm[3] = 40;
731 fitparm[4] = 0;
732 break;
733 case effnEXP7:
734 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
735 fitparm[1] = 1;
736 fitparm[3] = 10;
737 fitparm[5] = 100;
738 fitparm[6] = 0;
739 break;
740 case effnEXP9:
741 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
742 fitparm[1] = 0.1;
743 fitparm[3] = 1;
744 fitparm[5] = 10;
745 fitparm[7] = 100;
746 fitparm[8] = 0;
747 break;
748 default:
749 fprintf(out,"Warning: don't know how to initialize the parameters\n");
750 for(i=0; (i<nparm); i++)
751 fitparm[i] = 1;
753 fprintf(out,"Starting parameters:\n");
754 for(i=0; (i<nparm); i++)
755 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
756 if (do_lmfit(ny,c1,sig,0,x0,tbeginfit,tendfit,
757 oenv,bDebugMode(),efitfn,fitparm,0)) {
758 for(i=0; (i<nparm); i++)
759 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
761 else {
762 fprintf(out,"No solution was found\n");
766 static void do_ballistic(const char *balFile, int nData,
767 real *t, real **val, int nSet,
768 real balTime, int nBalExp,
769 gmx_bool bDerivative,
770 const output_env_t oenv)
772 double **ctd=NULL, *td=NULL;
773 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
774 static char *leg[] = {"Ac'(t)"};
775 FILE *fp;
776 int i, set;
778 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
780 snew(ctd, nSet);
781 snew(td, nData);
783 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
784 xvgr_legend(fp,asize(leg),(const char**)leg,oenv);
786 for (set=0; set<nSet; set++)
788 snew(ctd[set], nData);
789 for (i=0; i<nData; i++) {
790 ctd[set][i] = (double)val[set][i];
791 if (set==0)
792 td[i] = (double)t[i];
795 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
798 for (i=0; i<nData; i++)
800 fprintf(fp, " %g",t[i]);
801 for (set=0; set<nSet; set++)
803 fprintf(fp, " %g", ctd[set][i]);
805 fprintf(fp, "\n");
809 for (set=0; set<nSet; set++)
810 sfree(ctd[set]);
811 sfree(ctd);
812 sfree(td);
814 else
815 printf("Number of data points is less than the number of parameters to fit\n."
816 "The system is underdetermined, hence no ballistic term can be found.\n\n");
819 static void do_geminate(const char *gemFile, int nData,
820 real *t, real **val, int nSet,
821 const real D, const real rcut, const real balTime,
822 const int nFitPoints, const real begFit, const real endFit,
823 const output_env_t oenv)
825 double **ctd=NULL, **ctdGem=NULL, *td=NULL;
826 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
827 begFit, endFit, balTime, 1, FALSE);
828 const char *leg[] = {"Ac\\sgem\\N(t)"};
829 FILE *fp;
830 int i, set;
832 snew(ctd, nSet);
833 snew(ctdGem, nSet);
834 snew(td, nData);
836 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
837 xvgr_legend(fp,asize(leg),leg,oenv);
839 for (set=0; set<nSet; set++)
841 snew(ctd[set], nData);
842 snew(ctdGem[set], nData);
843 for (i=0; i<nData; i++) {
844 ctd[set][i] = (double)val[set][i];
845 if (set==0)
846 td[i] = (double)t[i];
848 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
851 for (i=0; i<nData; i++)
853 fprintf(fp, " %g",t[i]);
854 for (set=0; set<nSet; set++)
856 fprintf(fp, " %g", ctdGem[set][i]);
858 fprintf(fp, "\n");
861 for (set=0; set<nSet; set++)
863 sfree(ctd[set]);
864 sfree(ctdGem[set]);
866 sfree(ctd);
867 sfree(ctdGem);
868 sfree(td);
871 int gmx_analyze(int argc,char *argv[])
873 static const char *desc[] = {
874 "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
875 "A line in the input file may start with a time",
876 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
877 "Multiple sets can also be",
878 "read when they are separated by & (option [TT]-n[tt]);",
879 "in this case only one [IT]y[it]-value is read from each line.",
880 "All lines starting with # and @ are skipped.",
881 "All analyses can also be done for the derivative of a set",
882 "(option [TT]-d[tt]).[PAR]",
884 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
885 "points are equidistant in time.[PAR]",
887 "[TT]g_analyze[tt] always shows the average and standard deviation of each",
888 "set, as well as the relative deviation of the third",
889 "and fourth cumulant from those of a Gaussian distribution with the same",
890 "standard deviation.[PAR]",
892 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
893 "Be sure that the time interval between data points is",
894 "much shorter than the time scale of the autocorrelation.[PAR]",
896 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
897 "i/2 periods. The formula is:[BR]"
898 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
899 "This is useful for principal components obtained from covariance",
900 "analysis, since the principal components of random diffusion are",
901 "pure cosines.[PAR]",
903 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
905 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
907 "Option [TT]-av[tt] produces the average over the sets.",
908 "Error bars can be added with the option [TT]-errbar[tt].",
909 "The errorbars can represent the standard deviation, the error",
910 "(assuming the points are independent) or the interval containing",
911 "90% of the points, by discarding 5% of the points at the top and",
912 "the bottom.[PAR]",
914 "Option [TT]-ee[tt] produces error estimates using block averaging.",
915 "A set is divided in a number of blocks and averages are calculated for",
916 "each block. The error for the total average is calculated from",
917 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
918 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
919 "These errors are plotted as a function of the block size.",
920 "Also an analytical block average curve is plotted, assuming",
921 "that the autocorrelation is a sum of two exponentials.",
922 "The analytical curve for the block average is:[BR]",
923 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
924 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
925 "where T is the total time.",
926 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
927 "When the actual block average is very close to the analytical curve,",
928 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
929 "The complete derivation is given in",
930 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
932 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
933 "component from a hydrogen bond autocorrelation function by the fitting",
934 "of a sum of exponentials, as described in e.g.",
935 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
936 "is the one with the most negative coefficient in the exponential,",
937 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
938 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
940 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
941 "(and optionally kD) to the hydrogen bond autocorrelation function",
942 "according to the reversible geminate recombination model. Removal of",
943 "the ballistic component first is strongly advised. The model is presented in",
944 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
946 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
947 "of each set and over all sets with respect to a filtered average.",
948 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
949 "to len/2. len is supplied with the option [TT]-filter[tt].",
950 "This filter reduces oscillations with period len/2 and len by a factor",
951 "of 0.79 and 0.33 respectively.[PAR]",
953 "Option [TT]-g[tt] fits the data to the function given with option",
954 "[TT]-fitfn[tt].[PAR]",
956 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
957 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
958 "zero or with a negative value are ignored.[PAR]"
960 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
961 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
962 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
964 static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
965 static gmx_bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
966 static gmx_bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
967 static gmx_bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
968 static int nsets_in=1,d=1,nb_min=4,resol=10, nBalExp=4, nFitPoints=100;
969 static real temp=298.15,fit_start=1, fit_end=60, smooth_tail_start=-1, balTime=0.2, diffusion=5e-5,rcut=0.35;
971 /* must correspond to enum avbar* declared at beginning of file */
972 static const char *avbar_opt[avbarNR+1] = {
973 NULL, "none", "stddev", "error", "90", NULL
976 t_pargs pa[] = {
977 { "-time", FALSE, etBOOL, {&bHaveT},
978 "Expect a time in the input" },
979 { "-b", FALSE, etREAL, {&tb},
980 "First time to read from set" },
981 { "-e", FALSE, etREAL, {&te},
982 "Last time to read from set" },
983 { "-n", FALSE, etINT, {&nsets_in},
984 "Read this number of sets separated by &" },
985 { "-d", FALSE, etBOOL, {&bDer},
986 "Use the derivative" },
987 { "-dp", FALSE, etINT, {&d},
988 "HIDDENThe derivative is the difference over this number of points" },
989 { "-bw", FALSE, etREAL, {&binwidth},
990 "Binwidth for the distribution" },
991 { "-errbar", FALSE, etENUM, {avbar_opt},
992 "Error bars for [TT]-av[tt]" },
993 { "-integrate",FALSE,etBOOL, {&bIntegrate},
994 "Integrate data function(s) numerically using trapezium rule" },
995 { "-aver_start",FALSE, etREAL, {&aver_start},
996 "Start averaging the integral from here" },
997 { "-xydy", FALSE, etBOOL, {&bXYdy},
998 "Interpret second data set as error in the y values for integrating" },
999 { "-regression",FALSE,etBOOL,{&bRegression},
1000 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1001 { "-luzar", FALSE, etBOOL, {&bLuzar},
1002 "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1003 { "-temp", FALSE, etREAL, {&temp},
1004 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1005 { "-fitstart", FALSE, etREAL, {&fit_start},
1006 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1007 { "-fitend", FALSE, etREAL, {&fit_end},
1008 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1009 { "-smooth",FALSE, etREAL, {&smooth_tail_start},
1010 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1011 { "-nbmin", FALSE, etINT, {&nb_min},
1012 "HIDDENMinimum number of blocks for block averaging" },
1013 { "-resol", FALSE, etINT, {&resol},
1014 "HIDDENResolution for the block averaging, block size increases with"
1015 " a factor 2^(1/resol)" },
1016 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1017 "HIDDENAlways use a single exponential fit for the error estimate" },
1018 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1019 "HIDDENAllow a negative long-time correlation" },
1020 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1021 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1022 { "-filter", FALSE, etREAL, {&filtlen},
1023 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1024 { "-power", FALSE, etBOOL, {&bPower},
1025 "Fit data to: b t^a" },
1026 { "-subav", FALSE, etBOOL, {&bSubAv},
1027 "Subtract the average before autocorrelating" },
1028 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1029 "Calculate one ACF over all sets" },
1030 { "-nbalexp", FALSE, etINT, {&nBalExp},
1031 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1032 { "-baltime", FALSE, etREAL, {&balTime},
1033 "HIDDENTime up to which the ballistic component will be fitted" },
1034 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1035 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1036 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1037 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1038 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1039 /* "What type of gminate recombination to use"}, */
1040 /* { "-D", FALSE, etREAL, {&diffusion}, */
1041 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1043 #define NPA asize(pa)
1045 FILE *out,*out_fit;
1046 int n,nlast,s,nset,i,j=0;
1047 real **val,*t,dt,tot,error;
1048 double *av,*sig,cum1,cum2,cum3,cum4,db;
1049 const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*balfile,*gemfile,*fitfile;
1050 output_env_t oenv;
1052 t_filenm fnm[] = {
1053 { efXVG, "-f", "graph", ffREAD },
1054 { efXVG, "-ac", "autocorr", ffOPTWR },
1055 { efXVG, "-msd", "msd", ffOPTWR },
1056 { efXVG, "-cc", "coscont", ffOPTWR },
1057 { efXVG, "-dist", "distr", ffOPTWR },
1058 { efXVG, "-av", "average", ffOPTWR },
1059 { efXVG, "-ee", "errest", ffOPTWR },
1060 { efXVG, "-bal", "ballisitc",ffOPTWR },
1061 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1062 { efLOG, "-g", "fitlog", ffOPTWR }
1064 #define NFILE asize(fnm)
1066 int npargs;
1067 t_pargs *ppa;
1069 npargs = asize(pa);
1070 ppa = add_acf_pargs(&npargs,pa);
1072 CopyRight(stderr,argv[0]);
1073 parse_common_args(&argc,argv,PCA_CAN_VIEW,
1074 NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
1076 acfile = opt2fn_null("-ac",NFILE,fnm);
1077 msdfile = opt2fn_null("-msd",NFILE,fnm);
1078 ccfile = opt2fn_null("-cc",NFILE,fnm);
1079 distfile = opt2fn_null("-dist",NFILE,fnm);
1080 avfile = opt2fn_null("-av",NFILE,fnm);
1081 eefile = opt2fn_null("-ee",NFILE,fnm);
1082 balfile = opt2fn_null("-bal",NFILE,fnm);
1083 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1084 /* When doing autocorrelation we don't want a fitlog for fitting
1085 * the function itself (not the acf) when the user did not ask for it.
1087 if (opt2parg_bSet("-fitfn",npargs,ppa) && acfile == NULL)
1089 fitfile = opt2fn("-g",NFILE,fnm);
1091 else
1093 fitfile = opt2fn_null("-g",NFILE,fnm);
1096 val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
1097 opt2parg_bSet("-b",npargs,ppa),tb,
1098 opt2parg_bSet("-e",npargs,ppa),te,
1099 nsets_in,&nset,&n,&dt,&t);
1100 printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);
1102 if (bDer)
1104 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1105 d,d);
1106 n -= d;
1107 for(s=0; s<nset; s++)
1109 for(i=0; (i<n); i++)
1111 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1116 if (bIntegrate)
1118 real sum,stddev;
1120 printf("Calculating the integral using the trapezium rule\n");
1122 if (bXYdy)
1124 sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
1125 printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
1127 else
1129 for(s=0; s<nset; s++)
1131 sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
1132 printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
1137 if (fitfile != NULL)
1139 out_fit = ffopen(fitfile,"w");
1140 if (bXYdy && nset >= 2)
1142 do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
1144 else
1146 for(s=0; s<nset; s++)
1148 do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
1151 ffclose(out_fit);
1154 printf(" std. dev. relative deviation of\n");
1155 printf(" standard --------- cumulants from those of\n");
1156 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1157 printf(" cum. 3 cum. 4\n");
1158 snew(av,nset);
1159 snew(sig,nset);
1160 for(s=0; (s<nset); s++) {
1161 cum1 = 0;
1162 cum2 = 0;
1163 cum3 = 0;
1164 cum4 = 0;
1165 for(i=0; (i<n); i++)
1166 cum1 += val[s][i];
1167 cum1 /= n;
1168 for(i=0; (i<n); i++) {
1169 db = val[s][i]-cum1;
1170 cum2 += db*db;
1171 cum3 += db*db*db;
1172 cum4 += db*db*db*db;
1174 cum2 /= n;
1175 cum3 /= n;
1176 cum4 /= n;
1177 av[s] = cum1;
1178 sig[s] = sqrt(cum2);
1179 if (n > 1)
1180 error = sqrt(cum2/(n-1));
1181 else
1182 error = 0;
1183 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1184 s+1,av[s],sig[s],error,
1185 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1186 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1188 printf("\n");
1190 if (filtlen)
1191 filter(filtlen,n,nset,val,dt,oenv);
1193 if (msdfile) {
1194 out=xvgropen(msdfile,"Mean square displacement",
1195 "time","MSD (nm\\S2\\N)",oenv);
1196 nlast = (int)(n*frac);
1197 for(s=0; s<nset; s++) {
1198 for(j=0; j<=nlast; j++) {
1199 if (j % 100 == 0)
1200 fprintf(stderr,"\r%d",j);
1201 tot=0;
1202 for(i=0; i<n-j; i++)
1203 tot += sqr(val[s][i]-val[s][i+j]);
1204 tot /= (real)(n-j);
1205 fprintf(out," %g %8g\n",dt*j,tot);
1207 if (s<nset-1)
1208 fprintf(out,"&\n");
1210 ffclose(out);
1211 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1213 if (ccfile)
1214 plot_coscont(ccfile,n,nset,val,oenv);
1216 if (distfile)
1217 histogram(distfile,binwidth,n,nset,val,oenv);
1218 if (avfile)
1219 average(avfile,nenum(avbar_opt),n,nset,val,t);
1220 if (eefile)
1221 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1222 bEeFitAc,bEESEF,bEENLC,oenv);
1223 if (balfile)
1224 do_ballistic(balfile,n,t,val,nset,balTime,nBalExp,bDer,oenv);
1225 /* if (gemfile) */
1226 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1227 /* nFitPoints, fit_start, fit_end, oenv); */
1228 if (bPower)
1229 power_fit(n,nset,val,t);
1231 if (acfile != NULL)
1233 if (bSubAv)
1235 for(s=0; s<nset; s++)
1237 for(i=0; i<n; i++)
1239 val[s][i] -= av[s];
1243 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1244 eacNormal,bAverCorr);
1247 if (bRegression)
1248 regression_analysis(n,bXYdy,t,nset,val);
1250 if (bLuzar)
1251 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1253 view_all(oenv,NFILE, fnm);
1255 thanx(stderr);
1257 return 0;