Fixed documentation text according to bugzilla 486
[gromacs/rigid-bodies.git] / src / tools / gmx_analyze.c
blob70406219373a55d6f82208d9e39a83ef7d24ada9
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * Green Red Orange Magenta Azure Cyan Skyblue
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39 #include <math.h>
40 #include <string.h>
41 #include "statutil.h"
42 #include "sysstuff.h"
43 #include "typedefs.h"
44 #include "smalloc.h"
45 #include "macros.h"
46 #include "gmx_fatal.h"
47 #include "vec.h"
48 #include "copyrite.h"
49 #include "futil.h"
50 #include "readinp.h"
51 #include "statutil.h"
52 #include "txtdump.h"
53 #include "gstat.h"
54 #include "gmx_matrix.h"
55 #include "gmx_statistics.h"
56 #include "xvgr.h"
57 #include "gmx_ana.h"
58 #include "geminate.h"
60 /* must correspond to char *avbar_opt[] declared in main() */
61 enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
63 static void power_fit(int n,int nset,real **val,real *t)
65 real *x,*y,quality,a,b,r;
66 int s,i;
68 snew(x,n);
69 snew(y,n);
71 if (t[0]>0) {
72 for(i=0; i<n; i++)
73 if (t[0]>0)
74 x[i] = log(t[i]);
75 } else {
76 fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
77 for(i=0; i<n; i++)
78 x[i] = log(i+1);
81 for(s=0; s<nset; s++) {
82 i=0;
83 for(i=0; i<n && val[s][i]>=0; i++)
84 y[i] = log(val[s][i]);
85 if (i < n)
86 fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
87 lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
88 fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
89 s+1,quality,a,exp(b));
92 sfree(y);
93 sfree(x);
96 static real cosine_content(int nhp,int n,real *y)
97 /* Assumes n equidistant points */
99 double fac,cosyint,yyint;
100 int i;
102 if (n < 2)
103 return 0;
105 fac = M_PI*nhp/(n-1);
107 cosyint = 0;
108 yyint = 0;
109 for(i=0; i<n; i++) {
110 cosyint += cos(fac*i)*y[i];
111 yyint += y[i]*y[i];
114 return 2*cosyint*cosyint/(n*yyint);
117 static void plot_coscont(const char *ccfile,int n,int nset,real **val,
118 const output_env_t oenv)
120 FILE *fp;
121 int s;
122 real cc;
124 fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
125 oenv);
127 for(s=0; s<nset; s++) {
128 cc = cosine_content(s+1,n,val[s]);
129 fprintf(fp," %d %g\n",s+1,cc);
130 fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
131 s+1,0.5*(s+1),cc);
133 fprintf(stdout,"\n");
135 ffclose(fp);
138 static void regression_analysis(int n,bool bXYdy,
139 real *x,int nset,real **val)
141 real S,chi2,a,b,da,db,r=0;
143 if (bXYdy || (nset == 1))
145 printf("Fitting data to a function f(x) = ax + b\n");
146 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
147 printf("Error estimates will be given if w_i (sigma) values are given\n");
148 printf("(use option -xydy).\n\n");
149 if (bXYdy)
150 lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
151 else
152 lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
153 chi2 = sqr((n-2)*S);
154 printf("Chi2 = %g\n",chi2);
155 printf("S (Sqrt(Chi2/(n-2)) = %g\n",S);
156 printf("Correlation coefficient = %.1f%%\n",100*r);
157 printf("\n");
158 if (bXYdy) {
159 printf("a = %g +/- %g\n",a,da);
160 printf("b = %g +/- %g\n",b,db);
162 else {
163 printf("a = %g\n",a);
164 printf("b = %g\n",b);
167 else
169 double chi2,*a,**xx,*y;
170 int i,j;
172 snew(y,n);
173 snew(xx,nset-1);
174 for(j=0; (j<nset-1); j++)
175 snew(xx[j],n);
176 for(i=0; (i<n); i++)
178 y[i] = val[0][i];
179 for(j=1; (j<nset); j++)
180 xx[j-1][i] = val[j][i];
182 snew(a,nset-1);
183 chi2 = multi_regression(NULL,n,y,nset-1,xx,a);
184 printf("Fitting %d data points in %d sets\n",n,nset-1);
185 printf("chi2 = %g\n",chi2);
186 printf("A =");
187 for(i=0; (i<nset-1); i++)
189 printf(" %g",a[i]);
190 sfree(xx[i]);
192 printf("\n");
193 sfree(xx);
194 sfree(y);
195 sfree(a);
199 void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
200 const output_env_t oenv)
202 FILE *fp;
203 int i,s;
204 double min,max;
205 int nbin;
206 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
207 long long int *histo;
208 #else
209 double *histo;
210 #endif
212 min = val[0][0];
213 max = val[0][0];
214 for(s=0; s<nset; s++)
215 for(i=0; i<n; i++)
216 if (val[s][i] < min)
217 min = val[s][i];
218 else if (val[s][i] > max)
219 max = val[s][i];
221 min = binwidth*floor(min/binwidth);
222 max = binwidth*ceil(max/binwidth);
223 if (min != 0)
224 min -= binwidth;
225 max += binwidth;
227 nbin = (int)((max - min)/binwidth + 0.5) + 1;
228 fprintf(stderr,"Making distributions with %d bins\n",nbin);
229 snew(histo,nbin);
230 fp = xvgropen(distfile,"Distribution","","",oenv);
231 for(s=0; s<nset; s++) {
232 for(i=0; i<nbin; i++)
233 histo[i] = 0;
234 for(i=0; i<n; i++)
235 histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
236 for(i=0; i<nbin; i++)
237 fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
238 if (s < nset-1)
239 fprintf(fp,"&\n");
241 ffclose(fp);
244 static int real_comp(const void *a,const void *b)
246 real dif = *(real *)a - *(real *)b;
248 if (dif < 0)
249 return -1;
250 else if (dif > 0)
251 return 1;
252 else
253 return 0;
256 static void average(const char *avfile,int avbar_opt,
257 int n, int nset,real **val,real *t)
259 FILE *fp;
260 int i,s,edge=0;
261 double av,var,err;
262 real *tmp=NULL;
264 fp = ffopen(avfile,"w");
265 if ((avbar_opt == avbarERROR) && (nset == 1))
266 avbar_opt = avbarNONE;
267 if (avbar_opt != avbarNONE) {
268 if (avbar_opt == avbar90) {
269 snew(tmp,nset);
270 fprintf(fp,"@TYPE xydydy\n");
271 edge = (int)(nset*0.05+0.5);
272 fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
273 " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
274 } else
275 fprintf(fp,"@TYPE xydy\n");
278 for(i=0; i<n; i++) {
279 av = 0;
280 for(s=0; s<nset; s++)
281 av += val[s][i];
282 av /= nset;
283 fprintf(fp," %g %g",t[i],av);
284 var = 0;
285 if (avbar_opt != avbarNONE) {
286 if (avbar_opt == avbar90) {
287 for(s=0; s<nset; s++)
288 tmp[s] = val[s][i];
289 qsort(tmp,nset,sizeof(tmp[0]),real_comp);
290 fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
291 } else {
292 for(s=0; s<nset; s++)
293 var += sqr(val[s][i]-av);
294 if (avbar_opt == avbarSTDDEV)
295 err = sqrt(var/nset);
296 else
297 err = sqrt(var/(nset*(nset-1)));
298 fprintf(fp," %g",err);
301 fprintf(fp,"\n");
303 ffclose(fp);
305 if (avbar_opt == avbar90)
306 sfree(tmp);
309 static real anal_ee_inf(real *parm,real T)
311 return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
314 static real anal_ee(real *parm,real T,real t)
316 real e1,e2;
318 if (parm[0])
319 e1 = exp(-t/parm[0]);
320 else
321 e1 = 1;
322 if (parm[2])
323 e2 = exp(-t/parm[2]);
324 else
325 e2 = 1;
327 return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
328 parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
331 static void estimate_error(const char *eefile,int nb_min,int resol,int n,
332 int nset, double *av,double *sig,real **val,real dt,
333 bool bFitAc,bool bSingleExpFit,bool bAllowNegLTCorr,
334 const output_env_t oenv)
336 FILE *fp;
337 int bs,prev_bs,nbs,nb;
338 real spacing,nbr;
339 int s,i,j;
340 double blav,var;
341 char **leg;
342 real *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
343 real fitparm[4];
344 real ee,a,tau1,tau2;
346 if (n < 4)
348 fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");
350 return;
353 fp = xvgropen(eefile,"Error estimates",
354 "Block size (time)","Error estimate", oenv);
355 if (output_env_get_print_xvgr_codes(oenv))
357 fprintf(fp,
358 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
359 (n-1)*dt,n);
361 snew(leg,2*nset);
362 xvgr_legend(fp,2*nset,(const char**)leg,oenv);
363 sfree(leg);
365 spacing = pow(2,1.0/resol);
366 snew(tbs,n);
367 snew(ybs,n);
368 snew(fitsig,n);
369 for(s=0; s<nset; s++)
371 nbs = 0;
372 prev_bs = 0;
373 nbr = nb_min;
374 while (nbr <= n)
376 bs = n/(int)nbr;
377 if (bs != prev_bs)
379 nb = n/bs;
380 var = 0;
381 for(i=0; i<nb; i++)
383 blav=0;
384 for (j=0; j<bs; j++)
386 blav += val[s][bs*i+j];
388 var += sqr(av[s] - blav/bs);
390 tbs[nbs] = bs*dt;
391 if (sig[s] == 0)
393 ybs[nbs] = 0;
395 else
397 ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
399 nbs++;
401 nbr *= spacing;
402 nb = (int)(nbr+0.5);
403 prev_bs = bs;
405 if (sig[s] == 0)
407 ee = 0;
408 a = 1;
409 tau1 = 0;
410 tau2 = 0;
412 else
414 for(i=0; i<nbs/2; i++)
416 rtmp = tbs[i];
417 tbs[i] = tbs[nbs-1-i];
418 tbs[nbs-1-i] = rtmp;
419 rtmp = ybs[i];
420 ybs[i] = ybs[nbs-1-i];
421 ybs[nbs-1-i] = rtmp;
423 /* The initial slope of the normalized ybs^2 is 1.
424 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
425 * From this we take our initial guess for tau1.
427 twooe = 2/exp(1);
428 i = -1;
431 i++;
432 tau1_est = tbs[i];
433 } while (i < nbs - 1 &&
434 (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));
436 if (ybs[0] > ybs[1])
438 fprintf(stdout,"Data set %d has strange time correlations:\n"
439 "the std. error using single points is larger than that of blocks of 2 points\n"
440 "The error estimate might be inaccurate, check the fit\n",
441 s+1);
442 /* Use the total time as tau for the fitting weights */
443 tau_sig = (n - 1)*dt;
445 else
447 tau_sig = tau1_est;
450 if (debug)
452 fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
455 /* Generate more or less appropriate sigma's,
456 * also taking the density of points into account.
458 for(i=0; i<nbs; i++)
460 if (i == 0)
462 dens = tbs[1]/tbs[0] - 1;
464 else if (i == nbs-1)
466 dens = tbs[nbs-1]/tbs[nbs-2] - 1;
468 else
470 dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
472 fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
475 if (!bSingleExpFit)
477 fitparm[0] = tau1_est;
478 fitparm[1] = 0.95;
479 /* We set the initial guess for tau2
480 * to halfway between tau1_est and the total time (on log scale).
482 fitparm[2] = sqrt(tau1_est*(n-1)*dt);
483 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
484 bDebugMode(),effnERREST,fitparm,0);
485 fitparm[3] = 1-fitparm[1];
487 if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
488 || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
490 if (!bSingleExpFit)
492 if (fitparm[2] > (n-1)*dt)
494 fprintf(stdout,
495 "Warning: tau2 is longer than the length of the data (%g)\n"
496 " the statistics might be bad\n",
497 (n-1)*dt);
499 else
501 fprintf(stdout,"a fitted parameter is negative\n");
503 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
504 sig[s]*anal_ee_inf(fitparm,n*dt),
505 fitparm[1],fitparm[0],fitparm[2]);
506 /* Do a fit with tau2 fixed at the total time.
507 * One could also choose any other large value for tau2.
509 fitparm[0] = tau1_est;
510 fitparm[1] = 0.95;
511 fitparm[2] = (n-1)*dt;
512 fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
513 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
514 effnERREST,fitparm,4);
515 fitparm[3] = 1-fitparm[1];
517 if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
518 || (fitparm[1]>1 && !bAllowNegLTCorr))
520 if (!bSingleExpFit) {
521 fprintf(stdout,"a fitted parameter is negative\n");
522 fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
523 sig[s]*anal_ee_inf(fitparm,n*dt),
524 fitparm[1],fitparm[0],fitparm[2]);
526 /* Do a single exponential fit */
527 fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
528 fitparm[0] = tau1_est;
529 fitparm[1] = 1.0;
530 fitparm[2] = 0.0;
531 do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
532 effnERREST,fitparm,6);
533 fitparm[3] = 1-fitparm[1];
536 ee = sig[s]*anal_ee_inf(fitparm,n*dt);
537 a = fitparm[1];
538 tau1 = fitparm[0];
539 tau2 = fitparm[2];
541 fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
542 s+1,ee,a,tau1,tau2);
543 fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
544 fprintf(fp,"@ legend string %d \"ee %6g\"\n",
545 2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
546 for(i=0; i<nbs; i++)
548 fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
549 sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
552 if (bFitAc)
554 int fitlen;
555 real *ac,acint,ac_fit[4];
557 snew(ac,n);
558 for(i=0; i<n; i++) {
559 ac[i] = val[s][i] - av[s];
560 if (i > 0)
561 fitsig[i] = sqrt(i);
562 else
563 fitsig[i] = 1;
565 low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
566 dt,eacNormal,1,FALSE,TRUE,
567 FALSE,0,0,effnNONE,0);
569 fitlen = n/nb_min;
571 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
572 acint = 0.5*ac[0];
573 for(i=1; i<=fitlen/2; i++)
575 acint += ac[i];
577 acint *= dt;
579 /* Generate more or less appropriate sigma's */
580 for(i=0; i<=fitlen; i++)
582 fitsig[i] = sqrt(acint + dt*i);
585 ac_fit[0] = 0.5*acint;
586 ac_fit[1] = 0.95;
587 ac_fit[2] = 10*acint;
588 do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
589 bDebugMode(),effnEXP3,ac_fit,0);
590 ac_fit[3] = 1 - ac_fit[1];
592 fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
593 s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
594 ac_fit[1],ac_fit[0],ac_fit[2]);
596 fprintf(fp,"&\n");
597 for(i=0; i<nbs; i++)
599 fprintf(fp,"%g %g\n",tbs[i],
600 sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
603 sfree(ac);
605 if (s < nset-1)
607 fprintf(fp,"&\n");
610 sfree(fitsig);
611 sfree(ybs);
612 sfree(tbs);
613 ffclose(fp);
616 static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
617 bool bError,real fit_start,real smooth_tail_start,
618 const output_env_t oenv)
620 const real tol = 1e-8;
621 real *kt;
622 real weight,d2;
623 int j;
625 please_cite(stdout,"Spoel2006b");
627 /* Compute negative derivative k(t) = -dc(t)/dt */
628 if (!bError) {
629 snew(kt,nn);
630 compute_derivative(nn,time,val[0],kt);
631 for(j=0; (j<nn); j++)
632 kt[j] = -kt[j];
633 if (debug) {
634 d2 = 0;
635 for(j=0; (j<nn); j++)
636 d2 += sqr(kt[j] - val[3][j]);
637 fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
639 analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
640 temp,smooth_tail_start,oenv);
641 sfree(kt);
643 else if (nset == 6) {
644 analyse_corr(nn,time,val[0],val[2],val[4],
645 val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
647 else {
648 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
649 printf("Not doing anything. Sorry.\n");
653 static void filter(real flen,int n,int nset,real **val,real dt,
654 const output_env_t oenv)
656 int f,s,i,j;
657 double *filt,sum,vf,fluc,fluctot;
659 f = (int)(flen/(2*dt));
660 snew(filt,f+1);
661 filt[0] = 1;
662 sum = 1;
663 for(i=1; i<=f; i++) {
664 filt[i] = cos(M_PI*dt*i/flen);
665 sum += 2*filt[i];
667 for(i=0; i<=f; i++)
668 filt[i] /= sum;
669 fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
670 fprintf(stdout," using a filter of length %g of %d points\n",flen,2*f+1);
671 fluctot = 0;
672 for(s=0; s<nset; s++) {
673 fluc = 0;
674 for(i=f; i<n-f; i++) {
675 vf = filt[0]*val[s][i];
676 for(j=1; j<=f; j++)
677 vf += filt[j]*(val[s][i-f]+val[s][i+f]);
678 fluc += sqr(val[s][i] - vf);
680 fluc /= n - 2*f;
681 fluctot += fluc;
682 fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
684 fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
685 fprintf(stdout,"\n");
687 sfree(filt);
690 static void do_fit(FILE *out,int n,bool bYdy,int ny,real *x0,real **val,
691 int npargs,t_pargs *ppa,const output_env_t oenv)
693 real *c1=NULL,*sig=NULL,*fitparm;
694 real dt=0,tendfit,tbeginfit;
695 int i,efitfn,nparm;
697 efitfn = get_acffitfn();
698 nparm = nfp_ffn[efitfn];
699 fprintf(out,"Will fit to the following function:\n");
700 fprintf(out,"%s\n",longs_ffn[efitfn]);
701 c1 = val[n];
702 if (bYdy) {
703 c1 = val[n];
704 sig = val[n+1];
705 fprintf(out,"Using two columns as y and sigma values\n");
706 } else {
707 snew(sig,ny);
709 if (opt2parg_bSet("-beginfit",npargs,ppa)) {
710 tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
711 } else {
712 tbeginfit = x0 ? x0[0] : 0;
714 if (opt2parg_bSet("-endfit",npargs,ppa)) {
715 tendfit = opt2parg_real("-endfit",npargs,ppa);
716 } else {
717 tendfit = x0 ? x0[ny-1] : (ny-1)*dt;
720 snew(fitparm,nparm);
721 switch(efitfn) {
722 case effnEXP1:
723 fitparm[0] = 0.5;
724 break;
725 case effnEXP2:
726 fitparm[0] = 0.5;
727 fitparm[1] = c1[0];
728 break;
729 case effnEXP3:
730 fitparm[0] = 1.0;
731 fitparm[1] = 0.5*c1[0];
732 fitparm[2] = 10.0;
733 break;
734 case effnEXP5:
735 fitparm[0] = fitparm[2] = 0.5*c1[0];
736 fitparm[1] = 10;
737 fitparm[3] = 40;
738 fitparm[4] = 0;
739 break;
740 case effnEXP7:
741 fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
742 fitparm[1] = 1;
743 fitparm[3] = 10;
744 fitparm[5] = 100;
745 fitparm[6] = 0;
746 break;
747 case effnEXP9:
748 fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
749 fitparm[1] = 0.1;
750 fitparm[3] = 1;
751 fitparm[5] = 10;
752 fitparm[7] = 100;
753 fitparm[8] = 0;
754 break;
755 default:
756 fprintf(out,"Warning: don't know how to initialize the parameters\n");
757 for(i=0; (i<nparm); i++)
758 fitparm[i] = 1;
760 fprintf(out,"Starting parameters:\n");
761 for(i=0; (i<nparm); i++)
762 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
763 if (do_lmfit(ny,c1,sig,dt,x0,tbeginfit,tendfit,
764 oenv,bDebugMode(),efitfn,fitparm,0)) {
765 for(i=0; (i<nparm); i++)
766 fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
768 else {
769 fprintf(out,"No solution was found\n");
773 static void do_ballistic(const char *balFile, int nData,
774 real *t, real **val, int nSet,
775 real balTime, int nBalExp,
776 bool bDerivative,
777 const output_env_t oenv)
779 double **ctd=NULL, *td=NULL;
780 t_gemParams *GP = init_gemParams(0, 0, t, nData, 0, 0, 0, balTime, nBalExp, bDerivative);
781 static char *leg[] = {"Ac'(t)"};
782 FILE *fp;
783 int i, set;
785 if (GP->ballistic/GP->tDelta >= GP->nExpFit*2+1)
787 snew(ctd, nSet);
788 snew(td, nData);
790 fp = xvgropen(balFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
791 xvgr_legend(fp,asize(leg),(const char**)leg,oenv);
793 for (set=0; set<nSet; set++)
795 snew(ctd[set], nData);
796 for (i=0; i<nData; i++) {
797 ctd[set][i] = (double)val[set][i];
798 if (set==0)
799 td[i] = (double)t[i];
802 takeAwayBallistic(ctd[set], td, nData, GP->ballistic, GP->nExpFit, GP->bDt);
805 for (i=0; i<nData; i++)
807 fprintf(fp, " %g",t[i]);
808 for (set=0; set<nSet; set++)
810 fprintf(fp, " %g", ctd[set][i]);
812 fprintf(fp, "\n");
816 for (set=0; set<nSet; set++)
817 sfree(ctd[set]);
818 sfree(ctd);
819 sfree(td);
821 else
822 printf("Number of data points is less than the number of parameters to fit\n."
823 "The system is underdetermined, hence no ballistic term can be found.\n\n");
826 static void do_geminate(const char *gemFile, int nData,
827 real *t, real **val, int nSet,
828 const real D, const real rcut, const real balTime,
829 const int nFitPoints, const real begFit, const real endFit,
830 const output_env_t oenv)
832 double **ctd=NULL, **ctdGem=NULL, *td=NULL;
833 t_gemParams *GP = init_gemParams(rcut, D, t, nData, nFitPoints,
834 begFit, endFit, balTime, 1, FALSE);
835 const char *leg[] = {"Ac\\sgem\\N(t)"};
836 FILE *fp;
837 int i, set;
839 snew(ctd, nSet);
840 snew(ctdGem, nSet);
841 snew(td, nData);
843 fp = xvgropen(gemFile, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv);
844 xvgr_legend(fp,asize(leg),leg,oenv);
846 for (set=0; set<nSet; set++)
848 snew(ctd[set], nData);
849 snew(ctdGem[set], nData);
850 for (i=0; i<nData; i++) {
851 ctd[set][i] = (double)val[set][i];
852 if (set==0)
853 td[i] = (double)t[i];
855 fitGemRecomb(ctd[set], td, &(ctd[set]), nData, GP);
858 for (i=0; i<nData; i++)
860 fprintf(fp, " %g",t[i]);
861 for (set=0; set<nSet; set++)
863 fprintf(fp, " %g", ctdGem[set][i]);
865 fprintf(fp, "\n");
868 for (set=0; set<nSet; set++)
870 sfree(ctd[set]);
871 sfree(ctdGem[set]);
873 sfree(ctd);
874 sfree(ctdGem);
875 sfree(td);
878 int gmx_analyze(int argc,char *argv[])
880 static const char *desc[] = {
881 "g_analyze reads an ascii file and analyzes data sets.",
882 "A line in the input file may start with a time",
883 "(see option [TT]-time[tt]) and any number of y values may follow.",
884 "Multiple sets can also be",
885 "read when they are separated by & (option [TT]-n[tt]),",
886 "in this case only one y value is read from each line.",
887 "All lines starting with # and @ are skipped.",
888 "All analyses can also be done for the derivative of a set",
889 "(option [TT]-d[tt]).[PAR]",
891 "All options, except for [TT]-av[tt] and [TT]-power[tt] assume that the",
892 "points are equidistant in time.[PAR]",
894 "g_analyze always shows the average and standard deviation of each",
895 "set. For each set it also shows the relative deviation of the third",
896 "and fourth cumulant from those of a Gaussian distribution with the same",
897 "standard deviation.[PAR]",
899 "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",
901 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
902 "i/2 periods. The formula is:[BR]"
903 "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
904 "This is useful for principal components obtained from covariance",
905 "analysis, since the principal components of random diffusion are",
906 "pure cosines.[PAR]",
908 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
910 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
912 "Option [TT]-av[tt] produces the average over the sets.",
913 "Error bars can be added with the option [TT]-errbar[tt].",
914 "The errorbars can represent the standard deviation, the error",
915 "(assuming the points are independent) or the interval containing",
916 "90% of the points, by discarding 5% of the points at the top and",
917 "the bottom.[PAR]",
919 "Option [TT]-ee[tt] produces error estimates using block averaging.",
920 "A set is divided in a number of blocks and averages are calculated for",
921 "each block. The error for the total average is calculated from",
922 "the variance between averages of the m blocks B_i as follows:",
923 "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
924 "These errors are plotted as a function of the block size.",
925 "Also an analytical block average curve is plotted, assuming",
926 "that the autocorrelation is a sum of two exponentials.",
927 "The analytical curve for the block average is:[BR]",
928 "f(t) = sigma sqrt(2/T ( a (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
929 " (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
930 "where T is the total time.",
931 "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
932 "When the actual block average is very close to the analytical curve,",
933 "the error is sigma*sqrt(2/T (a tau1 + (1-a) tau2)).",
934 "The complete derivation is given in",
935 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
937 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
938 "component from a hydrogen bond autocorrelation function by the fitting",
939 "of a sum of exponentials, as described in e.g.",
940 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
941 "is the one with the most negative coefficient in the exponential,",
942 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
943 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
945 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
946 "(and optionally kD) to the hydrogen bond autocorrelation function",
947 "according to the reversible geminate recombination model. Removal of",
948 "the ballistic component first is strongly adviced. The model is presented in",
949 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
951 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
952 "of each set and over all sets with respect to a filtered average.",
953 "The filter is proportional to cos(pi t/len) where t goes from -len/2",
954 "to len/2. len is supplied with the option [TT]-filter[tt].",
955 "This filter reduces oscillations with period len/2 and len by a factor",
956 "of 0.79 and 0.33 respectively.[PAR]",
958 "Option [TT]-g[tt] fits the data to the function given with option",
959 "[TT]-fitfn[tt].[PAR]",
961 "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
962 "by fitting to a t + b on log-log scale. All points after the first",
963 "zero or negative value are ignored.[PAR]"
965 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
966 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
967 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
969 static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
970 static bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
971 static bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
972 static bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
973 static int nsets_in=1,d=1,nb_min=4,resol=10, nBalExp=4, nFitPoints=100;
974 static real temp=298.15,fit_start=1, fit_end=60, smooth_tail_start=-1, balTime=0.2, diffusion=5e-5,rcut=0.35;
976 /* must correspond to enum avbar* declared at beginning of file */
977 static const char *avbar_opt[avbarNR+1] = {
978 NULL, "none", "stddev", "error", "90", NULL
981 t_pargs pa[] = {
982 { "-time", FALSE, etBOOL, {&bHaveT},
983 "Expect a time in the input" },
984 { "-b", FALSE, etREAL, {&tb},
985 "First time to read from set" },
986 { "-e", FALSE, etREAL, {&te},
987 "Last time to read from set" },
988 { "-n", FALSE, etINT, {&nsets_in},
989 "Read # sets separated by &" },
990 { "-d", FALSE, etBOOL, {&bDer},
991 "Use the derivative" },
992 { "-dp", FALSE, etINT, {&d},
993 "HIDDENThe derivative is the difference over # points" },
994 { "-bw", FALSE, etREAL, {&binwidth},
995 "Binwidth for the distribution" },
996 { "-errbar", FALSE, etENUM, {avbar_opt},
997 "Error bars for -av" },
998 { "-integrate",FALSE,etBOOL, {&bIntegrate},
999 "Integrate data function(s) numerically using trapezium rule" },
1000 { "-aver_start",FALSE, etREAL, {&aver_start},
1001 "Start averaging the integral from here" },
1002 { "-xydy", FALSE, etBOOL, {&bXYdy},
1003 "Interpret second data set as error in the y values for integrating" },
1004 { "-regression",FALSE,etBOOL,{&bRegression},
1005 "Perform a linear regression analysis on the data. If -xydy is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize chi^2 = (y - A0 x0 - A1 x1 - ... - AN xN)^2 where now Y is the first data set in the input file and xi the others. Do read the information at the option [TT]-time[tt]." },
1006 { "-luzar", FALSE, etBOOL, {&bLuzar},
1007 "Do a Luzar and Chandler analysis on a correlation function and related as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1008 { "-temp", FALSE, etREAL, {&temp},
1009 "Temperature for the Luzar hydrogen bonding kinetics analysis" },
1010 { "-fitstart", FALSE, etREAL, {&fit_start},
1011 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1012 { "-fitend", FALSE, etREAL, {&fit_end},
1013 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with -gem" },
1014 { "-smooth",FALSE, etREAL, {&smooth_tail_start},
1015 "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
1016 { "-nbmin", FALSE, etINT, {&nb_min},
1017 "HIDDENMinimum number of blocks for block averaging" },
1018 { "-resol", FALSE, etINT, {&resol},
1019 "HIDDENResolution for the block averaging, block size increases with"
1020 " a factor 2^(1/#)" },
1021 { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
1022 "HIDDENAlways use a single exponential fit for the error estimate" },
1023 { "-eenlc", FALSE, etBOOL, {&bEENLC},
1024 "HIDDENAllow a negative long-time correlation" },
1025 { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
1026 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1027 { "-filter", FALSE, etREAL, {&filtlen},
1028 "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
1029 { "-power", FALSE, etBOOL, {&bPower},
1030 "Fit data to: b t^a" },
1031 { "-subav", FALSE, etBOOL, {&bSubAv},
1032 "Subtract the average before autocorrelating" },
1033 { "-oneacf", FALSE, etBOOL, {&bAverCorr},
1034 "Calculate one ACF over all sets" },
1035 { "-nbalexp", FALSE, etINT, {&nBalExp},
1036 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1037 { "-baltime", FALSE, etREAL, {&balTime},
1038 "HIDDENTime up to which the ballistic component will be fitted" },
1039 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1040 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1041 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1042 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1043 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1044 /* "What type of gminate recombination to use"}, */
1045 /* { "-D", FALSE, etREAL, {&diffusion}, */
1046 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1048 #define NPA asize(pa)
1050 FILE *out,*out_fit;
1051 int n,nlast,s,nset,i,j=0;
1052 real **val,*t,dt,tot,error;
1053 double *av,*sig,cum1,cum2,cum3,cum4,db;
1054 const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*balfile,*gemfile,*fitfile;
1055 output_env_t oenv;
1057 t_filenm fnm[] = {
1058 { efXVG, "-f", "graph", ffREAD },
1059 { efXVG, "-ac", "autocorr", ffOPTWR },
1060 { efXVG, "-msd", "msd", ffOPTWR },
1061 { efXVG, "-cc", "coscont", ffOPTWR },
1062 { efXVG, "-dist", "distr", ffOPTWR },
1063 { efXVG, "-av", "average", ffOPTWR },
1064 { efXVG, "-ee", "errest", ffOPTWR },
1065 { efXVG, "-bal", "ballisitc",ffOPTWR },
1066 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1067 { efLOG, "-g", "fitlog", ffOPTWR }
1069 #define NFILE asize(fnm)
1071 int npargs;
1072 t_pargs *ppa;
1074 npargs = asize(pa);
1075 ppa = add_acf_pargs(&npargs,pa);
1077 CopyRight(stderr,argv[0]);
1078 parse_common_args(&argc,argv,PCA_CAN_VIEW,
1079 NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);
1081 acfile = opt2fn_null("-ac",NFILE,fnm);
1082 msdfile = opt2fn_null("-msd",NFILE,fnm);
1083 ccfile = opt2fn_null("-cc",NFILE,fnm);
1084 distfile = opt2fn_null("-dist",NFILE,fnm);
1085 avfile = opt2fn_null("-av",NFILE,fnm);
1086 eefile = opt2fn_null("-ee",NFILE,fnm);
1087 balfile = opt2fn_null("-bal",NFILE,fnm);
1088 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1089 if (opt2parg_bSet("-fitfn",npargs,ppa))
1090 fitfile = opt2fn("-g",NFILE,fnm);
1091 else
1092 fitfile = opt2fn_null("-g",NFILE,fnm);
1094 val=read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
1095 opt2parg_bSet("-b",npargs,ppa),tb,
1096 opt2parg_bSet("-e",npargs,ppa),te,
1097 nsets_in,&nset,&n,&dt,&t);
1098 printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);
1100 if (bDer) {
1101 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1102 d,d);
1103 n -= d;
1104 for(s=0; s<nset; s++)
1105 for(i=0; (i<n); i++)
1106 val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
1108 if (bIntegrate) {
1109 real sum,stddev;
1110 printf("Calculating the integral using the trapezium rule\n");
1112 if (bXYdy) {
1113 sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
1114 printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
1116 else {
1117 for(s=0; s<nset; s++) {
1118 sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
1119 printf("Integral %d %10.5f +/- %10.5f\n",s+1,sum,stddev);
1123 if (fitfile) {
1124 out_fit = ffopen(fitfile,"w");
1125 if (bXYdy && nset>=2) {
1126 do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
1127 } else {
1128 for(s=0; s<nset; s++)
1129 do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
1131 ffclose(out_fit);
1134 printf(" std. dev. relative deviation of\n");
1135 printf(" standard --------- cumulants from those of\n");
1136 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1137 printf(" cum. 3 cum. 4\n");
1138 snew(av,nset);
1139 snew(sig,nset);
1140 for(s=0; (s<nset); s++) {
1141 cum1 = 0;
1142 cum2 = 0;
1143 cum3 = 0;
1144 cum4 = 0;
1145 for(i=0; (i<n); i++)
1146 cum1 += val[s][i];
1147 cum1 /= n;
1148 for(i=0; (i<n); i++) {
1149 db = val[s][i]-cum1;
1150 cum2 += db*db;
1151 cum3 += db*db*db;
1152 cum4 += db*db*db*db;
1154 cum2 /= n;
1155 cum3 /= n;
1156 cum4 /= n;
1157 av[s] = cum1;
1158 sig[s] = sqrt(cum2);
1159 if (n > 1)
1160 error = sqrt(cum2/(n-1));
1161 else
1162 error = 0;
1163 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1164 s+1,av[s],sig[s],error,
1165 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
1166 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
1168 printf("\n");
1170 if (filtlen)
1171 filter(filtlen,n,nset,val,dt,oenv);
1173 if (msdfile) {
1174 out=xvgropen(msdfile,"Mean square displacement",
1175 "time","MSD (nm\\S2\\N)",oenv);
1176 nlast = (int)(n*frac);
1177 for(s=0; s<nset; s++) {
1178 for(j=0; j<=nlast; j++) {
1179 if (j % 100 == 0)
1180 fprintf(stderr,"\r%d",j);
1181 tot=0;
1182 for(i=0; i<n-j; i++)
1183 tot += sqr(val[s][i]-val[s][i+j]);
1184 tot /= (real)(n-j);
1185 fprintf(out," %g %8g\n",dt*j,tot);
1187 if (s<nset-1)
1188 fprintf(out,"&\n");
1190 ffclose(out);
1191 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1193 if (ccfile)
1194 plot_coscont(ccfile,n,nset,val,oenv);
1196 if (distfile)
1197 histogram(distfile,binwidth,n,nset,val,oenv);
1198 if (avfile)
1199 average(avfile,nenum(avbar_opt),n,nset,val,t);
1200 if (eefile)
1201 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1202 bEeFitAc,bEESEF,bEENLC,oenv);
1203 if (balfile)
1204 do_ballistic(balfile,n,t,val,nset,balTime,nBalExp,bDer,oenv);
1205 /* if (gemfile) */
1206 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1207 /* nFitPoints, fit_start, fit_end, oenv); */
1208 if (bPower)
1209 power_fit(n,nset,val,t);
1210 if (acfile) {
1211 if (bSubAv)
1212 for(s=0; s<nset; s++)
1213 for(i=0; i<n; i++)
1214 val[s][i] -= av[s];
1215 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1216 eacNormal,bAverCorr);
1218 if (bRegression)
1219 regression_analysis(n,bXYdy,t,nset,val);
1221 if (bLuzar)
1222 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1224 view_all(oenv,NFILE, fnm);
1226 thanx(stderr);
1228 return 0;