/* src/tools/gmx_analyze.c */
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Green Red Orange Magenta Azure Cyan Skyblue
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
#include <string.h>
#include "statutil.h"
#include "sysstuff.h"
#include "typedefs.h"
#include "smalloc.h"
#include "macros.h"
#include "gmx_fatal.h"
#include "vec.h"
#include "copyrite.h"
#include "futil.h"
#include "readinp.h"
#include "statutil.h"
#include "txtdump.h"
#include "gstat.h"
#include "gmx_statistics.h"
#include "xvgr.h"
#include "gmx_ana.h"
/* must correspond to char *avbar_opt[] declared in main() */
enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
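
/* Power-law fit y = b*t^a, done as a linear least-squares fit of
 * log(y) against log(t) (or against log(index+1) when the first time
 * is not positive).  For each set only the leading points are used,
 * up to the first value that is not larger than 0 (see the message
 * printed below and the -power description in main()).
 */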
static void power_fit(int n,int nset,real **val,real *t)
{
    real *x,*y,quality,a,b,r;
    int  s,i;

    snew(x,n);
    snew(y,n);

    if (t[0]>0) {
        for(i=0; i<n; i++)
            if (t[0]>0)
                x[i] = log(t[i]);
    } else {
        fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
        for(i=0; i<n; i++)
            x[i] = log(i+1);
    }

    for(s=0; s<nset; s++) {
        i=0;
        for(i=0; i<n && val[s][i]>=0; i++)
            y[i] = log(val[s][i]);
        if (i < n)
            fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
        lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
        fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
                s+1,quality,a,exp(b));
    }

    sfree(y);
    sfree(x);
}
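
/* Cosine content of a set over nhp half periods:
 *   c = 2 ( int_0^T y(t) cos(nhp pi t/T) dt )^2 / ( T int_0^T y(t)^2 dt )
 * evaluated below as discrete sums over n equidistant points with T = n*dt.
 * For set i, nhp = i, i.e. i/2 full periods; a value close to 1 indicates
 * cosine-like (random-diffusion-like) behaviour of a principal component.
 */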
static real cosine_content(int nhp,int n,real *y)
/* Assumes n equidistant points */
{
    double fac,cosyint,yyint;
    int i;

    if (n < 2)
        return 0;

    fac = M_PI*nhp/(n-1);

    cosyint = 0;
    yyint = 0;
    for(i=0; i<n; i++) {
        cosyint += cos(fac*i)*y[i];
        yyint += y[i]*y[i];
    }

    return 2*cosyint*cosyint/(n*yyint);
}
static void plot_coscont(const char *ccfile,int n,int nset,real **val,
                         const output_env_t oenv)
{
    FILE *fp;
    int  s;
    real cc;

    fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
                  oenv);

    for(s=0; s<nset; s++) {
        cc = cosine_content(s+1,n,val[s]);
        fprintf(fp," %d %g\n",s+1,cc);
        fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
                s+1,0.5*(s+1),cc);
    }
    fprintf(stdout,"\n");

    ffclose(fp);
}
static void regression_analysis(int n,bool bXYdy,real *x,real **val)
{
    real S,chi2,a,b,da,db,r=0;

    printf("Fitting data to a function f(x) = ax + b\n");
    printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]^2\n");
    printf("Error estimates will be given if w_i (sigma) values are given\n");
    printf("(use option -xydy).\n\n");
    if (bXYdy)
        lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
    else
        lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
    chi2 = sqr((n-2)*S);
    printf("Chi2                    = %g\n",chi2);
    printf("S (Sqrt(Chi2/(n-2)))    = %g\n",S);
    printf("Correlation coefficient = %.1f%%\n",100*r);
    printf("\n");
    if (bXYdy) {
        printf("a = %g +/- %g\n",a,da);
        printf("b = %g +/- %g\n",b,db);
    }
    else {
        printf("a = %g\n",a);
        printf("b = %g\n",b);
    }
}
void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
               const output_env_t oenv)
{
    FILE   *fp;
    int    i,s;
    double min,max;
    int    nbin;
#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
    long long int *histo;
#else
    double *histo;
#endif

    min = val[0][0];
    max = val[0][0];
    for(s=0; s<nset; s++)
        for(i=0; i<n; i++)
            if (val[s][i] < min)
                min = val[s][i];
            else if (val[s][i] > max)
                max = val[s][i];

    min = binwidth*floor(min/binwidth);
    max = binwidth*ceil(max/binwidth);
    if (min != 0)
        min -= binwidth;
    max += binwidth;

    nbin = (int)((max - min)/binwidth + 0.5) + 1;
    fprintf(stderr,"Making distributions with %d bins\n",nbin);
    snew(histo,nbin);
    fp = xvgropen(distfile,"Distribution","","",oenv);
    for(s=0; s<nset; s++) {
        for(i=0; i<nbin; i++)
            histo[i] = 0;
        for(i=0; i<n; i++)
            histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
        for(i=0; i<nbin; i++)
            fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
        if (s < nset-1)
            fprintf(fp,"&\n");
    }
    ffclose(fp);
}
static int real_comp(const void *a,const void *b)
{
    real dif = *(real *)a - *(real *)b;

    if (dif < 0)
        return -1;
    else if (dif > 0)
        return 1;
    else
        return 0;
}
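
/* Average over the nset sets at each time point, with optional error
 * bars selected by -errbar:
 *   stddev : sqrt( Sum_s (x_s - <x>)^2 / nset )
 *   error  : stddev / sqrt(nset-1), assuming the sets are independent
 *   90     : asymmetric bars spanning the ~90% interval, obtained by
 *            sorting the nset values and discarding ~5% on each side
 */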
static void average(const char *avfile,int avbar_opt,
                    int n, int nset,real **val,real *t)
{
    FILE   *fp;
    int    i,s,edge=0;
    double av,var,err;
    real   *tmp=NULL;

    fp = ffopen(avfile,"w");
    if ((avbar_opt == avbarERROR) && (nset == 1))
        avbar_opt = avbarNONE;
    if (avbar_opt != avbarNONE) {
        if (avbar_opt == avbar90) {
            snew(tmp,nset);
            fprintf(fp,"@TYPE xydydy\n");
            edge = (int)(nset*0.05+0.5);
            fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
                    " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
        } else
            fprintf(fp,"@TYPE xydy\n");
    }

    for(i=0; i<n; i++) {
        av = 0;
        for(s=0; s<nset; s++)
            av += val[s][i];
        av /= nset;
        fprintf(fp," %g %g",t[i],av);
        var = 0;
        if (avbar_opt != avbarNONE) {
            if (avbar_opt == avbar90) {
                for(s=0; s<nset; s++)
                    tmp[s] = val[s][i];
                qsort(tmp,nset,sizeof(tmp[0]),real_comp);
                fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
            } else {
                for(s=0; s<nset; s++)
                    var += sqr(val[s][i]-av);
                if (avbar_opt == avbarSTDDEV)
                    err = sqrt(var/nset);
                else
                    err = sqrt(var/(nset*(nset-1)));
                fprintf(fp," %g",err);
            }
        }
        fprintf(fp,"\n");
    }
    ffclose(fp);

    if (avbar_opt == avbar90)
        sfree(tmp);
}
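
/* Analytical block-average error curve for an autocorrelation that is a
 * sum of two exponentials (B. Hess, J. Chem. Phys. 116:209-217, 2002):
 *   f(t) = sigma sqrt( 2/T (   a   tau1 ((exp(-t/tau1) - 1) tau1/t + 1)
 *                            + (1-a) tau2 ((exp(-t/tau2) - 1) tau2/t + 1) ) )
 * with parm[0] = tau1, parm[1] = a, parm[2] = tau2, parm[3] = 1-a and T the
 * total time.  The functions below return f(t)/sigma; the caller multiplies
 * by sig[s].  anal_ee_inf() is the t -> infinity limit, which is the error
 * estimate itself: sigma sqrt(2/T (a tau1 + (1-a) tau2)).
 */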
static real anal_ee_inf(real *parm,real T)
{
    return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
}
static real anal_ee(real *parm,real T,real t)
{
    real e1,e2;

    if (parm[0])
        e1 = exp(-t/parm[0]);
    else
        e1 = 1;
    if (parm[2])
        e2 = exp(-t/parm[2]);
    else
        e2 = 1;

    return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
                parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
}
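
/* Block-averaging error estimate.  The data are split into nb blocks of
 * bs points; the variance of the block averages, normalised as
 * var/(nb*(nb-1)) * (n*dt) / sigma^2, is tabulated against the block
 * size and fitted to the two-exponential model above (effnERREST).
 * The initial guess for tau1 is the block time where the normalised
 * curve first drops to 2/e of that block time; when the fit gives
 * unphysical parameters, tau2 is fixed at the total time and, failing
 * that, a single-exponential fit is used.
 */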
static void estimate_error(const char *eefile,int nb_min,int resol,int n,
                           int nset, double *av,double *sig,real **val,real dt,
                           bool bFitAc,bool bSingleExpFit,bool bAllowNegLTCorr,
                           const output_env_t oenv)
{
    FILE   *fp;
    int    bs,prev_bs,nbs,nb;
    real   spacing,nbr;
    int    s,i,j;
    double blav,var;
    char   **leg;
    real   *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
    real   fitparm[4];
    real   ee,a,tau1,tau2;

    if (n < 4)
    {
        fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");

        return;
    }

    fp = xvgropen(eefile,"Error estimates",
                  "Block size (time)","Error estimate", oenv);
    if (output_env_get_print_xvgr_codes(oenv))
    {
        fprintf(fp,
                "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
                (n-1)*dt,n);
    }
    snew(leg,2*nset);
    xvgr_legend(fp,2*nset,leg,oenv);
    sfree(leg);

    spacing = pow(2,1.0/resol);
    snew(tbs,n);
    snew(ybs,n);
    snew(fitsig,n);
    for(s=0; s<nset; s++)
    {
        nbs = 0;
        prev_bs = 0;
        nbr = nb_min;
        while (nbr <= n)
        {
            bs = n/(int)nbr;
            if (bs != prev_bs)
            {
                nb = n/bs;
                var = 0;
                for(i=0; i<nb; i++)
                {
                    blav = 0;
                    for (j=0; j<bs; j++)
                    {
                        blav += val[s][bs*i+j];
                    }
                    var += sqr(av[s] - blav/bs);
                }
                tbs[nbs] = bs*dt;
                if (sig[s] == 0)
                {
                    ybs[nbs] = 0;
                }
                else
                {
                    ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
                }
                nbs++;
            }
            nbr *= spacing;
            nb = (int)(nbr+0.5);
            prev_bs = bs;
        }
        if (sig[s] == 0)
        {
            ee   = 0;
            a    = 1;
            tau1 = 0;
            tau2 = 0;
        }
        else
        {
            for(i=0; i<nbs/2; i++)
            {
                rtmp         = tbs[i];
                tbs[i]       = tbs[nbs-1-i];
                tbs[nbs-1-i] = rtmp;
                rtmp         = ybs[i];
                ybs[i]       = ybs[nbs-1-i];
                ybs[nbs-1-i] = rtmp;
            }
            /* The initial slope of the normalized ybs^2 is 1.
             * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
             * From this we take our initial guess for tau1.
             */
            twooe = 2/exp(1);
            i = -1;
            do
            {
                i++;
                tau1_est = tbs[i];
            } while (i < nbs - 1 &&
                     (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));

            if (ybs[0] > ybs[1])
            {
                fprintf(stdout,"Data set %d has strange time correlations:\n"
                        "the std. error using single points is larger than that of blocks of 2 points\n"
                        "The error estimate might be inaccurate, check the fit\n",
                        s+1);
                /* Use the total time as tau for the fitting weights */
                tau_sig = (n - 1)*dt;
            }
            else
            {
                tau_sig = tau1_est;
            }

            if (debug)
            {
                fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
            }

            /* Generate more or less appropriate sigma's,
             * also taking the density of points into account.
             */
            for(i=0; i<nbs; i++)
            {
                if (i == 0)
                {
                    dens = tbs[1]/tbs[0] - 1;
                }
                else if (i == nbs-1)
                {
                    dens = tbs[nbs-1]/tbs[nbs-2] - 1;
                }
                else
                {
                    dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
                }
                fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
            }

            if (!bSingleExpFit)
            {
                fitparm[0] = tau1_est;
                fitparm[1] = 0.95;
                /* We set the initial guess for tau2
                 * to halfway between tau1_est and the total time (on log scale).
                 */
                fitparm[2] = sqrt(tau1_est*(n-1)*dt);
                do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
                         bDebugMode(),effnERREST,fitparm,0);
                fitparm[3] = 1-fitparm[1];
            }
            if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
                || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
            {
                if (!bSingleExpFit)
                {
                    if (fitparm[2] > (n-1)*dt)
                    {
                        fprintf(stdout,
                                "Warning: tau2 is longer than the length of the data (%g)\n"
                                "         the statistics might be bad\n",
                                (n-1)*dt);
                    }
                    else
                    {
                        fprintf(stdout,"a fitted parameter is negative\n");
                    }
                    fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                            sig[s]*anal_ee_inf(fitparm,n*dt),
                            fitparm[1],fitparm[0],fitparm[2]);
                    /* Do a fit with tau2 fixed at the total time.
                     * One could also choose any other large value for tau2.
                     */
                    fitparm[0] = tau1_est;
                    fitparm[1] = 0.95;
                    fitparm[2] = (n-1)*dt;
                    fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,4);
                    fitparm[3] = 1-fitparm[1];
                }
                if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
                    || (fitparm[1]>1 && !bAllowNegLTCorr))
                {
                    if (!bSingleExpFit) {
                        fprintf(stdout,"a fitted parameter is negative\n");
                        fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                                sig[s]*anal_ee_inf(fitparm,n*dt),
                                fitparm[1],fitparm[0],fitparm[2]);
                    }
                    /* Do a single exponential fit */
                    fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
                    fitparm[0] = tau1_est;
                    fitparm[1] = 1.0;
                    fitparm[2] = 0.0;
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,6);
                    fitparm[3] = 1-fitparm[1];
                }
            }
            ee   = sig[s]*anal_ee_inf(fitparm,n*dt);
            a    = fitparm[1];
            tau1 = fitparm[0];
            tau2 = fitparm[2];
        }
        fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
                s+1,ee,a,tau1,tau2);
        fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
        fprintf(fp,"@ legend string %d \"ee %6g\"\n",
                2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
        for(i=0; i<nbs; i++)
        {
            fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
                    sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
        }

        if (bFitAc)
        {
            int  fitlen;
            real *ac,acint,ac_fit[4];

            snew(ac,n);
            for(i=0; i<n; i++) {
                ac[i] = val[s][i] - av[s];
                if (i > 0)
                    fitsig[i] = sqrt(i);
                else
                    fitsig[i] = 1;
            }
            low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
                            dt,eacNormal,1,FALSE,TRUE,
                            FALSE,0,0,effnNONE,0);

            fitlen = n/nb_min;

            /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
            acint = 0.5*ac[0];
            for(i=1; i<=fitlen/2; i++)
            {
                acint += ac[i];
            }
            acint *= dt;

            /* Generate more or less appropriate sigma's */
            for(i=0; i<=fitlen; i++)
            {
                fitsig[i] = sqrt(acint + dt*i);
            }

            ac_fit[0] = 0.5*acint;
            ac_fit[1] = 0.95;
            ac_fit[2] = 10*acint;
            do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
                     bDebugMode(),effnEXP3,ac_fit,0);
            ac_fit[3] = 1 - ac_fit[1];

            fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
                    s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
                    ac_fit[1],ac_fit[0],ac_fit[2]);

            fprintf(fp,"&\n");
            for(i=0; i<nbs; i++)
            {
                fprintf(fp,"%g %g\n",tbs[i],
                        sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
            }

            sfree(ac);
        }
        if (s < nset-1)
        {
            fprintf(fp,"&\n");
        }
    }
    sfree(fitsig);
    sfree(ybs);
    sfree(tbs);
    ffclose(fp);
}
static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
                         bool bError,real fit_start,real smooth_tail_start,
                         const output_env_t oenv)
{
    const real tol = 1e-8;
    real *kt;
    real weight,d2;
    int  j;

    please_cite(stdout,"Spoel2006b");

    /* Compute negative derivative k(t) = -dc(t)/dt */
    if (!bError) {
        snew(kt,nn);
        compute_derivative(nn,time,val[0],kt);
        for(j=0; (j<nn); j++)
            kt[j] = -kt[j];
        if (debug) {
            d2 = 0;
            for(j=0; (j<nn); j++)
                d2 += sqr(kt[j] - val[3][j]);
            fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
        }
        analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
                     temp,smooth_tail_start,oenv);
        sfree(kt);
    }
    else if (nset == 6) {
        analyse_corr(nn,time,val[0],val[2],val[4],
                     val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
    }
    else {
        printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
        printf("Not doing anything. Sorry.\n");
    }
}
static void filter(real flen,int n,int nset,real **val,real dt,
                   const output_env_t oenv)
{
    int    f,s,i,j;
    double *filt,sum,vf,fluc,fluctot;

    f = (int)(flen/(2*dt));
    snew(filt,f+1);
    filt[0] = 1;
    sum = 1;
    for(i=1; i<=f; i++) {
        filt[i] = cos(M_PI*dt*i/flen);
        sum += 2*filt[i];
    }
    for(i=0; i<=f; i++)
        filt[i] /= sum;
    fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
    fprintf(stdout,"  using a filter of length %g of %d points\n",flen,2*f+1);
    fluctot = 0;
    for(s=0; s<nset; s++) {
        fluc = 0;
        for(i=f; i<n-f; i++) {
            vf = filt[0]*val[s][i];
            for(j=1; j<=f; j++)
                vf += filt[j]*(val[s][i-j]+val[s][i+j]);
            fluc += sqr(val[s][i] - vf);
        }
        fluc /= n - 2*f;
        fluctot += fluc;
        fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
    }
    fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
    fprintf(stdout,"\n");

    sfree(filt);
}
static void do_fit(FILE *out,int n,bool bYdy,int ny,real *x0,real **val,
                   int npargs,t_pargs *ppa,const output_env_t oenv)
{
    real *c1=NULL,*sig=NULL,*fitparm;
    real dt=0,tendfit,tbeginfit;
    int  i,efitfn,nparm;

    efitfn = get_acffitfn();
    nparm  = nfp_ffn[efitfn];
    fprintf(out,"Will fit to the following function:\n");
    fprintf(out,"%s\n",longs_ffn[efitfn]);
    c1 = val[n];
    if (bYdy) {
        c1  = val[n];
        sig = val[n+1];
        fprintf(out,"Using two columns as y and sigma values\n");
    } else {
        snew(sig,ny);
    }
    if (opt2parg_bSet("-beginfit",npargs,ppa)) {
        tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
    } else {
        tbeginfit = x0 ? x0[0] : 0;
    }
    if (opt2parg_bSet("-endfit",npargs,ppa)) {
        tendfit = opt2parg_real("-endfit",npargs,ppa);
    } else {
        tendfit = x0 ? x0[ny-1] : (ny-1)*dt;
    }

    snew(fitparm,nparm);
    switch(efitfn) {
    case effnEXP1:
        fitparm[0] = 0.5;
        break;
    case effnEXP2:
        fitparm[0] = 0.5;
        fitparm[1] = c1[0];
        break;
    case effnEXP3:
        fitparm[0] = 1.0;
        fitparm[1] = 0.5*c1[0];
        fitparm[2] = 10.0;
        break;
    case effnEXP5:
        fitparm[0] = fitparm[2] = 0.5*c1[0];
        fitparm[1] = 10;
        fitparm[3] = 40;
        fitparm[4] = 0;
        break;
    case effnEXP7:
        fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
        fitparm[1] = 1;
        fitparm[3] = 10;
        fitparm[5] = 100;
        fitparm[6] = 0;
        break;
    case effnEXP9:
        fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
        fitparm[1] = 0.1;
        fitparm[3] = 1;
        fitparm[5] = 10;
        fitparm[7] = 100;
        fitparm[8] = 0;
        break;
    default:
        fprintf(out,"Warning: don't know how to initialize the parameters\n");
        for(i=0; (i<nparm); i++)
            fitparm[i] = 1;
    }
    fprintf(out,"Starting parameters:\n");
    for(i=0; (i<nparm); i++)
        fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    if (do_lmfit(ny,c1,sig,dt,x0,tbeginfit,tendfit,
                 oenv,bDebugMode(),efitfn,fitparm,0)) {
        for(i=0; (i<nparm); i++)
            fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    }
    else {
        fprintf(out,"No solution was found\n");
    }
}
int gmx_analyze(int argc,char *argv[])
{
    static const char *desc[] = {
        "g_analyze reads an ascii file and analyzes data sets.",
        "A line in the input file may start with a time",
        "(see option [TT]-time[tt]) and any number of y values may follow.",
        "Multiple sets can also be",
        "read when they are separated by & (option [TT]-n[tt]),",
        "in this case only one y value is read from each line.",
        "All lines starting with # and @ are skipped.",
        "All analyses can also be done for the derivative of a set",
        "(option [TT]-d[tt]).[PAR]",

        "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
        "points are equidistant in time.[PAR]",

        "g_analyze always shows the average and standard deviation of each",
        "set. For each set it also shows the relative deviation of the third",
        "and fourth cumulant from those of a Gaussian distribution with the same",
        "standard deviation.[PAR]",

        "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",

        "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
        "i/2 periods. The formula is:[BR]"
        "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
        "This is useful for principal components obtained from covariance",
        "analysis, since the principal components of random diffusion are",
        "pure cosines.[PAR]",

        "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",

        "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",

        "Option [TT]-av[tt] produces the average over the sets.",
        "Error bars can be added with the option [TT]-errbar[tt].",
        "The errorbars can represent the standard deviation, the error",
        "(assuming the points are independent) or the interval containing",
        "90% of the points, by discarding 5% of the points at the top and",
        "the bottom.[PAR]",

        "Option [TT]-ee[tt] produces error estimates using block averaging.",
        "A set is divided in a number of blocks and averages are calculated for",
        "each block. The error for the total average is calculated from",
        "the variance between averages of the m blocks B_i as follows:",
        "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
        "These errors are plotted as a function of the block size.",
        "Also an analytical block average curve is plotted, assuming",
        "that the autocorrelation is a sum of two exponentials.",
        "The analytical curve for the block average is:[BR]",
        "f(t) = sigma sqrt(2/T (  a   (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
        "                       (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
        "where T is the total time.",
        "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
        "When the actual block average is very close to the analytical curve,",
        "the error is sigma*sqrt(2/T (a tau1 + (1-a) tau2)).",
        "The complete derivation is given in",
        "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",

        "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
        "of each set and over all sets with respect to a filtered average.",
        "The filter is proportional to cos(pi t/len) where t goes from -len/2",
        "to len/2. len is supplied with the option [TT]-filter[tt].",
        "This filter reduces oscillations with period len/2 and len by a factor",
        "of 0.79 and 0.33 respectively.[PAR]",

        "Option [TT]-g[tt] fits the data to the function given with option",
        "[TT]-fitfn[tt].[PAR]",

        "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
        "by fitting to a t + b on log-log scale. All points after the first",
        "zero or negative value are ignored.[PAR]"

        "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
        "on output from [TT]g_hbond[tt]. The input file can be taken directly",
        "from [TT]g_hbond -ac[tt], and then the same result should be produced."
    };
    static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
    static bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
    static bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
    static bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
    static int  nsets_in=1,d=1,nb_min=4,resol=10;
    static real temp=298.15,fit_start=1,smooth_tail_start=-1;

    /* must correspond to enum avbar* declared at beginning of file */
    static const char *avbar_opt[avbarNR+1] = {
        NULL, "none", "stddev", "error", "90", NULL
    };

    t_pargs pa[] = {
        { "-time",    FALSE, etBOOL, {&bHaveT},
          "Expect a time in the input" },
        { "-b",       FALSE, etREAL, {&tb},
          "First time to read from set" },
        { "-e",       FALSE, etREAL, {&te},
          "Last time to read from set" },
        { "-n",       FALSE, etINT, {&nsets_in},
          "Read # sets separated by &" },
        { "-d",       FALSE, etBOOL, {&bDer},
          "Use the derivative" },
        { "-dp",      FALSE, etINT, {&d},
          "HIDDENThe derivative is the difference over # points" },
        { "-bw",      FALSE, etREAL, {&binwidth},
          "Binwidth for the distribution" },
        { "-errbar",  FALSE, etENUM, {avbar_opt},
          "Error bars for -av" },
        { "-integrate",FALSE,etBOOL, {&bIntegrate},
          "Integrate data function(s) numerically using trapezium rule" },
        { "-aver_start",FALSE, etREAL, {&aver_start},
          "Start averaging the integral from here" },
        { "-xydy",    FALSE, etBOOL, {&bXYdy},
          "Interpret second data set as error in the y values for integrating" },
        { "-regression",FALSE,etBOOL,{&bRegression},
          "Perform a linear regression analysis on the data" },
        { "-luzar",   FALSE, etBOOL, {&bLuzar},
          "Do a Luzar and Chandler analysis on a correlation function and related as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
        { "-temp",    FALSE, etREAL, {&temp},
          "Temperature for the Luzar hydrogen bonding kinetics analysis" },
        { "-fitstart", FALSE, etREAL, {&fit_start},
          "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
        { "-smooth",  FALSE, etREAL, {&smooth_tail_start},
          "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
        { "-nbmin",   FALSE, etINT, {&nb_min},
          "HIDDENMinimum number of blocks for block averaging" },
        { "-resol",   FALSE, etINT, {&resol},
          "HIDDENResolution for the block averaging, block size increases with"
          " a factor 2^(1/#)" },
        { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
          "HIDDENAlways use a single exponential fit for the error estimate" },
        { "-eenlc",   FALSE, etBOOL, {&bEENLC},
          "HIDDENAllow a negative long-time correlation" },
        { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
          "HIDDENAlso plot analytical block average using an autocorrelation fit" },
        { "-filter",  FALSE, etREAL, {&filtlen},
          "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
        { "-power",   FALSE, etBOOL, {&bPower},
          "Fit data to: b t^a" },
        { "-subav",   FALSE, etBOOL, {&bSubAv},
          "Subtract the average before autocorrelating" },
        { "-oneacf",  FALSE, etBOOL, {&bAverCorr},
          "Calculate one ACF over all sets" }
    };
#define NPA asize(pa)

    FILE       *out,*out_fit;
    int        n,nlast,s,nset,i,j=0;
    real       **val,*t,dt,tot,error;
    double     *av,*sig,cum1,cum2,cum3,cum4,db;
    const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*fitfile;
    output_env_t oenv;

    t_filenm fnm[] = {
        { efXVG, "-f",    "graph",    ffREAD  },
        { efXVG, "-ac",   "autocorr", ffOPTWR },
        { efXVG, "-msd",  "msd",      ffOPTWR },
        { efXVG, "-cc",   "coscont",  ffOPTWR },
        { efXVG, "-dist", "distr",    ffOPTWR },
        { efXVG, "-av",   "average",  ffOPTWR },
        { efXVG, "-ee",   "errest",   ffOPTWR },
        { efLOG, "-g",    "fitlog",   ffOPTWR }
    };
#define NFILE asize(fnm)

    int     npargs;
    t_pargs *ppa;

    npargs = asize(pa);
    ppa    = add_acf_pargs(&npargs,pa);

    CopyRight(stderr,argv[0]);
    parse_common_args(&argc,argv,PCA_CAN_VIEW,
                      NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);

    acfile   = opt2fn_null("-ac",NFILE,fnm);
    msdfile  = opt2fn_null("-msd",NFILE,fnm);
    ccfile   = opt2fn_null("-cc",NFILE,fnm);
    distfile = opt2fn_null("-dist",NFILE,fnm);
    avfile   = opt2fn_null("-av",NFILE,fnm);
    eefile   = opt2fn_null("-ee",NFILE,fnm);
    if (opt2parg_bSet("-fitfn",npargs,ppa))
        fitfile = opt2fn("-g",NFILE,fnm);
    else
        fitfile = opt2fn_null("-g",NFILE,fnm);

    val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
                        opt2parg_bSet("-b",npargs,ppa),tb,
                        opt2parg_bSet("-e",npargs,ppa),te,
                        nsets_in,&nset,&n,&dt,&t);
    printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);

    if (bDer) {
        printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
               d,d);
        n -= d;
        for(s=0; s<nset; s++)
            for(i=0; (i<n); i++)
                val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
    }
    if (bIntegrate) {
        real sum,stddev;
        printf("Calculating the integral using the trapezium rule\n");

        if (bXYdy) {
            sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
            printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
        }
        else {
            for(s=0; s<nset; s++) {
                sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
                printf("Integral %d  %10.5f +/- %10.5f\n",s+1,sum,stddev);
            }
        }
    }
    if (fitfile) {
        out_fit = ffopen(fitfile,"w");
        if (bXYdy && nset>=2) {
            do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
        } else {
            for(s=0; s<nset; s++)
                do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
        }
        ffclose(out_fit);
    }
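
    /* Per-set statistics: cum1 is the mean, cum2..cum4 the second to fourth
     * central moments.  The last two printed columns compare the third and
     * fourth moments with those of a Gaussian of the same sigma, for which
     * <|x-mu|^3> = sigma^3*sqrt(8/pi) and <(x-mu)^4> = 3*sigma^4, so both
     * columns are close to zero for Gaussian-distributed data.
     */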
961 printf(" std. dev. relative deviation of\n");
962 printf(" standard --------- cumulants from those of\n");
963 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
964 printf(" cum. 3 cum. 4\n");
965 snew(av,nset);
966 snew(sig,nset);
967 for(s=0; (s<nset); s++) {
968 cum1 = 0;
969 cum2 = 0;
970 cum3 = 0;
971 cum4 = 0;
972 for(i=0; (i<n); i++)
973 cum1 += val[s][i];
974 cum1 /= n;
975 for(i=0; (i<n); i++) {
976 db = val[s][i]-cum1;
977 cum2 += db*db;
978 cum3 += db*db*db;
979 cum4 += db*db*db*db;
981 cum2 /= n;
982 cum3 /= n;
983 cum4 /= n;
984 av[s] = cum1;
985 sig[s] = sqrt(cum2);
986 if (n > 1)
987 error = sqrt(cum2/(n-1));
988 else
989 error = 0;
990 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
991 s+1,av[s],sig[s],error,
992 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
993 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
995 printf("\n");
997 if (filtlen)
998 filter(filtlen,n,nset,val,dt,oenv);
1000 if (msdfile) {
1001 out=xvgropen(msdfile,"Mean square displacement",
1002 "time","MSD (nm\\S2\\N)",oenv);
1003 nlast = (int)(n*frac);
1004 for(s=0; s<nset; s++) {
1005 for(j=0; j<=nlast; j++) {
1006 if (j % 100 == 0)
1007 fprintf(stderr,"\r%d",j);
1008 tot=0;
1009 for(i=0; i<n-j; i++)
1010 tot += sqr(val[s][i]-val[s][i+j]);
1011 tot /= (real)(n-j);
1012 fprintf(out," %g %8g\n",dt*j,tot);
1014 if (s<nset-1)
1015 fprintf(out,"&\n");
1017 ffclose(out);
1018 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1020 if (ccfile)
1021 plot_coscont(ccfile,n,nset,val,oenv);
1023 if (distfile)
1024 histogram(distfile,binwidth,n,nset,val,oenv);
1025 if (avfile)
1026 average(avfile,nenum(avbar_opt),n,nset,val,t);
1027 if (eefile)
1028 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1029 bEeFitAc,bEESEF,bEENLC,oenv);
1030 if (bPower)
1031 power_fit(n,nset,val,t);
1032 if (acfile) {
1033 if (bSubAv)
1034 for(s=0; s<nset; s++)
1035 for(i=0; i<n; i++)
1036 val[s][i] -= av[s];
1037 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1038 eacNormal,bAverCorr);
1040 if (bRegression)
1041 regression_analysis(n,bXYdy,t,val);
1043 if (bLuzar)
1044 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1046 view_all(oenv,NFILE, fnm);
1048 thanx(stderr);
1050 return 0;