2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gmx_fatal.h"
56 #include "gmx_matrix.h"
57 #include "gmx_statistics.h"
62 /* must correspond to char *avbar_opt[] declared in main() */
63 enum { avbarSEL
, avbarNONE
, avbarSTDDEV
, avbarERROR
, avbar90
, avbarNR
};
65 static void power_fit(int n
,int nset
,real
**val
,real
*t
)
67 real
*x
,*y
,quality
,a
,b
,r
;
78 fprintf(stdout
,"First time is not larger than 0, using index number as time for power fit\n");
83 for(s
=0; s
<nset
; s
++) {
85 for(i
=0; i
<n
&& val
[s
][i
]>=0; i
++)
86 y
[i
] = log(val
[s
][i
]);
88 fprintf(stdout
,"Will power fit up to point %d, since it is not larger than 0\n",i
);
89 lsq_y_ax_b(i
,x
,y
,&a
,&b
,&r
,&quality
);
90 fprintf(stdout
,"Power fit set %3d: error %.3f a %g b %g\n",
91 s
+1,quality
,a
,exp(b
));
98 static real
cosine_content(int nhp
,int n
,real
*y
)
99 /* Assumes n equidistant points */
101 double fac
,cosyint
,yyint
;
107 fac
= M_PI
*nhp
/(n
-1);
112 cosyint
+= cos(fac
*i
)*y
[i
];
116 return 2*cosyint
*cosyint
/(n
*yyint
);
119 static void plot_coscont(const char *ccfile
,int n
,int nset
,real
**val
,
120 const output_env_t oenv
)
126 fp
= xvgropen(ccfile
,"Cosine content","set / half periods","cosine content",
129 for(s
=0; s
<nset
; s
++) {
130 cc
= cosine_content(s
+1,n
,val
[s
]);
131 fprintf(fp
," %d %g\n",s
+1,cc
);
132 fprintf(stdout
,"Cosine content of set %d with %.1f periods: %g\n",
135 fprintf(stdout
,"\n");
140 static void regression_analysis(int n
,gmx_bool bXYdy
,
141 real
*x
,int nset
,real
**val
)
143 real S
,chi2
,a
,b
,da
,db
,r
=0;
146 if (bXYdy
|| (nset
== 1))
148 printf("Fitting data to a function f(x) = ax + b\n");
149 printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]2\n");
150 printf("Error estimates will be given if w_i (sigma) values are given\n");
151 printf("(use option -xydy).\n\n");
154 if ((ok
= lsq_y_ax_b_error(n
,x
,val
[0],val
[1],&a
,&b
,&da
,&db
,&r
,&S
)) != estatsOK
)
155 gmx_fatal(FARGS
,"Error fitting the data: %s",
156 gmx_stats_message(ok
));
160 if ((ok
= lsq_y_ax_b(n
,x
,val
[0],&a
,&b
,&r
,&S
)) != estatsOK
)
161 gmx_fatal(FARGS
,"Error fitting the data: %s",
162 gmx_stats_message(ok
));
165 printf("Chi2 = %g\n",chi2
);
166 printf("S (Sqrt(Chi2/(n-2)) = %g\n",S
);
167 printf("Correlation coefficient = %.1f%%\n",100*r
);
170 printf("a = %g +/- %g\n",a
,da
);
171 printf("b = %g +/- %g\n",b
,db
);
174 printf("a = %g\n",a
);
175 printf("b = %g\n",b
);
180 double chi2
,*a
,**xx
,*y
;
185 for(j
=0; (j
<nset
-1); j
++)
190 for(j
=1; (j
<nset
); j
++)
191 xx
[j
-1][i
] = val
[j
][i
];
194 chi2
= multi_regression(NULL
,n
,y
,nset
-1,xx
,a
);
195 printf("Fitting %d data points in %d sets\n",n
,nset
-1);
196 printf("chi2 = %g\n",chi2
);
198 for(i
=0; (i
<nset
-1); i
++)
210 void histogram(const char *distfile
,real binwidth
,int n
, int nset
, real
**val
,
211 const output_env_t oenv
)
217 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
218 long long int *histo
;
225 for(s
=0; s
<nset
; s
++)
229 else if (val
[s
][i
] > max
)
232 min
= binwidth
*floor(min
/binwidth
);
233 max
= binwidth
*ceil(max
/binwidth
);
238 nbin
= (int)((max
- min
)/binwidth
+ 0.5) + 1;
239 fprintf(stderr
,"Making distributions with %d bins\n",nbin
);
241 fp
= xvgropen(distfile
,"Distribution","","",oenv
);
242 for(s
=0; s
<nset
; s
++) {
243 for(i
=0; i
<nbin
; i
++)
246 histo
[(int)((val
[s
][i
] - min
)/binwidth
+ 0.5)]++;
247 for(i
=0; i
<nbin
; i
++)
248 fprintf(fp
," %g %g\n",min
+i
*binwidth
,(double)histo
[i
]/(n
*binwidth
));
255 static int real_comp(const void *a
,const void *b
)
257 real dif
= *(real
*)a
- *(real
*)b
;
267 static void average(const char *avfile
,int avbar_opt
,
268 int n
, int nset
,real
**val
,real
*t
)
275 fp
= ffopen(avfile
,"w");
276 if ((avbar_opt
== avbarERROR
) && (nset
== 1))
277 avbar_opt
= avbarNONE
;
278 if (avbar_opt
!= avbarNONE
) {
279 if (avbar_opt
== avbar90
) {
281 fprintf(fp
,"@TYPE xydydy\n");
282 edge
= (int)(nset
*0.05+0.5);
283 fprintf(stdout
,"Errorbars: discarding %d points on both sides: %d%%"
284 " interval\n",edge
,(int)(100*(nset
-2*edge
)/nset
+0.5));
286 fprintf(fp
,"@TYPE xydy\n");
291 for(s
=0; s
<nset
; s
++)
294 fprintf(fp
," %g %g",t
[i
],av
);
296 if (avbar_opt
!= avbarNONE
) {
297 if (avbar_opt
== avbar90
) {
298 for(s
=0; s
<nset
; s
++)
300 qsort(tmp
,nset
,sizeof(tmp
[0]),real_comp
);
301 fprintf(fp
," %g %g",tmp
[nset
-1-edge
]-av
,av
-tmp
[edge
]);
303 for(s
=0; s
<nset
; s
++)
304 var
+= sqr(val
[s
][i
]-av
);
305 if (avbar_opt
== avbarSTDDEV
)
306 err
= sqrt(var
/nset
);
308 err
= sqrt(var
/(nset
*(nset
-1)));
309 fprintf(fp
," %g",err
);
316 if (avbar_opt
== avbar90
)
320 static real
anal_ee_inf(real
*parm
,real T
)
322 return sqrt(parm
[1]*2*parm
[0]/T
+parm
[3]*2*parm
[2]/T
);
325 static void estimate_error(const char *eefile
,int nb_min
,int resol
,int n
,
326 int nset
, double *av
,double *sig
,real
**val
,real dt
,
327 gmx_bool bFitAc
,gmx_bool bSingleExpFit
,gmx_bool bAllowNegLTCorr
,
328 const output_env_t oenv
)
331 int bs
,prev_bs
,nbs
,nb
;
336 real
*tbs
,*ybs
,rtmp
,dens
,*fitsig
,twooe
,tau1_est
,tau_sig
;
342 fprintf(stdout
,"The number of points is smaller than 4, can not make an error estimate\n");
347 fp
= xvgropen(eefile
,"Error estimates",
348 "Block size (time)","Error estimate", oenv
);
349 if (output_env_get_print_xvgr_codes(oenv
))
352 "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
356 xvgr_legend(fp
,2*nset
,(const char**)leg
,oenv
);
359 spacing
= pow(2,1.0/resol
);
363 for(s
=0; s
<nset
; s
++)
380 blav
+= val
[s
][bs
*i
+j
];
382 var
+= sqr(av
[s
] - blav
/bs
);
391 ybs
[nbs
] = var
/(nb
*(nb
-1.0))*(n
*dt
)/(sig
[s
]*sig
[s
]);
408 for(i
=0; i
<nbs
/2; i
++)
411 tbs
[i
] = tbs
[nbs
-1-i
];
414 ybs
[i
] = ybs
[nbs
-1-i
];
417 /* The initial slope of the normalized ybs^2 is 1.
418 * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
419 * From this we take our initial guess for tau1.
427 } while (i
< nbs
- 1 &&
428 (ybs
[i
] > ybs
[i
+1] || ybs
[i
] > twooe
*tau1_est
));
432 fprintf(stdout
,"Data set %d has strange time correlations:\n"
433 "the std. error using single points is larger than that of blocks of 2 points\n"
434 "The error estimate might be inaccurate, check the fit\n",
436 /* Use the total time as tau for the fitting weights */
437 tau_sig
= (n
- 1)*dt
;
446 fprintf(debug
,"set %d tau1 estimate %f\n",s
+1,tau1_est
);
449 /* Generate more or less appropriate sigma's,
450 * also taking the density of points into account.
456 dens
= tbs
[1]/tbs
[0] - 1;
460 dens
= tbs
[nbs
-1]/tbs
[nbs
-2] - 1;
464 dens
= 0.5*(tbs
[i
+1]/tbs
[i
-1] - 1);
466 fitsig
[i
] = sqrt((tau_sig
+ tbs
[i
])/dens
);
471 fitparm
[0] = tau1_est
;
473 /* We set the initial guess for tau2
474 * to halfway between tau1_est and the total time (on log scale).
476 fitparm
[2] = sqrt(tau1_est
*(n
-1)*dt
);
477 do_lmfit(nbs
,ybs
,fitsig
,0,tbs
,0,dt
*n
,oenv
,
478 bDebugMode(),effnERREST
,fitparm
,0);
479 fitparm
[3] = 1-fitparm
[1];
481 if (bSingleExpFit
|| fitparm
[0]<0 || fitparm
[2]<0 || fitparm
[1]<0
482 || (fitparm
[1]>1 && !bAllowNegLTCorr
) || fitparm
[2]>(n
-1)*dt
)
486 if (fitparm
[2] > (n
-1)*dt
)
489 "Warning: tau2 is longer than the length of the data (%g)\n"
490 " the statistics might be bad\n",
495 fprintf(stdout
,"a fitted parameter is negative\n");
497 fprintf(stdout
,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
498 sig
[s
]*anal_ee_inf(fitparm
,n
*dt
),
499 fitparm
[1],fitparm
[0],fitparm
[2]);
500 /* Do a fit with tau2 fixed at the total time.
501 * One could also choose any other large value for tau2.
503 fitparm
[0] = tau1_est
;
505 fitparm
[2] = (n
-1)*dt
;
506 fprintf(stderr
,"Will fix tau2 at the total time: %g\n",fitparm
[2]);
507 do_lmfit(nbs
,ybs
,fitsig
,0,tbs
,0,dt
*n
,oenv
,bDebugMode(),
508 effnERREST
,fitparm
,4);
509 fitparm
[3] = 1-fitparm
[1];
511 if (bSingleExpFit
|| fitparm
[0]<0 || fitparm
[1]<0
512 || (fitparm
[1]>1 && !bAllowNegLTCorr
))
514 if (!bSingleExpFit
) {
515 fprintf(stdout
,"a fitted parameter is negative\n");
516 fprintf(stdout
,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
517 sig
[s
]*anal_ee_inf(fitparm
,n
*dt
),
518 fitparm
[1],fitparm
[0],fitparm
[2]);
520 /* Do a single exponential fit */
521 fprintf(stderr
,"Will use a single exponential fit for set %d\n",s
+1);
522 fitparm
[0] = tau1_est
;
525 do_lmfit(nbs
,ybs
,fitsig
,0,tbs
,0,dt
*n
,oenv
,bDebugMode(),
526 effnERREST
,fitparm
,6);
527 fitparm
[3] = 1-fitparm
[1];
530 ee
= sig
[s
]*anal_ee_inf(fitparm
,n
*dt
);
535 fprintf(stdout
,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
537 fprintf(fp
,"@ legend string %d \"av %f\"\n",2*s
,av
[s
]);
538 fprintf(fp
,"@ legend string %d \"ee %6g\"\n",
539 2*s
+1,sig
[s
]*anal_ee_inf(fitparm
,n
*dt
));
542 fprintf(fp
,"%g %g %g\n",tbs
[i
],sig
[s
]*sqrt(ybs
[i
]/(n
*dt
)),
543 sig
[s
]*sqrt(fit_function(effnERREST
,fitparm
,tbs
[i
])/(n
*dt
)));
549 real
*ac
,acint
,ac_fit
[4];
553 ac
[i
] = val
[s
][i
] - av
[s
];
559 low_do_autocorr(NULL
,oenv
,NULL
,n
,1,-1,&ac
,
560 dt
,eacNormal
,1,FALSE
,TRUE
,
561 FALSE
,0,0,effnNONE
,0);
565 /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
567 for(i
=1; i
<=fitlen
/2; i
++)
573 /* Generate more or less appropriate sigma's */
574 for(i
=0; i
<=fitlen
; i
++)
576 fitsig
[i
] = sqrt(acint
+ dt
*i
);
579 ac_fit
[0] = 0.5*acint
;
581 ac_fit
[2] = 10*acint
;
582 do_lmfit(n
/nb_min
,ac
,fitsig
,dt
,0,0,fitlen
*dt
,oenv
,
583 bDebugMode(),effnEXP3
,ac_fit
,0);
584 ac_fit
[3] = 1 - ac_fit
[1];
586 fprintf(stdout
,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
587 s
+1,sig
[s
]*anal_ee_inf(ac_fit
,n
*dt
),
588 ac_fit
[1],ac_fit
[0],ac_fit
[2]);
593 fprintf(fp
,"%g %g\n",tbs
[i
],
594 sig
[s
]*sqrt(fit_function(effnERREST
,ac_fit
,tbs
[i
]))/(n
*dt
));
610 static void luzar_correl(int nn
,real
*time
,int nset
,real
**val
,real temp
,
611 gmx_bool bError
,real fit_start
,real smooth_tail_start
,
612 const output_env_t oenv
)
614 const real tol
= 1e-8;
619 please_cite(stdout
,"Spoel2006b");
621 /* Compute negative derivative k(t) = -dc(t)/dt */
624 compute_derivative(nn
,time
,val
[0],kt
);
625 for(j
=0; (j
<nn
); j
++)
629 for(j
=0; (j
<nn
); j
++)
630 d2
+= sqr(kt
[j
] - val
[3][j
]);
631 fprintf(debug
,"RMS difference in derivatives is %g\n",sqrt(d2
/nn
));
633 analyse_corr(nn
,time
,val
[0],val
[2],kt
,NULL
,NULL
,NULL
,fit_start
,
634 temp
,smooth_tail_start
,oenv
);
637 else if (nset
== 6) {
638 analyse_corr(nn
,time
,val
[0],val
[2],val
[4],
639 val
[1],val
[3],val
[5],fit_start
,temp
,smooth_tail_start
,oenv
);
642 printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
643 printf("Not doing anything. Sorry.\n");
647 static void filter(real flen
,int n
,int nset
,real
**val
,real dt
,
648 const output_env_t oenv
)
651 double *filt
,sum
,vf
,fluc
,fluctot
;
653 f
= (int)(flen
/(2*dt
));
657 for(i
=1; i
<=f
; i
++) {
658 filt
[i
] = cos(M_PI
*dt
*i
/flen
);
663 fprintf(stdout
,"Will calculate the fluctuation over %d points\n",n
-2*f
);
664 fprintf(stdout
," using a filter of length %g of %d points\n",flen
,2*f
+1);
666 for(s
=0; s
<nset
; s
++) {
668 for(i
=f
; i
<n
-f
; i
++) {
669 vf
= filt
[0]*val
[s
][i
];
671 vf
+= filt
[j
]*(val
[s
][i
-f
]+val
[s
][i
+f
]);
672 fluc
+= sqr(val
[s
][i
] - vf
);
676 fprintf(stdout
,"Set %3d filtered fluctuation: %12.6e\n",s
+1,sqrt(fluc
));
678 fprintf(stdout
,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot
/nset
));
679 fprintf(stdout
,"\n");
684 static void do_fit(FILE *out
,int n
,gmx_bool bYdy
,
685 int ny
,real
*x0
,real
**val
,
686 int npargs
,t_pargs
*ppa
,const output_env_t oenv
)
688 real
*c1
=NULL
,*sig
=NULL
,*fitparm
;
689 real tendfit
,tbeginfit
;
692 efitfn
= get_acffitfn();
693 nparm
= nfp_ffn
[efitfn
];
694 fprintf(out
,"Will fit to the following function:\n");
695 fprintf(out
,"%s\n",longs_ffn
[efitfn
]);
700 fprintf(out
,"Using two columns as y and sigma values\n");
704 if (opt2parg_bSet("-beginfit",npargs
,ppa
)) {
705 tbeginfit
= opt2parg_real("-beginfit",npargs
,ppa
);
709 if (opt2parg_bSet("-endfit",npargs
,ppa
)) {
710 tendfit
= opt2parg_real("-endfit",npargs
,ppa
);
726 fitparm
[1] = 0.5*c1
[0];
730 fitparm
[0] = fitparm
[2] = 0.5*c1
[0];
736 fitparm
[0] = fitparm
[2] = fitparm
[4] = 0.33*c1
[0];
743 fitparm
[0] = fitparm
[2] = fitparm
[4] = fitparm
[6] = 0.25*c1
[0];
751 fprintf(out
,"Warning: don't know how to initialize the parameters\n");
752 for(i
=0; (i
<nparm
); i
++)
755 fprintf(out
,"Starting parameters:\n");
756 for(i
=0; (i
<nparm
); i
++)
757 fprintf(out
,"a%-2d = %12.5e\n",i
+1,fitparm
[i
]);
758 if (do_lmfit(ny
,c1
,sig
,0,x0
,tbeginfit
,tendfit
,
759 oenv
,bDebugMode(),efitfn
,fitparm
,0)) {
760 for(i
=0; (i
<nparm
); i
++)
761 fprintf(out
,"a%-2d = %12.5e\n",i
+1,fitparm
[i
]);
764 fprintf(out
,"No solution was found\n");
768 static void do_ballistic(const char *balFile
, int nData
,
769 real
*t
, real
**val
, int nSet
,
770 real balTime
, int nBalExp
,
771 gmx_bool bDerivative
,
772 const output_env_t oenv
)
774 double **ctd
=NULL
, *td
=NULL
;
775 t_gemParams
*GP
= init_gemParams(0, 0, t
, nData
, 0, 0, 0, balTime
, nBalExp
, bDerivative
);
776 static char *leg
[] = {"Ac'(t)"};
780 if (GP
->ballistic
/GP
->tDelta
>= GP
->nExpFit
*2+1)
785 fp
= xvgropen(balFile
, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv
);
786 xvgr_legend(fp
,asize(leg
),(const char**)leg
,oenv
);
788 for (set
=0; set
<nSet
; set
++)
790 snew(ctd
[set
], nData
);
791 for (i
=0; i
<nData
; i
++) {
792 ctd
[set
][i
] = (double)val
[set
][i
];
794 td
[i
] = (double)t
[i
];
797 takeAwayBallistic(ctd
[set
], td
, nData
, GP
->ballistic
, GP
->nExpFit
, GP
->bDt
);
800 for (i
=0; i
<nData
; i
++)
802 fprintf(fp
, " %g",t
[i
]);
803 for (set
=0; set
<nSet
; set
++)
805 fprintf(fp
, " %g", ctd
[set
][i
]);
811 for (set
=0; set
<nSet
; set
++)
817 printf("Number of data points is less than the number of parameters to fit\n."
818 "The system is underdetermined, hence no ballistic term can be found.\n\n");
821 static void do_geminate(const char *gemFile
, int nData
,
822 real
*t
, real
**val
, int nSet
,
823 const real D
, const real rcut
, const real balTime
,
824 const int nFitPoints
, const real begFit
, const real endFit
,
825 const output_env_t oenv
)
827 double **ctd
=NULL
, **ctdGem
=NULL
, *td
=NULL
;
828 t_gemParams
*GP
= init_gemParams(rcut
, D
, t
, nData
, nFitPoints
,
829 begFit
, endFit
, balTime
, 1, FALSE
);
830 const char *leg
[] = {"Ac\\sgem\\N(t)"};
838 fp
= xvgropen(gemFile
, "Hydrogen Bond Autocorrelation","Time (ps)","C'(t)", oenv
);
839 xvgr_legend(fp
,asize(leg
),leg
,oenv
);
841 for (set
=0; set
<nSet
; set
++)
843 snew(ctd
[set
], nData
);
844 snew(ctdGem
[set
], nData
);
845 for (i
=0; i
<nData
; i
++) {
846 ctd
[set
][i
] = (double)val
[set
][i
];
848 td
[i
] = (double)t
[i
];
850 fitGemRecomb(ctd
[set
], td
, &(ctd
[set
]), nData
, GP
);
853 for (i
=0; i
<nData
; i
++)
855 fprintf(fp
, " %g",t
[i
]);
856 for (set
=0; set
<nSet
; set
++)
858 fprintf(fp
, " %g", ctdGem
[set
][i
]);
863 for (set
=0; set
<nSet
; set
++)
873 int gmx_analyze(int argc
,char *argv
[])
875 static const char *desc
[] = {
876 "[TT]g_analyze[tt] reads an ASCII file and analyzes data sets.",
877 "A line in the input file may start with a time",
878 "(see option [TT]-time[tt]) and any number of [IT]y[it]-values may follow.",
879 "Multiple sets can also be",
880 "read when they are separated by & (option [TT]-n[tt]);",
881 "in this case only one [IT]y[it]-value is read from each line.",
882 "All lines starting with # and @ are skipped.",
883 "All analyses can also be done for the derivative of a set",
884 "(option [TT]-d[tt]).[PAR]",
886 "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
887 "points are equidistant in time.[PAR]",
889 "[TT]g_analyze[tt] always shows the average and standard deviation of each",
890 "set, as well as the relative deviation of the third",
891 "and fourth cumulant from those of a Gaussian distribution with the same",
892 "standard deviation.[PAR]",
894 "Option [TT]-ac[tt] produces the autocorrelation function(s).",
895 "Be sure that the time interval between data points is",
896 "much shorter than the time scale of the autocorrelation.[PAR]",
898 "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
899 "i/2 periods. The formula is:[BR]"
900 "[MATH]2 ([INT][FROM]0[from][TO]T[to][int] y(t) [COS]i [GRK]pi[grk] t[cos] dt)^2 / [INT][FROM]0[from][TO]T[to][int] y^2(t) dt[math][BR]",
901 "This is useful for principal components obtained from covariance",
902 "analysis, since the principal components of random diffusion are",
903 "pure cosines.[PAR]",
905 "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",
907 "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",
909 "Option [TT]-av[tt] produces the average over the sets.",
910 "Error bars can be added with the option [TT]-errbar[tt].",
911 "The errorbars can represent the standard deviation, the error",
912 "(assuming the points are independent) or the interval containing",
913 "90% of the points, by discarding 5% of the points at the top and",
916 "Option [TT]-ee[tt] produces error estimates using block averaging.",
917 "A set is divided in a number of blocks and averages are calculated for",
918 "each block. The error for the total average is calculated from",
919 "the variance between averages of the m blocks B[SUB]i[sub] as follows:",
920 "error^2 = [SUM][sum] (B[SUB]i[sub] - [CHEVRON]B[chevron])^2 / (m*(m-1)).",
921 "These errors are plotted as a function of the block size.",
922 "Also an analytical block average curve is plotted, assuming",
923 "that the autocorrelation is a sum of two exponentials.",
924 "The analytical curve for the block average is:[BR]",
925 "[MATH]f(t) = [GRK]sigma[grk][TT]*[tt][SQRT]2/T ( [GRK]alpha[grk] ([GRK]tau[grk][SUB]1[sub] (([EXP]-t/[GRK]tau[grk][SUB]1[sub][exp] - 1) [GRK]tau[grk][SUB]1[sub]/t + 1)) +[BR]",
926 " (1-[GRK]alpha[grk]) ([GRK]tau[grk][SUB]2[sub] (([EXP]-t/[GRK]tau[grk][SUB]2[sub][exp] - 1) [GRK]tau[grk][SUB]2[sub]/t + 1)))[sqrt][math],[BR]"
927 "where T is the total time.",
928 "[GRK]alpha[grk], [GRK]tau[grk][SUB]1[sub] and [GRK]tau[grk][SUB]2[sub] are obtained by fitting f^2(t) to error^2.",
929 "When the actual block average is very close to the analytical curve,",
930 "the error is [MATH][GRK]sigma[grk][TT]*[tt][SQRT]2/T (a [GRK]tau[grk][SUB]1[sub] + (1-a) [GRK]tau[grk][SUB]2[sub])[sqrt][math].",
931 "The complete derivation is given in",
932 "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",
934 "Option [TT]-bal[tt] finds and subtracts the ultrafast \"ballistic\"",
935 "component from a hydrogen bond autocorrelation function by the fitting",
936 "of a sum of exponentials, as described in e.g.",
937 "O. Markovitch, J. Chem. Phys. 129:084505, 2008. The fastest term",
938 "is the one with the most negative coefficient in the exponential,",
939 "or with [TT]-d[tt], the one with most negative time derivative at time 0.",
940 "[TT]-nbalexp[tt] sets the number of exponentials to fit.[PAR]",
942 "Option [TT]-gem[tt] fits bimolecular rate constants ka and kb",
943 "(and optionally kD) to the hydrogen bond autocorrelation function",
944 "according to the reversible geminate recombination model. Removal of",
945 "the ballistic component first is strongly advised. The model is presented in",
946 "O. Markovitch, J. Chem. Phys. 129:084505, 2008.[PAR]",
948 "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
949 "of each set and over all sets with respect to a filtered average.",
950 "The filter is proportional to cos([GRK]pi[grk] t/len) where t goes from -len/2",
951 "to len/2. len is supplied with the option [TT]-filter[tt].",
952 "This filter reduces oscillations with period len/2 and len by a factor",
953 "of 0.79 and 0.33 respectively.[PAR]",
955 "Option [TT]-g[tt] fits the data to the function given with option",
956 "[TT]-fitfn[tt].[PAR]",
958 "Option [TT]-power[tt] fits the data to [MATH]b t^a[math], which is accomplished",
959 "by fitting to [MATH]a t + b[math] on log-log scale. All points after the first",
960 "zero or with a negative value are ignored.[PAR]"
962 "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
963 "on output from [TT]g_hbond[tt]. The input file can be taken directly",
964 "from [TT]g_hbond -ac[tt], and then the same result should be produced."
966 static real tb
=-1,te
=-1,frac
=0.5,filtlen
=0,binwidth
=0.1,aver_start
=0;
967 static gmx_bool bHaveT
=TRUE
,bDer
=FALSE
,bSubAv
=TRUE
,bAverCorr
=FALSE
,bXYdy
=FALSE
;
968 static gmx_bool bEESEF
=FALSE
,bEENLC
=FALSE
,bEeFitAc
=FALSE
,bPower
=FALSE
;
969 static gmx_bool bIntegrate
=FALSE
,bRegression
=FALSE
,bLuzar
=FALSE
,bLuzarError
=FALSE
;
970 static int nsets_in
=1,d
=1,nb_min
=4,resol
=10, nBalExp
=4, nFitPoints
=100;
971 static real temp
=298.15,fit_start
=1, fit_end
=60, smooth_tail_start
=-1, balTime
=0.2, diffusion
=5e-5,rcut
=0.35;
973 /* must correspond to enum avbar* declared at beginning of file */
974 static const char *avbar_opt
[avbarNR
+1] = {
975 NULL
, "none", "stddev", "error", "90", NULL
979 { "-time", FALSE
, etBOOL
, {&bHaveT
},
980 "Expect a time in the input" },
981 { "-b", FALSE
, etREAL
, {&tb
},
982 "First time to read from set" },
983 { "-e", FALSE
, etREAL
, {&te
},
984 "Last time to read from set" },
985 { "-n", FALSE
, etINT
, {&nsets_in
},
986 "Read this number of sets separated by &" },
987 { "-d", FALSE
, etBOOL
, {&bDer
},
988 "Use the derivative" },
989 { "-dp", FALSE
, etINT
, {&d
},
990 "HIDDENThe derivative is the difference over this number of points" },
991 { "-bw", FALSE
, etREAL
, {&binwidth
},
992 "Binwidth for the distribution" },
993 { "-errbar", FALSE
, etENUM
, {avbar_opt
},
994 "Error bars for [TT]-av[tt]" },
995 { "-integrate",FALSE
,etBOOL
, {&bIntegrate
},
996 "Integrate data function(s) numerically using trapezium rule" },
997 { "-aver_start",FALSE
, etREAL
, {&aver_start
},
998 "Start averaging the integral from here" },
999 { "-xydy", FALSE
, etBOOL
, {&bXYdy
},
1000 "Interpret second data set as error in the y values for integrating" },
1001 { "-regression",FALSE
,etBOOL
,{&bRegression
},
1002 "Perform a linear regression analysis on the data. If [TT]-xydy[tt] is set a second set will be interpreted as the error bar in the Y value. Otherwise, if multiple data sets are present a multilinear regression will be performed yielding the constant A that minimize [MATH][GRK]chi[grk]^2 = (y - A[SUB]0[sub] x[SUB]0[sub] - A[SUB]1[sub] x[SUB]1[sub] - ... - A[SUB]N[sub] x[SUB]N[sub])^2[math] where now Y is the first data set in the input file and x[SUB]i[sub] the others. Do read the information at the option [TT]-time[tt]." },
1003 { "-luzar", FALSE
, etBOOL
, {&bLuzar
},
1004 "Do a Luzar and Chandler analysis on a correlation function and related as produced by [TT]g_hbond[tt]. When in addition the [TT]-xydy[tt] flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
1005 { "-temp", FALSE
, etREAL
, {&temp
},
1006 "Temperature for the Luzar hydrogen bonding kinetics analysis (K)" },
1007 { "-fitstart", FALSE
, etREAL
, {&fit_start
},
1008 "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
1009 { "-fitend", FALSE
, etREAL
, {&fit_end
},
1010 "Time (ps) where to stop fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation. Only with [TT]-gem[tt]" },
1011 { "-smooth",FALSE
, etREAL
, {&smooth_tail_start
},
1012 "If this value is >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: [MATH]y = A [EXP]-x/[GRK]tau[grk][exp][math]" },
1013 { "-nbmin", FALSE
, etINT
, {&nb_min
},
1014 "HIDDENMinimum number of blocks for block averaging" },
1015 { "-resol", FALSE
, etINT
, {&resol
},
1016 "HIDDENResolution for the block averaging, block size increases with"
1017 " a factor 2^(1/resol)" },
1018 { "-eeexpfit", FALSE
, etBOOL
, {&bEESEF
},
1019 "HIDDENAlways use a single exponential fit for the error estimate" },
1020 { "-eenlc", FALSE
, etBOOL
, {&bEENLC
},
1021 "HIDDENAllow a negative long-time correlation" },
1022 { "-eefitac", FALSE
, etBOOL
, {&bEeFitAc
},
1023 "HIDDENAlso plot analytical block average using a autocorrelation fit" },
1024 { "-filter", FALSE
, etREAL
, {&filtlen
},
1025 "Print the high-frequency fluctuation after filtering with a cosine filter of this length" },
1026 { "-power", FALSE
, etBOOL
, {&bPower
},
1027 "Fit data to: b t^a" },
1028 { "-subav", FALSE
, etBOOL
, {&bSubAv
},
1029 "Subtract the average before autocorrelating" },
1030 { "-oneacf", FALSE
, etBOOL
, {&bAverCorr
},
1031 "Calculate one ACF over all sets" },
1032 { "-nbalexp", FALSE
, etINT
, {&nBalExp
},
1033 "HIDDENNumber of exponentials to fit to the ultrafast component" },
1034 { "-baltime", FALSE
, etREAL
, {&balTime
},
1035 "HIDDENTime up to which the ballistic component will be fitted" },
1036 /* { "-gemnp", FALSE, etINT, {&nFitPoints}, */
1037 /* "HIDDENNumber of data points taken from the ACF to use for fitting to rev. gem. recomb. model."}, */
1038 /* { "-rcut", FALSE, etREAL, {&rcut}, */
1039 /* "Cut-off for hydrogen bonds in geminate algorithms" }, */
1040 /* { "-gemtype", FALSE, etENUM, {gemType}, */
1041 /* "What type of gminate recombination to use"}, */
1042 /* { "-D", FALSE, etREAL, {&diffusion}, */
1043 /* "The self diffusion coefficient which is used for the reversible geminate recombination model."} */
1045 #define NPA asize(pa)
1048 int n
,nlast
,s
,nset
,i
,j
=0;
1049 real
**val
,*t
,dt
,tot
,error
;
1050 double *av
,*sig
,cum1
,cum2
,cum3
,cum4
,db
;
1051 const char *acfile
,*msdfile
,*ccfile
,*distfile
,*avfile
,*eefile
,*balfile
,*gemfile
,*fitfile
;
1055 { efXVG
, "-f", "graph", ffREAD
},
1056 { efXVG
, "-ac", "autocorr", ffOPTWR
},
1057 { efXVG
, "-msd", "msd", ffOPTWR
},
1058 { efXVG
, "-cc", "coscont", ffOPTWR
},
1059 { efXVG
, "-dist", "distr", ffOPTWR
},
1060 { efXVG
, "-av", "average", ffOPTWR
},
1061 { efXVG
, "-ee", "errest", ffOPTWR
},
1062 { efXVG
, "-bal", "ballisitc",ffOPTWR
},
1063 /* { efXVG, "-gem", "geminate", ffOPTWR }, */
1064 { efLOG
, "-g", "fitlog", ffOPTWR
}
1066 #define NFILE asize(fnm)
1072 ppa
= add_acf_pargs(&npargs
,pa
);
1074 CopyRight(stderr
,argv
[0]);
1075 parse_common_args(&argc
,argv
,PCA_CAN_VIEW
,
1076 NFILE
,fnm
,npargs
,ppa
,asize(desc
),desc
,0,NULL
,&oenv
);
1078 acfile
= opt2fn_null("-ac",NFILE
,fnm
);
1079 msdfile
= opt2fn_null("-msd",NFILE
,fnm
);
1080 ccfile
= opt2fn_null("-cc",NFILE
,fnm
);
1081 distfile
= opt2fn_null("-dist",NFILE
,fnm
);
1082 avfile
= opt2fn_null("-av",NFILE
,fnm
);
1083 eefile
= opt2fn_null("-ee",NFILE
,fnm
);
1084 balfile
= opt2fn_null("-bal",NFILE
,fnm
);
1085 /* gemfile = opt2fn_null("-gem",NFILE,fnm); */
1086 /* When doing autocorrelation we don't want a fitlog for fitting
1087 * the function itself (not the acf) when the user did not ask for it.
1089 if (opt2parg_bSet("-fitfn",npargs
,ppa
) && acfile
== NULL
)
1091 fitfile
= opt2fn("-g",NFILE
,fnm
);
1095 fitfile
= opt2fn_null("-g",NFILE
,fnm
);
1098 val
= read_xvg_time(opt2fn("-f",NFILE
,fnm
),bHaveT
,
1099 opt2parg_bSet("-b",npargs
,ppa
),tb
,
1100 opt2parg_bSet("-e",npargs
,ppa
),te
,
1101 nsets_in
,&nset
,&n
,&dt
,&t
);
1102 printf("Read %d sets of %d points, dt = %g\n\n",nset
,n
,dt
);
1106 printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
1109 for(s
=0; s
<nset
; s
++)
1111 for(i
=0; (i
<n
); i
++)
1113 val
[s
][i
] = (val
[s
][i
+d
]-val
[s
][i
])/(d
*dt
);
1122 printf("Calculating the integral using the trapezium rule\n");
1126 sum
= evaluate_integral(n
,t
,val
[0],val
[1],aver_start
,&stddev
);
1127 printf("Integral %10.3f +/- %10.5f\n",sum
,stddev
);
1131 for(s
=0; s
<nset
; s
++)
1133 sum
= evaluate_integral(n
,t
,val
[s
],NULL
,aver_start
,&stddev
);
1134 printf("Integral %d %10.5f +/- %10.5f\n",s
+1,sum
,stddev
);
1139 if (fitfile
!= NULL
)
1141 out_fit
= ffopen(fitfile
,"w");
1142 if (bXYdy
&& nset
>= 2)
1144 do_fit(out_fit
,0,TRUE
,n
,t
,val
,npargs
,ppa
,oenv
);
1148 for(s
=0; s
<nset
; s
++)
1150 do_fit(out_fit
,s
,FALSE
,n
,t
,val
,npargs
,ppa
,oenv
);
1156 printf(" std. dev. relative deviation of\n");
1157 printf(" standard --------- cumulants from those of\n");
1158 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
1159 printf(" cum. 3 cum. 4\n");
1162 for(s
=0; (s
<nset
); s
++) {
1167 for(i
=0; (i
<n
); i
++)
1170 for(i
=0; (i
<n
); i
++) {
1171 db
= val
[s
][i
]-cum1
;
1174 cum4
+= db
*db
*db
*db
;
1180 sig
[s
] = sqrt(cum2
);
1182 error
= sqrt(cum2
/(n
-1));
1185 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
1186 s
+1,av
[s
],sig
[s
],error
,
1187 sig
[s
] ? cum3
/(sig
[s
]*sig
[s
]*sig
[s
]*sqrt(8/M_PI
)) : 0,
1188 sig
[s
] ? cum4
/(sig
[s
]*sig
[s
]*sig
[s
]*sig
[s
]*3)-1 : 0);
1193 filter(filtlen
,n
,nset
,val
,dt
,oenv
);
1196 out
=xvgropen(msdfile
,"Mean square displacement",
1197 "time","MSD (nm\\S2\\N)",oenv
);
1198 nlast
= (int)(n
*frac
);
1199 for(s
=0; s
<nset
; s
++) {
1200 for(j
=0; j
<=nlast
; j
++) {
1202 fprintf(stderr
,"\r%d",j
);
1204 for(i
=0; i
<n
-j
; i
++)
1205 tot
+= sqr(val
[s
][i
]-val
[s
][i
+j
]);
1207 fprintf(out
," %g %8g\n",dt
*j
,tot
);
1213 fprintf(stderr
,"\r%d, time=%g\n",j
-1,(j
-1)*dt
);
1216 plot_coscont(ccfile
,n
,nset
,val
,oenv
);
1219 histogram(distfile
,binwidth
,n
,nset
,val
,oenv
);
1221 average(avfile
,nenum(avbar_opt
),n
,nset
,val
,t
);
1223 estimate_error(eefile
,nb_min
,resol
,n
,nset
,av
,sig
,val
,dt
,
1224 bEeFitAc
,bEESEF
,bEENLC
,oenv
);
1226 do_ballistic(balfile
,n
,t
,val
,nset
,balTime
,nBalExp
,bDer
,oenv
);
1228 /* do_geminate(gemfile,n,t,val,nset,diffusion,rcut,balTime, */
1229 /* nFitPoints, fit_start, fit_end, oenv); */
1231 power_fit(n
,nset
,val
,t
);
1237 for(s
=0; s
<nset
; s
++)
1245 do_autocorr(acfile
,oenv
,"Autocorrelation",n
,nset
,val
,dt
,
1246 eacNormal
,bAverCorr
);
1250 regression_analysis(n
,bXYdy
,t
,nset
,val
);
1253 luzar_correl(n
,t
,nset
,val
,temp
,bXYdy
,fit_start
,smooth_tail_start
,oenv
);
1255 view_all(oenv
,NFILE
, fnm
);