/* src/tools/gmx_analyze.c */
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *                This source code is part of
 *
 *                 G   R   O   M   A   C   S
 *
 *          GROningen MAchine for Chemical Simulations
 *
 *                        VERSION 3.2.0
 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * If you want to redistribute modifications, please consider that
 * scientific software is very special. Version control is crucial -
 * bugs must be traceable. We will be happy to consider code for
 * inclusion in the official distribution, but derived work must not
 * be called official GROMACS. Details are found in the README & COPYING
 * files - if they are missing, get the official version at www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the papers on the package - you can find them in the top README file.
 *
 * For more info, check our website at http://www.gromacs.org
 *
 * And Hey:
 * Green Red Orange Magenta Azure Cyan Skyblue
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <math.h>
#include <string.h>
#include "statutil.h"
#include "sysstuff.h"
#include "typedefs.h"
#include "smalloc.h"
#include "macros.h"
#include "gmx_fatal.h"
#include "vec.h"
#include "copyrite.h"
#include "futil.h"
#include "readinp.h"
#include "statutil.h"
#include "txtdump.h"
#include "gstat.h"
#include "gmx_statistics.h"
#include "xvgr.h"
#include "gmx_ana.h"
/* must correspond to char *avbar_opt[] declared in main() */
enum { avbarSEL, avbarNONE, avbarSTDDEV, avbarERROR, avbar90, avbarNR };
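
/* Power-law fit y = b*t^a, done as a linear least-squares fit of
 * log(y) against log(t) (or against log(index+1) when the first time
 * is not positive).  For each set only the leading points are used,
 * up to the first value that is not larger than 0 (see the message
 * printed below and the -power description in main()).
 */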
static void power_fit(int n,int nset,real **val,real *t)
{
    real *x,*y,quality,a,b,r;
    int  s,i;

    snew(x,n);
    snew(y,n);

    if (t[0]>0) {
        for(i=0; i<n; i++)
            if (t[0]>0)
                x[i] = log(t[i]);
    } else {
        fprintf(stdout,"First time is not larger than 0, using index number as time for power fit\n");
        for(i=0; i<n; i++)
            x[i] = log(i+1);
    }

    for(s=0; s<nset; s++) {
        i=0;
        for(i=0; i<n && val[s][i]>=0; i++)
            y[i] = log(val[s][i]);
        if (i < n)
            fprintf(stdout,"Will power fit up to point %d, since it is not larger than 0\n",i);
        lsq_y_ax_b(i,x,y,&a,&b,&r,&quality);
        fprintf(stdout,"Power fit set %3d: error %.3f a %g b %g\n",
                s+1,quality,a,exp(b));
    }

    sfree(y);
    sfree(x);
}
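
/* Cosine content of a set over nhp half periods:
 *   c = 2 ( int_0^T y(t) cos(nhp pi t/T) dt )^2 / ( T int_0^T y(t)^2 dt )
 * evaluated below as discrete sums over n equidistant points with T = n*dt.
 * For set i, nhp = i, i.e. i/2 full periods; a value close to 1 indicates
 * cosine-like (random-diffusion-like) behaviour of a principal component.
 */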
static real cosine_content(int nhp,int n,real *y)
/* Assumes n equidistant points */
{
    double fac,cosyint,yyint;
    int i;

    if (n < 2)
        return 0;

    fac = M_PI*nhp/(n-1);

    cosyint = 0;
    yyint = 0;
    for(i=0; i<n; i++) {
        cosyint += cos(fac*i)*y[i];
        yyint += y[i]*y[i];
    }

    return 2*cosyint*cosyint/(n*yyint);
}
static void plot_coscont(const char *ccfile,int n,int nset,real **val,
                         const output_env_t oenv)
{
    FILE *fp;
    int  s;
    real cc;

    fp = xvgropen(ccfile,"Cosine content","set / half periods","cosine content",
                  oenv);

    for(s=0; s<nset; s++) {
        cc = cosine_content(s+1,n,val[s]);
        fprintf(fp," %d %g\n",s+1,cc);
        fprintf(stdout,"Cosine content of set %d with %.1f periods: %g\n",
                s+1,0.5*(s+1),cc);
    }
    fprintf(stdout,"\n");

    ffclose(fp);
}
static void regression_analysis(int n,bool bXYdy,real *x,real **val)
{
    real S,chi2,a,b,da,db,r=0;

    printf("Fitting data to a function f(x) = ax + b\n");
    printf("Minimizing residual chi2 = Sum_i w_i [f(x_i) - y_i]^2\n");
    printf("Error estimates will be given if w_i (sigma) values are given\n");
    printf("(use option -xydy).\n\n");
    if (bXYdy)
        lsq_y_ax_b_error(n,x,val[0],val[1],&a,&b,&da,&db,&r,&S);
    else
        lsq_y_ax_b(n,x,val[0],&a,&b,&r,&S);
    chi2 = sqr((n-2)*S);
    printf("Chi2                    = %g\n",chi2);
    printf("S (Sqrt(Chi2/(n-2)))    = %g\n",S);
    printf("Correlation coefficient = %.1f%%\n",100*r);
    printf("\n");
    if (bXYdy) {
        printf("a = %g +/- %g\n",a,da);
        printf("b = %g +/- %g\n",b,db);
    }
    else {
        printf("a = %g\n",a);
        printf("b = %g\n",b);
    }
}
void histogram(const char *distfile,real binwidth,int n, int nset, real **val,
               const output_env_t oenv)
{
    FILE   *fp;
    int    i,s;
    double min,max;
    int    nbin;
#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
    long long int *histo;
#else
    double *histo;
#endif

    min = val[0][0];
    max = val[0][0];
    for(s=0; s<nset; s++)
        for(i=0; i<n; i++)
            if (val[s][i] < min)
                min = val[s][i];
            else if (val[s][i] > max)
                max = val[s][i];

    min = binwidth*floor(min/binwidth);
    max = binwidth*ceil(max/binwidth);
    if (min != 0)
        min -= binwidth;
    max += binwidth;

    nbin = (int)((max - min)/binwidth + 0.5) + 1;
    fprintf(stderr,"Making distributions with %d bins\n",nbin);
    snew(histo,nbin);
    fp = xvgropen(distfile,"Distribution","","",oenv);
    for(s=0; s<nset; s++) {
        for(i=0; i<nbin; i++)
            histo[i] = 0;
        for(i=0; i<n; i++)
            histo[(int)((val[s][i] - min)/binwidth + 0.5)]++;
        for(i=0; i<nbin; i++)
            fprintf(fp," %g %g\n",min+i*binwidth,(double)histo[i]/(n*binwidth));
        if (s < nset-1)
            fprintf(fp,"&\n");
    }
    ffclose(fp);
}
static int real_comp(const void *a,const void *b)
{
    real dif = *(real *)a - *(real *)b;

    if (dif < 0)
        return -1;
    else if (dif > 0)
        return 1;
    else
        return 0;
}
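
/* Average over the nset sets at each time point, with optional error
 * bars selected by -errbar:
 *   stddev : sqrt( Sum_s (x_s - <x>)^2 / nset )
 *   error  : stddev / sqrt(nset-1), assuming the sets are independent
 *   90     : asymmetric bars spanning the ~90% interval, obtained by
 *            sorting the nset values and discarding ~5% on each side
 */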
static void average(const char *avfile,int avbar_opt,
                    int n, int nset,real **val,real *t)
{
    FILE   *fp;
    int    i,s,edge=0;
    double av,var,err;
    real   *tmp=NULL;

    fp = ffopen(avfile,"w");
    if ((avbar_opt == avbarERROR) && (nset == 1))
        avbar_opt = avbarNONE;
    if (avbar_opt != avbarNONE) {
        if (avbar_opt == avbar90) {
            snew(tmp,nset);
            fprintf(fp,"@TYPE xydydy\n");
            edge = (int)(nset*0.05+0.5);
            fprintf(stdout,"Errorbars: discarding %d points on both sides: %d%%"
                    " interval\n",edge,(int)(100*(nset-2*edge)/nset+0.5));
        } else
            fprintf(fp,"@TYPE xydy\n");
    }

    for(i=0; i<n; i++) {
        av = 0;
        for(s=0; s<nset; s++)
            av += val[s][i];
        av /= nset;
        fprintf(fp," %g %g",t[i],av);
        var = 0;
        if (avbar_opt != avbarNONE) {
            if (avbar_opt == avbar90) {
                for(s=0; s<nset; s++)
                    tmp[s] = val[s][i];
                qsort(tmp,nset,sizeof(tmp[0]),real_comp);
                fprintf(fp," %g %g",tmp[nset-1-edge]-av,av-tmp[edge]);
            } else {
                for(s=0; s<nset; s++)
                    var += sqr(val[s][i]-av);
                if (avbar_opt == avbarSTDDEV)
                    err = sqrt(var/nset);
                else
                    err = sqrt(var/(nset*(nset-1)));
                fprintf(fp," %g",err);
            }
        }
        fprintf(fp,"\n");
    }
    ffclose(fp);

    if (avbar_opt == avbar90)
        sfree(tmp);
}
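
/* Analytical block-average error curve for an autocorrelation that is a
 * sum of two exponentials (B. Hess, J. Chem. Phys. 116:209-217, 2002):
 *   f(t) = sigma sqrt( 2/T (   a   tau1 ((exp(-t/tau1) - 1) tau1/t + 1)
 *                            + (1-a) tau2 ((exp(-t/tau2) - 1) tau2/t + 1) ) )
 * with parm[0] = tau1, parm[1] = a, parm[2] = tau2, parm[3] = 1-a and T the
 * total time.  The functions below return f(t)/sigma; the caller multiplies
 * by sig[s].  anal_ee_inf() is the t -> infinity limit, which is the error
 * estimate itself: sigma sqrt(2/T (a tau1 + (1-a) tau2)).
 */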
static real anal_ee_inf(real *parm,real T)
{
    return sqrt(parm[1]*2*parm[0]/T+parm[3]*2*parm[2]/T);
}
static real anal_ee(real *parm,real T,real t)
{
    real e1,e2;

    if (parm[0])
        e1 = exp(-t/parm[0]);
    else
        e1 = 1;
    if (parm[2])
        e2 = exp(-t/parm[2]);
    else
        e2 = 1;

    return sqrt(parm[1]*2*parm[0]/T*((e1 - 1)*parm[0]/t + 1) +
                parm[3]*2*parm[2]/T*((e2 - 1)*parm[2]/t + 1));
}
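
/* Block-averaging error estimate.  The data are split into nb blocks of
 * bs points; the variance of the block averages, normalised as
 * var/(nb*(nb-1)) * (n*dt) / sigma^2, is tabulated against the block
 * size and fitted to the two-exponential model above (effnERREST).
 * The initial guess for tau1 is the block time where the normalised
 * curve first drops to 2/e of that block time; when the fit gives
 * unphysical parameters, tau2 is fixed at the total time and, failing
 * that, a single-exponential fit is used.
 */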
static void estimate_error(const char *eefile,int nb_min,int resol,int n,
                           int nset, double *av,double *sig,real **val,real dt,
                           bool bFitAc,bool bSingleExpFit,bool bAllowNegLTCorr,
                           const output_env_t oenv)
{
    FILE   *fp;
    int    bs,prev_bs,nbs,nb;
    real   spacing,nbr;
    int    s,i,j;
    double blav,var;
    char   **leg;
    real   *tbs,*ybs,rtmp,dens,*fitsig,twooe,tau1_est,tau_sig;
    real   fitparm[4];
    real   ee,a,tau1,tau2;

    if (n < 4)
    {
        fprintf(stdout,"The number of points is smaller than 4, can not make an error estimate\n");

        return;
    }

    fp = xvgropen(eefile,"Error estimates",
                  "Block size (time)","Error estimate", oenv);
    if (output_env_get_print_xvgr_codes(oenv))
    {
        fprintf(fp,
                "@ subtitle \"using block averaging, total time %g (%d points)\"\n",
                (n-1)*dt,n);
    }
    snew(leg,2*nset);
    xvgr_legend(fp,2*nset,leg,oenv);
    sfree(leg);

    spacing = pow(2,1.0/resol);
    snew(tbs,n);
    snew(ybs,n);
    snew(fitsig,n);
    for(s=0; s<nset; s++)
    {
        nbs = 0;
        prev_bs = 0;
        nbr = nb_min;
        while (nbr <= n)
        {
            bs = n/(int)nbr;
            if (bs != prev_bs)
            {
                nb = n/bs;
                var = 0;
                for(i=0; i<nb; i++)
                {
                    blav = 0;
                    for (j=0; j<bs; j++)
                    {
                        blav += val[s][bs*i+j];
                    }
                    var += sqr(av[s] - blav/bs);
                }
                tbs[nbs] = bs*dt;
                if (sig[s] == 0)
                {
                    ybs[nbs] = 0;
                }
                else
                {
                    ybs[nbs] = var/(nb*(nb-1.0))*(n*dt)/(sig[s]*sig[s]);
                }
                nbs++;
            }
            nbr *= spacing;
            nb = (int)(nbr+0.5);
            prev_bs = bs;
        }
        if (sig[s] == 0)
        {
            ee   = 0;
            a    = 1;
            tau1 = 0;
            tau2 = 0;
        }
        else
        {
            for(i=0; i<nbs/2; i++)
            {
                rtmp         = tbs[i];
                tbs[i]       = tbs[nbs-1-i];
                tbs[nbs-1-i] = rtmp;
                rtmp         = ybs[i];
                ybs[i]       = ybs[nbs-1-i];
                ybs[nbs-1-i] = rtmp;
            }
            /* The initial slope of the normalized ybs^2 is 1.
             * For a single exponential autocorrelation: ybs(tau1) = 2/e tau1
             * From this we take our initial guess for tau1.
             */
            twooe = 2/exp(1);
            i = -1;
            do
            {
                i++;
                tau1_est = tbs[i];
            } while (i < nbs - 1 &&
                     (ybs[i] > ybs[i+1] || ybs[i] > twooe*tau1_est));

            if (ybs[0] > ybs[1])
            {
                fprintf(stdout,"Data set %d has strange time correlations:\n"
                        "the std. error using single points is larger than that of blocks of 2 points\n"
                        "The error estimate might be inaccurate, check the fit\n",
                        s+1);
                /* Use the total time as tau for the fitting weights */
                tau_sig = (n - 1)*dt;
            }
            else
            {
                tau_sig = tau1_est;
            }

            if (debug)
            {
                fprintf(debug,"set %d tau1 estimate %f\n",s+1,tau1_est);
            }

            /* Generate more or less appropriate sigma's,
             * also taking the density of points into account.
             */
            for(i=0; i<nbs; i++)
            {
                if (i == 0)
                {
                    dens = tbs[1]/tbs[0] - 1;
                }
                else if (i == nbs-1)
                {
                    dens = tbs[nbs-1]/tbs[nbs-2] - 1;
                }
                else
                {
                    dens = 0.5*(tbs[i+1]/tbs[i-1] - 1);
                }
                fitsig[i] = sqrt((tau_sig + tbs[i])/dens);
            }

            if (!bSingleExpFit)
            {
                fitparm[0] = tau1_est;
                fitparm[1] = 0.95;
                /* We set the initial guess for tau2
                 * to halfway between tau1_est and the total time (on log scale).
                 */
                fitparm[2] = sqrt(tau1_est*(n-1)*dt);
                do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,
                         bDebugMode(),effnERREST,fitparm,0);
                fitparm[3] = 1-fitparm[1];
            }
            if (bSingleExpFit || fitparm[0]<0 || fitparm[2]<0 || fitparm[1]<0
                || (fitparm[1]>1 && !bAllowNegLTCorr) || fitparm[2]>(n-1)*dt)
            {
                if (!bSingleExpFit)
                {
                    if (fitparm[2] > (n-1)*dt)
                    {
                        fprintf(stdout,
                                "Warning: tau2 is longer than the length of the data (%g)\n"
                                "         the statistics might be bad\n",
                                (n-1)*dt);
                    }
                    else
                    {
                        fprintf(stdout,"a fitted parameter is negative\n");
                    }
                    fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                            sig[s]*anal_ee_inf(fitparm,n*dt),
                            fitparm[1],fitparm[0],fitparm[2]);
                    /* Do a fit with tau2 fixed at the total time.
                     * One could also choose any other large value for tau2.
                     */
                    fitparm[0] = tau1_est;
                    fitparm[1] = 0.95;
                    fitparm[2] = (n-1)*dt;
                    fprintf(stderr,"Will fix tau2 at the total time: %g\n",fitparm[2]);
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,4);
                    fitparm[3] = 1-fitparm[1];
                }
                if (bSingleExpFit || fitparm[0]<0 || fitparm[1]<0
                    || (fitparm[1]>1 && !bAllowNegLTCorr))
                {
                    if (!bSingleExpFit) {
                        fprintf(stdout,"a fitted parameter is negative\n");
                        fprintf(stdout,"invalid fit: e.e. %g a %g tau1 %g tau2 %g\n",
                                sig[s]*anal_ee_inf(fitparm,n*dt),
                                fitparm[1],fitparm[0],fitparm[2]);
                    }
                    /* Do a single exponential fit */
                    fprintf(stderr,"Will use a single exponential fit for set %d\n",s+1);
                    fitparm[0] = tau1_est;
                    fitparm[1] = 1.0;
                    fitparm[2] = 0.0;
                    do_lmfit(nbs,ybs,fitsig,0,tbs,0,dt*n,oenv,bDebugMode(),
                             effnERREST,fitparm,6);
                    fitparm[3] = 1-fitparm[1];
                }
            }
            ee   = sig[s]*anal_ee_inf(fitparm,n*dt);
            a    = fitparm[1];
            tau1 = fitparm[0];
            tau2 = fitparm[2];
        }
        fprintf(stdout,"Set %3d: err.est. %g a %g tau1 %g tau2 %g\n",
                s+1,ee,a,tau1,tau2);
        fprintf(fp,"@ legend string %d \"av %f\"\n",2*s,av[s]);
        fprintf(fp,"@ legend string %d \"ee %6g\"\n",
                2*s+1,sig[s]*anal_ee_inf(fitparm,n*dt));
        for(i=0; i<nbs; i++)
        {
            fprintf(fp,"%g %g %g\n",tbs[i],sig[s]*sqrt(ybs[i]/(n*dt)),
                    sig[s]*sqrt(fit_function(effnERREST,fitparm,tbs[i])/(n*dt)));
        }

        if (bFitAc)
        {
            int  fitlen;
            real *ac,acint,ac_fit[4];

            snew(ac,n);
            for(i=0; i<n; i++) {
                ac[i] = val[s][i] - av[s];
                if (i > 0)
                    fitsig[i] = sqrt(i);
                else
                    fitsig[i] = 1;
            }
            low_do_autocorr(NULL,oenv,NULL,n,1,-1,&ac,
                            dt,eacNormal,1,FALSE,TRUE,
                            FALSE,0,0,effnNONE,0);

            fitlen = n/nb_min;

            /* Integrate ACF only up to fitlen/2 to avoid integrating noise */
            acint = 0.5*ac[0];
            for(i=1; i<=fitlen/2; i++)
            {
                acint += ac[i];
            }
            acint *= dt;

            /* Generate more or less appropriate sigma's */
            for(i=0; i<=fitlen; i++)
            {
                fitsig[i] = sqrt(acint + dt*i);
            }

            ac_fit[0] = 0.5*acint;
            ac_fit[1] = 0.95;
            ac_fit[2] = 10*acint;
            do_lmfit(n/nb_min,ac,fitsig,dt,0,0,fitlen*dt,oenv,
                     bDebugMode(),effnEXP3,ac_fit,0);
            ac_fit[3] = 1 - ac_fit[1];

            fprintf(stdout,"Set %3d: ac erest %g a %g tau1 %g tau2 %g\n",
                    s+1,sig[s]*anal_ee_inf(ac_fit,n*dt),
                    ac_fit[1],ac_fit[0],ac_fit[2]);

            fprintf(fp,"&\n");
            for(i=0; i<nbs; i++)
            {
                fprintf(fp,"%g %g\n",tbs[i],
                        sig[s]*sqrt(fit_function(effnERREST,ac_fit,tbs[i]))/(n*dt));
            }

            sfree(ac);
        }
        if (s < nset-1)
        {
            fprintf(fp,"&\n");
        }
    }
    sfree(fitsig);
    sfree(ybs);
    sfree(tbs);
    ffclose(fp);
}
static void luzar_correl(int nn,real *time,int nset,real **val,real temp,
                         bool bError,real fit_start,real smooth_tail_start,
                         const output_env_t oenv)
{
    const real tol = 1e-8;
    real *kt;
    real weight,d2;
    int  j;

    please_cite(stdout,"Spoel2006b");

    /* Compute negative derivative k(t) = -dc(t)/dt */
    if (!bError) {
        snew(kt,nn);
        compute_derivative(nn,time,val[0],kt);
        for(j=0; (j<nn); j++)
            kt[j] = -kt[j];
        if (debug) {
            d2 = 0;
            for(j=0; (j<nn); j++)
                d2 += sqr(kt[j] - val[3][j]);
            fprintf(debug,"RMS difference in derivatives is %g\n",sqrt(d2/nn));
        }
        analyse_corr(nn,time,val[0],val[2],kt,NULL,NULL,NULL,fit_start,
                     temp,smooth_tail_start,oenv);
        sfree(kt);
    }
    else if (nset == 6) {
        analyse_corr(nn,time,val[0],val[2],val[4],
                     val[1],val[3],val[5],fit_start,temp,smooth_tail_start,oenv);
    }
    else {
        printf("Inconsistent input. I need c(t) sigma_c(t) n(t) sigma_n(t) K(t) sigma_K(t)\n");
        printf("Not doing anything. Sorry.\n");
    }
}
static void filter(real flen,int n,int nset,real **val,real dt,
                   const output_env_t oenv)
{
    int    f,s,i,j;
    double *filt,sum,vf,fluc,fluctot;

    f = (int)(flen/(2*dt));
    snew(filt,f+1);
    filt[0] = 1;
    sum = 1;
    for(i=1; i<=f; i++) {
        filt[i] = cos(M_PI*dt*i/flen);
        sum += 2*filt[i];
    }
    for(i=0; i<=f; i++)
        filt[i] /= sum;
    fprintf(stdout,"Will calculate the fluctuation over %d points\n",n-2*f);
    fprintf(stdout,"  using a filter of length %g of %d points\n",flen,2*f+1);
    fluctot = 0;
    for(s=0; s<nset; s++) {
        fluc = 0;
        for(i=f; i<n-f; i++) {
            vf = filt[0]*val[s][i];
            for(j=1; j<=f; j++)
                vf += filt[j]*(val[s][i-j]+val[s][i+j]);
            fluc += sqr(val[s][i] - vf);
        }
        fluc /= n - 2*f;
        fluctot += fluc;
        fprintf(stdout,"Set %3d filtered fluctuation: %12.6e\n",s+1,sqrt(fluc));
    }
    fprintf(stdout,"Overall filtered fluctuation: %12.6e\n",sqrt(fluctot/nset));
    fprintf(stdout,"\n");

    sfree(filt);
}
static void do_fit(FILE *out,int n,bool bYdy,int ny,real *x0,real **val,
                   int npargs,t_pargs *ppa,const output_env_t oenv)
{
    real *c1=NULL,*sig=NULL,*fitparm;
    real dt=0,tendfit,tbeginfit;
    int  i,efitfn,nparm;

    efitfn = get_acffitfn();
    nparm  = nfp_ffn[efitfn];
    fprintf(out,"Will fit to the following function:\n");
    fprintf(out,"%s\n",longs_ffn[efitfn]);
    c1 = val[n];
    if (bYdy) {
        c1  = val[n];
        sig = val[n+1];
        fprintf(out,"Using two columns as y and sigma values\n");
    } else {
        snew(sig,ny);
    }
    if (opt2parg_bSet("-beginfit",npargs,ppa)) {
        tbeginfit = opt2parg_real("-beginfit",npargs,ppa);
    } else {
        tbeginfit = x0 ? x0[0] : 0;
    }
    if (opt2parg_bSet("-endfit",npargs,ppa)) {
        tendfit = opt2parg_real("-endfit",npargs,ppa);
    } else {
        tendfit = x0 ? x0[ny-1] : (ny-1)*dt;
    }

    snew(fitparm,nparm);
    switch(efitfn) {
    case effnEXP1:
        fitparm[0] = 0.5;
        break;
    case effnEXP2:
        fitparm[0] = 0.5;
        fitparm[1] = c1[0];
        break;
    case effnEXP3:
        fitparm[0] = 1.0;
        fitparm[1] = 0.5*c1[0];
        fitparm[2] = 10.0;
        break;
    case effnEXP5:
        fitparm[0] = fitparm[2] = 0.5*c1[0];
        fitparm[1] = 10;
        fitparm[3] = 40;
        fitparm[4] = 0;
        break;
    case effnEXP7:
        fitparm[0] = fitparm[2] = fitparm[4] = 0.33*c1[0];
        fitparm[1] = 1;
        fitparm[3] = 10;
        fitparm[5] = 100;
        fitparm[6] = 0;
        break;
    case effnEXP9:
        fitparm[0] = fitparm[2] = fitparm[4] = fitparm[6] = 0.25*c1[0];
        fitparm[1] = 0.1;
        fitparm[3] = 1;
        fitparm[5] = 10;
        fitparm[7] = 100;
        fitparm[8] = 0;
        break;
    default:
        fprintf(out,"Warning: don't know how to initialize the parameters\n");
        for(i=0; (i<nparm); i++)
            fitparm[i] = 1;
    }
    fprintf(out,"Starting parameters:\n");
    for(i=0; (i<nparm); i++)
        fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    if (do_lmfit(ny,c1,sig,dt,x0,tbeginfit,tendfit,
                 oenv,bDebugMode(),efitfn,fitparm,0)) {
        for(i=0; (i<nparm); i++)
            fprintf(out,"a%-2d = %12.5e\n",i+1,fitparm[i]);
    }
    else {
        fprintf(out,"No solution was found\n");
    }
}
int gmx_analyze(int argc,char *argv[])
{
    static const char *desc[] = {
        "g_analyze reads an ascii file and analyzes data sets.",
        "A line in the input file may start with a time",
        "(see option [TT]-time[tt]) and any number of y values may follow.",
        "Multiple sets can also be",
        "read when they are separated by & (option [TT]-n[tt]),",
        "in this case only one y value is read from each line.",
        "All lines starting with # and @ are skipped.",
        "All analyses can also be done for the derivative of a set",
        "(option [TT]-d[tt]).[PAR]",

        "All options, except for [TT]-av[tt] and [TT]-power[tt], assume that the",
        "points are equidistant in time.[PAR]",

        "g_analyze always shows the average and standard deviation of each",
        "set. For each set it also shows the relative deviation of the third",
        "and fourth cumulant from those of a Gaussian distribution with the same",
        "standard deviation.[PAR]",

        "Option [TT]-ac[tt] produces the autocorrelation function(s).[PAR]",

        "Option [TT]-cc[tt] plots the resemblance of set i with a cosine of",
        "i/2 periods. The formula is:[BR]"
        "2 (int0-T y(t) cos(i pi t) dt)^2 / int0-T y(t) y(t) dt[BR]",
        "This is useful for principal components obtained from covariance",
        "analysis, since the principal components of random diffusion are",
        "pure cosines.[PAR]",

        "Option [TT]-msd[tt] produces the mean square displacement(s).[PAR]",

        "Option [TT]-dist[tt] produces distribution plot(s).[PAR]",

        "Option [TT]-av[tt] produces the average over the sets.",
        "Error bars can be added with the option [TT]-errbar[tt].",
        "The errorbars can represent the standard deviation, the error",
        "(assuming the points are independent) or the interval containing",
        "90% of the points, by discarding 5% of the points at the top and",
        "the bottom.[PAR]",

        "Option [TT]-ee[tt] produces error estimates using block averaging.",
        "A set is divided in a number of blocks and averages are calculated for",
        "each block. The error for the total average is calculated from",
        "the variance between averages of the m blocks B_i as follows:",
        "error^2 = Sum (B_i - <B>)^2 / (m*(m-1)).",
        "These errors are plotted as a function of the block size.",
        "Also an analytical block average curve is plotted, assuming",
        "that the autocorrelation is a sum of two exponentials.",
        "The analytical curve for the block average is:[BR]",
        "f(t) = sigma sqrt(2/T (  a   (tau1 ((exp(-t/tau1) - 1) tau1/t + 1)) +[BR]",
        "                       (1-a) (tau2 ((exp(-t/tau2) - 1) tau2/t + 1)))),[BR]"
        "where T is the total time.",
        "a, tau1 and tau2 are obtained by fitting f^2(t) to error^2.",
        "When the actual block average is very close to the analytical curve,",
        "the error is sigma*sqrt(2/T (a tau1 + (1-a) tau2)).",
        "The complete derivation is given in",
        "B. Hess, J. Chem. Phys. 116:209-217, 2002.[PAR]",

        "Option [TT]-filter[tt] prints the RMS high-frequency fluctuation",
        "of each set and over all sets with respect to a filtered average.",
        "The filter is proportional to cos(pi t/len) where t goes from -len/2",
        "to len/2. len is supplied with the option [TT]-filter[tt].",
        "This filter reduces oscillations with period len/2 and len by a factor",
        "of 0.79 and 0.33 respectively.[PAR]",

        "Option [TT]-g[tt] fits the data to the function given with option",
        "[TT]-fitfn[tt].[PAR]",

        "Option [TT]-power[tt] fits the data to b t^a, which is accomplished",
        "by fitting to a t + b on log-log scale. All points after the first",
        "zero or negative value are ignored.[PAR]"

        "Option [TT]-luzar[tt] performs a Luzar & Chandler kinetics analysis",
        "on output from [TT]g_hbond[tt]. The input file can be taken directly",
        "from [TT]g_hbond -ac[tt], and then the same result should be produced."
    };
    static real tb=-1,te=-1,frac=0.5,filtlen=0,binwidth=0.1,aver_start=0;
    static bool bHaveT=TRUE,bDer=FALSE,bSubAv=TRUE,bAverCorr=FALSE,bXYdy=FALSE;
    static bool bEESEF=FALSE,bEENLC=FALSE,bEeFitAc=FALSE,bPower=FALSE;
    static bool bIntegrate=FALSE,bRegression=FALSE,bLuzar=FALSE,bLuzarError=FALSE;
    static int  nsets_in=1,d=1,nb_min=4,resol=10;
    static real temp=298.15,fit_start=1,smooth_tail_start=-1;

    /* must correspond to enum avbar* declared at beginning of file */
    static const char *avbar_opt[avbarNR+1] = {
        NULL, "none", "stddev", "error", "90", NULL
    };

    t_pargs pa[] = {
        { "-time",    FALSE, etBOOL, {&bHaveT},
          "Expect a time in the input" },
        { "-b",       FALSE, etREAL, {&tb},
          "First time to read from set" },
        { "-e",       FALSE, etREAL, {&te},
          "Last time to read from set" },
        { "-n",       FALSE, etINT, {&nsets_in},
          "Read # sets separated by &" },
        { "-d",       FALSE, etBOOL, {&bDer},
          "Use the derivative" },
        { "-dp",      FALSE, etINT, {&d},
          "HIDDENThe derivative is the difference over # points" },
        { "-bw",      FALSE, etREAL, {&binwidth},
          "Binwidth for the distribution" },
        { "-errbar",  FALSE, etENUM, {avbar_opt},
          "Error bars for -av" },
        { "-integrate",FALSE,etBOOL, {&bIntegrate},
          "Integrate data function(s) numerically using trapezium rule" },
        { "-aver_start",FALSE, etREAL, {&aver_start},
          "Start averaging the integral from here" },
        { "-xydy",    FALSE, etBOOL, {&bXYdy},
          "Interpret second data set as error in the y values for integrating" },
        { "-regression",FALSE,etBOOL,{&bRegression},
          "Perform a linear regression analysis on the data" },
        { "-luzar",   FALSE, etBOOL, {&bLuzar},
          "Do a Luzar and Chandler analysis on a correlation function and related as produced by g_hbond. When in addition the -xydy flag is given the second and fourth column will be interpreted as errors in c(t) and n(t)." },
        { "-temp",    FALSE, etREAL, {&temp},
          "Temperature for the Luzar hydrogen bonding kinetics analysis" },
        { "-fitstart", FALSE, etREAL, {&fit_start},
          "Time (ps) from which to start fitting the correlation functions in order to obtain the forward and backward rate constants for HB breaking and formation" },
        { "-smooth",  FALSE, etREAL, {&smooth_tail_start},
          "If >= 0, the tail of the ACF will be smoothed by fitting it to an exponential function: y = A exp(-x/tau)" },
        { "-nbmin",   FALSE, etINT, {&nb_min},
          "HIDDENMinimum number of blocks for block averaging" },
        { "-resol",   FALSE, etINT, {&resol},
          "HIDDENResolution for the block averaging, block size increases with"
          " a factor 2^(1/#)" },
        { "-eeexpfit", FALSE, etBOOL, {&bEESEF},
          "HIDDENAlways use a single exponential fit for the error estimate" },
        { "-eenlc",   FALSE, etBOOL, {&bEENLC},
          "HIDDENAllow a negative long-time correlation" },
        { "-eefitac", FALSE, etBOOL, {&bEeFitAc},
          "HIDDENAlso plot analytical block average using an autocorrelation fit" },
        { "-filter",  FALSE, etREAL, {&filtlen},
          "Print the high-frequency fluctuation after filtering with a cosine filter of length #" },
        { "-power",   FALSE, etBOOL, {&bPower},
          "Fit data to: b t^a" },
        { "-subav",   FALSE, etBOOL, {&bSubAv},
          "Subtract the average before autocorrelating" },
        { "-oneacf",  FALSE, etBOOL, {&bAverCorr},
          "Calculate one ACF over all sets" }
    };
#define NPA asize(pa)

    FILE       *out,*out_fit;
    int        n,nlast,s,nset,i,j=0;
    real       **val,*t,dt,tot,error;
    double     *av,*sig,cum1,cum2,cum3,cum4,db;
    const char *acfile,*msdfile,*ccfile,*distfile,*avfile,*eefile,*fitfile;
    output_env_t oenv;

    t_filenm fnm[] = {
        { efXVG, "-f",    "graph",    ffREAD  },
        { efXVG, "-ac",   "autocorr", ffOPTWR },
        { efXVG, "-msd",  "msd",      ffOPTWR },
        { efXVG, "-cc",   "coscont",  ffOPTWR },
        { efXVG, "-dist", "distr",    ffOPTWR },
        { efXVG, "-av",   "average",  ffOPTWR },
        { efXVG, "-ee",   "errest",   ffOPTWR },
        { efLOG, "-g",    "fitlog",   ffOPTWR }
    };
#define NFILE asize(fnm)

    int     npargs;
    t_pargs *ppa;

    npargs = asize(pa);
    ppa    = add_acf_pargs(&npargs,pa);

    CopyRight(stderr,argv[0]);
    parse_common_args(&argc,argv,PCA_CAN_VIEW,
                      NFILE,fnm,npargs,ppa,asize(desc),desc,0,NULL,&oenv);

    acfile   = opt2fn_null("-ac",NFILE,fnm);
    msdfile  = opt2fn_null("-msd",NFILE,fnm);
    ccfile   = opt2fn_null("-cc",NFILE,fnm);
    distfile = opt2fn_null("-dist",NFILE,fnm);
    avfile   = opt2fn_null("-av",NFILE,fnm);
    eefile   = opt2fn_null("-ee",NFILE,fnm);
    if (opt2parg_bSet("-fitfn",npargs,ppa))
        fitfile = opt2fn("-g",NFILE,fnm);
    else
        fitfile = opt2fn_null("-g",NFILE,fnm);

    val = read_xvg_time(opt2fn("-f",NFILE,fnm),bHaveT,
                        opt2parg_bSet("-b",npargs,ppa),tb,
                        opt2parg_bSet("-e",npargs,ppa),te,
                        nsets_in,&nset,&n,&dt,&t);
    printf("Read %d sets of %d points, dt = %g\n\n",nset,n,dt);

    if (bDer) {
        printf("Calculating the derivative as (f[i+%d]-f[i])/(%d*dt)\n\n",
               d,d);
        n -= d;
        for(s=0; s<nset; s++)
            for(i=0; (i<n); i++)
                val[s][i] = (val[s][i+d]-val[s][i])/(d*dt);
    }
    if (bIntegrate) {
        real sum,stddev;
        printf("Calculating the integral using the trapezium rule\n");

        if (bXYdy) {
            sum = evaluate_integral(n,t,val[0],val[1],aver_start,&stddev);
            printf("Integral %10.3f +/- %10.5f\n",sum,stddev);
        }
        else {
            for(s=0; s<nset; s++) {
                sum = evaluate_integral(n,t,val[s],NULL,aver_start,&stddev);
                printf("Integral %d  %10.5f +/- %10.5f\n",s+1,sum,stddev);
            }
        }
    }
    if (fitfile) {
        out_fit = ffopen(fitfile,"w");
        if (bXYdy && nset>=2) {
            do_fit(out_fit,0,TRUE,n,t,val,npargs,ppa,oenv);
        } else {
            for(s=0; s<nset; s++)
                do_fit(out_fit,s,FALSE,n,t,val,npargs,ppa,oenv);
        }
        ffclose(out_fit);
    }
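
    /* Per-set statistics: cum1 is the mean, cum2..cum4 the second to fourth
     * central moments.  The last two printed columns compare the third and
     * fourth moments with those of a Gaussian of the same sigma, for which
     * <|x-mu|^3> = sigma^3*sqrt(8/pi) and <(x-mu)^4> = 3*sigma^4, so both
     * columns are close to zero for Gaussian-distributed data.
     */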
961 printf(" std. dev. relative deviation of\n");
962 printf(" standard --------- cumulants from those of\n");
963 printf("set average deviation sqrt(n-1) a Gaussian distribition\n");
964 printf(" cum. 3 cum. 4\n");
965 snew(av,nset);
966 snew(sig,nset);
967 for(s=0; (s<nset); s++) {
968 cum1 = 0;
969 cum2 = 0;
970 cum3 = 0;
971 cum4 = 0;
972 for(i=0; (i<n); i++)
973 cum1 += val[s][i];
974 cum1 /= n;
975 for(i=0; (i<n); i++) {
976 db = val[s][i]-cum1;
977 cum2 += db*db;
978 cum3 += db*db*db;
979 cum4 += db*db*db*db;
981 cum2 /= n;
982 cum3 /= n;
983 cum4 /= n;
984 av[s] = cum1;
985 sig[s] = sqrt(cum2);
986 if (n > 1)
987 error = sqrt(cum2/(n-1));
988 else
989 error = 0;
990 printf("SS%d %13.6e %12.6e %12.6e %6.3f %6.3f\n",
991 s+1,av[s],sig[s],error,
992 sig[s] ? cum3/(sig[s]*sig[s]*sig[s]*sqrt(8/M_PI)) : 0,
993 sig[s] ? cum4/(sig[s]*sig[s]*sig[s]*sig[s]*3)-1 : 0);
995 printf("\n");
997 if (filtlen)
998 filter(filtlen,n,nset,val,dt,oenv);
1000 if (msdfile) {
1001 out=xvgropen(msdfile,"Mean square displacement",
1002 "time","MSD (nm\\S2\\N)",oenv);
1003 nlast = (int)(n*frac);
1004 for(s=0; s<nset; s++) {
1005 for(j=0; j<=nlast; j++) {
1006 if (j % 100 == 0)
1007 fprintf(stderr,"\r%d",j);
1008 tot=0;
1009 for(i=0; i<n-j; i++)
1010 tot += sqr(val[s][i]-val[s][i+j]);
1011 tot /= (real)(n-j);
1012 fprintf(out," %g %8g\n",dt*j,tot);
1014 if (s<nset-1)
1015 fprintf(out,"&\n");
1017 ffclose(out);
1018 fprintf(stderr,"\r%d, time=%g\n",j-1,(j-1)*dt);
1020 if (ccfile)
1021 plot_coscont(ccfile,n,nset,val,oenv);
1023 if (distfile)
1024 histogram(distfile,binwidth,n,nset,val,oenv);
1025 if (avfile)
1026 average(avfile,nenum(avbar_opt),n,nset,val,t);
1027 if (eefile)
1028 estimate_error(eefile,nb_min,resol,n,nset,av,sig,val,dt,
1029 bEeFitAc,bEESEF,bEENLC,oenv);
1030 if (bPower)
1031 power_fit(n,nset,val,t);
1032 if (acfile) {
1033 if (bSubAv)
1034 for(s=0; s<nset; s++)
1035 for(i=0; i<n; i++)
1036 val[s][i] -= av[s];
1037 do_autocorr(acfile,oenv,"Autocorrelation",n,nset,val,dt,
1038 eacNormal,bAverCorr);
1040 if (bRegression)
1041 regression_analysis(n,bXYdy,t,val);
1043 if (bLuzar)
1044 luzar_correl(n,t,nset,val,temp,bXYdy,fit_start,smooth_tail_start,oenv);
1046 view_all(oenv,NFILE, fnm);
1048 thanx(stderr);
1050 return 0;