2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements routine for fitting a data set to a curve
42 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
43 * \ingroup module_correlationfunctions
53 #include "gromacs/correlationfunctions/integrate.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/real.h"
60 #include "gromacs/utility/smalloc.h"
62 #include "gmx_lmcurve.h"
64 /*! \brief Number of parameters for each fitting function */
65 static const int nfp_ffn
[effnNR
] = { 0, 1, 2, 3, 5, 7, 9, 2, 4, 3, 6 };
67 /* +2 becuase parse_common_args wants leading and concluding NULL.
68 * We only allow exponential functions as choices on the command line,
69 * hence there are many more NULL field (which have to be at the end of
72 const char *s_ffn
[effnNR
+2] = {
73 nullptr, "none", "exp", "aexp", "exp_exp",
74 "exp5", "exp7", "exp9",
75 nullptr, nullptr, nullptr, nullptr, nullptr
78 /*! \brief Long description for each fitting function type */
79 static const char *longs_ffn
[effnNR
] = {
82 "y = a1 exp(-x/|a0|)",
83 "y = a1 exp(-x/|a0|) + (1-a1) exp(-x/(|a2|)), a2 > a0 > 0",
84 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4, a3 >= a1",
85 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6, a5 >= a3 >= a1",
86 "y = a0 exp(-x/|a1|) + a2 exp(-x/|a3|) + a4 exp(-x/|a5|) + a6 exp(-x/|a7|) + a8, a7 >= a5 >= a3 >= a1",
87 "y = exp(-v) (cosh(wv) + 1/w sinh(wv)), v = x/(2 a0), w = sqrt(1 - a1)",
88 "y = 1/2*(a0+a1) - 1/2*(a0-a1)*erf( (x-a2) /a3^2)",
89 "y = a1 *2*a0*((exp(-x/a0)-1)*(a0/x)+1)+(1-a1)*2*a2*((exp(-x/a2)-1)*(a2/x)+1)",
90 "y = (1-a0)*cos(x*a1)*exp(-(x/a2)^a3) + a0*exp(-(x/a4)^a5)"
93 int effnNparams(int effn
)
95 if ((0 <= effn
) && (effn
< effnNR
))
105 const char *effnDescription(int effn
)
107 if ((0 <= effn
) && (effn
< effnNR
))
109 return longs_ffn
[effn
];
117 int sffn2effn(const char **sffn
)
122 for (i
= 0; i
< effnNR
; i
++)
124 if (sffn
[i
+1] && strcmp(sffn
[0], sffn
[i
+1]) == 0)
133 /*! \brief Compute exponential function A exp(-x/tau) */
134 static double myexp(double x
, double A
, double tau
)
136 if ((A
== 0) || (tau
== 0))
140 return A
*exp(-x
/tau
);
143 /*! \brief Compute y=(a0+a1)/2-(a0-a1)/2*erf((x-a2)/a3^2) */
144 static double lmc_erffit (double x
, const double *a
)
151 erfarg
= (x
-a
[2])/(a
[3]*a
[3]);
152 myerf
= std::erf(erfarg
);
156 /* If a[3] == 0, a[3]^2 = 0 and the erfarg becomes +/- infinity */
166 return 0.5*((a
[0]+a
[1]) - (a
[0]-a
[1])*myerf
);
169 /*! \brief Exponent function that prevents overflow */
170 static double safe_exp(double x
)
172 double exp_max
= 200;
173 double exp_min
= -exp_max
;
178 else if (x
>= exp_max
)
188 /*! \brief Exponent minus 1 function that prevents overflow */
189 static double safe_expm1(double x
)
191 double exp_max
= 200;
192 double exp_min
= -exp_max
;
197 else if (x
>= exp_max
)
203 return std::expm1(x
);
207 /*! \brief Compute y = exp(-x/|a0|) */
208 static double lmc_exp_one_parm(double x
, const double *a
)
210 return safe_exp(-x
/fabs(a
[0]));
213 /*! \brief Compute y = a1 exp(-x/|a0|) */
214 static double lmc_exp_two_parm(double x
, const double *a
)
216 return a
[1]*safe_exp(-x
/fabs(a
[0]));
219 /*! \brief Compute y = a1 exp(-x/|a0|) + (1-a1) exp(-x/|a2|) */
220 static double lmc_exp_exp(double x
, const double *a
)
224 e1
= safe_exp(-x
/fabs(a
[0]));
225 e2
= safe_exp(-x
/(fabs(a
[0])+fabs(a
[2])));
226 return a
[1]*e1
+ (1-a
[1])*e2
;
229 /*! \brief Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) + a4 */
230 static double lmc_exp_5_parm(double x
, const double *a
)
234 e1
= safe_exp(-x
/fabs(a
[1]));
235 e2
= safe_exp(-x
/(fabs(a
[1])+fabs(a
[3])));
236 return a
[0]*e1
+ a
[2]*e2
+ a
[4];
239 /*! \brief Compute 7 parameter exponential function value.
241 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
242 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6
244 static double lmc_exp_7_parm(double x
, const double *a
)
247 double fa1
, fa3
, fa5
;
250 fa3
= fa1
+ fabs(a
[3]);
251 fa5
= fa3
+ fabs(a
[5]);
252 e1
= safe_exp(-x
/fa1
);
253 e2
= safe_exp(-x
/fa3
);
254 e3
= safe_exp(-x
/fa5
);
255 return a
[0]*e1
+ a
[2]*e2
+ a
[4]*e3
+ a
[6];
258 /*! \brief Compute 9 parameter exponential function value.
260 * Compute y = a0 exp(-x/|a1|) + a2 exp(-x/(|a1|+|a3|)) +
261 * a4 exp(-x/(|a1|+|a3|+|a5|)) + a6 exp(-x/(|a1|+|a3|+|a5|+|a7|)) + a8
263 static double lmc_exp_9_parm(double x
, const double *a
)
265 double e1
, e2
, e3
, e4
;
266 double fa1
, fa3
, fa5
, fa7
;
269 fa3
= fa1
+ fabs(a
[3]);
270 fa5
= fa3
+ fabs(a
[5]);
271 fa7
= fa5
+ fabs(a
[7]);
273 e1
= safe_exp(-x
/fa1
);
274 e2
= safe_exp(-x
/fa3
);
275 e3
= safe_exp(-x
/fa5
);
276 e4
= safe_exp(-x
/fa7
);
277 return a
[0]*e1
+ a
[2]*e2
+ a
[4]*e3
+ a
[6]*e4
+ a
[8];
280 /*! \brief Compute y = (1-a0)*exp(-(x/|a2|)^|a3|)*cos(x*|a1|) + a0*exp(-(x/|a4|)^|a5|) */
281 static double lmc_pres_6_parm(double x
, const double *a
)
283 double term1
, term2
, term3
;
287 if ((a
[4] != 0) && (a
[0] != 0))
289 double power
= std::min(fabs(a
[5]), pow_max
);
290 term3
= a
[0] * safe_exp(-pow((x
/fabs(a
[4])), power
));
295 if ((term1
!= 0) && (a
[2] != 0))
297 double power
= std::min(fabs(a
[3]), pow_max
);
298 term2
= safe_exp(-pow((x
/fabs(a
[2])), power
)) * cos(x
*fabs(a
[1]));
301 return term1
*term2
+ term3
;
304 /*! \brief Compute vac function */
305 static double lmc_vac_2_parm(double x
, const double *a
)
309 * y = 1/2 (1 - 1/w) exp(-(1+w)v) + 1/2 (1 + 1/w) exp(-(1-w)v)
311 * = exp(-v) (cosh(wv) + 1/w sinh(wv))
316 * For tranverse current autocorrelation functions:
318 * a1 = 4 tau (eta/rho) k^2
322 double y
, v
, det
, omega
, wv
, em
, ec
, es
;
325 v
= x
/(2*fabs(a
[0]));
330 omega
= sqrt(fabs(det
));
331 wv
= std::min(omega
*v
, wv_max
);
335 ec
= em
*0.5*(safe_exp(wv
)+safe_exp(-wv
));
336 es
= em
*0.5*(safe_exp(wv
)-safe_exp(-wv
))/omega
;
341 es
= em
*sin(wv
)/omega
;
352 /*! \brief Compute error estimate */
353 static double lmc_errest_3_parm(double x
, const double *a
)
355 double e1
, e2
, v1
, v2
;
356 double fa0
= fabs(a
[0]);
358 double fa2
= fa0
+fabs(a
[2]);
362 e1
= safe_expm1(-x
/fa0
);
370 e2
= safe_expm1(-x
/fa2
);
379 v1
= 2*fa0
*(e1
*fa0
/x
+ 1);
380 v2
= 2*fa2
*(e2
*fa2
/x
+ 1);
381 /* We need 0 <= a1 <= 1 */
382 fa1
= std::min(1.0, std::max(0.0, a
[1]));
384 return fa1
*v1
+ (1-fa1
)*v2
;
392 /*! \brief array of fitting functions corresponding to the pre-defined types */
393 t_lmcurve lmcurves
[effnNR
+1] = {
394 lmc_exp_one_parm
, lmc_exp_one_parm
, lmc_exp_two_parm
,
395 lmc_exp_exp
, lmc_exp_5_parm
, lmc_exp_7_parm
,
397 lmc_vac_2_parm
, lmc_erffit
, lmc_errest_3_parm
, lmc_pres_6_parm
400 double fit_function(const int eFitFn
, const double parm
[], const double x
)
402 if ((eFitFn
< 0) || (eFitFn
>= effnNR
))
404 fprintf(stderr
, "fitfn = %d, should be in the range 0..%d\n",
408 return lmcurves
[eFitFn
](x
, parm
);
411 /*! \brief Ensure the fitting parameters are well-behaved.
413 * In order to make sure that fitting behaves according to the
414 * constraint that time constants are positive and increasing
415 * we enforce this here by setting all time constants to their
416 * absolute value and by adding e.g. |a_0| to |a_2|. This is
417 * done in conjunction with the fitting functions themselves.
418 * When there are multiple time constants we make sure that
419 * the successive time constants are at least double the
420 * previous ones and during fitting we enforce the they remain larger.
421 * It may very well help the convergence of the fitting routine.
423 static void initiate_fit_params(int eFitFn
,
428 nparm
= effnNparams(eFitFn
);
433 GMX_ASSERT(params
[0] >= 0, "parameters should be >= 0");
438 GMX_ASSERT(params
[0] >= 0, "parameters should be >= 0");
441 GMX_ASSERT(params
[2] >= 0, "parameters should be >= 0");
442 /* In order to maintain params[2] >= params[0] in the final
443 * result, we fit the difference between the two, that is
444 * params[2]-params[0] and in the add add in params[0]
447 params
[2] = std::max(fabs(params
[2])-params
[0], params
[0]);
453 GMX_ASSERT(params
[1] >= 0, "parameters should be >= 0");
454 params
[1] = fabs(params
[1]);
457 GMX_ASSERT(params
[3] >= 0, "parameters should be >= 0");
458 /* See comment under effnEXPEXP */
459 params
[3] = std::max(fabs(params
[3])-params
[1], params
[1]);
462 GMX_ASSERT(params
[5] >= 0, "parameters should be >= 0");
463 /* See comment under effnEXPEXP */
464 params
[5] = std::max(fabs(params
[5])-params
[3], params
[3]);
467 GMX_ASSERT(params
[7] >= 0, "parameters should be >= 0");
468 /* See comment under effnEXPEXP */
469 params
[7] = std::max(fabs(params
[7])-params
[5], params
[5]);
475 GMX_ASSERT(params
[0] >= 0, "parameters should be >= 0");
476 GMX_ASSERT(params
[1] >= 0 && params
[1] <= 1, "parameter 1 should in 0 .. 1");
477 GMX_ASSERT(params
[2] >= 0, "parameters should be >= 0");
478 /* See comment under effnEXPEXP */
479 params
[2] = fabs(params
[2])-params
[0];
482 for (i
= 1; (i
< nparm
); i
++)
484 GMX_ASSERT(params
[i
] >= 0, "parameters should be >= 0");
492 /*! \brief Process the fitting parameters to get output parameters.
494 * See comment at the previous function.
496 static void extract_fit_params(int eFitFn
,
501 nparm
= effnNparams(eFitFn
);
506 params
[0] = fabs(params
[0]);
511 params
[0] = fabs(params
[0]);
514 /* Back conversion of parameters from the fitted difference
515 * to the absolute value.
517 params
[2] = fabs(params
[2])+params
[0];
523 params
[1] = fabs(params
[1]);
526 /* See comment under effnEXPEXP */
527 params
[3] = fabs(params
[3])+params
[1];
530 /* See comment under effnEXPEXP */
531 params
[5] = fabs(params
[5])+params
[3];
534 /* See comment under effnEXPEXP */
535 params
[7] = fabs(params
[7])+params
[5];
541 params
[0] = fabs(params
[0]);
546 else if (params
[1] > 1)
550 /* See comment under effnEXPEXP */
551 params
[2] = params
[0]+fabs(params
[2]);
554 for (i
= 1; (i
< nparm
); i
++)
556 params
[i
] = fabs(params
[i
]);
564 /*! \brief Print chi-squared value and the parameters */
565 static void print_chi2_params(FILE *fp
,
567 const double fitparms
[],
576 for (i
= 0; (i
< nfitpnts
); i
++)
578 double yfit
= lmcurves
[eFitFn
](x
[i
], fitparms
);
579 chi2
+= gmx::square(y
[i
] - yfit
);
581 fprintf(fp
, "There are %d data points, %d parameters, %s chi2 = %g\nparams:",
582 nfitpnts
, effnNparams(eFitFn
), label
, chi2
);
583 for (i
= 0; (i
< effnNparams(eFitFn
)); i
++)
585 fprintf(fp
, " %10g", fitparms
[i
]);
590 real
do_lmfit(int ndata
, real c1
[], real sig
[], real dt
, real
*x0
,
591 real begintimefit
, real endtimefit
, const gmx_output_env_t
*oenv
,
592 gmx_bool bVerbose
, int eFitFn
, double fitparms
[], int fix
,
593 const char *fn_fitted
)
597 double integral
, ttt
;
602 fprintf(stderr
, "Using fixed parameters in curve fitting is temporarily not working.\n");
606 fprintf(debug
, "There are %d points to fit %d vars!\n", ndata
, effnNparams(eFitFn
));
607 fprintf(debug
, "Fit to function %d from %g through %g, dt=%g\n",
608 eFitFn
, begintimefit
, endtimefit
, dt
);
615 for (i
= 0; i
< ndata
; i
++)
617 ttt
= x0
? x0
[i
] : dt
*i
;
618 if (ttt
>= begintimefit
&& ttt
<= endtimefit
)
624 // No weighting if all values are divided by 1.
629 dy
[j
] = std::max(1.0e-7, (double)sig
[i
]);
633 fprintf(debug
, "j= %d, i= %d, x= %g, y= %g, dy=%g, ttt=%g\n",
634 j
, i
, x
[j
], y
[j
], dy
[j
], ttt
);
641 if (nfitpnts
< effnNparams(eFitFn
))
643 fprintf(stderr
, "Not enough (%d) data points for fitting, dt = %g!\n",
652 print_chi2_params(stdout
, eFitFn
, fitparms
, "initial", nfitpnts
, x
, y
);
654 initiate_fit_params(eFitFn
, fitparms
);
656 bSuccess
= lmfit_exp(nfitpnts
, x
, y
, dy
, fitparms
, bVerbose
, eFitFn
, fix
);
657 extract_fit_params(eFitFn
, fitparms
);
661 fprintf(stderr
, "Fit failed!\n");
667 print_chi2_params(stdout
, eFitFn
, fitparms
, "final", nfitpnts
, x
, y
);
672 integral
= fitparms
[0]*myexp(begintimefit
, 1, fitparms
[0]);
675 integral
= fitparms
[0]*myexp(begintimefit
, fitparms
[1], fitparms
[0]);
678 integral
= (fitparms
[0]*myexp(begintimefit
, fitparms
[1], fitparms
[0]) +
679 fitparms
[2]*myexp(begintimefit
, 1-fitparms
[1], fitparms
[2]));
685 for (i
= 0; (i
< (effnNparams(eFitFn
)-1)/2); i
++)
687 integral
+= fitparms
[2*i
]*myexp(begintimefit
, fitparms
[2*i
+1], fitparms
[2*i
]);
691 /* Do numerical integration */
693 for (i
= 0; (i
< nfitpnts
-1); i
++)
695 double y0
= lmcurves
[eFitFn
](x
[i
], fitparms
);
696 double y1
= lmcurves
[eFitFn
](x
[i
+1], fitparms
);
697 integral
+= (x
[i
+1]-x
[i
])*(y1
+y0
)*0.5;
704 printf("FIT: Integral of fitted function: %g\n", integral
);
705 if ((effnEXP5
== eFitFn
) || (effnEXP7
== eFitFn
) || (effnEXP9
== eFitFn
))
707 printf("FIT: Note that the constant term is not taken into account when computing integral.\n");
710 /* Generate debug output */
711 if (nullptr != fn_fitted
)
713 fp
= xvgropen(fn_fitted
, "Data + Fit", "Time (ps)",
715 for (i
= 0; (i
< effnNparams(eFitFn
)); i
++)
717 fprintf(fp
, "# fitparms[%d] = %g\n", i
, fitparms
[i
]);
719 for (j
= 0; (j
< nfitpnts
); j
++)
721 real ttt
= x0
? x0
[j
] : dt
*j
;
722 fprintf(fp
, "%10.5e %10.5e %10.5e\n",
723 x
[j
], y
[j
], (lmcurves
[eFitFn
])(ttt
, fitparms
));
737 real
fit_acf(int ncorr
, int fitfn
, const gmx_output_env_t
*oenv
, gmx_bool bVerbose
,
738 real tbeginfit
, real tendfit
, real dt
, real c1
[], real
*fit
)
741 double tStart
, tail_corr
, sum
, sumtot
= 0, c_start
, ct_estimate
;
743 int i
, j
, jmax
, nf_int
;
746 GMX_ASSERT(effnNparams(fitfn
) < static_cast<int>(sizeof(fitparm
)/sizeof(double)),
747 "Fitting function with more than 3 parameters not supported!");
748 bPrint
= bVerbose
|| bDebugMode();
759 nf_int
= std::min(ncorr
, (int)(tendfit
/dt
));
760 sum
= print_and_integrate(debug
, nf_int
, dt
, c1
, nullptr, 1);
764 printf("COR: Correlation time (plain integral from %6.3f to %6.3f ps) = %8.5f ps\n",
765 0.0, dt
*nf_int
, sum
);
766 printf("COR: Relaxation times are computed as fit to an exponential:\n");
767 printf("COR: %s\n", effnDescription(fitfn
));
768 printf("COR: Fit to correlation function from %6.3f ps to %6.3f ps, results in a\n", tbeginfit
, std::min(ncorr
*dt
, tendfit
));
774 printf("COR:%11s%11s%11s%11s%11s%11s%11s\n",
775 "Fit from", "Integral", "Tail Value", "Sum (ps)", " a1 (ps)",
776 (effnNparams(fitfn
) >= 2) ? " a2 ()" : "",
777 (effnNparams(fitfn
) >= 3) ? " a3 (ps)" : "");
790 for (j
= 0; ((j
< jmax
) && (tStart
< tendfit
) && (tStart
< ncorr
*dt
)); j
++)
792 /* Estimate the correlation time for better fitting */
795 for (i
= 0; (i
< ncorr
) && (dt
*i
< tStart
|| c1
[i
] > 0); i
++)
802 ct_estimate
= 0.5*c1
[i
];
807 ct_estimate
+= c1
[i
];
812 ct_estimate
*= dt
/c_start
;
816 /* The data is strange, so we need to choose somehting */
817 ct_estimate
= tendfit
;
821 fprintf(debug
, "tStart %g ct_estimate: %g\n", tStart
, ct_estimate
);
824 if (fitfn
== effnEXPEXP
)
826 fitparm
[0] = 0.002*ncorr
*dt
;
828 fitparm
[2] = 0.2*ncorr
*dt
;
832 /* Good initial guess, this increases the probability of convergence */
833 fitparm
[0] = ct_estimate
;
838 /* Generate more or less appropriate sigma's */
839 for (i
= 0; i
< ncorr
; i
++)
841 sig
[i
] = sqrt(ct_estimate
+dt
*i
);
844 nf_int
= std::min(ncorr
, (int)((tStart
+1e-4)/dt
));
845 sum
= print_and_integrate(debug
, nf_int
, dt
, c1
, nullptr, 1);
846 tail_corr
= do_lmfit(ncorr
, c1
, sig
, dt
, nullptr, tStart
, tendfit
, oenv
,
847 bDebugMode(), fitfn
, fitparm
, 0, nullptr);
848 sumtot
= sum
+tail_corr
;
849 if (fit
&& ((jmax
== 1) || (j
== 1)))
852 for (i
= 0; (i
< 3); i
++)
856 for (i
= 0; (i
< ncorr
); i
++)
858 fit
[i
] = lmcurves
[fitfn
](i
*dt
, mfp
);
863 printf("COR:%11.4e%11.4e%11.4e%11.4e", tStart
, sum
, tail_corr
, sumtot
);
864 for (i
= 0; (i
< effnNparams(fitfn
)); i
++)
866 printf(" %11.4e", fitparm
[i
]);