Merge branch 'origin/release-2020' into merge-2020-into-2021
[gromacs.git] / src / gromacs / gmxana / gmx_covar.cpp
blob559c1f96372f7686f65fc3fe7ccc00bb92586c9a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gmxpre.h"
40 #include <cmath>
41 #include <cstring>
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arrayref.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/sysinfo.h"
67 namespace gmx
70 namespace
73 /*! \brief Throw an error if any element in index exceeds a given number.
75 * \param[in] indices to be acessed
76 * \param[in] largestOkayIndex to be accessed
77 * \param[in] indexUsagePurpose to be more explicit in the error message
79 * \throws RangeError if largestOkayIndex is larger than any element in indices
82 void throwErrorIfIndexOutOfBounds(ArrayRef<const int> indices,
83 const int largestOkayIndex,
84 const std::string& indexUsagePurpose)
86 // do nothing if index is empty
87 if (indices.ssize() == 0)
89 return;
91 const int largestIndex = *std::max_element(indices.begin(), indices.end());
92 if (largestIndex < largestOkayIndex)
94 GMX_THROW(RangeError("The provided structure file only contains "
95 + std::to_string(largestOkayIndex) + " coordinates, but coordinate index "
96 + std::to_string(largestIndex) + " was requested for " + indexUsagePurpose
97 + ". Make sure to update structure files "
98 "and index files if you store only a part of your system."));
102 } // namespace
104 } // namespace gmx
106 int gmx_covar(int argc, char* argv[])
108 const char* desc[] = {
109 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
110 "covariance matrix.",
111 "All structures are fitted to the structure in the structure file.",
112 "When this is not a run input file periodicity will not be taken into",
113 "account. When the fit and analysis groups are identical and the analysis",
114 "is non mass-weighted, the fit will also be non mass-weighted.",
115 "[PAR]",
116 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
117 "When the same atoms are used for the fit and the covariance analysis,",
118 "the reference structure for the fit is written first with t=-1.",
119 "The average (or reference when [TT]-ref[tt] is used) structure is",
120 "written with t=0, the eigenvectors",
121 "are written as frames with the eigenvector number and eigenvalue",
122 "as step number and timestamp, respectively.",
123 "[PAR]",
124 "The eigenvectors can be analyzed with [gmx-anaeig].",
125 "[PAR]",
126 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
127 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
128 "[PAR]",
129 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
130 "[PAR]",
131 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
132 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
133 "written.",
134 "[PAR]",
135 "Note that the diagonalization of a matrix requires memory and time",
136 "that will increase at least as fast as than the square of the number",
137 "of atoms involved. It is easy to run out of memory, in which",
138 "case this tool will probably exit with a 'Segmentation fault'. You",
139 "should consider carefully whether a reduced set of atoms will meet",
140 "your needs for lower costs."
142 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
143 static int end = -1;
144 t_pargs pa[] = {
145 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
146 { "-ref",
147 FALSE,
148 etBOOL,
149 { &bRef },
150 "Use the deviation from the conformation in the structure file instead of from the "
151 "average" },
152 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
153 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
154 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
156 FILE* out = nullptr; /* initialization makes all compilers happy */
157 t_trxstatus* status;
158 t_topology top;
159 PbcType pbcType;
160 t_atoms* atoms;
161 rvec * x, *xread, *xref, *xav, *xproj;
162 matrix box, zerobox;
163 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
164 real t, tstart, tend, **mat2;
165 real xj, *w_rls = nullptr;
166 real min, max, *axis;
167 int natoms, nat, nframes0, nframes, nlevels;
168 int64_t ndim, i, j, k, l;
169 int WriteXref;
170 const char * fitfile, *trxfile, *ndxfile;
171 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
172 const char * asciifile, *xpmfile, *xpmafile;
173 char str[STRLEN], *fitname, *ananame;
174 int d, dj, nfit;
175 int * index, *ifit;
176 gmx_bool bDiffMass1, bDiffMass2;
177 t_rgb rlo, rmi, rhi;
178 real* eigenvectors;
179 gmx_output_env_t* oenv;
180 gmx_rmpbc_t gpbc = nullptr;
182 t_filenm fnm[] = {
183 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
184 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
185 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
186 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
187 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
189 #define NFILE asize(fnm)
191 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa,
192 asize(desc), desc, 0, nullptr, &oenv))
194 return 0;
197 clear_mat(zerobox);
199 fitfile = ftp2fn(efTPS, NFILE, fnm);
200 trxfile = ftp2fn(efTRX, NFILE, fnm);
201 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
202 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
203 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
204 averfile = ftp2fn(efSTO, NFILE, fnm);
205 logfile = ftp2fn(efLOG, NFILE, fnm);
206 asciifile = opt2fn_null("-ascii", NFILE, fnm);
207 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
208 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
210 read_tps_conf(fitfile, &top, &pbcType, &xref, nullptr, box, TRUE);
211 atoms = &top.atoms;
213 if (bFit)
215 printf("\nChoose a group for the least squares fit\n");
216 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
217 // Make sure that we never attempt to access a coordinate out of range
218 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, atoms->nr, "fitting");
219 if (nfit < 3)
221 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
224 else
226 nfit = 0;
228 printf("\nChoose a group for the covariance analysis\n");
229 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
230 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, atoms->nr, "analysis");
232 bDiffMass1 = FALSE;
233 if (bFit)
235 snew(w_rls, atoms->nr);
236 for (i = 0; (i < nfit); i++)
238 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
239 if (i)
241 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
245 bDiffMass2 = FALSE;
246 snew(sqrtm, natoms);
247 for (i = 0; (i < natoms); i++)
249 if (bM)
251 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
252 if (i)
254 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
257 else
259 sqrtm[i] = 1.0;
263 if (bFit && bDiffMass1 && !bDiffMass2)
265 bDiffMass1 = natoms != nfit;
266 for (i = 0; (i < natoms) && !bDiffMass1; i++)
268 bDiffMass1 = index[i] != ifit[i];
270 if (!bDiffMass1)
272 fprintf(stderr,
273 "\n"
274 "Note: the fit and analysis group are identical,\n"
275 " while the fit is mass weighted and the analysis is not.\n"
276 " Making the fit non mass weighted.\n\n");
277 for (i = 0; (i < nfit); i++)
279 w_rls[ifit[i]] = 1.0;
284 /* Prepare reference frame */
285 if (bPBC)
287 gpbc = gmx_rmpbc_init(&top.idef, pbcType, atoms->nr);
288 gmx_rmpbc(gpbc, atoms->nr, box, xref);
290 if (bFit)
292 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
295 snew(x, natoms);
296 snew(xav, natoms);
297 ndim = natoms * DIM;
298 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
300 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
302 snew(mat, ndim * ndim);
304 fprintf(stderr, "Calculating the average structure ...\n");
305 nframes0 = 0;
306 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
307 if (nat != atoms->nr)
309 fprintf(stderr,
310 "\nWARNING: number of atoms in structure file (%d) and trajectory (%d) do not "
311 "match\n",
312 natoms, nat);
314 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, nat, "fitting");
315 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, nat, "analysis");
319 nframes0++;
320 /* calculate x: a fitted struture of the selected atoms */
321 if (bPBC)
323 gmx_rmpbc(gpbc, nat, box, xread);
325 if (bFit)
327 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
328 do_fit(nat, w_rls, xref, xread);
330 for (i = 0; i < natoms; i++)
332 rvec_inc(xav[i], xread[index[i]]);
334 } while (read_next_x(oenv, status, &t, xread, box));
335 close_trx(status);
337 inv_nframes = 1.0 / nframes0;
338 for (i = 0; i < natoms; i++)
340 for (d = 0; d < DIM; d++)
342 xav[i][d] *= inv_nframes;
343 xread[index[i]][d] = xav[i][d];
346 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr,
347 PbcType::No, zerobox, natoms, index);
348 sfree(xread);
350 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim),
351 static_cast<int>(ndim));
352 nframes = 0;
353 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
354 tstart = t;
357 nframes++;
358 tend = t;
359 /* calculate x: a (fitted) structure of the selected atoms */
360 if (bPBC)
362 gmx_rmpbc(gpbc, nat, box, xread);
364 if (bFit)
366 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
367 do_fit(nat, w_rls, xref, xread);
369 if (bRef)
371 for (i = 0; i < natoms; i++)
373 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
376 else
378 for (i = 0; i < natoms; i++)
380 rvec_sub(xread[index[i]], xav[i], x[i]);
384 for (j = 0; j < natoms; j++)
386 for (dj = 0; dj < DIM; dj++)
388 k = ndim * (DIM * j + dj);
389 xj = x[j][dj];
390 for (i = j; i < natoms; i++)
392 l = k + DIM * i;
393 for (d = 0; d < DIM; d++)
395 mat[l + d] += x[i][d] * xj;
400 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
401 close_trx(status);
402 gmx_rmpbc_done(gpbc);
404 fprintf(stderr, "Read %d frames\n", nframes);
406 if (bRef)
408 /* copy the reference structure to the ouput array x */
409 snew(xproj, natoms);
410 for (i = 0; i < natoms; i++)
412 copy_rvec(xref[index[i]], xproj[i]);
415 else
417 xproj = xav;
420 /* correct the covariance matrix for the mass */
421 inv_nframes = 1.0 / nframes;
422 for (j = 0; j < natoms; j++)
424 for (dj = 0; dj < DIM; dj++)
426 for (i = j; i < natoms; i++)
428 k = ndim * (DIM * j + dj) + DIM * i;
429 for (d = 0; d < DIM; d++)
431 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
437 /* symmetrize the matrix */
438 for (j = 0; j < ndim; j++)
440 for (i = j; i < ndim; i++)
442 mat[ndim * i + j] = mat[ndim * j + i];
446 trace = 0;
447 for (i = 0; i < ndim; i++)
449 trace += mat[i * ndim + i];
451 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
453 if (asciifile)
455 out = gmx_ffopen(asciifile, "w");
456 for (j = 0; j < ndim; j++)
458 for (i = 0; i < ndim; i += 3)
460 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1],
461 mat[ndim * j + i + 2]);
464 gmx_ffclose(out);
467 if (xpmfile)
469 min = 0;
470 max = 0;
471 snew(mat2, ndim);
472 for (j = 0; j < ndim; j++)
474 mat2[j] = &(mat[ndim * j]);
475 for (i = 0; i <= j; i++)
477 if (mat2[j][i] < min)
479 min = mat2[j][i];
481 if (mat2[j][j] > max)
483 max = mat2[j][i];
487 snew(axis, ndim);
488 for (i = 0; i < ndim; i++)
490 axis[i] = i + 1;
492 rlo.r = 0;
493 rlo.g = 0;
494 rlo.b = 1;
495 rmi.r = 1;
496 rmi.g = 1;
497 rmi.b = 1;
498 rhi.r = 1;
499 rhi.g = 0;
500 rhi.b = 0;
501 out = gmx_ffopen(xpmfile, "w");
502 nlevels = 80;
503 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "dim", "dim", ndim, ndim, axis,
504 axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
505 gmx_ffclose(out);
506 sfree(axis);
507 sfree(mat2);
510 if (xpmafile)
512 min = 0;
513 max = 0;
514 snew(mat2, ndim / DIM);
515 for (i = 0; i < ndim / DIM; i++)
517 snew(mat2[i], ndim / DIM);
519 for (j = 0; j < ndim / DIM; j++)
521 for (i = 0; i <= j; i++)
523 mat2[j][i] = 0;
524 for (d = 0; d < DIM; d++)
526 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
528 if (mat2[j][i] < min)
530 min = mat2[j][i];
532 if (mat2[j][j] > max)
534 max = mat2[j][i];
536 mat2[i][j] = mat2[j][i];
539 snew(axis, ndim / DIM);
540 for (i = 0; i < ndim / DIM; i++)
542 axis[i] = i + 1;
544 rlo.r = 0;
545 rlo.g = 0;
546 rlo.b = 1;
547 rmi.r = 1;
548 rmi.g = 1;
549 rmi.b = 1;
550 rhi.r = 1;
551 rhi.g = 0;
552 rhi.b = 0;
553 out = gmx_ffopen(xpmafile, "w");
554 nlevels = 80;
555 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "atom", "atom", ndim / DIM,
556 ndim / DIM, axis, axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
557 gmx_ffclose(out);
558 sfree(axis);
559 for (i = 0; i < ndim / DIM; i++)
561 sfree(mat2[i]);
563 sfree(mat2);
567 /* call diagonalization routine */
569 snew(eigenvalues, ndim);
570 snew(eigenvectors, ndim * ndim);
572 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
573 fprintf(stderr, "\nDiagonalizing ...\n");
574 fflush(stderr);
575 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
576 sfree(eigenvectors);
578 /* now write the output */
580 sum = 0;
581 for (i = 0; i < ndim; i++)
583 sum += eigenvalues[i];
585 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
586 if (std::abs(trace - sum) > 0.01 * trace)
588 fprintf(stderr,
589 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
592 /* Set 'end', the maximum eigenvector and -value index used for output */
593 if (end == -1)
595 if (nframes - 1 < ndim)
597 end = nframes - 1;
598 fprintf(stderr,
599 "\nWARNING: there are fewer frames in your trajectory than there are\n");
600 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
601 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
603 else
605 end = ndim;
609 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
611 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
612 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
613 for (i = 0; (i < end); i++)
615 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
617 xvgrclose(out);
619 if (bFit)
621 /* misuse lambda: 0/1 mass weighted analysis no/yes */
622 if (nfit == natoms)
624 WriteXref = eWXR_YES;
625 for (i = 0; i < nfit; i++)
627 copy_rvec(xref[ifit[i]], x[i]);
630 else
632 WriteXref = eWXR_NO;
635 else
637 /* misuse lambda: -1 for no fit */
638 WriteXref = eWXR_NOFIT;
641 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM,
642 eigenvalues);
644 out = gmx_ffopen(logfile, "w");
646 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
648 fprintf(out, "Program: %s\n", argv[0]);
649 gmx_getcwd(str, STRLEN);
651 fprintf(out, "Working directory: %s\n\n", str);
653 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
654 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend),
655 output_env_get_time_unit(oenv).c_str());
656 if (bFit)
658 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
660 if (ndxfile)
662 fprintf(out, "Read index groups from %s\n", ndxfile);
664 fprintf(out, "\n");
666 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
667 if (bFit)
669 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
671 else
673 fprintf(out, "No fit was used\n");
675 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
676 if (bFit)
678 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
680 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim),
681 static_cast<int>(ndim));
682 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
683 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
685 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
686 if (WriteXref == eWXR_YES)
688 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
690 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
691 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
693 gmx_ffclose(out);
695 fprintf(stderr, "Wrote the log to %s\n", logfile);
697 return 0;