Cleaned up memory usage in gmx traj and trjconv
[gromacs.git] / src / gromacs / gmxana / gmx_covar.cpp
blob3bdb367078d869d6c36534ca299b210b9d29041b
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstring>
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/linearalgebra/eigensolver.h"
50 #include "gromacs/math/do_fit.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/pbcutil/pbc.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc, char *argv[])
65 const char *desc[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
67 "covariance matrix.",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
72 "[PAR]",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number and eigenvalue",
79 "as step number and timestamp, respectively.",
80 "[PAR]",
81 "The eigenvectors can be analyzed with [gmx-anaeig].",
82 "[PAR]",
83 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
84 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
85 "[PAR]",
86 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
87 "[PAR]",
88 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
89 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
90 "written.",
91 "[PAR]",
92 "Note that the diagonalization of a matrix requires memory and time",
93 "that will increase at least as fast as than the square of the number",
94 "of atoms involved. It is easy to run out of memory, in which",
95 "case this tool will probably exit with a 'Segmentation fault'. You",
96 "should consider carefully whether a reduced set of atoms will meet",
97 "your needs for lower costs."
99 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
100 static int end = -1;
101 t_pargs pa[] = {
102 { "-fit", FALSE, etBOOL, {&bFit},
103 "Fit to a reference structure"},
104 { "-ref", FALSE, etBOOL, {&bRef},
105 "Use the deviation from the conformation in the structure file instead of from the average" },
106 { "-mwa", FALSE, etBOOL, {&bM},
107 "Mass-weighted covariance analysis"},
108 { "-last", FALSE, etINT, {&end},
109 "Last eigenvector to write away (-1 is till the last)" },
110 { "-pbc", FALSE, etBOOL, {&bPBC},
111 "Apply corrections for periodic boundary conditions" }
113 FILE *out = nullptr; /* initialization makes all compilers happy */
114 t_trxstatus *status;
115 t_topology top;
116 int ePBC;
117 t_atoms *atoms;
118 rvec *x, *xread, *xref, *xav, *xproj;
119 matrix box, zerobox;
120 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
121 real t, tstart, tend, **mat2;
122 real xj, *w_rls = nullptr;
123 real min, max, *axis;
124 int natoms, nat, nframes0, nframes, nlevels;
125 gmx_int64_t ndim, i, j, k, l;
126 int WriteXref;
127 const char *fitfile, *trxfile, *ndxfile;
128 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
129 const char *asciifile, *xpmfile, *xpmafile;
130 char str[STRLEN], *fitname, *ananame;
131 int d, dj, nfit;
132 int *index, *ifit;
133 gmx_bool bDiffMass1, bDiffMass2;
134 char timebuf[STRLEN];
135 t_rgb rlo, rmi, rhi;
136 real *eigenvectors;
137 gmx_output_env_t *oenv;
138 gmx_rmpbc_t gpbc = nullptr;
140 t_filenm fnm[] = {
141 { efTRX, "-f", nullptr, ffREAD },
142 { efTPS, nullptr, nullptr, ffREAD },
143 { efNDX, nullptr, nullptr, ffOPTRD },
144 { efXVG, nullptr, "eigenval", ffWRITE },
145 { efTRN, "-v", "eigenvec", ffWRITE },
146 { efSTO, "-av", "average.pdb", ffWRITE },
147 { efLOG, nullptr, "covar", ffWRITE },
148 { efDAT, "-ascii", "covar", ffOPTWR },
149 { efXPM, "-xpm", "covar", ffOPTWR },
150 { efXPM, "-xpma", "covara", ffOPTWR }
152 #define NFILE asize(fnm)
154 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT,
155 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
157 return 0;
160 clear_mat(zerobox);
162 fitfile = ftp2fn(efTPS, NFILE, fnm);
163 trxfile = ftp2fn(efTRX, NFILE, fnm);
164 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
165 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
166 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
167 averfile = ftp2fn(efSTO, NFILE, fnm);
168 logfile = ftp2fn(efLOG, NFILE, fnm);
169 asciifile = opt2fn_null("-ascii", NFILE, fnm);
170 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
171 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
173 read_tps_conf(fitfile, &top, &ePBC, &xref, nullptr, box, TRUE);
174 atoms = &top.atoms;
176 if (bFit)
178 printf("\nChoose a group for the least squares fit\n");
179 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
180 if (nfit < 3)
182 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
185 else
187 nfit = 0;
189 printf("\nChoose a group for the covariance analysis\n");
190 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
192 bDiffMass1 = FALSE;
193 if (bFit)
195 snew(w_rls, atoms->nr);
196 for (i = 0; (i < nfit); i++)
198 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
199 if (i)
201 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
205 bDiffMass2 = FALSE;
206 snew(sqrtm, natoms);
207 for (i = 0; (i < natoms); i++)
209 if (bM)
211 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
212 if (i)
214 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
217 else
219 sqrtm[i] = 1.0;
223 if (bFit && bDiffMass1 && !bDiffMass2)
225 bDiffMass1 = natoms != nfit;
226 for (i = 0; (i < natoms) && !bDiffMass1; i++)
228 bDiffMass1 = index[i] != ifit[i];
230 if (!bDiffMass1)
232 fprintf(stderr, "\n"
233 "Note: the fit and analysis group are identical,\n"
234 " while the fit is mass weighted and the analysis is not.\n"
235 " Making the fit non mass weighted.\n\n");
236 for (i = 0; (i < nfit); i++)
238 w_rls[ifit[i]] = 1.0;
243 /* Prepare reference frame */
244 if (bPBC)
246 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
247 gmx_rmpbc(gpbc, atoms->nr, box, xref);
249 if (bFit)
251 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
254 snew(x, natoms);
255 snew(xav, natoms);
256 ndim = natoms*DIM;
257 if (std::sqrt(static_cast<real>(GMX_INT64_MAX)) < static_cast<real>(ndim))
259 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
261 snew(mat, ndim*ndim);
263 fprintf(stderr, "Calculating the average structure ...\n");
264 nframes0 = 0;
265 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
266 if (nat != atoms->nr)
268 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
272 nframes0++;
273 /* calculate x: a fitted struture of the selected atoms */
274 if (bPBC)
276 gmx_rmpbc(gpbc, nat, box, xread);
278 if (bFit)
280 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
281 do_fit(nat, w_rls, xref, xread);
283 for (i = 0; i < natoms; i++)
285 rvec_inc(xav[i], xread[index[i]]);
288 while (read_next_x(oenv, status, &t, xread, box));
289 close_trx(status);
291 inv_nframes = 1.0/nframes0;
292 for (i = 0; i < natoms; i++)
294 for (d = 0; d < DIM; d++)
296 xav[i][d] *= inv_nframes;
297 xread[index[i]][d] = xav[i][d];
300 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
301 atoms, xread, nullptr, epbcNONE, zerobox, natoms, index);
302 sfree(xread);
304 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim), static_cast<int>(ndim));
305 nframes = 0;
306 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
307 tstart = t;
310 nframes++;
311 tend = t;
312 /* calculate x: a (fitted) structure of the selected atoms */
313 if (bPBC)
315 gmx_rmpbc(gpbc, nat, box, xread);
317 if (bFit)
319 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
320 do_fit(nat, w_rls, xref, xread);
322 if (bRef)
324 for (i = 0; i < natoms; i++)
326 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
329 else
331 for (i = 0; i < natoms; i++)
333 rvec_sub(xread[index[i]], xav[i], x[i]);
337 for (j = 0; j < natoms; j++)
339 for (dj = 0; dj < DIM; dj++)
341 k = ndim*(DIM*j+dj);
342 xj = x[j][dj];
343 for (i = j; i < natoms; i++)
345 l = k+DIM*i;
346 for (d = 0; d < DIM; d++)
348 mat[l+d] += x[i][d]*xj;
354 while (read_next_x(oenv, status, &t, xread, box) &&
355 (bRef || nframes < nframes0));
356 close_trx(status);
357 gmx_rmpbc_done(gpbc);
359 fprintf(stderr, "Read %d frames\n", nframes);
361 if (bRef)
363 /* copy the reference structure to the ouput array x */
364 snew(xproj, natoms);
365 for (i = 0; i < natoms; i++)
367 copy_rvec(xref[index[i]], xproj[i]);
370 else
372 xproj = xav;
375 /* correct the covariance matrix for the mass */
376 inv_nframes = 1.0/nframes;
377 for (j = 0; j < natoms; j++)
379 for (dj = 0; dj < DIM; dj++)
381 for (i = j; i < natoms; i++)
383 k = ndim*(DIM*j+dj)+DIM*i;
384 for (d = 0; d < DIM; d++)
386 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
392 /* symmetrize the matrix */
393 for (j = 0; j < ndim; j++)
395 for (i = j; i < ndim; i++)
397 mat[ndim*i+j] = mat[ndim*j+i];
401 trace = 0;
402 for (i = 0; i < ndim; i++)
404 trace += mat[i*ndim+i];
406 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
407 trace, bM ? "u " : "");
409 if (asciifile)
411 out = gmx_ffopen(asciifile, "w");
412 for (j = 0; j < ndim; j++)
414 for (i = 0; i < ndim; i += 3)
416 fprintf(out, "%g %g %g\n",
417 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
420 gmx_ffclose(out);
423 if (xpmfile)
425 min = 0;
426 max = 0;
427 snew(mat2, ndim);
428 for (j = 0; j < ndim; j++)
430 mat2[j] = &(mat[ndim*j]);
431 for (i = 0; i <= j; i++)
433 if (mat2[j][i] < min)
435 min = mat2[j][i];
437 if (mat2[j][j] > max)
439 max = mat2[j][i];
443 snew(axis, ndim);
444 for (i = 0; i < ndim; i++)
446 axis[i] = i+1;
448 rlo.r = 0; rlo.g = 0; rlo.b = 1;
449 rmi.r = 1; rmi.g = 1; rmi.b = 1;
450 rhi.r = 1; rhi.g = 0; rhi.b = 0;
451 out = gmx_ffopen(xpmfile, "w");
452 nlevels = 80;
453 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
454 "dim", "dim", ndim, ndim, axis, axis,
455 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
456 gmx_ffclose(out);
457 sfree(axis);
458 sfree(mat2);
461 if (xpmafile)
463 min = 0;
464 max = 0;
465 snew(mat2, ndim/DIM);
466 for (i = 0; i < ndim/DIM; i++)
468 snew(mat2[i], ndim/DIM);
470 for (j = 0; j < ndim/DIM; j++)
472 for (i = 0; i <= j; i++)
474 mat2[j][i] = 0;
475 for (d = 0; d < DIM; d++)
477 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
479 if (mat2[j][i] < min)
481 min = mat2[j][i];
483 if (mat2[j][j] > max)
485 max = mat2[j][i];
487 mat2[i][j] = mat2[j][i];
490 snew(axis, ndim/DIM);
491 for (i = 0; i < ndim/DIM; i++)
493 axis[i] = i+1;
495 rlo.r = 0; rlo.g = 0; rlo.b = 1;
496 rmi.r = 1; rmi.g = 1; rmi.b = 1;
497 rhi.r = 1; rhi.g = 0; rhi.b = 0;
498 out = gmx_ffopen(xpmafile, "w");
499 nlevels = 80;
500 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
501 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
502 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
503 gmx_ffclose(out);
504 sfree(axis);
505 for (i = 0; i < ndim/DIM; i++)
507 sfree(mat2[i]);
509 sfree(mat2);
513 /* call diagonalization routine */
515 snew(eigenvalues, ndim);
516 snew(eigenvectors, ndim*ndim);
518 std::memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
519 fprintf(stderr, "\nDiagonalizing ...\n");
520 fflush(stderr);
521 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
522 sfree(eigenvectors);
524 /* now write the output */
526 sum = 0;
527 for (i = 0; i < ndim; i++)
529 sum += eigenvalues[i];
531 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
532 sum, bM ? "u " : "");
533 if (std::abs(trace-sum) > 0.01*trace)
535 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
538 /* Set 'end', the maximum eigenvector and -value index used for output */
539 if (end == -1)
541 if (nframes-1 < ndim)
543 end = nframes-1;
544 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
545 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
546 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
548 else
550 end = ndim;
554 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
556 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
557 out = xvgropen(eigvalfile,
558 "Eigenvalues of the covariance matrix",
559 "Eigenvector index", str, oenv);
560 for (i = 0; (i < end); i++)
562 fprintf (out, "%10d %g\n", static_cast<int>(i+1), eigenvalues[ndim-1-i]);
564 xvgrclose(out);
566 if (bFit)
568 /* misuse lambda: 0/1 mass weighted analysis no/yes */
569 if (nfit == natoms)
571 WriteXref = eWXR_YES;
572 for (i = 0; i < nfit; i++)
574 copy_rvec(xref[ifit[i]], x[i]);
577 else
579 WriteXref = eWXR_NO;
582 else
584 /* misuse lambda: -1 for no fit */
585 WriteXref = eWXR_NOFIT;
588 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
589 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
591 out = gmx_ffopen(logfile, "w");
593 gmx_format_current_time(timebuf, STRLEN);
594 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
596 fprintf(out, "Program: %s\n", argv[0]);
597 gmx_getcwd(str, STRLEN);
599 fprintf(out, "Working directory: %s\n\n", str);
601 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
602 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv).c_str());
603 if (bFit)
605 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
607 if (ndxfile)
609 fprintf(out, "Read index groups from %s\n", ndxfile);
611 fprintf(out, "\n");
613 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
614 if (bFit)
616 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
618 else
620 fprintf(out, "No fit was used\n");
622 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
623 if (bFit)
625 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
627 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim), static_cast<int>(ndim));
628 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
629 trace);
630 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
631 sum);
633 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
634 if (WriteXref == eWXR_YES)
636 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
638 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
639 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
641 gmx_ffclose(out);
643 fprintf(stderr, "Wrote the log to %s\n", logfile);
645 return 0;