Split txtdump.*, part 1
[gromacs.git] / src / gromacs / gmxana / gmx_covar.cpp
blobb7ee514a5697bfc952d34f2463949ae537b7e774
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstring>
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/linearalgebra/eigensolver.h"
50 #include "gromacs/math/do_fit.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/pbcutil/pbc.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc, char *argv[])
65 const char *desc[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
67 "covariance matrix.",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
72 "[PAR]",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number as timestamp.",
79 "[PAR]",
80 "The eigenvectors can be analyzed with [gmx-anaeig].",
81 "[PAR]",
82 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
83 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
84 "[PAR]",
85 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
86 "[PAR]",
87 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
88 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
89 "written.",
90 "[PAR]",
91 "Note that the diagonalization of a matrix requires memory and time",
92 "that will increase at least as fast as than the square of the number",
93 "of atoms involved. It is easy to run out of memory, in which",
94 "case this tool will probably exit with a 'Segmentation fault'. You",
95 "should consider carefully whether a reduced set of atoms will meet",
96 "your needs for lower costs."
98 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
99 static int end = -1;
100 t_pargs pa[] = {
101 { "-fit", FALSE, etBOOL, {&bFit},
102 "Fit to a reference structure"},
103 { "-ref", FALSE, etBOOL, {&bRef},
104 "Use the deviation from the conformation in the structure file instead of from the average" },
105 { "-mwa", FALSE, etBOOL, {&bM},
106 "Mass-weighted covariance analysis"},
107 { "-last", FALSE, etINT, {&end},
108 "Last eigenvector to write away (-1 is till the last)" },
109 { "-pbc", FALSE, etBOOL, {&bPBC},
110 "Apply corrections for periodic boundary conditions" }
112 FILE *out = NULL; /* initialization makes all compilers happy */
113 t_trxstatus *status;
114 t_topology top;
115 int ePBC;
116 t_atoms *atoms;
117 rvec *x, *xread, *xref, *xav, *xproj;
118 matrix box, zerobox;
119 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
120 real t, tstart, tend, **mat2;
121 real xj, *w_rls = NULL;
122 real min, max, *axis;
123 int natoms, nat, nframes0, nframes, nlevels;
124 gmx_int64_t ndim, i, j, k, l;
125 int WriteXref;
126 const char *fitfile, *trxfile, *ndxfile;
127 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
128 const char *asciifile, *xpmfile, *xpmafile;
129 char str[STRLEN], *fitname, *ananame;
130 int d, dj, nfit;
131 int *index, *ifit;
132 gmx_bool bDiffMass1, bDiffMass2;
133 char timebuf[STRLEN];
134 t_rgb rlo, rmi, rhi;
135 real *eigenvectors;
136 gmx_output_env_t *oenv;
137 gmx_rmpbc_t gpbc = NULL;
139 t_filenm fnm[] = {
140 { efTRX, "-f", NULL, ffREAD },
141 { efTPS, NULL, NULL, ffREAD },
142 { efNDX, NULL, NULL, ffOPTRD },
143 { efXVG, NULL, "eigenval", ffWRITE },
144 { efTRN, "-v", "eigenvec", ffWRITE },
145 { efSTO, "-av", "average.pdb", ffWRITE },
146 { efLOG, NULL, "covar", ffWRITE },
147 { efDAT, "-ascii", "covar", ffOPTWR },
148 { efXPM, "-xpm", "covar", ffOPTWR },
149 { efXPM, "-xpma", "covara", ffOPTWR }
151 #define NFILE asize(fnm)
153 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT,
154 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
156 return 0;
159 clear_mat(zerobox);
161 fitfile = ftp2fn(efTPS, NFILE, fnm);
162 trxfile = ftp2fn(efTRX, NFILE, fnm);
163 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
164 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
165 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
166 averfile = ftp2fn(efSTO, NFILE, fnm);
167 logfile = ftp2fn(efLOG, NFILE, fnm);
168 asciifile = opt2fn_null("-ascii", NFILE, fnm);
169 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
170 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
172 read_tps_conf(fitfile, &top, &ePBC, &xref, NULL, box, TRUE);
173 atoms = &top.atoms;
175 if (bFit)
177 printf("\nChoose a group for the least squares fit\n");
178 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
179 if (nfit < 3)
181 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
184 else
186 nfit = 0;
188 printf("\nChoose a group for the covariance analysis\n");
189 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
191 bDiffMass1 = FALSE;
192 if (bFit)
194 snew(w_rls, atoms->nr);
195 for (i = 0; (i < nfit); i++)
197 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
198 if (i)
200 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
204 bDiffMass2 = FALSE;
205 snew(sqrtm, natoms);
206 for (i = 0; (i < natoms); i++)
208 if (bM)
210 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
211 if (i)
213 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
216 else
218 sqrtm[i] = 1.0;
222 if (bFit && bDiffMass1 && !bDiffMass2)
224 bDiffMass1 = natoms != nfit;
225 for (i = 0; (i < natoms) && !bDiffMass1; i++)
227 bDiffMass1 = index[i] != ifit[i];
229 if (!bDiffMass1)
231 fprintf(stderr, "\n"
232 "Note: the fit and analysis group are identical,\n"
233 " while the fit is mass weighted and the analysis is not.\n"
234 " Making the fit non mass weighted.\n\n");
235 for (i = 0; (i < nfit); i++)
237 w_rls[ifit[i]] = 1.0;
242 /* Prepare reference frame */
243 if (bPBC)
245 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
246 gmx_rmpbc(gpbc, atoms->nr, box, xref);
248 if (bFit)
250 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
253 snew(x, natoms);
254 snew(xav, natoms);
255 ndim = natoms*DIM;
256 if (std::sqrt(static_cast<real>(GMX_INT64_MAX)) < static_cast<real>(ndim))
258 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
260 snew(mat, ndim*ndim);
262 fprintf(stderr, "Calculating the average structure ...\n");
263 nframes0 = 0;
264 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
265 if (nat != atoms->nr)
267 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
271 nframes0++;
272 /* calculate x: a fitted struture of the selected atoms */
273 if (bPBC)
275 gmx_rmpbc(gpbc, nat, box, xread);
277 if (bFit)
279 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
280 do_fit(nat, w_rls, xref, xread);
282 for (i = 0; i < natoms; i++)
284 rvec_inc(xav[i], xread[index[i]]);
287 while (read_next_x(oenv, status, &t, xread, box));
288 close_trj(status);
290 inv_nframes = 1.0/nframes0;
291 for (i = 0; i < natoms; i++)
293 for (d = 0; d < DIM; d++)
295 xav[i][d] *= inv_nframes;
296 xread[index[i]][d] = xav[i][d];
299 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
300 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
301 sfree(xread);
303 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim), static_cast<int>(ndim));
304 nframes = 0;
305 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
306 tstart = t;
309 nframes++;
310 tend = t;
311 /* calculate x: a (fitted) structure of the selected atoms */
312 if (bPBC)
314 gmx_rmpbc(gpbc, nat, box, xread);
316 if (bFit)
318 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
319 do_fit(nat, w_rls, xref, xread);
321 if (bRef)
323 for (i = 0; i < natoms; i++)
325 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
328 else
330 for (i = 0; i < natoms; i++)
332 rvec_sub(xread[index[i]], xav[i], x[i]);
336 for (j = 0; j < natoms; j++)
338 for (dj = 0; dj < DIM; dj++)
340 k = ndim*(DIM*j+dj);
341 xj = x[j][dj];
342 for (i = j; i < natoms; i++)
344 l = k+DIM*i;
345 for (d = 0; d < DIM; d++)
347 mat[l+d] += x[i][d]*xj;
353 while (read_next_x(oenv, status, &t, xread, box) &&
354 (bRef || nframes < nframes0));
355 close_trj(status);
356 gmx_rmpbc_done(gpbc);
358 fprintf(stderr, "Read %d frames\n", nframes);
360 if (bRef)
362 /* copy the reference structure to the ouput array x */
363 snew(xproj, natoms);
364 for (i = 0; i < natoms; i++)
366 copy_rvec(xref[index[i]], xproj[i]);
369 else
371 xproj = xav;
374 /* correct the covariance matrix for the mass */
375 inv_nframes = 1.0/nframes;
376 for (j = 0; j < natoms; j++)
378 for (dj = 0; dj < DIM; dj++)
380 for (i = j; i < natoms; i++)
382 k = ndim*(DIM*j+dj)+DIM*i;
383 for (d = 0; d < DIM; d++)
385 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
391 /* symmetrize the matrix */
392 for (j = 0; j < ndim; j++)
394 for (i = j; i < ndim; i++)
396 mat[ndim*i+j] = mat[ndim*j+i];
400 trace = 0;
401 for (i = 0; i < ndim; i++)
403 trace += mat[i*ndim+i];
405 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
406 trace, bM ? "u " : "");
408 if (asciifile)
410 out = gmx_ffopen(asciifile, "w");
411 for (j = 0; j < ndim; j++)
413 for (i = 0; i < ndim; i += 3)
415 fprintf(out, "%g %g %g\n",
416 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
419 gmx_ffclose(out);
422 if (xpmfile)
424 min = 0;
425 max = 0;
426 snew(mat2, ndim);
427 for (j = 0; j < ndim; j++)
429 mat2[j] = &(mat[ndim*j]);
430 for (i = 0; i <= j; i++)
432 if (mat2[j][i] < min)
434 min = mat2[j][i];
436 if (mat2[j][j] > max)
438 max = mat2[j][i];
442 snew(axis, ndim);
443 for (i = 0; i < ndim; i++)
445 axis[i] = i+1;
447 rlo.r = 0; rlo.g = 0; rlo.b = 1;
448 rmi.r = 1; rmi.g = 1; rmi.b = 1;
449 rhi.r = 1; rhi.g = 0; rhi.b = 0;
450 out = gmx_ffopen(xpmfile, "w");
451 nlevels = 80;
452 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
453 "dim", "dim", ndim, ndim, axis, axis,
454 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
455 gmx_ffclose(out);
456 sfree(axis);
457 sfree(mat2);
460 if (xpmafile)
462 min = 0;
463 max = 0;
464 snew(mat2, ndim/DIM);
465 for (i = 0; i < ndim/DIM; i++)
467 snew(mat2[i], ndim/DIM);
469 for (j = 0; j < ndim/DIM; j++)
471 for (i = 0; i <= j; i++)
473 mat2[j][i] = 0;
474 for (d = 0; d < DIM; d++)
476 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
478 if (mat2[j][i] < min)
480 min = mat2[j][i];
482 if (mat2[j][j] > max)
484 max = mat2[j][i];
486 mat2[i][j] = mat2[j][i];
489 snew(axis, ndim/DIM);
490 for (i = 0; i < ndim/DIM; i++)
492 axis[i] = i+1;
494 rlo.r = 0; rlo.g = 0; rlo.b = 1;
495 rmi.r = 1; rmi.g = 1; rmi.b = 1;
496 rhi.r = 1; rhi.g = 0; rhi.b = 0;
497 out = gmx_ffopen(xpmafile, "w");
498 nlevels = 80;
499 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
500 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
501 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
502 gmx_ffclose(out);
503 sfree(axis);
504 for (i = 0; i < ndim/DIM; i++)
506 sfree(mat2[i]);
508 sfree(mat2);
512 /* call diagonalization routine */
514 snew(eigenvalues, ndim);
515 snew(eigenvectors, ndim*ndim);
517 std::memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
518 fprintf(stderr, "\nDiagonalizing ...\n");
519 fflush(stderr);
520 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
521 sfree(eigenvectors);
523 /* now write the output */
525 sum = 0;
526 for (i = 0; i < ndim; i++)
528 sum += eigenvalues[i];
530 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
531 sum, bM ? "u " : "");
532 if (std::abs(trace-sum) > 0.01*trace)
534 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
537 /* Set 'end', the maximum eigenvector and -value index used for output */
538 if (end == -1)
540 if (nframes-1 < ndim)
542 end = nframes-1;
543 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
544 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
545 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
547 else
549 end = ndim;
553 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
555 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
556 out = xvgropen(eigvalfile,
557 "Eigenvalues of the covariance matrix",
558 "Eigenvector index", str, oenv);
559 for (i = 0; (i < end); i++)
561 fprintf (out, "%10d %g\n", static_cast<int>(i+1), eigenvalues[ndim-1-i]);
563 xvgrclose(out);
565 if (bFit)
567 /* misuse lambda: 0/1 mass weighted analysis no/yes */
568 if (nfit == natoms)
570 WriteXref = eWXR_YES;
571 for (i = 0; i < nfit; i++)
573 copy_rvec(xref[ifit[i]], x[i]);
576 else
578 WriteXref = eWXR_NO;
581 else
583 /* misuse lambda: -1 for no fit */
584 WriteXref = eWXR_NOFIT;
587 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
588 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
590 out = gmx_ffopen(logfile, "w");
592 gmx_format_current_time(timebuf, STRLEN);
593 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
595 fprintf(out, "Program: %s\n", argv[0]);
596 gmx_getcwd(str, STRLEN);
598 fprintf(out, "Working directory: %s\n\n", str);
600 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
601 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
602 if (bFit)
604 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
606 if (ndxfile)
608 fprintf(out, "Read index groups from %s\n", ndxfile);
610 fprintf(out, "\n");
612 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
613 if (bFit)
615 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
617 else
619 fprintf(out, "No fit was used\n");
621 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
622 if (bFit)
624 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
626 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim), static_cast<int>(ndim));
627 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
628 trace);
629 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
630 sum);
632 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
633 if (WriteXref == eWXR_YES)
635 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
637 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
638 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
640 gmx_ffclose(out);
642 fprintf(stderr, "Wrote the log to %s\n", logfile);
644 return 0;