2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/linearalgebra/eigensolver.h"
50 #include "gromacs/math/do_fit.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/pbcutil/pbc.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc
, char *argv
[])
65 const char *desc
[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number and eigenvalue",
79 "as step number and timestamp, respectively.",
81 "The eigenvectors can be analyzed with [gmx-anaeig].",
83 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
84 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
86 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
88 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
89 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
92 "Note that the diagonalization of a matrix requires memory and time",
93 "that will increase at least as fast as than the square of the number",
94 "of atoms involved. It is easy to run out of memory, in which",
95 "case this tool will probably exit with a 'Segmentation fault'. You",
96 "should consider carefully whether a reduced set of atoms will meet",
97 "your needs for lower costs."
99 static gmx_bool bFit
= TRUE
, bRef
= FALSE
, bM
= FALSE
, bPBC
= TRUE
;
102 { "-fit", FALSE
, etBOOL
, {&bFit
},
103 "Fit to a reference structure"},
104 { "-ref", FALSE
, etBOOL
, {&bRef
},
105 "Use the deviation from the conformation in the structure file instead of from the average" },
106 { "-mwa", FALSE
, etBOOL
, {&bM
},
107 "Mass-weighted covariance analysis"},
108 { "-last", FALSE
, etINT
, {&end
},
109 "Last eigenvector to write away (-1 is till the last)" },
110 { "-pbc", FALSE
, etBOOL
, {&bPBC
},
111 "Apply corrections for periodic boundary conditions" }
113 FILE *out
= nullptr; /* initialization makes all compilers happy */
118 rvec
*x
, *xread
, *xref
, *xav
, *xproj
;
120 real
*sqrtm
, *mat
, *eigenvalues
, sum
, trace
, inv_nframes
;
121 real t
, tstart
, tend
, **mat2
;
122 real xj
, *w_rls
= nullptr;
123 real min
, max
, *axis
;
124 int natoms
, nat
, nframes0
, nframes
, nlevels
;
125 gmx_int64_t ndim
, i
, j
, k
, l
;
127 const char *fitfile
, *trxfile
, *ndxfile
;
128 const char *eigvalfile
, *eigvecfile
, *averfile
, *logfile
;
129 const char *asciifile
, *xpmfile
, *xpmafile
;
130 char str
[STRLEN
], *fitname
, *ananame
;
133 gmx_bool bDiffMass1
, bDiffMass2
;
134 char timebuf
[STRLEN
];
137 gmx_output_env_t
*oenv
;
138 gmx_rmpbc_t gpbc
= nullptr;
141 { efTRX
, "-f", nullptr, ffREAD
},
142 { efTPS
, nullptr, nullptr, ffREAD
},
143 { efNDX
, nullptr, nullptr, ffOPTRD
},
144 { efXVG
, nullptr, "eigenval", ffWRITE
},
145 { efTRN
, "-v", "eigenvec", ffWRITE
},
146 { efSTO
, "-av", "average.pdb", ffWRITE
},
147 { efLOG
, nullptr, "covar", ffWRITE
},
148 { efDAT
, "-ascii", "covar", ffOPTWR
},
149 { efXPM
, "-xpm", "covar", ffOPTWR
},
150 { efXPM
, "-xpma", "covara", ffOPTWR
}
152 #define NFILE asize(fnm)
154 if (!parse_common_args(&argc
, argv
, PCA_CAN_TIME
| PCA_TIME_UNIT
,
155 NFILE
, fnm
, asize(pa
), pa
, asize(desc
), desc
, 0, nullptr, &oenv
))
162 fitfile
= ftp2fn(efTPS
, NFILE
, fnm
);
163 trxfile
= ftp2fn(efTRX
, NFILE
, fnm
);
164 ndxfile
= ftp2fn_null(efNDX
, NFILE
, fnm
);
165 eigvalfile
= ftp2fn(efXVG
, NFILE
, fnm
);
166 eigvecfile
= ftp2fn(efTRN
, NFILE
, fnm
);
167 averfile
= ftp2fn(efSTO
, NFILE
, fnm
);
168 logfile
= ftp2fn(efLOG
, NFILE
, fnm
);
169 asciifile
= opt2fn_null("-ascii", NFILE
, fnm
);
170 xpmfile
= opt2fn_null("-xpm", NFILE
, fnm
);
171 xpmafile
= opt2fn_null("-xpma", NFILE
, fnm
);
173 read_tps_conf(fitfile
, &top
, &ePBC
, &xref
, nullptr, box
, TRUE
);
178 printf("\nChoose a group for the least squares fit\n");
179 get_index(atoms
, ndxfile
, 1, &nfit
, &ifit
, &fitname
);
182 gmx_fatal(FARGS
, "Need >= 3 points to fit!\n");
189 printf("\nChoose a group for the covariance analysis\n");
190 get_index(atoms
, ndxfile
, 1, &natoms
, &index
, &ananame
);
195 snew(w_rls
, atoms
->nr
);
196 for (i
= 0; (i
< nfit
); i
++)
198 w_rls
[ifit
[i
]] = atoms
->atom
[ifit
[i
]].m
;
201 bDiffMass1
= bDiffMass1
|| (w_rls
[ifit
[i
]] != w_rls
[ifit
[i
-1]]);
207 for (i
= 0; (i
< natoms
); i
++)
211 sqrtm
[i
] = std::sqrt(atoms
->atom
[index
[i
]].m
);
214 bDiffMass2
= bDiffMass2
|| (sqrtm
[i
] != sqrtm
[i
-1]);
223 if (bFit
&& bDiffMass1
&& !bDiffMass2
)
225 bDiffMass1
= natoms
!= nfit
;
226 for (i
= 0; (i
< natoms
) && !bDiffMass1
; i
++)
228 bDiffMass1
= index
[i
] != ifit
[i
];
233 "Note: the fit and analysis group are identical,\n"
234 " while the fit is mass weighted and the analysis is not.\n"
235 " Making the fit non mass weighted.\n\n");
236 for (i
= 0; (i
< nfit
); i
++)
238 w_rls
[ifit
[i
]] = 1.0;
243 /* Prepare reference frame */
246 gpbc
= gmx_rmpbc_init(&top
.idef
, ePBC
, atoms
->nr
);
247 gmx_rmpbc(gpbc
, atoms
->nr
, box
, xref
);
251 reset_x(nfit
, ifit
, atoms
->nr
, nullptr, xref
, w_rls
);
257 if (std::sqrt(static_cast<real
>(GMX_INT64_MAX
)) < static_cast<real
>(ndim
))
259 gmx_fatal(FARGS
, "Number of degrees of freedoms to large for matrix.\n");
261 snew(mat
, ndim
*ndim
);
263 fprintf(stderr
, "Calculating the average structure ...\n");
265 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
266 if (nat
!= atoms
->nr
)
268 fprintf(stderr
, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms
, nat
);
273 /* calculate x: a fitted struture of the selected atoms */
276 gmx_rmpbc(gpbc
, nat
, box
, xread
);
280 reset_x(nfit
, ifit
, nat
, nullptr, xread
, w_rls
);
281 do_fit(nat
, w_rls
, xref
, xread
);
283 for (i
= 0; i
< natoms
; i
++)
285 rvec_inc(xav
[i
], xread
[index
[i
]]);
288 while (read_next_x(oenv
, status
, &t
, xread
, box
));
291 inv_nframes
= 1.0/nframes0
;
292 for (i
= 0; i
< natoms
; i
++)
294 for (d
= 0; d
< DIM
; d
++)
296 xav
[i
][d
] *= inv_nframes
;
297 xread
[index
[i
]][d
] = xav
[i
][d
];
300 write_sto_conf_indexed(opt2fn("-av", NFILE
, fnm
), "Average structure",
301 atoms
, xread
, nullptr, epbcNONE
, zerobox
, natoms
, index
);
304 fprintf(stderr
, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim
), static_cast<int>(ndim
));
306 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
312 /* calculate x: a (fitted) structure of the selected atoms */
315 gmx_rmpbc(gpbc
, nat
, box
, xread
);
319 reset_x(nfit
, ifit
, nat
, nullptr, xread
, w_rls
);
320 do_fit(nat
, w_rls
, xref
, xread
);
324 for (i
= 0; i
< natoms
; i
++)
326 rvec_sub(xread
[index
[i
]], xref
[index
[i
]], x
[i
]);
331 for (i
= 0; i
< natoms
; i
++)
333 rvec_sub(xread
[index
[i
]], xav
[i
], x
[i
]);
337 for (j
= 0; j
< natoms
; j
++)
339 for (dj
= 0; dj
< DIM
; dj
++)
343 for (i
= j
; i
< natoms
; i
++)
346 for (d
= 0; d
< DIM
; d
++)
348 mat
[l
+d
] += x
[i
][d
]*xj
;
354 while (read_next_x(oenv
, status
, &t
, xread
, box
) &&
355 (bRef
|| nframes
< nframes0
));
357 gmx_rmpbc_done(gpbc
);
359 fprintf(stderr
, "Read %d frames\n", nframes
);
363 /* copy the reference structure to the ouput array x */
365 for (i
= 0; i
< natoms
; i
++)
367 copy_rvec(xref
[index
[i
]], xproj
[i
]);
375 /* correct the covariance matrix for the mass */
376 inv_nframes
= 1.0/nframes
;
377 for (j
= 0; j
< natoms
; j
++)
379 for (dj
= 0; dj
< DIM
; dj
++)
381 for (i
= j
; i
< natoms
; i
++)
383 k
= ndim
*(DIM
*j
+dj
)+DIM
*i
;
384 for (d
= 0; d
< DIM
; d
++)
386 mat
[k
+d
] = mat
[k
+d
]*inv_nframes
*sqrtm
[i
]*sqrtm
[j
];
392 /* symmetrize the matrix */
393 for (j
= 0; j
< ndim
; j
++)
395 for (i
= j
; i
< ndim
; i
++)
397 mat
[ndim
*i
+j
] = mat
[ndim
*j
+i
];
402 for (i
= 0; i
< ndim
; i
++)
404 trace
+= mat
[i
*ndim
+i
];
406 fprintf(stderr
, "\nTrace of the covariance matrix: %g (%snm^2)\n",
407 trace
, bM
? "u " : "");
411 out
= gmx_ffopen(asciifile
, "w");
412 for (j
= 0; j
< ndim
; j
++)
414 for (i
= 0; i
< ndim
; i
+= 3)
416 fprintf(out
, "%g %g %g\n",
417 mat
[ndim
*j
+i
], mat
[ndim
*j
+i
+1], mat
[ndim
*j
+i
+2]);
428 for (j
= 0; j
< ndim
; j
++)
430 mat2
[j
] = &(mat
[ndim
*j
]);
431 for (i
= 0; i
<= j
; i
++)
433 if (mat2
[j
][i
] < min
)
437 if (mat2
[j
][j
] > max
)
444 for (i
= 0; i
< ndim
; i
++)
448 rlo
.r
= 0; rlo
.g
= 0; rlo
.b
= 1;
449 rmi
.r
= 1; rmi
.g
= 1; rmi
.b
= 1;
450 rhi
.r
= 1; rhi
.g
= 0; rhi
.b
= 0;
451 out
= gmx_ffopen(xpmfile
, "w");
453 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2",
454 "dim", "dim", ndim
, ndim
, axis
, axis
,
455 mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
465 snew(mat2
, ndim
/DIM
);
466 for (i
= 0; i
< ndim
/DIM
; i
++)
468 snew(mat2
[i
], ndim
/DIM
);
470 for (j
= 0; j
< ndim
/DIM
; j
++)
472 for (i
= 0; i
<= j
; i
++)
475 for (d
= 0; d
< DIM
; d
++)
477 mat2
[j
][i
] += mat
[ndim
*(DIM
*j
+d
)+DIM
*i
+d
];
479 if (mat2
[j
][i
] < min
)
483 if (mat2
[j
][j
] > max
)
487 mat2
[i
][j
] = mat2
[j
][i
];
490 snew(axis
, ndim
/DIM
);
491 for (i
= 0; i
< ndim
/DIM
; i
++)
495 rlo
.r
= 0; rlo
.g
= 0; rlo
.b
= 1;
496 rmi
.r
= 1; rmi
.g
= 1; rmi
.b
= 1;
497 rhi
.r
= 1; rhi
.g
= 0; rhi
.b
= 0;
498 out
= gmx_ffopen(xpmafile
, "w");
500 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2",
501 "atom", "atom", ndim
/DIM
, ndim
/DIM
, axis
, axis
,
502 mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
505 for (i
= 0; i
< ndim
/DIM
; i
++)
513 /* call diagonalization routine */
515 snew(eigenvalues
, ndim
);
516 snew(eigenvectors
, ndim
*ndim
);
518 std::memcpy(eigenvectors
, mat
, ndim
*ndim
*sizeof(real
));
519 fprintf(stderr
, "\nDiagonalizing ...\n");
521 eigensolver(eigenvectors
, ndim
, 0, ndim
, eigenvalues
, mat
);
524 /* now write the output */
527 for (i
= 0; i
< ndim
; i
++)
529 sum
+= eigenvalues
[i
];
531 fprintf(stderr
, "\nSum of the eigenvalues: %g (%snm^2)\n",
532 sum
, bM
? "u " : "");
533 if (std::abs(trace
-sum
) > 0.01*trace
)
535 fprintf(stderr
, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
538 /* Set 'end', the maximum eigenvector and -value index used for output */
541 if (nframes
-1 < ndim
)
544 fprintf(stderr
, "\nWARNING: there are fewer frames in your trajectory than there are\n");
545 fprintf(stderr
, "degrees of freedom in your system. Only generating the first\n");
546 fprintf(stderr
, "%d out of %d eigenvectors and eigenvalues.\n", end
, static_cast<int>(ndim
));
554 fprintf(stderr
, "\nWriting eigenvalues to %s\n", eigvalfile
);
556 sprintf(str
, "(%snm\\S2\\N)", bM
? "u " : "");
557 out
= xvgropen(eigvalfile
,
558 "Eigenvalues of the covariance matrix",
559 "Eigenvector index", str
, oenv
);
560 for (i
= 0; (i
< end
); i
++)
562 fprintf (out
, "%10d %g\n", static_cast<int>(i
+1), eigenvalues
[ndim
-1-i
]);
568 /* misuse lambda: 0/1 mass weighted analysis no/yes */
571 WriteXref
= eWXR_YES
;
572 for (i
= 0; i
< nfit
; i
++)
574 copy_rvec(xref
[ifit
[i
]], x
[i
]);
584 /* misuse lambda: -1 for no fit */
585 WriteXref
= eWXR_NOFIT
;
588 write_eigenvectors(eigvecfile
, natoms
, mat
, TRUE
, 1, end
,
589 WriteXref
, x
, bDiffMass1
, xproj
, bM
, eigenvalues
);
591 out
= gmx_ffopen(logfile
, "w");
593 gmx_format_current_time(timebuf
, STRLEN
);
594 fprintf(out
, "Covariance analysis log, written %s\n", timebuf
);
596 fprintf(out
, "Program: %s\n", argv
[0]);
597 gmx_getcwd(str
, STRLEN
);
599 fprintf(out
, "Working directory: %s\n\n", str
);
601 fprintf(out
, "Read %d frames from %s (time %g to %g %s)\n", nframes
, trxfile
,
602 output_env_conv_time(oenv
, tstart
), output_env_conv_time(oenv
, tend
), output_env_get_time_unit(oenv
).c_str());
605 fprintf(out
, "Read reference structure for fit from %s\n", fitfile
);
609 fprintf(out
, "Read index groups from %s\n", ndxfile
);
613 fprintf(out
, "Analysis group is '%s' (%d atoms)\n", ananame
, natoms
);
616 fprintf(out
, "Fit group is '%s' (%d atoms)\n", fitname
, nfit
);
620 fprintf(out
, "No fit was used\n");
622 fprintf(out
, "Analysis is %smass weighted\n", bDiffMass2
? "" : "non-");
625 fprintf(out
, "Fit is %smass weighted\n", bDiffMass1
? "" : "non-");
627 fprintf(out
, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim
), static_cast<int>(ndim
));
628 fprintf(out
, "Trace of the covariance matrix before diagonalizing: %g\n",
630 fprintf(out
, "Trace of the covariance matrix after diagonalizing: %g\n\n",
633 fprintf(out
, "Wrote %d eigenvalues to %s\n", static_cast<int>(end
), eigvalfile
);
634 if (WriteXref
== eWXR_YES
)
636 fprintf(out
, "Wrote reference structure to %s\n", eigvecfile
);
638 fprintf(out
, "Wrote average structure to %s and %s\n", averfile
, eigvecfile
);
639 fprintf(out
, "Wrote eigenvectors %d to %d to %s\n", 1, end
, eigvecfile
);
643 fprintf(stderr
, "Wrote the log to %s\n", logfile
);