2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/linearalgebra/eigensolver.h"
50 #include "gromacs/math/do_fit.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/pbcutil/pbc.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc
, char *argv
[])
65 const char *desc
[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number as timestamp.",
80 "The eigenvectors can be analyzed with [gmx-anaeig].",
82 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
83 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
85 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
87 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
88 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
91 "Note that the diagonalization of a matrix requires memory and time",
92 "that will increase at least as fast as than the square of the number",
93 "of atoms involved. It is easy to run out of memory, in which",
94 "case this tool will probably exit with a 'Segmentation fault'. You",
95 "should consider carefully whether a reduced set of atoms will meet",
96 "your needs for lower costs."
98 static gmx_bool bFit
= TRUE
, bRef
= FALSE
, bM
= FALSE
, bPBC
= TRUE
;
101 { "-fit", FALSE
, etBOOL
, {&bFit
},
102 "Fit to a reference structure"},
103 { "-ref", FALSE
, etBOOL
, {&bRef
},
104 "Use the deviation from the conformation in the structure file instead of from the average" },
105 { "-mwa", FALSE
, etBOOL
, {&bM
},
106 "Mass-weighted covariance analysis"},
107 { "-last", FALSE
, etINT
, {&end
},
108 "Last eigenvector to write away (-1 is till the last)" },
109 { "-pbc", FALSE
, etBOOL
, {&bPBC
},
110 "Apply corrections for periodic boundary conditions" }
112 FILE *out
= NULL
; /* initialization makes all compilers happy */
117 rvec
*x
, *xread
, *xref
, *xav
, *xproj
;
119 real
*sqrtm
, *mat
, *eigenvalues
, sum
, trace
, inv_nframes
;
120 real t
, tstart
, tend
, **mat2
;
121 real xj
, *w_rls
= NULL
;
122 real min
, max
, *axis
;
123 int natoms
, nat
, nframes0
, nframes
, nlevels
;
124 gmx_int64_t ndim
, i
, j
, k
, l
;
126 const char *fitfile
, *trxfile
, *ndxfile
;
127 const char *eigvalfile
, *eigvecfile
, *averfile
, *logfile
;
128 const char *asciifile
, *xpmfile
, *xpmafile
;
129 char str
[STRLEN
], *fitname
, *ananame
;
132 gmx_bool bDiffMass1
, bDiffMass2
;
133 char timebuf
[STRLEN
];
136 gmx_output_env_t
*oenv
;
137 gmx_rmpbc_t gpbc
= NULL
;
140 { efTRX
, "-f", NULL
, ffREAD
},
141 { efTPS
, NULL
, NULL
, ffREAD
},
142 { efNDX
, NULL
, NULL
, ffOPTRD
},
143 { efXVG
, NULL
, "eigenval", ffWRITE
},
144 { efTRN
, "-v", "eigenvec", ffWRITE
},
145 { efSTO
, "-av", "average.pdb", ffWRITE
},
146 { efLOG
, NULL
, "covar", ffWRITE
},
147 { efDAT
, "-ascii", "covar", ffOPTWR
},
148 { efXPM
, "-xpm", "covar", ffOPTWR
},
149 { efXPM
, "-xpma", "covara", ffOPTWR
}
151 #define NFILE asize(fnm)
153 if (!parse_common_args(&argc
, argv
, PCA_CAN_TIME
| PCA_TIME_UNIT
,
154 NFILE
, fnm
, asize(pa
), pa
, asize(desc
), desc
, 0, NULL
, &oenv
))
161 fitfile
= ftp2fn(efTPS
, NFILE
, fnm
);
162 trxfile
= ftp2fn(efTRX
, NFILE
, fnm
);
163 ndxfile
= ftp2fn_null(efNDX
, NFILE
, fnm
);
164 eigvalfile
= ftp2fn(efXVG
, NFILE
, fnm
);
165 eigvecfile
= ftp2fn(efTRN
, NFILE
, fnm
);
166 averfile
= ftp2fn(efSTO
, NFILE
, fnm
);
167 logfile
= ftp2fn(efLOG
, NFILE
, fnm
);
168 asciifile
= opt2fn_null("-ascii", NFILE
, fnm
);
169 xpmfile
= opt2fn_null("-xpm", NFILE
, fnm
);
170 xpmafile
= opt2fn_null("-xpma", NFILE
, fnm
);
172 read_tps_conf(fitfile
, &top
, &ePBC
, &xref
, NULL
, box
, TRUE
);
177 printf("\nChoose a group for the least squares fit\n");
178 get_index(atoms
, ndxfile
, 1, &nfit
, &ifit
, &fitname
);
181 gmx_fatal(FARGS
, "Need >= 3 points to fit!\n");
188 printf("\nChoose a group for the covariance analysis\n");
189 get_index(atoms
, ndxfile
, 1, &natoms
, &index
, &ananame
);
194 snew(w_rls
, atoms
->nr
);
195 for (i
= 0; (i
< nfit
); i
++)
197 w_rls
[ifit
[i
]] = atoms
->atom
[ifit
[i
]].m
;
200 bDiffMass1
= bDiffMass1
|| (w_rls
[ifit
[i
]] != w_rls
[ifit
[i
-1]]);
206 for (i
= 0; (i
< natoms
); i
++)
210 sqrtm
[i
] = std::sqrt(atoms
->atom
[index
[i
]].m
);
213 bDiffMass2
= bDiffMass2
|| (sqrtm
[i
] != sqrtm
[i
-1]);
222 if (bFit
&& bDiffMass1
&& !bDiffMass2
)
224 bDiffMass1
= natoms
!= nfit
;
225 for (i
= 0; (i
< natoms
) && !bDiffMass1
; i
++)
227 bDiffMass1
= index
[i
] != ifit
[i
];
232 "Note: the fit and analysis group are identical,\n"
233 " while the fit is mass weighted and the analysis is not.\n"
234 " Making the fit non mass weighted.\n\n");
235 for (i
= 0; (i
< nfit
); i
++)
237 w_rls
[ifit
[i
]] = 1.0;
242 /* Prepare reference frame */
245 gpbc
= gmx_rmpbc_init(&top
.idef
, ePBC
, atoms
->nr
);
246 gmx_rmpbc(gpbc
, atoms
->nr
, box
, xref
);
250 reset_x(nfit
, ifit
, atoms
->nr
, NULL
, xref
, w_rls
);
256 if (std::sqrt(static_cast<real
>(GMX_INT64_MAX
)) < static_cast<real
>(ndim
))
258 gmx_fatal(FARGS
, "Number of degrees of freedoms to large for matrix.\n");
260 snew(mat
, ndim
*ndim
);
262 fprintf(stderr
, "Calculating the average structure ...\n");
264 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
265 if (nat
!= atoms
->nr
)
267 fprintf(stderr
, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms
, nat
);
272 /* calculate x: a fitted struture of the selected atoms */
275 gmx_rmpbc(gpbc
, nat
, box
, xread
);
279 reset_x(nfit
, ifit
, nat
, NULL
, xread
, w_rls
);
280 do_fit(nat
, w_rls
, xref
, xread
);
282 for (i
= 0; i
< natoms
; i
++)
284 rvec_inc(xav
[i
], xread
[index
[i
]]);
287 while (read_next_x(oenv
, status
, &t
, xread
, box
));
290 inv_nframes
= 1.0/nframes0
;
291 for (i
= 0; i
< natoms
; i
++)
293 for (d
= 0; d
< DIM
; d
++)
295 xav
[i
][d
] *= inv_nframes
;
296 xread
[index
[i
]][d
] = xav
[i
][d
];
299 write_sto_conf_indexed(opt2fn("-av", NFILE
, fnm
), "Average structure",
300 atoms
, xread
, NULL
, epbcNONE
, zerobox
, natoms
, index
);
303 fprintf(stderr
, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim
), static_cast<int>(ndim
));
305 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
311 /* calculate x: a (fitted) structure of the selected atoms */
314 gmx_rmpbc(gpbc
, nat
, box
, xread
);
318 reset_x(nfit
, ifit
, nat
, NULL
, xread
, w_rls
);
319 do_fit(nat
, w_rls
, xref
, xread
);
323 for (i
= 0; i
< natoms
; i
++)
325 rvec_sub(xread
[index
[i
]], xref
[index
[i
]], x
[i
]);
330 for (i
= 0; i
< natoms
; i
++)
332 rvec_sub(xread
[index
[i
]], xav
[i
], x
[i
]);
336 for (j
= 0; j
< natoms
; j
++)
338 for (dj
= 0; dj
< DIM
; dj
++)
342 for (i
= j
; i
< natoms
; i
++)
345 for (d
= 0; d
< DIM
; d
++)
347 mat
[l
+d
] += x
[i
][d
]*xj
;
353 while (read_next_x(oenv
, status
, &t
, xread
, box
) &&
354 (bRef
|| nframes
< nframes0
));
356 gmx_rmpbc_done(gpbc
);
358 fprintf(stderr
, "Read %d frames\n", nframes
);
362 /* copy the reference structure to the ouput array x */
364 for (i
= 0; i
< natoms
; i
++)
366 copy_rvec(xref
[index
[i
]], xproj
[i
]);
374 /* correct the covariance matrix for the mass */
375 inv_nframes
= 1.0/nframes
;
376 for (j
= 0; j
< natoms
; j
++)
378 for (dj
= 0; dj
< DIM
; dj
++)
380 for (i
= j
; i
< natoms
; i
++)
382 k
= ndim
*(DIM
*j
+dj
)+DIM
*i
;
383 for (d
= 0; d
< DIM
; d
++)
385 mat
[k
+d
] = mat
[k
+d
]*inv_nframes
*sqrtm
[i
]*sqrtm
[j
];
391 /* symmetrize the matrix */
392 for (j
= 0; j
< ndim
; j
++)
394 for (i
= j
; i
< ndim
; i
++)
396 mat
[ndim
*i
+j
] = mat
[ndim
*j
+i
];
401 for (i
= 0; i
< ndim
; i
++)
403 trace
+= mat
[i
*ndim
+i
];
405 fprintf(stderr
, "\nTrace of the covariance matrix: %g (%snm^2)\n",
406 trace
, bM
? "u " : "");
410 out
= gmx_ffopen(asciifile
, "w");
411 for (j
= 0; j
< ndim
; j
++)
413 for (i
= 0; i
< ndim
; i
+= 3)
415 fprintf(out
, "%g %g %g\n",
416 mat
[ndim
*j
+i
], mat
[ndim
*j
+i
+1], mat
[ndim
*j
+i
+2]);
427 for (j
= 0; j
< ndim
; j
++)
429 mat2
[j
] = &(mat
[ndim
*j
]);
430 for (i
= 0; i
<= j
; i
++)
432 if (mat2
[j
][i
] < min
)
436 if (mat2
[j
][j
] > max
)
443 for (i
= 0; i
< ndim
; i
++)
447 rlo
.r
= 0; rlo
.g
= 0; rlo
.b
= 1;
448 rmi
.r
= 1; rmi
.g
= 1; rmi
.b
= 1;
449 rhi
.r
= 1; rhi
.g
= 0; rhi
.b
= 0;
450 out
= gmx_ffopen(xpmfile
, "w");
452 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2",
453 "dim", "dim", ndim
, ndim
, axis
, axis
,
454 mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
464 snew(mat2
, ndim
/DIM
);
465 for (i
= 0; i
< ndim
/DIM
; i
++)
467 snew(mat2
[i
], ndim
/DIM
);
469 for (j
= 0; j
< ndim
/DIM
; j
++)
471 for (i
= 0; i
<= j
; i
++)
474 for (d
= 0; d
< DIM
; d
++)
476 mat2
[j
][i
] += mat
[ndim
*(DIM
*j
+d
)+DIM
*i
+d
];
478 if (mat2
[j
][i
] < min
)
482 if (mat2
[j
][j
] > max
)
486 mat2
[i
][j
] = mat2
[j
][i
];
489 snew(axis
, ndim
/DIM
);
490 for (i
= 0; i
< ndim
/DIM
; i
++)
494 rlo
.r
= 0; rlo
.g
= 0; rlo
.b
= 1;
495 rmi
.r
= 1; rmi
.g
= 1; rmi
.b
= 1;
496 rhi
.r
= 1; rhi
.g
= 0; rhi
.b
= 0;
497 out
= gmx_ffopen(xpmafile
, "w");
499 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2",
500 "atom", "atom", ndim
/DIM
, ndim
/DIM
, axis
, axis
,
501 mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
504 for (i
= 0; i
< ndim
/DIM
; i
++)
512 /* call diagonalization routine */
514 snew(eigenvalues
, ndim
);
515 snew(eigenvectors
, ndim
*ndim
);
517 std::memcpy(eigenvectors
, mat
, ndim
*ndim
*sizeof(real
));
518 fprintf(stderr
, "\nDiagonalizing ...\n");
520 eigensolver(eigenvectors
, ndim
, 0, ndim
, eigenvalues
, mat
);
523 /* now write the output */
526 for (i
= 0; i
< ndim
; i
++)
528 sum
+= eigenvalues
[i
];
530 fprintf(stderr
, "\nSum of the eigenvalues: %g (%snm^2)\n",
531 sum
, bM
? "u " : "");
532 if (std::abs(trace
-sum
) > 0.01*trace
)
534 fprintf(stderr
, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
537 /* Set 'end', the maximum eigenvector and -value index used for output */
540 if (nframes
-1 < ndim
)
543 fprintf(stderr
, "\nWARNING: there are fewer frames in your trajectory than there are\n");
544 fprintf(stderr
, "degrees of freedom in your system. Only generating the first\n");
545 fprintf(stderr
, "%d out of %d eigenvectors and eigenvalues.\n", end
, static_cast<int>(ndim
));
553 fprintf(stderr
, "\nWriting eigenvalues to %s\n", eigvalfile
);
555 sprintf(str
, "(%snm\\S2\\N)", bM
? "u " : "");
556 out
= xvgropen(eigvalfile
,
557 "Eigenvalues of the covariance matrix",
558 "Eigenvector index", str
, oenv
);
559 for (i
= 0; (i
< end
); i
++)
561 fprintf (out
, "%10d %g\n", static_cast<int>(i
+1), eigenvalues
[ndim
-1-i
]);
567 /* misuse lambda: 0/1 mass weighted analysis no/yes */
570 WriteXref
= eWXR_YES
;
571 for (i
= 0; i
< nfit
; i
++)
573 copy_rvec(xref
[ifit
[i
]], x
[i
]);
583 /* misuse lambda: -1 for no fit */
584 WriteXref
= eWXR_NOFIT
;
587 write_eigenvectors(eigvecfile
, natoms
, mat
, TRUE
, 1, end
,
588 WriteXref
, x
, bDiffMass1
, xproj
, bM
, eigenvalues
);
590 out
= gmx_ffopen(logfile
, "w");
592 gmx_format_current_time(timebuf
, STRLEN
);
593 fprintf(out
, "Covariance analysis log, written %s\n", timebuf
);
595 fprintf(out
, "Program: %s\n", argv
[0]);
596 gmx_getcwd(str
, STRLEN
);
598 fprintf(out
, "Working directory: %s\n\n", str
);
600 fprintf(out
, "Read %d frames from %s (time %g to %g %s)\n", nframes
, trxfile
,
601 output_env_conv_time(oenv
, tstart
), output_env_conv_time(oenv
, tend
), output_env_get_time_unit(oenv
));
604 fprintf(out
, "Read reference structure for fit from %s\n", fitfile
);
608 fprintf(out
, "Read index groups from %s\n", ndxfile
);
612 fprintf(out
, "Analysis group is '%s' (%d atoms)\n", ananame
, natoms
);
615 fprintf(out
, "Fit group is '%s' (%d atoms)\n", fitname
, nfit
);
619 fprintf(out
, "No fit was used\n");
621 fprintf(out
, "Analysis is %smass weighted\n", bDiffMass2
? "" : "non-");
624 fprintf(out
, "Fit is %smass weighted\n", bDiffMass1
? "" : "non-");
626 fprintf(out
, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim
), static_cast<int>(ndim
));
627 fprintf(out
, "Trace of the covariance matrix before diagonalizing: %g\n",
629 fprintf(out
, "Trace of the covariance matrix after diagonalizing: %g\n\n",
632 fprintf(out
, "Wrote %d eigenvalues to %s\n", static_cast<int>(end
), eigvalfile
);
633 if (WriteXref
== eWXR_YES
)
635 fprintf(out
, "Wrote reference structure to %s\n", eigvecfile
);
637 fprintf(out
, "Wrote average structure to %s and %s\n", averfile
, eigvecfile
);
638 fprintf(out
, "Wrote eigenvectors %d to %d to %s\n", 1, end
, eigvecfile
);
642 fprintf(stderr
, "Wrote the log to %s\n", logfile
);